From 378d4ce7552df580e3ddd89c2faa9f8c5086d646 Mon Sep 17 00:00:00 2001 From: Uros Majstorovic Date: Wed, 2 Feb 2022 06:25:38 +0100 Subject: renamed crypto -> ext --- ext/libressl/crypto/aead/Makefile | 13 + ext/libressl/crypto/aead/e_chacha20poly1305.c | 395 ++ ext/libressl/crypto/aes/Makefile | 14 + ext/libressl/crypto/aes/aes-elf-armv4.S | 1074 +++++ ext/libressl/crypto/aes/aes-elf-x86_64.S | 2547 ++++++++++ ext/libressl/crypto/aes/aes-macosx-x86_64.S | 2544 ++++++++++ ext/libressl/crypto/aes/aes-masm-x86_64.S | 2948 ++++++++++++ ext/libressl/crypto/aes/aes-mingw64-x86_64.S | 2861 ++++++++++++ ext/libressl/crypto/aes/aes_cbc.c | 65 + ext/libressl/crypto/aes/aes_cfb.c | 84 + ext/libressl/crypto/aes/aes_core.c | 1374 ++++++ ext/libressl/crypto/aes/aes_ctr.c | 62 + ext/libressl/crypto/aes/aes_ecb.c | 69 + ext/libressl/crypto/aes/aes_ige.c | 194 + ext/libressl/crypto/aes/aes_locl.h | 83 + ext/libressl/crypto/aes/aes_misc.c | 65 + ext/libressl/crypto/aes/aes_ofb.c | 61 + ext/libressl/crypto/aes/aes_wrap.c | 133 + ext/libressl/crypto/aes/aesni-elf-x86_64.S | 2539 ++++++++++ ext/libressl/crypto/aes/aesni-macosx-x86_64.S | 2536 ++++++++++ ext/libressl/crypto/aes/aesni-masm-x86_64.S | 3099 ++++++++++++ ext/libressl/crypto/aes/aesni-mingw64-x86_64.S | 3008 ++++++++++++ ext/libressl/crypto/aes/aesni-sha1-elf-x86_64.S | 1401 ++++++ ext/libressl/crypto/aes/aesni-sha1-macosx-x86_64.S | 1398 ++++++ ext/libressl/crypto/aes/aesni-sha1-masm-x86_64.S | 1616 +++++++ .../crypto/aes/aesni-sha1-mingw64-x86_64.S | 1536 ++++++ ext/libressl/crypto/aes/bsaes-elf-x86_64.S | 2502 ++++++++++ ext/libressl/crypto/aes/bsaes-macosx-x86_64.S | 2499 ++++++++++ ext/libressl/crypto/aes/bsaes-masm-x86_64.S | 2803 +++++++++++ ext/libressl/crypto/aes/bsaes-mingw64-x86_64.S | 2725 +++++++++++ ext/libressl/crypto/aes/vpaes-elf-x86_64.S | 832 ++++ ext/libressl/crypto/aes/vpaes-macosx-x86_64.S | 829 ++++ ext/libressl/crypto/aes/vpaes-masm-x86_64.S | 1213 +++++ ext/libressl/crypto/aes/vpaes-mingw64-x86_64.S | 1125 +++++ ext/libressl/crypto/bf/Makefile | 14 + ext/libressl/crypto/bf/bf_cfb64.c | 121 + ext/libressl/crypto/bf/bf_ecb.c | 94 + ext/libressl/crypto/bf/bf_enc.c | 306 ++ ext/libressl/crypto/bf/bf_locl.h | 219 + ext/libressl/crypto/bf/bf_ofb64.c | 110 + ext/libressl/crypto/bf/bf_pi.h | 328 ++ ext/libressl/crypto/bf/bf_skey.c | 117 + ext/libressl/crypto/chacha/Makefile | 13 + ext/libressl/crypto/chacha/chacha-merged.c | 325 ++ ext/libressl/crypto/chacha/chacha.c | 87 + ext/libressl/crypto/compat/Makefile | 13 + ext/libressl/crypto/compat/arc4random.c | 216 + ext/libressl/crypto/compat/arc4random.h | 41 + ext/libressl/crypto/compat/arc4random_aix.h | 81 + ext/libressl/crypto/compat/arc4random_fe310.h | 27 + ext/libressl/crypto/compat/arc4random_freebsd.h | 87 + ext/libressl/crypto/compat/arc4random_hpux.h | 81 + ext/libressl/crypto/compat/arc4random_linux.h | 88 + ext/libressl/crypto/compat/arc4random_netbsd.h | 87 + ext/libressl/crypto/compat/arc4random_osx.h | 81 + ext/libressl/crypto/compat/arc4random_solaris.h | 81 + ext/libressl/crypto/compat/arc4random_uniform.c | 56 + ext/libressl/crypto/compat/arc4random_win.h | 78 + ext/libressl/crypto/compat/chacha_private.h | 222 + ext/libressl/crypto/compat/explicit_bzero.c | 19 + ext/libressl/crypto/compat/explicit_bzero_win.c | 13 + ext/libressl/crypto/compat/getentropy_aix.c | 402 ++ ext/libressl/crypto/compat/getentropy_freebsd.c | 60 + ext/libressl/crypto/compat/getentropy_hpux.c | 396 ++ ext/libressl/crypto/compat/getentropy_linux.c | 525 +++ ext/libressl/crypto/compat/getentropy_netbsd.c | 62 + ext/libressl/crypto/compat/getentropy_osx.c | 417 ++ ext/libressl/crypto/compat/getentropy_solaris.c | 422 ++ ext/libressl/crypto/compat/getentropy_win.c | 50 + ext/libressl/crypto/compat/timingsafe_bcmp.c | 29 + ext/libressl/crypto/compat/timingsafe_memcmp.c | 46 + ext/libressl/crypto/curve25519/Makefile | 14 + .../crypto/curve25519/curve25519-generic.c | 34 + ext/libressl/crypto/curve25519/curve25519.c | 4935 ++++++++++++++++++++ .../crypto/curve25519/curve25519_internal.h | 99 + ext/libressl/crypto/modes/Makefile | 14 + ext/libressl/crypto/modes/cbc128.c | 202 + ext/libressl/crypto/modes/ccm128.c | 441 ++ ext/libressl/crypto/modes/cfb128.c | 234 + ext/libressl/crypto/modes/ctr128.c | 251 + ext/libressl/crypto/modes/cts128.c | 267 ++ ext/libressl/crypto/modes/gcm128.c | 1566 +++++++ ext/libressl/crypto/modes/ghash-elf-armv4.S | 412 ++ ext/libressl/crypto/modes/ghash-elf-x86_64.S | 1030 ++++ ext/libressl/crypto/modes/ghash-macosx-x86_64.S | 1027 ++++ ext/libressl/crypto/modes/ghash-masm-x86_64.S | 1256 +++++ ext/libressl/crypto/modes/ghash-mingw64-x86_64.S | 1175 +++++ ext/libressl/crypto/modes/modes_lcl.h | 113 + ext/libressl/crypto/modes/ofb128.c | 119 + ext/libressl/crypto/modes/xts128.c | 185 + ext/libressl/crypto/poly1305/Makefile | 13 + ext/libressl/crypto/poly1305/poly1305-donna.c | 321 ++ ext/libressl/crypto/poly1305/poly1305.c | 38 + ext/libressl/crypto/sha/Makefile | 14 + ext/libressl/crypto/sha/sha1-elf-armv4.S | 455 ++ ext/libressl/crypto/sha/sha1-elf-x86_64.S | 2491 ++++++++++ ext/libressl/crypto/sha/sha1-macosx-x86_64.S | 2488 ++++++++++ ext/libressl/crypto/sha/sha1-masm-x86_64.S | 2746 +++++++++++ ext/libressl/crypto/sha/sha1-mingw64-x86_64.S | 2664 +++++++++++ ext/libressl/crypto/sha/sha1_one.c | 81 + ext/libressl/crypto/sha/sha1dgst.c | 72 + ext/libressl/crypto/sha/sha256-elf-armv4.S | 1520 ++++++ ext/libressl/crypto/sha/sha256-elf-x86_64.S | 1782 +++++++ ext/libressl/crypto/sha/sha256-macosx-x86_64.S | 1779 +++++++ ext/libressl/crypto/sha/sha256-masm-x86_64.S | 1864 ++++++++ ext/libressl/crypto/sha/sha256-mingw64-x86_64.S | 1790 +++++++ ext/libressl/crypto/sha/sha256.c | 284 ++ ext/libressl/crypto/sha/sha512-elf-armv4.S | 1786 +++++++ ext/libressl/crypto/sha/sha512-elf-x86_64.S | 1806 +++++++ ext/libressl/crypto/sha/sha512-macosx-x86_64.S | 1803 +++++++ ext/libressl/crypto/sha/sha512-masm-x86_64.S | 1888 ++++++++ ext/libressl/crypto/sha/sha512-mingw64-x86_64.S | 1814 +++++++ ext/libressl/crypto/sha/sha512.c | 547 +++ ext/libressl/crypto/sha/sha_locl.h | 419 ++ 114 files changed, 95500 insertions(+) create mode 100644 ext/libressl/crypto/aead/Makefile create mode 100644 ext/libressl/crypto/aead/e_chacha20poly1305.c create mode 100644 ext/libressl/crypto/aes/Makefile create mode 100644 ext/libressl/crypto/aes/aes-elf-armv4.S create mode 100644 ext/libressl/crypto/aes/aes-elf-x86_64.S create mode 100644 ext/libressl/crypto/aes/aes-macosx-x86_64.S create mode 100644 ext/libressl/crypto/aes/aes-masm-x86_64.S create mode 100644 ext/libressl/crypto/aes/aes-mingw64-x86_64.S create mode 100644 ext/libressl/crypto/aes/aes_cbc.c create mode 100644 ext/libressl/crypto/aes/aes_cfb.c create mode 100644 ext/libressl/crypto/aes/aes_core.c create mode 100644 ext/libressl/crypto/aes/aes_ctr.c create mode 100644 ext/libressl/crypto/aes/aes_ecb.c create mode 100644 ext/libressl/crypto/aes/aes_ige.c create mode 100644 ext/libressl/crypto/aes/aes_locl.h create mode 100644 ext/libressl/crypto/aes/aes_misc.c create mode 100644 ext/libressl/crypto/aes/aes_ofb.c create mode 100644 ext/libressl/crypto/aes/aes_wrap.c create mode 100644 ext/libressl/crypto/aes/aesni-elf-x86_64.S create mode 100644 ext/libressl/crypto/aes/aesni-macosx-x86_64.S create mode 100644 ext/libressl/crypto/aes/aesni-masm-x86_64.S create mode 100644 ext/libressl/crypto/aes/aesni-mingw64-x86_64.S create mode 100644 ext/libressl/crypto/aes/aesni-sha1-elf-x86_64.S create mode 100644 ext/libressl/crypto/aes/aesni-sha1-macosx-x86_64.S create mode 100644 ext/libressl/crypto/aes/aesni-sha1-masm-x86_64.S create mode 100644 ext/libressl/crypto/aes/aesni-sha1-mingw64-x86_64.S create mode 100644 ext/libressl/crypto/aes/bsaes-elf-x86_64.S create mode 100644 ext/libressl/crypto/aes/bsaes-macosx-x86_64.S create mode 100644 ext/libressl/crypto/aes/bsaes-masm-x86_64.S create mode 100644 ext/libressl/crypto/aes/bsaes-mingw64-x86_64.S create mode 100644 ext/libressl/crypto/aes/vpaes-elf-x86_64.S create mode 100644 ext/libressl/crypto/aes/vpaes-macosx-x86_64.S create mode 100644 ext/libressl/crypto/aes/vpaes-masm-x86_64.S create mode 100644 ext/libressl/crypto/aes/vpaes-mingw64-x86_64.S create mode 100644 ext/libressl/crypto/bf/Makefile create mode 100644 ext/libressl/crypto/bf/bf_cfb64.c create mode 100644 ext/libressl/crypto/bf/bf_ecb.c create mode 100644 ext/libressl/crypto/bf/bf_enc.c create mode 100644 ext/libressl/crypto/bf/bf_locl.h create mode 100644 ext/libressl/crypto/bf/bf_ofb64.c create mode 100644 ext/libressl/crypto/bf/bf_pi.h create mode 100644 ext/libressl/crypto/bf/bf_skey.c create mode 100644 ext/libressl/crypto/chacha/Makefile create mode 100644 ext/libressl/crypto/chacha/chacha-merged.c create mode 100644 ext/libressl/crypto/chacha/chacha.c create mode 100644 ext/libressl/crypto/compat/Makefile create mode 100644 ext/libressl/crypto/compat/arc4random.c create mode 100644 ext/libressl/crypto/compat/arc4random.h create mode 100644 ext/libressl/crypto/compat/arc4random_aix.h create mode 100644 ext/libressl/crypto/compat/arc4random_fe310.h create mode 100644 ext/libressl/crypto/compat/arc4random_freebsd.h create mode 100644 ext/libressl/crypto/compat/arc4random_hpux.h create mode 100644 ext/libressl/crypto/compat/arc4random_linux.h create mode 100644 ext/libressl/crypto/compat/arc4random_netbsd.h create mode 100644 ext/libressl/crypto/compat/arc4random_osx.h create mode 100644 ext/libressl/crypto/compat/arc4random_solaris.h create mode 100644 ext/libressl/crypto/compat/arc4random_uniform.c create mode 100644 ext/libressl/crypto/compat/arc4random_win.h create mode 100644 ext/libressl/crypto/compat/chacha_private.h create mode 100644 ext/libressl/crypto/compat/explicit_bzero.c create mode 100644 ext/libressl/crypto/compat/explicit_bzero_win.c create mode 100644 ext/libressl/crypto/compat/getentropy_aix.c create mode 100644 ext/libressl/crypto/compat/getentropy_freebsd.c create mode 100644 ext/libressl/crypto/compat/getentropy_hpux.c create mode 100644 ext/libressl/crypto/compat/getentropy_linux.c create mode 100644 ext/libressl/crypto/compat/getentropy_netbsd.c create mode 100644 ext/libressl/crypto/compat/getentropy_osx.c create mode 100644 ext/libressl/crypto/compat/getentropy_solaris.c create mode 100644 ext/libressl/crypto/compat/getentropy_win.c create mode 100644 ext/libressl/crypto/compat/timingsafe_bcmp.c create mode 100644 ext/libressl/crypto/compat/timingsafe_memcmp.c create mode 100644 ext/libressl/crypto/curve25519/Makefile create mode 100644 ext/libressl/crypto/curve25519/curve25519-generic.c create mode 100644 ext/libressl/crypto/curve25519/curve25519.c create mode 100644 ext/libressl/crypto/curve25519/curve25519_internal.h create mode 100644 ext/libressl/crypto/modes/Makefile create mode 100644 ext/libressl/crypto/modes/cbc128.c create mode 100644 ext/libressl/crypto/modes/ccm128.c create mode 100644 ext/libressl/crypto/modes/cfb128.c create mode 100644 ext/libressl/crypto/modes/ctr128.c create mode 100644 ext/libressl/crypto/modes/cts128.c create mode 100644 ext/libressl/crypto/modes/gcm128.c create mode 100644 ext/libressl/crypto/modes/ghash-elf-armv4.S create mode 100644 ext/libressl/crypto/modes/ghash-elf-x86_64.S create mode 100644 ext/libressl/crypto/modes/ghash-macosx-x86_64.S create mode 100644 ext/libressl/crypto/modes/ghash-masm-x86_64.S create mode 100644 ext/libressl/crypto/modes/ghash-mingw64-x86_64.S create mode 100644 ext/libressl/crypto/modes/modes_lcl.h create mode 100644 ext/libressl/crypto/modes/ofb128.c create mode 100644 ext/libressl/crypto/modes/xts128.c create mode 100644 ext/libressl/crypto/poly1305/Makefile create mode 100644 ext/libressl/crypto/poly1305/poly1305-donna.c create mode 100644 ext/libressl/crypto/poly1305/poly1305.c create mode 100644 ext/libressl/crypto/sha/Makefile create mode 100644 ext/libressl/crypto/sha/sha1-elf-armv4.S create mode 100644 ext/libressl/crypto/sha/sha1-elf-x86_64.S create mode 100644 ext/libressl/crypto/sha/sha1-macosx-x86_64.S create mode 100644 ext/libressl/crypto/sha/sha1-masm-x86_64.S create mode 100644 ext/libressl/crypto/sha/sha1-mingw64-x86_64.S create mode 100644 ext/libressl/crypto/sha/sha1_one.c create mode 100644 ext/libressl/crypto/sha/sha1dgst.c create mode 100644 ext/libressl/crypto/sha/sha256-elf-armv4.S create mode 100644 ext/libressl/crypto/sha/sha256-elf-x86_64.S create mode 100644 ext/libressl/crypto/sha/sha256-macosx-x86_64.S create mode 100644 ext/libressl/crypto/sha/sha256-masm-x86_64.S create mode 100644 ext/libressl/crypto/sha/sha256-mingw64-x86_64.S create mode 100644 ext/libressl/crypto/sha/sha256.c create mode 100644 ext/libressl/crypto/sha/sha512-elf-armv4.S create mode 100644 ext/libressl/crypto/sha/sha512-elf-x86_64.S create mode 100644 ext/libressl/crypto/sha/sha512-macosx-x86_64.S create mode 100644 ext/libressl/crypto/sha/sha512-masm-x86_64.S create mode 100644 ext/libressl/crypto/sha/sha512-mingw64-x86_64.S create mode 100644 ext/libressl/crypto/sha/sha512.c create mode 100644 ext/libressl/crypto/sha/sha_locl.h (limited to 'ext/libressl/crypto') diff --git a/ext/libressl/crypto/aead/Makefile b/ext/libressl/crypto/aead/Makefile new file mode 100644 index 0000000..6bf1ccf --- /dev/null +++ b/ext/libressl/crypto/aead/Makefile @@ -0,0 +1,13 @@ +include ../../ssl_common.mk + +obj = e_chacha20poly1305.o + + +all: $(obj) +dep: all + +%.o: %.c + $(CC) $(CFLAGS) -c $< + +clean: + rm -f *.o *.a diff --git a/ext/libressl/crypto/aead/e_chacha20poly1305.c b/ext/libressl/crypto/aead/e_chacha20poly1305.c new file mode 100644 index 0000000..9d8291e --- /dev/null +++ b/ext/libressl/crypto/aead/e_chacha20poly1305.c @@ -0,0 +1,395 @@ +/* $OpenBSD: e_chacha20poly1305.c,v 1.21 2019/03/27 15:34:01 jsing Exp $ */ + +/* + * Copyright (c) 2015 Reyk Floter + * Copyright (c) 2014, Google Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include + +#include + +#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305) + +//#include +//#include +#include +#include + +//#include "evp_locl.h" +#define EVPerror(X) ; + +#define POLY1305_TAG_LEN 16 + +#define CHACHA20_CONSTANT_LEN 4 +#define CHACHA20_IV_LEN 8 +#define CHACHA20_NONCE_LEN (CHACHA20_CONSTANT_LEN + CHACHA20_IV_LEN) +#define XCHACHA20_NONCE_LEN 24 + +#if 0 +struct aead_chacha20_poly1305_ctx { + unsigned char key[32]; + unsigned char tag_len; +}; + +static int +aead_chacha20_poly1305_init(EVP_AEAD_CTX *ctx, const unsigned char *key, + size_t key_len, size_t tag_len) +{ + struct aead_chacha20_poly1305_ctx *c20_ctx; + + if (tag_len == 0) + tag_len = POLY1305_TAG_LEN; + + if (tag_len > POLY1305_TAG_LEN) { + EVPerror(EVP_R_TOO_LARGE); + return 0; + } + + /* Internal error - EVP_AEAD_CTX_init should catch this. */ + if (key_len != sizeof(c20_ctx->key)) + return 0; + + c20_ctx = malloc(sizeof(struct aead_chacha20_poly1305_ctx)); + if (c20_ctx == NULL) + return 0; + + memcpy(&c20_ctx->key[0], key, key_len); + c20_ctx->tag_len = tag_len; + ctx->aead_state = c20_ctx; + + return 1; +} + +static void +aead_chacha20_poly1305_cleanup(EVP_AEAD_CTX *ctx) +{ + struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; + + freezero(c20_ctx, sizeof(*c20_ctx)); +} + +#endif + +static void +poly1305_update_with_length(poly1305_state *poly1305, + const unsigned char *data, size_t data_len) +{ + size_t j = data_len; + unsigned char length_bytes[8]; + unsigned i; + + for (i = 0; i < sizeof(length_bytes); i++) { + length_bytes[i] = j; + j >>= 8; + } + + if (data != NULL) + CRYPTO_poly1305_update(poly1305, data, data_len); + CRYPTO_poly1305_update(poly1305, length_bytes, sizeof(length_bytes)); +} + +static void +poly1305_update_with_pad16(poly1305_state *poly1305, + const unsigned char *data, size_t data_len) +{ + static const unsigned char zero_pad16[16]; + size_t pad_len; + + CRYPTO_poly1305_update(poly1305, data, data_len); + + /* pad16() is defined in RFC 7539 2.8.1. */ + if ((pad_len = data_len % 16) == 0) + return; + + CRYPTO_poly1305_update(poly1305, zero_pad16, 16 - pad_len); +} + +int +aead_chacha20_poly1305_seal(unsigned char key[32], unsigned char tag_len, + unsigned char *out, size_t *out_len, size_t max_out_len, + const unsigned char *nonce, size_t nonce_len, + const unsigned char *in, size_t in_len, + const unsigned char *ad, size_t ad_len) +{ + unsigned char poly1305_key[32]; + poly1305_state poly1305; + const unsigned char *iv; + const uint64_t in_len_64 = in_len; + uint64_t ctr; + + /* The underlying ChaCha implementation may not overflow the block + * counter into the second counter word. Therefore we disallow + * individual operations that work on more than 2TB at a time. + * in_len_64 is needed because, on 32-bit platforms, size_t is only + * 32-bits and this produces a warning because it's always false. + * Casting to uint64_t inside the conditional is not sufficient to stop + * the warning. */ + if (in_len_64 >= (1ULL << 32) * 64 - 64) { + EVPerror(EVP_R_TOO_LARGE); + return 0; + } + + if (max_out_len < in_len + tag_len) { + EVPerror(EVP_R_BUFFER_TOO_SMALL); + return 0; + } + + if (nonce_len != CHACHA20_NONCE_LEN) { + EVPerror(EVP_R_IV_TOO_LARGE); + return 0; + } + + ctr = (uint64_t)((uint32_t)(nonce[0]) | (uint32_t)(nonce[1]) << 8 | + (uint32_t)(nonce[2]) << 16 | (uint32_t)(nonce[3]) << 24) << 32; + iv = nonce + CHACHA20_CONSTANT_LEN; + + memset(poly1305_key, 0, sizeof(poly1305_key)); + CRYPTO_chacha_20(poly1305_key, poly1305_key, + sizeof(poly1305_key), key, iv, ctr); + + CRYPTO_poly1305_init(&poly1305, poly1305_key); + poly1305_update_with_pad16(&poly1305, ad, ad_len); + CRYPTO_chacha_20(out, in, in_len, key, iv, ctr + 1); + poly1305_update_with_pad16(&poly1305, out, in_len); + poly1305_update_with_length(&poly1305, NULL, ad_len); + poly1305_update_with_length(&poly1305, NULL, in_len); + + if (tag_len != POLY1305_TAG_LEN) { + unsigned char tag[POLY1305_TAG_LEN]; + CRYPTO_poly1305_finish(&poly1305, tag); + memcpy(out + in_len, tag, tag_len); + *out_len = in_len + tag_len; + return 1; + } + + CRYPTO_poly1305_finish(&poly1305, out + in_len); + *out_len = in_len + POLY1305_TAG_LEN; + return 1; +} + +int +aead_chacha20_poly1305_open(unsigned char key[32], unsigned char tag_len, + unsigned char *out, size_t *out_len, size_t max_out_len, + const unsigned char *nonce, size_t nonce_len, + const unsigned char *in, size_t in_len, + const unsigned char *ad, size_t ad_len) +{ + unsigned char mac[POLY1305_TAG_LEN]; + unsigned char poly1305_key[32]; + const unsigned char *iv = nonce; + poly1305_state poly1305; + const uint64_t in_len_64 = in_len; + size_t plaintext_len; + uint64_t ctr = 0; + + if (in_len < tag_len) { + EVPerror(EVP_R_BAD_DECRYPT); + return 0; + } + + /* The underlying ChaCha implementation may not overflow the block + * counter into the second counter word. Therefore we disallow + * individual operations that work on more than 2TB at a time. + * in_len_64 is needed because, on 32-bit platforms, size_t is only + * 32-bits and this produces a warning because it's always false. + * Casting to uint64_t inside the conditional is not sufficient to stop + * the warning. */ + if (in_len_64 >= (1ULL << 32) * 64 - 64) { + EVPerror(EVP_R_TOO_LARGE); + return 0; + } + + if (nonce_len != CHACHA20_NONCE_LEN) { + EVPerror(EVP_R_IV_TOO_LARGE); + return 0; + } + + plaintext_len = in_len - tag_len; + + if (max_out_len < plaintext_len) { + EVPerror(EVP_R_BUFFER_TOO_SMALL); + return 0; + } + + ctr = (uint64_t)((uint32_t)(nonce[0]) | (uint32_t)(nonce[1]) << 8 | + (uint32_t)(nonce[2]) << 16 | (uint32_t)(nonce[3]) << 24) << 32; + iv = nonce + CHACHA20_CONSTANT_LEN; + + memset(poly1305_key, 0, sizeof(poly1305_key)); + CRYPTO_chacha_20(poly1305_key, poly1305_key, + sizeof(poly1305_key), key, iv, ctr); + + CRYPTO_poly1305_init(&poly1305, poly1305_key); + poly1305_update_with_pad16(&poly1305, ad, ad_len); + poly1305_update_with_pad16(&poly1305, in, plaintext_len); + poly1305_update_with_length(&poly1305, NULL, ad_len); + poly1305_update_with_length(&poly1305, NULL, plaintext_len); + + CRYPTO_poly1305_finish(&poly1305, mac); + + if (timingsafe_memcmp(mac, in + plaintext_len, tag_len) != 0) { + EVPerror(EVP_R_BAD_DECRYPT); + return 0; + } + + CRYPTO_chacha_20(out, in, plaintext_len, key, iv, ctr + 1); + *out_len = plaintext_len; + return 1; +} + +int +aead_xchacha20_poly1305_seal(unsigned char key[32], unsigned char tag_len, + unsigned char *out, size_t *out_len, size_t max_out_len, + const unsigned char *nonce, size_t nonce_len, + const unsigned char *in, size_t in_len, + const unsigned char *ad, size_t ad_len) +{ + unsigned char poly1305_key[32]; + unsigned char subkey[32]; + poly1305_state poly1305; + + if (max_out_len < in_len + tag_len) { + EVPerror(EVP_R_BUFFER_TOO_SMALL); + return 0; + } + + if (nonce_len != XCHACHA20_NONCE_LEN) { + EVPerror(EVP_R_IV_TOO_LARGE); + return 0; + } + + CRYPTO_hchacha_20(subkey, key, nonce); + + CRYPTO_chacha_20(out, in, in_len, subkey, nonce + 16, 1); + + memset(poly1305_key, 0, sizeof(poly1305_key)); + CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), + subkey, nonce + 16, 0); + + CRYPTO_poly1305_init(&poly1305, poly1305_key); + poly1305_update_with_pad16(&poly1305, ad, ad_len); + poly1305_update_with_pad16(&poly1305, out, in_len); + poly1305_update_with_length(&poly1305, NULL, ad_len); + poly1305_update_with_length(&poly1305, NULL, in_len); + + if (tag_len != POLY1305_TAG_LEN) { + unsigned char tag[POLY1305_TAG_LEN]; + CRYPTO_poly1305_finish(&poly1305, tag); + memcpy(out + in_len, tag, tag_len); + *out_len = in_len + tag_len; + return 1; + } + + CRYPTO_poly1305_finish(&poly1305, out + in_len); + *out_len = in_len + POLY1305_TAG_LEN; + return 1; +} + +int +aead_xchacha20_poly1305_open(unsigned char key[32], unsigned char tag_len, + unsigned char *out, size_t *out_len, size_t max_out_len, + const unsigned char *nonce, size_t nonce_len, + const unsigned char *in, size_t in_len, + const unsigned char *ad, size_t ad_len) +{ + unsigned char mac[POLY1305_TAG_LEN]; + unsigned char poly1305_key[32]; + unsigned char subkey[32]; + poly1305_state poly1305; + size_t plaintext_len; + + if (in_len < tag_len) { + EVPerror(EVP_R_BAD_DECRYPT); + return 0; + } + + if (nonce_len != XCHACHA20_NONCE_LEN) { + EVPerror(EVP_R_IV_TOO_LARGE); + return 0; + } + + plaintext_len = in_len - tag_len; + + if (max_out_len < plaintext_len) { + EVPerror(EVP_R_BUFFER_TOO_SMALL); + return 0; + } + + CRYPTO_hchacha_20(subkey, key, nonce); + + memset(poly1305_key, 0, sizeof(poly1305_key)); + CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), + subkey, nonce + 16, 0); + + CRYPTO_poly1305_init(&poly1305, poly1305_key); + poly1305_update_with_pad16(&poly1305, ad, ad_len); + poly1305_update_with_pad16(&poly1305, in, plaintext_len); + poly1305_update_with_length(&poly1305, NULL, ad_len); + poly1305_update_with_length(&poly1305, NULL, plaintext_len); + + CRYPTO_poly1305_finish(&poly1305, mac); + if (timingsafe_memcmp(mac, in + plaintext_len, tag_len) != 0) { + EVPerror(EVP_R_BAD_DECRYPT); + return 0; + } + + CRYPTO_chacha_20(out, in, plaintext_len, subkey, nonce + 16, 1); + + *out_len = plaintext_len; + return 1; +} + +#if 0 +/* RFC 7539 */ +static const EVP_AEAD aead_chacha20_poly1305 = { + .key_len = 32, + .nonce_len = CHACHA20_NONCE_LEN, + .overhead = POLY1305_TAG_LEN, + .max_tag_len = POLY1305_TAG_LEN, + + .init = aead_chacha20_poly1305_init, + .cleanup = aead_chacha20_poly1305_cleanup, + .seal = aead_chacha20_poly1305_seal, + .open = aead_chacha20_poly1305_open, +}; + +const EVP_AEAD * +EVP_aead_chacha20_poly1305() +{ + return &aead_chacha20_poly1305; +} + +static const EVP_AEAD aead_xchacha20_poly1305 = { + .key_len = 32, + .nonce_len = XCHACHA20_NONCE_LEN, + .overhead = POLY1305_TAG_LEN, + .max_tag_len = POLY1305_TAG_LEN, + + .init = aead_chacha20_poly1305_init, + .cleanup = aead_chacha20_poly1305_cleanup, + .seal = aead_xchacha20_poly1305_seal, + .open = aead_xchacha20_poly1305_open, +}; + +const EVP_AEAD * +EVP_aead_xchacha20_poly1305() +{ + return &aead_xchacha20_poly1305; +} +#endif + +#endif /* !OPENSSL_NO_CHACHA && !OPENSSL_NO_POLY1305 */ diff --git a/ext/libressl/crypto/aes/Makefile b/ext/libressl/crypto/aes/Makefile new file mode 100644 index 0000000..2b3c04c --- /dev/null +++ b/ext/libressl/crypto/aes/Makefile @@ -0,0 +1,14 @@ +include ../../ssl_common.mk +CFLAGS+= -D__BEGIN_HIDDEN_DECLS= -D__END_HIDDEN_DECLS= + +obj = aes_core.o aes_ecb.o aes_cbc.o aes_cfb.o aes_ctr.o aes_ige.o aes_ofb.o + + +all: $(obj) +dep: all + +%.o: %.c + $(CC) $(CFLAGS) -c $< + +clean: + rm -f *.o *.a diff --git a/ext/libressl/crypto/aes/aes-elf-armv4.S b/ext/libressl/crypto/aes/aes-elf-armv4.S new file mode 100644 index 0000000..8164b53 --- /dev/null +++ b/ext/libressl/crypto/aes/aes-elf-armv4.S @@ -0,0 +1,1074 @@ +#include "arm_arch.h" +.text +.code 32 + +.type AES_Te,%object +.align 5 +AES_Te: +.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d +.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554 +.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d +.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a +.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87 +.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b +.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea +.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b +.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a +.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f +.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108 +.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f +.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e +.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5 +.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d +.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f +.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e +.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb +.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce +.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497 +.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c +.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed +.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b +.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a +.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16 +.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594 +.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81 +.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3 +.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a +.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504 +.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163 +.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d +.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f +.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739 +.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47 +.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395 +.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f +.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883 +.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c +.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76 +.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e +.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4 +.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6 +.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b +.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7 +.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0 +.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25 +.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818 +.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72 +.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651 +.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21 +.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85 +.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa +.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12 +.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0 +.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9 +.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133 +.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7 +.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920 +.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a +.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17 +.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8 +.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11 +.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a +@ Te4[256] +.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 +.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 +.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 +.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 +.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc +.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 +.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a +.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 +.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 +.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 +.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b +.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf +.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 +.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 +.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 +.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 +.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 +.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 +.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 +.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb +.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c +.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 +.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 +.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 +.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 +.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a +.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e +.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e +.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 +.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf +.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 +.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 +@ rcon[] +.word 0x01000000, 0x02000000, 0x04000000, 0x08000000 +.word 0x10000000, 0x20000000, 0x40000000, 0x80000000 +.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 +.size AES_Te,.-AES_Te + +@ void AES_encrypt(const unsigned char *in, unsigned char *out, +@ const AES_KEY *key) { +.global AES_encrypt +.type AES_encrypt,%function +.align 5 +AES_encrypt: + sub r3,pc,#8 @ AES_encrypt + stmdb sp!,{r1,r4-r12,lr} + mov r12,r0 @ inp + mov r11,r2 + sub r10,r3,#AES_encrypt-AES_Te @ Te +#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT) + ldrb r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... + ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + orr r3,r3,r5,lsl#16 + orr r3,r3,r6,lsl#24 +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif +#endif + bl _armv4_AES_encrypt + + ldr r12,[sp],#4 @ pop out +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r12,#0] + str r1,[r12,#4] + str r2,[r12,#8] + str r3,[r12,#12] +#else + mov r4,r0,lsr#24 @ write output in endian-neutral + mov r5,r0,lsr#16 @ manner... + mov r6,r0,lsr#8 + strb r4,[r12,#0] + strb r5,[r12,#1] + mov r4,r1,lsr#24 + strb r6,[r12,#2] + mov r5,r1,lsr#16 + strb r0,[r12,#3] + mov r6,r1,lsr#8 + strb r4,[r12,#4] + strb r5,[r12,#5] + mov r4,r2,lsr#24 + strb r6,[r12,#6] + mov r5,r2,lsr#16 + strb r1,[r12,#7] + mov r6,r2,lsr#8 + strb r4,[r12,#8] + strb r5,[r12,#9] + mov r4,r3,lsr#24 + strb r6,[r12,#10] + mov r5,r3,lsr#16 + strb r2,[r12,#11] + mov r6,r3,lsr#8 + strb r4,[r12,#12] + strb r5,[r12,#13] + strb r6,[r12,#14] + strb r3,[r12,#15] +#endif +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size AES_encrypt,.-AES_encrypt + +.type _armv4_AES_encrypt,%function +.align 2 +_armv4_AES_encrypt: + str lr,[sp,#-4]! @ push lr + ldmia r11!,{r4-r7} + eor r0,r0,r4 + ldr r12,[r11,#240-16] + eor r1,r1,r5 + eor r2,r2,r6 + eor r3,r3,r7 + sub r12,r12,#1 + mov lr,#255 + + and r7,lr,r0 + and r8,lr,r0,lsr#8 + and r9,lr,r0,lsr#16 + mov r0,r0,lsr#24 +.Lenc_loop: + ldr r4,[r10,r7,lsl#2] @ Te3[s0>>0] + and r7,lr,r1,lsr#16 @ i0 + ldr r5,[r10,r8,lsl#2] @ Te2[s0>>8] + and r8,lr,r1 + ldr r6,[r10,r9,lsl#2] @ Te1[s0>>16] + and r9,lr,r1,lsr#8 + ldr r0,[r10,r0,lsl#2] @ Te0[s0>>24] + mov r1,r1,lsr#24 + + ldr r7,[r10,r7,lsl#2] @ Te1[s1>>16] + ldr r8,[r10,r8,lsl#2] @ Te3[s1>>0] + ldr r9,[r10,r9,lsl#2] @ Te2[s1>>8] + eor r0,r0,r7,ror#8 + ldr r1,[r10,r1,lsl#2] @ Te0[s1>>24] + and r7,lr,r2,lsr#8 @ i0 + eor r5,r5,r8,ror#8 + and r8,lr,r2,lsr#16 @ i1 + eor r6,r6,r9,ror#8 + and r9,lr,r2 + ldr r7,[r10,r7,lsl#2] @ Te2[s2>>8] + eor r1,r1,r4,ror#24 + ldr r8,[r10,r8,lsl#2] @ Te1[s2>>16] + mov r2,r2,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Te3[s2>>0] + eor r0,r0,r7,ror#16 + ldr r2,[r10,r2,lsl#2] @ Te0[s2>>24] + and r7,lr,r3 @ i0 + eor r1,r1,r8,ror#8 + and r8,lr,r3,lsr#8 @ i1 + eor r6,r6,r9,ror#16 + and r9,lr,r3,lsr#16 @ i2 + ldr r7,[r10,r7,lsl#2] @ Te3[s3>>0] + eor r2,r2,r5,ror#16 + ldr r8,[r10,r8,lsl#2] @ Te2[s3>>8] + mov r3,r3,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Te1[s3>>16] + eor r0,r0,r7,ror#24 + ldr r7,[r11],#16 + eor r1,r1,r8,ror#16 + ldr r3,[r10,r3,lsl#2] @ Te0[s3>>24] + eor r2,r2,r9,ror#8 + ldr r4,[r11,#-12] + eor r3,r3,r6,ror#8 + + ldr r5,[r11,#-8] + eor r0,r0,r7 + ldr r6,[r11,#-4] + and r7,lr,r0 + eor r1,r1,r4 + and r8,lr,r0,lsr#8 + eor r2,r2,r5 + and r9,lr,r0,lsr#16 + eor r3,r3,r6 + mov r0,r0,lsr#24 + + subs r12,r12,#1 + bne .Lenc_loop + + add r10,r10,#2 + + ldrb r4,[r10,r7,lsl#2] @ Te4[s0>>0] + and r7,lr,r1,lsr#16 @ i0 + ldrb r5,[r10,r8,lsl#2] @ Te4[s0>>8] + and r8,lr,r1 + ldrb r6,[r10,r9,lsl#2] @ Te4[s0>>16] + and r9,lr,r1,lsr#8 + ldrb r0,[r10,r0,lsl#2] @ Te4[s0>>24] + mov r1,r1,lsr#24 + + ldrb r7,[r10,r7,lsl#2] @ Te4[s1>>16] + ldrb r8,[r10,r8,lsl#2] @ Te4[s1>>0] + ldrb r9,[r10,r9,lsl#2] @ Te4[s1>>8] + eor r0,r7,r0,lsl#8 + ldrb r1,[r10,r1,lsl#2] @ Te4[s1>>24] + and r7,lr,r2,lsr#8 @ i0 + eor r5,r8,r5,lsl#8 + and r8,lr,r2,lsr#16 @ i1 + eor r6,r9,r6,lsl#8 + and r9,lr,r2 + ldrb r7,[r10,r7,lsl#2] @ Te4[s2>>8] + eor r1,r4,r1,lsl#24 + ldrb r8,[r10,r8,lsl#2] @ Te4[s2>>16] + mov r2,r2,lsr#24 + + ldrb r9,[r10,r9,lsl#2] @ Te4[s2>>0] + eor r0,r7,r0,lsl#8 + ldrb r2,[r10,r2,lsl#2] @ Te4[s2>>24] + and r7,lr,r3 @ i0 + eor r1,r1,r8,lsl#16 + and r8,lr,r3,lsr#8 @ i1 + eor r6,r9,r6,lsl#8 + and r9,lr,r3,lsr#16 @ i2 + ldrb r7,[r10,r7,lsl#2] @ Te4[s3>>0] + eor r2,r5,r2,lsl#24 + ldrb r8,[r10,r8,lsl#2] @ Te4[s3>>8] + mov r3,r3,lsr#24 + + ldrb r9,[r10,r9,lsl#2] @ Te4[s3>>16] + eor r0,r7,r0,lsl#8 + ldr r7,[r11,#0] + ldrb r3,[r10,r3,lsl#2] @ Te4[s3>>24] + eor r1,r1,r8,lsl#8 + ldr r4,[r11,#4] + eor r2,r2,r9,lsl#16 + ldr r5,[r11,#8] + eor r3,r6,r3,lsl#24 + ldr r6,[r11,#12] + + eor r0,r0,r7 + eor r1,r1,r4 + eor r2,r2,r5 + eor r3,r3,r6 + + sub r10,r10,#2 + ldr pc,[sp],#4 @ pop and return +.size _armv4_AES_encrypt,.-_armv4_AES_encrypt + +.global AES_set_encrypt_key +.type AES_set_encrypt_key,%function +.align 5 +AES_set_encrypt_key: +_armv4_AES_set_encrypt_key: + sub r3,pc,#8 @ AES_set_encrypt_key + teq r0,#0 + moveq r0,#-1 + beq .Labrt + teq r2,#0 + moveq r0,#-1 + beq .Labrt + + teq r1,#128 + beq .Lok + teq r1,#192 + beq .Lok + teq r1,#256 + movne r0,#-1 + bne .Labrt + +.Lok: stmdb sp!,{r4-r12,lr} + sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 + + mov r12,r0 @ inp + mov lr,r1 @ bits + mov r11,r2 @ key + +#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT) + ldrb r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... + ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + str r0,[r11],#16 + orr r3,r3,r5,lsl#16 + str r1,[r11,#-12] + orr r3,r3,r6,lsl#24 + str r2,[r11,#-8] + str r3,[r11,#-4] +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r11],#16 + str r1,[r11,#-12] + str r2,[r11,#-8] + str r3,[r11,#-4] +#endif + + teq lr,#128 + bne .Lnot128 + mov r12,#10 + str r12,[r11,#240-16] + add r6,r10,#256 @ rcon + mov lr,#255 + +.L128_loop: + and r5,lr,r3,lsr#24 + and r7,lr,r3,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r3,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r3 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r5,r5,r4 + eor r0,r0,r5 @ rk[4]=rk[0]^... + eor r1,r1,r0 @ rk[5]=rk[1]^rk[4] + str r0,[r11],#16 + eor r2,r2,r1 @ rk[6]=rk[2]^rk[5] + str r1,[r11,#-12] + eor r3,r3,r2 @ rk[7]=rk[3]^rk[6] + str r2,[r11,#-8] + subs r12,r12,#1 + str r3,[r11,#-4] + bne .L128_loop + sub r2,r11,#176 + b .Ldone + +.Lnot128: +#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT) + ldrb r8,[r12,#19] + ldrb r4,[r12,#18] + ldrb r5,[r12,#17] + ldrb r6,[r12,#16] + orr r8,r8,r4,lsl#8 + ldrb r9,[r12,#23] + orr r8,r8,r5,lsl#16 + ldrb r4,[r12,#22] + orr r8,r8,r6,lsl#24 + ldrb r5,[r12,#21] + ldrb r6,[r12,#20] + orr r9,r9,r4,lsl#8 + orr r9,r9,r5,lsl#16 + str r8,[r11],#8 + orr r9,r9,r6,lsl#24 + str r9,[r11,#-4] +#else + ldr r8,[r12,#16] + ldr r9,[r12,#20] +#ifdef __ARMEL__ + rev r8,r8 + rev r9,r9 +#endif + str r8,[r11],#8 + str r9,[r11,#-4] +#endif + + teq lr,#192 + bne .Lnot192 + mov r12,#12 + str r12,[r11,#240-24] + add r6,r10,#256 @ rcon + mov lr,#255 + mov r12,#8 + +.L192_loop: + and r5,lr,r9,lsr#24 + and r7,lr,r9,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r9,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r9 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r9,r5,r4 + eor r0,r0,r9 @ rk[6]=rk[0]^... + eor r1,r1,r0 @ rk[7]=rk[1]^rk[6] + str r0,[r11],#24 + eor r2,r2,r1 @ rk[8]=rk[2]^rk[7] + str r1,[r11,#-20] + eor r3,r3,r2 @ rk[9]=rk[3]^rk[8] + str r2,[r11,#-16] + subs r12,r12,#1 + str r3,[r11,#-12] + subeq r2,r11,#216 + beq .Ldone + + ldr r7,[r11,#-32] + ldr r8,[r11,#-28] + eor r7,r7,r3 @ rk[10]=rk[4]^rk[9] + eor r9,r8,r7 @ rk[11]=rk[5]^rk[10] + str r7,[r11,#-8] + str r9,[r11,#-4] + b .L192_loop + +.Lnot192: +#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT) + ldrb r8,[r12,#27] + ldrb r4,[r12,#26] + ldrb r5,[r12,#25] + ldrb r6,[r12,#24] + orr r8,r8,r4,lsl#8 + ldrb r9,[r12,#31] + orr r8,r8,r5,lsl#16 + ldrb r4,[r12,#30] + orr r8,r8,r6,lsl#24 + ldrb r5,[r12,#29] + ldrb r6,[r12,#28] + orr r9,r9,r4,lsl#8 + orr r9,r9,r5,lsl#16 + str r8,[r11],#8 + orr r9,r9,r6,lsl#24 + str r9,[r11,#-4] +#else + ldr r8,[r12,#24] + ldr r9,[r12,#28] +#ifdef __ARMEL__ + rev r8,r8 + rev r9,r9 +#endif + str r8,[r11],#8 + str r9,[r11,#-4] +#endif + + mov r12,#14 + str r12,[r11,#240-32] + add r6,r10,#256 @ rcon + mov lr,#255 + mov r12,#7 + +.L256_loop: + and r5,lr,r9,lsr#24 + and r7,lr,r9,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r9,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r9 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r9,r5,r4 + eor r0,r0,r9 @ rk[8]=rk[0]^... + eor r1,r1,r0 @ rk[9]=rk[1]^rk[8] + str r0,[r11],#32 + eor r2,r2,r1 @ rk[10]=rk[2]^rk[9] + str r1,[r11,#-28] + eor r3,r3,r2 @ rk[11]=rk[3]^rk[10] + str r2,[r11,#-24] + subs r12,r12,#1 + str r3,[r11,#-20] + subeq r2,r11,#256 + beq .Ldone + + and r5,lr,r3 + and r7,lr,r3,lsr#8 + ldrb r5,[r10,r5] + and r8,lr,r3,lsr#16 + ldrb r7,[r10,r7] + and r9,lr,r3,lsr#24 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#8 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r11,#-48] + orr r5,r5,r9,lsl#24 + + ldr r7,[r11,#-44] + ldr r8,[r11,#-40] + eor r4,r4,r5 @ rk[12]=rk[4]^... + ldr r9,[r11,#-36] + eor r7,r7,r4 @ rk[13]=rk[5]^rk[12] + str r4,[r11,#-16] + eor r8,r8,r7 @ rk[14]=rk[6]^rk[13] + str r7,[r11,#-12] + eor r9,r9,r8 @ rk[15]=rk[7]^rk[14] + str r8,[r11,#-8] + str r9,[r11,#-4] + b .L256_loop + +.Ldone: mov r0,#0 + ldmia sp!,{r4-r12,lr} +.Labrt: tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +.size AES_set_encrypt_key,.-AES_set_encrypt_key + +.global AES_set_decrypt_key +.type AES_set_decrypt_key,%function +.align 5 +AES_set_decrypt_key: + str lr,[sp,#-4]! @ push lr + bl _armv4_AES_set_encrypt_key + teq r0,#0 + ldrne lr,[sp],#4 @ pop lr + bne .Labrt + + stmdb sp!,{r4-r12} + + ldr r12,[r2,#240] @ AES_set_encrypt_key preserves r2, + mov r11,r2 @ which is AES_KEY *key + mov r7,r2 + add r8,r2,r12,lsl#4 + +.Linv: ldr r0,[r7] + ldr r1,[r7,#4] + ldr r2,[r7,#8] + ldr r3,[r7,#12] + ldr r4,[r8] + ldr r5,[r8,#4] + ldr r6,[r8,#8] + ldr r9,[r8,#12] + str r0,[r8],#-16 + str r1,[r8,#16+4] + str r2,[r8,#16+8] + str r3,[r8,#16+12] + str r4,[r7],#16 + str r5,[r7,#-12] + str r6,[r7,#-8] + str r9,[r7,#-4] + teq r7,r8 + bne .Linv + ldr r0,[r11,#16]! @ prefetch tp1 + mov r7,#0x80 + mov r8,#0x1b + orr r7,r7,#0x8000 + orr r8,r8,#0x1b00 + orr r7,r7,r7,lsl#16 + orr r8,r8,r8,lsl#16 + sub r12,r12,#1 + mvn r9,r7 + mov r12,r12,lsl#2 @ (rounds-1)*4 + +.Lmix: and r4,r0,r7 + and r1,r0,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r1,r4,r1,lsl#1 @ tp2 + + and r4,r1,r7 + and r2,r1,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r2,r4,r2,lsl#1 @ tp4 + + and r4,r2,r7 + and r3,r2,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r3,r4,r3,lsl#1 @ tp8 + + eor r4,r1,r2 + eor r5,r0,r3 @ tp9 + eor r4,r4,r3 @ tpe + eor r4,r4,r1,ror#24 + eor r4,r4,r5,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8) + eor r4,r4,r2,ror#16 + eor r4,r4,r5,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16) + eor r4,r4,r5,ror#8 @ ^= ROTATE(tp9,24) + + ldr r0,[r11,#4] @ prefetch tp1 + str r4,[r11],#4 + subs r12,r12,#1 + bne .Lmix + + mov r0,#0 +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size AES_set_decrypt_key,.-AES_set_decrypt_key + +.type AES_Td,%object +.align 5 +AES_Td: +.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 +.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393 +.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25 +.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f +.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1 +.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6 +.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da +.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844 +.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd +.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4 +.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45 +.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94 +.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7 +.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a +.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5 +.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c +.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1 +.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a +.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75 +.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051 +.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46 +.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff +.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77 +.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb +.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000 +.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e +.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927 +.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a +.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e +.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16 +.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d +.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8 +.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd +.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34 +.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163 +.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120 +.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d +.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0 +.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422 +.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef +.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36 +.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4 +.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662 +.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5 +.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3 +.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b +.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8 +.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6 +.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6 +.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0 +.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815 +.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f +.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df +.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f +.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e +.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713 +.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89 +.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c +.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf +.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86 +.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f +.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541 +.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190 +.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 +@ Td4[256] +.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 +.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb +.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 +.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb +.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d +.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e +.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 +.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 +.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 +.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 +.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda +.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 +.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a +.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 +.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 +.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b +.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea +.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 +.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 +.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e +.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 +.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b +.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 +.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 +.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 +.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f +.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d +.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef +.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 +.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 +.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 +.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d +.size AES_Td,.-AES_Td + +@ void AES_decrypt(const unsigned char *in, unsigned char *out, +@ const AES_KEY *key) { +.global AES_decrypt +.type AES_decrypt,%function +.align 5 +AES_decrypt: + sub r3,pc,#8 @ AES_decrypt + stmdb sp!,{r1,r4-r12,lr} + mov r12,r0 @ inp + mov r11,r2 + sub r10,r3,#AES_decrypt-AES_Td @ Td +#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT) + ldrb r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... + ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + orr r3,r3,r5,lsl#16 + orr r3,r3,r6,lsl#24 +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif +#endif + bl _armv4_AES_decrypt + + ldr r12,[sp],#4 @ pop out +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r12,#0] + str r1,[r12,#4] + str r2,[r12,#8] + str r3,[r12,#12] +#else + mov r4,r0,lsr#24 @ write output in endian-neutral + mov r5,r0,lsr#16 @ manner... + mov r6,r0,lsr#8 + strb r4,[r12,#0] + strb r5,[r12,#1] + mov r4,r1,lsr#24 + strb r6,[r12,#2] + mov r5,r1,lsr#16 + strb r0,[r12,#3] + mov r6,r1,lsr#8 + strb r4,[r12,#4] + strb r5,[r12,#5] + mov r4,r2,lsr#24 + strb r6,[r12,#6] + mov r5,r2,lsr#16 + strb r1,[r12,#7] + mov r6,r2,lsr#8 + strb r4,[r12,#8] + strb r5,[r12,#9] + mov r4,r3,lsr#24 + strb r6,[r12,#10] + mov r5,r3,lsr#16 + strb r2,[r12,#11] + mov r6,r3,lsr#8 + strb r4,[r12,#12] + strb r5,[r12,#13] + strb r6,[r12,#14] + strb r3,[r12,#15] +#endif +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size AES_decrypt,.-AES_decrypt + +.type _armv4_AES_decrypt,%function +.align 2 +_armv4_AES_decrypt: + str lr,[sp,#-4]! @ push lr + ldmia r11!,{r4-r7} + eor r0,r0,r4 + ldr r12,[r11,#240-16] + eor r1,r1,r5 + eor r2,r2,r6 + eor r3,r3,r7 + sub r12,r12,#1 + mov lr,#255 + + and r7,lr,r0,lsr#16 + and r8,lr,r0,lsr#8 + and r9,lr,r0 + mov r0,r0,lsr#24 +.Ldec_loop: + ldr r4,[r10,r7,lsl#2] @ Td1[s0>>16] + and r7,lr,r1 @ i0 + ldr r5,[r10,r8,lsl#2] @ Td2[s0>>8] + and r8,lr,r1,lsr#16 + ldr r6,[r10,r9,lsl#2] @ Td3[s0>>0] + and r9,lr,r1,lsr#8 + ldr r0,[r10,r0,lsl#2] @ Td0[s0>>24] + mov r1,r1,lsr#24 + + ldr r7,[r10,r7,lsl#2] @ Td3[s1>>0] + ldr r8,[r10,r8,lsl#2] @ Td1[s1>>16] + ldr r9,[r10,r9,lsl#2] @ Td2[s1>>8] + eor r0,r0,r7,ror#24 + ldr r1,[r10,r1,lsl#2] @ Td0[s1>>24] + and r7,lr,r2,lsr#8 @ i0 + eor r5,r8,r5,ror#8 + and r8,lr,r2 @ i1 + eor r6,r9,r6,ror#8 + and r9,lr,r2,lsr#16 + ldr r7,[r10,r7,lsl#2] @ Td2[s2>>8] + eor r1,r1,r4,ror#8 + ldr r8,[r10,r8,lsl#2] @ Td3[s2>>0] + mov r2,r2,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Td1[s2>>16] + eor r0,r0,r7,ror#16 + ldr r2,[r10,r2,lsl#2] @ Td0[s2>>24] + and r7,lr,r3,lsr#16 @ i0 + eor r1,r1,r8,ror#24 + and r8,lr,r3,lsr#8 @ i1 + eor r6,r9,r6,ror#8 + and r9,lr,r3 @ i2 + ldr r7,[r10,r7,lsl#2] @ Td1[s3>>16] + eor r2,r2,r5,ror#8 + ldr r8,[r10,r8,lsl#2] @ Td2[s3>>8] + mov r3,r3,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Td3[s3>>0] + eor r0,r0,r7,ror#8 + ldr r7,[r11],#16 + eor r1,r1,r8,ror#16 + ldr r3,[r10,r3,lsl#2] @ Td0[s3>>24] + eor r2,r2,r9,ror#24 + + ldr r4,[r11,#-12] + eor r0,r0,r7 + ldr r5,[r11,#-8] + eor r3,r3,r6,ror#8 + ldr r6,[r11,#-4] + and r7,lr,r0,lsr#16 + eor r1,r1,r4 + and r8,lr,r0,lsr#8 + eor r2,r2,r5 + and r9,lr,r0 + eor r3,r3,r6 + mov r0,r0,lsr#24 + + subs r12,r12,#1 + bne .Ldec_loop + + add r10,r10,#1024 + + ldr r5,[r10,#0] @ prefetch Td4 + ldr r6,[r10,#32] + ldr r4,[r10,#64] + ldr r5,[r10,#96] + ldr r6,[r10,#128] + ldr r4,[r10,#160] + ldr r5,[r10,#192] + ldr r6,[r10,#224] + + ldrb r0,[r10,r0] @ Td4[s0>>24] + ldrb r4,[r10,r7] @ Td4[s0>>16] + and r7,lr,r1 @ i0 + ldrb r5,[r10,r8] @ Td4[s0>>8] + and r8,lr,r1,lsr#16 + ldrb r6,[r10,r9] @ Td4[s0>>0] + and r9,lr,r1,lsr#8 + + ldrb r7,[r10,r7] @ Td4[s1>>0] + ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24] + ldrb r8,[r10,r8] @ Td4[s1>>16] + eor r0,r7,r0,lsl#24 + ldrb r9,[r10,r9] @ Td4[s1>>8] + eor r1,r4,r1,lsl#8 + and r7,lr,r2,lsr#8 @ i0 + eor r5,r5,r8,lsl#8 + and r8,lr,r2 @ i1 + ldrb r7,[r10,r7] @ Td4[s2>>8] + eor r6,r6,r9,lsl#8 + ldrb r8,[r10,r8] @ Td4[s2>>0] + and r9,lr,r2,lsr#16 + + ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24] + eor r0,r0,r7,lsl#8 + ldrb r9,[r10,r9] @ Td4[s2>>16] + eor r1,r8,r1,lsl#16 + and r7,lr,r3,lsr#16 @ i0 + eor r2,r5,r2,lsl#16 + and r8,lr,r3,lsr#8 @ i1 + ldrb r7,[r10,r7] @ Td4[s3>>16] + eor r6,r6,r9,lsl#16 + ldrb r8,[r10,r8] @ Td4[s3>>8] + and r9,lr,r3 @ i2 + + ldrb r9,[r10,r9] @ Td4[s3>>0] + ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24] + eor r0,r0,r7,lsl#16 + ldr r7,[r11,#0] + eor r1,r1,r8,lsl#8 + ldr r4,[r11,#4] + eor r2,r9,r2,lsl#8 + ldr r5,[r11,#8] + eor r3,r6,r3,lsl#24 + ldr r6,[r11,#12] + + eor r0,r0,r7 + eor r1,r1,r4 + eor r2,r2,r5 + eor r3,r3,r6 + + sub r10,r10,#1024 + ldr pc,[sp],#4 @ pop and return +.size _armv4_AES_decrypt,.-_armv4_AES_decrypt +.asciz "AES for ARMv4, CRYPTOGAMS by " +.align 2 +#if defined(HAVE_GNU_STACK) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/ext/libressl/crypto/aes/aes-elf-x86_64.S b/ext/libressl/crypto/aes/aes-elf-x86_64.S new file mode 100644 index 0000000..83c0053 --- /dev/null +++ b/ext/libressl/crypto/aes/aes-elf-x86_64.S @@ -0,0 +1,2547 @@ +#include "x86_arch.h" + +.text +.type _x86_64_AES_encrypt,@function +.align 16 +_x86_64_AES_encrypt: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp .Lenc_loop +.align 16 +.Lenc_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + shrl $16,%ecx + movzbl %ah,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movl 12(%r15),%edx + movzbl %bh,%edi + movzbl %ch,%ebp + movl 0(%r15),%eax + xorl 1(%r14,%rdi,8),%r12d + xorl 1(%r14,%rbp,8),%r8d + + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + subl $1,%r13d + jnz .Lenc_loop + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl 2(%r14,%rsi,8),%r10d + movzbl 2(%r14,%rdi,8),%r11d + movzbl 2(%r14,%rbp,8),%r12d + + movzbl %dl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl 2(%r14,%rsi,8),%r8d + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $65280,%edi + andl $65280,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%ecx + + movzbl %dh,%esi + movzbl %ah,%edi + shrl $16,%edx + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + + andl $65280,%esi + andl $65280,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $16711680,%esi + andl $16711680,%edi + andl $16711680,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movl 0(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 2(%r14,%rbp,8),%ebp + + andl $16711680,%esi + andl $4278190080,%edi + andl $4278190080,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %bh,%esi + movzbl %ch,%edi + movl 16+12(%r15),%edx + movl 2(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 16+0(%r15),%eax + + andl $4278190080,%esi + andl $4278190080,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + retq +.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt +.type _x86_64_AES_encrypt_compact,@function +.align 16 +_x86_64_AES_encrypt_compact: + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp .Lenc_loop_compact +.align 16 +.Lenc_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + + movzbl %dl,%r8d + movzbl %bh,%esi + movzbl %ch,%edi + movzbl (%r14,%r8,1),%r8d + movzbl (%r14,%rsi,1),%r9d + movzbl (%r14,%rdi,1),%r13d + + movzbl %dh,%ebp + movzbl %ah,%esi + shrl $16,%ecx + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + shrl $16,%edx + + movzbl %cl,%edi + shll $8,%r9d + shll $8,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %r9d,%r10d + xorl %r13d,%r11d + + movzbl %dl,%r9d + shrl $16,%eax + shrl $16,%ebx + movzbl %al,%r13d + shll $8,%ebp + shll $8,%esi + movzbl (%r14,%r9,1),%r9d + movzbl (%r14,%r13,1),%r13d + xorl %ebp,%r12d + xorl %esi,%r8d + + movzbl %bl,%ebp + movzbl %dh,%esi + shll $16,%edi + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + xorl %edi,%r10d + + movzbl %ah,%edi + shrl $8,%ecx + shrl $8,%ebx + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rcx,1),%edx + movzbl (%r14,%rbx,1),%ecx + shll $16,%r9d + shll $16,%r13d + shll $16,%ebp + xorl %r9d,%r11d + xorl %r13d,%r12d + xorl %ebp,%r8d + + shll $24,%esi + shll $24,%edi + shll $24,%edx + xorl %esi,%r10d + shll $24,%ecx + xorl %edi,%r11d + movl %r10d,%eax + movl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je .Lenc_compact_done + movl %eax,%esi + movl %ebx,%edi + andl $2155905152,%esi + andl $2155905152,%edi + movl %esi,%r10d + movl %edi,%r11d + shrl $7,%r10d + leal (%rax,%rax,1),%r8d + shrl $7,%r11d + leal (%rbx,%rbx,1),%r9d + subl %r10d,%esi + subl %r11d,%edi + andl $4278124286,%r8d + andl $4278124286,%r9d + andl $454761243,%esi + andl $454761243,%edi + movl %eax,%r10d + movl %ebx,%r11d + xorl %esi,%r8d + xorl %edi,%r9d + + xorl %r8d,%eax + xorl %r9d,%ebx + movl %ecx,%esi + movl %edx,%edi + roll $24,%eax + roll $24,%ebx + andl $2155905152,%esi + andl $2155905152,%edi + xorl %r8d,%eax + xorl %r9d,%ebx + movl %esi,%r12d + movl %edi,%ebp + rorl $16,%r10d + rorl $16,%r11d + shrl $7,%r12d + leal (%rcx,%rcx,1),%r8d + xorl %r10d,%eax + xorl %r11d,%ebx + shrl $7,%ebp + leal (%rdx,%rdx,1),%r9d + rorl $8,%r10d + rorl $8,%r11d + subl %r12d,%esi + subl %ebp,%edi + xorl %r10d,%eax + xorl %r11d,%ebx + + andl $4278124286,%r8d + andl $4278124286,%r9d + andl $454761243,%esi + andl $454761243,%edi + movl %ecx,%r12d + movl %edx,%ebp + xorl %esi,%r8d + xorl %edi,%r9d + + xorl %r8d,%ecx + xorl %r9d,%edx + roll $24,%ecx + roll $24,%edx + xorl %r8d,%ecx + xorl %r9d,%edx + movl 0(%r14),%esi + rorl $16,%r12d + rorl $16,%ebp + movl 64(%r14),%edi + xorl %r12d,%ecx + xorl %ebp,%edx + movl 128(%r14),%r8d + rorl $8,%r12d + rorl $8,%ebp + movl 192(%r14),%r9d + xorl %r12d,%ecx + xorl %ebp,%edx + jmp .Lenc_loop_compact +.align 16 +.Lenc_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + retq +.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact +.globl AES_encrypt +.type AES_encrypt,@function +.align 16 +.globl asm_AES_encrypt +.hidden asm_AES_encrypt +asm_AES_encrypt: +AES_encrypt: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r10 + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $960,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %r10,24(%rsp) +.Lenc_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq .LAES_Te+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $768,%rbp + leaq (%r14,%rbp,1),%r14 + + call _x86_64_AES_encrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lenc_epilogue: + retq +.size AES_encrypt,.-AES_encrypt +.type _x86_64_AES_decrypt,@function +.align 16 +_x86_64_AES_decrypt: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp .Ldec_loop +.align 16 +.Ldec_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %bh,%esi + shrl $16,%eax + movzbl %ch,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + movl 12(%r15),%edx + movzbl %ah,%ebp + xorl 1(%r14,%rsi,8),%r12d + movl 0(%r15),%eax + xorl 1(%r14,%rbp,8),%r8d + + xorl %r10d,%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r12d,%ecx + xorl %r11d,%ebx + xorl %r8d,%edx + subl $1,%r13d + jnz .Ldec_loop + leaq 2048(%r14),%r14 + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl (%r14,%rsi,1),%r10d + movzbl (%r14,%rdi,1),%r11d + movzbl (%r14,%rbp,1),%r12d + + movzbl %dl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movzbl (%r14,%rsi,1),%r8d + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $8,%edi + shll $8,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%edx + + movzbl %bh,%esi + movzbl %ch,%edi + shrl $16,%eax + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + + shll $8,%esi + shll $8,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $16,%edi + shll $16,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $24,%edi + shll $24,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %dh,%esi + movzbl %ah,%edi + movl 16+12(%r15),%edx + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movl 16+0(%r15),%eax + + shll $24,%esi + shll $24,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + leaq -2048(%r14),%r14 + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + retq +.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt +.type _x86_64_AES_decrypt_compact,@function +.align 16 +_x86_64_AES_decrypt_compact: + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp .Ldec_loop_compact + +.align 16 +.Ldec_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + + movzbl %dl,%r8d + movzbl %dh,%esi + movzbl %ah,%edi + movzbl (%r14,%r8,1),%r8d + movzbl (%r14,%rsi,1),%r9d + movzbl (%r14,%rdi,1),%r13d + + movzbl %bh,%ebp + movzbl %ch,%esi + shrl $16,%ecx + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + shrl $16,%edx + + movzbl %cl,%edi + shll $8,%r9d + shll $8,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %r9d,%r10d + xorl %r13d,%r11d + + movzbl %dl,%r9d + shrl $16,%eax + shrl $16,%ebx + movzbl %al,%r13d + shll $8,%ebp + shll $8,%esi + movzbl (%r14,%r9,1),%r9d + movzbl (%r14,%r13,1),%r13d + xorl %ebp,%r12d + xorl %esi,%r8d + + movzbl %bl,%ebp + movzbl %bh,%esi + shll $16,%edi + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + xorl %edi,%r10d + + movzbl %ch,%edi + shll $16,%r9d + shll $16,%r13d + movzbl (%r14,%rdi,1),%ebx + xorl %r9d,%r11d + xorl %r13d,%r12d + + movzbl %dh,%edi + shrl $8,%eax + shll $16,%ebp + movzbl (%r14,%rdi,1),%ecx + movzbl (%r14,%rax,1),%edx + xorl %ebp,%r8d + + shll $24,%esi + shll $24,%ebx + shll $24,%ecx + xorl %esi,%r10d + shll $24,%edx + xorl %r11d,%ebx + movl %r10d,%eax + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je .Ldec_compact_done + + movq 256+0(%r14),%rsi + shlq $32,%rbx + shlq $32,%rdx + movq 256+8(%r14),%rdi + orq %rbx,%rax + orq %rdx,%rcx + movq 256+16(%r14),%rbp + movq %rax,%rbx + movq %rcx,%rdx + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r9 + movq %rdx,%r12 + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %r8,%rbx + xorq %r11,%rdx + movq %rbx,%r8 + movq %rdx,%r11 + + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r10 + movq %rdx,%r13 + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %r9,%rbx + xorq %r12,%rdx + movq %rbx,%r9 + movq %rdx,%r12 + + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r10 + movq %rdx,%r13 + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + xorq %r13,%r12 + shrq $32,%rbx + shrq $32,%rdx + xorq %r8,%r10 + xorq %r11,%r13 + roll $8,%eax + roll $8,%ecx + xorq %r9,%r10 + xorq %r12,%r13 + + roll $8,%ebx + roll $8,%edx + xorl %r10d,%eax + xorl %r13d,%ecx + shrq $32,%r10 + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + movq %r11,%r13 + shrq $32,%r10 + shrq $32,%r13 + roll $24,%r8d + roll $24,%r11d + roll $24,%r10d + roll $24,%r13d + xorl %r8d,%eax + xorl %r11d,%ecx + movq %r9,%r8 + movq %r12,%r11 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq 0(%r14),%rsi + shrq $32,%r8 + shrq $32,%r11 + movq 64(%r14),%rdi + roll $16,%r9d + roll $16,%r12d + movq 128(%r14),%rbp + roll $16,%r8d + roll $16,%r11d + movq 192(%r14),%r10 + xorl %r9d,%eax + xorl %r12d,%ecx + movq 256(%r14),%r13 + xorl %r8d,%ebx + xorl %r11d,%edx + jmp .Ldec_loop_compact +.align 16 +.Ldec_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + retq +.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact +.globl AES_decrypt +.type AES_decrypt,@function +.align 16 +.globl asm_AES_decrypt +.hidden asm_AES_decrypt +asm_AES_decrypt: +AES_decrypt: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r10 + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $960,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %r10,24(%rsp) +.Ldec_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq .LAES_Td+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $768,%rbp + leaq (%r14,%rbp,1),%r14 + shrq $3,%rbp + addq %rbp,%r14 + + call _x86_64_AES_decrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Ldec_epilogue: + retq +.size AES_decrypt,.-AES_decrypt +.globl AES_set_encrypt_key +.type AES_set_encrypt_key,@function +.align 16 +AES_set_encrypt_key: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $8,%rsp +.Lenc_key_prologue: + + call _x86_64_AES_set_encrypt_key + + movq 8(%rsp),%r15 + movq 16(%rsp),%r14 + movq 24(%rsp),%r13 + movq 32(%rsp),%r12 + movq 40(%rsp),%rbp + movq 48(%rsp),%rbx + addq $56,%rsp +.Lenc_key_epilogue: + retq +.size AES_set_encrypt_key,.-AES_set_encrypt_key + +.type _x86_64_AES_set_encrypt_key,@function +.align 16 +_x86_64_AES_set_encrypt_key: + movl %esi,%ecx + movq %rdi,%rsi + movq %rdx,%rdi + + testq $-1,%rsi + jz .Lbadpointer + testq $-1,%rdi + jz .Lbadpointer + + leaq .LAES_Te(%rip),%rbp + leaq 2048+128(%rbp),%rbp + + + movl 0-128(%rbp),%eax + movl 32-128(%rbp),%ebx + movl 64-128(%rbp),%r8d + movl 96-128(%rbp),%edx + movl 128-128(%rbp),%eax + movl 160-128(%rbp),%ebx + movl 192-128(%rbp),%r8d + movl 224-128(%rbp),%edx + + cmpl $128,%ecx + je .L10rounds + cmpl $192,%ecx + je .L12rounds + cmpl $256,%ecx + je .L14rounds + movq $-2,%rax + jmp .Lexit + +.L10rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rdx + movq %rax,0(%rdi) + movq %rdx,8(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L10shortcut +.align 4 +.L10loop: + movl 0(%rdi),%eax + movl 12(%rdi),%edx +.L10shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,16(%rdi) + xorl 4(%rdi),%eax + movl %eax,20(%rdi) + xorl 8(%rdi),%eax + movl %eax,24(%rdi) + xorl 12(%rdi),%eax + movl %eax,28(%rdi) + addl $1,%ecx + leaq 16(%rdi),%rdi + cmpl $10,%ecx + jl .L10loop + + movl $10,80(%rdi) + xorq %rax,%rax + jmp .Lexit + +.L12rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rdx,16(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L12shortcut +.align 4 +.L12loop: + movl 0(%rdi),%eax + movl 20(%rdi),%edx +.L12shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,24(%rdi) + xorl 4(%rdi),%eax + movl %eax,28(%rdi) + xorl 8(%rdi),%eax + movl %eax,32(%rdi) + xorl 12(%rdi),%eax + movl %eax,36(%rdi) + + cmpl $7,%ecx + je .L12break + addl $1,%ecx + + xorl 16(%rdi),%eax + movl %eax,40(%rdi) + xorl 20(%rdi),%eax + movl %eax,44(%rdi) + + leaq 24(%rdi),%rdi + jmp .L12loop +.L12break: + movl $12,72(%rdi) + xorq %rax,%rax + jmp .Lexit + +.L14rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rcx + movq 24(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L14shortcut +.align 4 +.L14loop: + movl 0(%rdi),%eax + movl 28(%rdi),%edx +.L14shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,32(%rdi) + xorl 4(%rdi),%eax + movl %eax,36(%rdi) + xorl 8(%rdi),%eax + movl %eax,40(%rdi) + xorl 12(%rdi),%eax + movl %eax,44(%rdi) + + cmpl $6,%ecx + je .L14break + addl $1,%ecx + + movl %eax,%edx + movl 16(%rdi),%eax + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + shll $8,%ebx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $16,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $24,%ebx + xorl %ebx,%eax + + movl %eax,48(%rdi) + xorl 20(%rdi),%eax + movl %eax,52(%rdi) + xorl 24(%rdi),%eax + movl %eax,56(%rdi) + xorl 28(%rdi),%eax + movl %eax,60(%rdi) + + leaq 32(%rdi),%rdi + jmp .L14loop +.L14break: + movl $14,48(%rdi) + xorq %rax,%rax + jmp .Lexit + +.Lbadpointer: + movq $-1,%rax +.Lexit: + retq +.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key +.globl AES_set_decrypt_key +.type AES_set_decrypt_key,@function +.align 16 +AES_set_decrypt_key: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rdx +.Ldec_key_prologue: + + call _x86_64_AES_set_encrypt_key + movq (%rsp),%r8 + cmpl $0,%eax + jne .Labort + + movl 240(%r8),%r14d + xorq %rdi,%rdi + leaq (%rdi,%r14,4),%rcx + movq %r8,%rsi + leaq (%r8,%rcx,4),%rdi +.align 4 +.Linvert: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 0(%rdi),%rcx + movq 8(%rdi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,0(%rsi) + movq %rdx,8(%rsi) + leaq 16(%rsi),%rsi + leaq -16(%rdi),%rdi + cmpq %rsi,%rdi + jne .Linvert + + leaq .LAES_Te+2048+1024(%rip),%rax + + movq 40(%rax),%rsi + movq 48(%rax),%rdi + movq 56(%rax),%rbp + + movq %r8,%r15 + subl $1,%r14d +.align 4 +.Lpermute: + leaq 16(%r15),%r15 + movq 0(%r15),%rax + movq 8(%r15),%rcx + movq %rax,%rbx + movq %rcx,%rdx + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r9 + movq %rdx,%r12 + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %r8,%rbx + xorq %r11,%rdx + movq %rbx,%r8 + movq %rdx,%r11 + + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r10 + movq %rdx,%r13 + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %r9,%rbx + xorq %r12,%rdx + movq %rbx,%r9 + movq %rdx,%r12 + + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r10 + movq %rdx,%r13 + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + xorq %r13,%r12 + shrq $32,%rbx + shrq $32,%rdx + xorq %r8,%r10 + xorq %r11,%r13 + roll $8,%eax + roll $8,%ecx + xorq %r9,%r10 + xorq %r12,%r13 + + roll $8,%ebx + roll $8,%edx + xorl %r10d,%eax + xorl %r13d,%ecx + shrq $32,%r10 + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + movq %r11,%r13 + shrq $32,%r10 + shrq $32,%r13 + roll $24,%r8d + roll $24,%r11d + roll $24,%r10d + roll $24,%r13d + xorl %r8d,%eax + xorl %r11d,%ecx + movq %r9,%r8 + movq %r12,%r11 + xorl %r10d,%ebx + xorl %r13d,%edx + + + shrq $32,%r8 + shrq $32,%r11 + + roll $16,%r9d + roll $16,%r12d + + roll $16,%r8d + roll $16,%r11d + + xorl %r9d,%eax + xorl %r12d,%ecx + + xorl %r8d,%ebx + xorl %r11d,%edx + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + subl $1,%r14d + jnz .Lpermute + + xorq %rax,%rax +.Labort: + movq 8(%rsp),%r15 + movq 16(%rsp),%r14 + movq 24(%rsp),%r13 + movq 32(%rsp),%r12 + movq 40(%rsp),%rbp + movq 48(%rsp),%rbx + addq $56,%rsp +.Ldec_key_epilogue: + retq +.size AES_set_decrypt_key,.-AES_set_decrypt_key +.globl AES_cbc_encrypt +.type AES_cbc_encrypt,@function +.align 16 + +.hidden OPENSSL_ia32cap_P +.globl asm_AES_cbc_encrypt +.hidden asm_AES_cbc_encrypt +asm_AES_cbc_encrypt: +AES_cbc_encrypt: + cmpq $0,%rdx + je .Lcbc_epilogue + pushfq + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.Lcbc_prologue: + + cld + movl %r9d,%r9d + + leaq .LAES_Te(%rip),%r14 + cmpq $0,%r9 + jne .Lcbc_picked_te + leaq .LAES_Td(%rip),%r14 +.Lcbc_picked_te: + + movl OPENSSL_ia32cap_P(%rip),%r10d + cmpq $512,%rdx + jb .Lcbc_slow_prologue + testq $15,%rdx + jnz .Lcbc_slow_prologue + btl $IA32CAP_BIT0_HT,%r10d + jc .Lcbc_slow_prologue + + + leaq -88-248(%rsp),%r15 + andq $-64,%r15 + + + movq %r14,%r10 + leaq 2304(%r14),%r11 + movq %r15,%r12 + andq $4095,%r10 + andq $4095,%r11 + andq $4095,%r12 + + cmpq %r11,%r12 + jb .Lcbc_te_break_out + subq %r11,%r12 + subq %r12,%r15 + jmp .Lcbc_te_ok +.Lcbc_te_break_out: + subq %r10,%r12 + andq $4095,%r12 + addq $320,%r12 + subq %r12,%r15 +.align 4 +.Lcbc_te_ok: + + xchgq %rsp,%r15 + + movq %r15,16(%rsp) +.Lcbc_fast_body: + movq %rdi,24(%rsp) + movq %rsi,32(%rsp) + movq %rdx,40(%rsp) + movq %rcx,48(%rsp) + movq %r8,56(%rsp) + movl $0,80+240(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + + movl 240(%r15),%eax + + movq %r15,%r10 + subq %r14,%r10 + andq $4095,%r10 + cmpq $2304,%r10 + jb .Lcbc_do_ecopy + cmpq $4096-248,%r10 + jb .Lcbc_skip_ecopy +.align 4 +.Lcbc_do_ecopy: + movq %r15,%rsi + leaq 80(%rsp),%rdi + leaq 80(%rsp),%r15 + movl $30,%ecx +.long 0x90A548F3 + movl %eax,(%rdi) +.Lcbc_skip_ecopy: + movq %r15,0(%rsp) + + movl $18,%ecx +.align 4 +.Lcbc_prefetch_te: + movq 0(%r14),%r10 + movq 32(%r14),%r11 + movq 64(%r14),%r12 + movq 96(%r14),%r13 + leaq 128(%r14),%r14 + subl $1,%ecx + jnz .Lcbc_prefetch_te + leaq -2304(%r14),%r14 + + cmpq $0,%rbx + je .LFAST_DECRYPT + + + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + +.align 4 +.Lcbc_fast_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_encrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + movq %r10,40(%rsp) + jnz .Lcbc_fast_enc_loop + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp .Lcbc_fast_cleanup + + +.align 16 +.LFAST_DECRYPT: + cmpq %r8,%r9 + je .Lcbc_fast_dec_in_place + + movq %rbp,64(%rsp) +.align 4 +.Lcbc_fast_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_decrypt + + movq 64(%rsp),%rbp + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0(%rbp),%eax + xorl 4(%rbp),%ebx + xorl 8(%rbp),%ecx + xorl 12(%rbp),%edx + movq %r8,%rbp + + subq $16,%r10 + movq %r10,40(%rsp) + movq %rbp,64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jnz .Lcbc_fast_dec_loop + movq 56(%rsp),%r12 + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0(%r12) + movq %r11,8(%r12) + jmp .Lcbc_fast_cleanup + +.align 16 +.Lcbc_fast_dec_in_place: + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0+64(%rsp) + movq %r11,8+64(%rsp) +.align 4 +.Lcbc_fast_dec_in_place_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_decrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jz .Lcbc_fast_dec_in_place_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + movq %r10,40(%rsp) + jmp .Lcbc_fast_dec_in_place_loop +.Lcbc_fast_dec_in_place_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + +.align 4 +.Lcbc_fast_cleanup: + cmpl $0,80+240(%rsp) + leaq 80(%rsp),%rdi + je .Lcbc_exit + movl $30,%ecx + xorq %rax,%rax +.long 0x90AB48F3 + + jmp .Lcbc_exit + + +.align 16 +.Lcbc_slow_prologue: + + leaq -88(%rsp),%rbp + andq $-64,%rbp + + leaq -88-63(%rcx),%r10 + subq %rbp,%r10 + negq %r10 + andq $960,%r10 + subq %r10,%rbp + + xchgq %rsp,%rbp + + movq %rbp,16(%rsp) +.Lcbc_slow_body: + + + + + movq %r8,56(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + movq %rdx,%r10 + + movl 240(%r15),%eax + movq %r15,0(%rsp) + shll $4,%eax + leaq (%r15,%rax,1),%rax + movq %rax,8(%rsp) + + + leaq 2048(%r14),%r14 + leaq 768-8(%rsp),%rax + subq %r14,%rax + andq $768,%rax + leaq (%r14,%rax,1),%r14 + + cmpq $0,%rbx + je .LSLOW_DECRYPT + + + testq $-16,%r10 + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + jz .Lcbc_slow_enc_tail + +.align 4 +.Lcbc_slow_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq %r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_encrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + jnz .Lcbc_slow_enc_loop + testq $15,%r10 + jnz .Lcbc_slow_enc_tail + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp .Lcbc_exit + +.align 4 +.Lcbc_slow_enc_tail: + movq %rax,%r11 + movq %rcx,%r12 + movq %r10,%rcx + movq %r8,%rsi + movq %r9,%rdi +.long 0x9066A4F3 + movq $16,%rcx + subq %r10,%rcx + xorq %rax,%rax +.long 0x9066AAF3 + movq %r9,%r8 + movq $16,%r10 + movq %r11,%rax + movq %r12,%rcx + jmp .Lcbc_slow_enc_loop + +.align 16 +.LSLOW_DECRYPT: + shrq $3,%rax + addq %rax,%r14 + + movq 0(%rbp),%r11 + movq 8(%rbp),%r12 + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + +.align 4 +.Lcbc_slow_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq %r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_decrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jc .Lcbc_slow_dec_partial + jz .Lcbc_slow_dec_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jmp .Lcbc_slow_dec_loop +.Lcbc_slow_dec_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + jmp .Lcbc_exit + +.align 4 +.Lcbc_slow_dec_partial: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0+64(%rsp) + movl %ebx,4+64(%rsp) + movl %ecx,8+64(%rsp) + movl %edx,12+64(%rsp) + + movq %r9,%rdi + leaq 64(%rsp),%rsi + leaq 16(%r10),%rcx +.long 0x9066A4F3 + jmp .Lcbc_exit + +.align 16 +.Lcbc_exit: + movq 16(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lcbc_popfq: + popfq +.Lcbc_epilogue: + retq +.size AES_cbc_encrypt,.-AES_cbc_encrypt +.align 64 +.LAES_Te: +.long 0xa56363c6,0xa56363c6 +.long 0x847c7cf8,0x847c7cf8 +.long 0x997777ee,0x997777ee +.long 0x8d7b7bf6,0x8d7b7bf6 +.long 0x0df2f2ff,0x0df2f2ff +.long 0xbd6b6bd6,0xbd6b6bd6 +.long 0xb16f6fde,0xb16f6fde +.long 0x54c5c591,0x54c5c591 +.long 0x50303060,0x50303060 +.long 0x03010102,0x03010102 +.long 0xa96767ce,0xa96767ce +.long 0x7d2b2b56,0x7d2b2b56 +.long 0x19fefee7,0x19fefee7 +.long 0x62d7d7b5,0x62d7d7b5 +.long 0xe6abab4d,0xe6abab4d +.long 0x9a7676ec,0x9a7676ec +.long 0x45caca8f,0x45caca8f +.long 0x9d82821f,0x9d82821f +.long 0x40c9c989,0x40c9c989 +.long 0x877d7dfa,0x877d7dfa +.long 0x15fafaef,0x15fafaef +.long 0xeb5959b2,0xeb5959b2 +.long 0xc947478e,0xc947478e +.long 0x0bf0f0fb,0x0bf0f0fb +.long 0xecadad41,0xecadad41 +.long 0x67d4d4b3,0x67d4d4b3 +.long 0xfda2a25f,0xfda2a25f +.long 0xeaafaf45,0xeaafaf45 +.long 0xbf9c9c23,0xbf9c9c23 +.long 0xf7a4a453,0xf7a4a453 +.long 0x967272e4,0x967272e4 +.long 0x5bc0c09b,0x5bc0c09b +.long 0xc2b7b775,0xc2b7b775 +.long 0x1cfdfde1,0x1cfdfde1 +.long 0xae93933d,0xae93933d +.long 0x6a26264c,0x6a26264c +.long 0x5a36366c,0x5a36366c +.long 0x413f3f7e,0x413f3f7e +.long 0x02f7f7f5,0x02f7f7f5 +.long 0x4fcccc83,0x4fcccc83 +.long 0x5c343468,0x5c343468 +.long 0xf4a5a551,0xf4a5a551 +.long 0x34e5e5d1,0x34e5e5d1 +.long 0x08f1f1f9,0x08f1f1f9 +.long 0x937171e2,0x937171e2 +.long 0x73d8d8ab,0x73d8d8ab +.long 0x53313162,0x53313162 +.long 0x3f15152a,0x3f15152a +.long 0x0c040408,0x0c040408 +.long 0x52c7c795,0x52c7c795 +.long 0x65232346,0x65232346 +.long 0x5ec3c39d,0x5ec3c39d +.long 0x28181830,0x28181830 +.long 0xa1969637,0xa1969637 +.long 0x0f05050a,0x0f05050a +.long 0xb59a9a2f,0xb59a9a2f +.long 0x0907070e,0x0907070e +.long 0x36121224,0x36121224 +.long 0x9b80801b,0x9b80801b +.long 0x3de2e2df,0x3de2e2df +.long 0x26ebebcd,0x26ebebcd +.long 0x6927274e,0x6927274e +.long 0xcdb2b27f,0xcdb2b27f +.long 0x9f7575ea,0x9f7575ea +.long 0x1b090912,0x1b090912 +.long 0x9e83831d,0x9e83831d +.long 0x742c2c58,0x742c2c58 +.long 0x2e1a1a34,0x2e1a1a34 +.long 0x2d1b1b36,0x2d1b1b36 +.long 0xb26e6edc,0xb26e6edc +.long 0xee5a5ab4,0xee5a5ab4 +.long 0xfba0a05b,0xfba0a05b +.long 0xf65252a4,0xf65252a4 +.long 0x4d3b3b76,0x4d3b3b76 +.long 0x61d6d6b7,0x61d6d6b7 +.long 0xceb3b37d,0xceb3b37d +.long 0x7b292952,0x7b292952 +.long 0x3ee3e3dd,0x3ee3e3dd +.long 0x712f2f5e,0x712f2f5e +.long 0x97848413,0x97848413 +.long 0xf55353a6,0xf55353a6 +.long 0x68d1d1b9,0x68d1d1b9 +.long 0x00000000,0x00000000 +.long 0x2cededc1,0x2cededc1 +.long 0x60202040,0x60202040 +.long 0x1ffcfce3,0x1ffcfce3 +.long 0xc8b1b179,0xc8b1b179 +.long 0xed5b5bb6,0xed5b5bb6 +.long 0xbe6a6ad4,0xbe6a6ad4 +.long 0x46cbcb8d,0x46cbcb8d +.long 0xd9bebe67,0xd9bebe67 +.long 0x4b393972,0x4b393972 +.long 0xde4a4a94,0xde4a4a94 +.long 0xd44c4c98,0xd44c4c98 +.long 0xe85858b0,0xe85858b0 +.long 0x4acfcf85,0x4acfcf85 +.long 0x6bd0d0bb,0x6bd0d0bb +.long 0x2aefefc5,0x2aefefc5 +.long 0xe5aaaa4f,0xe5aaaa4f +.long 0x16fbfbed,0x16fbfbed +.long 0xc5434386,0xc5434386 +.long 0xd74d4d9a,0xd74d4d9a +.long 0x55333366,0x55333366 +.long 0x94858511,0x94858511 +.long 0xcf45458a,0xcf45458a +.long 0x10f9f9e9,0x10f9f9e9 +.long 0x06020204,0x06020204 +.long 0x817f7ffe,0x817f7ffe +.long 0xf05050a0,0xf05050a0 +.long 0x443c3c78,0x443c3c78 +.long 0xba9f9f25,0xba9f9f25 +.long 0xe3a8a84b,0xe3a8a84b +.long 0xf35151a2,0xf35151a2 +.long 0xfea3a35d,0xfea3a35d +.long 0xc0404080,0xc0404080 +.long 0x8a8f8f05,0x8a8f8f05 +.long 0xad92923f,0xad92923f +.long 0xbc9d9d21,0xbc9d9d21 +.long 0x48383870,0x48383870 +.long 0x04f5f5f1,0x04f5f5f1 +.long 0xdfbcbc63,0xdfbcbc63 +.long 0xc1b6b677,0xc1b6b677 +.long 0x75dadaaf,0x75dadaaf +.long 0x63212142,0x63212142 +.long 0x30101020,0x30101020 +.long 0x1affffe5,0x1affffe5 +.long 0x0ef3f3fd,0x0ef3f3fd +.long 0x6dd2d2bf,0x6dd2d2bf +.long 0x4ccdcd81,0x4ccdcd81 +.long 0x140c0c18,0x140c0c18 +.long 0x35131326,0x35131326 +.long 0x2fececc3,0x2fececc3 +.long 0xe15f5fbe,0xe15f5fbe +.long 0xa2979735,0xa2979735 +.long 0xcc444488,0xcc444488 +.long 0x3917172e,0x3917172e +.long 0x57c4c493,0x57c4c493 +.long 0xf2a7a755,0xf2a7a755 +.long 0x827e7efc,0x827e7efc +.long 0x473d3d7a,0x473d3d7a +.long 0xac6464c8,0xac6464c8 +.long 0xe75d5dba,0xe75d5dba +.long 0x2b191932,0x2b191932 +.long 0x957373e6,0x957373e6 +.long 0xa06060c0,0xa06060c0 +.long 0x98818119,0x98818119 +.long 0xd14f4f9e,0xd14f4f9e +.long 0x7fdcdca3,0x7fdcdca3 +.long 0x66222244,0x66222244 +.long 0x7e2a2a54,0x7e2a2a54 +.long 0xab90903b,0xab90903b +.long 0x8388880b,0x8388880b +.long 0xca46468c,0xca46468c +.long 0x29eeeec7,0x29eeeec7 +.long 0xd3b8b86b,0xd3b8b86b +.long 0x3c141428,0x3c141428 +.long 0x79dedea7,0x79dedea7 +.long 0xe25e5ebc,0xe25e5ebc +.long 0x1d0b0b16,0x1d0b0b16 +.long 0x76dbdbad,0x76dbdbad +.long 0x3be0e0db,0x3be0e0db +.long 0x56323264,0x56323264 +.long 0x4e3a3a74,0x4e3a3a74 +.long 0x1e0a0a14,0x1e0a0a14 +.long 0xdb494992,0xdb494992 +.long 0x0a06060c,0x0a06060c +.long 0x6c242448,0x6c242448 +.long 0xe45c5cb8,0xe45c5cb8 +.long 0x5dc2c29f,0x5dc2c29f +.long 0x6ed3d3bd,0x6ed3d3bd +.long 0xefacac43,0xefacac43 +.long 0xa66262c4,0xa66262c4 +.long 0xa8919139,0xa8919139 +.long 0xa4959531,0xa4959531 +.long 0x37e4e4d3,0x37e4e4d3 +.long 0x8b7979f2,0x8b7979f2 +.long 0x32e7e7d5,0x32e7e7d5 +.long 0x43c8c88b,0x43c8c88b +.long 0x5937376e,0x5937376e +.long 0xb76d6dda,0xb76d6dda +.long 0x8c8d8d01,0x8c8d8d01 +.long 0x64d5d5b1,0x64d5d5b1 +.long 0xd24e4e9c,0xd24e4e9c +.long 0xe0a9a949,0xe0a9a949 +.long 0xb46c6cd8,0xb46c6cd8 +.long 0xfa5656ac,0xfa5656ac +.long 0x07f4f4f3,0x07f4f4f3 +.long 0x25eaeacf,0x25eaeacf +.long 0xaf6565ca,0xaf6565ca +.long 0x8e7a7af4,0x8e7a7af4 +.long 0xe9aeae47,0xe9aeae47 +.long 0x18080810,0x18080810 +.long 0xd5baba6f,0xd5baba6f +.long 0x887878f0,0x887878f0 +.long 0x6f25254a,0x6f25254a +.long 0x722e2e5c,0x722e2e5c +.long 0x241c1c38,0x241c1c38 +.long 0xf1a6a657,0xf1a6a657 +.long 0xc7b4b473,0xc7b4b473 +.long 0x51c6c697,0x51c6c697 +.long 0x23e8e8cb,0x23e8e8cb +.long 0x7cdddda1,0x7cdddda1 +.long 0x9c7474e8,0x9c7474e8 +.long 0x211f1f3e,0x211f1f3e +.long 0xdd4b4b96,0xdd4b4b96 +.long 0xdcbdbd61,0xdcbdbd61 +.long 0x868b8b0d,0x868b8b0d +.long 0x858a8a0f,0x858a8a0f +.long 0x907070e0,0x907070e0 +.long 0x423e3e7c,0x423e3e7c +.long 0xc4b5b571,0xc4b5b571 +.long 0xaa6666cc,0xaa6666cc +.long 0xd8484890,0xd8484890 +.long 0x05030306,0x05030306 +.long 0x01f6f6f7,0x01f6f6f7 +.long 0x120e0e1c,0x120e0e1c +.long 0xa36161c2,0xa36161c2 +.long 0x5f35356a,0x5f35356a +.long 0xf95757ae,0xf95757ae +.long 0xd0b9b969,0xd0b9b969 +.long 0x91868617,0x91868617 +.long 0x58c1c199,0x58c1c199 +.long 0x271d1d3a,0x271d1d3a +.long 0xb99e9e27,0xb99e9e27 +.long 0x38e1e1d9,0x38e1e1d9 +.long 0x13f8f8eb,0x13f8f8eb +.long 0xb398982b,0xb398982b +.long 0x33111122,0x33111122 +.long 0xbb6969d2,0xbb6969d2 +.long 0x70d9d9a9,0x70d9d9a9 +.long 0x898e8e07,0x898e8e07 +.long 0xa7949433,0xa7949433 +.long 0xb69b9b2d,0xb69b9b2d +.long 0x221e1e3c,0x221e1e3c +.long 0x92878715,0x92878715 +.long 0x20e9e9c9,0x20e9e9c9 +.long 0x49cece87,0x49cece87 +.long 0xff5555aa,0xff5555aa +.long 0x78282850,0x78282850 +.long 0x7adfdfa5,0x7adfdfa5 +.long 0x8f8c8c03,0x8f8c8c03 +.long 0xf8a1a159,0xf8a1a159 +.long 0x80898909,0x80898909 +.long 0x170d0d1a,0x170d0d1a +.long 0xdabfbf65,0xdabfbf65 +.long 0x31e6e6d7,0x31e6e6d7 +.long 0xc6424284,0xc6424284 +.long 0xb86868d0,0xb86868d0 +.long 0xc3414182,0xc3414182 +.long 0xb0999929,0xb0999929 +.long 0x772d2d5a,0x772d2d5a +.long 0x110f0f1e,0x110f0f1e +.long 0xcbb0b07b,0xcbb0b07b +.long 0xfc5454a8,0xfc5454a8 +.long 0xd6bbbb6d,0xd6bbbb6d +.long 0x3a16162c,0x3a16162c +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.long 0x00000001, 0x00000002, 0x00000004, 0x00000008 +.long 0x00000010, 0x00000020, 0x00000040, 0x00000080 +.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080 +.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b +.align 64 +.LAES_Td: +.long 0x50a7f451,0x50a7f451 +.long 0x5365417e,0x5365417e +.long 0xc3a4171a,0xc3a4171a +.long 0x965e273a,0x965e273a +.long 0xcb6bab3b,0xcb6bab3b +.long 0xf1459d1f,0xf1459d1f +.long 0xab58faac,0xab58faac +.long 0x9303e34b,0x9303e34b +.long 0x55fa3020,0x55fa3020 +.long 0xf66d76ad,0xf66d76ad +.long 0x9176cc88,0x9176cc88 +.long 0x254c02f5,0x254c02f5 +.long 0xfcd7e54f,0xfcd7e54f +.long 0xd7cb2ac5,0xd7cb2ac5 +.long 0x80443526,0x80443526 +.long 0x8fa362b5,0x8fa362b5 +.long 0x495ab1de,0x495ab1de +.long 0x671bba25,0x671bba25 +.long 0x980eea45,0x980eea45 +.long 0xe1c0fe5d,0xe1c0fe5d +.long 0x02752fc3,0x02752fc3 +.long 0x12f04c81,0x12f04c81 +.long 0xa397468d,0xa397468d +.long 0xc6f9d36b,0xc6f9d36b +.long 0xe75f8f03,0xe75f8f03 +.long 0x959c9215,0x959c9215 +.long 0xeb7a6dbf,0xeb7a6dbf +.long 0xda595295,0xda595295 +.long 0x2d83bed4,0x2d83bed4 +.long 0xd3217458,0xd3217458 +.long 0x2969e049,0x2969e049 +.long 0x44c8c98e,0x44c8c98e +.long 0x6a89c275,0x6a89c275 +.long 0x78798ef4,0x78798ef4 +.long 0x6b3e5899,0x6b3e5899 +.long 0xdd71b927,0xdd71b927 +.long 0xb64fe1be,0xb64fe1be +.long 0x17ad88f0,0x17ad88f0 +.long 0x66ac20c9,0x66ac20c9 +.long 0xb43ace7d,0xb43ace7d +.long 0x184adf63,0x184adf63 +.long 0x82311ae5,0x82311ae5 +.long 0x60335197,0x60335197 +.long 0x457f5362,0x457f5362 +.long 0xe07764b1,0xe07764b1 +.long 0x84ae6bbb,0x84ae6bbb +.long 0x1ca081fe,0x1ca081fe +.long 0x942b08f9,0x942b08f9 +.long 0x58684870,0x58684870 +.long 0x19fd458f,0x19fd458f +.long 0x876cde94,0x876cde94 +.long 0xb7f87b52,0xb7f87b52 +.long 0x23d373ab,0x23d373ab +.long 0xe2024b72,0xe2024b72 +.long 0x578f1fe3,0x578f1fe3 +.long 0x2aab5566,0x2aab5566 +.long 0x0728ebb2,0x0728ebb2 +.long 0x03c2b52f,0x03c2b52f +.long 0x9a7bc586,0x9a7bc586 +.long 0xa50837d3,0xa50837d3 +.long 0xf2872830,0xf2872830 +.long 0xb2a5bf23,0xb2a5bf23 +.long 0xba6a0302,0xba6a0302 +.long 0x5c8216ed,0x5c8216ed +.long 0x2b1ccf8a,0x2b1ccf8a +.long 0x92b479a7,0x92b479a7 +.long 0xf0f207f3,0xf0f207f3 +.long 0xa1e2694e,0xa1e2694e +.long 0xcdf4da65,0xcdf4da65 +.long 0xd5be0506,0xd5be0506 +.long 0x1f6234d1,0x1f6234d1 +.long 0x8afea6c4,0x8afea6c4 +.long 0x9d532e34,0x9d532e34 +.long 0xa055f3a2,0xa055f3a2 +.long 0x32e18a05,0x32e18a05 +.long 0x75ebf6a4,0x75ebf6a4 +.long 0x39ec830b,0x39ec830b +.long 0xaaef6040,0xaaef6040 +.long 0x069f715e,0x069f715e +.long 0x51106ebd,0x51106ebd +.long 0xf98a213e,0xf98a213e +.long 0x3d06dd96,0x3d06dd96 +.long 0xae053edd,0xae053edd +.long 0x46bde64d,0x46bde64d +.long 0xb58d5491,0xb58d5491 +.long 0x055dc471,0x055dc471 +.long 0x6fd40604,0x6fd40604 +.long 0xff155060,0xff155060 +.long 0x24fb9819,0x24fb9819 +.long 0x97e9bdd6,0x97e9bdd6 +.long 0xcc434089,0xcc434089 +.long 0x779ed967,0x779ed967 +.long 0xbd42e8b0,0xbd42e8b0 +.long 0x888b8907,0x888b8907 +.long 0x385b19e7,0x385b19e7 +.long 0xdbeec879,0xdbeec879 +.long 0x470a7ca1,0x470a7ca1 +.long 0xe90f427c,0xe90f427c +.long 0xc91e84f8,0xc91e84f8 +.long 0x00000000,0x00000000 +.long 0x83868009,0x83868009 +.long 0x48ed2b32,0x48ed2b32 +.long 0xac70111e,0xac70111e +.long 0x4e725a6c,0x4e725a6c +.long 0xfbff0efd,0xfbff0efd +.long 0x5638850f,0x5638850f +.long 0x1ed5ae3d,0x1ed5ae3d +.long 0x27392d36,0x27392d36 +.long 0x64d90f0a,0x64d90f0a +.long 0x21a65c68,0x21a65c68 +.long 0xd1545b9b,0xd1545b9b +.long 0x3a2e3624,0x3a2e3624 +.long 0xb1670a0c,0xb1670a0c +.long 0x0fe75793,0x0fe75793 +.long 0xd296eeb4,0xd296eeb4 +.long 0x9e919b1b,0x9e919b1b +.long 0x4fc5c080,0x4fc5c080 +.long 0xa220dc61,0xa220dc61 +.long 0x694b775a,0x694b775a +.long 0x161a121c,0x161a121c +.long 0x0aba93e2,0x0aba93e2 +.long 0xe52aa0c0,0xe52aa0c0 +.long 0x43e0223c,0x43e0223c +.long 0x1d171b12,0x1d171b12 +.long 0x0b0d090e,0x0b0d090e +.long 0xadc78bf2,0xadc78bf2 +.long 0xb9a8b62d,0xb9a8b62d +.long 0xc8a91e14,0xc8a91e14 +.long 0x8519f157,0x8519f157 +.long 0x4c0775af,0x4c0775af +.long 0xbbdd99ee,0xbbdd99ee +.long 0xfd607fa3,0xfd607fa3 +.long 0x9f2601f7,0x9f2601f7 +.long 0xbcf5725c,0xbcf5725c +.long 0xc53b6644,0xc53b6644 +.long 0x347efb5b,0x347efb5b +.long 0x7629438b,0x7629438b +.long 0xdcc623cb,0xdcc623cb +.long 0x68fcedb6,0x68fcedb6 +.long 0x63f1e4b8,0x63f1e4b8 +.long 0xcadc31d7,0xcadc31d7 +.long 0x10856342,0x10856342 +.long 0x40229713,0x40229713 +.long 0x2011c684,0x2011c684 +.long 0x7d244a85,0x7d244a85 +.long 0xf83dbbd2,0xf83dbbd2 +.long 0x1132f9ae,0x1132f9ae +.long 0x6da129c7,0x6da129c7 +.long 0x4b2f9e1d,0x4b2f9e1d +.long 0xf330b2dc,0xf330b2dc +.long 0xec52860d,0xec52860d +.long 0xd0e3c177,0xd0e3c177 +.long 0x6c16b32b,0x6c16b32b +.long 0x99b970a9,0x99b970a9 +.long 0xfa489411,0xfa489411 +.long 0x2264e947,0x2264e947 +.long 0xc48cfca8,0xc48cfca8 +.long 0x1a3ff0a0,0x1a3ff0a0 +.long 0xd82c7d56,0xd82c7d56 +.long 0xef903322,0xef903322 +.long 0xc74e4987,0xc74e4987 +.long 0xc1d138d9,0xc1d138d9 +.long 0xfea2ca8c,0xfea2ca8c +.long 0x360bd498,0x360bd498 +.long 0xcf81f5a6,0xcf81f5a6 +.long 0x28de7aa5,0x28de7aa5 +.long 0x268eb7da,0x268eb7da +.long 0xa4bfad3f,0xa4bfad3f +.long 0xe49d3a2c,0xe49d3a2c +.long 0x0d927850,0x0d927850 +.long 0x9bcc5f6a,0x9bcc5f6a +.long 0x62467e54,0x62467e54 +.long 0xc2138df6,0xc2138df6 +.long 0xe8b8d890,0xe8b8d890 +.long 0x5ef7392e,0x5ef7392e +.long 0xf5afc382,0xf5afc382 +.long 0xbe805d9f,0xbe805d9f +.long 0x7c93d069,0x7c93d069 +.long 0xa92dd56f,0xa92dd56f +.long 0xb31225cf,0xb31225cf +.long 0x3b99acc8,0x3b99acc8 +.long 0xa77d1810,0xa77d1810 +.long 0x6e639ce8,0x6e639ce8 +.long 0x7bbb3bdb,0x7bbb3bdb +.long 0x097826cd,0x097826cd +.long 0xf418596e,0xf418596e +.long 0x01b79aec,0x01b79aec +.long 0xa89a4f83,0xa89a4f83 +.long 0x656e95e6,0x656e95e6 +.long 0x7ee6ffaa,0x7ee6ffaa +.long 0x08cfbc21,0x08cfbc21 +.long 0xe6e815ef,0xe6e815ef +.long 0xd99be7ba,0xd99be7ba +.long 0xce366f4a,0xce366f4a +.long 0xd4099fea,0xd4099fea +.long 0xd67cb029,0xd67cb029 +.long 0xafb2a431,0xafb2a431 +.long 0x31233f2a,0x31233f2a +.long 0x3094a5c6,0x3094a5c6 +.long 0xc066a235,0xc066a235 +.long 0x37bc4e74,0x37bc4e74 +.long 0xa6ca82fc,0xa6ca82fc +.long 0xb0d090e0,0xb0d090e0 +.long 0x15d8a733,0x15d8a733 +.long 0x4a9804f1,0x4a9804f1 +.long 0xf7daec41,0xf7daec41 +.long 0x0e50cd7f,0x0e50cd7f +.long 0x2ff69117,0x2ff69117 +.long 0x8dd64d76,0x8dd64d76 +.long 0x4db0ef43,0x4db0ef43 +.long 0x544daacc,0x544daacc +.long 0xdf0496e4,0xdf0496e4 +.long 0xe3b5d19e,0xe3b5d19e +.long 0x1b886a4c,0x1b886a4c +.long 0xb81f2cc1,0xb81f2cc1 +.long 0x7f516546,0x7f516546 +.long 0x04ea5e9d,0x04ea5e9d +.long 0x5d358c01,0x5d358c01 +.long 0x737487fa,0x737487fa +.long 0x2e410bfb,0x2e410bfb +.long 0x5a1d67b3,0x5a1d67b3 +.long 0x52d2db92,0x52d2db92 +.long 0x335610e9,0x335610e9 +.long 0x1347d66d,0x1347d66d +.long 0x8c61d79a,0x8c61d79a +.long 0x7a0ca137,0x7a0ca137 +.long 0x8e14f859,0x8e14f859 +.long 0x893c13eb,0x893c13eb +.long 0xee27a9ce,0xee27a9ce +.long 0x35c961b7,0x35c961b7 +.long 0xede51ce1,0xede51ce1 +.long 0x3cb1477a,0x3cb1477a +.long 0x59dfd29c,0x59dfd29c +.long 0x3f73f255,0x3f73f255 +.long 0x79ce1418,0x79ce1418 +.long 0xbf37c773,0xbf37c773 +.long 0xeacdf753,0xeacdf753 +.long 0x5baafd5f,0x5baafd5f +.long 0x146f3ddf,0x146f3ddf +.long 0x86db4478,0x86db4478 +.long 0x81f3afca,0x81f3afca +.long 0x3ec468b9,0x3ec468b9 +.long 0x2c342438,0x2c342438 +.long 0x5f40a3c2,0x5f40a3c2 +.long 0x72c31d16,0x72c31d16 +.long 0x0c25e2bc,0x0c25e2bc +.long 0x8b493c28,0x8b493c28 +.long 0x41950dff,0x41950dff +.long 0x7101a839,0x7101a839 +.long 0xdeb30c08,0xdeb30c08 +.long 0x9ce4b4d8,0x9ce4b4d8 +.long 0x90c15664,0x90c15664 +.long 0x6184cb7b,0x6184cb7b +.long 0x70b632d5,0x70b632d5 +.long 0x745c6c48,0x745c6c48 +.long 0x4257b8d0,0x4257b8d0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +#if defined(HAVE_GNU_STACK) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/ext/libressl/crypto/aes/aes-macosx-x86_64.S b/ext/libressl/crypto/aes/aes-macosx-x86_64.S new file mode 100644 index 0000000..8a9c36e --- /dev/null +++ b/ext/libressl/crypto/aes/aes-macosx-x86_64.S @@ -0,0 +1,2544 @@ +#include "x86_arch.h" + +.text + +.p2align 4 +_x86_64_AES_encrypt: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp L$enc_loop +.p2align 4 +L$enc_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + shrl $16,%ecx + movzbl %ah,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movl 12(%r15),%edx + movzbl %bh,%edi + movzbl %ch,%ebp + movl 0(%r15),%eax + xorl 1(%r14,%rdi,8),%r12d + xorl 1(%r14,%rbp,8),%r8d + + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + subl $1,%r13d + jnz L$enc_loop + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl 2(%r14,%rsi,8),%r10d + movzbl 2(%r14,%rdi,8),%r11d + movzbl 2(%r14,%rbp,8),%r12d + + movzbl %dl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl 2(%r14,%rsi,8),%r8d + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $65280,%edi + andl $65280,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%ecx + + movzbl %dh,%esi + movzbl %ah,%edi + shrl $16,%edx + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + + andl $65280,%esi + andl $65280,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $16711680,%esi + andl $16711680,%edi + andl $16711680,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movl 0(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 2(%r14,%rbp,8),%ebp + + andl $16711680,%esi + andl $4278190080,%edi + andl $4278190080,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %bh,%esi + movzbl %ch,%edi + movl 16+12(%r15),%edx + movl 2(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 16+0(%r15),%eax + + andl $4278190080,%esi + andl $4278190080,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + retq + + +.p2align 4 +_x86_64_AES_encrypt_compact: + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp L$enc_loop_compact +.p2align 4 +L$enc_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + + movzbl %dl,%r8d + movzbl %bh,%esi + movzbl %ch,%edi + movzbl (%r14,%r8,1),%r8d + movzbl (%r14,%rsi,1),%r9d + movzbl (%r14,%rdi,1),%r13d + + movzbl %dh,%ebp + movzbl %ah,%esi + shrl $16,%ecx + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + shrl $16,%edx + + movzbl %cl,%edi + shll $8,%r9d + shll $8,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %r9d,%r10d + xorl %r13d,%r11d + + movzbl %dl,%r9d + shrl $16,%eax + shrl $16,%ebx + movzbl %al,%r13d + shll $8,%ebp + shll $8,%esi + movzbl (%r14,%r9,1),%r9d + movzbl (%r14,%r13,1),%r13d + xorl %ebp,%r12d + xorl %esi,%r8d + + movzbl %bl,%ebp + movzbl %dh,%esi + shll $16,%edi + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + xorl %edi,%r10d + + movzbl %ah,%edi + shrl $8,%ecx + shrl $8,%ebx + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rcx,1),%edx + movzbl (%r14,%rbx,1),%ecx + shll $16,%r9d + shll $16,%r13d + shll $16,%ebp + xorl %r9d,%r11d + xorl %r13d,%r12d + xorl %ebp,%r8d + + shll $24,%esi + shll $24,%edi + shll $24,%edx + xorl %esi,%r10d + shll $24,%ecx + xorl %edi,%r11d + movl %r10d,%eax + movl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je L$enc_compact_done + movl %eax,%esi + movl %ebx,%edi + andl $2155905152,%esi + andl $2155905152,%edi + movl %esi,%r10d + movl %edi,%r11d + shrl $7,%r10d + leal (%rax,%rax,1),%r8d + shrl $7,%r11d + leal (%rbx,%rbx,1),%r9d + subl %r10d,%esi + subl %r11d,%edi + andl $4278124286,%r8d + andl $4278124286,%r9d + andl $454761243,%esi + andl $454761243,%edi + movl %eax,%r10d + movl %ebx,%r11d + xorl %esi,%r8d + xorl %edi,%r9d + + xorl %r8d,%eax + xorl %r9d,%ebx + movl %ecx,%esi + movl %edx,%edi + roll $24,%eax + roll $24,%ebx + andl $2155905152,%esi + andl $2155905152,%edi + xorl %r8d,%eax + xorl %r9d,%ebx + movl %esi,%r12d + movl %edi,%ebp + rorl $16,%r10d + rorl $16,%r11d + shrl $7,%r12d + leal (%rcx,%rcx,1),%r8d + xorl %r10d,%eax + xorl %r11d,%ebx + shrl $7,%ebp + leal (%rdx,%rdx,1),%r9d + rorl $8,%r10d + rorl $8,%r11d + subl %r12d,%esi + subl %ebp,%edi + xorl %r10d,%eax + xorl %r11d,%ebx + + andl $4278124286,%r8d + andl $4278124286,%r9d + andl $454761243,%esi + andl $454761243,%edi + movl %ecx,%r12d + movl %edx,%ebp + xorl %esi,%r8d + xorl %edi,%r9d + + xorl %r8d,%ecx + xorl %r9d,%edx + roll $24,%ecx + roll $24,%edx + xorl %r8d,%ecx + xorl %r9d,%edx + movl 0(%r14),%esi + rorl $16,%r12d + rorl $16,%ebp + movl 64(%r14),%edi + xorl %r12d,%ecx + xorl %ebp,%edx + movl 128(%r14),%r8d + rorl $8,%r12d + rorl $8,%ebp + movl 192(%r14),%r9d + xorl %r12d,%ecx + xorl %ebp,%edx + jmp L$enc_loop_compact +.p2align 4 +L$enc_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + retq + +.globl _AES_encrypt + +.p2align 4 +.globl _asm_AES_encrypt +.private_extern _asm_AES_encrypt +_asm_AES_encrypt: +_AES_encrypt: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r10 + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $960,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %r10,24(%rsp) +L$enc_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq L$AES_Te+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $768,%rbp + leaq (%r14,%rbp,1),%r14 + + call _x86_64_AES_encrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$enc_epilogue: + retq + + +.p2align 4 +_x86_64_AES_decrypt: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp L$dec_loop +.p2align 4 +L$dec_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %bh,%esi + shrl $16,%eax + movzbl %ch,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + movl 12(%r15),%edx + movzbl %ah,%ebp + xorl 1(%r14,%rsi,8),%r12d + movl 0(%r15),%eax + xorl 1(%r14,%rbp,8),%r8d + + xorl %r10d,%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r12d,%ecx + xorl %r11d,%ebx + xorl %r8d,%edx + subl $1,%r13d + jnz L$dec_loop + leaq 2048(%r14),%r14 + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl (%r14,%rsi,1),%r10d + movzbl (%r14,%rdi,1),%r11d + movzbl (%r14,%rbp,1),%r12d + + movzbl %dl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movzbl (%r14,%rsi,1),%r8d + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $8,%edi + shll $8,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%edx + + movzbl %bh,%esi + movzbl %ch,%edi + shrl $16,%eax + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + + shll $8,%esi + shll $8,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $16,%edi + shll $16,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $24,%edi + shll $24,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %dh,%esi + movzbl %ah,%edi + movl 16+12(%r15),%edx + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movl 16+0(%r15),%eax + + shll $24,%esi + shll $24,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + leaq -2048(%r14),%r14 + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + retq + + +.p2align 4 +_x86_64_AES_decrypt_compact: + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp L$dec_loop_compact + +.p2align 4 +L$dec_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + + movzbl %dl,%r8d + movzbl %dh,%esi + movzbl %ah,%edi + movzbl (%r14,%r8,1),%r8d + movzbl (%r14,%rsi,1),%r9d + movzbl (%r14,%rdi,1),%r13d + + movzbl %bh,%ebp + movzbl %ch,%esi + shrl $16,%ecx + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + shrl $16,%edx + + movzbl %cl,%edi + shll $8,%r9d + shll $8,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %r9d,%r10d + xorl %r13d,%r11d + + movzbl %dl,%r9d + shrl $16,%eax + shrl $16,%ebx + movzbl %al,%r13d + shll $8,%ebp + shll $8,%esi + movzbl (%r14,%r9,1),%r9d + movzbl (%r14,%r13,1),%r13d + xorl %ebp,%r12d + xorl %esi,%r8d + + movzbl %bl,%ebp + movzbl %bh,%esi + shll $16,%edi + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + xorl %edi,%r10d + + movzbl %ch,%edi + shll $16,%r9d + shll $16,%r13d + movzbl (%r14,%rdi,1),%ebx + xorl %r9d,%r11d + xorl %r13d,%r12d + + movzbl %dh,%edi + shrl $8,%eax + shll $16,%ebp + movzbl (%r14,%rdi,1),%ecx + movzbl (%r14,%rax,1),%edx + xorl %ebp,%r8d + + shll $24,%esi + shll $24,%ebx + shll $24,%ecx + xorl %esi,%r10d + shll $24,%edx + xorl %r11d,%ebx + movl %r10d,%eax + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je L$dec_compact_done + + movq 256+0(%r14),%rsi + shlq $32,%rbx + shlq $32,%rdx + movq 256+8(%r14),%rdi + orq %rbx,%rax + orq %rdx,%rcx + movq 256+16(%r14),%rbp + movq %rax,%rbx + movq %rcx,%rdx + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r9 + movq %rdx,%r12 + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %r8,%rbx + xorq %r11,%rdx + movq %rbx,%r8 + movq %rdx,%r11 + + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r10 + movq %rdx,%r13 + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %r9,%rbx + xorq %r12,%rdx + movq %rbx,%r9 + movq %rdx,%r12 + + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r10 + movq %rdx,%r13 + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + xorq %r13,%r12 + shrq $32,%rbx + shrq $32,%rdx + xorq %r8,%r10 + xorq %r11,%r13 + roll $8,%eax + roll $8,%ecx + xorq %r9,%r10 + xorq %r12,%r13 + + roll $8,%ebx + roll $8,%edx + xorl %r10d,%eax + xorl %r13d,%ecx + shrq $32,%r10 + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + movq %r11,%r13 + shrq $32,%r10 + shrq $32,%r13 + roll $24,%r8d + roll $24,%r11d + roll $24,%r10d + roll $24,%r13d + xorl %r8d,%eax + xorl %r11d,%ecx + movq %r9,%r8 + movq %r12,%r11 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq 0(%r14),%rsi + shrq $32,%r8 + shrq $32,%r11 + movq 64(%r14),%rdi + roll $16,%r9d + roll $16,%r12d + movq 128(%r14),%rbp + roll $16,%r8d + roll $16,%r11d + movq 192(%r14),%r10 + xorl %r9d,%eax + xorl %r12d,%ecx + movq 256(%r14),%r13 + xorl %r8d,%ebx + xorl %r11d,%edx + jmp L$dec_loop_compact +.p2align 4 +L$dec_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + retq + +.globl _AES_decrypt + +.p2align 4 +.globl _asm_AES_decrypt +.private_extern _asm_AES_decrypt +_asm_AES_decrypt: +_AES_decrypt: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r10 + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $960,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %r10,24(%rsp) +L$dec_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq L$AES_Td+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $768,%rbp + leaq (%r14,%rbp,1),%r14 + shrq $3,%rbp + addq %rbp,%r14 + + call _x86_64_AES_decrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$dec_epilogue: + retq + +.globl _AES_set_encrypt_key + +.p2align 4 +_AES_set_encrypt_key: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $8,%rsp +L$enc_key_prologue: + + call _x86_64_AES_set_encrypt_key + + movq 8(%rsp),%r15 + movq 16(%rsp),%r14 + movq 24(%rsp),%r13 + movq 32(%rsp),%r12 + movq 40(%rsp),%rbp + movq 48(%rsp),%rbx + addq $56,%rsp +L$enc_key_epilogue: + retq + + + +.p2align 4 +_x86_64_AES_set_encrypt_key: + movl %esi,%ecx + movq %rdi,%rsi + movq %rdx,%rdi + + testq $-1,%rsi + jz L$badpointer + testq $-1,%rdi + jz L$badpointer + + leaq L$AES_Te(%rip),%rbp + leaq 2048+128(%rbp),%rbp + + + movl 0-128(%rbp),%eax + movl 32-128(%rbp),%ebx + movl 64-128(%rbp),%r8d + movl 96-128(%rbp),%edx + movl 128-128(%rbp),%eax + movl 160-128(%rbp),%ebx + movl 192-128(%rbp),%r8d + movl 224-128(%rbp),%edx + + cmpl $128,%ecx + je L$10rounds + cmpl $192,%ecx + je L$12rounds + cmpl $256,%ecx + je L$14rounds + movq $-2,%rax + jmp L$exit + +L$10rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rdx + movq %rax,0(%rdi) + movq %rdx,8(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp L$10shortcut +.p2align 2 +L$10loop: + movl 0(%rdi),%eax + movl 12(%rdi),%edx +L$10shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,16(%rdi) + xorl 4(%rdi),%eax + movl %eax,20(%rdi) + xorl 8(%rdi),%eax + movl %eax,24(%rdi) + xorl 12(%rdi),%eax + movl %eax,28(%rdi) + addl $1,%ecx + leaq 16(%rdi),%rdi + cmpl $10,%ecx + jl L$10loop + + movl $10,80(%rdi) + xorq %rax,%rax + jmp L$exit + +L$12rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rdx,16(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp L$12shortcut +.p2align 2 +L$12loop: + movl 0(%rdi),%eax + movl 20(%rdi),%edx +L$12shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,24(%rdi) + xorl 4(%rdi),%eax + movl %eax,28(%rdi) + xorl 8(%rdi),%eax + movl %eax,32(%rdi) + xorl 12(%rdi),%eax + movl %eax,36(%rdi) + + cmpl $7,%ecx + je L$12break + addl $1,%ecx + + xorl 16(%rdi),%eax + movl %eax,40(%rdi) + xorl 20(%rdi),%eax + movl %eax,44(%rdi) + + leaq 24(%rdi),%rdi + jmp L$12loop +L$12break: + movl $12,72(%rdi) + xorq %rax,%rax + jmp L$exit + +L$14rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rcx + movq 24(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp L$14shortcut +.p2align 2 +L$14loop: + movl 0(%rdi),%eax + movl 28(%rdi),%edx +L$14shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,32(%rdi) + xorl 4(%rdi),%eax + movl %eax,36(%rdi) + xorl 8(%rdi),%eax + movl %eax,40(%rdi) + xorl 12(%rdi),%eax + movl %eax,44(%rdi) + + cmpl $6,%ecx + je L$14break + addl $1,%ecx + + movl %eax,%edx + movl 16(%rdi),%eax + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + shll $8,%ebx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $16,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $24,%ebx + xorl %ebx,%eax + + movl %eax,48(%rdi) + xorl 20(%rdi),%eax + movl %eax,52(%rdi) + xorl 24(%rdi),%eax + movl %eax,56(%rdi) + xorl 28(%rdi),%eax + movl %eax,60(%rdi) + + leaq 32(%rdi),%rdi + jmp L$14loop +L$14break: + movl $14,48(%rdi) + xorq %rax,%rax + jmp L$exit + +L$badpointer: + movq $-1,%rax +L$exit: + retq + +.globl _AES_set_decrypt_key + +.p2align 4 +_AES_set_decrypt_key: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rdx +L$dec_key_prologue: + + call _x86_64_AES_set_encrypt_key + movq (%rsp),%r8 + cmpl $0,%eax + jne L$abort + + movl 240(%r8),%r14d + xorq %rdi,%rdi + leaq (%rdi,%r14,4),%rcx + movq %r8,%rsi + leaq (%r8,%rcx,4),%rdi +.p2align 2 +L$invert: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 0(%rdi),%rcx + movq 8(%rdi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,0(%rsi) + movq %rdx,8(%rsi) + leaq 16(%rsi),%rsi + leaq -16(%rdi),%rdi + cmpq %rsi,%rdi + jne L$invert + + leaq L$AES_Te+2048+1024(%rip),%rax + + movq 40(%rax),%rsi + movq 48(%rax),%rdi + movq 56(%rax),%rbp + + movq %r8,%r15 + subl $1,%r14d +.p2align 2 +L$permute: + leaq 16(%r15),%r15 + movq 0(%r15),%rax + movq 8(%r15),%rcx + movq %rax,%rbx + movq %rcx,%rdx + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r9 + movq %rdx,%r12 + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %r8,%rbx + xorq %r11,%rdx + movq %rbx,%r8 + movq %rdx,%r11 + + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r10 + movq %rdx,%r13 + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %r9,%rbx + xorq %r12,%rdx + movq %rbx,%r9 + movq %rdx,%r12 + + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r10 + movq %rdx,%r13 + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + xorq %r13,%r12 + shrq $32,%rbx + shrq $32,%rdx + xorq %r8,%r10 + xorq %r11,%r13 + roll $8,%eax + roll $8,%ecx + xorq %r9,%r10 + xorq %r12,%r13 + + roll $8,%ebx + roll $8,%edx + xorl %r10d,%eax + xorl %r13d,%ecx + shrq $32,%r10 + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + movq %r11,%r13 + shrq $32,%r10 + shrq $32,%r13 + roll $24,%r8d + roll $24,%r11d + roll $24,%r10d + roll $24,%r13d + xorl %r8d,%eax + xorl %r11d,%ecx + movq %r9,%r8 + movq %r12,%r11 + xorl %r10d,%ebx + xorl %r13d,%edx + + + shrq $32,%r8 + shrq $32,%r11 + + roll $16,%r9d + roll $16,%r12d + + roll $16,%r8d + roll $16,%r11d + + xorl %r9d,%eax + xorl %r12d,%ecx + + xorl %r8d,%ebx + xorl %r11d,%edx + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + subl $1,%r14d + jnz L$permute + + xorq %rax,%rax +L$abort: + movq 8(%rsp),%r15 + movq 16(%rsp),%r14 + movq 24(%rsp),%r13 + movq 32(%rsp),%r12 + movq 40(%rsp),%rbp + movq 48(%rsp),%rbx + addq $56,%rsp +L$dec_key_epilogue: + retq + +.globl _AES_cbc_encrypt + +.p2align 4 + +.private_extern _OPENSSL_ia32cap_P +.globl _asm_AES_cbc_encrypt +.private_extern _asm_AES_cbc_encrypt +_asm_AES_cbc_encrypt: +_AES_cbc_encrypt: + cmpq $0,%rdx + je L$cbc_epilogue + pushfq + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +L$cbc_prologue: + + cld + movl %r9d,%r9d + + leaq L$AES_Te(%rip),%r14 + cmpq $0,%r9 + jne L$cbc_picked_te + leaq L$AES_Td(%rip),%r14 +L$cbc_picked_te: + + movl _OPENSSL_ia32cap_P(%rip),%r10d + cmpq $512,%rdx + jb L$cbc_slow_prologue + testq $15,%rdx + jnz L$cbc_slow_prologue + btl $IA32CAP_BIT0_HT,%r10d + jc L$cbc_slow_prologue + + + leaq -88-248(%rsp),%r15 + andq $-64,%r15 + + + movq %r14,%r10 + leaq 2304(%r14),%r11 + movq %r15,%r12 + andq $4095,%r10 + andq $4095,%r11 + andq $4095,%r12 + + cmpq %r11,%r12 + jb L$cbc_te_break_out + subq %r11,%r12 + subq %r12,%r15 + jmp L$cbc_te_ok +L$cbc_te_break_out: + subq %r10,%r12 + andq $4095,%r12 + addq $320,%r12 + subq %r12,%r15 +.p2align 2 +L$cbc_te_ok: + + xchgq %rsp,%r15 + + movq %r15,16(%rsp) +L$cbc_fast_body: + movq %rdi,24(%rsp) + movq %rsi,32(%rsp) + movq %rdx,40(%rsp) + movq %rcx,48(%rsp) + movq %r8,56(%rsp) + movl $0,80+240(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + + movl 240(%r15),%eax + + movq %r15,%r10 + subq %r14,%r10 + andq $4095,%r10 + cmpq $2304,%r10 + jb L$cbc_do_ecopy + cmpq $4096-248,%r10 + jb L$cbc_skip_ecopy +.p2align 2 +L$cbc_do_ecopy: + movq %r15,%rsi + leaq 80(%rsp),%rdi + leaq 80(%rsp),%r15 + movl $30,%ecx +.long 0x90A548F3 + movl %eax,(%rdi) +L$cbc_skip_ecopy: + movq %r15,0(%rsp) + + movl $18,%ecx +.p2align 2 +L$cbc_prefetch_te: + movq 0(%r14),%r10 + movq 32(%r14),%r11 + movq 64(%r14),%r12 + movq 96(%r14),%r13 + leaq 128(%r14),%r14 + subl $1,%ecx + jnz L$cbc_prefetch_te + leaq -2304(%r14),%r14 + + cmpq $0,%rbx + je L$FAST_DECRYPT + + + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + +.p2align 2 +L$cbc_fast_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_encrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + movq %r10,40(%rsp) + jnz L$cbc_fast_enc_loop + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp L$cbc_fast_cleanup + + +.p2align 4 +L$FAST_DECRYPT: + cmpq %r8,%r9 + je L$cbc_fast_dec_in_place + + movq %rbp,64(%rsp) +.p2align 2 +L$cbc_fast_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_decrypt + + movq 64(%rsp),%rbp + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0(%rbp),%eax + xorl 4(%rbp),%ebx + xorl 8(%rbp),%ecx + xorl 12(%rbp),%edx + movq %r8,%rbp + + subq $16,%r10 + movq %r10,40(%rsp) + movq %rbp,64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jnz L$cbc_fast_dec_loop + movq 56(%rsp),%r12 + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0(%r12) + movq %r11,8(%r12) + jmp L$cbc_fast_cleanup + +.p2align 4 +L$cbc_fast_dec_in_place: + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0+64(%rsp) + movq %r11,8+64(%rsp) +.p2align 2 +L$cbc_fast_dec_in_place_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_decrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jz L$cbc_fast_dec_in_place_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + movq %r10,40(%rsp) + jmp L$cbc_fast_dec_in_place_loop +L$cbc_fast_dec_in_place_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + +.p2align 2 +L$cbc_fast_cleanup: + cmpl $0,80+240(%rsp) + leaq 80(%rsp),%rdi + je L$cbc_exit + movl $30,%ecx + xorq %rax,%rax +.long 0x90AB48F3 + + jmp L$cbc_exit + + +.p2align 4 +L$cbc_slow_prologue: + + leaq -88(%rsp),%rbp + andq $-64,%rbp + + leaq -88-63(%rcx),%r10 + subq %rbp,%r10 + negq %r10 + andq $960,%r10 + subq %r10,%rbp + + xchgq %rsp,%rbp + + movq %rbp,16(%rsp) +L$cbc_slow_body: + + + + + movq %r8,56(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + movq %rdx,%r10 + + movl 240(%r15),%eax + movq %r15,0(%rsp) + shll $4,%eax + leaq (%r15,%rax,1),%rax + movq %rax,8(%rsp) + + + leaq 2048(%r14),%r14 + leaq 768-8(%rsp),%rax + subq %r14,%rax + andq $768,%rax + leaq (%r14,%rax,1),%r14 + + cmpq $0,%rbx + je L$SLOW_DECRYPT + + + testq $-16,%r10 + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + jz L$cbc_slow_enc_tail + +.p2align 2 +L$cbc_slow_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq %r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_encrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + jnz L$cbc_slow_enc_loop + testq $15,%r10 + jnz L$cbc_slow_enc_tail + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp L$cbc_exit + +.p2align 2 +L$cbc_slow_enc_tail: + movq %rax,%r11 + movq %rcx,%r12 + movq %r10,%rcx + movq %r8,%rsi + movq %r9,%rdi +.long 0x9066A4F3 + movq $16,%rcx + subq %r10,%rcx + xorq %rax,%rax +.long 0x9066AAF3 + movq %r9,%r8 + movq $16,%r10 + movq %r11,%rax + movq %r12,%rcx + jmp L$cbc_slow_enc_loop + +.p2align 4 +L$SLOW_DECRYPT: + shrq $3,%rax + addq %rax,%r14 + + movq 0(%rbp),%r11 + movq 8(%rbp),%r12 + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + +.p2align 2 +L$cbc_slow_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq %r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_decrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jc L$cbc_slow_dec_partial + jz L$cbc_slow_dec_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jmp L$cbc_slow_dec_loop +L$cbc_slow_dec_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + jmp L$cbc_exit + +.p2align 2 +L$cbc_slow_dec_partial: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0+64(%rsp) + movl %ebx,4+64(%rsp) + movl %ecx,8+64(%rsp) + movl %edx,12+64(%rsp) + + movq %r9,%rdi + leaq 64(%rsp),%rsi + leaq 16(%r10),%rcx +.long 0x9066A4F3 + jmp L$cbc_exit + +.p2align 4 +L$cbc_exit: + movq 16(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$cbc_popfq: + popfq +L$cbc_epilogue: + retq + +.p2align 6 +L$AES_Te: +.long 0xa56363c6,0xa56363c6 +.long 0x847c7cf8,0x847c7cf8 +.long 0x997777ee,0x997777ee +.long 0x8d7b7bf6,0x8d7b7bf6 +.long 0x0df2f2ff,0x0df2f2ff +.long 0xbd6b6bd6,0xbd6b6bd6 +.long 0xb16f6fde,0xb16f6fde +.long 0x54c5c591,0x54c5c591 +.long 0x50303060,0x50303060 +.long 0x03010102,0x03010102 +.long 0xa96767ce,0xa96767ce +.long 0x7d2b2b56,0x7d2b2b56 +.long 0x19fefee7,0x19fefee7 +.long 0x62d7d7b5,0x62d7d7b5 +.long 0xe6abab4d,0xe6abab4d +.long 0x9a7676ec,0x9a7676ec +.long 0x45caca8f,0x45caca8f +.long 0x9d82821f,0x9d82821f +.long 0x40c9c989,0x40c9c989 +.long 0x877d7dfa,0x877d7dfa +.long 0x15fafaef,0x15fafaef +.long 0xeb5959b2,0xeb5959b2 +.long 0xc947478e,0xc947478e +.long 0x0bf0f0fb,0x0bf0f0fb +.long 0xecadad41,0xecadad41 +.long 0x67d4d4b3,0x67d4d4b3 +.long 0xfda2a25f,0xfda2a25f +.long 0xeaafaf45,0xeaafaf45 +.long 0xbf9c9c23,0xbf9c9c23 +.long 0xf7a4a453,0xf7a4a453 +.long 0x967272e4,0x967272e4 +.long 0x5bc0c09b,0x5bc0c09b +.long 0xc2b7b775,0xc2b7b775 +.long 0x1cfdfde1,0x1cfdfde1 +.long 0xae93933d,0xae93933d +.long 0x6a26264c,0x6a26264c +.long 0x5a36366c,0x5a36366c +.long 0x413f3f7e,0x413f3f7e +.long 0x02f7f7f5,0x02f7f7f5 +.long 0x4fcccc83,0x4fcccc83 +.long 0x5c343468,0x5c343468 +.long 0xf4a5a551,0xf4a5a551 +.long 0x34e5e5d1,0x34e5e5d1 +.long 0x08f1f1f9,0x08f1f1f9 +.long 0x937171e2,0x937171e2 +.long 0x73d8d8ab,0x73d8d8ab +.long 0x53313162,0x53313162 +.long 0x3f15152a,0x3f15152a +.long 0x0c040408,0x0c040408 +.long 0x52c7c795,0x52c7c795 +.long 0x65232346,0x65232346 +.long 0x5ec3c39d,0x5ec3c39d +.long 0x28181830,0x28181830 +.long 0xa1969637,0xa1969637 +.long 0x0f05050a,0x0f05050a +.long 0xb59a9a2f,0xb59a9a2f +.long 0x0907070e,0x0907070e +.long 0x36121224,0x36121224 +.long 0x9b80801b,0x9b80801b +.long 0x3de2e2df,0x3de2e2df +.long 0x26ebebcd,0x26ebebcd +.long 0x6927274e,0x6927274e +.long 0xcdb2b27f,0xcdb2b27f +.long 0x9f7575ea,0x9f7575ea +.long 0x1b090912,0x1b090912 +.long 0x9e83831d,0x9e83831d +.long 0x742c2c58,0x742c2c58 +.long 0x2e1a1a34,0x2e1a1a34 +.long 0x2d1b1b36,0x2d1b1b36 +.long 0xb26e6edc,0xb26e6edc +.long 0xee5a5ab4,0xee5a5ab4 +.long 0xfba0a05b,0xfba0a05b +.long 0xf65252a4,0xf65252a4 +.long 0x4d3b3b76,0x4d3b3b76 +.long 0x61d6d6b7,0x61d6d6b7 +.long 0xceb3b37d,0xceb3b37d +.long 0x7b292952,0x7b292952 +.long 0x3ee3e3dd,0x3ee3e3dd +.long 0x712f2f5e,0x712f2f5e +.long 0x97848413,0x97848413 +.long 0xf55353a6,0xf55353a6 +.long 0x68d1d1b9,0x68d1d1b9 +.long 0x00000000,0x00000000 +.long 0x2cededc1,0x2cededc1 +.long 0x60202040,0x60202040 +.long 0x1ffcfce3,0x1ffcfce3 +.long 0xc8b1b179,0xc8b1b179 +.long 0xed5b5bb6,0xed5b5bb6 +.long 0xbe6a6ad4,0xbe6a6ad4 +.long 0x46cbcb8d,0x46cbcb8d +.long 0xd9bebe67,0xd9bebe67 +.long 0x4b393972,0x4b393972 +.long 0xde4a4a94,0xde4a4a94 +.long 0xd44c4c98,0xd44c4c98 +.long 0xe85858b0,0xe85858b0 +.long 0x4acfcf85,0x4acfcf85 +.long 0x6bd0d0bb,0x6bd0d0bb +.long 0x2aefefc5,0x2aefefc5 +.long 0xe5aaaa4f,0xe5aaaa4f +.long 0x16fbfbed,0x16fbfbed +.long 0xc5434386,0xc5434386 +.long 0xd74d4d9a,0xd74d4d9a +.long 0x55333366,0x55333366 +.long 0x94858511,0x94858511 +.long 0xcf45458a,0xcf45458a +.long 0x10f9f9e9,0x10f9f9e9 +.long 0x06020204,0x06020204 +.long 0x817f7ffe,0x817f7ffe +.long 0xf05050a0,0xf05050a0 +.long 0x443c3c78,0x443c3c78 +.long 0xba9f9f25,0xba9f9f25 +.long 0xe3a8a84b,0xe3a8a84b +.long 0xf35151a2,0xf35151a2 +.long 0xfea3a35d,0xfea3a35d +.long 0xc0404080,0xc0404080 +.long 0x8a8f8f05,0x8a8f8f05 +.long 0xad92923f,0xad92923f +.long 0xbc9d9d21,0xbc9d9d21 +.long 0x48383870,0x48383870 +.long 0x04f5f5f1,0x04f5f5f1 +.long 0xdfbcbc63,0xdfbcbc63 +.long 0xc1b6b677,0xc1b6b677 +.long 0x75dadaaf,0x75dadaaf +.long 0x63212142,0x63212142 +.long 0x30101020,0x30101020 +.long 0x1affffe5,0x1affffe5 +.long 0x0ef3f3fd,0x0ef3f3fd +.long 0x6dd2d2bf,0x6dd2d2bf +.long 0x4ccdcd81,0x4ccdcd81 +.long 0x140c0c18,0x140c0c18 +.long 0x35131326,0x35131326 +.long 0x2fececc3,0x2fececc3 +.long 0xe15f5fbe,0xe15f5fbe +.long 0xa2979735,0xa2979735 +.long 0xcc444488,0xcc444488 +.long 0x3917172e,0x3917172e +.long 0x57c4c493,0x57c4c493 +.long 0xf2a7a755,0xf2a7a755 +.long 0x827e7efc,0x827e7efc +.long 0x473d3d7a,0x473d3d7a +.long 0xac6464c8,0xac6464c8 +.long 0xe75d5dba,0xe75d5dba +.long 0x2b191932,0x2b191932 +.long 0x957373e6,0x957373e6 +.long 0xa06060c0,0xa06060c0 +.long 0x98818119,0x98818119 +.long 0xd14f4f9e,0xd14f4f9e +.long 0x7fdcdca3,0x7fdcdca3 +.long 0x66222244,0x66222244 +.long 0x7e2a2a54,0x7e2a2a54 +.long 0xab90903b,0xab90903b +.long 0x8388880b,0x8388880b +.long 0xca46468c,0xca46468c +.long 0x29eeeec7,0x29eeeec7 +.long 0xd3b8b86b,0xd3b8b86b +.long 0x3c141428,0x3c141428 +.long 0x79dedea7,0x79dedea7 +.long 0xe25e5ebc,0xe25e5ebc +.long 0x1d0b0b16,0x1d0b0b16 +.long 0x76dbdbad,0x76dbdbad +.long 0x3be0e0db,0x3be0e0db +.long 0x56323264,0x56323264 +.long 0x4e3a3a74,0x4e3a3a74 +.long 0x1e0a0a14,0x1e0a0a14 +.long 0xdb494992,0xdb494992 +.long 0x0a06060c,0x0a06060c +.long 0x6c242448,0x6c242448 +.long 0xe45c5cb8,0xe45c5cb8 +.long 0x5dc2c29f,0x5dc2c29f +.long 0x6ed3d3bd,0x6ed3d3bd +.long 0xefacac43,0xefacac43 +.long 0xa66262c4,0xa66262c4 +.long 0xa8919139,0xa8919139 +.long 0xa4959531,0xa4959531 +.long 0x37e4e4d3,0x37e4e4d3 +.long 0x8b7979f2,0x8b7979f2 +.long 0x32e7e7d5,0x32e7e7d5 +.long 0x43c8c88b,0x43c8c88b +.long 0x5937376e,0x5937376e +.long 0xb76d6dda,0xb76d6dda +.long 0x8c8d8d01,0x8c8d8d01 +.long 0x64d5d5b1,0x64d5d5b1 +.long 0xd24e4e9c,0xd24e4e9c +.long 0xe0a9a949,0xe0a9a949 +.long 0xb46c6cd8,0xb46c6cd8 +.long 0xfa5656ac,0xfa5656ac +.long 0x07f4f4f3,0x07f4f4f3 +.long 0x25eaeacf,0x25eaeacf +.long 0xaf6565ca,0xaf6565ca +.long 0x8e7a7af4,0x8e7a7af4 +.long 0xe9aeae47,0xe9aeae47 +.long 0x18080810,0x18080810 +.long 0xd5baba6f,0xd5baba6f +.long 0x887878f0,0x887878f0 +.long 0x6f25254a,0x6f25254a +.long 0x722e2e5c,0x722e2e5c +.long 0x241c1c38,0x241c1c38 +.long 0xf1a6a657,0xf1a6a657 +.long 0xc7b4b473,0xc7b4b473 +.long 0x51c6c697,0x51c6c697 +.long 0x23e8e8cb,0x23e8e8cb +.long 0x7cdddda1,0x7cdddda1 +.long 0x9c7474e8,0x9c7474e8 +.long 0x211f1f3e,0x211f1f3e +.long 0xdd4b4b96,0xdd4b4b96 +.long 0xdcbdbd61,0xdcbdbd61 +.long 0x868b8b0d,0x868b8b0d +.long 0x858a8a0f,0x858a8a0f +.long 0x907070e0,0x907070e0 +.long 0x423e3e7c,0x423e3e7c +.long 0xc4b5b571,0xc4b5b571 +.long 0xaa6666cc,0xaa6666cc +.long 0xd8484890,0xd8484890 +.long 0x05030306,0x05030306 +.long 0x01f6f6f7,0x01f6f6f7 +.long 0x120e0e1c,0x120e0e1c +.long 0xa36161c2,0xa36161c2 +.long 0x5f35356a,0x5f35356a +.long 0xf95757ae,0xf95757ae +.long 0xd0b9b969,0xd0b9b969 +.long 0x91868617,0x91868617 +.long 0x58c1c199,0x58c1c199 +.long 0x271d1d3a,0x271d1d3a +.long 0xb99e9e27,0xb99e9e27 +.long 0x38e1e1d9,0x38e1e1d9 +.long 0x13f8f8eb,0x13f8f8eb +.long 0xb398982b,0xb398982b +.long 0x33111122,0x33111122 +.long 0xbb6969d2,0xbb6969d2 +.long 0x70d9d9a9,0x70d9d9a9 +.long 0x898e8e07,0x898e8e07 +.long 0xa7949433,0xa7949433 +.long 0xb69b9b2d,0xb69b9b2d +.long 0x221e1e3c,0x221e1e3c +.long 0x92878715,0x92878715 +.long 0x20e9e9c9,0x20e9e9c9 +.long 0x49cece87,0x49cece87 +.long 0xff5555aa,0xff5555aa +.long 0x78282850,0x78282850 +.long 0x7adfdfa5,0x7adfdfa5 +.long 0x8f8c8c03,0x8f8c8c03 +.long 0xf8a1a159,0xf8a1a159 +.long 0x80898909,0x80898909 +.long 0x170d0d1a,0x170d0d1a +.long 0xdabfbf65,0xdabfbf65 +.long 0x31e6e6d7,0x31e6e6d7 +.long 0xc6424284,0xc6424284 +.long 0xb86868d0,0xb86868d0 +.long 0xc3414182,0xc3414182 +.long 0xb0999929,0xb0999929 +.long 0x772d2d5a,0x772d2d5a +.long 0x110f0f1e,0x110f0f1e +.long 0xcbb0b07b,0xcbb0b07b +.long 0xfc5454a8,0xfc5454a8 +.long 0xd6bbbb6d,0xd6bbbb6d +.long 0x3a16162c,0x3a16162c +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.long 0x00000001, 0x00000002, 0x00000004, 0x00000008 +.long 0x00000010, 0x00000020, 0x00000040, 0x00000080 +.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080 +.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b +.p2align 6 +L$AES_Td: +.long 0x50a7f451,0x50a7f451 +.long 0x5365417e,0x5365417e +.long 0xc3a4171a,0xc3a4171a +.long 0x965e273a,0x965e273a +.long 0xcb6bab3b,0xcb6bab3b +.long 0xf1459d1f,0xf1459d1f +.long 0xab58faac,0xab58faac +.long 0x9303e34b,0x9303e34b +.long 0x55fa3020,0x55fa3020 +.long 0xf66d76ad,0xf66d76ad +.long 0x9176cc88,0x9176cc88 +.long 0x254c02f5,0x254c02f5 +.long 0xfcd7e54f,0xfcd7e54f +.long 0xd7cb2ac5,0xd7cb2ac5 +.long 0x80443526,0x80443526 +.long 0x8fa362b5,0x8fa362b5 +.long 0x495ab1de,0x495ab1de +.long 0x671bba25,0x671bba25 +.long 0x980eea45,0x980eea45 +.long 0xe1c0fe5d,0xe1c0fe5d +.long 0x02752fc3,0x02752fc3 +.long 0x12f04c81,0x12f04c81 +.long 0xa397468d,0xa397468d +.long 0xc6f9d36b,0xc6f9d36b +.long 0xe75f8f03,0xe75f8f03 +.long 0x959c9215,0x959c9215 +.long 0xeb7a6dbf,0xeb7a6dbf +.long 0xda595295,0xda595295 +.long 0x2d83bed4,0x2d83bed4 +.long 0xd3217458,0xd3217458 +.long 0x2969e049,0x2969e049 +.long 0x44c8c98e,0x44c8c98e +.long 0x6a89c275,0x6a89c275 +.long 0x78798ef4,0x78798ef4 +.long 0x6b3e5899,0x6b3e5899 +.long 0xdd71b927,0xdd71b927 +.long 0xb64fe1be,0xb64fe1be +.long 0x17ad88f0,0x17ad88f0 +.long 0x66ac20c9,0x66ac20c9 +.long 0xb43ace7d,0xb43ace7d +.long 0x184adf63,0x184adf63 +.long 0x82311ae5,0x82311ae5 +.long 0x60335197,0x60335197 +.long 0x457f5362,0x457f5362 +.long 0xe07764b1,0xe07764b1 +.long 0x84ae6bbb,0x84ae6bbb +.long 0x1ca081fe,0x1ca081fe +.long 0x942b08f9,0x942b08f9 +.long 0x58684870,0x58684870 +.long 0x19fd458f,0x19fd458f +.long 0x876cde94,0x876cde94 +.long 0xb7f87b52,0xb7f87b52 +.long 0x23d373ab,0x23d373ab +.long 0xe2024b72,0xe2024b72 +.long 0x578f1fe3,0x578f1fe3 +.long 0x2aab5566,0x2aab5566 +.long 0x0728ebb2,0x0728ebb2 +.long 0x03c2b52f,0x03c2b52f +.long 0x9a7bc586,0x9a7bc586 +.long 0xa50837d3,0xa50837d3 +.long 0xf2872830,0xf2872830 +.long 0xb2a5bf23,0xb2a5bf23 +.long 0xba6a0302,0xba6a0302 +.long 0x5c8216ed,0x5c8216ed +.long 0x2b1ccf8a,0x2b1ccf8a +.long 0x92b479a7,0x92b479a7 +.long 0xf0f207f3,0xf0f207f3 +.long 0xa1e2694e,0xa1e2694e +.long 0xcdf4da65,0xcdf4da65 +.long 0xd5be0506,0xd5be0506 +.long 0x1f6234d1,0x1f6234d1 +.long 0x8afea6c4,0x8afea6c4 +.long 0x9d532e34,0x9d532e34 +.long 0xa055f3a2,0xa055f3a2 +.long 0x32e18a05,0x32e18a05 +.long 0x75ebf6a4,0x75ebf6a4 +.long 0x39ec830b,0x39ec830b +.long 0xaaef6040,0xaaef6040 +.long 0x069f715e,0x069f715e +.long 0x51106ebd,0x51106ebd +.long 0xf98a213e,0xf98a213e +.long 0x3d06dd96,0x3d06dd96 +.long 0xae053edd,0xae053edd +.long 0x46bde64d,0x46bde64d +.long 0xb58d5491,0xb58d5491 +.long 0x055dc471,0x055dc471 +.long 0x6fd40604,0x6fd40604 +.long 0xff155060,0xff155060 +.long 0x24fb9819,0x24fb9819 +.long 0x97e9bdd6,0x97e9bdd6 +.long 0xcc434089,0xcc434089 +.long 0x779ed967,0x779ed967 +.long 0xbd42e8b0,0xbd42e8b0 +.long 0x888b8907,0x888b8907 +.long 0x385b19e7,0x385b19e7 +.long 0xdbeec879,0xdbeec879 +.long 0x470a7ca1,0x470a7ca1 +.long 0xe90f427c,0xe90f427c +.long 0xc91e84f8,0xc91e84f8 +.long 0x00000000,0x00000000 +.long 0x83868009,0x83868009 +.long 0x48ed2b32,0x48ed2b32 +.long 0xac70111e,0xac70111e +.long 0x4e725a6c,0x4e725a6c +.long 0xfbff0efd,0xfbff0efd +.long 0x5638850f,0x5638850f +.long 0x1ed5ae3d,0x1ed5ae3d +.long 0x27392d36,0x27392d36 +.long 0x64d90f0a,0x64d90f0a +.long 0x21a65c68,0x21a65c68 +.long 0xd1545b9b,0xd1545b9b +.long 0x3a2e3624,0x3a2e3624 +.long 0xb1670a0c,0xb1670a0c +.long 0x0fe75793,0x0fe75793 +.long 0xd296eeb4,0xd296eeb4 +.long 0x9e919b1b,0x9e919b1b +.long 0x4fc5c080,0x4fc5c080 +.long 0xa220dc61,0xa220dc61 +.long 0x694b775a,0x694b775a +.long 0x161a121c,0x161a121c +.long 0x0aba93e2,0x0aba93e2 +.long 0xe52aa0c0,0xe52aa0c0 +.long 0x43e0223c,0x43e0223c +.long 0x1d171b12,0x1d171b12 +.long 0x0b0d090e,0x0b0d090e +.long 0xadc78bf2,0xadc78bf2 +.long 0xb9a8b62d,0xb9a8b62d +.long 0xc8a91e14,0xc8a91e14 +.long 0x8519f157,0x8519f157 +.long 0x4c0775af,0x4c0775af +.long 0xbbdd99ee,0xbbdd99ee +.long 0xfd607fa3,0xfd607fa3 +.long 0x9f2601f7,0x9f2601f7 +.long 0xbcf5725c,0xbcf5725c +.long 0xc53b6644,0xc53b6644 +.long 0x347efb5b,0x347efb5b +.long 0x7629438b,0x7629438b +.long 0xdcc623cb,0xdcc623cb +.long 0x68fcedb6,0x68fcedb6 +.long 0x63f1e4b8,0x63f1e4b8 +.long 0xcadc31d7,0xcadc31d7 +.long 0x10856342,0x10856342 +.long 0x40229713,0x40229713 +.long 0x2011c684,0x2011c684 +.long 0x7d244a85,0x7d244a85 +.long 0xf83dbbd2,0xf83dbbd2 +.long 0x1132f9ae,0x1132f9ae +.long 0x6da129c7,0x6da129c7 +.long 0x4b2f9e1d,0x4b2f9e1d +.long 0xf330b2dc,0xf330b2dc +.long 0xec52860d,0xec52860d +.long 0xd0e3c177,0xd0e3c177 +.long 0x6c16b32b,0x6c16b32b +.long 0x99b970a9,0x99b970a9 +.long 0xfa489411,0xfa489411 +.long 0x2264e947,0x2264e947 +.long 0xc48cfca8,0xc48cfca8 +.long 0x1a3ff0a0,0x1a3ff0a0 +.long 0xd82c7d56,0xd82c7d56 +.long 0xef903322,0xef903322 +.long 0xc74e4987,0xc74e4987 +.long 0xc1d138d9,0xc1d138d9 +.long 0xfea2ca8c,0xfea2ca8c +.long 0x360bd498,0x360bd498 +.long 0xcf81f5a6,0xcf81f5a6 +.long 0x28de7aa5,0x28de7aa5 +.long 0x268eb7da,0x268eb7da +.long 0xa4bfad3f,0xa4bfad3f +.long 0xe49d3a2c,0xe49d3a2c +.long 0x0d927850,0x0d927850 +.long 0x9bcc5f6a,0x9bcc5f6a +.long 0x62467e54,0x62467e54 +.long 0xc2138df6,0xc2138df6 +.long 0xe8b8d890,0xe8b8d890 +.long 0x5ef7392e,0x5ef7392e +.long 0xf5afc382,0xf5afc382 +.long 0xbe805d9f,0xbe805d9f +.long 0x7c93d069,0x7c93d069 +.long 0xa92dd56f,0xa92dd56f +.long 0xb31225cf,0xb31225cf +.long 0x3b99acc8,0x3b99acc8 +.long 0xa77d1810,0xa77d1810 +.long 0x6e639ce8,0x6e639ce8 +.long 0x7bbb3bdb,0x7bbb3bdb +.long 0x097826cd,0x097826cd +.long 0xf418596e,0xf418596e +.long 0x01b79aec,0x01b79aec +.long 0xa89a4f83,0xa89a4f83 +.long 0x656e95e6,0x656e95e6 +.long 0x7ee6ffaa,0x7ee6ffaa +.long 0x08cfbc21,0x08cfbc21 +.long 0xe6e815ef,0xe6e815ef +.long 0xd99be7ba,0xd99be7ba +.long 0xce366f4a,0xce366f4a +.long 0xd4099fea,0xd4099fea +.long 0xd67cb029,0xd67cb029 +.long 0xafb2a431,0xafb2a431 +.long 0x31233f2a,0x31233f2a +.long 0x3094a5c6,0x3094a5c6 +.long 0xc066a235,0xc066a235 +.long 0x37bc4e74,0x37bc4e74 +.long 0xa6ca82fc,0xa6ca82fc +.long 0xb0d090e0,0xb0d090e0 +.long 0x15d8a733,0x15d8a733 +.long 0x4a9804f1,0x4a9804f1 +.long 0xf7daec41,0xf7daec41 +.long 0x0e50cd7f,0x0e50cd7f +.long 0x2ff69117,0x2ff69117 +.long 0x8dd64d76,0x8dd64d76 +.long 0x4db0ef43,0x4db0ef43 +.long 0x544daacc,0x544daacc +.long 0xdf0496e4,0xdf0496e4 +.long 0xe3b5d19e,0xe3b5d19e +.long 0x1b886a4c,0x1b886a4c +.long 0xb81f2cc1,0xb81f2cc1 +.long 0x7f516546,0x7f516546 +.long 0x04ea5e9d,0x04ea5e9d +.long 0x5d358c01,0x5d358c01 +.long 0x737487fa,0x737487fa +.long 0x2e410bfb,0x2e410bfb +.long 0x5a1d67b3,0x5a1d67b3 +.long 0x52d2db92,0x52d2db92 +.long 0x335610e9,0x335610e9 +.long 0x1347d66d,0x1347d66d +.long 0x8c61d79a,0x8c61d79a +.long 0x7a0ca137,0x7a0ca137 +.long 0x8e14f859,0x8e14f859 +.long 0x893c13eb,0x893c13eb +.long 0xee27a9ce,0xee27a9ce +.long 0x35c961b7,0x35c961b7 +.long 0xede51ce1,0xede51ce1 +.long 0x3cb1477a,0x3cb1477a +.long 0x59dfd29c,0x59dfd29c +.long 0x3f73f255,0x3f73f255 +.long 0x79ce1418,0x79ce1418 +.long 0xbf37c773,0xbf37c773 +.long 0xeacdf753,0xeacdf753 +.long 0x5baafd5f,0x5baafd5f +.long 0x146f3ddf,0x146f3ddf +.long 0x86db4478,0x86db4478 +.long 0x81f3afca,0x81f3afca +.long 0x3ec468b9,0x3ec468b9 +.long 0x2c342438,0x2c342438 +.long 0x5f40a3c2,0x5f40a3c2 +.long 0x72c31d16,0x72c31d16 +.long 0x0c25e2bc,0x0c25e2bc +.long 0x8b493c28,0x8b493c28 +.long 0x41950dff,0x41950dff +.long 0x7101a839,0x7101a839 +.long 0xdeb30c08,0xdeb30c08 +.long 0x9ce4b4d8,0x9ce4b4d8 +.long 0x90c15664,0x90c15664 +.long 0x6184cb7b,0x6184cb7b +.long 0x70b632d5,0x70b632d5 +.long 0x745c6c48,0x745c6c48 +.long 0x4257b8d0,0x4257b8d0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 diff --git a/ext/libressl/crypto/aes/aes-masm-x86_64.S b/ext/libressl/crypto/aes/aes-masm-x86_64.S new file mode 100644 index 0000000..9094c72 --- /dev/null +++ b/ext/libressl/crypto/aes/aes-masm-x86_64.S @@ -0,0 +1,2948 @@ +; 1 "crypto/aes/aes-masm-x86_64.S.tmp" +; 1 "" 1 +; 1 "" 3 +; 340 "" 3 +; 1 "" 1 +; 1 "" 2 +; 1 "crypto/aes/aes-masm-x86_64.S.tmp" 2 +OPTION DOTNAME + +; 1 "./crypto/x86_arch.h" 1 + + +; 16 "./crypto/x86_arch.h" + + + + + + + + + +; 40 "./crypto/x86_arch.h" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +; 3 "crypto/aes/aes-masm-x86_64.S.tmp" 2 + +.text$ SEGMENT ALIGN(64) 'CODE' + +ALIGN 16 +_x86_64_AES_encrypt PROC PRIVATE + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] + + mov r13d,DWORD PTR[240+r15] + sub r13d,1 + jmp $L$enc_loop +ALIGN 16 +$L$enc_loop:: + + movzx esi,al + movzx edi,bl + movzx ebp,cl + mov r10d,DWORD PTR[rsi*8+r14] + mov r11d,DWORD PTR[rdi*8+r14] + mov r12d,DWORD PTR[rbp*8+r14] + + movzx esi,bh + movzx edi,ch + movzx ebp,dl + xor r10d,DWORD PTR[3+rsi*8+r14] + xor r11d,DWORD PTR[3+rdi*8+r14] + mov r8d,DWORD PTR[rbp*8+r14] + + movzx esi,dh + shr ecx,16 + movzx ebp,ah + xor r12d,DWORD PTR[3+rsi*8+r14] + shr edx,16 + xor r8d,DWORD PTR[3+rbp*8+r14] + + shr ebx,16 + lea r15,QWORD PTR[16+r15] + shr eax,16 + + movzx esi,cl + movzx edi,dl + movzx ebp,al + xor r10d,DWORD PTR[2+rsi*8+r14] + xor r11d,DWORD PTR[2+rdi*8+r14] + xor r12d,DWORD PTR[2+rbp*8+r14] + + movzx esi,dh + movzx edi,ah + movzx ebp,bl + xor r10d,DWORD PTR[1+rsi*8+r14] + xor r11d,DWORD PTR[1+rdi*8+r14] + xor r8d,DWORD PTR[2+rbp*8+r14] + + mov edx,DWORD PTR[12+r15] + movzx edi,bh + movzx ebp,ch + mov eax,DWORD PTR[r15] + xor r12d,DWORD PTR[1+rdi*8+r14] + xor r8d,DWORD PTR[1+rbp*8+r14] + + mov ebx,DWORD PTR[4+r15] + mov ecx,DWORD PTR[8+r15] + xor eax,r10d + xor ebx,r11d + xor ecx,r12d + xor edx,r8d + sub r13d,1 + jnz $L$enc_loop + movzx esi,al + movzx edi,bl + movzx ebp,cl + movzx r10d,BYTE PTR[2+rsi*8+r14] + movzx r11d,BYTE PTR[2+rdi*8+r14] + movzx r12d,BYTE PTR[2+rbp*8+r14] + + movzx esi,dl + movzx edi,bh + movzx ebp,ch + movzx r8d,BYTE PTR[2+rsi*8+r14] + mov edi,DWORD PTR[rdi*8+r14] + mov ebp,DWORD PTR[rbp*8+r14] + + and edi,00000ff00h + and ebp,00000ff00h + + xor r10d,edi + xor r11d,ebp + shr ecx,16 + + movzx esi,dh + movzx edi,ah + shr edx,16 + mov esi,DWORD PTR[rsi*8+r14] + mov edi,DWORD PTR[rdi*8+r14] + + and esi,00000ff00h + and edi,00000ff00h + shr ebx,16 + xor r12d,esi + xor r8d,edi + shr eax,16 + + movzx esi,cl + movzx edi,dl + movzx ebp,al + mov esi,DWORD PTR[rsi*8+r14] + mov edi,DWORD PTR[rdi*8+r14] + mov ebp,DWORD PTR[rbp*8+r14] + + and esi,000ff0000h + and edi,000ff0000h + and ebp,000ff0000h + + xor r10d,esi + xor r11d,edi + xor r12d,ebp + + movzx esi,bl + movzx edi,dh + movzx ebp,ah + mov esi,DWORD PTR[rsi*8+r14] + mov edi,DWORD PTR[2+rdi*8+r14] + mov ebp,DWORD PTR[2+rbp*8+r14] + + and esi,000ff0000h + and edi,0ff000000h + and ebp,0ff000000h + + xor r8d,esi + xor r10d,edi + xor r11d,ebp + + movzx esi,bh + movzx edi,ch + mov edx,DWORD PTR[((16+12))+r15] + mov esi,DWORD PTR[2+rsi*8+r14] + mov edi,DWORD PTR[2+rdi*8+r14] + mov eax,DWORD PTR[((16+0))+r15] + + and esi,0ff000000h + and edi,0ff000000h + + xor r12d,esi + xor r8d,edi + + mov ebx,DWORD PTR[((16+4))+r15] + mov ecx,DWORD PTR[((16+8))+r15] + xor eax,r10d + xor ebx,r11d + xor ecx,r12d + xor edx,r8d + DB 0F3h,0C3h ;repret +_x86_64_AES_encrypt ENDP + +ALIGN 16 +_x86_64_AES_encrypt_compact PROC PRIVATE + lea r8,QWORD PTR[128+r14] + mov edi,DWORD PTR[((0-128))+r8] + mov ebp,DWORD PTR[((32-128))+r8] + mov r10d,DWORD PTR[((64-128))+r8] + mov r11d,DWORD PTR[((96-128))+r8] + mov edi,DWORD PTR[((128-128))+r8] + mov ebp,DWORD PTR[((160-128))+r8] + mov r10d,DWORD PTR[((192-128))+r8] + mov r11d,DWORD PTR[((224-128))+r8] + jmp $L$enc_loop_compact +ALIGN 16 +$L$enc_loop_compact:: + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] + lea r15,QWORD PTR[16+r15] + movzx r10d,al + movzx r11d,bl + movzx r12d,cl + movzx r10d,BYTE PTR[r10*1+r14] + movzx r11d,BYTE PTR[r11*1+r14] + movzx r12d,BYTE PTR[r12*1+r14] + + movzx r8d,dl + movzx esi,bh + movzx edi,ch + movzx r8d,BYTE PTR[r8*1+r14] + movzx r9d,BYTE PTR[rsi*1+r14] + movzx r13d,BYTE PTR[rdi*1+r14] + + movzx ebp,dh + movzx esi,ah + shr ecx,16 + movzx ebp,BYTE PTR[rbp*1+r14] + movzx esi,BYTE PTR[rsi*1+r14] + shr edx,16 + + movzx edi,cl + shl r9d,8 + shl r13d,8 + movzx edi,BYTE PTR[rdi*1+r14] + xor r10d,r9d + xor r11d,r13d + + movzx r9d,dl + shr eax,16 + shr ebx,16 + movzx r13d,al + shl ebp,8 + shl esi,8 + movzx r9d,BYTE PTR[r9*1+r14] + movzx r13d,BYTE PTR[r13*1+r14] + xor r12d,ebp + xor r8d,esi + + movzx ebp,bl + movzx esi,dh + shl edi,16 + movzx ebp,BYTE PTR[rbp*1+r14] + movzx esi,BYTE PTR[rsi*1+r14] + xor r10d,edi + + movzx edi,ah + shr ecx,8 + shr ebx,8 + movzx edi,BYTE PTR[rdi*1+r14] + movzx edx,BYTE PTR[rcx*1+r14] + movzx ecx,BYTE PTR[rbx*1+r14] + shl r9d,16 + shl r13d,16 + shl ebp,16 + xor r11d,r9d + xor r12d,r13d + xor r8d,ebp + + shl esi,24 + shl edi,24 + shl edx,24 + xor r10d,esi + shl ecx,24 + xor r11d,edi + mov eax,r10d + mov ebx,r11d + xor ecx,r12d + xor edx,r8d + cmp r15,QWORD PTR[16+rsp] + je $L$enc_compact_done + mov esi,eax + mov edi,ebx + and esi,080808080h + and edi,080808080h + mov r10d,esi + mov r11d,edi + shr r10d,7 + lea r8d,DWORD PTR[rax*1+rax] + shr r11d,7 + lea r9d,DWORD PTR[rbx*1+rbx] + sub esi,r10d + sub edi,r11d + and r8d,0fefefefeh + and r9d,0fefefefeh + and esi,01b1b1b1bh + and edi,01b1b1b1bh + mov r10d,eax + mov r11d,ebx + xor r8d,esi + xor r9d,edi + + xor eax,r8d + xor ebx,r9d + mov esi,ecx + mov edi,edx + rol eax,24 + rol ebx,24 + and esi,080808080h + and edi,080808080h + xor eax,r8d + xor ebx,r9d + mov r12d,esi + mov ebp,edi + ror r10d,16 + ror r11d,16 + shr r12d,7 + lea r8d,DWORD PTR[rcx*1+rcx] + xor eax,r10d + xor ebx,r11d + shr ebp,7 + lea r9d,DWORD PTR[rdx*1+rdx] + ror r10d,8 + ror r11d,8 + sub esi,r12d + sub edi,ebp + xor eax,r10d + xor ebx,r11d + + and r8d,0fefefefeh + and r9d,0fefefefeh + and esi,01b1b1b1bh + and edi,01b1b1b1bh + mov r12d,ecx + mov ebp,edx + xor r8d,esi + xor r9d,edi + + xor ecx,r8d + xor edx,r9d + rol ecx,24 + rol edx,24 + xor ecx,r8d + xor edx,r9d + mov esi,DWORD PTR[r14] + ror r12d,16 + ror ebp,16 + mov edi,DWORD PTR[64+r14] + xor ecx,r12d + xor edx,ebp + mov r8d,DWORD PTR[128+r14] + ror r12d,8 + ror ebp,8 + mov r9d,DWORD PTR[192+r14] + xor ecx,r12d + xor edx,ebp + jmp $L$enc_loop_compact +ALIGN 16 +$L$enc_compact_done:: + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] + DB 0F3h,0C3h ;repret +_x86_64_AES_encrypt_compact ENDP +PUBLIC AES_encrypt + +ALIGN 16 +PUBLIC asm_AES_encrypt + +asm_AES_encrypt:: +AES_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_AES_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + + mov r10,rsp + lea rcx,QWORD PTR[((-63))+rdx] + and rsp,-64 + sub rcx,rsp + neg rcx + and rcx,03c0h + sub rsp,rcx + sub rsp,32 + + mov QWORD PTR[16+rsp],rsi + mov QWORD PTR[24+rsp],r10 +$L$enc_prologue:: + + mov r15,rdx + mov r13d,DWORD PTR[240+r15] + + mov eax,DWORD PTR[rdi] + mov ebx,DWORD PTR[4+rdi] + mov ecx,DWORD PTR[8+rdi] + mov edx,DWORD PTR[12+rdi] + + shl r13d,4 + lea rbp,QWORD PTR[r13*1+r15] + mov QWORD PTR[rsp],r15 + mov QWORD PTR[8+rsp],rbp + + + lea r14,QWORD PTR[(($L$AES_Te+2048))] + lea rbp,QWORD PTR[768+rsp] + sub rbp,r14 + and rbp,0300h + lea r14,QWORD PTR[rbp*1+r14] + + call _x86_64_AES_encrypt_compact + + mov r9,QWORD PTR[16+rsp] + mov rsi,QWORD PTR[24+rsp] + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$enc_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_AES_encrypt:: +AES_encrypt ENDP + +ALIGN 16 +_x86_64_AES_decrypt PROC PRIVATE + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] + + mov r13d,DWORD PTR[240+r15] + sub r13d,1 + jmp $L$dec_loop +ALIGN 16 +$L$dec_loop:: + + movzx esi,al + movzx edi,bl + movzx ebp,cl + mov r10d,DWORD PTR[rsi*8+r14] + mov r11d,DWORD PTR[rdi*8+r14] + mov r12d,DWORD PTR[rbp*8+r14] + + movzx esi,dh + movzx edi,ah + movzx ebp,dl + xor r10d,DWORD PTR[3+rsi*8+r14] + xor r11d,DWORD PTR[3+rdi*8+r14] + mov r8d,DWORD PTR[rbp*8+r14] + + movzx esi,bh + shr eax,16 + movzx ebp,ch + xor r12d,DWORD PTR[3+rsi*8+r14] + shr edx,16 + xor r8d,DWORD PTR[3+rbp*8+r14] + + shr ebx,16 + lea r15,QWORD PTR[16+r15] + shr ecx,16 + + movzx esi,cl + movzx edi,dl + movzx ebp,al + xor r10d,DWORD PTR[2+rsi*8+r14] + xor r11d,DWORD PTR[2+rdi*8+r14] + xor r12d,DWORD PTR[2+rbp*8+r14] + + movzx esi,bh + movzx edi,ch + movzx ebp,bl + xor r10d,DWORD PTR[1+rsi*8+r14] + xor r11d,DWORD PTR[1+rdi*8+r14] + xor r8d,DWORD PTR[2+rbp*8+r14] + + movzx esi,dh + mov edx,DWORD PTR[12+r15] + movzx ebp,ah + xor r12d,DWORD PTR[1+rsi*8+r14] + mov eax,DWORD PTR[r15] + xor r8d,DWORD PTR[1+rbp*8+r14] + + xor eax,r10d + mov ebx,DWORD PTR[4+r15] + mov ecx,DWORD PTR[8+r15] + xor ecx,r12d + xor ebx,r11d + xor edx,r8d + sub r13d,1 + jnz $L$dec_loop + lea r14,QWORD PTR[2048+r14] + movzx esi,al + movzx edi,bl + movzx ebp,cl + movzx r10d,BYTE PTR[rsi*1+r14] + movzx r11d,BYTE PTR[rdi*1+r14] + movzx r12d,BYTE PTR[rbp*1+r14] + + movzx esi,dl + movzx edi,dh + movzx ebp,ah + movzx r8d,BYTE PTR[rsi*1+r14] + movzx edi,BYTE PTR[rdi*1+r14] + movzx ebp,BYTE PTR[rbp*1+r14] + + shl edi,8 + shl ebp,8 + + xor r10d,edi + xor r11d,ebp + shr edx,16 + + movzx esi,bh + movzx edi,ch + shr eax,16 + movzx esi,BYTE PTR[rsi*1+r14] + movzx edi,BYTE PTR[rdi*1+r14] + + shl esi,8 + shl edi,8 + shr ebx,16 + xor r12d,esi + xor r8d,edi + shr ecx,16 + + movzx esi,cl + movzx edi,dl + movzx ebp,al + movzx esi,BYTE PTR[rsi*1+r14] + movzx edi,BYTE PTR[rdi*1+r14] + movzx ebp,BYTE PTR[rbp*1+r14] + + shl esi,16 + shl edi,16 + shl ebp,16 + + xor r10d,esi + xor r11d,edi + xor r12d,ebp + + movzx esi,bl + movzx edi,bh + movzx ebp,ch + movzx esi,BYTE PTR[rsi*1+r14] + movzx edi,BYTE PTR[rdi*1+r14] + movzx ebp,BYTE PTR[rbp*1+r14] + + shl esi,16 + shl edi,24 + shl ebp,24 + + xor r8d,esi + xor r10d,edi + xor r11d,ebp + + movzx esi,dh + movzx edi,ah + mov edx,DWORD PTR[((16+12))+r15] + movzx esi,BYTE PTR[rsi*1+r14] + movzx edi,BYTE PTR[rdi*1+r14] + mov eax,DWORD PTR[((16+0))+r15] + + shl esi,24 + shl edi,24 + + xor r12d,esi + xor r8d,edi + + mov ebx,DWORD PTR[((16+4))+r15] + mov ecx,DWORD PTR[((16+8))+r15] + lea r14,QWORD PTR[((-2048))+r14] + xor eax,r10d + xor ebx,r11d + xor ecx,r12d + xor edx,r8d + DB 0F3h,0C3h ;repret +_x86_64_AES_decrypt ENDP + +ALIGN 16 +_x86_64_AES_decrypt_compact PROC PRIVATE + lea r8,QWORD PTR[128+r14] + mov edi,DWORD PTR[((0-128))+r8] + mov ebp,DWORD PTR[((32-128))+r8] + mov r10d,DWORD PTR[((64-128))+r8] + mov r11d,DWORD PTR[((96-128))+r8] + mov edi,DWORD PTR[((128-128))+r8] + mov ebp,DWORD PTR[((160-128))+r8] + mov r10d,DWORD PTR[((192-128))+r8] + mov r11d,DWORD PTR[((224-128))+r8] + jmp $L$dec_loop_compact + +ALIGN 16 +$L$dec_loop_compact:: + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] + lea r15,QWORD PTR[16+r15] + movzx r10d,al + movzx r11d,bl + movzx r12d,cl + movzx r10d,BYTE PTR[r10*1+r14] + movzx r11d,BYTE PTR[r11*1+r14] + movzx r12d,BYTE PTR[r12*1+r14] + + movzx r8d,dl + movzx esi,dh + movzx edi,ah + movzx r8d,BYTE PTR[r8*1+r14] + movzx r9d,BYTE PTR[rsi*1+r14] + movzx r13d,BYTE PTR[rdi*1+r14] + + movzx ebp,bh + movzx esi,ch + shr ecx,16 + movzx ebp,BYTE PTR[rbp*1+r14] + movzx esi,BYTE PTR[rsi*1+r14] + shr edx,16 + + movzx edi,cl + shl r9d,8 + shl r13d,8 + movzx edi,BYTE PTR[rdi*1+r14] + xor r10d,r9d + xor r11d,r13d + + movzx r9d,dl + shr eax,16 + shr ebx,16 + movzx r13d,al + shl ebp,8 + shl esi,8 + movzx r9d,BYTE PTR[r9*1+r14] + movzx r13d,BYTE PTR[r13*1+r14] + xor r12d,ebp + xor r8d,esi + + movzx ebp,bl + movzx esi,bh + shl edi,16 + movzx ebp,BYTE PTR[rbp*1+r14] + movzx esi,BYTE PTR[rsi*1+r14] + xor r10d,edi + + movzx edi,ch + shl r9d,16 + shl r13d,16 + movzx ebx,BYTE PTR[rdi*1+r14] + xor r11d,r9d + xor r12d,r13d + + movzx edi,dh + shr eax,8 + shl ebp,16 + movzx ecx,BYTE PTR[rdi*1+r14] + movzx edx,BYTE PTR[rax*1+r14] + xor r8d,ebp + + shl esi,24 + shl ebx,24 + shl ecx,24 + xor r10d,esi + shl edx,24 + xor ebx,r11d + mov eax,r10d + xor ecx,r12d + xor edx,r8d + cmp r15,QWORD PTR[16+rsp] + je $L$dec_compact_done + + mov rsi,QWORD PTR[((256+0))+r14] + shl rbx,32 + shl rdx,32 + mov rdi,QWORD PTR[((256+8))+r14] + or rax,rbx + or rcx,rdx + mov rbp,QWORD PTR[((256+16))+r14] + mov rbx,rax + mov rdx,rcx + and rbx,rsi + and rdx,rsi + mov r9,rbx + mov r12,rdx + shr r9,7 + lea r8,QWORD PTR[rax*1+rax] + shr r12,7 + lea r11,QWORD PTR[rcx*1+rcx] + sub rbx,r9 + sub rdx,r12 + and r8,rdi + and r11,rdi + and rbx,rbp + and rdx,rbp + xor rbx,r8 + xor rdx,r11 + mov r8,rbx + mov r11,rdx + + and rbx,rsi + and rdx,rsi + mov r10,rbx + mov r13,rdx + shr r10,7 + lea r9,QWORD PTR[r8*1+r8] + shr r13,7 + lea r12,QWORD PTR[r11*1+r11] + sub rbx,r10 + sub rdx,r13 + and r9,rdi + and r12,rdi + and rbx,rbp + and rdx,rbp + xor rbx,r9 + xor rdx,r12 + mov r9,rbx + mov r12,rdx + + and rbx,rsi + and rdx,rsi + mov r10,rbx + mov r13,rdx + shr r10,7 + xor r8,rax + shr r13,7 + xor r11,rcx + sub rbx,r10 + sub rdx,r13 + lea r10,QWORD PTR[r9*1+r9] + lea r13,QWORD PTR[r12*1+r12] + xor r9,rax + xor r12,rcx + and r10,rdi + and r13,rdi + and rbx,rbp + and rdx,rbp + xor r10,rbx + xor r13,rdx + + xor rax,r10 + xor rcx,r13 + xor r8,r10 + xor r11,r13 + mov rbx,rax + mov rdx,rcx + xor r9,r10 + xor r12,r13 + shr rbx,32 + shr rdx,32 + xor r10,r8 + xor r13,r11 + rol eax,8 + rol ecx,8 + xor r10,r9 + xor r13,r12 + + rol ebx,8 + rol edx,8 + xor eax,r10d + xor ecx,r13d + shr r10,32 + shr r13,32 + xor ebx,r10d + xor edx,r13d + + mov r10,r8 + mov r13,r11 + shr r10,32 + shr r13,32 + rol r8d,24 + rol r11d,24 + rol r10d,24 + rol r13d,24 + xor eax,r8d + xor ecx,r11d + mov r8,r9 + mov r11,r12 + xor ebx,r10d + xor edx,r13d + + mov rsi,QWORD PTR[r14] + shr r8,32 + shr r11,32 + mov rdi,QWORD PTR[64+r14] + rol r9d,16 + rol r12d,16 + mov rbp,QWORD PTR[128+r14] + rol r8d,16 + rol r11d,16 + mov r10,QWORD PTR[192+r14] + xor eax,r9d + xor ecx,r12d + mov r13,QWORD PTR[256+r14] + xor ebx,r8d + xor edx,r11d + jmp $L$dec_loop_compact +ALIGN 16 +$L$dec_compact_done:: + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] + DB 0F3h,0C3h ;repret +_x86_64_AES_decrypt_compact ENDP +PUBLIC AES_decrypt + +ALIGN 16 +PUBLIC asm_AES_decrypt + +asm_AES_decrypt:: +AES_decrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_AES_decrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + + mov r10,rsp + lea rcx,QWORD PTR[((-63))+rdx] + and rsp,-64 + sub rcx,rsp + neg rcx + and rcx,03c0h + sub rsp,rcx + sub rsp,32 + + mov QWORD PTR[16+rsp],rsi + mov QWORD PTR[24+rsp],r10 +$L$dec_prologue:: + + mov r15,rdx + mov r13d,DWORD PTR[240+r15] + + mov eax,DWORD PTR[rdi] + mov ebx,DWORD PTR[4+rdi] + mov ecx,DWORD PTR[8+rdi] + mov edx,DWORD PTR[12+rdi] + + shl r13d,4 + lea rbp,QWORD PTR[r13*1+r15] + mov QWORD PTR[rsp],r15 + mov QWORD PTR[8+rsp],rbp + + + lea r14,QWORD PTR[(($L$AES_Td+2048))] + lea rbp,QWORD PTR[768+rsp] + sub rbp,r14 + and rbp,0300h + lea r14,QWORD PTR[rbp*1+r14] + shr rbp,3 + add r14,rbp + + call _x86_64_AES_decrypt_compact + + mov r9,QWORD PTR[16+rsp] + mov rsi,QWORD PTR[24+rsp] + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$dec_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_AES_decrypt:: +AES_decrypt ENDP +PUBLIC AES_set_encrypt_key + +ALIGN 16 +AES_set_encrypt_key PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_AES_set_encrypt_key:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + sub rsp,8 +$L$enc_key_prologue:: + + call _x86_64_AES_set_encrypt_key + + mov r15,QWORD PTR[8+rsp] + mov r14,QWORD PTR[16+rsp] + mov r13,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov rbp,QWORD PTR[40+rsp] + mov rbx,QWORD PTR[48+rsp] + add rsp,56 +$L$enc_key_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_AES_set_encrypt_key:: +AES_set_encrypt_key ENDP + + +ALIGN 16 +_x86_64_AES_set_encrypt_key PROC PRIVATE + mov ecx,esi + mov rsi,rdi + mov rdi,rdx + + test rsi,-1 + jz $L$badpointer + test rdi,-1 + jz $L$badpointer + + lea rbp,QWORD PTR[$L$AES_Te] + lea rbp,QWORD PTR[((2048+128))+rbp] + + + mov eax,DWORD PTR[((0-128))+rbp] + mov ebx,DWORD PTR[((32-128))+rbp] + mov r8d,DWORD PTR[((64-128))+rbp] + mov edx,DWORD PTR[((96-128))+rbp] + mov eax,DWORD PTR[((128-128))+rbp] + mov ebx,DWORD PTR[((160-128))+rbp] + mov r8d,DWORD PTR[((192-128))+rbp] + mov edx,DWORD PTR[((224-128))+rbp] + + cmp ecx,128 + je $L$10rounds + cmp ecx,192 + je $L$12rounds + cmp ecx,256 + je $L$14rounds + mov rax,-2 + jmp $L$exit + +$L$10rounds:: + mov rax,QWORD PTR[rsi] + mov rdx,QWORD PTR[8+rsi] + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rdx + + shr rdx,32 + xor ecx,ecx + jmp $L$10shortcut +ALIGN 4 +$L$10loop:: + mov eax,DWORD PTR[rdi] + mov edx,DWORD PTR[12+rdi] +$L$10shortcut:: + movzx esi,dl + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,24 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shr edx,16 + movzx esi,dl + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,8 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shl ebx,16 + xor eax,ebx + + xor eax,DWORD PTR[((1024-128))+rcx*4+rbp] + mov DWORD PTR[16+rdi],eax + xor eax,DWORD PTR[4+rdi] + mov DWORD PTR[20+rdi],eax + xor eax,DWORD PTR[8+rdi] + mov DWORD PTR[24+rdi],eax + xor eax,DWORD PTR[12+rdi] + mov DWORD PTR[28+rdi],eax + add ecx,1 + lea rdi,QWORD PTR[16+rdi] + cmp ecx,10 + jl $L$10loop + + mov DWORD PTR[80+rdi],10 + xor rax,rax + jmp $L$exit + +$L$12rounds:: + mov rax,QWORD PTR[rsi] + mov rbx,QWORD PTR[8+rsi] + mov rdx,QWORD PTR[16+rsi] + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rbx + mov QWORD PTR[16+rdi],rdx + + shr rdx,32 + xor ecx,ecx + jmp $L$12shortcut +ALIGN 4 +$L$12loop:: + mov eax,DWORD PTR[rdi] + mov edx,DWORD PTR[20+rdi] +$L$12shortcut:: + movzx esi,dl + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,24 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shr edx,16 + movzx esi,dl + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,8 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shl ebx,16 + xor eax,ebx + + xor eax,DWORD PTR[((1024-128))+rcx*4+rbp] + mov DWORD PTR[24+rdi],eax + xor eax,DWORD PTR[4+rdi] + mov DWORD PTR[28+rdi],eax + xor eax,DWORD PTR[8+rdi] + mov DWORD PTR[32+rdi],eax + xor eax,DWORD PTR[12+rdi] + mov DWORD PTR[36+rdi],eax + + cmp ecx,7 + je $L$12break + add ecx,1 + + xor eax,DWORD PTR[16+rdi] + mov DWORD PTR[40+rdi],eax + xor eax,DWORD PTR[20+rdi] + mov DWORD PTR[44+rdi],eax + + lea rdi,QWORD PTR[24+rdi] + jmp $L$12loop +$L$12break:: + mov DWORD PTR[72+rdi],12 + xor rax,rax + jmp $L$exit + +$L$14rounds:: + mov rax,QWORD PTR[rsi] + mov rbx,QWORD PTR[8+rsi] + mov rcx,QWORD PTR[16+rsi] + mov rdx,QWORD PTR[24+rsi] + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rbx + mov QWORD PTR[16+rdi],rcx + mov QWORD PTR[24+rdi],rdx + + shr rdx,32 + xor ecx,ecx + jmp $L$14shortcut +ALIGN 4 +$L$14loop:: + mov eax,DWORD PTR[rdi] + mov edx,DWORD PTR[28+rdi] +$L$14shortcut:: + movzx esi,dl + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,24 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shr edx,16 + movzx esi,dl + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,8 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shl ebx,16 + xor eax,ebx + + xor eax,DWORD PTR[((1024-128))+rcx*4+rbp] + mov DWORD PTR[32+rdi],eax + xor eax,DWORD PTR[4+rdi] + mov DWORD PTR[36+rdi],eax + xor eax,DWORD PTR[8+rdi] + mov DWORD PTR[40+rdi],eax + xor eax,DWORD PTR[12+rdi] + mov DWORD PTR[44+rdi],eax + + cmp ecx,6 + je $L$14break + add ecx,1 + + mov edx,eax + mov eax,DWORD PTR[16+rdi] + movzx esi,dl + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shr edx,16 + shl ebx,8 + movzx esi,dl + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,16 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shl ebx,24 + xor eax,ebx + + mov DWORD PTR[48+rdi],eax + xor eax,DWORD PTR[20+rdi] + mov DWORD PTR[52+rdi],eax + xor eax,DWORD PTR[24+rdi] + mov DWORD PTR[56+rdi],eax + xor eax,DWORD PTR[28+rdi] + mov DWORD PTR[60+rdi],eax + + lea rdi,QWORD PTR[32+rdi] + jmp $L$14loop +$L$14break:: + mov DWORD PTR[48+rdi],14 + xor rax,rax + jmp $L$exit + +$L$badpointer:: + mov rax,-1 +$L$exit:: + DB 0F3h,0C3h ;repret +_x86_64_AES_set_encrypt_key ENDP +PUBLIC AES_set_decrypt_key + +ALIGN 16 +AES_set_decrypt_key PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_AES_set_decrypt_key:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + push rdx +$L$dec_key_prologue:: + + call _x86_64_AES_set_encrypt_key + mov r8,QWORD PTR[rsp] + cmp eax,0 + jne $L$abort + + mov r14d,DWORD PTR[240+r8] + xor rdi,rdi + lea rcx,QWORD PTR[r14*4+rdi] + mov rsi,r8 + lea rdi,QWORD PTR[rcx*4+r8] +ALIGN 4 +$L$invert:: + mov rax,QWORD PTR[rsi] + mov rbx,QWORD PTR[8+rsi] + mov rcx,QWORD PTR[rdi] + mov rdx,QWORD PTR[8+rdi] + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rbx + mov QWORD PTR[rsi],rcx + mov QWORD PTR[8+rsi],rdx + lea rsi,QWORD PTR[16+rsi] + lea rdi,QWORD PTR[((-16))+rdi] + cmp rdi,rsi + jne $L$invert + + lea rax,QWORD PTR[(($L$AES_Te+2048+1024))] + + mov rsi,QWORD PTR[40+rax] + mov rdi,QWORD PTR[48+rax] + mov rbp,QWORD PTR[56+rax] + + mov r15,r8 + sub r14d,1 +ALIGN 4 +$L$permute:: + lea r15,QWORD PTR[16+r15] + mov rax,QWORD PTR[r15] + mov rcx,QWORD PTR[8+r15] + mov rbx,rax + mov rdx,rcx + and rbx,rsi + and rdx,rsi + mov r9,rbx + mov r12,rdx + shr r9,7 + lea r8,QWORD PTR[rax*1+rax] + shr r12,7 + lea r11,QWORD PTR[rcx*1+rcx] + sub rbx,r9 + sub rdx,r12 + and r8,rdi + and r11,rdi + and rbx,rbp + and rdx,rbp + xor rbx,r8 + xor rdx,r11 + mov r8,rbx + mov r11,rdx + + and rbx,rsi + and rdx,rsi + mov r10,rbx + mov r13,rdx + shr r10,7 + lea r9,QWORD PTR[r8*1+r8] + shr r13,7 + lea r12,QWORD PTR[r11*1+r11] + sub rbx,r10 + sub rdx,r13 + and r9,rdi + and r12,rdi + and rbx,rbp + and rdx,rbp + xor rbx,r9 + xor rdx,r12 + mov r9,rbx + mov r12,rdx + + and rbx,rsi + and rdx,rsi + mov r10,rbx + mov r13,rdx + shr r10,7 + xor r8,rax + shr r13,7 + xor r11,rcx + sub rbx,r10 + sub rdx,r13 + lea r10,QWORD PTR[r9*1+r9] + lea r13,QWORD PTR[r12*1+r12] + xor r9,rax + xor r12,rcx + and r10,rdi + and r13,rdi + and rbx,rbp + and rdx,rbp + xor r10,rbx + xor r13,rdx + + xor rax,r10 + xor rcx,r13 + xor r8,r10 + xor r11,r13 + mov rbx,rax + mov rdx,rcx + xor r9,r10 + xor r12,r13 + shr rbx,32 + shr rdx,32 + xor r10,r8 + xor r13,r11 + rol eax,8 + rol ecx,8 + xor r10,r9 + xor r13,r12 + + rol ebx,8 + rol edx,8 + xor eax,r10d + xor ecx,r13d + shr r10,32 + shr r13,32 + xor ebx,r10d + xor edx,r13d + + mov r10,r8 + mov r13,r11 + shr r10,32 + shr r13,32 + rol r8d,24 + rol r11d,24 + rol r10d,24 + rol r13d,24 + xor eax,r8d + xor ecx,r11d + mov r8,r9 + mov r11,r12 + xor ebx,r10d + xor edx,r13d + + + shr r8,32 + shr r11,32 + + rol r9d,16 + rol r12d,16 + + rol r8d,16 + rol r11d,16 + + xor eax,r9d + xor ecx,r12d + + xor ebx,r8d + xor edx,r11d + mov DWORD PTR[r15],eax + mov DWORD PTR[4+r15],ebx + mov DWORD PTR[8+r15],ecx + mov DWORD PTR[12+r15],edx + sub r14d,1 + jnz $L$permute + + xor rax,rax +$L$abort:: + mov r15,QWORD PTR[8+rsp] + mov r14,QWORD PTR[16+rsp] + mov r13,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov rbp,QWORD PTR[40+rsp] + mov rbx,QWORD PTR[48+rsp] + add rsp,56 +$L$dec_key_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_AES_set_decrypt_key:: +AES_set_decrypt_key ENDP +PUBLIC AES_cbc_encrypt + +ALIGN 16 +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC asm_AES_cbc_encrypt + +asm_AES_cbc_encrypt:: +AES_cbc_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_AES_cbc_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + cmp rdx,0 + je $L$cbc_epilogue + pushfq + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 +$L$cbc_prologue:: + + cld + mov r9d,r9d + + lea r14,QWORD PTR[$L$AES_Te] + cmp r9,0 + jne $L$cbc_picked_te + lea r14,QWORD PTR[$L$AES_Td] +$L$cbc_picked_te:: + + mov r10d,DWORD PTR[OPENSSL_ia32cap_P] + cmp rdx,512 + jb $L$cbc_slow_prologue + test rdx,15 + jnz $L$cbc_slow_prologue + bt r10d,28 + jc $L$cbc_slow_prologue + + + lea r15,QWORD PTR[((-88-248))+rsp] + and r15,-64 + + + mov r10,r14 + lea r11,QWORD PTR[2304+r14] + mov r12,r15 + and r10,0FFFh + and r11,0FFFh + and r12,0FFFh + + cmp r12,r11 + jb $L$cbc_te_break_out + sub r12,r11 + sub r15,r12 + jmp $L$cbc_te_ok +$L$cbc_te_break_out:: + sub r12,r10 + and r12,0FFFh + add r12,320 + sub r15,r12 +ALIGN 4 +$L$cbc_te_ok:: + + xchg r15,rsp + + mov QWORD PTR[16+rsp],r15 +$L$cbc_fast_body:: + mov QWORD PTR[24+rsp],rdi + mov QWORD PTR[32+rsp],rsi + mov QWORD PTR[40+rsp],rdx + mov QWORD PTR[48+rsp],rcx + mov QWORD PTR[56+rsp],r8 + mov DWORD PTR[((80+240))+rsp],0 + mov rbp,r8 + mov rbx,r9 + mov r9,rsi + mov r8,rdi + mov r15,rcx + + mov eax,DWORD PTR[240+r15] + + mov r10,r15 + sub r10,r14 + and r10,0fffh + cmp r10,2304 + jb $L$cbc_do_ecopy + cmp r10,4096-248 + jb $L$cbc_skip_ecopy +ALIGN 4 +$L$cbc_do_ecopy:: + mov rsi,r15 + lea rdi,QWORD PTR[80+rsp] + lea r15,QWORD PTR[80+rsp] + mov ecx,240/8 + DD 090A548F3h + mov DWORD PTR[rdi],eax +$L$cbc_skip_ecopy:: + mov QWORD PTR[rsp],r15 + + mov ecx,18 +ALIGN 4 +$L$cbc_prefetch_te:: + mov r10,QWORD PTR[r14] + mov r11,QWORD PTR[32+r14] + mov r12,QWORD PTR[64+r14] + mov r13,QWORD PTR[96+r14] + lea r14,QWORD PTR[128+r14] + sub ecx,1 + jnz $L$cbc_prefetch_te + lea r14,QWORD PTR[((-2304))+r14] + + cmp rbx,0 + je $L$FAST_DECRYPT + + + mov eax,DWORD PTR[rbp] + mov ebx,DWORD PTR[4+rbp] + mov ecx,DWORD PTR[8+rbp] + mov edx,DWORD PTR[12+rbp] + +ALIGN 4 +$L$cbc_fast_enc_loop:: + xor eax,DWORD PTR[r8] + xor ebx,DWORD PTR[4+r8] + xor ecx,DWORD PTR[8+r8] + xor edx,DWORD PTR[12+r8] + mov r15,QWORD PTR[rsp] + mov QWORD PTR[24+rsp],r8 + + call _x86_64_AES_encrypt + + mov r8,QWORD PTR[24+rsp] + mov r10,QWORD PTR[40+rsp] + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + lea r8,QWORD PTR[16+r8] + lea r9,QWORD PTR[16+r9] + sub r10,16 + test r10,-16 + mov QWORD PTR[40+rsp],r10 + jnz $L$cbc_fast_enc_loop + mov rbp,QWORD PTR[56+rsp] + mov DWORD PTR[rbp],eax + mov DWORD PTR[4+rbp],ebx + mov DWORD PTR[8+rbp],ecx + mov DWORD PTR[12+rbp],edx + + jmp $L$cbc_fast_cleanup + + +ALIGN 16 +$L$FAST_DECRYPT:: + cmp r9,r8 + je $L$cbc_fast_dec_in_place + + mov QWORD PTR[64+rsp],rbp +ALIGN 4 +$L$cbc_fast_dec_loop:: + mov eax,DWORD PTR[r8] + mov ebx,DWORD PTR[4+r8] + mov ecx,DWORD PTR[8+r8] + mov edx,DWORD PTR[12+r8] + mov r15,QWORD PTR[rsp] + mov QWORD PTR[24+rsp],r8 + + call _x86_64_AES_decrypt + + mov rbp,QWORD PTR[64+rsp] + mov r8,QWORD PTR[24+rsp] + mov r10,QWORD PTR[40+rsp] + xor eax,DWORD PTR[rbp] + xor ebx,DWORD PTR[4+rbp] + xor ecx,DWORD PTR[8+rbp] + xor edx,DWORD PTR[12+rbp] + mov rbp,r8 + + sub r10,16 + mov QWORD PTR[40+rsp],r10 + mov QWORD PTR[64+rsp],rbp + + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + lea r8,QWORD PTR[16+r8] + lea r9,QWORD PTR[16+r9] + jnz $L$cbc_fast_dec_loop + mov r12,QWORD PTR[56+rsp] + mov r10,QWORD PTR[rbp] + mov r11,QWORD PTR[8+rbp] + mov QWORD PTR[r12],r10 + mov QWORD PTR[8+r12],r11 + jmp $L$cbc_fast_cleanup + +ALIGN 16 +$L$cbc_fast_dec_in_place:: + mov r10,QWORD PTR[rbp] + mov r11,QWORD PTR[8+rbp] + mov QWORD PTR[((0+64))+rsp],r10 + mov QWORD PTR[((8+64))+rsp],r11 +ALIGN 4 +$L$cbc_fast_dec_in_place_loop:: + mov eax,DWORD PTR[r8] + mov ebx,DWORD PTR[4+r8] + mov ecx,DWORD PTR[8+r8] + mov edx,DWORD PTR[12+r8] + mov r15,QWORD PTR[rsp] + mov QWORD PTR[24+rsp],r8 + + call _x86_64_AES_decrypt + + mov r8,QWORD PTR[24+rsp] + mov r10,QWORD PTR[40+rsp] + xor eax,DWORD PTR[((0+64))+rsp] + xor ebx,DWORD PTR[((4+64))+rsp] + xor ecx,DWORD PTR[((8+64))+rsp] + xor edx,DWORD PTR[((12+64))+rsp] + + mov r11,QWORD PTR[r8] + mov r12,QWORD PTR[8+r8] + sub r10,16 + jz $L$cbc_fast_dec_in_place_done + + mov QWORD PTR[((0+64))+rsp],r11 + mov QWORD PTR[((8+64))+rsp],r12 + + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + lea r8,QWORD PTR[16+r8] + lea r9,QWORD PTR[16+r9] + mov QWORD PTR[40+rsp],r10 + jmp $L$cbc_fast_dec_in_place_loop +$L$cbc_fast_dec_in_place_done:: + mov rdi,QWORD PTR[56+rsp] + mov QWORD PTR[rdi],r11 + mov QWORD PTR[8+rdi],r12 + + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + +ALIGN 4 +$L$cbc_fast_cleanup:: + cmp DWORD PTR[((80+240))+rsp],0 + lea rdi,QWORD PTR[80+rsp] + je $L$cbc_exit + mov ecx,240/8 + xor rax,rax + DD 090AB48F3h + + jmp $L$cbc_exit + + +ALIGN 16 +$L$cbc_slow_prologue:: + + lea rbp,QWORD PTR[((-88))+rsp] + and rbp,-64 + + lea r10,QWORD PTR[((-88-63))+rcx] + sub r10,rbp + neg r10 + and r10,03c0h + sub rbp,r10 + + xchg rbp,rsp + + mov QWORD PTR[16+rsp],rbp +$L$cbc_slow_body:: + + + + + mov QWORD PTR[56+rsp],r8 + mov rbp,r8 + mov rbx,r9 + mov r9,rsi + mov r8,rdi + mov r15,rcx + mov r10,rdx + + mov eax,DWORD PTR[240+r15] + mov QWORD PTR[rsp],r15 + shl eax,4 + lea rax,QWORD PTR[rax*1+r15] + mov QWORD PTR[8+rsp],rax + + + lea r14,QWORD PTR[2048+r14] + lea rax,QWORD PTR[((768-8))+rsp] + sub rax,r14 + and rax,0300h + lea r14,QWORD PTR[rax*1+r14] + + cmp rbx,0 + je $L$SLOW_DECRYPT + + + test r10,-16 + mov eax,DWORD PTR[rbp] + mov ebx,DWORD PTR[4+rbp] + mov ecx,DWORD PTR[8+rbp] + mov edx,DWORD PTR[12+rbp] + jz $L$cbc_slow_enc_tail + +ALIGN 4 +$L$cbc_slow_enc_loop:: + xor eax,DWORD PTR[r8] + xor ebx,DWORD PTR[4+r8] + xor ecx,DWORD PTR[8+r8] + xor edx,DWORD PTR[12+r8] + mov r15,QWORD PTR[rsp] + mov QWORD PTR[24+rsp],r8 + mov QWORD PTR[32+rsp],r9 + mov QWORD PTR[40+rsp],r10 + + call _x86_64_AES_encrypt_compact + + mov r8,QWORD PTR[24+rsp] + mov r9,QWORD PTR[32+rsp] + mov r10,QWORD PTR[40+rsp] + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + lea r8,QWORD PTR[16+r8] + lea r9,QWORD PTR[16+r9] + sub r10,16 + test r10,-16 + jnz $L$cbc_slow_enc_loop + test r10,15 + jnz $L$cbc_slow_enc_tail + mov rbp,QWORD PTR[56+rsp] + mov DWORD PTR[rbp],eax + mov DWORD PTR[4+rbp],ebx + mov DWORD PTR[8+rbp],ecx + mov DWORD PTR[12+rbp],edx + + jmp $L$cbc_exit + +ALIGN 4 +$L$cbc_slow_enc_tail:: + mov r11,rax + mov r12,rcx + mov rcx,r10 + mov rsi,r8 + mov rdi,r9 + DD 09066A4F3h + mov rcx,16 + sub rcx,r10 + xor rax,rax + DD 09066AAF3h + mov r8,r9 + mov r10,16 + mov rax,r11 + mov rcx,r12 + jmp $L$cbc_slow_enc_loop + +ALIGN 16 +$L$SLOW_DECRYPT:: + shr rax,3 + add r14,rax + + mov r11,QWORD PTR[rbp] + mov r12,QWORD PTR[8+rbp] + mov QWORD PTR[((0+64))+rsp],r11 + mov QWORD PTR[((8+64))+rsp],r12 + +ALIGN 4 +$L$cbc_slow_dec_loop:: + mov eax,DWORD PTR[r8] + mov ebx,DWORD PTR[4+r8] + mov ecx,DWORD PTR[8+r8] + mov edx,DWORD PTR[12+r8] + mov r15,QWORD PTR[rsp] + mov QWORD PTR[24+rsp],r8 + mov QWORD PTR[32+rsp],r9 + mov QWORD PTR[40+rsp],r10 + + call _x86_64_AES_decrypt_compact + + mov r8,QWORD PTR[24+rsp] + mov r9,QWORD PTR[32+rsp] + mov r10,QWORD PTR[40+rsp] + xor eax,DWORD PTR[((0+64))+rsp] + xor ebx,DWORD PTR[((4+64))+rsp] + xor ecx,DWORD PTR[((8+64))+rsp] + xor edx,DWORD PTR[((12+64))+rsp] + + mov r11,QWORD PTR[r8] + mov r12,QWORD PTR[8+r8] + sub r10,16 + jc $L$cbc_slow_dec_partial + jz $L$cbc_slow_dec_done + + mov QWORD PTR[((0+64))+rsp],r11 + mov QWORD PTR[((8+64))+rsp],r12 + + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + lea r8,QWORD PTR[16+r8] + lea r9,QWORD PTR[16+r9] + jmp $L$cbc_slow_dec_loop +$L$cbc_slow_dec_done:: + mov rdi,QWORD PTR[56+rsp] + mov QWORD PTR[rdi],r11 + mov QWORD PTR[8+rdi],r12 + + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + jmp $L$cbc_exit + +ALIGN 4 +$L$cbc_slow_dec_partial:: + mov rdi,QWORD PTR[56+rsp] + mov QWORD PTR[rdi],r11 + mov QWORD PTR[8+rdi],r12 + + mov DWORD PTR[((0+64))+rsp],eax + mov DWORD PTR[((4+64))+rsp],ebx + mov DWORD PTR[((8+64))+rsp],ecx + mov DWORD PTR[((12+64))+rsp],edx + + mov rdi,r9 + lea rsi,QWORD PTR[64+rsp] + lea rcx,QWORD PTR[16+r10] + DD 09066A4F3h + jmp $L$cbc_exit + +ALIGN 16 +$L$cbc_exit:: + mov rsi,QWORD PTR[16+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$cbc_popfq:: + popfq +$L$cbc_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_AES_cbc_encrypt:: +AES_cbc_encrypt ENDP +ALIGN 64 +$L$AES_Te:: + DD 0a56363c6h,0a56363c6h + DD 0847c7cf8h,0847c7cf8h + DD 0997777eeh,0997777eeh + DD 08d7b7bf6h,08d7b7bf6h + DD 00df2f2ffh,00df2f2ffh + DD 0bd6b6bd6h,0bd6b6bd6h + DD 0b16f6fdeh,0b16f6fdeh + DD 054c5c591h,054c5c591h + DD 050303060h,050303060h + DD 003010102h,003010102h + DD 0a96767ceh,0a96767ceh + DD 07d2b2b56h,07d2b2b56h + DD 019fefee7h,019fefee7h + DD 062d7d7b5h,062d7d7b5h + DD 0e6abab4dh,0e6abab4dh + DD 09a7676ech,09a7676ech + DD 045caca8fh,045caca8fh + DD 09d82821fh,09d82821fh + DD 040c9c989h,040c9c989h + DD 0877d7dfah,0877d7dfah + DD 015fafaefh,015fafaefh + DD 0eb5959b2h,0eb5959b2h + DD 0c947478eh,0c947478eh + DD 00bf0f0fbh,00bf0f0fbh + DD 0ecadad41h,0ecadad41h + DD 067d4d4b3h,067d4d4b3h + DD 0fda2a25fh,0fda2a25fh + DD 0eaafaf45h,0eaafaf45h + DD 0bf9c9c23h,0bf9c9c23h + DD 0f7a4a453h,0f7a4a453h + DD 0967272e4h,0967272e4h + DD 05bc0c09bh,05bc0c09bh + DD 0c2b7b775h,0c2b7b775h + DD 01cfdfde1h,01cfdfde1h + DD 0ae93933dh,0ae93933dh + DD 06a26264ch,06a26264ch + DD 05a36366ch,05a36366ch + DD 0413f3f7eh,0413f3f7eh + DD 002f7f7f5h,002f7f7f5h + DD 04fcccc83h,04fcccc83h + DD 05c343468h,05c343468h + DD 0f4a5a551h,0f4a5a551h + DD 034e5e5d1h,034e5e5d1h + DD 008f1f1f9h,008f1f1f9h + DD 0937171e2h,0937171e2h + DD 073d8d8abh,073d8d8abh + DD 053313162h,053313162h + DD 03f15152ah,03f15152ah + DD 00c040408h,00c040408h + DD 052c7c795h,052c7c795h + DD 065232346h,065232346h + DD 05ec3c39dh,05ec3c39dh + DD 028181830h,028181830h + DD 0a1969637h,0a1969637h + DD 00f05050ah,00f05050ah + DD 0b59a9a2fh,0b59a9a2fh + DD 00907070eh,00907070eh + DD 036121224h,036121224h + DD 09b80801bh,09b80801bh + DD 03de2e2dfh,03de2e2dfh + DD 026ebebcdh,026ebebcdh + DD 06927274eh,06927274eh + DD 0cdb2b27fh,0cdb2b27fh + DD 09f7575eah,09f7575eah + DD 01b090912h,01b090912h + DD 09e83831dh,09e83831dh + DD 0742c2c58h,0742c2c58h + DD 02e1a1a34h,02e1a1a34h + DD 02d1b1b36h,02d1b1b36h + DD 0b26e6edch,0b26e6edch + DD 0ee5a5ab4h,0ee5a5ab4h + DD 0fba0a05bh,0fba0a05bh + DD 0f65252a4h,0f65252a4h + DD 04d3b3b76h,04d3b3b76h + DD 061d6d6b7h,061d6d6b7h + DD 0ceb3b37dh,0ceb3b37dh + DD 07b292952h,07b292952h + DD 03ee3e3ddh,03ee3e3ddh + DD 0712f2f5eh,0712f2f5eh + DD 097848413h,097848413h + DD 0f55353a6h,0f55353a6h + DD 068d1d1b9h,068d1d1b9h + DD 000000000h,000000000h + DD 02cededc1h,02cededc1h + DD 060202040h,060202040h + DD 01ffcfce3h,01ffcfce3h + DD 0c8b1b179h,0c8b1b179h + DD 0ed5b5bb6h,0ed5b5bb6h + DD 0be6a6ad4h,0be6a6ad4h + DD 046cbcb8dh,046cbcb8dh + DD 0d9bebe67h,0d9bebe67h + DD 04b393972h,04b393972h + DD 0de4a4a94h,0de4a4a94h + DD 0d44c4c98h,0d44c4c98h + DD 0e85858b0h,0e85858b0h + DD 04acfcf85h,04acfcf85h + DD 06bd0d0bbh,06bd0d0bbh + DD 02aefefc5h,02aefefc5h + DD 0e5aaaa4fh,0e5aaaa4fh + DD 016fbfbedh,016fbfbedh + DD 0c5434386h,0c5434386h + DD 0d74d4d9ah,0d74d4d9ah + DD 055333366h,055333366h + DD 094858511h,094858511h + DD 0cf45458ah,0cf45458ah + DD 010f9f9e9h,010f9f9e9h + DD 006020204h,006020204h + DD 0817f7ffeh,0817f7ffeh + DD 0f05050a0h,0f05050a0h + DD 0443c3c78h,0443c3c78h + DD 0ba9f9f25h,0ba9f9f25h + DD 0e3a8a84bh,0e3a8a84bh + DD 0f35151a2h,0f35151a2h + DD 0fea3a35dh,0fea3a35dh + DD 0c0404080h,0c0404080h + DD 08a8f8f05h,08a8f8f05h + DD 0ad92923fh,0ad92923fh + DD 0bc9d9d21h,0bc9d9d21h + DD 048383870h,048383870h + DD 004f5f5f1h,004f5f5f1h + DD 0dfbcbc63h,0dfbcbc63h + DD 0c1b6b677h,0c1b6b677h + DD 075dadaafh,075dadaafh + DD 063212142h,063212142h + DD 030101020h,030101020h + DD 01affffe5h,01affffe5h + DD 00ef3f3fdh,00ef3f3fdh + DD 06dd2d2bfh,06dd2d2bfh + DD 04ccdcd81h,04ccdcd81h + DD 0140c0c18h,0140c0c18h + DD 035131326h,035131326h + DD 02fececc3h,02fececc3h + DD 0e15f5fbeh,0e15f5fbeh + DD 0a2979735h,0a2979735h + DD 0cc444488h,0cc444488h + DD 03917172eh,03917172eh + DD 057c4c493h,057c4c493h + DD 0f2a7a755h,0f2a7a755h + DD 0827e7efch,0827e7efch + DD 0473d3d7ah,0473d3d7ah + DD 0ac6464c8h,0ac6464c8h + DD 0e75d5dbah,0e75d5dbah + DD 02b191932h,02b191932h + DD 0957373e6h,0957373e6h + DD 0a06060c0h,0a06060c0h + DD 098818119h,098818119h + DD 0d14f4f9eh,0d14f4f9eh + DD 07fdcdca3h,07fdcdca3h + DD 066222244h,066222244h + DD 07e2a2a54h,07e2a2a54h + DD 0ab90903bh,0ab90903bh + DD 08388880bh,08388880bh + DD 0ca46468ch,0ca46468ch + DD 029eeeec7h,029eeeec7h + DD 0d3b8b86bh,0d3b8b86bh + DD 03c141428h,03c141428h + DD 079dedea7h,079dedea7h + DD 0e25e5ebch,0e25e5ebch + DD 01d0b0b16h,01d0b0b16h + DD 076dbdbadh,076dbdbadh + DD 03be0e0dbh,03be0e0dbh + DD 056323264h,056323264h + DD 04e3a3a74h,04e3a3a74h + DD 01e0a0a14h,01e0a0a14h + DD 0db494992h,0db494992h + DD 00a06060ch,00a06060ch + DD 06c242448h,06c242448h + DD 0e45c5cb8h,0e45c5cb8h + DD 05dc2c29fh,05dc2c29fh + DD 06ed3d3bdh,06ed3d3bdh + DD 0efacac43h,0efacac43h + DD 0a66262c4h,0a66262c4h + DD 0a8919139h,0a8919139h + DD 0a4959531h,0a4959531h + DD 037e4e4d3h,037e4e4d3h + DD 08b7979f2h,08b7979f2h + DD 032e7e7d5h,032e7e7d5h + DD 043c8c88bh,043c8c88bh + DD 05937376eh,05937376eh + DD 0b76d6ddah,0b76d6ddah + DD 08c8d8d01h,08c8d8d01h + DD 064d5d5b1h,064d5d5b1h + DD 0d24e4e9ch,0d24e4e9ch + DD 0e0a9a949h,0e0a9a949h + DD 0b46c6cd8h,0b46c6cd8h + DD 0fa5656ach,0fa5656ach + DD 007f4f4f3h,007f4f4f3h + DD 025eaeacfh,025eaeacfh + DD 0af6565cah,0af6565cah + DD 08e7a7af4h,08e7a7af4h + DD 0e9aeae47h,0e9aeae47h + DD 018080810h,018080810h + DD 0d5baba6fh,0d5baba6fh + DD 0887878f0h,0887878f0h + DD 06f25254ah,06f25254ah + DD 0722e2e5ch,0722e2e5ch + DD 0241c1c38h,0241c1c38h + DD 0f1a6a657h,0f1a6a657h + DD 0c7b4b473h,0c7b4b473h + DD 051c6c697h,051c6c697h + DD 023e8e8cbh,023e8e8cbh + DD 07cdddda1h,07cdddda1h + DD 09c7474e8h,09c7474e8h + DD 0211f1f3eh,0211f1f3eh + DD 0dd4b4b96h,0dd4b4b96h + DD 0dcbdbd61h,0dcbdbd61h + DD 0868b8b0dh,0868b8b0dh + DD 0858a8a0fh,0858a8a0fh + DD 0907070e0h,0907070e0h + DD 0423e3e7ch,0423e3e7ch + DD 0c4b5b571h,0c4b5b571h + DD 0aa6666cch,0aa6666cch + DD 0d8484890h,0d8484890h + DD 005030306h,005030306h + DD 001f6f6f7h,001f6f6f7h + DD 0120e0e1ch,0120e0e1ch + DD 0a36161c2h,0a36161c2h + DD 05f35356ah,05f35356ah + DD 0f95757aeh,0f95757aeh + DD 0d0b9b969h,0d0b9b969h + DD 091868617h,091868617h + DD 058c1c199h,058c1c199h + DD 0271d1d3ah,0271d1d3ah + DD 0b99e9e27h,0b99e9e27h + DD 038e1e1d9h,038e1e1d9h + DD 013f8f8ebh,013f8f8ebh + DD 0b398982bh,0b398982bh + DD 033111122h,033111122h + DD 0bb6969d2h,0bb6969d2h + DD 070d9d9a9h,070d9d9a9h + DD 0898e8e07h,0898e8e07h + DD 0a7949433h,0a7949433h + DD 0b69b9b2dh,0b69b9b2dh + DD 0221e1e3ch,0221e1e3ch + DD 092878715h,092878715h + DD 020e9e9c9h,020e9e9c9h + DD 049cece87h,049cece87h + DD 0ff5555aah,0ff5555aah + DD 078282850h,078282850h + DD 07adfdfa5h,07adfdfa5h + DD 08f8c8c03h,08f8c8c03h + DD 0f8a1a159h,0f8a1a159h + DD 080898909h,080898909h + DD 0170d0d1ah,0170d0d1ah + DD 0dabfbf65h,0dabfbf65h + DD 031e6e6d7h,031e6e6d7h + DD 0c6424284h,0c6424284h + DD 0b86868d0h,0b86868d0h + DD 0c3414182h,0c3414182h + DD 0b0999929h,0b0999929h + DD 0772d2d5ah,0772d2d5ah + DD 0110f0f1eh,0110f0f1eh + DD 0cbb0b07bh,0cbb0b07bh + DD 0fc5454a8h,0fc5454a8h + DD 0d6bbbb6dh,0d6bbbb6dh + DD 03a16162ch,03a16162ch +DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h +DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h +DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h +DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h +DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch +DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h +DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah +DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h +DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h +DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h +DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh +DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh +DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h +DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h +DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h +DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h +DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h +DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h +DB 060h,081h,04fh,0dch,022h,02ah,090h,088h +DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh +DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch +DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h +DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h +DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h +DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h +DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah +DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh +DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh +DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h +DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh +DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h +DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h +DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h +DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h +DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h +DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h +DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch +DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h +DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah +DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h +DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h +DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h +DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh +DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh +DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h +DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h +DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h +DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h +DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h +DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h +DB 060h,081h,04fh,0dch,022h,02ah,090h,088h +DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh +DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch +DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h +DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h +DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h +DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h +DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah +DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh +DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh +DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h +DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh +DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h +DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h +DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h +DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h +DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h +DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h +DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch +DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h +DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah +DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h +DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h +DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h +DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh +DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh +DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h +DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h +DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h +DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h +DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h +DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h +DB 060h,081h,04fh,0dch,022h,02ah,090h,088h +DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh +DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch +DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h +DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h +DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h +DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h +DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah +DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh +DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh +DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h +DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh +DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h +DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h +DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h +DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h +DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h +DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h +DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch +DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h +DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah +DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h +DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h +DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h +DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh +DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh +DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h +DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h +DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h +DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h +DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h +DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h +DB 060h,081h,04fh,0dch,022h,02ah,090h,088h +DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh +DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch +DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h +DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h +DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h +DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h +DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah +DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh +DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh +DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h +DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh +DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h +DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h + DD 000000001h,000000002h,000000004h,000000008h + DD 000000010h,000000020h,000000040h,000000080h + DD 00000001bh,000000036h,080808080h,080808080h + DD 0fefefefeh,0fefefefeh,01b1b1b1bh,01b1b1b1bh +ALIGN 64 +$L$AES_Td:: + DD 050a7f451h,050a7f451h + DD 05365417eh,05365417eh + DD 0c3a4171ah,0c3a4171ah + DD 0965e273ah,0965e273ah + DD 0cb6bab3bh,0cb6bab3bh + DD 0f1459d1fh,0f1459d1fh + DD 0ab58faach,0ab58faach + DD 09303e34bh,09303e34bh + DD 055fa3020h,055fa3020h + DD 0f66d76adh,0f66d76adh + DD 09176cc88h,09176cc88h + DD 0254c02f5h,0254c02f5h + DD 0fcd7e54fh,0fcd7e54fh + DD 0d7cb2ac5h,0d7cb2ac5h + DD 080443526h,080443526h + DD 08fa362b5h,08fa362b5h + DD 0495ab1deh,0495ab1deh + DD 0671bba25h,0671bba25h + DD 0980eea45h,0980eea45h + DD 0e1c0fe5dh,0e1c0fe5dh + DD 002752fc3h,002752fc3h + DD 012f04c81h,012f04c81h + DD 0a397468dh,0a397468dh + DD 0c6f9d36bh,0c6f9d36bh + DD 0e75f8f03h,0e75f8f03h + DD 0959c9215h,0959c9215h + DD 0eb7a6dbfh,0eb7a6dbfh + DD 0da595295h,0da595295h + DD 02d83bed4h,02d83bed4h + DD 0d3217458h,0d3217458h + DD 02969e049h,02969e049h + DD 044c8c98eh,044c8c98eh + DD 06a89c275h,06a89c275h + DD 078798ef4h,078798ef4h + DD 06b3e5899h,06b3e5899h + DD 0dd71b927h,0dd71b927h + DD 0b64fe1beh,0b64fe1beh + DD 017ad88f0h,017ad88f0h + DD 066ac20c9h,066ac20c9h + DD 0b43ace7dh,0b43ace7dh + DD 0184adf63h,0184adf63h + DD 082311ae5h,082311ae5h + DD 060335197h,060335197h + DD 0457f5362h,0457f5362h + DD 0e07764b1h,0e07764b1h + DD 084ae6bbbh,084ae6bbbh + DD 01ca081feh,01ca081feh + DD 0942b08f9h,0942b08f9h + DD 058684870h,058684870h + DD 019fd458fh,019fd458fh + DD 0876cde94h,0876cde94h + DD 0b7f87b52h,0b7f87b52h + DD 023d373abh,023d373abh + DD 0e2024b72h,0e2024b72h + DD 0578f1fe3h,0578f1fe3h + DD 02aab5566h,02aab5566h + DD 00728ebb2h,00728ebb2h + DD 003c2b52fh,003c2b52fh + DD 09a7bc586h,09a7bc586h + DD 0a50837d3h,0a50837d3h + DD 0f2872830h,0f2872830h + DD 0b2a5bf23h,0b2a5bf23h + DD 0ba6a0302h,0ba6a0302h + DD 05c8216edh,05c8216edh + DD 02b1ccf8ah,02b1ccf8ah + DD 092b479a7h,092b479a7h + DD 0f0f207f3h,0f0f207f3h + DD 0a1e2694eh,0a1e2694eh + DD 0cdf4da65h,0cdf4da65h + DD 0d5be0506h,0d5be0506h + DD 01f6234d1h,01f6234d1h + DD 08afea6c4h,08afea6c4h + DD 09d532e34h,09d532e34h + DD 0a055f3a2h,0a055f3a2h + DD 032e18a05h,032e18a05h + DD 075ebf6a4h,075ebf6a4h + DD 039ec830bh,039ec830bh + DD 0aaef6040h,0aaef6040h + DD 0069f715eh,0069f715eh + DD 051106ebdh,051106ebdh + DD 0f98a213eh,0f98a213eh + DD 03d06dd96h,03d06dd96h + DD 0ae053eddh,0ae053eddh + DD 046bde64dh,046bde64dh + DD 0b58d5491h,0b58d5491h + DD 0055dc471h,0055dc471h + DD 06fd40604h,06fd40604h + DD 0ff155060h,0ff155060h + DD 024fb9819h,024fb9819h + DD 097e9bdd6h,097e9bdd6h + DD 0cc434089h,0cc434089h + DD 0779ed967h,0779ed967h + DD 0bd42e8b0h,0bd42e8b0h + DD 0888b8907h,0888b8907h + DD 0385b19e7h,0385b19e7h + DD 0dbeec879h,0dbeec879h + DD 0470a7ca1h,0470a7ca1h + DD 0e90f427ch,0e90f427ch + DD 0c91e84f8h,0c91e84f8h + DD 000000000h,000000000h + DD 083868009h,083868009h + DD 048ed2b32h,048ed2b32h + DD 0ac70111eh,0ac70111eh + DD 04e725a6ch,04e725a6ch + DD 0fbff0efdh,0fbff0efdh + DD 05638850fh,05638850fh + DD 01ed5ae3dh,01ed5ae3dh + DD 027392d36h,027392d36h + DD 064d90f0ah,064d90f0ah + DD 021a65c68h,021a65c68h + DD 0d1545b9bh,0d1545b9bh + DD 03a2e3624h,03a2e3624h + DD 0b1670a0ch,0b1670a0ch + DD 00fe75793h,00fe75793h + DD 0d296eeb4h,0d296eeb4h + DD 09e919b1bh,09e919b1bh + DD 04fc5c080h,04fc5c080h + DD 0a220dc61h,0a220dc61h + DD 0694b775ah,0694b775ah + DD 0161a121ch,0161a121ch + DD 00aba93e2h,00aba93e2h + DD 0e52aa0c0h,0e52aa0c0h + DD 043e0223ch,043e0223ch + DD 01d171b12h,01d171b12h + DD 00b0d090eh,00b0d090eh + DD 0adc78bf2h,0adc78bf2h + DD 0b9a8b62dh,0b9a8b62dh + DD 0c8a91e14h,0c8a91e14h + DD 08519f157h,08519f157h + DD 04c0775afh,04c0775afh + DD 0bbdd99eeh,0bbdd99eeh + DD 0fd607fa3h,0fd607fa3h + DD 09f2601f7h,09f2601f7h + DD 0bcf5725ch,0bcf5725ch + DD 0c53b6644h,0c53b6644h + DD 0347efb5bh,0347efb5bh + DD 07629438bh,07629438bh + DD 0dcc623cbh,0dcc623cbh + DD 068fcedb6h,068fcedb6h + DD 063f1e4b8h,063f1e4b8h + DD 0cadc31d7h,0cadc31d7h + DD 010856342h,010856342h + DD 040229713h,040229713h + DD 02011c684h,02011c684h + DD 07d244a85h,07d244a85h + DD 0f83dbbd2h,0f83dbbd2h + DD 01132f9aeh,01132f9aeh + DD 06da129c7h,06da129c7h + DD 04b2f9e1dh,04b2f9e1dh + DD 0f330b2dch,0f330b2dch + DD 0ec52860dh,0ec52860dh + DD 0d0e3c177h,0d0e3c177h + DD 06c16b32bh,06c16b32bh + DD 099b970a9h,099b970a9h + DD 0fa489411h,0fa489411h + DD 02264e947h,02264e947h + DD 0c48cfca8h,0c48cfca8h + DD 01a3ff0a0h,01a3ff0a0h + DD 0d82c7d56h,0d82c7d56h + DD 0ef903322h,0ef903322h + DD 0c74e4987h,0c74e4987h + DD 0c1d138d9h,0c1d138d9h + DD 0fea2ca8ch,0fea2ca8ch + DD 0360bd498h,0360bd498h + DD 0cf81f5a6h,0cf81f5a6h + DD 028de7aa5h,028de7aa5h + DD 0268eb7dah,0268eb7dah + DD 0a4bfad3fh,0a4bfad3fh + DD 0e49d3a2ch,0e49d3a2ch + DD 00d927850h,00d927850h + DD 09bcc5f6ah,09bcc5f6ah + DD 062467e54h,062467e54h + DD 0c2138df6h,0c2138df6h + DD 0e8b8d890h,0e8b8d890h + DD 05ef7392eh,05ef7392eh + DD 0f5afc382h,0f5afc382h + DD 0be805d9fh,0be805d9fh + DD 07c93d069h,07c93d069h + DD 0a92dd56fh,0a92dd56fh + DD 0b31225cfh,0b31225cfh + DD 03b99acc8h,03b99acc8h + DD 0a77d1810h,0a77d1810h + DD 06e639ce8h,06e639ce8h + DD 07bbb3bdbh,07bbb3bdbh + DD 0097826cdh,0097826cdh + DD 0f418596eh,0f418596eh + DD 001b79aech,001b79aech + DD 0a89a4f83h,0a89a4f83h + DD 0656e95e6h,0656e95e6h + DD 07ee6ffaah,07ee6ffaah + DD 008cfbc21h,008cfbc21h + DD 0e6e815efh,0e6e815efh + DD 0d99be7bah,0d99be7bah + DD 0ce366f4ah,0ce366f4ah + DD 0d4099feah,0d4099feah + DD 0d67cb029h,0d67cb029h + DD 0afb2a431h,0afb2a431h + DD 031233f2ah,031233f2ah + DD 03094a5c6h,03094a5c6h + DD 0c066a235h,0c066a235h + DD 037bc4e74h,037bc4e74h + DD 0a6ca82fch,0a6ca82fch + DD 0b0d090e0h,0b0d090e0h + DD 015d8a733h,015d8a733h + DD 04a9804f1h,04a9804f1h + DD 0f7daec41h,0f7daec41h + DD 00e50cd7fh,00e50cd7fh + DD 02ff69117h,02ff69117h + DD 08dd64d76h,08dd64d76h + DD 04db0ef43h,04db0ef43h + DD 0544daacch,0544daacch + DD 0df0496e4h,0df0496e4h + DD 0e3b5d19eh,0e3b5d19eh + DD 01b886a4ch,01b886a4ch + DD 0b81f2cc1h,0b81f2cc1h + DD 07f516546h,07f516546h + DD 004ea5e9dh,004ea5e9dh + DD 05d358c01h,05d358c01h + DD 0737487fah,0737487fah + DD 02e410bfbh,02e410bfbh + DD 05a1d67b3h,05a1d67b3h + DD 052d2db92h,052d2db92h + DD 0335610e9h,0335610e9h + DD 01347d66dh,01347d66dh + DD 08c61d79ah,08c61d79ah + DD 07a0ca137h,07a0ca137h + DD 08e14f859h,08e14f859h + DD 0893c13ebh,0893c13ebh + DD 0ee27a9ceh,0ee27a9ceh + DD 035c961b7h,035c961b7h + DD 0ede51ce1h,0ede51ce1h + DD 03cb1477ah,03cb1477ah + DD 059dfd29ch,059dfd29ch + DD 03f73f255h,03f73f255h + DD 079ce1418h,079ce1418h + DD 0bf37c773h,0bf37c773h + DD 0eacdf753h,0eacdf753h + DD 05baafd5fh,05baafd5fh + DD 0146f3ddfh,0146f3ddfh + DD 086db4478h,086db4478h + DD 081f3afcah,081f3afcah + DD 03ec468b9h,03ec468b9h + DD 02c342438h,02c342438h + DD 05f40a3c2h,05f40a3c2h + DD 072c31d16h,072c31d16h + DD 00c25e2bch,00c25e2bch + DD 08b493c28h,08b493c28h + DD 041950dffh,041950dffh + DD 07101a839h,07101a839h + DD 0deb30c08h,0deb30c08h + DD 09ce4b4d8h,09ce4b4d8h + DD 090c15664h,090c15664h + DD 06184cb7bh,06184cb7bh + DD 070b632d5h,070b632d5h + DD 0745c6c48h,0745c6c48h + DD 04257b8d0h,04257b8d0h +DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h +DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh +DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h +DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh +DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh +DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh +DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h +DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h +DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h +DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h +DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah +DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h +DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah +DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h +DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h +DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh +DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah +DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h +DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h +DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh +DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h +DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh +DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h +DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h +DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h +DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh +DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh +DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh +DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h +DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h +DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h +DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh + DD 080808080h,080808080h,0fefefefeh,0fefefefeh + DD 01b1b1b1bh,01b1b1b1bh,0,0 +DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h +DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh +DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h +DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh +DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh +DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh +DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h +DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h +DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h +DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h +DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah +DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h +DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah +DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h +DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h +DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh +DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah +DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h +DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h +DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh +DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h +DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh +DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h +DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h +DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h +DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh +DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh +DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh +DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h +DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h +DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h +DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh + DD 080808080h,080808080h,0fefefefeh,0fefefefeh + DD 01b1b1b1bh,01b1b1b1bh,0,0 +DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h +DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh +DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h +DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh +DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh +DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh +DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h +DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h +DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h +DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h +DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah +DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h +DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah +DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h +DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h +DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh +DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah +DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h +DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h +DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh +DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h +DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh +DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h +DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h +DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h +DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh +DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh +DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh +DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h +DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h +DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h +DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh + DD 080808080h,080808080h,0fefefefeh,0fefefefeh + DD 01b1b1b1bh,01b1b1b1bh,0,0 +DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h +DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh +DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h +DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh +DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh +DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh +DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h +DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h +DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h +DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h +DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah +DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h +DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah +DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h +DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h +DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh +DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah +DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h +DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h +DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh +DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h +DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh +DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h +DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h +DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h +DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh +DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh +DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh +DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h +DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h +DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h +DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh + DD 080808080h,080808080h,0fefefefeh,0fefefefeh + DD 01b1b1b1bh,01b1b1b1bh,0,0 +DB 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32 +DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +DB 62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +block_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_block_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_block_prologue + + mov rax,QWORD PTR[24+rax] + lea rax,QWORD PTR[48+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_block_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + jmp $L$common_seh_exit +block_se_handler ENDP + + +ALIGN 16 +key_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_key_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_key_prologue + + lea rax,QWORD PTR[56+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_key_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + jmp $L$common_seh_exit +key_se_handler ENDP + + +ALIGN 16 +cbc_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$cbc_prologue] + cmp rbx,r10 + jb $L$in_cbc_prologue + + lea r10,QWORD PTR[$L$cbc_fast_body] + cmp rbx,r10 + jb $L$in_cbc_frame_setup + + lea r10,QWORD PTR[$L$cbc_slow_prologue] + cmp rbx,r10 + jb $L$in_cbc_body + + lea r10,QWORD PTR[$L$cbc_slow_body] + cmp rbx,r10 + jb $L$in_cbc_frame_setup + +$L$in_cbc_body:: + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$cbc_epilogue] + cmp rbx,r10 + jae $L$in_cbc_prologue + + lea rax,QWORD PTR[8+rax] + + lea r10,QWORD PTR[$L$cbc_popfq] + cmp rbx,r10 + jae $L$in_cbc_prologue + + mov rax,QWORD PTR[8+rax] + lea rax,QWORD PTR[56+rax] + +$L$in_cbc_frame_setup:: + mov rbx,QWORD PTR[((-16))+rax] + mov rbp,QWORD PTR[((-24))+rax] + mov r12,QWORD PTR[((-32))+rax] + mov r13,QWORD PTR[((-40))+rax] + mov r14,QWORD PTR[((-48))+rax] + mov r15,QWORD PTR[((-56))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_cbc_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + +$L$common_seh_exit:: + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +cbc_se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_AES_encrypt + DD imagerel $L$SEH_end_AES_encrypt + DD imagerel $L$SEH_info_AES_encrypt + + DD imagerel $L$SEH_begin_AES_decrypt + DD imagerel $L$SEH_end_AES_decrypt + DD imagerel $L$SEH_info_AES_decrypt + + DD imagerel $L$SEH_begin_AES_set_encrypt_key + DD imagerel $L$SEH_end_AES_set_encrypt_key + DD imagerel $L$SEH_info_AES_set_encrypt_key + + DD imagerel $L$SEH_begin_AES_set_decrypt_key + DD imagerel $L$SEH_end_AES_set_decrypt_key + DD imagerel $L$SEH_info_AES_set_decrypt_key + + DD imagerel $L$SEH_begin_AES_cbc_encrypt + DD imagerel $L$SEH_end_AES_cbc_encrypt + DD imagerel $L$SEH_info_AES_cbc_encrypt + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_AES_encrypt:: +DB 9,0,0,0 + DD imagerel block_se_handler + DD imagerel $L$enc_prologue,imagerel $L$enc_epilogue +$L$SEH_info_AES_decrypt:: +DB 9,0,0,0 + DD imagerel block_se_handler + DD imagerel $L$dec_prologue,imagerel $L$dec_epilogue +$L$SEH_info_AES_set_encrypt_key:: +DB 9,0,0,0 + DD imagerel key_se_handler + DD imagerel $L$enc_key_prologue,imagerel $L$enc_key_epilogue +$L$SEH_info_AES_set_decrypt_key:: +DB 9,0,0,0 + DD imagerel key_se_handler + DD imagerel $L$dec_key_prologue,imagerel $L$dec_key_epilogue +$L$SEH_info_AES_cbc_encrypt:: +DB 9,0,0,0 + DD imagerel cbc_se_handler + +.xdata ENDS +END + diff --git a/ext/libressl/crypto/aes/aes-mingw64-x86_64.S b/ext/libressl/crypto/aes/aes-mingw64-x86_64.S new file mode 100644 index 0000000..ca2d60f --- /dev/null +++ b/ext/libressl/crypto/aes/aes-mingw64-x86_64.S @@ -0,0 +1,2861 @@ +#include "x86_arch.h" + +.text +.def _x86_64_AES_encrypt; .scl 3; .type 32; .endef +.p2align 4 +_x86_64_AES_encrypt: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp .Lenc_loop +.p2align 4 +.Lenc_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + shrl $16,%ecx + movzbl %ah,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movl 12(%r15),%edx + movzbl %bh,%edi + movzbl %ch,%ebp + movl 0(%r15),%eax + xorl 1(%r14,%rdi,8),%r12d + xorl 1(%r14,%rbp,8),%r8d + + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + subl $1,%r13d + jnz .Lenc_loop + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl 2(%r14,%rsi,8),%r10d + movzbl 2(%r14,%rdi,8),%r11d + movzbl 2(%r14,%rbp,8),%r12d + + movzbl %dl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl 2(%r14,%rsi,8),%r8d + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $65280,%edi + andl $65280,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%ecx + + movzbl %dh,%esi + movzbl %ah,%edi + shrl $16,%edx + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + + andl $65280,%esi + andl $65280,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $16711680,%esi + andl $16711680,%edi + andl $16711680,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movl 0(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 2(%r14,%rbp,8),%ebp + + andl $16711680,%esi + andl $4278190080,%edi + andl $4278190080,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %bh,%esi + movzbl %ch,%edi + movl 16+12(%r15),%edx + movl 2(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 16+0(%r15),%eax + + andl $4278190080,%esi + andl $4278190080,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + retq + +.def _x86_64_AES_encrypt_compact; .scl 3; .type 32; .endef +.p2align 4 +_x86_64_AES_encrypt_compact: + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp .Lenc_loop_compact +.p2align 4 +.Lenc_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + + movzbl %dl,%r8d + movzbl %bh,%esi + movzbl %ch,%edi + movzbl (%r14,%r8,1),%r8d + movzbl (%r14,%rsi,1),%r9d + movzbl (%r14,%rdi,1),%r13d + + movzbl %dh,%ebp + movzbl %ah,%esi + shrl $16,%ecx + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + shrl $16,%edx + + movzbl %cl,%edi + shll $8,%r9d + shll $8,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %r9d,%r10d + xorl %r13d,%r11d + + movzbl %dl,%r9d + shrl $16,%eax + shrl $16,%ebx + movzbl %al,%r13d + shll $8,%ebp + shll $8,%esi + movzbl (%r14,%r9,1),%r9d + movzbl (%r14,%r13,1),%r13d + xorl %ebp,%r12d + xorl %esi,%r8d + + movzbl %bl,%ebp + movzbl %dh,%esi + shll $16,%edi + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + xorl %edi,%r10d + + movzbl %ah,%edi + shrl $8,%ecx + shrl $8,%ebx + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rcx,1),%edx + movzbl (%r14,%rbx,1),%ecx + shll $16,%r9d + shll $16,%r13d + shll $16,%ebp + xorl %r9d,%r11d + xorl %r13d,%r12d + xorl %ebp,%r8d + + shll $24,%esi + shll $24,%edi + shll $24,%edx + xorl %esi,%r10d + shll $24,%ecx + xorl %edi,%r11d + movl %r10d,%eax + movl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je .Lenc_compact_done + movl %eax,%esi + movl %ebx,%edi + andl $2155905152,%esi + andl $2155905152,%edi + movl %esi,%r10d + movl %edi,%r11d + shrl $7,%r10d + leal (%rax,%rax,1),%r8d + shrl $7,%r11d + leal (%rbx,%rbx,1),%r9d + subl %r10d,%esi + subl %r11d,%edi + andl $4278124286,%r8d + andl $4278124286,%r9d + andl $454761243,%esi + andl $454761243,%edi + movl %eax,%r10d + movl %ebx,%r11d + xorl %esi,%r8d + xorl %edi,%r9d + + xorl %r8d,%eax + xorl %r9d,%ebx + movl %ecx,%esi + movl %edx,%edi + roll $24,%eax + roll $24,%ebx + andl $2155905152,%esi + andl $2155905152,%edi + xorl %r8d,%eax + xorl %r9d,%ebx + movl %esi,%r12d + movl %edi,%ebp + rorl $16,%r10d + rorl $16,%r11d + shrl $7,%r12d + leal (%rcx,%rcx,1),%r8d + xorl %r10d,%eax + xorl %r11d,%ebx + shrl $7,%ebp + leal (%rdx,%rdx,1),%r9d + rorl $8,%r10d + rorl $8,%r11d + subl %r12d,%esi + subl %ebp,%edi + xorl %r10d,%eax + xorl %r11d,%ebx + + andl $4278124286,%r8d + andl $4278124286,%r9d + andl $454761243,%esi + andl $454761243,%edi + movl %ecx,%r12d + movl %edx,%ebp + xorl %esi,%r8d + xorl %edi,%r9d + + xorl %r8d,%ecx + xorl %r9d,%edx + roll $24,%ecx + roll $24,%edx + xorl %r8d,%ecx + xorl %r9d,%edx + movl 0(%r14),%esi + rorl $16,%r12d + rorl $16,%ebp + movl 64(%r14),%edi + xorl %r12d,%ecx + xorl %ebp,%edx + movl 128(%r14),%r8d + rorl $8,%r12d + rorl $8,%ebp + movl 192(%r14),%r9d + xorl %r12d,%ecx + xorl %ebp,%edx + jmp .Lenc_loop_compact +.p2align 4 +.Lenc_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + retq + +.globl AES_encrypt +.def AES_encrypt; .scl 2; .type 32; .endef +.p2align 4 +.globl asm_AES_encrypt + +asm_AES_encrypt: +AES_encrypt: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_AES_encrypt: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r10 + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $960,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %r10,24(%rsp) +.Lenc_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq .LAES_Te+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $768,%rbp + leaq (%r14,%rbp,1),%r14 + + call _x86_64_AES_encrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lenc_epilogue: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_AES_encrypt: +.def _x86_64_AES_decrypt; .scl 3; .type 32; .endef +.p2align 4 +_x86_64_AES_decrypt: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp .Ldec_loop +.p2align 4 +.Ldec_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %bh,%esi + shrl $16,%eax + movzbl %ch,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + movl 12(%r15),%edx + movzbl %ah,%ebp + xorl 1(%r14,%rsi,8),%r12d + movl 0(%r15),%eax + xorl 1(%r14,%rbp,8),%r8d + + xorl %r10d,%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r12d,%ecx + xorl %r11d,%ebx + xorl %r8d,%edx + subl $1,%r13d + jnz .Ldec_loop + leaq 2048(%r14),%r14 + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl (%r14,%rsi,1),%r10d + movzbl (%r14,%rdi,1),%r11d + movzbl (%r14,%rbp,1),%r12d + + movzbl %dl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movzbl (%r14,%rsi,1),%r8d + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $8,%edi + shll $8,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%edx + + movzbl %bh,%esi + movzbl %ch,%edi + shrl $16,%eax + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + + shll $8,%esi + shll $8,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $16,%edi + shll $16,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $24,%edi + shll $24,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %dh,%esi + movzbl %ah,%edi + movl 16+12(%r15),%edx + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movl 16+0(%r15),%eax + + shll $24,%esi + shll $24,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + leaq -2048(%r14),%r14 + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + retq + +.def _x86_64_AES_decrypt_compact; .scl 3; .type 32; .endef +.p2align 4 +_x86_64_AES_decrypt_compact: + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp .Ldec_loop_compact + +.p2align 4 +.Ldec_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + + movzbl %dl,%r8d + movzbl %dh,%esi + movzbl %ah,%edi + movzbl (%r14,%r8,1),%r8d + movzbl (%r14,%rsi,1),%r9d + movzbl (%r14,%rdi,1),%r13d + + movzbl %bh,%ebp + movzbl %ch,%esi + shrl $16,%ecx + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + shrl $16,%edx + + movzbl %cl,%edi + shll $8,%r9d + shll $8,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %r9d,%r10d + xorl %r13d,%r11d + + movzbl %dl,%r9d + shrl $16,%eax + shrl $16,%ebx + movzbl %al,%r13d + shll $8,%ebp + shll $8,%esi + movzbl (%r14,%r9,1),%r9d + movzbl (%r14,%r13,1),%r13d + xorl %ebp,%r12d + xorl %esi,%r8d + + movzbl %bl,%ebp + movzbl %bh,%esi + shll $16,%edi + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + xorl %edi,%r10d + + movzbl %ch,%edi + shll $16,%r9d + shll $16,%r13d + movzbl (%r14,%rdi,1),%ebx + xorl %r9d,%r11d + xorl %r13d,%r12d + + movzbl %dh,%edi + shrl $8,%eax + shll $16,%ebp + movzbl (%r14,%rdi,1),%ecx + movzbl (%r14,%rax,1),%edx + xorl %ebp,%r8d + + shll $24,%esi + shll $24,%ebx + shll $24,%ecx + xorl %esi,%r10d + shll $24,%edx + xorl %r11d,%ebx + movl %r10d,%eax + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je .Ldec_compact_done + + movq 256+0(%r14),%rsi + shlq $32,%rbx + shlq $32,%rdx + movq 256+8(%r14),%rdi + orq %rbx,%rax + orq %rdx,%rcx + movq 256+16(%r14),%rbp + movq %rax,%rbx + movq %rcx,%rdx + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r9 + movq %rdx,%r12 + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %r8,%rbx + xorq %r11,%rdx + movq %rbx,%r8 + movq %rdx,%r11 + + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r10 + movq %rdx,%r13 + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %r9,%rbx + xorq %r12,%rdx + movq %rbx,%r9 + movq %rdx,%r12 + + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r10 + movq %rdx,%r13 + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + xorq %r13,%r12 + shrq $32,%rbx + shrq $32,%rdx + xorq %r8,%r10 + xorq %r11,%r13 + roll $8,%eax + roll $8,%ecx + xorq %r9,%r10 + xorq %r12,%r13 + + roll $8,%ebx + roll $8,%edx + xorl %r10d,%eax + xorl %r13d,%ecx + shrq $32,%r10 + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + movq %r11,%r13 + shrq $32,%r10 + shrq $32,%r13 + roll $24,%r8d + roll $24,%r11d + roll $24,%r10d + roll $24,%r13d + xorl %r8d,%eax + xorl %r11d,%ecx + movq %r9,%r8 + movq %r12,%r11 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq 0(%r14),%rsi + shrq $32,%r8 + shrq $32,%r11 + movq 64(%r14),%rdi + roll $16,%r9d + roll $16,%r12d + movq 128(%r14),%rbp + roll $16,%r8d + roll $16,%r11d + movq 192(%r14),%r10 + xorl %r9d,%eax + xorl %r12d,%ecx + movq 256(%r14),%r13 + xorl %r8d,%ebx + xorl %r11d,%edx + jmp .Ldec_loop_compact +.p2align 4 +.Ldec_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + retq + +.globl AES_decrypt +.def AES_decrypt; .scl 2; .type 32; .endef +.p2align 4 +.globl asm_AES_decrypt + +asm_AES_decrypt: +AES_decrypt: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_AES_decrypt: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r10 + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $960,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %r10,24(%rsp) +.Ldec_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq .LAES_Td+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $768,%rbp + leaq (%r14,%rbp,1),%r14 + shrq $3,%rbp + addq %rbp,%r14 + + call _x86_64_AES_decrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Ldec_epilogue: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_AES_decrypt: +.globl AES_set_encrypt_key +.def AES_set_encrypt_key; .scl 2; .type 32; .endef +.p2align 4 +AES_set_encrypt_key: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_AES_set_encrypt_key: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $8,%rsp +.Lenc_key_prologue: + + call _x86_64_AES_set_encrypt_key + + movq 8(%rsp),%r15 + movq 16(%rsp),%r14 + movq 24(%rsp),%r13 + movq 32(%rsp),%r12 + movq 40(%rsp),%rbp + movq 48(%rsp),%rbx + addq $56,%rsp +.Lenc_key_epilogue: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_AES_set_encrypt_key: + +.def _x86_64_AES_set_encrypt_key; .scl 3; .type 32; .endef +.p2align 4 +_x86_64_AES_set_encrypt_key: + movl %esi,%ecx + movq %rdi,%rsi + movq %rdx,%rdi + + testq $-1,%rsi + jz .Lbadpointer + testq $-1,%rdi + jz .Lbadpointer + + leaq .LAES_Te(%rip),%rbp + leaq 2048+128(%rbp),%rbp + + + movl 0-128(%rbp),%eax + movl 32-128(%rbp),%ebx + movl 64-128(%rbp),%r8d + movl 96-128(%rbp),%edx + movl 128-128(%rbp),%eax + movl 160-128(%rbp),%ebx + movl 192-128(%rbp),%r8d + movl 224-128(%rbp),%edx + + cmpl $128,%ecx + je .L10rounds + cmpl $192,%ecx + je .L12rounds + cmpl $256,%ecx + je .L14rounds + movq $-2,%rax + jmp .Lexit + +.L10rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rdx + movq %rax,0(%rdi) + movq %rdx,8(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L10shortcut +.p2align 2 +.L10loop: + movl 0(%rdi),%eax + movl 12(%rdi),%edx +.L10shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,16(%rdi) + xorl 4(%rdi),%eax + movl %eax,20(%rdi) + xorl 8(%rdi),%eax + movl %eax,24(%rdi) + xorl 12(%rdi),%eax + movl %eax,28(%rdi) + addl $1,%ecx + leaq 16(%rdi),%rdi + cmpl $10,%ecx + jl .L10loop + + movl $10,80(%rdi) + xorq %rax,%rax + jmp .Lexit + +.L12rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rdx,16(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L12shortcut +.p2align 2 +.L12loop: + movl 0(%rdi),%eax + movl 20(%rdi),%edx +.L12shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,24(%rdi) + xorl 4(%rdi),%eax + movl %eax,28(%rdi) + xorl 8(%rdi),%eax + movl %eax,32(%rdi) + xorl 12(%rdi),%eax + movl %eax,36(%rdi) + + cmpl $7,%ecx + je .L12break + addl $1,%ecx + + xorl 16(%rdi),%eax + movl %eax,40(%rdi) + xorl 20(%rdi),%eax + movl %eax,44(%rdi) + + leaq 24(%rdi),%rdi + jmp .L12loop +.L12break: + movl $12,72(%rdi) + xorq %rax,%rax + jmp .Lexit + +.L14rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rcx + movq 24(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L14shortcut +.p2align 2 +.L14loop: + movl 0(%rdi),%eax + movl 28(%rdi),%edx +.L14shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,32(%rdi) + xorl 4(%rdi),%eax + movl %eax,36(%rdi) + xorl 8(%rdi),%eax + movl %eax,40(%rdi) + xorl 12(%rdi),%eax + movl %eax,44(%rdi) + + cmpl $6,%ecx + je .L14break + addl $1,%ecx + + movl %eax,%edx + movl 16(%rdi),%eax + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + shll $8,%ebx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $16,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $24,%ebx + xorl %ebx,%eax + + movl %eax,48(%rdi) + xorl 20(%rdi),%eax + movl %eax,52(%rdi) + xorl 24(%rdi),%eax + movl %eax,56(%rdi) + xorl 28(%rdi),%eax + movl %eax,60(%rdi) + + leaq 32(%rdi),%rdi + jmp .L14loop +.L14break: + movl $14,48(%rdi) + xorq %rax,%rax + jmp .Lexit + +.Lbadpointer: + movq $-1,%rax +.Lexit: + retq + +.globl AES_set_decrypt_key +.def AES_set_decrypt_key; .scl 2; .type 32; .endef +.p2align 4 +AES_set_decrypt_key: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_AES_set_decrypt_key: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rdx +.Ldec_key_prologue: + + call _x86_64_AES_set_encrypt_key + movq (%rsp),%r8 + cmpl $0,%eax + jne .Labort + + movl 240(%r8),%r14d + xorq %rdi,%rdi + leaq (%rdi,%r14,4),%rcx + movq %r8,%rsi + leaq (%r8,%rcx,4),%rdi +.p2align 2 +.Linvert: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 0(%rdi),%rcx + movq 8(%rdi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,0(%rsi) + movq %rdx,8(%rsi) + leaq 16(%rsi),%rsi + leaq -16(%rdi),%rdi + cmpq %rsi,%rdi + jne .Linvert + + leaq .LAES_Te+2048+1024(%rip),%rax + + movq 40(%rax),%rsi + movq 48(%rax),%rdi + movq 56(%rax),%rbp + + movq %r8,%r15 + subl $1,%r14d +.p2align 2 +.Lpermute: + leaq 16(%r15),%r15 + movq 0(%r15),%rax + movq 8(%r15),%rcx + movq %rax,%rbx + movq %rcx,%rdx + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r9 + movq %rdx,%r12 + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %r8,%rbx + xorq %r11,%rdx + movq %rbx,%r8 + movq %rdx,%r11 + + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r10 + movq %rdx,%r13 + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %r9,%rbx + xorq %r12,%rdx + movq %rbx,%r9 + movq %rdx,%r12 + + andq %rsi,%rbx + andq %rsi,%rdx + movq %rbx,%r10 + movq %rdx,%r13 + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + xorq %r13,%r12 + shrq $32,%rbx + shrq $32,%rdx + xorq %r8,%r10 + xorq %r11,%r13 + roll $8,%eax + roll $8,%ecx + xorq %r9,%r10 + xorq %r12,%r13 + + roll $8,%ebx + roll $8,%edx + xorl %r10d,%eax + xorl %r13d,%ecx + shrq $32,%r10 + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + movq %r11,%r13 + shrq $32,%r10 + shrq $32,%r13 + roll $24,%r8d + roll $24,%r11d + roll $24,%r10d + roll $24,%r13d + xorl %r8d,%eax + xorl %r11d,%ecx + movq %r9,%r8 + movq %r12,%r11 + xorl %r10d,%ebx + xorl %r13d,%edx + + + shrq $32,%r8 + shrq $32,%r11 + + roll $16,%r9d + roll $16,%r12d + + roll $16,%r8d + roll $16,%r11d + + xorl %r9d,%eax + xorl %r12d,%ecx + + xorl %r8d,%ebx + xorl %r11d,%edx + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + subl $1,%r14d + jnz .Lpermute + + xorq %rax,%rax +.Labort: + movq 8(%rsp),%r15 + movq 16(%rsp),%r14 + movq 24(%rsp),%r13 + movq 32(%rsp),%r12 + movq 40(%rsp),%rbp + movq 48(%rsp),%rbx + addq $56,%rsp +.Ldec_key_epilogue: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_AES_set_decrypt_key: +.globl AES_cbc_encrypt +.def AES_cbc_encrypt; .scl 2; .type 32; .endef +.p2align 4 + + +.globl asm_AES_cbc_encrypt + +asm_AES_cbc_encrypt: +AES_cbc_encrypt: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_AES_cbc_encrypt: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 + movq 48(%rsp),%r9 + + cmpq $0,%rdx + je .Lcbc_epilogue + pushfq + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.Lcbc_prologue: + + cld + movl %r9d,%r9d + + leaq .LAES_Te(%rip),%r14 + cmpq $0,%r9 + jne .Lcbc_picked_te + leaq .LAES_Td(%rip),%r14 +.Lcbc_picked_te: + + movl OPENSSL_ia32cap_P(%rip),%r10d + cmpq $512,%rdx + jb .Lcbc_slow_prologue + testq $15,%rdx + jnz .Lcbc_slow_prologue + btl $IA32CAP_BIT0_HT,%r10d + jc .Lcbc_slow_prologue + + + leaq -88-248(%rsp),%r15 + andq $-64,%r15 + + + movq %r14,%r10 + leaq 2304(%r14),%r11 + movq %r15,%r12 + andq $4095,%r10 + andq $4095,%r11 + andq $4095,%r12 + + cmpq %r11,%r12 + jb .Lcbc_te_break_out + subq %r11,%r12 + subq %r12,%r15 + jmp .Lcbc_te_ok +.Lcbc_te_break_out: + subq %r10,%r12 + andq $4095,%r12 + addq $320,%r12 + subq %r12,%r15 +.p2align 2 +.Lcbc_te_ok: + + xchgq %rsp,%r15 + + movq %r15,16(%rsp) +.Lcbc_fast_body: + movq %rdi,24(%rsp) + movq %rsi,32(%rsp) + movq %rdx,40(%rsp) + movq %rcx,48(%rsp) + movq %r8,56(%rsp) + movl $0,80+240(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + + movl 240(%r15),%eax + + movq %r15,%r10 + subq %r14,%r10 + andq $4095,%r10 + cmpq $2304,%r10 + jb .Lcbc_do_ecopy + cmpq $4096-248,%r10 + jb .Lcbc_skip_ecopy +.p2align 2 +.Lcbc_do_ecopy: + movq %r15,%rsi + leaq 80(%rsp),%rdi + leaq 80(%rsp),%r15 + movl $30,%ecx +.long 0x90A548F3 + movl %eax,(%rdi) +.Lcbc_skip_ecopy: + movq %r15,0(%rsp) + + movl $18,%ecx +.p2align 2 +.Lcbc_prefetch_te: + movq 0(%r14),%r10 + movq 32(%r14),%r11 + movq 64(%r14),%r12 + movq 96(%r14),%r13 + leaq 128(%r14),%r14 + subl $1,%ecx + jnz .Lcbc_prefetch_te + leaq -2304(%r14),%r14 + + cmpq $0,%rbx + je .LFAST_DECRYPT + + + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + +.p2align 2 +.Lcbc_fast_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_encrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + movq %r10,40(%rsp) + jnz .Lcbc_fast_enc_loop + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp .Lcbc_fast_cleanup + + +.p2align 4 +.LFAST_DECRYPT: + cmpq %r8,%r9 + je .Lcbc_fast_dec_in_place + + movq %rbp,64(%rsp) +.p2align 2 +.Lcbc_fast_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_decrypt + + movq 64(%rsp),%rbp + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0(%rbp),%eax + xorl 4(%rbp),%ebx + xorl 8(%rbp),%ecx + xorl 12(%rbp),%edx + movq %r8,%rbp + + subq $16,%r10 + movq %r10,40(%rsp) + movq %rbp,64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jnz .Lcbc_fast_dec_loop + movq 56(%rsp),%r12 + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0(%r12) + movq %r11,8(%r12) + jmp .Lcbc_fast_cleanup + +.p2align 4 +.Lcbc_fast_dec_in_place: + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0+64(%rsp) + movq %r11,8+64(%rsp) +.p2align 2 +.Lcbc_fast_dec_in_place_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_decrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jz .Lcbc_fast_dec_in_place_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + movq %r10,40(%rsp) + jmp .Lcbc_fast_dec_in_place_loop +.Lcbc_fast_dec_in_place_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + +.p2align 2 +.Lcbc_fast_cleanup: + cmpl $0,80+240(%rsp) + leaq 80(%rsp),%rdi + je .Lcbc_exit + movl $30,%ecx + xorq %rax,%rax +.long 0x90AB48F3 + + jmp .Lcbc_exit + + +.p2align 4 +.Lcbc_slow_prologue: + + leaq -88(%rsp),%rbp + andq $-64,%rbp + + leaq -88-63(%rcx),%r10 + subq %rbp,%r10 + negq %r10 + andq $960,%r10 + subq %r10,%rbp + + xchgq %rsp,%rbp + + movq %rbp,16(%rsp) +.Lcbc_slow_body: + + + + + movq %r8,56(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + movq %rdx,%r10 + + movl 240(%r15),%eax + movq %r15,0(%rsp) + shll $4,%eax + leaq (%r15,%rax,1),%rax + movq %rax,8(%rsp) + + + leaq 2048(%r14),%r14 + leaq 768-8(%rsp),%rax + subq %r14,%rax + andq $768,%rax + leaq (%r14,%rax,1),%r14 + + cmpq $0,%rbx + je .LSLOW_DECRYPT + + + testq $-16,%r10 + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + jz .Lcbc_slow_enc_tail + +.p2align 2 +.Lcbc_slow_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq %r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_encrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + jnz .Lcbc_slow_enc_loop + testq $15,%r10 + jnz .Lcbc_slow_enc_tail + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp .Lcbc_exit + +.p2align 2 +.Lcbc_slow_enc_tail: + movq %rax,%r11 + movq %rcx,%r12 + movq %r10,%rcx + movq %r8,%rsi + movq %r9,%rdi +.long 0x9066A4F3 + movq $16,%rcx + subq %r10,%rcx + xorq %rax,%rax +.long 0x9066AAF3 + movq %r9,%r8 + movq $16,%r10 + movq %r11,%rax + movq %r12,%rcx + jmp .Lcbc_slow_enc_loop + +.p2align 4 +.LSLOW_DECRYPT: + shrq $3,%rax + addq %rax,%r14 + + movq 0(%rbp),%r11 + movq 8(%rbp),%r12 + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + +.p2align 2 +.Lcbc_slow_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq %r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_decrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jc .Lcbc_slow_dec_partial + jz .Lcbc_slow_dec_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jmp .Lcbc_slow_dec_loop +.Lcbc_slow_dec_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + jmp .Lcbc_exit + +.p2align 2 +.Lcbc_slow_dec_partial: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0+64(%rsp) + movl %ebx,4+64(%rsp) + movl %ecx,8+64(%rsp) + movl %edx,12+64(%rsp) + + movq %r9,%rdi + leaq 64(%rsp),%rsi + leaq 16(%r10),%rcx +.long 0x9066A4F3 + jmp .Lcbc_exit + +.p2align 4 +.Lcbc_exit: + movq 16(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lcbc_popfq: + popfq +.Lcbc_epilogue: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_AES_cbc_encrypt: +.p2align 6 +.LAES_Te: +.long 0xa56363c6,0xa56363c6 +.long 0x847c7cf8,0x847c7cf8 +.long 0x997777ee,0x997777ee +.long 0x8d7b7bf6,0x8d7b7bf6 +.long 0x0df2f2ff,0x0df2f2ff +.long 0xbd6b6bd6,0xbd6b6bd6 +.long 0xb16f6fde,0xb16f6fde +.long 0x54c5c591,0x54c5c591 +.long 0x50303060,0x50303060 +.long 0x03010102,0x03010102 +.long 0xa96767ce,0xa96767ce +.long 0x7d2b2b56,0x7d2b2b56 +.long 0x19fefee7,0x19fefee7 +.long 0x62d7d7b5,0x62d7d7b5 +.long 0xe6abab4d,0xe6abab4d +.long 0x9a7676ec,0x9a7676ec +.long 0x45caca8f,0x45caca8f +.long 0x9d82821f,0x9d82821f +.long 0x40c9c989,0x40c9c989 +.long 0x877d7dfa,0x877d7dfa +.long 0x15fafaef,0x15fafaef +.long 0xeb5959b2,0xeb5959b2 +.long 0xc947478e,0xc947478e +.long 0x0bf0f0fb,0x0bf0f0fb +.long 0xecadad41,0xecadad41 +.long 0x67d4d4b3,0x67d4d4b3 +.long 0xfda2a25f,0xfda2a25f +.long 0xeaafaf45,0xeaafaf45 +.long 0xbf9c9c23,0xbf9c9c23 +.long 0xf7a4a453,0xf7a4a453 +.long 0x967272e4,0x967272e4 +.long 0x5bc0c09b,0x5bc0c09b +.long 0xc2b7b775,0xc2b7b775 +.long 0x1cfdfde1,0x1cfdfde1 +.long 0xae93933d,0xae93933d +.long 0x6a26264c,0x6a26264c +.long 0x5a36366c,0x5a36366c +.long 0x413f3f7e,0x413f3f7e +.long 0x02f7f7f5,0x02f7f7f5 +.long 0x4fcccc83,0x4fcccc83 +.long 0x5c343468,0x5c343468 +.long 0xf4a5a551,0xf4a5a551 +.long 0x34e5e5d1,0x34e5e5d1 +.long 0x08f1f1f9,0x08f1f1f9 +.long 0x937171e2,0x937171e2 +.long 0x73d8d8ab,0x73d8d8ab +.long 0x53313162,0x53313162 +.long 0x3f15152a,0x3f15152a +.long 0x0c040408,0x0c040408 +.long 0x52c7c795,0x52c7c795 +.long 0x65232346,0x65232346 +.long 0x5ec3c39d,0x5ec3c39d +.long 0x28181830,0x28181830 +.long 0xa1969637,0xa1969637 +.long 0x0f05050a,0x0f05050a +.long 0xb59a9a2f,0xb59a9a2f +.long 0x0907070e,0x0907070e +.long 0x36121224,0x36121224 +.long 0x9b80801b,0x9b80801b +.long 0x3de2e2df,0x3de2e2df +.long 0x26ebebcd,0x26ebebcd +.long 0x6927274e,0x6927274e +.long 0xcdb2b27f,0xcdb2b27f +.long 0x9f7575ea,0x9f7575ea +.long 0x1b090912,0x1b090912 +.long 0x9e83831d,0x9e83831d +.long 0x742c2c58,0x742c2c58 +.long 0x2e1a1a34,0x2e1a1a34 +.long 0x2d1b1b36,0x2d1b1b36 +.long 0xb26e6edc,0xb26e6edc +.long 0xee5a5ab4,0xee5a5ab4 +.long 0xfba0a05b,0xfba0a05b +.long 0xf65252a4,0xf65252a4 +.long 0x4d3b3b76,0x4d3b3b76 +.long 0x61d6d6b7,0x61d6d6b7 +.long 0xceb3b37d,0xceb3b37d +.long 0x7b292952,0x7b292952 +.long 0x3ee3e3dd,0x3ee3e3dd +.long 0x712f2f5e,0x712f2f5e +.long 0x97848413,0x97848413 +.long 0xf55353a6,0xf55353a6 +.long 0x68d1d1b9,0x68d1d1b9 +.long 0x00000000,0x00000000 +.long 0x2cededc1,0x2cededc1 +.long 0x60202040,0x60202040 +.long 0x1ffcfce3,0x1ffcfce3 +.long 0xc8b1b179,0xc8b1b179 +.long 0xed5b5bb6,0xed5b5bb6 +.long 0xbe6a6ad4,0xbe6a6ad4 +.long 0x46cbcb8d,0x46cbcb8d +.long 0xd9bebe67,0xd9bebe67 +.long 0x4b393972,0x4b393972 +.long 0xde4a4a94,0xde4a4a94 +.long 0xd44c4c98,0xd44c4c98 +.long 0xe85858b0,0xe85858b0 +.long 0x4acfcf85,0x4acfcf85 +.long 0x6bd0d0bb,0x6bd0d0bb +.long 0x2aefefc5,0x2aefefc5 +.long 0xe5aaaa4f,0xe5aaaa4f +.long 0x16fbfbed,0x16fbfbed +.long 0xc5434386,0xc5434386 +.long 0xd74d4d9a,0xd74d4d9a +.long 0x55333366,0x55333366 +.long 0x94858511,0x94858511 +.long 0xcf45458a,0xcf45458a +.long 0x10f9f9e9,0x10f9f9e9 +.long 0x06020204,0x06020204 +.long 0x817f7ffe,0x817f7ffe +.long 0xf05050a0,0xf05050a0 +.long 0x443c3c78,0x443c3c78 +.long 0xba9f9f25,0xba9f9f25 +.long 0xe3a8a84b,0xe3a8a84b +.long 0xf35151a2,0xf35151a2 +.long 0xfea3a35d,0xfea3a35d +.long 0xc0404080,0xc0404080 +.long 0x8a8f8f05,0x8a8f8f05 +.long 0xad92923f,0xad92923f +.long 0xbc9d9d21,0xbc9d9d21 +.long 0x48383870,0x48383870 +.long 0x04f5f5f1,0x04f5f5f1 +.long 0xdfbcbc63,0xdfbcbc63 +.long 0xc1b6b677,0xc1b6b677 +.long 0x75dadaaf,0x75dadaaf +.long 0x63212142,0x63212142 +.long 0x30101020,0x30101020 +.long 0x1affffe5,0x1affffe5 +.long 0x0ef3f3fd,0x0ef3f3fd +.long 0x6dd2d2bf,0x6dd2d2bf +.long 0x4ccdcd81,0x4ccdcd81 +.long 0x140c0c18,0x140c0c18 +.long 0x35131326,0x35131326 +.long 0x2fececc3,0x2fececc3 +.long 0xe15f5fbe,0xe15f5fbe +.long 0xa2979735,0xa2979735 +.long 0xcc444488,0xcc444488 +.long 0x3917172e,0x3917172e +.long 0x57c4c493,0x57c4c493 +.long 0xf2a7a755,0xf2a7a755 +.long 0x827e7efc,0x827e7efc +.long 0x473d3d7a,0x473d3d7a +.long 0xac6464c8,0xac6464c8 +.long 0xe75d5dba,0xe75d5dba +.long 0x2b191932,0x2b191932 +.long 0x957373e6,0x957373e6 +.long 0xa06060c0,0xa06060c0 +.long 0x98818119,0x98818119 +.long 0xd14f4f9e,0xd14f4f9e +.long 0x7fdcdca3,0x7fdcdca3 +.long 0x66222244,0x66222244 +.long 0x7e2a2a54,0x7e2a2a54 +.long 0xab90903b,0xab90903b +.long 0x8388880b,0x8388880b +.long 0xca46468c,0xca46468c +.long 0x29eeeec7,0x29eeeec7 +.long 0xd3b8b86b,0xd3b8b86b +.long 0x3c141428,0x3c141428 +.long 0x79dedea7,0x79dedea7 +.long 0xe25e5ebc,0xe25e5ebc +.long 0x1d0b0b16,0x1d0b0b16 +.long 0x76dbdbad,0x76dbdbad +.long 0x3be0e0db,0x3be0e0db +.long 0x56323264,0x56323264 +.long 0x4e3a3a74,0x4e3a3a74 +.long 0x1e0a0a14,0x1e0a0a14 +.long 0xdb494992,0xdb494992 +.long 0x0a06060c,0x0a06060c +.long 0x6c242448,0x6c242448 +.long 0xe45c5cb8,0xe45c5cb8 +.long 0x5dc2c29f,0x5dc2c29f +.long 0x6ed3d3bd,0x6ed3d3bd +.long 0xefacac43,0xefacac43 +.long 0xa66262c4,0xa66262c4 +.long 0xa8919139,0xa8919139 +.long 0xa4959531,0xa4959531 +.long 0x37e4e4d3,0x37e4e4d3 +.long 0x8b7979f2,0x8b7979f2 +.long 0x32e7e7d5,0x32e7e7d5 +.long 0x43c8c88b,0x43c8c88b +.long 0x5937376e,0x5937376e +.long 0xb76d6dda,0xb76d6dda +.long 0x8c8d8d01,0x8c8d8d01 +.long 0x64d5d5b1,0x64d5d5b1 +.long 0xd24e4e9c,0xd24e4e9c +.long 0xe0a9a949,0xe0a9a949 +.long 0xb46c6cd8,0xb46c6cd8 +.long 0xfa5656ac,0xfa5656ac +.long 0x07f4f4f3,0x07f4f4f3 +.long 0x25eaeacf,0x25eaeacf +.long 0xaf6565ca,0xaf6565ca +.long 0x8e7a7af4,0x8e7a7af4 +.long 0xe9aeae47,0xe9aeae47 +.long 0x18080810,0x18080810 +.long 0xd5baba6f,0xd5baba6f +.long 0x887878f0,0x887878f0 +.long 0x6f25254a,0x6f25254a +.long 0x722e2e5c,0x722e2e5c +.long 0x241c1c38,0x241c1c38 +.long 0xf1a6a657,0xf1a6a657 +.long 0xc7b4b473,0xc7b4b473 +.long 0x51c6c697,0x51c6c697 +.long 0x23e8e8cb,0x23e8e8cb +.long 0x7cdddda1,0x7cdddda1 +.long 0x9c7474e8,0x9c7474e8 +.long 0x211f1f3e,0x211f1f3e +.long 0xdd4b4b96,0xdd4b4b96 +.long 0xdcbdbd61,0xdcbdbd61 +.long 0x868b8b0d,0x868b8b0d +.long 0x858a8a0f,0x858a8a0f +.long 0x907070e0,0x907070e0 +.long 0x423e3e7c,0x423e3e7c +.long 0xc4b5b571,0xc4b5b571 +.long 0xaa6666cc,0xaa6666cc +.long 0xd8484890,0xd8484890 +.long 0x05030306,0x05030306 +.long 0x01f6f6f7,0x01f6f6f7 +.long 0x120e0e1c,0x120e0e1c +.long 0xa36161c2,0xa36161c2 +.long 0x5f35356a,0x5f35356a +.long 0xf95757ae,0xf95757ae +.long 0xd0b9b969,0xd0b9b969 +.long 0x91868617,0x91868617 +.long 0x58c1c199,0x58c1c199 +.long 0x271d1d3a,0x271d1d3a +.long 0xb99e9e27,0xb99e9e27 +.long 0x38e1e1d9,0x38e1e1d9 +.long 0x13f8f8eb,0x13f8f8eb +.long 0xb398982b,0xb398982b +.long 0x33111122,0x33111122 +.long 0xbb6969d2,0xbb6969d2 +.long 0x70d9d9a9,0x70d9d9a9 +.long 0x898e8e07,0x898e8e07 +.long 0xa7949433,0xa7949433 +.long 0xb69b9b2d,0xb69b9b2d +.long 0x221e1e3c,0x221e1e3c +.long 0x92878715,0x92878715 +.long 0x20e9e9c9,0x20e9e9c9 +.long 0x49cece87,0x49cece87 +.long 0xff5555aa,0xff5555aa +.long 0x78282850,0x78282850 +.long 0x7adfdfa5,0x7adfdfa5 +.long 0x8f8c8c03,0x8f8c8c03 +.long 0xf8a1a159,0xf8a1a159 +.long 0x80898909,0x80898909 +.long 0x170d0d1a,0x170d0d1a +.long 0xdabfbf65,0xdabfbf65 +.long 0x31e6e6d7,0x31e6e6d7 +.long 0xc6424284,0xc6424284 +.long 0xb86868d0,0xb86868d0 +.long 0xc3414182,0xc3414182 +.long 0xb0999929,0xb0999929 +.long 0x772d2d5a,0x772d2d5a +.long 0x110f0f1e,0x110f0f1e +.long 0xcbb0b07b,0xcbb0b07b +.long 0xfc5454a8,0xfc5454a8 +.long 0xd6bbbb6d,0xd6bbbb6d +.long 0x3a16162c,0x3a16162c +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.long 0x00000001, 0x00000002, 0x00000004, 0x00000008 +.long 0x00000010, 0x00000020, 0x00000040, 0x00000080 +.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080 +.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b +.p2align 6 +.LAES_Td: +.long 0x50a7f451,0x50a7f451 +.long 0x5365417e,0x5365417e +.long 0xc3a4171a,0xc3a4171a +.long 0x965e273a,0x965e273a +.long 0xcb6bab3b,0xcb6bab3b +.long 0xf1459d1f,0xf1459d1f +.long 0xab58faac,0xab58faac +.long 0x9303e34b,0x9303e34b +.long 0x55fa3020,0x55fa3020 +.long 0xf66d76ad,0xf66d76ad +.long 0x9176cc88,0x9176cc88 +.long 0x254c02f5,0x254c02f5 +.long 0xfcd7e54f,0xfcd7e54f +.long 0xd7cb2ac5,0xd7cb2ac5 +.long 0x80443526,0x80443526 +.long 0x8fa362b5,0x8fa362b5 +.long 0x495ab1de,0x495ab1de +.long 0x671bba25,0x671bba25 +.long 0x980eea45,0x980eea45 +.long 0xe1c0fe5d,0xe1c0fe5d +.long 0x02752fc3,0x02752fc3 +.long 0x12f04c81,0x12f04c81 +.long 0xa397468d,0xa397468d +.long 0xc6f9d36b,0xc6f9d36b +.long 0xe75f8f03,0xe75f8f03 +.long 0x959c9215,0x959c9215 +.long 0xeb7a6dbf,0xeb7a6dbf +.long 0xda595295,0xda595295 +.long 0x2d83bed4,0x2d83bed4 +.long 0xd3217458,0xd3217458 +.long 0x2969e049,0x2969e049 +.long 0x44c8c98e,0x44c8c98e +.long 0x6a89c275,0x6a89c275 +.long 0x78798ef4,0x78798ef4 +.long 0x6b3e5899,0x6b3e5899 +.long 0xdd71b927,0xdd71b927 +.long 0xb64fe1be,0xb64fe1be +.long 0x17ad88f0,0x17ad88f0 +.long 0x66ac20c9,0x66ac20c9 +.long 0xb43ace7d,0xb43ace7d +.long 0x184adf63,0x184adf63 +.long 0x82311ae5,0x82311ae5 +.long 0x60335197,0x60335197 +.long 0x457f5362,0x457f5362 +.long 0xe07764b1,0xe07764b1 +.long 0x84ae6bbb,0x84ae6bbb +.long 0x1ca081fe,0x1ca081fe +.long 0x942b08f9,0x942b08f9 +.long 0x58684870,0x58684870 +.long 0x19fd458f,0x19fd458f +.long 0x876cde94,0x876cde94 +.long 0xb7f87b52,0xb7f87b52 +.long 0x23d373ab,0x23d373ab +.long 0xe2024b72,0xe2024b72 +.long 0x578f1fe3,0x578f1fe3 +.long 0x2aab5566,0x2aab5566 +.long 0x0728ebb2,0x0728ebb2 +.long 0x03c2b52f,0x03c2b52f +.long 0x9a7bc586,0x9a7bc586 +.long 0xa50837d3,0xa50837d3 +.long 0xf2872830,0xf2872830 +.long 0xb2a5bf23,0xb2a5bf23 +.long 0xba6a0302,0xba6a0302 +.long 0x5c8216ed,0x5c8216ed +.long 0x2b1ccf8a,0x2b1ccf8a +.long 0x92b479a7,0x92b479a7 +.long 0xf0f207f3,0xf0f207f3 +.long 0xa1e2694e,0xa1e2694e +.long 0xcdf4da65,0xcdf4da65 +.long 0xd5be0506,0xd5be0506 +.long 0x1f6234d1,0x1f6234d1 +.long 0x8afea6c4,0x8afea6c4 +.long 0x9d532e34,0x9d532e34 +.long 0xa055f3a2,0xa055f3a2 +.long 0x32e18a05,0x32e18a05 +.long 0x75ebf6a4,0x75ebf6a4 +.long 0x39ec830b,0x39ec830b +.long 0xaaef6040,0xaaef6040 +.long 0x069f715e,0x069f715e +.long 0x51106ebd,0x51106ebd +.long 0xf98a213e,0xf98a213e +.long 0x3d06dd96,0x3d06dd96 +.long 0xae053edd,0xae053edd +.long 0x46bde64d,0x46bde64d +.long 0xb58d5491,0xb58d5491 +.long 0x055dc471,0x055dc471 +.long 0x6fd40604,0x6fd40604 +.long 0xff155060,0xff155060 +.long 0x24fb9819,0x24fb9819 +.long 0x97e9bdd6,0x97e9bdd6 +.long 0xcc434089,0xcc434089 +.long 0x779ed967,0x779ed967 +.long 0xbd42e8b0,0xbd42e8b0 +.long 0x888b8907,0x888b8907 +.long 0x385b19e7,0x385b19e7 +.long 0xdbeec879,0xdbeec879 +.long 0x470a7ca1,0x470a7ca1 +.long 0xe90f427c,0xe90f427c +.long 0xc91e84f8,0xc91e84f8 +.long 0x00000000,0x00000000 +.long 0x83868009,0x83868009 +.long 0x48ed2b32,0x48ed2b32 +.long 0xac70111e,0xac70111e +.long 0x4e725a6c,0x4e725a6c +.long 0xfbff0efd,0xfbff0efd +.long 0x5638850f,0x5638850f +.long 0x1ed5ae3d,0x1ed5ae3d +.long 0x27392d36,0x27392d36 +.long 0x64d90f0a,0x64d90f0a +.long 0x21a65c68,0x21a65c68 +.long 0xd1545b9b,0xd1545b9b +.long 0x3a2e3624,0x3a2e3624 +.long 0xb1670a0c,0xb1670a0c +.long 0x0fe75793,0x0fe75793 +.long 0xd296eeb4,0xd296eeb4 +.long 0x9e919b1b,0x9e919b1b +.long 0x4fc5c080,0x4fc5c080 +.long 0xa220dc61,0xa220dc61 +.long 0x694b775a,0x694b775a +.long 0x161a121c,0x161a121c +.long 0x0aba93e2,0x0aba93e2 +.long 0xe52aa0c0,0xe52aa0c0 +.long 0x43e0223c,0x43e0223c +.long 0x1d171b12,0x1d171b12 +.long 0x0b0d090e,0x0b0d090e +.long 0xadc78bf2,0xadc78bf2 +.long 0xb9a8b62d,0xb9a8b62d +.long 0xc8a91e14,0xc8a91e14 +.long 0x8519f157,0x8519f157 +.long 0x4c0775af,0x4c0775af +.long 0xbbdd99ee,0xbbdd99ee +.long 0xfd607fa3,0xfd607fa3 +.long 0x9f2601f7,0x9f2601f7 +.long 0xbcf5725c,0xbcf5725c +.long 0xc53b6644,0xc53b6644 +.long 0x347efb5b,0x347efb5b +.long 0x7629438b,0x7629438b +.long 0xdcc623cb,0xdcc623cb +.long 0x68fcedb6,0x68fcedb6 +.long 0x63f1e4b8,0x63f1e4b8 +.long 0xcadc31d7,0xcadc31d7 +.long 0x10856342,0x10856342 +.long 0x40229713,0x40229713 +.long 0x2011c684,0x2011c684 +.long 0x7d244a85,0x7d244a85 +.long 0xf83dbbd2,0xf83dbbd2 +.long 0x1132f9ae,0x1132f9ae +.long 0x6da129c7,0x6da129c7 +.long 0x4b2f9e1d,0x4b2f9e1d +.long 0xf330b2dc,0xf330b2dc +.long 0xec52860d,0xec52860d +.long 0xd0e3c177,0xd0e3c177 +.long 0x6c16b32b,0x6c16b32b +.long 0x99b970a9,0x99b970a9 +.long 0xfa489411,0xfa489411 +.long 0x2264e947,0x2264e947 +.long 0xc48cfca8,0xc48cfca8 +.long 0x1a3ff0a0,0x1a3ff0a0 +.long 0xd82c7d56,0xd82c7d56 +.long 0xef903322,0xef903322 +.long 0xc74e4987,0xc74e4987 +.long 0xc1d138d9,0xc1d138d9 +.long 0xfea2ca8c,0xfea2ca8c +.long 0x360bd498,0x360bd498 +.long 0xcf81f5a6,0xcf81f5a6 +.long 0x28de7aa5,0x28de7aa5 +.long 0x268eb7da,0x268eb7da +.long 0xa4bfad3f,0xa4bfad3f +.long 0xe49d3a2c,0xe49d3a2c +.long 0x0d927850,0x0d927850 +.long 0x9bcc5f6a,0x9bcc5f6a +.long 0x62467e54,0x62467e54 +.long 0xc2138df6,0xc2138df6 +.long 0xe8b8d890,0xe8b8d890 +.long 0x5ef7392e,0x5ef7392e +.long 0xf5afc382,0xf5afc382 +.long 0xbe805d9f,0xbe805d9f +.long 0x7c93d069,0x7c93d069 +.long 0xa92dd56f,0xa92dd56f +.long 0xb31225cf,0xb31225cf +.long 0x3b99acc8,0x3b99acc8 +.long 0xa77d1810,0xa77d1810 +.long 0x6e639ce8,0x6e639ce8 +.long 0x7bbb3bdb,0x7bbb3bdb +.long 0x097826cd,0x097826cd +.long 0xf418596e,0xf418596e +.long 0x01b79aec,0x01b79aec +.long 0xa89a4f83,0xa89a4f83 +.long 0x656e95e6,0x656e95e6 +.long 0x7ee6ffaa,0x7ee6ffaa +.long 0x08cfbc21,0x08cfbc21 +.long 0xe6e815ef,0xe6e815ef +.long 0xd99be7ba,0xd99be7ba +.long 0xce366f4a,0xce366f4a +.long 0xd4099fea,0xd4099fea +.long 0xd67cb029,0xd67cb029 +.long 0xafb2a431,0xafb2a431 +.long 0x31233f2a,0x31233f2a +.long 0x3094a5c6,0x3094a5c6 +.long 0xc066a235,0xc066a235 +.long 0x37bc4e74,0x37bc4e74 +.long 0xa6ca82fc,0xa6ca82fc +.long 0xb0d090e0,0xb0d090e0 +.long 0x15d8a733,0x15d8a733 +.long 0x4a9804f1,0x4a9804f1 +.long 0xf7daec41,0xf7daec41 +.long 0x0e50cd7f,0x0e50cd7f +.long 0x2ff69117,0x2ff69117 +.long 0x8dd64d76,0x8dd64d76 +.long 0x4db0ef43,0x4db0ef43 +.long 0x544daacc,0x544daacc +.long 0xdf0496e4,0xdf0496e4 +.long 0xe3b5d19e,0xe3b5d19e +.long 0x1b886a4c,0x1b886a4c +.long 0xb81f2cc1,0xb81f2cc1 +.long 0x7f516546,0x7f516546 +.long 0x04ea5e9d,0x04ea5e9d +.long 0x5d358c01,0x5d358c01 +.long 0x737487fa,0x737487fa +.long 0x2e410bfb,0x2e410bfb +.long 0x5a1d67b3,0x5a1d67b3 +.long 0x52d2db92,0x52d2db92 +.long 0x335610e9,0x335610e9 +.long 0x1347d66d,0x1347d66d +.long 0x8c61d79a,0x8c61d79a +.long 0x7a0ca137,0x7a0ca137 +.long 0x8e14f859,0x8e14f859 +.long 0x893c13eb,0x893c13eb +.long 0xee27a9ce,0xee27a9ce +.long 0x35c961b7,0x35c961b7 +.long 0xede51ce1,0xede51ce1 +.long 0x3cb1477a,0x3cb1477a +.long 0x59dfd29c,0x59dfd29c +.long 0x3f73f255,0x3f73f255 +.long 0x79ce1418,0x79ce1418 +.long 0xbf37c773,0xbf37c773 +.long 0xeacdf753,0xeacdf753 +.long 0x5baafd5f,0x5baafd5f +.long 0x146f3ddf,0x146f3ddf +.long 0x86db4478,0x86db4478 +.long 0x81f3afca,0x81f3afca +.long 0x3ec468b9,0x3ec468b9 +.long 0x2c342438,0x2c342438 +.long 0x5f40a3c2,0x5f40a3c2 +.long 0x72c31d16,0x72c31d16 +.long 0x0c25e2bc,0x0c25e2bc +.long 0x8b493c28,0x8b493c28 +.long 0x41950dff,0x41950dff +.long 0x7101a839,0x7101a839 +.long 0xdeb30c08,0xdeb30c08 +.long 0x9ce4b4d8,0x9ce4b4d8 +.long 0x90c15664,0x90c15664 +.long 0x6184cb7b,0x6184cb7b +.long 0x70b632d5,0x70b632d5 +.long 0x745c6c48,0x745c6c48 +.long 0x4257b8d0,0x4257b8d0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 + +.def block_se_handler; .scl 3; .type 32; .endef +.p2align 4 +block_se_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + movq 8(%r9),%rsi + movq 56(%r9),%r11 + + movl 0(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jb .Lin_block_prologue + + movq 152(%r8),%rax + + movl 4(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jae .Lin_block_prologue + + movq 24(%rax),%rax + leaq 48(%rax),%rax + + movq -8(%rax),%rbx + movq -16(%rax),%rbp + movq -24(%rax),%r12 + movq -32(%rax),%r13 + movq -40(%rax),%r14 + movq -48(%rax),%r15 + movq %rbx,144(%r8) + movq %rbp,160(%r8) + movq %r12,216(%r8) + movq %r13,224(%r8) + movq %r14,232(%r8) + movq %r15,240(%r8) + +.Lin_block_prologue: + movq 8(%rax),%rdi + movq 16(%rax),%rsi + movq %rax,152(%r8) + movq %rsi,168(%r8) + movq %rdi,176(%r8) + + jmp .Lcommon_seh_exit + + +.def key_se_handler; .scl 3; .type 32; .endef +.p2align 4 +key_se_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + movq 8(%r9),%rsi + movq 56(%r9),%r11 + + movl 0(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jb .Lin_key_prologue + + movq 152(%r8),%rax + + movl 4(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jae .Lin_key_prologue + + leaq 56(%rax),%rax + + movq -8(%rax),%rbx + movq -16(%rax),%rbp + movq -24(%rax),%r12 + movq -32(%rax),%r13 + movq -40(%rax),%r14 + movq -48(%rax),%r15 + movq %rbx,144(%r8) + movq %rbp,160(%r8) + movq %r12,216(%r8) + movq %r13,224(%r8) + movq %r14,232(%r8) + movq %r15,240(%r8) + +.Lin_key_prologue: + movq 8(%rax),%rdi + movq 16(%rax),%rsi + movq %rax,152(%r8) + movq %rsi,168(%r8) + movq %rdi,176(%r8) + + jmp .Lcommon_seh_exit + + +.def cbc_se_handler; .scl 3; .type 32; .endef +.p2align 4 +cbc_se_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + leaq .Lcbc_prologue(%rip),%r10 + cmpq %r10,%rbx + jb .Lin_cbc_prologue + + leaq .Lcbc_fast_body(%rip),%r10 + cmpq %r10,%rbx + jb .Lin_cbc_frame_setup + + leaq .Lcbc_slow_prologue(%rip),%r10 + cmpq %r10,%rbx + jb .Lin_cbc_body + + leaq .Lcbc_slow_body(%rip),%r10 + cmpq %r10,%rbx + jb .Lin_cbc_frame_setup + +.Lin_cbc_body: + movq 152(%r8),%rax + + leaq .Lcbc_epilogue(%rip),%r10 + cmpq %r10,%rbx + jae .Lin_cbc_prologue + + leaq 8(%rax),%rax + + leaq .Lcbc_popfq(%rip),%r10 + cmpq %r10,%rbx + jae .Lin_cbc_prologue + + movq 8(%rax),%rax + leaq 56(%rax),%rax + +.Lin_cbc_frame_setup: + movq -16(%rax),%rbx + movq -24(%rax),%rbp + movq -32(%rax),%r12 + movq -40(%rax),%r13 + movq -48(%rax),%r14 + movq -56(%rax),%r15 + movq %rbx,144(%r8) + movq %rbp,160(%r8) + movq %r12,216(%r8) + movq %r13,224(%r8) + movq %r14,232(%r8) + movq %r15,240(%r8) + +.Lin_cbc_prologue: + movq 8(%rax),%rdi + movq 16(%rax),%rsi + movq %rax,152(%r8) + movq %rsi,168(%r8) + movq %rdi,176(%r8) + +.Lcommon_seh_exit: + + movq 40(%r9),%rdi + movq %r8,%rsi + movl $154,%ecx +.long 0xa548f3fc + + movq %r9,%rsi + xorq %rcx,%rcx + movq 8(%rsi),%rdx + movq 0(%rsi),%r8 + movq 16(%rsi),%r9 + movq 40(%rsi),%r10 + leaq 56(%rsi),%r11 + leaq 24(%rsi),%r12 + movq %r10,32(%rsp) + movq %r11,40(%rsp) + movq %r12,48(%rsp) + movq %rcx,56(%rsp) + call *__imp_RtlVirtualUnwind(%rip) + + movl $1,%eax + addq $64,%rsp + popfq + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + popq %rdi + popq %rsi + retq + + +.section .pdata +.p2align 2 +.rva .LSEH_begin_AES_encrypt +.rva .LSEH_end_AES_encrypt +.rva .LSEH_info_AES_encrypt + +.rva .LSEH_begin_AES_decrypt +.rva .LSEH_end_AES_decrypt +.rva .LSEH_info_AES_decrypt + +.rva .LSEH_begin_AES_set_encrypt_key +.rva .LSEH_end_AES_set_encrypt_key +.rva .LSEH_info_AES_set_encrypt_key + +.rva .LSEH_begin_AES_set_decrypt_key +.rva .LSEH_end_AES_set_decrypt_key +.rva .LSEH_info_AES_set_decrypt_key + +.rva .LSEH_begin_AES_cbc_encrypt +.rva .LSEH_end_AES_cbc_encrypt +.rva .LSEH_info_AES_cbc_encrypt + +.section .xdata +.p2align 3 +.LSEH_info_AES_encrypt: +.byte 9,0,0,0 +.rva block_se_handler +.rva .Lenc_prologue,.Lenc_epilogue +.LSEH_info_AES_decrypt: +.byte 9,0,0,0 +.rva block_se_handler +.rva .Ldec_prologue,.Ldec_epilogue +.LSEH_info_AES_set_encrypt_key: +.byte 9,0,0,0 +.rva key_se_handler +.rva .Lenc_key_prologue,.Lenc_key_epilogue +.LSEH_info_AES_set_decrypt_key: +.byte 9,0,0,0 +.rva key_se_handler +.rva .Ldec_key_prologue,.Ldec_key_epilogue +.LSEH_info_AES_cbc_encrypt: +.byte 9,0,0,0 +.rva cbc_se_handler diff --git a/ext/libressl/crypto/aes/aes_cbc.c b/ext/libressl/crypto/aes/aes_cbc.c new file mode 100644 index 0000000..5e76f6e --- /dev/null +++ b/ext/libressl/crypto/aes/aes_cbc.c @@ -0,0 +1,65 @@ +/* $OpenBSD: aes_cbc.c,v 1.12 2014/06/12 15:49:27 deraadt Exp $ */ +/* ==================================================================== + * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + */ + +#include +#include + +void +AES_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, unsigned char *ivec, const int enc) +{ + if (enc) + CRYPTO_cbc128_encrypt(in, out, len, key, ivec, + (block128_f)AES_encrypt); + else + CRYPTO_cbc128_decrypt(in, out, len, key, ivec, + (block128_f)AES_decrypt); +} diff --git a/ext/libressl/crypto/aes/aes_cfb.c b/ext/libressl/crypto/aes/aes_cfb.c new file mode 100644 index 0000000..a6384f9 --- /dev/null +++ b/ext/libressl/crypto/aes/aes_cfb.c @@ -0,0 +1,84 @@ +/* $OpenBSD: aes_cfb.c,v 1.8 2014/06/12 15:49:27 deraadt Exp $ */ +/* ==================================================================== + * Copyright (c) 2002-2006 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + */ + +#include +#include + +/* The input and output encrypted as though 128bit cfb mode is being + * used. The extra state information to record how much of the + * 128bit block we have used is contained in *num; + */ + +void +AES_cfb128_encrypt(const unsigned char *in, unsigned char *out, size_t length, + const AES_KEY *key, unsigned char *ivec, int *num, const int enc) +{ + CRYPTO_cfb128_encrypt(in, out, length, key, ivec, num, enc, + (block128_f)AES_encrypt); +} + +/* N.B. This expects the input to be packed, MS bit first */ +void +AES_cfb1_encrypt(const unsigned char *in, unsigned char *out, size_t length, + const AES_KEY *key, unsigned char *ivec, int *num, const int enc) +{ + CRYPTO_cfb128_1_encrypt(in, out, length, key, ivec, num, enc, + (block128_f)AES_encrypt); +} + +void +AES_cfb8_encrypt(const unsigned char *in, unsigned char *out, size_t length, + const AES_KEY *key, unsigned char *ivec, int *num, const int enc) +{ + CRYPTO_cfb128_8_encrypt(in, out, length, key, ivec, num, enc, + (block128_f)AES_encrypt); +} + diff --git a/ext/libressl/crypto/aes/aes_core.c b/ext/libressl/crypto/aes/aes_core.c new file mode 100644 index 0000000..1b8a24c --- /dev/null +++ b/ext/libressl/crypto/aes/aes_core.c @@ -0,0 +1,1374 @@ +/* $OpenBSD: aes_core.c,v 1.13 2015/11/05 21:59:13 miod Exp $ */ +/** + * rijndael-alg-fst.c + * + * @version 3.0 (December 2000) + * + * Optimised ANSI C code for the Rijndael cipher (now AES) + * + * @author Vincent Rijmen + * @author Antoon Bosselaers + * @author Paulo Barreto + * + * This code is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Note: rewritten a little bit to provide error control and an OpenSSL- + compatible API */ + +#ifndef AES_DEBUG +# ifndef NDEBUG +# define NDEBUG +# endif +#endif + +#include +#include +#include "aes_locl.h" + +#ifndef AES_ASM +/* +Te0[x] = S [x].[02, 01, 01, 03]; +Te1[x] = S [x].[03, 02, 01, 01]; +Te2[x] = S [x].[01, 03, 02, 01]; +Te3[x] = S [x].[01, 01, 03, 02]; + +Td0[x] = Si[x].[0e, 09, 0d, 0b]; +Td1[x] = Si[x].[0b, 0e, 09, 0d]; +Td2[x] = Si[x].[0d, 0b, 0e, 09]; +Td3[x] = Si[x].[09, 0d, 0b, 0e]; +Td4[x] = Si[x].[01]; +*/ + +static const u32 Te0[256] = { + 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, + 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U, + 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU, + 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU, + 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U, + 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU, + 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU, + 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU, + 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU, + 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU, + 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U, + 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU, + 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU, + 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U, + 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU, + 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU, + 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU, + 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU, + 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU, + 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U, + 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU, + 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU, + 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU, + 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU, + 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U, + 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U, + 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U, + 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U, + 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU, + 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U, + 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U, + 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU, + 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU, + 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U, + 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U, + 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U, + 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU, + 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U, + 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU, + 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U, + 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU, + 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U, + 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U, + 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU, + 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U, + 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U, + 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U, + 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U, + 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U, + 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U, + 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U, + 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U, + 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU, + 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U, + 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U, + 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U, + 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U, + 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U, + 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U, + 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU, + 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U, + 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U, + 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U, + 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU, +}; +static const u32 Te1[256] = { + 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU, + 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U, + 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU, + 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U, + 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU, + 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U, + 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU, + 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U, + 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U, + 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU, + 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U, + 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U, + 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U, + 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU, + 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U, + 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U, + 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU, + 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U, + 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U, + 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U, + 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU, + 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU, + 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U, + 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU, + 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU, + 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U, + 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU, + 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U, + 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU, + 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U, + 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U, + 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U, + 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU, + 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U, + 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU, + 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U, + 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU, + 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U, + 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U, + 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU, + 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU, + 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU, + 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U, + 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U, + 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU, + 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U, + 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU, + 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U, + 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU, + 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U, + 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU, + 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU, + 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U, + 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU, + 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U, + 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU, + 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U, + 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U, + 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U, + 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU, + 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU, + 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U, + 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU, + 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U, +}; +static const u32 Te2[256] = { + 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU, + 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U, + 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU, + 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U, + 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU, + 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U, + 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU, + 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U, + 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U, + 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU, + 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U, + 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U, + 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U, + 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU, + 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U, + 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U, + 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU, + 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U, + 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U, + 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U, + 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU, + 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU, + 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U, + 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU, + 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU, + 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U, + 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU, + 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U, + 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU, + 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U, + 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U, + 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U, + 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU, + 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U, + 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU, + 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U, + 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU, + 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U, + 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U, + 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU, + 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU, + 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU, + 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U, + 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U, + 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU, + 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U, + 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU, + 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U, + 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU, + 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U, + 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU, + 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU, + 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U, + 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU, + 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U, + 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU, + 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U, + 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U, + 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U, + 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU, + 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU, + 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U, + 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU, + 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U, +}; +static const u32 Te3[256] = { + 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U, + 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U, + 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U, + 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU, + 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU, + 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU, + 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U, + 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU, + 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU, + 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U, + 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U, + 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU, + 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU, + 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU, + 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU, + 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU, + 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U, + 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU, + 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU, + 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U, + 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U, + 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U, + 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U, + 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U, + 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU, + 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U, + 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU, + 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU, + 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U, + 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U, + 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U, + 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU, + 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U, + 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU, + 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU, + 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U, + 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U, + 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU, + 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U, + 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU, + 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U, + 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U, + 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U, + 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U, + 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU, + 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U, + 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU, + 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U, + 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU, + 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U, + 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU, + 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU, + 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU, + 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU, + 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U, + 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U, + 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U, + 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U, + 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U, + 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U, + 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU, + 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U, + 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU, + 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU, +}; + +static const u32 Td0[256] = { + 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, + 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U, + 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U, + 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU, + 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U, + 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U, + 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU, + 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U, + 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU, + 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U, + 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U, + 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U, + 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U, + 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU, + 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U, + 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU, + 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U, + 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU, + 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U, + 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U, + 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U, + 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU, + 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U, + 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU, + 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U, + 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU, + 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U, + 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU, + 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU, + 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U, + 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU, + 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U, + 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU, + 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U, + 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U, + 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U, + 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU, + 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U, + 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U, + 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU, + 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U, + 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U, + 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U, + 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U, + 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U, + 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU, + 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U, + 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U, + 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U, + 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U, + 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U, + 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU, + 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU, + 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU, + 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU, + 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U, + 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U, + 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU, + 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU, + 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U, + 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU, + 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U, + 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U, + 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U, +}; +static const u32 Td1[256] = { + 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU, + 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U, + 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU, + 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U, + 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U, + 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U, + 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U, + 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U, + 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U, + 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU, + 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU, + 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU, + 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U, + 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU, + 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U, + 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U, + 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U, + 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU, + 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU, + 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U, + 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU, + 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U, + 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU, + 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU, + 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U, + 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U, + 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U, + 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU, + 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U, + 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU, + 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U, + 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U, + 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U, + 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU, + 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U, + 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U, + 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U, + 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U, + 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U, + 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U, + 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU, + 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU, + 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U, + 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU, + 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U, + 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU, + 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU, + 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U, + 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU, + 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U, + 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U, + 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U, + 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U, + 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U, + 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U, + 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U, + 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU, + 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U, + 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U, + 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU, + 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U, + 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U, + 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U, + 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U, +}; +static const u32 Td2[256] = { + 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U, + 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U, + 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U, + 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U, + 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU, + 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U, + 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U, + 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U, + 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U, + 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU, + 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U, + 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U, + 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU, + 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U, + 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U, + 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U, + 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U, + 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U, + 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U, + 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU, + 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U, + 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U, + 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U, + 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U, + 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U, + 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU, + 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU, + 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U, + 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU, + 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U, + 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU, + 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU, + 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU, + 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU, + 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U, + 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U, + 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U, + 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U, + 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U, + 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U, + 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U, + 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU, + 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU, + 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U, + 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U, + 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU, + 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU, + 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U, + 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U, + 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U, + 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U, + 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U, + 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U, + 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U, + 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU, + 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U, + 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U, + 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U, + 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U, + 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U, + 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U, + 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU, + 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U, + 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U, +}; +static const u32 Td3[256] = { + 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU, + 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU, + 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U, + 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U, + 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU, + 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU, + 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U, + 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU, + 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U, + 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU, + 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U, + 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U, + 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U, + 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U, + 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U, + 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU, + 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU, + 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U, + 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U, + 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU, + 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU, + 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U, + 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U, + 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U, + 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U, + 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU, + 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U, + 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U, + 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU, + 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU, + 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U, + 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U, + 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U, + 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU, + 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U, + 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U, + 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U, + 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U, + 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U, + 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U, + 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U, + 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU, + 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U, + 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U, + 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU, + 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU, + 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U, + 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU, + 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U, + 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U, + 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U, + 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U, + 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U, + 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U, + 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU, + 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU, + 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU, + 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU, + 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U, + 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U, + 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U, + 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU, + 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U, + 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U, +}; +static const u8 Td4[256] = { + 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U, + 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU, + 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U, + 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU, + 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU, + 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU, + 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U, + 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U, + 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U, + 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U, + 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU, + 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U, + 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU, + 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U, + 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U, + 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU, + 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU, + 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U, + 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U, + 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU, + 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U, + 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU, + 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U, + 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U, + 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U, + 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU, + 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU, + 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU, + 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U, + 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U, + 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U, + 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU, +}; +static const u32 rcon[] = { + 0x01000000, 0x02000000, 0x04000000, 0x08000000, + 0x10000000, 0x20000000, 0x40000000, 0x80000000, + 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ +}; + +/** + * Expand the cipher key into the encryption key schedule. + */ +int +AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key) +{ + u32 *rk; + int i = 0; + u32 temp; + + if (!userKey || !key) + return -1; + if (bits != 128 && bits != 192 && bits != 256) + return -2; + + rk = key->rd_key; + + if (bits == 128) + key->rounds = 10; + else if (bits == 192) + key->rounds = 12; + else + key->rounds = 14; + + rk[0] = GETU32(userKey); + rk[1] = GETU32(userKey + 4); + rk[2] = GETU32(userKey + 8); + rk[3] = GETU32(userKey + 12); + if (bits == 128) { + while (1) { + temp = rk[3]; + rk[4] = rk[0] ^ + (Te2[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te0[(temp) & 0xff] & 0x0000ff00) ^ + (Te1[(temp >> 24)] & 0x000000ff) ^ + rcon[i]; + rk[5] = rk[1] ^ rk[4]; + rk[6] = rk[2] ^ rk[5]; + rk[7] = rk[3] ^ rk[6]; + if (++i == 10) { + return 0; + } + rk += 4; + } + } + rk[4] = GETU32(userKey + 16); + rk[5] = GETU32(userKey + 20); + if (bits == 192) { + while (1) { + temp = rk[5]; + rk[6] = rk[ 0] ^ + (Te2[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te0[(temp) & 0xff] & 0x0000ff00) ^ + (Te1[(temp >> 24)] & 0x000000ff) ^ + rcon[i]; + rk[7] = rk[1] ^ rk[6]; + rk[8] = rk[2] ^ rk[7]; + rk[9] = rk[3] ^ rk[8]; + if (++i == 8) { + return 0; + } + rk[10] = rk[4] ^ rk[9]; + rk[11] = rk[5] ^ rk[10]; + rk += 6; + } + } + rk[6] = GETU32(userKey + 24); + rk[7] = GETU32(userKey + 28); + if (bits == 256) { + while (1) { + temp = rk[7]; + rk[8] = rk[0] ^ + (Te2[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te0[(temp) & 0xff] & 0x0000ff00) ^ + (Te1[(temp >> 24)] & 0x000000ff) ^ + rcon[i]; + rk[9] = rk[1] ^ rk[8]; + rk[10] = rk[2] ^ rk[9]; + rk[11] = rk[3] ^ rk[10]; + if (++i == 7) { + return 0; + } + temp = rk[11]; + rk[12] = rk[4] ^ + (Te2[(temp >> 24)] & 0xff000000) ^ + (Te3[(temp >> 16) & 0xff] & 0x00ff0000) ^ + (Te0[(temp >> 8) & 0xff] & 0x0000ff00) ^ + (Te1[(temp) & 0xff] & 0x000000ff); + rk[13] = rk[5] ^ rk[12]; + rk[14] = rk[6] ^ rk[13]; + rk[15] = rk[7] ^ rk[14]; + + rk += 8; + } + } + return 0; +} + +/** + * Expand the cipher key into the decryption key schedule. + */ +int +AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key) +{ + u32 *rk; + int i, j, status; + u32 temp; + + /* first, start with an encryption schedule */ + status = AES_set_encrypt_key(userKey, bits, key); + if (status < 0) + return status; + + rk = key->rd_key; + + /* invert the order of the round keys: */ + for (i = 0, j = 4 * (key->rounds); i < j; i += 4, j -= 4) { + temp = rk[i]; + rk[i] = rk[j]; + rk[j] = temp; + temp = rk[i + 1]; + rk[i + 1] = rk[j + 1]; + rk[j + 1] = temp; + temp = rk[i + 2]; + rk[i + 2] = rk[j + 2]; + rk[j + 2] = temp; + temp = rk[i + 3]; + rk[i + 3] = rk[j + 3]; + rk[j + 3] = temp; + } + /* apply the inverse MixColumn transform to all round keys but the first and the last: */ + for (i = 1; i < (key->rounds); i++) { + rk += 4; + rk[0] = + Td0[Te1[(rk[0] >> 24)] & 0xff] ^ + Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^ + Td2[Te1[(rk[0] >> 8) & 0xff] & 0xff] ^ + Td3[Te1[(rk[0]) & 0xff] & 0xff]; + rk[1] = + Td0[Te1[(rk[1] >> 24)] & 0xff] ^ + Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^ + Td2[Te1[(rk[1] >> 8) & 0xff] & 0xff] ^ + Td3[Te1[(rk[1]) & 0xff] & 0xff]; + rk[2] = + Td0[Te1[(rk[2] >> 24)] & 0xff] ^ + Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^ + Td2[Te1[(rk[2] >> 8) & 0xff] & 0xff] ^ + Td3[Te1[(rk[2]) & 0xff] & 0xff]; + rk[3] = + Td0[Te1[(rk[3] >> 24)] & 0xff] ^ + Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^ + Td2[Te1[(rk[3] >> 8) & 0xff] & 0xff] ^ + Td3[Te1[(rk[3]) & 0xff] & 0xff]; + } + return 0; +} + +/* + * Encrypt a single block + * in and out can overlap + */ +void +AES_encrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key) +{ + const u32 *rk; + u32 s0, s1, s2, s3, t0, t1, t2, t3; +#ifndef FULL_UNROLL + int r; +#endif /* ?FULL_UNROLL */ + + rk = key->rd_key; + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(in ) ^ rk[0]; + s1 = GETU32(in + 4) ^ rk[1]; + s2 = GETU32(in + 8) ^ rk[2]; + s3 = GETU32(in + 12) ^ rk[3]; +#ifdef FULL_UNROLL + /* round 1: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7]; + /* round 2: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11]; + /* round 3: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15]; + /* round 4: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19]; + /* round 5: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23]; + /* round 6: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27]; + /* round 7: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31]; + /* round 8: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35]; + /* round 9: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39]; + if (key->rounds > 10) { + /* round 10: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43]; + /* round 11: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47]; + if (key->rounds > 12) { + /* round 12: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51]; + /* round 13: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55]; + } + } + rk += key->rounds << 2; +#else /* !FULL_UNROLL */ + /* + * Nr - 1 full rounds: + */ + r = key->rounds >> 1; + for (;;) { + t0 = + Te0[(s0 >> 24)] ^ + Te1[(s1 >> 16) & 0xff] ^ + Te2[(s2 >> 8) & 0xff] ^ + Te3[(s3) & 0xff] ^ + rk[4]; + t1 = + Te0[(s1 >> 24)] ^ + Te1[(s2 >> 16) & 0xff] ^ + Te2[(s3 >> 8) & 0xff] ^ + Te3[(s0) & 0xff] ^ + rk[5]; + t2 = + Te0[(s2 >> 24)] ^ + Te1[(s3 >> 16) & 0xff] ^ + Te2[(s0 >> 8) & 0xff] ^ + Te3[(s1) & 0xff] ^ + rk[6]; + t3 = + Te0[(s3 >> 24)] ^ + Te1[(s0 >> 16) & 0xff] ^ + Te2[(s1 >> 8) & 0xff] ^ + Te3[(s2) & 0xff] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + Te0[(t0 >> 24)] ^ + Te1[(t1 >> 16) & 0xff] ^ + Te2[(t2 >> 8) & 0xff] ^ + Te3[(t3) & 0xff] ^ + rk[0]; + s1 = + Te0[(t1 >> 24)] ^ + Te1[(t2 >> 16) & 0xff] ^ + Te2[(t3 >> 8) & 0xff] ^ + Te3[(t0) & 0xff] ^ + rk[1]; + s2 = + Te0[(t2 >> 24)] ^ + Te1[(t3 >> 16) & 0xff] ^ + Te2[(t0 >> 8) & 0xff] ^ + Te3[(t1) & 0xff] ^ + rk[2]; + s3 = + Te0[(t3 >> 24)] ^ + Te1[(t0 >> 16) & 0xff] ^ + Te2[(t1 >> 8) & 0xff] ^ + Te3[(t2) & 0xff] ^ + rk[3]; + } +#endif /* ?FULL_UNROLL */ + /* + * apply last round and + * map cipher state to byte array block: + */ + s0 = + (Te2[(t0 >> 24)] & 0xff000000) ^ + (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^ + (Te0[(t2 >> 8) & 0xff] & 0x0000ff00) ^ + (Te1[(t3) & 0xff] & 0x000000ff) ^ + rk[0]; + PUTU32(out, s0); + s1 = + (Te2[(t1 >> 24)] & 0xff000000) ^ + (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^ + (Te0[(t3 >> 8) & 0xff] & 0x0000ff00) ^ + (Te1[(t0) & 0xff] & 0x000000ff) ^ + rk[1]; + PUTU32(out + 4, s1); + s2 = + (Te2[(t2 >> 24)] & 0xff000000) ^ + (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^ + (Te0[(t0 >> 8) & 0xff] & 0x0000ff00) ^ + (Te1[(t1) & 0xff] & 0x000000ff) ^ + rk[2]; + PUTU32(out + 8, s2); + s3 = + (Te2[(t3 >> 24)] & 0xff000000) ^ + (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^ + (Te0[(t1 >> 8) & 0xff] & 0x0000ff00) ^ + (Te1[(t2) & 0xff] & 0x000000ff) ^ + rk[3]; + PUTU32(out + 12, s3); +} + +/* + * Decrypt a single block + * in and out can overlap + */ +void +AES_decrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key) +{ + const u32 *rk; + u32 s0, s1, s2, s3, t0, t1, t2, t3; +#ifndef FULL_UNROLL + int r; +#endif /* ?FULL_UNROLL */ + + rk = key->rd_key; + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(in) ^ rk[0]; + s1 = GETU32(in + 4) ^ rk[1]; + s2 = GETU32(in + 8) ^ rk[2]; + s3 = GETU32(in + 12) ^ rk[3]; +#ifdef FULL_UNROLL + /* round 1: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7]; + /* round 2: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11]; + /* round 3: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15]; + /* round 4: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19]; + /* round 5: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23]; + /* round 6: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27]; + /* round 7: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31]; + /* round 8: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35]; + /* round 9: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39]; + if (key->rounds > 10) { + /* round 10: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43]; + /* round 11: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47]; + if (key->rounds > 12) { + /* round 12: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51]; + /* round 13: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55]; + } + } + rk += key->rounds << 2; +#else /* !FULL_UNROLL */ + /* + * Nr - 1 full rounds: + */ + r = key->rounds >> 1; + for (;;) { + t0 = + Td0[(s0 >> 24)] ^ + Td1[(s3 >> 16) & 0xff] ^ + Td2[(s2 >> 8) & 0xff] ^ + Td3[(s1) & 0xff] ^ + rk[4]; + t1 = + Td0[(s1 >> 24)] ^ + Td1[(s0 >> 16) & 0xff] ^ + Td2[(s3 >> 8) & 0xff] ^ + Td3[(s2) & 0xff] ^ + rk[5]; + t2 = + Td0[(s2 >> 24)] ^ + Td1[(s1 >> 16) & 0xff] ^ + Td2[(s0 >> 8) & 0xff] ^ + Td3[(s3) & 0xff] ^ + rk[6]; + t3 = + Td0[(s3 >> 24)] ^ + Td1[(s2 >> 16) & 0xff] ^ + Td2[(s1 >> 8) & 0xff] ^ + Td3[(s0) & 0xff] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + Td0[(t0 >> 24)] ^ + Td1[(t3 >> 16) & 0xff] ^ + Td2[(t2 >> 8) & 0xff] ^ + Td3[(t1) & 0xff] ^ + rk[0]; + s1 = + Td0[(t1 >> 24)] ^ + Td1[(t0 >> 16) & 0xff] ^ + Td2[(t3 >> 8) & 0xff] ^ + Td3[(t2) & 0xff] ^ + rk[1]; + s2 = + Td0[(t2 >> 24)] ^ + Td1[(t1 >> 16) & 0xff] ^ + Td2[(t0 >> 8) & 0xff] ^ + Td3[(t3) & 0xff] ^ + rk[2]; + s3 = + Td0[(t3 >> 24)] ^ + Td1[(t2 >> 16) & 0xff] ^ + Td2[(t1 >> 8) & 0xff] ^ + Td3[(t0) & 0xff] ^ + rk[3]; + } +#endif /* ?FULL_UNROLL */ + /* + * apply last round and + * map cipher state to byte array block: + */ + s0 = + (((uint32_t)Td4[(t0 >> 24)]) << 24) ^ + (Td4[(t3 >> 16) & 0xff] << 16) ^ + (Td4[(t2 >> 8) & 0xff] << 8) ^ + (Td4[(t1) & 0xff]) ^ + rk[0]; + PUTU32(out, s0); + s1 = + (((uint32_t)Td4[(t1 >> 24)]) << 24) ^ + (Td4[(t0 >> 16) & 0xff] << 16) ^ + (Td4[(t3 >> 8) & 0xff] << 8) ^ + (Td4[(t2) & 0xff]) ^ + rk[1]; + PUTU32(out + 4, s1); + s2 = + (((uint32_t)Td4[(t2 >> 24)]) << 24) ^ + (Td4[(t1 >> 16) & 0xff] << 16) ^ + (Td4[(t0 >> 8) & 0xff] << 8) ^ + (Td4[(t3) & 0xff]) ^ + rk[2]; + PUTU32(out + 8, s2); + s3 = + (((uint32_t)Td4[(t3 >> 24)]) << 24) ^ + (Td4[(t2 >> 16) & 0xff] << 16) ^ + (Td4[(t1 >> 8) & 0xff] << 8) ^ + (Td4[(t0) & 0xff]) ^ + rk[3]; + PUTU32(out + 12, s3); +} + +#else /* AES_ASM */ + +static const u8 Te4[256] = { + 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U, + 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U, + 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U, + 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U, + 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU, + 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U, + 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU, + 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U, + 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U, + 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U, + 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU, + 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU, + 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U, + 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U, + 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U, + 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U, + 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U, + 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U, + 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U, + 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU, + 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU, + 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U, + 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U, + 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U, + 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U, + 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU, + 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU, + 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU, + 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U, + 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU, + 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U, + 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U +}; +static const u32 rcon[] = { + 0x01000000, 0x02000000, 0x04000000, 0x08000000, + 0x10000000, 0x20000000, 0x40000000, 0x80000000, + 0x1B000000, 0x36000000, + /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ +}; + +/** + * Expand the cipher key into the encryption key schedule. + */ +int +AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key) +{ + u32 *rk; + int i = 0; + u32 temp; + + if (!userKey || !key) + return -1; + if (bits != 128 && bits != 192 && bits != 256) + return -2; + + rk = key->rd_key; + + if (bits == 128) + key->rounds = 10; + else if (bits == 192) + key->rounds = 12; + else + key->rounds = 14; + + rk[0] = GETU32(userKey); + rk[1] = GETU32(userKey + 4); + rk[2] = GETU32(userKey + 8); + rk[3] = GETU32(userKey + 12); + if (bits == 128) { + while (1) { + temp = rk[3]; + rk[4] = rk[0] ^ + (Te4[(temp >> 16) & 0xff] << 24) ^ + (Te4[(temp >> 8) & 0xff] << 16) ^ + (Te4[(temp) & 0xff] << 8) ^ + (Te4[(temp >> 24)]) ^ + rcon[i]; + rk[5] = rk[1] ^ rk[4]; + rk[6] = rk[2] ^ rk[5]; + rk[7] = rk[3] ^ rk[6]; + if (++i == 10) { + return 0; + } + rk += 4; + } + } + rk[4] = GETU32(userKey + 16); + rk[5] = GETU32(userKey + 20); + if (bits == 192) { + while (1) { + temp = rk[5]; + rk[6] = rk[0] ^ + (Te4[(temp >> 16) & 0xff] << 24) ^ + (Te4[(temp >> 8) & 0xff] << 16) ^ + (Te4[(temp) & 0xff] << 8) ^ + (Te4[(temp >> 24)]) ^ + rcon[i]; + rk[7] = rk[1] ^ rk[6]; + rk[8] = rk[2] ^ rk[7]; + rk[9] = rk[3] ^ rk[8]; + if (++i == 8) { + return 0; + } + rk[10] = rk[4] ^ rk[9]; + rk[11] = rk[5] ^ rk[10]; + rk += 6; + } + } + rk[6] = GETU32(userKey + 24); + rk[7] = GETU32(userKey + 28); + if (bits == 256) { + while (1) { + temp = rk[7]; + rk[8] = rk[0] ^ + (Te4[(temp >> 16) & 0xff] << 24) ^ + (Te4[(temp >> 8) & 0xff] << 16) ^ + (Te4[(temp) & 0xff] << 8) ^ + (Te4[(temp >> 24)]) ^ + rcon[i]; + rk[9] = rk[1] ^ rk[8]; + rk[10] = rk[2] ^ rk[9]; + rk[11] = rk[3] ^ rk[10]; + if (++i == 7) { + return 0; + } + temp = rk[11]; + rk[12] = rk[4] ^ + (Te4[(temp >> 24)] << 24) ^ + (Te4[(temp >> 16) & 0xff] << 16) ^ + (Te4[(temp >> 8) & 0xff] << 8) ^ + (Te4[(temp) & 0xff]); + rk[13] = rk[5] ^ rk[12]; + rk[14] = rk[6] ^ rk[13]; + rk[15] = rk[7] ^ rk[14]; + + rk += 8; + } + } + return 0; +} + +/** + * Expand the cipher key into the decryption key schedule. + */ +int +AES_set_decrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key) +{ + u32 *rk; + int i, j, status; + u32 temp; + + /* first, start with an encryption schedule */ + status = AES_set_encrypt_key(userKey, bits, key); + if (status < 0) + return status; + + rk = key->rd_key; + + /* invert the order of the round keys: */ + for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) { + temp = rk[i]; + rk[i] = rk[j]; + rk[j] = temp; + temp = rk[i + 1]; + rk[i + 1] = rk[j + 1]; + rk[j + 1] = temp; + temp = rk[i + 2]; + rk[i + 2] = rk[j + 2]; + rk[j + 2] = temp; + temp = rk[i + 3]; + rk[i + 3] = rk[j + 3]; + rk[j + 3] = temp; + } + /* apply the inverse MixColumn transform to all round keys but the first and the last: */ + for (i = 1; i < (key->rounds); i++) { + rk += 4; + for (j = 0; j < 4; j++) { + u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m; + + tp1 = rk[j]; + m = tp1 & 0x80808080; + tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^ + ((m - (m >> 7)) & 0x1b1b1b1b); + m = tp2 & 0x80808080; + tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^ + ((m - (m >> 7)) & 0x1b1b1b1b); + m = tp4 & 0x80808080; + tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^ + ((m - (m >> 7)) & 0x1b1b1b1b); + tp9 = tp8 ^ tp1; + tpb = tp9 ^ tp2; + tpd = tp9 ^ tp4; + tpe = tp8 ^ tp4 ^ tp2; +#if defined(ROTATE) + rk[j] = tpe ^ ROTATE(tpd, 16) ^ + ROTATE(tp9, 24) ^ ROTATE(tpb, 8); +#else + rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ + (tp9 >> 8) ^ (tp9 << 24) ^ + (tpb >> 24) ^ (tpb << 8); +#endif + } + } + return 0; +} + +#endif /* AES_ASM */ diff --git a/ext/libressl/crypto/aes/aes_ctr.c b/ext/libressl/crypto/aes/aes_ctr.c new file mode 100644 index 0000000..6079145 --- /dev/null +++ b/ext/libressl/crypto/aes/aes_ctr.c @@ -0,0 +1,62 @@ +/* $OpenBSD: aes_ctr.c,v 1.9 2014/06/12 15:49:27 deraadt Exp $ */ +/* ==================================================================== + * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + */ + +#include +#include + +void +AES_ctr128_encrypt(const unsigned char *in, unsigned char *out, + size_t length, const AES_KEY *key, unsigned char ivec[AES_BLOCK_SIZE], + unsigned char ecount_buf[AES_BLOCK_SIZE], unsigned int *num) +{ + CRYPTO_ctr128_encrypt(in, out, length, key, ivec, ecount_buf, num, + (block128_f)AES_encrypt); +} diff --git a/ext/libressl/crypto/aes/aes_ecb.c b/ext/libressl/crypto/aes/aes_ecb.c new file mode 100644 index 0000000..b05e539 --- /dev/null +++ b/ext/libressl/crypto/aes/aes_ecb.c @@ -0,0 +1,69 @@ +/* $OpenBSD: aes_ecb.c,v 1.6 2015/02/10 09:46:30 miod Exp $ */ +/* ==================================================================== + * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + */ + +#ifndef AES_DEBUG +# ifndef NDEBUG +# define NDEBUG +# endif +#endif + +#include +#include "aes_locl.h" + +void +AES_ecb_encrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key, const int enc) +{ + if (AES_ENCRYPT == enc) + AES_encrypt(in, out, key); + else + AES_decrypt(in, out, key); +} diff --git a/ext/libressl/crypto/aes/aes_ige.c b/ext/libressl/crypto/aes/aes_ige.c new file mode 100644 index 0000000..85b7f69 --- /dev/null +++ b/ext/libressl/crypto/aes/aes_ige.c @@ -0,0 +1,194 @@ +/* $OpenBSD: aes_ige.c,v 1.7 2015/02/10 09:46:30 miod Exp $ */ +/* ==================================================================== + * Copyright (c) 2006 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + */ + +#include +#include + +#include "aes_locl.h" + +#define N_WORDS (AES_BLOCK_SIZE / sizeof(unsigned long)) +typedef struct { + unsigned long data[N_WORDS]; +} aes_block_t; + +/* XXX: probably some better way to do this */ +#if defined(__i386__) || defined(__x86_64__) +#define UNALIGNED_MEMOPS_ARE_FAST 1 +#else +#define UNALIGNED_MEMOPS_ARE_FAST 0 +#endif + +#if UNALIGNED_MEMOPS_ARE_FAST +#define load_block(d, s) (d) = *(const aes_block_t *)(s) +#define store_block(d, s) *(aes_block_t *)(d) = (s) +#else +#define load_block(d, s) memcpy((d).data, (s), AES_BLOCK_SIZE) +#define store_block(d, s) memcpy((d), (s).data, AES_BLOCK_SIZE) +#endif + +/* N.B. The IV for this mode is _twice_ the block size */ + +void +AES_ige_encrypt(const unsigned char *in, unsigned char *out, size_t length, + const AES_KEY *key, unsigned char *ivec, const int enc) +{ + size_t n; + size_t len; + + OPENSSL_assert((length % AES_BLOCK_SIZE) == 0); + + len = length / AES_BLOCK_SIZE; + + if (AES_ENCRYPT == enc) { + if (in != out && (UNALIGNED_MEMOPS_ARE_FAST || + ((size_t)in|(size_t)out|(size_t)ivec) % + sizeof(long) == 0)) { + aes_block_t *ivp = (aes_block_t *)ivec; + aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE); + + while (len) { + aes_block_t *inp = (aes_block_t *)in; + aes_block_t *outp = (aes_block_t *)out; + + for (n = 0; n < N_WORDS; ++n) + outp->data[n] = inp->data[n] ^ ivp->data[n]; + AES_encrypt((unsigned char *)outp->data, (unsigned char *)outp->data, key); + for (n = 0; n < N_WORDS; ++n) + outp->data[n] ^= iv2p->data[n]; + ivp = outp; + iv2p = inp; + --len; + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } + memcpy(ivec, ivp->data, AES_BLOCK_SIZE); + memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE); + } else { + aes_block_t tmp, tmp2; + aes_block_t iv; + aes_block_t iv2; + + load_block(iv, ivec); + load_block(iv2, ivec + AES_BLOCK_SIZE); + + while (len) { + load_block(tmp, in); + for (n = 0; n < N_WORDS; ++n) + tmp2.data[n] = tmp.data[n] ^ iv.data[n]; + AES_encrypt((unsigned char *)tmp2.data, + (unsigned char *)tmp2.data, key); + for (n = 0; n < N_WORDS; ++n) + tmp2.data[n] ^= iv2.data[n]; + store_block(out, tmp2); + iv = tmp2; + iv2 = tmp; + --len; + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } + memcpy(ivec, iv.data, AES_BLOCK_SIZE); + memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE); + } + } else { + if (in != out && (UNALIGNED_MEMOPS_ARE_FAST || + ((size_t)in|(size_t)out|(size_t)ivec) % + sizeof(long) == 0)) { + aes_block_t *ivp = (aes_block_t *)ivec; + aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE); + + while (len) { + aes_block_t tmp; + aes_block_t *inp = (aes_block_t *)in; + aes_block_t *outp = (aes_block_t *)out; + + for (n = 0; n < N_WORDS; ++n) + tmp.data[n] = inp->data[n] ^ iv2p->data[n]; + AES_decrypt((unsigned char *)tmp.data, + (unsigned char *)outp->data, key); + for (n = 0; n < N_WORDS; ++n) + outp->data[n] ^= ivp->data[n]; + ivp = inp; + iv2p = outp; + --len; + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } + memcpy(ivec, ivp->data, AES_BLOCK_SIZE); + memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE); + } else { + aes_block_t tmp, tmp2; + aes_block_t iv; + aes_block_t iv2; + + load_block(iv, ivec); + load_block(iv2, ivec + AES_BLOCK_SIZE); + + while (len) { + load_block(tmp, in); + tmp2 = tmp; + for (n = 0; n < N_WORDS; ++n) + tmp.data[n] ^= iv2.data[n]; + AES_decrypt((unsigned char *)tmp.data, + (unsigned char *)tmp.data, key); + for (n = 0; n < N_WORDS; ++n) + tmp.data[n] ^= iv.data[n]; + store_block(out, tmp); + iv = tmp2; + iv2 = tmp; + --len; + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } + memcpy(ivec, iv.data, AES_BLOCK_SIZE); + memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE); + } + } +} diff --git a/ext/libressl/crypto/aes/aes_locl.h b/ext/libressl/crypto/aes/aes_locl.h new file mode 100644 index 0000000..c47f65d --- /dev/null +++ b/ext/libressl/crypto/aes/aes_locl.h @@ -0,0 +1,83 @@ +/* $OpenBSD: aes_locl.h,v 1.11 2016/12/21 15:49:29 jsing Exp $ */ +/* ==================================================================== + * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + */ + +#ifndef HEADER_AES_LOCL_H +#define HEADER_AES_LOCL_H + +#include + +#ifdef OPENSSL_NO_AES +#error AES is disabled. +#endif + +#include +#include +#include + +__BEGIN_HIDDEN_DECLS + +#define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3])) +#define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); } + +typedef unsigned int u32; +typedef unsigned short u16; +typedef unsigned char u8; + +#define MAXKC (256/32) +#define MAXKB (256/8) +#define MAXNR 14 + +/* This controls loop-unrolling in aes_core.c */ +#undef FULL_UNROLL + +__END_HIDDEN_DECLS + +#endif /* !HEADER_AES_LOCL_H */ diff --git a/ext/libressl/crypto/aes/aes_misc.c b/ext/libressl/crypto/aes/aes_misc.c new file mode 100644 index 0000000..6c1506d --- /dev/null +++ b/ext/libressl/crypto/aes/aes_misc.c @@ -0,0 +1,65 @@ +/* $OpenBSD: aes_misc.c,v 1.10 2014/07/09 11:10:50 bcook Exp $ */ +/* ==================================================================== + * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + */ + +#include +#include +#include +#include "aes_locl.h" + +const char * +AES_options(void) +{ +#ifdef FULL_UNROLL + return "aes(full)"; +#else + return "aes(partial)"; +#endif +} diff --git a/ext/libressl/crypto/aes/aes_ofb.c b/ext/libressl/crypto/aes/aes_ofb.c new file mode 100644 index 0000000..f8dc03a --- /dev/null +++ b/ext/libressl/crypto/aes/aes_ofb.c @@ -0,0 +1,61 @@ +/* $OpenBSD: aes_ofb.c,v 1.6 2014/06/12 15:49:27 deraadt Exp $ */ +/* ==================================================================== + * Copyright (c) 2002-2006 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + */ + +#include +#include + +void +AES_ofb128_encrypt(const unsigned char *in, unsigned char *out, size_t length, + const AES_KEY *key, unsigned char *ivec, int *num) +{ + CRYPTO_ofb128_encrypt(in, out, length, key, ivec, num, + (block128_f)AES_encrypt); +} diff --git a/ext/libressl/crypto/aes/aes_wrap.c b/ext/libressl/crypto/aes/aes_wrap.c new file mode 100644 index 0000000..b30630f --- /dev/null +++ b/ext/libressl/crypto/aes/aes_wrap.c @@ -0,0 +1,133 @@ +/* $OpenBSD: aes_wrap.c,v 1.12 2018/11/07 18:31:16 tb Exp $ */ +/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include + +#include +#include + +static const unsigned char default_iv[] = { + 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, +}; + +int +AES_wrap_key(AES_KEY *key, const unsigned char *iv, unsigned char *out, + const unsigned char *in, unsigned int inlen) +{ + unsigned char *A, B[16], *R; + unsigned int i, j, t; + + if ((inlen & 0x7) || (inlen < 16)) + return -1; + A = B; + t = 1; + memmove(out + 8, in, inlen); + if (!iv) + iv = default_iv; + + memcpy(A, iv, 8); + + for (j = 0; j < 6; j++) { + R = out + 8; + for (i = 0; i < inlen; i += 8, t++, R += 8) { + memcpy(B + 8, R, 8); + AES_encrypt(B, B, key); + A[7] ^= (unsigned char)(t & 0xff); + if (t > 0xff) { + A[6] ^= (unsigned char)((t >> 8) & 0xff); + A[5] ^= (unsigned char)((t >> 16) & 0xff); + A[4] ^= (unsigned char)((t >> 24) & 0xff); + } + memcpy(R, B + 8, 8); + } + } + memcpy(out, A, 8); + return inlen + 8; +} + +int +AES_unwrap_key(AES_KEY *key, const unsigned char *iv, unsigned char *out, + const unsigned char *in, unsigned int inlen) +{ + unsigned char *A, B[16], *R; + unsigned int i, j, t; + + if ((inlen & 0x7) || (inlen < 24)) + return -1; + inlen -= 8; + A = B; + t = 6 * (inlen >> 3); + memcpy(A, in, 8); + memmove(out, in + 8, inlen); + for (j = 0; j < 6; j++) { + R = out + inlen - 8; + for (i = 0; i < inlen; i += 8, t--, R -= 8) { + A[7] ^= (unsigned char)(t & 0xff); + if (t > 0xff) { + A[6] ^= (unsigned char)((t >> 8) & 0xff); + A[5] ^= (unsigned char)((t >> 16) & 0xff); + A[4] ^= (unsigned char)((t >> 24) & 0xff); + } + memcpy(B + 8, R, 8); + AES_decrypt(B, B, key); + memcpy(R, B + 8, 8); + } + } + if (!iv) + iv = default_iv; + if (memcmp(A, iv, 8)) { + explicit_bzero(out, inlen); + return 0; + } + return inlen; +} diff --git a/ext/libressl/crypto/aes/aesni-elf-x86_64.S b/ext/libressl/crypto/aes/aesni-elf-x86_64.S new file mode 100644 index 0000000..3b3dabf --- /dev/null +++ b/ext/libressl/crypto/aes/aesni-elf-x86_64.S @@ -0,0 +1,2539 @@ +#include "x86_arch.h" +.text +.globl aesni_encrypt +.type aesni_encrypt,@function +.align 16 +aesni_encrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +.Loop_enc1_1: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz .Loop_enc1_1 + aesenclast %xmm1,%xmm2 + movups %xmm2,(%rsi) + retq +.size aesni_encrypt,.-aesni_encrypt + +.globl aesni_decrypt +.type aesni_decrypt,@function +.align 16 +aesni_decrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +.Loop_dec1_2: + aesdec %xmm1,%xmm2 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz .Loop_dec1_2 + aesdeclast %xmm1,%xmm2 + movups %xmm2,(%rsi) + retq +.size aesni_decrypt, .-aesni_decrypt +.type _aesni_encrypt3,@function +.align 16 +_aesni_encrypt3: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups (%rcx),%xmm0 + +.Lenc_loop3: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + movups (%rcx),%xmm0 + jnz .Lenc_loop3 + + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + aesenc %xmm1,%xmm4 + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + aesenclast %xmm0,%xmm4 + retq +.size _aesni_encrypt3,.-_aesni_encrypt3 +.type _aesni_decrypt3,@function +.align 16 +_aesni_decrypt3: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups (%rcx),%xmm0 + +.Ldec_loop3: + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + decl %eax + aesdec %xmm1,%xmm4 + movups 16(%rcx),%xmm1 + aesdec %xmm0,%xmm2 + aesdec %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesdec %xmm0,%xmm4 + movups (%rcx),%xmm0 + jnz .Ldec_loop3 + + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + aesdec %xmm1,%xmm4 + aesdeclast %xmm0,%xmm2 + aesdeclast %xmm0,%xmm3 + aesdeclast %xmm0,%xmm4 + retq +.size _aesni_decrypt3,.-_aesni_decrypt3 +.type _aesni_encrypt4,@function +.align 16 +_aesni_encrypt4: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups (%rcx),%xmm0 + +.Lenc_loop4: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + aesenc %xmm0,%xmm5 + movups (%rcx),%xmm0 + jnz .Lenc_loop4 + + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + aesenclast %xmm0,%xmm4 + aesenclast %xmm0,%xmm5 + retq +.size _aesni_encrypt4,.-_aesni_encrypt4 +.type _aesni_decrypt4,@function +.align 16 +_aesni_decrypt4: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups (%rcx),%xmm0 + +.Ldec_loop4: + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + decl %eax + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + movups 16(%rcx),%xmm1 + aesdec %xmm0,%xmm2 + aesdec %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesdec %xmm0,%xmm4 + aesdec %xmm0,%xmm5 + movups (%rcx),%xmm0 + jnz .Ldec_loop4 + + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdeclast %xmm0,%xmm2 + aesdeclast %xmm0,%xmm3 + aesdeclast %xmm0,%xmm4 + aesdeclast %xmm0,%xmm5 + retq +.size _aesni_decrypt4,.-_aesni_decrypt4 +.type _aesni_encrypt6,@function +.align 16 +_aesni_encrypt6: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + aesenc %xmm1,%xmm2 + pxor %xmm0,%xmm4 + aesenc %xmm1,%xmm3 + pxor %xmm0,%xmm5 + aesenc %xmm1,%xmm4 + pxor %xmm0,%xmm6 + aesenc %xmm1,%xmm5 + pxor %xmm0,%xmm7 + decl %eax + aesenc %xmm1,%xmm6 + movups (%rcx),%xmm0 + aesenc %xmm1,%xmm7 + jmp .Lenc_loop6_enter +.align 16 +.Lenc_loop6: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 +.Lenc_loop6_enter: + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + aesenc %xmm0,%xmm5 + aesenc %xmm0,%xmm6 + aesenc %xmm0,%xmm7 + movups (%rcx),%xmm0 + jnz .Lenc_loop6 + + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + aesenclast %xmm0,%xmm4 + aesenclast %xmm0,%xmm5 + aesenclast %xmm0,%xmm6 + aesenclast %xmm0,%xmm7 + retq +.size _aesni_encrypt6,.-_aesni_encrypt6 +.type _aesni_decrypt6,@function +.align 16 +_aesni_decrypt6: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + aesdec %xmm1,%xmm2 + pxor %xmm0,%xmm4 + aesdec %xmm1,%xmm3 + pxor %xmm0,%xmm5 + aesdec %xmm1,%xmm4 + pxor %xmm0,%xmm6 + aesdec %xmm1,%xmm5 + pxor %xmm0,%xmm7 + decl %eax + aesdec %xmm1,%xmm6 + movups (%rcx),%xmm0 + aesdec %xmm1,%xmm7 + jmp .Ldec_loop6_enter +.align 16 +.Ldec_loop6: + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + decl %eax + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 +.Ldec_loop6_enter: + movups 16(%rcx),%xmm1 + aesdec %xmm0,%xmm2 + aesdec %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesdec %xmm0,%xmm4 + aesdec %xmm0,%xmm5 + aesdec %xmm0,%xmm6 + aesdec %xmm0,%xmm7 + movups (%rcx),%xmm0 + jnz .Ldec_loop6 + + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 + aesdeclast %xmm0,%xmm2 + aesdeclast %xmm0,%xmm3 + aesdeclast %xmm0,%xmm4 + aesdeclast %xmm0,%xmm5 + aesdeclast %xmm0,%xmm6 + aesdeclast %xmm0,%xmm7 + retq +.size _aesni_decrypt6,.-_aesni_decrypt6 +.type _aesni_encrypt8,@function +.align 16 +_aesni_encrypt8: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + aesenc %xmm1,%xmm2 + pxor %xmm0,%xmm4 + aesenc %xmm1,%xmm3 + pxor %xmm0,%xmm5 + aesenc %xmm1,%xmm4 + pxor %xmm0,%xmm6 + aesenc %xmm1,%xmm5 + pxor %xmm0,%xmm7 + decl %eax + aesenc %xmm1,%xmm6 + pxor %xmm0,%xmm8 + aesenc %xmm1,%xmm7 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 + aesenc %xmm1,%xmm8 + aesenc %xmm1,%xmm9 + movups 16(%rcx),%xmm1 + jmp .Lenc_loop8_enter +.align 16 +.Lenc_loop8: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + aesenc %xmm1,%xmm8 + aesenc %xmm1,%xmm9 + movups 16(%rcx),%xmm1 +.Lenc_loop8_enter: + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + aesenc %xmm0,%xmm5 + aesenc %xmm0,%xmm6 + aesenc %xmm0,%xmm7 + aesenc %xmm0,%xmm8 + aesenc %xmm0,%xmm9 + movups (%rcx),%xmm0 + jnz .Lenc_loop8 + + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + aesenc %xmm1,%xmm8 + aesenc %xmm1,%xmm9 + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + aesenclast %xmm0,%xmm4 + aesenclast %xmm0,%xmm5 + aesenclast %xmm0,%xmm6 + aesenclast %xmm0,%xmm7 + aesenclast %xmm0,%xmm8 + aesenclast %xmm0,%xmm9 + retq +.size _aesni_encrypt8,.-_aesni_encrypt8 +.type _aesni_decrypt8,@function +.align 16 +_aesni_decrypt8: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + aesdec %xmm1,%xmm2 + pxor %xmm0,%xmm4 + aesdec %xmm1,%xmm3 + pxor %xmm0,%xmm5 + aesdec %xmm1,%xmm4 + pxor %xmm0,%xmm6 + aesdec %xmm1,%xmm5 + pxor %xmm0,%xmm7 + decl %eax + aesdec %xmm1,%xmm6 + pxor %xmm0,%xmm8 + aesdec %xmm1,%xmm7 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 + aesdec %xmm1,%xmm8 + aesdec %xmm1,%xmm9 + movups 16(%rcx),%xmm1 + jmp .Ldec_loop8_enter +.align 16 +.Ldec_loop8: + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + decl %eax + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 + aesdec %xmm1,%xmm8 + aesdec %xmm1,%xmm9 + movups 16(%rcx),%xmm1 +.Ldec_loop8_enter: + aesdec %xmm0,%xmm2 + aesdec %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesdec %xmm0,%xmm4 + aesdec %xmm0,%xmm5 + aesdec %xmm0,%xmm6 + aesdec %xmm0,%xmm7 + aesdec %xmm0,%xmm8 + aesdec %xmm0,%xmm9 + movups (%rcx),%xmm0 + jnz .Ldec_loop8 + + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 + aesdec %xmm1,%xmm8 + aesdec %xmm1,%xmm9 + aesdeclast %xmm0,%xmm2 + aesdeclast %xmm0,%xmm3 + aesdeclast %xmm0,%xmm4 + aesdeclast %xmm0,%xmm5 + aesdeclast %xmm0,%xmm6 + aesdeclast %xmm0,%xmm7 + aesdeclast %xmm0,%xmm8 + aesdeclast %xmm0,%xmm9 + retq +.size _aesni_decrypt8,.-_aesni_decrypt8 +.globl aesni_ecb_encrypt +.type aesni_ecb_encrypt,@function +.align 16 +aesni_ecb_encrypt: + andq $-16,%rdx + jz .Lecb_ret + + movl 240(%rcx),%eax + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %eax,%r10d + testl %r8d,%r8d + jz .Lecb_decrypt + + cmpq $128,%rdx + jb .Lecb_enc_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp .Lecb_enc_loop8_enter +.align 16 +.Lecb_enc_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +.Lecb_enc_loop8_enter: + + call _aesni_encrypt8 + + subq $128,%rdx + jnc .Lecb_enc_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $128,%rdx + jz .Lecb_ret + +.Lecb_enc_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb .Lecb_enc_one + movups 16(%rdi),%xmm3 + je .Lecb_enc_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb .Lecb_enc_three + movups 48(%rdi),%xmm5 + je .Lecb_enc_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb .Lecb_enc_five + movups 80(%rdi),%xmm7 + je .Lecb_enc_six + movdqu 96(%rdi),%xmm8 + call _aesni_encrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_3: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_3 + aesenclast %xmm1,%xmm2 + movups %xmm2,(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_two: + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_five: + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_six: + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + jmp .Lecb_ret + +.align 16 +.Lecb_decrypt: + cmpq $128,%rdx + jb .Lecb_dec_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp .Lecb_dec_loop8_enter +.align 16 +.Lecb_dec_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +.Lecb_dec_loop8_enter: + + call _aesni_decrypt8 + + movups (%r11),%xmm0 + subq $128,%rdx + jnc .Lecb_dec_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $128,%rdx + jz .Lecb_ret + +.Lecb_dec_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb .Lecb_dec_one + movups 16(%rdi),%xmm3 + je .Lecb_dec_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb .Lecb_dec_three + movups 48(%rdi),%xmm5 + je .Lecb_dec_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb .Lecb_dec_five + movups 80(%rdi),%xmm7 + je .Lecb_dec_six + movups 96(%rdi),%xmm8 + movups (%rcx),%xmm0 + call _aesni_decrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_4: + aesdec %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_4 + aesdeclast %xmm1,%xmm2 + movups %xmm2,(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_two: + xorps %xmm4,%xmm4 + call _aesni_decrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_six: + call _aesni_decrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + +.Lecb_ret: + retq +.size aesni_ecb_encrypt,.-aesni_ecb_encrypt +.globl aesni_ccm64_encrypt_blocks +.type aesni_ccm64_encrypt_blocks,@function +.align 16 +aesni_ccm64_encrypt_blocks: + movl 240(%rcx),%eax + movdqu (%r8),%xmm9 + movdqa .Lincrement64(%rip),%xmm6 + movdqa .Lbswap_mask(%rip),%xmm7 + + shrl $1,%eax + leaq 0(%rcx),%r11 + movdqu (%r9),%xmm3 + movdqa %xmm9,%xmm2 + movl %eax,%r10d +.byte 102,68,15,56,0,207 + jmp .Lccm64_enc_outer +.align 16 +.Lccm64_enc_outer: + movups (%r11),%xmm0 + movl %r10d,%eax + movups (%rdi),%xmm8 + + xorps %xmm0,%xmm2 + movups 16(%r11),%xmm1 + xorps %xmm8,%xmm0 + leaq 32(%r11),%rcx + xorps %xmm0,%xmm3 + movups (%rcx),%xmm0 + +.Lccm64_enc2_loop: + aesenc %xmm1,%xmm2 + decl %eax + aesenc %xmm1,%xmm3 + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm3 + movups 0(%rcx),%xmm0 + jnz .Lccm64_enc2_loop + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + paddq %xmm6,%xmm9 + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + + decq %rdx + leaq 16(%rdi),%rdi + xorps %xmm2,%xmm8 + movdqa %xmm9,%xmm2 + movups %xmm8,(%rsi) + leaq 16(%rsi),%rsi +.byte 102,15,56,0,215 + jnz .Lccm64_enc_outer + + movups %xmm3,(%r9) + retq +.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks +.globl aesni_ccm64_decrypt_blocks +.type aesni_ccm64_decrypt_blocks,@function +.align 16 +aesni_ccm64_decrypt_blocks: + movl 240(%rcx),%eax + movups (%r8),%xmm9 + movdqu (%r9),%xmm3 + movdqa .Lincrement64(%rip),%xmm6 + movdqa .Lbswap_mask(%rip),%xmm7 + + movaps %xmm9,%xmm2 + movl %eax,%r10d + movq %rcx,%r11 +.byte 102,68,15,56,0,207 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_5: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_5 + aesenclast %xmm1,%xmm2 + movups (%rdi),%xmm8 + paddq %xmm6,%xmm9 + leaq 16(%rdi),%rdi + jmp .Lccm64_dec_outer +.align 16 +.Lccm64_dec_outer: + xorps %xmm2,%xmm8 + movdqa %xmm9,%xmm2 + movl %r10d,%eax + movups %xmm8,(%rsi) + leaq 16(%rsi),%rsi +.byte 102,15,56,0,215 + + subq $1,%rdx + jz .Lccm64_dec_break + + movups (%r11),%xmm0 + shrl $1,%eax + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + leaq 32(%r11),%rcx + xorps %xmm0,%xmm2 + xorps %xmm8,%xmm3 + movups (%rcx),%xmm0 + +.Lccm64_dec2_loop: + aesenc %xmm1,%xmm2 + decl %eax + aesenc %xmm1,%xmm3 + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm3 + movups 0(%rcx),%xmm0 + jnz .Lccm64_dec2_loop + movups (%rdi),%xmm8 + paddq %xmm6,%xmm9 + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + leaq 16(%rdi),%rdi + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + jmp .Lccm64_dec_outer + +.align 16 +.Lccm64_dec_break: + + movups (%r11),%xmm0 + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + leaq 32(%r11),%r11 + xorps %xmm8,%xmm3 +.Loop_enc1_6: + aesenc %xmm1,%xmm3 + decl %eax + movups (%r11),%xmm1 + leaq 16(%r11),%r11 + jnz .Loop_enc1_6 + aesenclast %xmm1,%xmm3 + movups %xmm3,(%r9) + retq +.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks +.globl aesni_ctr32_encrypt_blocks +.type aesni_ctr32_encrypt_blocks,@function +.align 16 +aesni_ctr32_encrypt_blocks: + cmpq $1,%rdx + je .Lctr32_one_shortcut + + movdqu (%r8),%xmm14 + movdqa .Lbswap_mask(%rip),%xmm15 + xorl %eax,%eax +.byte 102,69,15,58,22,242,3 +.byte 102,68,15,58,34,240,3 + + movl 240(%rcx),%eax + bswapl %r10d + pxor %xmm12,%xmm12 + pxor %xmm13,%xmm13 +.byte 102,69,15,58,34,226,0 + leaq 3(%r10),%r11 +.byte 102,69,15,58,34,235,0 + incl %r10d +.byte 102,69,15,58,34,226,1 + incq %r11 +.byte 102,69,15,58,34,235,1 + incl %r10d +.byte 102,69,15,58,34,226,2 + incq %r11 +.byte 102,69,15,58,34,235,2 + movdqa %xmm12,-40(%rsp) +.byte 102,69,15,56,0,231 + movdqa %xmm13,-24(%rsp) +.byte 102,69,15,56,0,239 + + pshufd $192,%xmm12,%xmm2 + pshufd $128,%xmm12,%xmm3 + pshufd $64,%xmm12,%xmm4 + cmpq $6,%rdx + jb .Lctr32_tail + shrl $1,%eax + movq %rcx,%r11 + movl %eax,%r10d + subq $6,%rdx + jmp .Lctr32_loop6 + +.align 16 +.Lctr32_loop6: + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm2 + movups (%r11),%xmm0 + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm3 + movups 16(%r11),%xmm1 + pshufd $64,%xmm13,%xmm7 + por %xmm14,%xmm4 + por %xmm14,%xmm5 + xorps %xmm0,%xmm2 + por %xmm14,%xmm6 + por %xmm14,%xmm7 + + + + + pxor %xmm0,%xmm3 + aesenc %xmm1,%xmm2 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 + aesenc %xmm1,%xmm3 + movdqa .Lincrement32(%rip),%xmm13 + pxor %xmm0,%xmm5 + aesenc %xmm1,%xmm4 + movdqa -40(%rsp),%xmm12 + pxor %xmm0,%xmm6 + aesenc %xmm1,%xmm5 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + jmp .Lctr32_enc_loop6_enter +.align 16 +.Lctr32_enc_loop6: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 +.Lctr32_enc_loop6_enter: + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + aesenc %xmm0,%xmm5 + aesenc %xmm0,%xmm6 + aesenc %xmm0,%xmm7 + movups (%rcx),%xmm0 + jnz .Lctr32_enc_loop6 + + aesenc %xmm1,%xmm2 + paddd %xmm13,%xmm12 + aesenc %xmm1,%xmm3 + paddd -24(%rsp),%xmm13 + aesenc %xmm1,%xmm4 + movdqa %xmm12,-40(%rsp) + aesenc %xmm1,%xmm5 + movdqa %xmm13,-24(%rsp) + aesenc %xmm1,%xmm6 +.byte 102,69,15,56,0,231 + aesenc %xmm1,%xmm7 +.byte 102,69,15,56,0,239 + + aesenclast %xmm0,%xmm2 + movups (%rdi),%xmm8 + aesenclast %xmm0,%xmm3 + movups 16(%rdi),%xmm9 + aesenclast %xmm0,%xmm4 + movups 32(%rdi),%xmm10 + aesenclast %xmm0,%xmm5 + movups 48(%rdi),%xmm11 + aesenclast %xmm0,%xmm6 + movups 64(%rdi),%xmm1 + aesenclast %xmm0,%xmm7 + movups 80(%rdi),%xmm0 + leaq 96(%rdi),%rdi + + xorps %xmm2,%xmm8 + pshufd $192,%xmm12,%xmm2 + xorps %xmm3,%xmm9 + pshufd $128,%xmm12,%xmm3 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + pshufd $64,%xmm12,%xmm4 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + xorps %xmm7,%xmm0 + movups %xmm1,64(%rsi) + movups %xmm0,80(%rsi) + leaq 96(%rsi),%rsi + movl %r10d,%eax + subq $6,%rdx + jnc .Lctr32_loop6 + + addq $6,%rdx + jz .Lctr32_done + movq %r11,%rcx + leal 1(%rax,%rax,1),%eax + +.Lctr32_tail: + por %xmm14,%xmm2 + movups (%rdi),%xmm8 + cmpq $2,%rdx + jb .Lctr32_one + + por %xmm14,%xmm3 + movups 16(%rdi),%xmm9 + je .Lctr32_two + + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm4 + movups 32(%rdi),%xmm10 + cmpq $4,%rdx + jb .Lctr32_three + + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm5 + movups 48(%rdi),%xmm11 + je .Lctr32_four + + por %xmm14,%xmm6 + xorps %xmm7,%xmm7 + + call _aesni_encrypt6 + + movups 64(%rdi),%xmm1 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + movups %xmm1,64(%rsi) + jmp .Lctr32_done + +.align 16 +.Lctr32_one_shortcut: + movups (%r8),%xmm2 + movups (%rdi),%xmm8 + movl 240(%rcx),%eax +.Lctr32_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_7: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_7 + aesenclast %xmm1,%xmm2 + xorps %xmm2,%xmm8 + movups %xmm8,(%rsi) + jmp .Lctr32_done + +.align 16 +.Lctr32_two: + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + movups %xmm9,16(%rsi) + jmp .Lctr32_done + +.align 16 +.Lctr32_three: + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + movups %xmm10,32(%rsi) + jmp .Lctr32_done + +.align 16 +.Lctr32_four: + call _aesni_encrypt4 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + movups %xmm11,48(%rsi) + +.Lctr32_done: + retq +.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks +.globl aesni_xts_encrypt +.type aesni_xts_encrypt,@function +.align 16 +aesni_xts_encrypt: + leaq -104(%rsp),%rsp + movups (%r9),%xmm15 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm15 +.Loop_enc1_8: + aesenc %xmm1,%xmm15 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz .Loop_enc1_8 + aesenclast %xmm1,%xmm15 + movq %rcx,%r11 + movl %r10d,%eax + movq %rdx,%r9 + andq $-16,%rdx + + movdqa .Lxts_magic(%rip),%xmm8 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + subq $96,%rdx + jc .Lxts_enc_short + + shrl $1,%eax + subl $1,%eax + movl %eax,%r10d + jmp .Lxts_enc_grandloop + +.align 16 +.Lxts_enc_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu 0(%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + pxor %xmm12,%xmm4 + movdqu 80(%rdi),%xmm7 + leaq 96(%rdi),%rdi + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 + + + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) + aesenc %xmm1,%xmm2 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 + movdqa %xmm11,16(%rsp) + aesenc %xmm1,%xmm3 + pxor %xmm0,%xmm5 + movdqa %xmm12,32(%rsp) + aesenc %xmm1,%xmm4 + pxor %xmm0,%xmm6 + movdqa %xmm13,48(%rsp) + aesenc %xmm1,%xmm5 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax + movdqa %xmm14,64(%rsp) + aesenc %xmm1,%xmm6 + movdqa %xmm15,80(%rsp) + aesenc %xmm1,%xmm7 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp .Lxts_enc_loop6_enter + +.align 16 +.Lxts_enc_loop6: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 +.Lxts_enc_loop6_enter: + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + aesenc %xmm0,%xmm5 + aesenc %xmm0,%xmm6 + aesenc %xmm0,%xmm7 + movups (%rcx),%xmm0 + jnz .Lxts_enc_loop6 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + paddq %xmm15,%xmm15 + aesenc %xmm1,%xmm2 + pand %xmm8,%xmm9 + aesenc %xmm1,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesenc %xmm1,%xmm4 + pxor %xmm9,%xmm15 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + movups 16(%rcx),%xmm1 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + aesenc %xmm0,%xmm2 + pand %xmm8,%xmm9 + aesenc %xmm0,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesenc %xmm0,%xmm4 + pxor %xmm9,%xmm15 + aesenc %xmm0,%xmm5 + aesenc %xmm0,%xmm6 + aesenc %xmm0,%xmm7 + movups 32(%rcx),%xmm0 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + aesenc %xmm1,%xmm2 + pand %xmm8,%xmm9 + aesenc %xmm1,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesenc %xmm1,%xmm4 + pxor %xmm9,%xmm15 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + aesenclast %xmm0,%xmm2 + pand %xmm8,%xmm9 + aesenclast %xmm0,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesenclast %xmm0,%xmm4 + pxor %xmm9,%xmm15 + aesenclast %xmm0,%xmm5 + aesenclast %xmm0,%xmm6 + aesenclast %xmm0,%xmm7 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 + pand %xmm8,%xmm9 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + subq $96,%rdx + jnc .Lxts_enc_grandloop + + leal 3(%rax,%rax,1),%eax + movq %r11,%rcx + movl %eax,%r10d + +.Lxts_enc_short: + addq $96,%rdx + jz .Lxts_enc_done + + cmpq $32,%rdx + jb .Lxts_enc_one + je .Lxts_enc_two + + cmpq $64,%rdx + jb .Lxts_enc_three + je .Lxts_enc_four + + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_encrypt6 + + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_9: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_9 + aesenclast %xmm1,%xmm2 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + leaq 16(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_encrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_encrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_four: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_encrypt4 + + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_done: + andq $15,%r9 + jz .Lxts_enc_ret + movq %r9,%rdx + +.Lxts_enc_steal: + movzbl (%rdi),%eax + movzbl -16(%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,-16(%rsi) + movb %cl,0(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz .Lxts_enc_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups -16(%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_10: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_10 + aesenclast %xmm1,%xmm2 + xorps %xmm10,%xmm2 + movups %xmm2,-16(%rsi) + +.Lxts_enc_ret: + leaq 104(%rsp),%rsp +.Lxts_enc_epilogue: + retq +.size aesni_xts_encrypt,.-aesni_xts_encrypt +.globl aesni_xts_decrypt +.type aesni_xts_decrypt,@function +.align 16 +aesni_xts_decrypt: + leaq -104(%rsp),%rsp + movups (%r9),%xmm15 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm15 +.Loop_enc1_11: + aesenc %xmm1,%xmm15 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz .Loop_enc1_11 + aesenclast %xmm1,%xmm15 + xorl %eax,%eax + testq $15,%rdx + setnz %al + shlq $4,%rax + subq %rax,%rdx + + movq %rcx,%r11 + movl %r10d,%eax + movq %rdx,%r9 + andq $-16,%rdx + + movdqa .Lxts_magic(%rip),%xmm8 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + subq $96,%rdx + jc .Lxts_dec_short + + shrl $1,%eax + subl $1,%eax + movl %eax,%r10d + jmp .Lxts_dec_grandloop + +.align 16 +.Lxts_dec_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu 0(%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + pxor %xmm12,%xmm4 + movdqu 80(%rdi),%xmm7 + leaq 96(%rdi),%rdi + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 + + + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) + aesdec %xmm1,%xmm2 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 + movdqa %xmm11,16(%rsp) + aesdec %xmm1,%xmm3 + pxor %xmm0,%xmm5 + movdqa %xmm12,32(%rsp) + aesdec %xmm1,%xmm4 + pxor %xmm0,%xmm6 + movdqa %xmm13,48(%rsp) + aesdec %xmm1,%xmm5 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax + movdqa %xmm14,64(%rsp) + aesdec %xmm1,%xmm6 + movdqa %xmm15,80(%rsp) + aesdec %xmm1,%xmm7 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp .Lxts_dec_loop6_enter + +.align 16 +.Lxts_dec_loop6: + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + decl %eax + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 +.Lxts_dec_loop6_enter: + movups 16(%rcx),%xmm1 + aesdec %xmm0,%xmm2 + aesdec %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesdec %xmm0,%xmm4 + aesdec %xmm0,%xmm5 + aesdec %xmm0,%xmm6 + aesdec %xmm0,%xmm7 + movups (%rcx),%xmm0 + jnz .Lxts_dec_loop6 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + paddq %xmm15,%xmm15 + aesdec %xmm1,%xmm2 + pand %xmm8,%xmm9 + aesdec %xmm1,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesdec %xmm1,%xmm4 + pxor %xmm9,%xmm15 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 + movups 16(%rcx),%xmm1 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + aesdec %xmm0,%xmm2 + pand %xmm8,%xmm9 + aesdec %xmm0,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesdec %xmm0,%xmm4 + pxor %xmm9,%xmm15 + aesdec %xmm0,%xmm5 + aesdec %xmm0,%xmm6 + aesdec %xmm0,%xmm7 + movups 32(%rcx),%xmm0 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + aesdec %xmm1,%xmm2 + pand %xmm8,%xmm9 + aesdec %xmm1,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesdec %xmm1,%xmm4 + pxor %xmm9,%xmm15 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + aesdeclast %xmm0,%xmm2 + pand %xmm8,%xmm9 + aesdeclast %xmm0,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesdeclast %xmm0,%xmm4 + pxor %xmm9,%xmm15 + aesdeclast %xmm0,%xmm5 + aesdeclast %xmm0,%xmm6 + aesdeclast %xmm0,%xmm7 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 + pand %xmm8,%xmm9 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + subq $96,%rdx + jnc .Lxts_dec_grandloop + + leal 3(%rax,%rax,1),%eax + movq %r11,%rcx + movl %eax,%r10d + +.Lxts_dec_short: + addq $96,%rdx + jz .Lxts_dec_done + + cmpq $32,%rdx + jb .Lxts_dec_one + je .Lxts_dec_two + + cmpq $64,%rdx + jb .Lxts_dec_three + je .Lxts_dec_four + + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_decrypt6 + + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm14 + movdqu %xmm5,48(%rsi) + pcmpgtd %xmm15,%xmm14 + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + pshufd $19,%xmm14,%xmm11 + andq $15,%r9 + jz .Lxts_dec_ret + + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm11 + pxor %xmm15,%xmm11 + jmp .Lxts_dec_done2 + +.align 16 +.Lxts_dec_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_12: + aesdec %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_12 + aesdeclast %xmm1,%xmm2 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + movdqa %xmm12,%xmm11 + leaq 16(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_decrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm13,%xmm11 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_decrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm15,%xmm11 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_four: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movups (%rdi),%xmm2 + pand %xmm8,%xmm9 + movups 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_decrypt4 + + xorps %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm15,%xmm11 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_done: + andq $15,%r9 + jz .Lxts_dec_ret +.Lxts_dec_done2: + movq %r9,%rdx + movq %r11,%rcx + movl %r10d,%eax + + movups (%rdi),%xmm2 + xorps %xmm11,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_13: + aesdec %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_13 + aesdeclast %xmm1,%xmm2 + xorps %xmm11,%xmm2 + movups %xmm2,(%rsi) + +.Lxts_dec_steal: + movzbl 16(%rdi),%eax + movzbl (%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,(%rsi) + movb %cl,16(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz .Lxts_dec_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups (%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_14: + aesdec %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_14 + aesdeclast %xmm1,%xmm2 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + +.Lxts_dec_ret: + leaq 104(%rsp),%rsp +.Lxts_dec_epilogue: + retq +.size aesni_xts_decrypt,.-aesni_xts_decrypt +.globl aesni_cbc_encrypt +.type aesni_cbc_encrypt,@function +.align 16 +aesni_cbc_encrypt: + testq %rdx,%rdx + jz .Lcbc_ret + + movl 240(%rcx),%r10d + movq %rcx,%r11 + testl %r9d,%r9d + jz .Lcbc_decrypt + + movups (%r8),%xmm2 + movl %r10d,%eax + cmpq $16,%rdx + jb .Lcbc_enc_tail + subq $16,%rdx + jmp .Lcbc_enc_loop +.align 16 +.Lcbc_enc_loop: + movups (%rdi),%xmm3 + leaq 16(%rdi),%rdi + + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm3 + leaq 32(%rcx),%rcx + xorps %xmm3,%xmm2 +.Loop_enc1_15: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_15 + aesenclast %xmm1,%xmm2 + movl %r10d,%eax + movq %r11,%rcx + movups %xmm2,0(%rsi) + leaq 16(%rsi),%rsi + subq $16,%rdx + jnc .Lcbc_enc_loop + addq $16,%rdx + jnz .Lcbc_enc_tail + movups %xmm2,(%r8) + jmp .Lcbc_ret + +.Lcbc_enc_tail: + movq %rdx,%rcx + xchgq %rdi,%rsi +.long 0x9066A4F3 + movl $16,%ecx + subq %rdx,%rcx + xorl %eax,%eax +.long 0x9066AAF3 + leaq -16(%rdi),%rdi + movl %r10d,%eax + movq %rdi,%rsi + movq %r11,%rcx + xorq %rdx,%rdx + jmp .Lcbc_enc_loop + +.align 16 +.Lcbc_decrypt: + movups (%r8),%xmm9 + movl %r10d,%eax + cmpq $112,%rdx + jbe .Lcbc_dec_tail + shrl $1,%r10d + subq $112,%rdx + movl %r10d,%eax + movaps %xmm9,-24(%rsp) + jmp .Lcbc_dec_loop8_enter +.align 16 +.Lcbc_dec_loop8: + movaps %xmm0,-24(%rsp) + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi +.Lcbc_dec_loop8_enter: + movups (%rcx),%xmm0 + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 16(%rcx),%xmm1 + + leaq 32(%rcx),%rcx + movdqu 32(%rdi),%xmm4 + xorps %xmm0,%xmm2 + movdqu 48(%rdi),%xmm5 + xorps %xmm0,%xmm3 + movdqu 64(%rdi),%xmm6 + aesdec %xmm1,%xmm2 + pxor %xmm0,%xmm4 + movdqu 80(%rdi),%xmm7 + aesdec %xmm1,%xmm3 + pxor %xmm0,%xmm5 + movdqu 96(%rdi),%xmm8 + aesdec %xmm1,%xmm4 + pxor %xmm0,%xmm6 + movdqu 112(%rdi),%xmm9 + aesdec %xmm1,%xmm5 + pxor %xmm0,%xmm7 + decl %eax + aesdec %xmm1,%xmm6 + pxor %xmm0,%xmm8 + aesdec %xmm1,%xmm7 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 + aesdec %xmm1,%xmm8 + aesdec %xmm1,%xmm9 + movups 16(%rcx),%xmm1 + + call .Ldec_loop8_enter + + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps -24(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm1 + xorps %xmm0,%xmm8 + movups 112(%rdi),%xmm0 + xorps %xmm1,%xmm9 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movq %r11,%rcx + movups %xmm7,80(%rsi) + leaq 128(%rdi),%rdi + movups %xmm8,96(%rsi) + leaq 112(%rsi),%rsi + subq $128,%rdx + ja .Lcbc_dec_loop8 + + movaps %xmm9,%xmm2 + movaps %xmm0,%xmm9 + addq $112,%rdx + jle .Lcbc_dec_tail_collected + movups %xmm2,(%rsi) + leal 1(%r10,%r10,1),%eax + leaq 16(%rsi),%rsi +.Lcbc_dec_tail: + movups (%rdi),%xmm2 + movaps %xmm2,%xmm8 + cmpq $16,%rdx + jbe .Lcbc_dec_one + + movups 16(%rdi),%xmm3 + movaps %xmm3,%xmm7 + cmpq $32,%rdx + jbe .Lcbc_dec_two + + movups 32(%rdi),%xmm4 + movaps %xmm4,%xmm6 + cmpq $48,%rdx + jbe .Lcbc_dec_three + + movups 48(%rdi),%xmm5 + cmpq $64,%rdx + jbe .Lcbc_dec_four + + movups 64(%rdi),%xmm6 + cmpq $80,%rdx + jbe .Lcbc_dec_five + + movups 80(%rdi),%xmm7 + cmpq $96,%rdx + jbe .Lcbc_dec_six + + movups 96(%rdi),%xmm8 + movaps %xmm9,-24(%rsp) + call _aesni_decrypt8 + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps -24(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm9 + xorps %xmm0,%xmm8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + movaps %xmm8,%xmm2 + subq $112,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_16: + aesdec %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_16 + aesdeclast %xmm1,%xmm2 + xorps %xmm9,%xmm2 + movaps %xmm8,%xmm9 + subq $16,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_two: + xorps %xmm4,%xmm4 + call _aesni_decrypt3 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + movaps %xmm7,%xmm9 + movaps %xmm3,%xmm2 + leaq 16(%rsi),%rsi + subq $32,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_three: + call _aesni_decrypt3 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + movaps %xmm6,%xmm9 + movaps %xmm4,%xmm2 + leaq 32(%rsi),%rsi + subq $48,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_four: + call _aesni_decrypt4 + xorps %xmm9,%xmm2 + movups 48(%rdi),%xmm9 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + xorps %xmm6,%xmm5 + movups %xmm4,32(%rsi) + movaps %xmm5,%xmm2 + leaq 48(%rsi),%rsi + subq $64,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm9 + xorps %xmm1,%xmm6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + movaps %xmm6,%xmm2 + subq $80,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_six: + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm0 + xorps %xmm1,%xmm6 + movups 80(%rdi),%xmm9 + xorps %xmm0,%xmm7 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + movaps %xmm7,%xmm2 + subq $96,%rdx + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_tail_collected: + andq $15,%rdx + movups %xmm9,(%r8) + jnz .Lcbc_dec_tail_partial + movups %xmm2,(%rsi) + jmp .Lcbc_dec_ret +.align 16 +.Lcbc_dec_tail_partial: + movaps %xmm2,-24(%rsp) + movq $16,%rcx + movq %rsi,%rdi + subq %rdx,%rcx + leaq -24(%rsp),%rsi +.long 0x9066A4F3 + +.Lcbc_dec_ret: +.Lcbc_ret: + retq +.size aesni_cbc_encrypt,.-aesni_cbc_encrypt +.globl aesni_set_decrypt_key +.type aesni_set_decrypt_key,@function +.align 16 +aesni_set_decrypt_key: + subq $8,%rsp + call __aesni_set_encrypt_key + shll $4,%esi + testl %eax,%eax + jnz .Ldec_key_ret + leaq 16(%rdx,%rsi,1),%rdi + + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 + movups %xmm0,(%rdi) + movups %xmm1,(%rdx) + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + +.Ldec_key_inverse: + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 + aesimc %xmm0,%xmm0 + aesimc %xmm1,%xmm1 + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + movups %xmm0,16(%rdi) + movups %xmm1,-16(%rdx) + cmpq %rdx,%rdi + ja .Ldec_key_inverse + + movups (%rdx),%xmm0 + aesimc %xmm0,%xmm0 + movups %xmm0,(%rdi) +.Ldec_key_ret: + addq $8,%rsp + retq +.LSEH_end_set_decrypt_key: +.size aesni_set_decrypt_key,.-aesni_set_decrypt_key +.globl aesni_set_encrypt_key +.type aesni_set_encrypt_key,@function +.align 16 +aesni_set_encrypt_key: +__aesni_set_encrypt_key: + subq $8,%rsp + movq $-1,%rax + testq %rdi,%rdi + jz .Lenc_key_ret + testq %rdx,%rdx + jz .Lenc_key_ret + + movups (%rdi),%xmm0 + xorps %xmm4,%xmm4 + leaq 16(%rdx),%rax + cmpl $256,%esi + je .L14rounds + cmpl $192,%esi + je .L12rounds + cmpl $128,%esi + jne .Lbad_keybits + +.L10rounds: + movl $9,%esi + movups %xmm0,(%rdx) + aeskeygenassist $1,%xmm0,%xmm1 + call .Lkey_expansion_128_cold + aeskeygenassist $2,%xmm0,%xmm1 + call .Lkey_expansion_128 + aeskeygenassist $4,%xmm0,%xmm1 + call .Lkey_expansion_128 + aeskeygenassist $8,%xmm0,%xmm1 + call .Lkey_expansion_128 + aeskeygenassist $16,%xmm0,%xmm1 + call .Lkey_expansion_128 + aeskeygenassist $32,%xmm0,%xmm1 + call .Lkey_expansion_128 + aeskeygenassist $64,%xmm0,%xmm1 + call .Lkey_expansion_128 + aeskeygenassist $128,%xmm0,%xmm1 + call .Lkey_expansion_128 + aeskeygenassist $27,%xmm0,%xmm1 + call .Lkey_expansion_128 + aeskeygenassist $54,%xmm0,%xmm1 + call .Lkey_expansion_128 + movups %xmm0,(%rax) + movl %esi,80(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + +.align 16 +.L12rounds: + movq 16(%rdi),%xmm2 + movl $11,%esi + movups %xmm0,(%rdx) + aeskeygenassist $1,%xmm2,%xmm1 + call .Lkey_expansion_192a_cold + aeskeygenassist $2,%xmm2,%xmm1 + call .Lkey_expansion_192b + aeskeygenassist $4,%xmm2,%xmm1 + call .Lkey_expansion_192a + aeskeygenassist $8,%xmm2,%xmm1 + call .Lkey_expansion_192b + aeskeygenassist $16,%xmm2,%xmm1 + call .Lkey_expansion_192a + aeskeygenassist $32,%xmm2,%xmm1 + call .Lkey_expansion_192b + aeskeygenassist $64,%xmm2,%xmm1 + call .Lkey_expansion_192a + aeskeygenassist $128,%xmm2,%xmm1 + call .Lkey_expansion_192b + movups %xmm0,(%rax) + movl %esi,48(%rax) + xorq %rax,%rax + jmp .Lenc_key_ret + +.align 16 +.L14rounds: + movups 16(%rdi),%xmm2 + movl $13,%esi + leaq 16(%rax),%rax + movups %xmm0,(%rdx) + movups %xmm2,16(%rdx) + aeskeygenassist $1,%xmm2,%xmm1 + call .Lkey_expansion_256a_cold + aeskeygenassist $1,%xmm0,%xmm1 + call .Lkey_expansion_256b + aeskeygenassist $2,%xmm2,%xmm1 + call .Lkey_expansion_256a + aeskeygenassist $2,%xmm0,%xmm1 + call .Lkey_expansion_256b + aeskeygenassist $4,%xmm2,%xmm1 + call .Lkey_expansion_256a + aeskeygenassist $4,%xmm0,%xmm1 + call .Lkey_expansion_256b + aeskeygenassist $8,%xmm2,%xmm1 + call .Lkey_expansion_256a + aeskeygenassist $8,%xmm0,%xmm1 + call .Lkey_expansion_256b + aeskeygenassist $16,%xmm2,%xmm1 + call .Lkey_expansion_256a + aeskeygenassist $16,%xmm0,%xmm1 + call .Lkey_expansion_256b + aeskeygenassist $32,%xmm2,%xmm1 + call .Lkey_expansion_256a + aeskeygenassist $32,%xmm0,%xmm1 + call .Lkey_expansion_256b + aeskeygenassist $64,%xmm2,%xmm1 + call .Lkey_expansion_256a + movups %xmm0,(%rax) + movl %esi,16(%rax) + xorq %rax,%rax + jmp .Lenc_key_ret + +.align 16 +.Lbad_keybits: + movq $-2,%rax +.Lenc_key_ret: + addq $8,%rsp + retq +.LSEH_end_set_encrypt_key: + +.align 16 +.Lkey_expansion_128: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + retq + +.align 16 +.Lkey_expansion_192a: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_192a_cold: + movaps %xmm2,%xmm5 +.Lkey_expansion_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + retq + +.align 16 +.Lkey_expansion_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%rax) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%rax) + leaq 32(%rax),%rax + jmp .Lkey_expansion_192b_warm + +.align 16 +.Lkey_expansion_256a: + movups %xmm2,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + retq + +.align 16 +.Lkey_expansion_256b: + movups %xmm0,(%rax) + leaq 16(%rax),%rax + + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + retq +.size aesni_set_encrypt_key,.-aesni_set_encrypt_key +.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.Lincrement32: +.long 6,6,6,0 +.Lincrement64: +.long 1,0,0,0 +.Lxts_magic: +.long 0x87,0,1,0 + +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +#if defined(HAVE_GNU_STACK) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/ext/libressl/crypto/aes/aesni-macosx-x86_64.S b/ext/libressl/crypto/aes/aesni-macosx-x86_64.S new file mode 100644 index 0000000..6b3216b --- /dev/null +++ b/ext/libressl/crypto/aes/aesni-macosx-x86_64.S @@ -0,0 +1,2536 @@ +#include "x86_arch.h" +.text +.globl _aesni_encrypt + +.p2align 4 +_aesni_encrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +L$oop_enc1_1: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz L$oop_enc1_1 + aesenclast %xmm1,%xmm2 + movups %xmm2,(%rsi) + retq + + +.globl _aesni_decrypt + +.p2align 4 +_aesni_decrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +L$oop_dec1_2: + aesdec %xmm1,%xmm2 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz L$oop_dec1_2 + aesdeclast %xmm1,%xmm2 + movups %xmm2,(%rsi) + retq + + +.p2align 4 +_aesni_encrypt3: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups (%rcx),%xmm0 + +L$enc_loop3: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + movups (%rcx),%xmm0 + jnz L$enc_loop3 + + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + aesenc %xmm1,%xmm4 + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + aesenclast %xmm0,%xmm4 + retq + + +.p2align 4 +_aesni_decrypt3: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups (%rcx),%xmm0 + +L$dec_loop3: + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + decl %eax + aesdec %xmm1,%xmm4 + movups 16(%rcx),%xmm1 + aesdec %xmm0,%xmm2 + aesdec %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesdec %xmm0,%xmm4 + movups (%rcx),%xmm0 + jnz L$dec_loop3 + + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + aesdec %xmm1,%xmm4 + aesdeclast %xmm0,%xmm2 + aesdeclast %xmm0,%xmm3 + aesdeclast %xmm0,%xmm4 + retq + + +.p2align 4 +_aesni_encrypt4: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups (%rcx),%xmm0 + +L$enc_loop4: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + aesenc %xmm0,%xmm5 + movups (%rcx),%xmm0 + jnz L$enc_loop4 + + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + aesenclast %xmm0,%xmm4 + aesenclast %xmm0,%xmm5 + retq + + +.p2align 4 +_aesni_decrypt4: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups (%rcx),%xmm0 + +L$dec_loop4: + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + decl %eax + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + movups 16(%rcx),%xmm1 + aesdec %xmm0,%xmm2 + aesdec %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesdec %xmm0,%xmm4 + aesdec %xmm0,%xmm5 + movups (%rcx),%xmm0 + jnz L$dec_loop4 + + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdeclast %xmm0,%xmm2 + aesdeclast %xmm0,%xmm3 + aesdeclast %xmm0,%xmm4 + aesdeclast %xmm0,%xmm5 + retq + + +.p2align 4 +_aesni_encrypt6: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + aesenc %xmm1,%xmm2 + pxor %xmm0,%xmm4 + aesenc %xmm1,%xmm3 + pxor %xmm0,%xmm5 + aesenc %xmm1,%xmm4 + pxor %xmm0,%xmm6 + aesenc %xmm1,%xmm5 + pxor %xmm0,%xmm7 + decl %eax + aesenc %xmm1,%xmm6 + movups (%rcx),%xmm0 + aesenc %xmm1,%xmm7 + jmp L$enc_loop6_enter +.p2align 4 +L$enc_loop6: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 +L$enc_loop6_enter: + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + aesenc %xmm0,%xmm5 + aesenc %xmm0,%xmm6 + aesenc %xmm0,%xmm7 + movups (%rcx),%xmm0 + jnz L$enc_loop6 + + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + aesenclast %xmm0,%xmm4 + aesenclast %xmm0,%xmm5 + aesenclast %xmm0,%xmm6 + aesenclast %xmm0,%xmm7 + retq + + +.p2align 4 +_aesni_decrypt6: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + aesdec %xmm1,%xmm2 + pxor %xmm0,%xmm4 + aesdec %xmm1,%xmm3 + pxor %xmm0,%xmm5 + aesdec %xmm1,%xmm4 + pxor %xmm0,%xmm6 + aesdec %xmm1,%xmm5 + pxor %xmm0,%xmm7 + decl %eax + aesdec %xmm1,%xmm6 + movups (%rcx),%xmm0 + aesdec %xmm1,%xmm7 + jmp L$dec_loop6_enter +.p2align 4 +L$dec_loop6: + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + decl %eax + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 +L$dec_loop6_enter: + movups 16(%rcx),%xmm1 + aesdec %xmm0,%xmm2 + aesdec %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesdec %xmm0,%xmm4 + aesdec %xmm0,%xmm5 + aesdec %xmm0,%xmm6 + aesdec %xmm0,%xmm7 + movups (%rcx),%xmm0 + jnz L$dec_loop6 + + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 + aesdeclast %xmm0,%xmm2 + aesdeclast %xmm0,%xmm3 + aesdeclast %xmm0,%xmm4 + aesdeclast %xmm0,%xmm5 + aesdeclast %xmm0,%xmm6 + aesdeclast %xmm0,%xmm7 + retq + + +.p2align 4 +_aesni_encrypt8: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + aesenc %xmm1,%xmm2 + pxor %xmm0,%xmm4 + aesenc %xmm1,%xmm3 + pxor %xmm0,%xmm5 + aesenc %xmm1,%xmm4 + pxor %xmm0,%xmm6 + aesenc %xmm1,%xmm5 + pxor %xmm0,%xmm7 + decl %eax + aesenc %xmm1,%xmm6 + pxor %xmm0,%xmm8 + aesenc %xmm1,%xmm7 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 + aesenc %xmm1,%xmm8 + aesenc %xmm1,%xmm9 + movups 16(%rcx),%xmm1 + jmp L$enc_loop8_enter +.p2align 4 +L$enc_loop8: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + aesenc %xmm1,%xmm8 + aesenc %xmm1,%xmm9 + movups 16(%rcx),%xmm1 +L$enc_loop8_enter: + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + aesenc %xmm0,%xmm5 + aesenc %xmm0,%xmm6 + aesenc %xmm0,%xmm7 + aesenc %xmm0,%xmm8 + aesenc %xmm0,%xmm9 + movups (%rcx),%xmm0 + jnz L$enc_loop8 + + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + aesenc %xmm1,%xmm8 + aesenc %xmm1,%xmm9 + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + aesenclast %xmm0,%xmm4 + aesenclast %xmm0,%xmm5 + aesenclast %xmm0,%xmm6 + aesenclast %xmm0,%xmm7 + aesenclast %xmm0,%xmm8 + aesenclast %xmm0,%xmm9 + retq + + +.p2align 4 +_aesni_decrypt8: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + aesdec %xmm1,%xmm2 + pxor %xmm0,%xmm4 + aesdec %xmm1,%xmm3 + pxor %xmm0,%xmm5 + aesdec %xmm1,%xmm4 + pxor %xmm0,%xmm6 + aesdec %xmm1,%xmm5 + pxor %xmm0,%xmm7 + decl %eax + aesdec %xmm1,%xmm6 + pxor %xmm0,%xmm8 + aesdec %xmm1,%xmm7 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 + aesdec %xmm1,%xmm8 + aesdec %xmm1,%xmm9 + movups 16(%rcx),%xmm1 + jmp L$dec_loop8_enter +.p2align 4 +L$dec_loop8: + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + decl %eax + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 + aesdec %xmm1,%xmm8 + aesdec %xmm1,%xmm9 + movups 16(%rcx),%xmm1 +L$dec_loop8_enter: + aesdec %xmm0,%xmm2 + aesdec %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesdec %xmm0,%xmm4 + aesdec %xmm0,%xmm5 + aesdec %xmm0,%xmm6 + aesdec %xmm0,%xmm7 + aesdec %xmm0,%xmm8 + aesdec %xmm0,%xmm9 + movups (%rcx),%xmm0 + jnz L$dec_loop8 + + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 + aesdec %xmm1,%xmm8 + aesdec %xmm1,%xmm9 + aesdeclast %xmm0,%xmm2 + aesdeclast %xmm0,%xmm3 + aesdeclast %xmm0,%xmm4 + aesdeclast %xmm0,%xmm5 + aesdeclast %xmm0,%xmm6 + aesdeclast %xmm0,%xmm7 + aesdeclast %xmm0,%xmm8 + aesdeclast %xmm0,%xmm9 + retq + +.globl _aesni_ecb_encrypt + +.p2align 4 +_aesni_ecb_encrypt: + andq $-16,%rdx + jz L$ecb_ret + + movl 240(%rcx),%eax + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %eax,%r10d + testl %r8d,%r8d + jz L$ecb_decrypt + + cmpq $128,%rdx + jb L$ecb_enc_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp L$ecb_enc_loop8_enter +.p2align 4 +L$ecb_enc_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +L$ecb_enc_loop8_enter: + + call _aesni_encrypt8 + + subq $128,%rdx + jnc L$ecb_enc_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $128,%rdx + jz L$ecb_ret + +L$ecb_enc_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb L$ecb_enc_one + movups 16(%rdi),%xmm3 + je L$ecb_enc_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb L$ecb_enc_three + movups 48(%rdi),%xmm5 + je L$ecb_enc_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb L$ecb_enc_five + movups 80(%rdi),%xmm7 + je L$ecb_enc_six + movdqu 96(%rdi),%xmm8 + call _aesni_encrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_3: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_3 + aesenclast %xmm1,%xmm2 + movups %xmm2,(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_two: + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_five: + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_six: + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + jmp L$ecb_ret + +.p2align 4 +L$ecb_decrypt: + cmpq $128,%rdx + jb L$ecb_dec_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp L$ecb_dec_loop8_enter +.p2align 4 +L$ecb_dec_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +L$ecb_dec_loop8_enter: + + call _aesni_decrypt8 + + movups (%r11),%xmm0 + subq $128,%rdx + jnc L$ecb_dec_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $128,%rdx + jz L$ecb_ret + +L$ecb_dec_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb L$ecb_dec_one + movups 16(%rdi),%xmm3 + je L$ecb_dec_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb L$ecb_dec_three + movups 48(%rdi),%xmm5 + je L$ecb_dec_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb L$ecb_dec_five + movups 80(%rdi),%xmm7 + je L$ecb_dec_six + movups 96(%rdi),%xmm8 + movups (%rcx),%xmm0 + call _aesni_decrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_4: + aesdec %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_4 + aesdeclast %xmm1,%xmm2 + movups %xmm2,(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_two: + xorps %xmm4,%xmm4 + call _aesni_decrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_six: + call _aesni_decrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + +L$ecb_ret: + retq + +.globl _aesni_ccm64_encrypt_blocks + +.p2align 4 +_aesni_ccm64_encrypt_blocks: + movl 240(%rcx),%eax + movdqu (%r8),%xmm9 + movdqa L$increment64(%rip),%xmm6 + movdqa L$bswap_mask(%rip),%xmm7 + + shrl $1,%eax + leaq 0(%rcx),%r11 + movdqu (%r9),%xmm3 + movdqa %xmm9,%xmm2 + movl %eax,%r10d +.byte 102,68,15,56,0,207 + jmp L$ccm64_enc_outer +.p2align 4 +L$ccm64_enc_outer: + movups (%r11),%xmm0 + movl %r10d,%eax + movups (%rdi),%xmm8 + + xorps %xmm0,%xmm2 + movups 16(%r11),%xmm1 + xorps %xmm8,%xmm0 + leaq 32(%r11),%rcx + xorps %xmm0,%xmm3 + movups (%rcx),%xmm0 + +L$ccm64_enc2_loop: + aesenc %xmm1,%xmm2 + decl %eax + aesenc %xmm1,%xmm3 + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm3 + movups 0(%rcx),%xmm0 + jnz L$ccm64_enc2_loop + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + paddq %xmm6,%xmm9 + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + + decq %rdx + leaq 16(%rdi),%rdi + xorps %xmm2,%xmm8 + movdqa %xmm9,%xmm2 + movups %xmm8,(%rsi) + leaq 16(%rsi),%rsi +.byte 102,15,56,0,215 + jnz L$ccm64_enc_outer + + movups %xmm3,(%r9) + retq + +.globl _aesni_ccm64_decrypt_blocks + +.p2align 4 +_aesni_ccm64_decrypt_blocks: + movl 240(%rcx),%eax + movups (%r8),%xmm9 + movdqu (%r9),%xmm3 + movdqa L$increment64(%rip),%xmm6 + movdqa L$bswap_mask(%rip),%xmm7 + + movaps %xmm9,%xmm2 + movl %eax,%r10d + movq %rcx,%r11 +.byte 102,68,15,56,0,207 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_5: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_5 + aesenclast %xmm1,%xmm2 + movups (%rdi),%xmm8 + paddq %xmm6,%xmm9 + leaq 16(%rdi),%rdi + jmp L$ccm64_dec_outer +.p2align 4 +L$ccm64_dec_outer: + xorps %xmm2,%xmm8 + movdqa %xmm9,%xmm2 + movl %r10d,%eax + movups %xmm8,(%rsi) + leaq 16(%rsi),%rsi +.byte 102,15,56,0,215 + + subq $1,%rdx + jz L$ccm64_dec_break + + movups (%r11),%xmm0 + shrl $1,%eax + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + leaq 32(%r11),%rcx + xorps %xmm0,%xmm2 + xorps %xmm8,%xmm3 + movups (%rcx),%xmm0 + +L$ccm64_dec2_loop: + aesenc %xmm1,%xmm2 + decl %eax + aesenc %xmm1,%xmm3 + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm3 + movups 0(%rcx),%xmm0 + jnz L$ccm64_dec2_loop + movups (%rdi),%xmm8 + paddq %xmm6,%xmm9 + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + leaq 16(%rdi),%rdi + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + jmp L$ccm64_dec_outer + +.p2align 4 +L$ccm64_dec_break: + + movups (%r11),%xmm0 + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + leaq 32(%r11),%r11 + xorps %xmm8,%xmm3 +L$oop_enc1_6: + aesenc %xmm1,%xmm3 + decl %eax + movups (%r11),%xmm1 + leaq 16(%r11),%r11 + jnz L$oop_enc1_6 + aesenclast %xmm1,%xmm3 + movups %xmm3,(%r9) + retq + +.globl _aesni_ctr32_encrypt_blocks + +.p2align 4 +_aesni_ctr32_encrypt_blocks: + cmpq $1,%rdx + je L$ctr32_one_shortcut + + movdqu (%r8),%xmm14 + movdqa L$bswap_mask(%rip),%xmm15 + xorl %eax,%eax +.byte 102,69,15,58,22,242,3 +.byte 102,68,15,58,34,240,3 + + movl 240(%rcx),%eax + bswapl %r10d + pxor %xmm12,%xmm12 + pxor %xmm13,%xmm13 +.byte 102,69,15,58,34,226,0 + leaq 3(%r10),%r11 +.byte 102,69,15,58,34,235,0 + incl %r10d +.byte 102,69,15,58,34,226,1 + incq %r11 +.byte 102,69,15,58,34,235,1 + incl %r10d +.byte 102,69,15,58,34,226,2 + incq %r11 +.byte 102,69,15,58,34,235,2 + movdqa %xmm12,-40(%rsp) +.byte 102,69,15,56,0,231 + movdqa %xmm13,-24(%rsp) +.byte 102,69,15,56,0,239 + + pshufd $192,%xmm12,%xmm2 + pshufd $128,%xmm12,%xmm3 + pshufd $64,%xmm12,%xmm4 + cmpq $6,%rdx + jb L$ctr32_tail + shrl $1,%eax + movq %rcx,%r11 + movl %eax,%r10d + subq $6,%rdx + jmp L$ctr32_loop6 + +.p2align 4 +L$ctr32_loop6: + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm2 + movups (%r11),%xmm0 + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm3 + movups 16(%r11),%xmm1 + pshufd $64,%xmm13,%xmm7 + por %xmm14,%xmm4 + por %xmm14,%xmm5 + xorps %xmm0,%xmm2 + por %xmm14,%xmm6 + por %xmm14,%xmm7 + + + + + pxor %xmm0,%xmm3 + aesenc %xmm1,%xmm2 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 + aesenc %xmm1,%xmm3 + movdqa L$increment32(%rip),%xmm13 + pxor %xmm0,%xmm5 + aesenc %xmm1,%xmm4 + movdqa -40(%rsp),%xmm12 + pxor %xmm0,%xmm6 + aesenc %xmm1,%xmm5 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + jmp L$ctr32_enc_loop6_enter +.p2align 4 +L$ctr32_enc_loop6: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 +L$ctr32_enc_loop6_enter: + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + aesenc %xmm0,%xmm5 + aesenc %xmm0,%xmm6 + aesenc %xmm0,%xmm7 + movups (%rcx),%xmm0 + jnz L$ctr32_enc_loop6 + + aesenc %xmm1,%xmm2 + paddd %xmm13,%xmm12 + aesenc %xmm1,%xmm3 + paddd -24(%rsp),%xmm13 + aesenc %xmm1,%xmm4 + movdqa %xmm12,-40(%rsp) + aesenc %xmm1,%xmm5 + movdqa %xmm13,-24(%rsp) + aesenc %xmm1,%xmm6 +.byte 102,69,15,56,0,231 + aesenc %xmm1,%xmm7 +.byte 102,69,15,56,0,239 + + aesenclast %xmm0,%xmm2 + movups (%rdi),%xmm8 + aesenclast %xmm0,%xmm3 + movups 16(%rdi),%xmm9 + aesenclast %xmm0,%xmm4 + movups 32(%rdi),%xmm10 + aesenclast %xmm0,%xmm5 + movups 48(%rdi),%xmm11 + aesenclast %xmm0,%xmm6 + movups 64(%rdi),%xmm1 + aesenclast %xmm0,%xmm7 + movups 80(%rdi),%xmm0 + leaq 96(%rdi),%rdi + + xorps %xmm2,%xmm8 + pshufd $192,%xmm12,%xmm2 + xorps %xmm3,%xmm9 + pshufd $128,%xmm12,%xmm3 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + pshufd $64,%xmm12,%xmm4 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + xorps %xmm7,%xmm0 + movups %xmm1,64(%rsi) + movups %xmm0,80(%rsi) + leaq 96(%rsi),%rsi + movl %r10d,%eax + subq $6,%rdx + jnc L$ctr32_loop6 + + addq $6,%rdx + jz L$ctr32_done + movq %r11,%rcx + leal 1(%rax,%rax,1),%eax + +L$ctr32_tail: + por %xmm14,%xmm2 + movups (%rdi),%xmm8 + cmpq $2,%rdx + jb L$ctr32_one + + por %xmm14,%xmm3 + movups 16(%rdi),%xmm9 + je L$ctr32_two + + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm4 + movups 32(%rdi),%xmm10 + cmpq $4,%rdx + jb L$ctr32_three + + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm5 + movups 48(%rdi),%xmm11 + je L$ctr32_four + + por %xmm14,%xmm6 + xorps %xmm7,%xmm7 + + call _aesni_encrypt6 + + movups 64(%rdi),%xmm1 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + movups %xmm1,64(%rsi) + jmp L$ctr32_done + +.p2align 4 +L$ctr32_one_shortcut: + movups (%r8),%xmm2 + movups (%rdi),%xmm8 + movl 240(%rcx),%eax +L$ctr32_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_7: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_7 + aesenclast %xmm1,%xmm2 + xorps %xmm2,%xmm8 + movups %xmm8,(%rsi) + jmp L$ctr32_done + +.p2align 4 +L$ctr32_two: + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + movups %xmm9,16(%rsi) + jmp L$ctr32_done + +.p2align 4 +L$ctr32_three: + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + movups %xmm10,32(%rsi) + jmp L$ctr32_done + +.p2align 4 +L$ctr32_four: + call _aesni_encrypt4 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + movups %xmm11,48(%rsi) + +L$ctr32_done: + retq + +.globl _aesni_xts_encrypt + +.p2align 4 +_aesni_xts_encrypt: + leaq -104(%rsp),%rsp + movups (%r9),%xmm15 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm15 +L$oop_enc1_8: + aesenc %xmm1,%xmm15 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz L$oop_enc1_8 + aesenclast %xmm1,%xmm15 + movq %rcx,%r11 + movl %r10d,%eax + movq %rdx,%r9 + andq $-16,%rdx + + movdqa L$xts_magic(%rip),%xmm8 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + subq $96,%rdx + jc L$xts_enc_short + + shrl $1,%eax + subl $1,%eax + movl %eax,%r10d + jmp L$xts_enc_grandloop + +.p2align 4 +L$xts_enc_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu 0(%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + pxor %xmm12,%xmm4 + movdqu 80(%rdi),%xmm7 + leaq 96(%rdi),%rdi + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 + + + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) + aesenc %xmm1,%xmm2 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 + movdqa %xmm11,16(%rsp) + aesenc %xmm1,%xmm3 + pxor %xmm0,%xmm5 + movdqa %xmm12,32(%rsp) + aesenc %xmm1,%xmm4 + pxor %xmm0,%xmm6 + movdqa %xmm13,48(%rsp) + aesenc %xmm1,%xmm5 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax + movdqa %xmm14,64(%rsp) + aesenc %xmm1,%xmm6 + movdqa %xmm15,80(%rsp) + aesenc %xmm1,%xmm7 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp L$xts_enc_loop6_enter + +.p2align 4 +L$xts_enc_loop6: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 +L$xts_enc_loop6_enter: + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + aesenc %xmm0,%xmm5 + aesenc %xmm0,%xmm6 + aesenc %xmm0,%xmm7 + movups (%rcx),%xmm0 + jnz L$xts_enc_loop6 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + paddq %xmm15,%xmm15 + aesenc %xmm1,%xmm2 + pand %xmm8,%xmm9 + aesenc %xmm1,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesenc %xmm1,%xmm4 + pxor %xmm9,%xmm15 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + movups 16(%rcx),%xmm1 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + aesenc %xmm0,%xmm2 + pand %xmm8,%xmm9 + aesenc %xmm0,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesenc %xmm0,%xmm4 + pxor %xmm9,%xmm15 + aesenc %xmm0,%xmm5 + aesenc %xmm0,%xmm6 + aesenc %xmm0,%xmm7 + movups 32(%rcx),%xmm0 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + aesenc %xmm1,%xmm2 + pand %xmm8,%xmm9 + aesenc %xmm1,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesenc %xmm1,%xmm4 + pxor %xmm9,%xmm15 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + aesenclast %xmm0,%xmm2 + pand %xmm8,%xmm9 + aesenclast %xmm0,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesenclast %xmm0,%xmm4 + pxor %xmm9,%xmm15 + aesenclast %xmm0,%xmm5 + aesenclast %xmm0,%xmm6 + aesenclast %xmm0,%xmm7 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 + pand %xmm8,%xmm9 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + subq $96,%rdx + jnc L$xts_enc_grandloop + + leal 3(%rax,%rax,1),%eax + movq %r11,%rcx + movl %eax,%r10d + +L$xts_enc_short: + addq $96,%rdx + jz L$xts_enc_done + + cmpq $32,%rdx + jb L$xts_enc_one + je L$xts_enc_two + + cmpq $64,%rdx + jb L$xts_enc_three + je L$xts_enc_four + + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_encrypt6 + + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_9: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_9 + aesenclast %xmm1,%xmm2 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + leaq 16(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_encrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_encrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_four: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_encrypt4 + + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_done: + andq $15,%r9 + jz L$xts_enc_ret + movq %r9,%rdx + +L$xts_enc_steal: + movzbl (%rdi),%eax + movzbl -16(%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,-16(%rsi) + movb %cl,0(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz L$xts_enc_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups -16(%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_10: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_10 + aesenclast %xmm1,%xmm2 + xorps %xmm10,%xmm2 + movups %xmm2,-16(%rsi) + +L$xts_enc_ret: + leaq 104(%rsp),%rsp +L$xts_enc_epilogue: + retq + +.globl _aesni_xts_decrypt + +.p2align 4 +_aesni_xts_decrypt: + leaq -104(%rsp),%rsp + movups (%r9),%xmm15 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm15 +L$oop_enc1_11: + aesenc %xmm1,%xmm15 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz L$oop_enc1_11 + aesenclast %xmm1,%xmm15 + xorl %eax,%eax + testq $15,%rdx + setnz %al + shlq $4,%rax + subq %rax,%rdx + + movq %rcx,%r11 + movl %r10d,%eax + movq %rdx,%r9 + andq $-16,%rdx + + movdqa L$xts_magic(%rip),%xmm8 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + subq $96,%rdx + jc L$xts_dec_short + + shrl $1,%eax + subl $1,%eax + movl %eax,%r10d + jmp L$xts_dec_grandloop + +.p2align 4 +L$xts_dec_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu 0(%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + pxor %xmm12,%xmm4 + movdqu 80(%rdi),%xmm7 + leaq 96(%rdi),%rdi + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 + + + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) + aesdec %xmm1,%xmm2 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 + movdqa %xmm11,16(%rsp) + aesdec %xmm1,%xmm3 + pxor %xmm0,%xmm5 + movdqa %xmm12,32(%rsp) + aesdec %xmm1,%xmm4 + pxor %xmm0,%xmm6 + movdqa %xmm13,48(%rsp) + aesdec %xmm1,%xmm5 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax + movdqa %xmm14,64(%rsp) + aesdec %xmm1,%xmm6 + movdqa %xmm15,80(%rsp) + aesdec %xmm1,%xmm7 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp L$xts_dec_loop6_enter + +.p2align 4 +L$xts_dec_loop6: + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + decl %eax + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 +L$xts_dec_loop6_enter: + movups 16(%rcx),%xmm1 + aesdec %xmm0,%xmm2 + aesdec %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesdec %xmm0,%xmm4 + aesdec %xmm0,%xmm5 + aesdec %xmm0,%xmm6 + aesdec %xmm0,%xmm7 + movups (%rcx),%xmm0 + jnz L$xts_dec_loop6 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + paddq %xmm15,%xmm15 + aesdec %xmm1,%xmm2 + pand %xmm8,%xmm9 + aesdec %xmm1,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesdec %xmm1,%xmm4 + pxor %xmm9,%xmm15 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 + movups 16(%rcx),%xmm1 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + aesdec %xmm0,%xmm2 + pand %xmm8,%xmm9 + aesdec %xmm0,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesdec %xmm0,%xmm4 + pxor %xmm9,%xmm15 + aesdec %xmm0,%xmm5 + aesdec %xmm0,%xmm6 + aesdec %xmm0,%xmm7 + movups 32(%rcx),%xmm0 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + aesdec %xmm1,%xmm2 + pand %xmm8,%xmm9 + aesdec %xmm1,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesdec %xmm1,%xmm4 + pxor %xmm9,%xmm15 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + aesdeclast %xmm0,%xmm2 + pand %xmm8,%xmm9 + aesdeclast %xmm0,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesdeclast %xmm0,%xmm4 + pxor %xmm9,%xmm15 + aesdeclast %xmm0,%xmm5 + aesdeclast %xmm0,%xmm6 + aesdeclast %xmm0,%xmm7 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 + pand %xmm8,%xmm9 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + subq $96,%rdx + jnc L$xts_dec_grandloop + + leal 3(%rax,%rax,1),%eax + movq %r11,%rcx + movl %eax,%r10d + +L$xts_dec_short: + addq $96,%rdx + jz L$xts_dec_done + + cmpq $32,%rdx + jb L$xts_dec_one + je L$xts_dec_two + + cmpq $64,%rdx + jb L$xts_dec_three + je L$xts_dec_four + + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_decrypt6 + + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm14 + movdqu %xmm5,48(%rsi) + pcmpgtd %xmm15,%xmm14 + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + pshufd $19,%xmm14,%xmm11 + andq $15,%r9 + jz L$xts_dec_ret + + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm11 + pxor %xmm15,%xmm11 + jmp L$xts_dec_done2 + +.p2align 4 +L$xts_dec_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_12: + aesdec %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_12 + aesdeclast %xmm1,%xmm2 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + movdqa %xmm12,%xmm11 + leaq 16(%rsi),%rsi + jmp L$xts_dec_done + +.p2align 4 +L$xts_dec_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_decrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm13,%xmm11 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp L$xts_dec_done + +.p2align 4 +L$xts_dec_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_decrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm15,%xmm11 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp L$xts_dec_done + +.p2align 4 +L$xts_dec_four: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movups (%rdi),%xmm2 + pand %xmm8,%xmm9 + movups 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_decrypt4 + + xorps %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm15,%xmm11 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp L$xts_dec_done + +.p2align 4 +L$xts_dec_done: + andq $15,%r9 + jz L$xts_dec_ret +L$xts_dec_done2: + movq %r9,%rdx + movq %r11,%rcx + movl %r10d,%eax + + movups (%rdi),%xmm2 + xorps %xmm11,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_13: + aesdec %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_13 + aesdeclast %xmm1,%xmm2 + xorps %xmm11,%xmm2 + movups %xmm2,(%rsi) + +L$xts_dec_steal: + movzbl 16(%rdi),%eax + movzbl (%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,(%rsi) + movb %cl,16(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz L$xts_dec_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups (%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_14: + aesdec %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_14 + aesdeclast %xmm1,%xmm2 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + +L$xts_dec_ret: + leaq 104(%rsp),%rsp +L$xts_dec_epilogue: + retq + +.globl _aesni_cbc_encrypt + +.p2align 4 +_aesni_cbc_encrypt: + testq %rdx,%rdx + jz L$cbc_ret + + movl 240(%rcx),%r10d + movq %rcx,%r11 + testl %r9d,%r9d + jz L$cbc_decrypt + + movups (%r8),%xmm2 + movl %r10d,%eax + cmpq $16,%rdx + jb L$cbc_enc_tail + subq $16,%rdx + jmp L$cbc_enc_loop +.p2align 4 +L$cbc_enc_loop: + movups (%rdi),%xmm3 + leaq 16(%rdi),%rdi + + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm3 + leaq 32(%rcx),%rcx + xorps %xmm3,%xmm2 +L$oop_enc1_15: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_15 + aesenclast %xmm1,%xmm2 + movl %r10d,%eax + movq %r11,%rcx + movups %xmm2,0(%rsi) + leaq 16(%rsi),%rsi + subq $16,%rdx + jnc L$cbc_enc_loop + addq $16,%rdx + jnz L$cbc_enc_tail + movups %xmm2,(%r8) + jmp L$cbc_ret + +L$cbc_enc_tail: + movq %rdx,%rcx + xchgq %rdi,%rsi +.long 0x9066A4F3 + movl $16,%ecx + subq %rdx,%rcx + xorl %eax,%eax +.long 0x9066AAF3 + leaq -16(%rdi),%rdi + movl %r10d,%eax + movq %rdi,%rsi + movq %r11,%rcx + xorq %rdx,%rdx + jmp L$cbc_enc_loop + +.p2align 4 +L$cbc_decrypt: + movups (%r8),%xmm9 + movl %r10d,%eax + cmpq $112,%rdx + jbe L$cbc_dec_tail + shrl $1,%r10d + subq $112,%rdx + movl %r10d,%eax + movaps %xmm9,-24(%rsp) + jmp L$cbc_dec_loop8_enter +.p2align 4 +L$cbc_dec_loop8: + movaps %xmm0,-24(%rsp) + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi +L$cbc_dec_loop8_enter: + movups (%rcx),%xmm0 + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 16(%rcx),%xmm1 + + leaq 32(%rcx),%rcx + movdqu 32(%rdi),%xmm4 + xorps %xmm0,%xmm2 + movdqu 48(%rdi),%xmm5 + xorps %xmm0,%xmm3 + movdqu 64(%rdi),%xmm6 + aesdec %xmm1,%xmm2 + pxor %xmm0,%xmm4 + movdqu 80(%rdi),%xmm7 + aesdec %xmm1,%xmm3 + pxor %xmm0,%xmm5 + movdqu 96(%rdi),%xmm8 + aesdec %xmm1,%xmm4 + pxor %xmm0,%xmm6 + movdqu 112(%rdi),%xmm9 + aesdec %xmm1,%xmm5 + pxor %xmm0,%xmm7 + decl %eax + aesdec %xmm1,%xmm6 + pxor %xmm0,%xmm8 + aesdec %xmm1,%xmm7 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 + aesdec %xmm1,%xmm8 + aesdec %xmm1,%xmm9 + movups 16(%rcx),%xmm1 + + call L$dec_loop8_enter + + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps -24(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm1 + xorps %xmm0,%xmm8 + movups 112(%rdi),%xmm0 + xorps %xmm1,%xmm9 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movq %r11,%rcx + movups %xmm7,80(%rsi) + leaq 128(%rdi),%rdi + movups %xmm8,96(%rsi) + leaq 112(%rsi),%rsi + subq $128,%rdx + ja L$cbc_dec_loop8 + + movaps %xmm9,%xmm2 + movaps %xmm0,%xmm9 + addq $112,%rdx + jle L$cbc_dec_tail_collected + movups %xmm2,(%rsi) + leal 1(%r10,%r10,1),%eax + leaq 16(%rsi),%rsi +L$cbc_dec_tail: + movups (%rdi),%xmm2 + movaps %xmm2,%xmm8 + cmpq $16,%rdx + jbe L$cbc_dec_one + + movups 16(%rdi),%xmm3 + movaps %xmm3,%xmm7 + cmpq $32,%rdx + jbe L$cbc_dec_two + + movups 32(%rdi),%xmm4 + movaps %xmm4,%xmm6 + cmpq $48,%rdx + jbe L$cbc_dec_three + + movups 48(%rdi),%xmm5 + cmpq $64,%rdx + jbe L$cbc_dec_four + + movups 64(%rdi),%xmm6 + cmpq $80,%rdx + jbe L$cbc_dec_five + + movups 80(%rdi),%xmm7 + cmpq $96,%rdx + jbe L$cbc_dec_six + + movups 96(%rdi),%xmm8 + movaps %xmm9,-24(%rsp) + call _aesni_decrypt8 + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps -24(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm9 + xorps %xmm0,%xmm8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + movaps %xmm8,%xmm2 + subq $112,%rdx + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_16: + aesdec %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_16 + aesdeclast %xmm1,%xmm2 + xorps %xmm9,%xmm2 + movaps %xmm8,%xmm9 + subq $16,%rdx + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_two: + xorps %xmm4,%xmm4 + call _aesni_decrypt3 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + movaps %xmm7,%xmm9 + movaps %xmm3,%xmm2 + leaq 16(%rsi),%rsi + subq $32,%rdx + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_three: + call _aesni_decrypt3 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + movaps %xmm6,%xmm9 + movaps %xmm4,%xmm2 + leaq 32(%rsi),%rsi + subq $48,%rdx + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_four: + call _aesni_decrypt4 + xorps %xmm9,%xmm2 + movups 48(%rdi),%xmm9 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + xorps %xmm6,%xmm5 + movups %xmm4,32(%rsi) + movaps %xmm5,%xmm2 + leaq 48(%rsi),%rsi + subq $64,%rdx + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm9 + xorps %xmm1,%xmm6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + movaps %xmm6,%xmm2 + subq $80,%rdx + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_six: + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm0 + xorps %xmm1,%xmm6 + movups 80(%rdi),%xmm9 + xorps %xmm0,%xmm7 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + movaps %xmm7,%xmm2 + subq $96,%rdx + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_tail_collected: + andq $15,%rdx + movups %xmm9,(%r8) + jnz L$cbc_dec_tail_partial + movups %xmm2,(%rsi) + jmp L$cbc_dec_ret +.p2align 4 +L$cbc_dec_tail_partial: + movaps %xmm2,-24(%rsp) + movq $16,%rcx + movq %rsi,%rdi + subq %rdx,%rcx + leaq -24(%rsp),%rsi +.long 0x9066A4F3 + +L$cbc_dec_ret: +L$cbc_ret: + retq + +.globl _aesni_set_decrypt_key + +.p2align 4 +_aesni_set_decrypt_key: + subq $8,%rsp + call __aesni_set_encrypt_key + shll $4,%esi + testl %eax,%eax + jnz L$dec_key_ret + leaq 16(%rdx,%rsi,1),%rdi + + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 + movups %xmm0,(%rdi) + movups %xmm1,(%rdx) + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + +L$dec_key_inverse: + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 + aesimc %xmm0,%xmm0 + aesimc %xmm1,%xmm1 + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + movups %xmm0,16(%rdi) + movups %xmm1,-16(%rdx) + cmpq %rdx,%rdi + ja L$dec_key_inverse + + movups (%rdx),%xmm0 + aesimc %xmm0,%xmm0 + movups %xmm0,(%rdi) +L$dec_key_ret: + addq $8,%rsp + retq +L$SEH_end_set_decrypt_key: + +.globl _aesni_set_encrypt_key + +.p2align 4 +_aesni_set_encrypt_key: +__aesni_set_encrypt_key: + subq $8,%rsp + movq $-1,%rax + testq %rdi,%rdi + jz L$enc_key_ret + testq %rdx,%rdx + jz L$enc_key_ret + + movups (%rdi),%xmm0 + xorps %xmm4,%xmm4 + leaq 16(%rdx),%rax + cmpl $256,%esi + je L$14rounds + cmpl $192,%esi + je L$12rounds + cmpl $128,%esi + jne L$bad_keybits + +L$10rounds: + movl $9,%esi + movups %xmm0,(%rdx) + aeskeygenassist $1,%xmm0,%xmm1 + call L$key_expansion_128_cold + aeskeygenassist $2,%xmm0,%xmm1 + call L$key_expansion_128 + aeskeygenassist $4,%xmm0,%xmm1 + call L$key_expansion_128 + aeskeygenassist $8,%xmm0,%xmm1 + call L$key_expansion_128 + aeskeygenassist $16,%xmm0,%xmm1 + call L$key_expansion_128 + aeskeygenassist $32,%xmm0,%xmm1 + call L$key_expansion_128 + aeskeygenassist $64,%xmm0,%xmm1 + call L$key_expansion_128 + aeskeygenassist $128,%xmm0,%xmm1 + call L$key_expansion_128 + aeskeygenassist $27,%xmm0,%xmm1 + call L$key_expansion_128 + aeskeygenassist $54,%xmm0,%xmm1 + call L$key_expansion_128 + movups %xmm0,(%rax) + movl %esi,80(%rax) + xorl %eax,%eax + jmp L$enc_key_ret + +.p2align 4 +L$12rounds: + movq 16(%rdi),%xmm2 + movl $11,%esi + movups %xmm0,(%rdx) + aeskeygenassist $1,%xmm2,%xmm1 + call L$key_expansion_192a_cold + aeskeygenassist $2,%xmm2,%xmm1 + call L$key_expansion_192b + aeskeygenassist $4,%xmm2,%xmm1 + call L$key_expansion_192a + aeskeygenassist $8,%xmm2,%xmm1 + call L$key_expansion_192b + aeskeygenassist $16,%xmm2,%xmm1 + call L$key_expansion_192a + aeskeygenassist $32,%xmm2,%xmm1 + call L$key_expansion_192b + aeskeygenassist $64,%xmm2,%xmm1 + call L$key_expansion_192a + aeskeygenassist $128,%xmm2,%xmm1 + call L$key_expansion_192b + movups %xmm0,(%rax) + movl %esi,48(%rax) + xorq %rax,%rax + jmp L$enc_key_ret + +.p2align 4 +L$14rounds: + movups 16(%rdi),%xmm2 + movl $13,%esi + leaq 16(%rax),%rax + movups %xmm0,(%rdx) + movups %xmm2,16(%rdx) + aeskeygenassist $1,%xmm2,%xmm1 + call L$key_expansion_256a_cold + aeskeygenassist $1,%xmm0,%xmm1 + call L$key_expansion_256b + aeskeygenassist $2,%xmm2,%xmm1 + call L$key_expansion_256a + aeskeygenassist $2,%xmm0,%xmm1 + call L$key_expansion_256b + aeskeygenassist $4,%xmm2,%xmm1 + call L$key_expansion_256a + aeskeygenassist $4,%xmm0,%xmm1 + call L$key_expansion_256b + aeskeygenassist $8,%xmm2,%xmm1 + call L$key_expansion_256a + aeskeygenassist $8,%xmm0,%xmm1 + call L$key_expansion_256b + aeskeygenassist $16,%xmm2,%xmm1 + call L$key_expansion_256a + aeskeygenassist $16,%xmm0,%xmm1 + call L$key_expansion_256b + aeskeygenassist $32,%xmm2,%xmm1 + call L$key_expansion_256a + aeskeygenassist $32,%xmm0,%xmm1 + call L$key_expansion_256b + aeskeygenassist $64,%xmm2,%xmm1 + call L$key_expansion_256a + movups %xmm0,(%rax) + movl %esi,16(%rax) + xorq %rax,%rax + jmp L$enc_key_ret + +.p2align 4 +L$bad_keybits: + movq $-2,%rax +L$enc_key_ret: + addq $8,%rsp + retq +L$SEH_end_set_encrypt_key: + +.p2align 4 +L$key_expansion_128: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +L$key_expansion_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + retq + +.p2align 4 +L$key_expansion_192a: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +L$key_expansion_192a_cold: + movaps %xmm2,%xmm5 +L$key_expansion_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + retq + +.p2align 4 +L$key_expansion_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%rax) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%rax) + leaq 32(%rax),%rax + jmp L$key_expansion_192b_warm + +.p2align 4 +L$key_expansion_256a: + movups %xmm2,(%rax) + leaq 16(%rax),%rax +L$key_expansion_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + retq + +.p2align 4 +L$key_expansion_256b: + movups %xmm0,(%rax) + leaq 16(%rax),%rax + + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + retq + + +.p2align 6 +L$bswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +L$increment32: +.long 6,6,6,0 +L$increment64: +.long 1,0,0,0 +L$xts_magic: +.long 0x87,0,1,0 + +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 diff --git a/ext/libressl/crypto/aes/aesni-masm-x86_64.S b/ext/libressl/crypto/aes/aesni-masm-x86_64.S new file mode 100644 index 0000000..f2a2490 --- /dev/null +++ b/ext/libressl/crypto/aes/aesni-masm-x86_64.S @@ -0,0 +1,3099 @@ +; 1 "crypto/aes/aesni-masm-x86_64.S.tmp" +; 1 "" 1 +; 1 "" 3 +; 340 "" 3 +; 1 "" 1 +; 1 "" 2 +; 1 "crypto/aes/aesni-masm-x86_64.S.tmp" 2 +OPTION DOTNAME + +; 1 "./crypto/x86_arch.h" 1 + + +; 16 "./crypto/x86_arch.h" + + + + + + + + + +; 40 "./crypto/x86_arch.h" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +; 3 "crypto/aes/aesni-masm-x86_64.S.tmp" 2 +.text$ SEGMENT ALIGN(64) 'CODE' +PUBLIC aesni_encrypt + +ALIGN 16 +aesni_encrypt PROC PUBLIC + movups xmm2,XMMWORD PTR[rcx] + mov eax,DWORD PTR[240+r8] + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[16+r8] + lea r8,QWORD PTR[32+r8] + xorps xmm2,xmm0 +$L$oop_enc1_1:: + aesenc xmm2,xmm1 + dec eax + movups xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[16+r8] + jnz $L$oop_enc1_1 + aesenclast xmm2,xmm1 + movups XMMWORD PTR[rdx],xmm2 + DB 0F3h,0C3h ;repret +aesni_encrypt ENDP + +PUBLIC aesni_decrypt + +ALIGN 16 +aesni_decrypt PROC PUBLIC + movups xmm2,XMMWORD PTR[rcx] + mov eax,DWORD PTR[240+r8] + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[16+r8] + lea r8,QWORD PTR[32+r8] + xorps xmm2,xmm0 +$L$oop_dec1_2:: + aesdec xmm2,xmm1 + dec eax + movups xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[16+r8] + jnz $L$oop_dec1_2 + aesdeclast xmm2,xmm1 + movups XMMWORD PTR[rdx],xmm2 + DB 0F3h,0C3h ;repret +aesni_decrypt ENDP + +ALIGN 16 +_aesni_encrypt3 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shr eax,1 + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + movups xmm0,XMMWORD PTR[rcx] + +$L$enc_loop3:: + aesenc xmm2,xmm1 + aesenc xmm3,xmm1 + dec eax + aesenc xmm4,xmm1 + movups xmm1,XMMWORD PTR[16+rcx] + aesenc xmm2,xmm0 + aesenc xmm3,xmm0 + lea rcx,QWORD PTR[32+rcx] + aesenc xmm4,xmm0 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$enc_loop3 + + aesenc xmm2,xmm1 + aesenc xmm3,xmm1 + aesenc xmm4,xmm1 + aesenclast xmm2,xmm0 + aesenclast xmm3,xmm0 + aesenclast xmm4,xmm0 + DB 0F3h,0C3h ;repret +_aesni_encrypt3 ENDP + +ALIGN 16 +_aesni_decrypt3 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shr eax,1 + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + movups xmm0,XMMWORD PTR[rcx] + +$L$dec_loop3:: + aesdec xmm2,xmm1 + aesdec xmm3,xmm1 + dec eax + aesdec xmm4,xmm1 + movups xmm1,XMMWORD PTR[16+rcx] + aesdec xmm2,xmm0 + aesdec xmm3,xmm0 + lea rcx,QWORD PTR[32+rcx] + aesdec xmm4,xmm0 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$dec_loop3 + + aesdec xmm2,xmm1 + aesdec xmm3,xmm1 + aesdec xmm4,xmm1 + aesdeclast xmm2,xmm0 + aesdeclast xmm3,xmm0 + aesdeclast xmm4,xmm0 + DB 0F3h,0C3h ;repret +_aesni_decrypt3 ENDP + +ALIGN 16 +_aesni_encrypt4 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shr eax,1 + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + xorps xmm5,xmm0 + movups xmm0,XMMWORD PTR[rcx] + +$L$enc_loop4:: + aesenc xmm2,xmm1 + aesenc xmm3,xmm1 + dec eax + aesenc xmm4,xmm1 + aesenc xmm5,xmm1 + movups xmm1,XMMWORD PTR[16+rcx] + aesenc xmm2,xmm0 + aesenc xmm3,xmm0 + lea rcx,QWORD PTR[32+rcx] + aesenc xmm4,xmm0 + aesenc xmm5,xmm0 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$enc_loop4 + + aesenc xmm2,xmm1 + aesenc xmm3,xmm1 + aesenc xmm4,xmm1 + aesenc xmm5,xmm1 + aesenclast xmm2,xmm0 + aesenclast xmm3,xmm0 + aesenclast xmm4,xmm0 + aesenclast xmm5,xmm0 + DB 0F3h,0C3h ;repret +_aesni_encrypt4 ENDP + +ALIGN 16 +_aesni_decrypt4 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shr eax,1 + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + xorps xmm5,xmm0 + movups xmm0,XMMWORD PTR[rcx] + +$L$dec_loop4:: + aesdec xmm2,xmm1 + aesdec xmm3,xmm1 + dec eax + aesdec xmm4,xmm1 + aesdec xmm5,xmm1 + movups xmm1,XMMWORD PTR[16+rcx] + aesdec xmm2,xmm0 + aesdec xmm3,xmm0 + lea rcx,QWORD PTR[32+rcx] + aesdec xmm4,xmm0 + aesdec xmm5,xmm0 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$dec_loop4 + + aesdec xmm2,xmm1 + aesdec xmm3,xmm1 + aesdec xmm4,xmm1 + aesdec xmm5,xmm1 + aesdeclast xmm2,xmm0 + aesdeclast xmm3,xmm0 + aesdeclast xmm4,xmm0 + aesdeclast xmm5,xmm0 + DB 0F3h,0C3h ;repret +_aesni_decrypt4 ENDP + +ALIGN 16 +_aesni_encrypt6 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shr eax,1 + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + aesenc xmm2,xmm1 + pxor xmm4,xmm0 + aesenc xmm3,xmm1 + pxor xmm5,xmm0 + aesenc xmm4,xmm1 + pxor xmm6,xmm0 + aesenc xmm5,xmm1 + pxor xmm7,xmm0 + dec eax + aesenc xmm6,xmm1 + movups xmm0,XMMWORD PTR[rcx] + aesenc xmm7,xmm1 + jmp $L$enc_loop6_enter +ALIGN 16 +$L$enc_loop6:: + aesenc xmm2,xmm1 + aesenc xmm3,xmm1 + dec eax + aesenc xmm4,xmm1 + aesenc xmm5,xmm1 + aesenc xmm6,xmm1 + aesenc xmm7,xmm1 +$L$enc_loop6_enter:: + movups xmm1,XMMWORD PTR[16+rcx] + aesenc xmm2,xmm0 + aesenc xmm3,xmm0 + lea rcx,QWORD PTR[32+rcx] + aesenc xmm4,xmm0 + aesenc xmm5,xmm0 + aesenc xmm6,xmm0 + aesenc xmm7,xmm0 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$enc_loop6 + + aesenc xmm2,xmm1 + aesenc xmm3,xmm1 + aesenc xmm4,xmm1 + aesenc xmm5,xmm1 + aesenc xmm6,xmm1 + aesenc xmm7,xmm1 + aesenclast xmm2,xmm0 + aesenclast xmm3,xmm0 + aesenclast xmm4,xmm0 + aesenclast xmm5,xmm0 + aesenclast xmm6,xmm0 + aesenclast xmm7,xmm0 + DB 0F3h,0C3h ;repret +_aesni_encrypt6 ENDP + +ALIGN 16 +_aesni_decrypt6 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shr eax,1 + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + aesdec xmm2,xmm1 + pxor xmm4,xmm0 + aesdec xmm3,xmm1 + pxor xmm5,xmm0 + aesdec xmm4,xmm1 + pxor xmm6,xmm0 + aesdec xmm5,xmm1 + pxor xmm7,xmm0 + dec eax + aesdec xmm6,xmm1 + movups xmm0,XMMWORD PTR[rcx] + aesdec xmm7,xmm1 + jmp $L$dec_loop6_enter +ALIGN 16 +$L$dec_loop6:: + aesdec xmm2,xmm1 + aesdec xmm3,xmm1 + dec eax + aesdec xmm4,xmm1 + aesdec xmm5,xmm1 + aesdec xmm6,xmm1 + aesdec xmm7,xmm1 +$L$dec_loop6_enter:: + movups xmm1,XMMWORD PTR[16+rcx] + aesdec xmm2,xmm0 + aesdec xmm3,xmm0 + lea rcx,QWORD PTR[32+rcx] + aesdec xmm4,xmm0 + aesdec xmm5,xmm0 + aesdec xmm6,xmm0 + aesdec xmm7,xmm0 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$dec_loop6 + + aesdec xmm2,xmm1 + aesdec xmm3,xmm1 + aesdec xmm4,xmm1 + aesdec xmm5,xmm1 + aesdec xmm6,xmm1 + aesdec xmm7,xmm1 + aesdeclast xmm2,xmm0 + aesdeclast xmm3,xmm0 + aesdeclast xmm4,xmm0 + aesdeclast xmm5,xmm0 + aesdeclast xmm6,xmm0 + aesdeclast xmm7,xmm0 + DB 0F3h,0C3h ;repret +_aesni_decrypt6 ENDP + +ALIGN 16 +_aesni_encrypt8 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shr eax,1 + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + aesenc xmm2,xmm1 + pxor xmm4,xmm0 + aesenc xmm3,xmm1 + pxor xmm5,xmm0 + aesenc xmm4,xmm1 + pxor xmm6,xmm0 + aesenc xmm5,xmm1 + pxor xmm7,xmm0 + dec eax + aesenc xmm6,xmm1 + pxor xmm8,xmm0 + aesenc xmm7,xmm1 + pxor xmm9,xmm0 + movups xmm0,XMMWORD PTR[rcx] + aesenc xmm8,xmm1 + aesenc xmm9,xmm1 + movups xmm1,XMMWORD PTR[16+rcx] + jmp $L$enc_loop8_enter +ALIGN 16 +$L$enc_loop8:: + aesenc xmm2,xmm1 + aesenc xmm3,xmm1 + dec eax + aesenc xmm4,xmm1 + aesenc xmm5,xmm1 + aesenc xmm6,xmm1 + aesenc xmm7,xmm1 + aesenc xmm8,xmm1 + aesenc xmm9,xmm1 + movups xmm1,XMMWORD PTR[16+rcx] +$L$enc_loop8_enter:: + aesenc xmm2,xmm0 + aesenc xmm3,xmm0 + lea rcx,QWORD PTR[32+rcx] + aesenc xmm4,xmm0 + aesenc xmm5,xmm0 + aesenc xmm6,xmm0 + aesenc xmm7,xmm0 + aesenc xmm8,xmm0 + aesenc xmm9,xmm0 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$enc_loop8 + + aesenc xmm2,xmm1 + aesenc xmm3,xmm1 + aesenc xmm4,xmm1 + aesenc xmm5,xmm1 + aesenc xmm6,xmm1 + aesenc xmm7,xmm1 + aesenc xmm8,xmm1 + aesenc xmm9,xmm1 + aesenclast xmm2,xmm0 + aesenclast xmm3,xmm0 + aesenclast xmm4,xmm0 + aesenclast xmm5,xmm0 + aesenclast xmm6,xmm0 + aesenclast xmm7,xmm0 + aesenclast xmm8,xmm0 + aesenclast xmm9,xmm0 + DB 0F3h,0C3h ;repret +_aesni_encrypt8 ENDP + +ALIGN 16 +_aesni_decrypt8 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shr eax,1 + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + aesdec xmm2,xmm1 + pxor xmm4,xmm0 + aesdec xmm3,xmm1 + pxor xmm5,xmm0 + aesdec xmm4,xmm1 + pxor xmm6,xmm0 + aesdec xmm5,xmm1 + pxor xmm7,xmm0 + dec eax + aesdec xmm6,xmm1 + pxor xmm8,xmm0 + aesdec xmm7,xmm1 + pxor xmm9,xmm0 + movups xmm0,XMMWORD PTR[rcx] + aesdec xmm8,xmm1 + aesdec xmm9,xmm1 + movups xmm1,XMMWORD PTR[16+rcx] + jmp $L$dec_loop8_enter +ALIGN 16 +$L$dec_loop8:: + aesdec xmm2,xmm1 + aesdec xmm3,xmm1 + dec eax + aesdec xmm4,xmm1 + aesdec xmm5,xmm1 + aesdec xmm6,xmm1 + aesdec xmm7,xmm1 + aesdec xmm8,xmm1 + aesdec xmm9,xmm1 + movups xmm1,XMMWORD PTR[16+rcx] +$L$dec_loop8_enter:: + aesdec xmm2,xmm0 + aesdec xmm3,xmm0 + lea rcx,QWORD PTR[32+rcx] + aesdec xmm4,xmm0 + aesdec xmm5,xmm0 + aesdec xmm6,xmm0 + aesdec xmm7,xmm0 + aesdec xmm8,xmm0 + aesdec xmm9,xmm0 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$dec_loop8 + + aesdec xmm2,xmm1 + aesdec xmm3,xmm1 + aesdec xmm4,xmm1 + aesdec xmm5,xmm1 + aesdec xmm6,xmm1 + aesdec xmm7,xmm1 + aesdec xmm8,xmm1 + aesdec xmm9,xmm1 + aesdeclast xmm2,xmm0 + aesdeclast xmm3,xmm0 + aesdeclast xmm4,xmm0 + aesdeclast xmm5,xmm0 + aesdeclast xmm6,xmm0 + aesdeclast xmm7,xmm0 + aesdeclast xmm8,xmm0 + aesdeclast xmm9,xmm0 + DB 0F3h,0C3h ;repret +_aesni_decrypt8 ENDP +PUBLIC aesni_ecb_encrypt + +ALIGN 16 +aesni_ecb_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ecb_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + and rdx,-16 + jz $L$ecb_ret + + mov eax,DWORD PTR[240+rcx] + movups xmm0,XMMWORD PTR[rcx] + mov r11,rcx + mov r10d,eax + test r8d,r8d + jz $L$ecb_decrypt + + cmp rdx,080h + jb $L$ecb_enc_tail + + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqu xmm4,XMMWORD PTR[32+rdi] + movdqu xmm5,XMMWORD PTR[48+rdi] + movdqu xmm6,XMMWORD PTR[64+rdi] + movdqu xmm7,XMMWORD PTR[80+rdi] + movdqu xmm8,XMMWORD PTR[96+rdi] + movdqu xmm9,XMMWORD PTR[112+rdi] + lea rdi,QWORD PTR[128+rdi] + sub rdx,080h + jmp $L$ecb_enc_loop8_enter +ALIGN 16 +$L$ecb_enc_loop8:: + movups XMMWORD PTR[rsi],xmm2 + mov rcx,r11 + movdqu xmm2,XMMWORD PTR[rdi] + mov eax,r10d + movups XMMWORD PTR[16+rsi],xmm3 + movdqu xmm3,XMMWORD PTR[16+rdi] + movups XMMWORD PTR[32+rsi],xmm4 + movdqu xmm4,XMMWORD PTR[32+rdi] + movups XMMWORD PTR[48+rsi],xmm5 + movdqu xmm5,XMMWORD PTR[48+rdi] + movups XMMWORD PTR[64+rsi],xmm6 + movdqu xmm6,XMMWORD PTR[64+rdi] + movups XMMWORD PTR[80+rsi],xmm7 + movdqu xmm7,XMMWORD PTR[80+rdi] + movups XMMWORD PTR[96+rsi],xmm8 + movdqu xmm8,XMMWORD PTR[96+rdi] + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + movdqu xmm9,XMMWORD PTR[112+rdi] + lea rdi,QWORD PTR[128+rdi] +$L$ecb_enc_loop8_enter:: + + call _aesni_encrypt8 + + sub rdx,080h + jnc $L$ecb_enc_loop8 + + movups XMMWORD PTR[rsi],xmm2 + mov rcx,r11 + movups XMMWORD PTR[16+rsi],xmm3 + mov eax,r10d + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + movups XMMWORD PTR[96+rsi],xmm8 + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + add rdx,080h + jz $L$ecb_ret + +$L$ecb_enc_tail:: + movups xmm2,XMMWORD PTR[rdi] + cmp rdx,020h + jb $L$ecb_enc_one + movups xmm3,XMMWORD PTR[16+rdi] + je $L$ecb_enc_two + movups xmm4,XMMWORD PTR[32+rdi] + cmp rdx,040h + jb $L$ecb_enc_three + movups xmm5,XMMWORD PTR[48+rdi] + je $L$ecb_enc_four + movups xmm6,XMMWORD PTR[64+rdi] + cmp rdx,060h + jb $L$ecb_enc_five + movups xmm7,XMMWORD PTR[80+rdi] + je $L$ecb_enc_six + movdqu xmm8,XMMWORD PTR[96+rdi] + call _aesni_encrypt8 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + movups XMMWORD PTR[96+rsi],xmm8 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_one:: + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_3:: + aesenc xmm2,xmm1 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_3 + aesenclast xmm2,xmm1 + movups XMMWORD PTR[rsi],xmm2 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_two:: + xorps xmm4,xmm4 + call _aesni_encrypt3 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_three:: + call _aesni_encrypt3 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_four:: + call _aesni_encrypt4 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_five:: + xorps xmm7,xmm7 + call _aesni_encrypt6 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_six:: + call _aesni_encrypt6 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + jmp $L$ecb_ret + +ALIGN 16 +$L$ecb_decrypt:: + cmp rdx,080h + jb $L$ecb_dec_tail + + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqu xmm4,XMMWORD PTR[32+rdi] + movdqu xmm5,XMMWORD PTR[48+rdi] + movdqu xmm6,XMMWORD PTR[64+rdi] + movdqu xmm7,XMMWORD PTR[80+rdi] + movdqu xmm8,XMMWORD PTR[96+rdi] + movdqu xmm9,XMMWORD PTR[112+rdi] + lea rdi,QWORD PTR[128+rdi] + sub rdx,080h + jmp $L$ecb_dec_loop8_enter +ALIGN 16 +$L$ecb_dec_loop8:: + movups XMMWORD PTR[rsi],xmm2 + mov rcx,r11 + movdqu xmm2,XMMWORD PTR[rdi] + mov eax,r10d + movups XMMWORD PTR[16+rsi],xmm3 + movdqu xmm3,XMMWORD PTR[16+rdi] + movups XMMWORD PTR[32+rsi],xmm4 + movdqu xmm4,XMMWORD PTR[32+rdi] + movups XMMWORD PTR[48+rsi],xmm5 + movdqu xmm5,XMMWORD PTR[48+rdi] + movups XMMWORD PTR[64+rsi],xmm6 + movdqu xmm6,XMMWORD PTR[64+rdi] + movups XMMWORD PTR[80+rsi],xmm7 + movdqu xmm7,XMMWORD PTR[80+rdi] + movups XMMWORD PTR[96+rsi],xmm8 + movdqu xmm8,XMMWORD PTR[96+rdi] + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + movdqu xmm9,XMMWORD PTR[112+rdi] + lea rdi,QWORD PTR[128+rdi] +$L$ecb_dec_loop8_enter:: + + call _aesni_decrypt8 + + movups xmm0,XMMWORD PTR[r11] + sub rdx,080h + jnc $L$ecb_dec_loop8 + + movups XMMWORD PTR[rsi],xmm2 + mov rcx,r11 + movups XMMWORD PTR[16+rsi],xmm3 + mov eax,r10d + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + movups XMMWORD PTR[96+rsi],xmm8 + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + add rdx,080h + jz $L$ecb_ret + +$L$ecb_dec_tail:: + movups xmm2,XMMWORD PTR[rdi] + cmp rdx,020h + jb $L$ecb_dec_one + movups xmm3,XMMWORD PTR[16+rdi] + je $L$ecb_dec_two + movups xmm4,XMMWORD PTR[32+rdi] + cmp rdx,040h + jb $L$ecb_dec_three + movups xmm5,XMMWORD PTR[48+rdi] + je $L$ecb_dec_four + movups xmm6,XMMWORD PTR[64+rdi] + cmp rdx,060h + jb $L$ecb_dec_five + movups xmm7,XMMWORD PTR[80+rdi] + je $L$ecb_dec_six + movups xmm8,XMMWORD PTR[96+rdi] + movups xmm0,XMMWORD PTR[rcx] + call _aesni_decrypt8 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + movups XMMWORD PTR[96+rsi],xmm8 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_one:: + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_4:: + aesdec xmm2,xmm1 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_4 + aesdeclast xmm2,xmm1 + movups XMMWORD PTR[rsi],xmm2 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_two:: + xorps xmm4,xmm4 + call _aesni_decrypt3 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_three:: + call _aesni_decrypt3 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_four:: + call _aesni_decrypt4 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_five:: + xorps xmm7,xmm7 + call _aesni_decrypt6 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_six:: + call _aesni_decrypt6 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + +$L$ecb_ret:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ecb_encrypt:: +aesni_ecb_encrypt ENDP +PUBLIC aesni_ccm64_encrypt_blocks + +ALIGN 16 +aesni_ccm64_encrypt_blocks PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ccm64_encrypt_blocks:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + lea rsp,QWORD PTR[((-88))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 +$L$ccm64_enc_body:: + mov eax,DWORD PTR[240+rcx] + movdqu xmm9,XMMWORD PTR[r8] + movdqa xmm6,XMMWORD PTR[$L$increment64] + movdqa xmm7,XMMWORD PTR[$L$bswap_mask] + + shr eax,1 + lea r11,QWORD PTR[rcx] + movdqu xmm3,XMMWORD PTR[r9] + movdqa xmm2,xmm9 + mov r10d,eax +DB 102,68,15,56,0,207 + jmp $L$ccm64_enc_outer +ALIGN 16 +$L$ccm64_enc_outer:: + movups xmm0,XMMWORD PTR[r11] + mov eax,r10d + movups xmm8,XMMWORD PTR[rdi] + + xorps xmm2,xmm0 + movups xmm1,XMMWORD PTR[16+r11] + xorps xmm0,xmm8 + lea rcx,QWORD PTR[32+r11] + xorps xmm3,xmm0 + movups xmm0,XMMWORD PTR[rcx] + +$L$ccm64_enc2_loop:: + aesenc xmm2,xmm1 + dec eax + aesenc xmm3,xmm1 + movups xmm1,XMMWORD PTR[16+rcx] + aesenc xmm2,xmm0 + lea rcx,QWORD PTR[32+rcx] + aesenc xmm3,xmm0 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$ccm64_enc2_loop + aesenc xmm2,xmm1 + aesenc xmm3,xmm1 + paddq xmm9,xmm6 + aesenclast xmm2,xmm0 + aesenclast xmm3,xmm0 + + dec rdx + lea rdi,QWORD PTR[16+rdi] + xorps xmm8,xmm2 + movdqa xmm2,xmm9 + movups XMMWORD PTR[rsi],xmm8 + lea rsi,QWORD PTR[16+rsi] +DB 102,15,56,0,215 + jnz $L$ccm64_enc_outer + + movups XMMWORD PTR[r9],xmm3 + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + lea rsp,QWORD PTR[88+rsp] +$L$ccm64_enc_ret:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ccm64_encrypt_blocks:: +aesni_ccm64_encrypt_blocks ENDP +PUBLIC aesni_ccm64_decrypt_blocks + +ALIGN 16 +aesni_ccm64_decrypt_blocks PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ccm64_decrypt_blocks:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + lea rsp,QWORD PTR[((-88))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 +$L$ccm64_dec_body:: + mov eax,DWORD PTR[240+rcx] + movups xmm9,XMMWORD PTR[r8] + movdqu xmm3,XMMWORD PTR[r9] + movdqa xmm6,XMMWORD PTR[$L$increment64] + movdqa xmm7,XMMWORD PTR[$L$bswap_mask] + + movaps xmm2,xmm9 + mov r10d,eax + mov r11,rcx +DB 102,68,15,56,0,207 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_5:: + aesenc xmm2,xmm1 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_5 + aesenclast xmm2,xmm1 + movups xmm8,XMMWORD PTR[rdi] + paddq xmm9,xmm6 + lea rdi,QWORD PTR[16+rdi] + jmp $L$ccm64_dec_outer +ALIGN 16 +$L$ccm64_dec_outer:: + xorps xmm8,xmm2 + movdqa xmm2,xmm9 + mov eax,r10d + movups XMMWORD PTR[rsi],xmm8 + lea rsi,QWORD PTR[16+rsi] +DB 102,15,56,0,215 + + sub rdx,1 + jz $L$ccm64_dec_break + + movups xmm0,XMMWORD PTR[r11] + shr eax,1 + movups xmm1,XMMWORD PTR[16+r11] + xorps xmm8,xmm0 + lea rcx,QWORD PTR[32+r11] + xorps xmm2,xmm0 + xorps xmm3,xmm8 + movups xmm0,XMMWORD PTR[rcx] + +$L$ccm64_dec2_loop:: + aesenc xmm2,xmm1 + dec eax + aesenc xmm3,xmm1 + movups xmm1,XMMWORD PTR[16+rcx] + aesenc xmm2,xmm0 + lea rcx,QWORD PTR[32+rcx] + aesenc xmm3,xmm0 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$ccm64_dec2_loop + movups xmm8,XMMWORD PTR[rdi] + paddq xmm9,xmm6 + aesenc xmm2,xmm1 + aesenc xmm3,xmm1 + lea rdi,QWORD PTR[16+rdi] + aesenclast xmm2,xmm0 + aesenclast xmm3,xmm0 + jmp $L$ccm64_dec_outer + +ALIGN 16 +$L$ccm64_dec_break:: + + movups xmm0,XMMWORD PTR[r11] + movups xmm1,XMMWORD PTR[16+r11] + xorps xmm8,xmm0 + lea r11,QWORD PTR[32+r11] + xorps xmm3,xmm8 +$L$oop_enc1_6:: + aesenc xmm3,xmm1 + dec eax + movups xmm1,XMMWORD PTR[r11] + lea r11,QWORD PTR[16+r11] + jnz $L$oop_enc1_6 + aesenclast xmm3,xmm1 + movups XMMWORD PTR[r9],xmm3 + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + lea rsp,QWORD PTR[88+rsp] +$L$ccm64_dec_ret:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ccm64_decrypt_blocks:: +aesni_ccm64_decrypt_blocks ENDP +PUBLIC aesni_ctr32_encrypt_blocks + +ALIGN 16 +aesni_ctr32_encrypt_blocks PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ctr32_encrypt_blocks:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + lea rsp,QWORD PTR[((-200))+rsp] + movaps XMMWORD PTR[32+rsp],xmm6 + movaps XMMWORD PTR[48+rsp],xmm7 + movaps XMMWORD PTR[64+rsp],xmm8 + movaps XMMWORD PTR[80+rsp],xmm9 + movaps XMMWORD PTR[96+rsp],xmm10 + movaps XMMWORD PTR[112+rsp],xmm11 + movaps XMMWORD PTR[128+rsp],xmm12 + movaps XMMWORD PTR[144+rsp],xmm13 + movaps XMMWORD PTR[160+rsp],xmm14 + movaps XMMWORD PTR[176+rsp],xmm15 +$L$ctr32_body:: + cmp rdx,1 + je $L$ctr32_one_shortcut + + movdqu xmm14,XMMWORD PTR[r8] + movdqa xmm15,XMMWORD PTR[$L$bswap_mask] + xor eax,eax +DB 102,69,15,58,22,242,3 +DB 102,68,15,58,34,240,3 + + mov eax,DWORD PTR[240+rcx] + bswap r10d + pxor xmm12,xmm12 + pxor xmm13,xmm13 +DB 102,69,15,58,34,226,0 + lea r11,QWORD PTR[3+r10] +DB 102,69,15,58,34,235,0 + inc r10d +DB 102,69,15,58,34,226,1 + inc r11 +DB 102,69,15,58,34,235,1 + inc r10d +DB 102,69,15,58,34,226,2 + inc r11 +DB 102,69,15,58,34,235,2 + movdqa XMMWORD PTR[rsp],xmm12 +DB 102,69,15,56,0,231 + movdqa XMMWORD PTR[16+rsp],xmm13 +DB 102,69,15,56,0,239 + + pshufd xmm2,xmm12,192 + pshufd xmm3,xmm12,128 + pshufd xmm4,xmm12,64 + cmp rdx,6 + jb $L$ctr32_tail + shr eax,1 + mov r11,rcx + mov r10d,eax + sub rdx,6 + jmp $L$ctr32_loop6 + +ALIGN 16 +$L$ctr32_loop6:: + pshufd xmm5,xmm13,192 + por xmm2,xmm14 + movups xmm0,XMMWORD PTR[r11] + pshufd xmm6,xmm13,128 + por xmm3,xmm14 + movups xmm1,XMMWORD PTR[16+r11] + pshufd xmm7,xmm13,64 + por xmm4,xmm14 + por xmm5,xmm14 + xorps xmm2,xmm0 + por xmm6,xmm14 + por xmm7,xmm14 + + + + + pxor xmm3,xmm0 + aesenc xmm2,xmm1 + lea rcx,QWORD PTR[32+r11] + pxor xmm4,xmm0 + aesenc xmm3,xmm1 + movdqa xmm13,XMMWORD PTR[$L$increment32] + pxor xmm5,xmm0 + aesenc xmm4,xmm1 + movdqa xmm12,XMMWORD PTR[rsp] + pxor xmm6,xmm0 + aesenc xmm5,xmm1 + pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR[rcx] + dec eax + aesenc xmm6,xmm1 + aesenc xmm7,xmm1 + jmp $L$ctr32_enc_loop6_enter +ALIGN 16 +$L$ctr32_enc_loop6:: + aesenc xmm2,xmm1 + aesenc xmm3,xmm1 + dec eax + aesenc xmm4,xmm1 + aesenc xmm5,xmm1 + aesenc xmm6,xmm1 + aesenc xmm7,xmm1 +$L$ctr32_enc_loop6_enter:: + movups xmm1,XMMWORD PTR[16+rcx] + aesenc xmm2,xmm0 + aesenc xmm3,xmm0 + lea rcx,QWORD PTR[32+rcx] + aesenc xmm4,xmm0 + aesenc xmm5,xmm0 + aesenc xmm6,xmm0 + aesenc xmm7,xmm0 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$ctr32_enc_loop6 + + aesenc xmm2,xmm1 + paddd xmm12,xmm13 + aesenc xmm3,xmm1 + paddd xmm13,XMMWORD PTR[16+rsp] + aesenc xmm4,xmm1 + movdqa XMMWORD PTR[rsp],xmm12 + aesenc xmm5,xmm1 + movdqa XMMWORD PTR[16+rsp],xmm13 + aesenc xmm6,xmm1 +DB 102,69,15,56,0,231 + aesenc xmm7,xmm1 +DB 102,69,15,56,0,239 + + aesenclast xmm2,xmm0 + movups xmm8,XMMWORD PTR[rdi] + aesenclast xmm3,xmm0 + movups xmm9,XMMWORD PTR[16+rdi] + aesenclast xmm4,xmm0 + movups xmm10,XMMWORD PTR[32+rdi] + aesenclast xmm5,xmm0 + movups xmm11,XMMWORD PTR[48+rdi] + aesenclast xmm6,xmm0 + movups xmm1,XMMWORD PTR[64+rdi] + aesenclast xmm7,xmm0 + movups xmm0,XMMWORD PTR[80+rdi] + lea rdi,QWORD PTR[96+rdi] + + xorps xmm8,xmm2 + pshufd xmm2,xmm12,192 + xorps xmm9,xmm3 + pshufd xmm3,xmm12,128 + movups XMMWORD PTR[rsi],xmm8 + xorps xmm10,xmm4 + pshufd xmm4,xmm12,64 + movups XMMWORD PTR[16+rsi],xmm9 + xorps xmm11,xmm5 + movups XMMWORD PTR[32+rsi],xmm10 + xorps xmm1,xmm6 + movups XMMWORD PTR[48+rsi],xmm11 + xorps xmm0,xmm7 + movups XMMWORD PTR[64+rsi],xmm1 + movups XMMWORD PTR[80+rsi],xmm0 + lea rsi,QWORD PTR[96+rsi] + mov eax,r10d + sub rdx,6 + jnc $L$ctr32_loop6 + + add rdx,6 + jz $L$ctr32_done + mov rcx,r11 + lea eax,DWORD PTR[1+rax*1+rax] + +$L$ctr32_tail:: + por xmm2,xmm14 + movups xmm8,XMMWORD PTR[rdi] + cmp rdx,2 + jb $L$ctr32_one + + por xmm3,xmm14 + movups xmm9,XMMWORD PTR[16+rdi] + je $L$ctr32_two + + pshufd xmm5,xmm13,192 + por xmm4,xmm14 + movups xmm10,XMMWORD PTR[32+rdi] + cmp rdx,4 + jb $L$ctr32_three + + pshufd xmm6,xmm13,128 + por xmm5,xmm14 + movups xmm11,XMMWORD PTR[48+rdi] + je $L$ctr32_four + + por xmm6,xmm14 + xorps xmm7,xmm7 + + call _aesni_encrypt6 + + movups xmm1,XMMWORD PTR[64+rdi] + xorps xmm8,xmm2 + xorps xmm9,xmm3 + movups XMMWORD PTR[rsi],xmm8 + xorps xmm10,xmm4 + movups XMMWORD PTR[16+rsi],xmm9 + xorps xmm11,xmm5 + movups XMMWORD PTR[32+rsi],xmm10 + xorps xmm1,xmm6 + movups XMMWORD PTR[48+rsi],xmm11 + movups XMMWORD PTR[64+rsi],xmm1 + jmp $L$ctr32_done + +ALIGN 16 +$L$ctr32_one_shortcut:: + movups xmm2,XMMWORD PTR[r8] + movups xmm8,XMMWORD PTR[rdi] + mov eax,DWORD PTR[240+rcx] +$L$ctr32_one:: + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_7:: + aesenc xmm2,xmm1 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_7 + aesenclast xmm2,xmm1 + xorps xmm8,xmm2 + movups XMMWORD PTR[rsi],xmm8 + jmp $L$ctr32_done + +ALIGN 16 +$L$ctr32_two:: + xorps xmm4,xmm4 + call _aesni_encrypt3 + xorps xmm8,xmm2 + xorps xmm9,xmm3 + movups XMMWORD PTR[rsi],xmm8 + movups XMMWORD PTR[16+rsi],xmm9 + jmp $L$ctr32_done + +ALIGN 16 +$L$ctr32_three:: + call _aesni_encrypt3 + xorps xmm8,xmm2 + xorps xmm9,xmm3 + movups XMMWORD PTR[rsi],xmm8 + xorps xmm10,xmm4 + movups XMMWORD PTR[16+rsi],xmm9 + movups XMMWORD PTR[32+rsi],xmm10 + jmp $L$ctr32_done + +ALIGN 16 +$L$ctr32_four:: + call _aesni_encrypt4 + xorps xmm8,xmm2 + xorps xmm9,xmm3 + movups XMMWORD PTR[rsi],xmm8 + xorps xmm10,xmm4 + movups XMMWORD PTR[16+rsi],xmm9 + xorps xmm11,xmm5 + movups XMMWORD PTR[32+rsi],xmm10 + movups XMMWORD PTR[48+rsi],xmm11 + +$L$ctr32_done:: + movaps xmm6,XMMWORD PTR[32+rsp] + movaps xmm7,XMMWORD PTR[48+rsp] + movaps xmm8,XMMWORD PTR[64+rsp] + movaps xmm9,XMMWORD PTR[80+rsp] + movaps xmm10,XMMWORD PTR[96+rsp] + movaps xmm11,XMMWORD PTR[112+rsp] + movaps xmm12,XMMWORD PTR[128+rsp] + movaps xmm13,XMMWORD PTR[144+rsp] + movaps xmm14,XMMWORD PTR[160+rsp] + movaps xmm15,XMMWORD PTR[176+rsp] + lea rsp,QWORD PTR[200+rsp] +$L$ctr32_ret:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ctr32_encrypt_blocks:: +aesni_ctr32_encrypt_blocks ENDP +PUBLIC aesni_xts_encrypt + +ALIGN 16 +aesni_xts_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_xts_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + lea rsp,QWORD PTR[((-264))+rsp] + movaps XMMWORD PTR[96+rsp],xmm6 + movaps XMMWORD PTR[112+rsp],xmm7 + movaps XMMWORD PTR[128+rsp],xmm8 + movaps XMMWORD PTR[144+rsp],xmm9 + movaps XMMWORD PTR[160+rsp],xmm10 + movaps XMMWORD PTR[176+rsp],xmm11 + movaps XMMWORD PTR[192+rsp],xmm12 + movaps XMMWORD PTR[208+rsp],xmm13 + movaps XMMWORD PTR[224+rsp],xmm14 + movaps XMMWORD PTR[240+rsp],xmm15 +$L$xts_enc_body:: + movups xmm15,XMMWORD PTR[r9] + mov eax,DWORD PTR[240+r8] + mov r10d,DWORD PTR[240+rcx] + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[16+r8] + lea r8,QWORD PTR[32+r8] + xorps xmm15,xmm0 +$L$oop_enc1_8:: + aesenc xmm15,xmm1 + dec eax + movups xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[16+r8] + jnz $L$oop_enc1_8 + aesenclast xmm15,xmm1 + mov r11,rcx + mov eax,r10d + mov r9,rdx + and rdx,-16 + + movdqa xmm8,XMMWORD PTR[$L$xts_magic] + pxor xmm14,xmm14 + pcmpgtd xmm14,xmm15 + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm10,xmm15 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm11,xmm15 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm12,xmm15 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm13,xmm15 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + sub rdx,16*6 + jc $L$xts_enc_short + + shr eax,1 + sub eax,1 + mov r10d,eax + jmp $L$xts_enc_grandloop + +ALIGN 16 +$L$xts_enc_grandloop:: + pshufd xmm9,xmm14,013h + movdqa xmm14,xmm15 + paddq xmm15,xmm15 + movdqu xmm2,XMMWORD PTR[rdi] + pand xmm9,xmm8 + movdqu xmm3,XMMWORD PTR[16+rdi] + pxor xmm15,xmm9 + + movdqu xmm4,XMMWORD PTR[32+rdi] + pxor xmm2,xmm10 + movdqu xmm5,XMMWORD PTR[48+rdi] + pxor xmm3,xmm11 + movdqu xmm6,XMMWORD PTR[64+rdi] + pxor xmm4,xmm12 + movdqu xmm7,XMMWORD PTR[80+rdi] + lea rdi,QWORD PTR[96+rdi] + pxor xmm5,xmm13 + movups xmm0,XMMWORD PTR[r11] + pxor xmm6,xmm14 + pxor xmm7,xmm15 + + + + movups xmm1,XMMWORD PTR[16+r11] + pxor xmm2,xmm0 + pxor xmm3,xmm0 + movdqa XMMWORD PTR[rsp],xmm10 + aesenc xmm2,xmm1 + lea rcx,QWORD PTR[32+r11] + pxor xmm4,xmm0 + movdqa XMMWORD PTR[16+rsp],xmm11 + aesenc xmm3,xmm1 + pxor xmm5,xmm0 + movdqa XMMWORD PTR[32+rsp],xmm12 + aesenc xmm4,xmm1 + pxor xmm6,xmm0 + movdqa XMMWORD PTR[48+rsp],xmm13 + aesenc xmm5,xmm1 + pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR[rcx] + dec eax + movdqa XMMWORD PTR[64+rsp],xmm14 + aesenc xmm6,xmm1 + movdqa XMMWORD PTR[80+rsp],xmm15 + aesenc xmm7,xmm1 + pxor xmm14,xmm14 + pcmpgtd xmm14,xmm15 + jmp $L$xts_enc_loop6_enter + +ALIGN 16 +$L$xts_enc_loop6:: + aesenc xmm2,xmm1 + aesenc xmm3,xmm1 + dec eax + aesenc xmm4,xmm1 + aesenc xmm5,xmm1 + aesenc xmm6,xmm1 + aesenc xmm7,xmm1 +$L$xts_enc_loop6_enter:: + movups xmm1,XMMWORD PTR[16+rcx] + aesenc xmm2,xmm0 + aesenc xmm3,xmm0 + lea rcx,QWORD PTR[32+rcx] + aesenc xmm4,xmm0 + aesenc xmm5,xmm0 + aesenc xmm6,xmm0 + aesenc xmm7,xmm0 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$xts_enc_loop6 + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + paddq xmm15,xmm15 + aesenc xmm2,xmm1 + pand xmm9,xmm8 + aesenc xmm3,xmm1 + pcmpgtd xmm14,xmm15 + aesenc xmm4,xmm1 + pxor xmm15,xmm9 + aesenc xmm5,xmm1 + aesenc xmm6,xmm1 + aesenc xmm7,xmm1 + movups xmm1,XMMWORD PTR[16+rcx] + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm10,xmm15 + paddq xmm15,xmm15 + aesenc xmm2,xmm0 + pand xmm9,xmm8 + aesenc xmm3,xmm0 + pcmpgtd xmm14,xmm15 + aesenc xmm4,xmm0 + pxor xmm15,xmm9 + aesenc xmm5,xmm0 + aesenc xmm6,xmm0 + aesenc xmm7,xmm0 + movups xmm0,XMMWORD PTR[32+rcx] + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm11,xmm15 + paddq xmm15,xmm15 + aesenc xmm2,xmm1 + pand xmm9,xmm8 + aesenc xmm3,xmm1 + pcmpgtd xmm14,xmm15 + aesenc xmm4,xmm1 + pxor xmm15,xmm9 + aesenc xmm5,xmm1 + aesenc xmm6,xmm1 + aesenc xmm7,xmm1 + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm12,xmm15 + paddq xmm15,xmm15 + aesenclast xmm2,xmm0 + pand xmm9,xmm8 + aesenclast xmm3,xmm0 + pcmpgtd xmm14,xmm15 + aesenclast xmm4,xmm0 + pxor xmm15,xmm9 + aesenclast xmm5,xmm0 + aesenclast xmm6,xmm0 + aesenclast xmm7,xmm0 + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm13,xmm15 + paddq xmm15,xmm15 + xorps xmm2,XMMWORD PTR[rsp] + pand xmm9,xmm8 + xorps xmm3,XMMWORD PTR[16+rsp] + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + + xorps xmm4,XMMWORD PTR[32+rsp] + movups XMMWORD PTR[rsi],xmm2 + xorps xmm5,XMMWORD PTR[48+rsp] + movups XMMWORD PTR[16+rsi],xmm3 + xorps xmm6,XMMWORD PTR[64+rsp] + movups XMMWORD PTR[32+rsi],xmm4 + xorps xmm7,XMMWORD PTR[80+rsp] + movups XMMWORD PTR[48+rsi],xmm5 + mov eax,r10d + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + lea rsi,QWORD PTR[96+rsi] + sub rdx,16*6 + jnc $L$xts_enc_grandloop + + lea eax,DWORD PTR[3+rax*1+rax] + mov rcx,r11 + mov r10d,eax + +$L$xts_enc_short:: + add rdx,16*6 + jz $L$xts_enc_done + + cmp rdx,020h + jb $L$xts_enc_one + je $L$xts_enc_two + + cmp rdx,040h + jb $L$xts_enc_three + je $L$xts_enc_four + + pshufd xmm9,xmm14,013h + movdqa xmm14,xmm15 + paddq xmm15,xmm15 + movdqu xmm2,XMMWORD PTR[rdi] + pand xmm9,xmm8 + movdqu xmm3,XMMWORD PTR[16+rdi] + pxor xmm15,xmm9 + + movdqu xmm4,XMMWORD PTR[32+rdi] + pxor xmm2,xmm10 + movdqu xmm5,XMMWORD PTR[48+rdi] + pxor xmm3,xmm11 + movdqu xmm6,XMMWORD PTR[64+rdi] + lea rdi,QWORD PTR[80+rdi] + pxor xmm4,xmm12 + pxor xmm5,xmm13 + pxor xmm6,xmm14 + + call _aesni_encrypt6 + + xorps xmm2,xmm10 + movdqa xmm10,xmm15 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + movdqu XMMWORD PTR[rsi],xmm2 + xorps xmm5,xmm13 + movdqu XMMWORD PTR[16+rsi],xmm3 + xorps xmm6,xmm14 + movdqu XMMWORD PTR[32+rsi],xmm4 + movdqu XMMWORD PTR[48+rsi],xmm5 + movdqu XMMWORD PTR[64+rsi],xmm6 + lea rsi,QWORD PTR[80+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_one:: + movups xmm2,XMMWORD PTR[rdi] + lea rdi,QWORD PTR[16+rdi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_9:: + aesenc xmm2,xmm1 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_9 + aesenclast xmm2,xmm1 + xorps xmm2,xmm10 + movdqa xmm10,xmm11 + movups XMMWORD PTR[rsi],xmm2 + lea rsi,QWORD PTR[16+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_two:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + lea rdi,QWORD PTR[32+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + + call _aesni_encrypt3 + + xorps xmm2,xmm10 + movdqa xmm10,xmm12 + xorps xmm3,xmm11 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + lea rsi,QWORD PTR[32+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_three:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + movups xmm4,XMMWORD PTR[32+rdi] + lea rdi,QWORD PTR[48+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + + call _aesni_encrypt3 + + xorps xmm2,xmm10 + movdqa xmm10,xmm13 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + lea rsi,QWORD PTR[48+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_four:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + movups xmm4,XMMWORD PTR[32+rdi] + xorps xmm2,xmm10 + movups xmm5,XMMWORD PTR[48+rdi] + lea rdi,QWORD PTR[64+rdi] + xorps xmm3,xmm11 + xorps xmm4,xmm12 + xorps xmm5,xmm13 + + call _aesni_encrypt4 + + xorps xmm2,xmm10 + movdqa xmm10,xmm15 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + movups XMMWORD PTR[rsi],xmm2 + xorps xmm5,xmm13 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + lea rsi,QWORD PTR[64+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_done:: + and r9,15 + jz $L$xts_enc_ret + mov rdx,r9 + +$L$xts_enc_steal:: + movzx eax,BYTE PTR[rdi] + movzx ecx,BYTE PTR[((-16))+rsi] + lea rdi,QWORD PTR[1+rdi] + mov BYTE PTR[((-16))+rsi],al + mov BYTE PTR[rsi],cl + lea rsi,QWORD PTR[1+rsi] + sub rdx,1 + jnz $L$xts_enc_steal + + sub rsi,r9 + mov rcx,r11 + mov eax,r10d + + movups xmm2,XMMWORD PTR[((-16))+rsi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_10:: + aesenc xmm2,xmm1 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_10 + aesenclast xmm2,xmm1 + xorps xmm2,xmm10 + movups XMMWORD PTR[(-16)+rsi],xmm2 + +$L$xts_enc_ret:: + movaps xmm6,XMMWORD PTR[96+rsp] + movaps xmm7,XMMWORD PTR[112+rsp] + movaps xmm8,XMMWORD PTR[128+rsp] + movaps xmm9,XMMWORD PTR[144+rsp] + movaps xmm10,XMMWORD PTR[160+rsp] + movaps xmm11,XMMWORD PTR[176+rsp] + movaps xmm12,XMMWORD PTR[192+rsp] + movaps xmm13,XMMWORD PTR[208+rsp] + movaps xmm14,XMMWORD PTR[224+rsp] + movaps xmm15,XMMWORD PTR[240+rsp] + lea rsp,QWORD PTR[264+rsp] +$L$xts_enc_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_xts_encrypt:: +aesni_xts_encrypt ENDP +PUBLIC aesni_xts_decrypt + +ALIGN 16 +aesni_xts_decrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_xts_decrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + lea rsp,QWORD PTR[((-264))+rsp] + movaps XMMWORD PTR[96+rsp],xmm6 + movaps XMMWORD PTR[112+rsp],xmm7 + movaps XMMWORD PTR[128+rsp],xmm8 + movaps XMMWORD PTR[144+rsp],xmm9 + movaps XMMWORD PTR[160+rsp],xmm10 + movaps XMMWORD PTR[176+rsp],xmm11 + movaps XMMWORD PTR[192+rsp],xmm12 + movaps XMMWORD PTR[208+rsp],xmm13 + movaps XMMWORD PTR[224+rsp],xmm14 + movaps XMMWORD PTR[240+rsp],xmm15 +$L$xts_dec_body:: + movups xmm15,XMMWORD PTR[r9] + mov eax,DWORD PTR[240+r8] + mov r10d,DWORD PTR[240+rcx] + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[16+r8] + lea r8,QWORD PTR[32+r8] + xorps xmm15,xmm0 +$L$oop_enc1_11:: + aesenc xmm15,xmm1 + dec eax + movups xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[16+r8] + jnz $L$oop_enc1_11 + aesenclast xmm15,xmm1 + xor eax,eax + test rdx,15 + setnz al + shl rax,4 + sub rdx,rax + + mov r11,rcx + mov eax,r10d + mov r9,rdx + and rdx,-16 + + movdqa xmm8,XMMWORD PTR[$L$xts_magic] + pxor xmm14,xmm14 + pcmpgtd xmm14,xmm15 + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm10,xmm15 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm11,xmm15 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm12,xmm15 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm13,xmm15 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + sub rdx,16*6 + jc $L$xts_dec_short + + shr eax,1 + sub eax,1 + mov r10d,eax + jmp $L$xts_dec_grandloop + +ALIGN 16 +$L$xts_dec_grandloop:: + pshufd xmm9,xmm14,013h + movdqa xmm14,xmm15 + paddq xmm15,xmm15 + movdqu xmm2,XMMWORD PTR[rdi] + pand xmm9,xmm8 + movdqu xmm3,XMMWORD PTR[16+rdi] + pxor xmm15,xmm9 + + movdqu xmm4,XMMWORD PTR[32+rdi] + pxor xmm2,xmm10 + movdqu xmm5,XMMWORD PTR[48+rdi] + pxor xmm3,xmm11 + movdqu xmm6,XMMWORD PTR[64+rdi] + pxor xmm4,xmm12 + movdqu xmm7,XMMWORD PTR[80+rdi] + lea rdi,QWORD PTR[96+rdi] + pxor xmm5,xmm13 + movups xmm0,XMMWORD PTR[r11] + pxor xmm6,xmm14 + pxor xmm7,xmm15 + + + + movups xmm1,XMMWORD PTR[16+r11] + pxor xmm2,xmm0 + pxor xmm3,xmm0 + movdqa XMMWORD PTR[rsp],xmm10 + aesdec xmm2,xmm1 + lea rcx,QWORD PTR[32+r11] + pxor xmm4,xmm0 + movdqa XMMWORD PTR[16+rsp],xmm11 + aesdec xmm3,xmm1 + pxor xmm5,xmm0 + movdqa XMMWORD PTR[32+rsp],xmm12 + aesdec xmm4,xmm1 + pxor xmm6,xmm0 + movdqa XMMWORD PTR[48+rsp],xmm13 + aesdec xmm5,xmm1 + pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR[rcx] + dec eax + movdqa XMMWORD PTR[64+rsp],xmm14 + aesdec xmm6,xmm1 + movdqa XMMWORD PTR[80+rsp],xmm15 + aesdec xmm7,xmm1 + pxor xmm14,xmm14 + pcmpgtd xmm14,xmm15 + jmp $L$xts_dec_loop6_enter + +ALIGN 16 +$L$xts_dec_loop6:: + aesdec xmm2,xmm1 + aesdec xmm3,xmm1 + dec eax + aesdec xmm4,xmm1 + aesdec xmm5,xmm1 + aesdec xmm6,xmm1 + aesdec xmm7,xmm1 +$L$xts_dec_loop6_enter:: + movups xmm1,XMMWORD PTR[16+rcx] + aesdec xmm2,xmm0 + aesdec xmm3,xmm0 + lea rcx,QWORD PTR[32+rcx] + aesdec xmm4,xmm0 + aesdec xmm5,xmm0 + aesdec xmm6,xmm0 + aesdec xmm7,xmm0 + movups xmm0,XMMWORD PTR[rcx] + jnz $L$xts_dec_loop6 + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + paddq xmm15,xmm15 + aesdec xmm2,xmm1 + pand xmm9,xmm8 + aesdec xmm3,xmm1 + pcmpgtd xmm14,xmm15 + aesdec xmm4,xmm1 + pxor xmm15,xmm9 + aesdec xmm5,xmm1 + aesdec xmm6,xmm1 + aesdec xmm7,xmm1 + movups xmm1,XMMWORD PTR[16+rcx] + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm10,xmm15 + paddq xmm15,xmm15 + aesdec xmm2,xmm0 + pand xmm9,xmm8 + aesdec xmm3,xmm0 + pcmpgtd xmm14,xmm15 + aesdec xmm4,xmm0 + pxor xmm15,xmm9 + aesdec xmm5,xmm0 + aesdec xmm6,xmm0 + aesdec xmm7,xmm0 + movups xmm0,XMMWORD PTR[32+rcx] + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm11,xmm15 + paddq xmm15,xmm15 + aesdec xmm2,xmm1 + pand xmm9,xmm8 + aesdec xmm3,xmm1 + pcmpgtd xmm14,xmm15 + aesdec xmm4,xmm1 + pxor xmm15,xmm9 + aesdec xmm5,xmm1 + aesdec xmm6,xmm1 + aesdec xmm7,xmm1 + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm12,xmm15 + paddq xmm15,xmm15 + aesdeclast xmm2,xmm0 + pand xmm9,xmm8 + aesdeclast xmm3,xmm0 + pcmpgtd xmm14,xmm15 + aesdeclast xmm4,xmm0 + pxor xmm15,xmm9 + aesdeclast xmm5,xmm0 + aesdeclast xmm6,xmm0 + aesdeclast xmm7,xmm0 + + pshufd xmm9,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm13,xmm15 + paddq xmm15,xmm15 + xorps xmm2,XMMWORD PTR[rsp] + pand xmm9,xmm8 + xorps xmm3,XMMWORD PTR[16+rsp] + pcmpgtd xmm14,xmm15 + pxor xmm15,xmm9 + + xorps xmm4,XMMWORD PTR[32+rsp] + movups XMMWORD PTR[rsi],xmm2 + xorps xmm5,XMMWORD PTR[48+rsp] + movups XMMWORD PTR[16+rsi],xmm3 + xorps xmm6,XMMWORD PTR[64+rsp] + movups XMMWORD PTR[32+rsi],xmm4 + xorps xmm7,XMMWORD PTR[80+rsp] + movups XMMWORD PTR[48+rsi],xmm5 + mov eax,r10d + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + lea rsi,QWORD PTR[96+rsi] + sub rdx,16*6 + jnc $L$xts_dec_grandloop + + lea eax,DWORD PTR[3+rax*1+rax] + mov rcx,r11 + mov r10d,eax + +$L$xts_dec_short:: + add rdx,16*6 + jz $L$xts_dec_done + + cmp rdx,020h + jb $L$xts_dec_one + je $L$xts_dec_two + + cmp rdx,040h + jb $L$xts_dec_three + je $L$xts_dec_four + + pshufd xmm9,xmm14,013h + movdqa xmm14,xmm15 + paddq xmm15,xmm15 + movdqu xmm2,XMMWORD PTR[rdi] + pand xmm9,xmm8 + movdqu xmm3,XMMWORD PTR[16+rdi] + pxor xmm15,xmm9 + + movdqu xmm4,XMMWORD PTR[32+rdi] + pxor xmm2,xmm10 + movdqu xmm5,XMMWORD PTR[48+rdi] + pxor xmm3,xmm11 + movdqu xmm6,XMMWORD PTR[64+rdi] + lea rdi,QWORD PTR[80+rdi] + pxor xmm4,xmm12 + pxor xmm5,xmm13 + pxor xmm6,xmm14 + + call _aesni_decrypt6 + + xorps xmm2,xmm10 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + movdqu XMMWORD PTR[rsi],xmm2 + xorps xmm5,xmm13 + movdqu XMMWORD PTR[16+rsi],xmm3 + xorps xmm6,xmm14 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm14,xmm14 + movdqu XMMWORD PTR[48+rsi],xmm5 + pcmpgtd xmm14,xmm15 + movdqu XMMWORD PTR[64+rsi],xmm6 + lea rsi,QWORD PTR[80+rsi] + pshufd xmm11,xmm14,013h + and r9,15 + jz $L$xts_dec_ret + + movdqa xmm10,xmm15 + paddq xmm15,xmm15 + pand xmm11,xmm8 + pxor xmm11,xmm15 + jmp $L$xts_dec_done2 + +ALIGN 16 +$L$xts_dec_one:: + movups xmm2,XMMWORD PTR[rdi] + lea rdi,QWORD PTR[16+rdi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_12:: + aesdec xmm2,xmm1 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_12 + aesdeclast xmm2,xmm1 + xorps xmm2,xmm10 + movdqa xmm10,xmm11 + movups XMMWORD PTR[rsi],xmm2 + movdqa xmm11,xmm12 + lea rsi,QWORD PTR[16+rsi] + jmp $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_two:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + lea rdi,QWORD PTR[32+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + + call _aesni_decrypt3 + + xorps xmm2,xmm10 + movdqa xmm10,xmm12 + xorps xmm3,xmm11 + movdqa xmm11,xmm13 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + lea rsi,QWORD PTR[32+rsi] + jmp $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_three:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + movups xmm4,XMMWORD PTR[32+rdi] + lea rdi,QWORD PTR[48+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + + call _aesni_decrypt3 + + xorps xmm2,xmm10 + movdqa xmm10,xmm13 + xorps xmm3,xmm11 + movdqa xmm11,xmm15 + xorps xmm4,xmm12 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + lea rsi,QWORD PTR[48+rsi] + jmp $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_four:: + pshufd xmm9,xmm14,013h + movdqa xmm14,xmm15 + paddq xmm15,xmm15 + movups xmm2,XMMWORD PTR[rdi] + pand xmm9,xmm8 + movups xmm3,XMMWORD PTR[16+rdi] + pxor xmm15,xmm9 + + movups xmm4,XMMWORD PTR[32+rdi] + xorps xmm2,xmm10 + movups xmm5,XMMWORD PTR[48+rdi] + lea rdi,QWORD PTR[64+rdi] + xorps xmm3,xmm11 + xorps xmm4,xmm12 + xorps xmm5,xmm13 + + call _aesni_decrypt4 + + xorps xmm2,xmm10 + movdqa xmm10,xmm14 + xorps xmm3,xmm11 + movdqa xmm11,xmm15 + xorps xmm4,xmm12 + movups XMMWORD PTR[rsi],xmm2 + xorps xmm5,xmm13 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + lea rsi,QWORD PTR[64+rsi] + jmp $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_done:: + and r9,15 + jz $L$xts_dec_ret +$L$xts_dec_done2:: + mov rdx,r9 + mov rcx,r11 + mov eax,r10d + + movups xmm2,XMMWORD PTR[rdi] + xorps xmm2,xmm11 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_13:: + aesdec xmm2,xmm1 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_13 + aesdeclast xmm2,xmm1 + xorps xmm2,xmm11 + movups XMMWORD PTR[rsi],xmm2 + +$L$xts_dec_steal:: + movzx eax,BYTE PTR[16+rdi] + movzx ecx,BYTE PTR[rsi] + lea rdi,QWORD PTR[1+rdi] + mov BYTE PTR[rsi],al + mov BYTE PTR[16+rsi],cl + lea rsi,QWORD PTR[1+rsi] + sub rdx,1 + jnz $L$xts_dec_steal + + sub rsi,r9 + mov rcx,r11 + mov eax,r10d + + movups xmm2,XMMWORD PTR[rsi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_14:: + aesdec xmm2,xmm1 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_14 + aesdeclast xmm2,xmm1 + xorps xmm2,xmm10 + movups XMMWORD PTR[rsi],xmm2 + +$L$xts_dec_ret:: + movaps xmm6,XMMWORD PTR[96+rsp] + movaps xmm7,XMMWORD PTR[112+rsp] + movaps xmm8,XMMWORD PTR[128+rsp] + movaps xmm9,XMMWORD PTR[144+rsp] + movaps xmm10,XMMWORD PTR[160+rsp] + movaps xmm11,XMMWORD PTR[176+rsp] + movaps xmm12,XMMWORD PTR[192+rsp] + movaps xmm13,XMMWORD PTR[208+rsp] + movaps xmm14,XMMWORD PTR[224+rsp] + movaps xmm15,XMMWORD PTR[240+rsp] + lea rsp,QWORD PTR[264+rsp] +$L$xts_dec_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_xts_decrypt:: +aesni_xts_decrypt ENDP +PUBLIC aesni_cbc_encrypt + +ALIGN 16 +aesni_cbc_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_cbc_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + test rdx,rdx + jz $L$cbc_ret + + mov r10d,DWORD PTR[240+rcx] + mov r11,rcx + test r9d,r9d + jz $L$cbc_decrypt + + movups xmm2,XMMWORD PTR[r8] + mov eax,r10d + cmp rdx,16 + jb $L$cbc_enc_tail + sub rdx,16 + jmp $L$cbc_enc_loop +ALIGN 16 +$L$cbc_enc_loop:: + movups xmm3,XMMWORD PTR[rdi] + lea rdi,QWORD PTR[16+rdi] + + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm3,xmm0 + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm3 +$L$oop_enc1_15:: + aesenc xmm2,xmm1 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_15 + aesenclast xmm2,xmm1 + mov eax,r10d + mov rcx,r11 + movups XMMWORD PTR[rsi],xmm2 + lea rsi,QWORD PTR[16+rsi] + sub rdx,16 + jnc $L$cbc_enc_loop + add rdx,16 + jnz $L$cbc_enc_tail + movups XMMWORD PTR[r8],xmm2 + jmp $L$cbc_ret + +$L$cbc_enc_tail:: + mov rcx,rdx + xchg rsi,rdi + DD 09066A4F3h + mov ecx,16 + sub rcx,rdx + xor eax,eax + DD 09066AAF3h + lea rdi,QWORD PTR[((-16))+rdi] + mov eax,r10d + mov rsi,rdi + mov rcx,r11 + xor rdx,rdx + jmp $L$cbc_enc_loop + +ALIGN 16 +$L$cbc_decrypt:: + lea rsp,QWORD PTR[((-88))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 +$L$cbc_decrypt_body:: + movups xmm9,XMMWORD PTR[r8] + mov eax,r10d + cmp rdx,070h + jbe $L$cbc_dec_tail + shr r10d,1 + sub rdx,070h + mov eax,r10d + movaps XMMWORD PTR[64+rsp],xmm9 + jmp $L$cbc_dec_loop8_enter +ALIGN 16 +$L$cbc_dec_loop8:: + movaps XMMWORD PTR[64+rsp],xmm0 + movups XMMWORD PTR[rsi],xmm9 + lea rsi,QWORD PTR[16+rsi] +$L$cbc_dec_loop8_enter:: + movups xmm0,XMMWORD PTR[rcx] + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + movups xmm1,XMMWORD PTR[16+rcx] + + lea rcx,QWORD PTR[32+rcx] + movdqu xmm4,XMMWORD PTR[32+rdi] + xorps xmm2,xmm0 + movdqu xmm5,XMMWORD PTR[48+rdi] + xorps xmm3,xmm0 + movdqu xmm6,XMMWORD PTR[64+rdi] + aesdec xmm2,xmm1 + pxor xmm4,xmm0 + movdqu xmm7,XMMWORD PTR[80+rdi] + aesdec xmm3,xmm1 + pxor xmm5,xmm0 + movdqu xmm8,XMMWORD PTR[96+rdi] + aesdec xmm4,xmm1 + pxor xmm6,xmm0 + movdqu xmm9,XMMWORD PTR[112+rdi] + aesdec xmm5,xmm1 + pxor xmm7,xmm0 + dec eax + aesdec xmm6,xmm1 + pxor xmm8,xmm0 + aesdec xmm7,xmm1 + pxor xmm9,xmm0 + movups xmm0,XMMWORD PTR[rcx] + aesdec xmm8,xmm1 + aesdec xmm9,xmm1 + movups xmm1,XMMWORD PTR[16+rcx] + + call $L$dec_loop8_enter + + movups xmm1,XMMWORD PTR[rdi] + movups xmm0,XMMWORD PTR[16+rdi] + xorps xmm2,XMMWORD PTR[64+rsp] + xorps xmm3,xmm1 + movups xmm1,XMMWORD PTR[32+rdi] + xorps xmm4,xmm0 + movups xmm0,XMMWORD PTR[48+rdi] + xorps xmm5,xmm1 + movups xmm1,XMMWORD PTR[64+rdi] + xorps xmm6,xmm0 + movups xmm0,XMMWORD PTR[80+rdi] + xorps xmm7,xmm1 + movups xmm1,XMMWORD PTR[96+rdi] + xorps xmm8,xmm0 + movups xmm0,XMMWORD PTR[112+rdi] + xorps xmm9,xmm1 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + mov eax,r10d + movups XMMWORD PTR[64+rsi],xmm6 + mov rcx,r11 + movups XMMWORD PTR[80+rsi],xmm7 + lea rdi,QWORD PTR[128+rdi] + movups XMMWORD PTR[96+rsi],xmm8 + lea rsi,QWORD PTR[112+rsi] + sub rdx,080h + ja $L$cbc_dec_loop8 + + movaps xmm2,xmm9 + movaps xmm9,xmm0 + add rdx,070h + jle $L$cbc_dec_tail_collected + movups XMMWORD PTR[rsi],xmm2 + lea eax,DWORD PTR[1+r10*1+r10] + lea rsi,QWORD PTR[16+rsi] +$L$cbc_dec_tail:: + movups xmm2,XMMWORD PTR[rdi] + movaps xmm8,xmm2 + cmp rdx,010h + jbe $L$cbc_dec_one + + movups xmm3,XMMWORD PTR[16+rdi] + movaps xmm7,xmm3 + cmp rdx,020h + jbe $L$cbc_dec_two + + movups xmm4,XMMWORD PTR[32+rdi] + movaps xmm6,xmm4 + cmp rdx,030h + jbe $L$cbc_dec_three + + movups xmm5,XMMWORD PTR[48+rdi] + cmp rdx,040h + jbe $L$cbc_dec_four + + movups xmm6,XMMWORD PTR[64+rdi] + cmp rdx,050h + jbe $L$cbc_dec_five + + movups xmm7,XMMWORD PTR[80+rdi] + cmp rdx,060h + jbe $L$cbc_dec_six + + movups xmm8,XMMWORD PTR[96+rdi] + movaps XMMWORD PTR[64+rsp],xmm9 + call _aesni_decrypt8 + movups xmm1,XMMWORD PTR[rdi] + movups xmm0,XMMWORD PTR[16+rdi] + xorps xmm2,XMMWORD PTR[64+rsp] + xorps xmm3,xmm1 + movups xmm1,XMMWORD PTR[32+rdi] + xorps xmm4,xmm0 + movups xmm0,XMMWORD PTR[48+rdi] + xorps xmm5,xmm1 + movups xmm1,XMMWORD PTR[64+rdi] + xorps xmm6,xmm0 + movups xmm0,XMMWORD PTR[80+rdi] + xorps xmm7,xmm1 + movups xmm9,XMMWORD PTR[96+rdi] + xorps xmm8,xmm0 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + lea rsi,QWORD PTR[96+rsi] + movaps xmm2,xmm8 + sub rdx,070h + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_one:: + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_16:: + aesdec xmm2,xmm1 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_16 + aesdeclast xmm2,xmm1 + xorps xmm2,xmm9 + movaps xmm9,xmm8 + sub rdx,010h + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_two:: + xorps xmm4,xmm4 + call _aesni_decrypt3 + xorps xmm2,xmm9 + xorps xmm3,xmm8 + movups XMMWORD PTR[rsi],xmm2 + movaps xmm9,xmm7 + movaps xmm2,xmm3 + lea rsi,QWORD PTR[16+rsi] + sub rdx,020h + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_three:: + call _aesni_decrypt3 + xorps xmm2,xmm9 + xorps xmm3,xmm8 + movups XMMWORD PTR[rsi],xmm2 + xorps xmm4,xmm7 + movups XMMWORD PTR[16+rsi],xmm3 + movaps xmm9,xmm6 + movaps xmm2,xmm4 + lea rsi,QWORD PTR[32+rsi] + sub rdx,030h + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_four:: + call _aesni_decrypt4 + xorps xmm2,xmm9 + movups xmm9,XMMWORD PTR[48+rdi] + xorps xmm3,xmm8 + movups XMMWORD PTR[rsi],xmm2 + xorps xmm4,xmm7 + movups XMMWORD PTR[16+rsi],xmm3 + xorps xmm5,xmm6 + movups XMMWORD PTR[32+rsi],xmm4 + movaps xmm2,xmm5 + lea rsi,QWORD PTR[48+rsi] + sub rdx,040h + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_five:: + xorps xmm7,xmm7 + call _aesni_decrypt6 + movups xmm1,XMMWORD PTR[16+rdi] + movups xmm0,XMMWORD PTR[32+rdi] + xorps xmm2,xmm9 + xorps xmm3,xmm8 + xorps xmm4,xmm1 + movups xmm1,XMMWORD PTR[48+rdi] + xorps xmm5,xmm0 + movups xmm9,XMMWORD PTR[64+rdi] + xorps xmm6,xmm1 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + lea rsi,QWORD PTR[64+rsi] + movaps xmm2,xmm6 + sub rdx,050h + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_six:: + call _aesni_decrypt6 + movups xmm1,XMMWORD PTR[16+rdi] + movups xmm0,XMMWORD PTR[32+rdi] + xorps xmm2,xmm9 + xorps xmm3,xmm8 + xorps xmm4,xmm1 + movups xmm1,XMMWORD PTR[48+rdi] + xorps xmm5,xmm0 + movups xmm0,XMMWORD PTR[64+rdi] + xorps xmm6,xmm1 + movups xmm9,XMMWORD PTR[80+rdi] + xorps xmm7,xmm0 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + lea rsi,QWORD PTR[80+rsi] + movaps xmm2,xmm7 + sub rdx,060h + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_tail_collected:: + and rdx,15 + movups XMMWORD PTR[r8],xmm9 + jnz $L$cbc_dec_tail_partial + movups XMMWORD PTR[rsi],xmm2 + jmp $L$cbc_dec_ret +ALIGN 16 +$L$cbc_dec_tail_partial:: + movaps XMMWORD PTR[64+rsp],xmm2 + mov rcx,16 + mov rdi,rsi + sub rcx,rdx + lea rsi,QWORD PTR[64+rsp] + DD 09066A4F3h + +$L$cbc_dec_ret:: + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + lea rsp,QWORD PTR[88+rsp] +$L$cbc_ret:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_cbc_encrypt:: +aesni_cbc_encrypt ENDP +PUBLIC aesni_set_decrypt_key + +ALIGN 16 +aesni_set_decrypt_key PROC PUBLIC + sub rsp,8 + call __aesni_set_encrypt_key + shl edx,4 + test eax,eax + jnz $L$dec_key_ret + lea rcx,QWORD PTR[16+rdx*1+r8] + + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[rcx] + movups XMMWORD PTR[rcx],xmm0 + movups XMMWORD PTR[r8],xmm1 + lea r8,QWORD PTR[16+r8] + lea rcx,QWORD PTR[((-16))+rcx] + +$L$dec_key_inverse:: + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[rcx] + aesimc xmm0,xmm0 + aesimc xmm1,xmm1 + lea r8,QWORD PTR[16+r8] + lea rcx,QWORD PTR[((-16))+rcx] + movups XMMWORD PTR[16+rcx],xmm0 + movups XMMWORD PTR[(-16)+r8],xmm1 + cmp rcx,r8 + ja $L$dec_key_inverse + + movups xmm0,XMMWORD PTR[r8] + aesimc xmm0,xmm0 + movups XMMWORD PTR[rcx],xmm0 +$L$dec_key_ret:: + add rsp,8 + DB 0F3h,0C3h ;repret +$L$SEH_end_set_decrypt_key:: +aesni_set_decrypt_key ENDP +PUBLIC aesni_set_encrypt_key + +ALIGN 16 +aesni_set_encrypt_key PROC PUBLIC +__aesni_set_encrypt_key:: + sub rsp,8 + mov rax,-1 + test rcx,rcx + jz $L$enc_key_ret + test r8,r8 + jz $L$enc_key_ret + + movups xmm0,XMMWORD PTR[rcx] + xorps xmm4,xmm4 + lea rax,QWORD PTR[16+r8] + cmp edx,256 + je $L$14rounds + cmp edx,192 + je $L$12rounds + cmp edx,128 + jne $L$bad_keybits + +$L$10rounds:: + mov edx,9 + movups XMMWORD PTR[r8],xmm0 + aeskeygenassist xmm1,xmm0,01h + call $L$key_expansion_128_cold + aeskeygenassist xmm1,xmm0,02h + call $L$key_expansion_128 + aeskeygenassist xmm1,xmm0,04h + call $L$key_expansion_128 + aeskeygenassist xmm1,xmm0,08h + call $L$key_expansion_128 + aeskeygenassist xmm1,xmm0,010h + call $L$key_expansion_128 + aeskeygenassist xmm1,xmm0,020h + call $L$key_expansion_128 + aeskeygenassist xmm1,xmm0,040h + call $L$key_expansion_128 + aeskeygenassist xmm1,xmm0,080h + call $L$key_expansion_128 + aeskeygenassist xmm1,xmm0,01bh + call $L$key_expansion_128 + aeskeygenassist xmm1,xmm0,036h + call $L$key_expansion_128 + movups XMMWORD PTR[rax],xmm0 + mov DWORD PTR[80+rax],edx + xor eax,eax + jmp $L$enc_key_ret + +ALIGN 16 +$L$12rounds:: + movq xmm2,QWORD PTR[16+rcx] + mov edx,11 + movups XMMWORD PTR[r8],xmm0 + aeskeygenassist xmm1,xmm2,01h + call $L$key_expansion_192a_cold + aeskeygenassist xmm1,xmm2,02h + call $L$key_expansion_192b + aeskeygenassist xmm1,xmm2,04h + call $L$key_expansion_192a + aeskeygenassist xmm1,xmm2,08h + call $L$key_expansion_192b + aeskeygenassist xmm1,xmm2,010h + call $L$key_expansion_192a + aeskeygenassist xmm1,xmm2,020h + call $L$key_expansion_192b + aeskeygenassist xmm1,xmm2,040h + call $L$key_expansion_192a + aeskeygenassist xmm1,xmm2,080h + call $L$key_expansion_192b + movups XMMWORD PTR[rax],xmm0 + mov DWORD PTR[48+rax],edx + xor rax,rax + jmp $L$enc_key_ret + +ALIGN 16 +$L$14rounds:: + movups xmm2,XMMWORD PTR[16+rcx] + mov edx,13 + lea rax,QWORD PTR[16+rax] + movups XMMWORD PTR[r8],xmm0 + movups XMMWORD PTR[16+r8],xmm2 + aeskeygenassist xmm1,xmm2,01h + call $L$key_expansion_256a_cold + aeskeygenassist xmm1,xmm0,01h + call $L$key_expansion_256b + aeskeygenassist xmm1,xmm2,02h + call $L$key_expansion_256a + aeskeygenassist xmm1,xmm0,02h + call $L$key_expansion_256b + aeskeygenassist xmm1,xmm2,04h + call $L$key_expansion_256a + aeskeygenassist xmm1,xmm0,04h + call $L$key_expansion_256b + aeskeygenassist xmm1,xmm2,08h + call $L$key_expansion_256a + aeskeygenassist xmm1,xmm0,08h + call $L$key_expansion_256b + aeskeygenassist xmm1,xmm2,010h + call $L$key_expansion_256a + aeskeygenassist xmm1,xmm0,010h + call $L$key_expansion_256b + aeskeygenassist xmm1,xmm2,020h + call $L$key_expansion_256a + aeskeygenassist xmm1,xmm0,020h + call $L$key_expansion_256b + aeskeygenassist xmm1,xmm2,040h + call $L$key_expansion_256a + movups XMMWORD PTR[rax],xmm0 + mov DWORD PTR[16+rax],edx + xor rax,rax + jmp $L$enc_key_ret + +ALIGN 16 +$L$bad_keybits:: + mov rax,-2 +$L$enc_key_ret:: + add rsp,8 + DB 0F3h,0C3h ;repret +$L$SEH_end_set_encrypt_key:: + +ALIGN 16 +$L$key_expansion_128:: + movups XMMWORD PTR[rax],xmm0 + lea rax,QWORD PTR[16+rax] +$L$key_expansion_128_cold:: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + DB 0F3h,0C3h ;repret + +ALIGN 16 +$L$key_expansion_192a:: + movups XMMWORD PTR[rax],xmm0 + lea rax,QWORD PTR[16+rax] +$L$key_expansion_192a_cold:: + movaps xmm5,xmm2 +$L$key_expansion_192b_warm:: + shufps xmm4,xmm0,16 + movdqa xmm3,xmm2 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + pslldq xmm3,4 + xorps xmm0,xmm4 + pshufd xmm1,xmm1,85 + pxor xmm2,xmm3 + pxor xmm0,xmm1 + pshufd xmm3,xmm0,255 + pxor xmm2,xmm3 + DB 0F3h,0C3h ;repret + +ALIGN 16 +$L$key_expansion_192b:: + movaps xmm3,xmm0 + shufps xmm5,xmm0,68 + movups XMMWORD PTR[rax],xmm5 + shufps xmm3,xmm2,78 + movups XMMWORD PTR[16+rax],xmm3 + lea rax,QWORD PTR[32+rax] + jmp $L$key_expansion_192b_warm + +ALIGN 16 +$L$key_expansion_256a:: + movups XMMWORD PTR[rax],xmm2 + lea rax,QWORD PTR[16+rax] +$L$key_expansion_256a_cold:: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + DB 0F3h,0C3h ;repret + +ALIGN 16 +$L$key_expansion_256b:: + movups XMMWORD PTR[rax],xmm0 + lea rax,QWORD PTR[16+rax] + + shufps xmm4,xmm2,16 + xorps xmm2,xmm4 + shufps xmm4,xmm2,140 + xorps xmm2,xmm4 + shufps xmm1,xmm1,170 + xorps xmm2,xmm1 + DB 0F3h,0C3h ;repret +aesni_set_encrypt_key ENDP + +ALIGN 64 +$L$bswap_mask:: +DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +$L$increment32:: + DD 6,6,6,0 +$L$increment64:: + DD 1,0,0,0 +$L$xts_magic:: + DD 087h,0,1,0 + +DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +DB 115,108,46,111,114,103,62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +ecb_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[152+r8] + + jmp $L$common_seh_tail +ecb_se_handler ENDP + + +ALIGN 16 +ccm64_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,8 + DD 0a548f3fch + lea rax,QWORD PTR[88+rax] + + jmp $L$common_seh_tail +ccm64_se_handler ENDP + + +ALIGN 16 +ctr32_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$ctr32_body] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$ctr32_ret] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[32+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + lea rax,QWORD PTR[200+rax] + + jmp $L$common_seh_tail +ctr32_se_handler ENDP + + +ALIGN 16 +xts_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[96+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + lea rax,QWORD PTR[((104+160))+rax] + + jmp $L$common_seh_tail +xts_se_handler ENDP + +ALIGN 16 +cbc_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[152+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$cbc_decrypt] + cmp rbx,r10 + jb $L$common_seh_tail + + lea r10,QWORD PTR[$L$cbc_decrypt_body] + cmp rbx,r10 + jb $L$restore_cbc_rax + + lea r10,QWORD PTR[$L$cbc_ret] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,8 + DD 0a548f3fch + lea rax,QWORD PTR[88+rax] + jmp $L$common_seh_tail + +$L$restore_cbc_rax:: + mov rax,QWORD PTR[120+r8] + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +cbc_se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_aesni_ecb_encrypt + DD imagerel $L$SEH_end_aesni_ecb_encrypt + DD imagerel $L$SEH_info_ecb + + DD imagerel $L$SEH_begin_aesni_ccm64_encrypt_blocks + DD imagerel $L$SEH_end_aesni_ccm64_encrypt_blocks + DD imagerel $L$SEH_info_ccm64_enc + + DD imagerel $L$SEH_begin_aesni_ccm64_decrypt_blocks + DD imagerel $L$SEH_end_aesni_ccm64_decrypt_blocks + DD imagerel $L$SEH_info_ccm64_dec + + DD imagerel $L$SEH_begin_aesni_ctr32_encrypt_blocks + DD imagerel $L$SEH_end_aesni_ctr32_encrypt_blocks + DD imagerel $L$SEH_info_ctr32 + + DD imagerel $L$SEH_begin_aesni_xts_encrypt + DD imagerel $L$SEH_end_aesni_xts_encrypt + DD imagerel $L$SEH_info_xts_enc + + DD imagerel $L$SEH_begin_aesni_xts_decrypt + DD imagerel $L$SEH_end_aesni_xts_decrypt + DD imagerel $L$SEH_info_xts_dec + DD imagerel $L$SEH_begin_aesni_cbc_encrypt + DD imagerel $L$SEH_end_aesni_cbc_encrypt + DD imagerel $L$SEH_info_cbc + + DD imagerel aesni_set_decrypt_key + DD imagerel $L$SEH_end_set_decrypt_key + DD imagerel $L$SEH_info_key + + DD imagerel aesni_set_encrypt_key + DD imagerel $L$SEH_end_set_encrypt_key + DD imagerel $L$SEH_info_key +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_ecb:: +DB 9,0,0,0 + DD imagerel ecb_se_handler +$L$SEH_info_ccm64_enc:: +DB 9,0,0,0 + DD imagerel ccm64_se_handler + DD imagerel $L$ccm64_enc_body,imagerel $L$ccm64_enc_ret +$L$SEH_info_ccm64_dec:: +DB 9,0,0,0 + DD imagerel ccm64_se_handler + DD imagerel $L$ccm64_dec_body,imagerel $L$ccm64_dec_ret +$L$SEH_info_ctr32:: +DB 9,0,0,0 + DD imagerel ctr32_se_handler +$L$SEH_info_xts_enc:: +DB 9,0,0,0 + DD imagerel xts_se_handler + DD imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue +$L$SEH_info_xts_dec:: +DB 9,0,0,0 + DD imagerel xts_se_handler + DD imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue +$L$SEH_info_cbc:: +DB 9,0,0,0 + DD imagerel cbc_se_handler +$L$SEH_info_key:: +DB 001h,004h,001h,000h +DB 004h,002h,000h,000h + +.xdata ENDS +END + diff --git a/ext/libressl/crypto/aes/aesni-mingw64-x86_64.S b/ext/libressl/crypto/aes/aesni-mingw64-x86_64.S new file mode 100644 index 0000000..0a82a10 --- /dev/null +++ b/ext/libressl/crypto/aes/aesni-mingw64-x86_64.S @@ -0,0 +1,3008 @@ +#include "x86_arch.h" +.text +.globl aesni_encrypt +.def aesni_encrypt; .scl 2; .type 32; .endef +.p2align 4 +aesni_encrypt: + movups (%rcx),%xmm2 + movl 240(%r8),%eax + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm2 +.Loop_enc1_1: + aesenc %xmm1,%xmm2 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz .Loop_enc1_1 + aesenclast %xmm1,%xmm2 + movups %xmm2,(%rdx) + retq + + +.globl aesni_decrypt +.def aesni_decrypt; .scl 2; .type 32; .endef +.p2align 4 +aesni_decrypt: + movups (%rcx),%xmm2 + movl 240(%r8),%eax + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm2 +.Loop_dec1_2: + aesdec %xmm1,%xmm2 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz .Loop_dec1_2 + aesdeclast %xmm1,%xmm2 + movups %xmm2,(%rdx) + retq + +.def _aesni_encrypt3; .scl 3; .type 32; .endef +.p2align 4 +_aesni_encrypt3: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups (%rcx),%xmm0 + +.Lenc_loop3: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + movups (%rcx),%xmm0 + jnz .Lenc_loop3 + + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + aesenc %xmm1,%xmm4 + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + aesenclast %xmm0,%xmm4 + retq + +.def _aesni_decrypt3; .scl 3; .type 32; .endef +.p2align 4 +_aesni_decrypt3: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups (%rcx),%xmm0 + +.Ldec_loop3: + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + decl %eax + aesdec %xmm1,%xmm4 + movups 16(%rcx),%xmm1 + aesdec %xmm0,%xmm2 + aesdec %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesdec %xmm0,%xmm4 + movups (%rcx),%xmm0 + jnz .Ldec_loop3 + + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + aesdec %xmm1,%xmm4 + aesdeclast %xmm0,%xmm2 + aesdeclast %xmm0,%xmm3 + aesdeclast %xmm0,%xmm4 + retq + +.def _aesni_encrypt4; .scl 3; .type 32; .endef +.p2align 4 +_aesni_encrypt4: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups (%rcx),%xmm0 + +.Lenc_loop4: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + aesenc %xmm0,%xmm5 + movups (%rcx),%xmm0 + jnz .Lenc_loop4 + + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + aesenclast %xmm0,%xmm4 + aesenclast %xmm0,%xmm5 + retq + +.def _aesni_decrypt4; .scl 3; .type 32; .endef +.p2align 4 +_aesni_decrypt4: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups (%rcx),%xmm0 + +.Ldec_loop4: + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + decl %eax + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + movups 16(%rcx),%xmm1 + aesdec %xmm0,%xmm2 + aesdec %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesdec %xmm0,%xmm4 + aesdec %xmm0,%xmm5 + movups (%rcx),%xmm0 + jnz .Ldec_loop4 + + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdeclast %xmm0,%xmm2 + aesdeclast %xmm0,%xmm3 + aesdeclast %xmm0,%xmm4 + aesdeclast %xmm0,%xmm5 + retq + +.def _aesni_encrypt6; .scl 3; .type 32; .endef +.p2align 4 +_aesni_encrypt6: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + aesenc %xmm1,%xmm2 + pxor %xmm0,%xmm4 + aesenc %xmm1,%xmm3 + pxor %xmm0,%xmm5 + aesenc %xmm1,%xmm4 + pxor %xmm0,%xmm6 + aesenc %xmm1,%xmm5 + pxor %xmm0,%xmm7 + decl %eax + aesenc %xmm1,%xmm6 + movups (%rcx),%xmm0 + aesenc %xmm1,%xmm7 + jmp .Lenc_loop6_enter +.p2align 4 +.Lenc_loop6: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 +.Lenc_loop6_enter: + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + aesenc %xmm0,%xmm5 + aesenc %xmm0,%xmm6 + aesenc %xmm0,%xmm7 + movups (%rcx),%xmm0 + jnz .Lenc_loop6 + + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + aesenclast %xmm0,%xmm4 + aesenclast %xmm0,%xmm5 + aesenclast %xmm0,%xmm6 + aesenclast %xmm0,%xmm7 + retq + +.def _aesni_decrypt6; .scl 3; .type 32; .endef +.p2align 4 +_aesni_decrypt6: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + aesdec %xmm1,%xmm2 + pxor %xmm0,%xmm4 + aesdec %xmm1,%xmm3 + pxor %xmm0,%xmm5 + aesdec %xmm1,%xmm4 + pxor %xmm0,%xmm6 + aesdec %xmm1,%xmm5 + pxor %xmm0,%xmm7 + decl %eax + aesdec %xmm1,%xmm6 + movups (%rcx),%xmm0 + aesdec %xmm1,%xmm7 + jmp .Ldec_loop6_enter +.p2align 4 +.Ldec_loop6: + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + decl %eax + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 +.Ldec_loop6_enter: + movups 16(%rcx),%xmm1 + aesdec %xmm0,%xmm2 + aesdec %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesdec %xmm0,%xmm4 + aesdec %xmm0,%xmm5 + aesdec %xmm0,%xmm6 + aesdec %xmm0,%xmm7 + movups (%rcx),%xmm0 + jnz .Ldec_loop6 + + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 + aesdeclast %xmm0,%xmm2 + aesdeclast %xmm0,%xmm3 + aesdeclast %xmm0,%xmm4 + aesdeclast %xmm0,%xmm5 + aesdeclast %xmm0,%xmm6 + aesdeclast %xmm0,%xmm7 + retq + +.def _aesni_encrypt8; .scl 3; .type 32; .endef +.p2align 4 +_aesni_encrypt8: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + aesenc %xmm1,%xmm2 + pxor %xmm0,%xmm4 + aesenc %xmm1,%xmm3 + pxor %xmm0,%xmm5 + aesenc %xmm1,%xmm4 + pxor %xmm0,%xmm6 + aesenc %xmm1,%xmm5 + pxor %xmm0,%xmm7 + decl %eax + aesenc %xmm1,%xmm6 + pxor %xmm0,%xmm8 + aesenc %xmm1,%xmm7 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 + aesenc %xmm1,%xmm8 + aesenc %xmm1,%xmm9 + movups 16(%rcx),%xmm1 + jmp .Lenc_loop8_enter +.p2align 4 +.Lenc_loop8: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + aesenc %xmm1,%xmm8 + aesenc %xmm1,%xmm9 + movups 16(%rcx),%xmm1 +.Lenc_loop8_enter: + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + aesenc %xmm0,%xmm5 + aesenc %xmm0,%xmm6 + aesenc %xmm0,%xmm7 + aesenc %xmm0,%xmm8 + aesenc %xmm0,%xmm9 + movups (%rcx),%xmm0 + jnz .Lenc_loop8 + + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + aesenc %xmm1,%xmm8 + aesenc %xmm1,%xmm9 + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + aesenclast %xmm0,%xmm4 + aesenclast %xmm0,%xmm5 + aesenclast %xmm0,%xmm6 + aesenclast %xmm0,%xmm7 + aesenclast %xmm0,%xmm8 + aesenclast %xmm0,%xmm9 + retq + +.def _aesni_decrypt8; .scl 3; .type 32; .endef +.p2align 4 +_aesni_decrypt8: + movups (%rcx),%xmm0 + shrl $1,%eax + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + aesdec %xmm1,%xmm2 + pxor %xmm0,%xmm4 + aesdec %xmm1,%xmm3 + pxor %xmm0,%xmm5 + aesdec %xmm1,%xmm4 + pxor %xmm0,%xmm6 + aesdec %xmm1,%xmm5 + pxor %xmm0,%xmm7 + decl %eax + aesdec %xmm1,%xmm6 + pxor %xmm0,%xmm8 + aesdec %xmm1,%xmm7 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 + aesdec %xmm1,%xmm8 + aesdec %xmm1,%xmm9 + movups 16(%rcx),%xmm1 + jmp .Ldec_loop8_enter +.p2align 4 +.Ldec_loop8: + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + decl %eax + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 + aesdec %xmm1,%xmm8 + aesdec %xmm1,%xmm9 + movups 16(%rcx),%xmm1 +.Ldec_loop8_enter: + aesdec %xmm0,%xmm2 + aesdec %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesdec %xmm0,%xmm4 + aesdec %xmm0,%xmm5 + aesdec %xmm0,%xmm6 + aesdec %xmm0,%xmm7 + aesdec %xmm0,%xmm8 + aesdec %xmm0,%xmm9 + movups (%rcx),%xmm0 + jnz .Ldec_loop8 + + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 + aesdec %xmm1,%xmm8 + aesdec %xmm1,%xmm9 + aesdeclast %xmm0,%xmm2 + aesdeclast %xmm0,%xmm3 + aesdeclast %xmm0,%xmm4 + aesdeclast %xmm0,%xmm5 + aesdeclast %xmm0,%xmm6 + aesdeclast %xmm0,%xmm7 + aesdeclast %xmm0,%xmm8 + aesdeclast %xmm0,%xmm9 + retq + +.globl aesni_ecb_encrypt +.def aesni_ecb_encrypt; .scl 2; .type 32; .endef +.p2align 4 +aesni_ecb_encrypt: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_aesni_ecb_encrypt: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 + + andq $-16,%rdx + jz .Lecb_ret + + movl 240(%rcx),%eax + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %eax,%r10d + testl %r8d,%r8d + jz .Lecb_decrypt + + cmpq $128,%rdx + jb .Lecb_enc_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp .Lecb_enc_loop8_enter +.p2align 4 +.Lecb_enc_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +.Lecb_enc_loop8_enter: + + call _aesni_encrypt8 + + subq $128,%rdx + jnc .Lecb_enc_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $128,%rdx + jz .Lecb_ret + +.Lecb_enc_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb .Lecb_enc_one + movups 16(%rdi),%xmm3 + je .Lecb_enc_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb .Lecb_enc_three + movups 48(%rdi),%xmm5 + je .Lecb_enc_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb .Lecb_enc_five + movups 80(%rdi),%xmm7 + je .Lecb_enc_six + movdqu 96(%rdi),%xmm8 + call _aesni_encrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp .Lecb_ret +.p2align 4 +.Lecb_enc_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_3: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_3 + aesenclast %xmm1,%xmm2 + movups %xmm2,(%rsi) + jmp .Lecb_ret +.p2align 4 +.Lecb_enc_two: + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp .Lecb_ret +.p2align 4 +.Lecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp .Lecb_ret +.p2align 4 +.Lecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp .Lecb_ret +.p2align 4 +.Lecb_enc_five: + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp .Lecb_ret +.p2align 4 +.Lecb_enc_six: + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + jmp .Lecb_ret + +.p2align 4 +.Lecb_decrypt: + cmpq $128,%rdx + jb .Lecb_dec_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp .Lecb_dec_loop8_enter +.p2align 4 +.Lecb_dec_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +.Lecb_dec_loop8_enter: + + call _aesni_decrypt8 + + movups (%r11),%xmm0 + subq $128,%rdx + jnc .Lecb_dec_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $128,%rdx + jz .Lecb_ret + +.Lecb_dec_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb .Lecb_dec_one + movups 16(%rdi),%xmm3 + je .Lecb_dec_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb .Lecb_dec_three + movups 48(%rdi),%xmm5 + je .Lecb_dec_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb .Lecb_dec_five + movups 80(%rdi),%xmm7 + je .Lecb_dec_six + movups 96(%rdi),%xmm8 + movups (%rcx),%xmm0 + call _aesni_decrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp .Lecb_ret +.p2align 4 +.Lecb_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_4: + aesdec %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_4 + aesdeclast %xmm1,%xmm2 + movups %xmm2,(%rsi) + jmp .Lecb_ret +.p2align 4 +.Lecb_dec_two: + xorps %xmm4,%xmm4 + call _aesni_decrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp .Lecb_ret +.p2align 4 +.Lecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp .Lecb_ret +.p2align 4 +.Lecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp .Lecb_ret +.p2align 4 +.Lecb_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp .Lecb_ret +.p2align 4 +.Lecb_dec_six: + call _aesni_decrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + +.Lecb_ret: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_aesni_ecb_encrypt: +.globl aesni_ccm64_encrypt_blocks +.def aesni_ccm64_encrypt_blocks; .scl 2; .type 32; .endef +.p2align 4 +aesni_ccm64_encrypt_blocks: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_aesni_ccm64_encrypt_blocks: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 + movq 48(%rsp),%r9 + + leaq -88(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,16(%rsp) + movaps %xmm8,32(%rsp) + movaps %xmm9,48(%rsp) +.Lccm64_enc_body: + movl 240(%rcx),%eax + movdqu (%r8),%xmm9 + movdqa .Lincrement64(%rip),%xmm6 + movdqa .Lbswap_mask(%rip),%xmm7 + + shrl $1,%eax + leaq 0(%rcx),%r11 + movdqu (%r9),%xmm3 + movdqa %xmm9,%xmm2 + movl %eax,%r10d +.byte 102,68,15,56,0,207 + jmp .Lccm64_enc_outer +.p2align 4 +.Lccm64_enc_outer: + movups (%r11),%xmm0 + movl %r10d,%eax + movups (%rdi),%xmm8 + + xorps %xmm0,%xmm2 + movups 16(%r11),%xmm1 + xorps %xmm8,%xmm0 + leaq 32(%r11),%rcx + xorps %xmm0,%xmm3 + movups (%rcx),%xmm0 + +.Lccm64_enc2_loop: + aesenc %xmm1,%xmm2 + decl %eax + aesenc %xmm1,%xmm3 + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm3 + movups 0(%rcx),%xmm0 + jnz .Lccm64_enc2_loop + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + paddq %xmm6,%xmm9 + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + + decq %rdx + leaq 16(%rdi),%rdi + xorps %xmm2,%xmm8 + movdqa %xmm9,%xmm2 + movups %xmm8,(%rsi) + leaq 16(%rsi),%rsi +.byte 102,15,56,0,215 + jnz .Lccm64_enc_outer + + movups %xmm3,(%r9) + movaps (%rsp),%xmm6 + movaps 16(%rsp),%xmm7 + movaps 32(%rsp),%xmm8 + movaps 48(%rsp),%xmm9 + leaq 88(%rsp),%rsp +.Lccm64_enc_ret: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_aesni_ccm64_encrypt_blocks: +.globl aesni_ccm64_decrypt_blocks +.def aesni_ccm64_decrypt_blocks; .scl 2; .type 32; .endef +.p2align 4 +aesni_ccm64_decrypt_blocks: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_aesni_ccm64_decrypt_blocks: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 + movq 48(%rsp),%r9 + + leaq -88(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,16(%rsp) + movaps %xmm8,32(%rsp) + movaps %xmm9,48(%rsp) +.Lccm64_dec_body: + movl 240(%rcx),%eax + movups (%r8),%xmm9 + movdqu (%r9),%xmm3 + movdqa .Lincrement64(%rip),%xmm6 + movdqa .Lbswap_mask(%rip),%xmm7 + + movaps %xmm9,%xmm2 + movl %eax,%r10d + movq %rcx,%r11 +.byte 102,68,15,56,0,207 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_5: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_5 + aesenclast %xmm1,%xmm2 + movups (%rdi),%xmm8 + paddq %xmm6,%xmm9 + leaq 16(%rdi),%rdi + jmp .Lccm64_dec_outer +.p2align 4 +.Lccm64_dec_outer: + xorps %xmm2,%xmm8 + movdqa %xmm9,%xmm2 + movl %r10d,%eax + movups %xmm8,(%rsi) + leaq 16(%rsi),%rsi +.byte 102,15,56,0,215 + + subq $1,%rdx + jz .Lccm64_dec_break + + movups (%r11),%xmm0 + shrl $1,%eax + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + leaq 32(%r11),%rcx + xorps %xmm0,%xmm2 + xorps %xmm8,%xmm3 + movups (%rcx),%xmm0 + +.Lccm64_dec2_loop: + aesenc %xmm1,%xmm2 + decl %eax + aesenc %xmm1,%xmm3 + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm3 + movups 0(%rcx),%xmm0 + jnz .Lccm64_dec2_loop + movups (%rdi),%xmm8 + paddq %xmm6,%xmm9 + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + leaq 16(%rdi),%rdi + aesenclast %xmm0,%xmm2 + aesenclast %xmm0,%xmm3 + jmp .Lccm64_dec_outer + +.p2align 4 +.Lccm64_dec_break: + + movups (%r11),%xmm0 + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + leaq 32(%r11),%r11 + xorps %xmm8,%xmm3 +.Loop_enc1_6: + aesenc %xmm1,%xmm3 + decl %eax + movups (%r11),%xmm1 + leaq 16(%r11),%r11 + jnz .Loop_enc1_6 + aesenclast %xmm1,%xmm3 + movups %xmm3,(%r9) + movaps (%rsp),%xmm6 + movaps 16(%rsp),%xmm7 + movaps 32(%rsp),%xmm8 + movaps 48(%rsp),%xmm9 + leaq 88(%rsp),%rsp +.Lccm64_dec_ret: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_aesni_ccm64_decrypt_blocks: +.globl aesni_ctr32_encrypt_blocks +.def aesni_ctr32_encrypt_blocks; .scl 2; .type 32; .endef +.p2align 4 +aesni_ctr32_encrypt_blocks: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_aesni_ctr32_encrypt_blocks: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 + + leaq -200(%rsp),%rsp + movaps %xmm6,32(%rsp) + movaps %xmm7,48(%rsp) + movaps %xmm8,64(%rsp) + movaps %xmm9,80(%rsp) + movaps %xmm10,96(%rsp) + movaps %xmm11,112(%rsp) + movaps %xmm12,128(%rsp) + movaps %xmm13,144(%rsp) + movaps %xmm14,160(%rsp) + movaps %xmm15,176(%rsp) +.Lctr32_body: + cmpq $1,%rdx + je .Lctr32_one_shortcut + + movdqu (%r8),%xmm14 + movdqa .Lbswap_mask(%rip),%xmm15 + xorl %eax,%eax +.byte 102,69,15,58,22,242,3 +.byte 102,68,15,58,34,240,3 + + movl 240(%rcx),%eax + bswapl %r10d + pxor %xmm12,%xmm12 + pxor %xmm13,%xmm13 +.byte 102,69,15,58,34,226,0 + leaq 3(%r10),%r11 +.byte 102,69,15,58,34,235,0 + incl %r10d +.byte 102,69,15,58,34,226,1 + incq %r11 +.byte 102,69,15,58,34,235,1 + incl %r10d +.byte 102,69,15,58,34,226,2 + incq %r11 +.byte 102,69,15,58,34,235,2 + movdqa %xmm12,0(%rsp) +.byte 102,69,15,56,0,231 + movdqa %xmm13,16(%rsp) +.byte 102,69,15,56,0,239 + + pshufd $192,%xmm12,%xmm2 + pshufd $128,%xmm12,%xmm3 + pshufd $64,%xmm12,%xmm4 + cmpq $6,%rdx + jb .Lctr32_tail + shrl $1,%eax + movq %rcx,%r11 + movl %eax,%r10d + subq $6,%rdx + jmp .Lctr32_loop6 + +.p2align 4 +.Lctr32_loop6: + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm2 + movups (%r11),%xmm0 + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm3 + movups 16(%r11),%xmm1 + pshufd $64,%xmm13,%xmm7 + por %xmm14,%xmm4 + por %xmm14,%xmm5 + xorps %xmm0,%xmm2 + por %xmm14,%xmm6 + por %xmm14,%xmm7 + + + + + pxor %xmm0,%xmm3 + aesenc %xmm1,%xmm2 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 + aesenc %xmm1,%xmm3 + movdqa .Lincrement32(%rip),%xmm13 + pxor %xmm0,%xmm5 + aesenc %xmm1,%xmm4 + movdqa 0(%rsp),%xmm12 + pxor %xmm0,%xmm6 + aesenc %xmm1,%xmm5 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + jmp .Lctr32_enc_loop6_enter +.p2align 4 +.Lctr32_enc_loop6: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 +.Lctr32_enc_loop6_enter: + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + aesenc %xmm0,%xmm5 + aesenc %xmm0,%xmm6 + aesenc %xmm0,%xmm7 + movups (%rcx),%xmm0 + jnz .Lctr32_enc_loop6 + + aesenc %xmm1,%xmm2 + paddd %xmm13,%xmm12 + aesenc %xmm1,%xmm3 + paddd 16(%rsp),%xmm13 + aesenc %xmm1,%xmm4 + movdqa %xmm12,0(%rsp) + aesenc %xmm1,%xmm5 + movdqa %xmm13,16(%rsp) + aesenc %xmm1,%xmm6 +.byte 102,69,15,56,0,231 + aesenc %xmm1,%xmm7 +.byte 102,69,15,56,0,239 + + aesenclast %xmm0,%xmm2 + movups (%rdi),%xmm8 + aesenclast %xmm0,%xmm3 + movups 16(%rdi),%xmm9 + aesenclast %xmm0,%xmm4 + movups 32(%rdi),%xmm10 + aesenclast %xmm0,%xmm5 + movups 48(%rdi),%xmm11 + aesenclast %xmm0,%xmm6 + movups 64(%rdi),%xmm1 + aesenclast %xmm0,%xmm7 + movups 80(%rdi),%xmm0 + leaq 96(%rdi),%rdi + + xorps %xmm2,%xmm8 + pshufd $192,%xmm12,%xmm2 + xorps %xmm3,%xmm9 + pshufd $128,%xmm12,%xmm3 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + pshufd $64,%xmm12,%xmm4 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + xorps %xmm7,%xmm0 + movups %xmm1,64(%rsi) + movups %xmm0,80(%rsi) + leaq 96(%rsi),%rsi + movl %r10d,%eax + subq $6,%rdx + jnc .Lctr32_loop6 + + addq $6,%rdx + jz .Lctr32_done + movq %r11,%rcx + leal 1(%rax,%rax,1),%eax + +.Lctr32_tail: + por %xmm14,%xmm2 + movups (%rdi),%xmm8 + cmpq $2,%rdx + jb .Lctr32_one + + por %xmm14,%xmm3 + movups 16(%rdi),%xmm9 + je .Lctr32_two + + pshufd $192,%xmm13,%xmm5 + por %xmm14,%xmm4 + movups 32(%rdi),%xmm10 + cmpq $4,%rdx + jb .Lctr32_three + + pshufd $128,%xmm13,%xmm6 + por %xmm14,%xmm5 + movups 48(%rdi),%xmm11 + je .Lctr32_four + + por %xmm14,%xmm6 + xorps %xmm7,%xmm7 + + call _aesni_encrypt6 + + movups 64(%rdi),%xmm1 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + xorps %xmm6,%xmm1 + movups %xmm11,48(%rsi) + movups %xmm1,64(%rsi) + jmp .Lctr32_done + +.p2align 4 +.Lctr32_one_shortcut: + movups (%r8),%xmm2 + movups (%rdi),%xmm8 + movl 240(%rcx),%eax +.Lctr32_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_7: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_7 + aesenclast %xmm1,%xmm2 + xorps %xmm2,%xmm8 + movups %xmm8,(%rsi) + jmp .Lctr32_done + +.p2align 4 +.Lctr32_two: + xorps %xmm4,%xmm4 + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + movups %xmm9,16(%rsi) + jmp .Lctr32_done + +.p2align 4 +.Lctr32_three: + call _aesni_encrypt3 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + movups %xmm10,32(%rsi) + jmp .Lctr32_done + +.p2align 4 +.Lctr32_four: + call _aesni_encrypt4 + xorps %xmm2,%xmm8 + xorps %xmm3,%xmm9 + movups %xmm8,(%rsi) + xorps %xmm4,%xmm10 + movups %xmm9,16(%rsi) + xorps %xmm5,%xmm11 + movups %xmm10,32(%rsi) + movups %xmm11,48(%rsi) + +.Lctr32_done: + movaps 32(%rsp),%xmm6 + movaps 48(%rsp),%xmm7 + movaps 64(%rsp),%xmm8 + movaps 80(%rsp),%xmm9 + movaps 96(%rsp),%xmm10 + movaps 112(%rsp),%xmm11 + movaps 128(%rsp),%xmm12 + movaps 144(%rsp),%xmm13 + movaps 160(%rsp),%xmm14 + movaps 176(%rsp),%xmm15 + leaq 200(%rsp),%rsp +.Lctr32_ret: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_aesni_ctr32_encrypt_blocks: +.globl aesni_xts_encrypt +.def aesni_xts_encrypt; .scl 2; .type 32; .endef +.p2align 4 +aesni_xts_encrypt: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_aesni_xts_encrypt: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 + movq 48(%rsp),%r9 + + leaq -264(%rsp),%rsp + movaps %xmm6,96(%rsp) + movaps %xmm7,112(%rsp) + movaps %xmm8,128(%rsp) + movaps %xmm9,144(%rsp) + movaps %xmm10,160(%rsp) + movaps %xmm11,176(%rsp) + movaps %xmm12,192(%rsp) + movaps %xmm13,208(%rsp) + movaps %xmm14,224(%rsp) + movaps %xmm15,240(%rsp) +.Lxts_enc_body: + movups (%r9),%xmm15 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm15 +.Loop_enc1_8: + aesenc %xmm1,%xmm15 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz .Loop_enc1_8 + aesenclast %xmm1,%xmm15 + movq %rcx,%r11 + movl %r10d,%eax + movq %rdx,%r9 + andq $-16,%rdx + + movdqa .Lxts_magic(%rip),%xmm8 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + subq $96,%rdx + jc .Lxts_enc_short + + shrl $1,%eax + subl $1,%eax + movl %eax,%r10d + jmp .Lxts_enc_grandloop + +.p2align 4 +.Lxts_enc_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu 0(%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + pxor %xmm12,%xmm4 + movdqu 80(%rdi),%xmm7 + leaq 96(%rdi),%rdi + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 + + + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) + aesenc %xmm1,%xmm2 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 + movdqa %xmm11,16(%rsp) + aesenc %xmm1,%xmm3 + pxor %xmm0,%xmm5 + movdqa %xmm12,32(%rsp) + aesenc %xmm1,%xmm4 + pxor %xmm0,%xmm6 + movdqa %xmm13,48(%rsp) + aesenc %xmm1,%xmm5 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax + movdqa %xmm14,64(%rsp) + aesenc %xmm1,%xmm6 + movdqa %xmm15,80(%rsp) + aesenc %xmm1,%xmm7 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp .Lxts_enc_loop6_enter + +.p2align 4 +.Lxts_enc_loop6: + aesenc %xmm1,%xmm2 + aesenc %xmm1,%xmm3 + decl %eax + aesenc %xmm1,%xmm4 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 +.Lxts_enc_loop6_enter: + movups 16(%rcx),%xmm1 + aesenc %xmm0,%xmm2 + aesenc %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesenc %xmm0,%xmm4 + aesenc %xmm0,%xmm5 + aesenc %xmm0,%xmm6 + aesenc %xmm0,%xmm7 + movups (%rcx),%xmm0 + jnz .Lxts_enc_loop6 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + paddq %xmm15,%xmm15 + aesenc %xmm1,%xmm2 + pand %xmm8,%xmm9 + aesenc %xmm1,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesenc %xmm1,%xmm4 + pxor %xmm9,%xmm15 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + movups 16(%rcx),%xmm1 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + aesenc %xmm0,%xmm2 + pand %xmm8,%xmm9 + aesenc %xmm0,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesenc %xmm0,%xmm4 + pxor %xmm9,%xmm15 + aesenc %xmm0,%xmm5 + aesenc %xmm0,%xmm6 + aesenc %xmm0,%xmm7 + movups 32(%rcx),%xmm0 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + aesenc %xmm1,%xmm2 + pand %xmm8,%xmm9 + aesenc %xmm1,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesenc %xmm1,%xmm4 + pxor %xmm9,%xmm15 + aesenc %xmm1,%xmm5 + aesenc %xmm1,%xmm6 + aesenc %xmm1,%xmm7 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + aesenclast %xmm0,%xmm2 + pand %xmm8,%xmm9 + aesenclast %xmm0,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesenclast %xmm0,%xmm4 + pxor %xmm9,%xmm15 + aesenclast %xmm0,%xmm5 + aesenclast %xmm0,%xmm6 + aesenclast %xmm0,%xmm7 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 + pand %xmm8,%xmm9 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + subq $96,%rdx + jnc .Lxts_enc_grandloop + + leal 3(%rax,%rax,1),%eax + movq %r11,%rcx + movl %eax,%r10d + +.Lxts_enc_short: + addq $96,%rdx + jz .Lxts_enc_done + + cmpq $32,%rdx + jb .Lxts_enc_one + je .Lxts_enc_two + + cmpq $64,%rdx + jb .Lxts_enc_three + je .Lxts_enc_four + + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_encrypt6 + + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + jmp .Lxts_enc_done + +.p2align 4 +.Lxts_enc_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_9: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_9 + aesenclast %xmm1,%xmm2 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + leaq 16(%rsi),%rsi + jmp .Lxts_enc_done + +.p2align 4 +.Lxts_enc_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_encrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp .Lxts_enc_done + +.p2align 4 +.Lxts_enc_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_encrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp .Lxts_enc_done + +.p2align 4 +.Lxts_enc_four: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_encrypt4 + + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp .Lxts_enc_done + +.p2align 4 +.Lxts_enc_done: + andq $15,%r9 + jz .Lxts_enc_ret + movq %r9,%rdx + +.Lxts_enc_steal: + movzbl (%rdi),%eax + movzbl -16(%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,-16(%rsi) + movb %cl,0(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz .Lxts_enc_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups -16(%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_10: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_10 + aesenclast %xmm1,%xmm2 + xorps %xmm10,%xmm2 + movups %xmm2,-16(%rsi) + +.Lxts_enc_ret: + movaps 96(%rsp),%xmm6 + movaps 112(%rsp),%xmm7 + movaps 128(%rsp),%xmm8 + movaps 144(%rsp),%xmm9 + movaps 160(%rsp),%xmm10 + movaps 176(%rsp),%xmm11 + movaps 192(%rsp),%xmm12 + movaps 208(%rsp),%xmm13 + movaps 224(%rsp),%xmm14 + movaps 240(%rsp),%xmm15 + leaq 264(%rsp),%rsp +.Lxts_enc_epilogue: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_aesni_xts_encrypt: +.globl aesni_xts_decrypt +.def aesni_xts_decrypt; .scl 2; .type 32; .endef +.p2align 4 +aesni_xts_decrypt: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_aesni_xts_decrypt: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 + movq 48(%rsp),%r9 + + leaq -264(%rsp),%rsp + movaps %xmm6,96(%rsp) + movaps %xmm7,112(%rsp) + movaps %xmm8,128(%rsp) + movaps %xmm9,144(%rsp) + movaps %xmm10,160(%rsp) + movaps %xmm11,176(%rsp) + movaps %xmm12,192(%rsp) + movaps %xmm13,208(%rsp) + movaps %xmm14,224(%rsp) + movaps %xmm15,240(%rsp) +.Lxts_dec_body: + movups (%r9),%xmm15 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm15 +.Loop_enc1_11: + aesenc %xmm1,%xmm15 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz .Loop_enc1_11 + aesenclast %xmm1,%xmm15 + xorl %eax,%eax + testq $15,%rdx + setnz %al + shlq $4,%rax + subq %rax,%rdx + + movq %rcx,%r11 + movl %r10d,%eax + movq %rdx,%r9 + andq $-16,%rdx + + movdqa .Lxts_magic(%rip),%xmm8 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + subq $96,%rdx + jc .Lxts_dec_short + + shrl $1,%eax + subl $1,%eax + movl %eax,%r10d + jmp .Lxts_dec_grandloop + +.p2align 4 +.Lxts_dec_grandloop: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu 0(%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + pxor %xmm12,%xmm4 + movdqu 80(%rdi),%xmm7 + leaq 96(%rdi),%rdi + pxor %xmm13,%xmm5 + movups (%r11),%xmm0 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 + + + + movups 16(%r11),%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movdqa %xmm10,0(%rsp) + aesdec %xmm1,%xmm2 + leaq 32(%r11),%rcx + pxor %xmm0,%xmm4 + movdqa %xmm11,16(%rsp) + aesdec %xmm1,%xmm3 + pxor %xmm0,%xmm5 + movdqa %xmm12,32(%rsp) + aesdec %xmm1,%xmm4 + pxor %xmm0,%xmm6 + movdqa %xmm13,48(%rsp) + aesdec %xmm1,%xmm5 + pxor %xmm0,%xmm7 + movups (%rcx),%xmm0 + decl %eax + movdqa %xmm14,64(%rsp) + aesdec %xmm1,%xmm6 + movdqa %xmm15,80(%rsp) + aesdec %xmm1,%xmm7 + pxor %xmm14,%xmm14 + pcmpgtd %xmm15,%xmm14 + jmp .Lxts_dec_loop6_enter + +.p2align 4 +.Lxts_dec_loop6: + aesdec %xmm1,%xmm2 + aesdec %xmm1,%xmm3 + decl %eax + aesdec %xmm1,%xmm4 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 +.Lxts_dec_loop6_enter: + movups 16(%rcx),%xmm1 + aesdec %xmm0,%xmm2 + aesdec %xmm0,%xmm3 + leaq 32(%rcx),%rcx + aesdec %xmm0,%xmm4 + aesdec %xmm0,%xmm5 + aesdec %xmm0,%xmm6 + aesdec %xmm0,%xmm7 + movups (%rcx),%xmm0 + jnz .Lxts_dec_loop6 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + paddq %xmm15,%xmm15 + aesdec %xmm1,%xmm2 + pand %xmm8,%xmm9 + aesdec %xmm1,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesdec %xmm1,%xmm4 + pxor %xmm9,%xmm15 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 + movups 16(%rcx),%xmm1 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + aesdec %xmm0,%xmm2 + pand %xmm8,%xmm9 + aesdec %xmm0,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesdec %xmm0,%xmm4 + pxor %xmm9,%xmm15 + aesdec %xmm0,%xmm5 + aesdec %xmm0,%xmm6 + aesdec %xmm0,%xmm7 + movups 32(%rcx),%xmm0 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm11 + paddq %xmm15,%xmm15 + aesdec %xmm1,%xmm2 + pand %xmm8,%xmm9 + aesdec %xmm1,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesdec %xmm1,%xmm4 + pxor %xmm9,%xmm15 + aesdec %xmm1,%xmm5 + aesdec %xmm1,%xmm6 + aesdec %xmm1,%xmm7 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm12 + paddq %xmm15,%xmm15 + aesdeclast %xmm0,%xmm2 + pand %xmm8,%xmm9 + aesdeclast %xmm0,%xmm3 + pcmpgtd %xmm15,%xmm14 + aesdeclast %xmm0,%xmm4 + pxor %xmm9,%xmm15 + aesdeclast %xmm0,%xmm5 + aesdeclast %xmm0,%xmm6 + aesdeclast %xmm0,%xmm7 + + pshufd $19,%xmm14,%xmm9 + pxor %xmm14,%xmm14 + movdqa %xmm15,%xmm13 + paddq %xmm15,%xmm15 + xorps 0(%rsp),%xmm2 + pand %xmm8,%xmm9 + xorps 16(%rsp),%xmm3 + pcmpgtd %xmm15,%xmm14 + pxor %xmm9,%xmm15 + + xorps 32(%rsp),%xmm4 + movups %xmm2,0(%rsi) + xorps 48(%rsp),%xmm5 + movups %xmm3,16(%rsi) + xorps 64(%rsp),%xmm6 + movups %xmm4,32(%rsi) + xorps 80(%rsp),%xmm7 + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + subq $96,%rdx + jnc .Lxts_dec_grandloop + + leal 3(%rax,%rax,1),%eax + movq %r11,%rcx + movl %eax,%r10d + +.Lxts_dec_short: + addq $96,%rdx + jz .Lxts_dec_done + + cmpq $32,%rdx + jb .Lxts_dec_one + je .Lxts_dec_two + + cmpq $64,%rdx + jb .Lxts_dec_three + je .Lxts_dec_four + + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movdqu (%rdi),%xmm2 + pand %xmm8,%xmm9 + movdqu 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_decrypt6 + + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm14 + movdqu %xmm5,48(%rsi) + pcmpgtd %xmm15,%xmm14 + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + pshufd $19,%xmm14,%xmm11 + andq $15,%r9 + jz .Lxts_dec_ret + + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm11 + pxor %xmm15,%xmm11 + jmp .Lxts_dec_done2 + +.p2align 4 +.Lxts_dec_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_12: + aesdec %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_12 + aesdeclast %xmm1,%xmm2 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + movdqa %xmm12,%xmm11 + leaq 16(%rsi),%rsi + jmp .Lxts_dec_done + +.p2align 4 +.Lxts_dec_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_decrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm13,%xmm11 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp .Lxts_dec_done + +.p2align 4 +.Lxts_dec_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_decrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm15,%xmm11 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp .Lxts_dec_done + +.p2align 4 +.Lxts_dec_four: + pshufd $19,%xmm14,%xmm9 + movdqa %xmm15,%xmm14 + paddq %xmm15,%xmm15 + movups (%rdi),%xmm2 + pand %xmm8,%xmm9 + movups 16(%rdi),%xmm3 + pxor %xmm9,%xmm15 + + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_decrypt4 + + xorps %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm15,%xmm11 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp .Lxts_dec_done + +.p2align 4 +.Lxts_dec_done: + andq $15,%r9 + jz .Lxts_dec_ret +.Lxts_dec_done2: + movq %r9,%rdx + movq %r11,%rcx + movl %r10d,%eax + + movups (%rdi),%xmm2 + xorps %xmm11,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_13: + aesdec %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_13 + aesdeclast %xmm1,%xmm2 + xorps %xmm11,%xmm2 + movups %xmm2,(%rsi) + +.Lxts_dec_steal: + movzbl 16(%rdi),%eax + movzbl (%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,(%rsi) + movb %cl,16(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz .Lxts_dec_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups (%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_14: + aesdec %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_14 + aesdeclast %xmm1,%xmm2 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + +.Lxts_dec_ret: + movaps 96(%rsp),%xmm6 + movaps 112(%rsp),%xmm7 + movaps 128(%rsp),%xmm8 + movaps 144(%rsp),%xmm9 + movaps 160(%rsp),%xmm10 + movaps 176(%rsp),%xmm11 + movaps 192(%rsp),%xmm12 + movaps 208(%rsp),%xmm13 + movaps 224(%rsp),%xmm14 + movaps 240(%rsp),%xmm15 + leaq 264(%rsp),%rsp +.Lxts_dec_epilogue: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_aesni_xts_decrypt: +.globl aesni_cbc_encrypt +.def aesni_cbc_encrypt; .scl 2; .type 32; .endef +.p2align 4 +aesni_cbc_encrypt: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_aesni_cbc_encrypt: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 + movq 48(%rsp),%r9 + + testq %rdx,%rdx + jz .Lcbc_ret + + movl 240(%rcx),%r10d + movq %rcx,%r11 + testl %r9d,%r9d + jz .Lcbc_decrypt + + movups (%r8),%xmm2 + movl %r10d,%eax + cmpq $16,%rdx + jb .Lcbc_enc_tail + subq $16,%rdx + jmp .Lcbc_enc_loop +.p2align 4 +.Lcbc_enc_loop: + movups (%rdi),%xmm3 + leaq 16(%rdi),%rdi + + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm3 + leaq 32(%rcx),%rcx + xorps %xmm3,%xmm2 +.Loop_enc1_15: + aesenc %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_15 + aesenclast %xmm1,%xmm2 + movl %r10d,%eax + movq %r11,%rcx + movups %xmm2,0(%rsi) + leaq 16(%rsi),%rsi + subq $16,%rdx + jnc .Lcbc_enc_loop + addq $16,%rdx + jnz .Lcbc_enc_tail + movups %xmm2,(%r8) + jmp .Lcbc_ret + +.Lcbc_enc_tail: + movq %rdx,%rcx + xchgq %rdi,%rsi +.long 0x9066A4F3 + movl $16,%ecx + subq %rdx,%rcx + xorl %eax,%eax +.long 0x9066AAF3 + leaq -16(%rdi),%rdi + movl %r10d,%eax + movq %rdi,%rsi + movq %r11,%rcx + xorq %rdx,%rdx + jmp .Lcbc_enc_loop + +.p2align 4 +.Lcbc_decrypt: + leaq -88(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,16(%rsp) + movaps %xmm8,32(%rsp) + movaps %xmm9,48(%rsp) +.Lcbc_decrypt_body: + movups (%r8),%xmm9 + movl %r10d,%eax + cmpq $112,%rdx + jbe .Lcbc_dec_tail + shrl $1,%r10d + subq $112,%rdx + movl %r10d,%eax + movaps %xmm9,64(%rsp) + jmp .Lcbc_dec_loop8_enter +.p2align 4 +.Lcbc_dec_loop8: + movaps %xmm0,64(%rsp) + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi +.Lcbc_dec_loop8_enter: + movups (%rcx),%xmm0 + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 16(%rcx),%xmm1 + + leaq 32(%rcx),%rcx + movdqu 32(%rdi),%xmm4 + xorps %xmm0,%xmm2 + movdqu 48(%rdi),%xmm5 + xorps %xmm0,%xmm3 + movdqu 64(%rdi),%xmm6 + aesdec %xmm1,%xmm2 + pxor %xmm0,%xmm4 + movdqu 80(%rdi),%xmm7 + aesdec %xmm1,%xmm3 + pxor %xmm0,%xmm5 + movdqu 96(%rdi),%xmm8 + aesdec %xmm1,%xmm4 + pxor %xmm0,%xmm6 + movdqu 112(%rdi),%xmm9 + aesdec %xmm1,%xmm5 + pxor %xmm0,%xmm7 + decl %eax + aesdec %xmm1,%xmm6 + pxor %xmm0,%xmm8 + aesdec %xmm1,%xmm7 + pxor %xmm0,%xmm9 + movups (%rcx),%xmm0 + aesdec %xmm1,%xmm8 + aesdec %xmm1,%xmm9 + movups 16(%rcx),%xmm1 + + call .Ldec_loop8_enter + + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps 64(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm1 + xorps %xmm0,%xmm8 + movups 112(%rdi),%xmm0 + xorps %xmm1,%xmm9 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movl %r10d,%eax + movups %xmm6,64(%rsi) + movq %r11,%rcx + movups %xmm7,80(%rsi) + leaq 128(%rdi),%rdi + movups %xmm8,96(%rsi) + leaq 112(%rsi),%rsi + subq $128,%rdx + ja .Lcbc_dec_loop8 + + movaps %xmm9,%xmm2 + movaps %xmm0,%xmm9 + addq $112,%rdx + jle .Lcbc_dec_tail_collected + movups %xmm2,(%rsi) + leal 1(%r10,%r10,1),%eax + leaq 16(%rsi),%rsi +.Lcbc_dec_tail: + movups (%rdi),%xmm2 + movaps %xmm2,%xmm8 + cmpq $16,%rdx + jbe .Lcbc_dec_one + + movups 16(%rdi),%xmm3 + movaps %xmm3,%xmm7 + cmpq $32,%rdx + jbe .Lcbc_dec_two + + movups 32(%rdi),%xmm4 + movaps %xmm4,%xmm6 + cmpq $48,%rdx + jbe .Lcbc_dec_three + + movups 48(%rdi),%xmm5 + cmpq $64,%rdx + jbe .Lcbc_dec_four + + movups 64(%rdi),%xmm6 + cmpq $80,%rdx + jbe .Lcbc_dec_five + + movups 80(%rdi),%xmm7 + cmpq $96,%rdx + jbe .Lcbc_dec_six + + movups 96(%rdi),%xmm8 + movaps %xmm9,64(%rsp) + call _aesni_decrypt8 + movups (%rdi),%xmm1 + movups 16(%rdi),%xmm0 + xorps 64(%rsp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%rdi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%rdi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%rdi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%rdi),%xmm0 + xorps %xmm1,%xmm7 + movups 96(%rdi),%xmm9 + xorps %xmm0,%xmm8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + movaps %xmm8,%xmm2 + subq $112,%rdx + jmp .Lcbc_dec_tail_collected +.p2align 4 +.Lcbc_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_16: + aesdec %xmm1,%xmm2 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_16 + aesdeclast %xmm1,%xmm2 + xorps %xmm9,%xmm2 + movaps %xmm8,%xmm9 + subq $16,%rdx + jmp .Lcbc_dec_tail_collected +.p2align 4 +.Lcbc_dec_two: + xorps %xmm4,%xmm4 + call _aesni_decrypt3 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + movaps %xmm7,%xmm9 + movaps %xmm3,%xmm2 + leaq 16(%rsi),%rsi + subq $32,%rdx + jmp .Lcbc_dec_tail_collected +.p2align 4 +.Lcbc_dec_three: + call _aesni_decrypt3 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + movaps %xmm6,%xmm9 + movaps %xmm4,%xmm2 + leaq 32(%rsi),%rsi + subq $48,%rdx + jmp .Lcbc_dec_tail_collected +.p2align 4 +.Lcbc_dec_four: + call _aesni_decrypt4 + xorps %xmm9,%xmm2 + movups 48(%rdi),%xmm9 + xorps %xmm8,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%rsi) + xorps %xmm6,%xmm5 + movups %xmm4,32(%rsi) + movaps %xmm5,%xmm2 + leaq 48(%rsi),%rsi + subq $64,%rdx + jmp .Lcbc_dec_tail_collected +.p2align 4 +.Lcbc_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm9 + xorps %xmm1,%xmm6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + movaps %xmm6,%xmm2 + subq $80,%rdx + jmp .Lcbc_dec_tail_collected +.p2align 4 +.Lcbc_dec_six: + call _aesni_decrypt6 + movups 16(%rdi),%xmm1 + movups 32(%rdi),%xmm0 + xorps %xmm9,%xmm2 + xorps %xmm8,%xmm3 + xorps %xmm1,%xmm4 + movups 48(%rdi),%xmm1 + xorps %xmm0,%xmm5 + movups 64(%rdi),%xmm0 + xorps %xmm1,%xmm6 + movups 80(%rdi),%xmm9 + xorps %xmm0,%xmm7 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + movaps %xmm7,%xmm2 + subq $96,%rdx + jmp .Lcbc_dec_tail_collected +.p2align 4 +.Lcbc_dec_tail_collected: + andq $15,%rdx + movups %xmm9,(%r8) + jnz .Lcbc_dec_tail_partial + movups %xmm2,(%rsi) + jmp .Lcbc_dec_ret +.p2align 4 +.Lcbc_dec_tail_partial: + movaps %xmm2,64(%rsp) + movq $16,%rcx + movq %rsi,%rdi + subq %rdx,%rcx + leaq 64(%rsp),%rsi +.long 0x9066A4F3 + +.Lcbc_dec_ret: + movaps (%rsp),%xmm6 + movaps 16(%rsp),%xmm7 + movaps 32(%rsp),%xmm8 + movaps 48(%rsp),%xmm9 + leaq 88(%rsp),%rsp +.Lcbc_ret: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_aesni_cbc_encrypt: +.globl aesni_set_decrypt_key +.def aesni_set_decrypt_key; .scl 2; .type 32; .endef +.p2align 4 +aesni_set_decrypt_key: + subq $8,%rsp + call __aesni_set_encrypt_key + shll $4,%edx + testl %eax,%eax + jnz .Ldec_key_ret + leaq 16(%r8,%rdx,1),%rcx + + movups (%r8),%xmm0 + movups (%rcx),%xmm1 + movups %xmm0,(%rcx) + movups %xmm1,(%r8) + leaq 16(%r8),%r8 + leaq -16(%rcx),%rcx + +.Ldec_key_inverse: + movups (%r8),%xmm0 + movups (%rcx),%xmm1 + aesimc %xmm0,%xmm0 + aesimc %xmm1,%xmm1 + leaq 16(%r8),%r8 + leaq -16(%rcx),%rcx + movups %xmm0,16(%rcx) + movups %xmm1,-16(%r8) + cmpq %r8,%rcx + ja .Ldec_key_inverse + + movups (%r8),%xmm0 + aesimc %xmm0,%xmm0 + movups %xmm0,(%rcx) +.Ldec_key_ret: + addq $8,%rsp + retq +.LSEH_end_set_decrypt_key: + +.globl aesni_set_encrypt_key +.def aesni_set_encrypt_key; .scl 2; .type 32; .endef +.p2align 4 +aesni_set_encrypt_key: +__aesni_set_encrypt_key: + subq $8,%rsp + movq $-1,%rax + testq %rcx,%rcx + jz .Lenc_key_ret + testq %r8,%r8 + jz .Lenc_key_ret + + movups (%rcx),%xmm0 + xorps %xmm4,%xmm4 + leaq 16(%r8),%rax + cmpl $256,%edx + je .L14rounds + cmpl $192,%edx + je .L12rounds + cmpl $128,%edx + jne .Lbad_keybits + +.L10rounds: + movl $9,%edx + movups %xmm0,(%r8) + aeskeygenassist $1,%xmm0,%xmm1 + call .Lkey_expansion_128_cold + aeskeygenassist $2,%xmm0,%xmm1 + call .Lkey_expansion_128 + aeskeygenassist $4,%xmm0,%xmm1 + call .Lkey_expansion_128 + aeskeygenassist $8,%xmm0,%xmm1 + call .Lkey_expansion_128 + aeskeygenassist $16,%xmm0,%xmm1 + call .Lkey_expansion_128 + aeskeygenassist $32,%xmm0,%xmm1 + call .Lkey_expansion_128 + aeskeygenassist $64,%xmm0,%xmm1 + call .Lkey_expansion_128 + aeskeygenassist $128,%xmm0,%xmm1 + call .Lkey_expansion_128 + aeskeygenassist $27,%xmm0,%xmm1 + call .Lkey_expansion_128 + aeskeygenassist $54,%xmm0,%xmm1 + call .Lkey_expansion_128 + movups %xmm0,(%rax) + movl %edx,80(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + +.p2align 4 +.L12rounds: + movq 16(%rcx),%xmm2 + movl $11,%edx + movups %xmm0,(%r8) + aeskeygenassist $1,%xmm2,%xmm1 + call .Lkey_expansion_192a_cold + aeskeygenassist $2,%xmm2,%xmm1 + call .Lkey_expansion_192b + aeskeygenassist $4,%xmm2,%xmm1 + call .Lkey_expansion_192a + aeskeygenassist $8,%xmm2,%xmm1 + call .Lkey_expansion_192b + aeskeygenassist $16,%xmm2,%xmm1 + call .Lkey_expansion_192a + aeskeygenassist $32,%xmm2,%xmm1 + call .Lkey_expansion_192b + aeskeygenassist $64,%xmm2,%xmm1 + call .Lkey_expansion_192a + aeskeygenassist $128,%xmm2,%xmm1 + call .Lkey_expansion_192b + movups %xmm0,(%rax) + movl %edx,48(%rax) + xorq %rax,%rax + jmp .Lenc_key_ret + +.p2align 4 +.L14rounds: + movups 16(%rcx),%xmm2 + movl $13,%edx + leaq 16(%rax),%rax + movups %xmm0,(%r8) + movups %xmm2,16(%r8) + aeskeygenassist $1,%xmm2,%xmm1 + call .Lkey_expansion_256a_cold + aeskeygenassist $1,%xmm0,%xmm1 + call .Lkey_expansion_256b + aeskeygenassist $2,%xmm2,%xmm1 + call .Lkey_expansion_256a + aeskeygenassist $2,%xmm0,%xmm1 + call .Lkey_expansion_256b + aeskeygenassist $4,%xmm2,%xmm1 + call .Lkey_expansion_256a + aeskeygenassist $4,%xmm0,%xmm1 + call .Lkey_expansion_256b + aeskeygenassist $8,%xmm2,%xmm1 + call .Lkey_expansion_256a + aeskeygenassist $8,%xmm0,%xmm1 + call .Lkey_expansion_256b + aeskeygenassist $16,%xmm2,%xmm1 + call .Lkey_expansion_256a + aeskeygenassist $16,%xmm0,%xmm1 + call .Lkey_expansion_256b + aeskeygenassist $32,%xmm2,%xmm1 + call .Lkey_expansion_256a + aeskeygenassist $32,%xmm0,%xmm1 + call .Lkey_expansion_256b + aeskeygenassist $64,%xmm2,%xmm1 + call .Lkey_expansion_256a + movups %xmm0,(%rax) + movl %edx,16(%rax) + xorq %rax,%rax + jmp .Lenc_key_ret + +.p2align 4 +.Lbad_keybits: + movq $-2,%rax +.Lenc_key_ret: + addq $8,%rsp + retq +.LSEH_end_set_encrypt_key: + +.p2align 4 +.Lkey_expansion_128: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + retq + +.p2align 4 +.Lkey_expansion_192a: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_192a_cold: + movaps %xmm2,%xmm5 +.Lkey_expansion_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + retq + +.p2align 4 +.Lkey_expansion_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%rax) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%rax) + leaq 32(%rax),%rax + jmp .Lkey_expansion_192b_warm + +.p2align 4 +.Lkey_expansion_256a: + movups %xmm2,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + retq + +.p2align 4 +.Lkey_expansion_256b: + movups %xmm0,(%rax) + leaq 16(%rax),%rax + + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + retq + + +.p2align 6 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.Lincrement32: +.long 6,6,6,0 +.Lincrement64: +.long 1,0,0,0 +.Lxts_magic: +.long 0x87,0,1,0 + +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 + +.def ecb_se_handler; .scl 3; .type 32; .endef +.p2align 4 +ecb_se_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 152(%r8),%rax + + jmp .Lcommon_seh_tail + + +.def ccm64_se_handler; .scl 3; .type 32; .endef +.p2align 4 +ccm64_se_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + movq 8(%r9),%rsi + movq 56(%r9),%r11 + + movl 0(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jb .Lcommon_seh_tail + + movq 152(%r8),%rax + + movl 4(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jae .Lcommon_seh_tail + + leaq 0(%rax),%rsi + leaq 512(%r8),%rdi + movl $8,%ecx +.long 0xa548f3fc + leaq 88(%rax),%rax + + jmp .Lcommon_seh_tail + + +.def ctr32_se_handler; .scl 3; .type 32; .endef +.p2align 4 +ctr32_se_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + leaq .Lctr32_body(%rip),%r10 + cmpq %r10,%rbx + jb .Lcommon_seh_tail + + movq 152(%r8),%rax + + leaq .Lctr32_ret(%rip),%r10 + cmpq %r10,%rbx + jae .Lcommon_seh_tail + + leaq 32(%rax),%rsi + leaq 512(%r8),%rdi + movl $20,%ecx +.long 0xa548f3fc + leaq 200(%rax),%rax + + jmp .Lcommon_seh_tail + + +.def xts_se_handler; .scl 3; .type 32; .endef +.p2align 4 +xts_se_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + movq 8(%r9),%rsi + movq 56(%r9),%r11 + + movl 0(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jb .Lcommon_seh_tail + + movq 152(%r8),%rax + + movl 4(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jae .Lcommon_seh_tail + + leaq 96(%rax),%rsi + leaq 512(%r8),%rdi + movl $20,%ecx +.long 0xa548f3fc + leaq 104+160(%rax),%rax + + jmp .Lcommon_seh_tail + +.def cbc_se_handler; .scl 3; .type 32; .endef +.p2align 4 +cbc_se_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 152(%r8),%rax + movq 248(%r8),%rbx + + leaq .Lcbc_decrypt(%rip),%r10 + cmpq %r10,%rbx + jb .Lcommon_seh_tail + + leaq .Lcbc_decrypt_body(%rip),%r10 + cmpq %r10,%rbx + jb .Lrestore_cbc_rax + + leaq .Lcbc_ret(%rip),%r10 + cmpq %r10,%rbx + jae .Lcommon_seh_tail + + leaq 0(%rax),%rsi + leaq 512(%r8),%rdi + movl $8,%ecx +.long 0xa548f3fc + leaq 88(%rax),%rax + jmp .Lcommon_seh_tail + +.Lrestore_cbc_rax: + movq 120(%r8),%rax + +.Lcommon_seh_tail: + movq 8(%rax),%rdi + movq 16(%rax),%rsi + movq %rax,152(%r8) + movq %rsi,168(%r8) + movq %rdi,176(%r8) + + movq 40(%r9),%rdi + movq %r8,%rsi + movl $154,%ecx +.long 0xa548f3fc + + movq %r9,%rsi + xorq %rcx,%rcx + movq 8(%rsi),%rdx + movq 0(%rsi),%r8 + movq 16(%rsi),%r9 + movq 40(%rsi),%r10 + leaq 56(%rsi),%r11 + leaq 24(%rsi),%r12 + movq %r10,32(%rsp) + movq %r11,40(%rsp) + movq %r12,48(%rsp) + movq %rcx,56(%rsp) + call *__imp_RtlVirtualUnwind(%rip) + + movl $1,%eax + addq $64,%rsp + popfq + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + popq %rdi + popq %rsi + retq + + +.section .pdata +.p2align 2 +.rva .LSEH_begin_aesni_ecb_encrypt +.rva .LSEH_end_aesni_ecb_encrypt +.rva .LSEH_info_ecb + +.rva .LSEH_begin_aesni_ccm64_encrypt_blocks +.rva .LSEH_end_aesni_ccm64_encrypt_blocks +.rva .LSEH_info_ccm64_enc + +.rva .LSEH_begin_aesni_ccm64_decrypt_blocks +.rva .LSEH_end_aesni_ccm64_decrypt_blocks +.rva .LSEH_info_ccm64_dec + +.rva .LSEH_begin_aesni_ctr32_encrypt_blocks +.rva .LSEH_end_aesni_ctr32_encrypt_blocks +.rva .LSEH_info_ctr32 + +.rva .LSEH_begin_aesni_xts_encrypt +.rva .LSEH_end_aesni_xts_encrypt +.rva .LSEH_info_xts_enc + +.rva .LSEH_begin_aesni_xts_decrypt +.rva .LSEH_end_aesni_xts_decrypt +.rva .LSEH_info_xts_dec +.rva .LSEH_begin_aesni_cbc_encrypt +.rva .LSEH_end_aesni_cbc_encrypt +.rva .LSEH_info_cbc + +.rva aesni_set_decrypt_key +.rva .LSEH_end_set_decrypt_key +.rva .LSEH_info_key + +.rva aesni_set_encrypt_key +.rva .LSEH_end_set_encrypt_key +.rva .LSEH_info_key +.section .xdata +.p2align 3 +.LSEH_info_ecb: +.byte 9,0,0,0 +.rva ecb_se_handler +.LSEH_info_ccm64_enc: +.byte 9,0,0,0 +.rva ccm64_se_handler +.rva .Lccm64_enc_body,.Lccm64_enc_ret +.LSEH_info_ccm64_dec: +.byte 9,0,0,0 +.rva ccm64_se_handler +.rva .Lccm64_dec_body,.Lccm64_dec_ret +.LSEH_info_ctr32: +.byte 9,0,0,0 +.rva ctr32_se_handler +.LSEH_info_xts_enc: +.byte 9,0,0,0 +.rva xts_se_handler +.rva .Lxts_enc_body,.Lxts_enc_epilogue +.LSEH_info_xts_dec: +.byte 9,0,0,0 +.rva xts_se_handler +.rva .Lxts_dec_body,.Lxts_dec_epilogue +.LSEH_info_cbc: +.byte 9,0,0,0 +.rva cbc_se_handler +.LSEH_info_key: +.byte 0x01,0x04,0x01,0x00 +.byte 0x04,0x02,0x00,0x00 diff --git a/ext/libressl/crypto/aes/aesni-sha1-elf-x86_64.S b/ext/libressl/crypto/aes/aesni-sha1-elf-x86_64.S new file mode 100644 index 0000000..c0b3e5f --- /dev/null +++ b/ext/libressl/crypto/aes/aesni-sha1-elf-x86_64.S @@ -0,0 +1,1401 @@ +#include "x86_arch.h" +.text + +.hidden OPENSSL_ia32cap_P + +.globl aesni_cbc_sha1_enc +.type aesni_cbc_sha1_enc,@function +.align 16 +aesni_cbc_sha1_enc: + + movl OPENSSL_ia32cap_P+0(%rip),%r10d + movl OPENSSL_ia32cap_P+4(%rip),%r11d + jmp aesni_cbc_sha1_enc_ssse3 + retq +.size aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc +.type aesni_cbc_sha1_enc_ssse3,@function +.align 16 +aesni_cbc_sha1_enc_ssse3: + movq 8(%rsp),%r10 + + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -104(%rsp),%rsp + + + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movdqu (%r8),%xmm11 + movq %r8,88(%rsp) + shlq $6,%r14 + subq %r12,%r13 + movl 240(%r15),%r8d + addq %r10,%r14 + + leaq K_XX_XX(%rip),%r11 + movl 0(%r9),%eax + movl 4(%r9),%ebx + movl 8(%r9),%ecx + movl 12(%r9),%edx + movl %ebx,%esi + movl 16(%r9),%ebp + + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r10),%xmm0 + movdqu 16(%r10),%xmm1 + movdqu 32(%r10),%xmm2 + movdqu 48(%r10),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r10 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm0 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + movups (%r15),%xmm13 + movups 16(%r15),%xmm14 + jmp .Loop_ssse3 +.align 16 +.Loop_ssse3: + movdqa %xmm1,%xmm4 + addl 0(%rsp),%ebp + movups 0(%r12),%xmm12 + xorps %xmm13,%xmm12 + xorps %xmm12,%xmm11 + aesenc %xmm14,%xmm11 + movups 32(%r15),%xmm15 + xorl %edx,%ecx + movdqa %xmm3,%xmm8 +.byte 102,15,58,15,224,8 + movl %eax,%edi + roll $5,%eax + paddd %xmm3,%xmm9 + andl %ecx,%esi + xorl %edx,%ecx + psrldq $4,%xmm8 + xorl %edx,%esi + addl %eax,%ebp + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%ebp + pxor %xmm2,%xmm8 + addl 4(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pxor %xmm8,%xmm4 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm9,48(%rsp) + xorl %ecx,%edi + aesenc %xmm15,%xmm11 + movups 48(%r15),%xmm14 + addl %ebp,%edx + movdqa %xmm4,%xmm10 + movdqa %xmm4,%xmm8 + rorl $7,%eax + addl %edi,%edx + addl 8(%rsp),%ecx + xorl %ebx,%eax + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + roll $5,%edx + andl %eax,%esi + xorl %ebx,%eax + psrld $31,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + movdqa %xmm10,%xmm9 + rorl $7,%ebp + addl %esi,%ecx + psrld $30,%xmm10 + por %xmm8,%xmm4 + addl 12(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + aesenc %xmm14,%xmm11 + movups 64(%r15),%xmm15 + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + andl %ebp,%edi + xorl %eax,%ebp + movdqa 0(%r11),%xmm10 + xorl %eax,%edi + addl %ecx,%ebx + pxor %xmm9,%xmm4 + rorl $7,%edx + addl %edi,%ebx + movdqa %xmm2,%xmm5 + addl 16(%rsp),%eax + xorl %ebp,%edx + movdqa %xmm4,%xmm9 +.byte 102,15,58,15,233,8 + movl %ebx,%edi + roll $5,%ebx + paddd %xmm4,%xmm10 + andl %edx,%esi + xorl %ebp,%edx + psrldq $4,%xmm9 + xorl %ebp,%esi + addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm3,%xmm9 + addl 20(%rsp),%ebp + aesenc %xmm15,%xmm11 + movups 80(%r15),%xmm14 + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pxor %xmm9,%xmm5 + andl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm10,0(%rsp) + xorl %edx,%edi + addl %eax,%ebp + movdqa %xmm5,%xmm8 + movdqa %xmm5,%xmm9 + rorl $7,%ebx + addl %edi,%ebp + addl 24(%rsp),%edx + xorl %ecx,%ebx + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + roll $5,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + psrld $31,%xmm9 + xorl %ecx,%esi + aesenc %xmm14,%xmm11 + movups 96(%r15),%xmm15 + addl %ebp,%edx + movdqa %xmm8,%xmm10 + rorl $7,%eax + addl %esi,%edx + psrld $30,%xmm8 + por %xmm9,%xmm5 + addl 28(%rsp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + andl %eax,%edi + xorl %ebx,%eax + movdqa 16(%r11),%xmm8 + xorl %ebx,%edi + addl %edx,%ecx + pxor %xmm10,%xmm5 + rorl $7,%ebp + addl %edi,%ecx + movdqa %xmm3,%xmm6 + addl 32(%rsp),%ebx + xorl %eax,%ebp + movdqa %xmm5,%xmm10 +.byte 102,15,58,15,242,8 + movl %ecx,%edi + roll $5,%ecx + aesenc %xmm15,%xmm11 + movups 112(%r15),%xmm14 + paddd %xmm5,%xmm8 + andl %ebp,%esi + xorl %eax,%ebp + psrldq $4,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm4,%xmm10 + addl 36(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + pxor %xmm10,%xmm6 + andl %edx,%edi + xorl %ebp,%edx + movdqa %xmm8,16(%rsp) + xorl %ebp,%edi + addl %ebx,%eax + movdqa %xmm6,%xmm9 + movdqa %xmm6,%xmm10 + rorl $7,%ecx + addl %edi,%eax + addl 40(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 128(%r15),%xmm15 + xorl %edx,%ecx + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx + psrld $31,%xmm10 + xorl %edx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + psrld $30,%xmm9 + por %xmm10,%xmm6 + addl 44(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa 16(%r11),%xmm9 + xorl %ecx,%edi + aesenc %xmm15,%xmm11 + movups 144(%r15),%xmm14 + addl %ebp,%edx + pxor %xmm8,%xmm6 + rorl $7,%eax + addl %edi,%edx + movdqa %xmm4,%xmm7 + addl 48(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm6,%xmm8 +.byte 102,15,58,15,251,8 + movl %edx,%edi + roll $5,%edx + paddd %xmm6,%xmm9 + andl %eax,%esi + xorl %ebx,%eax + psrldq $4,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm5,%xmm8 + addl 52(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + aesenc %xmm14,%xmm11 + movups 160(%r15),%xmm15 + pxor %xmm8,%xmm7 + andl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm9,32(%rsp) + xorl %eax,%edi + addl %ecx,%ebx + movdqa %xmm7,%xmm10 + movdqa %xmm7,%xmm8 + rorl $7,%edx + addl %edi,%ebx + addl 56(%rsp),%eax + xorl %ebp,%edx + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + roll $5,%ebx + andl %edx,%esi + xorl %ebp,%edx + psrld $31,%xmm8 + xorl %ebp,%esi + addl %ebx,%eax + movdqa %xmm10,%xmm9 + rorl $7,%ecx + addl %esi,%eax + psrld $30,%xmm10 + por %xmm8,%xmm7 + addl 60(%rsp),%ebp + cmpl $11,%r8d + jb .Laesenclast1 + movups 176(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 192(%r15),%xmm15 + aesenc %xmm14,%xmm11 + je .Laesenclast1 + movups 208(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 224(%r15),%xmm15 + aesenc %xmm14,%xmm11 +.Laesenclast1: + aesenclast %xmm15,%xmm11 + movups 16(%r15),%xmm14 + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + andl %ecx,%edi + xorl %edx,%ecx + movdqa 16(%r11),%xmm10 + xorl %edx,%edi + addl %eax,%ebp + pxor %xmm9,%xmm7 + rorl $7,%ebx + addl %edi,%ebp + movdqa %xmm7,%xmm9 + addl 0(%rsp),%edx + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,206,8 + xorl %ecx,%ebx + movl %ebp,%edi + roll $5,%ebp + pxor %xmm1,%xmm0 + andl %ebx,%esi + xorl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm7,%xmm10 + xorl %ecx,%esi + movups 16(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,0(%r13,%r12,1) + xorps %xmm12,%xmm11 + aesenc %xmm14,%xmm11 + movups 32(%r15),%xmm15 + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm0,%xmm9 + movdqa %xmm10,48(%rsp) + movl %edx,%esi + roll $5,%edx + andl %eax,%edi + xorl %ebx,%eax + pslld $2,%xmm0 + xorl %ebx,%edi + addl %edx,%ecx + psrld $30,%xmm9 + rorl $7,%ebp + addl %edi,%ecx + addl 8(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%edi + roll $5,%ecx + aesenc %xmm15,%xmm11 + movups 48(%r15),%xmm14 + por %xmm9,%xmm0 + andl %ebp,%esi + xorl %eax,%ebp + movdqa %xmm0,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + andl %edx,%edi + xorl %ebp,%edx + xorl %ebp,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 16(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 64(%r15),%xmm15 + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,215,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm8,%xmm9 + paddd %xmm0,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + movdqa %xmm8,0(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm10 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + aesenc %xmm15,%xmm11 + movups 80(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm10,%xmm1 + addl 28(%rsp),%ebx + xorl %eax,%edi + movdqa %xmm1,%xmm8 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 32(%rsp),%eax + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,192,8 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + xorl %edx,%esi + addl %ebx,%eax + movdqa 32(%r11),%xmm10 + paddd %xmm1,%xmm9 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 96(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + movdqa %xmm9,16(%rsp) + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + pslld $2,%xmm2 + addl 40(%rsp),%edx + xorl %ecx,%esi + psrld $30,%xmm8 + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + por %xmm8,%xmm2 + addl 44(%rsp),%ecx + xorl %ebx,%edi + movdqa %xmm2,%xmm9 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + aesenc %xmm15,%xmm11 + movups 112(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 48(%rsp),%ebx + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,201,8 + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + xorl %ebp,%esi + addl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm2,%xmm10 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + movdqa %xmm10,32(%rsp) + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + pslld $2,%xmm3 + addl 56(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 128(%r15),%xmm15 + xorl %edx,%esi + psrld $30,%xmm9 + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + por %xmm9,%xmm3 + addl 60(%rsp),%edx + xorl %ecx,%edi + movdqa %xmm3,%xmm10 + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 0(%rsp),%ecx + pxor %xmm0,%xmm4 +.byte 102,68,15,58,15,210,8 + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + xorl %eax,%esi + aesenc %xmm15,%xmm11 + movups 144(%r15),%xmm14 + addl %edx,%ecx + movdqa %xmm8,%xmm9 + paddd %xmm3,%xmm8 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + movdqa %xmm8,48(%rsp) + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + pslld $2,%xmm4 + addl 8(%rsp),%eax + xorl %ebp,%esi + psrld $30,%xmm10 + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + por %xmm10,%xmm4 + addl 12(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movdqa %xmm4,%xmm8 + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 16(%rsp),%edx + pxor %xmm1,%xmm5 +.byte 102,68,15,58,15,195,8 + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + xorl %ebx,%esi + addl %ebp,%edx + movdqa %xmm9,%xmm10 + paddd %xmm4,%xmm9 + rorl $7,%eax + addl %esi,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + movdqa %xmm9,0(%rsp) + xorl %eax,%edi + cmpl $11,%r8d + jb .Laesenclast2 + movups 176(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 192(%r15),%xmm15 + aesenc %xmm14,%xmm11 + je .Laesenclast2 + movups 208(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 224(%r15),%xmm15 + aesenc %xmm14,%xmm11 +.Laesenclast2: + aesenclast %xmm15,%xmm11 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + pslld $2,%xmm5 + addl 24(%rsp),%ebx + xorl %eax,%esi + psrld $30,%xmm8 + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + por %xmm8,%xmm5 + addl 28(%rsp),%eax + xorl %ebp,%edi + movdqa %xmm5,%xmm9 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movl %ecx,%edi + movups 32(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,16(%r13,%r12,1) + xorps %xmm12,%xmm11 + aesenc %xmm14,%xmm11 + movups 32(%r15),%xmm15 + pxor %xmm2,%xmm6 +.byte 102,68,15,58,15,204,8 + xorl %edx,%ecx + addl 32(%rsp),%ebp + andl %edx,%edi + pxor %xmm7,%xmm6 + andl %ecx,%esi + rorl $7,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm5,%xmm10 + addl %edi,%ebp + movl %eax,%edi + pxor %xmm9,%xmm6 + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movdqa %xmm6,%xmm9 + movdqa %xmm10,16(%rsp) + movl %ebx,%esi + xorl %ecx,%ebx + addl 36(%rsp),%edx + andl %ecx,%esi + pslld $2,%xmm6 + andl %ebx,%edi + rorl $7,%eax + psrld $30,%xmm9 + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + aesenc %xmm15,%xmm11 + movups 48(%r15),%xmm14 + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + por %xmm9,%xmm6 + movl %eax,%edi + xorl %ebx,%eax + movdqa %xmm6,%xmm10 + addl 40(%rsp),%ecx + andl %ebx,%edi + andl %eax,%esi + rorl $7,%ebp + addl %edi,%ecx + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%esi + xorl %eax,%ebp + addl 44(%rsp),%ebx + andl %eax,%esi + andl %ebp,%edi + aesenc %xmm14,%xmm11 + movups 64(%r15),%xmm15 + rorl $7,%edx + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%edi + pxor %xmm3,%xmm7 +.byte 102,68,15,58,15,213,8 + xorl %ebp,%edx + addl 48(%rsp),%eax + andl %ebp,%edi + pxor %xmm0,%xmm7 + andl %edx,%esi + rorl $7,%ecx + movdqa 48(%r11),%xmm9 + paddd %xmm6,%xmm8 + addl %edi,%eax + movl %ebx,%edi + pxor %xmm10,%xmm7 + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movdqa %xmm7,%xmm10 + movdqa %xmm8,32(%rsp) + movl %ecx,%esi + aesenc %xmm15,%xmm11 + movups 80(%r15),%xmm14 + xorl %edx,%ecx + addl 52(%rsp),%ebp + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%edi + rorl $7,%ebx + psrld $30,%xmm10 + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + por %xmm10,%xmm7 + movl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm7,%xmm8 + addl 56(%rsp),%edx + andl %ecx,%edi + andl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + movl %ebp,%edi + roll $5,%ebp + aesenc %xmm14,%xmm11 + movups 96(%r15),%xmm15 + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%esi + xorl %ebx,%eax + addl 60(%rsp),%ecx + andl %ebx,%esi + andl %eax,%edi + rorl $7,%ebp + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%edi + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,198,8 + xorl %eax,%ebp + addl 0(%rsp),%ebx + andl %eax,%edi + pxor %xmm1,%xmm0 + andl %ebp,%esi + aesenc %xmm15,%xmm11 + movups 112(%r15),%xmm14 + rorl $7,%edx + movdqa %xmm9,%xmm10 + paddd %xmm7,%xmm9 + addl %edi,%ebx + movl %ecx,%edi + pxor %xmm8,%xmm0 + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movdqa %xmm0,%xmm8 + movdqa %xmm9,48(%rsp) + movl %edx,%esi + xorl %ebp,%edx + addl 4(%rsp),%eax + andl %ebp,%esi + pslld $2,%xmm0 + andl %edx,%edi + rorl $7,%ecx + psrld $30,%xmm8 + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + por %xmm8,%xmm0 + movl %ecx,%edi + aesenc %xmm14,%xmm11 + movups 128(%r15),%xmm15 + xorl %edx,%ecx + movdqa %xmm0,%xmm9 + addl 8(%rsp),%ebp + andl %edx,%edi + andl %ecx,%esi + rorl $7,%ebx + addl %edi,%ebp + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%esi + xorl %ecx,%ebx + addl 12(%rsp),%edx + andl %ecx,%esi + andl %ebx,%edi + rorl $7,%eax + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + aesenc %xmm15,%xmm11 + movups 144(%r15),%xmm14 + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%edi + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,207,8 + xorl %ebx,%eax + addl 16(%rsp),%ecx + andl %ebx,%edi + pxor %xmm2,%xmm1 + andl %eax,%esi + rorl $7,%ebp + movdqa %xmm10,%xmm8 + paddd %xmm0,%xmm10 + addl %edi,%ecx + movl %edx,%edi + pxor %xmm9,%xmm1 + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movdqa %xmm1,%xmm9 + movdqa %xmm10,0(%rsp) + movl %ebp,%esi + xorl %eax,%ebp + addl 20(%rsp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %ebp,%edi + aesenc %xmm14,%xmm11 + movups 160(%r15),%xmm15 + rorl $7,%edx + psrld $30,%xmm9 + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + por %xmm9,%xmm1 + movl %edx,%edi + xorl %ebp,%edx + movdqa %xmm1,%xmm10 + addl 24(%rsp),%eax + andl %ebp,%edi + andl %edx,%esi + rorl $7,%ecx + addl %edi,%eax + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movl %ecx,%esi + cmpl $11,%r8d + jb .Laesenclast3 + movups 176(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 192(%r15),%xmm15 + aesenc %xmm14,%xmm11 + je .Laesenclast3 + movups 208(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 224(%r15),%xmm15 + aesenc %xmm14,%xmm11 +.Laesenclast3: + aesenclast %xmm15,%xmm11 + movups 16(%r15),%xmm14 + xorl %edx,%ecx + addl 28(%rsp),%ebp + andl %edx,%esi + andl %ecx,%edi + rorl $7,%ebx + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%edi + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,208,8 + xorl %ecx,%ebx + addl 32(%rsp),%edx + andl %ecx,%edi + pxor %xmm3,%xmm2 + andl %ebx,%esi + rorl $7,%eax + movdqa %xmm8,%xmm9 + paddd %xmm1,%xmm8 + addl %edi,%edx + movl %ebp,%edi + pxor %xmm10,%xmm2 + roll $5,%ebp + movups 48(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,32(%r13,%r12,1) + xorps %xmm12,%xmm11 + aesenc %xmm14,%xmm11 + movups 32(%r15),%xmm15 + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movdqa %xmm2,%xmm10 + movdqa %xmm8,16(%rsp) + movl %eax,%esi + xorl %ebx,%eax + addl 36(%rsp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%edi + rorl $7,%ebp + psrld $30,%xmm10 + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + por %xmm10,%xmm2 + movl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm2,%xmm8 + addl 40(%rsp),%ebx + andl %eax,%edi + andl %ebp,%esi + aesenc %xmm15,%xmm11 + movups 48(%r15),%xmm14 + rorl $7,%edx + addl %edi,%ebx + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%esi + xorl %ebp,%edx + addl 44(%rsp),%eax + andl %ebp,%esi + andl %edx,%edi + rorl $7,%ecx + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + addl 48(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 64(%r15),%xmm15 + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,193,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm10 + paddd %xmm2,%xmm9 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + movdqa %xmm9,32(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm8 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + aesenc %xmm15,%xmm11 + movups 80(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm8,%xmm3 + addl 60(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 0(%rsp),%eax + paddd %xmm3,%xmm10 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + movdqa %xmm10,48(%rsp) + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 4(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 96(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 8(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 12(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + aesenc %xmm15,%xmm11 + movups 112(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + cmpq %r14,%r10 + je .Ldone_ssse3 + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r10),%xmm0 + movdqu 16(%r10),%xmm1 + movdqu 32(%r10),%xmm2 + movdqu 48(%r10),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r10 + addl 16(%rsp),%ebx + xorl %eax,%esi +.byte 102,15,56,0,206 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm9,%xmm0 + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + movdqa %xmm0,0(%rsp) + addl 20(%rsp),%eax + xorl %ebp,%edi + psubd %xmm9,%xmm0 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 128(%r15),%xmm15 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi +.byte 102,15,56,0,214 + movl %edx,%edi + roll $5,%edx + paddd %xmm9,%xmm1 + xorl %eax,%esi + aesenc %xmm15,%xmm11 + movups 144(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + movdqa %xmm1,16(%rsp) + addl 36(%rsp),%ebx + xorl %eax,%edi + psubd %xmm9,%xmm1 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi +.byte 102,15,56,0,222 + movl %ebp,%edi + roll $5,%ebp + paddd %xmm9,%xmm2 + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + movdqa %xmm2,32(%rsp) + addl 52(%rsp),%ecx + xorl %ebx,%edi + psubd %xmm9,%xmm2 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + cmpl $11,%r8d + jb .Laesenclast4 + movups 176(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 192(%r15),%xmm15 + aesenc %xmm14,%xmm11 + je .Laesenclast4 + movups 208(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 224(%r15),%xmm15 + aesenc %xmm14,%xmm11 +.Laesenclast4: + aesenclast %xmm15,%xmm11 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movups %xmm11,48(%r13,%r12,1) + leaq 64(%r12),%r12 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + addl 12(%r9),%edx + movl %eax,0(%r9) + addl 16(%r9),%ebp + movl %esi,4(%r9) + movl %esi,%ebx + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + jmp .Loop_ssse3 + +.align 16 +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 20(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 128(%r15),%xmm15 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + aesenc %xmm15,%xmm11 + movups 144(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + addl 36(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 52(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + cmpl $11,%r8d + jb .Laesenclast5 + movups 176(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 192(%r15),%xmm15 + aesenc %xmm14,%xmm11 + je .Laesenclast5 + movups 208(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 224(%r15),%xmm15 + aesenc %xmm14,%xmm11 +.Laesenclast5: + aesenclast %xmm15,%xmm11 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movups %xmm11,48(%r13,%r12,1) + movq 88(%rsp),%r8 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + movl %eax,0(%r9) + addl 12(%r9),%edx + movl %esi,4(%r9) + addl 16(%r9),%ebp + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + movups %xmm11,(%r8) + leaq 104(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_ssse3: + retq +.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3 +.align 64 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + +.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +#if defined(HAVE_GNU_STACK) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/ext/libressl/crypto/aes/aesni-sha1-macosx-x86_64.S b/ext/libressl/crypto/aes/aesni-sha1-macosx-x86_64.S new file mode 100644 index 0000000..3e88b1a --- /dev/null +++ b/ext/libressl/crypto/aes/aesni-sha1-macosx-x86_64.S @@ -0,0 +1,1398 @@ +#include "x86_arch.h" +.text + +.private_extern _OPENSSL_ia32cap_P + +.globl _aesni_cbc_sha1_enc + +.p2align 4 +_aesni_cbc_sha1_enc: + + movl _OPENSSL_ia32cap_P+0(%rip),%r10d + movl _OPENSSL_ia32cap_P+4(%rip),%r11d + jmp aesni_cbc_sha1_enc_ssse3 + retq + + +.p2align 4 +aesni_cbc_sha1_enc_ssse3: + movq 8(%rsp),%r10 + + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -104(%rsp),%rsp + + + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movdqu (%r8),%xmm11 + movq %r8,88(%rsp) + shlq $6,%r14 + subq %r12,%r13 + movl 240(%r15),%r8d + addq %r10,%r14 + + leaq K_XX_XX(%rip),%r11 + movl 0(%r9),%eax + movl 4(%r9),%ebx + movl 8(%r9),%ecx + movl 12(%r9),%edx + movl %ebx,%esi + movl 16(%r9),%ebp + + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r10),%xmm0 + movdqu 16(%r10),%xmm1 + movdqu 32(%r10),%xmm2 + movdqu 48(%r10),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r10 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm0 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + movups (%r15),%xmm13 + movups 16(%r15),%xmm14 + jmp L$oop_ssse3 +.p2align 4 +L$oop_ssse3: + movdqa %xmm1,%xmm4 + addl 0(%rsp),%ebp + movups 0(%r12),%xmm12 + xorps %xmm13,%xmm12 + xorps %xmm12,%xmm11 + aesenc %xmm14,%xmm11 + movups 32(%r15),%xmm15 + xorl %edx,%ecx + movdqa %xmm3,%xmm8 +.byte 102,15,58,15,224,8 + movl %eax,%edi + roll $5,%eax + paddd %xmm3,%xmm9 + andl %ecx,%esi + xorl %edx,%ecx + psrldq $4,%xmm8 + xorl %edx,%esi + addl %eax,%ebp + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%ebp + pxor %xmm2,%xmm8 + addl 4(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pxor %xmm8,%xmm4 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm9,48(%rsp) + xorl %ecx,%edi + aesenc %xmm15,%xmm11 + movups 48(%r15),%xmm14 + addl %ebp,%edx + movdqa %xmm4,%xmm10 + movdqa %xmm4,%xmm8 + rorl $7,%eax + addl %edi,%edx + addl 8(%rsp),%ecx + xorl %ebx,%eax + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + roll $5,%edx + andl %eax,%esi + xorl %ebx,%eax + psrld $31,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + movdqa %xmm10,%xmm9 + rorl $7,%ebp + addl %esi,%ecx + psrld $30,%xmm10 + por %xmm8,%xmm4 + addl 12(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + aesenc %xmm14,%xmm11 + movups 64(%r15),%xmm15 + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + andl %ebp,%edi + xorl %eax,%ebp + movdqa 0(%r11),%xmm10 + xorl %eax,%edi + addl %ecx,%ebx + pxor %xmm9,%xmm4 + rorl $7,%edx + addl %edi,%ebx + movdqa %xmm2,%xmm5 + addl 16(%rsp),%eax + xorl %ebp,%edx + movdqa %xmm4,%xmm9 +.byte 102,15,58,15,233,8 + movl %ebx,%edi + roll $5,%ebx + paddd %xmm4,%xmm10 + andl %edx,%esi + xorl %ebp,%edx + psrldq $4,%xmm9 + xorl %ebp,%esi + addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm3,%xmm9 + addl 20(%rsp),%ebp + aesenc %xmm15,%xmm11 + movups 80(%r15),%xmm14 + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pxor %xmm9,%xmm5 + andl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm10,0(%rsp) + xorl %edx,%edi + addl %eax,%ebp + movdqa %xmm5,%xmm8 + movdqa %xmm5,%xmm9 + rorl $7,%ebx + addl %edi,%ebp + addl 24(%rsp),%edx + xorl %ecx,%ebx + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + roll $5,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + psrld $31,%xmm9 + xorl %ecx,%esi + aesenc %xmm14,%xmm11 + movups 96(%r15),%xmm15 + addl %ebp,%edx + movdqa %xmm8,%xmm10 + rorl $7,%eax + addl %esi,%edx + psrld $30,%xmm8 + por %xmm9,%xmm5 + addl 28(%rsp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + andl %eax,%edi + xorl %ebx,%eax + movdqa 16(%r11),%xmm8 + xorl %ebx,%edi + addl %edx,%ecx + pxor %xmm10,%xmm5 + rorl $7,%ebp + addl %edi,%ecx + movdqa %xmm3,%xmm6 + addl 32(%rsp),%ebx + xorl %eax,%ebp + movdqa %xmm5,%xmm10 +.byte 102,15,58,15,242,8 + movl %ecx,%edi + roll $5,%ecx + aesenc %xmm15,%xmm11 + movups 112(%r15),%xmm14 + paddd %xmm5,%xmm8 + andl %ebp,%esi + xorl %eax,%ebp + psrldq $4,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm4,%xmm10 + addl 36(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + pxor %xmm10,%xmm6 + andl %edx,%edi + xorl %ebp,%edx + movdqa %xmm8,16(%rsp) + xorl %ebp,%edi + addl %ebx,%eax + movdqa %xmm6,%xmm9 + movdqa %xmm6,%xmm10 + rorl $7,%ecx + addl %edi,%eax + addl 40(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 128(%r15),%xmm15 + xorl %edx,%ecx + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx + psrld $31,%xmm10 + xorl %edx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + psrld $30,%xmm9 + por %xmm10,%xmm6 + addl 44(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa 16(%r11),%xmm9 + xorl %ecx,%edi + aesenc %xmm15,%xmm11 + movups 144(%r15),%xmm14 + addl %ebp,%edx + pxor %xmm8,%xmm6 + rorl $7,%eax + addl %edi,%edx + movdqa %xmm4,%xmm7 + addl 48(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm6,%xmm8 +.byte 102,15,58,15,251,8 + movl %edx,%edi + roll $5,%edx + paddd %xmm6,%xmm9 + andl %eax,%esi + xorl %ebx,%eax + psrldq $4,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm5,%xmm8 + addl 52(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + aesenc %xmm14,%xmm11 + movups 160(%r15),%xmm15 + pxor %xmm8,%xmm7 + andl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm9,32(%rsp) + xorl %eax,%edi + addl %ecx,%ebx + movdqa %xmm7,%xmm10 + movdqa %xmm7,%xmm8 + rorl $7,%edx + addl %edi,%ebx + addl 56(%rsp),%eax + xorl %ebp,%edx + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + roll $5,%ebx + andl %edx,%esi + xorl %ebp,%edx + psrld $31,%xmm8 + xorl %ebp,%esi + addl %ebx,%eax + movdqa %xmm10,%xmm9 + rorl $7,%ecx + addl %esi,%eax + psrld $30,%xmm10 + por %xmm8,%xmm7 + addl 60(%rsp),%ebp + cmpl $11,%r8d + jb L$aesenclast1 + movups 176(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 192(%r15),%xmm15 + aesenc %xmm14,%xmm11 + je L$aesenclast1 + movups 208(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 224(%r15),%xmm15 + aesenc %xmm14,%xmm11 +L$aesenclast1: + aesenclast %xmm15,%xmm11 + movups 16(%r15),%xmm14 + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + andl %ecx,%edi + xorl %edx,%ecx + movdqa 16(%r11),%xmm10 + xorl %edx,%edi + addl %eax,%ebp + pxor %xmm9,%xmm7 + rorl $7,%ebx + addl %edi,%ebp + movdqa %xmm7,%xmm9 + addl 0(%rsp),%edx + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,206,8 + xorl %ecx,%ebx + movl %ebp,%edi + roll $5,%ebp + pxor %xmm1,%xmm0 + andl %ebx,%esi + xorl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm7,%xmm10 + xorl %ecx,%esi + movups 16(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,0(%r13,%r12,1) + xorps %xmm12,%xmm11 + aesenc %xmm14,%xmm11 + movups 32(%r15),%xmm15 + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm0,%xmm9 + movdqa %xmm10,48(%rsp) + movl %edx,%esi + roll $5,%edx + andl %eax,%edi + xorl %ebx,%eax + pslld $2,%xmm0 + xorl %ebx,%edi + addl %edx,%ecx + psrld $30,%xmm9 + rorl $7,%ebp + addl %edi,%ecx + addl 8(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%edi + roll $5,%ecx + aesenc %xmm15,%xmm11 + movups 48(%r15),%xmm14 + por %xmm9,%xmm0 + andl %ebp,%esi + xorl %eax,%ebp + movdqa %xmm0,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + andl %edx,%edi + xorl %ebp,%edx + xorl %ebp,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 16(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 64(%r15),%xmm15 + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,215,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm8,%xmm9 + paddd %xmm0,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + movdqa %xmm8,0(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm10 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + aesenc %xmm15,%xmm11 + movups 80(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm10,%xmm1 + addl 28(%rsp),%ebx + xorl %eax,%edi + movdqa %xmm1,%xmm8 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 32(%rsp),%eax + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,192,8 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + xorl %edx,%esi + addl %ebx,%eax + movdqa 32(%r11),%xmm10 + paddd %xmm1,%xmm9 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 96(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + movdqa %xmm9,16(%rsp) + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + pslld $2,%xmm2 + addl 40(%rsp),%edx + xorl %ecx,%esi + psrld $30,%xmm8 + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + por %xmm8,%xmm2 + addl 44(%rsp),%ecx + xorl %ebx,%edi + movdqa %xmm2,%xmm9 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + aesenc %xmm15,%xmm11 + movups 112(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 48(%rsp),%ebx + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,201,8 + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + xorl %ebp,%esi + addl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm2,%xmm10 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + movdqa %xmm10,32(%rsp) + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + pslld $2,%xmm3 + addl 56(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 128(%r15),%xmm15 + xorl %edx,%esi + psrld $30,%xmm9 + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + por %xmm9,%xmm3 + addl 60(%rsp),%edx + xorl %ecx,%edi + movdqa %xmm3,%xmm10 + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 0(%rsp),%ecx + pxor %xmm0,%xmm4 +.byte 102,68,15,58,15,210,8 + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + xorl %eax,%esi + aesenc %xmm15,%xmm11 + movups 144(%r15),%xmm14 + addl %edx,%ecx + movdqa %xmm8,%xmm9 + paddd %xmm3,%xmm8 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + movdqa %xmm8,48(%rsp) + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + pslld $2,%xmm4 + addl 8(%rsp),%eax + xorl %ebp,%esi + psrld $30,%xmm10 + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + por %xmm10,%xmm4 + addl 12(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movdqa %xmm4,%xmm8 + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 16(%rsp),%edx + pxor %xmm1,%xmm5 +.byte 102,68,15,58,15,195,8 + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + xorl %ebx,%esi + addl %ebp,%edx + movdqa %xmm9,%xmm10 + paddd %xmm4,%xmm9 + rorl $7,%eax + addl %esi,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + movdqa %xmm9,0(%rsp) + xorl %eax,%edi + cmpl $11,%r8d + jb L$aesenclast2 + movups 176(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 192(%r15),%xmm15 + aesenc %xmm14,%xmm11 + je L$aesenclast2 + movups 208(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 224(%r15),%xmm15 + aesenc %xmm14,%xmm11 +L$aesenclast2: + aesenclast %xmm15,%xmm11 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + pslld $2,%xmm5 + addl 24(%rsp),%ebx + xorl %eax,%esi + psrld $30,%xmm8 + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + por %xmm8,%xmm5 + addl 28(%rsp),%eax + xorl %ebp,%edi + movdqa %xmm5,%xmm9 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movl %ecx,%edi + movups 32(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,16(%r13,%r12,1) + xorps %xmm12,%xmm11 + aesenc %xmm14,%xmm11 + movups 32(%r15),%xmm15 + pxor %xmm2,%xmm6 +.byte 102,68,15,58,15,204,8 + xorl %edx,%ecx + addl 32(%rsp),%ebp + andl %edx,%edi + pxor %xmm7,%xmm6 + andl %ecx,%esi + rorl $7,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm5,%xmm10 + addl %edi,%ebp + movl %eax,%edi + pxor %xmm9,%xmm6 + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movdqa %xmm6,%xmm9 + movdqa %xmm10,16(%rsp) + movl %ebx,%esi + xorl %ecx,%ebx + addl 36(%rsp),%edx + andl %ecx,%esi + pslld $2,%xmm6 + andl %ebx,%edi + rorl $7,%eax + psrld $30,%xmm9 + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + aesenc %xmm15,%xmm11 + movups 48(%r15),%xmm14 + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + por %xmm9,%xmm6 + movl %eax,%edi + xorl %ebx,%eax + movdqa %xmm6,%xmm10 + addl 40(%rsp),%ecx + andl %ebx,%edi + andl %eax,%esi + rorl $7,%ebp + addl %edi,%ecx + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%esi + xorl %eax,%ebp + addl 44(%rsp),%ebx + andl %eax,%esi + andl %ebp,%edi + aesenc %xmm14,%xmm11 + movups 64(%r15),%xmm15 + rorl $7,%edx + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%edi + pxor %xmm3,%xmm7 +.byte 102,68,15,58,15,213,8 + xorl %ebp,%edx + addl 48(%rsp),%eax + andl %ebp,%edi + pxor %xmm0,%xmm7 + andl %edx,%esi + rorl $7,%ecx + movdqa 48(%r11),%xmm9 + paddd %xmm6,%xmm8 + addl %edi,%eax + movl %ebx,%edi + pxor %xmm10,%xmm7 + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movdqa %xmm7,%xmm10 + movdqa %xmm8,32(%rsp) + movl %ecx,%esi + aesenc %xmm15,%xmm11 + movups 80(%r15),%xmm14 + xorl %edx,%ecx + addl 52(%rsp),%ebp + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%edi + rorl $7,%ebx + psrld $30,%xmm10 + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + por %xmm10,%xmm7 + movl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm7,%xmm8 + addl 56(%rsp),%edx + andl %ecx,%edi + andl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + movl %ebp,%edi + roll $5,%ebp + aesenc %xmm14,%xmm11 + movups 96(%r15),%xmm15 + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%esi + xorl %ebx,%eax + addl 60(%rsp),%ecx + andl %ebx,%esi + andl %eax,%edi + rorl $7,%ebp + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%edi + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,198,8 + xorl %eax,%ebp + addl 0(%rsp),%ebx + andl %eax,%edi + pxor %xmm1,%xmm0 + andl %ebp,%esi + aesenc %xmm15,%xmm11 + movups 112(%r15),%xmm14 + rorl $7,%edx + movdqa %xmm9,%xmm10 + paddd %xmm7,%xmm9 + addl %edi,%ebx + movl %ecx,%edi + pxor %xmm8,%xmm0 + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movdqa %xmm0,%xmm8 + movdqa %xmm9,48(%rsp) + movl %edx,%esi + xorl %ebp,%edx + addl 4(%rsp),%eax + andl %ebp,%esi + pslld $2,%xmm0 + andl %edx,%edi + rorl $7,%ecx + psrld $30,%xmm8 + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + por %xmm8,%xmm0 + movl %ecx,%edi + aesenc %xmm14,%xmm11 + movups 128(%r15),%xmm15 + xorl %edx,%ecx + movdqa %xmm0,%xmm9 + addl 8(%rsp),%ebp + andl %edx,%edi + andl %ecx,%esi + rorl $7,%ebx + addl %edi,%ebp + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%esi + xorl %ecx,%ebx + addl 12(%rsp),%edx + andl %ecx,%esi + andl %ebx,%edi + rorl $7,%eax + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + aesenc %xmm15,%xmm11 + movups 144(%r15),%xmm14 + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%edi + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,207,8 + xorl %ebx,%eax + addl 16(%rsp),%ecx + andl %ebx,%edi + pxor %xmm2,%xmm1 + andl %eax,%esi + rorl $7,%ebp + movdqa %xmm10,%xmm8 + paddd %xmm0,%xmm10 + addl %edi,%ecx + movl %edx,%edi + pxor %xmm9,%xmm1 + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movdqa %xmm1,%xmm9 + movdqa %xmm10,0(%rsp) + movl %ebp,%esi + xorl %eax,%ebp + addl 20(%rsp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %ebp,%edi + aesenc %xmm14,%xmm11 + movups 160(%r15),%xmm15 + rorl $7,%edx + psrld $30,%xmm9 + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + por %xmm9,%xmm1 + movl %edx,%edi + xorl %ebp,%edx + movdqa %xmm1,%xmm10 + addl 24(%rsp),%eax + andl %ebp,%edi + andl %edx,%esi + rorl $7,%ecx + addl %edi,%eax + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movl %ecx,%esi + cmpl $11,%r8d + jb L$aesenclast3 + movups 176(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 192(%r15),%xmm15 + aesenc %xmm14,%xmm11 + je L$aesenclast3 + movups 208(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 224(%r15),%xmm15 + aesenc %xmm14,%xmm11 +L$aesenclast3: + aesenclast %xmm15,%xmm11 + movups 16(%r15),%xmm14 + xorl %edx,%ecx + addl 28(%rsp),%ebp + andl %edx,%esi + andl %ecx,%edi + rorl $7,%ebx + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%edi + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,208,8 + xorl %ecx,%ebx + addl 32(%rsp),%edx + andl %ecx,%edi + pxor %xmm3,%xmm2 + andl %ebx,%esi + rorl $7,%eax + movdqa %xmm8,%xmm9 + paddd %xmm1,%xmm8 + addl %edi,%edx + movl %ebp,%edi + pxor %xmm10,%xmm2 + roll $5,%ebp + movups 48(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,32(%r13,%r12,1) + xorps %xmm12,%xmm11 + aesenc %xmm14,%xmm11 + movups 32(%r15),%xmm15 + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movdqa %xmm2,%xmm10 + movdqa %xmm8,16(%rsp) + movl %eax,%esi + xorl %ebx,%eax + addl 36(%rsp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%edi + rorl $7,%ebp + psrld $30,%xmm10 + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + por %xmm10,%xmm2 + movl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm2,%xmm8 + addl 40(%rsp),%ebx + andl %eax,%edi + andl %ebp,%esi + aesenc %xmm15,%xmm11 + movups 48(%r15),%xmm14 + rorl $7,%edx + addl %edi,%ebx + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%esi + xorl %ebp,%edx + addl 44(%rsp),%eax + andl %ebp,%esi + andl %edx,%edi + rorl $7,%ecx + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + addl 48(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 64(%r15),%xmm15 + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,193,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm10 + paddd %xmm2,%xmm9 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + movdqa %xmm9,32(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm8 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + aesenc %xmm15,%xmm11 + movups 80(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm8,%xmm3 + addl 60(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 0(%rsp),%eax + paddd %xmm3,%xmm10 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + movdqa %xmm10,48(%rsp) + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 4(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 96(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 8(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 12(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + aesenc %xmm15,%xmm11 + movups 112(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + cmpq %r14,%r10 + je L$done_ssse3 + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r10),%xmm0 + movdqu 16(%r10),%xmm1 + movdqu 32(%r10),%xmm2 + movdqu 48(%r10),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r10 + addl 16(%rsp),%ebx + xorl %eax,%esi +.byte 102,15,56,0,206 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm9,%xmm0 + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + movdqa %xmm0,0(%rsp) + addl 20(%rsp),%eax + xorl %ebp,%edi + psubd %xmm9,%xmm0 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 128(%r15),%xmm15 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi +.byte 102,15,56,0,214 + movl %edx,%edi + roll $5,%edx + paddd %xmm9,%xmm1 + xorl %eax,%esi + aesenc %xmm15,%xmm11 + movups 144(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + movdqa %xmm1,16(%rsp) + addl 36(%rsp),%ebx + xorl %eax,%edi + psubd %xmm9,%xmm1 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi +.byte 102,15,56,0,222 + movl %ebp,%edi + roll $5,%ebp + paddd %xmm9,%xmm2 + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + movdqa %xmm2,32(%rsp) + addl 52(%rsp),%ecx + xorl %ebx,%edi + psubd %xmm9,%xmm2 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + cmpl $11,%r8d + jb L$aesenclast4 + movups 176(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 192(%r15),%xmm15 + aesenc %xmm14,%xmm11 + je L$aesenclast4 + movups 208(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 224(%r15),%xmm15 + aesenc %xmm14,%xmm11 +L$aesenclast4: + aesenclast %xmm15,%xmm11 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movups %xmm11,48(%r13,%r12,1) + leaq 64(%r12),%r12 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + addl 12(%r9),%edx + movl %eax,0(%r9) + addl 16(%r9),%ebp + movl %esi,4(%r9) + movl %esi,%ebx + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + jmp L$oop_ssse3 + +.p2align 4 +L$done_ssse3: + addl 16(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 20(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 128(%r15),%xmm15 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + aesenc %xmm15,%xmm11 + movups 144(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + addl 36(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 52(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + cmpl $11,%r8d + jb L$aesenclast5 + movups 176(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 192(%r15),%xmm15 + aesenc %xmm14,%xmm11 + je L$aesenclast5 + movups 208(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 224(%r15),%xmm15 + aesenc %xmm14,%xmm11 +L$aesenclast5: + aesenclast %xmm15,%xmm11 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movups %xmm11,48(%r13,%r12,1) + movq 88(%rsp),%r8 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + movl %eax,0(%r9) + addl 12(%r9),%edx + movl %esi,4(%r9) + addl 16(%r9),%ebp + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + movups %xmm11,(%r8) + leaq 104(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue_ssse3: + retq + +.p2align 6 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + +.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 diff --git a/ext/libressl/crypto/aes/aesni-sha1-masm-x86_64.S b/ext/libressl/crypto/aes/aesni-sha1-masm-x86_64.S new file mode 100644 index 0000000..db95881 --- /dev/null +++ b/ext/libressl/crypto/aes/aesni-sha1-masm-x86_64.S @@ -0,0 +1,1616 @@ +; 1 "crypto/aes/aesni-sha1-masm-x86_64.S.tmp" +; 1 "" 1 +; 1 "" 3 +; 340 "" 3 +; 1 "" 1 +; 1 "" 2 +; 1 "crypto/aes/aesni-sha1-masm-x86_64.S.tmp" 2 +OPTION DOTNAME + +; 1 "./crypto/x86_arch.h" 1 + + +; 16 "./crypto/x86_arch.h" + + + + + + + + + +; 40 "./crypto/x86_arch.h" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +; 3 "crypto/aes/aesni-sha1-masm-x86_64.S.tmp" 2 +.text$ SEGMENT ALIGN(64) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR + + +PUBLIC aesni_cbc_sha1_enc + +ALIGN 16 +aesni_cbc_sha1_enc PROC PUBLIC + + mov r10d,DWORD PTR[((OPENSSL_ia32cap_P+0))] + mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+4))] + jmp aesni_cbc_sha1_enc_ssse3 + DB 0F3h,0C3h ;repret +aesni_cbc_sha1_enc ENDP + +ALIGN 16 +aesni_cbc_sha1_enc_ssse3 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_cbc_sha1_enc_ssse3:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + mov r10,QWORD PTR[56+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-264))+rsp] + + + movaps XMMWORD PTR[(96+0)+rsp],xmm6 + movaps XMMWORD PTR[(96+16)+rsp],xmm7 + movaps XMMWORD PTR[(96+32)+rsp],xmm8 + movaps XMMWORD PTR[(96+48)+rsp],xmm9 + movaps XMMWORD PTR[(96+64)+rsp],xmm10 + movaps XMMWORD PTR[(96+80)+rsp],xmm11 + movaps XMMWORD PTR[(96+96)+rsp],xmm12 + movaps XMMWORD PTR[(96+112)+rsp],xmm13 + movaps XMMWORD PTR[(96+128)+rsp],xmm14 + movaps XMMWORD PTR[(96+144)+rsp],xmm15 +$L$prologue_ssse3:: + mov r12,rdi + mov r13,rsi + mov r14,rdx + mov r15,rcx + movdqu xmm11,XMMWORD PTR[r8] + mov QWORD PTR[88+rsp],r8 + shl r14,6 + sub r13,r12 + mov r8d,DWORD PTR[240+r15] + add r14,r10 + + lea r11,QWORD PTR[K_XX_XX] + mov eax,DWORD PTR[r9] + mov ebx,DWORD PTR[4+r9] + mov ecx,DWORD PTR[8+r9] + mov edx,DWORD PTR[12+r9] + mov esi,ebx + mov ebp,DWORD PTR[16+r9] + + movdqa xmm6,XMMWORD PTR[64+r11] + movdqa xmm9,XMMWORD PTR[r11] + movdqu xmm0,XMMWORD PTR[r10] + movdqu xmm1,XMMWORD PTR[16+r10] + movdqu xmm2,XMMWORD PTR[32+r10] + movdqu xmm3,XMMWORD PTR[48+r10] +DB 102,15,56,0,198 + add r10,64 +DB 102,15,56,0,206 +DB 102,15,56,0,214 +DB 102,15,56,0,222 + paddd xmm0,xmm9 + paddd xmm1,xmm9 + paddd xmm2,xmm9 + movdqa XMMWORD PTR[rsp],xmm0 + psubd xmm0,xmm9 + movdqa XMMWORD PTR[16+rsp],xmm1 + psubd xmm1,xmm9 + movdqa XMMWORD PTR[32+rsp],xmm2 + psubd xmm2,xmm9 + movups xmm13,XMMWORD PTR[r15] + movups xmm14,XMMWORD PTR[16+r15] + jmp $L$oop_ssse3 +ALIGN 16 +$L$oop_ssse3:: + movdqa xmm4,xmm1 + add ebp,DWORD PTR[rsp] + movups xmm12,XMMWORD PTR[r12] + xorps xmm12,xmm13 + xorps xmm11,xmm12 + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[32+r15] + xor ecx,edx + movdqa xmm8,xmm3 +DB 102,15,58,15,224,8 + mov edi,eax + rol eax,5 + paddd xmm9,xmm3 + and esi,ecx + xor ecx,edx + psrldq xmm8,4 + xor esi,edx + add ebp,eax + pxor xmm4,xmm0 + ror ebx,2 + add ebp,esi + pxor xmm8,xmm2 + add edx,DWORD PTR[4+rsp] + xor ebx,ecx + mov esi,ebp + rol ebp,5 + pxor xmm4,xmm8 + and edi,ebx + xor ebx,ecx + movdqa XMMWORD PTR[48+rsp],xmm9 + xor edi,ecx + aesenc xmm11,xmm15 + movups xmm14,XMMWORD PTR[48+r15] + add edx,ebp + movdqa xmm10,xmm4 + movdqa xmm8,xmm4 + ror eax,7 + add edx,edi + add ecx,DWORD PTR[8+rsp] + xor eax,ebx + pslldq xmm10,12 + paddd xmm4,xmm4 + mov edi,edx + rol edx,5 + and esi,eax + xor eax,ebx + psrld xmm8,31 + xor esi,ebx + add ecx,edx + movdqa xmm9,xmm10 + ror ebp,7 + add ecx,esi + psrld xmm10,30 + por xmm4,xmm8 + add ebx,DWORD PTR[12+rsp] + xor ebp,eax + mov esi,ecx + rol ecx,5 + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[64+r15] + pslld xmm9,2 + pxor xmm4,xmm10 + and edi,ebp + xor ebp,eax + movdqa xmm10,XMMWORD PTR[r11] + xor edi,eax + add ebx,ecx + pxor xmm4,xmm9 + ror edx,7 + add ebx,edi + movdqa xmm5,xmm2 + add eax,DWORD PTR[16+rsp] + xor edx,ebp + movdqa xmm9,xmm4 +DB 102,15,58,15,233,8 + mov edi,ebx + rol ebx,5 + paddd xmm10,xmm4 + and esi,edx + xor edx,ebp + psrldq xmm9,4 + xor esi,ebp + add eax,ebx + pxor xmm5,xmm1 + ror ecx,7 + add eax,esi + pxor xmm9,xmm3 + add ebp,DWORD PTR[20+rsp] + aesenc xmm11,xmm15 + movups xmm14,XMMWORD PTR[80+r15] + xor ecx,edx + mov esi,eax + rol eax,5 + pxor xmm5,xmm9 + and edi,ecx + xor ecx,edx + movdqa XMMWORD PTR[rsp],xmm10 + xor edi,edx + add ebp,eax + movdqa xmm8,xmm5 + movdqa xmm9,xmm5 + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[24+rsp] + xor ebx,ecx + pslldq xmm8,12 + paddd xmm5,xmm5 + mov edi,ebp + rol ebp,5 + and esi,ebx + xor ebx,ecx + psrld xmm9,31 + xor esi,ecx + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[96+r15] + add edx,ebp + movdqa xmm10,xmm8 + ror eax,7 + add edx,esi + psrld xmm8,30 + por xmm5,xmm9 + add ecx,DWORD PTR[28+rsp] + xor eax,ebx + mov esi,edx + rol edx,5 + pslld xmm10,2 + pxor xmm5,xmm8 + and edi,eax + xor eax,ebx + movdqa xmm8,XMMWORD PTR[16+r11] + xor edi,ebx + add ecx,edx + pxor xmm5,xmm10 + ror ebp,7 + add ecx,edi + movdqa xmm6,xmm3 + add ebx,DWORD PTR[32+rsp] + xor ebp,eax + movdqa xmm10,xmm5 +DB 102,15,58,15,242,8 + mov edi,ecx + rol ecx,5 + aesenc xmm11,xmm15 + movups xmm14,XMMWORD PTR[112+r15] + paddd xmm8,xmm5 + and esi,ebp + xor ebp,eax + psrldq xmm10,4 + xor esi,eax + add ebx,ecx + pxor xmm6,xmm2 + ror edx,7 + add ebx,esi + pxor xmm10,xmm4 + add eax,DWORD PTR[36+rsp] + xor edx,ebp + mov esi,ebx + rol ebx,5 + pxor xmm6,xmm10 + and edi,edx + xor edx,ebp + movdqa XMMWORD PTR[16+rsp],xmm8 + xor edi,ebp + add eax,ebx + movdqa xmm9,xmm6 + movdqa xmm10,xmm6 + ror ecx,7 + add eax,edi + add ebp,DWORD PTR[40+rsp] + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[128+r15] + xor ecx,edx + pslldq xmm9,12 + paddd xmm6,xmm6 + mov edi,eax + rol eax,5 + and esi,ecx + xor ecx,edx + psrld xmm10,31 + xor esi,edx + add ebp,eax + movdqa xmm8,xmm9 + ror ebx,7 + add ebp,esi + psrld xmm9,30 + por xmm6,xmm10 + add edx,DWORD PTR[44+rsp] + xor ebx,ecx + mov esi,ebp + rol ebp,5 + pslld xmm8,2 + pxor xmm6,xmm9 + and edi,ebx + xor ebx,ecx + movdqa xmm9,XMMWORD PTR[16+r11] + xor edi,ecx + aesenc xmm11,xmm15 + movups xmm14,XMMWORD PTR[144+r15] + add edx,ebp + pxor xmm6,xmm8 + ror eax,7 + add edx,edi + movdqa xmm7,xmm4 + add ecx,DWORD PTR[48+rsp] + xor eax,ebx + movdqa xmm8,xmm6 +DB 102,15,58,15,251,8 + mov edi,edx + rol edx,5 + paddd xmm9,xmm6 + and esi,eax + xor eax,ebx + psrldq xmm8,4 + xor esi,ebx + add ecx,edx + pxor xmm7,xmm3 + ror ebp,7 + add ecx,esi + pxor xmm8,xmm5 + add ebx,DWORD PTR[52+rsp] + xor ebp,eax + mov esi,ecx + rol ecx,5 + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[160+r15] + pxor xmm7,xmm8 + and edi,ebp + xor ebp,eax + movdqa XMMWORD PTR[32+rsp],xmm9 + xor edi,eax + add ebx,ecx + movdqa xmm10,xmm7 + movdqa xmm8,xmm7 + ror edx,7 + add ebx,edi + add eax,DWORD PTR[56+rsp] + xor edx,ebp + pslldq xmm10,12 + paddd xmm7,xmm7 + mov edi,ebx + rol ebx,5 + and esi,edx + xor edx,ebp + psrld xmm8,31 + xor esi,ebp + add eax,ebx + movdqa xmm9,xmm10 + ror ecx,7 + add eax,esi + psrld xmm10,30 + por xmm7,xmm8 + add ebp,DWORD PTR[60+rsp] + cmp r8d,11 + jb $L$aesenclast1 + movups xmm14,XMMWORD PTR[176+r15] + aesenc xmm11,xmm15 + movups xmm15,XMMWORD PTR[192+r15] + aesenc xmm11,xmm14 + je $L$aesenclast1 + movups xmm14,XMMWORD PTR[208+r15] + aesenc xmm11,xmm15 + movups xmm15,XMMWORD PTR[224+r15] + aesenc xmm11,xmm14 +$L$aesenclast1:: + aesenclast xmm11,xmm15 + movups xmm14,XMMWORD PTR[16+r15] + xor ecx,edx + mov esi,eax + rol eax,5 + pslld xmm9,2 + pxor xmm7,xmm10 + and edi,ecx + xor ecx,edx + movdqa xmm10,XMMWORD PTR[16+r11] + xor edi,edx + add ebp,eax + pxor xmm7,xmm9 + ror ebx,7 + add ebp,edi + movdqa xmm9,xmm7 + add edx,DWORD PTR[rsp] + pxor xmm0,xmm4 +DB 102,68,15,58,15,206,8 + xor ebx,ecx + mov edi,ebp + rol ebp,5 + pxor xmm0,xmm1 + and esi,ebx + xor ebx,ecx + movdqa xmm8,xmm10 + paddd xmm10,xmm7 + xor esi,ecx + movups xmm12,XMMWORD PTR[16+r12] + xorps xmm12,xmm13 + movups XMMWORD PTR[r12*1+r13],xmm11 + xorps xmm11,xmm12 + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[32+r15] + add edx,ebp + pxor xmm0,xmm9 + ror eax,7 + add edx,esi + add ecx,DWORD PTR[4+rsp] + xor eax,ebx + movdqa xmm9,xmm0 + movdqa XMMWORD PTR[48+rsp],xmm10 + mov esi,edx + rol edx,5 + and edi,eax + xor eax,ebx + pslld xmm0,2 + xor edi,ebx + add ecx,edx + psrld xmm9,30 + ror ebp,7 + add ecx,edi + add ebx,DWORD PTR[8+rsp] + xor ebp,eax + mov edi,ecx + rol ecx,5 + aesenc xmm11,xmm15 + movups xmm14,XMMWORD PTR[48+r15] + por xmm0,xmm9 + and esi,ebp + xor ebp,eax + movdqa xmm10,xmm0 + xor esi,eax + add ebx,ecx + ror edx,7 + add ebx,esi + add eax,DWORD PTR[12+rsp] + xor edx,ebp + mov esi,ebx + rol ebx,5 + and edi,edx + xor edx,ebp + xor edi,ebp + add eax,ebx + ror ecx,7 + add eax,edi + add ebp,DWORD PTR[16+rsp] + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[64+r15] + pxor xmm1,xmm5 +DB 102,68,15,58,15,215,8 + xor esi,edx + mov edi,eax + rol eax,5 + pxor xmm1,xmm2 + xor esi,ecx + add ebp,eax + movdqa xmm9,xmm8 + paddd xmm8,xmm0 + ror ebx,7 + add ebp,esi + pxor xmm1,xmm10 + add edx,DWORD PTR[20+rsp] + xor edi,ecx + mov esi,ebp + rol ebp,5 + movdqa xmm10,xmm1 + movdqa XMMWORD PTR[rsp],xmm8 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + pslld xmm1,2 + add ecx,DWORD PTR[24+rsp] + xor esi,ebx + psrld xmm10,30 + mov edi,edx + rol edx,5 + xor esi,eax + aesenc xmm11,xmm15 + movups xmm14,XMMWORD PTR[80+r15] + add ecx,edx + ror ebp,7 + add ecx,esi + por xmm1,xmm10 + add ebx,DWORD PTR[28+rsp] + xor edi,eax + movdqa xmm8,xmm1 + mov esi,ecx + rol ecx,5 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + add eax,DWORD PTR[32+rsp] + pxor xmm2,xmm6 +DB 102,68,15,58,15,192,8 + xor esi,ebp + mov edi,ebx + rol ebx,5 + pxor xmm2,xmm3 + xor esi,edx + add eax,ebx + movdqa xmm10,XMMWORD PTR[32+r11] + paddd xmm9,xmm1 + ror ecx,7 + add eax,esi + pxor xmm2,xmm8 + add ebp,DWORD PTR[36+rsp] + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[96+r15] + xor edi,edx + mov esi,eax + rol eax,5 + movdqa xmm8,xmm2 + movdqa XMMWORD PTR[16+rsp],xmm9 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + pslld xmm2,2 + add edx,DWORD PTR[40+rsp] + xor esi,ecx + psrld xmm8,30 + mov edi,ebp + rol ebp,5 + xor esi,ebx + add edx,ebp + ror eax,7 + add edx,esi + por xmm2,xmm8 + add ecx,DWORD PTR[44+rsp] + xor edi,ebx + movdqa xmm9,xmm2 + mov esi,edx + rol edx,5 + xor edi,eax + aesenc xmm11,xmm15 + movups xmm14,XMMWORD PTR[112+r15] + add ecx,edx + ror ebp,7 + add ecx,edi + add ebx,DWORD PTR[48+rsp] + pxor xmm3,xmm7 +DB 102,68,15,58,15,201,8 + xor esi,eax + mov edi,ecx + rol ecx,5 + pxor xmm3,xmm4 + xor esi,ebp + add ebx,ecx + movdqa xmm8,xmm10 + paddd xmm10,xmm2 + ror edx,7 + add ebx,esi + pxor xmm3,xmm9 + add eax,DWORD PTR[52+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + movdqa xmm9,xmm3 + movdqa XMMWORD PTR[32+rsp],xmm10 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + pslld xmm3,2 + add ebp,DWORD PTR[56+rsp] + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[128+r15] + xor esi,edx + psrld xmm9,30 + mov edi,eax + rol eax,5 + xor esi,ecx + add ebp,eax + ror ebx,7 + add ebp,esi + por xmm3,xmm9 + add edx,DWORD PTR[60+rsp] + xor edi,ecx + movdqa xmm10,xmm3 + mov esi,ebp + rol ebp,5 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + add ecx,DWORD PTR[rsp] + pxor xmm4,xmm0 +DB 102,68,15,58,15,210,8 + xor esi,ebx + mov edi,edx + rol edx,5 + pxor xmm4,xmm5 + xor esi,eax + aesenc xmm11,xmm15 + movups xmm14,XMMWORD PTR[144+r15] + add ecx,edx + movdqa xmm9,xmm8 + paddd xmm8,xmm3 + ror ebp,7 + add ecx,esi + pxor xmm4,xmm10 + add ebx,DWORD PTR[4+rsp] + xor edi,eax + mov esi,ecx + rol ecx,5 + movdqa xmm10,xmm4 + movdqa XMMWORD PTR[48+rsp],xmm8 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + pslld xmm4,2 + add eax,DWORD PTR[8+rsp] + xor esi,ebp + psrld xmm10,30 + mov edi,ebx + rol ebx,5 + xor esi,edx + add eax,ebx + ror ecx,7 + add eax,esi + por xmm4,xmm10 + add ebp,DWORD PTR[12+rsp] + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[160+r15] + xor edi,edx + movdqa xmm8,xmm4 + mov esi,eax + rol eax,5 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[16+rsp] + pxor xmm5,xmm1 +DB 102,68,15,58,15,195,8 + xor esi,ecx + mov edi,ebp + rol ebp,5 + pxor xmm5,xmm6 + xor esi,ebx + add edx,ebp + movdqa xmm10,xmm9 + paddd xmm9,xmm4 + ror eax,7 + add edx,esi + pxor xmm5,xmm8 + add ecx,DWORD PTR[20+rsp] + xor edi,ebx + mov esi,edx + rol edx,5 + movdqa xmm8,xmm5 + movdqa XMMWORD PTR[rsp],xmm9 + xor edi,eax + cmp r8d,11 + jb $L$aesenclast2 + movups xmm14,XMMWORD PTR[176+r15] + aesenc xmm11,xmm15 + movups xmm15,XMMWORD PTR[192+r15] + aesenc xmm11,xmm14 + je $L$aesenclast2 + movups xmm14,XMMWORD PTR[208+r15] + aesenc xmm11,xmm15 + movups xmm15,XMMWORD PTR[224+r15] + aesenc xmm11,xmm14 +$L$aesenclast2:: + aesenclast xmm11,xmm15 + movups xmm14,XMMWORD PTR[16+r15] + add ecx,edx + ror ebp,7 + add ecx,edi + pslld xmm5,2 + add ebx,DWORD PTR[24+rsp] + xor esi,eax + psrld xmm8,30 + mov edi,ecx + rol ecx,5 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + por xmm5,xmm8 + add eax,DWORD PTR[28+rsp] + xor edi,ebp + movdqa xmm9,xmm5 + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + mov edi,ecx + movups xmm12,XMMWORD PTR[32+r12] + xorps xmm12,xmm13 + movups XMMWORD PTR[16+r12*1+r13],xmm11 + xorps xmm11,xmm12 + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[32+r15] + pxor xmm6,xmm2 +DB 102,68,15,58,15,204,8 + xor ecx,edx + add ebp,DWORD PTR[32+rsp] + and edi,edx + pxor xmm6,xmm7 + and esi,ecx + ror ebx,7 + movdqa xmm8,xmm10 + paddd xmm10,xmm5 + add ebp,edi + mov edi,eax + pxor xmm6,xmm9 + rol eax,5 + add ebp,esi + xor ecx,edx + add ebp,eax + movdqa xmm9,xmm6 + movdqa XMMWORD PTR[16+rsp],xmm10 + mov esi,ebx + xor ebx,ecx + add edx,DWORD PTR[36+rsp] + and esi,ecx + pslld xmm6,2 + and edi,ebx + ror eax,7 + psrld xmm9,30 + add edx,esi + mov esi,ebp + rol ebp,5 + aesenc xmm11,xmm15 + movups xmm14,XMMWORD PTR[48+r15] + add edx,edi + xor ebx,ecx + add edx,ebp + por xmm6,xmm9 + mov edi,eax + xor eax,ebx + movdqa xmm10,xmm6 + add ecx,DWORD PTR[40+rsp] + and edi,ebx + and esi,eax + ror ebp,7 + add ecx,edi + mov edi,edx + rol edx,5 + add ecx,esi + xor eax,ebx + add ecx,edx + mov esi,ebp + xor ebp,eax + add ebx,DWORD PTR[44+rsp] + and esi,eax + and edi,ebp + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[64+r15] + ror edx,7 + add ebx,esi + mov esi,ecx + rol ecx,5 + add ebx,edi + xor ebp,eax + add ebx,ecx + mov edi,edx + pxor xmm7,xmm3 +DB 102,68,15,58,15,213,8 + xor edx,ebp + add eax,DWORD PTR[48+rsp] + and edi,ebp + pxor xmm7,xmm0 + and esi,edx + ror ecx,7 + movdqa xmm9,XMMWORD PTR[48+r11] + paddd xmm8,xmm6 + add eax,edi + mov edi,ebx + pxor xmm7,xmm10 + rol ebx,5 + add eax,esi + xor edx,ebp + add eax,ebx + movdqa xmm10,xmm7 + movdqa XMMWORD PTR[32+rsp],xmm8 + mov esi,ecx + aesenc xmm11,xmm15 + movups xmm14,XMMWORD PTR[80+r15] + xor ecx,edx + add ebp,DWORD PTR[52+rsp] + and esi,edx + pslld xmm7,2 + and edi,ecx + ror ebx,7 + psrld xmm10,30 + add ebp,esi + mov esi,eax + rol eax,5 + add ebp,edi + xor ecx,edx + add ebp,eax + por xmm7,xmm10 + mov edi,ebx + xor ebx,ecx + movdqa xmm8,xmm7 + add edx,DWORD PTR[56+rsp] + and edi,ecx + and esi,ebx + ror eax,7 + add edx,edi + mov edi,ebp + rol ebp,5 + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[96+r15] + add edx,esi + xor ebx,ecx + add edx,ebp + mov esi,eax + xor eax,ebx + add ecx,DWORD PTR[60+rsp] + and esi,ebx + and edi,eax + ror ebp,7 + add ecx,esi + mov esi,edx + rol edx,5 + add ecx,edi + xor eax,ebx + add ecx,edx + mov edi,ebp + pxor xmm0,xmm4 +DB 102,68,15,58,15,198,8 + xor ebp,eax + add ebx,DWORD PTR[rsp] + and edi,eax + pxor xmm0,xmm1 + and esi,ebp + aesenc xmm11,xmm15 + movups xmm14,XMMWORD PTR[112+r15] + ror edx,7 + movdqa xmm10,xmm9 + paddd xmm9,xmm7 + add ebx,edi + mov edi,ecx + pxor xmm0,xmm8 + rol ecx,5 + add ebx,esi + xor ebp,eax + add ebx,ecx + movdqa xmm8,xmm0 + movdqa XMMWORD PTR[48+rsp],xmm9 + mov esi,edx + xor edx,ebp + add eax,DWORD PTR[4+rsp] + and esi,ebp + pslld xmm0,2 + and edi,edx + ror ecx,7 + psrld xmm8,30 + add eax,esi + mov esi,ebx + rol ebx,5 + add eax,edi + xor edx,ebp + add eax,ebx + por xmm0,xmm8 + mov edi,ecx + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[128+r15] + xor ecx,edx + movdqa xmm9,xmm0 + add ebp,DWORD PTR[8+rsp] + and edi,edx + and esi,ecx + ror ebx,7 + add ebp,edi + mov edi,eax + rol eax,5 + add ebp,esi + xor ecx,edx + add ebp,eax + mov esi,ebx + xor ebx,ecx + add edx,DWORD PTR[12+rsp] + and esi,ecx + and edi,ebx + ror eax,7 + add edx,esi + mov esi,ebp + rol ebp,5 + aesenc xmm11,xmm15 + movups xmm14,XMMWORD PTR[144+r15] + add edx,edi + xor ebx,ecx + add edx,ebp + mov edi,eax + pxor xmm1,xmm5 +DB 102,68,15,58,15,207,8 + xor eax,ebx + add ecx,DWORD PTR[16+rsp] + and edi,ebx + pxor xmm1,xmm2 + and esi,eax + ror ebp,7 + movdqa xmm8,xmm10 + paddd xmm10,xmm0 + add ecx,edi + mov edi,edx + pxor xmm1,xmm9 + rol edx,5 + add ecx,esi + xor eax,ebx + add ecx,edx + movdqa xmm9,xmm1 + movdqa XMMWORD PTR[rsp],xmm10 + mov esi,ebp + xor ebp,eax + add ebx,DWORD PTR[20+rsp] + and esi,eax + pslld xmm1,2 + and edi,ebp + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[160+r15] + ror edx,7 + psrld xmm9,30 + add ebx,esi + mov esi,ecx + rol ecx,5 + add ebx,edi + xor ebp,eax + add ebx,ecx + por xmm1,xmm9 + mov edi,edx + xor edx,ebp + movdqa xmm10,xmm1 + add eax,DWORD PTR[24+rsp] + and edi,ebp + and esi,edx + ror ecx,7 + add eax,edi + mov edi,ebx + rol ebx,5 + add eax,esi + xor edx,ebp + add eax,ebx + mov esi,ecx + cmp r8d,11 + jb $L$aesenclast3 + movups xmm14,XMMWORD PTR[176+r15] + aesenc xmm11,xmm15 + movups xmm15,XMMWORD PTR[192+r15] + aesenc xmm11,xmm14 + je $L$aesenclast3 + movups xmm14,XMMWORD PTR[208+r15] + aesenc xmm11,xmm15 + movups xmm15,XMMWORD PTR[224+r15] + aesenc xmm11,xmm14 +$L$aesenclast3:: + aesenclast xmm11,xmm15 + movups xmm14,XMMWORD PTR[16+r15] + xor ecx,edx + add ebp,DWORD PTR[28+rsp] + and esi,edx + and edi,ecx + ror ebx,7 + add ebp,esi + mov esi,eax + rol eax,5 + add ebp,edi + xor ecx,edx + add ebp,eax + mov edi,ebx + pxor xmm2,xmm6 +DB 102,68,15,58,15,208,8 + xor ebx,ecx + add edx,DWORD PTR[32+rsp] + and edi,ecx + pxor xmm2,xmm3 + and esi,ebx + ror eax,7 + movdqa xmm9,xmm8 + paddd xmm8,xmm1 + add edx,edi + mov edi,ebp + pxor xmm2,xmm10 + rol ebp,5 + movups xmm12,XMMWORD PTR[48+r12] + xorps xmm12,xmm13 + movups XMMWORD PTR[32+r12*1+r13],xmm11 + xorps xmm11,xmm12 + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[32+r15] + add edx,esi + xor ebx,ecx + add edx,ebp + movdqa xmm10,xmm2 + movdqa XMMWORD PTR[16+rsp],xmm8 + mov esi,eax + xor eax,ebx + add ecx,DWORD PTR[36+rsp] + and esi,ebx + pslld xmm2,2 + and edi,eax + ror ebp,7 + psrld xmm10,30 + add ecx,esi + mov esi,edx + rol edx,5 + add ecx,edi + xor eax,ebx + add ecx,edx + por xmm2,xmm10 + mov edi,ebp + xor ebp,eax + movdqa xmm8,xmm2 + add ebx,DWORD PTR[40+rsp] + and edi,eax + and esi,ebp + aesenc xmm11,xmm15 + movups xmm14,XMMWORD PTR[48+r15] + ror edx,7 + add ebx,edi + mov edi,ecx + rol ecx,5 + add ebx,esi + xor ebp,eax + add ebx,ecx + mov esi,edx + xor edx,ebp + add eax,DWORD PTR[44+rsp] + and esi,ebp + and edi,edx + ror ecx,7 + add eax,esi + mov esi,ebx + rol ebx,5 + add eax,edi + xor edx,ebp + add eax,ebx + add ebp,DWORD PTR[48+rsp] + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[64+r15] + pxor xmm3,xmm7 +DB 102,68,15,58,15,193,8 + xor esi,edx + mov edi,eax + rol eax,5 + pxor xmm3,xmm4 + xor esi,ecx + add ebp,eax + movdqa xmm10,xmm9 + paddd xmm9,xmm2 + ror ebx,7 + add ebp,esi + pxor xmm3,xmm8 + add edx,DWORD PTR[52+rsp] + xor edi,ecx + mov esi,ebp + rol ebp,5 + movdqa xmm8,xmm3 + movdqa XMMWORD PTR[32+rsp],xmm9 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + pslld xmm3,2 + add ecx,DWORD PTR[56+rsp] + xor esi,ebx + psrld xmm8,30 + mov edi,edx + rol edx,5 + xor esi,eax + aesenc xmm11,xmm15 + movups xmm14,XMMWORD PTR[80+r15] + add ecx,edx + ror ebp,7 + add ecx,esi + por xmm3,xmm8 + add ebx,DWORD PTR[60+rsp] + xor edi,eax + mov esi,ecx + rol ecx,5 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + add eax,DWORD PTR[rsp] + paddd xmm10,xmm3 + xor esi,ebp + mov edi,ebx + rol ebx,5 + xor esi,edx + movdqa XMMWORD PTR[48+rsp],xmm10 + add eax,ebx + ror ecx,7 + add eax,esi + add ebp,DWORD PTR[4+rsp] + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[96+r15] + xor edi,edx + mov esi,eax + rol eax,5 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[8+rsp] + xor esi,ecx + mov edi,ebp + rol ebp,5 + xor esi,ebx + add edx,ebp + ror eax,7 + add edx,esi + add ecx,DWORD PTR[12+rsp] + xor edi,ebx + mov esi,edx + rol edx,5 + xor edi,eax + aesenc xmm11,xmm15 + movups xmm14,XMMWORD PTR[112+r15] + add ecx,edx + ror ebp,7 + add ecx,edi + cmp r10,r14 + je $L$done_ssse3 + movdqa xmm6,XMMWORD PTR[64+r11] + movdqa xmm9,XMMWORD PTR[r11] + movdqu xmm0,XMMWORD PTR[r10] + movdqu xmm1,XMMWORD PTR[16+r10] + movdqu xmm2,XMMWORD PTR[32+r10] + movdqu xmm3,XMMWORD PTR[48+r10] +DB 102,15,56,0,198 + add r10,64 + add ebx,DWORD PTR[16+rsp] + xor esi,eax +DB 102,15,56,0,206 + mov edi,ecx + rol ecx,5 + paddd xmm0,xmm9 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + movdqa XMMWORD PTR[rsp],xmm0 + add eax,DWORD PTR[20+rsp] + xor edi,ebp + psubd xmm0,xmm9 + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + add ebp,DWORD PTR[24+rsp] + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[128+r15] + xor esi,edx + mov edi,eax + rol eax,5 + xor esi,ecx + add ebp,eax + ror ebx,7 + add ebp,esi + add edx,DWORD PTR[28+rsp] + xor edi,ecx + mov esi,ebp + rol ebp,5 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + add ecx,DWORD PTR[32+rsp] + xor esi,ebx +DB 102,15,56,0,214 + mov edi,edx + rol edx,5 + paddd xmm1,xmm9 + xor esi,eax + aesenc xmm11,xmm15 + movups xmm14,XMMWORD PTR[144+r15] + add ecx,edx + ror ebp,7 + add ecx,esi + movdqa XMMWORD PTR[16+rsp],xmm1 + add ebx,DWORD PTR[36+rsp] + xor edi,eax + psubd xmm1,xmm9 + mov esi,ecx + rol ecx,5 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + add eax,DWORD PTR[40+rsp] + xor esi,ebp + mov edi,ebx + rol ebx,5 + xor esi,edx + add eax,ebx + ror ecx,7 + add eax,esi + add ebp,DWORD PTR[44+rsp] + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[160+r15] + xor edi,edx + mov esi,eax + rol eax,5 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[48+rsp] + xor esi,ecx +DB 102,15,56,0,222 + mov edi,ebp + rol ebp,5 + paddd xmm2,xmm9 + xor esi,ebx + add edx,ebp + ror eax,7 + add edx,esi + movdqa XMMWORD PTR[32+rsp],xmm2 + add ecx,DWORD PTR[52+rsp] + xor edi,ebx + psubd xmm2,xmm9 + mov esi,edx + rol edx,5 + xor edi,eax + cmp r8d,11 + jb $L$aesenclast4 + movups xmm14,XMMWORD PTR[176+r15] + aesenc xmm11,xmm15 + movups xmm15,XMMWORD PTR[192+r15] + aesenc xmm11,xmm14 + je $L$aesenclast4 + movups xmm14,XMMWORD PTR[208+r15] + aesenc xmm11,xmm15 + movups xmm15,XMMWORD PTR[224+r15] + aesenc xmm11,xmm14 +$L$aesenclast4:: + aesenclast xmm11,xmm15 + movups xmm14,XMMWORD PTR[16+r15] + add ecx,edx + ror ebp,7 + add ecx,edi + add ebx,DWORD PTR[56+rsp] + xor esi,eax + mov edi,ecx + rol ecx,5 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + add eax,DWORD PTR[60+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + movups XMMWORD PTR[48+r12*1+r13],xmm11 + lea r12,QWORD PTR[64+r12] + + add eax,DWORD PTR[r9] + add esi,DWORD PTR[4+r9] + add ecx,DWORD PTR[8+r9] + add edx,DWORD PTR[12+r9] + mov DWORD PTR[r9],eax + add ebp,DWORD PTR[16+r9] + mov DWORD PTR[4+r9],esi + mov ebx,esi + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + mov DWORD PTR[16+r9],ebp + jmp $L$oop_ssse3 + +ALIGN 16 +$L$done_ssse3:: + add ebx,DWORD PTR[16+rsp] + xor esi,eax + mov edi,ecx + rol ecx,5 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + add eax,DWORD PTR[20+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + add ebp,DWORD PTR[24+rsp] + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[128+r15] + xor esi,edx + mov edi,eax + rol eax,5 + xor esi,ecx + add ebp,eax + ror ebx,7 + add ebp,esi + add edx,DWORD PTR[28+rsp] + xor edi,ecx + mov esi,ebp + rol ebp,5 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + add ecx,DWORD PTR[32+rsp] + xor esi,ebx + mov edi,edx + rol edx,5 + xor esi,eax + aesenc xmm11,xmm15 + movups xmm14,XMMWORD PTR[144+r15] + add ecx,edx + ror ebp,7 + add ecx,esi + add ebx,DWORD PTR[36+rsp] + xor edi,eax + mov esi,ecx + rol ecx,5 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + add eax,DWORD PTR[40+rsp] + xor esi,ebp + mov edi,ebx + rol ebx,5 + xor esi,edx + add eax,ebx + ror ecx,7 + add eax,esi + add ebp,DWORD PTR[44+rsp] + aesenc xmm11,xmm14 + movups xmm15,XMMWORD PTR[160+r15] + xor edi,edx + mov esi,eax + rol eax,5 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[48+rsp] + xor esi,ecx + mov edi,ebp + rol ebp,5 + xor esi,ebx + add edx,ebp + ror eax,7 + add edx,esi + add ecx,DWORD PTR[52+rsp] + xor edi,ebx + mov esi,edx + rol edx,5 + xor edi,eax + cmp r8d,11 + jb $L$aesenclast5 + movups xmm14,XMMWORD PTR[176+r15] + aesenc xmm11,xmm15 + movups xmm15,XMMWORD PTR[192+r15] + aesenc xmm11,xmm14 + je $L$aesenclast5 + movups xmm14,XMMWORD PTR[208+r15] + aesenc xmm11,xmm15 + movups xmm15,XMMWORD PTR[224+r15] + aesenc xmm11,xmm14 +$L$aesenclast5:: + aesenclast xmm11,xmm15 + movups xmm14,XMMWORD PTR[16+r15] + add ecx,edx + ror ebp,7 + add ecx,edi + add ebx,DWORD PTR[56+rsp] + xor esi,eax + mov edi,ecx + rol ecx,5 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + add eax,DWORD PTR[60+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + movups XMMWORD PTR[48+r12*1+r13],xmm11 + mov r8,QWORD PTR[88+rsp] + + add eax,DWORD PTR[r9] + add esi,DWORD PTR[4+r9] + add ecx,DWORD PTR[8+r9] + mov DWORD PTR[r9],eax + add edx,DWORD PTR[12+r9] + mov DWORD PTR[4+r9],esi + add ebp,DWORD PTR[16+r9] + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + mov DWORD PTR[16+r9],ebp + movups XMMWORD PTR[r8],xmm11 + movaps xmm6,XMMWORD PTR[((96+0))+rsp] + movaps xmm7,XMMWORD PTR[((96+16))+rsp] + movaps xmm8,XMMWORD PTR[((96+32))+rsp] + movaps xmm9,XMMWORD PTR[((96+48))+rsp] + movaps xmm10,XMMWORD PTR[((96+64))+rsp] + movaps xmm11,XMMWORD PTR[((96+80))+rsp] + movaps xmm12,XMMWORD PTR[((96+96))+rsp] + movaps xmm13,XMMWORD PTR[((96+112))+rsp] + movaps xmm14,XMMWORD PTR[((96+128))+rsp] + movaps xmm15,XMMWORD PTR[((96+144))+rsp] + lea rsi,QWORD PTR[264+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue_ssse3:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_cbc_sha1_enc_ssse3:: +aesni_cbc_sha1_enc_ssse3 ENDP +ALIGN 64 +K_XX_XX:: + DD 05a827999h,05a827999h,05a827999h,05a827999h + DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h + DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch + DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh + +DB 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115 +DB 116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52 +DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 +DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 +DB 114,103,62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +ssse3_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[96+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + lea rax,QWORD PTR[264+rax] + + mov r15,QWORD PTR[rax] + mov r14,QWORD PTR[8+rax] + mov r13,QWORD PTR[16+rax] + mov r12,QWORD PTR[24+rax] + mov rbp,QWORD PTR[32+rax] + mov rbx,QWORD PTR[40+rax] + lea rax,QWORD PTR[48+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +ssse3_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_aesni_cbc_sha1_enc_ssse3 + DD imagerel $L$SEH_end_aesni_cbc_sha1_enc_ssse3 + DD imagerel $L$SEH_info_aesni_cbc_sha1_enc_ssse3 +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_aesni_cbc_sha1_enc_ssse3:: +DB 9,0,0,0 + DD imagerel ssse3_handler + DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3 + +.xdata ENDS +END + diff --git a/ext/libressl/crypto/aes/aesni-sha1-mingw64-x86_64.S b/ext/libressl/crypto/aes/aesni-sha1-mingw64-x86_64.S new file mode 100644 index 0000000..c7a2d5c --- /dev/null +++ b/ext/libressl/crypto/aes/aesni-sha1-mingw64-x86_64.S @@ -0,0 +1,1536 @@ +#include "x86_arch.h" +.text + + + +.globl aesni_cbc_sha1_enc +.def aesni_cbc_sha1_enc; .scl 2; .type 32; .endef +.p2align 4 +aesni_cbc_sha1_enc: + + movl OPENSSL_ia32cap_P+0(%rip),%r10d + movl OPENSSL_ia32cap_P+4(%rip),%r11d + jmp aesni_cbc_sha1_enc_ssse3 + retq + +.def aesni_cbc_sha1_enc_ssse3; .scl 3; .type 32; .endef +.p2align 4 +aesni_cbc_sha1_enc_ssse3: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_aesni_cbc_sha1_enc_ssse3: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 + movq 48(%rsp),%r9 + + movq 56(%rsp),%r10 + + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -264(%rsp),%rsp + + + movaps %xmm6,96+0(%rsp) + movaps %xmm7,96+16(%rsp) + movaps %xmm8,96+32(%rsp) + movaps %xmm9,96+48(%rsp) + movaps %xmm10,96+64(%rsp) + movaps %xmm11,96+80(%rsp) + movaps %xmm12,96+96(%rsp) + movaps %xmm13,96+112(%rsp) + movaps %xmm14,96+128(%rsp) + movaps %xmm15,96+144(%rsp) +.Lprologue_ssse3: + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movdqu (%r8),%xmm11 + movq %r8,88(%rsp) + shlq $6,%r14 + subq %r12,%r13 + movl 240(%r15),%r8d + addq %r10,%r14 + + leaq K_XX_XX(%rip),%r11 + movl 0(%r9),%eax + movl 4(%r9),%ebx + movl 8(%r9),%ecx + movl 12(%r9),%edx + movl %ebx,%esi + movl 16(%r9),%ebp + + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r10),%xmm0 + movdqu 16(%r10),%xmm1 + movdqu 32(%r10),%xmm2 + movdqu 48(%r10),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r10 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm0 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + movups (%r15),%xmm13 + movups 16(%r15),%xmm14 + jmp .Loop_ssse3 +.p2align 4 +.Loop_ssse3: + movdqa %xmm1,%xmm4 + addl 0(%rsp),%ebp + movups 0(%r12),%xmm12 + xorps %xmm13,%xmm12 + xorps %xmm12,%xmm11 + aesenc %xmm14,%xmm11 + movups 32(%r15),%xmm15 + xorl %edx,%ecx + movdqa %xmm3,%xmm8 +.byte 102,15,58,15,224,8 + movl %eax,%edi + roll $5,%eax + paddd %xmm3,%xmm9 + andl %ecx,%esi + xorl %edx,%ecx + psrldq $4,%xmm8 + xorl %edx,%esi + addl %eax,%ebp + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%ebp + pxor %xmm2,%xmm8 + addl 4(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pxor %xmm8,%xmm4 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm9,48(%rsp) + xorl %ecx,%edi + aesenc %xmm15,%xmm11 + movups 48(%r15),%xmm14 + addl %ebp,%edx + movdqa %xmm4,%xmm10 + movdqa %xmm4,%xmm8 + rorl $7,%eax + addl %edi,%edx + addl 8(%rsp),%ecx + xorl %ebx,%eax + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + roll $5,%edx + andl %eax,%esi + xorl %ebx,%eax + psrld $31,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + movdqa %xmm10,%xmm9 + rorl $7,%ebp + addl %esi,%ecx + psrld $30,%xmm10 + por %xmm8,%xmm4 + addl 12(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + aesenc %xmm14,%xmm11 + movups 64(%r15),%xmm15 + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + andl %ebp,%edi + xorl %eax,%ebp + movdqa 0(%r11),%xmm10 + xorl %eax,%edi + addl %ecx,%ebx + pxor %xmm9,%xmm4 + rorl $7,%edx + addl %edi,%ebx + movdqa %xmm2,%xmm5 + addl 16(%rsp),%eax + xorl %ebp,%edx + movdqa %xmm4,%xmm9 +.byte 102,15,58,15,233,8 + movl %ebx,%edi + roll $5,%ebx + paddd %xmm4,%xmm10 + andl %edx,%esi + xorl %ebp,%edx + psrldq $4,%xmm9 + xorl %ebp,%esi + addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm3,%xmm9 + addl 20(%rsp),%ebp + aesenc %xmm15,%xmm11 + movups 80(%r15),%xmm14 + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pxor %xmm9,%xmm5 + andl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm10,0(%rsp) + xorl %edx,%edi + addl %eax,%ebp + movdqa %xmm5,%xmm8 + movdqa %xmm5,%xmm9 + rorl $7,%ebx + addl %edi,%ebp + addl 24(%rsp),%edx + xorl %ecx,%ebx + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + roll $5,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + psrld $31,%xmm9 + xorl %ecx,%esi + aesenc %xmm14,%xmm11 + movups 96(%r15),%xmm15 + addl %ebp,%edx + movdqa %xmm8,%xmm10 + rorl $7,%eax + addl %esi,%edx + psrld $30,%xmm8 + por %xmm9,%xmm5 + addl 28(%rsp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + andl %eax,%edi + xorl %ebx,%eax + movdqa 16(%r11),%xmm8 + xorl %ebx,%edi + addl %edx,%ecx + pxor %xmm10,%xmm5 + rorl $7,%ebp + addl %edi,%ecx + movdqa %xmm3,%xmm6 + addl 32(%rsp),%ebx + xorl %eax,%ebp + movdqa %xmm5,%xmm10 +.byte 102,15,58,15,242,8 + movl %ecx,%edi + roll $5,%ecx + aesenc %xmm15,%xmm11 + movups 112(%r15),%xmm14 + paddd %xmm5,%xmm8 + andl %ebp,%esi + xorl %eax,%ebp + psrldq $4,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm4,%xmm10 + addl 36(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + pxor %xmm10,%xmm6 + andl %edx,%edi + xorl %ebp,%edx + movdqa %xmm8,16(%rsp) + xorl %ebp,%edi + addl %ebx,%eax + movdqa %xmm6,%xmm9 + movdqa %xmm6,%xmm10 + rorl $7,%ecx + addl %edi,%eax + addl 40(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 128(%r15),%xmm15 + xorl %edx,%ecx + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx + psrld $31,%xmm10 + xorl %edx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + psrld $30,%xmm9 + por %xmm10,%xmm6 + addl 44(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa 16(%r11),%xmm9 + xorl %ecx,%edi + aesenc %xmm15,%xmm11 + movups 144(%r15),%xmm14 + addl %ebp,%edx + pxor %xmm8,%xmm6 + rorl $7,%eax + addl %edi,%edx + movdqa %xmm4,%xmm7 + addl 48(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm6,%xmm8 +.byte 102,15,58,15,251,8 + movl %edx,%edi + roll $5,%edx + paddd %xmm6,%xmm9 + andl %eax,%esi + xorl %ebx,%eax + psrldq $4,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm5,%xmm8 + addl 52(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + aesenc %xmm14,%xmm11 + movups 160(%r15),%xmm15 + pxor %xmm8,%xmm7 + andl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm9,32(%rsp) + xorl %eax,%edi + addl %ecx,%ebx + movdqa %xmm7,%xmm10 + movdqa %xmm7,%xmm8 + rorl $7,%edx + addl %edi,%ebx + addl 56(%rsp),%eax + xorl %ebp,%edx + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + roll $5,%ebx + andl %edx,%esi + xorl %ebp,%edx + psrld $31,%xmm8 + xorl %ebp,%esi + addl %ebx,%eax + movdqa %xmm10,%xmm9 + rorl $7,%ecx + addl %esi,%eax + psrld $30,%xmm10 + por %xmm8,%xmm7 + addl 60(%rsp),%ebp + cmpl $11,%r8d + jb .Laesenclast1 + movups 176(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 192(%r15),%xmm15 + aesenc %xmm14,%xmm11 + je .Laesenclast1 + movups 208(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 224(%r15),%xmm15 + aesenc %xmm14,%xmm11 +.Laesenclast1: + aesenclast %xmm15,%xmm11 + movups 16(%r15),%xmm14 + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + andl %ecx,%edi + xorl %edx,%ecx + movdqa 16(%r11),%xmm10 + xorl %edx,%edi + addl %eax,%ebp + pxor %xmm9,%xmm7 + rorl $7,%ebx + addl %edi,%ebp + movdqa %xmm7,%xmm9 + addl 0(%rsp),%edx + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,206,8 + xorl %ecx,%ebx + movl %ebp,%edi + roll $5,%ebp + pxor %xmm1,%xmm0 + andl %ebx,%esi + xorl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm7,%xmm10 + xorl %ecx,%esi + movups 16(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,0(%r13,%r12,1) + xorps %xmm12,%xmm11 + aesenc %xmm14,%xmm11 + movups 32(%r15),%xmm15 + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm0,%xmm9 + movdqa %xmm10,48(%rsp) + movl %edx,%esi + roll $5,%edx + andl %eax,%edi + xorl %ebx,%eax + pslld $2,%xmm0 + xorl %ebx,%edi + addl %edx,%ecx + psrld $30,%xmm9 + rorl $7,%ebp + addl %edi,%ecx + addl 8(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%edi + roll $5,%ecx + aesenc %xmm15,%xmm11 + movups 48(%r15),%xmm14 + por %xmm9,%xmm0 + andl %ebp,%esi + xorl %eax,%ebp + movdqa %xmm0,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + andl %edx,%edi + xorl %ebp,%edx + xorl %ebp,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 16(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 64(%r15),%xmm15 + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,215,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm8,%xmm9 + paddd %xmm0,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + movdqa %xmm8,0(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm10 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + aesenc %xmm15,%xmm11 + movups 80(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm10,%xmm1 + addl 28(%rsp),%ebx + xorl %eax,%edi + movdqa %xmm1,%xmm8 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 32(%rsp),%eax + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,192,8 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + xorl %edx,%esi + addl %ebx,%eax + movdqa 32(%r11),%xmm10 + paddd %xmm1,%xmm9 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 96(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + movdqa %xmm9,16(%rsp) + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + pslld $2,%xmm2 + addl 40(%rsp),%edx + xorl %ecx,%esi + psrld $30,%xmm8 + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + por %xmm8,%xmm2 + addl 44(%rsp),%ecx + xorl %ebx,%edi + movdqa %xmm2,%xmm9 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + aesenc %xmm15,%xmm11 + movups 112(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 48(%rsp),%ebx + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,201,8 + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + xorl %ebp,%esi + addl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm2,%xmm10 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + movdqa %xmm10,32(%rsp) + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + pslld $2,%xmm3 + addl 56(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 128(%r15),%xmm15 + xorl %edx,%esi + psrld $30,%xmm9 + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + por %xmm9,%xmm3 + addl 60(%rsp),%edx + xorl %ecx,%edi + movdqa %xmm3,%xmm10 + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 0(%rsp),%ecx + pxor %xmm0,%xmm4 +.byte 102,68,15,58,15,210,8 + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + xorl %eax,%esi + aesenc %xmm15,%xmm11 + movups 144(%r15),%xmm14 + addl %edx,%ecx + movdqa %xmm8,%xmm9 + paddd %xmm3,%xmm8 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + movdqa %xmm8,48(%rsp) + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + pslld $2,%xmm4 + addl 8(%rsp),%eax + xorl %ebp,%esi + psrld $30,%xmm10 + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + por %xmm10,%xmm4 + addl 12(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movdqa %xmm4,%xmm8 + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 16(%rsp),%edx + pxor %xmm1,%xmm5 +.byte 102,68,15,58,15,195,8 + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + xorl %ebx,%esi + addl %ebp,%edx + movdqa %xmm9,%xmm10 + paddd %xmm4,%xmm9 + rorl $7,%eax + addl %esi,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + movdqa %xmm9,0(%rsp) + xorl %eax,%edi + cmpl $11,%r8d + jb .Laesenclast2 + movups 176(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 192(%r15),%xmm15 + aesenc %xmm14,%xmm11 + je .Laesenclast2 + movups 208(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 224(%r15),%xmm15 + aesenc %xmm14,%xmm11 +.Laesenclast2: + aesenclast %xmm15,%xmm11 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + pslld $2,%xmm5 + addl 24(%rsp),%ebx + xorl %eax,%esi + psrld $30,%xmm8 + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + por %xmm8,%xmm5 + addl 28(%rsp),%eax + xorl %ebp,%edi + movdqa %xmm5,%xmm9 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movl %ecx,%edi + movups 32(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,16(%r13,%r12,1) + xorps %xmm12,%xmm11 + aesenc %xmm14,%xmm11 + movups 32(%r15),%xmm15 + pxor %xmm2,%xmm6 +.byte 102,68,15,58,15,204,8 + xorl %edx,%ecx + addl 32(%rsp),%ebp + andl %edx,%edi + pxor %xmm7,%xmm6 + andl %ecx,%esi + rorl $7,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm5,%xmm10 + addl %edi,%ebp + movl %eax,%edi + pxor %xmm9,%xmm6 + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movdqa %xmm6,%xmm9 + movdqa %xmm10,16(%rsp) + movl %ebx,%esi + xorl %ecx,%ebx + addl 36(%rsp),%edx + andl %ecx,%esi + pslld $2,%xmm6 + andl %ebx,%edi + rorl $7,%eax + psrld $30,%xmm9 + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + aesenc %xmm15,%xmm11 + movups 48(%r15),%xmm14 + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + por %xmm9,%xmm6 + movl %eax,%edi + xorl %ebx,%eax + movdqa %xmm6,%xmm10 + addl 40(%rsp),%ecx + andl %ebx,%edi + andl %eax,%esi + rorl $7,%ebp + addl %edi,%ecx + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%esi + xorl %eax,%ebp + addl 44(%rsp),%ebx + andl %eax,%esi + andl %ebp,%edi + aesenc %xmm14,%xmm11 + movups 64(%r15),%xmm15 + rorl $7,%edx + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%edi + pxor %xmm3,%xmm7 +.byte 102,68,15,58,15,213,8 + xorl %ebp,%edx + addl 48(%rsp),%eax + andl %ebp,%edi + pxor %xmm0,%xmm7 + andl %edx,%esi + rorl $7,%ecx + movdqa 48(%r11),%xmm9 + paddd %xmm6,%xmm8 + addl %edi,%eax + movl %ebx,%edi + pxor %xmm10,%xmm7 + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movdqa %xmm7,%xmm10 + movdqa %xmm8,32(%rsp) + movl %ecx,%esi + aesenc %xmm15,%xmm11 + movups 80(%r15),%xmm14 + xorl %edx,%ecx + addl 52(%rsp),%ebp + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%edi + rorl $7,%ebx + psrld $30,%xmm10 + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + por %xmm10,%xmm7 + movl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm7,%xmm8 + addl 56(%rsp),%edx + andl %ecx,%edi + andl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + movl %ebp,%edi + roll $5,%ebp + aesenc %xmm14,%xmm11 + movups 96(%r15),%xmm15 + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%esi + xorl %ebx,%eax + addl 60(%rsp),%ecx + andl %ebx,%esi + andl %eax,%edi + rorl $7,%ebp + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%edi + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,198,8 + xorl %eax,%ebp + addl 0(%rsp),%ebx + andl %eax,%edi + pxor %xmm1,%xmm0 + andl %ebp,%esi + aesenc %xmm15,%xmm11 + movups 112(%r15),%xmm14 + rorl $7,%edx + movdqa %xmm9,%xmm10 + paddd %xmm7,%xmm9 + addl %edi,%ebx + movl %ecx,%edi + pxor %xmm8,%xmm0 + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movdqa %xmm0,%xmm8 + movdqa %xmm9,48(%rsp) + movl %edx,%esi + xorl %ebp,%edx + addl 4(%rsp),%eax + andl %ebp,%esi + pslld $2,%xmm0 + andl %edx,%edi + rorl $7,%ecx + psrld $30,%xmm8 + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + por %xmm8,%xmm0 + movl %ecx,%edi + aesenc %xmm14,%xmm11 + movups 128(%r15),%xmm15 + xorl %edx,%ecx + movdqa %xmm0,%xmm9 + addl 8(%rsp),%ebp + andl %edx,%edi + andl %ecx,%esi + rorl $7,%ebx + addl %edi,%ebp + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%esi + xorl %ecx,%ebx + addl 12(%rsp),%edx + andl %ecx,%esi + andl %ebx,%edi + rorl $7,%eax + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + aesenc %xmm15,%xmm11 + movups 144(%r15),%xmm14 + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%edi + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,207,8 + xorl %ebx,%eax + addl 16(%rsp),%ecx + andl %ebx,%edi + pxor %xmm2,%xmm1 + andl %eax,%esi + rorl $7,%ebp + movdqa %xmm10,%xmm8 + paddd %xmm0,%xmm10 + addl %edi,%ecx + movl %edx,%edi + pxor %xmm9,%xmm1 + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movdqa %xmm1,%xmm9 + movdqa %xmm10,0(%rsp) + movl %ebp,%esi + xorl %eax,%ebp + addl 20(%rsp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %ebp,%edi + aesenc %xmm14,%xmm11 + movups 160(%r15),%xmm15 + rorl $7,%edx + psrld $30,%xmm9 + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + por %xmm9,%xmm1 + movl %edx,%edi + xorl %ebp,%edx + movdqa %xmm1,%xmm10 + addl 24(%rsp),%eax + andl %ebp,%edi + andl %edx,%esi + rorl $7,%ecx + addl %edi,%eax + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movl %ecx,%esi + cmpl $11,%r8d + jb .Laesenclast3 + movups 176(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 192(%r15),%xmm15 + aesenc %xmm14,%xmm11 + je .Laesenclast3 + movups 208(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 224(%r15),%xmm15 + aesenc %xmm14,%xmm11 +.Laesenclast3: + aesenclast %xmm15,%xmm11 + movups 16(%r15),%xmm14 + xorl %edx,%ecx + addl 28(%rsp),%ebp + andl %edx,%esi + andl %ecx,%edi + rorl $7,%ebx + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%edi + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,208,8 + xorl %ecx,%ebx + addl 32(%rsp),%edx + andl %ecx,%edi + pxor %xmm3,%xmm2 + andl %ebx,%esi + rorl $7,%eax + movdqa %xmm8,%xmm9 + paddd %xmm1,%xmm8 + addl %edi,%edx + movl %ebp,%edi + pxor %xmm10,%xmm2 + roll $5,%ebp + movups 48(%r12),%xmm12 + xorps %xmm13,%xmm12 + movups %xmm11,32(%r13,%r12,1) + xorps %xmm12,%xmm11 + aesenc %xmm14,%xmm11 + movups 32(%r15),%xmm15 + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movdqa %xmm2,%xmm10 + movdqa %xmm8,16(%rsp) + movl %eax,%esi + xorl %ebx,%eax + addl 36(%rsp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%edi + rorl $7,%ebp + psrld $30,%xmm10 + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + por %xmm10,%xmm2 + movl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm2,%xmm8 + addl 40(%rsp),%ebx + andl %eax,%edi + andl %ebp,%esi + aesenc %xmm15,%xmm11 + movups 48(%r15),%xmm14 + rorl $7,%edx + addl %edi,%ebx + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%esi + xorl %ebp,%edx + addl 44(%rsp),%eax + andl %ebp,%esi + andl %edx,%edi + rorl $7,%ecx + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + addl 48(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 64(%r15),%xmm15 + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,193,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm10 + paddd %xmm2,%xmm9 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + movdqa %xmm9,32(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm8 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + aesenc %xmm15,%xmm11 + movups 80(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm8,%xmm3 + addl 60(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 0(%rsp),%eax + paddd %xmm3,%xmm10 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + movdqa %xmm10,48(%rsp) + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 4(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 96(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 8(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 12(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + aesenc %xmm15,%xmm11 + movups 112(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + cmpq %r14,%r10 + je .Ldone_ssse3 + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r10),%xmm0 + movdqu 16(%r10),%xmm1 + movdqu 32(%r10),%xmm2 + movdqu 48(%r10),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r10 + addl 16(%rsp),%ebx + xorl %eax,%esi +.byte 102,15,56,0,206 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm9,%xmm0 + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + movdqa %xmm0,0(%rsp) + addl 20(%rsp),%eax + xorl %ebp,%edi + psubd %xmm9,%xmm0 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 128(%r15),%xmm15 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi +.byte 102,15,56,0,214 + movl %edx,%edi + roll $5,%edx + paddd %xmm9,%xmm1 + xorl %eax,%esi + aesenc %xmm15,%xmm11 + movups 144(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + movdqa %xmm1,16(%rsp) + addl 36(%rsp),%ebx + xorl %eax,%edi + psubd %xmm9,%xmm1 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi +.byte 102,15,56,0,222 + movl %ebp,%edi + roll $5,%ebp + paddd %xmm9,%xmm2 + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + movdqa %xmm2,32(%rsp) + addl 52(%rsp),%ecx + xorl %ebx,%edi + psubd %xmm9,%xmm2 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + cmpl $11,%r8d + jb .Laesenclast4 + movups 176(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 192(%r15),%xmm15 + aesenc %xmm14,%xmm11 + je .Laesenclast4 + movups 208(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 224(%r15),%xmm15 + aesenc %xmm14,%xmm11 +.Laesenclast4: + aesenclast %xmm15,%xmm11 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movups %xmm11,48(%r13,%r12,1) + leaq 64(%r12),%r12 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + addl 12(%r9),%edx + movl %eax,0(%r9) + addl 16(%r9),%ebp + movl %esi,4(%r9) + movl %esi,%ebx + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + jmp .Loop_ssse3 + +.p2align 4 +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 20(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 128(%r15),%xmm15 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + aesenc %xmm15,%xmm11 + movups 144(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + addl 36(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + aesenc %xmm14,%xmm11 + movups 160(%r15),%xmm15 + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 52(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + cmpl $11,%r8d + jb .Laesenclast5 + movups 176(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 192(%r15),%xmm15 + aesenc %xmm14,%xmm11 + je .Laesenclast5 + movups 208(%r15),%xmm14 + aesenc %xmm15,%xmm11 + movups 224(%r15),%xmm15 + aesenc %xmm14,%xmm11 +.Laesenclast5: + aesenclast %xmm15,%xmm11 + movups 16(%r15),%xmm14 + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movups %xmm11,48(%r13,%r12,1) + movq 88(%rsp),%r8 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + movl %eax,0(%r9) + addl 12(%r9),%edx + movl %esi,4(%r9) + addl 16(%r9),%ebp + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + movups %xmm11,(%r8) + movaps 96+0(%rsp),%xmm6 + movaps 96+16(%rsp),%xmm7 + movaps 96+32(%rsp),%xmm8 + movaps 96+48(%rsp),%xmm9 + movaps 96+64(%rsp),%xmm10 + movaps 96+80(%rsp),%xmm11 + movaps 96+96(%rsp),%xmm12 + movaps 96+112(%rsp),%xmm13 + movaps 96+128(%rsp),%xmm14 + movaps 96+144(%rsp),%xmm15 + leaq 264(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_ssse3: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_aesni_cbc_sha1_enc_ssse3: +.p2align 6 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + +.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 + +.def ssse3_handler; .scl 3; .type 32; .endef +.p2align 4 +ssse3_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + movq 8(%r9),%rsi + movq 56(%r9),%r11 + + movl 0(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jb .Lcommon_seh_tail + + movq 152(%r8),%rax + + movl 4(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jae .Lcommon_seh_tail + + leaq 96(%rax),%rsi + leaq 512(%r8),%rdi + movl $20,%ecx +.long 0xa548f3fc + leaq 264(%rax),%rax + + movq 0(%rax),%r15 + movq 8(%rax),%r14 + movq 16(%rax),%r13 + movq 24(%rax),%r12 + movq 32(%rax),%rbp + movq 40(%rax),%rbx + leaq 48(%rax),%rax + movq %rbx,144(%r8) + movq %rbp,160(%r8) + movq %r12,216(%r8) + movq %r13,224(%r8) + movq %r14,232(%r8) + movq %r15,240(%r8) + +.Lcommon_seh_tail: + movq 8(%rax),%rdi + movq 16(%rax),%rsi + movq %rax,152(%r8) + movq %rsi,168(%r8) + movq %rdi,176(%r8) + + movq 40(%r9),%rdi + movq %r8,%rsi + movl $154,%ecx +.long 0xa548f3fc + + movq %r9,%rsi + xorq %rcx,%rcx + movq 8(%rsi),%rdx + movq 0(%rsi),%r8 + movq 16(%rsi),%r9 + movq 40(%rsi),%r10 + leaq 56(%rsi),%r11 + leaq 24(%rsi),%r12 + movq %r10,32(%rsp) + movq %r11,40(%rsp) + movq %r12,48(%rsp) + movq %rcx,56(%rsp) + call *__imp_RtlVirtualUnwind(%rip) + + movl $1,%eax + addq $64,%rsp + popfq + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + popq %rdi + popq %rsi + retq + + +.section .pdata +.p2align 2 +.rva .LSEH_begin_aesni_cbc_sha1_enc_ssse3 +.rva .LSEH_end_aesni_cbc_sha1_enc_ssse3 +.rva .LSEH_info_aesni_cbc_sha1_enc_ssse3 +.section .xdata +.p2align 3 +.LSEH_info_aesni_cbc_sha1_enc_ssse3: +.byte 9,0,0,0 +.rva ssse3_handler +.rva .Lprologue_ssse3,.Lepilogue_ssse3 diff --git a/ext/libressl/crypto/aes/bsaes-elf-x86_64.S b/ext/libressl/crypto/aes/bsaes-elf-x86_64.S new file mode 100644 index 0000000..903e374 --- /dev/null +++ b/ext/libressl/crypto/aes/bsaes-elf-x86_64.S @@ -0,0 +1,2502 @@ +#include "x86_arch.h" +.text + + + + +.type _bsaes_encrypt8,@function +.align 64 +_bsaes_encrypt8: + leaq .LBS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa 80(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 +.byte 102,68,15,56,0,255 + pxor %xmm8,%xmm1 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm2 +.byte 102,15,56,0,207 + pxor %xmm8,%xmm3 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,223 + pxor %xmm8,%xmm5 +.byte 102,15,56,0,231 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 +_bsaes_encrypt8_bitslice: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp .Lenc_sbox +.align 16 +.Lenc_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 +.byte 102,68,15,56,0,255 + pxor 32(%rax),%xmm1 +.byte 102,15,56,0,199 + pxor 48(%rax),%xmm2 +.byte 102,15,56,0,207 + pxor 64(%rax),%xmm3 +.byte 102,15,56,0,215 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,223 + pxor 96(%rax),%xmm5 +.byte 102,15,56,0,231 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,239 + leaq 128(%rax),%rax +.byte 102,15,56,0,247 +.Lenc_sbox: + pxor %xmm5,%xmm4 + pxor %xmm0,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm1,%xmm5 + pxor %xmm15,%xmm4 + + pxor %xmm2,%xmm5 + pxor %xmm6,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm3,%xmm2 + pxor %xmm4,%xmm3 + pxor %xmm0,%xmm2 + + pxor %xmm6,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm6,%xmm10 + movdqa %xmm0,%xmm9 + movdqa %xmm4,%xmm8 + movdqa %xmm1,%xmm12 + movdqa %xmm5,%xmm11 + + pxor %xmm3,%xmm10 + pxor %xmm1,%xmm9 + pxor %xmm2,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm3,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm15,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm2,%xmm11 + pxor %xmm15,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm6,%xmm12 + movdqa %xmm4,%xmm11 + pxor %xmm0,%xmm12 + pxor %xmm5,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por %xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm1,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm3,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm0,%xmm13 + pand %xmm2,%xmm11 + movdqa %xmm6,%xmm14 + pand %xmm15,%xmm12 + pand %xmm4,%xmm13 + por %xmm5,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm5,%xmm11 + movdqa %xmm4,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand %xmm5,%xmm9 + pxor %xmm4,%xmm5 + pand %xmm14,%xmm4 + pand %xmm13,%xmm5 + pxor %xmm4,%xmm5 + pxor %xmm9,%xmm4 + pxor %xmm15,%xmm11 + pxor %xmm2,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm2,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm2 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm2,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm2 + pxor %xmm11,%xmm5 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm2 + + movdqa %xmm6,%xmm11 + movdqa %xmm0,%xmm7 + pxor %xmm3,%xmm11 + pxor %xmm1,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm3,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm1,%xmm3 + pand %xmm14,%xmm7 + pand %xmm12,%xmm1 + pand %xmm13,%xmm11 + pand %xmm8,%xmm3 + pxor %xmm11,%xmm7 + pxor %xmm1,%xmm3 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm1 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm6,%xmm10 + pxor %xmm0,%xmm6 + pand %xmm14,%xmm0 + pand %xmm13,%xmm6 + pxor %xmm0,%xmm6 + pxor %xmm10,%xmm0 + pxor %xmm11,%xmm6 + pxor %xmm11,%xmm3 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm1 + pxor %xmm15,%xmm6 + pxor %xmm5,%xmm0 + pxor %xmm6,%xmm3 + pxor %xmm15,%xmm5 + pxor %xmm0,%xmm15 + + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + pxor %xmm2,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm4,%xmm3 + + pxor %xmm2,%xmm5 + decl %r10d + jl .Lenc_done + pshufd $147,%xmm15,%xmm7 + pshufd $147,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $147,%xmm3,%xmm9 + pxor %xmm8,%xmm0 + pshufd $147,%xmm5,%xmm10 + pxor %xmm9,%xmm3 + pshufd $147,%xmm2,%xmm11 + pxor %xmm10,%xmm5 + pshufd $147,%xmm6,%xmm12 + pxor %xmm11,%xmm2 + pshufd $147,%xmm1,%xmm13 + pxor %xmm12,%xmm6 + pshufd $147,%xmm4,%xmm14 + pxor %xmm13,%xmm1 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor %xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $78,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $78,%xmm0,%xmm0 + pxor %xmm2,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm5,%xmm11 + pshufd $78,%xmm2,%xmm7 + pxor %xmm1,%xmm14 + pshufd $78,%xmm6,%xmm8 + pxor %xmm3,%xmm10 + pshufd $78,%xmm5,%xmm2 + pxor %xmm4,%xmm10 + pshufd $78,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $78,%xmm1,%xmm5 + pxor %xmm11,%xmm7 + pshufd $78,%xmm3,%xmm1 + pxor %xmm12,%xmm8 + pxor %xmm10,%xmm2 + pxor %xmm14,%xmm6 + pxor %xmm13,%xmm5 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm1 + movdqa %xmm8,%xmm4 + movdqa 48(%r11),%xmm7 + jnz .Lenc_loop + movdqa 64(%r11),%xmm7 + jmp .Lenc_loop +.align 16 +.Lenc_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm2,%xmm10 + psrlq $1,%xmm2 + pxor %xmm4,%xmm1 + pxor %xmm6,%xmm2 + pand %xmm7,%xmm1 + pand %xmm7,%xmm2 + pxor %xmm1,%xmm4 + psllq $1,%xmm1 + pxor %xmm2,%xmm6 + psllq $1,%xmm2 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm2 + movdqa %xmm3,%xmm9 + psrlq $1,%xmm3 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm5,%xmm3 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm3 + pand %xmm7,%xmm15 + pxor %xmm3,%xmm5 + psllq $1,%xmm3 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm2,%xmm10 + psrlq $2,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm1,%xmm2 + pand %xmm8,%xmm6 + pand %xmm8,%xmm2 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm2,%xmm1 + psllq $2,%xmm2 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm2 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm5,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm5 + psllq $2,%xmm0 + pxor %xmm15,%xmm3 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm5,%xmm9 + psrlq $4,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $4,%xmm3 + pxor %xmm4,%xmm5 + pxor %xmm1,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm4 + psllq $4,%xmm5 + pxor %xmm3,%xmm1 + psllq $4,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm2,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm2 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm1 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor %xmm7,%xmm0 + retq +.size _bsaes_encrypt8,.-_bsaes_encrypt8 + +.type _bsaes_decrypt8,@function +.align 64 +_bsaes_decrypt8: + leaq .LBS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa -48(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 +.byte 102,68,15,56,0,255 + pxor %xmm8,%xmm1 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm2 +.byte 102,15,56,0,207 + pxor %xmm8,%xmm3 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,223 + pxor %xmm8,%xmm5 +.byte 102,15,56,0,231 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp .Ldec_sbox +.align 16 +.Ldec_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 +.byte 102,68,15,56,0,255 + pxor 32(%rax),%xmm1 +.byte 102,15,56,0,199 + pxor 48(%rax),%xmm2 +.byte 102,15,56,0,207 + pxor 64(%rax),%xmm3 +.byte 102,15,56,0,215 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,223 + pxor 96(%rax),%xmm5 +.byte 102,15,56,0,231 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,239 + leaq 128(%rax),%rax +.byte 102,15,56,0,247 +.Ldec_sbox: + pxor %xmm3,%xmm2 + + pxor %xmm6,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm3,%xmm5 + pxor %xmm5,%xmm6 + pxor %xmm6,%xmm0 + + pxor %xmm0,%xmm15 + pxor %xmm4,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm15,%xmm4 + pxor %xmm2,%xmm0 + movdqa %xmm2,%xmm10 + movdqa %xmm6,%xmm9 + movdqa %xmm0,%xmm8 + movdqa %xmm3,%xmm12 + movdqa %xmm4,%xmm11 + + pxor %xmm15,%xmm10 + pxor %xmm3,%xmm9 + pxor %xmm5,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm15,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm1,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm5,%xmm11 + pxor %xmm1,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm2,%xmm12 + movdqa %xmm0,%xmm11 + pxor %xmm6,%xmm12 + pxor %xmm4,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por %xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm3,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm15,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm6,%xmm13 + pand %xmm5,%xmm11 + movdqa %xmm2,%xmm14 + pand %xmm1,%xmm12 + pand %xmm0,%xmm13 + por %xmm4,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm4,%xmm11 + movdqa %xmm0,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand %xmm4,%xmm9 + pxor %xmm0,%xmm4 + pand %xmm14,%xmm0 + pand %xmm13,%xmm4 + pxor %xmm0,%xmm4 + pxor %xmm9,%xmm0 + pxor %xmm1,%xmm11 + pxor %xmm5,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm1,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm5,%xmm1 + pand %xmm14,%xmm7 + pand %xmm12,%xmm5 + pand %xmm13,%xmm11 + pand %xmm8,%xmm1 + pxor %xmm11,%xmm7 + pxor %xmm5,%xmm1 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm5 + pxor %xmm11,%xmm4 + pxor %xmm11,%xmm1 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm5 + + movdqa %xmm2,%xmm11 + movdqa %xmm6,%xmm7 + pxor %xmm15,%xmm11 + pxor %xmm3,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm3,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm3 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm3,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm3 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm2,%xmm10 + pxor %xmm6,%xmm2 + pand %xmm14,%xmm6 + pand %xmm13,%xmm2 + pxor %xmm6,%xmm2 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm3 + pxor %xmm6,%xmm0 + pxor %xmm4,%xmm5 + + pxor %xmm0,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm6,%xmm4 + pxor %xmm1,%xmm3 + pxor %xmm15,%xmm6 + pxor %xmm4,%xmm3 + pxor %xmm5,%xmm2 + pxor %xmm0,%xmm5 + pxor %xmm3,%xmm2 + + pxor %xmm15,%xmm3 + pxor %xmm2,%xmm6 + decl %r10d + jl .Ldec_done + + pshufd $78,%xmm15,%xmm7 + pshufd $78,%xmm2,%xmm13 + pxor %xmm15,%xmm7 + pshufd $78,%xmm4,%xmm14 + pxor %xmm2,%xmm13 + pshufd $78,%xmm0,%xmm8 + pxor %xmm4,%xmm14 + pshufd $78,%xmm5,%xmm9 + pxor %xmm0,%xmm8 + pshufd $78,%xmm3,%xmm10 + pxor %xmm5,%xmm9 + pxor %xmm13,%xmm15 + pxor %xmm13,%xmm0 + pshufd $78,%xmm1,%xmm11 + pxor %xmm3,%xmm10 + pxor %xmm7,%xmm5 + pxor %xmm8,%xmm3 + pshufd $78,%xmm6,%xmm12 + pxor %xmm1,%xmm11 + pxor %xmm14,%xmm0 + pxor %xmm9,%xmm1 + pxor %xmm6,%xmm12 + + pxor %xmm14,%xmm5 + pxor %xmm13,%xmm3 + pxor %xmm13,%xmm1 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm14,%xmm1 + pxor %xmm14,%xmm6 + pxor %xmm12,%xmm4 + pshufd $147,%xmm15,%xmm7 + pshufd $147,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $147,%xmm5,%xmm9 + pxor %xmm8,%xmm0 + pshufd $147,%xmm3,%xmm10 + pxor %xmm9,%xmm5 + pshufd $147,%xmm1,%xmm11 + pxor %xmm10,%xmm3 + pshufd $147,%xmm6,%xmm12 + pxor %xmm11,%xmm1 + pshufd $147,%xmm2,%xmm13 + pxor %xmm12,%xmm6 + pshufd $147,%xmm4,%xmm14 + pxor %xmm13,%xmm2 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor %xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $78,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $78,%xmm0,%xmm0 + pxor %xmm1,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm3,%xmm11 + pshufd $78,%xmm1,%xmm7 + pxor %xmm2,%xmm14 + pshufd $78,%xmm6,%xmm8 + pxor %xmm5,%xmm10 + pshufd $78,%xmm3,%xmm1 + pxor %xmm4,%xmm10 + pshufd $78,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $78,%xmm2,%xmm3 + pxor %xmm11,%xmm7 + pshufd $78,%xmm5,%xmm2 + pxor %xmm12,%xmm8 + pxor %xmm1,%xmm10 + pxor %xmm14,%xmm6 + pxor %xmm3,%xmm13 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm2 + movdqa %xmm13,%xmm5 + movdqa %xmm8,%xmm4 + movdqa %xmm2,%xmm1 + movdqa %xmm10,%xmm2 + movdqa -16(%r11),%xmm7 + jnz .Ldec_loop + movdqa -32(%r11),%xmm7 + jmp .Ldec_loop +.align 16 +.Ldec_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm2,%xmm9 + psrlq $1,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $1,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm6,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm4 + psllq $1,%xmm2 + pxor %xmm1,%xmm6 + psllq $1,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm3,%xmm5 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm5 + pand %xmm7,%xmm15 + pxor %xmm5,%xmm3 + psllq $1,%xmm5 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm1,%xmm10 + psrlq $2,%xmm1 + pxor %xmm4,%xmm6 + pxor %xmm2,%xmm1 + pand %xmm8,%xmm6 + pand %xmm8,%xmm1 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm1,%xmm2 + psllq $2,%xmm1 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm3 + psllq $2,%xmm0 + pxor %xmm15,%xmm5 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm3,%xmm9 + psrlq $4,%xmm3 + movdqa %xmm5,%xmm10 + psrlq $4,%xmm5 + pxor %xmm4,%xmm3 + pxor %xmm2,%xmm5 + pand %xmm7,%xmm3 + pand %xmm7,%xmm5 + pxor %xmm3,%xmm4 + psllq $4,%xmm3 + pxor %xmm5,%xmm2 + psllq $4,%xmm5 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm5 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm1 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm1 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor %xmm7,%xmm0 + retq +.size _bsaes_decrypt8,.-_bsaes_decrypt8 +.type _bsaes_key_convert,@function +.align 16 +_bsaes_key_convert: + leaq .Lmasks(%rip),%r11 + movdqu (%rcx),%xmm7 + leaq 16(%rcx),%rcx + movdqa 0(%r11),%xmm0 + movdqa 16(%r11),%xmm1 + movdqa 32(%r11),%xmm2 + movdqa 48(%r11),%xmm3 + movdqa 64(%r11),%xmm4 + pcmpeqd %xmm5,%xmm5 + + movdqu (%rcx),%xmm6 + movdqa %xmm7,(%rax) + leaq 16(%rax),%rax + decl %r10d + jmp .Lkey_loop +.align 16 +.Lkey_loop: +.byte 102,15,56,0,244 + + movdqa %xmm0,%xmm8 + movdqa %xmm1,%xmm9 + + pand %xmm6,%xmm8 + pand %xmm6,%xmm9 + movdqa %xmm2,%xmm10 + pcmpeqb %xmm0,%xmm8 + psllq $4,%xmm0 + movdqa %xmm3,%xmm11 + pcmpeqb %xmm1,%xmm9 + psllq $4,%xmm1 + + pand %xmm6,%xmm10 + pand %xmm6,%xmm11 + movdqa %xmm0,%xmm12 + pcmpeqb %xmm2,%xmm10 + psllq $4,%xmm2 + movdqa %xmm1,%xmm13 + pcmpeqb %xmm3,%xmm11 + psllq $4,%xmm3 + + movdqa %xmm2,%xmm14 + movdqa %xmm3,%xmm15 + pxor %xmm5,%xmm8 + pxor %xmm5,%xmm9 + + pand %xmm6,%xmm12 + pand %xmm6,%xmm13 + movdqa %xmm8,0(%rax) + pcmpeqb %xmm0,%xmm12 + psrlq $4,%xmm0 + movdqa %xmm9,16(%rax) + pcmpeqb %xmm1,%xmm13 + psrlq $4,%xmm1 + leaq 16(%rcx),%rcx + + pand %xmm6,%xmm14 + pand %xmm6,%xmm15 + movdqa %xmm10,32(%rax) + pcmpeqb %xmm2,%xmm14 + psrlq $4,%xmm2 + movdqa %xmm11,48(%rax) + pcmpeqb %xmm3,%xmm15 + psrlq $4,%xmm3 + movdqu (%rcx),%xmm6 + + pxor %xmm5,%xmm13 + pxor %xmm5,%xmm14 + movdqa %xmm12,64(%rax) + movdqa %xmm13,80(%rax) + movdqa %xmm14,96(%rax) + movdqa %xmm15,112(%rax) + leaq 128(%rax),%rax + decl %r10d + jnz .Lkey_loop + + movdqa 80(%r11),%xmm7 + + retq +.size _bsaes_key_convert,.-_bsaes_key_convert + +.globl bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,@function +.align 16 +bsaes_cbc_encrypt: + cmpl $0,%r9d + jne asm_AES_cbc_encrypt + cmpq $128,%rdx + jb asm_AES_cbc_encrypt + + movq %rsp,%rax +.Lcbc_dec_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movl 240(%rcx),%eax + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movq %r8,%rbx + shrq $4,%r14 + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + movdqu (%rbx),%xmm14 + subq $8,%r14 +.Lcbc_dec_loop: + movdqu 0(%r12),%xmm15 + movdqu 16(%r12),%xmm0 + movdqu 32(%r12),%xmm1 + movdqu 48(%r12),%xmm2 + movdqu 64(%r12),%xmm3 + movdqu 80(%r12),%xmm4 + movq %rsp,%rax + movdqu 96(%r12),%xmm5 + movl %edx,%r10d + movdqu 112(%r12),%xmm6 + movdqa %xmm14,32(%rbp) + + call _bsaes_decrypt8 + + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm2 + movdqu 112(%r12),%xmm14 + pxor %xmm13,%xmm4 + movdqu %xmm15,0(%r13) + leaq 128(%r12),%r12 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + subq $8,%r14 + jnc .Lcbc_dec_loop + + addq $8,%r14 + jz .Lcbc_dec_done + + movdqu 0(%r12),%xmm15 + movq %rsp,%rax + movl %edx,%r10d + cmpq $2,%r14 + jb .Lcbc_dec_one + movdqu 16(%r12),%xmm0 + je .Lcbc_dec_two + movdqu 32(%r12),%xmm1 + cmpq $4,%r14 + jb .Lcbc_dec_three + movdqu 48(%r12),%xmm2 + je .Lcbc_dec_four + movdqu 64(%r12),%xmm3 + cmpq $6,%r14 + jb .Lcbc_dec_five + movdqu 80(%r12),%xmm4 + je .Lcbc_dec_six + movdqu 96(%r12),%xmm5 + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm14 + pxor %xmm12,%xmm2 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_six: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm14 + pxor %xmm11,%xmm6 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_five: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm14 + pxor %xmm10,%xmm1 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_four: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm14 + pxor %xmm9,%xmm3 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_three: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm14 + pxor %xmm8,%xmm5 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_two: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm14 + pxor %xmm7,%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_one: + leaq (%r12),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm14 + movdqu %xmm14,(%r13) + movdqa %xmm15,%xmm14 + +.Lcbc_dec_done: + movdqu %xmm14,(%rbx) + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lcbc_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lcbc_dec_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lcbc_dec_epilogue: + retq +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt + +.globl bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,@function +.align 16 +bsaes_ctr32_encrypt_blocks: + movq %rsp,%rax +.Lctr_enc_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movdqu (%r8),%xmm0 + movl 240(%rcx),%eax + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movdqa %xmm0,32(%rbp) + cmpq $8,%rdx + jb .Lctr_enc_short + + movl %eax,%ebx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %ebx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + movdqa (%rsp),%xmm8 + leaq .LADD1(%rip),%r11 + movdqa 32(%rbp),%xmm15 + movdqa -32(%r11),%xmm7 +.byte 102,68,15,56,0,199 +.byte 102,68,15,56,0,255 + movdqa %xmm8,(%rsp) + jmp .Lctr_enc_loop +.align 16 +.Lctr_enc_loop: + movdqa %xmm15,32(%rbp) + movdqa %xmm15,%xmm0 + movdqa %xmm15,%xmm1 + paddd 0(%r11),%xmm0 + movdqa %xmm15,%xmm2 + paddd 16(%r11),%xmm1 + movdqa %xmm15,%xmm3 + paddd 32(%r11),%xmm2 + movdqa %xmm15,%xmm4 + paddd 48(%r11),%xmm3 + movdqa %xmm15,%xmm5 + paddd 64(%r11),%xmm4 + movdqa %xmm15,%xmm6 + paddd 80(%r11),%xmm5 + paddd 96(%r11),%xmm6 + + + + movdqa (%rsp),%xmm8 + leaq 16(%rsp),%rax + movdqa -16(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 +.byte 102,68,15,56,0,255 + pxor %xmm8,%xmm1 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm2 +.byte 102,15,56,0,207 + pxor %xmm8,%xmm3 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,223 + pxor %xmm8,%xmm5 +.byte 102,15,56,0,231 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,239 + leaq .LBS0(%rip),%r11 +.byte 102,15,56,0,247 + movl %ebx,%r10d + + call _bsaes_encrypt8_bitslice + + subq $8,%r14 + jc .Lctr_enc_loop_done + + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + movdqu 32(%r12),%xmm9 + movdqu 48(%r12),%xmm10 + movdqu 64(%r12),%xmm11 + movdqu 80(%r12),%xmm12 + movdqu 96(%r12),%xmm13 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + pxor %xmm15,%xmm7 + movdqa 32(%rbp),%xmm15 + pxor %xmm8,%xmm0 + movdqu %xmm7,0(%r13) + pxor %xmm9,%xmm3 + movdqu %xmm0,16(%r13) + pxor %xmm10,%xmm5 + movdqu %xmm3,32(%r13) + pxor %xmm11,%xmm2 + movdqu %xmm5,48(%r13) + pxor %xmm12,%xmm6 + movdqu %xmm2,64(%r13) + pxor %xmm13,%xmm1 + movdqu %xmm6,80(%r13) + pxor %xmm14,%xmm4 + movdqu %xmm1,96(%r13) + leaq .LADD1(%rip),%r11 + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + paddd 112(%r11),%xmm15 + jnz .Lctr_enc_loop + + jmp .Lctr_enc_done +.align 16 +.Lctr_enc_loop_done: + addq $8,%r14 + movdqu 0(%r12),%xmm7 + pxor %xmm7,%xmm15 + movdqu %xmm15,0(%r13) + cmpq $2,%r14 + jb .Lctr_enc_done + movdqu 16(%r12),%xmm8 + pxor %xmm8,%xmm0 + movdqu %xmm0,16(%r13) + je .Lctr_enc_done + movdqu 32(%r12),%xmm9 + pxor %xmm9,%xmm3 + movdqu %xmm3,32(%r13) + cmpq $4,%r14 + jb .Lctr_enc_done + movdqu 48(%r12),%xmm10 + pxor %xmm10,%xmm5 + movdqu %xmm5,48(%r13) + je .Lctr_enc_done + movdqu 64(%r12),%xmm11 + pxor %xmm11,%xmm2 + movdqu %xmm2,64(%r13) + cmpq $6,%r14 + jb .Lctr_enc_done + movdqu 80(%r12),%xmm12 + pxor %xmm12,%xmm6 + movdqu %xmm6,80(%r13) + je .Lctr_enc_done + movdqu 96(%r12),%xmm13 + pxor %xmm13,%xmm1 + movdqu %xmm1,96(%r13) + jmp .Lctr_enc_done + +.align 16 +.Lctr_enc_short: + leaq 32(%rbp),%rdi + leaq 48(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_encrypt + movdqu (%r12),%xmm0 + leaq 16(%r12),%r12 + movl 44(%rbp),%eax + bswapl %eax + pxor 48(%rbp),%xmm0 + incl %eax + movdqu %xmm0,(%r13) + bswapl %eax + leaq 16(%r13),%r13 + movl %eax,44(%rsp) + decq %r14 + jnz .Lctr_enc_short + +.Lctr_enc_done: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lctr_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lctr_enc_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lctr_enc_epilogue: + retq +.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +.globl bsaes_xts_encrypt +.type bsaes_xts_encrypt,@function +.align 16 +bsaes_xts_encrypt: + movq %rsp,%rax +.Lxts_enc_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + + leaq (%r9),%rdi + leaq 32(%rbp),%rsi + leaq (%r8),%rdx + call asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + andq $-16,%r14 + subq $128,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $128,%r14 + jc .Lxts_enc_short + jmp .Lxts_enc_loop + +.align 16 +.Lxts_enc_loop: + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm1,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $128,%r14 + jnc .Lxts_enc_loop + +.Lxts_enc_short: + addq $128,%r14 + jz .Lxts_enc_done + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je .Lxts_enc_1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je .Lxts_enc_2 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je .Lxts_enc_3 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je .Lxts_enc_4 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je .Lxts_enc_5 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je .Lxts_enc_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + movdqu %xmm1,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + movdqu %xmm2,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + movdqu %xmm5,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + movdqu %xmm3,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_encrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +.Lxts_enc_done: + andl $15,%ebx + jz .Lxts_enc_ret + movq %r13,%rdx + +.Lxts_enc_steal: + movzbl (%r12),%eax + movzbl -16(%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,-16(%rdx) + movb %cl,0(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz .Lxts_enc_steal + + movdqu -16(%r13),%xmm15 + leaq 32(%rbp),%rdi + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call asm_AES_encrypt + pxor 32(%rbp),%xmm6 + movdqu %xmm6,-16(%r13) + +.Lxts_enc_ret: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lxts_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lxts_enc_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lxts_enc_epilogue: + retq +.size bsaes_xts_encrypt,.-bsaes_xts_encrypt + +.globl bsaes_xts_decrypt +.type bsaes_xts_decrypt,@function +.align 16 +bsaes_xts_decrypt: + movq %rsp,%rax +.Lxts_dec_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + + leaq (%r9),%rdi + leaq 32(%rbp),%rsi + leaq (%r8),%rdx + call asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + xorl %eax,%eax + andq $-16,%r14 + testl $15,%ebx + setnz %al + shlq $4,%rax + subq %rax,%r14 + + subq $128,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $128,%r14 + jc .Lxts_dec_short + jmp .Lxts_dec_loop + +.align 16 +.Lxts_dec_loop: + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $128,%r14 + jnc .Lxts_dec_loop + +.Lxts_dec_short: + addq $128,%r14 + jz .Lxts_dec_done + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je .Lxts_dec_1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je .Lxts_dec_2 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je .Lxts_dec_3 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je .Lxts_dec_4 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je .Lxts_dec_5 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je .Lxts_dec_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +.Lxts_dec_done: + andl $15,%ebx + jz .Lxts_dec_ret + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + movdqa %xmm6,%xmm5 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + movdqu (%r12),%xmm15 + pxor %xmm13,%xmm6 + + leaq 32(%rbp),%rdi + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm6 + movq %r13,%rdx + movdqu %xmm6,(%r13) + +.Lxts_dec_steal: + movzbl 16(%r12),%eax + movzbl (%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,(%rdx) + movb %cl,16(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz .Lxts_dec_steal + + movdqu (%r13),%xmm15 + leaq 32(%rbp),%rdi + pxor %xmm5,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm5 + movdqu %xmm5,(%r13) + +.Lxts_dec_ret: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lxts_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lxts_dec_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lxts_dec_epilogue: + retq +.size bsaes_xts_decrypt,.-bsaes_xts_decrypt +.type _bsaes_const,@object +.align 64 +_bsaes_const: +.LM0ISR: +.quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISRM0: +.quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LISR: +.quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LBS0: +.quad 0x5555555555555555, 0x5555555555555555 +.LBS1: +.quad 0x3333333333333333, 0x3333333333333333 +.LBS2: +.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +.LSR: +.quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: +.quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0SR: +.quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSWPUP: +.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908 +.LSWPUPM0SR: +.quad 0x0a0d02060c03070b, 0x0004080f05090e01 +.LADD1: +.quad 0x0000000000000000, 0x0000000100000000 +.LADD2: +.quad 0x0000000000000000, 0x0000000200000000 +.LADD3: +.quad 0x0000000000000000, 0x0000000300000000 +.LADD4: +.quad 0x0000000000000000, 0x0000000400000000 +.LADD5: +.quad 0x0000000000000000, 0x0000000500000000 +.LADD6: +.quad 0x0000000000000000, 0x0000000600000000 +.LADD7: +.quad 0x0000000000000000, 0x0000000700000000 +.LADD8: +.quad 0x0000000000000000, 0x0000000800000000 +.Lxts_magic: +.long 0x87,0,1,0 +.Lmasks: +.quad 0x0101010101010101, 0x0101010101010101 +.quad 0x0202020202020202, 0x0202020202020202 +.quad 0x0404040404040404, 0x0404040404040404 +.quad 0x0808080808080808, 0x0808080808080808 +.LM0: +.quad 0x02060a0e03070b0f, 0x0004080c0105090d +.L63: +.quad 0x6363636363636363, 0x6363636363636363 +.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0 +.align 64 +.size _bsaes_const,.-_bsaes_const +#if defined(HAVE_GNU_STACK) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/ext/libressl/crypto/aes/bsaes-macosx-x86_64.S b/ext/libressl/crypto/aes/bsaes-macosx-x86_64.S new file mode 100644 index 0000000..5f780f0 --- /dev/null +++ b/ext/libressl/crypto/aes/bsaes-macosx-x86_64.S @@ -0,0 +1,2499 @@ +#include "x86_arch.h" +.text + + + + + +.p2align 6 +_bsaes_encrypt8: + leaq L$BS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa 80(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 +.byte 102,68,15,56,0,255 + pxor %xmm8,%xmm1 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm2 +.byte 102,15,56,0,207 + pxor %xmm8,%xmm3 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,223 + pxor %xmm8,%xmm5 +.byte 102,15,56,0,231 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 +_bsaes_encrypt8_bitslice: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp L$enc_sbox +.p2align 4 +L$enc_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 +.byte 102,68,15,56,0,255 + pxor 32(%rax),%xmm1 +.byte 102,15,56,0,199 + pxor 48(%rax),%xmm2 +.byte 102,15,56,0,207 + pxor 64(%rax),%xmm3 +.byte 102,15,56,0,215 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,223 + pxor 96(%rax),%xmm5 +.byte 102,15,56,0,231 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,239 + leaq 128(%rax),%rax +.byte 102,15,56,0,247 +L$enc_sbox: + pxor %xmm5,%xmm4 + pxor %xmm0,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm1,%xmm5 + pxor %xmm15,%xmm4 + + pxor %xmm2,%xmm5 + pxor %xmm6,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm3,%xmm2 + pxor %xmm4,%xmm3 + pxor %xmm0,%xmm2 + + pxor %xmm6,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm6,%xmm10 + movdqa %xmm0,%xmm9 + movdqa %xmm4,%xmm8 + movdqa %xmm1,%xmm12 + movdqa %xmm5,%xmm11 + + pxor %xmm3,%xmm10 + pxor %xmm1,%xmm9 + pxor %xmm2,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm3,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm15,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm2,%xmm11 + pxor %xmm15,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm6,%xmm12 + movdqa %xmm4,%xmm11 + pxor %xmm0,%xmm12 + pxor %xmm5,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por %xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm1,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm3,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm0,%xmm13 + pand %xmm2,%xmm11 + movdqa %xmm6,%xmm14 + pand %xmm15,%xmm12 + pand %xmm4,%xmm13 + por %xmm5,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm5,%xmm11 + movdqa %xmm4,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand %xmm5,%xmm9 + pxor %xmm4,%xmm5 + pand %xmm14,%xmm4 + pand %xmm13,%xmm5 + pxor %xmm4,%xmm5 + pxor %xmm9,%xmm4 + pxor %xmm15,%xmm11 + pxor %xmm2,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm2,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm2 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm2,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm2 + pxor %xmm11,%xmm5 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm2 + + movdqa %xmm6,%xmm11 + movdqa %xmm0,%xmm7 + pxor %xmm3,%xmm11 + pxor %xmm1,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm3,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm1,%xmm3 + pand %xmm14,%xmm7 + pand %xmm12,%xmm1 + pand %xmm13,%xmm11 + pand %xmm8,%xmm3 + pxor %xmm11,%xmm7 + pxor %xmm1,%xmm3 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm1 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm6,%xmm10 + pxor %xmm0,%xmm6 + pand %xmm14,%xmm0 + pand %xmm13,%xmm6 + pxor %xmm0,%xmm6 + pxor %xmm10,%xmm0 + pxor %xmm11,%xmm6 + pxor %xmm11,%xmm3 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm1 + pxor %xmm15,%xmm6 + pxor %xmm5,%xmm0 + pxor %xmm6,%xmm3 + pxor %xmm15,%xmm5 + pxor %xmm0,%xmm15 + + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + pxor %xmm2,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm4,%xmm3 + + pxor %xmm2,%xmm5 + decl %r10d + jl L$enc_done + pshufd $147,%xmm15,%xmm7 + pshufd $147,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $147,%xmm3,%xmm9 + pxor %xmm8,%xmm0 + pshufd $147,%xmm5,%xmm10 + pxor %xmm9,%xmm3 + pshufd $147,%xmm2,%xmm11 + pxor %xmm10,%xmm5 + pshufd $147,%xmm6,%xmm12 + pxor %xmm11,%xmm2 + pshufd $147,%xmm1,%xmm13 + pxor %xmm12,%xmm6 + pshufd $147,%xmm4,%xmm14 + pxor %xmm13,%xmm1 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor %xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $78,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $78,%xmm0,%xmm0 + pxor %xmm2,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm5,%xmm11 + pshufd $78,%xmm2,%xmm7 + pxor %xmm1,%xmm14 + pshufd $78,%xmm6,%xmm8 + pxor %xmm3,%xmm10 + pshufd $78,%xmm5,%xmm2 + pxor %xmm4,%xmm10 + pshufd $78,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $78,%xmm1,%xmm5 + pxor %xmm11,%xmm7 + pshufd $78,%xmm3,%xmm1 + pxor %xmm12,%xmm8 + pxor %xmm10,%xmm2 + pxor %xmm14,%xmm6 + pxor %xmm13,%xmm5 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm1 + movdqa %xmm8,%xmm4 + movdqa 48(%r11),%xmm7 + jnz L$enc_loop + movdqa 64(%r11),%xmm7 + jmp L$enc_loop +.p2align 4 +L$enc_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm2,%xmm10 + psrlq $1,%xmm2 + pxor %xmm4,%xmm1 + pxor %xmm6,%xmm2 + pand %xmm7,%xmm1 + pand %xmm7,%xmm2 + pxor %xmm1,%xmm4 + psllq $1,%xmm1 + pxor %xmm2,%xmm6 + psllq $1,%xmm2 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm2 + movdqa %xmm3,%xmm9 + psrlq $1,%xmm3 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm5,%xmm3 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm3 + pand %xmm7,%xmm15 + pxor %xmm3,%xmm5 + psllq $1,%xmm3 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm2,%xmm10 + psrlq $2,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm1,%xmm2 + pand %xmm8,%xmm6 + pand %xmm8,%xmm2 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm2,%xmm1 + psllq $2,%xmm2 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm2 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm5,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm5 + psllq $2,%xmm0 + pxor %xmm15,%xmm3 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm5,%xmm9 + psrlq $4,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $4,%xmm3 + pxor %xmm4,%xmm5 + pxor %xmm1,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm4 + psllq $4,%xmm5 + pxor %xmm3,%xmm1 + psllq $4,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm2,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm2 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm1 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor %xmm7,%xmm0 + retq + + + +.p2align 6 +_bsaes_decrypt8: + leaq L$BS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa -48(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 +.byte 102,68,15,56,0,255 + pxor %xmm8,%xmm1 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm2 +.byte 102,15,56,0,207 + pxor %xmm8,%xmm3 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,223 + pxor %xmm8,%xmm5 +.byte 102,15,56,0,231 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp L$dec_sbox +.p2align 4 +L$dec_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 +.byte 102,68,15,56,0,255 + pxor 32(%rax),%xmm1 +.byte 102,15,56,0,199 + pxor 48(%rax),%xmm2 +.byte 102,15,56,0,207 + pxor 64(%rax),%xmm3 +.byte 102,15,56,0,215 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,223 + pxor 96(%rax),%xmm5 +.byte 102,15,56,0,231 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,239 + leaq 128(%rax),%rax +.byte 102,15,56,0,247 +L$dec_sbox: + pxor %xmm3,%xmm2 + + pxor %xmm6,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm3,%xmm5 + pxor %xmm5,%xmm6 + pxor %xmm6,%xmm0 + + pxor %xmm0,%xmm15 + pxor %xmm4,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm15,%xmm4 + pxor %xmm2,%xmm0 + movdqa %xmm2,%xmm10 + movdqa %xmm6,%xmm9 + movdqa %xmm0,%xmm8 + movdqa %xmm3,%xmm12 + movdqa %xmm4,%xmm11 + + pxor %xmm15,%xmm10 + pxor %xmm3,%xmm9 + pxor %xmm5,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm15,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm1,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm5,%xmm11 + pxor %xmm1,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm2,%xmm12 + movdqa %xmm0,%xmm11 + pxor %xmm6,%xmm12 + pxor %xmm4,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por %xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm3,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm15,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm6,%xmm13 + pand %xmm5,%xmm11 + movdqa %xmm2,%xmm14 + pand %xmm1,%xmm12 + pand %xmm0,%xmm13 + por %xmm4,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm4,%xmm11 + movdqa %xmm0,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand %xmm4,%xmm9 + pxor %xmm0,%xmm4 + pand %xmm14,%xmm0 + pand %xmm13,%xmm4 + pxor %xmm0,%xmm4 + pxor %xmm9,%xmm0 + pxor %xmm1,%xmm11 + pxor %xmm5,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm1,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm5,%xmm1 + pand %xmm14,%xmm7 + pand %xmm12,%xmm5 + pand %xmm13,%xmm11 + pand %xmm8,%xmm1 + pxor %xmm11,%xmm7 + pxor %xmm5,%xmm1 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm5 + pxor %xmm11,%xmm4 + pxor %xmm11,%xmm1 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm5 + + movdqa %xmm2,%xmm11 + movdqa %xmm6,%xmm7 + pxor %xmm15,%xmm11 + pxor %xmm3,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm3,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm3 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm3,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm3 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm2,%xmm10 + pxor %xmm6,%xmm2 + pand %xmm14,%xmm6 + pand %xmm13,%xmm2 + pxor %xmm6,%xmm2 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm3 + pxor %xmm6,%xmm0 + pxor %xmm4,%xmm5 + + pxor %xmm0,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm6,%xmm4 + pxor %xmm1,%xmm3 + pxor %xmm15,%xmm6 + pxor %xmm4,%xmm3 + pxor %xmm5,%xmm2 + pxor %xmm0,%xmm5 + pxor %xmm3,%xmm2 + + pxor %xmm15,%xmm3 + pxor %xmm2,%xmm6 + decl %r10d + jl L$dec_done + + pshufd $78,%xmm15,%xmm7 + pshufd $78,%xmm2,%xmm13 + pxor %xmm15,%xmm7 + pshufd $78,%xmm4,%xmm14 + pxor %xmm2,%xmm13 + pshufd $78,%xmm0,%xmm8 + pxor %xmm4,%xmm14 + pshufd $78,%xmm5,%xmm9 + pxor %xmm0,%xmm8 + pshufd $78,%xmm3,%xmm10 + pxor %xmm5,%xmm9 + pxor %xmm13,%xmm15 + pxor %xmm13,%xmm0 + pshufd $78,%xmm1,%xmm11 + pxor %xmm3,%xmm10 + pxor %xmm7,%xmm5 + pxor %xmm8,%xmm3 + pshufd $78,%xmm6,%xmm12 + pxor %xmm1,%xmm11 + pxor %xmm14,%xmm0 + pxor %xmm9,%xmm1 + pxor %xmm6,%xmm12 + + pxor %xmm14,%xmm5 + pxor %xmm13,%xmm3 + pxor %xmm13,%xmm1 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm14,%xmm1 + pxor %xmm14,%xmm6 + pxor %xmm12,%xmm4 + pshufd $147,%xmm15,%xmm7 + pshufd $147,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $147,%xmm5,%xmm9 + pxor %xmm8,%xmm0 + pshufd $147,%xmm3,%xmm10 + pxor %xmm9,%xmm5 + pshufd $147,%xmm1,%xmm11 + pxor %xmm10,%xmm3 + pshufd $147,%xmm6,%xmm12 + pxor %xmm11,%xmm1 + pshufd $147,%xmm2,%xmm13 + pxor %xmm12,%xmm6 + pshufd $147,%xmm4,%xmm14 + pxor %xmm13,%xmm2 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor %xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $78,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $78,%xmm0,%xmm0 + pxor %xmm1,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm3,%xmm11 + pshufd $78,%xmm1,%xmm7 + pxor %xmm2,%xmm14 + pshufd $78,%xmm6,%xmm8 + pxor %xmm5,%xmm10 + pshufd $78,%xmm3,%xmm1 + pxor %xmm4,%xmm10 + pshufd $78,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $78,%xmm2,%xmm3 + pxor %xmm11,%xmm7 + pshufd $78,%xmm5,%xmm2 + pxor %xmm12,%xmm8 + pxor %xmm1,%xmm10 + pxor %xmm14,%xmm6 + pxor %xmm3,%xmm13 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm2 + movdqa %xmm13,%xmm5 + movdqa %xmm8,%xmm4 + movdqa %xmm2,%xmm1 + movdqa %xmm10,%xmm2 + movdqa -16(%r11),%xmm7 + jnz L$dec_loop + movdqa -32(%r11),%xmm7 + jmp L$dec_loop +.p2align 4 +L$dec_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm2,%xmm9 + psrlq $1,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $1,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm6,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm4 + psllq $1,%xmm2 + pxor %xmm1,%xmm6 + psllq $1,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm3,%xmm5 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm5 + pand %xmm7,%xmm15 + pxor %xmm5,%xmm3 + psllq $1,%xmm5 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm1,%xmm10 + psrlq $2,%xmm1 + pxor %xmm4,%xmm6 + pxor %xmm2,%xmm1 + pand %xmm8,%xmm6 + pand %xmm8,%xmm1 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm1,%xmm2 + psllq $2,%xmm1 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm3 + psllq $2,%xmm0 + pxor %xmm15,%xmm5 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm3,%xmm9 + psrlq $4,%xmm3 + movdqa %xmm5,%xmm10 + psrlq $4,%xmm5 + pxor %xmm4,%xmm3 + pxor %xmm2,%xmm5 + pand %xmm7,%xmm3 + pand %xmm7,%xmm5 + pxor %xmm3,%xmm4 + psllq $4,%xmm3 + pxor %xmm5,%xmm2 + psllq $4,%xmm5 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm5 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm1 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm1 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor %xmm7,%xmm0 + retq + + +.p2align 4 +_bsaes_key_convert: + leaq L$masks(%rip),%r11 + movdqu (%rcx),%xmm7 + leaq 16(%rcx),%rcx + movdqa 0(%r11),%xmm0 + movdqa 16(%r11),%xmm1 + movdqa 32(%r11),%xmm2 + movdqa 48(%r11),%xmm3 + movdqa 64(%r11),%xmm4 + pcmpeqd %xmm5,%xmm5 + + movdqu (%rcx),%xmm6 + movdqa %xmm7,(%rax) + leaq 16(%rax),%rax + decl %r10d + jmp L$key_loop +.p2align 4 +L$key_loop: +.byte 102,15,56,0,244 + + movdqa %xmm0,%xmm8 + movdqa %xmm1,%xmm9 + + pand %xmm6,%xmm8 + pand %xmm6,%xmm9 + movdqa %xmm2,%xmm10 + pcmpeqb %xmm0,%xmm8 + psllq $4,%xmm0 + movdqa %xmm3,%xmm11 + pcmpeqb %xmm1,%xmm9 + psllq $4,%xmm1 + + pand %xmm6,%xmm10 + pand %xmm6,%xmm11 + movdqa %xmm0,%xmm12 + pcmpeqb %xmm2,%xmm10 + psllq $4,%xmm2 + movdqa %xmm1,%xmm13 + pcmpeqb %xmm3,%xmm11 + psllq $4,%xmm3 + + movdqa %xmm2,%xmm14 + movdqa %xmm3,%xmm15 + pxor %xmm5,%xmm8 + pxor %xmm5,%xmm9 + + pand %xmm6,%xmm12 + pand %xmm6,%xmm13 + movdqa %xmm8,0(%rax) + pcmpeqb %xmm0,%xmm12 + psrlq $4,%xmm0 + movdqa %xmm9,16(%rax) + pcmpeqb %xmm1,%xmm13 + psrlq $4,%xmm1 + leaq 16(%rcx),%rcx + + pand %xmm6,%xmm14 + pand %xmm6,%xmm15 + movdqa %xmm10,32(%rax) + pcmpeqb %xmm2,%xmm14 + psrlq $4,%xmm2 + movdqa %xmm11,48(%rax) + pcmpeqb %xmm3,%xmm15 + psrlq $4,%xmm3 + movdqu (%rcx),%xmm6 + + pxor %xmm5,%xmm13 + pxor %xmm5,%xmm14 + movdqa %xmm12,64(%rax) + movdqa %xmm13,80(%rax) + movdqa %xmm14,96(%rax) + movdqa %xmm15,112(%rax) + leaq 128(%rax),%rax + decl %r10d + jnz L$key_loop + + movdqa 80(%r11),%xmm7 + + retq + + +.globl _bsaes_cbc_encrypt + +.p2align 4 +_bsaes_cbc_encrypt: + cmpl $0,%r9d + jne _asm_AES_cbc_encrypt + cmpq $128,%rdx + jb _asm_AES_cbc_encrypt + + movq %rsp,%rax +L$cbc_dec_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movl 240(%rcx),%eax + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movq %r8,%rbx + shrq $4,%r14 + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + movdqu (%rbx),%xmm14 + subq $8,%r14 +L$cbc_dec_loop: + movdqu 0(%r12),%xmm15 + movdqu 16(%r12),%xmm0 + movdqu 32(%r12),%xmm1 + movdqu 48(%r12),%xmm2 + movdqu 64(%r12),%xmm3 + movdqu 80(%r12),%xmm4 + movq %rsp,%rax + movdqu 96(%r12),%xmm5 + movl %edx,%r10d + movdqu 112(%r12),%xmm6 + movdqa %xmm14,32(%rbp) + + call _bsaes_decrypt8 + + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm2 + movdqu 112(%r12),%xmm14 + pxor %xmm13,%xmm4 + movdqu %xmm15,0(%r13) + leaq 128(%r12),%r12 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + subq $8,%r14 + jnc L$cbc_dec_loop + + addq $8,%r14 + jz L$cbc_dec_done + + movdqu 0(%r12),%xmm15 + movq %rsp,%rax + movl %edx,%r10d + cmpq $2,%r14 + jb L$cbc_dec_one + movdqu 16(%r12),%xmm0 + je L$cbc_dec_two + movdqu 32(%r12),%xmm1 + cmpq $4,%r14 + jb L$cbc_dec_three + movdqu 48(%r12),%xmm2 + je L$cbc_dec_four + movdqu 64(%r12),%xmm3 + cmpq $6,%r14 + jb L$cbc_dec_five + movdqu 80(%r12),%xmm4 + je L$cbc_dec_six + movdqu 96(%r12),%xmm5 + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm14 + pxor %xmm12,%xmm2 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_six: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm14 + pxor %xmm11,%xmm6 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_five: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm14 + pxor %xmm10,%xmm1 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_four: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm14 + pxor %xmm9,%xmm3 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_three: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm14 + pxor %xmm8,%xmm5 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_two: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm14 + pxor %xmm7,%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_one: + leaq (%r12),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call _asm_AES_decrypt + pxor 32(%rbp),%xmm14 + movdqu %xmm14,(%r13) + movdqa %xmm15,%xmm14 + +L$cbc_dec_done: + movdqu %xmm14,(%rbx) + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +L$cbc_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja L$cbc_dec_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +L$cbc_dec_epilogue: + retq + + +.globl _bsaes_ctr32_encrypt_blocks + +.p2align 4 +_bsaes_ctr32_encrypt_blocks: + movq %rsp,%rax +L$ctr_enc_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movdqu (%r8),%xmm0 + movl 240(%rcx),%eax + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movdqa %xmm0,32(%rbp) + cmpq $8,%rdx + jb L$ctr_enc_short + + movl %eax,%ebx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %ebx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + movdqa (%rsp),%xmm8 + leaq L$ADD1(%rip),%r11 + movdqa 32(%rbp),%xmm15 + movdqa -32(%r11),%xmm7 +.byte 102,68,15,56,0,199 +.byte 102,68,15,56,0,255 + movdqa %xmm8,(%rsp) + jmp L$ctr_enc_loop +.p2align 4 +L$ctr_enc_loop: + movdqa %xmm15,32(%rbp) + movdqa %xmm15,%xmm0 + movdqa %xmm15,%xmm1 + paddd 0(%r11),%xmm0 + movdqa %xmm15,%xmm2 + paddd 16(%r11),%xmm1 + movdqa %xmm15,%xmm3 + paddd 32(%r11),%xmm2 + movdqa %xmm15,%xmm4 + paddd 48(%r11),%xmm3 + movdqa %xmm15,%xmm5 + paddd 64(%r11),%xmm4 + movdqa %xmm15,%xmm6 + paddd 80(%r11),%xmm5 + paddd 96(%r11),%xmm6 + + + + movdqa (%rsp),%xmm8 + leaq 16(%rsp),%rax + movdqa -16(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 +.byte 102,68,15,56,0,255 + pxor %xmm8,%xmm1 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm2 +.byte 102,15,56,0,207 + pxor %xmm8,%xmm3 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,223 + pxor %xmm8,%xmm5 +.byte 102,15,56,0,231 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,239 + leaq L$BS0(%rip),%r11 +.byte 102,15,56,0,247 + movl %ebx,%r10d + + call _bsaes_encrypt8_bitslice + + subq $8,%r14 + jc L$ctr_enc_loop_done + + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + movdqu 32(%r12),%xmm9 + movdqu 48(%r12),%xmm10 + movdqu 64(%r12),%xmm11 + movdqu 80(%r12),%xmm12 + movdqu 96(%r12),%xmm13 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + pxor %xmm15,%xmm7 + movdqa 32(%rbp),%xmm15 + pxor %xmm8,%xmm0 + movdqu %xmm7,0(%r13) + pxor %xmm9,%xmm3 + movdqu %xmm0,16(%r13) + pxor %xmm10,%xmm5 + movdqu %xmm3,32(%r13) + pxor %xmm11,%xmm2 + movdqu %xmm5,48(%r13) + pxor %xmm12,%xmm6 + movdqu %xmm2,64(%r13) + pxor %xmm13,%xmm1 + movdqu %xmm6,80(%r13) + pxor %xmm14,%xmm4 + movdqu %xmm1,96(%r13) + leaq L$ADD1(%rip),%r11 + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + paddd 112(%r11),%xmm15 + jnz L$ctr_enc_loop + + jmp L$ctr_enc_done +.p2align 4 +L$ctr_enc_loop_done: + addq $8,%r14 + movdqu 0(%r12),%xmm7 + pxor %xmm7,%xmm15 + movdqu %xmm15,0(%r13) + cmpq $2,%r14 + jb L$ctr_enc_done + movdqu 16(%r12),%xmm8 + pxor %xmm8,%xmm0 + movdqu %xmm0,16(%r13) + je L$ctr_enc_done + movdqu 32(%r12),%xmm9 + pxor %xmm9,%xmm3 + movdqu %xmm3,32(%r13) + cmpq $4,%r14 + jb L$ctr_enc_done + movdqu 48(%r12),%xmm10 + pxor %xmm10,%xmm5 + movdqu %xmm5,48(%r13) + je L$ctr_enc_done + movdqu 64(%r12),%xmm11 + pxor %xmm11,%xmm2 + movdqu %xmm2,64(%r13) + cmpq $6,%r14 + jb L$ctr_enc_done + movdqu 80(%r12),%xmm12 + pxor %xmm12,%xmm6 + movdqu %xmm6,80(%r13) + je L$ctr_enc_done + movdqu 96(%r12),%xmm13 + pxor %xmm13,%xmm1 + movdqu %xmm1,96(%r13) + jmp L$ctr_enc_done + +.p2align 4 +L$ctr_enc_short: + leaq 32(%rbp),%rdi + leaq 48(%rbp),%rsi + leaq (%r15),%rdx + call _asm_AES_encrypt + movdqu (%r12),%xmm0 + leaq 16(%r12),%r12 + movl 44(%rbp),%eax + bswapl %eax + pxor 48(%rbp),%xmm0 + incl %eax + movdqu %xmm0,(%r13) + bswapl %eax + leaq 16(%r13),%r13 + movl %eax,44(%rsp) + decq %r14 + jnz L$ctr_enc_short + +L$ctr_enc_done: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +L$ctr_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja L$ctr_enc_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +L$ctr_enc_epilogue: + retq + +.globl _bsaes_xts_encrypt + +.p2align 4 +_bsaes_xts_encrypt: + movq %rsp,%rax +L$xts_enc_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + + leaq (%r9),%rdi + leaq 32(%rbp),%rsi + leaq (%r8),%rdx + call _asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + andq $-16,%r14 + subq $128,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa L$xts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $128,%r14 + jc L$xts_enc_short + jmp L$xts_enc_loop + +.p2align 4 +L$xts_enc_loop: + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm1,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa L$xts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $128,%r14 + jnc L$xts_enc_loop + +L$xts_enc_short: + addq $128,%r14 + jz L$xts_enc_done + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je L$xts_enc_1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je L$xts_enc_2 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je L$xts_enc_3 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je L$xts_enc_4 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je L$xts_enc_5 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je L$xts_enc_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + movdqu %xmm1,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + movdqu %xmm2,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + movdqu %xmm5,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + movdqu %xmm3,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call _asm_AES_encrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +L$xts_enc_done: + andl $15,%ebx + jz L$xts_enc_ret + movq %r13,%rdx + +L$xts_enc_steal: + movzbl (%r12),%eax + movzbl -16(%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,-16(%rdx) + movb %cl,0(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz L$xts_enc_steal + + movdqu -16(%r13),%xmm15 + leaq 32(%rbp),%rdi + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call _asm_AES_encrypt + pxor 32(%rbp),%xmm6 + movdqu %xmm6,-16(%r13) + +L$xts_enc_ret: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +L$xts_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja L$xts_enc_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +L$xts_enc_epilogue: + retq + + +.globl _bsaes_xts_decrypt + +.p2align 4 +_bsaes_xts_decrypt: + movq %rsp,%rax +L$xts_dec_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + + leaq (%r9),%rdi + leaq 32(%rbp),%rsi + leaq (%r8),%rdx + call _asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + xorl %eax,%eax + andq $-16,%r14 + testl $15,%ebx + setnz %al + shlq $4,%rax + subq %rax,%r14 + + subq $128,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa L$xts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $128,%r14 + jc L$xts_dec_short + jmp L$xts_dec_loop + +.p2align 4 +L$xts_dec_loop: + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa L$xts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $128,%r14 + jnc L$xts_dec_loop + +L$xts_dec_short: + addq $128,%r14 + jz L$xts_dec_done + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je L$xts_dec_1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je L$xts_dec_2 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je L$xts_dec_3 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je L$xts_dec_4 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je L$xts_dec_5 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je L$xts_dec_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call _asm_AES_decrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +L$xts_dec_done: + andl $15,%ebx + jz L$xts_dec_ret + + pxor %xmm14,%xmm14 + movdqa L$xts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + movdqa %xmm6,%xmm5 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + movdqu (%r12),%xmm15 + pxor %xmm13,%xmm6 + + leaq 32(%rbp),%rdi + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call _asm_AES_decrypt + pxor 32(%rbp),%xmm6 + movq %r13,%rdx + movdqu %xmm6,(%r13) + +L$xts_dec_steal: + movzbl 16(%r12),%eax + movzbl (%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,(%rdx) + movb %cl,16(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz L$xts_dec_steal + + movdqu (%r13),%xmm15 + leaq 32(%rbp),%rdi + pxor %xmm5,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call _asm_AES_decrypt + pxor 32(%rbp),%xmm5 + movdqu %xmm5,(%r13) + +L$xts_dec_ret: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +L$xts_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja L$xts_dec_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +L$xts_dec_epilogue: + retq + + +.p2align 6 +_bsaes_const: +L$M0ISR: +.quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +L$ISRM0: +.quad 0x01040b0e0205080f, 0x0306090c00070a0d +L$ISR: +.quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +L$BS0: +.quad 0x5555555555555555, 0x5555555555555555 +L$BS1: +.quad 0x3333333333333333, 0x3333333333333333 +L$BS2: +.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +L$SR: +.quad 0x0504070600030201, 0x0f0e0d0c0a09080b +L$SRM0: +.quad 0x0304090e00050a0f, 0x01060b0c0207080d +L$M0SR: +.quad 0x0a0e02060f03070b, 0x0004080c05090d01 +L$SWPUP: +.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908 +L$SWPUPM0SR: +.quad 0x0a0d02060c03070b, 0x0004080f05090e01 +L$ADD1: +.quad 0x0000000000000000, 0x0000000100000000 +L$ADD2: +.quad 0x0000000000000000, 0x0000000200000000 +L$ADD3: +.quad 0x0000000000000000, 0x0000000300000000 +L$ADD4: +.quad 0x0000000000000000, 0x0000000400000000 +L$ADD5: +.quad 0x0000000000000000, 0x0000000500000000 +L$ADD6: +.quad 0x0000000000000000, 0x0000000600000000 +L$ADD7: +.quad 0x0000000000000000, 0x0000000700000000 +L$ADD8: +.quad 0x0000000000000000, 0x0000000800000000 +L$xts_magic: +.long 0x87,0,1,0 +L$masks: +.quad 0x0101010101010101, 0x0101010101010101 +.quad 0x0202020202020202, 0x0202020202020202 +.quad 0x0404040404040404, 0x0404040404040404 +.quad 0x0808080808080808, 0x0808080808080808 +L$M0: +.quad 0x02060a0e03070b0f, 0x0004080c0105090d +L$63: +.quad 0x6363636363636363, 0x6363636363636363 +.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0 +.p2align 6 + diff --git a/ext/libressl/crypto/aes/bsaes-masm-x86_64.S b/ext/libressl/crypto/aes/bsaes-masm-x86_64.S new file mode 100644 index 0000000..6b1a97d --- /dev/null +++ b/ext/libressl/crypto/aes/bsaes-masm-x86_64.S @@ -0,0 +1,2803 @@ +; 1 "crypto/aes/bsaes-masm-x86_64.S.tmp" +; 1 "" 1 +; 1 "" 3 +; 340 "" 3 +; 1 "" 1 +; 1 "" 2 +; 1 "crypto/aes/bsaes-masm-x86_64.S.tmp" 2 +OPTION DOTNAME + +; 1 "./crypto/x86_arch.h" 1 + + +; 16 "./crypto/x86_arch.h" + + + + + + + + + +; 40 "./crypto/x86_arch.h" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +; 3 "crypto/aes/bsaes-masm-x86_64.S.tmp" 2 +.text$ SEGMENT ALIGN(64) 'CODE' + +EXTERN asm_AES_encrypt:NEAR +EXTERN asm_AES_decrypt:NEAR + + +ALIGN 64 +_bsaes_encrypt8 PROC PRIVATE + lea r11,QWORD PTR[$L$BS0] + + movdqa xmm8,XMMWORD PTR[rax] + lea rax,QWORD PTR[16+rax] + movdqa xmm7,XMMWORD PTR[80+r11] + pxor xmm15,xmm8 + pxor xmm0,xmm8 +DB 102,68,15,56,0,255 + pxor xmm1,xmm8 +DB 102,15,56,0,199 + pxor xmm2,xmm8 +DB 102,15,56,0,207 + pxor xmm3,xmm8 +DB 102,15,56,0,215 + pxor xmm4,xmm8 +DB 102,15,56,0,223 + pxor xmm5,xmm8 +DB 102,15,56,0,231 + pxor xmm6,xmm8 +DB 102,15,56,0,239 +DB 102,15,56,0,247 +_bsaes_encrypt8_bitslice:: + movdqa xmm7,XMMWORD PTR[r11] + movdqa xmm8,XMMWORD PTR[16+r11] + movdqa xmm9,xmm5 + psrlq xmm5,1 + movdqa xmm10,xmm3 + psrlq xmm3,1 + pxor xmm5,xmm6 + pxor xmm3,xmm4 + pand xmm5,xmm7 + pand xmm3,xmm7 + pxor xmm6,xmm5 + psllq xmm5,1 + pxor xmm4,xmm3 + psllq xmm3,1 + pxor xmm5,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm1 + psrlq xmm1,1 + movdqa xmm10,xmm15 + psrlq xmm15,1 + pxor xmm1,xmm2 + pxor xmm15,xmm0 + pand xmm1,xmm7 + pand xmm15,xmm7 + pxor xmm2,xmm1 + psllq xmm1,1 + pxor xmm0,xmm15 + psllq xmm15,1 + pxor xmm1,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[32+r11] + movdqa xmm9,xmm4 + psrlq xmm4,2 + movdqa xmm10,xmm3 + psrlq xmm3,2 + pxor xmm4,xmm6 + pxor xmm3,xmm5 + pand xmm4,xmm8 + pand xmm3,xmm8 + pxor xmm6,xmm4 + psllq xmm4,2 + pxor xmm5,xmm3 + psllq xmm3,2 + pxor xmm4,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,2 + movdqa xmm10,xmm15 + psrlq xmm15,2 + pxor xmm0,xmm2 + pxor xmm15,xmm1 + pand xmm0,xmm8 + pand xmm15,xmm8 + pxor xmm2,xmm0 + psllq xmm0,2 + pxor xmm1,xmm15 + psllq xmm15,2 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm9,xmm2 + psrlq xmm2,4 + movdqa xmm10,xmm1 + psrlq xmm1,4 + pxor xmm2,xmm6 + pxor xmm1,xmm5 + pand xmm2,xmm7 + pand xmm1,xmm7 + pxor xmm6,xmm2 + psllq xmm2,4 + pxor xmm5,xmm1 + psllq xmm1,4 + pxor xmm2,xmm9 + pxor xmm1,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,4 + movdqa xmm10,xmm15 + psrlq xmm15,4 + pxor xmm0,xmm4 + pxor xmm15,xmm3 + pand xmm0,xmm7 + pand xmm15,xmm7 + pxor xmm4,xmm0 + psllq xmm0,4 + pxor xmm3,xmm15 + psllq xmm15,4 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + dec r10d + jmp $L$enc_sbox +ALIGN 16 +$L$enc_loop:: + pxor xmm15,XMMWORD PTR[rax] + pxor xmm0,XMMWORD PTR[16+rax] +DB 102,68,15,56,0,255 + pxor xmm1,XMMWORD PTR[32+rax] +DB 102,15,56,0,199 + pxor xmm2,XMMWORD PTR[48+rax] +DB 102,15,56,0,207 + pxor xmm3,XMMWORD PTR[64+rax] +DB 102,15,56,0,215 + pxor xmm4,XMMWORD PTR[80+rax] +DB 102,15,56,0,223 + pxor xmm5,XMMWORD PTR[96+rax] +DB 102,15,56,0,231 + pxor xmm6,XMMWORD PTR[112+rax] +DB 102,15,56,0,239 + lea rax,QWORD PTR[128+rax] +DB 102,15,56,0,247 +$L$enc_sbox:: + pxor xmm4,xmm5 + pxor xmm1,xmm0 + pxor xmm2,xmm15 + pxor xmm5,xmm1 + pxor xmm4,xmm15 + + pxor xmm5,xmm2 + pxor xmm2,xmm6 + pxor xmm6,xmm4 + pxor xmm2,xmm3 + pxor xmm3,xmm4 + pxor xmm2,xmm0 + + pxor xmm1,xmm6 + pxor xmm0,xmm4 + movdqa xmm10,xmm6 + movdqa xmm9,xmm0 + movdqa xmm8,xmm4 + movdqa xmm12,xmm1 + movdqa xmm11,xmm5 + + pxor xmm10,xmm3 + pxor xmm9,xmm1 + pxor xmm8,xmm2 + movdqa xmm13,xmm10 + pxor xmm12,xmm3 + movdqa xmm7,xmm9 + pxor xmm11,xmm15 + movdqa xmm14,xmm10 + + por xmm9,xmm8 + por xmm10,xmm11 + pxor xmm14,xmm7 + pand xmm13,xmm11 + pxor xmm11,xmm8 + pand xmm7,xmm8 + pand xmm14,xmm11 + movdqa xmm11,xmm2 + pxor xmm11,xmm15 + pand xmm12,xmm11 + pxor xmm10,xmm12 + pxor xmm9,xmm12 + movdqa xmm12,xmm6 + movdqa xmm11,xmm4 + pxor xmm12,xmm0 + pxor xmm11,xmm5 + movdqa xmm8,xmm12 + pand xmm12,xmm11 + por xmm8,xmm11 + pxor xmm7,xmm12 + pxor xmm10,xmm14 + pxor xmm9,xmm13 + pxor xmm8,xmm14 + movdqa xmm11,xmm1 + pxor xmm7,xmm13 + movdqa xmm12,xmm3 + pxor xmm8,xmm13 + movdqa xmm13,xmm0 + pand xmm11,xmm2 + movdqa xmm14,xmm6 + pand xmm12,xmm15 + pand xmm13,xmm4 + por xmm14,xmm5 + pxor xmm10,xmm11 + pxor xmm9,xmm12 + pxor xmm8,xmm13 + pxor xmm7,xmm14 + + + + + + movdqa xmm11,xmm10 + pand xmm10,xmm8 + pxor xmm11,xmm9 + + movdqa xmm13,xmm7 + movdqa xmm14,xmm11 + pxor xmm13,xmm10 + pand xmm14,xmm13 + + movdqa xmm12,xmm8 + pxor xmm14,xmm9 + pxor xmm12,xmm7 + + pxor xmm10,xmm9 + + pand xmm12,xmm10 + + movdqa xmm9,xmm13 + pxor xmm12,xmm7 + + pxor xmm9,xmm12 + pxor xmm8,xmm12 + + pand xmm9,xmm7 + + pxor xmm13,xmm9 + pxor xmm8,xmm9 + + pand xmm13,xmm14 + + pxor xmm13,xmm11 + movdqa xmm11,xmm5 + movdqa xmm7,xmm4 + movdqa xmm9,xmm14 + pxor xmm9,xmm13 + pand xmm9,xmm5 + pxor xmm5,xmm4 + pand xmm4,xmm14 + pand xmm5,xmm13 + pxor xmm5,xmm4 + pxor xmm4,xmm9 + pxor xmm11,xmm15 + pxor xmm7,xmm2 + pxor xmm14,xmm12 + pxor xmm13,xmm8 + movdqa xmm10,xmm14 + movdqa xmm9,xmm12 + pxor xmm10,xmm13 + pxor xmm9,xmm8 + pand xmm10,xmm11 + pand xmm9,xmm15 + pxor xmm11,xmm7 + pxor xmm15,xmm2 + pand xmm7,xmm14 + pand xmm2,xmm12 + pand xmm11,xmm13 + pand xmm15,xmm8 + pxor xmm7,xmm11 + pxor xmm15,xmm2 + pxor xmm11,xmm10 + pxor xmm2,xmm9 + pxor xmm5,xmm11 + pxor xmm15,xmm11 + pxor xmm4,xmm7 + pxor xmm2,xmm7 + + movdqa xmm11,xmm6 + movdqa xmm7,xmm0 + pxor xmm11,xmm3 + pxor xmm7,xmm1 + movdqa xmm10,xmm14 + movdqa xmm9,xmm12 + pxor xmm10,xmm13 + pxor xmm9,xmm8 + pand xmm10,xmm11 + pand xmm9,xmm3 + pxor xmm11,xmm7 + pxor xmm3,xmm1 + pand xmm7,xmm14 + pand xmm1,xmm12 + pand xmm11,xmm13 + pand xmm3,xmm8 + pxor xmm7,xmm11 + pxor xmm3,xmm1 + pxor xmm11,xmm10 + pxor xmm1,xmm9 + pxor xmm14,xmm12 + pxor xmm13,xmm8 + movdqa xmm10,xmm14 + pxor xmm10,xmm13 + pand xmm10,xmm6 + pxor xmm6,xmm0 + pand xmm0,xmm14 + pand xmm6,xmm13 + pxor xmm6,xmm0 + pxor xmm0,xmm10 + pxor xmm6,xmm11 + pxor xmm3,xmm11 + pxor xmm0,xmm7 + pxor xmm1,xmm7 + pxor xmm6,xmm15 + pxor xmm0,xmm5 + pxor xmm3,xmm6 + pxor xmm5,xmm15 + pxor xmm15,xmm0 + + pxor xmm0,xmm4 + pxor xmm4,xmm1 + pxor xmm1,xmm2 + pxor xmm2,xmm4 + pxor xmm3,xmm4 + + pxor xmm5,xmm2 + dec r10d + jl $L$enc_done + pshufd xmm7,xmm15,093h + pshufd xmm8,xmm0,093h + pxor xmm15,xmm7 + pshufd xmm9,xmm3,093h + pxor xmm0,xmm8 + pshufd xmm10,xmm5,093h + pxor xmm3,xmm9 + pshufd xmm11,xmm2,093h + pxor xmm5,xmm10 + pshufd xmm12,xmm6,093h + pxor xmm2,xmm11 + pshufd xmm13,xmm1,093h + pxor xmm6,xmm12 + pshufd xmm14,xmm4,093h + pxor xmm1,xmm13 + pxor xmm4,xmm14 + + pxor xmm8,xmm15 + pxor xmm7,xmm4 + pxor xmm8,xmm4 + pshufd xmm15,xmm15,04Eh + pxor xmm9,xmm0 + pshufd xmm0,xmm0,04Eh + pxor xmm12,xmm2 + pxor xmm15,xmm7 + pxor xmm13,xmm6 + pxor xmm0,xmm8 + pxor xmm11,xmm5 + pshufd xmm7,xmm2,04Eh + pxor xmm14,xmm1 + pshufd xmm8,xmm6,04Eh + pxor xmm10,xmm3 + pshufd xmm2,xmm5,04Eh + pxor xmm10,xmm4 + pshufd xmm6,xmm4,04Eh + pxor xmm11,xmm4 + pshufd xmm5,xmm1,04Eh + pxor xmm7,xmm11 + pshufd xmm1,xmm3,04Eh + pxor xmm8,xmm12 + pxor xmm2,xmm10 + pxor xmm6,xmm14 + pxor xmm5,xmm13 + movdqa xmm3,xmm7 + pxor xmm1,xmm9 + movdqa xmm4,xmm8 + movdqa xmm7,XMMWORD PTR[48+r11] + jnz $L$enc_loop + movdqa xmm7,XMMWORD PTR[64+r11] + jmp $L$enc_loop +ALIGN 16 +$L$enc_done:: + movdqa xmm7,XMMWORD PTR[r11] + movdqa xmm8,XMMWORD PTR[16+r11] + movdqa xmm9,xmm1 + psrlq xmm1,1 + movdqa xmm10,xmm2 + psrlq xmm2,1 + pxor xmm1,xmm4 + pxor xmm2,xmm6 + pand xmm1,xmm7 + pand xmm2,xmm7 + pxor xmm4,xmm1 + psllq xmm1,1 + pxor xmm6,xmm2 + psllq xmm2,1 + pxor xmm1,xmm9 + pxor xmm2,xmm10 + movdqa xmm9,xmm3 + psrlq xmm3,1 + movdqa xmm10,xmm15 + psrlq xmm15,1 + pxor xmm3,xmm5 + pxor xmm15,xmm0 + pand xmm3,xmm7 + pand xmm15,xmm7 + pxor xmm5,xmm3 + psllq xmm3,1 + pxor xmm0,xmm15 + psllq xmm15,1 + pxor xmm3,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[32+r11] + movdqa xmm9,xmm6 + psrlq xmm6,2 + movdqa xmm10,xmm2 + psrlq xmm2,2 + pxor xmm6,xmm4 + pxor xmm2,xmm1 + pand xmm6,xmm8 + pand xmm2,xmm8 + pxor xmm4,xmm6 + psllq xmm6,2 + pxor xmm1,xmm2 + psllq xmm2,2 + pxor xmm6,xmm9 + pxor xmm2,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,2 + movdqa xmm10,xmm15 + psrlq xmm15,2 + pxor xmm0,xmm5 + pxor xmm15,xmm3 + pand xmm0,xmm8 + pand xmm15,xmm8 + pxor xmm5,xmm0 + psllq xmm0,2 + pxor xmm3,xmm15 + psllq xmm15,2 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm9,xmm5 + psrlq xmm5,4 + movdqa xmm10,xmm3 + psrlq xmm3,4 + pxor xmm5,xmm4 + pxor xmm3,xmm1 + pand xmm5,xmm7 + pand xmm3,xmm7 + pxor xmm4,xmm5 + psllq xmm5,4 + pxor xmm1,xmm3 + psllq xmm3,4 + pxor xmm5,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,4 + movdqa xmm10,xmm15 + psrlq xmm15,4 + pxor xmm0,xmm6 + pxor xmm15,xmm2 + pand xmm0,xmm7 + pand xmm15,xmm7 + pxor xmm6,xmm0 + psllq xmm0,4 + pxor xmm2,xmm15 + psllq xmm15,4 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[rax] + pxor xmm3,xmm7 + pxor xmm5,xmm7 + pxor xmm2,xmm7 + pxor xmm6,xmm7 + pxor xmm1,xmm7 + pxor xmm4,xmm7 + pxor xmm15,xmm7 + pxor xmm0,xmm7 + DB 0F3h,0C3h ;repret +_bsaes_encrypt8 ENDP + + +ALIGN 64 +_bsaes_decrypt8 PROC PRIVATE + lea r11,QWORD PTR[$L$BS0] + + movdqa xmm8,XMMWORD PTR[rax] + lea rax,QWORD PTR[16+rax] + movdqa xmm7,XMMWORD PTR[((-48))+r11] + pxor xmm15,xmm8 + pxor xmm0,xmm8 +DB 102,68,15,56,0,255 + pxor xmm1,xmm8 +DB 102,15,56,0,199 + pxor xmm2,xmm8 +DB 102,15,56,0,207 + pxor xmm3,xmm8 +DB 102,15,56,0,215 + pxor xmm4,xmm8 +DB 102,15,56,0,223 + pxor xmm5,xmm8 +DB 102,15,56,0,231 + pxor xmm6,xmm8 +DB 102,15,56,0,239 +DB 102,15,56,0,247 + movdqa xmm7,XMMWORD PTR[r11] + movdqa xmm8,XMMWORD PTR[16+r11] + movdqa xmm9,xmm5 + psrlq xmm5,1 + movdqa xmm10,xmm3 + psrlq xmm3,1 + pxor xmm5,xmm6 + pxor xmm3,xmm4 + pand xmm5,xmm7 + pand xmm3,xmm7 + pxor xmm6,xmm5 + psllq xmm5,1 + pxor xmm4,xmm3 + psllq xmm3,1 + pxor xmm5,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm1 + psrlq xmm1,1 + movdqa xmm10,xmm15 + psrlq xmm15,1 + pxor xmm1,xmm2 + pxor xmm15,xmm0 + pand xmm1,xmm7 + pand xmm15,xmm7 + pxor xmm2,xmm1 + psllq xmm1,1 + pxor xmm0,xmm15 + psllq xmm15,1 + pxor xmm1,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[32+r11] + movdqa xmm9,xmm4 + psrlq xmm4,2 + movdqa xmm10,xmm3 + psrlq xmm3,2 + pxor xmm4,xmm6 + pxor xmm3,xmm5 + pand xmm4,xmm8 + pand xmm3,xmm8 + pxor xmm6,xmm4 + psllq xmm4,2 + pxor xmm5,xmm3 + psllq xmm3,2 + pxor xmm4,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,2 + movdqa xmm10,xmm15 + psrlq xmm15,2 + pxor xmm0,xmm2 + pxor xmm15,xmm1 + pand xmm0,xmm8 + pand xmm15,xmm8 + pxor xmm2,xmm0 + psllq xmm0,2 + pxor xmm1,xmm15 + psllq xmm15,2 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm9,xmm2 + psrlq xmm2,4 + movdqa xmm10,xmm1 + psrlq xmm1,4 + pxor xmm2,xmm6 + pxor xmm1,xmm5 + pand xmm2,xmm7 + pand xmm1,xmm7 + pxor xmm6,xmm2 + psllq xmm2,4 + pxor xmm5,xmm1 + psllq xmm1,4 + pxor xmm2,xmm9 + pxor xmm1,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,4 + movdqa xmm10,xmm15 + psrlq xmm15,4 + pxor xmm0,xmm4 + pxor xmm15,xmm3 + pand xmm0,xmm7 + pand xmm15,xmm7 + pxor xmm4,xmm0 + psllq xmm0,4 + pxor xmm3,xmm15 + psllq xmm15,4 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + dec r10d + jmp $L$dec_sbox +ALIGN 16 +$L$dec_loop:: + pxor xmm15,XMMWORD PTR[rax] + pxor xmm0,XMMWORD PTR[16+rax] +DB 102,68,15,56,0,255 + pxor xmm1,XMMWORD PTR[32+rax] +DB 102,15,56,0,199 + pxor xmm2,XMMWORD PTR[48+rax] +DB 102,15,56,0,207 + pxor xmm3,XMMWORD PTR[64+rax] +DB 102,15,56,0,215 + pxor xmm4,XMMWORD PTR[80+rax] +DB 102,15,56,0,223 + pxor xmm5,XMMWORD PTR[96+rax] +DB 102,15,56,0,231 + pxor xmm6,XMMWORD PTR[112+rax] +DB 102,15,56,0,239 + lea rax,QWORD PTR[128+rax] +DB 102,15,56,0,247 +$L$dec_sbox:: + pxor xmm2,xmm3 + + pxor xmm3,xmm6 + pxor xmm1,xmm6 + pxor xmm5,xmm3 + pxor xmm6,xmm5 + pxor xmm0,xmm6 + + pxor xmm15,xmm0 + pxor xmm1,xmm4 + pxor xmm2,xmm15 + pxor xmm4,xmm15 + pxor xmm0,xmm2 + movdqa xmm10,xmm2 + movdqa xmm9,xmm6 + movdqa xmm8,xmm0 + movdqa xmm12,xmm3 + movdqa xmm11,xmm4 + + pxor xmm10,xmm15 + pxor xmm9,xmm3 + pxor xmm8,xmm5 + movdqa xmm13,xmm10 + pxor xmm12,xmm15 + movdqa xmm7,xmm9 + pxor xmm11,xmm1 + movdqa xmm14,xmm10 + + por xmm9,xmm8 + por xmm10,xmm11 + pxor xmm14,xmm7 + pand xmm13,xmm11 + pxor xmm11,xmm8 + pand xmm7,xmm8 + pand xmm14,xmm11 + movdqa xmm11,xmm5 + pxor xmm11,xmm1 + pand xmm12,xmm11 + pxor xmm10,xmm12 + pxor xmm9,xmm12 + movdqa xmm12,xmm2 + movdqa xmm11,xmm0 + pxor xmm12,xmm6 + pxor xmm11,xmm4 + movdqa xmm8,xmm12 + pand xmm12,xmm11 + por xmm8,xmm11 + pxor xmm7,xmm12 + pxor xmm10,xmm14 + pxor xmm9,xmm13 + pxor xmm8,xmm14 + movdqa xmm11,xmm3 + pxor xmm7,xmm13 + movdqa xmm12,xmm15 + pxor xmm8,xmm13 + movdqa xmm13,xmm6 + pand xmm11,xmm5 + movdqa xmm14,xmm2 + pand xmm12,xmm1 + pand xmm13,xmm0 + por xmm14,xmm4 + pxor xmm10,xmm11 + pxor xmm9,xmm12 + pxor xmm8,xmm13 + pxor xmm7,xmm14 + + + + + + movdqa xmm11,xmm10 + pand xmm10,xmm8 + pxor xmm11,xmm9 + + movdqa xmm13,xmm7 + movdqa xmm14,xmm11 + pxor xmm13,xmm10 + pand xmm14,xmm13 + + movdqa xmm12,xmm8 + pxor xmm14,xmm9 + pxor xmm12,xmm7 + + pxor xmm10,xmm9 + + pand xmm12,xmm10 + + movdqa xmm9,xmm13 + pxor xmm12,xmm7 + + pxor xmm9,xmm12 + pxor xmm8,xmm12 + + pand xmm9,xmm7 + + pxor xmm13,xmm9 + pxor xmm8,xmm9 + + pand xmm13,xmm14 + + pxor xmm13,xmm11 + movdqa xmm11,xmm4 + movdqa xmm7,xmm0 + movdqa xmm9,xmm14 + pxor xmm9,xmm13 + pand xmm9,xmm4 + pxor xmm4,xmm0 + pand xmm0,xmm14 + pand xmm4,xmm13 + pxor xmm4,xmm0 + pxor xmm0,xmm9 + pxor xmm11,xmm1 + pxor xmm7,xmm5 + pxor xmm14,xmm12 + pxor xmm13,xmm8 + movdqa xmm10,xmm14 + movdqa xmm9,xmm12 + pxor xmm10,xmm13 + pxor xmm9,xmm8 + pand xmm10,xmm11 + pand xmm9,xmm1 + pxor xmm11,xmm7 + pxor xmm1,xmm5 + pand xmm7,xmm14 + pand xmm5,xmm12 + pand xmm11,xmm13 + pand xmm1,xmm8 + pxor xmm7,xmm11 + pxor xmm1,xmm5 + pxor xmm11,xmm10 + pxor xmm5,xmm9 + pxor xmm4,xmm11 + pxor xmm1,xmm11 + pxor xmm0,xmm7 + pxor xmm5,xmm7 + + movdqa xmm11,xmm2 + movdqa xmm7,xmm6 + pxor xmm11,xmm15 + pxor xmm7,xmm3 + movdqa xmm10,xmm14 + movdqa xmm9,xmm12 + pxor xmm10,xmm13 + pxor xmm9,xmm8 + pand xmm10,xmm11 + pand xmm9,xmm15 + pxor xmm11,xmm7 + pxor xmm15,xmm3 + pand xmm7,xmm14 + pand xmm3,xmm12 + pand xmm11,xmm13 + pand xmm15,xmm8 + pxor xmm7,xmm11 + pxor xmm15,xmm3 + pxor xmm11,xmm10 + pxor xmm3,xmm9 + pxor xmm14,xmm12 + pxor xmm13,xmm8 + movdqa xmm10,xmm14 + pxor xmm10,xmm13 + pand xmm10,xmm2 + pxor xmm2,xmm6 + pand xmm6,xmm14 + pand xmm2,xmm13 + pxor xmm2,xmm6 + pxor xmm6,xmm10 + pxor xmm2,xmm11 + pxor xmm15,xmm11 + pxor xmm6,xmm7 + pxor xmm3,xmm7 + pxor xmm0,xmm6 + pxor xmm5,xmm4 + + pxor xmm3,xmm0 + pxor xmm1,xmm6 + pxor xmm4,xmm6 + pxor xmm3,xmm1 + pxor xmm6,xmm15 + pxor xmm3,xmm4 + pxor xmm2,xmm5 + pxor xmm5,xmm0 + pxor xmm2,xmm3 + + pxor xmm3,xmm15 + pxor xmm6,xmm2 + dec r10d + jl $L$dec_done + + pshufd xmm7,xmm15,04Eh + pshufd xmm13,xmm2,04Eh + pxor xmm7,xmm15 + pshufd xmm14,xmm4,04Eh + pxor xmm13,xmm2 + pshufd xmm8,xmm0,04Eh + pxor xmm14,xmm4 + pshufd xmm9,xmm5,04Eh + pxor xmm8,xmm0 + pshufd xmm10,xmm3,04Eh + pxor xmm9,xmm5 + pxor xmm15,xmm13 + pxor xmm0,xmm13 + pshufd xmm11,xmm1,04Eh + pxor xmm10,xmm3 + pxor xmm5,xmm7 + pxor xmm3,xmm8 + pshufd xmm12,xmm6,04Eh + pxor xmm11,xmm1 + pxor xmm0,xmm14 + pxor xmm1,xmm9 + pxor xmm12,xmm6 + + pxor xmm5,xmm14 + pxor xmm3,xmm13 + pxor xmm1,xmm13 + pxor xmm6,xmm10 + pxor xmm2,xmm11 + pxor xmm1,xmm14 + pxor xmm6,xmm14 + pxor xmm4,xmm12 + pshufd xmm7,xmm15,093h + pshufd xmm8,xmm0,093h + pxor xmm15,xmm7 + pshufd xmm9,xmm5,093h + pxor xmm0,xmm8 + pshufd xmm10,xmm3,093h + pxor xmm5,xmm9 + pshufd xmm11,xmm1,093h + pxor xmm3,xmm10 + pshufd xmm12,xmm6,093h + pxor xmm1,xmm11 + pshufd xmm13,xmm2,093h + pxor xmm6,xmm12 + pshufd xmm14,xmm4,093h + pxor xmm2,xmm13 + pxor xmm4,xmm14 + + pxor xmm8,xmm15 + pxor xmm7,xmm4 + pxor xmm8,xmm4 + pshufd xmm15,xmm15,04Eh + pxor xmm9,xmm0 + pshufd xmm0,xmm0,04Eh + pxor xmm12,xmm1 + pxor xmm15,xmm7 + pxor xmm13,xmm6 + pxor xmm0,xmm8 + pxor xmm11,xmm3 + pshufd xmm7,xmm1,04Eh + pxor xmm14,xmm2 + pshufd xmm8,xmm6,04Eh + pxor xmm10,xmm5 + pshufd xmm1,xmm3,04Eh + pxor xmm10,xmm4 + pshufd xmm6,xmm4,04Eh + pxor xmm11,xmm4 + pshufd xmm3,xmm2,04Eh + pxor xmm7,xmm11 + pshufd xmm2,xmm5,04Eh + pxor xmm8,xmm12 + pxor xmm10,xmm1 + pxor xmm6,xmm14 + pxor xmm13,xmm3 + movdqa xmm3,xmm7 + pxor xmm2,xmm9 + movdqa xmm5,xmm13 + movdqa xmm4,xmm8 + movdqa xmm1,xmm2 + movdqa xmm2,xmm10 + movdqa xmm7,XMMWORD PTR[((-16))+r11] + jnz $L$dec_loop + movdqa xmm7,XMMWORD PTR[((-32))+r11] + jmp $L$dec_loop +ALIGN 16 +$L$dec_done:: + movdqa xmm7,XMMWORD PTR[r11] + movdqa xmm8,XMMWORD PTR[16+r11] + movdqa xmm9,xmm2 + psrlq xmm2,1 + movdqa xmm10,xmm1 + psrlq xmm1,1 + pxor xmm2,xmm4 + pxor xmm1,xmm6 + pand xmm2,xmm7 + pand xmm1,xmm7 + pxor xmm4,xmm2 + psllq xmm2,1 + pxor xmm6,xmm1 + psllq xmm1,1 + pxor xmm2,xmm9 + pxor xmm1,xmm10 + movdqa xmm9,xmm5 + psrlq xmm5,1 + movdqa xmm10,xmm15 + psrlq xmm15,1 + pxor xmm5,xmm3 + pxor xmm15,xmm0 + pand xmm5,xmm7 + pand xmm15,xmm7 + pxor xmm3,xmm5 + psllq xmm5,1 + pxor xmm0,xmm15 + psllq xmm15,1 + pxor xmm5,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[32+r11] + movdqa xmm9,xmm6 + psrlq xmm6,2 + movdqa xmm10,xmm1 + psrlq xmm1,2 + pxor xmm6,xmm4 + pxor xmm1,xmm2 + pand xmm6,xmm8 + pand xmm1,xmm8 + pxor xmm4,xmm6 + psllq xmm6,2 + pxor xmm2,xmm1 + psllq xmm1,2 + pxor xmm6,xmm9 + pxor xmm1,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,2 + movdqa xmm10,xmm15 + psrlq xmm15,2 + pxor xmm0,xmm3 + pxor xmm15,xmm5 + pand xmm0,xmm8 + pand xmm15,xmm8 + pxor xmm3,xmm0 + psllq xmm0,2 + pxor xmm5,xmm15 + psllq xmm15,2 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm9,xmm3 + psrlq xmm3,4 + movdqa xmm10,xmm5 + psrlq xmm5,4 + pxor xmm3,xmm4 + pxor xmm5,xmm2 + pand xmm3,xmm7 + pand xmm5,xmm7 + pxor xmm4,xmm3 + psllq xmm3,4 + pxor xmm2,xmm5 + psllq xmm5,4 + pxor xmm3,xmm9 + pxor xmm5,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,4 + movdqa xmm10,xmm15 + psrlq xmm15,4 + pxor xmm0,xmm6 + pxor xmm15,xmm1 + pand xmm0,xmm7 + pand xmm15,xmm7 + pxor xmm6,xmm0 + psllq xmm0,4 + pxor xmm1,xmm15 + psllq xmm15,4 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[rax] + pxor xmm5,xmm7 + pxor xmm3,xmm7 + pxor xmm1,xmm7 + pxor xmm6,xmm7 + pxor xmm2,xmm7 + pxor xmm4,xmm7 + pxor xmm15,xmm7 + pxor xmm0,xmm7 + DB 0F3h,0C3h ;repret +_bsaes_decrypt8 ENDP + +ALIGN 16 +_bsaes_key_convert PROC PRIVATE + lea r11,QWORD PTR[$L$masks] + movdqu xmm7,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + movdqa xmm0,XMMWORD PTR[r11] + movdqa xmm1,XMMWORD PTR[16+r11] + movdqa xmm2,XMMWORD PTR[32+r11] + movdqa xmm3,XMMWORD PTR[48+r11] + movdqa xmm4,XMMWORD PTR[64+r11] + pcmpeqd xmm5,xmm5 + + movdqu xmm6,XMMWORD PTR[rcx] + movdqa XMMWORD PTR[rax],xmm7 + lea rax,QWORD PTR[16+rax] + dec r10d + jmp $L$key_loop +ALIGN 16 +$L$key_loop:: +DB 102,15,56,0,244 + + movdqa xmm8,xmm0 + movdqa xmm9,xmm1 + + pand xmm8,xmm6 + pand xmm9,xmm6 + movdqa xmm10,xmm2 + pcmpeqb xmm8,xmm0 + psllq xmm0,4 + movdqa xmm11,xmm3 + pcmpeqb xmm9,xmm1 + psllq xmm1,4 + + pand xmm10,xmm6 + pand xmm11,xmm6 + movdqa xmm12,xmm0 + pcmpeqb xmm10,xmm2 + psllq xmm2,4 + movdqa xmm13,xmm1 + pcmpeqb xmm11,xmm3 + psllq xmm3,4 + + movdqa xmm14,xmm2 + movdqa xmm15,xmm3 + pxor xmm8,xmm5 + pxor xmm9,xmm5 + + pand xmm12,xmm6 + pand xmm13,xmm6 + movdqa XMMWORD PTR[rax],xmm8 + pcmpeqb xmm12,xmm0 + psrlq xmm0,4 + movdqa XMMWORD PTR[16+rax],xmm9 + pcmpeqb xmm13,xmm1 + psrlq xmm1,4 + lea rcx,QWORD PTR[16+rcx] + + pand xmm14,xmm6 + pand xmm15,xmm6 + movdqa XMMWORD PTR[32+rax],xmm10 + pcmpeqb xmm14,xmm2 + psrlq xmm2,4 + movdqa XMMWORD PTR[48+rax],xmm11 + pcmpeqb xmm15,xmm3 + psrlq xmm3,4 + movdqu xmm6,XMMWORD PTR[rcx] + + pxor xmm13,xmm5 + pxor xmm14,xmm5 + movdqa XMMWORD PTR[64+rax],xmm12 + movdqa XMMWORD PTR[80+rax],xmm13 + movdqa XMMWORD PTR[96+rax],xmm14 + movdqa XMMWORD PTR[112+rax],xmm15 + lea rax,QWORD PTR[128+rax] + dec r10d + jnz $L$key_loop + + movdqa xmm7,XMMWORD PTR[80+r11] + + DB 0F3h,0C3h ;repret +_bsaes_key_convert ENDP +EXTERN asm_AES_cbc_encrypt:NEAR +PUBLIC bsaes_cbc_encrypt + +ALIGN 16 +bsaes_cbc_encrypt PROC PUBLIC + mov r11d,DWORD PTR[48+rsp] + cmp r11d,0 + jne asm_AES_cbc_encrypt + cmp r8,128 + jb asm_AES_cbc_encrypt + + mov rax,rsp +$L$cbc_dec_prologue:: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-72))+rsp] + mov r10,QWORD PTR[160+rsp] + lea rsp,QWORD PTR[((-160))+rsp] + movaps XMMWORD PTR[64+rsp],xmm6 + movaps XMMWORD PTR[80+rsp],xmm7 + movaps XMMWORD PTR[96+rsp],xmm8 + movaps XMMWORD PTR[112+rsp],xmm9 + movaps XMMWORD PTR[128+rsp],xmm10 + movaps XMMWORD PTR[144+rsp],xmm11 + movaps XMMWORD PTR[160+rsp],xmm12 + movaps XMMWORD PTR[176+rsp],xmm13 + movaps XMMWORD PTR[192+rsp],xmm14 + movaps XMMWORD PTR[208+rsp],xmm15 +$L$cbc_dec_body:: + mov rbp,rsp + mov eax,DWORD PTR[240+r9] + mov r12,rcx + mov r13,rdx + mov r14,r8 + mov r15,r9 + mov rbx,r10 + shr r14,4 + + mov edx,eax + shl rax,7 + sub rax,96 + sub rsp,rax + + mov rax,rsp + mov rcx,r15 + mov r10d,edx + call _bsaes_key_convert + pxor xmm7,XMMWORD PTR[rsp] + movdqa XMMWORD PTR[rax],xmm6 + movdqa XMMWORD PTR[rsp],xmm7 + + movdqu xmm14,XMMWORD PTR[rbx] + sub r14,8 +$L$cbc_dec_loop:: + movdqu xmm15,XMMWORD PTR[r12] + movdqu xmm0,XMMWORD PTR[16+r12] + movdqu xmm1,XMMWORD PTR[32+r12] + movdqu xmm2,XMMWORD PTR[48+r12] + movdqu xmm3,XMMWORD PTR[64+r12] + movdqu xmm4,XMMWORD PTR[80+r12] + mov rax,rsp + movdqu xmm5,XMMWORD PTR[96+r12] + mov r10d,edx + movdqu xmm6,XMMWORD PTR[112+r12] + movdqa XMMWORD PTR[32+rbp],xmm14 + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm3,xmm9 + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm1,xmm10 + movdqu xmm12,XMMWORD PTR[80+r12] + pxor xmm6,xmm11 + movdqu xmm13,XMMWORD PTR[96+r12] + pxor xmm2,xmm12 + movdqu xmm14,XMMWORD PTR[112+r12] + pxor xmm4,xmm13 + movdqu XMMWORD PTR[r13],xmm15 + lea r12,QWORD PTR[128+r12] + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + movdqu XMMWORD PTR[64+r13],xmm1 + movdqu XMMWORD PTR[80+r13],xmm6 + movdqu XMMWORD PTR[96+r13],xmm2 + movdqu XMMWORD PTR[112+r13],xmm4 + lea r13,QWORD PTR[128+r13] + sub r14,8 + jnc $L$cbc_dec_loop + + add r14,8 + jz $L$cbc_dec_done + + movdqu xmm15,XMMWORD PTR[r12] + mov rax,rsp + mov r10d,edx + cmp r14,2 + jb $L$cbc_dec_one + movdqu xmm0,XMMWORD PTR[16+r12] + je $L$cbc_dec_two + movdqu xmm1,XMMWORD PTR[32+r12] + cmp r14,4 + jb $L$cbc_dec_three + movdqu xmm2,XMMWORD PTR[48+r12] + je $L$cbc_dec_four + movdqu xmm3,XMMWORD PTR[64+r12] + cmp r14,6 + jb $L$cbc_dec_five + movdqu xmm4,XMMWORD PTR[80+r12] + je $L$cbc_dec_six + movdqu xmm5,XMMWORD PTR[96+r12] + movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm3,xmm9 + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm1,xmm10 + movdqu xmm12,XMMWORD PTR[80+r12] + pxor xmm6,xmm11 + movdqu xmm14,XMMWORD PTR[96+r12] + pxor xmm2,xmm12 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + movdqu XMMWORD PTR[64+r13],xmm1 + movdqu XMMWORD PTR[80+r13],xmm6 + movdqu XMMWORD PTR[96+r13],xmm2 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_six:: + movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm3,xmm9 + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm1,xmm10 + movdqu xmm14,XMMWORD PTR[80+r12] + pxor xmm6,xmm11 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + movdqu XMMWORD PTR[64+r13],xmm1 + movdqu XMMWORD PTR[80+r13],xmm6 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_five:: + movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm3,xmm9 + movdqu xmm14,XMMWORD PTR[64+r12] + pxor xmm1,xmm10 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + movdqu XMMWORD PTR[64+r13],xmm1 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_four:: + movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu xmm14,XMMWORD PTR[48+r12] + pxor xmm3,xmm9 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_three:: + movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm14,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_two:: + movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm14,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_one:: + lea rcx,QWORD PTR[r12] + lea rdx,QWORD PTR[32+rbp] + lea r8,QWORD PTR[r15] + call asm_AES_decrypt + pxor xmm14,XMMWORD PTR[32+rbp] + movdqu XMMWORD PTR[r13],xmm14 + movdqa xmm14,xmm15 + +$L$cbc_dec_done:: + movdqu XMMWORD PTR[rbx],xmm14 + lea rax,QWORD PTR[rsp] + pxor xmm0,xmm0 +$L$cbc_dec_bzero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + lea rax,QWORD PTR[32+rax] + cmp rbp,rax + ja $L$cbc_dec_bzero + + lea rsp,QWORD PTR[rbp] + movaps xmm6,XMMWORD PTR[64+rbp] + movaps xmm7,XMMWORD PTR[80+rbp] + movaps xmm8,XMMWORD PTR[96+rbp] + movaps xmm9,XMMWORD PTR[112+rbp] + movaps xmm10,XMMWORD PTR[128+rbp] + movaps xmm11,XMMWORD PTR[144+rbp] + movaps xmm12,XMMWORD PTR[160+rbp] + movaps xmm13,XMMWORD PTR[176+rbp] + movaps xmm14,XMMWORD PTR[192+rbp] + movaps xmm15,XMMWORD PTR[208+rbp] + lea rsp,QWORD PTR[160+rbp] + mov r15,QWORD PTR[72+rsp] + mov r14,QWORD PTR[80+rsp] + mov r13,QWORD PTR[88+rsp] + mov r12,QWORD PTR[96+rsp] + mov rbx,QWORD PTR[104+rsp] + mov rax,QWORD PTR[112+rsp] + lea rsp,QWORD PTR[120+rsp] + mov rbp,rax +$L$cbc_dec_epilogue:: + DB 0F3h,0C3h ;repret +bsaes_cbc_encrypt ENDP + +PUBLIC bsaes_ctr32_encrypt_blocks + +ALIGN 16 +bsaes_ctr32_encrypt_blocks PROC PUBLIC + mov rax,rsp +$L$ctr_enc_prologue:: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-72))+rsp] + mov r10,QWORD PTR[160+rsp] + lea rsp,QWORD PTR[((-160))+rsp] + movaps XMMWORD PTR[64+rsp],xmm6 + movaps XMMWORD PTR[80+rsp],xmm7 + movaps XMMWORD PTR[96+rsp],xmm8 + movaps XMMWORD PTR[112+rsp],xmm9 + movaps XMMWORD PTR[128+rsp],xmm10 + movaps XMMWORD PTR[144+rsp],xmm11 + movaps XMMWORD PTR[160+rsp],xmm12 + movaps XMMWORD PTR[176+rsp],xmm13 + movaps XMMWORD PTR[192+rsp],xmm14 + movaps XMMWORD PTR[208+rsp],xmm15 +$L$ctr_enc_body:: + mov rbp,rsp + movdqu xmm0,XMMWORD PTR[r10] + mov eax,DWORD PTR[240+r9] + mov r12,rcx + mov r13,rdx + mov r14,r8 + mov r15,r9 + movdqa XMMWORD PTR[32+rbp],xmm0 + cmp r8,8 + jb $L$ctr_enc_short + + mov ebx,eax + shl rax,7 + sub rax,96 + sub rsp,rax + + mov rax,rsp + mov rcx,r15 + mov r10d,ebx + call _bsaes_key_convert + pxor xmm7,xmm6 + movdqa XMMWORD PTR[rax],xmm7 + + movdqa xmm8,XMMWORD PTR[rsp] + lea r11,QWORD PTR[$L$ADD1] + movdqa xmm15,XMMWORD PTR[32+rbp] + movdqa xmm7,XMMWORD PTR[((-32))+r11] +DB 102,68,15,56,0,199 +DB 102,68,15,56,0,255 + movdqa XMMWORD PTR[rsp],xmm8 + jmp $L$ctr_enc_loop +ALIGN 16 +$L$ctr_enc_loop:: + movdqa XMMWORD PTR[32+rbp],xmm15 + movdqa xmm0,xmm15 + movdqa xmm1,xmm15 + paddd xmm0,XMMWORD PTR[r11] + movdqa xmm2,xmm15 + paddd xmm1,XMMWORD PTR[16+r11] + movdqa xmm3,xmm15 + paddd xmm2,XMMWORD PTR[32+r11] + movdqa xmm4,xmm15 + paddd xmm3,XMMWORD PTR[48+r11] + movdqa xmm5,xmm15 + paddd xmm4,XMMWORD PTR[64+r11] + movdqa xmm6,xmm15 + paddd xmm5,XMMWORD PTR[80+r11] + paddd xmm6,XMMWORD PTR[96+r11] + + + + movdqa xmm8,XMMWORD PTR[rsp] + lea rax,QWORD PTR[16+rsp] + movdqa xmm7,XMMWORD PTR[((-16))+r11] + pxor xmm15,xmm8 + pxor xmm0,xmm8 +DB 102,68,15,56,0,255 + pxor xmm1,xmm8 +DB 102,15,56,0,199 + pxor xmm2,xmm8 +DB 102,15,56,0,207 + pxor xmm3,xmm8 +DB 102,15,56,0,215 + pxor xmm4,xmm8 +DB 102,15,56,0,223 + pxor xmm5,xmm8 +DB 102,15,56,0,231 + pxor xmm6,xmm8 +DB 102,15,56,0,239 + lea r11,QWORD PTR[$L$BS0] +DB 102,15,56,0,247 + mov r10d,ebx + + call _bsaes_encrypt8_bitslice + + sub r14,8 + jc $L$ctr_enc_loop_done + + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + movdqu xmm9,XMMWORD PTR[32+r12] + movdqu xmm10,XMMWORD PTR[48+r12] + movdqu xmm11,XMMWORD PTR[64+r12] + movdqu xmm12,XMMWORD PTR[80+r12] + movdqu xmm13,XMMWORD PTR[96+r12] + movdqu xmm14,XMMWORD PTR[112+r12] + lea r12,QWORD PTR[128+r12] + pxor xmm7,xmm15 + movdqa xmm15,XMMWORD PTR[32+rbp] + pxor xmm0,xmm8 + movdqu XMMWORD PTR[r13],xmm7 + pxor xmm3,xmm9 + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm5,xmm10 + movdqu XMMWORD PTR[32+r13],xmm3 + pxor xmm2,xmm11 + movdqu XMMWORD PTR[48+r13],xmm5 + pxor xmm6,xmm12 + movdqu XMMWORD PTR[64+r13],xmm2 + pxor xmm1,xmm13 + movdqu XMMWORD PTR[80+r13],xmm6 + pxor xmm4,xmm14 + movdqu XMMWORD PTR[96+r13],xmm1 + lea r11,QWORD PTR[$L$ADD1] + movdqu XMMWORD PTR[112+r13],xmm4 + lea r13,QWORD PTR[128+r13] + paddd xmm15,XMMWORD PTR[112+r11] + jnz $L$ctr_enc_loop + + jmp $L$ctr_enc_done +ALIGN 16 +$L$ctr_enc_loop_done:: + add r14,8 + movdqu xmm7,XMMWORD PTR[r12] + pxor xmm15,xmm7 + movdqu XMMWORD PTR[r13],xmm15 + cmp r14,2 + jb $L$ctr_enc_done + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm8 + movdqu XMMWORD PTR[16+r13],xmm0 + je $L$ctr_enc_done + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm3,xmm9 + movdqu XMMWORD PTR[32+r13],xmm3 + cmp r14,4 + jb $L$ctr_enc_done + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm5,xmm10 + movdqu XMMWORD PTR[48+r13],xmm5 + je $L$ctr_enc_done + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm2,xmm11 + movdqu XMMWORD PTR[64+r13],xmm2 + cmp r14,6 + jb $L$ctr_enc_done + movdqu xmm12,XMMWORD PTR[80+r12] + pxor xmm6,xmm12 + movdqu XMMWORD PTR[80+r13],xmm6 + je $L$ctr_enc_done + movdqu xmm13,XMMWORD PTR[96+r12] + pxor xmm1,xmm13 + movdqu XMMWORD PTR[96+r13],xmm1 + jmp $L$ctr_enc_done + +ALIGN 16 +$L$ctr_enc_short:: + lea rcx,QWORD PTR[32+rbp] + lea rdx,QWORD PTR[48+rbp] + lea r8,QWORD PTR[r15] + call asm_AES_encrypt + movdqu xmm0,XMMWORD PTR[r12] + lea r12,QWORD PTR[16+r12] + mov eax,DWORD PTR[44+rbp] + bswap eax + pxor xmm0,XMMWORD PTR[48+rbp] + inc eax + movdqu XMMWORD PTR[r13],xmm0 + bswap eax + lea r13,QWORD PTR[16+r13] + mov DWORD PTR[44+rsp],eax + dec r14 + jnz $L$ctr_enc_short + +$L$ctr_enc_done:: + lea rax,QWORD PTR[rsp] + pxor xmm0,xmm0 +$L$ctr_enc_bzero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + lea rax,QWORD PTR[32+rax] + cmp rbp,rax + ja $L$ctr_enc_bzero + + lea rsp,QWORD PTR[rbp] + movaps xmm6,XMMWORD PTR[64+rbp] + movaps xmm7,XMMWORD PTR[80+rbp] + movaps xmm8,XMMWORD PTR[96+rbp] + movaps xmm9,XMMWORD PTR[112+rbp] + movaps xmm10,XMMWORD PTR[128+rbp] + movaps xmm11,XMMWORD PTR[144+rbp] + movaps xmm12,XMMWORD PTR[160+rbp] + movaps xmm13,XMMWORD PTR[176+rbp] + movaps xmm14,XMMWORD PTR[192+rbp] + movaps xmm15,XMMWORD PTR[208+rbp] + lea rsp,QWORD PTR[160+rbp] + mov r15,QWORD PTR[72+rsp] + mov r14,QWORD PTR[80+rsp] + mov r13,QWORD PTR[88+rsp] + mov r12,QWORD PTR[96+rsp] + mov rbx,QWORD PTR[104+rsp] + mov rax,QWORD PTR[112+rsp] + lea rsp,QWORD PTR[120+rsp] + mov rbp,rax +$L$ctr_enc_epilogue:: + DB 0F3h,0C3h ;repret +bsaes_ctr32_encrypt_blocks ENDP +PUBLIC bsaes_xts_encrypt + +ALIGN 16 +bsaes_xts_encrypt PROC PUBLIC + mov rax,rsp +$L$xts_enc_prologue:: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-72))+rsp] + mov r10,QWORD PTR[160+rsp] + mov r11,QWORD PTR[168+rsp] + lea rsp,QWORD PTR[((-160))+rsp] + movaps XMMWORD PTR[64+rsp],xmm6 + movaps XMMWORD PTR[80+rsp],xmm7 + movaps XMMWORD PTR[96+rsp],xmm8 + movaps XMMWORD PTR[112+rsp],xmm9 + movaps XMMWORD PTR[128+rsp],xmm10 + movaps XMMWORD PTR[144+rsp],xmm11 + movaps XMMWORD PTR[160+rsp],xmm12 + movaps XMMWORD PTR[176+rsp],xmm13 + movaps XMMWORD PTR[192+rsp],xmm14 + movaps XMMWORD PTR[208+rsp],xmm15 +$L$xts_enc_body:: + mov rbp,rsp + mov r12,rcx + mov r13,rdx + mov r14,r8 + mov r15,r9 + + lea rcx,QWORD PTR[r11] + lea rdx,QWORD PTR[32+rbp] + lea r8,QWORD PTR[r10] + call asm_AES_encrypt + + mov eax,DWORD PTR[240+r15] + mov rbx,r14 + + mov edx,eax + shl rax,7 + sub rax,96 + sub rsp,rax + + mov rax,rsp + mov rcx,r15 + mov r10d,edx + call _bsaes_key_convert + pxor xmm7,xmm6 + movdqa XMMWORD PTR[rax],xmm7 + + and r14,-16 + sub rsp,080h + movdqa xmm6,XMMWORD PTR[32+rbp] + + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD PTR[$L$xts_magic] + pcmpgtd xmm14,xmm6 + + sub r14,080h + jc $L$xts_enc_short + jmp $L$xts_enc_loop + +ALIGN 16 +$L$xts_enc_loop:: + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm15,xmm6 + movdqa XMMWORD PTR[rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm0,xmm6 + movdqa XMMWORD PTR[16+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm7,XMMWORD PTR[r12] + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm1,xmm6 + movdqa XMMWORD PTR[32+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm15,xmm7 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm2,xmm6 + movdqa XMMWORD PTR[48+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm0,xmm8 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm3,xmm6 + movdqa XMMWORD PTR[64+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm1,xmm9 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm4,xmm6 + movdqa XMMWORD PTR[80+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm2,xmm10 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm5,xmm6 + movdqa XMMWORD PTR[96+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm12,XMMWORD PTR[80+r12] + pxor xmm3,xmm11 + movdqu xmm13,XMMWORD PTR[96+r12] + pxor xmm4,xmm12 + movdqu xmm14,XMMWORD PTR[112+r12] + lea r12,QWORD PTR[128+r12] + movdqa XMMWORD PTR[112+rsp],xmm6 + pxor xmm5,xmm13 + lea rax,QWORD PTR[128+rsp] + pxor xmm6,xmm14 + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm5,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm3 + pxor xmm2,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm5 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm2 + pxor xmm1,XMMWORD PTR[96+rsp] + movdqu XMMWORD PTR[80+r13],xmm6 + pxor xmm4,XMMWORD PTR[112+rsp] + movdqu XMMWORD PTR[96+r13],xmm1 + movdqu XMMWORD PTR[112+r13],xmm4 + lea r13,QWORD PTR[128+r13] + + movdqa xmm6,XMMWORD PTR[112+rsp] + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD PTR[$L$xts_magic] + pcmpgtd xmm14,xmm6 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + + sub r14,080h + jnc $L$xts_enc_loop + +$L$xts_enc_short:: + add r14,080h + jz $L$xts_enc_done + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm15,xmm6 + movdqa XMMWORD PTR[rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm0,xmm6 + movdqa XMMWORD PTR[16+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm7,XMMWORD PTR[r12] + cmp r14,16 + je $L$xts_enc_1 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm1,xmm6 + movdqa XMMWORD PTR[32+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm8,XMMWORD PTR[16+r12] + cmp r14,32 + je $L$xts_enc_2 + pxor xmm15,xmm7 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm2,xmm6 + movdqa XMMWORD PTR[48+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm9,XMMWORD PTR[32+r12] + cmp r14,48 + je $L$xts_enc_3 + pxor xmm0,xmm8 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm3,xmm6 + movdqa XMMWORD PTR[64+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm10,XMMWORD PTR[48+r12] + cmp r14,64 + je $L$xts_enc_4 + pxor xmm1,xmm9 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm4,xmm6 + movdqa XMMWORD PTR[80+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm11,XMMWORD PTR[64+r12] + cmp r14,80 + je $L$xts_enc_5 + pxor xmm2,xmm10 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm5,xmm6 + movdqa XMMWORD PTR[96+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm12,XMMWORD PTR[80+r12] + cmp r14,96 + je $L$xts_enc_6 + pxor xmm3,xmm11 + movdqu xmm13,XMMWORD PTR[96+r12] + pxor xmm4,xmm12 + movdqa XMMWORD PTR[112+rsp],xmm6 + lea r12,QWORD PTR[112+r12] + pxor xmm5,xmm13 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm5,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm3 + pxor xmm2,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm5 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm2 + pxor xmm1,XMMWORD PTR[96+rsp] + movdqu XMMWORD PTR[80+r13],xmm6 + movdqu XMMWORD PTR[96+r13],xmm1 + lea r13,QWORD PTR[112+r13] + + movdqa xmm6,XMMWORD PTR[112+rsp] + jmp $L$xts_enc_done +ALIGN 16 +$L$xts_enc_6:: + pxor xmm3,xmm11 + lea r12,QWORD PTR[96+r12] + pxor xmm4,xmm12 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm5,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm3 + pxor xmm2,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm5 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm2 + movdqu XMMWORD PTR[80+r13],xmm6 + lea r13,QWORD PTR[96+r13] + + movdqa xmm6,XMMWORD PTR[96+rsp] + jmp $L$xts_enc_done +ALIGN 16 +$L$xts_enc_5:: + pxor xmm2,xmm10 + lea r12,QWORD PTR[80+r12] + pxor xmm3,xmm11 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm5,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm3 + pxor xmm2,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm5 + movdqu XMMWORD PTR[64+r13],xmm2 + lea r13,QWORD PTR[80+r13] + + movdqa xmm6,XMMWORD PTR[80+rsp] + jmp $L$xts_enc_done +ALIGN 16 +$L$xts_enc_4:: + pxor xmm1,xmm9 + lea r12,QWORD PTR[64+r12] + pxor xmm2,xmm10 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm5,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm3 + movdqu XMMWORD PTR[48+r13],xmm5 + lea r13,QWORD PTR[64+r13] + + movdqa xmm6,XMMWORD PTR[64+rsp] + jmp $L$xts_enc_done +ALIGN 16 +$L$xts_enc_3:: + pxor xmm0,xmm8 + lea r12,QWORD PTR[48+r12] + pxor xmm1,xmm9 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm3 + lea r13,QWORD PTR[48+r13] + + movdqa xmm6,XMMWORD PTR[48+rsp] + jmp $L$xts_enc_done +ALIGN 16 +$L$xts_enc_2:: + pxor xmm15,xmm7 + lea r12,QWORD PTR[32+r12] + pxor xmm0,xmm8 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + lea r13,QWORD PTR[32+r13] + + movdqa xmm6,XMMWORD PTR[32+rsp] + jmp $L$xts_enc_done +ALIGN 16 +$L$xts_enc_1:: + pxor xmm7,xmm15 + lea r12,QWORD PTR[16+r12] + movdqa XMMWORD PTR[32+rbp],xmm7 + lea rcx,QWORD PTR[32+rbp] + lea rdx,QWORD PTR[32+rbp] + lea r8,QWORD PTR[r15] + call asm_AES_encrypt + pxor xmm15,XMMWORD PTR[32+rbp] + + + + + + movdqu XMMWORD PTR[r13],xmm15 + lea r13,QWORD PTR[16+r13] + + movdqa xmm6,XMMWORD PTR[16+rsp] + +$L$xts_enc_done:: + and ebx,15 + jz $L$xts_enc_ret + mov rdx,r13 + +$L$xts_enc_steal:: + movzx eax,BYTE PTR[r12] + movzx ecx,BYTE PTR[((-16))+rdx] + lea r12,QWORD PTR[1+r12] + mov BYTE PTR[((-16))+rdx],al + mov BYTE PTR[rdx],cl + lea rdx,QWORD PTR[1+rdx] + sub ebx,1 + jnz $L$xts_enc_steal + + movdqu xmm15,XMMWORD PTR[((-16))+r13] + lea rcx,QWORD PTR[32+rbp] + pxor xmm15,xmm6 + lea rdx,QWORD PTR[32+rbp] + movdqa XMMWORD PTR[32+rbp],xmm15 + lea r8,QWORD PTR[r15] + call asm_AES_encrypt + pxor xmm6,XMMWORD PTR[32+rbp] + movdqu XMMWORD PTR[(-16)+r13],xmm6 + +$L$xts_enc_ret:: + lea rax,QWORD PTR[rsp] + pxor xmm0,xmm0 +$L$xts_enc_bzero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + lea rax,QWORD PTR[32+rax] + cmp rbp,rax + ja $L$xts_enc_bzero + + lea rsp,QWORD PTR[rbp] + movaps xmm6,XMMWORD PTR[64+rbp] + movaps xmm7,XMMWORD PTR[80+rbp] + movaps xmm8,XMMWORD PTR[96+rbp] + movaps xmm9,XMMWORD PTR[112+rbp] + movaps xmm10,XMMWORD PTR[128+rbp] + movaps xmm11,XMMWORD PTR[144+rbp] + movaps xmm12,XMMWORD PTR[160+rbp] + movaps xmm13,XMMWORD PTR[176+rbp] + movaps xmm14,XMMWORD PTR[192+rbp] + movaps xmm15,XMMWORD PTR[208+rbp] + lea rsp,QWORD PTR[160+rbp] + mov r15,QWORD PTR[72+rsp] + mov r14,QWORD PTR[80+rsp] + mov r13,QWORD PTR[88+rsp] + mov r12,QWORD PTR[96+rsp] + mov rbx,QWORD PTR[104+rsp] + mov rax,QWORD PTR[112+rsp] + lea rsp,QWORD PTR[120+rsp] + mov rbp,rax +$L$xts_enc_epilogue:: + DB 0F3h,0C3h ;repret +bsaes_xts_encrypt ENDP + +PUBLIC bsaes_xts_decrypt + +ALIGN 16 +bsaes_xts_decrypt PROC PUBLIC + mov rax,rsp +$L$xts_dec_prologue:: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-72))+rsp] + mov r10,QWORD PTR[160+rsp] + mov r11,QWORD PTR[168+rsp] + lea rsp,QWORD PTR[((-160))+rsp] + movaps XMMWORD PTR[64+rsp],xmm6 + movaps XMMWORD PTR[80+rsp],xmm7 + movaps XMMWORD PTR[96+rsp],xmm8 + movaps XMMWORD PTR[112+rsp],xmm9 + movaps XMMWORD PTR[128+rsp],xmm10 + movaps XMMWORD PTR[144+rsp],xmm11 + movaps XMMWORD PTR[160+rsp],xmm12 + movaps XMMWORD PTR[176+rsp],xmm13 + movaps XMMWORD PTR[192+rsp],xmm14 + movaps XMMWORD PTR[208+rsp],xmm15 +$L$xts_dec_body:: + mov rbp,rsp + mov r12,rcx + mov r13,rdx + mov r14,r8 + mov r15,r9 + + lea rcx,QWORD PTR[r11] + lea rdx,QWORD PTR[32+rbp] + lea r8,QWORD PTR[r10] + call asm_AES_encrypt + + mov eax,DWORD PTR[240+r15] + mov rbx,r14 + + mov edx,eax + shl rax,7 + sub rax,96 + sub rsp,rax + + mov rax,rsp + mov rcx,r15 + mov r10d,edx + call _bsaes_key_convert + pxor xmm7,XMMWORD PTR[rsp] + movdqa XMMWORD PTR[rax],xmm6 + movdqa XMMWORD PTR[rsp],xmm7 + + xor eax,eax + and r14,-16 + test ebx,15 + setnz al + shl rax,4 + sub r14,rax + + sub rsp,080h + movdqa xmm6,XMMWORD PTR[32+rbp] + + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD PTR[$L$xts_magic] + pcmpgtd xmm14,xmm6 + + sub r14,080h + jc $L$xts_dec_short + jmp $L$xts_dec_loop + +ALIGN 16 +$L$xts_dec_loop:: + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm15,xmm6 + movdqa XMMWORD PTR[rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm0,xmm6 + movdqa XMMWORD PTR[16+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm7,XMMWORD PTR[r12] + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm1,xmm6 + movdqa XMMWORD PTR[32+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm15,xmm7 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm2,xmm6 + movdqa XMMWORD PTR[48+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm0,xmm8 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm3,xmm6 + movdqa XMMWORD PTR[64+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm1,xmm9 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm4,xmm6 + movdqa XMMWORD PTR[80+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm2,xmm10 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm5,xmm6 + movdqa XMMWORD PTR[96+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm12,XMMWORD PTR[80+r12] + pxor xmm3,xmm11 + movdqu xmm13,XMMWORD PTR[96+r12] + pxor xmm4,xmm12 + movdqu xmm14,XMMWORD PTR[112+r12] + lea r12,QWORD PTR[128+r12] + movdqa XMMWORD PTR[112+rsp],xmm6 + pxor xmm5,xmm13 + lea rax,QWORD PTR[128+rsp] + pxor xmm6,xmm14 + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm3,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm5 + pxor xmm1,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm3 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm1 + pxor xmm2,XMMWORD PTR[96+rsp] + movdqu XMMWORD PTR[80+r13],xmm6 + pxor xmm4,XMMWORD PTR[112+rsp] + movdqu XMMWORD PTR[96+r13],xmm2 + movdqu XMMWORD PTR[112+r13],xmm4 + lea r13,QWORD PTR[128+r13] + + movdqa xmm6,XMMWORD PTR[112+rsp] + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD PTR[$L$xts_magic] + pcmpgtd xmm14,xmm6 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + + sub r14,080h + jnc $L$xts_dec_loop + +$L$xts_dec_short:: + add r14,080h + jz $L$xts_dec_done + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm15,xmm6 + movdqa XMMWORD PTR[rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm0,xmm6 + movdqa XMMWORD PTR[16+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm7,XMMWORD PTR[r12] + cmp r14,16 + je $L$xts_dec_1 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm1,xmm6 + movdqa XMMWORD PTR[32+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm8,XMMWORD PTR[16+r12] + cmp r14,32 + je $L$xts_dec_2 + pxor xmm15,xmm7 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm2,xmm6 + movdqa XMMWORD PTR[48+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm9,XMMWORD PTR[32+r12] + cmp r14,48 + je $L$xts_dec_3 + pxor xmm0,xmm8 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm3,xmm6 + movdqa XMMWORD PTR[64+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm10,XMMWORD PTR[48+r12] + cmp r14,64 + je $L$xts_dec_4 + pxor xmm1,xmm9 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm4,xmm6 + movdqa XMMWORD PTR[80+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm11,XMMWORD PTR[64+r12] + cmp r14,80 + je $L$xts_dec_5 + pxor xmm2,xmm10 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm5,xmm6 + movdqa XMMWORD PTR[96+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm12,XMMWORD PTR[80+r12] + cmp r14,96 + je $L$xts_dec_6 + pxor xmm3,xmm11 + movdqu xmm13,XMMWORD PTR[96+r12] + pxor xmm4,xmm12 + movdqa XMMWORD PTR[112+rsp],xmm6 + lea r12,QWORD PTR[112+r12] + pxor xmm5,xmm13 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm3,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm5 + pxor xmm1,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm3 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm1 + pxor xmm2,XMMWORD PTR[96+rsp] + movdqu XMMWORD PTR[80+r13],xmm6 + movdqu XMMWORD PTR[96+r13],xmm2 + lea r13,QWORD PTR[112+r13] + + movdqa xmm6,XMMWORD PTR[112+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_6:: + pxor xmm3,xmm11 + lea r12,QWORD PTR[96+r12] + pxor xmm4,xmm12 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm3,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm5 + pxor xmm1,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm3 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm1 + movdqu XMMWORD PTR[80+r13],xmm6 + lea r13,QWORD PTR[96+r13] + + movdqa xmm6,XMMWORD PTR[96+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_5:: + pxor xmm2,xmm10 + lea r12,QWORD PTR[80+r12] + pxor xmm3,xmm11 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm3,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm5 + pxor xmm1,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm3 + movdqu XMMWORD PTR[64+r13],xmm1 + lea r13,QWORD PTR[80+r13] + + movdqa xmm6,XMMWORD PTR[80+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_4:: + pxor xmm1,xmm9 + lea r12,QWORD PTR[64+r12] + pxor xmm2,xmm10 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm3,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + lea r13,QWORD PTR[64+r13] + + movdqa xmm6,XMMWORD PTR[64+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_3:: + pxor xmm0,xmm8 + lea r12,QWORD PTR[48+r12] + pxor xmm1,xmm9 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + lea r13,QWORD PTR[48+r13] + + movdqa xmm6,XMMWORD PTR[48+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_2:: + pxor xmm15,xmm7 + lea r12,QWORD PTR[32+r12] + pxor xmm0,xmm8 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + lea r13,QWORD PTR[32+r13] + + movdqa xmm6,XMMWORD PTR[32+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_1:: + pxor xmm7,xmm15 + lea r12,QWORD PTR[16+r12] + movdqa XMMWORD PTR[32+rbp],xmm7 + lea rcx,QWORD PTR[32+rbp] + lea rdx,QWORD PTR[32+rbp] + lea r8,QWORD PTR[r15] + call asm_AES_decrypt + pxor xmm15,XMMWORD PTR[32+rbp] + + + + + + movdqu XMMWORD PTR[r13],xmm15 + lea r13,QWORD PTR[16+r13] + + movdqa xmm6,XMMWORD PTR[16+rsp] + +$L$xts_dec_done:: + and ebx,15 + jz $L$xts_dec_ret + + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD PTR[$L$xts_magic] + pcmpgtd xmm14,xmm6 + pshufd xmm13,xmm14,013h + movdqa xmm5,xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + movdqu xmm15,XMMWORD PTR[r12] + pxor xmm6,xmm13 + + lea rcx,QWORD PTR[32+rbp] + pxor xmm15,xmm6 + lea rdx,QWORD PTR[32+rbp] + movdqa XMMWORD PTR[32+rbp],xmm15 + lea r8,QWORD PTR[r15] + call asm_AES_decrypt + pxor xmm6,XMMWORD PTR[32+rbp] + mov rdx,r13 + movdqu XMMWORD PTR[r13],xmm6 + +$L$xts_dec_steal:: + movzx eax,BYTE PTR[16+r12] + movzx ecx,BYTE PTR[rdx] + lea r12,QWORD PTR[1+r12] + mov BYTE PTR[rdx],al + mov BYTE PTR[16+rdx],cl + lea rdx,QWORD PTR[1+rdx] + sub ebx,1 + jnz $L$xts_dec_steal + + movdqu xmm15,XMMWORD PTR[r13] + lea rcx,QWORD PTR[32+rbp] + pxor xmm15,xmm5 + lea rdx,QWORD PTR[32+rbp] + movdqa XMMWORD PTR[32+rbp],xmm15 + lea r8,QWORD PTR[r15] + call asm_AES_decrypt + pxor xmm5,XMMWORD PTR[32+rbp] + movdqu XMMWORD PTR[r13],xmm5 + +$L$xts_dec_ret:: + lea rax,QWORD PTR[rsp] + pxor xmm0,xmm0 +$L$xts_dec_bzero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + lea rax,QWORD PTR[32+rax] + cmp rbp,rax + ja $L$xts_dec_bzero + + lea rsp,QWORD PTR[rbp] + movaps xmm6,XMMWORD PTR[64+rbp] + movaps xmm7,XMMWORD PTR[80+rbp] + movaps xmm8,XMMWORD PTR[96+rbp] + movaps xmm9,XMMWORD PTR[112+rbp] + movaps xmm10,XMMWORD PTR[128+rbp] + movaps xmm11,XMMWORD PTR[144+rbp] + movaps xmm12,XMMWORD PTR[160+rbp] + movaps xmm13,XMMWORD PTR[176+rbp] + movaps xmm14,XMMWORD PTR[192+rbp] + movaps xmm15,XMMWORD PTR[208+rbp] + lea rsp,QWORD PTR[160+rbp] + mov r15,QWORD PTR[72+rsp] + mov r14,QWORD PTR[80+rsp] + mov r13,QWORD PTR[88+rsp] + mov r12,QWORD PTR[96+rsp] + mov rbx,QWORD PTR[104+rsp] + mov rax,QWORD PTR[112+rsp] + lea rsp,QWORD PTR[120+rsp] + mov rbp,rax +$L$xts_dec_epilogue:: + DB 0F3h,0C3h ;repret +bsaes_xts_decrypt ENDP + +ALIGN 64 +_bsaes_const:: +$L$M0ISR:: + DQ 00a0e0206070b0f03h,00004080c0d010509h +$L$ISRM0:: + DQ 001040b0e0205080fh,00306090c00070a0dh +$L$ISR:: + DQ 00504070602010003h,00f0e0d0c080b0a09h +$L$BS0:: + DQ 05555555555555555h,05555555555555555h +$L$BS1:: + DQ 03333333333333333h,03333333333333333h +$L$BS2:: + DQ 00f0f0f0f0f0f0f0fh,00f0f0f0f0f0f0f0fh +$L$SR:: + DQ 00504070600030201h,00f0e0d0c0a09080bh +$L$SRM0:: + DQ 00304090e00050a0fh,001060b0c0207080dh +$L$M0SR:: + DQ 00a0e02060f03070bh,00004080c05090d01h +$L$SWPUP:: + DQ 00706050403020100h,00c0d0e0f0b0a0908h +$L$SWPUPM0SR:: + DQ 00a0d02060c03070bh,00004080f05090e01h +$L$ADD1:: + DQ 00000000000000000h,00000000100000000h +$L$ADD2:: + DQ 00000000000000000h,00000000200000000h +$L$ADD3:: + DQ 00000000000000000h,00000000300000000h +$L$ADD4:: + DQ 00000000000000000h,00000000400000000h +$L$ADD5:: + DQ 00000000000000000h,00000000500000000h +$L$ADD6:: + DQ 00000000000000000h,00000000600000000h +$L$ADD7:: + DQ 00000000000000000h,00000000700000000h +$L$ADD8:: + DQ 00000000000000000h,00000000800000000h +$L$xts_magic:: + DD 087h,0,1,0 +$L$masks:: + DQ 00101010101010101h,00101010101010101h + DQ 00202020202020202h,00202020202020202h + DQ 00404040404040404h,00404040404040404h + DQ 00808080808080808h,00808080808080808h +$L$M0:: + DQ 002060a0e03070b0fh,00004080c0105090dh +$L$63:: + DQ 06363636363636363h,06363636363636363h +DB 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102 +DB 111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44 +DB 32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44 +DB 32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32 +DB 65,110,100,121,32,80,111,108,121,97,107,111,118,0 +ALIGN 64 + +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + mov rax,QWORD PTR[160+r8] + + lea rsi,QWORD PTR[64+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + lea rax,QWORD PTR[160+rax] + + mov rbp,QWORD PTR[112+rax] + mov rbx,QWORD PTR[104+rax] + mov r12,QWORD PTR[96+rax] + mov r13,QWORD PTR[88+rax] + mov r14,QWORD PTR[80+rax] + mov r15,QWORD PTR[72+rax] + lea rax,QWORD PTR[120+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_prologue:: + mov QWORD PTR[152+r8],rax + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$cbc_dec_prologue + DD imagerel $L$cbc_dec_epilogue + DD imagerel $L$cbc_dec_info + + DD imagerel $L$ctr_enc_prologue + DD imagerel $L$ctr_enc_epilogue + DD imagerel $L$ctr_enc_info + + DD imagerel $L$xts_enc_prologue + DD imagerel $L$xts_enc_epilogue + DD imagerel $L$xts_enc_info + + DD imagerel $L$xts_dec_prologue + DD imagerel $L$xts_dec_epilogue + DD imagerel $L$xts_dec_info + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$cbc_dec_info:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$cbc_dec_body,imagerel $L$cbc_dec_epilogue +$L$ctr_enc_info:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$ctr_enc_body,imagerel $L$ctr_enc_epilogue +$L$xts_enc_info:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue +$L$xts_dec_info:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue + +.xdata ENDS +END + diff --git a/ext/libressl/crypto/aes/bsaes-mingw64-x86_64.S b/ext/libressl/crypto/aes/bsaes-mingw64-x86_64.S new file mode 100644 index 0000000..f0b07cb --- /dev/null +++ b/ext/libressl/crypto/aes/bsaes-mingw64-x86_64.S @@ -0,0 +1,2725 @@ +#include "x86_arch.h" +.text + + + + +.def _bsaes_encrypt8; .scl 3; .type 32; .endef +.p2align 6 +_bsaes_encrypt8: + leaq .LBS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa 80(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 +.byte 102,68,15,56,0,255 + pxor %xmm8,%xmm1 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm2 +.byte 102,15,56,0,207 + pxor %xmm8,%xmm3 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,223 + pxor %xmm8,%xmm5 +.byte 102,15,56,0,231 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 +_bsaes_encrypt8_bitslice: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp .Lenc_sbox +.p2align 4 +.Lenc_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 +.byte 102,68,15,56,0,255 + pxor 32(%rax),%xmm1 +.byte 102,15,56,0,199 + pxor 48(%rax),%xmm2 +.byte 102,15,56,0,207 + pxor 64(%rax),%xmm3 +.byte 102,15,56,0,215 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,223 + pxor 96(%rax),%xmm5 +.byte 102,15,56,0,231 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,239 + leaq 128(%rax),%rax +.byte 102,15,56,0,247 +.Lenc_sbox: + pxor %xmm5,%xmm4 + pxor %xmm0,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm1,%xmm5 + pxor %xmm15,%xmm4 + + pxor %xmm2,%xmm5 + pxor %xmm6,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm3,%xmm2 + pxor %xmm4,%xmm3 + pxor %xmm0,%xmm2 + + pxor %xmm6,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm6,%xmm10 + movdqa %xmm0,%xmm9 + movdqa %xmm4,%xmm8 + movdqa %xmm1,%xmm12 + movdqa %xmm5,%xmm11 + + pxor %xmm3,%xmm10 + pxor %xmm1,%xmm9 + pxor %xmm2,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm3,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm15,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm2,%xmm11 + pxor %xmm15,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm6,%xmm12 + movdqa %xmm4,%xmm11 + pxor %xmm0,%xmm12 + pxor %xmm5,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por %xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm1,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm3,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm0,%xmm13 + pand %xmm2,%xmm11 + movdqa %xmm6,%xmm14 + pand %xmm15,%xmm12 + pand %xmm4,%xmm13 + por %xmm5,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm5,%xmm11 + movdqa %xmm4,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand %xmm5,%xmm9 + pxor %xmm4,%xmm5 + pand %xmm14,%xmm4 + pand %xmm13,%xmm5 + pxor %xmm4,%xmm5 + pxor %xmm9,%xmm4 + pxor %xmm15,%xmm11 + pxor %xmm2,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm2,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm2 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm2,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm2 + pxor %xmm11,%xmm5 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm2 + + movdqa %xmm6,%xmm11 + movdqa %xmm0,%xmm7 + pxor %xmm3,%xmm11 + pxor %xmm1,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm3,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm1,%xmm3 + pand %xmm14,%xmm7 + pand %xmm12,%xmm1 + pand %xmm13,%xmm11 + pand %xmm8,%xmm3 + pxor %xmm11,%xmm7 + pxor %xmm1,%xmm3 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm1 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm6,%xmm10 + pxor %xmm0,%xmm6 + pand %xmm14,%xmm0 + pand %xmm13,%xmm6 + pxor %xmm0,%xmm6 + pxor %xmm10,%xmm0 + pxor %xmm11,%xmm6 + pxor %xmm11,%xmm3 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm1 + pxor %xmm15,%xmm6 + pxor %xmm5,%xmm0 + pxor %xmm6,%xmm3 + pxor %xmm15,%xmm5 + pxor %xmm0,%xmm15 + + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + pxor %xmm2,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm4,%xmm3 + + pxor %xmm2,%xmm5 + decl %r10d + jl .Lenc_done + pshufd $147,%xmm15,%xmm7 + pshufd $147,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $147,%xmm3,%xmm9 + pxor %xmm8,%xmm0 + pshufd $147,%xmm5,%xmm10 + pxor %xmm9,%xmm3 + pshufd $147,%xmm2,%xmm11 + pxor %xmm10,%xmm5 + pshufd $147,%xmm6,%xmm12 + pxor %xmm11,%xmm2 + pshufd $147,%xmm1,%xmm13 + pxor %xmm12,%xmm6 + pshufd $147,%xmm4,%xmm14 + pxor %xmm13,%xmm1 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor %xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $78,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $78,%xmm0,%xmm0 + pxor %xmm2,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm5,%xmm11 + pshufd $78,%xmm2,%xmm7 + pxor %xmm1,%xmm14 + pshufd $78,%xmm6,%xmm8 + pxor %xmm3,%xmm10 + pshufd $78,%xmm5,%xmm2 + pxor %xmm4,%xmm10 + pshufd $78,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $78,%xmm1,%xmm5 + pxor %xmm11,%xmm7 + pshufd $78,%xmm3,%xmm1 + pxor %xmm12,%xmm8 + pxor %xmm10,%xmm2 + pxor %xmm14,%xmm6 + pxor %xmm13,%xmm5 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm1 + movdqa %xmm8,%xmm4 + movdqa 48(%r11),%xmm7 + jnz .Lenc_loop + movdqa 64(%r11),%xmm7 + jmp .Lenc_loop +.p2align 4 +.Lenc_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm2,%xmm10 + psrlq $1,%xmm2 + pxor %xmm4,%xmm1 + pxor %xmm6,%xmm2 + pand %xmm7,%xmm1 + pand %xmm7,%xmm2 + pxor %xmm1,%xmm4 + psllq $1,%xmm1 + pxor %xmm2,%xmm6 + psllq $1,%xmm2 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm2 + movdqa %xmm3,%xmm9 + psrlq $1,%xmm3 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm5,%xmm3 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm3 + pand %xmm7,%xmm15 + pxor %xmm3,%xmm5 + psllq $1,%xmm3 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm2,%xmm10 + psrlq $2,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm1,%xmm2 + pand %xmm8,%xmm6 + pand %xmm8,%xmm2 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm2,%xmm1 + psllq $2,%xmm2 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm2 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm5,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm5 + psllq $2,%xmm0 + pxor %xmm15,%xmm3 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm5,%xmm9 + psrlq $4,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $4,%xmm3 + pxor %xmm4,%xmm5 + pxor %xmm1,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm4 + psllq $4,%xmm5 + pxor %xmm3,%xmm1 + psllq $4,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm2,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm2 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm1 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor %xmm7,%xmm0 + retq + + +.def _bsaes_decrypt8; .scl 3; .type 32; .endef +.p2align 6 +_bsaes_decrypt8: + leaq .LBS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa -48(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 +.byte 102,68,15,56,0,255 + pxor %xmm8,%xmm1 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm2 +.byte 102,15,56,0,207 + pxor %xmm8,%xmm3 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,223 + pxor %xmm8,%xmm5 +.byte 102,15,56,0,231 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp .Ldec_sbox +.p2align 4 +.Ldec_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 +.byte 102,68,15,56,0,255 + pxor 32(%rax),%xmm1 +.byte 102,15,56,0,199 + pxor 48(%rax),%xmm2 +.byte 102,15,56,0,207 + pxor 64(%rax),%xmm3 +.byte 102,15,56,0,215 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,223 + pxor 96(%rax),%xmm5 +.byte 102,15,56,0,231 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,239 + leaq 128(%rax),%rax +.byte 102,15,56,0,247 +.Ldec_sbox: + pxor %xmm3,%xmm2 + + pxor %xmm6,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm3,%xmm5 + pxor %xmm5,%xmm6 + pxor %xmm6,%xmm0 + + pxor %xmm0,%xmm15 + pxor %xmm4,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm15,%xmm4 + pxor %xmm2,%xmm0 + movdqa %xmm2,%xmm10 + movdqa %xmm6,%xmm9 + movdqa %xmm0,%xmm8 + movdqa %xmm3,%xmm12 + movdqa %xmm4,%xmm11 + + pxor %xmm15,%xmm10 + pxor %xmm3,%xmm9 + pxor %xmm5,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm15,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm1,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm5,%xmm11 + pxor %xmm1,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm2,%xmm12 + movdqa %xmm0,%xmm11 + pxor %xmm6,%xmm12 + pxor %xmm4,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por %xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm3,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm15,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm6,%xmm13 + pand %xmm5,%xmm11 + movdqa %xmm2,%xmm14 + pand %xmm1,%xmm12 + pand %xmm0,%xmm13 + por %xmm4,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm4,%xmm11 + movdqa %xmm0,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand %xmm4,%xmm9 + pxor %xmm0,%xmm4 + pand %xmm14,%xmm0 + pand %xmm13,%xmm4 + pxor %xmm0,%xmm4 + pxor %xmm9,%xmm0 + pxor %xmm1,%xmm11 + pxor %xmm5,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm1,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm5,%xmm1 + pand %xmm14,%xmm7 + pand %xmm12,%xmm5 + pand %xmm13,%xmm11 + pand %xmm8,%xmm1 + pxor %xmm11,%xmm7 + pxor %xmm5,%xmm1 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm5 + pxor %xmm11,%xmm4 + pxor %xmm11,%xmm1 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm5 + + movdqa %xmm2,%xmm11 + movdqa %xmm6,%xmm7 + pxor %xmm15,%xmm11 + pxor %xmm3,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm3,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm3 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm3,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm3 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm2,%xmm10 + pxor %xmm6,%xmm2 + pand %xmm14,%xmm6 + pand %xmm13,%xmm2 + pxor %xmm6,%xmm2 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm3 + pxor %xmm6,%xmm0 + pxor %xmm4,%xmm5 + + pxor %xmm0,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm6,%xmm4 + pxor %xmm1,%xmm3 + pxor %xmm15,%xmm6 + pxor %xmm4,%xmm3 + pxor %xmm5,%xmm2 + pxor %xmm0,%xmm5 + pxor %xmm3,%xmm2 + + pxor %xmm15,%xmm3 + pxor %xmm2,%xmm6 + decl %r10d + jl .Ldec_done + + pshufd $78,%xmm15,%xmm7 + pshufd $78,%xmm2,%xmm13 + pxor %xmm15,%xmm7 + pshufd $78,%xmm4,%xmm14 + pxor %xmm2,%xmm13 + pshufd $78,%xmm0,%xmm8 + pxor %xmm4,%xmm14 + pshufd $78,%xmm5,%xmm9 + pxor %xmm0,%xmm8 + pshufd $78,%xmm3,%xmm10 + pxor %xmm5,%xmm9 + pxor %xmm13,%xmm15 + pxor %xmm13,%xmm0 + pshufd $78,%xmm1,%xmm11 + pxor %xmm3,%xmm10 + pxor %xmm7,%xmm5 + pxor %xmm8,%xmm3 + pshufd $78,%xmm6,%xmm12 + pxor %xmm1,%xmm11 + pxor %xmm14,%xmm0 + pxor %xmm9,%xmm1 + pxor %xmm6,%xmm12 + + pxor %xmm14,%xmm5 + pxor %xmm13,%xmm3 + pxor %xmm13,%xmm1 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm14,%xmm1 + pxor %xmm14,%xmm6 + pxor %xmm12,%xmm4 + pshufd $147,%xmm15,%xmm7 + pshufd $147,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $147,%xmm5,%xmm9 + pxor %xmm8,%xmm0 + pshufd $147,%xmm3,%xmm10 + pxor %xmm9,%xmm5 + pshufd $147,%xmm1,%xmm11 + pxor %xmm10,%xmm3 + pshufd $147,%xmm6,%xmm12 + pxor %xmm11,%xmm1 + pshufd $147,%xmm2,%xmm13 + pxor %xmm12,%xmm6 + pshufd $147,%xmm4,%xmm14 + pxor %xmm13,%xmm2 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor %xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $78,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $78,%xmm0,%xmm0 + pxor %xmm1,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm3,%xmm11 + pshufd $78,%xmm1,%xmm7 + pxor %xmm2,%xmm14 + pshufd $78,%xmm6,%xmm8 + pxor %xmm5,%xmm10 + pshufd $78,%xmm3,%xmm1 + pxor %xmm4,%xmm10 + pshufd $78,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $78,%xmm2,%xmm3 + pxor %xmm11,%xmm7 + pshufd $78,%xmm5,%xmm2 + pxor %xmm12,%xmm8 + pxor %xmm1,%xmm10 + pxor %xmm14,%xmm6 + pxor %xmm3,%xmm13 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm2 + movdqa %xmm13,%xmm5 + movdqa %xmm8,%xmm4 + movdqa %xmm2,%xmm1 + movdqa %xmm10,%xmm2 + movdqa -16(%r11),%xmm7 + jnz .Ldec_loop + movdqa -32(%r11),%xmm7 + jmp .Ldec_loop +.p2align 4 +.Ldec_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm2,%xmm9 + psrlq $1,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $1,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm6,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm4 + psllq $1,%xmm2 + pxor %xmm1,%xmm6 + psllq $1,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm3,%xmm5 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm5 + pand %xmm7,%xmm15 + pxor %xmm5,%xmm3 + psllq $1,%xmm5 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm1,%xmm10 + psrlq $2,%xmm1 + pxor %xmm4,%xmm6 + pxor %xmm2,%xmm1 + pand %xmm8,%xmm6 + pand %xmm8,%xmm1 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm1,%xmm2 + psllq $2,%xmm1 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm3 + psllq $2,%xmm0 + pxor %xmm15,%xmm5 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm3,%xmm9 + psrlq $4,%xmm3 + movdqa %xmm5,%xmm10 + psrlq $4,%xmm5 + pxor %xmm4,%xmm3 + pxor %xmm2,%xmm5 + pand %xmm7,%xmm3 + pand %xmm7,%xmm5 + pxor %xmm3,%xmm4 + psllq $4,%xmm3 + pxor %xmm5,%xmm2 + psllq $4,%xmm5 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm5 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm1 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm1 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor %xmm7,%xmm0 + retq + +.def _bsaes_key_convert; .scl 3; .type 32; .endef +.p2align 4 +_bsaes_key_convert: + leaq .Lmasks(%rip),%r11 + movdqu (%rcx),%xmm7 + leaq 16(%rcx),%rcx + movdqa 0(%r11),%xmm0 + movdqa 16(%r11),%xmm1 + movdqa 32(%r11),%xmm2 + movdqa 48(%r11),%xmm3 + movdqa 64(%r11),%xmm4 + pcmpeqd %xmm5,%xmm5 + + movdqu (%rcx),%xmm6 + movdqa %xmm7,(%rax) + leaq 16(%rax),%rax + decl %r10d + jmp .Lkey_loop +.p2align 4 +.Lkey_loop: +.byte 102,15,56,0,244 + + movdqa %xmm0,%xmm8 + movdqa %xmm1,%xmm9 + + pand %xmm6,%xmm8 + pand %xmm6,%xmm9 + movdqa %xmm2,%xmm10 + pcmpeqb %xmm0,%xmm8 + psllq $4,%xmm0 + movdqa %xmm3,%xmm11 + pcmpeqb %xmm1,%xmm9 + psllq $4,%xmm1 + + pand %xmm6,%xmm10 + pand %xmm6,%xmm11 + movdqa %xmm0,%xmm12 + pcmpeqb %xmm2,%xmm10 + psllq $4,%xmm2 + movdqa %xmm1,%xmm13 + pcmpeqb %xmm3,%xmm11 + psllq $4,%xmm3 + + movdqa %xmm2,%xmm14 + movdqa %xmm3,%xmm15 + pxor %xmm5,%xmm8 + pxor %xmm5,%xmm9 + + pand %xmm6,%xmm12 + pand %xmm6,%xmm13 + movdqa %xmm8,0(%rax) + pcmpeqb %xmm0,%xmm12 + psrlq $4,%xmm0 + movdqa %xmm9,16(%rax) + pcmpeqb %xmm1,%xmm13 + psrlq $4,%xmm1 + leaq 16(%rcx),%rcx + + pand %xmm6,%xmm14 + pand %xmm6,%xmm15 + movdqa %xmm10,32(%rax) + pcmpeqb %xmm2,%xmm14 + psrlq $4,%xmm2 + movdqa %xmm11,48(%rax) + pcmpeqb %xmm3,%xmm15 + psrlq $4,%xmm3 + movdqu (%rcx),%xmm6 + + pxor %xmm5,%xmm13 + pxor %xmm5,%xmm14 + movdqa %xmm12,64(%rax) + movdqa %xmm13,80(%rax) + movdqa %xmm14,96(%rax) + movdqa %xmm15,112(%rax) + leaq 128(%rax),%rax + decl %r10d + jnz .Lkey_loop + + movdqa 80(%r11),%xmm7 + + retq + + +.globl bsaes_cbc_encrypt +.def bsaes_cbc_encrypt; .scl 2; .type 32; .endef +.p2align 4 +bsaes_cbc_encrypt: + movl 48(%rsp),%r11d + cmpl $0,%r11d + jne asm_AES_cbc_encrypt + cmpq $128,%r8 + jb asm_AES_cbc_encrypt + + movq %rsp,%rax +.Lcbc_dec_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq 160(%rsp),%r10 + leaq -160(%rsp),%rsp + movaps %xmm6,64(%rsp) + movaps %xmm7,80(%rsp) + movaps %xmm8,96(%rsp) + movaps %xmm9,112(%rsp) + movaps %xmm10,128(%rsp) + movaps %xmm11,144(%rsp) + movaps %xmm12,160(%rsp) + movaps %xmm13,176(%rsp) + movaps %xmm14,192(%rsp) + movaps %xmm15,208(%rsp) +.Lcbc_dec_body: + movq %rsp,%rbp + movl 240(%r9),%eax + movq %rcx,%r12 + movq %rdx,%r13 + movq %r8,%r14 + movq %r9,%r15 + movq %r10,%rbx + shrq $4,%r14 + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + movdqu (%rbx),%xmm14 + subq $8,%r14 +.Lcbc_dec_loop: + movdqu 0(%r12),%xmm15 + movdqu 16(%r12),%xmm0 + movdqu 32(%r12),%xmm1 + movdqu 48(%r12),%xmm2 + movdqu 64(%r12),%xmm3 + movdqu 80(%r12),%xmm4 + movq %rsp,%rax + movdqu 96(%r12),%xmm5 + movl %edx,%r10d + movdqu 112(%r12),%xmm6 + movdqa %xmm14,32(%rbp) + + call _bsaes_decrypt8 + + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm2 + movdqu 112(%r12),%xmm14 + pxor %xmm13,%xmm4 + movdqu %xmm15,0(%r13) + leaq 128(%r12),%r12 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + subq $8,%r14 + jnc .Lcbc_dec_loop + + addq $8,%r14 + jz .Lcbc_dec_done + + movdqu 0(%r12),%xmm15 + movq %rsp,%rax + movl %edx,%r10d + cmpq $2,%r14 + jb .Lcbc_dec_one + movdqu 16(%r12),%xmm0 + je .Lcbc_dec_two + movdqu 32(%r12),%xmm1 + cmpq $4,%r14 + jb .Lcbc_dec_three + movdqu 48(%r12),%xmm2 + je .Lcbc_dec_four + movdqu 64(%r12),%xmm3 + cmpq $6,%r14 + jb .Lcbc_dec_five + movdqu 80(%r12),%xmm4 + je .Lcbc_dec_six + movdqu 96(%r12),%xmm5 + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm14 + pxor %xmm12,%xmm2 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + jmp .Lcbc_dec_done +.p2align 4 +.Lcbc_dec_six: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm14 + pxor %xmm11,%xmm6 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + jmp .Lcbc_dec_done +.p2align 4 +.Lcbc_dec_five: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm14 + pxor %xmm10,%xmm1 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + jmp .Lcbc_dec_done +.p2align 4 +.Lcbc_dec_four: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm14 + pxor %xmm9,%xmm3 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + jmp .Lcbc_dec_done +.p2align 4 +.Lcbc_dec_three: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm14 + pxor %xmm8,%xmm5 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + jmp .Lcbc_dec_done +.p2align 4 +.Lcbc_dec_two: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm14 + pxor %xmm7,%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + jmp .Lcbc_dec_done +.p2align 4 +.Lcbc_dec_one: + leaq (%r12),%rcx + leaq 32(%rbp),%rdx + leaq (%r15),%r8 + call asm_AES_decrypt + pxor 32(%rbp),%xmm14 + movdqu %xmm14,(%r13) + movdqa %xmm15,%xmm14 + +.Lcbc_dec_done: + movdqu %xmm14,(%rbx) + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lcbc_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lcbc_dec_bzero + + leaq (%rbp),%rsp + movaps 64(%rbp),%xmm6 + movaps 80(%rbp),%xmm7 + movaps 96(%rbp),%xmm8 + movaps 112(%rbp),%xmm9 + movaps 128(%rbp),%xmm10 + movaps 144(%rbp),%xmm11 + movaps 160(%rbp),%xmm12 + movaps 176(%rbp),%xmm13 + movaps 192(%rbp),%xmm14 + movaps 208(%rbp),%xmm15 + leaq 160(%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lcbc_dec_epilogue: + retq + + +.globl bsaes_ctr32_encrypt_blocks +.def bsaes_ctr32_encrypt_blocks; .scl 2; .type 32; .endef +.p2align 4 +bsaes_ctr32_encrypt_blocks: + movq %rsp,%rax +.Lctr_enc_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq 160(%rsp),%r10 + leaq -160(%rsp),%rsp + movaps %xmm6,64(%rsp) + movaps %xmm7,80(%rsp) + movaps %xmm8,96(%rsp) + movaps %xmm9,112(%rsp) + movaps %xmm10,128(%rsp) + movaps %xmm11,144(%rsp) + movaps %xmm12,160(%rsp) + movaps %xmm13,176(%rsp) + movaps %xmm14,192(%rsp) + movaps %xmm15,208(%rsp) +.Lctr_enc_body: + movq %rsp,%rbp + movdqu (%r10),%xmm0 + movl 240(%r9),%eax + movq %rcx,%r12 + movq %rdx,%r13 + movq %r8,%r14 + movq %r9,%r15 + movdqa %xmm0,32(%rbp) + cmpq $8,%r8 + jb .Lctr_enc_short + + movl %eax,%ebx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %ebx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + movdqa (%rsp),%xmm8 + leaq .LADD1(%rip),%r11 + movdqa 32(%rbp),%xmm15 + movdqa -32(%r11),%xmm7 +.byte 102,68,15,56,0,199 +.byte 102,68,15,56,0,255 + movdqa %xmm8,(%rsp) + jmp .Lctr_enc_loop +.p2align 4 +.Lctr_enc_loop: + movdqa %xmm15,32(%rbp) + movdqa %xmm15,%xmm0 + movdqa %xmm15,%xmm1 + paddd 0(%r11),%xmm0 + movdqa %xmm15,%xmm2 + paddd 16(%r11),%xmm1 + movdqa %xmm15,%xmm3 + paddd 32(%r11),%xmm2 + movdqa %xmm15,%xmm4 + paddd 48(%r11),%xmm3 + movdqa %xmm15,%xmm5 + paddd 64(%r11),%xmm4 + movdqa %xmm15,%xmm6 + paddd 80(%r11),%xmm5 + paddd 96(%r11),%xmm6 + + + + movdqa (%rsp),%xmm8 + leaq 16(%rsp),%rax + movdqa -16(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 +.byte 102,68,15,56,0,255 + pxor %xmm8,%xmm1 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm2 +.byte 102,15,56,0,207 + pxor %xmm8,%xmm3 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,223 + pxor %xmm8,%xmm5 +.byte 102,15,56,0,231 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,239 + leaq .LBS0(%rip),%r11 +.byte 102,15,56,0,247 + movl %ebx,%r10d + + call _bsaes_encrypt8_bitslice + + subq $8,%r14 + jc .Lctr_enc_loop_done + + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + movdqu 32(%r12),%xmm9 + movdqu 48(%r12),%xmm10 + movdqu 64(%r12),%xmm11 + movdqu 80(%r12),%xmm12 + movdqu 96(%r12),%xmm13 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + pxor %xmm15,%xmm7 + movdqa 32(%rbp),%xmm15 + pxor %xmm8,%xmm0 + movdqu %xmm7,0(%r13) + pxor %xmm9,%xmm3 + movdqu %xmm0,16(%r13) + pxor %xmm10,%xmm5 + movdqu %xmm3,32(%r13) + pxor %xmm11,%xmm2 + movdqu %xmm5,48(%r13) + pxor %xmm12,%xmm6 + movdqu %xmm2,64(%r13) + pxor %xmm13,%xmm1 + movdqu %xmm6,80(%r13) + pxor %xmm14,%xmm4 + movdqu %xmm1,96(%r13) + leaq .LADD1(%rip),%r11 + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + paddd 112(%r11),%xmm15 + jnz .Lctr_enc_loop + + jmp .Lctr_enc_done +.p2align 4 +.Lctr_enc_loop_done: + addq $8,%r14 + movdqu 0(%r12),%xmm7 + pxor %xmm7,%xmm15 + movdqu %xmm15,0(%r13) + cmpq $2,%r14 + jb .Lctr_enc_done + movdqu 16(%r12),%xmm8 + pxor %xmm8,%xmm0 + movdqu %xmm0,16(%r13) + je .Lctr_enc_done + movdqu 32(%r12),%xmm9 + pxor %xmm9,%xmm3 + movdqu %xmm3,32(%r13) + cmpq $4,%r14 + jb .Lctr_enc_done + movdqu 48(%r12),%xmm10 + pxor %xmm10,%xmm5 + movdqu %xmm5,48(%r13) + je .Lctr_enc_done + movdqu 64(%r12),%xmm11 + pxor %xmm11,%xmm2 + movdqu %xmm2,64(%r13) + cmpq $6,%r14 + jb .Lctr_enc_done + movdqu 80(%r12),%xmm12 + pxor %xmm12,%xmm6 + movdqu %xmm6,80(%r13) + je .Lctr_enc_done + movdqu 96(%r12),%xmm13 + pxor %xmm13,%xmm1 + movdqu %xmm1,96(%r13) + jmp .Lctr_enc_done + +.p2align 4 +.Lctr_enc_short: + leaq 32(%rbp),%rcx + leaq 48(%rbp),%rdx + leaq (%r15),%r8 + call asm_AES_encrypt + movdqu (%r12),%xmm0 + leaq 16(%r12),%r12 + movl 44(%rbp),%eax + bswapl %eax + pxor 48(%rbp),%xmm0 + incl %eax + movdqu %xmm0,(%r13) + bswapl %eax + leaq 16(%r13),%r13 + movl %eax,44(%rsp) + decq %r14 + jnz .Lctr_enc_short + +.Lctr_enc_done: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lctr_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lctr_enc_bzero + + leaq (%rbp),%rsp + movaps 64(%rbp),%xmm6 + movaps 80(%rbp),%xmm7 + movaps 96(%rbp),%xmm8 + movaps 112(%rbp),%xmm9 + movaps 128(%rbp),%xmm10 + movaps 144(%rbp),%xmm11 + movaps 160(%rbp),%xmm12 + movaps 176(%rbp),%xmm13 + movaps 192(%rbp),%xmm14 + movaps 208(%rbp),%xmm15 + leaq 160(%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lctr_enc_epilogue: + retq + +.globl bsaes_xts_encrypt +.def bsaes_xts_encrypt; .scl 2; .type 32; .endef +.p2align 4 +bsaes_xts_encrypt: + movq %rsp,%rax +.Lxts_enc_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq 160(%rsp),%r10 + movq 168(%rsp),%r11 + leaq -160(%rsp),%rsp + movaps %xmm6,64(%rsp) + movaps %xmm7,80(%rsp) + movaps %xmm8,96(%rsp) + movaps %xmm9,112(%rsp) + movaps %xmm10,128(%rsp) + movaps %xmm11,144(%rsp) + movaps %xmm12,160(%rsp) + movaps %xmm13,176(%rsp) + movaps %xmm14,192(%rsp) + movaps %xmm15,208(%rsp) +.Lxts_enc_body: + movq %rsp,%rbp + movq %rcx,%r12 + movq %rdx,%r13 + movq %r8,%r14 + movq %r9,%r15 + + leaq (%r11),%rcx + leaq 32(%rbp),%rdx + leaq (%r10),%r8 + call asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + andq $-16,%r14 + subq $128,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $128,%r14 + jc .Lxts_enc_short + jmp .Lxts_enc_loop + +.p2align 4 +.Lxts_enc_loop: + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm1,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $128,%r14 + jnc .Lxts_enc_loop + +.Lxts_enc_short: + addq $128,%r14 + jz .Lxts_enc_done + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je .Lxts_enc_1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je .Lxts_enc_2 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je .Lxts_enc_3 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je .Lxts_enc_4 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je .Lxts_enc_5 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je .Lxts_enc_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + movdqu %xmm1,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp .Lxts_enc_done +.p2align 4 +.Lxts_enc_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp .Lxts_enc_done +.p2align 4 +.Lxts_enc_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + movdqu %xmm2,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp .Lxts_enc_done +.p2align 4 +.Lxts_enc_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + movdqu %xmm5,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp .Lxts_enc_done +.p2align 4 +.Lxts_enc_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + movdqu %xmm3,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp .Lxts_enc_done +.p2align 4 +.Lxts_enc_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp .Lxts_enc_done +.p2align 4 +.Lxts_enc_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rcx + leaq 32(%rbp),%rdx + leaq (%r15),%r8 + call asm_AES_encrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +.Lxts_enc_done: + andl $15,%ebx + jz .Lxts_enc_ret + movq %r13,%rdx + +.Lxts_enc_steal: + movzbl (%r12),%eax + movzbl -16(%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,-16(%rdx) + movb %cl,0(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz .Lxts_enc_steal + + movdqu -16(%r13),%xmm15 + leaq 32(%rbp),%rcx + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rdx + movdqa %xmm15,32(%rbp) + leaq (%r15),%r8 + call asm_AES_encrypt + pxor 32(%rbp),%xmm6 + movdqu %xmm6,-16(%r13) + +.Lxts_enc_ret: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lxts_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lxts_enc_bzero + + leaq (%rbp),%rsp + movaps 64(%rbp),%xmm6 + movaps 80(%rbp),%xmm7 + movaps 96(%rbp),%xmm8 + movaps 112(%rbp),%xmm9 + movaps 128(%rbp),%xmm10 + movaps 144(%rbp),%xmm11 + movaps 160(%rbp),%xmm12 + movaps 176(%rbp),%xmm13 + movaps 192(%rbp),%xmm14 + movaps 208(%rbp),%xmm15 + leaq 160(%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lxts_enc_epilogue: + retq + + +.globl bsaes_xts_decrypt +.def bsaes_xts_decrypt; .scl 2; .type 32; .endef +.p2align 4 +bsaes_xts_decrypt: + movq %rsp,%rax +.Lxts_dec_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq 160(%rsp),%r10 + movq 168(%rsp),%r11 + leaq -160(%rsp),%rsp + movaps %xmm6,64(%rsp) + movaps %xmm7,80(%rsp) + movaps %xmm8,96(%rsp) + movaps %xmm9,112(%rsp) + movaps %xmm10,128(%rsp) + movaps %xmm11,144(%rsp) + movaps %xmm12,160(%rsp) + movaps %xmm13,176(%rsp) + movaps %xmm14,192(%rsp) + movaps %xmm15,208(%rsp) +.Lxts_dec_body: + movq %rsp,%rbp + movq %rcx,%r12 + movq %rdx,%r13 + movq %r8,%r14 + movq %r9,%r15 + + leaq (%r11),%rcx + leaq 32(%rbp),%rdx + leaq (%r10),%r8 + call asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + xorl %eax,%eax + andq $-16,%r14 + testl $15,%ebx + setnz %al + shlq $4,%rax + subq %rax,%r14 + + subq $128,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $128,%r14 + jc .Lxts_dec_short + jmp .Lxts_dec_loop + +.p2align 4 +.Lxts_dec_loop: + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $128,%r14 + jnc .Lxts_dec_loop + +.Lxts_dec_short: + addq $128,%r14 + jz .Lxts_dec_done + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je .Lxts_dec_1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je .Lxts_dec_2 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je .Lxts_dec_3 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je .Lxts_dec_4 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je .Lxts_dec_5 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je .Lxts_dec_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp .Lxts_dec_done +.p2align 4 +.Lxts_dec_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp .Lxts_dec_done +.p2align 4 +.Lxts_dec_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp .Lxts_dec_done +.p2align 4 +.Lxts_dec_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp .Lxts_dec_done +.p2align 4 +.Lxts_dec_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp .Lxts_dec_done +.p2align 4 +.Lxts_dec_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp .Lxts_dec_done +.p2align 4 +.Lxts_dec_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rcx + leaq 32(%rbp),%rdx + leaq (%r15),%r8 + call asm_AES_decrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +.Lxts_dec_done: + andl $15,%ebx + jz .Lxts_dec_ret + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + movdqa %xmm6,%xmm5 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + movdqu (%r12),%xmm15 + pxor %xmm13,%xmm6 + + leaq 32(%rbp),%rcx + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rdx + movdqa %xmm15,32(%rbp) + leaq (%r15),%r8 + call asm_AES_decrypt + pxor 32(%rbp),%xmm6 + movq %r13,%rdx + movdqu %xmm6,(%r13) + +.Lxts_dec_steal: + movzbl 16(%r12),%eax + movzbl (%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,(%rdx) + movb %cl,16(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz .Lxts_dec_steal + + movdqu (%r13),%xmm15 + leaq 32(%rbp),%rcx + pxor %xmm5,%xmm15 + leaq 32(%rbp),%rdx + movdqa %xmm15,32(%rbp) + leaq (%r15),%r8 + call asm_AES_decrypt + pxor 32(%rbp),%xmm5 + movdqu %xmm5,(%r13) + +.Lxts_dec_ret: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lxts_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lxts_dec_bzero + + leaq (%rbp),%rsp + movaps 64(%rbp),%xmm6 + movaps 80(%rbp),%xmm7 + movaps 96(%rbp),%xmm8 + movaps 112(%rbp),%xmm9 + movaps 128(%rbp),%xmm10 + movaps 144(%rbp),%xmm11 + movaps 160(%rbp),%xmm12 + movaps 176(%rbp),%xmm13 + movaps 192(%rbp),%xmm14 + movaps 208(%rbp),%xmm15 + leaq 160(%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lxts_dec_epilogue: + retq + + +.p2align 6 +_bsaes_const: +.LM0ISR: +.quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISRM0: +.quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LISR: +.quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LBS0: +.quad 0x5555555555555555, 0x5555555555555555 +.LBS1: +.quad 0x3333333333333333, 0x3333333333333333 +.LBS2: +.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +.LSR: +.quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: +.quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0SR: +.quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSWPUP: +.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908 +.LSWPUPM0SR: +.quad 0x0a0d02060c03070b, 0x0004080f05090e01 +.LADD1: +.quad 0x0000000000000000, 0x0000000100000000 +.LADD2: +.quad 0x0000000000000000, 0x0000000200000000 +.LADD3: +.quad 0x0000000000000000, 0x0000000300000000 +.LADD4: +.quad 0x0000000000000000, 0x0000000400000000 +.LADD5: +.quad 0x0000000000000000, 0x0000000500000000 +.LADD6: +.quad 0x0000000000000000, 0x0000000600000000 +.LADD7: +.quad 0x0000000000000000, 0x0000000700000000 +.LADD8: +.quad 0x0000000000000000, 0x0000000800000000 +.Lxts_magic: +.long 0x87,0,1,0 +.Lmasks: +.quad 0x0101010101010101, 0x0101010101010101 +.quad 0x0202020202020202, 0x0202020202020202 +.quad 0x0404040404040404, 0x0404040404040404 +.quad 0x0808080808080808, 0x0808080808080808 +.LM0: +.quad 0x02060a0e03070b0f, 0x0004080c0105090d +.L63: +.quad 0x6363636363636363, 0x6363636363636363 +.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0 +.p2align 6 + + +.def se_handler; .scl 3; .type 32; .endef +.p2align 4 +se_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + movq 8(%r9),%rsi + movq 56(%r9),%r11 + + movl 0(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jb .Lin_prologue + + movq 152(%r8),%rax + + movl 4(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jae .Lin_prologue + + movq 160(%r8),%rax + + leaq 64(%rax),%rsi + leaq 512(%r8),%rdi + movl $20,%ecx +.long 0xa548f3fc + leaq 160(%rax),%rax + + movq 112(%rax),%rbp + movq 104(%rax),%rbx + movq 96(%rax),%r12 + movq 88(%rax),%r13 + movq 80(%rax),%r14 + movq 72(%rax),%r15 + leaq 120(%rax),%rax + movq %rbx,144(%r8) + movq %rbp,160(%r8) + movq %r12,216(%r8) + movq %r13,224(%r8) + movq %r14,232(%r8) + movq %r15,240(%r8) + +.Lin_prologue: + movq %rax,152(%r8) + + movq 40(%r9),%rdi + movq %r8,%rsi + movl $154,%ecx +.long 0xa548f3fc + + movq %r9,%rsi + xorq %rcx,%rcx + movq 8(%rsi),%rdx + movq 0(%rsi),%r8 + movq 16(%rsi),%r9 + movq 40(%rsi),%r10 + leaq 56(%rsi),%r11 + leaq 24(%rsi),%r12 + movq %r10,32(%rsp) + movq %r11,40(%rsp) + movq %r12,48(%rsp) + movq %rcx,56(%rsp) + call *__imp_RtlVirtualUnwind(%rip) + + movl $1,%eax + addq $64,%rsp + popfq + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + popq %rdi + popq %rsi + retq + + +.section .pdata +.p2align 2 +.rva .Lcbc_dec_prologue +.rva .Lcbc_dec_epilogue +.rva .Lcbc_dec_info + +.rva .Lctr_enc_prologue +.rva .Lctr_enc_epilogue +.rva .Lctr_enc_info + +.rva .Lxts_enc_prologue +.rva .Lxts_enc_epilogue +.rva .Lxts_enc_info + +.rva .Lxts_dec_prologue +.rva .Lxts_dec_epilogue +.rva .Lxts_dec_info + +.section .xdata +.p2align 3 +.Lcbc_dec_info: +.byte 9,0,0,0 +.rva se_handler +.rva .Lcbc_dec_body,.Lcbc_dec_epilogue +.Lctr_enc_info: +.byte 9,0,0,0 +.rva se_handler +.rva .Lctr_enc_body,.Lctr_enc_epilogue +.Lxts_enc_info: +.byte 9,0,0,0 +.rva se_handler +.rva .Lxts_enc_body,.Lxts_enc_epilogue +.Lxts_dec_info: +.byte 9,0,0,0 +.rva se_handler +.rva .Lxts_dec_body,.Lxts_dec_epilogue diff --git a/ext/libressl/crypto/aes/vpaes-elf-x86_64.S b/ext/libressl/crypto/aes/vpaes-elf-x86_64.S new file mode 100644 index 0000000..1e1a6e8 --- /dev/null +++ b/ext/libressl/crypto/aes/vpaes-elf-x86_64.S @@ -0,0 +1,832 @@ +#include "x86_arch.h" +.text + + + + + + + + + + + + + + + + +.type _vpaes_encrypt_core,@function +.align 16 +_vpaes_encrypt_core: + movq %rdx,%r9 + movq $16,%r11 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa .Lk_ipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movdqu (%r9),%xmm5 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa .Lk_ipt+16(%rip),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm5,%xmm2 + pxor %xmm2,%xmm0 + addq $16,%r9 + leaq .Lk_mc_backward(%rip),%r10 + jmp .Lenc_entry + +.align 16 +.Lenc_loop: + + movdqa %xmm13,%xmm4 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + movdqa %xmm15,%xmm5 +.byte 102,15,56,0,234 + movdqa -64(%r11,%r10,1),%xmm1 + movdqa %xmm14,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm5,%xmm2 + movdqa (%r11,%r10,1),%xmm4 + movdqa %xmm0,%xmm3 +.byte 102,15,56,0,193 + addq $16,%r9 + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addq $16,%r11 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andq $48,%r11 + pxor %xmm3,%xmm0 + subq $1,%rax + +.Lenc_entry: + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm5 +.byte 102,15,56,0,232 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm5,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm5,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 + movdqu (%r9),%xmm5 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + jnz .Lenc_loop + + + movdqa -96(%r10),%xmm4 + movdqa -80(%r10),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%r11,%r10,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + retq +.size _vpaes_encrypt_core,.-_vpaes_encrypt_core + + + + + + +.type _vpaes_decrypt_core,@function +.align 16 +_vpaes_decrypt_core: + movq %rdx,%r9 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa .Lk_dipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movq %rax,%r11 + psrld $4,%xmm1 + movdqu (%r9),%xmm5 + shlq $4,%r11 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa .Lk_dipt+16(%rip),%xmm0 + xorq $48,%r11 + leaq .Lk_dsbd(%rip),%r10 +.byte 102,15,56,0,193 + andq $48,%r11 + pxor %xmm5,%xmm2 + movdqa .Lk_mc_forward+48(%rip),%xmm5 + pxor %xmm2,%xmm0 + addq $16,%r9 + addq %r10,%r11 + jmp .Ldec_entry + +.align 16 +.Ldec_loop: + + + + movdqa -32(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa -16(%r10),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + addq $16,%r9 + +.byte 102,15,56,0,197 + movdqa 0(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 16(%r10),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + subq $1,%rax + +.byte 102,15,56,0,197 + movdqa 32(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 48(%r10),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + +.byte 102,15,56,0,197 + movdqa 64(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 80(%r10),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + +.byte 102,15,58,15,237,12 + +.Ldec_entry: + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqu (%r9),%xmm0 + jnz .Ldec_loop + + + movdqa 96(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%r10),%xmm0 + movdqa -352(%r11),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + retq +.size _vpaes_decrypt_core,.-_vpaes_decrypt_core + + + + + + +.type _vpaes_schedule_core,@function +.align 16 +_vpaes_schedule_core: + + + + + + call _vpaes_preheat + movdqa .Lk_rcon(%rip),%xmm8 + movdqu (%rdi),%xmm0 + + + movdqa %xmm0,%xmm3 + leaq .Lk_ipt(%rip),%r11 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm7 + + leaq .Lk_sr(%rip),%r10 + testq %rcx,%rcx + jnz .Lschedule_am_decrypting + + + movdqu %xmm0,(%rdx) + jmp .Lschedule_go + +.Lschedule_am_decrypting: + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%rdx) + xorq $48,%r8 + +.Lschedule_go: + cmpl $192,%esi + ja .Lschedule_256 + je .Lschedule_192 + + + + + + + + + + +.Lschedule_128: + movl $10,%esi + +.Loop_schedule_128: + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + jmp .Loop_schedule_128 + + + + + + + + + + + + + + + + +.align 16 +.Lschedule_192: + movdqu 8(%rdi),%xmm0 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%esi + +.Loop_schedule_192: + call _vpaes_schedule_round +.byte 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp .Loop_schedule_192 + + + + + + + + + + + +.align 16 +.Lschedule_256: + movdqu 16(%rdi),%xmm0 + call _vpaes_schedule_transform + movl $7,%esi + +.Loop_schedule_256: + call _vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + + + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + + + pshufd $255,%xmm0,%xmm0 + movdqa %xmm7,%xmm5 + movdqa %xmm6,%xmm7 + call _vpaes_schedule_low_round + movdqa %xmm5,%xmm7 + + jmp .Loop_schedule_256 + + + + + + + + + + + + +.align 16 +.Lschedule_mangle_last: + + leaq .Lk_deskew(%rip),%r11 + testq %rcx,%rcx + jnz .Lschedule_mangle_last_dec + + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,193 + leaq .Lk_opt(%rip),%r11 + addq $32,%rdx + +.Lschedule_mangle_last_dec: + addq $-16,%rdx + pxor .Lk_s63(%rip),%xmm0 + call _vpaes_schedule_transform + movdqu %xmm0,(%rdx) + + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + retq +.size _vpaes_schedule_core,.-_vpaes_schedule_core + + + + + + + + + + + + + + + +.type _vpaes_schedule_192_smear,@function +.align 16 +_vpaes_schedule_192_smear: + pshufd $128,%xmm6,%xmm0 + pxor %xmm0,%xmm6 + pshufd $254,%xmm7,%xmm0 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + pxor %xmm1,%xmm1 + movhlps %xmm1,%xmm6 + retq +.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear + + + + + + + + + + + + + + + + + + + +.type _vpaes_schedule_round,@function +.align 16 +_vpaes_schedule_round: + + pxor %xmm1,%xmm1 +.byte 102,65,15,58,15,200,15 +.byte 102,69,15,58,15,192,15 + pxor %xmm1,%xmm7 + + + pshufd $255,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + + + + +_vpaes_schedule_low_round: + + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor .Lk_s63(%rip),%xmm7 + + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa %xmm13,%xmm4 +.byte 102,15,56,0,226 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + + + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + retq +.size _vpaes_schedule_round,.-_vpaes_schedule_round + + + + + + + + + + +.type _vpaes_schedule_transform,@function +.align 16 +_vpaes_schedule_transform: + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa (%r11),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%r11),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + retq +.size _vpaes_schedule_transform,.-_vpaes_schedule_transform + + + + + + + + + + + + + + + + + + + + + + + + +.type _vpaes_schedule_mangle,@function +.align 16 +_vpaes_schedule_mangle: + movdqa %xmm0,%xmm4 + movdqa .Lk_mc_forward(%rip),%xmm5 + testq %rcx,%rcx + jnz .Lschedule_mangle_dec + + + addq $16,%rdx + pxor .Lk_s63(%rip),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + + jmp .Lschedule_mangle_both +.align 16 +.Lschedule_mangle_dec: + + leaq .Lk_dksd(%rip),%r11 + movdqa %xmm9,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm4 + + movdqa 0(%r11),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 32(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 64(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 96(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + + addq $-16,%rdx + +.Lschedule_mangle_both: + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + addq $-16,%r8 + andq $48,%r8 + movdqu %xmm3,(%rdx) + retq +.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle + + + + +.globl vpaes_set_encrypt_key +.type vpaes_set_encrypt_key,@function +.align 16 +vpaes_set_encrypt_key: + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + + movl $0,%ecx + movl $48,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + retq +.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key + +.globl vpaes_set_decrypt_key +.type vpaes_set_decrypt_key,@function +.align 16 +vpaes_set_decrypt_key: + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + shll $4,%eax + leaq 16(%rdx,%rax,1),%rdx + + movl $1,%ecx + movl %esi,%r8d + shrl $1,%r8d + andl $32,%r8d + xorl $32,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + retq +.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key + +.globl vpaes_encrypt +.type vpaes_encrypt,@function +.align 16 +vpaes_encrypt: + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_encrypt_core + movdqu %xmm0,(%rsi) + retq +.size vpaes_encrypt,.-vpaes_encrypt + +.globl vpaes_decrypt +.type vpaes_decrypt,@function +.align 16 +vpaes_decrypt: + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_decrypt_core + movdqu %xmm0,(%rsi) + retq +.size vpaes_decrypt,.-vpaes_decrypt +.globl vpaes_cbc_encrypt +.type vpaes_cbc_encrypt,@function +.align 16 +vpaes_cbc_encrypt: + xchgq %rcx,%rdx + subq $16,%rcx + jc .Lcbc_abort + movdqu (%r8),%xmm6 + subq %rdi,%rsi + call _vpaes_preheat + cmpl $0,%r9d + je .Lcbc_dec_loop + jmp .Lcbc_enc_loop +.align 16 +.Lcbc_enc_loop: + movdqu (%rdi),%xmm0 + pxor %xmm6,%xmm0 + call _vpaes_encrypt_core + movdqa %xmm0,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc .Lcbc_enc_loop + jmp .Lcbc_done +.align 16 +.Lcbc_dec_loop: + movdqu (%rdi),%xmm0 + movdqa %xmm0,%xmm7 + call _vpaes_decrypt_core + pxor %xmm6,%xmm0 + movdqa %xmm7,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc .Lcbc_dec_loop +.Lcbc_done: + movdqu %xmm6,(%r8) +.Lcbc_abort: + retq +.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt + + + + + + +.type _vpaes_preheat,@function +.align 16 +_vpaes_preheat: + leaq .Lk_s0F(%rip),%r10 + movdqa -32(%r10),%xmm10 + movdqa -16(%r10),%xmm11 + movdqa 0(%r10),%xmm9 + movdqa 48(%r10),%xmm13 + movdqa 64(%r10),%xmm12 + movdqa 80(%r10),%xmm15 + movdqa 96(%r10),%xmm14 + retq +.size _vpaes_preheat,.-_vpaes_preheat + + + + + +.type _vpaes_consts,@object +.align 64 +_vpaes_consts: +.Lk_inv: +.quad 0x0E05060F0D080180, 0x040703090A0B0C02 +.quad 0x01040A060F0B0780, 0x030D0E0C02050809 + +.Lk_s0F: +.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F + +.Lk_ipt: +.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 +.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 + +.Lk_sb1: +.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 +.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +.Lk_sb2: +.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD +.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +.Lk_sbo: +.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 +.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA + +.Lk_mc_forward: +.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 +.quad 0x080B0A0904070605, 0x000302010C0F0E0D +.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 +.quad 0x000302010C0F0E0D, 0x080B0A0904070605 + +.Lk_mc_backward: +.quad 0x0605040702010003, 0x0E0D0C0F0A09080B +.quad 0x020100030E0D0C0F, 0x0A09080B06050407 +.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 +.quad 0x0A09080B06050407, 0x020100030E0D0C0F + +.Lk_sr: +.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 +.quad 0x030E09040F0A0500, 0x0B06010C07020D08 +.quad 0x0F060D040B020900, 0x070E050C030A0108 +.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 + +.Lk_rcon: +.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 + +.Lk_s63: +.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B + +.Lk_opt: +.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 +.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 + +.Lk_deskew: +.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A +.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 + + + + + +.Lk_dksd: +.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 +.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +.Lk_dksb: +.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 +.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +.Lk_dkse: +.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 +.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +.Lk_dks9: +.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC +.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE + + + + + +.Lk_dipt: +.quad 0x0F505B040B545F00, 0x154A411E114E451A +.quad 0x86E383E660056500, 0x12771772F491F194 + +.Lk_dsb9: +.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 +.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +.Lk_dsbd: +.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 +.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +.Lk_dsbb: +.quad 0xD022649296B44200, 0x602646F6B0F2D404 +.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +.Lk_dsbe: +.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 +.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 +.Lk_dsbo: +.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D +.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 +.align 64 +.size _vpaes_consts,.-_vpaes_consts +#if defined(HAVE_GNU_STACK) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/ext/libressl/crypto/aes/vpaes-macosx-x86_64.S b/ext/libressl/crypto/aes/vpaes-macosx-x86_64.S new file mode 100644 index 0000000..0a892a9 --- /dev/null +++ b/ext/libressl/crypto/aes/vpaes-macosx-x86_64.S @@ -0,0 +1,829 @@ +#include "x86_arch.h" +.text + + + + + + + + + + + + + + + + + +.p2align 4 +_vpaes_encrypt_core: + movq %rdx,%r9 + movq $16,%r11 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa L$k_ipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movdqu (%r9),%xmm5 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa L$k_ipt+16(%rip),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm5,%xmm2 + pxor %xmm2,%xmm0 + addq $16,%r9 + leaq L$k_mc_backward(%rip),%r10 + jmp L$enc_entry + +.p2align 4 +L$enc_loop: + + movdqa %xmm13,%xmm4 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + movdqa %xmm15,%xmm5 +.byte 102,15,56,0,234 + movdqa -64(%r11,%r10,1),%xmm1 + movdqa %xmm14,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm5,%xmm2 + movdqa (%r11,%r10,1),%xmm4 + movdqa %xmm0,%xmm3 +.byte 102,15,56,0,193 + addq $16,%r9 + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addq $16,%r11 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andq $48,%r11 + pxor %xmm3,%xmm0 + subq $1,%rax + +L$enc_entry: + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm5 +.byte 102,15,56,0,232 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm5,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm5,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 + movdqu (%r9),%xmm5 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + jnz L$enc_loop + + + movdqa -96(%r10),%xmm4 + movdqa -80(%r10),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%r11,%r10,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + retq + + + + + + + + +.p2align 4 +_vpaes_decrypt_core: + movq %rdx,%r9 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa L$k_dipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movq %rax,%r11 + psrld $4,%xmm1 + movdqu (%r9),%xmm5 + shlq $4,%r11 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa L$k_dipt+16(%rip),%xmm0 + xorq $48,%r11 + leaq L$k_dsbd(%rip),%r10 +.byte 102,15,56,0,193 + andq $48,%r11 + pxor %xmm5,%xmm2 + movdqa L$k_mc_forward+48(%rip),%xmm5 + pxor %xmm2,%xmm0 + addq $16,%r9 + addq %r10,%r11 + jmp L$dec_entry + +.p2align 4 +L$dec_loop: + + + + movdqa -32(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa -16(%r10),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + addq $16,%r9 + +.byte 102,15,56,0,197 + movdqa 0(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 16(%r10),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + subq $1,%rax + +.byte 102,15,56,0,197 + movdqa 32(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 48(%r10),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + +.byte 102,15,56,0,197 + movdqa 64(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 80(%r10),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + +.byte 102,15,58,15,237,12 + +L$dec_entry: + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqu (%r9),%xmm0 + jnz L$dec_loop + + + movdqa 96(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%r10),%xmm0 + movdqa -352(%r11),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + retq + + + + + + + + +.p2align 4 +_vpaes_schedule_core: + + + + + + call _vpaes_preheat + movdqa L$k_rcon(%rip),%xmm8 + movdqu (%rdi),%xmm0 + + + movdqa %xmm0,%xmm3 + leaq L$k_ipt(%rip),%r11 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm7 + + leaq L$k_sr(%rip),%r10 + testq %rcx,%rcx + jnz L$schedule_am_decrypting + + + movdqu %xmm0,(%rdx) + jmp L$schedule_go + +L$schedule_am_decrypting: + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%rdx) + xorq $48,%r8 + +L$schedule_go: + cmpl $192,%esi + ja L$schedule_256 + je L$schedule_192 + + + + + + + + + + +L$schedule_128: + movl $10,%esi + +L$oop_schedule_128: + call _vpaes_schedule_round + decq %rsi + jz L$schedule_mangle_last + call _vpaes_schedule_mangle + jmp L$oop_schedule_128 + + + + + + + + + + + + + + + + +.p2align 4 +L$schedule_192: + movdqu 8(%rdi),%xmm0 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%esi + +L$oop_schedule_192: + call _vpaes_schedule_round +.byte 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + decq %rsi + jz L$schedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp L$oop_schedule_192 + + + + + + + + + + + +.p2align 4 +L$schedule_256: + movdqu 16(%rdi),%xmm0 + call _vpaes_schedule_transform + movl $7,%esi + +L$oop_schedule_256: + call _vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + + + call _vpaes_schedule_round + decq %rsi + jz L$schedule_mangle_last + call _vpaes_schedule_mangle + + + pshufd $255,%xmm0,%xmm0 + movdqa %xmm7,%xmm5 + movdqa %xmm6,%xmm7 + call _vpaes_schedule_low_round + movdqa %xmm5,%xmm7 + + jmp L$oop_schedule_256 + + + + + + + + + + + + +.p2align 4 +L$schedule_mangle_last: + + leaq L$k_deskew(%rip),%r11 + testq %rcx,%rcx + jnz L$schedule_mangle_last_dec + + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,193 + leaq L$k_opt(%rip),%r11 + addq $32,%rdx + +L$schedule_mangle_last_dec: + addq $-16,%rdx + pxor L$k_s63(%rip),%xmm0 + call _vpaes_schedule_transform + movdqu %xmm0,(%rdx) + + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + retq + + + + + + + + + + + + + + + + + +.p2align 4 +_vpaes_schedule_192_smear: + pshufd $128,%xmm6,%xmm0 + pxor %xmm0,%xmm6 + pshufd $254,%xmm7,%xmm0 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + pxor %xmm1,%xmm1 + movhlps %xmm1,%xmm6 + retq + + + + + + + + + + + + + + + + + + + + + +.p2align 4 +_vpaes_schedule_round: + + pxor %xmm1,%xmm1 +.byte 102,65,15,58,15,200,15 +.byte 102,69,15,58,15,192,15 + pxor %xmm1,%xmm7 + + + pshufd $255,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + + + + +_vpaes_schedule_low_round: + + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor L$k_s63(%rip),%xmm7 + + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa %xmm13,%xmm4 +.byte 102,15,56,0,226 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + + + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + retq + + + + + + + + + + + + +.p2align 4 +_vpaes_schedule_transform: + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa (%r11),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%r11),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + retq + + + + + + + + + + + + + + + + + + + + + + + + + + +.p2align 4 +_vpaes_schedule_mangle: + movdqa %xmm0,%xmm4 + movdqa L$k_mc_forward(%rip),%xmm5 + testq %rcx,%rcx + jnz L$schedule_mangle_dec + + + addq $16,%rdx + pxor L$k_s63(%rip),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + + jmp L$schedule_mangle_both +.p2align 4 +L$schedule_mangle_dec: + + leaq L$k_dksd(%rip),%r11 + movdqa %xmm9,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm4 + + movdqa 0(%r11),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 32(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 64(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 96(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + + addq $-16,%rdx + +L$schedule_mangle_both: + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + addq $-16,%r8 + andq $48,%r8 + movdqu %xmm3,(%rdx) + retq + + + + + +.globl _vpaes_set_encrypt_key + +.p2align 4 +_vpaes_set_encrypt_key: + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + + movl $0,%ecx + movl $48,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + retq + + +.globl _vpaes_set_decrypt_key + +.p2align 4 +_vpaes_set_decrypt_key: + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + shll $4,%eax + leaq 16(%rdx,%rax,1),%rdx + + movl $1,%ecx + movl %esi,%r8d + shrl $1,%r8d + andl $32,%r8d + xorl $32,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + retq + + +.globl _vpaes_encrypt + +.p2align 4 +_vpaes_encrypt: + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_encrypt_core + movdqu %xmm0,(%rsi) + retq + + +.globl _vpaes_decrypt + +.p2align 4 +_vpaes_decrypt: + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_decrypt_core + movdqu %xmm0,(%rsi) + retq + +.globl _vpaes_cbc_encrypt + +.p2align 4 +_vpaes_cbc_encrypt: + xchgq %rcx,%rdx + subq $16,%rcx + jc L$cbc_abort + movdqu (%r8),%xmm6 + subq %rdi,%rsi + call _vpaes_preheat + cmpl $0,%r9d + je L$cbc_dec_loop + jmp L$cbc_enc_loop +.p2align 4 +L$cbc_enc_loop: + movdqu (%rdi),%xmm0 + pxor %xmm6,%xmm0 + call _vpaes_encrypt_core + movdqa %xmm0,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc L$cbc_enc_loop + jmp L$cbc_done +.p2align 4 +L$cbc_dec_loop: + movdqu (%rdi),%xmm0 + movdqa %xmm0,%xmm7 + call _vpaes_decrypt_core + pxor %xmm6,%xmm0 + movdqa %xmm7,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc L$cbc_dec_loop +L$cbc_done: + movdqu %xmm6,(%r8) +L$cbc_abort: + retq + + + + + + + + +.p2align 4 +_vpaes_preheat: + leaq L$k_s0F(%rip),%r10 + movdqa -32(%r10),%xmm10 + movdqa -16(%r10),%xmm11 + movdqa 0(%r10),%xmm9 + movdqa 48(%r10),%xmm13 + movdqa 64(%r10),%xmm12 + movdqa 80(%r10),%xmm15 + movdqa 96(%r10),%xmm14 + retq + + + + + + + +.p2align 6 +_vpaes_consts: +L$k_inv: +.quad 0x0E05060F0D080180, 0x040703090A0B0C02 +.quad 0x01040A060F0B0780, 0x030D0E0C02050809 + +L$k_s0F: +.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F + +L$k_ipt: +.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 +.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 + +L$k_sb1: +.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 +.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +L$k_sb2: +.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD +.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +L$k_sbo: +.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 +.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA + +L$k_mc_forward: +.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 +.quad 0x080B0A0904070605, 0x000302010C0F0E0D +.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 +.quad 0x000302010C0F0E0D, 0x080B0A0904070605 + +L$k_mc_backward: +.quad 0x0605040702010003, 0x0E0D0C0F0A09080B +.quad 0x020100030E0D0C0F, 0x0A09080B06050407 +.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 +.quad 0x0A09080B06050407, 0x020100030E0D0C0F + +L$k_sr: +.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 +.quad 0x030E09040F0A0500, 0x0B06010C07020D08 +.quad 0x0F060D040B020900, 0x070E050C030A0108 +.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 + +L$k_rcon: +.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 + +L$k_s63: +.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B + +L$k_opt: +.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 +.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 + +L$k_deskew: +.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A +.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 + + + + + +L$k_dksd: +.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 +.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +L$k_dksb: +.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 +.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +L$k_dkse: +.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 +.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +L$k_dks9: +.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC +.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE + + + + + +L$k_dipt: +.quad 0x0F505B040B545F00, 0x154A411E114E451A +.quad 0x86E383E660056500, 0x12771772F491F194 + +L$k_dsb9: +.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 +.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +L$k_dsbd: +.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 +.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +L$k_dsbb: +.quad 0xD022649296B44200, 0x602646F6B0F2D404 +.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +L$k_dsbe: +.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 +.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 +L$k_dsbo: +.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D +.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 +.p2align 6 + diff --git a/ext/libressl/crypto/aes/vpaes-masm-x86_64.S b/ext/libressl/crypto/aes/vpaes-masm-x86_64.S new file mode 100644 index 0000000..e10d98d --- /dev/null +++ b/ext/libressl/crypto/aes/vpaes-masm-x86_64.S @@ -0,0 +1,1213 @@ +; 1 "crypto/aes/vpaes-masm-x86_64.S.tmp" +; 1 "" 1 +; 1 "" 3 +; 340 "" 3 +; 1 "" 1 +; 1 "" 2 +; 1 "crypto/aes/vpaes-masm-x86_64.S.tmp" 2 +OPTION DOTNAME + +; 1 "./crypto/x86_arch.h" 1 + + +; 16 "./crypto/x86_arch.h" + + + + + + + + + +; 40 "./crypto/x86_arch.h" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +; 3 "crypto/aes/vpaes-masm-x86_64.S.tmp" 2 +.text$ SEGMENT ALIGN(64) 'CODE' + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_encrypt_core PROC PRIVATE + mov r9,rdx + mov r11,16 + mov eax,DWORD PTR[240+rdx] + movdqa xmm1,xmm9 + movdqa xmm2,XMMWORD PTR[$L$k_ipt] + pandn xmm1,xmm0 + movdqu xmm5,XMMWORD PTR[r9] + psrld xmm1,4 + pand xmm0,xmm9 +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD PTR[(($L$k_ipt+16))] +DB 102,15,56,0,193 + pxor xmm2,xmm5 + pxor xmm0,xmm2 + add r9,16 + lea r10,QWORD PTR[$L$k_mc_backward] + jmp $L$enc_entry + +ALIGN 16 +$L$enc_loop:: + + movdqa xmm4,xmm13 +DB 102,15,56,0,226 + pxor xmm4,xmm5 + movdqa xmm0,xmm12 +DB 102,15,56,0,195 + pxor xmm0,xmm4 + movdqa xmm5,xmm15 +DB 102,15,56,0,234 + movdqa xmm1,XMMWORD PTR[((-64))+r10*1+r11] + movdqa xmm2,xmm14 +DB 102,15,56,0,211 + pxor xmm2,xmm5 + movdqa xmm4,XMMWORD PTR[r10*1+r11] + movdqa xmm3,xmm0 +DB 102,15,56,0,193 + add r9,16 + pxor xmm0,xmm2 +DB 102,15,56,0,220 + add r11,16 + pxor xmm3,xmm0 +DB 102,15,56,0,193 + and r11,030h + pxor xmm0,xmm3 + sub rax,1 + +$L$enc_entry:: + + movdqa xmm1,xmm9 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm9 + movdqa xmm5,xmm11 +DB 102,15,56,0,232 + pxor xmm0,xmm1 + movdqa xmm3,xmm10 +DB 102,15,56,0,217 + pxor xmm3,xmm5 + movdqa xmm4,xmm10 +DB 102,15,56,0,224 + pxor xmm4,xmm5 + movdqa xmm2,xmm10 +DB 102,15,56,0,211 + pxor xmm2,xmm0 + movdqa xmm3,xmm10 + movdqu xmm5,XMMWORD PTR[r9] +DB 102,15,56,0,220 + pxor xmm3,xmm1 + jnz $L$enc_loop + + + movdqa xmm4,XMMWORD PTR[((-96))+r10] + movdqa xmm0,XMMWORD PTR[((-80))+r10] +DB 102,15,56,0,226 + pxor xmm4,xmm5 +DB 102,15,56,0,195 + movdqa xmm1,XMMWORD PTR[64+r10*1+r11] + pxor xmm0,xmm4 +DB 102,15,56,0,193 + DB 0F3h,0C3h ;repret +_vpaes_encrypt_core ENDP + + + + + + + +ALIGN 16 +_vpaes_decrypt_core PROC PRIVATE + mov r9,rdx + mov eax,DWORD PTR[240+rdx] + movdqa xmm1,xmm9 + movdqa xmm2,XMMWORD PTR[$L$k_dipt] + pandn xmm1,xmm0 + mov r11,rax + psrld xmm1,4 + movdqu xmm5,XMMWORD PTR[r9] + shl r11,4 + pand xmm0,xmm9 +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD PTR[(($L$k_dipt+16))] + xor r11,030h + lea r10,QWORD PTR[$L$k_dsbd] +DB 102,15,56,0,193 + and r11,030h + pxor xmm2,xmm5 + movdqa xmm5,XMMWORD PTR[(($L$k_mc_forward+48))] + pxor xmm0,xmm2 + add r9,16 + add r11,r10 + jmp $L$dec_entry + +ALIGN 16 +$L$dec_loop:: + + + + movdqa xmm4,XMMWORD PTR[((-32))+r10] +DB 102,15,56,0,226 + pxor xmm4,xmm0 + movdqa xmm0,XMMWORD PTR[((-16))+r10] +DB 102,15,56,0,195 + pxor xmm0,xmm4 + add r9,16 + +DB 102,15,56,0,197 + movdqa xmm4,XMMWORD PTR[r10] +DB 102,15,56,0,226 + pxor xmm4,xmm0 + movdqa xmm0,XMMWORD PTR[16+r10] +DB 102,15,56,0,195 + pxor xmm0,xmm4 + sub rax,1 + +DB 102,15,56,0,197 + movdqa xmm4,XMMWORD PTR[32+r10] +DB 102,15,56,0,226 + pxor xmm4,xmm0 + movdqa xmm0,XMMWORD PTR[48+r10] +DB 102,15,56,0,195 + pxor xmm0,xmm4 + +DB 102,15,56,0,197 + movdqa xmm4,XMMWORD PTR[64+r10] +DB 102,15,56,0,226 + pxor xmm4,xmm0 + movdqa xmm0,XMMWORD PTR[80+r10] +DB 102,15,56,0,195 + pxor xmm0,xmm4 + +DB 102,15,58,15,237,12 + +$L$dec_entry:: + + movdqa xmm1,xmm9 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm9 + movdqa xmm2,xmm11 +DB 102,15,56,0,208 + pxor xmm0,xmm1 + movdqa xmm3,xmm10 +DB 102,15,56,0,217 + pxor xmm3,xmm2 + movdqa xmm4,xmm10 +DB 102,15,56,0,224 + pxor xmm4,xmm2 + movdqa xmm2,xmm10 +DB 102,15,56,0,211 + pxor xmm2,xmm0 + movdqa xmm3,xmm10 +DB 102,15,56,0,220 + pxor xmm3,xmm1 + movdqu xmm0,XMMWORD PTR[r9] + jnz $L$dec_loop + + + movdqa xmm4,XMMWORD PTR[96+r10] +DB 102,15,56,0,226 + pxor xmm4,xmm0 + movdqa xmm0,XMMWORD PTR[112+r10] + movdqa xmm2,XMMWORD PTR[((-352))+r11] +DB 102,15,56,0,195 + pxor xmm0,xmm4 +DB 102,15,56,0,194 + DB 0F3h,0C3h ;repret +_vpaes_decrypt_core ENDP + + + + + + + +ALIGN 16 +_vpaes_schedule_core PROC PRIVATE + + + + + + call _vpaes_preheat + movdqa xmm8,XMMWORD PTR[$L$k_rcon] + movdqu xmm0,XMMWORD PTR[rdi] + + + movdqa xmm3,xmm0 + lea r11,QWORD PTR[$L$k_ipt] + call _vpaes_schedule_transform + movdqa xmm7,xmm0 + + lea r10,QWORD PTR[$L$k_sr] + test rcx,rcx + jnz $L$schedule_am_decrypting + + + movdqu XMMWORD PTR[rdx],xmm0 + jmp $L$schedule_go + +$L$schedule_am_decrypting:: + + movdqa xmm1,XMMWORD PTR[r10*1+r8] +DB 102,15,56,0,217 + movdqu XMMWORD PTR[rdx],xmm3 + xor r8,030h + +$L$schedule_go:: + cmp esi,192 + ja $L$schedule_256 + je $L$schedule_192 + + + + + + + + + + +$L$schedule_128:: + mov esi,10 + +$L$oop_schedule_128:: + call _vpaes_schedule_round + dec rsi + jz $L$schedule_mangle_last + call _vpaes_schedule_mangle + jmp $L$oop_schedule_128 + + + + + + + + + + + + + + + + +ALIGN 16 +$L$schedule_192:: + movdqu xmm0,XMMWORD PTR[8+rdi] + call _vpaes_schedule_transform + movdqa xmm6,xmm0 + pxor xmm4,xmm4 + movhlps xmm6,xmm4 + mov esi,4 + +$L$oop_schedule_192:: + call _vpaes_schedule_round +DB 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + dec rsi + jz $L$schedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp $L$oop_schedule_192 + + + + + + + + + + + +ALIGN 16 +$L$schedule_256:: + movdqu xmm0,XMMWORD PTR[16+rdi] + call _vpaes_schedule_transform + mov esi,7 + +$L$oop_schedule_256:: + call _vpaes_schedule_mangle + movdqa xmm6,xmm0 + + + call _vpaes_schedule_round + dec rsi + jz $L$schedule_mangle_last + call _vpaes_schedule_mangle + + + pshufd xmm0,xmm0,0FFh + movdqa xmm5,xmm7 + movdqa xmm7,xmm6 + call _vpaes_schedule_low_round + movdqa xmm7,xmm5 + + jmp $L$oop_schedule_256 + + + + + + + + + + + + +ALIGN 16 +$L$schedule_mangle_last:: + + lea r11,QWORD PTR[$L$k_deskew] + test rcx,rcx + jnz $L$schedule_mangle_last_dec + + + movdqa xmm1,XMMWORD PTR[r10*1+r8] +DB 102,15,56,0,193 + lea r11,QWORD PTR[$L$k_opt] + add rdx,32 + +$L$schedule_mangle_last_dec:: + add rdx,-16 + pxor xmm0,XMMWORD PTR[$L$k_s63] + call _vpaes_schedule_transform + movdqu XMMWORD PTR[rdx],xmm0 + + + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + DB 0F3h,0C3h ;repret +_vpaes_schedule_core ENDP + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_192_smear PROC PRIVATE + pshufd xmm0,xmm6,080h + pxor xmm6,xmm0 + pshufd xmm0,xmm7,0FEh + pxor xmm6,xmm0 + movdqa xmm0,xmm6 + pxor xmm1,xmm1 + movhlps xmm6,xmm1 + DB 0F3h,0C3h ;repret +_vpaes_schedule_192_smear ENDP + + + + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_round PROC PRIVATE + + pxor xmm1,xmm1 +DB 102,65,15,58,15,200,15 +DB 102,69,15,58,15,192,15 + pxor xmm7,xmm1 + + + pshufd xmm0,xmm0,0FFh +DB 102,15,58,15,192,1 + + + + +_vpaes_schedule_low_round:: + + movdqa xmm1,xmm7 + pslldq xmm7,4 + pxor xmm7,xmm1 + movdqa xmm1,xmm7 + pslldq xmm7,8 + pxor xmm7,xmm1 + pxor xmm7,XMMWORD PTR[$L$k_s63] + + + movdqa xmm1,xmm9 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm9 + movdqa xmm2,xmm11 +DB 102,15,56,0,208 + pxor xmm0,xmm1 + movdqa xmm3,xmm10 +DB 102,15,56,0,217 + pxor xmm3,xmm2 + movdqa xmm4,xmm10 +DB 102,15,56,0,224 + pxor xmm4,xmm2 + movdqa xmm2,xmm10 +DB 102,15,56,0,211 + pxor xmm2,xmm0 + movdqa xmm3,xmm10 +DB 102,15,56,0,220 + pxor xmm3,xmm1 + movdqa xmm4,xmm13 +DB 102,15,56,0,226 + movdqa xmm0,xmm12 +DB 102,15,56,0,195 + pxor xmm0,xmm4 + + + pxor xmm0,xmm7 + movdqa xmm7,xmm0 + DB 0F3h,0C3h ;repret +_vpaes_schedule_round ENDP + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_transform PROC PRIVATE + movdqa xmm1,xmm9 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm9 + movdqa xmm2,XMMWORD PTR[r11] +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD PTR[16+r11] +DB 102,15,56,0,193 + pxor xmm0,xmm2 + DB 0F3h,0C3h ;repret +_vpaes_schedule_transform ENDP + + + + + + + + + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_mangle PROC PRIVATE + movdqa xmm4,xmm0 + movdqa xmm5,XMMWORD PTR[$L$k_mc_forward] + test rcx,rcx + jnz $L$schedule_mangle_dec + + + add rdx,16 + pxor xmm4,XMMWORD PTR[$L$k_s63] +DB 102,15,56,0,229 + movdqa xmm3,xmm4 +DB 102,15,56,0,229 + pxor xmm3,xmm4 +DB 102,15,56,0,229 + pxor xmm3,xmm4 + + jmp $L$schedule_mangle_both +ALIGN 16 +$L$schedule_mangle_dec:: + + lea r11,QWORD PTR[$L$k_dksd] + movdqa xmm1,xmm9 + pandn xmm1,xmm4 + psrld xmm1,4 + pand xmm4,xmm9 + + movdqa xmm2,XMMWORD PTR[r11] +DB 102,15,56,0,212 + movdqa xmm3,XMMWORD PTR[16+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + + movdqa xmm2,XMMWORD PTR[32+r11] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,XMMWORD PTR[48+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + + movdqa xmm2,XMMWORD PTR[64+r11] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,XMMWORD PTR[80+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + + movdqa xmm2,XMMWORD PTR[96+r11] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,XMMWORD PTR[112+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 + + add rdx,-16 + +$L$schedule_mangle_both:: + movdqa xmm1,XMMWORD PTR[r10*1+r8] +DB 102,15,56,0,217 + add r8,-16 + and r8,030h + movdqu XMMWORD PTR[rdx],xmm3 + DB 0F3h,0C3h ;repret +_vpaes_schedule_mangle ENDP + + + + +PUBLIC vpaes_set_encrypt_key + +ALIGN 16 +vpaes_set_encrypt_key PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_set_encrypt_key:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rsp,QWORD PTR[((-184))+rsp] + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 + movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 +$L$enc_key_body:: + mov eax,esi + shr eax,5 + add eax,5 + mov DWORD PTR[240+rdx],eax + + mov ecx,0 + mov r8d,030h + call _vpaes_schedule_core + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[184+rsp] +$L$enc_key_epilogue:: + xor eax,eax + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_set_encrypt_key:: +vpaes_set_encrypt_key ENDP + +PUBLIC vpaes_set_decrypt_key + +ALIGN 16 +vpaes_set_decrypt_key PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_set_decrypt_key:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rsp,QWORD PTR[((-184))+rsp] + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 + movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 +$L$dec_key_body:: + mov eax,esi + shr eax,5 + add eax,5 + mov DWORD PTR[240+rdx],eax + shl eax,4 + lea rdx,QWORD PTR[16+rax*1+rdx] + + mov ecx,1 + mov r8d,esi + shr r8d,1 + and r8d,32 + xor r8d,32 + call _vpaes_schedule_core + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[184+rsp] +$L$dec_key_epilogue:: + xor eax,eax + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_set_decrypt_key:: +vpaes_set_decrypt_key ENDP + +PUBLIC vpaes_encrypt + +ALIGN 16 +vpaes_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rsp,QWORD PTR[((-184))+rsp] + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 + movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 +$L$enc_body:: + movdqu xmm0,XMMWORD PTR[rdi] + call _vpaes_preheat + call _vpaes_encrypt_core + movdqu XMMWORD PTR[rsi],xmm0 + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[184+rsp] +$L$enc_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_encrypt:: +vpaes_encrypt ENDP + +PUBLIC vpaes_decrypt + +ALIGN 16 +vpaes_decrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_decrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rsp,QWORD PTR[((-184))+rsp] + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 + movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 +$L$dec_body:: + movdqu xmm0,XMMWORD PTR[rdi] + call _vpaes_preheat + call _vpaes_decrypt_core + movdqu XMMWORD PTR[rsi],xmm0 + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[184+rsp] +$L$dec_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_decrypt:: +vpaes_decrypt ENDP +PUBLIC vpaes_cbc_encrypt + +ALIGN 16 +vpaes_cbc_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_cbc_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + xchg rdx,rcx + sub rcx,16 + jc $L$cbc_abort + lea rsp,QWORD PTR[((-184))+rsp] + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 + movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 +$L$cbc_body:: + movdqu xmm6,XMMWORD PTR[r8] + sub rsi,rdi + call _vpaes_preheat + cmp r9d,0 + je $L$cbc_dec_loop + jmp $L$cbc_enc_loop +ALIGN 16 +$L$cbc_enc_loop:: + movdqu xmm0,XMMWORD PTR[rdi] + pxor xmm0,xmm6 + call _vpaes_encrypt_core + movdqa xmm6,xmm0 + movdqu XMMWORD PTR[rdi*1+rsi],xmm0 + lea rdi,QWORD PTR[16+rdi] + sub rcx,16 + jnc $L$cbc_enc_loop + jmp $L$cbc_done +ALIGN 16 +$L$cbc_dec_loop:: + movdqu xmm0,XMMWORD PTR[rdi] + movdqa xmm7,xmm0 + call _vpaes_decrypt_core + pxor xmm0,xmm6 + movdqa xmm6,xmm7 + movdqu XMMWORD PTR[rdi*1+rsi],xmm0 + lea rdi,QWORD PTR[16+rdi] + sub rcx,16 + jnc $L$cbc_dec_loop +$L$cbc_done:: + movdqu XMMWORD PTR[r8],xmm6 + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[184+rsp] +$L$cbc_epilogue:: +$L$cbc_abort:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_cbc_encrypt:: +vpaes_cbc_encrypt ENDP + + + + + + + +ALIGN 16 +_vpaes_preheat PROC PRIVATE + lea r10,QWORD PTR[$L$k_s0F] + movdqa xmm10,XMMWORD PTR[((-32))+r10] + movdqa xmm11,XMMWORD PTR[((-16))+r10] + movdqa xmm9,XMMWORD PTR[r10] + movdqa xmm13,XMMWORD PTR[48+r10] + movdqa xmm12,XMMWORD PTR[64+r10] + movdqa xmm15,XMMWORD PTR[80+r10] + movdqa xmm14,XMMWORD PTR[96+r10] + DB 0F3h,0C3h ;repret +_vpaes_preheat ENDP + + + + + + +ALIGN 64 +_vpaes_consts:: +$L$k_inv:: + DQ 00E05060F0D080180h,0040703090A0B0C02h + DQ 001040A060F0B0780h,0030D0E0C02050809h + +$L$k_s0F:: + DQ 00F0F0F0F0F0F0F0Fh,00F0F0F0F0F0F0F0Fh + +$L$k_ipt:: + DQ 0C2B2E8985A2A7000h,0CABAE09052227808h + DQ 04C01307D317C4D00h,0CD80B1FCB0FDCC81h + +$L$k_sb1:: + DQ 0B19BE18FCB503E00h,0A5DF7A6E142AF544h + DQ 03618D415FAE22300h,03BF7CCC10D2ED9EFh +$L$k_sb2:: + DQ 0E27A93C60B712400h,05EB7E955BC982FCDh + DQ 069EB88400AE12900h,0C2A163C8AB82234Ah +$L$k_sbo:: + DQ 0D0D26D176FBDC700h,015AABF7AC502A878h + DQ 0CFE474A55FBB6A00h,08E1E90D1412B35FAh + +$L$k_mc_forward:: + DQ 00407060500030201h,00C0F0E0D080B0A09h + DQ 0080B0A0904070605h,0000302010C0F0E0Dh + DQ 00C0F0E0D080B0A09h,00407060500030201h + DQ 0000302010C0F0E0Dh,0080B0A0904070605h + +$L$k_mc_backward:: + DQ 00605040702010003h,00E0D0C0F0A09080Bh + DQ 0020100030E0D0C0Fh,00A09080B06050407h + DQ 00E0D0C0F0A09080Bh,00605040702010003h + DQ 00A09080B06050407h,0020100030E0D0C0Fh + +$L$k_sr:: + DQ 00706050403020100h,00F0E0D0C0B0A0908h + DQ 0030E09040F0A0500h,00B06010C07020D08h + DQ 00F060D040B020900h,0070E050C030A0108h + DQ 00B0E0104070A0D00h,00306090C0F020508h + +$L$k_rcon:: + DQ 01F8391B9AF9DEEB6h,0702A98084D7C7D81h + +$L$k_s63:: + DQ 05B5B5B5B5B5B5B5Bh,05B5B5B5B5B5B5B5Bh + +$L$k_opt:: + DQ 0FF9F4929D6B66000h,0F7974121DEBE6808h + DQ 001EDBD5150BCEC00h,0E10D5DB1B05C0CE0h + +$L$k_deskew:: + DQ 007E4A34047A4E300h,01DFEB95A5DBEF91Ah + DQ 05F36B5DC83EA6900h,02841C2ABF49D1E77h + + + + + +$L$k_dksd:: + DQ 0FEB91A5DA3E44700h,00740E3A45A1DBEF9h + DQ 041C277F4B5368300h,05FDC69EAAB289D1Eh +$L$k_dksb:: + DQ 09A4FCA1F8550D500h,003D653861CC94C99h + DQ 0115BEDA7B6FC4A00h,0D993256F7E3482C8h +$L$k_dkse:: + DQ 0D5031CCA1FC9D600h,053859A4C994F5086h + DQ 0A23196054FDC7BE8h,0CD5EF96A20B31487h +$L$k_dks9:: + DQ 0B6116FC87ED9A700h,04AED933482255BFCh + DQ 04576516227143300h,08BB89FACE9DAFDCEh + + + + + +$L$k_dipt:: + DQ 00F505B040B545F00h,0154A411E114E451Ah + DQ 086E383E660056500h,012771772F491F194h + +$L$k_dsb9:: + DQ 0851C03539A86D600h,0CAD51F504F994CC9h + DQ 0C03B1789ECD74900h,0725E2C9EB2FBA565h +$L$k_dsbd:: + DQ 07D57CCDFE6B1A200h,0F56E9B13882A4439h + DQ 03CE2FAF724C6CB00h,02931180D15DEEFD3h +$L$k_dsbb:: + DQ 0D022649296B44200h,0602646F6B0F2D404h + DQ 0C19498A6CD596700h,0F3FF0C3E3255AA6Bh +$L$k_dsbe:: + DQ 046F2929626D4D000h,02242600464B4F6B0h + DQ 00C55A6CDFFAAC100h,09467F36B98593E32h +$L$k_dsbo:: + DQ 01387EA537EF94000h,0C7AA6DB9D4943E2Dh + DQ 012D7560F93441D00h,0CA4B8159D8C58E9Ch +DB 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 +DB 111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54 +DB 52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97 +DB 109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32 +DB 85,110,105,118,101,114,115,105,116,121,41,0 +ALIGN 64 + +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + lea rsi,QWORD PTR[16+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + lea rax,QWORD PTR[184+rax] + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_vpaes_set_encrypt_key + DD imagerel $L$SEH_end_vpaes_set_encrypt_key + DD imagerel $L$SEH_info_vpaes_set_encrypt_key + + DD imagerel $L$SEH_begin_vpaes_set_decrypt_key + DD imagerel $L$SEH_end_vpaes_set_decrypt_key + DD imagerel $L$SEH_info_vpaes_set_decrypt_key + + DD imagerel $L$SEH_begin_vpaes_encrypt + DD imagerel $L$SEH_end_vpaes_encrypt + DD imagerel $L$SEH_info_vpaes_encrypt + + DD imagerel $L$SEH_begin_vpaes_decrypt + DD imagerel $L$SEH_end_vpaes_decrypt + DD imagerel $L$SEH_info_vpaes_decrypt + + DD imagerel $L$SEH_begin_vpaes_cbc_encrypt + DD imagerel $L$SEH_end_vpaes_cbc_encrypt + DD imagerel $L$SEH_info_vpaes_cbc_encrypt + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_vpaes_set_encrypt_key:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$enc_key_body,imagerel $L$enc_key_epilogue +$L$SEH_info_vpaes_set_decrypt_key:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$dec_key_body,imagerel $L$dec_key_epilogue +$L$SEH_info_vpaes_encrypt:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$enc_body,imagerel $L$enc_epilogue +$L$SEH_info_vpaes_decrypt:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$dec_body,imagerel $L$dec_epilogue +$L$SEH_info_vpaes_cbc_encrypt:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$cbc_body,imagerel $L$cbc_epilogue + +.xdata ENDS +END + diff --git a/ext/libressl/crypto/aes/vpaes-mingw64-x86_64.S b/ext/libressl/crypto/aes/vpaes-mingw64-x86_64.S new file mode 100644 index 0000000..d6cb860 --- /dev/null +++ b/ext/libressl/crypto/aes/vpaes-mingw64-x86_64.S @@ -0,0 +1,1125 @@ +#include "x86_arch.h" +.text + + + + + + + + + + + + + + + + +.def _vpaes_encrypt_core; .scl 3; .type 32; .endef +.p2align 4 +_vpaes_encrypt_core: + movq %rdx,%r9 + movq $16,%r11 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa .Lk_ipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movdqu (%r9),%xmm5 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa .Lk_ipt+16(%rip),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm5,%xmm2 + pxor %xmm2,%xmm0 + addq $16,%r9 + leaq .Lk_mc_backward(%rip),%r10 + jmp .Lenc_entry + +.p2align 4 +.Lenc_loop: + + movdqa %xmm13,%xmm4 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + movdqa %xmm15,%xmm5 +.byte 102,15,56,0,234 + movdqa -64(%r11,%r10,1),%xmm1 + movdqa %xmm14,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm5,%xmm2 + movdqa (%r11,%r10,1),%xmm4 + movdqa %xmm0,%xmm3 +.byte 102,15,56,0,193 + addq $16,%r9 + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addq $16,%r11 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andq $48,%r11 + pxor %xmm3,%xmm0 + subq $1,%rax + +.Lenc_entry: + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm5 +.byte 102,15,56,0,232 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm5,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm5,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 + movdqu (%r9),%xmm5 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + jnz .Lenc_loop + + + movdqa -96(%r10),%xmm4 + movdqa -80(%r10),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%r11,%r10,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + retq + + + + + + + +.def _vpaes_decrypt_core; .scl 3; .type 32; .endef +.p2align 4 +_vpaes_decrypt_core: + movq %rdx,%r9 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa .Lk_dipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movq %rax,%r11 + psrld $4,%xmm1 + movdqu (%r9),%xmm5 + shlq $4,%r11 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa .Lk_dipt+16(%rip),%xmm0 + xorq $48,%r11 + leaq .Lk_dsbd(%rip),%r10 +.byte 102,15,56,0,193 + andq $48,%r11 + pxor %xmm5,%xmm2 + movdqa .Lk_mc_forward+48(%rip),%xmm5 + pxor %xmm2,%xmm0 + addq $16,%r9 + addq %r10,%r11 + jmp .Ldec_entry + +.p2align 4 +.Ldec_loop: + + + + movdqa -32(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa -16(%r10),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + addq $16,%r9 + +.byte 102,15,56,0,197 + movdqa 0(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 16(%r10),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + subq $1,%rax + +.byte 102,15,56,0,197 + movdqa 32(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 48(%r10),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + +.byte 102,15,56,0,197 + movdqa 64(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 80(%r10),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + +.byte 102,15,58,15,237,12 + +.Ldec_entry: + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqu (%r9),%xmm0 + jnz .Ldec_loop + + + movdqa 96(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%r10),%xmm0 + movdqa -352(%r11),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + retq + + + + + + + +.def _vpaes_schedule_core; .scl 3; .type 32; .endef +.p2align 4 +_vpaes_schedule_core: + + + + + + call _vpaes_preheat + movdqa .Lk_rcon(%rip),%xmm8 + movdqu (%rdi),%xmm0 + + + movdqa %xmm0,%xmm3 + leaq .Lk_ipt(%rip),%r11 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm7 + + leaq .Lk_sr(%rip),%r10 + testq %rcx,%rcx + jnz .Lschedule_am_decrypting + + + movdqu %xmm0,(%rdx) + jmp .Lschedule_go + +.Lschedule_am_decrypting: + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%rdx) + xorq $48,%r8 + +.Lschedule_go: + cmpl $192,%esi + ja .Lschedule_256 + je .Lschedule_192 + + + + + + + + + + +.Lschedule_128: + movl $10,%esi + +.Loop_schedule_128: + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + jmp .Loop_schedule_128 + + + + + + + + + + + + + + + + +.p2align 4 +.Lschedule_192: + movdqu 8(%rdi),%xmm0 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%esi + +.Loop_schedule_192: + call _vpaes_schedule_round +.byte 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp .Loop_schedule_192 + + + + + + + + + + + +.p2align 4 +.Lschedule_256: + movdqu 16(%rdi),%xmm0 + call _vpaes_schedule_transform + movl $7,%esi + +.Loop_schedule_256: + call _vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + + + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + + + pshufd $255,%xmm0,%xmm0 + movdqa %xmm7,%xmm5 + movdqa %xmm6,%xmm7 + call _vpaes_schedule_low_round + movdqa %xmm5,%xmm7 + + jmp .Loop_schedule_256 + + + + + + + + + + + + +.p2align 4 +.Lschedule_mangle_last: + + leaq .Lk_deskew(%rip),%r11 + testq %rcx,%rcx + jnz .Lschedule_mangle_last_dec + + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,193 + leaq .Lk_opt(%rip),%r11 + addq $32,%rdx + +.Lschedule_mangle_last_dec: + addq $-16,%rdx + pxor .Lk_s63(%rip),%xmm0 + call _vpaes_schedule_transform + movdqu %xmm0,(%rdx) + + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + retq + + + + + + + + + + + + + + + + +.def _vpaes_schedule_192_smear; .scl 3; .type 32; .endef +.p2align 4 +_vpaes_schedule_192_smear: + pshufd $128,%xmm6,%xmm0 + pxor %xmm0,%xmm6 + pshufd $254,%xmm7,%xmm0 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + pxor %xmm1,%xmm1 + movhlps %xmm1,%xmm6 + retq + + + + + + + + + + + + + + + + + + + + +.def _vpaes_schedule_round; .scl 3; .type 32; .endef +.p2align 4 +_vpaes_schedule_round: + + pxor %xmm1,%xmm1 +.byte 102,65,15,58,15,200,15 +.byte 102,69,15,58,15,192,15 + pxor %xmm1,%xmm7 + + + pshufd $255,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + + + + +_vpaes_schedule_low_round: + + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor .Lk_s63(%rip),%xmm7 + + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa %xmm13,%xmm4 +.byte 102,15,56,0,226 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + + + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + retq + + + + + + + + + + + +.def _vpaes_schedule_transform; .scl 3; .type 32; .endef +.p2align 4 +_vpaes_schedule_transform: + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa (%r11),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%r11),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + retq + + + + + + + + + + + + + + + + + + + + + + + + + +.def _vpaes_schedule_mangle; .scl 3; .type 32; .endef +.p2align 4 +_vpaes_schedule_mangle: + movdqa %xmm0,%xmm4 + movdqa .Lk_mc_forward(%rip),%xmm5 + testq %rcx,%rcx + jnz .Lschedule_mangle_dec + + + addq $16,%rdx + pxor .Lk_s63(%rip),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + + jmp .Lschedule_mangle_both +.p2align 4 +.Lschedule_mangle_dec: + + leaq .Lk_dksd(%rip),%r11 + movdqa %xmm9,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm4 + + movdqa 0(%r11),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 32(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 64(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 96(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + + addq $-16,%rdx + +.Lschedule_mangle_both: + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + addq $-16,%r8 + andq $48,%r8 + movdqu %xmm3,(%rdx) + retq + + + + + +.globl vpaes_set_encrypt_key +.def vpaes_set_encrypt_key; .scl 2; .type 32; .endef +.p2align 4 +vpaes_set_encrypt_key: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_vpaes_set_encrypt_key: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + + leaq -184(%rsp),%rsp + movaps %xmm6,16(%rsp) + movaps %xmm7,32(%rsp) + movaps %xmm8,48(%rsp) + movaps %xmm9,64(%rsp) + movaps %xmm10,80(%rsp) + movaps %xmm11,96(%rsp) + movaps %xmm12,112(%rsp) + movaps %xmm13,128(%rsp) + movaps %xmm14,144(%rsp) + movaps %xmm15,160(%rsp) +.Lenc_key_body: + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + + movl $0,%ecx + movl $48,%r8d + call _vpaes_schedule_core + movaps 16(%rsp),%xmm6 + movaps 32(%rsp),%xmm7 + movaps 48(%rsp),%xmm8 + movaps 64(%rsp),%xmm9 + movaps 80(%rsp),%xmm10 + movaps 96(%rsp),%xmm11 + movaps 112(%rsp),%xmm12 + movaps 128(%rsp),%xmm13 + movaps 144(%rsp),%xmm14 + movaps 160(%rsp),%xmm15 + leaq 184(%rsp),%rsp +.Lenc_key_epilogue: + xorl %eax,%eax + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_vpaes_set_encrypt_key: + +.globl vpaes_set_decrypt_key +.def vpaes_set_decrypt_key; .scl 2; .type 32; .endef +.p2align 4 +vpaes_set_decrypt_key: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_vpaes_set_decrypt_key: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + + leaq -184(%rsp),%rsp + movaps %xmm6,16(%rsp) + movaps %xmm7,32(%rsp) + movaps %xmm8,48(%rsp) + movaps %xmm9,64(%rsp) + movaps %xmm10,80(%rsp) + movaps %xmm11,96(%rsp) + movaps %xmm12,112(%rsp) + movaps %xmm13,128(%rsp) + movaps %xmm14,144(%rsp) + movaps %xmm15,160(%rsp) +.Ldec_key_body: + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + shll $4,%eax + leaq 16(%rdx,%rax,1),%rdx + + movl $1,%ecx + movl %esi,%r8d + shrl $1,%r8d + andl $32,%r8d + xorl $32,%r8d + call _vpaes_schedule_core + movaps 16(%rsp),%xmm6 + movaps 32(%rsp),%xmm7 + movaps 48(%rsp),%xmm8 + movaps 64(%rsp),%xmm9 + movaps 80(%rsp),%xmm10 + movaps 96(%rsp),%xmm11 + movaps 112(%rsp),%xmm12 + movaps 128(%rsp),%xmm13 + movaps 144(%rsp),%xmm14 + movaps 160(%rsp),%xmm15 + leaq 184(%rsp),%rsp +.Ldec_key_epilogue: + xorl %eax,%eax + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_vpaes_set_decrypt_key: + +.globl vpaes_encrypt +.def vpaes_encrypt; .scl 2; .type 32; .endef +.p2align 4 +vpaes_encrypt: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_vpaes_encrypt: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + + leaq -184(%rsp),%rsp + movaps %xmm6,16(%rsp) + movaps %xmm7,32(%rsp) + movaps %xmm8,48(%rsp) + movaps %xmm9,64(%rsp) + movaps %xmm10,80(%rsp) + movaps %xmm11,96(%rsp) + movaps %xmm12,112(%rsp) + movaps %xmm13,128(%rsp) + movaps %xmm14,144(%rsp) + movaps %xmm15,160(%rsp) +.Lenc_body: + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_encrypt_core + movdqu %xmm0,(%rsi) + movaps 16(%rsp),%xmm6 + movaps 32(%rsp),%xmm7 + movaps 48(%rsp),%xmm8 + movaps 64(%rsp),%xmm9 + movaps 80(%rsp),%xmm10 + movaps 96(%rsp),%xmm11 + movaps 112(%rsp),%xmm12 + movaps 128(%rsp),%xmm13 + movaps 144(%rsp),%xmm14 + movaps 160(%rsp),%xmm15 + leaq 184(%rsp),%rsp +.Lenc_epilogue: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_vpaes_encrypt: + +.globl vpaes_decrypt +.def vpaes_decrypt; .scl 2; .type 32; .endef +.p2align 4 +vpaes_decrypt: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_vpaes_decrypt: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + + leaq -184(%rsp),%rsp + movaps %xmm6,16(%rsp) + movaps %xmm7,32(%rsp) + movaps %xmm8,48(%rsp) + movaps %xmm9,64(%rsp) + movaps %xmm10,80(%rsp) + movaps %xmm11,96(%rsp) + movaps %xmm12,112(%rsp) + movaps %xmm13,128(%rsp) + movaps %xmm14,144(%rsp) + movaps %xmm15,160(%rsp) +.Ldec_body: + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_decrypt_core + movdqu %xmm0,(%rsi) + movaps 16(%rsp),%xmm6 + movaps 32(%rsp),%xmm7 + movaps 48(%rsp),%xmm8 + movaps 64(%rsp),%xmm9 + movaps 80(%rsp),%xmm10 + movaps 96(%rsp),%xmm11 + movaps 112(%rsp),%xmm12 + movaps 128(%rsp),%xmm13 + movaps 144(%rsp),%xmm14 + movaps 160(%rsp),%xmm15 + leaq 184(%rsp),%rsp +.Ldec_epilogue: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_vpaes_decrypt: +.globl vpaes_cbc_encrypt +.def vpaes_cbc_encrypt; .scl 2; .type 32; .endef +.p2align 4 +vpaes_cbc_encrypt: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_vpaes_cbc_encrypt: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + movq 40(%rsp),%r8 + movq 48(%rsp),%r9 + + xchgq %rcx,%rdx + subq $16,%rcx + jc .Lcbc_abort + leaq -184(%rsp),%rsp + movaps %xmm6,16(%rsp) + movaps %xmm7,32(%rsp) + movaps %xmm8,48(%rsp) + movaps %xmm9,64(%rsp) + movaps %xmm10,80(%rsp) + movaps %xmm11,96(%rsp) + movaps %xmm12,112(%rsp) + movaps %xmm13,128(%rsp) + movaps %xmm14,144(%rsp) + movaps %xmm15,160(%rsp) +.Lcbc_body: + movdqu (%r8),%xmm6 + subq %rdi,%rsi + call _vpaes_preheat + cmpl $0,%r9d + je .Lcbc_dec_loop + jmp .Lcbc_enc_loop +.p2align 4 +.Lcbc_enc_loop: + movdqu (%rdi),%xmm0 + pxor %xmm6,%xmm0 + call _vpaes_encrypt_core + movdqa %xmm0,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc .Lcbc_enc_loop + jmp .Lcbc_done +.p2align 4 +.Lcbc_dec_loop: + movdqu (%rdi),%xmm0 + movdqa %xmm0,%xmm7 + call _vpaes_decrypt_core + pxor %xmm6,%xmm0 + movdqa %xmm7,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc .Lcbc_dec_loop +.Lcbc_done: + movdqu %xmm6,(%r8) + movaps 16(%rsp),%xmm6 + movaps 32(%rsp),%xmm7 + movaps 48(%rsp),%xmm8 + movaps 64(%rsp),%xmm9 + movaps 80(%rsp),%xmm10 + movaps 96(%rsp),%xmm11 + movaps 112(%rsp),%xmm12 + movaps 128(%rsp),%xmm13 + movaps 144(%rsp),%xmm14 + movaps 160(%rsp),%xmm15 + leaq 184(%rsp),%rsp +.Lcbc_epilogue: +.Lcbc_abort: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_vpaes_cbc_encrypt: + + + + + + +.def _vpaes_preheat; .scl 3; .type 32; .endef +.p2align 4 +_vpaes_preheat: + leaq .Lk_s0F(%rip),%r10 + movdqa -32(%r10),%xmm10 + movdqa -16(%r10),%xmm11 + movdqa 0(%r10),%xmm9 + movdqa 48(%r10),%xmm13 + movdqa 64(%r10),%xmm12 + movdqa 80(%r10),%xmm15 + movdqa 96(%r10),%xmm14 + retq + + + + + + + +.p2align 6 +_vpaes_consts: +.Lk_inv: +.quad 0x0E05060F0D080180, 0x040703090A0B0C02 +.quad 0x01040A060F0B0780, 0x030D0E0C02050809 + +.Lk_s0F: +.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F + +.Lk_ipt: +.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 +.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 + +.Lk_sb1: +.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 +.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +.Lk_sb2: +.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD +.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +.Lk_sbo: +.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 +.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA + +.Lk_mc_forward: +.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 +.quad 0x080B0A0904070605, 0x000302010C0F0E0D +.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 +.quad 0x000302010C0F0E0D, 0x080B0A0904070605 + +.Lk_mc_backward: +.quad 0x0605040702010003, 0x0E0D0C0F0A09080B +.quad 0x020100030E0D0C0F, 0x0A09080B06050407 +.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 +.quad 0x0A09080B06050407, 0x020100030E0D0C0F + +.Lk_sr: +.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 +.quad 0x030E09040F0A0500, 0x0B06010C07020D08 +.quad 0x0F060D040B020900, 0x070E050C030A0108 +.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 + +.Lk_rcon: +.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 + +.Lk_s63: +.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B + +.Lk_opt: +.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 +.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 + +.Lk_deskew: +.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A +.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 + + + + + +.Lk_dksd: +.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 +.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +.Lk_dksb: +.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 +.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +.Lk_dkse: +.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 +.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +.Lk_dks9: +.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC +.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE + + + + + +.Lk_dipt: +.quad 0x0F505B040B545F00, 0x154A411E114E451A +.quad 0x86E383E660056500, 0x12771772F491F194 + +.Lk_dsb9: +.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 +.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +.Lk_dsbd: +.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 +.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +.Lk_dsbb: +.quad 0xD022649296B44200, 0x602646F6B0F2D404 +.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +.Lk_dsbe: +.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 +.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 +.Lk_dsbo: +.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D +.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 +.p2align 6 + + +.def se_handler; .scl 3; .type 32; .endef +.p2align 4 +se_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + movq 8(%r9),%rsi + movq 56(%r9),%r11 + + movl 0(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jb .Lin_prologue + + movq 152(%r8),%rax + + movl 4(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jae .Lin_prologue + + leaq 16(%rax),%rsi + leaq 512(%r8),%rdi + movl $20,%ecx +.long 0xa548f3fc + leaq 184(%rax),%rax + +.Lin_prologue: + movq 8(%rax),%rdi + movq 16(%rax),%rsi + movq %rax,152(%r8) + movq %rsi,168(%r8) + movq %rdi,176(%r8) + + movq 40(%r9),%rdi + movq %r8,%rsi + movl $154,%ecx +.long 0xa548f3fc + + movq %r9,%rsi + xorq %rcx,%rcx + movq 8(%rsi),%rdx + movq 0(%rsi),%r8 + movq 16(%rsi),%r9 + movq 40(%rsi),%r10 + leaq 56(%rsi),%r11 + leaq 24(%rsi),%r12 + movq %r10,32(%rsp) + movq %r11,40(%rsp) + movq %r12,48(%rsp) + movq %rcx,56(%rsp) + call *__imp_RtlVirtualUnwind(%rip) + + movl $1,%eax + addq $64,%rsp + popfq + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + popq %rdi + popq %rsi + retq + + +.section .pdata +.p2align 2 +.rva .LSEH_begin_vpaes_set_encrypt_key +.rva .LSEH_end_vpaes_set_encrypt_key +.rva .LSEH_info_vpaes_set_encrypt_key + +.rva .LSEH_begin_vpaes_set_decrypt_key +.rva .LSEH_end_vpaes_set_decrypt_key +.rva .LSEH_info_vpaes_set_decrypt_key + +.rva .LSEH_begin_vpaes_encrypt +.rva .LSEH_end_vpaes_encrypt +.rva .LSEH_info_vpaes_encrypt + +.rva .LSEH_begin_vpaes_decrypt +.rva .LSEH_end_vpaes_decrypt +.rva .LSEH_info_vpaes_decrypt + +.rva .LSEH_begin_vpaes_cbc_encrypt +.rva .LSEH_end_vpaes_cbc_encrypt +.rva .LSEH_info_vpaes_cbc_encrypt + +.section .xdata +.p2align 3 +.LSEH_info_vpaes_set_encrypt_key: +.byte 9,0,0,0 +.rva se_handler +.rva .Lenc_key_body,.Lenc_key_epilogue +.LSEH_info_vpaes_set_decrypt_key: +.byte 9,0,0,0 +.rva se_handler +.rva .Ldec_key_body,.Ldec_key_epilogue +.LSEH_info_vpaes_encrypt: +.byte 9,0,0,0 +.rva se_handler +.rva .Lenc_body,.Lenc_epilogue +.LSEH_info_vpaes_decrypt: +.byte 9,0,0,0 +.rva se_handler +.rva .Ldec_body,.Ldec_epilogue +.LSEH_info_vpaes_cbc_encrypt: +.byte 9,0,0,0 +.rva se_handler +.rva .Lcbc_body,.Lcbc_epilogue diff --git a/ext/libressl/crypto/bf/Makefile b/ext/libressl/crypto/bf/Makefile new file mode 100644 index 0000000..dac4aba --- /dev/null +++ b/ext/libressl/crypto/bf/Makefile @@ -0,0 +1,14 @@ +include ../../ssl_common.mk +CFLAGS+= -D__BEGIN_HIDDEN_DECLS= -D__END_HIDDEN_DECLS= + +obj = bf_skey.o bf_enc.o bf_ecb.o bf_cfb64.o bf_ofb64.o + + +all: $(obj) +dep: all + +%.o: %.c + $(CC) $(CFLAGS) -c $< + +clean: + rm -f *.o *.a diff --git a/ext/libressl/crypto/bf/bf_cfb64.c b/ext/libressl/crypto/bf/bf_cfb64.c new file mode 100644 index 0000000..6cc0bb9 --- /dev/null +++ b/ext/libressl/crypto/bf/bf_cfb64.c @@ -0,0 +1,121 @@ +/* $OpenBSD: bf_cfb64.c,v 1.5 2014/10/28 07:35:58 jsg Exp $ */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +#include +#include "bf_locl.h" + +/* The input and output encrypted as though 64bit cfb mode is being + * used. The extra state information to record how much of the + * 64bit block we have used is contained in *num; + */ + +void BF_cfb64_encrypt(const unsigned char *in, unsigned char *out, long length, + const BF_KEY *schedule, unsigned char *ivec, int *num, int encrypt) + { + BF_LONG v0,v1,t; + int n= *num; + long l=length; + BF_LONG ti[2]; + unsigned char *iv,c,cc; + + iv=(unsigned char *)ivec; + if (encrypt) + { + while (l--) + { + if (n == 0) + { + n2l(iv,v0); ti[0]=v0; + n2l(iv,v1); ti[1]=v1; + BF_encrypt((BF_LONG *)ti,schedule); + iv=(unsigned char *)ivec; + t=ti[0]; l2n(t,iv); + t=ti[1]; l2n(t,iv); + iv=(unsigned char *)ivec; + } + c= *(in++)^iv[n]; + *(out++)=c; + iv[n]=c; + n=(n+1)&0x07; + } + } + else + { + while (l--) + { + if (n == 0) + { + n2l(iv,v0); ti[0]=v0; + n2l(iv,v1); ti[1]=v1; + BF_encrypt((BF_LONG *)ti,schedule); + iv=(unsigned char *)ivec; + t=ti[0]; l2n(t,iv); + t=ti[1]; l2n(t,iv); + iv=(unsigned char *)ivec; + } + cc= *(in++); + c=iv[n]; + iv[n]=cc; + *(out++)=c^cc; + n=(n+1)&0x07; + } + } + v0=v1=ti[0]=ti[1]=t=c=cc=0; + *num=n; + } + diff --git a/ext/libressl/crypto/bf/bf_ecb.c b/ext/libressl/crypto/bf/bf_ecb.c new file mode 100644 index 0000000..305bd78 --- /dev/null +++ b/ext/libressl/crypto/bf/bf_ecb.c @@ -0,0 +1,94 @@ +/* $OpenBSD: bf_ecb.c,v 1.6 2014/07/09 11:10:50 bcook Exp $ */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +#include +#include "bf_locl.h" +#include + +/* Blowfish as implemented from 'Blowfish: Springer-Verlag paper' + * (From LECTURE NOTES IN COMPUTER SCIENCE 809, FAST SOFTWARE ENCRYPTION, + * CAMBRIDGE SECURITY WORKSHOP, CAMBRIDGE, U.K., DECEMBER 9-11, 1993) + */ + +const char *BF_options(void) + { +#ifdef BF_PTR + return("blowfish(ptr)"); +#elif defined(BF_PTR2) + return("blowfish(ptr2)"); +#else + return("blowfish(idx)"); +#endif + } + +void BF_ecb_encrypt(const unsigned char *in, unsigned char *out, + const BF_KEY *key, int encrypt) + { + BF_LONG l,d[2]; + + n2l(in,l); d[0]=l; + n2l(in,l); d[1]=l; + if (encrypt) + BF_encrypt(d,key); + else + BF_decrypt(d,key); + l=d[0]; l2n(l,out); + l=d[1]; l2n(l,out); + l=d[0]=d[1]=0; + } + diff --git a/ext/libressl/crypto/bf/bf_enc.c b/ext/libressl/crypto/bf/bf_enc.c new file mode 100644 index 0000000..2cf1c86 --- /dev/null +++ b/ext/libressl/crypto/bf/bf_enc.c @@ -0,0 +1,306 @@ +/* $OpenBSD: bf_enc.c,v 1.6 2014/10/28 07:35:58 jsg Exp $ */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +#include +#include "bf_locl.h" + +/* Blowfish as implemented from 'Blowfish: Springer-Verlag paper' + * (From LECTURE NOTES IN COMPUTER SCIENCE 809, FAST SOFTWARE ENCRYPTION, + * CAMBRIDGE SECURITY WORKSHOP, CAMBRIDGE, U.K., DECEMBER 9-11, 1993) + */ + +#if (BF_ROUNDS != 16) && (BF_ROUNDS != 20) +#error If you set BF_ROUNDS to some value other than 16 or 20, you will have \ +to modify the code. +#endif + +void BF_encrypt(BF_LONG *data, const BF_KEY *key) + { +#ifndef BF_PTR2 + BF_LONG l,r; + const BF_LONG *p,*s; + + p=key->P; + s= &(key->S[0]); + l=data[0]; + r=data[1]; + + l^=p[0]; + BF_ENC(r,l,s,p[ 1]); + BF_ENC(l,r,s,p[ 2]); + BF_ENC(r,l,s,p[ 3]); + BF_ENC(l,r,s,p[ 4]); + BF_ENC(r,l,s,p[ 5]); + BF_ENC(l,r,s,p[ 6]); + BF_ENC(r,l,s,p[ 7]); + BF_ENC(l,r,s,p[ 8]); + BF_ENC(r,l,s,p[ 9]); + BF_ENC(l,r,s,p[10]); + BF_ENC(r,l,s,p[11]); + BF_ENC(l,r,s,p[12]); + BF_ENC(r,l,s,p[13]); + BF_ENC(l,r,s,p[14]); + BF_ENC(r,l,s,p[15]); + BF_ENC(l,r,s,p[16]); +#if BF_ROUNDS == 20 + BF_ENC(r,l,s,p[17]); + BF_ENC(l,r,s,p[18]); + BF_ENC(r,l,s,p[19]); + BF_ENC(l,r,s,p[20]); +#endif + r^=p[BF_ROUNDS+1]; + + data[1]=l&0xffffffffL; + data[0]=r&0xffffffffL; +#else + BF_LONG l,r,t,*k; + + l=data[0]; + r=data[1]; + k=(BF_LONG*)key; + + l^=k[0]; + BF_ENC(r,l,k, 1); + BF_ENC(l,r,k, 2); + BF_ENC(r,l,k, 3); + BF_ENC(l,r,k, 4); + BF_ENC(r,l,k, 5); + BF_ENC(l,r,k, 6); + BF_ENC(r,l,k, 7); + BF_ENC(l,r,k, 8); + BF_ENC(r,l,k, 9); + BF_ENC(l,r,k,10); + BF_ENC(r,l,k,11); + BF_ENC(l,r,k,12); + BF_ENC(r,l,k,13); + BF_ENC(l,r,k,14); + BF_ENC(r,l,k,15); + BF_ENC(l,r,k,16); +#if BF_ROUNDS == 20 + BF_ENC(r,l,k,17); + BF_ENC(l,r,k,18); + BF_ENC(r,l,k,19); + BF_ENC(l,r,k,20); +#endif + r^=k[BF_ROUNDS+1]; + + data[1]=l&0xffffffffL; + data[0]=r&0xffffffffL; +#endif + } + +#ifndef BF_DEFAULT_OPTIONS + +void BF_decrypt(BF_LONG *data, const BF_KEY *key) + { +#ifndef BF_PTR2 + BF_LONG l,r; + const BF_LONG *p,*s; + + p=key->P; + s= &(key->S[0]); + l=data[0]; + r=data[1]; + + l^=p[BF_ROUNDS+1]; +#if BF_ROUNDS == 20 + BF_ENC(r,l,s,p[20]); + BF_ENC(l,r,s,p[19]); + BF_ENC(r,l,s,p[18]); + BF_ENC(l,r,s,p[17]); +#endif + BF_ENC(r,l,s,p[16]); + BF_ENC(l,r,s,p[15]); + BF_ENC(r,l,s,p[14]); + BF_ENC(l,r,s,p[13]); + BF_ENC(r,l,s,p[12]); + BF_ENC(l,r,s,p[11]); + BF_ENC(r,l,s,p[10]); + BF_ENC(l,r,s,p[ 9]); + BF_ENC(r,l,s,p[ 8]); + BF_ENC(l,r,s,p[ 7]); + BF_ENC(r,l,s,p[ 6]); + BF_ENC(l,r,s,p[ 5]); + BF_ENC(r,l,s,p[ 4]); + BF_ENC(l,r,s,p[ 3]); + BF_ENC(r,l,s,p[ 2]); + BF_ENC(l,r,s,p[ 1]); + r^=p[0]; + + data[1]=l&0xffffffffL; + data[0]=r&0xffffffffL; +#else + BF_LONG l,r,t,*k; + + l=data[0]; + r=data[1]; + k=(BF_LONG *)key; + + l^=k[BF_ROUNDS+1]; +#if BF_ROUNDS == 20 + BF_ENC(r,l,k,20); + BF_ENC(l,r,k,19); + BF_ENC(r,l,k,18); + BF_ENC(l,r,k,17); +#endif + BF_ENC(r,l,k,16); + BF_ENC(l,r,k,15); + BF_ENC(r,l,k,14); + BF_ENC(l,r,k,13); + BF_ENC(r,l,k,12); + BF_ENC(l,r,k,11); + BF_ENC(r,l,k,10); + BF_ENC(l,r,k, 9); + BF_ENC(r,l,k, 8); + BF_ENC(l,r,k, 7); + BF_ENC(r,l,k, 6); + BF_ENC(l,r,k, 5); + BF_ENC(r,l,k, 4); + BF_ENC(l,r,k, 3); + BF_ENC(r,l,k, 2); + BF_ENC(l,r,k, 1); + r^=k[0]; + + data[1]=l&0xffffffffL; + data[0]=r&0xffffffffL; +#endif + } + +void BF_cbc_encrypt(const unsigned char *in, unsigned char *out, long length, + const BF_KEY *schedule, unsigned char *ivec, int encrypt) + { + BF_LONG tin0,tin1; + BF_LONG tout0,tout1,xor0,xor1; + long l=length; + BF_LONG tin[2]; + + if (encrypt) + { + n2l(ivec,tout0); + n2l(ivec,tout1); + ivec-=8; + for (l-=8; l>=0; l-=8) + { + n2l(in,tin0); + n2l(in,tin1); + tin0^=tout0; + tin1^=tout1; + tin[0]=tin0; + tin[1]=tin1; + BF_encrypt(tin,schedule); + tout0=tin[0]; + tout1=tin[1]; + l2n(tout0,out); + l2n(tout1,out); + } + if (l != -8) + { + n2ln(in,tin0,tin1,l+8); + tin0^=tout0; + tin1^=tout1; + tin[0]=tin0; + tin[1]=tin1; + BF_encrypt(tin,schedule); + tout0=tin[0]; + tout1=tin[1]; + l2n(tout0,out); + l2n(tout1,out); + } + l2n(tout0,ivec); + l2n(tout1,ivec); + } + else + { + n2l(ivec,xor0); + n2l(ivec,xor1); + ivec-=8; + for (l-=8; l>=0; l-=8) + { + n2l(in,tin0); + n2l(in,tin1); + tin[0]=tin0; + tin[1]=tin1; + BF_decrypt(tin,schedule); + tout0=tin[0]^xor0; + tout1=tin[1]^xor1; + l2n(tout0,out); + l2n(tout1,out); + xor0=tin0; + xor1=tin1; + } + if (l != -8) + { + n2l(in,tin0); + n2l(in,tin1); + tin[0]=tin0; + tin[1]=tin1; + BF_decrypt(tin,schedule); + tout0=tin[0]^xor0; + tout1=tin[1]^xor1; + l2nn(tout0,tout1,out,l+8); + xor0=tin0; + xor1=tin1; + } + l2n(xor0,ivec); + l2n(xor1,ivec); + } + tin0=tin1=tout0=tout1=xor0=xor1=0; + tin[0]=tin[1]=0; + } + +#endif diff --git a/ext/libressl/crypto/bf/bf_locl.h b/ext/libressl/crypto/bf/bf_locl.h new file mode 100644 index 0000000..0b66362 --- /dev/null +++ b/ext/libressl/crypto/bf/bf_locl.h @@ -0,0 +1,219 @@ +/* $OpenBSD: bf_locl.h,v 1.3 2014/06/12 15:49:28 deraadt Exp $ */ +/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +#ifndef HEADER_BF_LOCL_H +#define HEADER_BF_LOCL_H +#include /* BF_PTR, BF_PTR2 */ + +#undef c2l +#define c2l(c,l) (l =((unsigned long)(*((c)++))) , \ + l|=((unsigned long)(*((c)++)))<< 8L, \ + l|=((unsigned long)(*((c)++)))<<16L, \ + l|=((unsigned long)(*((c)++)))<<24L) + +/* NOTE - c is not incremented as per c2l */ +#undef c2ln +#define c2ln(c,l1,l2,n) { \ + c+=n; \ + l1=l2=0; \ + switch (n) { \ + case 8: l2 =((unsigned long)(*(--(c))))<<24L; \ + case 7: l2|=((unsigned long)(*(--(c))))<<16L; \ + case 6: l2|=((unsigned long)(*(--(c))))<< 8L; \ + case 5: l2|=((unsigned long)(*(--(c)))); \ + case 4: l1 =((unsigned long)(*(--(c))))<<24L; \ + case 3: l1|=((unsigned long)(*(--(c))))<<16L; \ + case 2: l1|=((unsigned long)(*(--(c))))<< 8L; \ + case 1: l1|=((unsigned long)(*(--(c)))); \ + } \ + } + +#undef l2c +#define l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \ + *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \ + *((c)++)=(unsigned char)(((l)>>16L)&0xff), \ + *((c)++)=(unsigned char)(((l)>>24L)&0xff)) + +/* NOTE - c is not incremented as per l2c */ +#undef l2cn +#define l2cn(l1,l2,c,n) { \ + c+=n; \ + switch (n) { \ + case 8: *(--(c))=(unsigned char)(((l2)>>24L)&0xff); \ + case 7: *(--(c))=(unsigned char)(((l2)>>16L)&0xff); \ + case 6: *(--(c))=(unsigned char)(((l2)>> 8L)&0xff); \ + case 5: *(--(c))=(unsigned char)(((l2) )&0xff); \ + case 4: *(--(c))=(unsigned char)(((l1)>>24L)&0xff); \ + case 3: *(--(c))=(unsigned char)(((l1)>>16L)&0xff); \ + case 2: *(--(c))=(unsigned char)(((l1)>> 8L)&0xff); \ + case 1: *(--(c))=(unsigned char)(((l1) )&0xff); \ + } \ + } + +/* NOTE - c is not incremented as per n2l */ +#define n2ln(c,l1,l2,n) { \ + c+=n; \ + l1=l2=0; \ + switch (n) { \ + case 8: l2 =((unsigned long)(*(--(c)))) ; \ + case 7: l2|=((unsigned long)(*(--(c))))<< 8; \ + case 6: l2|=((unsigned long)(*(--(c))))<<16; \ + case 5: l2|=((unsigned long)(*(--(c))))<<24; \ + case 4: l1 =((unsigned long)(*(--(c)))) ; \ + case 3: l1|=((unsigned long)(*(--(c))))<< 8; \ + case 2: l1|=((unsigned long)(*(--(c))))<<16; \ + case 1: l1|=((unsigned long)(*(--(c))))<<24; \ + } \ + } + +/* NOTE - c is not incremented as per l2n */ +#define l2nn(l1,l2,c,n) { \ + c+=n; \ + switch (n) { \ + case 8: *(--(c))=(unsigned char)(((l2) )&0xff); \ + case 7: *(--(c))=(unsigned char)(((l2)>> 8)&0xff); \ + case 6: *(--(c))=(unsigned char)(((l2)>>16)&0xff); \ + case 5: *(--(c))=(unsigned char)(((l2)>>24)&0xff); \ + case 4: *(--(c))=(unsigned char)(((l1) )&0xff); \ + case 3: *(--(c))=(unsigned char)(((l1)>> 8)&0xff); \ + case 2: *(--(c))=(unsigned char)(((l1)>>16)&0xff); \ + case 1: *(--(c))=(unsigned char)(((l1)>>24)&0xff); \ + } \ + } + +#undef n2l +#define n2l(c,l) (l =((unsigned long)(*((c)++)))<<24L, \ + l|=((unsigned long)(*((c)++)))<<16L, \ + l|=((unsigned long)(*((c)++)))<< 8L, \ + l|=((unsigned long)(*((c)++)))) + +#undef l2n +#define l2n(l,c) (*((c)++)=(unsigned char)(((l)>>24L)&0xff), \ + *((c)++)=(unsigned char)(((l)>>16L)&0xff), \ + *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \ + *((c)++)=(unsigned char)(((l) )&0xff)) + +/* This is actually a big endian algorithm, the most significant byte + * is used to lookup array 0 */ + +#if defined(BF_PTR2) + +/* + * This is basically a special Intel version. Point is that Intel + * doesn't have many registers, but offers a reach choice of addressing + * modes. So we spare some registers by directly traversing BF_KEY + * structure and hiring the most decorated addressing mode. The code + * generated by EGCS is *perfectly* competitive with assembler + * implementation! + */ +#define BF_ENC(LL,R,KEY,Pi) (\ + LL^=KEY[Pi], \ + t= KEY[BF_ROUNDS+2 + 0 + ((R>>24)&0xFF)], \ + t+= KEY[BF_ROUNDS+2 + 256 + ((R>>16)&0xFF)], \ + t^= KEY[BF_ROUNDS+2 + 512 + ((R>>8 )&0xFF)], \ + t+= KEY[BF_ROUNDS+2 + 768 + ((R )&0xFF)], \ + LL^=t \ + ) + +#elif defined(BF_PTR) + +#ifndef BF_LONG_LOG2 +#define BF_LONG_LOG2 2 /* default to BF_LONG being 32 bits */ +#endif +#define BF_M (0xFF<>BF_i)&BF_M gets folded into a single instruction, namely + * rlwinm. So let'em double-check if their compiler does it. + */ + +#define BF_ENC(LL,R,S,P) ( \ + LL^=P, \ + LL^= (((*(BF_LONG *)((unsigned char *)&(S[ 0])+((R>>BF_0)&BF_M))+ \ + *(BF_LONG *)((unsigned char *)&(S[256])+((R>>BF_1)&BF_M)))^ \ + *(BF_LONG *)((unsigned char *)&(S[512])+((R>>BF_2)&BF_M)))+ \ + *(BF_LONG *)((unsigned char *)&(S[768])+((R<>24)&0xff)] + \ + S[0x0100+((int)(R>>16)&0xff)])^ \ + S[0x0200+((int)(R>> 8)&0xff)])+ \ + S[0x0300+((int)(R )&0xff)])&0xffffffffL \ + ) +#endif + +#endif diff --git a/ext/libressl/crypto/bf/bf_ofb64.c b/ext/libressl/crypto/bf/bf_ofb64.c new file mode 100644 index 0000000..9e33162 --- /dev/null +++ b/ext/libressl/crypto/bf/bf_ofb64.c @@ -0,0 +1,110 @@ +/* $OpenBSD: bf_ofb64.c,v 1.5 2014/10/28 07:35:58 jsg Exp $ */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +#include +#include "bf_locl.h" + +/* The input and output encrypted as though 64bit ofb mode is being + * used. The extra state information to record how much of the + * 64bit block we have used is contained in *num; + */ +void BF_ofb64_encrypt(const unsigned char *in, unsigned char *out, long length, + const BF_KEY *schedule, unsigned char *ivec, int *num) + { + BF_LONG v0,v1,t; + int n= *num; + long l=length; + unsigned char d[8]; + char *dp; + BF_LONG ti[2]; + unsigned char *iv; + int save=0; + + iv=(unsigned char *)ivec; + n2l(iv,v0); + n2l(iv,v1); + ti[0]=v0; + ti[1]=v1; + dp=(char *)d; + l2n(v0,dp); + l2n(v1,dp); + while (l--) + { + if (n == 0) + { + BF_encrypt((BF_LONG *)ti,schedule); + dp=(char *)d; + t=ti[0]; l2n(t,dp); + t=ti[1]; l2n(t,dp); + save++; + } + *(out++)= *(in++)^d[n]; + n=(n+1)&0x07; + } + if (save) + { + v0=ti[0]; + v1=ti[1]; + iv=(unsigned char *)ivec; + l2n(v0,iv); + l2n(v1,iv); + } + t=v0=v1=ti[0]=ti[1]=0; + *num=n; + } + diff --git a/ext/libressl/crypto/bf/bf_pi.h b/ext/libressl/crypto/bf/bf_pi.h new file mode 100644 index 0000000..ce4843a --- /dev/null +++ b/ext/libressl/crypto/bf/bf_pi.h @@ -0,0 +1,328 @@ +/* $OpenBSD: bf_pi.h,v 1.4 2016/12/21 15:49:29 jsing Exp $ */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +__BEGIN_HIDDEN_DECLS + +static const BF_KEY bf_init= { + { + 0x243f6a88L, 0x85a308d3L, 0x13198a2eL, 0x03707344L, + 0xa4093822L, 0x299f31d0L, 0x082efa98L, 0xec4e6c89L, + 0x452821e6L, 0x38d01377L, 0xbe5466cfL, 0x34e90c6cL, + 0xc0ac29b7L, 0xc97c50ddL, 0x3f84d5b5L, 0xb5470917L, + 0x9216d5d9L, 0x8979fb1b + },{ + 0xd1310ba6L, 0x98dfb5acL, 0x2ffd72dbL, 0xd01adfb7L, + 0xb8e1afedL, 0x6a267e96L, 0xba7c9045L, 0xf12c7f99L, + 0x24a19947L, 0xb3916cf7L, 0x0801f2e2L, 0x858efc16L, + 0x636920d8L, 0x71574e69L, 0xa458fea3L, 0xf4933d7eL, + 0x0d95748fL, 0x728eb658L, 0x718bcd58L, 0x82154aeeL, + 0x7b54a41dL, 0xc25a59b5L, 0x9c30d539L, 0x2af26013L, + 0xc5d1b023L, 0x286085f0L, 0xca417918L, 0xb8db38efL, + 0x8e79dcb0L, 0x603a180eL, 0x6c9e0e8bL, 0xb01e8a3eL, + 0xd71577c1L, 0xbd314b27L, 0x78af2fdaL, 0x55605c60L, + 0xe65525f3L, 0xaa55ab94L, 0x57489862L, 0x63e81440L, + 0x55ca396aL, 0x2aab10b6L, 0xb4cc5c34L, 0x1141e8ceL, + 0xa15486afL, 0x7c72e993L, 0xb3ee1411L, 0x636fbc2aL, + 0x2ba9c55dL, 0x741831f6L, 0xce5c3e16L, 0x9b87931eL, + 0xafd6ba33L, 0x6c24cf5cL, 0x7a325381L, 0x28958677L, + 0x3b8f4898L, 0x6b4bb9afL, 0xc4bfe81bL, 0x66282193L, + 0x61d809ccL, 0xfb21a991L, 0x487cac60L, 0x5dec8032L, + 0xef845d5dL, 0xe98575b1L, 0xdc262302L, 0xeb651b88L, + 0x23893e81L, 0xd396acc5L, 0x0f6d6ff3L, 0x83f44239L, + 0x2e0b4482L, 0xa4842004L, 0x69c8f04aL, 0x9e1f9b5eL, + 0x21c66842L, 0xf6e96c9aL, 0x670c9c61L, 0xabd388f0L, + 0x6a51a0d2L, 0xd8542f68L, 0x960fa728L, 0xab5133a3L, + 0x6eef0b6cL, 0x137a3be4L, 0xba3bf050L, 0x7efb2a98L, + 0xa1f1651dL, 0x39af0176L, 0x66ca593eL, 0x82430e88L, + 0x8cee8619L, 0x456f9fb4L, 0x7d84a5c3L, 0x3b8b5ebeL, + 0xe06f75d8L, 0x85c12073L, 0x401a449fL, 0x56c16aa6L, + 0x4ed3aa62L, 0x363f7706L, 0x1bfedf72L, 0x429b023dL, + 0x37d0d724L, 0xd00a1248L, 0xdb0fead3L, 0x49f1c09bL, + 0x075372c9L, 0x80991b7bL, 0x25d479d8L, 0xf6e8def7L, + 0xe3fe501aL, 0xb6794c3bL, 0x976ce0bdL, 0x04c006baL, + 0xc1a94fb6L, 0x409f60c4L, 0x5e5c9ec2L, 0x196a2463L, + 0x68fb6fafL, 0x3e6c53b5L, 0x1339b2ebL, 0x3b52ec6fL, + 0x6dfc511fL, 0x9b30952cL, 0xcc814544L, 0xaf5ebd09L, + 0xbee3d004L, 0xde334afdL, 0x660f2807L, 0x192e4bb3L, + 0xc0cba857L, 0x45c8740fL, 0xd20b5f39L, 0xb9d3fbdbL, + 0x5579c0bdL, 0x1a60320aL, 0xd6a100c6L, 0x402c7279L, + 0x679f25feL, 0xfb1fa3ccL, 0x8ea5e9f8L, 0xdb3222f8L, + 0x3c7516dfL, 0xfd616b15L, 0x2f501ec8L, 0xad0552abL, + 0x323db5faL, 0xfd238760L, 0x53317b48L, 0x3e00df82L, + 0x9e5c57bbL, 0xca6f8ca0L, 0x1a87562eL, 0xdf1769dbL, + 0xd542a8f6L, 0x287effc3L, 0xac6732c6L, 0x8c4f5573L, + 0x695b27b0L, 0xbbca58c8L, 0xe1ffa35dL, 0xb8f011a0L, + 0x10fa3d98L, 0xfd2183b8L, 0x4afcb56cL, 0x2dd1d35bL, + 0x9a53e479L, 0xb6f84565L, 0xd28e49bcL, 0x4bfb9790L, + 0xe1ddf2daL, 0xa4cb7e33L, 0x62fb1341L, 0xcee4c6e8L, + 0xef20cadaL, 0x36774c01L, 0xd07e9efeL, 0x2bf11fb4L, + 0x95dbda4dL, 0xae909198L, 0xeaad8e71L, 0x6b93d5a0L, + 0xd08ed1d0L, 0xafc725e0L, 0x8e3c5b2fL, 0x8e7594b7L, + 0x8ff6e2fbL, 0xf2122b64L, 0x8888b812L, 0x900df01cL, + 0x4fad5ea0L, 0x688fc31cL, 0xd1cff191L, 0xb3a8c1adL, + 0x2f2f2218L, 0xbe0e1777L, 0xea752dfeL, 0x8b021fa1L, + 0xe5a0cc0fL, 0xb56f74e8L, 0x18acf3d6L, 0xce89e299L, + 0xb4a84fe0L, 0xfd13e0b7L, 0x7cc43b81L, 0xd2ada8d9L, + 0x165fa266L, 0x80957705L, 0x93cc7314L, 0x211a1477L, + 0xe6ad2065L, 0x77b5fa86L, 0xc75442f5L, 0xfb9d35cfL, + 0xebcdaf0cL, 0x7b3e89a0L, 0xd6411bd3L, 0xae1e7e49L, + 0x00250e2dL, 0x2071b35eL, 0x226800bbL, 0x57b8e0afL, + 0x2464369bL, 0xf009b91eL, 0x5563911dL, 0x59dfa6aaL, + 0x78c14389L, 0xd95a537fL, 0x207d5ba2L, 0x02e5b9c5L, + 0x83260376L, 0x6295cfa9L, 0x11c81968L, 0x4e734a41L, + 0xb3472dcaL, 0x7b14a94aL, 0x1b510052L, 0x9a532915L, + 0xd60f573fL, 0xbc9bc6e4L, 0x2b60a476L, 0x81e67400L, + 0x08ba6fb5L, 0x571be91fL, 0xf296ec6bL, 0x2a0dd915L, + 0xb6636521L, 0xe7b9f9b6L, 0xff34052eL, 0xc5855664L, + 0x53b02d5dL, 0xa99f8fa1L, 0x08ba4799L, 0x6e85076aL, + 0x4b7a70e9L, 0xb5b32944L, 0xdb75092eL, 0xc4192623L, + 0xad6ea6b0L, 0x49a7df7dL, 0x9cee60b8L, 0x8fedb266L, + 0xecaa8c71L, 0x699a17ffL, 0x5664526cL, 0xc2b19ee1L, + 0x193602a5L, 0x75094c29L, 0xa0591340L, 0xe4183a3eL, + 0x3f54989aL, 0x5b429d65L, 0x6b8fe4d6L, 0x99f73fd6L, + 0xa1d29c07L, 0xefe830f5L, 0x4d2d38e6L, 0xf0255dc1L, + 0x4cdd2086L, 0x8470eb26L, 0x6382e9c6L, 0x021ecc5eL, + 0x09686b3fL, 0x3ebaefc9L, 0x3c971814L, 0x6b6a70a1L, + 0x687f3584L, 0x52a0e286L, 0xb79c5305L, 0xaa500737L, + 0x3e07841cL, 0x7fdeae5cL, 0x8e7d44ecL, 0x5716f2b8L, + 0xb03ada37L, 0xf0500c0dL, 0xf01c1f04L, 0x0200b3ffL, + 0xae0cf51aL, 0x3cb574b2L, 0x25837a58L, 0xdc0921bdL, + 0xd19113f9L, 0x7ca92ff6L, 0x94324773L, 0x22f54701L, + 0x3ae5e581L, 0x37c2dadcL, 0xc8b57634L, 0x9af3dda7L, + 0xa9446146L, 0x0fd0030eL, 0xecc8c73eL, 0xa4751e41L, + 0xe238cd99L, 0x3bea0e2fL, 0x3280bba1L, 0x183eb331L, + 0x4e548b38L, 0x4f6db908L, 0x6f420d03L, 0xf60a04bfL, + 0x2cb81290L, 0x24977c79L, 0x5679b072L, 0xbcaf89afL, + 0xde9a771fL, 0xd9930810L, 0xb38bae12L, 0xdccf3f2eL, + 0x5512721fL, 0x2e6b7124L, 0x501adde6L, 0x9f84cd87L, + 0x7a584718L, 0x7408da17L, 0xbc9f9abcL, 0xe94b7d8cL, + 0xec7aec3aL, 0xdb851dfaL, 0x63094366L, 0xc464c3d2L, + 0xef1c1847L, 0x3215d908L, 0xdd433b37L, 0x24c2ba16L, + 0x12a14d43L, 0x2a65c451L, 0x50940002L, 0x133ae4ddL, + 0x71dff89eL, 0x10314e55L, 0x81ac77d6L, 0x5f11199bL, + 0x043556f1L, 0xd7a3c76bL, 0x3c11183bL, 0x5924a509L, + 0xf28fe6edL, 0x97f1fbfaL, 0x9ebabf2cL, 0x1e153c6eL, + 0x86e34570L, 0xeae96fb1L, 0x860e5e0aL, 0x5a3e2ab3L, + 0x771fe71cL, 0x4e3d06faL, 0x2965dcb9L, 0x99e71d0fL, + 0x803e89d6L, 0x5266c825L, 0x2e4cc978L, 0x9c10b36aL, + 0xc6150ebaL, 0x94e2ea78L, 0xa5fc3c53L, 0x1e0a2df4L, + 0xf2f74ea7L, 0x361d2b3dL, 0x1939260fL, 0x19c27960L, + 0x5223a708L, 0xf71312b6L, 0xebadfe6eL, 0xeac31f66L, + 0xe3bc4595L, 0xa67bc883L, 0xb17f37d1L, 0x018cff28L, + 0xc332ddefL, 0xbe6c5aa5L, 0x65582185L, 0x68ab9802L, + 0xeecea50fL, 0xdb2f953bL, 0x2aef7dadL, 0x5b6e2f84L, + 0x1521b628L, 0x29076170L, 0xecdd4775L, 0x619f1510L, + 0x13cca830L, 0xeb61bd96L, 0x0334fe1eL, 0xaa0363cfL, + 0xb5735c90L, 0x4c70a239L, 0xd59e9e0bL, 0xcbaade14L, + 0xeecc86bcL, 0x60622ca7L, 0x9cab5cabL, 0xb2f3846eL, + 0x648b1eafL, 0x19bdf0caL, 0xa02369b9L, 0x655abb50L, + 0x40685a32L, 0x3c2ab4b3L, 0x319ee9d5L, 0xc021b8f7L, + 0x9b540b19L, 0x875fa099L, 0x95f7997eL, 0x623d7da8L, + 0xf837889aL, 0x97e32d77L, 0x11ed935fL, 0x16681281L, + 0x0e358829L, 0xc7e61fd6L, 0x96dedfa1L, 0x7858ba99L, + 0x57f584a5L, 0x1b227263L, 0x9b83c3ffL, 0x1ac24696L, + 0xcdb30aebL, 0x532e3054L, 0x8fd948e4L, 0x6dbc3128L, + 0x58ebf2efL, 0x34c6ffeaL, 0xfe28ed61L, 0xee7c3c73L, + 0x5d4a14d9L, 0xe864b7e3L, 0x42105d14L, 0x203e13e0L, + 0x45eee2b6L, 0xa3aaabeaL, 0xdb6c4f15L, 0xfacb4fd0L, + 0xc742f442L, 0xef6abbb5L, 0x654f3b1dL, 0x41cd2105L, + 0xd81e799eL, 0x86854dc7L, 0xe44b476aL, 0x3d816250L, + 0xcf62a1f2L, 0x5b8d2646L, 0xfc8883a0L, 0xc1c7b6a3L, + 0x7f1524c3L, 0x69cb7492L, 0x47848a0bL, 0x5692b285L, + 0x095bbf00L, 0xad19489dL, 0x1462b174L, 0x23820e00L, + 0x58428d2aL, 0x0c55f5eaL, 0x1dadf43eL, 0x233f7061L, + 0x3372f092L, 0x8d937e41L, 0xd65fecf1L, 0x6c223bdbL, + 0x7cde3759L, 0xcbee7460L, 0x4085f2a7L, 0xce77326eL, + 0xa6078084L, 0x19f8509eL, 0xe8efd855L, 0x61d99735L, + 0xa969a7aaL, 0xc50c06c2L, 0x5a04abfcL, 0x800bcadcL, + 0x9e447a2eL, 0xc3453484L, 0xfdd56705L, 0x0e1e9ec9L, + 0xdb73dbd3L, 0x105588cdL, 0x675fda79L, 0xe3674340L, + 0xc5c43465L, 0x713e38d8L, 0x3d28f89eL, 0xf16dff20L, + 0x153e21e7L, 0x8fb03d4aL, 0xe6e39f2bL, 0xdb83adf7L, + 0xe93d5a68L, 0x948140f7L, 0xf64c261cL, 0x94692934L, + 0x411520f7L, 0x7602d4f7L, 0xbcf46b2eL, 0xd4a20068L, + 0xd4082471L, 0x3320f46aL, 0x43b7d4b7L, 0x500061afL, + 0x1e39f62eL, 0x97244546L, 0x14214f74L, 0xbf8b8840L, + 0x4d95fc1dL, 0x96b591afL, 0x70f4ddd3L, 0x66a02f45L, + 0xbfbc09ecL, 0x03bd9785L, 0x7fac6dd0L, 0x31cb8504L, + 0x96eb27b3L, 0x55fd3941L, 0xda2547e6L, 0xabca0a9aL, + 0x28507825L, 0x530429f4L, 0x0a2c86daL, 0xe9b66dfbL, + 0x68dc1462L, 0xd7486900L, 0x680ec0a4L, 0x27a18deeL, + 0x4f3ffea2L, 0xe887ad8cL, 0xb58ce006L, 0x7af4d6b6L, + 0xaace1e7cL, 0xd3375fecL, 0xce78a399L, 0x406b2a42L, + 0x20fe9e35L, 0xd9f385b9L, 0xee39d7abL, 0x3b124e8bL, + 0x1dc9faf7L, 0x4b6d1856L, 0x26a36631L, 0xeae397b2L, + 0x3a6efa74L, 0xdd5b4332L, 0x6841e7f7L, 0xca7820fbL, + 0xfb0af54eL, 0xd8feb397L, 0x454056acL, 0xba489527L, + 0x55533a3aL, 0x20838d87L, 0xfe6ba9b7L, 0xd096954bL, + 0x55a867bcL, 0xa1159a58L, 0xcca92963L, 0x99e1db33L, + 0xa62a4a56L, 0x3f3125f9L, 0x5ef47e1cL, 0x9029317cL, + 0xfdf8e802L, 0x04272f70L, 0x80bb155cL, 0x05282ce3L, + 0x95c11548L, 0xe4c66d22L, 0x48c1133fL, 0xc70f86dcL, + 0x07f9c9eeL, 0x41041f0fL, 0x404779a4L, 0x5d886e17L, + 0x325f51ebL, 0xd59bc0d1L, 0xf2bcc18fL, 0x41113564L, + 0x257b7834L, 0x602a9c60L, 0xdff8e8a3L, 0x1f636c1bL, + 0x0e12b4c2L, 0x02e1329eL, 0xaf664fd1L, 0xcad18115L, + 0x6b2395e0L, 0x333e92e1L, 0x3b240b62L, 0xeebeb922L, + 0x85b2a20eL, 0xe6ba0d99L, 0xde720c8cL, 0x2da2f728L, + 0xd0127845L, 0x95b794fdL, 0x647d0862L, 0xe7ccf5f0L, + 0x5449a36fL, 0x877d48faL, 0xc39dfd27L, 0xf33e8d1eL, + 0x0a476341L, 0x992eff74L, 0x3a6f6eabL, 0xf4f8fd37L, + 0xa812dc60L, 0xa1ebddf8L, 0x991be14cL, 0xdb6e6b0dL, + 0xc67b5510L, 0x6d672c37L, 0x2765d43bL, 0xdcd0e804L, + 0xf1290dc7L, 0xcc00ffa3L, 0xb5390f92L, 0x690fed0bL, + 0x667b9ffbL, 0xcedb7d9cL, 0xa091cf0bL, 0xd9155ea3L, + 0xbb132f88L, 0x515bad24L, 0x7b9479bfL, 0x763bd6ebL, + 0x37392eb3L, 0xcc115979L, 0x8026e297L, 0xf42e312dL, + 0x6842ada7L, 0xc66a2b3bL, 0x12754cccL, 0x782ef11cL, + 0x6a124237L, 0xb79251e7L, 0x06a1bbe6L, 0x4bfb6350L, + 0x1a6b1018L, 0x11caedfaL, 0x3d25bdd8L, 0xe2e1c3c9L, + 0x44421659L, 0x0a121386L, 0xd90cec6eL, 0xd5abea2aL, + 0x64af674eL, 0xda86a85fL, 0xbebfe988L, 0x64e4c3feL, + 0x9dbc8057L, 0xf0f7c086L, 0x60787bf8L, 0x6003604dL, + 0xd1fd8346L, 0xf6381fb0L, 0x7745ae04L, 0xd736fcccL, + 0x83426b33L, 0xf01eab71L, 0xb0804187L, 0x3c005e5fL, + 0x77a057beL, 0xbde8ae24L, 0x55464299L, 0xbf582e61L, + 0x4e58f48fL, 0xf2ddfda2L, 0xf474ef38L, 0x8789bdc2L, + 0x5366f9c3L, 0xc8b38e74L, 0xb475f255L, 0x46fcd9b9L, + 0x7aeb2661L, 0x8b1ddf84L, 0x846a0e79L, 0x915f95e2L, + 0x466e598eL, 0x20b45770L, 0x8cd55591L, 0xc902de4cL, + 0xb90bace1L, 0xbb8205d0L, 0x11a86248L, 0x7574a99eL, + 0xb77f19b6L, 0xe0a9dc09L, 0x662d09a1L, 0xc4324633L, + 0xe85a1f02L, 0x09f0be8cL, 0x4a99a025L, 0x1d6efe10L, + 0x1ab93d1dL, 0x0ba5a4dfL, 0xa186f20fL, 0x2868f169L, + 0xdcb7da83L, 0x573906feL, 0xa1e2ce9bL, 0x4fcd7f52L, + 0x50115e01L, 0xa70683faL, 0xa002b5c4L, 0x0de6d027L, + 0x9af88c27L, 0x773f8641L, 0xc3604c06L, 0x61a806b5L, + 0xf0177a28L, 0xc0f586e0L, 0x006058aaL, 0x30dc7d62L, + 0x11e69ed7L, 0x2338ea63L, 0x53c2dd94L, 0xc2c21634L, + 0xbbcbee56L, 0x90bcb6deL, 0xebfc7da1L, 0xce591d76L, + 0x6f05e409L, 0x4b7c0188L, 0x39720a3dL, 0x7c927c24L, + 0x86e3725fL, 0x724d9db9L, 0x1ac15bb4L, 0xd39eb8fcL, + 0xed545578L, 0x08fca5b5L, 0xd83d7cd3L, 0x4dad0fc4L, + 0x1e50ef5eL, 0xb161e6f8L, 0xa28514d9L, 0x6c51133cL, + 0x6fd5c7e7L, 0x56e14ec4L, 0x362abfceL, 0xddc6c837L, + 0xd79a3234L, 0x92638212L, 0x670efa8eL, 0x406000e0L, + 0x3a39ce37L, 0xd3faf5cfL, 0xabc27737L, 0x5ac52d1bL, + 0x5cb0679eL, 0x4fa33742L, 0xd3822740L, 0x99bc9bbeL, + 0xd5118e9dL, 0xbf0f7315L, 0xd62d1c7eL, 0xc700c47bL, + 0xb78c1b6bL, 0x21a19045L, 0xb26eb1beL, 0x6a366eb4L, + 0x5748ab2fL, 0xbc946e79L, 0xc6a376d2L, 0x6549c2c8L, + 0x530ff8eeL, 0x468dde7dL, 0xd5730a1dL, 0x4cd04dc6L, + 0x2939bbdbL, 0xa9ba4650L, 0xac9526e8L, 0xbe5ee304L, + 0xa1fad5f0L, 0x6a2d519aL, 0x63ef8ce2L, 0x9a86ee22L, + 0xc089c2b8L, 0x43242ef6L, 0xa51e03aaL, 0x9cf2d0a4L, + 0x83c061baL, 0x9be96a4dL, 0x8fe51550L, 0xba645bd6L, + 0x2826a2f9L, 0xa73a3ae1L, 0x4ba99586L, 0xef5562e9L, + 0xc72fefd3L, 0xf752f7daL, 0x3f046f69L, 0x77fa0a59L, + 0x80e4a915L, 0x87b08601L, 0x9b09e6adL, 0x3b3ee593L, + 0xe990fd5aL, 0x9e34d797L, 0x2cf0b7d9L, 0x022b8b51L, + 0x96d5ac3aL, 0x017da67dL, 0xd1cf3ed6L, 0x7c7d2d28L, + 0x1f9f25cfL, 0xadf2b89bL, 0x5ad6b472L, 0x5a88f54cL, + 0xe029ac71L, 0xe019a5e6L, 0x47b0acfdL, 0xed93fa9bL, + 0xe8d3c48dL, 0x283b57ccL, 0xf8d56629L, 0x79132e28L, + 0x785f0191L, 0xed756055L, 0xf7960e44L, 0xe3d35e8cL, + 0x15056dd4L, 0x88f46dbaL, 0x03a16125L, 0x0564f0bdL, + 0xc3eb9e15L, 0x3c9057a2L, 0x97271aecL, 0xa93a072aL, + 0x1b3f6d9bL, 0x1e6321f5L, 0xf59c66fbL, 0x26dcf319L, + 0x7533d928L, 0xb155fdf5L, 0x03563482L, 0x8aba3cbbL, + 0x28517711L, 0xc20ad9f8L, 0xabcc5167L, 0xccad925fL, + 0x4de81751L, 0x3830dc8eL, 0x379d5862L, 0x9320f991L, + 0xea7a90c2L, 0xfb3e7bceL, 0x5121ce64L, 0x774fbe32L, + 0xa8b6e37eL, 0xc3293d46L, 0x48de5369L, 0x6413e680L, + 0xa2ae0810L, 0xdd6db224L, 0x69852dfdL, 0x09072166L, + 0xb39a460aL, 0x6445c0ddL, 0x586cdecfL, 0x1c20c8aeL, + 0x5bbef7ddL, 0x1b588d40L, 0xccd2017fL, 0x6bb4e3bbL, + 0xdda26a7eL, 0x3a59ff45L, 0x3e350a44L, 0xbcb4cdd5L, + 0x72eacea8L, 0xfa6484bbL, 0x8d6612aeL, 0xbf3c6f47L, + 0xd29be463L, 0x542f5d9eL, 0xaec2771bL, 0xf64e6370L, + 0x740e0d8dL, 0xe75b1357L, 0xf8721671L, 0xaf537d5dL, + 0x4040cb08L, 0x4eb4e2ccL, 0x34d2466aL, 0x0115af84L, + 0xe1b00428L, 0x95983a1dL, 0x06b89fb4L, 0xce6ea048L, + 0x6f3f3b82L, 0x3520ab82L, 0x011a1d4bL, 0x277227f8L, + 0x611560b1L, 0xe7933fdcL, 0xbb3a792bL, 0x344525bdL, + 0xa08839e1L, 0x51ce794bL, 0x2f32c9b7L, 0xa01fbac9L, + 0xe01cc87eL, 0xbcc7d1f6L, 0xcf0111c3L, 0xa1e8aac7L, + 0x1a908749L, 0xd44fbd9aL, 0xd0dadecbL, 0xd50ada38L, + 0x0339c32aL, 0xc6913667L, 0x8df9317cL, 0xe0b12b4fL, + 0xf79e59b7L, 0x43f5bb3aL, 0xf2d519ffL, 0x27d9459cL, + 0xbf97222cL, 0x15e6fc2aL, 0x0f91fc71L, 0x9b941525L, + 0xfae59361L, 0xceb69cebL, 0xc2a86459L, 0x12baa8d1L, + 0xb6c1075eL, 0xe3056a0cL, 0x10d25065L, 0xcb03a442L, + 0xe0ec6e0eL, 0x1698db3bL, 0x4c98a0beL, 0x3278e964L, + 0x9f1f9532L, 0xe0d392dfL, 0xd3a0342bL, 0x8971f21eL, + 0x1b0a7441L, 0x4ba3348cL, 0xc5be7120L, 0xc37632d8L, + 0xdf359f8dL, 0x9b992f2eL, 0xe60b6f47L, 0x0fe3f11dL, + 0xe54cda54L, 0x1edad891L, 0xce6279cfL, 0xcd3e7e6fL, + 0x1618b166L, 0xfd2c1d05L, 0x848fd2c5L, 0xf6fb2299L, + 0xf523f357L, 0xa6327623L, 0x93a83531L, 0x56cccd02L, + 0xacf08162L, 0x5a75ebb5L, 0x6e163697L, 0x88d273ccL, + 0xde966292L, 0x81b949d0L, 0x4c50901bL, 0x71c65614L, + 0xe6c6c7bdL, 0x327a140aL, 0x45e1d006L, 0xc3f27b9aL, + 0xc9aa53fdL, 0x62a80f00L, 0xbb25bfe2L, 0x35bdd2f6L, + 0x71126905L, 0xb2040222L, 0xb6cbcf7cL, 0xcd769c2bL, + 0x53113ec0L, 0x1640e3d3L, 0x38abbd60L, 0x2547adf0L, + 0xba38209cL, 0xf746ce76L, 0x77afa1c5L, 0x20756060L, + 0x85cbfe4eL, 0x8ae88dd8L, 0x7aaaf9b0L, 0x4cf9aa7eL, + 0x1948c25cL, 0x02fb8a8cL, 0x01c36ae4L, 0xd6ebe1f9L, + 0x90d4f869L, 0xa65cdea0L, 0x3f09252dL, 0xc208e69fL, + 0xb74e6132L, 0xce77e25bL, 0x578fdfe3L, 0x3ac372e6L, + } + }; + +__END_HIDDEN_DECLS diff --git a/ext/libressl/crypto/bf/bf_skey.c b/ext/libressl/crypto/bf/bf_skey.c new file mode 100644 index 0000000..8191d17 --- /dev/null +++ b/ext/libressl/crypto/bf/bf_skey.c @@ -0,0 +1,117 @@ +/* $OpenBSD: bf_skey.c,v 1.12 2014/06/12 15:49:28 deraadt Exp $ */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +#include +#include +#include +#include +#include "bf_locl.h" +#include "bf_pi.h" + +void BF_set_key(BF_KEY *key, int len, const unsigned char *data) + { + int i; + BF_LONG *p,ri,in[2]; + const unsigned char *d,*end; + + + memcpy(key,&bf_init,sizeof(BF_KEY)); + p=key->P; + + if (len > ((BF_ROUNDS+2)*4)) len=(BF_ROUNDS+2)*4; + + d=data; + end= &(data[len]); + for (i=0; i<(BF_ROUNDS+2); i++) + { + ri= *(d++); + if (d >= end) d=data; + + ri<<=8; + ri|= *(d++); + if (d >= end) d=data; + + ri<<=8; + ri|= *(d++); + if (d >= end) d=data; + + ri<<=8; + ri|= *(d++); + if (d >= end) d=data; + + p[i]^=ri; + } + + in[0]=0L; + in[1]=0L; + for (i=0; i<(BF_ROUNDS+2); i+=2) + { + BF_encrypt(in,key); + p[i ]=in[0]; + p[i+1]=in[1]; + } + + p=key->S; + for (i=0; i<4*256; i+=2) + { + BF_encrypt(in,key); + p[i ]=in[0]; + p[i+1]=in[1]; + } + } + diff --git a/ext/libressl/crypto/chacha/Makefile b/ext/libressl/crypto/chacha/Makefile new file mode 100644 index 0000000..2eb56cb --- /dev/null +++ b/ext/libressl/crypto/chacha/Makefile @@ -0,0 +1,13 @@ +include ../../ssl_common.mk + +obj = chacha.o + + +all: $(obj) +dep: all + +%.o: %.c + $(CC) $(CFLAGS) -c $< + +clean: + rm -f *.o *.a diff --git a/ext/libressl/crypto/chacha/chacha-merged.c b/ext/libressl/crypto/chacha/chacha-merged.c new file mode 100644 index 0000000..67508f2 --- /dev/null +++ b/ext/libressl/crypto/chacha/chacha-merged.c @@ -0,0 +1,325 @@ +/* $OpenBSD: chacha-merged.c,v 1.9 2019/01/22 00:59:21 dlg Exp $ */ +/* +chacha-merged.c version 20080118 +D. J. Bernstein +Public domain. +*/ + +#include + +#include + +#define CHACHA_MINKEYLEN 16 +#define CHACHA_NONCELEN 8 +#define CHACHA_CTRLEN 8 +#define CHACHA_STATELEN (CHACHA_NONCELEN+CHACHA_CTRLEN) +#define CHACHA_BLOCKLEN 64 + +struct chacha_ctx { + u_int input[16]; + uint8_t ks[CHACHA_BLOCKLEN]; + uint8_t unused; +}; + +static inline void chacha_keysetup(struct chacha_ctx *x, const u_char *k, + u_int kbits) + __attribute__((__bounded__(__minbytes__, 2, CHACHA_MINKEYLEN))); +static inline void chacha_ivsetup(struct chacha_ctx *x, const u_char *iv, + const u_char *ctr) + __attribute__((__bounded__(__minbytes__, 2, CHACHA_NONCELEN))) + __attribute__((__bounded__(__minbytes__, 3, CHACHA_CTRLEN))); +static inline void chacha_encrypt_bytes(struct chacha_ctx *x, const u_char *m, + u_char *c, u_int bytes) + __attribute__((__bounded__(__buffer__, 2, 4))) + __attribute__((__bounded__(__buffer__, 3, 4))); + +typedef unsigned char u8; +typedef unsigned int u32; + +typedef struct chacha_ctx chacha_ctx; + +#define U8C(v) (v##U) +#define U32C(v) (v##U) + +#define U8V(v) ((u8)(v) & U8C(0xFF)) +#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF)) + +#define ROTL32(v, n) \ + (U32V((v) << (n)) | ((v) >> (32 - (n)))) + +#define U8TO32_LITTLE(p) \ + (((u32)((p)[0])) | \ + ((u32)((p)[1]) << 8) | \ + ((u32)((p)[2]) << 16) | \ + ((u32)((p)[3]) << 24)) + +#define U32TO8_LITTLE(p, v) \ + do { \ + (p)[0] = U8V((v)); \ + (p)[1] = U8V((v) >> 8); \ + (p)[2] = U8V((v) >> 16); \ + (p)[3] = U8V((v) >> 24); \ + } while (0) + +#define ROTATE(v,c) (ROTL32(v,c)) +#define XOR(v,w) ((v) ^ (w)) +#define PLUS(v,w) (U32V((v) + (w))) +#define PLUSONE(v) (PLUS((v),1)) + +#define QUARTERROUND(a,b,c,d) \ + a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ + c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ + a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ + c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); + +/* Initialise with "expand 32-byte k". */ +static const char sigma[16] = { + 0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x33, + 0x32, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b, +}; + +/* Initialise with "expand 16-byte k". */ +static const char tau[16] = { + 0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x31, + 0x36, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b, +}; + +static inline void +chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits) +{ + const char *constants; + + x->input[4] = U8TO32_LITTLE(k + 0); + x->input[5] = U8TO32_LITTLE(k + 4); + x->input[6] = U8TO32_LITTLE(k + 8); + x->input[7] = U8TO32_LITTLE(k + 12); + if (kbits == 256) { /* recommended */ + k += 16; + constants = sigma; + } else { /* kbits == 128 */ + constants = tau; + } + x->input[8] = U8TO32_LITTLE(k + 0); + x->input[9] = U8TO32_LITTLE(k + 4); + x->input[10] = U8TO32_LITTLE(k + 8); + x->input[11] = U8TO32_LITTLE(k + 12); + x->input[0] = U8TO32_LITTLE(constants + 0); + x->input[1] = U8TO32_LITTLE(constants + 4); + x->input[2] = U8TO32_LITTLE(constants + 8); + x->input[3] = U8TO32_LITTLE(constants + 12); +} + +static inline void +chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter) +{ + x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0); + x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4); + x->input[14] = U8TO32_LITTLE(iv + 0); + x->input[15] = U8TO32_LITTLE(iv + 4); +} + +static inline void +chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes) +{ + u32 x0, x1, x2, x3, x4, x5, x6, x7; + u32 x8, x9, x10, x11, x12, x13, x14, x15; + u32 j0, j1, j2, j3, j4, j5, j6, j7; + u32 j8, j9, j10, j11, j12, j13, j14, j15; + u8 *ctarget = NULL; + u8 tmp[64]; + u_int i; + + if (!bytes) + return; + + j0 = x->input[0]; + j1 = x->input[1]; + j2 = x->input[2]; + j3 = x->input[3]; + j4 = x->input[4]; + j5 = x->input[5]; + j6 = x->input[6]; + j7 = x->input[7]; + j8 = x->input[8]; + j9 = x->input[9]; + j10 = x->input[10]; + j11 = x->input[11]; + j12 = x->input[12]; + j13 = x->input[13]; + j14 = x->input[14]; + j15 = x->input[15]; + + for (;;) { + if (bytes < 64) { + for (i = 0; i < bytes; ++i) + tmp[i] = m[i]; + m = tmp; + ctarget = c; + c = tmp; + } + x0 = j0; + x1 = j1; + x2 = j2; + x3 = j3; + x4 = j4; + x5 = j5; + x6 = j6; + x7 = j7; + x8 = j8; + x9 = j9; + x10 = j10; + x11 = j11; + x12 = j12; + x13 = j13; + x14 = j14; + x15 = j15; + for (i = 20; i > 0; i -= 2) { + QUARTERROUND(x0, x4, x8, x12) + QUARTERROUND(x1, x5, x9, x13) + QUARTERROUND(x2, x6, x10, x14) + QUARTERROUND(x3, x7, x11, x15) + QUARTERROUND(x0, x5, x10, x15) + QUARTERROUND(x1, x6, x11, x12) + QUARTERROUND(x2, x7, x8, x13) + QUARTERROUND(x3, x4, x9, x14) + } + x0 = PLUS(x0, j0); + x1 = PLUS(x1, j1); + x2 = PLUS(x2, j2); + x3 = PLUS(x3, j3); + x4 = PLUS(x4, j4); + x5 = PLUS(x5, j5); + x6 = PLUS(x6, j6); + x7 = PLUS(x7, j7); + x8 = PLUS(x8, j8); + x9 = PLUS(x9, j9); + x10 = PLUS(x10, j10); + x11 = PLUS(x11, j11); + x12 = PLUS(x12, j12); + x13 = PLUS(x13, j13); + x14 = PLUS(x14, j14); + x15 = PLUS(x15, j15); + + if (bytes < 64) { + U32TO8_LITTLE(x->ks + 0, x0); + U32TO8_LITTLE(x->ks + 4, x1); + U32TO8_LITTLE(x->ks + 8, x2); + U32TO8_LITTLE(x->ks + 12, x3); + U32TO8_LITTLE(x->ks + 16, x4); + U32TO8_LITTLE(x->ks + 20, x5); + U32TO8_LITTLE(x->ks + 24, x6); + U32TO8_LITTLE(x->ks + 28, x7); + U32TO8_LITTLE(x->ks + 32, x8); + U32TO8_LITTLE(x->ks + 36, x9); + U32TO8_LITTLE(x->ks + 40, x10); + U32TO8_LITTLE(x->ks + 44, x11); + U32TO8_LITTLE(x->ks + 48, x12); + U32TO8_LITTLE(x->ks + 52, x13); + U32TO8_LITTLE(x->ks + 56, x14); + U32TO8_LITTLE(x->ks + 60, x15); + } + + x0 = XOR(x0, U8TO32_LITTLE(m + 0)); + x1 = XOR(x1, U8TO32_LITTLE(m + 4)); + x2 = XOR(x2, U8TO32_LITTLE(m + 8)); + x3 = XOR(x3, U8TO32_LITTLE(m + 12)); + x4 = XOR(x4, U8TO32_LITTLE(m + 16)); + x5 = XOR(x5, U8TO32_LITTLE(m + 20)); + x6 = XOR(x6, U8TO32_LITTLE(m + 24)); + x7 = XOR(x7, U8TO32_LITTLE(m + 28)); + x8 = XOR(x8, U8TO32_LITTLE(m + 32)); + x9 = XOR(x9, U8TO32_LITTLE(m + 36)); + x10 = XOR(x10, U8TO32_LITTLE(m + 40)); + x11 = XOR(x11, U8TO32_LITTLE(m + 44)); + x12 = XOR(x12, U8TO32_LITTLE(m + 48)); + x13 = XOR(x13, U8TO32_LITTLE(m + 52)); + x14 = XOR(x14, U8TO32_LITTLE(m + 56)); + x15 = XOR(x15, U8TO32_LITTLE(m + 60)); + + j12 = PLUSONE(j12); + if (!j12) { + j13 = PLUSONE(j13); + /* + * Stopping at 2^70 bytes per nonce is the user's + * responsibility. + */ + } + + U32TO8_LITTLE(c + 0, x0); + U32TO8_LITTLE(c + 4, x1); + U32TO8_LITTLE(c + 8, x2); + U32TO8_LITTLE(c + 12, x3); + U32TO8_LITTLE(c + 16, x4); + U32TO8_LITTLE(c + 20, x5); + U32TO8_LITTLE(c + 24, x6); + U32TO8_LITTLE(c + 28, x7); + U32TO8_LITTLE(c + 32, x8); + U32TO8_LITTLE(c + 36, x9); + U32TO8_LITTLE(c + 40, x10); + U32TO8_LITTLE(c + 44, x11); + U32TO8_LITTLE(c + 48, x12); + U32TO8_LITTLE(c + 52, x13); + U32TO8_LITTLE(c + 56, x14); + U32TO8_LITTLE(c + 60, x15); + + if (bytes <= 64) { + if (bytes < 64) { + for (i = 0; i < bytes; ++i) + ctarget[i] = c[i]; + } + x->input[12] = j12; + x->input[13] = j13; + x->unused = 64 - bytes; + return; + } + bytes -= 64; + c += 64; + m += 64; + } +} + +void +CRYPTO_hchacha_20(unsigned char subkey[32], const unsigned char key[32], + const unsigned char nonce[16]) +{ + uint32_t x[16]; + int i; + + x[0] = U8TO32_LITTLE(sigma + 0); + x[1] = U8TO32_LITTLE(sigma + 4); + x[2] = U8TO32_LITTLE(sigma + 8); + x[3] = U8TO32_LITTLE(sigma + 12); + x[4] = U8TO32_LITTLE(key + 0); + x[5] = U8TO32_LITTLE(key + 4); + x[6] = U8TO32_LITTLE(key + 8); + x[7] = U8TO32_LITTLE(key + 12); + x[8] = U8TO32_LITTLE(key + 16); + x[9] = U8TO32_LITTLE(key + 20); + x[10] = U8TO32_LITTLE(key + 24); + x[11] = U8TO32_LITTLE(key + 28); + x[12] = U8TO32_LITTLE(nonce + 0); + x[13] = U8TO32_LITTLE(nonce + 4); + x[14] = U8TO32_LITTLE(nonce + 8); + x[15] = U8TO32_LITTLE(nonce + 12); + + for (i = 20; i > 0; i -= 2) { + QUARTERROUND(x[0], x[4], x[8], x[12]) + QUARTERROUND(x[1], x[5], x[9], x[13]) + QUARTERROUND(x[2], x[6], x[10], x[14]) + QUARTERROUND(x[3], x[7], x[11], x[15]) + QUARTERROUND(x[0], x[5], x[10], x[15]) + QUARTERROUND(x[1], x[6], x[11], x[12]) + QUARTERROUND(x[2], x[7], x[8], x[13]) + QUARTERROUND(x[3], x[4], x[9], x[14]) + } + + U32TO8_LITTLE(subkey + 0, x[0]); + U32TO8_LITTLE(subkey + 4, x[1]); + U32TO8_LITTLE(subkey + 8, x[2]); + U32TO8_LITTLE(subkey + 12, x[3]); + + U32TO8_LITTLE(subkey + 16, x[12]); + U32TO8_LITTLE(subkey + 20, x[13]); + U32TO8_LITTLE(subkey + 24, x[14]); + U32TO8_LITTLE(subkey + 28, x[15]); +} diff --git a/ext/libressl/crypto/chacha/chacha.c b/ext/libressl/crypto/chacha/chacha.c new file mode 100644 index 0000000..f62a84d --- /dev/null +++ b/ext/libressl/crypto/chacha/chacha.c @@ -0,0 +1,87 @@ +/* $OpenBSD: chacha.c,v 1.8 2019/01/22 00:59:21 dlg Exp $ */ +/* + * Copyright (c) 2014 Joel Sing + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include + +#include + +#include "chacha-merged.c" + +void +ChaCha_set_key(ChaCha_ctx *ctx, const unsigned char *key, unsigned int keybits) +{ + chacha_keysetup((chacha_ctx *)ctx, key, keybits); + ctx->unused = 0; +} + +void +ChaCha_set_iv(ChaCha_ctx *ctx, const unsigned char *iv, + const unsigned char *counter) +{ + chacha_ivsetup((chacha_ctx *)ctx, iv, counter); + ctx->unused = 0; +} + +void +ChaCha(ChaCha_ctx *ctx, unsigned char *out, const unsigned char *in, size_t len) +{ + unsigned char *k; + int i, l; + + /* Consume remaining keystream, if any exists. */ + if (ctx->unused > 0) { + k = ctx->ks + 64 - ctx->unused; + l = (len > ctx->unused) ? ctx->unused : len; + for (i = 0; i < l; i++) + *(out++) = *(in++) ^ *(k++); + ctx->unused -= l; + len -= l; + } + + chacha_encrypt_bytes((chacha_ctx *)ctx, in, out, (uint32_t)len); +} + +void +CRYPTO_chacha_20(unsigned char *out, const unsigned char *in, size_t len, + const unsigned char key[32], const unsigned char iv[8], uint64_t counter) +{ + struct chacha_ctx ctx; + + /* + * chacha_ivsetup expects the counter to be in u8. Rather than + * converting size_t to u8 and then back again, pass a counter of + * NULL and manually assign it afterwards. + */ + chacha_keysetup(&ctx, key, 256); + chacha_ivsetup(&ctx, iv, NULL); + if (counter != 0) { + ctx.input[12] = (uint32_t)counter; + ctx.input[13] = (uint32_t)(counter >> 32); + } + + chacha_encrypt_bytes(&ctx, in, out, (uint32_t)len); +} + +void +CRYPTO_xchacha_20(unsigned char *out, const unsigned char *in, size_t len, + const unsigned char key[32], const unsigned char iv[24]) +{ + uint8_t subkey[32]; + + CRYPTO_hchacha_20(subkey, key, iv); + CRYPTO_chacha_20(out, in, len, subkey, iv + 16, 0); +} diff --git a/ext/libressl/crypto/compat/Makefile b/ext/libressl/crypto/compat/Makefile new file mode 100644 index 0000000..00e3a67 --- /dev/null +++ b/ext/libressl/crypto/compat/Makefile @@ -0,0 +1,13 @@ +include ../../ssl_common.mk + +obj = arc4random.o explicit_bzero.o timingsafe_bcmp.o timingsafe_memcmp.o + + +all: $(obj) +dep: all + +%.o: %.c + $(CC) $(CFLAGS) -c $< + +clean: + rm -f *.o *.a diff --git a/ext/libressl/crypto/compat/arc4random.c b/ext/libressl/crypto/compat/arc4random.c new file mode 100644 index 0000000..67a47f6 --- /dev/null +++ b/ext/libressl/crypto/compat/arc4random.c @@ -0,0 +1,216 @@ +/* $OpenBSD: arc4random.c,v 1.55 2019/03/24 17:56:54 deraadt Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres + * Copyright (c) 2008, Damien Miller + * Copyright (c) 2013, Markus Friedl + * Copyright (c) 2014, Theo de Raadt + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * ChaCha based random number generator for OpenBSD. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define KEYSTREAM_ONLY +#include "chacha_private.h" + +#define minimum(a, b) ((a) < (b) ? (a) : (b)) + +#if defined(__GNUC__) || defined(_MSC_VER) +#define inline __inline +#else /* __GNUC__ || _MSC_VER */ +#define inline +#endif /* !__GNUC__ && !_MSC_VER */ + +#define KEYSZ 32 +#define IVSZ 8 +#define BLOCKSZ 64 +#if defined(__FE310__) +#define RSBLKS 16 +#define RSBUFSZ (BLOCKSZ) +#else +#define RSBUFSZ (16*BLOCKSZ) +#endif + +/* Marked MAP_INHERIT_ZERO, so zero'd out in fork children. */ +static struct _rs { + size_t rs_have; /* valid bytes at end of rs_buf */ + size_t rs_count; /* bytes till reseed */ +#if defined(__FE310__) + size_t rs_blocks; /* blocks till rekey */ +#endif +} *rs; + +/* Maybe be preserved in fork children, if _rs_allocate() decides. */ +static struct _rsx { + chacha_ctx rs_chacha; /* chacha context for random keystream */ + u_char rs_buf[RSBUFSZ]; /* keystream blocks */ +} *rsx; + +static inline int _rs_allocate(struct _rs **, struct _rsx **); +static inline void _rs_forkdetect(void); +#include "arc4random.h" + +static inline void _rs_rekey(u_char *dat, size_t datlen); + +static inline void +_rs_init(u_char *buf, size_t n) +{ + if (n < KEYSZ + IVSZ) + return; + + if (rs == NULL) { + if (_rs_allocate(&rs, &rsx) == -1) + _exit(1); +#if defined(__FE310__) + rs->rs_blocks = (RSBLKS - 1); +#endif + } + + chacha_keysetup(&rsx->rs_chacha, buf, KEYSZ * 8, 0); + chacha_ivsetup(&rsx->rs_chacha, buf + KEYSZ); +} + +static void +_rs_stir(void) +{ + u_char rnd[KEYSZ + IVSZ]; + + if (getentropy(rnd, sizeof rnd) == -1) + _getentropy_fail(); + + if (!rs) + _rs_init(rnd, sizeof(rnd)); + else + _rs_rekey(rnd, sizeof(rnd)); + explicit_bzero(rnd, sizeof(rnd)); /* discard source seed */ + + /* invalidate rs_buf */ + rs->rs_have = 0; + memset(rsx->rs_buf, 0, sizeof(rsx->rs_buf)); + + rs->rs_count = 1600000; +} + +static inline void +_rs_stir_if_needed(size_t len) +{ + _rs_forkdetect(); + if (!rs || rs->rs_count <= len) + _rs_stir(); + if (rs->rs_count <= len) + rs->rs_count = 0; + else + rs->rs_count -= len; +} + +static inline void +_rs_rekey(u_char *dat, size_t datlen) +{ +#ifndef KEYSTREAM_ONLY + memset(rsx->rs_buf, 0, sizeof(rsx->rs_buf)); +#endif + /* fill rs_buf with the keystream */ + chacha_encrypt_bytes(&rsx->rs_chacha, rsx->rs_buf, + rsx->rs_buf, sizeof(rsx->rs_buf)); + /* mix in optional user provided data */ + if (dat) { + size_t i, m; + + m = minimum(datlen, KEYSZ + IVSZ); + for (i = 0; i < m; i++) + rsx->rs_buf[i] ^= dat[i]; + } +#if defined(__FE310__) + if (dat || (rs->rs_blocks == 0)) { + rs->rs_blocks = (RSBLKS - 1); + } else { + rs->rs_blocks--; + rs->rs_have = sizeof(rsx->rs_buf); + return; + } +#endif + /* immediately reinit for backtracking resistance */ + _rs_init(rsx->rs_buf, KEYSZ + IVSZ); + memset(rsx->rs_buf, 0, KEYSZ + IVSZ); + rs->rs_have = sizeof(rsx->rs_buf) - KEYSZ - IVSZ; +} + +static inline void +_rs_random_buf(void *_buf, size_t n) +{ + u_char *buf = (u_char *)_buf; + u_char *keystream; + size_t m; + + _rs_stir_if_needed(n); + while (n > 0) { + if (rs->rs_have > 0) { + m = minimum(n, rs->rs_have); + keystream = rsx->rs_buf + sizeof(rsx->rs_buf) + - rs->rs_have; + memcpy(buf, keystream, m); + memset(keystream, 0, m); + buf += m; + n -= m; + rs->rs_have -= m; + } + if (rs->rs_have == 0) + _rs_rekey(NULL, 0); + } +} + +static inline void +_rs_random_u32(uint32_t *val) +{ + u_char *keystream; + + _rs_stir_if_needed(sizeof(*val)); + if (rs->rs_have < sizeof(*val)) + _rs_rekey(NULL, 0); + keystream = rsx->rs_buf + sizeof(rsx->rs_buf) - rs->rs_have; + memcpy(val, keystream, sizeof(*val)); + memset(keystream, 0, sizeof(*val)); + rs->rs_have -= sizeof(*val); +} + +uint32_t +arc4random(void) +{ + uint32_t val; + + _ARC4_LOCK(); + _rs_random_u32(&val); + _ARC4_UNLOCK(); + return val; +} + +void +arc4random_buf(void *buf, size_t n) +{ + _ARC4_LOCK(); + _rs_random_buf(buf, n); + _ARC4_UNLOCK(); +} diff --git a/ext/libressl/crypto/compat/arc4random.h b/ext/libressl/crypto/compat/arc4random.h new file mode 100644 index 0000000..8a308a9 --- /dev/null +++ b/ext/libressl/crypto/compat/arc4random.h @@ -0,0 +1,41 @@ +#ifndef LIBCRYPTOCOMPAT_ARC4RANDOM_H +#define LIBCRYPTOCOMPAT_ARC4RANDOM_H + +#include + +#if defined(_AIX) +#include "arc4random_aix.h" + +#elif defined(__FreeBSD__) +#include "arc4random_freebsd.h" + +#elif defined(__hpux) +#include "arc4random_hpux.h" + +#elif defined(__linux__) +#include "arc4random_linux.h" + +#elif defined(__midipix__) +#include "arc4random_linux.h" + +#elif defined(__NetBSD__) +#include "arc4random_netbsd.h" + +#elif defined(__APPLE__) +#include "arc4random_osx.h" + +#elif defined(__sun) +#include "arc4random_solaris.h" + +#elif defined(_WIN32) +#include "arc4random_win.h" + +#elif defined(__FE310__) +#include "arc4random_fe310.h" + +#else +#error "No arc4random hooks defined for this platform." + +#endif + +#endif diff --git a/ext/libressl/crypto/compat/arc4random_aix.h b/ext/libressl/crypto/compat/arc4random_aix.h new file mode 100644 index 0000000..3142a1f --- /dev/null +++ b/ext/libressl/crypto/compat/arc4random_aix.h @@ -0,0 +1,81 @@ +/* $OpenBSD: arc4random_aix.h,v 1.2 2016/06/30 12:19:51 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres + * Copyright (c) 2008, Damien Miller + * Copyright (c) 2013, Markus Friedl + * Copyright (c) 2014, Theo de Raadt + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include + +#include +#include + +static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; +#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx) +#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx) + +#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f)) + +static inline void +_getentropy_fail(void) +{ + raise(SIGKILL); +} + +static volatile sig_atomic_t _rs_forked; + +static inline void +_rs_forkhandler(void) +{ + _rs_forked = 1; +} + +static inline void +_rs_forkdetect(void) +{ + static pid_t _rs_pid = 0; + pid_t pid = getpid(); + + if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) { + _rs_pid = pid; + _rs_forked = 0; + if (rs) + memset(rs, 0, sizeof(*rs)); + } +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) + return (-1); + + if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { + munmap(*rsp, sizeof(**rsp)); + *rsp = NULL; + return (-1); + } + + _ARC4_ATFORK(_rs_forkhandler); + return (0); +} diff --git a/ext/libressl/crypto/compat/arc4random_fe310.h b/ext/libressl/crypto/compat/arc4random_fe310.h new file mode 100644 index 0000000..131b0d3 --- /dev/null +++ b/ext/libressl/crypto/compat/arc4random_fe310.h @@ -0,0 +1,27 @@ +#define _ARC4_LOCK() ; +#define _ARC4_UNLOCK() ; + +static inline void +_getentropy_fail(void) +{ + _exit(1); +} + +static inline void +_rs_forkdetect(void) +{ +} + +int arc4random_alloc(void **rsp, size_t rsp_size, void **rsxp, size_t rsxp_size); + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + return arc4random_alloc((void **)rsp, sizeof(**rsp), (void **)rsxp, sizeof(**rsxp)); +} + +void arc4random_close(void) +{ + rs = NULL; + rsx = NULL; +} \ No newline at end of file diff --git a/ext/libressl/crypto/compat/arc4random_freebsd.h b/ext/libressl/crypto/compat/arc4random_freebsd.h new file mode 100644 index 0000000..3faa5e4 --- /dev/null +++ b/ext/libressl/crypto/compat/arc4random_freebsd.h @@ -0,0 +1,87 @@ +/* $OpenBSD: arc4random_freebsd.h,v 1.4 2016/06/30 12:19:51 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres + * Copyright (c) 2008, Damien Miller + * Copyright (c) 2013, Markus Friedl + * Copyright (c) 2014, Theo de Raadt + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include + +#include +#include + +static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; +#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx) +#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx) + +/* + * Unfortunately, pthread_atfork() is broken on FreeBSD (at least 9 and 10) if + * a program does not link to -lthr. Callbacks registered with pthread_atfork() + * appear to fail silently. So, it is not always possible to detect a PID + * wraparound. + */ +#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f)) + +static inline void +_getentropy_fail(void) +{ + raise(SIGKILL); +} + +static volatile sig_atomic_t _rs_forked; + +static inline void +_rs_forkhandler(void) +{ + _rs_forked = 1; +} + +static inline void +_rs_forkdetect(void) +{ + static pid_t _rs_pid = 0; + pid_t pid = getpid(); + + if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) { + _rs_pid = pid; + _rs_forked = 0; + if (rs) + memset(rs, 0, sizeof(*rs)); + } +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) + return (-1); + + if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { + munmap(*rsp, sizeof(**rsp)); + *rsp = NULL; + return (-1); + } + + _ARC4_ATFORK(_rs_forkhandler); + return (0); +} diff --git a/ext/libressl/crypto/compat/arc4random_hpux.h b/ext/libressl/crypto/compat/arc4random_hpux.h new file mode 100644 index 0000000..2a3fe8c --- /dev/null +++ b/ext/libressl/crypto/compat/arc4random_hpux.h @@ -0,0 +1,81 @@ +/* $OpenBSD: arc4random_hpux.h,v 1.3 2016/06/30 12:19:51 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres + * Copyright (c) 2008, Damien Miller + * Copyright (c) 2013, Markus Friedl + * Copyright (c) 2014, Theo de Raadt + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include + +#include +#include + +static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; +#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx) +#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx) + +#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f)) + +static inline void +_getentropy_fail(void) +{ + raise(SIGKILL); +} + +static volatile sig_atomic_t _rs_forked; + +static inline void +_rs_forkhandler(void) +{ + _rs_forked = 1; +} + +static inline void +_rs_forkdetect(void) +{ + static pid_t _rs_pid = 0; + pid_t pid = getpid(); + + if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) { + _rs_pid = pid; + _rs_forked = 0; + if (rs) + memset(rs, 0, sizeof(*rs)); + } +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) + return (-1); + + if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { + munmap(*rsp, sizeof(**rsp)); + *rsp = NULL; + return (-1); + } + + _ARC4_ATFORK(_rs_forkhandler); + return (0); +} diff --git a/ext/libressl/crypto/compat/arc4random_linux.h b/ext/libressl/crypto/compat/arc4random_linux.h new file mode 100644 index 0000000..5e1cf34 --- /dev/null +++ b/ext/libressl/crypto/compat/arc4random_linux.h @@ -0,0 +1,88 @@ +/* $OpenBSD: arc4random_linux.h,v 1.12 2019/07/11 10:37:28 inoguchi Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres + * Copyright (c) 2008, Damien Miller + * Copyright (c) 2013, Markus Friedl + * Copyright (c) 2014, Theo de Raadt + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include + +#include +#include + +static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; +#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx) +#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx) + +#if defined(__GLIBC__) && !(defined(__UCLIBC__) && !defined(__ARCH_USE_MMU__)) +extern void *__dso_handle; +extern int __register_atfork(void (*)(void), void(*)(void), void (*)(void), void *); +#define _ARC4_ATFORK(f) __register_atfork(NULL, NULL, (f), __dso_handle) +#else +#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f)) +#endif + +static inline void +_getentropy_fail(void) +{ + raise(SIGKILL); +} + +static volatile sig_atomic_t _rs_forked; + +static inline void +_rs_forkhandler(void) +{ + _rs_forked = 1; +} + +static inline void +_rs_forkdetect(void) +{ + static pid_t _rs_pid = 0; + pid_t pid = getpid(); + + /* XXX unusual calls to clone() can bypass checks */ + if (_rs_pid == 0 || _rs_pid == 1 || _rs_pid != pid || _rs_forked) { + _rs_pid = pid; + _rs_forked = 0; + if (rs) + memset(rs, 0, sizeof(*rs)); + } +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) + return (-1); + + if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { + munmap(*rsp, sizeof(**rsp)); + *rsp = NULL; + return (-1); + } + + _ARC4_ATFORK(_rs_forkhandler); + return (0); +} diff --git a/ext/libressl/crypto/compat/arc4random_netbsd.h b/ext/libressl/crypto/compat/arc4random_netbsd.h new file mode 100644 index 0000000..611997d --- /dev/null +++ b/ext/libressl/crypto/compat/arc4random_netbsd.h @@ -0,0 +1,87 @@ +/* $OpenBSD: arc4random_netbsd.h,v 1.3 2016/06/30 12:19:51 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres + * Copyright (c) 2008, Damien Miller + * Copyright (c) 2013, Markus Friedl + * Copyright (c) 2014, Theo de Raadt + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include + +#include +#include + +static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; +#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx) +#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx) + +/* + * Unfortunately, pthread_atfork() is broken on FreeBSD (at least 9 and 10) if + * a program does not link to -lthr. Callbacks registered with pthread_atfork() + * appear to fail silently. So, it is not always possible to detect a PID + * wraparound. + */ +#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f)) + +static inline void +_getentropy_fail(void) +{ + raise(SIGKILL); +} + +static volatile sig_atomic_t _rs_forked; + +static inline void +_rs_forkhandler(void) +{ + _rs_forked = 1; +} + +static inline void +_rs_forkdetect(void) +{ + static pid_t _rs_pid = 0; + pid_t pid = getpid(); + + if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) { + _rs_pid = pid; + _rs_forked = 0; + if (rs) + memset(rs, 0, sizeof(*rs)); + } +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) + return (-1); + + if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { + munmap(*rsp, sizeof(**rsp)); + *rsp = NULL; + return (-1); + } + + _ARC4_ATFORK(_rs_forkhandler); + return (0); +} diff --git a/ext/libressl/crypto/compat/arc4random_osx.h b/ext/libressl/crypto/compat/arc4random_osx.h new file mode 100644 index 0000000..818ae6b --- /dev/null +++ b/ext/libressl/crypto/compat/arc4random_osx.h @@ -0,0 +1,81 @@ +/* $OpenBSD: arc4random_osx.h,v 1.11 2016/06/30 12:19:51 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres + * Copyright (c) 2008, Damien Miller + * Copyright (c) 2013, Markus Friedl + * Copyright (c) 2014, Theo de Raadt + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include + +#include +#include + +static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; +#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx) +#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx) + +#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f)) + +static inline void +_getentropy_fail(void) +{ + raise(SIGKILL); +} + +static volatile sig_atomic_t _rs_forked; + +static inline void +_rs_forkhandler(void) +{ + _rs_forked = 1; +} + +static inline void +_rs_forkdetect(void) +{ + static pid_t _rs_pid = 0; + pid_t pid = getpid(); + + if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) { + _rs_pid = pid; + _rs_forked = 0; + if (rs) + memset(rs, 0, sizeof(*rs)); + } +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) + return (-1); + + if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { + munmap(*rsp, sizeof(**rsp)); + *rsp = NULL; + return (-1); + } + + _ARC4_ATFORK(_rs_forkhandler); + return (0); +} diff --git a/ext/libressl/crypto/compat/arc4random_solaris.h b/ext/libressl/crypto/compat/arc4random_solaris.h new file mode 100644 index 0000000..b1084cd --- /dev/null +++ b/ext/libressl/crypto/compat/arc4random_solaris.h @@ -0,0 +1,81 @@ +/* $OpenBSD: arc4random_solaris.h,v 1.10 2016/06/30 12:19:51 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres + * Copyright (c) 2008, Damien Miller + * Copyright (c) 2013, Markus Friedl + * Copyright (c) 2014, Theo de Raadt + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include + +#include +#include + +static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; +#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx) +#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx) + +#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f)) + +static inline void +_getentropy_fail(void) +{ + raise(SIGKILL); +} + +static volatile sig_atomic_t _rs_forked; + +static inline void +_rs_forkhandler(void) +{ + _rs_forked = 1; +} + +static inline void +_rs_forkdetect(void) +{ + static pid_t _rs_pid = 0; + pid_t pid = getpid(); + + if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) { + _rs_pid = pid; + _rs_forked = 0; + if (rs) + memset(rs, 0, sizeof(*rs)); + } +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) + return (-1); + + if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { + munmap(*rsp, sizeof(**rsp)); + *rsp = NULL; + return (-1); + } + + _ARC4_ATFORK(_rs_forkhandler); + return (0); +} diff --git a/ext/libressl/crypto/compat/arc4random_uniform.c b/ext/libressl/crypto/compat/arc4random_uniform.c new file mode 100644 index 0000000..06cd29c --- /dev/null +++ b/ext/libressl/crypto/compat/arc4random_uniform.c @@ -0,0 +1,56 @@ +/* $OpenBSD: arc4random_uniform.c,v 1.3 2019/01/20 02:59:07 bcook Exp $ */ + +/* + * Copyright (c) 2008, Damien Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include + +/* + * Calculate a uniformly distributed random number less than upper_bound + * avoiding "modulo bias". + * + * Uniformity is achieved by generating new random numbers until the one + * returned is outside the range [0, 2**32 % upper_bound). This + * guarantees the selected random number will be inside + * [2**32 % upper_bound, 2**32) which maps back to [0, upper_bound) + * after reduction modulo upper_bound. + */ +uint32_t +arc4random_uniform(uint32_t upper_bound) +{ + uint32_t r, min; + + if (upper_bound < 2) + return 0; + + /* 2**32 % x == (2**32 - x) % x */ + min = -upper_bound % upper_bound; + + /* + * This could theoretically loop forever but each retry has + * p > 0.5 (worst case, usually far better) of selecting a + * number inside the range we need, so it should rarely need + * to re-roll. + */ + for (;;) { + r = arc4random(); + if (r >= min) + break; + } + + return r % upper_bound; +} diff --git a/ext/libressl/crypto/compat/arc4random_win.h b/ext/libressl/crypto/compat/arc4random_win.h new file mode 100644 index 0000000..deec8a1 --- /dev/null +++ b/ext/libressl/crypto/compat/arc4random_win.h @@ -0,0 +1,78 @@ +/* $OpenBSD: arc4random_win.h,v 1.6 2016/06/30 12:17:29 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres + * Copyright (c) 2008, Damien Miller + * Copyright (c) 2013, Markus Friedl + * Copyright (c) 2014, Theo de Raadt + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include + +static volatile HANDLE arc4random_mtx = NULL; + +/* + * Initialize the mutex on the first lock attempt. On collision, each thread + * will attempt to allocate a mutex and compare-and-swap it into place as the + * global mutex. On failure to swap in the global mutex, the mutex is closed. + */ +#define _ARC4_LOCK() { \ + if (!arc4random_mtx) { \ + HANDLE p = CreateMutex(NULL, FALSE, NULL); \ + if (InterlockedCompareExchangePointer((void **)&arc4random_mtx, (void *)p, NULL)) \ + CloseHandle(p); \ + } \ + WaitForSingleObject(arc4random_mtx, INFINITE); \ +} \ + +#define _ARC4_UNLOCK() ReleaseMutex(arc4random_mtx) + +static inline void +_getentropy_fail(void) +{ + TerminateProcess(GetCurrentProcess(), 0); +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + *rsp = VirtualAlloc(NULL, sizeof(**rsp), + MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + if (*rsp == NULL) + return (-1); + + *rsxp = VirtualAlloc(NULL, sizeof(**rsxp), + MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + if (*rsxp == NULL) { + VirtualFree(*rsp, 0, MEM_RELEASE); + *rsp = NULL; + return (-1); + } + return (0); +} + +static inline void +_rs_forkhandler(void) +{ +} + +static inline void +_rs_forkdetect(void) +{ +} diff --git a/ext/libressl/crypto/compat/chacha_private.h b/ext/libressl/crypto/compat/chacha_private.h new file mode 100644 index 0000000..7c3680f --- /dev/null +++ b/ext/libressl/crypto/compat/chacha_private.h @@ -0,0 +1,222 @@ +/* +chacha-merged.c version 20080118 +D. J. Bernstein +Public domain. +*/ + +/* $OpenBSD: chacha_private.h,v 1.2 2013/10/04 07:02:27 djm Exp $ */ + +typedef unsigned char u8; +typedef unsigned int u32; + +typedef struct +{ + u32 input[16]; /* could be compressed */ +} chacha_ctx; + +#define U8C(v) (v##U) +#define U32C(v) (v##U) + +#define U8V(v) ((u8)(v) & U8C(0xFF)) +#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF)) + +#define ROTL32(v, n) \ + (U32V((v) << (n)) | ((v) >> (32 - (n)))) + +#define U8TO32_LITTLE(p) \ + (((u32)((p)[0]) ) | \ + ((u32)((p)[1]) << 8) | \ + ((u32)((p)[2]) << 16) | \ + ((u32)((p)[3]) << 24)) + +#define U32TO8_LITTLE(p, v) \ + do { \ + (p)[0] = U8V((v) ); \ + (p)[1] = U8V((v) >> 8); \ + (p)[2] = U8V((v) >> 16); \ + (p)[3] = U8V((v) >> 24); \ + } while (0) + +#define ROTATE(v,c) (ROTL32(v,c)) +#define XOR(v,w) ((v) ^ (w)) +#define PLUS(v,w) (U32V((v) + (w))) +#define PLUSONE(v) (PLUS((v),1)) + +#define QUARTERROUND(a,b,c,d) \ + a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ + c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ + a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ + c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); + +static const char sigma[16] = "expand 32-byte k"; +static const char tau[16] = "expand 16-byte k"; + +static void +chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits,u32 ivbits) +{ + const char *constants; + + x->input[4] = U8TO32_LITTLE(k + 0); + x->input[5] = U8TO32_LITTLE(k + 4); + x->input[6] = U8TO32_LITTLE(k + 8); + x->input[7] = U8TO32_LITTLE(k + 12); + if (kbits == 256) { /* recommended */ + k += 16; + constants = sigma; + } else { /* kbits == 128 */ + constants = tau; + } + x->input[8] = U8TO32_LITTLE(k + 0); + x->input[9] = U8TO32_LITTLE(k + 4); + x->input[10] = U8TO32_LITTLE(k + 8); + x->input[11] = U8TO32_LITTLE(k + 12); + x->input[0] = U8TO32_LITTLE(constants + 0); + x->input[1] = U8TO32_LITTLE(constants + 4); + x->input[2] = U8TO32_LITTLE(constants + 8); + x->input[3] = U8TO32_LITTLE(constants + 12); +} + +static void +chacha_ivsetup(chacha_ctx *x,const u8 *iv) +{ + x->input[12] = 0; + x->input[13] = 0; + x->input[14] = U8TO32_LITTLE(iv + 0); + x->input[15] = U8TO32_LITTLE(iv + 4); +} + +static void +chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) +{ + u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; + u8 *ctarget = NULL; + u8 tmp[64]; + u_int i; + + if (!bytes) return; + + j0 = x->input[0]; + j1 = x->input[1]; + j2 = x->input[2]; + j3 = x->input[3]; + j4 = x->input[4]; + j5 = x->input[5]; + j6 = x->input[6]; + j7 = x->input[7]; + j8 = x->input[8]; + j9 = x->input[9]; + j10 = x->input[10]; + j11 = x->input[11]; + j12 = x->input[12]; + j13 = x->input[13]; + j14 = x->input[14]; + j15 = x->input[15]; + + for (;;) { + if (bytes < 64) { + for (i = 0;i < bytes;++i) tmp[i] = m[i]; + m = tmp; + ctarget = c; + c = tmp; + } + x0 = j0; + x1 = j1; + x2 = j2; + x3 = j3; + x4 = j4; + x5 = j5; + x6 = j6; + x7 = j7; + x8 = j8; + x9 = j9; + x10 = j10; + x11 = j11; + x12 = j12; + x13 = j13; + x14 = j14; + x15 = j15; + for (i = 20;i > 0;i -= 2) { + QUARTERROUND( x0, x4, x8,x12) + QUARTERROUND( x1, x5, x9,x13) + QUARTERROUND( x2, x6,x10,x14) + QUARTERROUND( x3, x7,x11,x15) + QUARTERROUND( x0, x5,x10,x15) + QUARTERROUND( x1, x6,x11,x12) + QUARTERROUND( x2, x7, x8,x13) + QUARTERROUND( x3, x4, x9,x14) + } + x0 = PLUS(x0,j0); + x1 = PLUS(x1,j1); + x2 = PLUS(x2,j2); + x3 = PLUS(x3,j3); + x4 = PLUS(x4,j4); + x5 = PLUS(x5,j5); + x6 = PLUS(x6,j6); + x7 = PLUS(x7,j7); + x8 = PLUS(x8,j8); + x9 = PLUS(x9,j9); + x10 = PLUS(x10,j10); + x11 = PLUS(x11,j11); + x12 = PLUS(x12,j12); + x13 = PLUS(x13,j13); + x14 = PLUS(x14,j14); + x15 = PLUS(x15,j15); + +#ifndef KEYSTREAM_ONLY + x0 = XOR(x0,U8TO32_LITTLE(m + 0)); + x1 = XOR(x1,U8TO32_LITTLE(m + 4)); + x2 = XOR(x2,U8TO32_LITTLE(m + 8)); + x3 = XOR(x3,U8TO32_LITTLE(m + 12)); + x4 = XOR(x4,U8TO32_LITTLE(m + 16)); + x5 = XOR(x5,U8TO32_LITTLE(m + 20)); + x6 = XOR(x6,U8TO32_LITTLE(m + 24)); + x7 = XOR(x7,U8TO32_LITTLE(m + 28)); + x8 = XOR(x8,U8TO32_LITTLE(m + 32)); + x9 = XOR(x9,U8TO32_LITTLE(m + 36)); + x10 = XOR(x10,U8TO32_LITTLE(m + 40)); + x11 = XOR(x11,U8TO32_LITTLE(m + 44)); + x12 = XOR(x12,U8TO32_LITTLE(m + 48)); + x13 = XOR(x13,U8TO32_LITTLE(m + 52)); + x14 = XOR(x14,U8TO32_LITTLE(m + 56)); + x15 = XOR(x15,U8TO32_LITTLE(m + 60)); +#endif + + j12 = PLUSONE(j12); + if (!j12) { + j13 = PLUSONE(j13); + /* stopping at 2^70 bytes per nonce is user's responsibility */ + } + + U32TO8_LITTLE(c + 0,x0); + U32TO8_LITTLE(c + 4,x1); + U32TO8_LITTLE(c + 8,x2); + U32TO8_LITTLE(c + 12,x3); + U32TO8_LITTLE(c + 16,x4); + U32TO8_LITTLE(c + 20,x5); + U32TO8_LITTLE(c + 24,x6); + U32TO8_LITTLE(c + 28,x7); + U32TO8_LITTLE(c + 32,x8); + U32TO8_LITTLE(c + 36,x9); + U32TO8_LITTLE(c + 40,x10); + U32TO8_LITTLE(c + 44,x11); + U32TO8_LITTLE(c + 48,x12); + U32TO8_LITTLE(c + 52,x13); + U32TO8_LITTLE(c + 56,x14); + U32TO8_LITTLE(c + 60,x15); + + if (bytes <= 64) { + if (bytes < 64) { + for (i = 0;i < bytes;++i) ctarget[i] = c[i]; + } + x->input[12] = j12; + x->input[13] = j13; + return; + } + bytes -= 64; + c += 64; +#ifndef KEYSTREAM_ONLY + m += 64; +#endif + } +} diff --git a/ext/libressl/crypto/compat/explicit_bzero.c b/ext/libressl/crypto/compat/explicit_bzero.c new file mode 100644 index 0000000..5dd0103 --- /dev/null +++ b/ext/libressl/crypto/compat/explicit_bzero.c @@ -0,0 +1,19 @@ +/* $OpenBSD: explicit_bzero.c,v 1.4 2015/08/31 02:53:57 guenther Exp $ */ +/* + * Public domain. + * Written by Matthew Dempsky. + */ + +#include + +__attribute__((weak)) void +__explicit_bzero_hook(void *buf, size_t len) +{ +} + +void +explicit_bzero(void *buf, size_t len) +{ + memset(buf, 0, len); + __explicit_bzero_hook(buf, len); +} diff --git a/ext/libressl/crypto/compat/explicit_bzero_win.c b/ext/libressl/crypto/compat/explicit_bzero_win.c new file mode 100644 index 0000000..0d09d90 --- /dev/null +++ b/ext/libressl/crypto/compat/explicit_bzero_win.c @@ -0,0 +1,13 @@ +/* + * Public domain. + * Win32 explicit_bzero compatibility shim. + */ + +#include +#include + +void +explicit_bzero(void *buf, size_t len) +{ + SecureZeroMemory(buf, len); +} diff --git a/ext/libressl/crypto/compat/getentropy_aix.c b/ext/libressl/crypto/compat/getentropy_aix.c new file mode 100644 index 0000000..422e685 --- /dev/null +++ b/ext/libressl/crypto/compat/getentropy_aix.c @@ -0,0 +1,402 @@ +/* $OpenBSD: getentropy_aix.c,v 1.7 2020/05/17 14:44:20 deraadt Exp $ */ + +/* + * Copyright (c) 2015 Michael Felt + * Copyright (c) 2014 Theo de Raadt + * Copyright (c) 2014 Bob Beck + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://man.openbsd.org/getentropy.2 + */ +/* + * -lperfstat is needed for the psuedo entropy data + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#define REPEAT 5 +#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b)) + +#define HX(a, b) \ + do { \ + if ((a)) \ + HD(errno); \ + else \ + HD(b); \ + } while (0) + +#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l))) +#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x))) +#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*))) + +int getentropy(void *buf, size_t len); + +static int getentropy_urandom(void *buf, size_t len, const char *path, + int devfscheck); +static int getentropy_fallback(void *buf, size_t len); + +int +getentropy(void *buf, size_t len) +{ + int ret = -1; + + if (len > 256) { + errno = EIO; + return (-1); + } + + /* + * Try to get entropy with /dev/urandom + */ + ret = getentropy_urandom(buf, len, "/dev/urandom", 0); + if (ret != -1) + return (ret); + + /* + * Entropy collection via /dev/urandom has failed. + * + * No other API exists for collecting entropy, and we have + * no failsafe way to get it on AIX that is not sensitive + * to resource exhaustion. + * + * We have very few options: + * - Even syslog_r is unsafe to call at this low level, so + * there is no way to alert the user or program. + * - Cannot call abort() because some systems have unsafe + * corefiles. + * - Could raise(SIGKILL) resulting in silent program termination. + * - Return EIO, to hint that arc4random's stir function + * should raise(SIGKILL) + * - Do the best under the circumstances.... + * + * This code path exists to bring light to the issue that AIX + * does not provide a failsafe API for entropy collection. + * + * We hope this demonstrates that AIX should consider + * providing a new failsafe API which works in a chroot or + * when file descriptors are exhausted. + */ +#undef FAIL_INSTEAD_OF_TRYING_FALLBACK +#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK + raise(SIGKILL); +#endif + ret = getentropy_fallback(buf, len); + if (ret != -1) + return (ret); + + errno = EIO; + return (ret); +} + +static int +getentropy_urandom(void *buf, size_t len, const char *path, int devfscheck) +{ + struct stat st; + size_t i; + int fd, flags; + int save_errno = errno; + +start: + + flags = O_RDONLY; +#ifdef O_NOFOLLOW + flags |= O_NOFOLLOW; +#endif +#ifdef O_CLOEXEC + flags |= O_CLOEXEC; +#endif + fd = open(path, flags, 0); + if (fd == -1) { + if (errno == EINTR) + goto start; + goto nodevrandom; + } +#ifndef O_CLOEXEC + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); +#endif + + /* Lightly verify that the device node looks sane */ + if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode)) { + close(fd); + goto nodevrandom; + } + for (i = 0; i < len; ) { + size_t wanted = len - i; + ssize_t ret = read(fd, (char *)buf + i, wanted); + + if (ret == -1) { + if (errno == EAGAIN || errno == EINTR) + continue; + close(fd); + goto nodevrandom; + } + i += ret; + } + close(fd); + errno = save_errno; + return (0); /* satisfied */ +nodevrandom: + errno = EIO; + return (-1); +} + +static const int cl[] = { + CLOCK_REALTIME, +#ifdef CLOCK_MONOTONIC + CLOCK_MONOTONIC, +#endif +#ifdef CLOCK_MONOTONIC_RAW + CLOCK_MONOTONIC_RAW, +#endif +#ifdef CLOCK_TAI + CLOCK_TAI, +#endif +#ifdef CLOCK_VIRTUAL + CLOCK_VIRTUAL, +#endif +#ifdef CLOCK_UPTIME + CLOCK_UPTIME, +#endif +#ifdef CLOCK_PROCESS_CPUTIME_ID + CLOCK_PROCESS_CPUTIME_ID, +#endif +#ifdef CLOCK_THREAD_CPUTIME_ID + CLOCK_THREAD_CPUTIME_ID, +#endif +}; + +static int +getentropy_fallback(void *buf, size_t len) +{ + uint8_t results[SHA512_DIGEST_LENGTH]; + int save_errno = errno, e, pgs = sysconf(_SC_PAGESIZE), faster = 0, repeat; + static int cnt; + struct timespec ts; + struct timeval tv; + perfstat_cpu_total_t cpustats; +#ifdef _AIX61 + perfstat_cpu_total_wpar_t cpustats_wpar; +#endif + perfstat_partition_total_t lparstats; + perfstat_disk_total_t diskinfo; + perfstat_netinterface_total_t netinfo; + struct rusage ru; + sigset_t sigset; + struct stat st; + SHA512_CTX ctx; + static pid_t lastpid; + pid_t pid; + size_t i, ii, m; + char *p; + + pid = getpid(); + if (lastpid == pid) { + faster = 1; + repeat = 2; + } else { + faster = 0; + lastpid = pid; + repeat = REPEAT; + } + for (i = 0; i < len; ) { + int j; + SHA512_Init(&ctx); + for (j = 0; j < repeat; j++) { + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + HX(perfstat_cpu_total(NULL, &cpustats, + sizeof(cpustats), 1) == -1, cpustats); + +#ifdef _AIX61 + HX(perfstat_cpu_total_wpar(NULL, &cpustats_wpar, + sizeof(cpustats_wpar), 1) == -1, cpustats_wpar); +#endif + + HX(perfstat_partition_total(NULL, &lparstats, + sizeof(lparstats), 1) == -1, lparstats); + + HX(perfstat_disk_total(NULL, &diskinfo, + sizeof(diskinfo), 1) == -1, diskinfo); + + HX(perfstat_netinterface_total(NULL, &netinfo, + sizeof(netinfo), 1) == -1, netinfo); + + for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); ii++) + HX(clock_gettime(cl[ii], &ts) == -1, ts); + + HX((pid = getpid()) == -1, pid); + HX((pid = getsid(pid)) == -1, pid); + HX((pid = getppid()) == -1, pid); + HX((pid = getpgid(0)) == -1, pid); + HX((e = getpriority(0, 0)) == -1, e); + + if (!faster) { + ts.tv_sec = 0; + ts.tv_nsec = 1; + (void) nanosleep(&ts, NULL); + } + + HX(sigpending(&sigset) == -1, sigset); + HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1, + sigset); + + HF(getentropy); /* an addr in this library */ + HF(printf); /* an addr in libc */ + p = (char *)&p; + HD(p); /* an addr on stack */ + p = (char *)&errno; + HD(p); /* the addr of errno */ + + if (i == 0) { + struct sockaddr_storage ss; + struct statvfs stvfs; + struct termios tios; + socklen_t ssl; + off_t off; + + /* + * Prime-sized mappings encourage fragmentation; + * thus exposing some address entropy. + */ + struct mm { + size_t npg; + void *p; + } mm[] = { + { 17, MAP_FAILED }, { 3, MAP_FAILED }, + { 11, MAP_FAILED }, { 2, MAP_FAILED }, + { 5, MAP_FAILED }, { 3, MAP_FAILED }, + { 7, MAP_FAILED }, { 1, MAP_FAILED }, + { 57, MAP_FAILED }, { 3, MAP_FAILED }, + { 131, MAP_FAILED }, { 1, MAP_FAILED }, + }; + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + HX(mm[m].p = mmap(NULL, + mm[m].npg * pgs, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, + (off_t)0), mm[m].p); + if (mm[m].p != MAP_FAILED) { + size_t mo; + + /* Touch some memory... */ + p = mm[m].p; + mo = cnt % + (mm[m].npg * pgs - 1); + p[mo] = 1; + cnt += (int)((long)(mm[m].p) + / pgs); + } + + /* Check cnts and times... */ + for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); + ii++) { + HX((e = clock_gettime(cl[ii], + &ts)) == -1, ts); + if (e != -1) + cnt += (int)ts.tv_nsec; + } + + HX((e = getrusage(RUSAGE_SELF, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + if (mm[m].p != MAP_FAILED) + munmap(mm[m].p, mm[m].npg * pgs); + mm[m].p = MAP_FAILED; + } + + HX(stat(".", &st) == -1, st); + HX(statvfs(".", &stvfs) == -1, stvfs); + + HX(stat("/", &st) == -1, st); + HX(statvfs("/", &stvfs) == -1, stvfs); + + HX((e = fstat(0, &st)) == -1, st); + if (e == -1) { + if (S_ISREG(st.st_mode) || + S_ISFIFO(st.st_mode) || + S_ISSOCK(st.st_mode)) { + HX(fstatvfs(0, &stvfs) == -1, + stvfs); + HX((off = lseek(0, (off_t)0, + SEEK_CUR)) < 0, off); + } + if (S_ISCHR(st.st_mode)) { + HX(tcgetattr(0, &tios) == -1, + tios); + } else if (S_ISSOCK(st.st_mode)) { + memset(&ss, 0, sizeof ss); + ssl = sizeof(ss); + HX(getpeername(0, + (void *)&ss, &ssl) == -1, + ss); + } + } + + HX((e = getrusage(RUSAGE_CHILDREN, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } else { + /* Subsequent hashes absorb previous result */ + HD(results); + } + + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + HD(cnt); + } + SHA512_Final(results, &ctx); + memcpy((char *)buf + i, results, MINIMUM(sizeof(results), len - i)); + i += MINIMUM(sizeof(results), len - i); + } + explicit_bzero(&ctx, sizeof ctx); + explicit_bzero(results, sizeof results); + errno = save_errno; + return (0); /* satisfied */ +} diff --git a/ext/libressl/crypto/compat/getentropy_freebsd.c b/ext/libressl/crypto/compat/getentropy_freebsd.c new file mode 100644 index 0000000..ea90ffe --- /dev/null +++ b/ext/libressl/crypto/compat/getentropy_freebsd.c @@ -0,0 +1,60 @@ +/* $OpenBSD: getentropy_freebsd.c,v 1.4 2020/10/12 22:08:33 deraadt Exp $ */ + +/* + * Copyright (c) 2014 Pawel Jakub Dawidek + * Copyright (c) 2014 Brent Cook + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://man.openbsd.org/getentropy.2 + */ + +#include +#include + +#include +#include + +/* + * Derived from lib/libc/gen/arc4random.c from FreeBSD. + */ +static size_t +getentropy_sysctl(u_char *buf, size_t size) +{ + const int mib[2] = { CTL_KERN, KERN_ARND }; + size_t len, done; + + done = 0; + + do { + len = size; + if (sysctl(mib, 2, buf, &len, NULL, 0) == -1) + return (done); + done += len; + buf += len; + size -= len; + } while (size > 0); + + return (done); +} + +int +getentropy(void *buf, size_t len) +{ + if (len <= 256 && getentropy_sysctl(buf, len) == len) + return (0); + + errno = EIO; + return (-1); +} diff --git a/ext/libressl/crypto/compat/getentropy_hpux.c b/ext/libressl/crypto/compat/getentropy_hpux.c new file mode 100644 index 0000000..c981880 --- /dev/null +++ b/ext/libressl/crypto/compat/getentropy_hpux.c @@ -0,0 +1,396 @@ +/* $OpenBSD: getentropy_hpux.c,v 1.7 2020/05/17 14:44:20 deraadt Exp $ */ + +/* + * Copyright (c) 2014 Theo de Raadt + * Copyright (c) 2014 Bob Beck + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://man.openbsd.org/getentropy.2 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#define REPEAT 5 +#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b)) + +#define HX(a, b) \ + do { \ + if ((a)) \ + HD(errno); \ + else \ + HD(b); \ + } while (0) + +#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l))) +#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x))) +#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*))) + +int getentropy(void *buf, size_t len); + +static int getentropy_urandom(void *buf, size_t len, const char *path, + int devfscheck); +static int getentropy_fallback(void *buf, size_t len); + +int +getentropy(void *buf, size_t len) +{ + int ret = -1; + + if (len > 256) { + errno = EIO; + return (-1); + } + + /* + * Try to get entropy with /dev/urandom + */ + ret = getentropy_urandom(buf, len, "/dev/urandom", 0); + if (ret != -1) + return (ret); + + /* + * Entropy collection via /dev/urandom has failed. + * + * No other API exists for collecting entropy, and we have + * no failsafe way to get it on hpux that is not sensitive + * to resource exhaustion. + * + * We have very few options: + * - Even syslog_r is unsafe to call at this low level, so + * there is no way to alert the user or program. + * - Cannot call abort() because some systems have unsafe + * corefiles. + * - Could raise(SIGKILL) resulting in silent program termination. + * - Return EIO, to hint that arc4random's stir function + * should raise(SIGKILL) + * - Do the best under the circumstances.... + * + * This code path exists to bring light to the issue that hpux + * does not provide a failsafe API for entropy collection. + * + * We hope this demonstrates that hpux should consider + * providing a new failsafe API which works in a chroot or + * when file descriptors are exhausted. + */ +#undef FAIL_INSTEAD_OF_TRYING_FALLBACK +#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK + raise(SIGKILL); +#endif + ret = getentropy_fallback(buf, len); + if (ret != -1) + return (ret); + + errno = EIO; + return (ret); +} + +static int +getentropy_urandom(void *buf, size_t len, const char *path, int devfscheck) +{ + struct stat st; + size_t i; + int fd, flags; + int save_errno = errno; + +start: + + flags = O_RDONLY; +#ifdef O_NOFOLLOW + flags |= O_NOFOLLOW; +#endif +#ifdef O_CLOEXEC + flags |= O_CLOEXEC; +#endif + fd = open(path, flags, 0); + if (fd == -1) { + if (errno == EINTR) + goto start; + goto nodevrandom; + } +#ifndef O_CLOEXEC + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); +#endif + + /* Lightly verify that the device node looks sane */ + if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode)) { + close(fd); + goto nodevrandom; + } + for (i = 0; i < len; ) { + size_t wanted = len - i; + ssize_t ret = read(fd, (char *)buf + i, wanted); + + if (ret == -1) { + if (errno == EAGAIN || errno == EINTR) + continue; + close(fd); + goto nodevrandom; + } + i += ret; + } + close(fd); + errno = save_errno; + return (0); /* satisfied */ +nodevrandom: + errno = EIO; + return (-1); +} + +static const int cl[] = { + CLOCK_REALTIME, +#ifdef CLOCK_MONOTONIC + CLOCK_MONOTONIC, +#endif +#ifdef CLOCK_MONOTONIC_RAW + CLOCK_MONOTONIC_RAW, +#endif +#ifdef CLOCK_TAI + CLOCK_TAI, +#endif +#ifdef CLOCK_VIRTUAL + CLOCK_VIRTUAL, +#endif +#ifdef CLOCK_UPTIME + CLOCK_UPTIME, +#endif +#ifdef CLOCK_PROCESS_CPUTIME_ID + CLOCK_PROCESS_CPUTIME_ID, +#endif +#ifdef CLOCK_THREAD_CPUTIME_ID + CLOCK_THREAD_CPUTIME_ID, +#endif +}; + +static int +getentropy_fallback(void *buf, size_t len) +{ + uint8_t results[SHA512_DIGEST_LENGTH]; + int save_errno = errno, e, pgs = sysconf(_SC_PAGESIZE), faster = 0, repeat; + static int cnt; + struct timespec ts; + struct timeval tv; + struct pst_vminfo pvi; + struct pst_vm_status pvs; + struct pst_dynamic pdy; + struct rusage ru; + sigset_t sigset; + struct stat st; + SHA512_CTX ctx; + static pid_t lastpid; + pid_t pid; + size_t i, ii, m; + char *p; + + pid = getpid(); + if (lastpid == pid) { + faster = 1; + repeat = 2; + } else { + faster = 0; + lastpid = pid; + repeat = REPEAT; + } + for (i = 0; i < len; ) { + int j; + SHA512_Init(&ctx); + for (j = 0; j < repeat; j++) { + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + HX(pstat_getvminfo(&pvi, sizeof(pvi), 1, 0) != 1, pvi); + HX(pstat_getprocvm(&pvs, sizeof(pvs), 0, 0) != 1, pvs); + + for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); ii++) + HX(clock_gettime(cl[ii], &ts) == -1, ts); + + HX((pid = getpid()) == -1, pid); + HX((pid = getsid(pid)) == -1, pid); + HX((pid = getppid()) == -1, pid); + HX((pid = getpgid(0)) == -1, pid); + HX((e = getpriority(0, 0)) == -1, e); + + if(pstat_getdynamic(&pdy, sizeof(pdy), 1, 0) != 1) { + HD(errno); + } else { + HD(pdy.psd_avg_1_min); + HD(pdy.psd_avg_5_min); + HD(pdy.psd_avg_15_min); + } + + if (!faster) { + ts.tv_sec = 0; + ts.tv_nsec = 1; + (void) nanosleep(&ts, NULL); + } + + HX(sigpending(&sigset) == -1, sigset); + HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1, + sigset); + + HF(getentropy); /* an addr in this library */ + HF(printf); /* an addr in libc */ + p = (char *)&p; + HD(p); /* an addr on stack */ + p = (char *)&errno; + HD(p); /* the addr of errno */ + + if (i == 0) { + struct sockaddr_storage ss; + struct statvfs stvfs; + struct termios tios; + socklen_t ssl; + off_t off; + + /* + * Prime-sized mappings encourage fragmentation; + * thus exposing some address entropy. + */ + struct mm { + size_t npg; + void *p; + } mm[] = { + { 17, MAP_FAILED }, { 3, MAP_FAILED }, + { 11, MAP_FAILED }, { 2, MAP_FAILED }, + { 5, MAP_FAILED }, { 3, MAP_FAILED }, + { 7, MAP_FAILED }, { 1, MAP_FAILED }, + { 57, MAP_FAILED }, { 3, MAP_FAILED }, + { 131, MAP_FAILED }, { 1, MAP_FAILED }, + }; + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + HX(mm[m].p = mmap(NULL, + mm[m].npg * pgs, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, + (off_t)0), mm[m].p); + if (mm[m].p != MAP_FAILED) { + size_t mo; + + /* Touch some memory... */ + p = mm[m].p; + mo = cnt % + (mm[m].npg * pgs - 1); + p[mo] = 1; + cnt += (int)((long)(mm[m].p) + / pgs); + } + + /* Check cnts and times... */ + for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); + ii++) { + HX((e = clock_gettime(cl[ii], + &ts)) == -1, ts); + if (e != -1) + cnt += (int)ts.tv_nsec; + } + + HX((e = getrusage(RUSAGE_SELF, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + if (mm[m].p != MAP_FAILED) + munmap(mm[m].p, mm[m].npg * pgs); + mm[m].p = MAP_FAILED; + } + + HX(stat(".", &st) == -1, st); + HX(statvfs(".", &stvfs) == -1, stvfs); + + HX(stat("/", &st) == -1, st); + HX(statvfs("/", &stvfs) == -1, stvfs); + + HX((e = fstat(0, &st)) == -1, st); + if (e == -1) { + if (S_ISREG(st.st_mode) || + S_ISFIFO(st.st_mode) || + S_ISSOCK(st.st_mode)) { + HX(fstatvfs(0, &stvfs) == -1, + stvfs); + HX((off = lseek(0, (off_t)0, + SEEK_CUR)) < 0, off); + } + if (S_ISCHR(st.st_mode)) { + HX(tcgetattr(0, &tios) == -1, + tios); + } else if (S_ISSOCK(st.st_mode)) { + memset(&ss, 0, sizeof ss); + ssl = sizeof(ss); + HX(getpeername(0, + (void *)&ss, &ssl) == -1, + ss); + } + } + + HX((e = getrusage(RUSAGE_CHILDREN, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } else { + /* Subsequent hashes absorb previous result */ + HD(results); + } + + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + HD(cnt); + } + SHA512_Final(results, &ctx); + memcpy((char *)buf + i, results, MINIMUM(sizeof(results), len - i)); + i += MINIMUM(sizeof(results), len - i); + } + explicit_bzero(&ctx, sizeof ctx); + explicit_bzero(results, sizeof results); + errno = save_errno; + return (0); /* satisfied */ +} diff --git a/ext/libressl/crypto/compat/getentropy_linux.c b/ext/libressl/crypto/compat/getentropy_linux.c new file mode 100644 index 0000000..bc7a6be --- /dev/null +++ b/ext/libressl/crypto/compat/getentropy_linux.c @@ -0,0 +1,525 @@ +/* $OpenBSD: getentropy_linux.c,v 1.47 2020/05/17 14:44:20 deraadt Exp $ */ + +/* + * Copyright (c) 2014 Theo de Raadt + * Copyright (c) 2014 Bob Beck + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://man.openbsd.org/getentropy.2 + */ + +#define _POSIX_C_SOURCE 199309L +#define _GNU_SOURCE 1 +#include +#include +#include +#include +#include +#ifdef SYS__sysctl +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#ifdef HAVE_GETAUXVAL +#include +#endif +#include + +#define REPEAT 5 +#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b)) + +#define HX(a, b) \ + do { \ + if ((a)) \ + HD(errno); \ + else \ + HD(b); \ + } while (0) + +#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l))) +#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x))) +#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*))) + +int getentropy(void *buf, size_t len); + +#if defined(SYS_getrandom) && defined(GRND_NONBLOCK) +static int getentropy_getrandom(void *buf, size_t len); +#endif +static int getentropy_urandom(void *buf, size_t len); +#ifdef SYS__sysctl +static int getentropy_sysctl(void *buf, size_t len); +#endif +static int getentropy_fallback(void *buf, size_t len); +static int getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data); + +int +getentropy(void *buf, size_t len) +{ + int ret = -1; + + if (len > 256) { + errno = EIO; + return (-1); + } + +#if defined(SYS_getrandom) && defined(GRND_NONBLOCK) + /* + * Try descriptor-less getrandom(), in non-blocking mode. + * + * The design of Linux getrandom is broken. It has an + * uninitialized phase coupled with blocking behaviour, which + * is unacceptable from within a library at boot time without + * possible recovery. See http://bugs.python.org/issue26839#msg267745 + */ + ret = getentropy_getrandom(buf, len); + if (ret != -1) + return (ret); +#endif + + /* + * Try to get entropy with /dev/urandom + * + * This can fail if the process is inside a chroot or if file + * descriptors are exhausted. + */ + ret = getentropy_urandom(buf, len); + if (ret != -1) + return (ret); + +#ifdef SYS__sysctl + /* + * Try to use sysctl CTL_KERN, KERN_RANDOM, RANDOM_UUID. + * sysctl is a failsafe API, so it guarantees a result. This + * should work inside a chroot, or when file descriptors are + * exhausted. + * + * However this can fail if the Linux kernel removes support + * for sysctl. Starting in 2007, there have been efforts to + * deprecate the sysctl API/ABI, and push callers towards use + * of the chroot-unavailable fd-using /proc mechanism -- + * essentially the same problems as /dev/urandom. + * + * Numerous setbacks have been encountered in their deprecation + * schedule, so as of June 2014 the kernel ABI still exists on + * most Linux architectures. The sysctl() stub in libc is missing + * on some systems. There are also reports that some kernels + * spew messages to the console. + */ + ret = getentropy_sysctl(buf, len); + if (ret != -1) + return (ret); +#endif /* SYS__sysctl */ + + /* + * Entropy collection via /dev/urandom and sysctl have failed. + * + * No other API exists for collecting entropy. See the large + * comment block above. + * + * We have very few options: + * - Even syslog_r is unsafe to call at this low level, so + * there is no way to alert the user or program. + * - Cannot call abort() because some systems have unsafe + * corefiles. + * - Could raise(SIGKILL) resulting in silent program termination. + * - Return EIO, to hint that arc4random's stir function + * should raise(SIGKILL) + * - Do the best under the circumstances.... + * + * This code path exists to bring light to the issue that Linux + * still does not provide a failsafe API for entropy collection. + * + * We hope this demonstrates that Linux should either retain their + * sysctl ABI, or consider providing a new failsafe API which + * works in a chroot or when file descriptors are exhausted. + */ +#undef FAIL_INSTEAD_OF_TRYING_FALLBACK +#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK + raise(SIGKILL); +#endif + ret = getentropy_fallback(buf, len); + if (ret != -1) + return (ret); + + errno = EIO; + return (ret); +} + +#if defined(SYS_getrandom) && defined(GRND_NONBLOCK) +static int +getentropy_getrandom(void *buf, size_t len) +{ + int pre_errno = errno; + int ret; + if (len > 256) + return (-1); + do { + ret = syscall(SYS_getrandom, buf, len, GRND_NONBLOCK); + } while (ret == -1 && errno == EINTR); + + if (ret != len) + return (-1); + errno = pre_errno; + return (0); +} +#endif + +static int +getentropy_urandom(void *buf, size_t len) +{ + struct stat st; + size_t i; + int fd, cnt, flags; + int save_errno = errno; + +start: + + flags = O_RDONLY; +#ifdef O_NOFOLLOW + flags |= O_NOFOLLOW; +#endif +#ifdef O_CLOEXEC + flags |= O_CLOEXEC; +#endif + fd = open("/dev/urandom", flags, 0); + if (fd == -1) { + if (errno == EINTR) + goto start; + goto nodevrandom; + } +#ifndef O_CLOEXEC + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); +#endif + + /* Lightly verify that the device node looks sane */ + if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode)) { + close(fd); + goto nodevrandom; + } + if (ioctl(fd, RNDGETENTCNT, &cnt) == -1) { + close(fd); + goto nodevrandom; + } + for (i = 0; i < len; ) { + size_t wanted = len - i; + ssize_t ret = read(fd, (char *)buf + i, wanted); + + if (ret == -1) { + if (errno == EAGAIN || errno == EINTR) + continue; + close(fd); + goto nodevrandom; + } + i += ret; + } + close(fd); + errno = save_errno; + return (0); /* satisfied */ +nodevrandom: + errno = EIO; + return (-1); +} + +#ifdef SYS__sysctl +static int +getentropy_sysctl(void *buf, size_t len) +{ + static int mib[] = { CTL_KERN, KERN_RANDOM, RANDOM_UUID }; + size_t i; + int save_errno = errno; + + for (i = 0; i < len; ) { + size_t chunk = MINIMUM(len - i, 16); + + /* SYS__sysctl because some systems already removed sysctl() */ + struct __sysctl_args args = { + .name = mib, + .nlen = 3, + .oldval = (char *)buf + i, + .oldlenp = &chunk, + }; + if (syscall(SYS__sysctl, &args) != 0) + goto sysctlfailed; + i += chunk; + } + errno = save_errno; + return (0); /* satisfied */ +sysctlfailed: + errno = EIO; + return (-1); +} +#endif /* SYS__sysctl */ + +static const int cl[] = { + CLOCK_REALTIME, +#ifdef CLOCK_MONOTONIC + CLOCK_MONOTONIC, +#endif +#ifdef CLOCK_MONOTONIC_RAW + CLOCK_MONOTONIC_RAW, +#endif +#ifdef CLOCK_TAI + CLOCK_TAI, +#endif +#ifdef CLOCK_VIRTUAL + CLOCK_VIRTUAL, +#endif +#ifdef CLOCK_UPTIME + CLOCK_UPTIME, +#endif +#ifdef CLOCK_PROCESS_CPUTIME_ID + CLOCK_PROCESS_CPUTIME_ID, +#endif +#ifdef CLOCK_THREAD_CPUTIME_ID + CLOCK_THREAD_CPUTIME_ID, +#endif +}; + +static int +getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data) +{ + SHA512_CTX *ctx = data; + + SHA512_Update(ctx, &info->dlpi_addr, sizeof (info->dlpi_addr)); + return (0); +} + +static int +getentropy_fallback(void *buf, size_t len) +{ + uint8_t results[SHA512_DIGEST_LENGTH]; + int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat; + static int cnt; + struct timespec ts; + struct timeval tv; + struct rusage ru; + sigset_t sigset; + struct stat st; + SHA512_CTX ctx; + static pid_t lastpid; + pid_t pid; + size_t i, ii, m; + char *p; + + pid = getpid(); + if (lastpid == pid) { + faster = 1; + repeat = 2; + } else { + faster = 0; + lastpid = pid; + repeat = REPEAT; + } + for (i = 0; i < len; ) { + int j; + SHA512_Init(&ctx); + for (j = 0; j < repeat; j++) { + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + dl_iterate_phdr(getentropy_phdr, &ctx); + + for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); ii++) + HX(clock_gettime(cl[ii], &ts) == -1, ts); + + HX((pid = getpid()) == -1, pid); + HX((pid = getsid(pid)) == -1, pid); + HX((pid = getppid()) == -1, pid); + HX((pid = getpgid(0)) == -1, pid); + HX((e = getpriority(0, 0)) == -1, e); + + if (!faster) { + ts.tv_sec = 0; + ts.tv_nsec = 1; + (void) nanosleep(&ts, NULL); + } + + HX(sigpending(&sigset) == -1, sigset); + HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1, + sigset); + + HF(getentropy); /* an addr in this library */ + HF(printf); /* an addr in libc */ + p = (char *)&p; + HD(p); /* an addr on stack */ + p = (char *)&errno; + HD(p); /* the addr of errno */ + + if (i == 0) { + struct sockaddr_storage ss; + struct statvfs stvfs; + struct termios tios; + struct statfs stfs; + socklen_t ssl; + off_t off; + + /* + * Prime-sized mappings encourage fragmentation; + * thus exposing some address entropy. + */ + struct mm { + size_t npg; + void *p; + } mm[] = { + { 17, MAP_FAILED }, { 3, MAP_FAILED }, + { 11, MAP_FAILED }, { 2, MAP_FAILED }, + { 5, MAP_FAILED }, { 3, MAP_FAILED }, + { 7, MAP_FAILED }, { 1, MAP_FAILED }, + { 57, MAP_FAILED }, { 3, MAP_FAILED }, + { 131, MAP_FAILED }, { 1, MAP_FAILED }, + }; + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + HX(mm[m].p = mmap(NULL, + mm[m].npg * pgs, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, + (off_t)0), mm[m].p); + if (mm[m].p != MAP_FAILED) { + size_t mo; + + /* Touch some memory... */ + p = mm[m].p; + mo = cnt % + (mm[m].npg * pgs - 1); + p[mo] = 1; + cnt += (int)((long)(mm[m].p) + / pgs); + } + + /* Check cnts and times... */ + for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); + ii++) { + HX((e = clock_gettime(cl[ii], + &ts)) == -1, ts); + if (e != -1) + cnt += (int)ts.tv_nsec; + } + + HX((e = getrusage(RUSAGE_SELF, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + if (mm[m].p != MAP_FAILED) + munmap(mm[m].p, mm[m].npg * pgs); + mm[m].p = MAP_FAILED; + } + + HX(stat(".", &st) == -1, st); + HX(statvfs(".", &stvfs) == -1, stvfs); + HX(statfs(".", &stfs) == -1, stfs); + + HX(stat("/", &st) == -1, st); + HX(statvfs("/", &stvfs) == -1, stvfs); + HX(statfs("/", &stfs) == -1, stfs); + + HX((e = fstat(0, &st)) == -1, st); + if (e == -1) { + if (S_ISREG(st.st_mode) || + S_ISFIFO(st.st_mode) || + S_ISSOCK(st.st_mode)) { + HX(fstatvfs(0, &stvfs) == -1, + stvfs); + HX(fstatfs(0, &stfs) == -1, + stfs); + HX((off = lseek(0, (off_t)0, + SEEK_CUR)) < 0, off); + } + if (S_ISCHR(st.st_mode)) { + HX(tcgetattr(0, &tios) == -1, + tios); + } else if (S_ISSOCK(st.st_mode)) { + memset(&ss, 0, sizeof ss); + ssl = sizeof(ss); + HX(getpeername(0, + (void *)&ss, &ssl) == -1, + ss); + } + } + + HX((e = getrusage(RUSAGE_CHILDREN, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } else { + /* Subsequent hashes absorb previous result */ + HD(results); + } + + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + HD(cnt); + } +#ifdef HAVE_GETAUXVAL +#ifdef AT_RANDOM + /* Not as random as you think but we take what we are given */ + p = (char *) getauxval(AT_RANDOM); + if (p) + HR(p, 16); +#endif +#ifdef AT_SYSINFO_EHDR + p = (char *) getauxval(AT_SYSINFO_EHDR); + if (p) + HR(p, pgs); +#endif +#ifdef AT_BASE + p = (char *) getauxval(AT_BASE); + if (p) + HD(p); +#endif +#endif + + SHA512_Final(results, &ctx); + memcpy((char *)buf + i, results, MINIMUM(sizeof(results), len - i)); + i += MINIMUM(sizeof(results), len - i); + } + explicit_bzero(&ctx, sizeof ctx); + explicit_bzero(results, sizeof results); + errno = save_errno; + return (0); /* satisfied */ +} diff --git a/ext/libressl/crypto/compat/getentropy_netbsd.c b/ext/libressl/crypto/compat/getentropy_netbsd.c new file mode 100644 index 0000000..5dc8959 --- /dev/null +++ b/ext/libressl/crypto/compat/getentropy_netbsd.c @@ -0,0 +1,62 @@ +/* $OpenBSD: getentropy_netbsd.c,v 1.4 2020/10/12 22:08:33 deraadt Exp $ */ + +/* + * Copyright (c) 2014 Pawel Jakub Dawidek + * Copyright (c) 2014 Brent Cook + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://man.openbsd.org/getentropy.2 + */ + +#include +#include + +#include +#include + +/* + * Derived from lib/libc/gen/arc4random.c from FreeBSD. + */ +static size_t +getentropy_sysctl(u_char *buf, size_t size) +{ + const int mib[2] = { CTL_KERN, KERN_ARND }; + size_t len, done; + + done = 0; + + do { + len = size; + if (sysctl(mib, 2, buf, &len, NULL, 0) == -1) + return (done); + done += len; + buf += len; + size -= len; + } while (size > 0); + + return (done); +} + +int +getentropy(void *buf, size_t len) +{ + if (len <= 256 && + getentropy_sysctl(buf, len) == len) { + return (0); + } + + errno = EIO; + return (-1); +} diff --git a/ext/libressl/crypto/compat/getentropy_osx.c b/ext/libressl/crypto/compat/getentropy_osx.c new file mode 100644 index 0000000..5d4067b --- /dev/null +++ b/ext/libressl/crypto/compat/getentropy_osx.c @@ -0,0 +1,417 @@ +/* $OpenBSD: getentropy_osx.c,v 1.13 2020/05/17 14:44:20 deraadt Exp $ */ + +/* + * Copyright (c) 2014 Theo de Raadt + * Copyright (c) 2014 Bob Beck + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://man.openbsd.org/getentropy.2 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if TARGET_OS_OSX +#include +#include +#endif +#include +#include +#if TARGET_OS_OSX +#include +#include +#include +#include +#endif +#include +#define SHA512_Update(a, b, c) (CC_SHA512_Update((a), (b), (c))) +#define SHA512_Init(xxx) (CC_SHA512_Init((xxx))) +#define SHA512_Final(xxx, yyy) (CC_SHA512_Final((xxx), (yyy))) +#define SHA512_CTX CC_SHA512_CTX +#define SHA512_DIGEST_LENGTH CC_SHA512_DIGEST_LENGTH + +#define REPEAT 5 +#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b)) + +#define HX(a, b) \ + do { \ + if ((a)) \ + HD(errno); \ + else \ + HD(b); \ + } while (0) + +#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l))) +#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x))) +#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*))) + +int getentropy(void *buf, size_t len); + +static int getentropy_urandom(void *buf, size_t len); +static int getentropy_fallback(void *buf, size_t len); + +int +getentropy(void *buf, size_t len) +{ + int ret = -1; + + if (len > 256) { + errno = EIO; + return (-1); + } + + /* + * Try to get entropy with /dev/urandom + * + * This can fail if the process is inside a chroot or if file + * descriptors are exhausted. + */ + ret = getentropy_urandom(buf, len); + if (ret != -1) + return (ret); + + /* + * Entropy collection via /dev/urandom and sysctl have failed. + * + * No other API exists for collecting entropy, and we have + * no failsafe way to get it on OSX that is not sensitive + * to resource exhaustion. + * + * We have very few options: + * - Even syslog_r is unsafe to call at this low level, so + * there is no way to alert the user or program. + * - Cannot call abort() because some systems have unsafe + * corefiles. + * - Could raise(SIGKILL) resulting in silent program termination. + * - Return EIO, to hint that arc4random's stir function + * should raise(SIGKILL) + * - Do the best under the circumstances.... + * + * This code path exists to bring light to the issue that OSX + * does not provide a failsafe API for entropy collection. + * + * We hope this demonstrates that OSX should consider + * providing a new failsafe API which works in a chroot or + * when file descriptors are exhausted. + */ +#undef FAIL_INSTEAD_OF_TRYING_FALLBACK +#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK + raise(SIGKILL); +#endif + ret = getentropy_fallback(buf, len); + if (ret != -1) + return (ret); + + errno = EIO; + return (ret); +} + +static int +getentropy_urandom(void *buf, size_t len) +{ + struct stat st; + size_t i; + int fd, flags; + int save_errno = errno; + +start: + + flags = O_RDONLY; +#ifdef O_NOFOLLOW + flags |= O_NOFOLLOW; +#endif +#ifdef O_CLOEXEC + flags |= O_CLOEXEC; +#endif + fd = open("/dev/urandom", flags, 0); + if (fd == -1) { + if (errno == EINTR) + goto start; + goto nodevrandom; + } +#ifndef O_CLOEXEC + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); +#endif + + /* Lightly verify that the device node looks sane */ + if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode)) { + close(fd); + goto nodevrandom; + } + for (i = 0; i < len; ) { + size_t wanted = len - i; + ssize_t ret = read(fd, (char *)buf + i, wanted); + + if (ret == -1) { + if (errno == EAGAIN || errno == EINTR) + continue; + close(fd); + goto nodevrandom; + } + i += ret; + } + close(fd); + errno = save_errno; + return (0); /* satisfied */ +nodevrandom: + errno = EIO; + return (-1); +} + +#if TARGET_OS_OSX +static int tcpmib[] = { CTL_NET, AF_INET, IPPROTO_TCP, TCPCTL_STATS }; +static int udpmib[] = { CTL_NET, AF_INET, IPPROTO_UDP, UDPCTL_STATS }; +static int ipmib[] = { CTL_NET, AF_INET, IPPROTO_IP, IPCTL_STATS }; +#endif +static int kmib[] = { CTL_KERN, KERN_USRSTACK }; +static int hwmib[] = { CTL_HW, HW_USERMEM }; + +static int +getentropy_fallback(void *buf, size_t len) +{ + uint8_t results[SHA512_DIGEST_LENGTH]; + int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat; + static int cnt; + struct timespec ts; + struct timeval tv; + struct rusage ru; + sigset_t sigset; + struct stat st; + SHA512_CTX ctx; + static pid_t lastpid; + pid_t pid; + size_t i, ii, m; + char *p; +#if TARGET_OS_OSX + struct tcpstat tcpstat; + struct udpstat udpstat; + struct ipstat ipstat; +#endif + u_int64_t mach_time; + unsigned int idata; + void *addr; + + pid = getpid(); + if (lastpid == pid) { + faster = 1; + repeat = 2; + } else { + faster = 0; + lastpid = pid; + repeat = REPEAT; + } + for (i = 0; i < len; ) { + int j; + SHA512_Init(&ctx); + for (j = 0; j < repeat; j++) { + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + mach_time = mach_absolute_time(); + HD(mach_time); + + ii = sizeof(addr); + HX(sysctl(kmib, sizeof(kmib) / sizeof(kmib[0]), + &addr, &ii, NULL, 0) == -1, addr); + + ii = sizeof(idata); + HX(sysctl(hwmib, sizeof(hwmib) / sizeof(hwmib[0]), + &idata, &ii, NULL, 0) == -1, idata); + +#if TARGET_OS_OSX + ii = sizeof(tcpstat); + HX(sysctl(tcpmib, sizeof(tcpmib) / sizeof(tcpmib[0]), + &tcpstat, &ii, NULL, 0) == -1, tcpstat); + + ii = sizeof(udpstat); + HX(sysctl(udpmib, sizeof(udpmib) / sizeof(udpmib[0]), + &udpstat, &ii, NULL, 0) == -1, udpstat); + + ii = sizeof(ipstat); + HX(sysctl(ipmib, sizeof(ipmib) / sizeof(ipmib[0]), + &ipstat, &ii, NULL, 0) == -1, ipstat); +#endif + + HX((pid = getpid()) == -1, pid); + HX((pid = getsid(pid)) == -1, pid); + HX((pid = getppid()) == -1, pid); + HX((pid = getpgid(0)) == -1, pid); + HX((e = getpriority(0, 0)) == -1, e); + + if (!faster) { + ts.tv_sec = 0; + ts.tv_nsec = 1; + (void) nanosleep(&ts, NULL); + } + + HX(sigpending(&sigset) == -1, sigset); + HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1, + sigset); + + HF(getentropy); /* an addr in this library */ + HF(printf); /* an addr in libc */ + p = (char *)&p; + HD(p); /* an addr on stack */ + p = (char *)&errno; + HD(p); /* the addr of errno */ + + if (i == 0) { + struct sockaddr_storage ss; + struct statvfs stvfs; + struct termios tios; + struct statfs stfs; + socklen_t ssl; + off_t off; + + /* + * Prime-sized mappings encourage fragmentation; + * thus exposing some address entropy. + */ + struct mm { + size_t npg; + void *p; + } mm[] = { + { 17, MAP_FAILED }, { 3, MAP_FAILED }, + { 11, MAP_FAILED }, { 2, MAP_FAILED }, + { 5, MAP_FAILED }, { 3, MAP_FAILED }, + { 7, MAP_FAILED }, { 1, MAP_FAILED }, + { 57, MAP_FAILED }, { 3, MAP_FAILED }, + { 131, MAP_FAILED }, { 1, MAP_FAILED }, + }; + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + HX(mm[m].p = mmap(NULL, + mm[m].npg * pgs, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, + (off_t)0), mm[m].p); + if (mm[m].p != MAP_FAILED) { + size_t mo; + + /* Touch some memory... */ + p = mm[m].p; + mo = cnt % + (mm[m].npg * pgs - 1); + p[mo] = 1; + cnt += (int)((long)(mm[m].p) + / pgs); + } + + /* Check cnts and times... */ + mach_time = mach_absolute_time(); + HD(mach_time); + cnt += (int)mach_time; + + HX((e = getrusage(RUSAGE_SELF, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + if (mm[m].p != MAP_FAILED) + munmap(mm[m].p, mm[m].npg * pgs); + mm[m].p = MAP_FAILED; + } + + HX(stat(".", &st) == -1, st); + HX(statvfs(".", &stvfs) == -1, stvfs); + HX(statfs(".", &stfs) == -1, stfs); + + HX(stat("/", &st) == -1, st); + HX(statvfs("/", &stvfs) == -1, stvfs); + HX(statfs("/", &stfs) == -1, stfs); + + HX((e = fstat(0, &st)) == -1, st); + if (e == -1) { + if (S_ISREG(st.st_mode) || + S_ISFIFO(st.st_mode) || + S_ISSOCK(st.st_mode)) { + HX(fstatvfs(0, &stvfs) == -1, + stvfs); + HX(fstatfs(0, &stfs) == -1, + stfs); + HX((off = lseek(0, (off_t)0, + SEEK_CUR)) < 0, off); + } + if (S_ISCHR(st.st_mode)) { + HX(tcgetattr(0, &tios) == -1, + tios); + } else if (S_ISSOCK(st.st_mode)) { + memset(&ss, 0, sizeof ss); + ssl = sizeof(ss); + HX(getpeername(0, + (void *)&ss, &ssl) == -1, + ss); + } + } + + HX((e = getrusage(RUSAGE_CHILDREN, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } else { + /* Subsequent hashes absorb previous result */ + HD(results); + } + + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + HD(cnt); + } + + SHA512_Final(results, &ctx); + memcpy((char *)buf + i, results, MINIMUM(sizeof(results), len - i)); + i += MINIMUM(sizeof(results), len - i); + } + explicit_bzero(&ctx, sizeof ctx); + explicit_bzero(results, sizeof results); + errno = save_errno; + return (0); /* satisfied */ +} diff --git a/ext/libressl/crypto/compat/getentropy_solaris.c b/ext/libressl/crypto/compat/getentropy_solaris.c new file mode 100644 index 0000000..cf5b9bf --- /dev/null +++ b/ext/libressl/crypto/compat/getentropy_solaris.c @@ -0,0 +1,422 @@ +/* $OpenBSD: getentropy_solaris.c,v 1.14 2020/05/17 14:44:20 deraadt Exp $ */ + +/* + * Copyright (c) 2014 Theo de Raadt + * Copyright (c) 2014 Bob Beck + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://man.openbsd.org/getentropy.2 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define SHA512_Init SHA512Init +#define SHA512_Update SHA512Update +#define SHA512_Final SHA512Final + +#include +#include +#include + +#define REPEAT 5 +#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b)) + +#define HX(a, b) \ + do { \ + if ((a)) \ + HD(errno); \ + else \ + HD(b); \ + } while (0) + +#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l))) +#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x))) +#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*))) + +int getentropy(void *buf, size_t len); + +static int getentropy_urandom(void *buf, size_t len, const char *path, + int devfscheck); +static int getentropy_fallback(void *buf, size_t len); +static int getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data); + +int +getentropy(void *buf, size_t len) +{ + int ret = -1; + + if (len > 256) { + errno = EIO; + return (-1); + } + + /* + * Try to get entropy with /dev/urandom + * + * Solaris provides /dev/urandom as a symbolic link to + * /devices/pseudo/random@0:urandom which is provided by + * a devfs filesystem. Best practice is to use O_NOFOLLOW, + * so we must try the unpublished name directly. + * + * This can fail if the process is inside a chroot which lacks + * the devfs mount, or if file descriptors are exhausted. + */ + ret = getentropy_urandom(buf, len, + "/devices/pseudo/random@0:urandom", 1); + if (ret != -1) + return (ret); + + /* + * Unfortunately, chroot spaces on Solaris are sometimes setup + * with direct device node of the well-known /dev/urandom name + * (perhaps to avoid dragging all of devfs into the space). + * + * This can fail if the process is inside a chroot or if file + * descriptors are exhausted. + */ + ret = getentropy_urandom(buf, len, "/dev/urandom", 0); + if (ret != -1) + return (ret); + + /* + * Entropy collection via /dev/urandom has failed. + * + * No other API exists for collecting entropy, and we have + * no failsafe way to get it on Solaris that is not sensitive + * to resource exhaustion. + * + * We have very few options: + * - Even syslog_r is unsafe to call at this low level, so + * there is no way to alert the user or program. + * - Cannot call abort() because some systems have unsafe + * corefiles. + * - Could raise(SIGKILL) resulting in silent program termination. + * - Return EIO, to hint that arc4random's stir function + * should raise(SIGKILL) + * - Do the best under the circumstances.... + * + * This code path exists to bring light to the issue that Solaris + * does not provide a failsafe API for entropy collection. + * + * We hope this demonstrates that Solaris should consider + * providing a new failsafe API which works in a chroot or + * when file descriptors are exhausted. + */ +#undef FAIL_INSTEAD_OF_TRYING_FALLBACK +#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK + raise(SIGKILL); +#endif + ret = getentropy_fallback(buf, len); + if (ret != -1) + return (ret); + + errno = EIO; + return (ret); +} + +static int +getentropy_urandom(void *buf, size_t len, const char *path, int devfscheck) +{ + struct stat st; + size_t i; + int fd, flags; + int save_errno = errno; + +start: + + flags = O_RDONLY; +#ifdef O_NOFOLLOW + flags |= O_NOFOLLOW; +#endif +#ifdef O_CLOEXEC + flags |= O_CLOEXEC; +#endif + fd = open(path, flags, 0); + if (fd == -1) { + if (errno == EINTR) + goto start; + goto nodevrandom; + } +#ifndef O_CLOEXEC + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); +#endif + + /* Lightly verify that the device node looks sane */ + if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode) || + (devfscheck && (strcmp(st.st_fstype, "devfs") != 0))) { + close(fd); + goto nodevrandom; + } + for (i = 0; i < len; ) { + size_t wanted = len - i; + ssize_t ret = read(fd, (char *)buf + i, wanted); + + if (ret == -1) { + if (errno == EAGAIN || errno == EINTR) + continue; + close(fd); + goto nodevrandom; + } + i += ret; + } + close(fd); + errno = save_errno; + return (0); /* satisfied */ +nodevrandom: + errno = EIO; + return (-1); +} + +static const int cl[] = { + CLOCK_REALTIME, +#ifdef CLOCK_MONOTONIC + CLOCK_MONOTONIC, +#endif +#ifdef CLOCK_MONOTONIC_RAW + CLOCK_MONOTONIC_RAW, +#endif +#ifdef CLOCK_TAI + CLOCK_TAI, +#endif +#ifdef CLOCK_VIRTUAL + CLOCK_VIRTUAL, +#endif +#ifdef CLOCK_UPTIME + CLOCK_UPTIME, +#endif +#ifdef CLOCK_PROCESS_CPUTIME_ID + CLOCK_PROCESS_CPUTIME_ID, +#endif +#ifdef CLOCK_THREAD_CPUTIME_ID + CLOCK_THREAD_CPUTIME_ID, +#endif +}; + +static int +getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data) +{ + SHA512_CTX *ctx = data; + + SHA512_Update(ctx, &info->dlpi_addr, sizeof (info->dlpi_addr)); + return (0); +} + +static int +getentropy_fallback(void *buf, size_t len) +{ + uint8_t results[SHA512_DIGEST_LENGTH]; + int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat; + static int cnt; + struct timespec ts; + struct timeval tv; + double loadavg[3]; + struct rusage ru; + sigset_t sigset; + struct stat st; + SHA512_CTX ctx; + static pid_t lastpid; + pid_t pid; + size_t i, ii, m; + char *p; + + pid = getpid(); + if (lastpid == pid) { + faster = 1; + repeat = 2; + } else { + faster = 0; + lastpid = pid; + repeat = REPEAT; + } + for (i = 0; i < len; ) { + int j; + SHA512_Init(&ctx); + for (j = 0; j < repeat; j++) { + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + dl_iterate_phdr(getentropy_phdr, &ctx); + + for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); ii++) + HX(clock_gettime(cl[ii], &ts) == -1, ts); + + HX((pid = getpid()) == -1, pid); + HX((pid = getsid(pid)) == -1, pid); + HX((pid = getppid()) == -1, pid); + HX((pid = getpgid(0)) == -1, pid); + HX((e = getpriority(0, 0)) == -1, e); + HX((getloadavg(loadavg, 3) == -1), loadavg); + + if (!faster) { + ts.tv_sec = 0; + ts.tv_nsec = 1; + (void) nanosleep(&ts, NULL); + } + + HX(sigpending(&sigset) == -1, sigset); + HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1, + sigset); + + HF(getentropy); /* an addr in this library */ + HF(printf); /* an addr in libc */ + p = (char *)&p; + HD(p); /* an addr on stack */ + p = (char *)&errno; + HD(p); /* the addr of errno */ + + if (i == 0) { + struct sockaddr_storage ss; + struct statvfs stvfs; + struct termios tios; + socklen_t ssl; + off_t off; + + /* + * Prime-sized mappings encourage fragmentation; + * thus exposing some address entropy. + */ + struct mm { + size_t npg; + void *p; + } mm[] = { + { 17, MAP_FAILED }, { 3, MAP_FAILED }, + { 11, MAP_FAILED }, { 2, MAP_FAILED }, + { 5, MAP_FAILED }, { 3, MAP_FAILED }, + { 7, MAP_FAILED }, { 1, MAP_FAILED }, + { 57, MAP_FAILED }, { 3, MAP_FAILED }, + { 131, MAP_FAILED }, { 1, MAP_FAILED }, + }; + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + HX(mm[m].p = mmap(NULL, + mm[m].npg * pgs, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, + (off_t)0), mm[m].p); + if (mm[m].p != MAP_FAILED) { + size_t mo; + + /* Touch some memory... */ + p = mm[m].p; + mo = cnt % + (mm[m].npg * pgs - 1); + p[mo] = 1; + cnt += (int)((long)(mm[m].p) + / pgs); + } + + /* Check cnts and times... */ + for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); + ii++) { + HX((e = clock_gettime(cl[ii], + &ts)) == -1, ts); + if (e != -1) + cnt += (int)ts.tv_nsec; + } + + HX((e = getrusage(RUSAGE_SELF, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + if (mm[m].p != MAP_FAILED) + munmap(mm[m].p, mm[m].npg * pgs); + mm[m].p = MAP_FAILED; + } + + HX(stat(".", &st) == -1, st); + HX(statvfs(".", &stvfs) == -1, stvfs); + + HX(stat("/", &st) == -1, st); + HX(statvfs("/", &stvfs) == -1, stvfs); + + HX((e = fstat(0, &st)) == -1, st); + if (e == -1) { + if (S_ISREG(st.st_mode) || + S_ISFIFO(st.st_mode) || + S_ISSOCK(st.st_mode)) { + HX(fstatvfs(0, &stvfs) == -1, + stvfs); + HX((off = lseek(0, (off_t)0, + SEEK_CUR)) < 0, off); + } + if (S_ISCHR(st.st_mode)) { + HX(tcgetattr(0, &tios) == -1, + tios); + } else if (S_ISSOCK(st.st_mode)) { + memset(&ss, 0, sizeof ss); + ssl = sizeof(ss); + HX(getpeername(0, + (void *)&ss, &ssl) == -1, + ss); + } + } + + HX((e = getrusage(RUSAGE_CHILDREN, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } else { + /* Subsequent hashes absorb previous result */ + HD(results); + } + + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + HD(cnt); + } + SHA512_Final(results, &ctx); + memcpy((char *)buf + i, results, MINIMUM(sizeof(results), len - i)); + i += MINIMUM(sizeof(results), len - i); + } + explicit_bzero(&ctx, sizeof ctx); + explicit_bzero(results, sizeof results); + errno = save_errno; + return (0); /* satisfied */ +} diff --git a/ext/libressl/crypto/compat/getentropy_win.c b/ext/libressl/crypto/compat/getentropy_win.c new file mode 100644 index 0000000..64514b3 --- /dev/null +++ b/ext/libressl/crypto/compat/getentropy_win.c @@ -0,0 +1,50 @@ +/* $OpenBSD: getentropy_win.c,v 1.6 2020/11/11 10:41:24 bcook Exp $ */ + +/* + * Copyright (c) 2014, Theo de Raadt + * Copyright (c) 2014, Bob Beck + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://man.openbsd.org/getentropy.2 + */ + +#include +#include +#include +#include +#include + +int getentropy(void *buf, size_t len); + +/* + * On Windows, BCryptGenRandom with BCRYPT_USE_SYSTEM_PREFERRED_RNG is supposed + * to be a well-seeded, cryptographically strong random number generator. + * https://docs.microsoft.com/en-us/windows/win32/api/bcrypt/nf-bcrypt-bcryptgenrandom + */ +int +getentropy(void *buf, size_t len) +{ + if (len > 256) { + errno = EIO; + return (-1); + } + + if (FAILED(BCryptGenRandom(NULL, buf, len, BCRYPT_USE_SYSTEM_PREFERRED_RNG))) { + errno = EIO; + return (-1); + } + + return (0); +} diff --git a/ext/libressl/crypto/compat/timingsafe_bcmp.c b/ext/libressl/crypto/compat/timingsafe_bcmp.c new file mode 100644 index 0000000..552e844 --- /dev/null +++ b/ext/libressl/crypto/compat/timingsafe_bcmp.c @@ -0,0 +1,29 @@ +/* $OpenBSD: timingsafe_bcmp.c,v 1.3 2015/08/31 02:53:57 guenther Exp $ */ +/* + * Copyright (c) 2010 Damien Miller. All rights reserved. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include + +int +timingsafe_bcmp(const void *b1, const void *b2, size_t n) +{ + const unsigned char *p1 = b1, *p2 = b2; + int ret = 0; + + for (; n > 0; n--) + ret |= *p1++ ^ *p2++; + return (ret != 0); +} diff --git a/ext/libressl/crypto/compat/timingsafe_memcmp.c b/ext/libressl/crypto/compat/timingsafe_memcmp.c new file mode 100644 index 0000000..bb210a3 --- /dev/null +++ b/ext/libressl/crypto/compat/timingsafe_memcmp.c @@ -0,0 +1,46 @@ +/* $OpenBSD: timingsafe_memcmp.c,v 1.2 2015/08/31 02:53:57 guenther Exp $ */ +/* + * Copyright (c) 2014 Google Inc. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include + +int +timingsafe_memcmp(const void *b1, const void *b2, size_t len) +{ + const unsigned char *p1 = b1, *p2 = b2; + size_t i; + int res = 0, done = 0; + + for (i = 0; i < len; i++) { + /* lt is -1 if p1[i] < p2[i]; else 0. */ + int lt = (p1[i] - p2[i]) >> CHAR_BIT; + + /* gt is -1 if p1[i] > p2[i]; else 0. */ + int gt = (p2[i] - p1[i]) >> CHAR_BIT; + + /* cmp is 1 if p1[i] > p2[i]; -1 if p1[i] < p2[i]; else 0. */ + int cmp = lt - gt; + + /* set res = cmp if !done. */ + res |= cmp & ~done; + + /* set done if p1[i] != p2[i]. */ + done |= lt | gt; + } + + return (res); +} diff --git a/ext/libressl/crypto/curve25519/Makefile b/ext/libressl/crypto/curve25519/Makefile new file mode 100644 index 0000000..459383d --- /dev/null +++ b/ext/libressl/crypto/curve25519/Makefile @@ -0,0 +1,14 @@ +include ../../ssl_common.mk +CFLAGS += -D__BEGIN_HIDDEN_DECLS= -D__END_HIDDEN_DECLS= -DED25519 + +obj = curve25519.o curve25519-generic.o + + +all: $(obj) +dep: all + +%.o: %.c + $(CC) $(CFLAGS) -c $< + +clean: + rm -f *.o *.a diff --git a/ext/libressl/crypto/curve25519/curve25519-generic.c b/ext/libressl/crypto/curve25519/curve25519-generic.c new file mode 100644 index 0000000..d533731 --- /dev/null +++ b/ext/libressl/crypto/curve25519/curve25519-generic.c @@ -0,0 +1,34 @@ +/* $OpenBSD: curve25519-generic.c,v 1.2 2019/05/11 15:55:52 tb Exp $ */ +/* + * Copyright (c) 2015, Google Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP + * 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as + * public domain but this file has the ISC license just to keep licencing + * simple. + * + * The field functions are shared by Ed25519 and X25519 where possible. + */ + +#include "curve25519_internal.h" + +void +x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32], + const uint8_t point[32]) +{ + x25519_scalar_mult_generic(out, scalar, point); +} diff --git a/ext/libressl/crypto/curve25519/curve25519.c b/ext/libressl/crypto/curve25519/curve25519.c new file mode 100644 index 0000000..13b54c3 --- /dev/null +++ b/ext/libressl/crypto/curve25519/curve25519.c @@ -0,0 +1,4935 @@ +/* $OpenBSD: curve25519.c,v 1.5 2019/05/11 15:55:52 tb Exp $ */ +/* + * Copyright (c) 2015, Google Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP + * 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as + * public domain but this file has the ISC license just to keep licencing + * simple. + * + * The field functions are shared by Ed25519 and X25519 where possible. + */ + +#include +#include + +#include + +#ifdef ED25519 +#include +#endif + +#include "curve25519_internal.h" + +static const int64_t kBottom25Bits = 0x1ffffffLL; +static const int64_t kBottom26Bits = 0x3ffffffLL; +static const int64_t kTop39Bits = 0xfffffffffe000000LL; +static const int64_t kTop38Bits = 0xfffffffffc000000LL; + +static uint64_t load_3(const uint8_t *in) { + uint64_t result; + result = (uint64_t)in[0]; + result |= ((uint64_t)in[1]) << 8; + result |= ((uint64_t)in[2]) << 16; + return result; +} + +static uint64_t load_4(const uint8_t *in) { + uint64_t result; + result = (uint64_t)in[0]; + result |= ((uint64_t)in[1]) << 8; + result |= ((uint64_t)in[2]) << 16; + result |= ((uint64_t)in[3]) << 24; + return result; +} + +static void fe_frombytes(fe h, const uint8_t *s) { + /* Ignores top bit of h. */ + int64_t h0 = load_4(s); + int64_t h1 = load_3(s + 4) << 6; + int64_t h2 = load_3(s + 7) << 5; + int64_t h3 = load_3(s + 10) << 3; + int64_t h4 = load_3(s + 13) << 2; + int64_t h5 = load_4(s + 16); + int64_t h6 = load_3(s + 20) << 7; + int64_t h7 = load_3(s + 23) << 5; + int64_t h8 = load_3(s + 26) << 4; + int64_t h9 = (load_3(s + 29) & 8388607) << 2; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + + carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits; + carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits; + carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits; + carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits; + carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits; + + carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; + carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits; + carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; + carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits; + carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits; + + h[0] = h0; + h[1] = h1; + h[2] = h2; + h[3] = h3; + h[4] = h4; + h[5] = h5; + h[6] = h6; + h[7] = h7; + h[8] = h8; + h[9] = h9; +} + +/* Preconditions: + * |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. + * + * Write p=2^255-19; q=floor(h/p). + * Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). + * + * Proof: + * Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. + * Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4. + * + * Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). + * Then 0> 25; + q = (h0 + q) >> 26; + q = (h1 + q) >> 25; + q = (h2 + q) >> 26; + q = (h3 + q) >> 25; + q = (h4 + q) >> 26; + q = (h5 + q) >> 25; + q = (h6 + q) >> 26; + q = (h7 + q) >> 25; + q = (h8 + q) >> 26; + q = (h9 + q) >> 25; + + /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */ + h0 += 19 * q; + /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */ + + h1 += h0 >> 26; h0 &= kBottom26Bits; + h2 += h1 >> 25; h1 &= kBottom25Bits; + h3 += h2 >> 26; h2 &= kBottom26Bits; + h4 += h3 >> 25; h3 &= kBottom25Bits; + h5 += h4 >> 26; h4 &= kBottom26Bits; + h6 += h5 >> 25; h5 &= kBottom25Bits; + h7 += h6 >> 26; h6 &= kBottom26Bits; + h8 += h7 >> 25; h7 &= kBottom25Bits; + h9 += h8 >> 26; h8 &= kBottom26Bits; + h9 &= kBottom25Bits; + /* h10 = carry9 */ + + /* Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. + * Have h0+...+2^230 h9 between 0 and 2^255-1; + * evidently 2^255 h10-2^255 q = 0. + * Goal: Output h0+...+2^230 h9. */ + + s[0] = h0 >> 0; + s[1] = h0 >> 8; + s[2] = h0 >> 16; + s[3] = (h0 >> 24) | ((uint32_t)(h1) << 2); + s[4] = h1 >> 6; + s[5] = h1 >> 14; + s[6] = (h1 >> 22) | ((uint32_t)(h2) << 3); + s[7] = h2 >> 5; + s[8] = h2 >> 13; + s[9] = (h2 >> 21) | ((uint32_t)(h3) << 5); + s[10] = h3 >> 3; + s[11] = h3 >> 11; + s[12] = (h3 >> 19) | ((uint32_t)(h4) << 6); + s[13] = h4 >> 2; + s[14] = h4 >> 10; + s[15] = h4 >> 18; + s[16] = h5 >> 0; + s[17] = h5 >> 8; + s[18] = h5 >> 16; + s[19] = (h5 >> 24) | ((uint32_t)(h6) << 1); + s[20] = h6 >> 7; + s[21] = h6 >> 15; + s[22] = (h6 >> 23) | ((uint32_t)(h7) << 3); + s[23] = h7 >> 5; + s[24] = h7 >> 13; + s[25] = (h7 >> 21) | ((uint32_t)(h8) << 4); + s[26] = h8 >> 4; + s[27] = h8 >> 12; + s[28] = (h8 >> 20) | ((uint32_t)(h9) << 6); + s[29] = h9 >> 2; + s[30] = h9 >> 10; + s[31] = h9 >> 18; +} + +/* h = f */ +static void fe_copy(fe h, const fe f) { + memmove(h, f, sizeof(int32_t) * 10); +} + +/* h = 0 */ +static void fe_0(fe h) { memset(h, 0, sizeof(int32_t) * 10); } + +/* h = 1 */ +static void fe_1(fe h) { + memset(h, 0, sizeof(int32_t) * 10); + h[0] = 1; +} + +/* h = f + g + * Can overlap h with f or g. + * + * Preconditions: + * |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. + * |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. + * + * Postconditions: + * |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */ +static void fe_add(fe h, const fe f, const fe g) { + unsigned i; + for (i = 0; i < 10; i++) { + h[i] = f[i] + g[i]; + } +} + +/* h = f - g + * Can overlap h with f or g. + * + * Preconditions: + * |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. + * |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. + * + * Postconditions: + * |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */ +static void fe_sub(fe h, const fe f, const fe g) { + unsigned i; + for (i = 0; i < 10; i++) { + h[i] = f[i] - g[i]; + } +} + +/* h = f * g + * Can overlap h with f or g. + * + * Preconditions: + * |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. + * |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. + * + * Postconditions: + * |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. + * + * Notes on implementation strategy: + * + * Using schoolbook multiplication. + * Karatsuba would save a little in some cost models. + * + * Most multiplications by 2 and 19 are 32-bit precomputations; + * cheaper than 64-bit postcomputations. + * + * There is one remaining multiplication by 19 in the carry chain; + * one *19 precomputation can be merged into this, + * but the resulting data flow is considerably less clean. + * + * There are 12 carries below. + * 10 of them are 2-way parallelizable and vectorizable. + * Can get away with 11 carries, but then data flow is much deeper. + * + * With tighter constraints on inputs can squeeze carries into int32. */ +static void fe_mul(fe h, const fe f, const fe g) { + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int32_t g0 = g[0]; + int32_t g1 = g[1]; + int32_t g2 = g[2]; + int32_t g3 = g[3]; + int32_t g4 = g[4]; + int32_t g5 = g[5]; + int32_t g6 = g[6]; + int32_t g7 = g[7]; + int32_t g8 = g[8]; + int32_t g9 = g[9]; + int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */ + int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */ + int32_t g3_19 = 19 * g3; + int32_t g4_19 = 19 * g4; + int32_t g5_19 = 19 * g5; + int32_t g6_19 = 19 * g6; + int32_t g7_19 = 19 * g7; + int32_t g8_19 = 19 * g8; + int32_t g9_19 = 19 * g9; + int32_t f1_2 = 2 * f1; + int32_t f3_2 = 2 * f3; + int32_t f5_2 = 2 * f5; + int32_t f7_2 = 2 * f7; + int32_t f9_2 = 2 * f9; + int64_t f0g0 = f0 * (int64_t) g0; + int64_t f0g1 = f0 * (int64_t) g1; + int64_t f0g2 = f0 * (int64_t) g2; + int64_t f0g3 = f0 * (int64_t) g3; + int64_t f0g4 = f0 * (int64_t) g4; + int64_t f0g5 = f0 * (int64_t) g5; + int64_t f0g6 = f0 * (int64_t) g6; + int64_t f0g7 = f0 * (int64_t) g7; + int64_t f0g8 = f0 * (int64_t) g8; + int64_t f0g9 = f0 * (int64_t) g9; + int64_t f1g0 = f1 * (int64_t) g0; + int64_t f1g1_2 = f1_2 * (int64_t) g1; + int64_t f1g2 = f1 * (int64_t) g2; + int64_t f1g3_2 = f1_2 * (int64_t) g3; + int64_t f1g4 = f1 * (int64_t) g4; + int64_t f1g5_2 = f1_2 * (int64_t) g5; + int64_t f1g6 = f1 * (int64_t) g6; + int64_t f1g7_2 = f1_2 * (int64_t) g7; + int64_t f1g8 = f1 * (int64_t) g8; + int64_t f1g9_38 = f1_2 * (int64_t) g9_19; + int64_t f2g0 = f2 * (int64_t) g0; + int64_t f2g1 = f2 * (int64_t) g1; + int64_t f2g2 = f2 * (int64_t) g2; + int64_t f2g3 = f2 * (int64_t) g3; + int64_t f2g4 = f2 * (int64_t) g4; + int64_t f2g5 = f2 * (int64_t) g5; + int64_t f2g6 = f2 * (int64_t) g6; + int64_t f2g7 = f2 * (int64_t) g7; + int64_t f2g8_19 = f2 * (int64_t) g8_19; + int64_t f2g9_19 = f2 * (int64_t) g9_19; + int64_t f3g0 = f3 * (int64_t) g0; + int64_t f3g1_2 = f3_2 * (int64_t) g1; + int64_t f3g2 = f3 * (int64_t) g2; + int64_t f3g3_2 = f3_2 * (int64_t) g3; + int64_t f3g4 = f3 * (int64_t) g4; + int64_t f3g5_2 = f3_2 * (int64_t) g5; + int64_t f3g6 = f3 * (int64_t) g6; + int64_t f3g7_38 = f3_2 * (int64_t) g7_19; + int64_t f3g8_19 = f3 * (int64_t) g8_19; + int64_t f3g9_38 = f3_2 * (int64_t) g9_19; + int64_t f4g0 = f4 * (int64_t) g0; + int64_t f4g1 = f4 * (int64_t) g1; + int64_t f4g2 = f4 * (int64_t) g2; + int64_t f4g3 = f4 * (int64_t) g3; + int64_t f4g4 = f4 * (int64_t) g4; + int64_t f4g5 = f4 * (int64_t) g5; + int64_t f4g6_19 = f4 * (int64_t) g6_19; + int64_t f4g7_19 = f4 * (int64_t) g7_19; + int64_t f4g8_19 = f4 * (int64_t) g8_19; + int64_t f4g9_19 = f4 * (int64_t) g9_19; + int64_t f5g0 = f5 * (int64_t) g0; + int64_t f5g1_2 = f5_2 * (int64_t) g1; + int64_t f5g2 = f5 * (int64_t) g2; + int64_t f5g3_2 = f5_2 * (int64_t) g3; + int64_t f5g4 = f5 * (int64_t) g4; + int64_t f5g5_38 = f5_2 * (int64_t) g5_19; + int64_t f5g6_19 = f5 * (int64_t) g6_19; + int64_t f5g7_38 = f5_2 * (int64_t) g7_19; + int64_t f5g8_19 = f5 * (int64_t) g8_19; + int64_t f5g9_38 = f5_2 * (int64_t) g9_19; + int64_t f6g0 = f6 * (int64_t) g0; + int64_t f6g1 = f6 * (int64_t) g1; + int64_t f6g2 = f6 * (int64_t) g2; + int64_t f6g3 = f6 * (int64_t) g3; + int64_t f6g4_19 = f6 * (int64_t) g4_19; + int64_t f6g5_19 = f6 * (int64_t) g5_19; + int64_t f6g6_19 = f6 * (int64_t) g6_19; + int64_t f6g7_19 = f6 * (int64_t) g7_19; + int64_t f6g8_19 = f6 * (int64_t) g8_19; + int64_t f6g9_19 = f6 * (int64_t) g9_19; + int64_t f7g0 = f7 * (int64_t) g0; + int64_t f7g1_2 = f7_2 * (int64_t) g1; + int64_t f7g2 = f7 * (int64_t) g2; + int64_t f7g3_38 = f7_2 * (int64_t) g3_19; + int64_t f7g4_19 = f7 * (int64_t) g4_19; + int64_t f7g5_38 = f7_2 * (int64_t) g5_19; + int64_t f7g6_19 = f7 * (int64_t) g6_19; + int64_t f7g7_38 = f7_2 * (int64_t) g7_19; + int64_t f7g8_19 = f7 * (int64_t) g8_19; + int64_t f7g9_38 = f7_2 * (int64_t) g9_19; + int64_t f8g0 = f8 * (int64_t) g0; + int64_t f8g1 = f8 * (int64_t) g1; + int64_t f8g2_19 = f8 * (int64_t) g2_19; + int64_t f8g3_19 = f8 * (int64_t) g3_19; + int64_t f8g4_19 = f8 * (int64_t) g4_19; + int64_t f8g5_19 = f8 * (int64_t) g5_19; + int64_t f8g6_19 = f8 * (int64_t) g6_19; + int64_t f8g7_19 = f8 * (int64_t) g7_19; + int64_t f8g8_19 = f8 * (int64_t) g8_19; + int64_t f8g9_19 = f8 * (int64_t) g9_19; + int64_t f9g0 = f9 * (int64_t) g0; + int64_t f9g1_38 = f9_2 * (int64_t) g1_19; + int64_t f9g2_19 = f9 * (int64_t) g2_19; + int64_t f9g3_38 = f9_2 * (int64_t) g3_19; + int64_t f9g4_19 = f9 * (int64_t) g4_19; + int64_t f9g5_38 = f9_2 * (int64_t) g5_19; + int64_t f9g6_19 = f9 * (int64_t) g6_19; + int64_t f9g7_38 = f9_2 * (int64_t) g7_19; + int64_t f9g8_19 = f9 * (int64_t) g8_19; + int64_t f9g9_38 = f9_2 * (int64_t) g9_19; + int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38; + int64_t h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19; + int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38; + int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19; + int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38; + int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19; + int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38; + int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19; + int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38; + int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + + /* |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38)) + * i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8 + * |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19)) + * i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9 */ + + carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; + carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; + /* |h0| <= 2^25 */ + /* |h4| <= 2^25 */ + /* |h1| <= 1.71*2^59 */ + /* |h5| <= 1.71*2^59 */ + + carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits; + carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits; + /* |h1| <= 2^24; from now on fits into int32 */ + /* |h5| <= 2^24; from now on fits into int32 */ + /* |h2| <= 1.41*2^60 */ + /* |h6| <= 1.41*2^60 */ + + carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits; + carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits; + /* |h2| <= 2^25; from now on fits into int32 unchanged */ + /* |h6| <= 2^25; from now on fits into int32 unchanged */ + /* |h3| <= 1.71*2^59 */ + /* |h7| <= 1.71*2^59 */ + + carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits; + carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits; + /* |h3| <= 2^24; from now on fits into int32 unchanged */ + /* |h7| <= 2^24; from now on fits into int32 unchanged */ + /* |h4| <= 1.72*2^34 */ + /* |h8| <= 1.41*2^60 */ + + carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; + carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits; + /* |h4| <= 2^25; from now on fits into int32 unchanged */ + /* |h8| <= 2^25; from now on fits into int32 unchanged */ + /* |h5| <= 1.01*2^24 */ + /* |h9| <= 1.71*2^59 */ + + carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits; + /* |h9| <= 2^24; from now on fits into int32 unchanged */ + /* |h0| <= 1.1*2^39 */ + + carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; + /* |h0| <= 2^25; from now on fits into int32 unchanged */ + /* |h1| <= 1.01*2^24 */ + + h[0] = h0; + h[1] = h1; + h[2] = h2; + h[3] = h3; + h[4] = h4; + h[5] = h5; + h[6] = h6; + h[7] = h7; + h[8] = h8; + h[9] = h9; +} + +/* h = f * f + * Can overlap h with f. + * + * Preconditions: + * |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. + * + * Postconditions: + * |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. + * + * See fe_mul.c for discussion of implementation strategy. */ +static void fe_sq(fe h, const fe f) { + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int32_t f0_2 = 2 * f0; + int32_t f1_2 = 2 * f1; + int32_t f2_2 = 2 * f2; + int32_t f3_2 = 2 * f3; + int32_t f4_2 = 2 * f4; + int32_t f5_2 = 2 * f5; + int32_t f6_2 = 2 * f6; + int32_t f7_2 = 2 * f7; + int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ + int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ + int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ + int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ + int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ + int64_t f0f0 = f0 * (int64_t) f0; + int64_t f0f1_2 = f0_2 * (int64_t) f1; + int64_t f0f2_2 = f0_2 * (int64_t) f2; + int64_t f0f3_2 = f0_2 * (int64_t) f3; + int64_t f0f4_2 = f0_2 * (int64_t) f4; + int64_t f0f5_2 = f0_2 * (int64_t) f5; + int64_t f0f6_2 = f0_2 * (int64_t) f6; + int64_t f0f7_2 = f0_2 * (int64_t) f7; + int64_t f0f8_2 = f0_2 * (int64_t) f8; + int64_t f0f9_2 = f0_2 * (int64_t) f9; + int64_t f1f1_2 = f1_2 * (int64_t) f1; + int64_t f1f2_2 = f1_2 * (int64_t) f2; + int64_t f1f3_4 = f1_2 * (int64_t) f3_2; + int64_t f1f4_2 = f1_2 * (int64_t) f4; + int64_t f1f5_4 = f1_2 * (int64_t) f5_2; + int64_t f1f6_2 = f1_2 * (int64_t) f6; + int64_t f1f7_4 = f1_2 * (int64_t) f7_2; + int64_t f1f8_2 = f1_2 * (int64_t) f8; + int64_t f1f9_76 = f1_2 * (int64_t) f9_38; + int64_t f2f2 = f2 * (int64_t) f2; + int64_t f2f3_2 = f2_2 * (int64_t) f3; + int64_t f2f4_2 = f2_2 * (int64_t) f4; + int64_t f2f5_2 = f2_2 * (int64_t) f5; + int64_t f2f6_2 = f2_2 * (int64_t) f6; + int64_t f2f7_2 = f2_2 * (int64_t) f7; + int64_t f2f8_38 = f2_2 * (int64_t) f8_19; + int64_t f2f9_38 = f2 * (int64_t) f9_38; + int64_t f3f3_2 = f3_2 * (int64_t) f3; + int64_t f3f4_2 = f3_2 * (int64_t) f4; + int64_t f3f5_4 = f3_2 * (int64_t) f5_2; + int64_t f3f6_2 = f3_2 * (int64_t) f6; + int64_t f3f7_76 = f3_2 * (int64_t) f7_38; + int64_t f3f8_38 = f3_2 * (int64_t) f8_19; + int64_t f3f9_76 = f3_2 * (int64_t) f9_38; + int64_t f4f4 = f4 * (int64_t) f4; + int64_t f4f5_2 = f4_2 * (int64_t) f5; + int64_t f4f6_38 = f4_2 * (int64_t) f6_19; + int64_t f4f7_38 = f4 * (int64_t) f7_38; + int64_t f4f8_38 = f4_2 * (int64_t) f8_19; + int64_t f4f9_38 = f4 * (int64_t) f9_38; + int64_t f5f5_38 = f5 * (int64_t) f5_38; + int64_t f5f6_38 = f5_2 * (int64_t) f6_19; + int64_t f5f7_76 = f5_2 * (int64_t) f7_38; + int64_t f5f8_38 = f5_2 * (int64_t) f8_19; + int64_t f5f9_76 = f5_2 * (int64_t) f9_38; + int64_t f6f6_19 = f6 * (int64_t) f6_19; + int64_t f6f7_38 = f6 * (int64_t) f7_38; + int64_t f6f8_38 = f6_2 * (int64_t) f8_19; + int64_t f6f9_38 = f6 * (int64_t) f9_38; + int64_t f7f7_38 = f7 * (int64_t) f7_38; + int64_t f7f8_38 = f7_2 * (int64_t) f8_19; + int64_t f7f9_76 = f7_2 * (int64_t) f9_38; + int64_t f8f8_19 = f8 * (int64_t) f8_19; + int64_t f8f9_38 = f8 * (int64_t) f9_38; + int64_t f9f9_38 = f9 * (int64_t) f9_38; + int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; + int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; + int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; + int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; + int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; + int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; + int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; + int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; + int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; + int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + + carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; + carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; + + carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits; + carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits; + + carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits; + carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits; + + carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits; + carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits; + + carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; + carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits; + + carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits; + + carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; + + h[0] = h0; + h[1] = h1; + h[2] = h2; + h[3] = h3; + h[4] = h4; + h[5] = h5; + h[6] = h6; + h[7] = h7; + h[8] = h8; + h[9] = h9; +} + +static void fe_invert(fe out, const fe z) { + fe t0; + fe t1; + fe t2; + fe t3; + int i; + + fe_sq(t0, z); + for (i = 1; i < 1; ++i) { + fe_sq(t0, t0); + } + fe_sq(t1, t0); + for (i = 1; i < 2; ++i) { + fe_sq(t1, t1); + } + fe_mul(t1, z, t1); + fe_mul(t0, t0, t1); + fe_sq(t2, t0); + for (i = 1; i < 1; ++i) { + fe_sq(t2, t2); + } + fe_mul(t1, t1, t2); + fe_sq(t2, t1); + for (i = 1; i < 5; ++i) { + fe_sq(t2, t2); + } + fe_mul(t1, t2, t1); + fe_sq(t2, t1); + for (i = 1; i < 10; ++i) { + fe_sq(t2, t2); + } + fe_mul(t2, t2, t1); + fe_sq(t3, t2); + for (i = 1; i < 20; ++i) { + fe_sq(t3, t3); + } + fe_mul(t2, t3, t2); + fe_sq(t2, t2); + for (i = 1; i < 10; ++i) { + fe_sq(t2, t2); + } + fe_mul(t1, t2, t1); + fe_sq(t2, t1); + for (i = 1; i < 50; ++i) { + fe_sq(t2, t2); + } + fe_mul(t2, t2, t1); + fe_sq(t3, t2); + for (i = 1; i < 100; ++i) { + fe_sq(t3, t3); + } + fe_mul(t2, t3, t2); + fe_sq(t2, t2); + for (i = 1; i < 50; ++i) { + fe_sq(t2, t2); + } + fe_mul(t1, t2, t1); + fe_sq(t1, t1); + for (i = 1; i < 5; ++i) { + fe_sq(t1, t1); + } + fe_mul(out, t1, t0); +} + +/* h = -f + * + * Preconditions: + * |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. + * + * Postconditions: + * |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. */ +static void fe_neg(fe h, const fe f) { + unsigned i; + for (i = 0; i < 10; i++) { + h[i] = -f[i]; + } +} + +/* Replace (f,g) with (g,g) if b == 1; + * replace (f,g) with (f,g) if b == 0. + * + * Preconditions: b in {0,1}. */ +static void fe_cmov(fe f, const fe g, unsigned b) { + b = 0-b; + unsigned i; + for (i = 0; i < 10; i++) { + int32_t x = f[i] ^ g[i]; + x &= b; + f[i] ^= x; + } +} + +/* return 0 if f == 0 + * return 1 if f != 0 + * + * Preconditions: + * |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */ +static int fe_isnonzero(const fe f) { + uint8_t s[32]; + fe_tobytes(s, f); + + static const uint8_t zero[32] = {0}; + return timingsafe_memcmp(s, zero, sizeof(zero)) != 0; +} + +/* return 1 if f is in {1,3,5,...,q-2} + * return 0 if f is in {0,2,4,...,q-1} + * + * Preconditions: + * |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */ +static int fe_isnegative(const fe f) { + uint8_t s[32]; + fe_tobytes(s, f); + return s[0] & 1; +} + +/* h = 2 * f * f + * Can overlap h with f. + * + * Preconditions: + * |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc. + * + * Postconditions: + * |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. + * + * See fe_mul.c for discussion of implementation strategy. */ +static void fe_sq2(fe h, const fe f) { + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int32_t f0_2 = 2 * f0; + int32_t f1_2 = 2 * f1; + int32_t f2_2 = 2 * f2; + int32_t f3_2 = 2 * f3; + int32_t f4_2 = 2 * f4; + int32_t f5_2 = 2 * f5; + int32_t f6_2 = 2 * f6; + int32_t f7_2 = 2 * f7; + int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */ + int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */ + int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */ + int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */ + int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */ + int64_t f0f0 = f0 * (int64_t) f0; + int64_t f0f1_2 = f0_2 * (int64_t) f1; + int64_t f0f2_2 = f0_2 * (int64_t) f2; + int64_t f0f3_2 = f0_2 * (int64_t) f3; + int64_t f0f4_2 = f0_2 * (int64_t) f4; + int64_t f0f5_2 = f0_2 * (int64_t) f5; + int64_t f0f6_2 = f0_2 * (int64_t) f6; + int64_t f0f7_2 = f0_2 * (int64_t) f7; + int64_t f0f8_2 = f0_2 * (int64_t) f8; + int64_t f0f9_2 = f0_2 * (int64_t) f9; + int64_t f1f1_2 = f1_2 * (int64_t) f1; + int64_t f1f2_2 = f1_2 * (int64_t) f2; + int64_t f1f3_4 = f1_2 * (int64_t) f3_2; + int64_t f1f4_2 = f1_2 * (int64_t) f4; + int64_t f1f5_4 = f1_2 * (int64_t) f5_2; + int64_t f1f6_2 = f1_2 * (int64_t) f6; + int64_t f1f7_4 = f1_2 * (int64_t) f7_2; + int64_t f1f8_2 = f1_2 * (int64_t) f8; + int64_t f1f9_76 = f1_2 * (int64_t) f9_38; + int64_t f2f2 = f2 * (int64_t) f2; + int64_t f2f3_2 = f2_2 * (int64_t) f3; + int64_t f2f4_2 = f2_2 * (int64_t) f4; + int64_t f2f5_2 = f2_2 * (int64_t) f5; + int64_t f2f6_2 = f2_2 * (int64_t) f6; + int64_t f2f7_2 = f2_2 * (int64_t) f7; + int64_t f2f8_38 = f2_2 * (int64_t) f8_19; + int64_t f2f9_38 = f2 * (int64_t) f9_38; + int64_t f3f3_2 = f3_2 * (int64_t) f3; + int64_t f3f4_2 = f3_2 * (int64_t) f4; + int64_t f3f5_4 = f3_2 * (int64_t) f5_2; + int64_t f3f6_2 = f3_2 * (int64_t) f6; + int64_t f3f7_76 = f3_2 * (int64_t) f7_38; + int64_t f3f8_38 = f3_2 * (int64_t) f8_19; + int64_t f3f9_76 = f3_2 * (int64_t) f9_38; + int64_t f4f4 = f4 * (int64_t) f4; + int64_t f4f5_2 = f4_2 * (int64_t) f5; + int64_t f4f6_38 = f4_2 * (int64_t) f6_19; + int64_t f4f7_38 = f4 * (int64_t) f7_38; + int64_t f4f8_38 = f4_2 * (int64_t) f8_19; + int64_t f4f9_38 = f4 * (int64_t) f9_38; + int64_t f5f5_38 = f5 * (int64_t) f5_38; + int64_t f5f6_38 = f5_2 * (int64_t) f6_19; + int64_t f5f7_76 = f5_2 * (int64_t) f7_38; + int64_t f5f8_38 = f5_2 * (int64_t) f8_19; + int64_t f5f9_76 = f5_2 * (int64_t) f9_38; + int64_t f6f6_19 = f6 * (int64_t) f6_19; + int64_t f6f7_38 = f6 * (int64_t) f7_38; + int64_t f6f8_38 = f6_2 * (int64_t) f8_19; + int64_t f6f9_38 = f6 * (int64_t) f9_38; + int64_t f7f7_38 = f7 * (int64_t) f7_38; + int64_t f7f8_38 = f7_2 * (int64_t) f8_19; + int64_t f7f9_76 = f7_2 * (int64_t) f9_38; + int64_t f8f8_19 = f8 * (int64_t) f8_19; + int64_t f8f9_38 = f8 * (int64_t) f9_38; + int64_t f9f9_38 = f9 * (int64_t) f9_38; + int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38; + int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38; + int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19; + int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38; + int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38; + int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38; + int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19; + int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38; + int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38; + int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + + h0 += h0; + h1 += h1; + h2 += h2; + h3 += h3; + h4 += h4; + h5 += h5; + h6 += h6; + h7 += h7; + h8 += h8; + h9 += h9; + + carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; + carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; + + carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits; + carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits; + + carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits; + carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits; + + carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits; + carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits; + + carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; + carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits; + + carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits; + + carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; + + h[0] = h0; + h[1] = h1; + h[2] = h2; + h[3] = h3; + h[4] = h4; + h[5] = h5; + h[6] = h6; + h[7] = h7; + h[8] = h8; + h[9] = h9; +} + +static void fe_pow22523(fe out, const fe z) { + fe t0; + fe t1; + fe t2; + int i; + + fe_sq(t0, z); + for (i = 1; i < 1; ++i) { + fe_sq(t0, t0); + } + fe_sq(t1, t0); + for (i = 1; i < 2; ++i) { + fe_sq(t1, t1); + } + fe_mul(t1, z, t1); + fe_mul(t0, t0, t1); + fe_sq(t0, t0); + for (i = 1; i < 1; ++i) { + fe_sq(t0, t0); + } + fe_mul(t0, t1, t0); + fe_sq(t1, t0); + for (i = 1; i < 5; ++i) { + fe_sq(t1, t1); + } + fe_mul(t0, t1, t0); + fe_sq(t1, t0); + for (i = 1; i < 10; ++i) { + fe_sq(t1, t1); + } + fe_mul(t1, t1, t0); + fe_sq(t2, t1); + for (i = 1; i < 20; ++i) { + fe_sq(t2, t2); + } + fe_mul(t1, t2, t1); + fe_sq(t1, t1); + for (i = 1; i < 10; ++i) { + fe_sq(t1, t1); + } + fe_mul(t0, t1, t0); + fe_sq(t1, t0); + for (i = 1; i < 50; ++i) { + fe_sq(t1, t1); + } + fe_mul(t1, t1, t0); + fe_sq(t2, t1); + for (i = 1; i < 100; ++i) { + fe_sq(t2, t2); + } + fe_mul(t1, t2, t1); + fe_sq(t1, t1); + for (i = 1; i < 50; ++i) { + fe_sq(t1, t1); + } + fe_mul(t0, t1, t0); + fe_sq(t0, t0); + for (i = 1; i < 2; ++i) { + fe_sq(t0, t0); + } + fe_mul(out, t0, z); +} + +void x25519_ge_tobytes(uint8_t *s, const ge_p2 *h) { + fe recip; + fe x; + fe y; + + fe_invert(recip, h->Z); + fe_mul(x, h->X, recip); + fe_mul(y, h->Y, recip); + fe_tobytes(s, y); + s[31] ^= fe_isnegative(x) << 7; +} + +#ifdef ED25519 +static void ge_p3_tobytes(uint8_t *s, const ge_p3 *h) { + fe recip; + fe x; + fe y; + + fe_invert(recip, h->Z); + fe_mul(x, h->X, recip); + fe_mul(y, h->Y, recip); + fe_tobytes(s, y); + s[31] ^= fe_isnegative(x) << 7; +} +#endif + +static const fe d = {-10913610, 13857413, -15372611, 6949391, 114729, + -8787816, -6275908, -3247719, -18696448, -12055116}; + +static const fe sqrtm1 = {-32595792, -7943725, 9377950, 3500415, 12389472, + -272473, -25146209, -2005654, 326686, 11406482}; + +int x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t *s) { + fe u; + fe v; + fe v3; + fe vxx; + fe check; + + fe_frombytes(h->Y, s); + fe_1(h->Z); + fe_sq(u, h->Y); + fe_mul(v, u, d); + fe_sub(u, u, h->Z); /* u = y^2-1 */ + fe_add(v, v, h->Z); /* v = dy^2+1 */ + + fe_sq(v3, v); + fe_mul(v3, v3, v); /* v3 = v^3 */ + fe_sq(h->X, v3); + fe_mul(h->X, h->X, v); + fe_mul(h->X, h->X, u); /* x = uv^7 */ + + fe_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */ + fe_mul(h->X, h->X, v3); + fe_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */ + + fe_sq(vxx, h->X); + fe_mul(vxx, vxx, v); + fe_sub(check, vxx, u); /* vx^2-u */ + if (fe_isnonzero(check)) { + fe_add(check, vxx, u); /* vx^2+u */ + if (fe_isnonzero(check)) { + return -1; + } + fe_mul(h->X, h->X, sqrtm1); + } + + if (fe_isnegative(h->X) != (s[31] >> 7)) { + fe_neg(h->X, h->X); + } + + fe_mul(h->T, h->X, h->Y); + return 0; +} + +static void ge_p2_0(ge_p2 *h) { + fe_0(h->X); + fe_1(h->Y); + fe_1(h->Z); +} + +static void ge_p3_0(ge_p3 *h) { + fe_0(h->X); + fe_1(h->Y); + fe_1(h->Z); + fe_0(h->T); +} + +static void ge_cached_0(ge_cached *h) { + fe_1(h->YplusX); + fe_1(h->YminusX); + fe_1(h->Z); + fe_0(h->T2d); +} + +static void ge_precomp_0(ge_precomp *h) { + fe_1(h->yplusx); + fe_1(h->yminusx); + fe_0(h->xy2d); +} + +/* r = p */ +static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) { + fe_copy(r->X, p->X); + fe_copy(r->Y, p->Y); + fe_copy(r->Z, p->Z); +} + +static const fe d2 = {-21827239, -5839606, -30745221, 13898782, 229458, + 15978800, -12551817, -6495438, 29715968, 9444199}; + +/* r = p */ +void x25519_ge_p3_to_cached(ge_cached *r, const ge_p3 *p) { + fe_add(r->YplusX, p->Y, p->X); + fe_sub(r->YminusX, p->Y, p->X); + fe_copy(r->Z, p->Z); + fe_mul(r->T2d, p->T, d2); +} + +/* r = p */ +void x25519_ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) { + fe_mul(r->X, p->X, p->T); + fe_mul(r->Y, p->Y, p->Z); + fe_mul(r->Z, p->Z, p->T); +} + +/* r = p */ +void x25519_ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) { + fe_mul(r->X, p->X, p->T); + fe_mul(r->Y, p->Y, p->Z); + fe_mul(r->Z, p->Z, p->T); + fe_mul(r->T, p->X, p->Y); +} + +/* r = p */ +static void ge_p1p1_to_cached(ge_cached *r, const ge_p1p1 *p) { + ge_p3 t; + x25519_ge_p1p1_to_p3(&t, p); + x25519_ge_p3_to_cached(r, &t); +} + +/* r = 2 * p */ +static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) { + fe t0; + + fe_sq(r->X, p->X); + fe_sq(r->Z, p->Y); + fe_sq2(r->T, p->Z); + fe_add(r->Y, p->X, p->Y); + fe_sq(t0, r->Y); + fe_add(r->Y, r->Z, r->X); + fe_sub(r->Z, r->Z, r->X); + fe_sub(r->X, t0, r->Y); + fe_sub(r->T, r->T, r->Z); +} + +/* r = 2 * p */ +static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) { + ge_p2 q; + ge_p3_to_p2(&q, p); + ge_p2_dbl(r, &q); +} + +/* r = p + q */ +static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) { + fe t0; + + fe_add(r->X, p->Y, p->X); + fe_sub(r->Y, p->Y, p->X); + fe_mul(r->Z, r->X, q->yplusx); + fe_mul(r->Y, r->Y, q->yminusx); + fe_mul(r->T, q->xy2d, p->T); + fe_add(t0, p->Z, p->Z); + fe_sub(r->X, r->Z, r->Y); + fe_add(r->Y, r->Z, r->Y); + fe_add(r->Z, t0, r->T); + fe_sub(r->T, t0, r->T); +} + +#ifdef ED25519 +/* r = p - q */ +static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) { + fe t0; + + fe_add(r->X, p->Y, p->X); + fe_sub(r->Y, p->Y, p->X); + fe_mul(r->Z, r->X, q->yminusx); + fe_mul(r->Y, r->Y, q->yplusx); + fe_mul(r->T, q->xy2d, p->T); + fe_add(t0, p->Z, p->Z); + fe_sub(r->X, r->Z, r->Y); + fe_add(r->Y, r->Z, r->Y); + fe_sub(r->Z, t0, r->T); + fe_add(r->T, t0, r->T); +} +#endif + +/* r = p + q */ +void x25519_ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) { + fe t0; + + fe_add(r->X, p->Y, p->X); + fe_sub(r->Y, p->Y, p->X); + fe_mul(r->Z, r->X, q->YplusX); + fe_mul(r->Y, r->Y, q->YminusX); + fe_mul(r->T, q->T2d, p->T); + fe_mul(r->X, p->Z, q->Z); + fe_add(t0, r->X, r->X); + fe_sub(r->X, r->Z, r->Y); + fe_add(r->Y, r->Z, r->Y); + fe_add(r->Z, t0, r->T); + fe_sub(r->T, t0, r->T); +} + +/* r = p - q */ +void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) { + fe t0; + + fe_add(r->X, p->Y, p->X); + fe_sub(r->Y, p->Y, p->X); + fe_mul(r->Z, r->X, q->YminusX); + fe_mul(r->Y, r->Y, q->YplusX); + fe_mul(r->T, q->T2d, p->T); + fe_mul(r->X, p->Z, q->Z); + fe_add(t0, r->X, r->X); + fe_sub(r->X, r->Z, r->Y); + fe_add(r->Y, r->Z, r->Y); + fe_sub(r->Z, t0, r->T); + fe_add(r->T, t0, r->T); +} + +static uint8_t equal(signed char b, signed char c) { + uint8_t ub = b; + uint8_t uc = c; + uint8_t x = ub ^ uc; /* 0: yes; 1..255: no */ + uint32_t y = x; /* 0: yes; 1..255: no */ + y -= 1; /* 4294967295: yes; 0..254: no */ + y >>= 31; /* 1: yes; 0: no */ + return y; +} + +static void cmov(ge_precomp *t, const ge_precomp *u, uint8_t b) { + fe_cmov(t->yplusx, u->yplusx, b); + fe_cmov(t->yminusx, u->yminusx, b); + fe_cmov(t->xy2d, u->xy2d, b); +} + +void x25519_ge_scalarmult_small_precomp( + ge_p3 *h, const uint8_t a[32], const uint8_t precomp_table[15 * 2 * 32]) { + /* precomp_table is first expanded into matching |ge_precomp| + * elements. */ + ge_precomp multiples[15]; + + unsigned i; + for (i = 0; i < 15; i++) { + const uint8_t *bytes = &precomp_table[i*(2 * 32)]; + fe x, y; + fe_frombytes(x, bytes); + fe_frombytes(y, bytes + 32); + + ge_precomp *out = &multiples[i]; + fe_add(out->yplusx, y, x); + fe_sub(out->yminusx, y, x); + fe_mul(out->xy2d, x, y); + fe_mul(out->xy2d, out->xy2d, d2); + } + + /* See the comment above |k25519SmallPrecomp| about the structure of the + * precomputed elements. This loop does 64 additions and 64 doublings to + * calculate the result. */ + ge_p3_0(h); + + for (i = 63; i < 64; i--) { + unsigned j; + signed char index = 0; + + for (j = 0; j < 4; j++) { + const uint8_t bit = 1 & (a[(8 * j) + (i / 8)] >> (i & 7)); + index |= (bit << j); + } + + ge_precomp e; + ge_precomp_0(&e); + + for (j = 1; j < 16; j++) { + cmov(&e, &multiples[j-1], equal(index, j)); + } + + ge_cached cached; + ge_p1p1 r; + x25519_ge_p3_to_cached(&cached, h); + x25519_ge_add(&r, h, &cached); + x25519_ge_p1p1_to_p3(h, &r); + + ge_madd(&r, h, &e); + x25519_ge_p1p1_to_p3(h, &r); + } +} + +#if defined(OPENSSL_SMALL) + +/* This block of code replaces the standard base-point table with a much smaller + * one. The standard table is 30,720 bytes while this one is just 960. + * + * This table contains 15 pairs of group elements, (x, y), where each field + * element is serialised with |fe_tobytes|. If |i| is the index of the group + * element then consider i+1 as a four-bit number: (iâ‚€, iâ‚, iâ‚‚, i₃) (where iâ‚€ + * is the most significant bit). The value of the group element is then: + * (i₀×2^192 + iâ‚×2^128 + i₂×2^64 + i₃)G, where G is the generator. */ +static const uint8_t k25519SmallPrecomp[15 * 2 * 32] = { + 0x1a, 0xd5, 0x25, 0x8f, 0x60, 0x2d, 0x56, 0xc9, 0xb2, 0xa7, 0x25, 0x95, + 0x60, 0xc7, 0x2c, 0x69, 0x5c, 0xdc, 0xd6, 0xfd, 0x31, 0xe2, 0xa4, 0xc0, + 0xfe, 0x53, 0x6e, 0xcd, 0xd3, 0x36, 0x69, 0x21, 0x58, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, + 0x66, 0x66, 0x66, 0x66, 0x02, 0xa2, 0xed, 0xf4, 0x8f, 0x6b, 0x0b, 0x3e, + 0xeb, 0x35, 0x1a, 0xd5, 0x7e, 0xdb, 0x78, 0x00, 0x96, 0x8a, 0xa0, 0xb4, + 0xcf, 0x60, 0x4b, 0xd4, 0xd5, 0xf9, 0x2d, 0xbf, 0x88, 0xbd, 0x22, 0x62, + 0x13, 0x53, 0xe4, 0x82, 0x57, 0xfa, 0x1e, 0x8f, 0x06, 0x2b, 0x90, 0xba, + 0x08, 0xb6, 0x10, 0x54, 0x4f, 0x7c, 0x1b, 0x26, 0xed, 0xda, 0x6b, 0xdd, + 0x25, 0xd0, 0x4e, 0xea, 0x42, 0xbb, 0x25, 0x03, 0xa2, 0xfb, 0xcc, 0x61, + 0x67, 0x06, 0x70, 0x1a, 0xc4, 0x78, 0x3a, 0xff, 0x32, 0x62, 0xdd, 0x2c, + 0xab, 0x50, 0x19, 0x3b, 0xf2, 0x9b, 0x7d, 0xb8, 0xfd, 0x4f, 0x29, 0x9c, + 0xa7, 0x91, 0xba, 0x0e, 0x46, 0x5e, 0x51, 0xfe, 0x1d, 0xbf, 0xe5, 0xe5, + 0x9b, 0x95, 0x0d, 0x67, 0xf8, 0xd1, 0xb5, 0x5a, 0xa1, 0x93, 0x2c, 0xc3, + 0xde, 0x0e, 0x97, 0x85, 0x2d, 0x7f, 0xea, 0xab, 0x3e, 0x47, 0x30, 0x18, + 0x24, 0xe8, 0xb7, 0x60, 0xae, 0x47, 0x80, 0xfc, 0xe5, 0x23, 0xe7, 0xc2, + 0xc9, 0x85, 0xe6, 0x98, 0xa0, 0x29, 0x4e, 0xe1, 0x84, 0x39, 0x2d, 0x95, + 0x2c, 0xf3, 0x45, 0x3c, 0xff, 0xaf, 0x27, 0x4c, 0x6b, 0xa6, 0xf5, 0x4b, + 0x11, 0xbd, 0xba, 0x5b, 0x9e, 0xc4, 0xa4, 0x51, 0x1e, 0xbe, 0xd0, 0x90, + 0x3a, 0x9c, 0xc2, 0x26, 0xb6, 0x1e, 0xf1, 0x95, 0x7d, 0xc8, 0x6d, 0x52, + 0xe6, 0x99, 0x2c, 0x5f, 0x9a, 0x96, 0x0c, 0x68, 0x29, 0xfd, 0xe2, 0xfb, + 0xe6, 0xbc, 0xec, 0x31, 0x08, 0xec, 0xe6, 0xb0, 0x53, 0x60, 0xc3, 0x8c, + 0xbe, 0xc1, 0xb3, 0x8a, 0x8f, 0xe4, 0x88, 0x2b, 0x55, 0xe5, 0x64, 0x6e, + 0x9b, 0xd0, 0xaf, 0x7b, 0x64, 0x2a, 0x35, 0x25, 0x10, 0x52, 0xc5, 0x9e, + 0x58, 0x11, 0x39, 0x36, 0x45, 0x51, 0xb8, 0x39, 0x93, 0xfc, 0x9d, 0x6a, + 0xbe, 0x58, 0xcb, 0xa4, 0x0f, 0x51, 0x3c, 0x38, 0x05, 0xca, 0xab, 0x43, + 0x63, 0x0e, 0xf3, 0x8b, 0x41, 0xa6, 0xf8, 0x9b, 0x53, 0x70, 0x80, 0x53, + 0x86, 0x5e, 0x8f, 0xe3, 0xc3, 0x0d, 0x18, 0xc8, 0x4b, 0x34, 0x1f, 0xd8, + 0x1d, 0xbc, 0xf2, 0x6d, 0x34, 0x3a, 0xbe, 0xdf, 0xd9, 0xf6, 0xf3, 0x89, + 0xa1, 0xe1, 0x94, 0x9f, 0x5d, 0x4c, 0x5d, 0xe9, 0xa1, 0x49, 0x92, 0xef, + 0x0e, 0x53, 0x81, 0x89, 0x58, 0x87, 0xa6, 0x37, 0xf1, 0xdd, 0x62, 0x60, + 0x63, 0x5a, 0x9d, 0x1b, 0x8c, 0xc6, 0x7d, 0x52, 0xea, 0x70, 0x09, 0x6a, + 0xe1, 0x32, 0xf3, 0x73, 0x21, 0x1f, 0x07, 0x7b, 0x7c, 0x9b, 0x49, 0xd8, + 0xc0, 0xf3, 0x25, 0x72, 0x6f, 0x9d, 0xed, 0x31, 0x67, 0x36, 0x36, 0x54, + 0x40, 0x92, 0x71, 0xe6, 0x11, 0x28, 0x11, 0xad, 0x93, 0x32, 0x85, 0x7b, + 0x3e, 0xb7, 0x3b, 0x49, 0x13, 0x1c, 0x07, 0xb0, 0x2e, 0x93, 0xaa, 0xfd, + 0xfd, 0x28, 0x47, 0x3d, 0x8d, 0xd2, 0xda, 0xc7, 0x44, 0xd6, 0x7a, 0xdb, + 0x26, 0x7d, 0x1d, 0xb8, 0xe1, 0xde, 0x9d, 0x7a, 0x7d, 0x17, 0x7e, 0x1c, + 0x37, 0x04, 0x8d, 0x2d, 0x7c, 0x5e, 0x18, 0x38, 0x1e, 0xaf, 0xc7, 0x1b, + 0x33, 0x48, 0x31, 0x00, 0x59, 0xf6, 0xf2, 0xca, 0x0f, 0x27, 0x1b, 0x63, + 0x12, 0x7e, 0x02, 0x1d, 0x49, 0xc0, 0x5d, 0x79, 0x87, 0xef, 0x5e, 0x7a, + 0x2f, 0x1f, 0x66, 0x55, 0xd8, 0x09, 0xd9, 0x61, 0x38, 0x68, 0xb0, 0x07, + 0xa3, 0xfc, 0xcc, 0x85, 0x10, 0x7f, 0x4c, 0x65, 0x65, 0xb3, 0xfa, 0xfa, + 0xa5, 0x53, 0x6f, 0xdb, 0x74, 0x4c, 0x56, 0x46, 0x03, 0xe2, 0xd5, 0x7a, + 0x29, 0x1c, 0xc6, 0x02, 0xbc, 0x59, 0xf2, 0x04, 0x75, 0x63, 0xc0, 0x84, + 0x2f, 0x60, 0x1c, 0x67, 0x76, 0xfd, 0x63, 0x86, 0xf3, 0xfa, 0xbf, 0xdc, + 0xd2, 0x2d, 0x90, 0x91, 0xbd, 0x33, 0xa9, 0xe5, 0x66, 0x0c, 0xda, 0x42, + 0x27, 0xca, 0xf4, 0x66, 0xc2, 0xec, 0x92, 0x14, 0x57, 0x06, 0x63, 0xd0, + 0x4d, 0x15, 0x06, 0xeb, 0x69, 0x58, 0x4f, 0x77, 0xc5, 0x8b, 0xc7, 0xf0, + 0x8e, 0xed, 0x64, 0xa0, 0xb3, 0x3c, 0x66, 0x71, 0xc6, 0x2d, 0xda, 0x0a, + 0x0d, 0xfe, 0x70, 0x27, 0x64, 0xf8, 0x27, 0xfa, 0xf6, 0x5f, 0x30, 0xa5, + 0x0d, 0x6c, 0xda, 0xf2, 0x62, 0x5e, 0x78, 0x47, 0xd3, 0x66, 0x00, 0x1c, + 0xfd, 0x56, 0x1f, 0x5d, 0x3f, 0x6f, 0xf4, 0x4c, 0xd8, 0xfd, 0x0e, 0x27, + 0xc9, 0x5c, 0x2b, 0xbc, 0xc0, 0xa4, 0xe7, 0x23, 0x29, 0x02, 0x9f, 0x31, + 0xd6, 0xe9, 0xd7, 0x96, 0xf4, 0xe0, 0x5e, 0x0b, 0x0e, 0x13, 0xee, 0x3c, + 0x09, 0xed, 0xf2, 0x3d, 0x76, 0x91, 0xc3, 0xa4, 0x97, 0xae, 0xd4, 0x87, + 0xd0, 0x5d, 0xf6, 0x18, 0x47, 0x1f, 0x1d, 0x67, 0xf2, 0xcf, 0x63, 0xa0, + 0x91, 0x27, 0xf8, 0x93, 0x45, 0x75, 0x23, 0x3f, 0xd1, 0xf1, 0xad, 0x23, + 0xdd, 0x64, 0x93, 0x96, 0x41, 0x70, 0x7f, 0xf7, 0xf5, 0xa9, 0x89, 0xa2, + 0x34, 0xb0, 0x8d, 0x1b, 0xae, 0x19, 0x15, 0x49, 0x58, 0x23, 0x6d, 0x87, + 0x15, 0x4f, 0x81, 0x76, 0xfb, 0x23, 0xb5, 0xea, 0xcf, 0xac, 0x54, 0x8d, + 0x4e, 0x42, 0x2f, 0xeb, 0x0f, 0x63, 0xdb, 0x68, 0x37, 0xa8, 0xcf, 0x8b, + 0xab, 0xf5, 0xa4, 0x6e, 0x96, 0x2a, 0xb2, 0xd6, 0xbe, 0x9e, 0xbd, 0x0d, + 0xb4, 0x42, 0xa9, 0xcf, 0x01, 0x83, 0x8a, 0x17, 0x47, 0x76, 0xc4, 0xc6, + 0x83, 0x04, 0x95, 0x0b, 0xfc, 0x11, 0xc9, 0x62, 0xb8, 0x0c, 0x76, 0x84, + 0xd9, 0xb9, 0x37, 0xfa, 0xfc, 0x7c, 0xc2, 0x6d, 0x58, 0x3e, 0xb3, 0x04, + 0xbb, 0x8c, 0x8f, 0x48, 0xbc, 0x91, 0x27, 0xcc, 0xf9, 0xb7, 0x22, 0x19, + 0x83, 0x2e, 0x09, 0xb5, 0x72, 0xd9, 0x54, 0x1c, 0x4d, 0xa1, 0xea, 0x0b, + 0xf1, 0xc6, 0x08, 0x72, 0x46, 0x87, 0x7a, 0x6e, 0x80, 0x56, 0x0a, 0x8a, + 0xc0, 0xdd, 0x11, 0x6b, 0xd6, 0xdd, 0x47, 0xdf, 0x10, 0xd9, 0xd8, 0xea, + 0x7c, 0xb0, 0x8f, 0x03, 0x00, 0x2e, 0xc1, 0x8f, 0x44, 0xa8, 0xd3, 0x30, + 0x06, 0x89, 0xa2, 0xf9, 0x34, 0xad, 0xdc, 0x03, 0x85, 0xed, 0x51, 0xa7, + 0x82, 0x9c, 0xe7, 0x5d, 0x52, 0x93, 0x0c, 0x32, 0x9a, 0x5b, 0xe1, 0xaa, + 0xca, 0xb8, 0x02, 0x6d, 0x3a, 0xd4, 0xb1, 0x3a, 0xf0, 0x5f, 0xbe, 0xb5, + 0x0d, 0x10, 0x6b, 0x38, 0x32, 0xac, 0x76, 0x80, 0xbd, 0xca, 0x94, 0x71, + 0x7a, 0xf2, 0xc9, 0x35, 0x2a, 0xde, 0x9f, 0x42, 0x49, 0x18, 0x01, 0xab, + 0xbc, 0xef, 0x7c, 0x64, 0x3f, 0x58, 0x3d, 0x92, 0x59, 0xdb, 0x13, 0xdb, + 0x58, 0x6e, 0x0a, 0xe0, 0xb7, 0x91, 0x4a, 0x08, 0x20, 0xd6, 0x2e, 0x3c, + 0x45, 0xc9, 0x8b, 0x17, 0x79, 0xe7, 0xc7, 0x90, 0x99, 0x3a, 0x18, 0x25, +}; + +void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32]) { + x25519_ge_scalarmult_small_precomp(h, a, k25519SmallPrecomp); +} + +#else + +/* k25519Precomp[i][j] = (j+1)*256^i*B */ +static const ge_precomp k25519Precomp[32][8] = { + { + { + {25967493, -14356035, 29566456, 3660896, -12694345, 4014787, + 27544626, -11754271, -6079156, 2047605}, + {-12545711, 934262, -2722910, 3049990, -727428, 9406986, 12720692, + 5043384, 19500929, -15469378}, + {-8738181, 4489570, 9688441, -14785194, 10184609, -12363380, + 29287919, 11864899, -24514362, -4438546}, + }, + { + {-12815894, -12976347, -21581243, 11784320, -25355658, -2750717, + -11717903, -3814571, -358445, -10211303}, + {-21703237, 6903825, 27185491, 6451973, -29577724, -9554005, + -15616551, 11189268, -26829678, -5319081}, + {26966642, 11152617, 32442495, 15396054, 14353839, -12752335, + -3128826, -9541118, -15472047, -4166697}, + }, + { + {15636291, -9688557, 24204773, -7912398, 616977, -16685262, + 27787600, -14772189, 28944400, -1550024}, + {16568933, 4717097, -11556148, -1102322, 15682896, -11807043, + 16354577, -11775962, 7689662, 11199574}, + {30464156, -5976125, -11779434, -15670865, 23220365, 15915852, + 7512774, 10017326, -17749093, -9920357}, + }, + { + {-17036878, 13921892, 10945806, -6033431, 27105052, -16084379, + -28926210, 15006023, 3284568, -6276540}, + {23599295, -8306047, -11193664, -7687416, 13236774, 10506355, + 7464579, 9656445, 13059162, 10374397}, + {7798556, 16710257, 3033922, 2874086, 28997861, 2835604, 32406664, + -3839045, -641708, -101325}, + }, + { + {10861363, 11473154, 27284546, 1981175, -30064349, 12577861, + 32867885, 14515107, -15438304, 10819380}, + {4708026, 6336745, 20377586, 9066809, -11272109, 6594696, -25653668, + 12483688, -12668491, 5581306}, + {19563160, 16186464, -29386857, 4097519, 10237984, -4348115, + 28542350, 13850243, -23678021, -15815942}, + }, + { + {-15371964, -12862754, 32573250, 4720197, -26436522, 5875511, + -19188627, -15224819, -9818940, -12085777}, + {-8549212, 109983, 15149363, 2178705, 22900618, 4543417, 3044240, + -15689887, 1762328, 14866737}, + {-18199695, -15951423, -10473290, 1707278, -17185920, 3916101, + -28236412, 3959421, 27914454, 4383652}, + }, + { + {5153746, 9909285, 1723747, -2777874, 30523605, 5516873, 19480852, + 5230134, -23952439, -15175766}, + {-30269007, -3463509, 7665486, 10083793, 28475525, 1649722, + 20654025, 16520125, 30598449, 7715701}, + {28881845, 14381568, 9657904, 3680757, -20181635, 7843316, + -31400660, 1370708, 29794553, -1409300}, + }, + { + {14499471, -2729599, -33191113, -4254652, 28494862, 14271267, + 30290735, 10876454, -33154098, 2381726}, + {-7195431, -2655363, -14730155, 462251, -27724326, 3941372, + -6236617, 3696005, -32300832, 15351955}, + {27431194, 8222322, 16448760, -3907995, -18707002, 11938355, + -32961401, -2970515, 29551813, 10109425}, + }, + }, + { + { + {-13657040, -13155431, -31283750, 11777098, 21447386, 6519384, + -2378284, -1627556, 10092783, -4764171}, + {27939166, 14210322, 4677035, 16277044, -22964462, -12398139, + -32508754, 12005538, -17810127, 12803510}, + {17228999, -15661624, -1233527, 300140, -1224870, -11714777, + 30364213, -9038194, 18016357, 4397660}, + }, + { + {-10958843, -7690207, 4776341, -14954238, 27850028, -15602212, + -26619106, 14544525, -17477504, 982639}, + {29253598, 15796703, -2863982, -9908884, 10057023, 3163536, 7332899, + -4120128, -21047696, 9934963}, + {5793303, 16271923, -24131614, -10116404, 29188560, 1206517, + -14747930, 4559895, -30123922, -10897950}, + }, + { + {-27643952, -11493006, 16282657, -11036493, 28414021, -15012264, + 24191034, 4541697, -13338309, 5500568}, + {12650548, -1497113, 9052871, 11355358, -17680037, -8400164, + -17430592, 12264343, 10874051, 13524335}, + {25556948, -3045990, 714651, 2510400, 23394682, -10415330, 33119038, + 5080568, -22528059, 5376628}, + }, + { + {-26088264, -4011052, -17013699, -3537628, -6726793, 1920897, + -22321305, -9447443, 4535768, 1569007}, + {-2255422, 14606630, -21692440, -8039818, 28430649, 8775819, + -30494562, 3044290, 31848280, 12543772}, + {-22028579, 2943893, -31857513, 6777306, 13784462, -4292203, + -27377195, -2062731, 7718482, 14474653}, + }, + { + {2385315, 2454213, -22631320, 46603, -4437935, -15680415, 656965, + -7236665, 24316168, -5253567}, + {13741529, 10911568, -33233417, -8603737, -20177830, -1033297, + 33040651, -13424532, -20729456, 8321686}, + {21060490, -2212744, 15712757, -4336099, 1639040, 10656336, + 23845965, -11874838, -9984458, 608372}, + }, + { + {-13672732, -15087586, -10889693, -7557059, -6036909, 11305547, + 1123968, -6780577, 27229399, 23887}, + {-23244140, -294205, -11744728, 14712571, -29465699, -2029617, + 12797024, -6440308, -1633405, 16678954}, + {-29500620, 4770662, -16054387, 14001338, 7830047, 9564805, + -1508144, -4795045, -17169265, 4904953}, + }, + { + {24059557, 14617003, 19037157, -15039908, 19766093, -14906429, + 5169211, 16191880, 2128236, -4326833}, + {-16981152, 4124966, -8540610, -10653797, 30336522, -14105247, + -29806336, 916033, -6882542, -2986532}, + {-22630907, 12419372, -7134229, -7473371, -16478904, 16739175, + 285431, 2763829, 15736322, 4143876}, + }, + { + {2379352, 11839345, -4110402, -5988665, 11274298, 794957, 212801, + -14594663, 23527084, -16458268}, + {33431127, -11130478, -17838966, -15626900, 8909499, 8376530, + -32625340, 4087881, -15188911, -14416214}, + {1767683, 7197987, -13205226, -2022635, -13091350, 448826, 5799055, + 4357868, -4774191, -16323038}, + }, + }, + { + { + {6721966, 13833823, -23523388, -1551314, 26354293, -11863321, + 23365147, -3949732, 7390890, 2759800}, + {4409041, 2052381, 23373853, 10530217, 7676779, -12885954, 21302353, + -4264057, 1244380, -12919645}, + {-4421239, 7169619, 4982368, -2957590, 30256825, -2777540, 14086413, + 9208236, 15886429, 16489664}, + }, + { + {1996075, 10375649, 14346367, 13311202, -6874135, -16438411, + -13693198, 398369, -30606455, -712933}, + {-25307465, 9795880, -2777414, 14878809, -33531835, 14780363, + 13348553, 12076947, -30836462, 5113182}, + {-17770784, 11797796, 31950843, 13929123, -25888302, 12288344, + -30341101, -7336386, 13847711, 5387222}, + }, + { + {-18582163, -3416217, 17824843, -2340966, 22744343, -10442611, + 8763061, 3617786, -19600662, 10370991}, + {20246567, -14369378, 22358229, -543712, 18507283, -10413996, + 14554437, -8746092, 32232924, 16763880}, + {9648505, 10094563, 26416693, 14745928, -30374318, -6472621, + 11094161, 15689506, 3140038, -16510092}, + }, + { + {-16160072, 5472695, 31895588, 4744994, 8823515, 10365685, + -27224800, 9448613, -28774454, 366295}, + {19153450, 11523972, -11096490, -6503142, -24647631, 5420647, + 28344573, 8041113, 719605, 11671788}, + {8678025, 2694440, -6808014, 2517372, 4964326, 11152271, -15432916, + -15266516, 27000813, -10195553}, + }, + { + {-15157904, 7134312, 8639287, -2814877, -7235688, 10421742, 564065, + 5336097, 6750977, -14521026}, + {11836410, -3979488, 26297894, 16080799, 23455045, 15735944, + 1695823, -8819122, 8169720, 16220347}, + {-18115838, 8653647, 17578566, -6092619, -8025777, -16012763, + -11144307, -2627664, -5990708, -14166033}, + }, + { + {-23308498, -10968312, 15213228, -10081214, -30853605, -11050004, + 27884329, 2847284, 2655861, 1738395}, + {-27537433, -14253021, -25336301, -8002780, -9370762, 8129821, + 21651608, -3239336, -19087449, -11005278}, + {1533110, 3437855, 23735889, 459276, 29970501, 11335377, 26030092, + 5821408, 10478196, 8544890}, + }, + { + {32173121, -16129311, 24896207, 3921497, 22579056, -3410854, + 19270449, 12217473, 17789017, -3395995}, + {-30552961, -2228401, -15578829, -10147201, 13243889, 517024, + 15479401, -3853233, 30460520, 1052596}, + {-11614875, 13323618, 32618793, 8175907, -15230173, 12596687, + 27491595, -4612359, 3179268, -9478891}, + }, + { + {31947069, -14366651, -4640583, -15339921, -15125977, -6039709, + -14756777, -16411740, 19072640, -9511060}, + {11685058, 11822410, 3158003, -13952594, 33402194, -4165066, + 5977896, -5215017, 473099, 5040608}, + {-20290863, 8198642, -27410132, 11602123, 1290375, -2799760, + 28326862, 1721092, -19558642, -3131606}, + }, + }, + { + { + {7881532, 10687937, 7578723, 7738378, -18951012, -2553952, 21820786, + 8076149, -27868496, 11538389}, + {-19935666, 3899861, 18283497, -6801568, -15728660, -11249211, + 8754525, 7446702, -5676054, 5797016}, + {-11295600, -3793569, -15782110, -7964573, 12708869, -8456199, + 2014099, -9050574, -2369172, -5877341}, + }, + { + {-22472376, -11568741, -27682020, 1146375, 18956691, 16640559, + 1192730, -3714199, 15123619, 10811505}, + {14352098, -3419715, -18942044, 10822655, 32750596, 4699007, -70363, + 15776356, -28886779, -11974553}, + {-28241164, -8072475, -4978962, -5315317, 29416931, 1847569, + -20654173, -16484855, 4714547, -9600655}, + }, + { + {15200332, 8368572, 19679101, 15970074, -31872674, 1959451, + 24611599, -4543832, -11745876, 12340220}, + {12876937, -10480056, 33134381, 6590940, -6307776, 14872440, + 9613953, 8241152, 15370987, 9608631}, + {-4143277, -12014408, 8446281, -391603, 4407738, 13629032, -7724868, + 15866074, -28210621, -8814099}, + }, + { + {26660628, -15677655, 8393734, 358047, -7401291, 992988, -23904233, + 858697, 20571223, 8420556}, + {14620715, 13067227, -15447274, 8264467, 14106269, 15080814, + 33531827, 12516406, -21574435, -12476749}, + {236881, 10476226, 57258, -14677024, 6472998, 2466984, 17258519, + 7256740, 8791136, 15069930}, + }, + { + {1276410, -9371918, 22949635, -16322807, -23493039, -5702186, + 14711875, 4874229, -30663140, -2331391}, + {5855666, 4990204, -13711848, 7294284, -7804282, 1924647, -1423175, + -7912378, -33069337, 9234253}, + {20590503, -9018988, 31529744, -7352666, -2706834, 10650548, + 31559055, -11609587, 18979186, 13396066}, + }, + { + {24474287, 4968103, 22267082, 4407354, 24063882, -8325180, + -18816887, 13594782, 33514650, 7021958}, + {-11566906, -6565505, -21365085, 15928892, -26158305, 4315421, + -25948728, -3916677, -21480480, 12868082}, + {-28635013, 13504661, 19988037, -2132761, 21078225, 6443208, + -21446107, 2244500, -12455797, -8089383}, + }, + { + {-30595528, 13793479, -5852820, 319136, -25723172, -6263899, + 33086546, 8957937, -15233648, 5540521}, + {-11630176, -11503902, -8119500, -7643073, 2620056, 1022908, + -23710744, -1568984, -16128528, -14962807}, + {23152971, 775386, 27395463, 14006635, -9701118, 4649512, 1689819, + 892185, -11513277, -15205948}, + }, + { + {9770129, 9586738, 26496094, 4324120, 1556511, -3550024, 27453819, + 4763127, -19179614, 5867134}, + {-32765025, 1927590, 31726409, -4753295, 23962434, -16019500, + 27846559, 5931263, -29749703, -16108455}, + {27461885, -2977536, 22380810, 1815854, -23033753, -3031938, + 7283490, -15148073, -19526700, 7734629}, + }, + }, + { + { + {-8010264, -9590817, -11120403, 6196038, 29344158, -13430885, + 7585295, -3176626, 18549497, 15302069}, + {-32658337, -6171222, -7672793, -11051681, 6258878, 13504381, + 10458790, -6418461, -8872242, 8424746}, + {24687205, 8613276, -30667046, -3233545, 1863892, -1830544, + 19206234, 7134917, -11284482, -828919}, + }, + { + {11334899, -9218022, 8025293, 12707519, 17523892, -10476071, + 10243738, -14685461, -5066034, 16498837}, + {8911542, 6887158, -9584260, -6958590, 11145641, -9543680, 17303925, + -14124238, 6536641, 10543906}, + {-28946384, 15479763, -17466835, 568876, -1497683, 11223454, + -2669190, -16625574, -27235709, 8876771}, + }, + { + {-25742899, -12566864, -15649966, -846607, -33026686, -796288, + -33481822, 15824474, -604426, -9039817}, + {10330056, 70051, 7957388, -9002667, 9764902, 15609756, 27698697, + -4890037, 1657394, 3084098}, + {10477963, -7470260, 12119566, -13250805, 29016247, -5365589, + 31280319, 14396151, -30233575, 15272409}, + }, + { + {-12288309, 3169463, 28813183, 16658753, 25116432, -5630466, + -25173957, -12636138, -25014757, 1950504}, + {-26180358, 9489187, 11053416, -14746161, -31053720, 5825630, + -8384306, -8767532, 15341279, 8373727}, + {28685821, 7759505, -14378516, -12002860, -31971820, 4079242, + 298136, -10232602, -2878207, 15190420}, + }, + { + {-32932876, 13806336, -14337485, -15794431, -24004620, 10940928, + 8669718, 2742393, -26033313, -6875003}, + {-1580388, -11729417, -25979658, -11445023, -17411874, -10912854, + 9291594, -16247779, -12154742, 6048605}, + {-30305315, 14843444, 1539301, 11864366, 20201677, 1900163, + 13934231, 5128323, 11213262, 9168384}, + }, + { + {-26280513, 11007847, 19408960, -940758, -18592965, -4328580, + -5088060, -11105150, 20470157, -16398701}, + {-23136053, 9282192, 14855179, -15390078, -7362815, -14408560, + -22783952, 14461608, 14042978, 5230683}, + {29969567, -2741594, -16711867, -8552442, 9175486, -2468974, + 21556951, 3506042, -5933891, -12449708}, + }, + { + {-3144746, 8744661, 19704003, 4581278, -20430686, 6830683, + -21284170, 8971513, -28539189, 15326563}, + {-19464629, 10110288, -17262528, -3503892, -23500387, 1355669, + -15523050, 15300988, -20514118, 9168260}, + {-5353335, 4488613, -23803248, 16314347, 7780487, -15638939, + -28948358, 9601605, 33087103, -9011387}, + }, + { + {-19443170, -15512900, -20797467, -12445323, -29824447, 10229461, + -27444329, -15000531, -5996870, 15664672}, + {23294591, -16632613, -22650781, -8470978, 27844204, 11461195, + 13099750, -2460356, 18151676, 13417686}, + {-24722913, -4176517, -31150679, 5988919, -26858785, 6685065, + 1661597, -12551441, 15271676, -15452665}, + }, + }, + { + { + {11433042, -13228665, 8239631, -5279517, -1985436, -725718, + -18698764, 2167544, -6921301, -13440182}, + {-31436171, 15575146, 30436815, 12192228, -22463353, 9395379, + -9917708, -8638997, 12215110, 12028277}, + {14098400, 6555944, 23007258, 5757252, -15427832, -12950502, + 30123440, 4617780, -16900089, -655628}, + }, + { + {-4026201, -15240835, 11893168, 13718664, -14809462, 1847385, + -15819999, 10154009, 23973261, -12684474}, + {-26531820, -3695990, -1908898, 2534301, -31870557, -16550355, + 18341390, -11419951, 32013174, -10103539}, + {-25479301, 10876443, -11771086, -14625140, -12369567, 1838104, + 21911214, 6354752, 4425632, -837822}, + }, + { + {-10433389, -14612966, 22229858, -3091047, -13191166, 776729, + -17415375, -12020462, 4725005, 14044970}, + {19268650, -7304421, 1555349, 8692754, -21474059, -9910664, 6347390, + -1411784, -19522291, -16109756}, + {-24864089, 12986008, -10898878, -5558584, -11312371, -148526, + 19541418, 8180106, 9282262, 10282508}, + }, + { + {-26205082, 4428547, -8661196, -13194263, 4098402, -14165257, + 15522535, 8372215, 5542595, -10702683}, + {-10562541, 14895633, 26814552, -16673850, -17480754, -2489360, + -2781891, 6993761, -18093885, 10114655}, + {-20107055, -929418, 31422704, 10427861, -7110749, 6150669, + -29091755, -11529146, 25953725, -106158}, + }, + { + {-4234397, -8039292, -9119125, 3046000, 2101609, -12607294, + 19390020, 6094296, -3315279, 12831125}, + {-15998678, 7578152, 5310217, 14408357, -33548620, -224739, + 31575954, 6326196, 7381791, -2421839}, + {-20902779, 3296811, 24736065, -16328389, 18374254, 7318640, + 6295303, 8082724, -15362489, 12339664}, + }, + { + {27724736, 2291157, 6088201, -14184798, 1792727, 5857634, 13848414, + 15768922, 25091167, 14856294}, + {-18866652, 8331043, 24373479, 8541013, -701998, -9269457, 12927300, + -12695493, -22182473, -9012899}, + {-11423429, -5421590, 11632845, 3405020, 30536730, -11674039, + -27260765, 13866390, 30146206, 9142070}, + }, + { + {3924129, -15307516, -13817122, -10054960, 12291820, -668366, + -27702774, 9326384, -8237858, 4171294}, + {-15921940, 16037937, 6713787, 16606682, -21612135, 2790944, + 26396185, 3731949, 345228, -5462949}, + {-21327538, 13448259, 25284571, 1143661, 20614966, -8849387, + 2031539, -12391231, -16253183, -13582083}, + }, + { + {31016211, -16722429, 26371392, -14451233, -5027349, 14854137, + 17477601, 3842657, 28012650, -16405420}, + {-5075835, 9368966, -8562079, -4600902, -15249953, 6970560, + -9189873, 16292057, -8867157, 3507940}, + {29439664, 3537914, 23333589, 6997794, -17555561, -11018068, + -15209202, -15051267, -9164929, 6580396}, + }, + }, + { + { + {-12185861, -7679788, 16438269, 10826160, -8696817, -6235611, + 17860444, -9273846, -2095802, 9304567}, + {20714564, -4336911, 29088195, 7406487, 11426967, -5095705, + 14792667, -14608617, 5289421, -477127}, + {-16665533, -10650790, -6160345, -13305760, 9192020, -1802462, + 17271490, 12349094, 26939669, -3752294}, + }, + { + {-12889898, 9373458, 31595848, 16374215, 21471720, 13221525, + -27283495, -12348559, -3698806, 117887}, + {22263325, -6560050, 3984570, -11174646, -15114008, -566785, + 28311253, 5358056, -23319780, 541964}, + {16259219, 3261970, 2309254, -15534474, -16885711, -4581916, + 24134070, -16705829, -13337066, -13552195}, + }, + { + {9378160, -13140186, -22845982, -12745264, 28198281, -7244098, + -2399684, -717351, 690426, 14876244}, + {24977353, -314384, -8223969, -13465086, 28432343, -1176353, + -13068804, -12297348, -22380984, 6618999}, + {-1538174, 11685646, 12944378, 13682314, -24389511, -14413193, + 8044829, -13817328, 32239829, -5652762}, + }, + { + {-18603066, 4762990, -926250, 8885304, -28412480, -3187315, 9781647, + -10350059, 32779359, 5095274}, + {-33008130, -5214506, -32264887, -3685216, 9460461, -9327423, + -24601656, 14506724, 21639561, -2630236}, + {-16400943, -13112215, 25239338, 15531969, 3987758, -4499318, + -1289502, -6863535, 17874574, 558605}, + }, + { + {-13600129, 10240081, 9171883, 16131053, -20869254, 9599700, + 33499487, 5080151, 2085892, 5119761}, + {-22205145, -2519528, -16381601, 414691, -25019550, 2170430, + 30634760, -8363614, -31999993, -5759884}, + {-6845704, 15791202, 8550074, -1312654, 29928809, -12092256, + 27534430, -7192145, -22351378, 12961482}, + }, + { + {-24492060, -9570771, 10368194, 11582341, -23397293, -2245287, + 16533930, 8206996, -30194652, -5159638}, + {-11121496, -3382234, 2307366, 6362031, -135455, 8868177, -16835630, + 7031275, 7589640, 8945490}, + {-32152748, 8917967, 6661220, -11677616, -1192060, -15793393, + 7251489, -11182180, 24099109, -14456170}, + }, + { + {5019558, -7907470, 4244127, -14714356, -26933272, 6453165, + -19118182, -13289025, -6231896, -10280736}, + {10853594, 10721687, 26480089, 5861829, -22995819, 1972175, + -1866647, -10557898, -3363451, -6441124}, + {-17002408, 5906790, 221599, -6563147, 7828208, -13248918, 24362661, + -2008168, -13866408, 7421392}, + }, + { + {8139927, -6546497, 32257646, -5890546, 30375719, 1886181, + -21175108, 15441252, 28826358, -4123029}, + {6267086, 9695052, 7709135, -16603597, -32869068, -1886135, + 14795160, -7840124, 13746021, -1742048}, + {28584902, 7787108, -6732942, -15050729, 22846041, -7571236, + -3181936, -363524, 4771362, -8419958}, + }, + }, + { + { + {24949256, 6376279, -27466481, -8174608, -18646154, -9930606, + 33543569, -12141695, 3569627, 11342593}, + {26514989, 4740088, 27912651, 3697550, 19331575, -11472339, 6809886, + 4608608, 7325975, -14801071}, + {-11618399, -14554430, -24321212, 7655128, -1369274, 5214312, + -27400540, 10258390, -17646694, -8186692}, + }, + { + {11431204, 15823007, 26570245, 14329124, 18029990, 4796082, + -31446179, 15580664, 9280358, -3973687}, + {-160783, -10326257, -22855316, -4304997, -20861367, -13621002, + -32810901, -11181622, -15545091, 4387441}, + {-20799378, 12194512, 3937617, -5805892, -27154820, 9340370, + -24513992, 8548137, 20617071, -7482001}, + }, + { + {-938825, -3930586, -8714311, 16124718, 24603125, -6225393, + -13775352, -11875822, 24345683, 10325460}, + {-19855277, -1568885, -22202708, 8714034, 14007766, 6928528, + 16318175, -1010689, 4766743, 3552007}, + {-21751364, -16730916, 1351763, -803421, -4009670, 3950935, 3217514, + 14481909, 10988822, -3994762}, + }, + { + {15564307, -14311570, 3101243, 5684148, 30446780, -8051356, + 12677127, -6505343, -8295852, 13296005}, + {-9442290, 6624296, -30298964, -11913677, -4670981, -2057379, + 31521204, 9614054, -30000824, 12074674}, + {4771191, -135239, 14290749, -13089852, 27992298, 14998318, + -1413936, -1556716, 29832613, -16391035}, + }, + { + {7064884, -7541174, -19161962, -5067537, -18891269, -2912736, + 25825242, 5293297, -27122660, 13101590}, + {-2298563, 2439670, -7466610, 1719965, -27267541, -16328445, + 32512469, -5317593, -30356070, -4190957}, + {-30006540, 10162316, -33180176, 3981723, -16482138, -13070044, + 14413974, 9515896, 19568978, 9628812}, + }, + { + {33053803, 199357, 15894591, 1583059, 27380243, -4580435, -17838894, + -6106839, -6291786, 3437740}, + {-18978877, 3884493, 19469877, 12726490, 15913552, 13614290, + -22961733, 70104, 7463304, 4176122}, + {-27124001, 10659917, 11482427, -16070381, 12771467, -6635117, + -32719404, -5322751, 24216882, 5944158}, + }, + { + {8894125, 7450974, -2664149, -9765752, -28080517, -12389115, + 19345746, 14680796, 11632993, 5847885}, + {26942781, -2315317, 9129564, -4906607, 26024105, 11769399, + -11518837, 6367194, -9727230, 4782140}, + {19916461, -4828410, -22910704, -11414391, 25606324, -5972441, + 33253853, 8220911, 6358847, -1873857}, + }, + { + {801428, -2081702, 16569428, 11065167, 29875704, 96627, 7908388, + -4480480, -13538503, 1387155}, + {19646058, 5720633, -11416706, 12814209, 11607948, 12749789, + 14147075, 15156355, -21866831, 11835260}, + {19299512, 1155910, 28703737, 14890794, 2925026, 7269399, 26121523, + 15467869, -26560550, 5052483}, + }, + }, + { + { + {-3017432, 10058206, 1980837, 3964243, 22160966, 12322533, -6431123, + -12618185, 12228557, -7003677}, + {32944382, 14922211, -22844894, 5188528, 21913450, -8719943, + 4001465, 13238564, -6114803, 8653815}, + {22865569, -4652735, 27603668, -12545395, 14348958, 8234005, + 24808405, 5719875, 28483275, 2841751}, + }, + { + {-16420968, -1113305, -327719, -12107856, 21886282, -15552774, + -1887966, -315658, 19932058, -12739203}, + {-11656086, 10087521, -8864888, -5536143, -19278573, -3055912, + 3999228, 13239134, -4777469, -13910208}, + {1382174, -11694719, 17266790, 9194690, -13324356, 9720081, + 20403944, 11284705, -14013818, 3093230}, + }, + { + {16650921, -11037932, -1064178, 1570629, -8329746, 7352753, -302424, + 16271225, -24049421, -6691850}, + {-21911077, -5927941, -4611316, -5560156, -31744103, -10785293, + 24123614, 15193618, -21652117, -16739389}, + {-9935934, -4289447, -25279823, 4372842, 2087473, 10399484, + 31870908, 14690798, 17361620, 11864968}, + }, + { + {-11307610, 6210372, 13206574, 5806320, -29017692, -13967200, + -12331205, -7486601, -25578460, -16240689}, + {14668462, -12270235, 26039039, 15305210, 25515617, 4542480, + 10453892, 6577524, 9145645, -6443880}, + {5974874, 3053895, -9433049, -10385191, -31865124, 3225009, + -7972642, 3936128, -5652273, -3050304}, + }, + { + {30625386, -4729400, -25555961, -12792866, -20484575, 7695099, + 17097188, -16303496, -27999779, 1803632}, + {-3553091, 9865099, -5228566, 4272701, -5673832, -16689700, + 14911344, 12196514, -21405489, 7047412}, + {20093277, 9920966, -11138194, -5343857, 13161587, 12044805, + -32856851, 4124601, -32343828, -10257566}, + }, + { + {-20788824, 14084654, -13531713, 7842147, 19119038, -13822605, + 4752377, -8714640, -21679658, 2288038}, + {-26819236, -3283715, 29965059, 3039786, -14473765, 2540457, + 29457502, 14625692, -24819617, 12570232}, + {-1063558, -11551823, 16920318, 12494842, 1278292, -5869109, + -21159943, -3498680, -11974704, 4724943}, + }, + { + {17960970, -11775534, -4140968, -9702530, -8876562, -1410617, + -12907383, -8659932, -29576300, 1903856}, + {23134274, -14279132, -10681997, -1611936, 20684485, 15770816, + -12989750, 3190296, 26955097, 14109738}, + {15308788, 5320727, -30113809, -14318877, 22902008, 7767164, + 29425325, -11277562, 31960942, 11934971}, + }, + { + {-27395711, 8435796, 4109644, 12222639, -24627868, 14818669, + 20638173, 4875028, 10491392, 1379718}, + {-13159415, 9197841, 3875503, -8936108, -1383712, -5879801, + 33518459, 16176658, 21432314, 12180697}, + {-11787308, 11500838, 13787581, -13832590, -22430679, 10140205, + 1465425, 12689540, -10301319, -13872883}, + }, + }, + { + { + {5414091, -15386041, -21007664, 9643570, 12834970, 1186149, + -2622916, -1342231, 26128231, 6032912}, + {-26337395, -13766162, 32496025, -13653919, 17847801, -12669156, + 3604025, 8316894, -25875034, -10437358}, + {3296484, 6223048, 24680646, -12246460, -23052020, 5903205, + -8862297, -4639164, 12376617, 3188849}, + }, + { + {29190488, -14659046, 27549113, -1183516, 3520066, -10697301, + 32049515, -7309113, -16109234, -9852307}, + {-14744486, -9309156, 735818, -598978, -20407687, -5057904, + 25246078, -15795669, 18640741, -960977}, + {-6928835, -16430795, 10361374, 5642961, 4910474, 12345252, + -31638386, -494430, 10530747, 1053335}, + }, + { + {-29265967, -14186805, -13538216, -12117373, -19457059, -10655384, + -31462369, -2948985, 24018831, 15026644}, + {-22592535, -3145277, -2289276, 5953843, -13440189, 9425631, + 25310643, 13003497, -2314791, -15145616}, + {-27419985, -603321, -8043984, -1669117, -26092265, 13987819, + -27297622, 187899, -23166419, -2531735}, + }, + { + {-21744398, -13810475, 1844840, 5021428, -10434399, -15911473, + 9716667, 16266922, -5070217, 726099}, + {29370922, -6053998, 7334071, -15342259, 9385287, 2247707, + -13661962, -4839461, 30007388, -15823341}, + {-936379, 16086691, 23751945, -543318, -1167538, -5189036, 9137109, + 730663, 9835848, 4555336}, + }, + { + {-23376435, 1410446, -22253753, -12899614, 30867635, 15826977, + 17693930, 544696, -11985298, 12422646}, + {31117226, -12215734, -13502838, 6561947, -9876867, -12757670, + -5118685, -4096706, 29120153, 13924425}, + {-17400879, -14233209, 19675799, -2734756, -11006962, -5858820, + -9383939, -11317700, 7240931, -237388}, + }, + { + {-31361739, -11346780, -15007447, -5856218, -22453340, -12152771, + 1222336, 4389483, 3293637, -15551743}, + {-16684801, -14444245, 11038544, 11054958, -13801175, -3338533, + -24319580, 7733547, 12796905, -6335822}, + {-8759414, -10817836, -25418864, 10783769, -30615557, -9746811, + -28253339, 3647836, 3222231, -11160462}, + }, + { + {18606113, 1693100, -25448386, -15170272, 4112353, 10045021, + 23603893, -2048234, -7550776, 2484985}, + {9255317, -3131197, -12156162, -1004256, 13098013, -9214866, + 16377220, -2102812, -19802075, -3034702}, + {-22729289, 7496160, -5742199, 11329249, 19991973, -3347502, + -31718148, 9936966, -30097688, -10618797}, + }, + { + {21878590, -5001297, 4338336, 13643897, -3036865, 13160960, + 19708896, 5415497, -7360503, -4109293}, + {27736861, 10103576, 12500508, 8502413, -3413016, -9633558, + 10436918, -1550276, -23659143, -8132100}, + {19492550, -12104365, -29681976, -852630, -3208171, 12403437, + 30066266, 8367329, 13243957, 8709688}, + }, + }, + { + { + {12015105, 2801261, 28198131, 10151021, 24818120, -4743133, + -11194191, -5645734, 5150968, 7274186}, + {2831366, -12492146, 1478975, 6122054, 23825128, -12733586, + 31097299, 6083058, 31021603, -9793610}, + {-2529932, -2229646, 445613, 10720828, -13849527, -11505937, + -23507731, 16354465, 15067285, -14147707}, + }, + { + {7840942, 14037873, -33364863, 15934016, -728213, -3642706, + 21403988, 1057586, -19379462, -12403220}, + {915865, -16469274, 15608285, -8789130, -24357026, 6060030, + -17371319, 8410997, -7220461, 16527025}, + {32922597, -556987, 20336074, -16184568, 10903705, -5384487, + 16957574, 52992, 23834301, 6588044}, + }, + { + {32752030, 11232950, 3381995, -8714866, 22652988, -10744103, + 17159699, 16689107, -20314580, -1305992}, + {-4689649, 9166776, -25710296, -10847306, 11576752, 12733943, + 7924251, -2752281, 1976123, -7249027}, + {21251222, 16309901, -2983015, -6783122, 30810597, 12967303, 156041, + -3371252, 12331345, -8237197}, + }, + { + {8651614, -4477032, -16085636, -4996994, 13002507, 2950805, + 29054427, -5106970, 10008136, -4667901}, + {31486080, 15114593, -14261250, 12951354, 14369431, -7387845, + 16347321, -13662089, 8684155, -10532952}, + {19443825, 11385320, 24468943, -9659068, -23919258, 2187569, + -26263207, -6086921, 31316348, 14219878}, + }, + { + {-28594490, 1193785, 32245219, 11392485, 31092169, 15722801, + 27146014, 6992409, 29126555, 9207390}, + {32382935, 1110093, 18477781, 11028262, -27411763, -7548111, + -4980517, 10843782, -7957600, -14435730}, + {2814918, 7836403, 27519878, -7868156, -20894015, -11553689, + -21494559, 8550130, 28346258, 1994730}, + }, + { + {-19578299, 8085545, -14000519, -3948622, 2785838, -16231307, + -19516951, 7174894, 22628102, 8115180}, + {-30405132, 955511, -11133838, -15078069, -32447087, -13278079, + -25651578, 3317160, -9943017, 930272}, + {-15303681, -6833769, 28856490, 1357446, 23421993, 1057177, + 24091212, -1388970, -22765376, -10650715}, + }, + { + {-22751231, -5303997, -12907607, -12768866, -15811511, -7797053, + -14839018, -16554220, -1867018, 8398970}, + {-31969310, 2106403, -4736360, 1362501, 12813763, 16200670, + 22981545, -6291273, 18009408, -15772772}, + {-17220923, -9545221, -27784654, 14166835, 29815394, 7444469, + 29551787, -3727419, 19288549, 1325865}, + }, + { + {15100157, -15835752, -23923978, -1005098, -26450192, 15509408, + 12376730, -3479146, 33166107, -8042750}, + {20909231, 13023121, -9209752, 16251778, -5778415, -8094914, + 12412151, 10018715, 2213263, -13878373}, + {32529814, -11074689, 30361439, -16689753, -9135940, 1513226, + 22922121, 6382134, -5766928, 8371348}, + }, + }, + { + { + {9923462, 11271500, 12616794, 3544722, -29998368, -1721626, + 12891687, -8193132, -26442943, 10486144}, + {-22597207, -7012665, 8587003, -8257861, 4084309, -12970062, 361726, + 2610596, -23921530, -11455195}, + {5408411, -1136691, -4969122, 10561668, 24145918, 14240566, + 31319731, -4235541, 19985175, -3436086}, + }, + { + {-13994457, 16616821, 14549246, 3341099, 32155958, 13648976, + -17577068, 8849297, 65030, 8370684}, + {-8320926, -12049626, 31204563, 5839400, -20627288, -1057277, + -19442942, 6922164, 12743482, -9800518}, + {-2361371, 12678785, 28815050, 4759974, -23893047, 4884717, + 23783145, 11038569, 18800704, 255233}, + }, + { + {-5269658, -1773886, 13957886, 7990715, 23132995, 728773, 13393847, + 9066957, 19258688, -14753793}, + {-2936654, -10827535, -10432089, 14516793, -3640786, 4372541, + -31934921, 2209390, -1524053, 2055794}, + {580882, 16705327, 5468415, -2683018, -30926419, -14696000, + -7203346, -8994389, -30021019, 7394435}, + }, + { + {23838809, 1822728, -15738443, 15242727, 8318092, -3733104, + -21672180, -3492205, -4821741, 14799921}, + {13345610, 9759151, 3371034, -16137791, 16353039, 8577942, 31129804, + 13496856, -9056018, 7402518}, + {2286874, -4435931, -20042458, -2008336, -13696227, 5038122, + 11006906, -15760352, 8205061, 1607563}, + }, + { + {14414086, -8002132, 3331830, -3208217, 22249151, -5594188, + 18364661, -2906958, 30019587, -9029278}, + {-27688051, 1585953, -10775053, 931069, -29120221, -11002319, + -14410829, 12029093, 9944378, 8024}, + {4368715, -3709630, 29874200, -15022983, -20230386, -11410704, + -16114594, -999085, -8142388, 5640030}, + }, + { + {10299610, 13746483, 11661824, 16234854, 7630238, 5998374, 9809887, + -16694564, 15219798, -14327783}, + {27425505, -5719081, 3055006, 10660664, 23458024, 595578, -15398605, + -1173195, -18342183, 9742717}, + {6744077, 2427284, 26042789, 2720740, -847906, 1118974, 32324614, + 7406442, 12420155, 1994844}, + }, + { + {14012521, -5024720, -18384453, -9578469, -26485342, -3936439, + -13033478, -10909803, 24319929, -6446333}, + {16412690, -4507367, 10772641, 15929391, -17068788, -4658621, + 10555945, -10484049, -30102368, -4739048}, + {22397382, -7767684, -9293161, -12792868, 17166287, -9755136, + -27333065, 6199366, 21880021, -12250760}, + }, + { + {-4283307, 5368523, -31117018, 8163389, -30323063, 3209128, + 16557151, 8890729, 8840445, 4957760}, + {-15447727, 709327, -6919446, -10870178, -29777922, 6522332, + -21720181, 12130072, -14796503, 5005757}, + {-2114751, -14308128, 23019042, 15765735, -25269683, 6002752, + 10183197, -13239326, -16395286, -2176112}, + }, + }, + { + { + {-19025756, 1632005, 13466291, -7995100, -23640451, 16573537, + -32013908, -3057104, 22208662, 2000468}, + {3065073, -1412761, -25598674, -361432, -17683065, -5703415, + -8164212, 11248527, -3691214, -7414184}, + {10379208, -6045554, 8877319, 1473647, -29291284, -12507580, + 16690915, 2553332, -3132688, 16400289}, + }, + { + {15716668, 1254266, -18472690, 7446274, -8448918, 6344164, + -22097271, -7285580, 26894937, 9132066}, + {24158887, 12938817, 11085297, -8177598, -28063478, -4457083, + -30576463, 64452, -6817084, -2692882}, + {13488534, 7794716, 22236231, 5989356, 25426474, -12578208, 2350710, + -3418511, -4688006, 2364226}, + }, + { + {16335052, 9132434, 25640582, 6678888, 1725628, 8517937, -11807024, + -11697457, 15445875, -7798101}, + {29004207, -7867081, 28661402, -640412, -12794003, -7943086, + 31863255, -4135540, -278050, -15759279}, + {-6122061, -14866665, -28614905, 14569919, -10857999, -3591829, + 10343412, -6976290, -29828287, -10815811}, + }, + { + {27081650, 3463984, 14099042, -4517604, 1616303, -6205604, 29542636, + 15372179, 17293797, 960709}, + {20263915, 11434237, -5765435, 11236810, 13505955, -10857102, + -16111345, 6493122, -19384511, 7639714}, + {-2830798, -14839232, 25403038, -8215196, -8317012, -16173699, + 18006287, -16043750, 29994677, -15808121}, + }, + { + {9769828, 5202651, -24157398, -13631392, -28051003, -11561624, + -24613141, -13860782, -31184575, 709464}, + {12286395, 13076066, -21775189, -1176622, -25003198, 4057652, + -32018128, -8890874, 16102007, 13205847}, + {13733362, 5599946, 10557076, 3195751, -5557991, 8536970, -25540170, + 8525972, 10151379, 10394400}, + }, + { + {4024660, -16137551, 22436262, 12276534, -9099015, -2686099, + 19698229, 11743039, -33302334, 8934414}, + {-15879800, -4525240, -8580747, -2934061, 14634845, -698278, + -9449077, 3137094, -11536886, 11721158}, + {17555939, -5013938, 8268606, 2331751, -22738815, 9761013, 9319229, + 8835153, -9205489, -1280045}, + }, + { + {-461409, -7830014, 20614118, 16688288, -7514766, -4807119, + 22300304, 505429, 6108462, -6183415}, + {-5070281, 12367917, -30663534, 3234473, 32617080, -8422642, + 29880583, -13483331, -26898490, -7867459}, + {-31975283, 5726539, 26934134, 10237677, -3173717, -605053, + 24199304, 3795095, 7592688, -14992079}, + }, + { + {21594432, -14964228, 17466408, -4077222, 32537084, 2739898, + 6407723, 12018833, -28256052, 4298412}, + {-20650503, -11961496, -27236275, 570498, 3767144, -1717540, + 13891942, -1569194, 13717174, 10805743}, + {-14676630, -15644296, 15287174, 11927123, 24177847, -8175568, + -796431, 14860609, -26938930, -5863836}, + }, + }, + { + { + {12962541, 5311799, -10060768, 11658280, 18855286, -7954201, + 13286263, -12808704, -4381056, 9882022}, + {18512079, 11319350, -20123124, 15090309, 18818594, 5271736, + -22727904, 3666879, -23967430, -3299429}, + {-6789020, -3146043, 16192429, 13241070, 15898607, -14206114, + -10084880, -6661110, -2403099, 5276065}, + }, + { + {30169808, -5317648, 26306206, -11750859, 27814964, 7069267, + 7152851, 3684982, 1449224, 13082861}, + {10342826, 3098505, 2119311, 193222, 25702612, 12233820, 23697382, + 15056736, -21016438, -8202000}, + {-33150110, 3261608, 22745853, 7948688, 19370557, -15177665, + -26171976, 6482814, -10300080, -11060101}, + }, + { + {32869458, -5408545, 25609743, 15678670, -10687769, -15471071, + 26112421, 2521008, -22664288, 6904815}, + {29506923, 4457497, 3377935, -9796444, -30510046, 12935080, 1561737, + 3841096, -29003639, -6657642}, + {10340844, -6630377, -18656632, -2278430, 12621151, -13339055, + 30878497, -11824370, -25584551, 5181966}, + }, + { + {25940115, -12658025, 17324188, -10307374, -8671468, 15029094, + 24396252, -16450922, -2322852, -12388574}, + {-21765684, 9916823, -1300409, 4079498, -1028346, 11909559, 1782390, + 12641087, 20603771, -6561742}, + {-18882287, -11673380, 24849422, 11501709, 13161720, -4768874, + 1925523, 11914390, 4662781, 7820689}, + }, + { + {12241050, -425982, 8132691, 9393934, 32846760, -1599620, 29749456, + 12172924, 16136752, 15264020}, + {-10349955, -14680563, -8211979, 2330220, -17662549, -14545780, + 10658213, 6671822, 19012087, 3772772}, + {3753511, -3421066, 10617074, 2028709, 14841030, -6721664, 28718732, + -15762884, 20527771, 12988982}, + }, + { + {-14822485, -5797269, -3707987, 12689773, -898983, -10914866, + -24183046, -10564943, 3299665, -12424953}, + {-16777703, -15253301, -9642417, 4978983, 3308785, 8755439, 6943197, + 6461331, -25583147, 8991218}, + {-17226263, 1816362, -1673288, -6086439, 31783888, -8175991, + -32948145, 7417950, -30242287, 1507265}, + }, + { + {29692663, 6829891, -10498800, 4334896, 20945975, -11906496, + -28887608, 8209391, 14606362, -10647073}, + {-3481570, 8707081, 32188102, 5672294, 22096700, 1711240, -33020695, + 9761487, 4170404, -2085325}, + {-11587470, 14855945, -4127778, -1531857, -26649089, 15084046, + 22186522, 16002000, -14276837, -8400798}, + }, + { + {-4811456, 13761029, -31703877, -2483919, -3312471, 7869047, + -7113572, -9620092, 13240845, 10965870}, + {-7742563, -8256762, -14768334, -13656260, -23232383, 12387166, + 4498947, 14147411, 29514390, 4302863}, + {-13413405, -12407859, 20757302, -13801832, 14785143, 8976368, + -5061276, -2144373, 17846988, -13971927}, + }, + }, + { + { + {-2244452, -754728, -4597030, -1066309, -6247172, 1455299, + -21647728, -9214789, -5222701, 12650267}, + {-9906797, -16070310, 21134160, 12198166, -27064575, 708126, 387813, + 13770293, -19134326, 10958663}, + {22470984, 12369526, 23446014, -5441109, -21520802, -9698723, + -11772496, -11574455, -25083830, 4271862}, + }, + { + {-25169565, -10053642, -19909332, 15361595, -5984358, 2159192, + 75375, -4278529, -32526221, 8469673}, + {15854970, 4148314, -8893890, 7259002, 11666551, 13824734, + -30531198, 2697372, 24154791, -9460943}, + {15446137, -15806644, 29759747, 14019369, 30811221, -9610191, + -31582008, 12840104, 24913809, 9815020}, + }, + { + {-4709286, -5614269, -31841498, -12288893, -14443537, 10799414, + -9103676, 13438769, 18735128, 9466238}, + {11933045, 9281483, 5081055, -5183824, -2628162, -4905629, -7727821, + -10896103, -22728655, 16199064}, + {14576810, 379472, -26786533, -8317236, -29426508, -10812974, + -102766, 1876699, 30801119, 2164795}, + }, + { + {15995086, 3199873, 13672555, 13712240, -19378835, -4647646, + -13081610, -15496269, -13492807, 1268052}, + {-10290614, -3659039, -3286592, 10948818, 23037027, 3794475, + -3470338, -12600221, -17055369, 3565904}, + {29210088, -9419337, -5919792, -4952785, 10834811, -13327726, + -16512102, -10820713, -27162222, -14030531}, + }, + { + {-13161890, 15508588, 16663704, -8156150, -28349942, 9019123, + -29183421, -3769423, 2244111, -14001979}, + {-5152875, -3800936, -9306475, -6071583, 16243069, 14684434, + -25673088, -16180800, 13491506, 4641841}, + {10813417, 643330, -19188515, -728916, 30292062, -16600078, + 27548447, -7721242, 14476989, -12767431}, + }, + { + {10292079, 9984945, 6481436, 8279905, -7251514, 7032743, 27282937, + -1644259, -27912810, 12651324}, + {-31185513, -813383, 22271204, 11835308, 10201545, 15351028, + 17099662, 3988035, 21721536, -3148940}, + {10202177, -6545839, -31373232, -9574638, -32150642, -8119683, + -12906320, 3852694, 13216206, 14842320}, + }, + { + {-15815640, -10601066, -6538952, -7258995, -6984659, -6581778, + -31500847, 13765824, -27434397, 9900184}, + {14465505, -13833331, -32133984, -14738873, -27443187, 12990492, + 33046193, 15796406, -7051866, -8040114}, + {30924417, -8279620, 6359016, -12816335, 16508377, 9071735, + -25488601, 15413635, 9524356, -7018878}, + }, + { + {12274201, -13175547, 32627641, -1785326, 6736625, 13267305, + 5237659, -5109483, 15663516, 4035784}, + {-2951309, 8903985, 17349946, 601635, -16432815, -4612556, + -13732739, -15889334, -22258478, 4659091}, + {-16916263, -4952973, -30393711, -15158821, 20774812, 15897498, + 5736189, 15026997, -2178256, -13455585}, + }, + }, + { + { + {-8858980, -2219056, 28571666, -10155518, -474467, -10105698, + -3801496, 278095, 23440562, -290208}, + {10226241, -5928702, 15139956, 120818, -14867693, 5218603, 32937275, + 11551483, -16571960, -7442864}, + {17932739, -12437276, -24039557, 10749060, 11316803, 7535897, + 22503767, 5561594, -3646624, 3898661}, + }, + { + {7749907, -969567, -16339731, -16464, -25018111, 15122143, -1573531, + 7152530, 21831162, 1245233}, + {26958459, -14658026, 4314586, 8346991, -5677764, 11960072, + -32589295, -620035, -30402091, -16716212}, + {-12165896, 9166947, 33491384, 13673479, 29787085, 13096535, + 6280834, 14587357, -22338025, 13987525}, + }, + { + {-24349909, 7778775, 21116000, 15572597, -4833266, -5357778, + -4300898, -5124639, -7469781, -2858068}, + {9681908, -6737123, -31951644, 13591838, -6883821, 386950, 31622781, + 6439245, -14581012, 4091397}, + {-8426427, 1470727, -28109679, -1596990, 3978627, -5123623, + -19622683, 12092163, 29077877, -14741988}, + }, + { + {5269168, -6859726, -13230211, -8020715, 25932563, 1763552, + -5606110, -5505881, -20017847, 2357889}, + {32264008, -15407652, -5387735, -1160093, -2091322, -3946900, + 23104804, -12869908, 5727338, 189038}, + {14609123, -8954470, -6000566, -16622781, -14577387, -7743898, + -26745169, 10942115, -25888931, -14884697}, + }, + { + {20513500, 5557931, -15604613, 7829531, 26413943, -2019404, + -21378968, 7471781, 13913677, -5137875}, + {-25574376, 11967826, 29233242, 12948236, -6754465, 4713227, + -8940970, 14059180, 12878652, 8511905}, + {-25656801, 3393631, -2955415, -7075526, -2250709, 9366908, + -30223418, 6812974, 5568676, -3127656}, + }, + { + {11630004, 12144454, 2116339, 13606037, 27378885, 15676917, + -17408753, -13504373, -14395196, 8070818}, + {27117696, -10007378, -31282771, -5570088, 1127282, 12772488, + -29845906, 10483306, -11552749, -1028714}, + {10637467, -5688064, 5674781, 1072708, -26343588, -6982302, + -1683975, 9177853, -27493162, 15431203}, + }, + { + {20525145, 10892566, -12742472, 12779443, -29493034, 16150075, + -28240519, 14943142, -15056790, -7935931}, + {-30024462, 5626926, -551567, -9981087, 753598, 11981191, 25244767, + -3239766, -3356550, 9594024}, + {-23752644, 2636870, -5163910, -10103818, 585134, 7877383, 11345683, + -6492290, 13352335, -10977084}, + }, + { + {-1931799, -5407458, 3304649, -12884869, 17015806, -4877091, + -29783850, -7752482, -13215537, -319204}, + {20239939, 6607058, 6203985, 3483793, -18386976, -779229, -20723742, + 15077870, -22750759, 14523817}, + {27406042, -6041657, 27423596, -4497394, 4996214, 10002360, + -28842031, -4545494, -30172742, -4805667}, + }, + }, + { + { + {11374242, 12660715, 17861383, -12540833, 10935568, 1099227, + -13886076, -9091740, -27727044, 11358504}, + {-12730809, 10311867, 1510375, 10778093, -2119455, -9145702, + 32676003, 11149336, -26123651, 4985768}, + {-19096303, 341147, -6197485, -239033, 15756973, -8796662, -983043, + 13794114, -19414307, -15621255}, + }, + { + {6490081, 11940286, 25495923, -7726360, 8668373, -8751316, 3367603, + 6970005, -1691065, -9004790}, + {1656497, 13457317, 15370807, 6364910, 13605745, 8362338, -19174622, + -5475723, -16796596, -5031438}, + {-22273315, -13524424, -64685, -4334223, -18605636, -10921968, + -20571065, -7007978, -99853, -10237333}, + }, + { + {17747465, 10039260, 19368299, -4050591, -20630635, -16041286, + 31992683, -15857976, -29260363, -5511971}, + {31932027, -4986141, -19612382, 16366580, 22023614, 88450, 11371999, + -3744247, 4882242, -10626905}, + {29796507, 37186, 19818052, 10115756, -11829032, 3352736, 18551198, + 3272828, -5190932, -4162409}, + }, + { + {12501286, 4044383, -8612957, -13392385, -32430052, 5136599, + -19230378, -3529697, 330070, -3659409}, + {6384877, 2899513, 17807477, 7663917, -2358888, 12363165, 25366522, + -8573892, -271295, 12071499}, + {-8365515, -4042521, 25133448, -4517355, -6211027, 2265927, + -32769618, 1936675, -5159697, 3829363}, + }, + { + {28425966, -5835433, -577090, -4697198, -14217555, 6870930, 7921550, + -6567787, 26333140, 14267664}, + {-11067219, 11871231, 27385719, -10559544, -4585914, -11189312, + 10004786, -8709488, -21761224, 8930324}, + {-21197785, -16396035, 25654216, -1725397, 12282012, 11008919, + 1541940, 4757911, -26491501, -16408940}, + }, + { + {13537262, -7759490, -20604840, 10961927, -5922820, -13218065, + -13156584, 6217254, -15943699, 13814990}, + {-17422573, 15157790, 18705543, 29619, 24409717, -260476, 27361681, + 9257833, -1956526, -1776914}, + {-25045300, -10191966, 15366585, 15166509, -13105086, 8423556, + -29171540, 12361135, -18685978, 4578290}, + }, + { + {24579768, 3711570, 1342322, -11180126, -27005135, 14124956, + -22544529, 14074919, 21964432, 8235257}, + {-6528613, -2411497, 9442966, -5925588, 12025640, -1487420, + -2981514, -1669206, 13006806, 2355433}, + {-16304899, -13605259, -6632427, -5142349, 16974359, -10911083, + 27202044, 1719366, 1141648, -12796236}, + }, + { + {-12863944, -13219986, -8318266, -11018091, -6810145, -4843894, + 13475066, -3133972, 32674895, 13715045}, + {11423335, -5468059, 32344216, 8962751, 24989809, 9241752, + -13265253, 16086212, -28740881, -15642093}, + {-1409668, 12530728, -6368726, 10847387, 19531186, -14132160, + -11709148, 7791794, -27245943, 4383347}, + }, + }, + { + { + {-28970898, 5271447, -1266009, -9736989, -12455236, 16732599, + -4862407, -4906449, 27193557, 6245191}, + {-15193956, 5362278, -1783893, 2695834, 4960227, 12840725, 23061898, + 3260492, 22510453, 8577507}, + {-12632451, 11257346, -32692994, 13548177, -721004, 10879011, + 31168030, 13952092, -29571492, -3635906}, + }, + { + {3877321, -9572739, 32416692, 5405324, -11004407, -13656635, + 3759769, 11935320, 5611860, 8164018}, + {-16275802, 14667797, 15906460, 12155291, -22111149, -9039718, + 32003002, -8832289, 5773085, -8422109}, + {-23788118, -8254300, 1950875, 8937633, 18686727, 16459170, -905725, + 12376320, 31632953, 190926}, + }, + { + {-24593607, -16138885, -8423991, 13378746, 14162407, 6901328, + -8288749, 4508564, -25341555, -3627528}, + {8884438, -5884009, 6023974, 10104341, -6881569, -4941533, 18722941, + -14786005, -1672488, 827625}, + {-32720583, -16289296, -32503547, 7101210, 13354605, 2659080, + -1800575, -14108036, -24878478, 1541286}, + }, + { + {2901347, -1117687, 3880376, -10059388, -17620940, -3612781, + -21802117, -3567481, 20456845, -1885033}, + {27019610, 12299467, -13658288, -1603234, -12861660, -4861471, + -19540150, -5016058, 29439641, 15138866}, + {21536104, -6626420, -32447818, -10690208, -22408077, 5175814, + -5420040, -16361163, 7779328, 109896}, + }, + { + {30279744, 14648750, -8044871, 6425558, 13639621, -743509, 28698390, + 12180118, 23177719, -554075}, + {26572847, 3405927, -31701700, 12890905, -19265668, 5335866, + -6493768, 2378492, 4439158, -13279347}, + {-22716706, 3489070, -9225266, -332753, 18875722, -1140095, + 14819434, -12731527, -17717757, -5461437}, + }, + { + {-5056483, 16566551, 15953661, 3767752, -10436499, 15627060, + -820954, 2177225, 8550082, -15114165}, + {-18473302, 16596775, -381660, 15663611, 22860960, 15585581, + -27844109, -3582739, -23260460, -8428588}, + {-32480551, 15707275, -8205912, -5652081, 29464558, 2713815, + -22725137, 15860482, -21902570, 1494193}, + }, + { + {-19562091, -14087393, -25583872, -9299552, 13127842, 759709, + 21923482, 16529112, 8742704, 12967017}, + {-28464899, 1553205, 32536856, -10473729, -24691605, -406174, + -8914625, -2933896, -29903758, 15553883}, + {21877909, 3230008, 9881174, 10539357, -4797115, 2841332, 11543572, + 14513274, 19375923, -12647961}, + }, + { + {8832269, -14495485, 13253511, 5137575, 5037871, 4078777, 24880818, + -6222716, 2862653, 9455043}, + {29306751, 5123106, 20245049, -14149889, 9592566, 8447059, -2077124, + -2990080, 15511449, 4789663}, + {-20679756, 7004547, 8824831, -9434977, -4045704, -3750736, + -5754762, 108893, 23513200, 16652362}, + }, + }, + { + { + {-33256173, 4144782, -4476029, -6579123, 10770039, -7155542, + -6650416, -12936300, -18319198, 10212860}, + {2756081, 8598110, 7383731, -6859892, 22312759, -1105012, 21179801, + 2600940, -9988298, -12506466}, + {-24645692, 13317462, -30449259, -15653928, 21365574, -10869657, + 11344424, 864440, -2499677, -16710063}, + }, + { + {-26432803, 6148329, -17184412, -14474154, 18782929, -275997, + -22561534, 211300, 2719757, 4940997}, + {-1323882, 3911313, -6948744, 14759765, -30027150, 7851207, + 21690126, 8518463, 26699843, 5276295}, + {-13149873, -6429067, 9396249, 365013, 24703301, -10488939, 1321586, + 149635, -15452774, 7159369}, + }, + { + {9987780, -3404759, 17507962, 9505530, 9731535, -2165514, 22356009, + 8312176, 22477218, -8403385}, + {18155857, -16504990, 19744716, 9006923, 15154154, -10538976, + 24256460, -4864995, -22548173, 9334109}, + {2986088, -4911893, 10776628, -3473844, 10620590, -7083203, + -21413845, 14253545, -22587149, 536906}, + }, + { + {4377756, 8115836, 24567078, 15495314, 11625074, 13064599, 7390551, + 10589625, 10838060, -15420424}, + {-19342404, 867880, 9277171, -3218459, -14431572, -1986443, + 19295826, -15796950, 6378260, 699185}, + {7895026, 4057113, -7081772, -13077756, -17886831, -323126, -716039, + 15693155, -5045064, -13373962}, + }, + { + {-7737563, -5869402, -14566319, -7406919, 11385654, 13201616, + 31730678, -10962840, -3918636, -9669325}, + {10188286, -15770834, -7336361, 13427543, 22223443, 14896287, + 30743455, 7116568, -21786507, 5427593}, + {696102, 13206899, 27047647, -10632082, 15285305, -9853179, + 10798490, -4578720, 19236243, 12477404}, + }, + { + {-11229439, 11243796, -17054270, -8040865, -788228, -8167967, + -3897669, 11180504, -23169516, 7733644}, + {17800790, -14036179, -27000429, -11766671, 23887827, 3149671, + 23466177, -10538171, 10322027, 15313801}, + {26246234, 11968874, 32263343, -5468728, 6830755, -13323031, + -15794704, -101982, -24449242, 10890804}, + }, + { + {-31365647, 10271363, -12660625, -6267268, 16690207, -13062544, + -14982212, 16484931, 25180797, -5334884}, + {-586574, 10376444, -32586414, -11286356, 19801893, 10997610, + 2276632, 9482883, 316878, 13820577}, + {-9882808, -4510367, -2115506, 16457136, -11100081, 11674996, + 30756178, -7515054, 30696930, -3712849}, + }, + { + {32988917, -9603412, 12499366, 7910787, -10617257, -11931514, + -7342816, -9985397, -32349517, 7392473}, + {-8855661, 15927861, 9866406, -3649411, -2396914, -16655781, + -30409476, -9134995, 25112947, -2926644}, + {-2504044, -436966, 25621774, -5678772, 15085042, -5479877, + -24884878, -13526194, 5537438, -13914319}, + }, + }, + { + { + {-11225584, 2320285, -9584280, 10149187, -33444663, 5808648, + -14876251, -1729667, 31234590, 6090599}, + {-9633316, 116426, 26083934, 2897444, -6364437, -2688086, 609721, + 15878753, -6970405, -9034768}, + {-27757857, 247744, -15194774, -9002551, 23288161, -10011936, + -23869595, 6503646, 20650474, 1804084}, + }, + { + {-27589786, 15456424, 8972517, 8469608, 15640622, 4439847, 3121995, + -10329713, 27842616, -202328}, + {-15306973, 2839644, 22530074, 10026331, 4602058, 5048462, 28248656, + 5031932, -11375082, 12714369}, + {20807691, -7270825, 29286141, 11421711, -27876523, -13868230, + -21227475, 1035546, -19733229, 12796920}, + }, + { + {12076899, -14301286, -8785001, -11848922, -25012791, 16400684, + -17591495, -12899438, 3480665, -15182815}, + {-32361549, 5457597, 28548107, 7833186, 7303070, -11953545, + -24363064, -15921875, -33374054, 2771025}, + {-21389266, 421932, 26597266, 6860826, 22486084, -6737172, + -17137485, -4210226, -24552282, 15673397}, + }, + { + {-20184622, 2338216, 19788685, -9620956, -4001265, -8740893, + -20271184, 4733254, 3727144, -12934448}, + {6120119, 814863, -11794402, -622716, 6812205, -15747771, 2019594, + 7975683, 31123697, -10958981}, + {30069250, -11435332, 30434654, 2958439, 18399564, -976289, + 12296869, 9204260, -16432438, 9648165}, + }, + { + {32705432, -1550977, 30705658, 7451065, -11805606, 9631813, 3305266, + 5248604, -26008332, -11377501}, + {17219865, 2375039, -31570947, -5575615, -19459679, 9219903, 294711, + 15298639, 2662509, -16297073}, + {-1172927, -7558695, -4366770, -4287744, -21346413, -8434326, + 32087529, -1222777, 32247248, -14389861}, + }, + { + {14312628, 1221556, 17395390, -8700143, -4945741, -8684635, + -28197744, -9637817, -16027623, -13378845}, + {-1428825, -9678990, -9235681, 6549687, -7383069, -468664, 23046502, + 9803137, 17597934, 2346211}, + {18510800, 15337574, 26171504, 981392, -22241552, 7827556, + -23491134, -11323352, 3059833, -11782870}, + }, + { + {10141598, 6082907, 17829293, -1947643, 9830092, 13613136, + -25556636, -5544586, -33502212, 3592096}, + {33114168, -15889352, -26525686, -13343397, 33076705, 8716171, + 1151462, 1521897, -982665, -6837803}, + {-32939165, -4255815, 23947181, -324178, -33072974, -12305637, + -16637686, 3891704, 26353178, 693168}, + }, + { + {30374239, 1595580, -16884039, 13186931, 4600344, 406904, 9585294, + -400668, 31375464, 14369965}, + {-14370654, -7772529, 1510301, 6434173, -18784789, -6262728, + 32732230, -13108839, 17901441, 16011505}, + {18171223, -11934626, -12500402, 15197122, -11038147, -15230035, + -19172240, -16046376, 8764035, 12309598}, + }, + }, + { + { + {5975908, -5243188, -19459362, -9681747, -11541277, 14015782, + -23665757, 1228319, 17544096, -10593782}, + {5811932, -1715293, 3442887, -2269310, -18367348, -8359541, + -18044043, -15410127, -5565381, 12348900}, + {-31399660, 11407555, 25755363, 6891399, -3256938, 14872274, + -24849353, 8141295, -10632534, -585479}, + }, + { + {-12675304, 694026, -5076145, 13300344, 14015258, -14451394, + -9698672, -11329050, 30944593, 1130208}, + {8247766, -6710942, -26562381, -7709309, -14401939, -14648910, + 4652152, 2488540, 23550156, -271232}, + {17294316, -3788438, 7026748, 15626851, 22990044, 113481, 2267737, + -5908146, -408818, -137719}, + }, + { + {16091085, -16253926, 18599252, 7340678, 2137637, -1221657, + -3364161, 14550936, 3260525, -7166271}, + {-4910104, -13332887, 18550887, 10864893, -16459325, -7291596, + -23028869, -13204905, -12748722, 2701326}, + {-8574695, 16099415, 4629974, -16340524, -20786213, -6005432, + -10018363, 9276971, 11329923, 1862132}, + }, + { + {14763076, -15903608, -30918270, 3689867, 3511892, 10313526, + -21951088, 12219231, -9037963, -940300}, + {8894987, -3446094, 6150753, 3013931, 301220, 15693451, -31981216, + -2909717, -15438168, 11595570}, + {15214962, 3537601, -26238722, -14058872, 4418657, -15230761, + 13947276, 10730794, -13489462, -4363670}, + }, + { + {-2538306, 7682793, 32759013, 263109, -29984731, -7955452, + -22332124, -10188635, 977108, 699994}, + {-12466472, 4195084, -9211532, 550904, -15565337, 12917920, + 19118110, -439841, -30534533, -14337913}, + {31788461, -14507657, 4799989, 7372237, 8808585, -14747943, 9408237, + -10051775, 12493932, -5409317}, + }, + { + {-25680606, 5260744, -19235809, -6284470, -3695942, 16566087, + 27218280, 2607121, 29375955, 6024730}, + {842132, -2794693, -4763381, -8722815, 26332018, -12405641, + 11831880, 6985184, -9940361, 2854096}, + {-4847262, -7969331, 2516242, -5847713, 9695691, -7221186, 16512645, + 960770, 12121869, 16648078}, + }, + { + {-15218652, 14667096, -13336229, 2013717, 30598287, -464137, + -31504922, -7882064, 20237806, 2838411}, + {-19288047, 4453152, 15298546, -16178388, 22115043, -15972604, + 12544294, -13470457, 1068881, -12499905}, + {-9558883, -16518835, 33238498, 13506958, 30505848, -1114596, + -8486907, -2630053, 12521378, 4845654}, + }, + { + {-28198521, 10744108, -2958380, 10199664, 7759311, -13088600, + 3409348, -873400, -6482306, -12885870}, + {-23561822, 6230156, -20382013, 10655314, -24040585, -11621172, + 10477734, -1240216, -3113227, 13974498}, + {12966261, 15550616, -32038948, -1615346, 21025980, -629444, + 5642325, 7188737, 18895762, 12629579}, + }, + }, + { + { + {14741879, -14946887, 22177208, -11721237, 1279741, 8058600, + 11758140, 789443, 32195181, 3895677}, + {10758205, 15755439, -4509950, 9243698, -4879422, 6879879, -2204575, + -3566119, -8982069, 4429647}, + {-2453894, 15725973, -20436342, -10410672, -5803908, -11040220, + -7135870, -11642895, 18047436, -15281743}, + }, + { + {-25173001, -11307165, 29759956, 11776784, -22262383, -15820455, + 10993114, -12850837, -17620701, -9408468}, + {21987233, 700364, -24505048, 14972008, -7774265, -5718395, + 32155026, 2581431, -29958985, 8773375}, + {-25568350, 454463, -13211935, 16126715, 25240068, 8594567, + 20656846, 12017935, -7874389, -13920155}, + }, + { + {6028182, 6263078, -31011806, -11301710, -818919, 2461772, + -31841174, -5468042, -1721788, -2776725}, + {-12278994, 16624277, 987579, -5922598, 32908203, 1248608, 7719845, + -4166698, 28408820, 6816612}, + {-10358094, -8237829, 19549651, -12169222, 22082623, 16147817, + 20613181, 13982702, -10339570, 5067943}, + }, + { + {-30505967, -3821767, 12074681, 13582412, -19877972, 2443951, + -19719286, 12746132, 5331210, -10105944}, + {30528811, 3601899, -1957090, 4619785, -27361822, -15436388, + 24180793, -12570394, 27679908, -1648928}, + {9402404, -13957065, 32834043, 10838634, -26580150, -13237195, + 26653274, -8685565, 22611444, -12715406}, + }, + { + {22190590, 1118029, 22736441, 15130463, -30460692, -5991321, + 19189625, -4648942, 4854859, 6622139}, + {-8310738, -2953450, -8262579, -3388049, -10401731, -271929, + 13424426, -3567227, 26404409, 13001963}, + {-31241838, -15415700, -2994250, 8939346, 11562230, -12840670, + -26064365, -11621720, -15405155, 11020693}, + }, + { + {1866042, -7949489, -7898649, -10301010, 12483315, 13477547, + 3175636, -12424163, 28761762, 1406734}, + {-448555, -1777666, 13018551, 3194501, -9580420, -11161737, + 24760585, -4347088, 25577411, -13378680}, + {-24290378, 4759345, -690653, -1852816, 2066747, 10693769, + -29595790, 9884936, -9368926, 4745410}, + }, + { + {-9141284, 6049714, -19531061, -4341411, -31260798, 9944276, + -15462008, -11311852, 10931924, -11931931}, + {-16561513, 14112680, -8012645, 4817318, -8040464, -11414606, + -22853429, 10856641, -20470770, 13434654}, + {22759489, -10073434, -16766264, -1871422, 13637442, -10168091, + 1765144, -12654326, 28445307, -5364710}, + }, + { + {29875063, 12493613, 2795536, -3786330, 1710620, 15181182, + -10195717, -8788675, 9074234, 1167180}, + {-26205683, 11014233, -9842651, -2635485, -26908120, 7532294, + -18716888, -9535498, 3843903, 9367684}, + {-10969595, -6403711, 9591134, 9582310, 11349256, 108879, 16235123, + 8601684, -139197, 4242895}, + }, + }, + { + { + {22092954, -13191123, -2042793, -11968512, 32186753, -11517388, + -6574341, 2470660, -27417366, 16625501}, + {-11057722, 3042016, 13770083, -9257922, 584236, -544855, -7770857, + 2602725, -27351616, 14247413}, + {6314175, -10264892, -32772502, 15957557, -10157730, 168750, + -8618807, 14290061, 27108877, -1180880}, + }, + { + {-8586597, -7170966, 13241782, 10960156, -32991015, -13794596, + 33547976, -11058889, -27148451, 981874}, + {22833440, 9293594, -32649448, -13618667, -9136966, 14756819, + -22928859, -13970780, -10479804, -16197962}, + {-7768587, 3326786, -28111797, 10783824, 19178761, 14905060, + 22680049, 13906969, -15933690, 3797899}, + }, + { + {21721356, -4212746, -12206123, 9310182, -3882239, -13653110, + 23740224, -2709232, 20491983, -8042152}, + {9209270, -15135055, -13256557, -6167798, -731016, 15289673, + 25947805, 15286587, 30997318, -6703063}, + {7392032, 16618386, 23946583, -8039892, -13265164, -1533858, + -14197445, -2321576, 17649998, -250080}, + }, + { + {-9301088, -14193827, 30609526, -3049543, -25175069, -1283752, + -15241566, -9525724, -2233253, 7662146}, + {-17558673, 1763594, -33114336, 15908610, -30040870, -12174295, + 7335080, -8472199, -3174674, 3440183}, + {-19889700, -5977008, -24111293, -9688870, 10799743, -16571957, + 40450, -4431835, 4862400, 1133}, + }, + { + {-32856209, -7873957, -5422389, 14860950, -16319031, 7956142, + 7258061, 311861, -30594991, -7379421}, + {-3773428, -1565936, 28985340, 7499440, 24445838, 9325937, 29727763, + 16527196, 18278453, 15405622}, + {-4381906, 8508652, -19898366, -3674424, -5984453, 15149970, + -13313598, 843523, -21875062, 13626197}, + }, + { + {2281448, -13487055, -10915418, -2609910, 1879358, 16164207, + -10783882, 3953792, 13340839, 15928663}, + {31727126, -7179855, -18437503, -8283652, 2875793, -16390330, + -25269894, -7014826, -23452306, 5964753}, + {4100420, -5959452, -17179337, 6017714, -18705837, 12227141, + -26684835, 11344144, 2538215, -7570755}, + }, + { + {-9433605, 6123113, 11159803, -2156608, 30016280, 14966241, + -20474983, 1485421, -629256, -15958862}, + {-26804558, 4260919, 11851389, 9658551, -32017107, 16367492, + -20205425, -13191288, 11659922, -11115118}, + {26180396, 10015009, -30844224, -8581293, 5418197, 9480663, 2231568, + -10170080, 33100372, -1306171}, + }, + { + {15121113, -5201871, -10389905, 15427821, -27509937, -15992507, + 21670947, 4486675, -5931810, -14466380}, + {16166486, -9483733, -11104130, 6023908, -31926798, -1364923, + 2340060, -16254968, -10735770, -10039824}, + {28042865, -3557089, -12126526, 12259706, -3717498, -6945899, + 6766453, -8689599, 18036436, 5803270}, + }, + }, + { + { + {-817581, 6763912, 11803561, 1585585, 10958447, -2671165, 23855391, + 4598332, -6159431, -14117438}, + {-31031306, -14256194, 17332029, -2383520, 31312682, -5967183, + 696309, 50292, -20095739, 11763584}, + {-594563, -2514283, -32234153, 12643980, 12650761, 14811489, 665117, + -12613632, -19773211, -10713562}, + }, + { + {30464590, -11262872, -4127476, -12734478, 19835327, -7105613, + -24396175, 2075773, -17020157, 992471}, + {18357185, -6994433, 7766382, 16342475, -29324918, 411174, 14578841, + 8080033, -11574335, -10601610}, + {19598397, 10334610, 12555054, 2555664, 18821899, -10339780, + 21873263, 16014234, 26224780, 16452269}, + }, + { + {-30223925, 5145196, 5944548, 16385966, 3976735, 2009897, -11377804, + -7618186, -20533829, 3698650}, + {14187449, 3448569, -10636236, -10810935, -22663880, -3433596, + 7268410, -10890444, 27394301, 12015369}, + {19695761, 16087646, 28032085, 12999827, 6817792, 11427614, + 20244189, -1312777, -13259127, -3402461}, + }, + { + {30860103, 12735208, -1888245, -4699734, -16974906, 2256940, + -8166013, 12298312, -8550524, -10393462}, + {-5719826, -11245325, -1910649, 15569035, 26642876, -7587760, + -5789354, -15118654, -4976164, 12651793}, + {-2848395, 9953421, 11531313, -5282879, 26895123, -12697089, + -13118820, -16517902, 9768698, -2533218}, + }, + { + {-24719459, 1894651, -287698, -4704085, 15348719, -8156530, + 32767513, 12765450, 4940095, 10678226}, + {18860224, 15980149, -18987240, -1562570, -26233012, -11071856, + -7843882, 13944024, -24372348, 16582019}, + {-15504260, 4970268, -29893044, 4175593, -20993212, -2199756, + -11704054, 15444560, -11003761, 7989037}, + }, + { + {31490452, 5568061, -2412803, 2182383, -32336847, 4531686, + -32078269, 6200206, -19686113, -14800171}, + {-17308668, -15879940, -31522777, -2831, -32887382, 16375549, + 8680158, -16371713, 28550068, -6857132}, + {-28126887, -5688091, 16837845, -1820458, -6850681, 12700016, + -30039981, 4364038, 1155602, 5988841}, + }, + { + {21890435, -13272907, -12624011, 12154349, -7831873, 15300496, + 23148983, -4470481, 24618407, 8283181}, + {-33136107, -10512751, 9975416, 6841041, -31559793, 16356536, + 3070187, -7025928, 1466169, 10740210}, + {-1509399, -15488185, -13503385, -10655916, 32799044, 909394, + -13938903, -5779719, -32164649, -15327040}, + }, + { + {3960823, -14267803, -28026090, -15918051, -19404858, 13146868, + 15567327, 951507, -3260321, -573935}, + {24740841, 5052253, -30094131, 8961361, 25877428, 6165135, + -24368180, 14397372, -7380369, -6144105}, + {-28888365, 3510803, -28103278, -1158478, -11238128, -10631454, + -15441463, -14453128, -1625486, -6494814}, + }, + }, + { + { + {793299, -9230478, 8836302, -6235707, -27360908, -2369593, 33152843, + -4885251, -9906200, -621852}, + {5666233, 525582, 20782575, -8038419, -24538499, 14657740, 16099374, + 1468826, -6171428, -15186581}, + {-4859255, -3779343, -2917758, -6748019, 7778750, 11688288, + -30404353, -9871238, -1558923, -9863646}, + }, + { + {10896332, -7719704, 824275, 472601, -19460308, 3009587, 25248958, + 14783338, -30581476, -15757844}, + {10566929, 12612572, -31944212, 11118703, -12633376, 12362879, + 21752402, 8822496, 24003793, 14264025}, + {27713862, -7355973, -11008240, 9227530, 27050101, 2504721, + 23886875, -13117525, 13958495, -5732453}, + }, + { + {-23481610, 4867226, -27247128, 3900521, 29838369, -8212291, + -31889399, -10041781, 7340521, -15410068}, + {4646514, -8011124, -22766023, -11532654, 23184553, 8566613, + 31366726, -1381061, -15066784, -10375192}, + {-17270517, 12723032, -16993061, 14878794, 21619651, -6197576, + 27584817, 3093888, -8843694, 3849921}, + }, + { + {-9064912, 2103172, 25561640, -15125738, -5239824, 9582958, + 32477045, -9017955, 5002294, -15550259}, + {-12057553, -11177906, 21115585, -13365155, 8808712, -12030708, + 16489530, 13378448, -25845716, 12741426}, + {-5946367, 10645103, -30911586, 15390284, -3286982, -7118677, + 24306472, 15852464, 28834118, -7646072}, + }, + { + {-17335748, -9107057, -24531279, 9434953, -8472084, -583362, + -13090771, 455841, 20461858, 5491305}, + {13669248, -16095482, -12481974, -10203039, -14569770, -11893198, + -24995986, 11293807, -28588204, -9421832}, + {28497928, 6272777, -33022994, 14470570, 8906179, -1225630, + 18504674, -14165166, 29867745, -8795943}, + }, + { + {-16207023, 13517196, -27799630, -13697798, 24009064, -6373891, + -6367600, -13175392, 22853429, -4012011}, + {24191378, 16712145, -13931797, 15217831, 14542237, 1646131, + 18603514, -11037887, 12876623, -2112447}, + {17902668, 4518229, -411702, -2829247, 26878217, 5258055, -12860753, + 608397, 16031844, 3723494}, + }, + { + {-28632773, 12763728, -20446446, 7577504, 33001348, -13017745, + 17558842, -7872890, 23896954, -4314245}, + {-20005381, -12011952, 31520464, 605201, 2543521, 5991821, -2945064, + 7229064, -9919646, -8826859}, + {28816045, 298879, -28165016, -15920938, 19000928, -1665890, + -12680833, -2949325, -18051778, -2082915}, + }, + { + {16000882, -344896, 3493092, -11447198, -29504595, -13159789, + 12577740, 16041268, -19715240, 7847707}, + {10151868, 10572098, 27312476, 7922682, 14825339, 4723128, + -32855931, -6519018, -10020567, 3852848}, + {-11430470, 15697596, -21121557, -4420647, 5386314, 15063598, + 16514493, -15932110, 29330899, -15076224}, + }, + }, + { + { + {-25499735, -4378794, -15222908, -6901211, 16615731, 2051784, + 3303702, 15490, -27548796, 12314391}, + {15683520, -6003043, 18109120, -9980648, 15337968, -5997823, + -16717435, 15921866, 16103996, -3731215}, + {-23169824, -10781249, 13588192, -1628807, -3798557, -1074929, + -19273607, 5402699, -29815713, -9841101}, + }, + { + {23190676, 2384583, -32714340, 3462154, -29903655, -1529132, + -11266856, 8911517, -25205859, 2739713}, + {21374101, -3554250, -33524649, 9874411, 15377179, 11831242, + -33529904, 6134907, 4931255, 11987849}, + {-7732, -2978858, -16223486, 7277597, 105524, -322051, -31480539, + 13861388, -30076310, 10117930}, + }, + { + {-29501170, -10744872, -26163768, 13051539, -25625564, 5089643, + -6325503, 6704079, 12890019, 15728940}, + {-21972360, -11771379, -951059, -4418840, 14704840, 2695116, 903376, + -10428139, 12885167, 8311031}, + {-17516482, 5352194, 10384213, -13811658, 7506451, 13453191, + 26423267, 4384730, 1888765, -5435404}, + }, + { + {-25817338, -3107312, -13494599, -3182506, 30896459, -13921729, + -32251644, -12707869, -19464434, -3340243}, + {-23607977, -2665774, -526091, 4651136, 5765089, 4618330, 6092245, + 14845197, 17151279, -9854116}, + {-24830458, -12733720, -15165978, 10367250, -29530908, -265356, + 22825805, -7087279, -16866484, 16176525}, + }, + { + {-23583256, 6564961, 20063689, 3798228, -4740178, 7359225, 2006182, + -10363426, -28746253, -10197509}, + {-10626600, -4486402, -13320562, -5125317, 3432136, -6393229, + 23632037, -1940610, 32808310, 1099883}, + {15030977, 5768825, -27451236, -2887299, -6427378, -15361371, + -15277896, -6809350, 2051441, -15225865}, + }, + { + {-3362323, -7239372, 7517890, 9824992, 23555850, 295369, 5148398, + -14154188, -22686354, 16633660}, + {4577086, -16752288, 13249841, -15304328, 19958763, -14537274, + 18559670, -10759549, 8402478, -9864273}, + {-28406330, -1051581, -26790155, -907698, -17212414, -11030789, + 9453451, -14980072, 17983010, 9967138}, + }, + { + {-25762494, 6524722, 26585488, 9969270, 24709298, 1220360, -1677990, + 7806337, 17507396, 3651560}, + {-10420457, -4118111, 14584639, 15971087, -15768321, 8861010, + 26556809, -5574557, -18553322, -11357135}, + {2839101, 14284142, 4029895, 3472686, 14402957, 12689363, -26642121, + 8459447, -5605463, -7621941}, + }, + { + {-4839289, -3535444, 9744961, 2871048, 25113978, 3187018, -25110813, + -849066, 17258084, -7977739}, + {18164541, -10595176, -17154882, -1542417, 19237078, -9745295, + 23357533, -15217008, 26908270, 12150756}, + {-30264870, -7647865, 5112249, -7036672, -1499807, -6974257, 43168, + -5537701, -32302074, 16215819}, + }, + }, + { + { + {-6898905, 9824394, -12304779, -4401089, -31397141, -6276835, + 32574489, 12532905, -7503072, -8675347}, + {-27343522, -16515468, -27151524, -10722951, 946346, 16291093, + 254968, 7168080, 21676107, -1943028}, + {21260961, -8424752, -16831886, -11920822, -23677961, 3968121, + -3651949, -6215466, -3556191, -7913075}, + }, + { + {16544754, 13250366, -16804428, 15546242, -4583003, 12757258, + -2462308, -8680336, -18907032, -9662799}, + {-2415239, -15577728, 18312303, 4964443, -15272530, -12653564, + 26820651, 16690659, 25459437, -4564609}, + {-25144690, 11425020, 28423002, -11020557, -6144921, -15826224, + 9142795, -2391602, -6432418, -1644817}, + }, + { + {-23104652, 6253476, 16964147, -3768872, -25113972, -12296437, + -27457225, -16344658, 6335692, 7249989}, + {-30333227, 13979675, 7503222, -12368314, -11956721, -4621693, + -30272269, 2682242, 25993170, -12478523}, + {4364628, 5930691, 32304656, -10044554, -8054781, 15091131, + 22857016, -10598955, 31820368, 15075278}, + }, + { + {31879134, -8918693, 17258761, 90626, -8041836, -4917709, 24162788, + -9650886, -17970238, 12833045}, + {19073683, 14851414, -24403169, -11860168, 7625278, 11091125, + -19619190, 2074449, -9413939, 14905377}, + {24483667, -11935567, -2518866, -11547418, -1553130, 15355506, + -25282080, 9253129, 27628530, -7555480}, + }, + { + {17597607, 8340603, 19355617, 552187, 26198470, -3176583, 4593324, + -9157582, -14110875, 15297016}, + {510886, 14337390, -31785257, 16638632, 6328095, 2713355, -20217417, + -11864220, 8683221, 2921426}, + {18606791, 11874196, 27155355, -5281482, -24031742, 6265446, + -25178240, -1278924, 4674690, 13890525}, + }, + { + {13609624, 13069022, -27372361, -13055908, 24360586, 9592974, + 14977157, 9835105, 4389687, 288396}, + {9922506, -519394, 13613107, 5883594, -18758345, -434263, -12304062, + 8317628, 23388070, 16052080}, + {12720016, 11937594, -31970060, -5028689, 26900120, 8561328, + -20155687, -11632979, -14754271, -10812892}, + }, + { + {15961858, 14150409, 26716931, -665832, -22794328, 13603569, + 11829573, 7467844, -28822128, 929275}, + {11038231, -11582396, -27310482, -7316562, -10498527, -16307831, + -23479533, -9371869, -21393143, 2465074}, + {20017163, -4323226, 27915242, 1529148, 12396362, 15675764, + 13817261, -9658066, 2463391, -4622140}, + }, + { + {-16358878, -12663911, -12065183, 4996454, -1256422, 1073572, + 9583558, 12851107, 4003896, 12673717}, + {-1731589, -15155870, -3262930, 16143082, 19294135, 13385325, + 14741514, -9103726, 7903886, 2348101}, + {24536016, -16515207, 12715592, -3862155, 1511293, 10047386, + -3842346, -7129159, -28377538, 10048127}, + }, + }, + { + { + {-12622226, -6204820, 30718825, 2591312, -10617028, 12192840, + 18873298, -7297090, -32297756, 15221632}, + {-26478122, -11103864, 11546244, -1852483, 9180880, 7656409, + -21343950, 2095755, 29769758, 6593415}, + {-31994208, -2907461, 4176912, 3264766, 12538965, -868111, 26312345, + -6118678, 30958054, 8292160}, + }, + { + {31429822, -13959116, 29173532, 15632448, 12174511, -2760094, + 32808831, 3977186, 26143136, -3148876}, + {22648901, 1402143, -22799984, 13746059, 7936347, 365344, -8668633, + -1674433, -3758243, -2304625}, + {-15491917, 8012313, -2514730, -12702462, -23965846, -10254029, + -1612713, -1535569, -16664475, 8194478}, + }, + { + {27338066, -7507420, -7414224, 10140405, -19026427, -6589889, + 27277191, 8855376, 28572286, 3005164}, + {26287124, 4821776, 25476601, -4145903, -3764513, -15788984, + -18008582, 1182479, -26094821, -13079595}, + {-7171154, 3178080, 23970071, 6201893, -17195577, -4489192, + -21876275, -13982627, 32208683, -1198248}, + }, + { + {-16657702, 2817643, -10286362, 14811298, 6024667, 13349505, + -27315504, -10497842, -27672585, -11539858}, + {15941029, -9405932, -21367050, 8062055, 31876073, -238629, + -15278393, -1444429, 15397331, -4130193}, + {8934485, -13485467, -23286397, -13423241, -32446090, 14047986, + 31170398, -1441021, -27505566, 15087184}, + }, + { + {-18357243, -2156491, 24524913, -16677868, 15520427, -6360776, + -15502406, 11461896, 16788528, -5868942}, + {-1947386, 16013773, 21750665, 3714552, -17401782, -16055433, + -3770287, -10323320, 31322514, -11615635}, + {21426655, -5650218, -13648287, -5347537, -28812189, -4920970, + -18275391, -14621414, 13040862, -12112948}, + }, + { + {11293895, 12478086, -27136401, 15083750, -29307421, 14748872, + 14555558, -13417103, 1613711, 4896935}, + {-25894883, 15323294, -8489791, -8057900, 25967126, -13425460, + 2825960, -4897045, -23971776, -11267415}, + {-15924766, -5229880, -17443532, 6410664, 3622847, 10243618, + 20615400, 12405433, -23753030, -8436416}, + }, + { + {-7091295, 12556208, -20191352, 9025187, -17072479, 4333801, + 4378436, 2432030, 23097949, -566018}, + {4565804, -16025654, 20084412, -7842817, 1724999, 189254, 24767264, + 10103221, -18512313, 2424778}, + {366633, -11976806, 8173090, -6890119, 30788634, 5745705, -7168678, + 1344109, -3642553, 12412659}, + }, + { + {-24001791, 7690286, 14929416, -168257, -32210835, -13412986, + 24162697, -15326504, -3141501, 11179385}, + {18289522, -14724954, 8056945, 16430056, -21729724, 7842514, + -6001441, -1486897, -18684645, -11443503}, + {476239, 6601091, -6152790, -9723375, 17503545, -4863900, 27672959, + 13403813, 11052904, 5219329}, + }, + }, + { + { + {20678546, -8375738, -32671898, 8849123, -5009758, 14574752, + 31186971, -3973730, 9014762, -8579056}, + {-13644050, -10350239, -15962508, 5075808, -1514661, -11534600, + -33102500, 9160280, 8473550, -3256838}, + {24900749, 14435722, 17209120, -15292541, -22592275, 9878983, + -7689309, -16335821, -24568481, 11788948}, + }, + { + {-3118155, -11395194, -13802089, 14797441, 9652448, -6845904, + -20037437, 10410733, -24568470, -1458691}, + {-15659161, 16736706, -22467150, 10215878, -9097177, 7563911, + 11871841, -12505194, -18513325, 8464118}, + {-23400612, 8348507, -14585951, -861714, -3950205, -6373419, + 14325289, 8628612, 33313881, -8370517}, + }, + { + {-20186973, -4967935, 22367356, 5271547, -1097117, -4788838, + -24805667, -10236854, -8940735, -5818269}, + {-6948785, -1795212, -32625683, -16021179, 32635414, -7374245, + 15989197, -12838188, 28358192, -4253904}, + {-23561781, -2799059, -32351682, -1661963, -9147719, 10429267, + -16637684, 4072016, -5351664, 5596589}, + }, + { + {-28236598, -3390048, 12312896, 6213178, 3117142, 16078565, + 29266239, 2557221, 1768301, 15373193}, + {-7243358, -3246960, -4593467, -7553353, -127927, -912245, -1090902, + -4504991, -24660491, 3442910}, + {-30210571, 5124043, 14181784, 8197961, 18964734, -11939093, + 22597931, 7176455, -18585478, 13365930}, + }, + { + {-7877390, -1499958, 8324673, 4690079, 6261860, 890446, 24538107, + -8570186, -9689599, -3031667}, + {25008904, -10771599, -4305031, -9638010, 16265036, 15721635, + 683793, -11823784, 15723479, -15163481}, + {-9660625, 12374379, -27006999, -7026148, -7724114, -12314514, + 11879682, 5400171, 519526, -1235876}, + }, + { + {22258397, -16332233, -7869817, 14613016, -22520255, -2950923, + -20353881, 7315967, 16648397, 7605640}, + {-8081308, -8464597, -8223311, 9719710, 19259459, -15348212, + 23994942, -5281555, -9468848, 4763278}, + {-21699244, 9220969, -15730624, 1084137, -25476107, -2852390, + 31088447, -7764523, -11356529, 728112}, + }, + { + {26047220, -11751471, -6900323, -16521798, 24092068, 9158119, + -4273545, -12555558, -29365436, -5498272}, + {17510331, -322857, 5854289, 8403524, 17133918, -3112612, -28111007, + 12327945, 10750447, 10014012}, + {-10312768, 3936952, 9156313, -8897683, 16498692, -994647, + -27481051, -666732, 3424691, 7540221}, + }, + { + {30322361, -6964110, 11361005, -4143317, 7433304, 4989748, -7071422, + -16317219, -9244265, 15258046}, + {13054562, -2779497, 19155474, 469045, -12482797, 4566042, 5631406, + 2711395, 1062915, -5136345}, + {-19240248, -11254599, -29509029, -7499965, -5835763, 13005411, + -6066489, 12194497, 32960380, 1459310}, + }, + }, + { + { + {19852034, 7027924, 23669353, 10020366, 8586503, -6657907, 394197, + -6101885, 18638003, -11174937}, + {31395534, 15098109, 26581030, 8030562, -16527914, -5007134, + 9012486, -7584354, -6643087, -5442636}, + {-9192165, -2347377, -1997099, 4529534, 25766844, 607986, -13222, + 9677543, -32294889, -6456008}, + }, + { + {-2444496, -149937, 29348902, 8186665, 1873760, 12489863, -30934579, + -7839692, -7852844, -8138429}, + {-15236356, -15433509, 7766470, 746860, 26346930, -10221762, + -27333451, 10754588, -9431476, 5203576}, + {31834314, 14135496, -770007, 5159118, 20917671, -16768096, + -7467973, -7337524, 31809243, 7347066}, + }, + { + {-9606723, -11874240, 20414459, 13033986, 13716524, -11691881, + 19797970, -12211255, 15192876, -2087490}, + {-12663563, -2181719, 1168162, -3804809, 26747877, -14138091, + 10609330, 12694420, 33473243, -13382104}, + {33184999, 11180355, 15832085, -11385430, -1633671, 225884, + 15089336, -11023903, -6135662, 14480053}, + }, + { + {31308717, -5619998, 31030840, -1897099, 15674547, -6582883, + 5496208, 13685227, 27595050, 8737275}, + {-20318852, -15150239, 10933843, -16178022, 8335352, -7546022, + -31008351, -12610604, 26498114, 66511}, + {22644454, -8761729, -16671776, 4884562, -3105614, -13559366, + 30540766, -4286747, -13327787, -7515095}, + }, + { + {-28017847, 9834845, 18617207, -2681312, -3401956, -13307506, + 8205540, 13585437, -17127465, 15115439}, + {23711543, -672915, 31206561, -8362711, 6164647, -9709987, + -33535882, -1426096, 8236921, 16492939}, + {-23910559, -13515526, -26299483, -4503841, 25005590, -7687270, + 19574902, 10071562, 6708380, -6222424}, + }, + { + {2101391, -4930054, 19702731, 2367575, -15427167, 1047675, 5301017, + 9328700, 29955601, -11678310}, + {3096359, 9271816, -21620864, -15521844, -14847996, -7592937, + -25892142, -12635595, -9917575, 6216608}, + {-32615849, 338663, -25195611, 2510422, -29213566, -13820213, + 24822830, -6146567, -26767480, 7525079}, + }, + { + {-23066649, -13985623, 16133487, -7896178, -3389565, 778788, + -910336, -2782495, -19386633, 11994101}, + {21691500, -13624626, -641331, -14367021, 3285881, -3483596, + -25064666, 9718258, -7477437, 13381418}, + {18445390, -4202236, 14979846, 11622458, -1727110, -3582980, + 23111648, -6375247, 28535282, 15779576}, + }, + { + {30098053, 3089662, -9234387, 16662135, -21306940, 11308411, + -14068454, 12021730, 9955285, -16303356}, + {9734894, -14576830, -7473633, -9138735, 2060392, 11313496, + -18426029, 9924399, 20194861, 13380996}, + {-26378102, -7965207, -22167821, 15789297, -18055342, -6168792, + -1984914, 15707771, 26342023, 10146099}, + }, + }, + { + { + {-26016874, -219943, 21339191, -41388, 19745256, -2878700, + -29637280, 2227040, 21612326, -545728}, + {-13077387, 1184228, 23562814, -5970442, -20351244, -6348714, + 25764461, 12243797, -20856566, 11649658}, + {-10031494, 11262626, 27384172, 2271902, 26947504, -15997771, 39944, + 6114064, 33514190, 2333242}, + }, + { + {-21433588, -12421821, 8119782, 7219913, -21830522, -9016134, + -6679750, -12670638, 24350578, -13450001}, + {-4116307, -11271533, -23886186, 4843615, -30088339, 690623, + -31536088, -10406836, 8317860, 12352766}, + {18200138, -14475911, -33087759, -2696619, -23702521, -9102511, + -23552096, -2287550, 20712163, 6719373}, + }, + { + {26656208, 6075253, -7858556, 1886072, -28344043, 4262326, 11117530, + -3763210, 26224235, -3297458}, + {-17168938, -14854097, -3395676, -16369877, -19954045, 14050420, + 21728352, 9493610, 18620611, -16428628}, + {-13323321, 13325349, 11432106, 5964811, 18609221, 6062965, + -5269471, -9725556, -30701573, -16479657}, + }, + { + {-23860538, -11233159, 26961357, 1640861, -32413112, -16737940, + 12248509, -5240639, 13735342, 1934062}, + {25089769, 6742589, 17081145, -13406266, 21909293, -16067981, + -15136294, -3765346, -21277997, 5473616}, + {31883677, -7961101, 1083432, -11572403, 22828471, 13290673, + -7125085, 12469656, 29111212, -5451014}, + }, + { + {24244947, -15050407, -26262976, 2791540, -14997599, 16666678, + 24367466, 6388839, -10295587, 452383}, + {-25640782, -3417841, 5217916, 16224624, 19987036, -4082269, + -24236251, -5915248, 15766062, 8407814}, + {-20406999, 13990231, 15495425, 16395525, 5377168, 15166495, + -8917023, -4388953, -8067909, 2276718}, + }, + { + {30157918, 12924066, -17712050, 9245753, 19895028, 3368142, + -23827587, 5096219, 22740376, -7303417}, + {2041139, -14256350, 7783687, 13876377, -25946985, -13352459, + 24051124, 13742383, -15637599, 13295222}, + {33338237, -8505733, 12532113, 7977527, 9106186, -1715251, + -17720195, -4612972, -4451357, -14669444}, + }, + { + {-20045281, 5454097, -14346548, 6447146, 28862071, 1883651, + -2469266, -4141880, 7770569, 9620597}, + {23208068, 7979712, 33071466, 8149229, 1758231, -10834995, 30945528, + -1694323, -33502340, -14767970}, + {1439958, -16270480, -1079989, -793782, 4625402, 10647766, -5043801, + 1220118, 30494170, -11440799}, + }, + { + {-5037580, -13028295, -2970559, -3061767, 15640974, -6701666, + -26739026, 926050, -1684339, -13333647}, + {13908495, -3549272, 30919928, -6273825, -21521863, 7989039, + 9021034, 9078865, 3353509, 4033511}, + {-29663431, -15113610, 32259991, -344482, 24295849, -12912123, + 23161163, 8839127, 27485041, 7356032}, + }, + }, + { + { + {9661027, 705443, 11980065, -5370154, -1628543, 14661173, -6346142, + 2625015, 28431036, -16771834}, + {-23839233, -8311415, -25945511, 7480958, -17681669, -8354183, + -22545972, 14150565, 15970762, 4099461}, + {29262576, 16756590, 26350592, -8793563, 8529671, -11208050, + 13617293, -9937143, 11465739, 8317062}, + }, + { + {-25493081, -6962928, 32500200, -9419051, -23038724, -2302222, + 14898637, 3848455, 20969334, -5157516}, + {-20384450, -14347713, -18336405, 13884722, -33039454, 2842114, + -21610826, -3649888, 11177095, 14989547}, + {-24496721, -11716016, 16959896, 2278463, 12066309, 10137771, + 13515641, 2581286, -28487508, 9930240}, + }, + { + {-17751622, -2097826, 16544300, -13009300, -15914807, -14949081, + 18345767, -13403753, 16291481, -5314038}, + {-33229194, 2553288, 32678213, 9875984, 8534129, 6889387, -9676774, + 6957617, 4368891, 9788741}, + {16660756, 7281060, -10830758, 12911820, 20108584, -8101676, + -21722536, -8613148, 16250552, -11111103}, + }, + { + {-19765507, 2390526, -16551031, 14161980, 1905286, 6414907, 4689584, + 10604807, -30190403, 4782747}, + {-1354539, 14736941, -7367442, -13292886, 7710542, -14155590, + -9981571, 4383045, 22546403, 437323}, + {31665577, -12180464, -16186830, 1491339, -18368625, 3294682, + 27343084, 2786261, -30633590, -14097016}, + }, + { + {-14467279, -683715, -33374107, 7448552, 19294360, 14334329, + -19690631, 2355319, -19284671, -6114373}, + {15121312, -15796162, 6377020, -6031361, -10798111, -12957845, + 18952177, 15496498, -29380133, 11754228}, + {-2637277, -13483075, 8488727, -14303896, 12728761, -1622493, + 7141596, 11724556, 22761615, -10134141}, + }, + { + {16918416, 11729663, -18083579, 3022987, -31015732, -13339659, + -28741185, -12227393, 32851222, 11717399}, + {11166634, 7338049, -6722523, 4531520, -29468672, -7302055, + 31474879, 3483633, -1193175, -4030831}, + {-185635, 9921305, 31456609, -13536438, -12013818, 13348923, + 33142652, 6546660, -19985279, -3948376}, + }, + { + {-32460596, 11266712, -11197107, -7899103, 31703694, 3855903, + -8537131, -12833048, -30772034, -15486313}, + {-18006477, 12709068, 3991746, -6479188, -21491523, -10550425, + -31135347, -16049879, 10928917, 3011958}, + {-6957757, -15594337, 31696059, 334240, 29576716, 14796075, + -30831056, -12805180, 18008031, 10258577}, + }, + { + {-22448644, 15655569, 7018479, -4410003, -30314266, -1201591, + -1853465, 1367120, 25127874, 6671743}, + {29701166, -14373934, -10878120, 9279288, -17568, 13127210, + 21382910, 11042292, 25838796, 4642684}, + {-20430234, 14955537, -24126347, 8124619, -5369288, -5990470, + 30468147, -13900640, 18423289, 4177476}, + }, + }, +}; + +static uint8_t negative(signed char b) { + uint32_t x = b; + x >>= 31; /* 1: yes; 0: no */ + return x; +} + +static void table_select(ge_precomp *t, int pos, signed char b) { + ge_precomp minust; + uint8_t bnegative = negative(b); + uint8_t babs = b - ((uint8_t)((-bnegative) & b) << 1); + + ge_precomp_0(t); + cmov(t, &k25519Precomp[pos][0], equal(babs, 1)); + cmov(t, &k25519Precomp[pos][1], equal(babs, 2)); + cmov(t, &k25519Precomp[pos][2], equal(babs, 3)); + cmov(t, &k25519Precomp[pos][3], equal(babs, 4)); + cmov(t, &k25519Precomp[pos][4], equal(babs, 5)); + cmov(t, &k25519Precomp[pos][5], equal(babs, 6)); + cmov(t, &k25519Precomp[pos][6], equal(babs, 7)); + cmov(t, &k25519Precomp[pos][7], equal(babs, 8)); + fe_copy(minust.yplusx, t->yminusx); + fe_copy(minust.yminusx, t->yplusx); + fe_neg(minust.xy2d, t->xy2d); + cmov(t, &minust, bnegative); +} + +/* h = a * B + * where a = a[0]+256*a[1]+...+256^31 a[31] + * B is the Ed25519 base point (x,4/5) with x positive. + * + * Preconditions: + * a[31] <= 127 */ +void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t *a) { + signed char e[64]; + signed char carry; + ge_p1p1 r; + ge_p2 s; + ge_precomp t; + int i; + + for (i = 0; i < 32; ++i) { + e[2 * i + 0] = (a[i] >> 0) & 15; + e[2 * i + 1] = (a[i] >> 4) & 15; + } + /* each e[i] is between 0 and 15 */ + /* e[63] is between 0 and 7 */ + + carry = 0; + for (i = 0; i < 63; ++i) { + e[i] += carry; + carry = e[i] + 8; + carry >>= 4; + e[i] -= carry << 4; + } + e[63] += carry; + /* each e[i] is between -8 and 8 */ + + ge_p3_0(h); + for (i = 1; i < 64; i += 2) { + table_select(&t, i / 2, e[i]); + ge_madd(&r, h, &t); + x25519_ge_p1p1_to_p3(h, &r); + } + + ge_p3_dbl(&r, h); + x25519_ge_p1p1_to_p2(&s, &r); + ge_p2_dbl(&r, &s); + x25519_ge_p1p1_to_p2(&s, &r); + ge_p2_dbl(&r, &s); + x25519_ge_p1p1_to_p2(&s, &r); + ge_p2_dbl(&r, &s); + x25519_ge_p1p1_to_p3(h, &r); + + for (i = 0; i < 64; i += 2) { + table_select(&t, i / 2, e[i]); + ge_madd(&r, h, &t); + x25519_ge_p1p1_to_p3(h, &r); + } +} + +#endif + +static void cmov_cached(ge_cached *t, ge_cached *u, uint8_t b) { + fe_cmov(t->YplusX, u->YplusX, b); + fe_cmov(t->YminusX, u->YminusX, b); + fe_cmov(t->Z, u->Z, b); + fe_cmov(t->T2d, u->T2d, b); +} + +/* r = scalar * A. + * where a = a[0]+256*a[1]+...+256^31 a[31]. */ +void x25519_ge_scalarmult(ge_p2 *r, const uint8_t *scalar, const ge_p3 *A) { + ge_p2 Ai_p2[8]; + ge_cached Ai[16]; + ge_p1p1 t; + + ge_cached_0(&Ai[0]); + x25519_ge_p3_to_cached(&Ai[1], A); + ge_p3_to_p2(&Ai_p2[1], A); + + unsigned i; + for (i = 2; i < 16; i += 2) { + ge_p2_dbl(&t, &Ai_p2[i / 2]); + ge_p1p1_to_cached(&Ai[i], &t); + if (i < 8) { + x25519_ge_p1p1_to_p2(&Ai_p2[i], &t); + } + x25519_ge_add(&t, A, &Ai[i]); + ge_p1p1_to_cached(&Ai[i + 1], &t); + if (i < 7) { + x25519_ge_p1p1_to_p2(&Ai_p2[i + 1], &t); + } + } + + ge_p2_0(r); + ge_p3 u; + + for (i = 0; i < 256; i += 4) { + ge_p2_dbl(&t, r); + x25519_ge_p1p1_to_p2(r, &t); + ge_p2_dbl(&t, r); + x25519_ge_p1p1_to_p2(r, &t); + ge_p2_dbl(&t, r); + x25519_ge_p1p1_to_p2(r, &t); + ge_p2_dbl(&t, r); + x25519_ge_p1p1_to_p3(&u, &t); + + uint8_t index = scalar[31 - i/8]; + index >>= 4 - (i & 4); + index &= 0xf; + + unsigned j; + ge_cached selected; + ge_cached_0(&selected); + for (j = 0; j < 16; j++) { + cmov_cached(&selected, &Ai[j], equal(j, index)); + } + + x25519_ge_add(&t, &u, &selected); + x25519_ge_p1p1_to_p2(r, &t); + } +} + +#ifdef ED25519 +static void slide(signed char *r, const uint8_t *a) { + int i; + int b; + int k; + + for (i = 0; i < 256; ++i) { + r[i] = 1 & (a[i >> 3] >> (i & 7)); + } + + for (i = 0; i < 256; ++i) { + if (r[i]) { + for (b = 1; b <= 6 && i + b < 256; ++b) { + if (r[i + b]) { + if (r[i] + (r[i + b] << b) <= 15) { + r[i] += r[i + b] << b; + r[i + b] = 0; + } else if (r[i] - (r[i + b] << b) >= -15) { + r[i] -= r[i + b] << b; + for (k = i + b; k < 256; ++k) { + if (!r[k]) { + r[k] = 1; + break; + } + r[k] = 0; + } + } else { + break; + } + } + } + } + } +} + +static const ge_precomp Bi[8] = { + { + {25967493, -14356035, 29566456, 3660896, -12694345, 4014787, 27544626, + -11754271, -6079156, 2047605}, + {-12545711, 934262, -2722910, 3049990, -727428, 9406986, 12720692, + 5043384, 19500929, -15469378}, + {-8738181, 4489570, 9688441, -14785194, 10184609, -12363380, 29287919, + 11864899, -24514362, -4438546}, + }, + { + {15636291, -9688557, 24204773, -7912398, 616977, -16685262, 27787600, + -14772189, 28944400, -1550024}, + {16568933, 4717097, -11556148, -1102322, 15682896, -11807043, 16354577, + -11775962, 7689662, 11199574}, + {30464156, -5976125, -11779434, -15670865, 23220365, 15915852, 7512774, + 10017326, -17749093, -9920357}, + }, + { + {10861363, 11473154, 27284546, 1981175, -30064349, 12577861, 32867885, + 14515107, -15438304, 10819380}, + {4708026, 6336745, 20377586, 9066809, -11272109, 6594696, -25653668, + 12483688, -12668491, 5581306}, + {19563160, 16186464, -29386857, 4097519, 10237984, -4348115, 28542350, + 13850243, -23678021, -15815942}, + }, + { + {5153746, 9909285, 1723747, -2777874, 30523605, 5516873, 19480852, + 5230134, -23952439, -15175766}, + {-30269007, -3463509, 7665486, 10083793, 28475525, 1649722, 20654025, + 16520125, 30598449, 7715701}, + {28881845, 14381568, 9657904, 3680757, -20181635, 7843316, -31400660, + 1370708, 29794553, -1409300}, + }, + { + {-22518993, -6692182, 14201702, -8745502, -23510406, 8844726, 18474211, + -1361450, -13062696, 13821877}, + {-6455177, -7839871, 3374702, -4740862, -27098617, -10571707, 31655028, + -7212327, 18853322, -14220951}, + {4566830, -12963868, -28974889, -12240689, -7602672, -2830569, -8514358, + -10431137, 2207753, -3209784}, + }, + { + {-25154831, -4185821, 29681144, 7868801, -6854661, -9423865, -12437364, + -663000, -31111463, -16132436}, + {25576264, -2703214, 7349804, -11814844, 16472782, 9300885, 3844789, + 15725684, 171356, 6466918}, + {23103977, 13316479, 9739013, -16149481, 817875, -15038942, 8965339, + -14088058, -30714912, 16193877}, + }, + { + {-33521811, 3180713, -2394130, 14003687, -16903474, -16270840, 17238398, + 4729455, -18074513, 9256800}, + {-25182317, -4174131, 32336398, 5036987, -21236817, 11360617, 22616405, + 9761698, -19827198, 630305}, + {-13720693, 2639453, -24237460, -7406481, 9494427, -5774029, -6554551, + -15960994, -2449256, -14291300}, + }, + { + {-3151181, -5046075, 9282714, 6866145, -31907062, -863023, -18940575, + 15033784, 25105118, -7894876}, + {-24326370, 15950226, -31801215, -14592823, -11662737, -5090925, + 1573892, -2625887, 2198790, -15804619}, + {-3099351, 10324967, -2241613, 7453183, -5446979, -2735503, -13812022, + -16236442, -32461234, -12290683}, + }, +}; + +/* r = a * A + b * B + * where a = a[0]+256*a[1]+...+256^31 a[31]. + * and b = b[0]+256*b[1]+...+256^31 b[31]. + * B is the Ed25519 base point (x,4/5) with x positive. */ +static void +ge_double_scalarmult_vartime(ge_p2 *r, const uint8_t *a, + const ge_p3 *A, const uint8_t *b) { + signed char aslide[256]; + signed char bslide[256]; + ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */ + ge_p1p1 t; + ge_p3 u; + ge_p3 A2; + int i; + + slide(aslide, a); + slide(bslide, b); + + x25519_ge_p3_to_cached(&Ai[0], A); + ge_p3_dbl(&t, A); + x25519_ge_p1p1_to_p3(&A2, &t); + x25519_ge_add(&t, &A2, &Ai[0]); + x25519_ge_p1p1_to_p3(&u, &t); + x25519_ge_p3_to_cached(&Ai[1], &u); + x25519_ge_add(&t, &A2, &Ai[1]); + x25519_ge_p1p1_to_p3(&u, &t); + x25519_ge_p3_to_cached(&Ai[2], &u); + x25519_ge_add(&t, &A2, &Ai[2]); + x25519_ge_p1p1_to_p3(&u, &t); + x25519_ge_p3_to_cached(&Ai[3], &u); + x25519_ge_add(&t, &A2, &Ai[3]); + x25519_ge_p1p1_to_p3(&u, &t); + x25519_ge_p3_to_cached(&Ai[4], &u); + x25519_ge_add(&t, &A2, &Ai[4]); + x25519_ge_p1p1_to_p3(&u, &t); + x25519_ge_p3_to_cached(&Ai[5], &u); + x25519_ge_add(&t, &A2, &Ai[5]); + x25519_ge_p1p1_to_p3(&u, &t); + x25519_ge_p3_to_cached(&Ai[6], &u); + x25519_ge_add(&t, &A2, &Ai[6]); + x25519_ge_p1p1_to_p3(&u, &t); + x25519_ge_p3_to_cached(&Ai[7], &u); + + ge_p2_0(r); + + for (i = 255; i >= 0; --i) { + if (aslide[i] || bslide[i]) { + break; + } + } + + for (; i >= 0; --i) { + ge_p2_dbl(&t, r); + + if (aslide[i] > 0) { + x25519_ge_p1p1_to_p3(&u, &t); + x25519_ge_add(&t, &u, &Ai[aslide[i] / 2]); + } else if (aslide[i] < 0) { + x25519_ge_p1p1_to_p3(&u, &t); + x25519_ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]); + } + + if (bslide[i] > 0) { + x25519_ge_p1p1_to_p3(&u, &t); + ge_madd(&t, &u, &Bi[bslide[i] / 2]); + } else if (bslide[i] < 0) { + x25519_ge_p1p1_to_p3(&u, &t); + ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]); + } + + x25519_ge_p1p1_to_p2(r, &t); + } +} +#endif + +/* The set of scalars is \Z/l + * where l = 2^252 + 27742317777372353535851937790883648493. */ + +/* Input: + * s[0]+256*s[1]+...+256^63*s[63] = s + * + * Output: + * s[0]+256*s[1]+...+256^31*s[31] = s mod l + * where l = 2^252 + 27742317777372353535851937790883648493. + * Overwrites s in place. */ +void +x25519_sc_reduce(uint8_t *s) { + int64_t s0 = 2097151 & load_3(s); + int64_t s1 = 2097151 & (load_4(s + 2) >> 5); + int64_t s2 = 2097151 & (load_3(s + 5) >> 2); + int64_t s3 = 2097151 & (load_4(s + 7) >> 7); + int64_t s4 = 2097151 & (load_4(s + 10) >> 4); + int64_t s5 = 2097151 & (load_3(s + 13) >> 1); + int64_t s6 = 2097151 & (load_4(s + 15) >> 6); + int64_t s7 = 2097151 & (load_3(s + 18) >> 3); + int64_t s8 = 2097151 & load_3(s + 21); + int64_t s9 = 2097151 & (load_4(s + 23) >> 5); + int64_t s10 = 2097151 & (load_3(s + 26) >> 2); + int64_t s11 = 2097151 & (load_4(s + 28) >> 7); + int64_t s12 = 2097151 & (load_4(s + 31) >> 4); + int64_t s13 = 2097151 & (load_3(s + 34) >> 1); + int64_t s14 = 2097151 & (load_4(s + 36) >> 6); + int64_t s15 = 2097151 & (load_3(s + 39) >> 3); + int64_t s16 = 2097151 & load_3(s + 42); + int64_t s17 = 2097151 & (load_4(s + 44) >> 5); + int64_t s18 = 2097151 & (load_3(s + 47) >> 2); + int64_t s19 = 2097151 & (load_4(s + 49) >> 7); + int64_t s20 = 2097151 & (load_4(s + 52) >> 4); + int64_t s21 = 2097151 & (load_3(s + 55) >> 1); + int64_t s22 = 2097151 & (load_4(s + 57) >> 6); + int64_t s23 = (load_4(s + 60) >> 3); + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + int64_t carry10; + int64_t carry11; + int64_t carry12; + int64_t carry13; + int64_t carry14; + int64_t carry15; + int64_t carry16; + + s11 += s23 * 666643; + s12 += s23 * 470296; + s13 += s23 * 654183; + s14 -= s23 * 997805; + s15 += s23 * 136657; + s16 -= s23 * 683901; + s23 = 0; + + s10 += s22 * 666643; + s11 += s22 * 470296; + s12 += s22 * 654183; + s13 -= s22 * 997805; + s14 += s22 * 136657; + s15 -= s22 * 683901; + s22 = 0; + + s9 += s21 * 666643; + s10 += s21 * 470296; + s11 += s21 * 654183; + s12 -= s21 * 997805; + s13 += s21 * 136657; + s14 -= s21 * 683901; + s21 = 0; + + s8 += s20 * 666643; + s9 += s20 * 470296; + s10 += s20 * 654183; + s11 -= s20 * 997805; + s12 += s20 * 136657; + s13 -= s20 * 683901; + s20 = 0; + + s7 += s19 * 666643; + s8 += s19 * 470296; + s9 += s19 * 654183; + s10 -= s19 * 997805; + s11 += s19 * 136657; + s12 -= s19 * 683901; + s19 = 0; + + s6 += s18 * 666643; + s7 += s18 * 470296; + s8 += s18 * 654183; + s9 -= s18 * 997805; + s10 += s18 * 136657; + s11 -= s18 * 683901; + s18 = 0; + + carry6 = (s6 + (1 << 20)) >> 21; + s7 += carry6; + s6 -= carry6 << 21; + carry8 = (s8 + (1 << 20)) >> 21; + s9 += carry8; + s8 -= carry8 << 21; + carry10 = (s10 + (1 << 20)) >> 21; + s11 += carry10; + s10 -= carry10 << 21; + carry12 = (s12 + (1 << 20)) >> 21; + s13 += carry12; + s12 -= carry12 << 21; + carry14 = (s14 + (1 << 20)) >> 21; + s15 += carry14; + s14 -= carry14 << 21; + carry16 = (s16 + (1 << 20)) >> 21; + s17 += carry16; + s16 -= carry16 << 21; + + carry7 = (s7 + (1 << 20)) >> 21; + s8 += carry7; + s7 -= carry7 << 21; + carry9 = (s9 + (1 << 20)) >> 21; + s10 += carry9; + s9 -= carry9 << 21; + carry11 = (s11 + (1 << 20)) >> 21; + s12 += carry11; + s11 -= carry11 << 21; + carry13 = (s13 + (1 << 20)) >> 21; + s14 += carry13; + s13 -= carry13 << 21; + carry15 = (s15 + (1 << 20)) >> 21; + s16 += carry15; + s15 -= carry15 << 21; + + s5 += s17 * 666643; + s6 += s17 * 470296; + s7 += s17 * 654183; + s8 -= s17 * 997805; + s9 += s17 * 136657; + s10 -= s17 * 683901; + s17 = 0; + + s4 += s16 * 666643; + s5 += s16 * 470296; + s6 += s16 * 654183; + s7 -= s16 * 997805; + s8 += s16 * 136657; + s9 -= s16 * 683901; + s16 = 0; + + s3 += s15 * 666643; + s4 += s15 * 470296; + s5 += s15 * 654183; + s6 -= s15 * 997805; + s7 += s15 * 136657; + s8 -= s15 * 683901; + s15 = 0; + + s2 += s14 * 666643; + s3 += s14 * 470296; + s4 += s14 * 654183; + s5 -= s14 * 997805; + s6 += s14 * 136657; + s7 -= s14 * 683901; + s14 = 0; + + s1 += s13 * 666643; + s2 += s13 * 470296; + s3 += s13 * 654183; + s4 -= s13 * 997805; + s5 += s13 * 136657; + s6 -= s13 * 683901; + s13 = 0; + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = (s0 + (1 << 20)) >> 21; + s1 += carry0; + s0 -= carry0 << 21; + carry2 = (s2 + (1 << 20)) >> 21; + s3 += carry2; + s2 -= carry2 << 21; + carry4 = (s4 + (1 << 20)) >> 21; + s5 += carry4; + s4 -= carry4 << 21; + carry6 = (s6 + (1 << 20)) >> 21; + s7 += carry6; + s6 -= carry6 << 21; + carry8 = (s8 + (1 << 20)) >> 21; + s9 += carry8; + s8 -= carry8 << 21; + carry10 = (s10 + (1 << 20)) >> 21; + s11 += carry10; + s10 -= carry10 << 21; + + carry1 = (s1 + (1 << 20)) >> 21; + s2 += carry1; + s1 -= carry1 << 21; + carry3 = (s3 + (1 << 20)) >> 21; + s4 += carry3; + s3 -= carry3 << 21; + carry5 = (s5 + (1 << 20)) >> 21; + s6 += carry5; + s5 -= carry5 << 21; + carry7 = (s7 + (1 << 20)) >> 21; + s8 += carry7; + s7 -= carry7 << 21; + carry9 = (s9 + (1 << 20)) >> 21; + s10 += carry9; + s9 -= carry9 << 21; + carry11 = (s11 + (1 << 20)) >> 21; + s12 += carry11; + s11 -= carry11 << 21; + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = s0 >> 21; + s1 += carry0; + s0 -= carry0 << 21; + carry1 = s1 >> 21; + s2 += carry1; + s1 -= carry1 << 21; + carry2 = s2 >> 21; + s3 += carry2; + s2 -= carry2 << 21; + carry3 = s3 >> 21; + s4 += carry3; + s3 -= carry3 << 21; + carry4 = s4 >> 21; + s5 += carry4; + s4 -= carry4 << 21; + carry5 = s5 >> 21; + s6 += carry5; + s5 -= carry5 << 21; + carry6 = s6 >> 21; + s7 += carry6; + s6 -= carry6 << 21; + carry7 = s7 >> 21; + s8 += carry7; + s7 -= carry7 << 21; + carry8 = s8 >> 21; + s9 += carry8; + s8 -= carry8 << 21; + carry9 = s9 >> 21; + s10 += carry9; + s9 -= carry9 << 21; + carry10 = s10 >> 21; + s11 += carry10; + s10 -= carry10 << 21; + carry11 = s11 >> 21; + s12 += carry11; + s11 -= carry11 << 21; + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = s0 >> 21; + s1 += carry0; + s0 -= carry0 << 21; + carry1 = s1 >> 21; + s2 += carry1; + s1 -= carry1 << 21; + carry2 = s2 >> 21; + s3 += carry2; + s2 -= carry2 << 21; + carry3 = s3 >> 21; + s4 += carry3; + s3 -= carry3 << 21; + carry4 = s4 >> 21; + s5 += carry4; + s4 -= carry4 << 21; + carry5 = s5 >> 21; + s6 += carry5; + s5 -= carry5 << 21; + carry6 = s6 >> 21; + s7 += carry6; + s6 -= carry6 << 21; + carry7 = s7 >> 21; + s8 += carry7; + s7 -= carry7 << 21; + carry8 = s8 >> 21; + s9 += carry8; + s8 -= carry8 << 21; + carry9 = s9 >> 21; + s10 += carry9; + s9 -= carry9 << 21; + carry10 = s10 >> 21; + s11 += carry10; + s10 -= carry10 << 21; + + s[0] = s0 >> 0; + s[1] = s0 >> 8; + s[2] = (s0 >> 16) | (s1 << 5); + s[3] = s1 >> 3; + s[4] = s1 >> 11; + s[5] = (s1 >> 19) | (s2 << 2); + s[6] = s2 >> 6; + s[7] = (s2 >> 14) | (s3 << 7); + s[8] = s3 >> 1; + s[9] = s3 >> 9; + s[10] = (s3 >> 17) | (s4 << 4); + s[11] = s4 >> 4; + s[12] = s4 >> 12; + s[13] = (s4 >> 20) | (s5 << 1); + s[14] = s5 >> 7; + s[15] = (s5 >> 15) | (s6 << 6); + s[16] = s6 >> 2; + s[17] = s6 >> 10; + s[18] = (s6 >> 18) | (s7 << 3); + s[19] = s7 >> 5; + s[20] = s7 >> 13; + s[21] = s8 >> 0; + s[22] = s8 >> 8; + s[23] = (s8 >> 16) | (s9 << 5); + s[24] = s9 >> 3; + s[25] = s9 >> 11; + s[26] = (s9 >> 19) | (s10 << 2); + s[27] = s10 >> 6; + s[28] = (s10 >> 14) | (s11 << 7); + s[29] = s11 >> 1; + s[30] = s11 >> 9; + s[31] = s11 >> 17; +} + +#ifdef ED25519 +/* Input: + * a[0]+256*a[1]+...+256^31*a[31] = a + * b[0]+256*b[1]+...+256^31*b[31] = b + * c[0]+256*c[1]+...+256^31*c[31] = c + * + * Output: + * s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l + * where l = 2^252 + 27742317777372353535851937790883648493. */ +static void +sc_muladd(uint8_t *s, const uint8_t *a, const uint8_t *b, + const uint8_t *c) +{ + int64_t a0 = 2097151 & load_3(a); + int64_t a1 = 2097151 & (load_4(a + 2) >> 5); + int64_t a2 = 2097151 & (load_3(a + 5) >> 2); + int64_t a3 = 2097151 & (load_4(a + 7) >> 7); + int64_t a4 = 2097151 & (load_4(a + 10) >> 4); + int64_t a5 = 2097151 & (load_3(a + 13) >> 1); + int64_t a6 = 2097151 & (load_4(a + 15) >> 6); + int64_t a7 = 2097151 & (load_3(a + 18) >> 3); + int64_t a8 = 2097151 & load_3(a + 21); + int64_t a9 = 2097151 & (load_4(a + 23) >> 5); + int64_t a10 = 2097151 & (load_3(a + 26) >> 2); + int64_t a11 = (load_4(a + 28) >> 7); + int64_t b0 = 2097151 & load_3(b); + int64_t b1 = 2097151 & (load_4(b + 2) >> 5); + int64_t b2 = 2097151 & (load_3(b + 5) >> 2); + int64_t b3 = 2097151 & (load_4(b + 7) >> 7); + int64_t b4 = 2097151 & (load_4(b + 10) >> 4); + int64_t b5 = 2097151 & (load_3(b + 13) >> 1); + int64_t b6 = 2097151 & (load_4(b + 15) >> 6); + int64_t b7 = 2097151 & (load_3(b + 18) >> 3); + int64_t b8 = 2097151 & load_3(b + 21); + int64_t b9 = 2097151 & (load_4(b + 23) >> 5); + int64_t b10 = 2097151 & (load_3(b + 26) >> 2); + int64_t b11 = (load_4(b + 28) >> 7); + int64_t c0 = 2097151 & load_3(c); + int64_t c1 = 2097151 & (load_4(c + 2) >> 5); + int64_t c2 = 2097151 & (load_3(c + 5) >> 2); + int64_t c3 = 2097151 & (load_4(c + 7) >> 7); + int64_t c4 = 2097151 & (load_4(c + 10) >> 4); + int64_t c5 = 2097151 & (load_3(c + 13) >> 1); + int64_t c6 = 2097151 & (load_4(c + 15) >> 6); + int64_t c7 = 2097151 & (load_3(c + 18) >> 3); + int64_t c8 = 2097151 & load_3(c + 21); + int64_t c9 = 2097151 & (load_4(c + 23) >> 5); + int64_t c10 = 2097151 & (load_3(c + 26) >> 2); + int64_t c11 = (load_4(c + 28) >> 7); + int64_t s0; + int64_t s1; + int64_t s2; + int64_t s3; + int64_t s4; + int64_t s5; + int64_t s6; + int64_t s7; + int64_t s8; + int64_t s9; + int64_t s10; + int64_t s11; + int64_t s12; + int64_t s13; + int64_t s14; + int64_t s15; + int64_t s16; + int64_t s17; + int64_t s18; + int64_t s19; + int64_t s20; + int64_t s21; + int64_t s22; + int64_t s23; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + int64_t carry10; + int64_t carry11; + int64_t carry12; + int64_t carry13; + int64_t carry14; + int64_t carry15; + int64_t carry16; + int64_t carry17; + int64_t carry18; + int64_t carry19; + int64_t carry20; + int64_t carry21; + int64_t carry22; + + s0 = c0 + a0 * b0; + s1 = c1 + a0 * b1 + a1 * b0; + s2 = c2 + a0 * b2 + a1 * b1 + a2 * b0; + s3 = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0; + s4 = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0; + s5 = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0; + s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0; + s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 + + a6 * b1 + a7 * b0; + s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 + + a6 * b2 + a7 * b1 + a8 * b0; + s9 = c9 + a0 * b9 + a1 * b8 + a2 * b7 + a3 * b6 + a4 * b5 + a5 * b4 + + a6 * b3 + a7 * b2 + a8 * b1 + a9 * b0; + s10 = c10 + a0 * b10 + a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 + + a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1 + a10 * b0; + s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 + + a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2 + a10 * b1 + a11 * b0; + s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 + a7 * b5 + + a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1; + s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 + a8 * b5 + + a9 * b4 + a10 * b3 + a11 * b2; + s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 + a9 * b5 + + a10 * b4 + a11 * b3; + s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 + a10 * b5 + + a11 * b4; + s16 = a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 + a11 * b5; + s17 = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6; + s18 = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7; + s19 = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8; + s20 = a9 * b11 + a10 * b10 + a11 * b9; + s21 = a10 * b11 + a11 * b10; + s22 = a11 * b11; + s23 = 0; + + carry0 = (s0 + (1 << 20)) >> 21; + s1 += carry0; + s0 -= carry0 << 21; + carry2 = (s2 + (1 << 20)) >> 21; + s3 += carry2; + s2 -= carry2 << 21; + carry4 = (s4 + (1 << 20)) >> 21; + s5 += carry4; + s4 -= carry4 << 21; + carry6 = (s6 + (1 << 20)) >> 21; + s7 += carry6; + s6 -= carry6 << 21; + carry8 = (s8 + (1 << 20)) >> 21; + s9 += carry8; + s8 -= carry8 << 21; + carry10 = (s10 + (1 << 20)) >> 21; + s11 += carry10; + s10 -= carry10 << 21; + carry12 = (s12 + (1 << 20)) >> 21; + s13 += carry12; + s12 -= carry12 << 21; + carry14 = (s14 + (1 << 20)) >> 21; + s15 += carry14; + s14 -= carry14 << 21; + carry16 = (s16 + (1 << 20)) >> 21; + s17 += carry16; + s16 -= carry16 << 21; + carry18 = (s18 + (1 << 20)) >> 21; + s19 += carry18; + s18 -= carry18 << 21; + carry20 = (s20 + (1 << 20)) >> 21; + s21 += carry20; + s20 -= carry20 << 21; + carry22 = (s22 + (1 << 20)) >> 21; + s23 += carry22; + s22 -= carry22 << 21; + + carry1 = (s1 + (1 << 20)) >> 21; + s2 += carry1; + s1 -= carry1 << 21; + carry3 = (s3 + (1 << 20)) >> 21; + s4 += carry3; + s3 -= carry3 << 21; + carry5 = (s5 + (1 << 20)) >> 21; + s6 += carry5; + s5 -= carry5 << 21; + carry7 = (s7 + (1 << 20)) >> 21; + s8 += carry7; + s7 -= carry7 << 21; + carry9 = (s9 + (1 << 20)) >> 21; + s10 += carry9; + s9 -= carry9 << 21; + carry11 = (s11 + (1 << 20)) >> 21; + s12 += carry11; + s11 -= carry11 << 21; + carry13 = (s13 + (1 << 20)) >> 21; + s14 += carry13; + s13 -= carry13 << 21; + carry15 = (s15 + (1 << 20)) >> 21; + s16 += carry15; + s15 -= carry15 << 21; + carry17 = (s17 + (1 << 20)) >> 21; + s18 += carry17; + s17 -= carry17 << 21; + carry19 = (s19 + (1 << 20)) >> 21; + s20 += carry19; + s19 -= carry19 << 21; + carry21 = (s21 + (1 << 20)) >> 21; + s22 += carry21; + s21 -= carry21 << 21; + + s11 += s23 * 666643; + s12 += s23 * 470296; + s13 += s23 * 654183; + s14 -= s23 * 997805; + s15 += s23 * 136657; + s16 -= s23 * 683901; + s23 = 0; + + s10 += s22 * 666643; + s11 += s22 * 470296; + s12 += s22 * 654183; + s13 -= s22 * 997805; + s14 += s22 * 136657; + s15 -= s22 * 683901; + s22 = 0; + + s9 += s21 * 666643; + s10 += s21 * 470296; + s11 += s21 * 654183; + s12 -= s21 * 997805; + s13 += s21 * 136657; + s14 -= s21 * 683901; + s21 = 0; + + s8 += s20 * 666643; + s9 += s20 * 470296; + s10 += s20 * 654183; + s11 -= s20 * 997805; + s12 += s20 * 136657; + s13 -= s20 * 683901; + s20 = 0; + + s7 += s19 * 666643; + s8 += s19 * 470296; + s9 += s19 * 654183; + s10 -= s19 * 997805; + s11 += s19 * 136657; + s12 -= s19 * 683901; + s19 = 0; + + s6 += s18 * 666643; + s7 += s18 * 470296; + s8 += s18 * 654183; + s9 -= s18 * 997805; + s10 += s18 * 136657; + s11 -= s18 * 683901; + s18 = 0; + + carry6 = (s6 + (1 << 20)) >> 21; + s7 += carry6; + s6 -= carry6 << 21; + carry8 = (s8 + (1 << 20)) >> 21; + s9 += carry8; + s8 -= carry8 << 21; + carry10 = (s10 + (1 << 20)) >> 21; + s11 += carry10; + s10 -= carry10 << 21; + carry12 = (s12 + (1 << 20)) >> 21; + s13 += carry12; + s12 -= carry12 << 21; + carry14 = (s14 + (1 << 20)) >> 21; + s15 += carry14; + s14 -= carry14 << 21; + carry16 = (s16 + (1 << 20)) >> 21; + s17 += carry16; + s16 -= carry16 << 21; + + carry7 = (s7 + (1 << 20)) >> 21; + s8 += carry7; + s7 -= carry7 << 21; + carry9 = (s9 + (1 << 20)) >> 21; + s10 += carry9; + s9 -= carry9 << 21; + carry11 = (s11 + (1 << 20)) >> 21; + s12 += carry11; + s11 -= carry11 << 21; + carry13 = (s13 + (1 << 20)) >> 21; + s14 += carry13; + s13 -= carry13 << 21; + carry15 = (s15 + (1 << 20)) >> 21; + s16 += carry15; + s15 -= carry15 << 21; + + s5 += s17 * 666643; + s6 += s17 * 470296; + s7 += s17 * 654183; + s8 -= s17 * 997805; + s9 += s17 * 136657; + s10 -= s17 * 683901; + s17 = 0; + + s4 += s16 * 666643; + s5 += s16 * 470296; + s6 += s16 * 654183; + s7 -= s16 * 997805; + s8 += s16 * 136657; + s9 -= s16 * 683901; + s16 = 0; + + s3 += s15 * 666643; + s4 += s15 * 470296; + s5 += s15 * 654183; + s6 -= s15 * 997805; + s7 += s15 * 136657; + s8 -= s15 * 683901; + s15 = 0; + + s2 += s14 * 666643; + s3 += s14 * 470296; + s4 += s14 * 654183; + s5 -= s14 * 997805; + s6 += s14 * 136657; + s7 -= s14 * 683901; + s14 = 0; + + s1 += s13 * 666643; + s2 += s13 * 470296; + s3 += s13 * 654183; + s4 -= s13 * 997805; + s5 += s13 * 136657; + s6 -= s13 * 683901; + s13 = 0; + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = (s0 + (1 << 20)) >> 21; + s1 += carry0; + s0 -= carry0 << 21; + carry2 = (s2 + (1 << 20)) >> 21; + s3 += carry2; + s2 -= carry2 << 21; + carry4 = (s4 + (1 << 20)) >> 21; + s5 += carry4; + s4 -= carry4 << 21; + carry6 = (s6 + (1 << 20)) >> 21; + s7 += carry6; + s6 -= carry6 << 21; + carry8 = (s8 + (1 << 20)) >> 21; + s9 += carry8; + s8 -= carry8 << 21; + carry10 = (s10 + (1 << 20)) >> 21; + s11 += carry10; + s10 -= carry10 << 21; + + carry1 = (s1 + (1 << 20)) >> 21; + s2 += carry1; + s1 -= carry1 << 21; + carry3 = (s3 + (1 << 20)) >> 21; + s4 += carry3; + s3 -= carry3 << 21; + carry5 = (s5 + (1 << 20)) >> 21; + s6 += carry5; + s5 -= carry5 << 21; + carry7 = (s7 + (1 << 20)) >> 21; + s8 += carry7; + s7 -= carry7 << 21; + carry9 = (s9 + (1 << 20)) >> 21; + s10 += carry9; + s9 -= carry9 << 21; + carry11 = (s11 + (1 << 20)) >> 21; + s12 += carry11; + s11 -= carry11 << 21; + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = s0 >> 21; + s1 += carry0; + s0 -= carry0 << 21; + carry1 = s1 >> 21; + s2 += carry1; + s1 -= carry1 << 21; + carry2 = s2 >> 21; + s3 += carry2; + s2 -= carry2 << 21; + carry3 = s3 >> 21; + s4 += carry3; + s3 -= carry3 << 21; + carry4 = s4 >> 21; + s5 += carry4; + s4 -= carry4 << 21; + carry5 = s5 >> 21; + s6 += carry5; + s5 -= carry5 << 21; + carry6 = s6 >> 21; + s7 += carry6; + s6 -= carry6 << 21; + carry7 = s7 >> 21; + s8 += carry7; + s7 -= carry7 << 21; + carry8 = s8 >> 21; + s9 += carry8; + s8 -= carry8 << 21; + carry9 = s9 >> 21; + s10 += carry9; + s9 -= carry9 << 21; + carry10 = s10 >> 21; + s11 += carry10; + s10 -= carry10 << 21; + carry11 = s11 >> 21; + s12 += carry11; + s11 -= carry11 << 21; + + s0 += s12 * 666643; + s1 += s12 * 470296; + s2 += s12 * 654183; + s3 -= s12 * 997805; + s4 += s12 * 136657; + s5 -= s12 * 683901; + s12 = 0; + + carry0 = s0 >> 21; + s1 += carry0; + s0 -= carry0 << 21; + carry1 = s1 >> 21; + s2 += carry1; + s1 -= carry1 << 21; + carry2 = s2 >> 21; + s3 += carry2; + s2 -= carry2 << 21; + carry3 = s3 >> 21; + s4 += carry3; + s3 -= carry3 << 21; + carry4 = s4 >> 21; + s5 += carry4; + s4 -= carry4 << 21; + carry5 = s5 >> 21; + s6 += carry5; + s5 -= carry5 << 21; + carry6 = s6 >> 21; + s7 += carry6; + s6 -= carry6 << 21; + carry7 = s7 >> 21; + s8 += carry7; + s7 -= carry7 << 21; + carry8 = s8 >> 21; + s9 += carry8; + s8 -= carry8 << 21; + carry9 = s9 >> 21; + s10 += carry9; + s9 -= carry9 << 21; + carry10 = s10 >> 21; + s11 += carry10; + s10 -= carry10 << 21; + + s[0] = s0 >> 0; + s[1] = s0 >> 8; + s[2] = (s0 >> 16) | (s1 << 5); + s[3] = s1 >> 3; + s[4] = s1 >> 11; + s[5] = (s1 >> 19) | (s2 << 2); + s[6] = s2 >> 6; + s[7] = (s2 >> 14) | (s3 << 7); + s[8] = s3 >> 1; + s[9] = s3 >> 9; + s[10] = (s3 >> 17) | (s4 << 4); + s[11] = s4 >> 4; + s[12] = s4 >> 12; + s[13] = (s4 >> 20) | (s5 << 1); + s[14] = s5 >> 7; + s[15] = (s5 >> 15) | (s6 << 6); + s[16] = s6 >> 2; + s[17] = s6 >> 10; + s[18] = (s6 >> 18) | (s7 << 3); + s[19] = s7 >> 5; + s[20] = s7 >> 13; + s[21] = s8 >> 0; + s[22] = s8 >> 8; + s[23] = (s8 >> 16) | (s9 << 5); + s[24] = s9 >> 3; + s[25] = s9 >> 11; + s[26] = (s9 >> 19) | (s10 << 2); + s[27] = s10 >> 6; + s[28] = (s10 >> 14) | (s11 << 7); + s[29] = s11 >> 1; + s[30] = s11 >> 9; + s[31] = s11 >> 17; +} +#endif + +#ifdef ED25519 +void ED25519_keypair(uint8_t out_public_key[32], uint8_t out_private_key[64]) { + uint8_t seed[32]; + arc4random_buf(seed, 32); + + uint8_t az[SHA512_DIGEST_LENGTH]; + SHA512(seed, 32, az); + + az[0] &= 248; + az[31] &= 63; + az[31] |= 64; + + ge_p3 A; + x25519_ge_scalarmult_base(&A, az); + ge_p3_tobytes(out_public_key, &A); + + memcpy(out_private_key, seed, 32); + memmove(out_private_key + 32, out_public_key, 32); +} + +int ED25519_sign(uint8_t *out_sig, const uint8_t *message, size_t message_len, + const uint8_t private_key[64]) { + uint8_t az[SHA512_DIGEST_LENGTH]; + SHA512(private_key, 32, az); + + az[0] &= 248; + az[31] &= 63; + az[31] |= 64; + + SHA512_CTX hash_ctx; + SHA512_Init(&hash_ctx); + SHA512_Update(&hash_ctx, az + 32, 32); + SHA512_Update(&hash_ctx, message, message_len); + uint8_t nonce[SHA512_DIGEST_LENGTH]; + SHA512_Final(nonce, &hash_ctx); + + x25519_sc_reduce(nonce); + ge_p3 R; + x25519_ge_scalarmult_base(&R, nonce); + ge_p3_tobytes(out_sig, &R); + + SHA512_Init(&hash_ctx); + SHA512_Update(&hash_ctx, out_sig, 32); + SHA512_Update(&hash_ctx, private_key + 32, 32); + SHA512_Update(&hash_ctx, message, message_len); + uint8_t hram[SHA512_DIGEST_LENGTH]; + SHA512_Final(hram, &hash_ctx); + + x25519_sc_reduce(hram); + sc_muladd(out_sig + 32, hram, az, nonce); + + return 1; +} + +int ED25519_verify(const uint8_t *message, size_t message_len, + const uint8_t signature[64], const uint8_t public_key[32]) { + ge_p3 A; + if ((signature[63] & 224) != 0 || + x25519_ge_frombytes_vartime(&A, public_key) != 0) { + return 0; + } + + fe_neg(A.X, A.X); + fe_neg(A.T, A.T); + + uint8_t pkcopy[32]; + memcpy(pkcopy, public_key, 32); + uint8_t rcopy[32]; + memcpy(rcopy, signature, 32); + uint8_t scopy[32]; + memcpy(scopy, signature + 32, 32); + + SHA512_CTX hash_ctx; + SHA512_Init(&hash_ctx); + SHA512_Update(&hash_ctx, signature, 32); + SHA512_Update(&hash_ctx, public_key, 32); + SHA512_Update(&hash_ctx, message, message_len); + uint8_t h[SHA512_DIGEST_LENGTH]; + SHA512_Final(h, &hash_ctx); + + x25519_sc_reduce(h); + + ge_p2 R; + ge_double_scalarmult_vartime(&R, h, &A, scopy); + + uint8_t rcheck[32]; + x25519_ge_tobytes(rcheck, &R); + + return timingsafe_memcmp(rcheck, rcopy, sizeof(rcheck)) == 0; +} +#endif + +/* Replace (f,g) with (g,f) if b == 1; + * replace (f,g) with (f,g) if b == 0. + * + * Preconditions: b in {0,1}. */ +static void fe_cswap(fe f, fe g, unsigned int b) { + b = 0-b; + unsigned i; + for (i = 0; i < 10; i++) { + int32_t x = f[i] ^ g[i]; + x &= b; + f[i] ^= x; + g[i] ^= x; + } +} + +/* h = f * 121666 + * Can overlap h with f. + * + * Preconditions: + * |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. + * + * Postconditions: + * |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. */ +static void fe_mul121666(fe h, fe f) { + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int64_t h0 = f0 * (int64_t) 121666; + int64_t h1 = f1 * (int64_t) 121666; + int64_t h2 = f2 * (int64_t) 121666; + int64_t h3 = f3 * (int64_t) 121666; + int64_t h4 = f4 * (int64_t) 121666; + int64_t h5 = f5 * (int64_t) 121666; + int64_t h6 = f6 * (int64_t) 121666; + int64_t h7 = f7 * (int64_t) 121666; + int64_t h8 = f8 * (int64_t) 121666; + int64_t h9 = f9 * (int64_t) 121666; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + + carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits; + carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits; + carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits; + carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits; + carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits; + + carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits; + carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits; + carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits; + carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits; + carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits; + + h[0] = h0; + h[1] = h1; + h[2] = h2; + h[3] = h3; + h[4] = h4; + h[5] = h5; + h[6] = h6; + h[7] = h7; + h[8] = h8; + h[9] = h9; +} + +void +x25519_scalar_mult_generic(uint8_t out[32], const uint8_t scalar[32], + const uint8_t point[32]) { + fe x1, x2, z2, x3, z3, tmp0, tmp1; + + uint8_t e[32]; + memcpy(e, scalar, 32); + e[0] &= 248; + e[31] &= 127; + e[31] |= 64; + fe_frombytes(x1, point); + fe_1(x2); + fe_0(z2); + fe_copy(x3, x1); + fe_1(z3); + + unsigned swap = 0; + int pos; + for (pos = 254; pos >= 0; --pos) { + unsigned b = 1 & (e[pos / 8] >> (pos & 7)); + swap ^= b; + fe_cswap(x2, x3, swap); + fe_cswap(z2, z3, swap); + swap = b; + fe_sub(tmp0, x3, z3); + fe_sub(tmp1, x2, z2); + fe_add(x2, x2, z2); + fe_add(z2, x3, z3); + fe_mul(z3, tmp0, x2); + fe_mul(z2, z2, tmp1); + fe_sq(tmp0, tmp1); + fe_sq(tmp1, x2); + fe_add(x3, z3, z2); + fe_sub(z2, z3, z2); + fe_mul(x2, tmp1, tmp0); + fe_sub(tmp1, tmp1, tmp0); + fe_sq(z2, z2); + fe_mul121666(z3, tmp1); + fe_sq(x3, x3); + fe_add(tmp0, tmp0, z3); + fe_mul(z3, x1, z2); + fe_mul(z2, tmp1, tmp0); + } + fe_cswap(x2, x3, swap); + fe_cswap(z2, z3, swap); + + fe_invert(z2, z2); + fe_mul(x2, x2, z2); + fe_tobytes(out, x2); +} + +#ifdef unused +void +x25519_public_from_private_generic(uint8_t out_public_value[32], + const uint8_t private_key[32]) +{ + uint8_t e[32]; + + memcpy(e, private_key, 32); + e[0] &= 248; + e[31] &= 127; + e[31] |= 64; + + ge_p3 A; + x25519_ge_scalarmult_base(&A, e); + + /* We only need the u-coordinate of the curve25519 point. The map is + * u=(y+1)/(1-y). Since y=Y/Z, this gives u=(Z+Y)/(Z-Y). */ + fe zplusy, zminusy, zminusy_inv; + fe_add(zplusy, A.Z, A.Y); + fe_sub(zminusy, A.Z, A.Y); + fe_invert(zminusy_inv, zminusy); + fe_mul(zplusy, zplusy, zminusy_inv); + fe_tobytes(out_public_value, zplusy); +} +#endif + +void +x25519_public_from_private(uint8_t out_public_value[32], + const uint8_t private_key[32]) +{ + static const uint8_t kMongomeryBasePoint[32] = {9}; + + x25519_scalar_mult(out_public_value, private_key, kMongomeryBasePoint); +} + +void +X25519_keypair(uint8_t out_public_value[X25519_KEY_LENGTH], + uint8_t out_private_key[X25519_KEY_LENGTH]) +{ + /* All X25519 implementations should decode scalars correctly (see + * https://tools.ietf.org/html/rfc7748#section-5). However, if an + * implementation doesn't then it might interoperate with random keys a + * fraction of the time because they'll, randomly, happen to be correctly + * formed. + * + * Thus we do the opposite of the masking here to make sure that our private + * keys are never correctly masked and so, hopefully, any incorrect + * implementations are deterministically broken. + * + * This does not affect security because, although we're throwing away + * entropy, a valid implementation of scalarmult should throw away the exact + * same bits anyway. */ + arc4random_buf(out_private_key, 32); + + out_private_key[0] |= 7; + out_private_key[31] &= 63; + out_private_key[31] |= 128; + + x25519_public_from_private(out_public_value, out_private_key); +} + +int +X25519(uint8_t out_shared_key[X25519_KEY_LENGTH], + const uint8_t private_key[X25519_KEY_LENGTH], + const uint8_t peer_public_value[X25519_KEY_LENGTH]) +{ + static const uint8_t kZeros[32] = {0}; + + x25519_scalar_mult(out_shared_key, private_key, peer_public_value); + + /* The all-zero output results when the input is a point of small order. */ + return timingsafe_memcmp(kZeros, out_shared_key, 32) != 0; +} diff --git a/ext/libressl/crypto/curve25519/curve25519_internal.h b/ext/libressl/crypto/curve25519/curve25519_internal.h new file mode 100644 index 0000000..09d20a4 --- /dev/null +++ b/ext/libressl/crypto/curve25519/curve25519_internal.h @@ -0,0 +1,99 @@ +/* $OpenBSD: curve25519_internal.h,v 1.3 2019/05/11 15:55:52 tb Exp $ */ +/* + * Copyright (c) 2015, Google Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef HEADER_CURVE25519_INTERNAL_H +#define HEADER_CURVE25519_INTERNAL_H + +#include + +__BEGIN_HIDDEN_DECLS + +/* fe means field element. Here the field is \Z/(2^255-19). An element t, + * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77 + * t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on + * context. */ +typedef int32_t fe[10]; + +/* ge means group element. + + * Here the group is the set of pairs (x,y) of field elements (see fe.h) + * satisfying -x^2 + y^2 = 1 + d x^2y^2 + * where d = -121665/121666. + * + * Representations: + * ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z + * ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT + * ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T + * ge_precomp (Duif): (y+x,y-x,2dxy) */ + +typedef struct { + fe X; + fe Y; + fe Z; +} ge_p2; + +typedef struct { + fe X; + fe Y; + fe Z; + fe T; +} ge_p3; + +typedef struct { + fe X; + fe Y; + fe Z; + fe T; +} ge_p1p1; + +typedef struct { + fe yplusx; + fe yminusx; + fe xy2d; +} ge_precomp; + +typedef struct { + fe YplusX; + fe YminusX; + fe Z; + fe T2d; +} ge_cached; + +void x25519_ge_tobytes(uint8_t *s, const ge_p2 *h); +int x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t *s); +void x25519_ge_p3_to_cached(ge_cached *r, const ge_p3 *p); +void x25519_ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p); +void x25519_ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p); +void x25519_ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q); +void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q); +void x25519_ge_scalarmult_small_precomp(ge_p3 *h, const uint8_t a[32], + const uint8_t precomp_table[15 * 2 * 32]); +void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32]); +void x25519_ge_scalarmult(ge_p2 *r, const uint8_t *scalar, const ge_p3 *A); +void x25519_sc_reduce(uint8_t *s); + +void x25519_public_from_private(uint8_t out_public_value[32], + const uint8_t private_key[32]); + +void x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32], + const uint8_t point[32]); +void x25519_scalar_mult_generic(uint8_t out[32], const uint8_t scalar[32], + const uint8_t point[32]); + +__END_HIDDEN_DECLS + +#endif /* HEADER_CURVE25519_INTERNAL_H */ diff --git a/ext/libressl/crypto/modes/Makefile b/ext/libressl/crypto/modes/Makefile new file mode 100644 index 0000000..aeba042 --- /dev/null +++ b/ext/libressl/crypto/modes/Makefile @@ -0,0 +1,14 @@ +include ../../ssl_common.mk +CFLAGS += -D__BEGIN_HIDDEN_DECLS= -D__END_HIDDEN_DECLS= + +obj = cbc128.o ccm128.o cfb128.o ctr128.o cts128.o gcm128.o ofb128.o xts128.o + + +all: $(obj) +dep: all + +%.o: %.c + $(CC) $(CFLAGS) -c $< + +clean: + rm -f *.o *.a diff --git a/ext/libressl/crypto/modes/cbc128.c b/ext/libressl/crypto/modes/cbc128.c new file mode 100644 index 0000000..7502a48 --- /dev/null +++ b/ext/libressl/crypto/modes/cbc128.c @@ -0,0 +1,202 @@ +/* $OpenBSD: cbc128.c,v 1.4 2015/02/10 09:46:30 miod Exp $ */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + */ + +#include +#include "modes_lcl.h" +#include + +#ifndef MODES_DEBUG +# ifndef NDEBUG +# define NDEBUG +# endif +#endif + +#undef STRICT_ALIGNMENT +#ifdef __STRICT_ALIGNMENT +#define STRICT_ALIGNMENT 1 +#else +#define STRICT_ALIGNMENT 0 +#endif + +void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const void *key, + unsigned char ivec[16], block128_f block) +{ + size_t n; + const unsigned char *iv = ivec; + +#if !defined(OPENSSL_SMALL_FOOTPRINT) + if (STRICT_ALIGNMENT && + ((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0) { + while (len>=16) { + for(n=0; n<16; ++n) + out[n] = in[n] ^ iv[n]; + (*block)(out, out, key); + iv = out; + len -= 16; + in += 16; + out += 16; + } + } else { + while (len>=16) { + for(n=0; n<16; n+=sizeof(size_t)) + *(size_t*)(out+n) = + *(size_t*)(in+n) ^ *(size_t*)(iv+n); + (*block)(out, out, key); + iv = out; + len -= 16; + in += 16; + out += 16; + } + } +#endif + while (len) { + for(n=0; n<16 && n=16) { + (*block)(in, out, key); + for(n=0; n<16; ++n) + out[n] ^= iv[n]; + iv = in; + len -= 16; + in += 16; + out += 16; + } + } else if (16%sizeof(size_t) == 0) { /* always true */ + while (len>=16) { + size_t *out_t=(size_t *)out, *iv_t=(size_t *)iv; + + (*block)(in, out, key); + for(n=0; n<16/sizeof(size_t); n++) + out_t[n] ^= iv_t[n]; + iv = in; + len -= 16; + in += 16; + out += 16; + } + } + memcpy(ivec,iv,16); + } else { + if (STRICT_ALIGNMENT && + ((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0) { + unsigned char c; + while (len>=16) { + (*block)(in, tmp.c, key); + for(n=0; n<16; ++n) { + c = in[n]; + out[n] = tmp.c[n] ^ ivec[n]; + ivec[n] = c; + } + len -= 16; + in += 16; + out += 16; + } + } else if (16%sizeof(size_t) == 0) { /* always true */ + while (len>=16) { + size_t c, *out_t=(size_t *)out, *ivec_t=(size_t *)ivec; + const size_t *in_t=(const size_t *)in; + + (*block)(in, tmp.c, key); + for(n=0; n<16/sizeof(size_t); n++) { + c = in_t[n]; + out_t[n] = tmp.t[n] ^ ivec_t[n]; + ivec_t[n] = c; + } + len -= 16; + in += 16; + out += 16; + } + } + } +#endif + while (len) { + unsigned char c; + (*block)(in, tmp.c, key); + for(n=0; n<16 && n +#include "modes_lcl.h" +#include + +#ifndef MODES_DEBUG +# ifndef NDEBUG +# define NDEBUG +# endif +#endif + +/* First you setup M and L parameters and pass the key schedule. + * This is called once per session setup... */ +void CRYPTO_ccm128_init(CCM128_CONTEXT *ctx, + unsigned int M,unsigned int L,void *key,block128_f block) +{ + memset(ctx->nonce.c,0,sizeof(ctx->nonce.c)); + ctx->nonce.c[0] = ((u8)(L-1)&7) | (u8)(((M-2)/2)&7)<<3; + ctx->blocks = 0; + ctx->block = block; + ctx->key = key; +} + +/* !!! Following interfaces are to be called *once* per packet !!! */ + +/* Then you setup per-message nonce and pass the length of the message */ +int CRYPTO_ccm128_setiv(CCM128_CONTEXT *ctx, + const unsigned char *nonce,size_t nlen,size_t mlen) +{ + unsigned int L = ctx->nonce.c[0]&7; /* the L parameter */ + + if (nlen<(14-L)) return -1; /* nonce is too short */ + + if (sizeof(mlen)==8 && L>=3) { + ctx->nonce.c[8] = (u8)(mlen>>(56%(sizeof(mlen)*8))); + ctx->nonce.c[9] = (u8)(mlen>>(48%(sizeof(mlen)*8))); + ctx->nonce.c[10] = (u8)(mlen>>(40%(sizeof(mlen)*8))); + ctx->nonce.c[11] = (u8)(mlen>>(32%(sizeof(mlen)*8))); + } + else + ctx->nonce.u[1] = 0; + + ctx->nonce.c[12] = (u8)(mlen>>24); + ctx->nonce.c[13] = (u8)(mlen>>16); + ctx->nonce.c[14] = (u8)(mlen>>8); + ctx->nonce.c[15] = (u8)mlen; + + ctx->nonce.c[0] &= ~0x40; /* clear Adata flag */ + memcpy(&ctx->nonce.c[1],nonce,14-L); + + return 0; +} + +/* Then you pass additional authentication data, this is optional */ +void CRYPTO_ccm128_aad(CCM128_CONTEXT *ctx, + const unsigned char *aad,size_t alen) +{ unsigned int i; + block128_f block = ctx->block; + + if (alen==0) return; + + ctx->nonce.c[0] |= 0x40; /* set Adata flag */ + (*block)(ctx->nonce.c,ctx->cmac.c,ctx->key), + ctx->blocks++; + + if (alen<(0x10000-0x100)) { + ctx->cmac.c[0] ^= (u8)(alen>>8); + ctx->cmac.c[1] ^= (u8)alen; + i=2; + } + else if (sizeof(alen)==8 && alen>=(size_t)1<<(32%(sizeof(alen)*8))) { + ctx->cmac.c[0] ^= 0xFF; + ctx->cmac.c[1] ^= 0xFF; + ctx->cmac.c[2] ^= (u8)(alen>>(56%(sizeof(alen)*8))); + ctx->cmac.c[3] ^= (u8)(alen>>(48%(sizeof(alen)*8))); + ctx->cmac.c[4] ^= (u8)(alen>>(40%(sizeof(alen)*8))); + ctx->cmac.c[5] ^= (u8)(alen>>(32%(sizeof(alen)*8))); + ctx->cmac.c[6] ^= (u8)(alen>>24); + ctx->cmac.c[7] ^= (u8)(alen>>16); + ctx->cmac.c[8] ^= (u8)(alen>>8); + ctx->cmac.c[9] ^= (u8)alen; + i=10; + } + else { + ctx->cmac.c[0] ^= 0xFF; + ctx->cmac.c[1] ^= 0xFE; + ctx->cmac.c[2] ^= (u8)(alen>>24); + ctx->cmac.c[3] ^= (u8)(alen>>16); + ctx->cmac.c[4] ^= (u8)(alen>>8); + ctx->cmac.c[5] ^= (u8)alen; + i=6; + } + + do { + for(;i<16 && alen;++i,++aad,--alen) + ctx->cmac.c[i] ^= *aad; + (*block)(ctx->cmac.c,ctx->cmac.c,ctx->key), + ctx->blocks++; + i=0; + } while (alen); +} + +/* Finally you encrypt or decrypt the message */ + +/* counter part of nonce may not be larger than L*8 bits, + * L is not larger than 8, therefore 64-bit counter... */ +static void ctr64_inc(unsigned char *counter) { + unsigned int n=8; + u8 c; + + counter += 8; + do { + --n; + c = counter[n]; + ++c; + counter[n] = c; + if (c) return; + } while (n); +} + +int CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx, + const unsigned char *inp, unsigned char *out, + size_t len) +{ + size_t n; + unsigned int i,L; + unsigned char flags0 = ctx->nonce.c[0]; + block128_f block = ctx->block; + void * key = ctx->key; + union { u64 u[2]; u8 c[16]; } scratch; + + if (!(flags0&0x40)) + (*block)(ctx->nonce.c,ctx->cmac.c,key), + ctx->blocks++; + + ctx->nonce.c[0] = L = flags0&7; + for (n=0,i=15-L;i<15;++i) { + n |= ctx->nonce.c[i]; + ctx->nonce.c[i]=0; + n <<= 8; + } + n |= ctx->nonce.c[15]; /* reconstructed length */ + ctx->nonce.c[15]=1; + + if (n!=len) return -1; /* length mismatch */ + + ctx->blocks += ((len+15)>>3)|1; + if (ctx->blocks > (U64(1)<<61)) return -2; /* too much data */ + + while (len>=16) { +#ifdef __STRICT_ALIGNMENT + union { u64 u[2]; u8 c[16]; } temp; + + memcpy (temp.c,inp,16); + ctx->cmac.u[0] ^= temp.u[0]; + ctx->cmac.u[1] ^= temp.u[1]; +#else + ctx->cmac.u[0] ^= ((u64*)inp)[0]; + ctx->cmac.u[1] ^= ((u64*)inp)[1]; +#endif + (*block)(ctx->cmac.c,ctx->cmac.c,key); + (*block)(ctx->nonce.c,scratch.c,key); + ctr64_inc(ctx->nonce.c); +#ifdef __STRICT_ALIGNMENT + temp.u[0] ^= scratch.u[0]; + temp.u[1] ^= scratch.u[1]; + memcpy(out,temp.c,16); +#else + ((u64*)out)[0] = scratch.u[0]^((u64*)inp)[0]; + ((u64*)out)[1] = scratch.u[1]^((u64*)inp)[1]; +#endif + inp += 16; + out += 16; + len -= 16; + } + + if (len) { + for (i=0; icmac.c[i] ^= inp[i]; + (*block)(ctx->cmac.c,ctx->cmac.c,key); + (*block)(ctx->nonce.c,scratch.c,key); + for (i=0; inonce.c[i]=0; + + (*block)(ctx->nonce.c,scratch.c,key); + ctx->cmac.u[0] ^= scratch.u[0]; + ctx->cmac.u[1] ^= scratch.u[1]; + + ctx->nonce.c[0] = flags0; + + return 0; +} + +int CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx, + const unsigned char *inp, unsigned char *out, + size_t len) +{ + size_t n; + unsigned int i,L; + unsigned char flags0 = ctx->nonce.c[0]; + block128_f block = ctx->block; + void * key = ctx->key; + union { u64 u[2]; u8 c[16]; } scratch; + + if (!(flags0&0x40)) + (*block)(ctx->nonce.c,ctx->cmac.c,key); + + ctx->nonce.c[0] = L = flags0&7; + for (n=0,i=15-L;i<15;++i) { + n |= ctx->nonce.c[i]; + ctx->nonce.c[i]=0; + n <<= 8; + } + n |= ctx->nonce.c[15]; /* reconstructed length */ + ctx->nonce.c[15]=1; + + if (n!=len) return -1; + + while (len>=16) { +#ifdef __STRICT_ALIGNMENT + union { u64 u[2]; u8 c[16]; } temp; +#endif + (*block)(ctx->nonce.c,scratch.c,key); + ctr64_inc(ctx->nonce.c); +#ifdef __STRICT_ALIGNMENT + memcpy (temp.c,inp,16); + ctx->cmac.u[0] ^= (scratch.u[0] ^= temp.u[0]); + ctx->cmac.u[1] ^= (scratch.u[1] ^= temp.u[1]); + memcpy (out,scratch.c,16); +#else + ctx->cmac.u[0] ^= (((u64*)out)[0] = scratch.u[0]^((u64*)inp)[0]); + ctx->cmac.u[1] ^= (((u64*)out)[1] = scratch.u[1]^((u64*)inp)[1]); +#endif + (*block)(ctx->cmac.c,ctx->cmac.c,key); + + inp += 16; + out += 16; + len -= 16; + } + + if (len) { + (*block)(ctx->nonce.c,scratch.c,key); + for (i=0; icmac.c[i] ^= (out[i] = scratch.c[i]^inp[i]); + (*block)(ctx->cmac.c,ctx->cmac.c,key); + } + + for (i=15-L;i<16;++i) + ctx->nonce.c[i]=0; + + (*block)(ctx->nonce.c,scratch.c,key); + ctx->cmac.u[0] ^= scratch.u[0]; + ctx->cmac.u[1] ^= scratch.u[1]; + + ctx->nonce.c[0] = flags0; + + return 0; +} + +static void ctr64_add (unsigned char *counter,size_t inc) +{ size_t n=8, val=0; + + counter += 8; + do { + --n; + val += counter[n] + (inc&0xff); + counter[n] = (unsigned char)val; + val >>= 8; /* carry bit */ + inc >>= 8; + } while(n && (inc || val)); +} + +int CRYPTO_ccm128_encrypt_ccm64(CCM128_CONTEXT *ctx, + const unsigned char *inp, unsigned char *out, + size_t len,ccm128_f stream) +{ + size_t n; + unsigned int i,L; + unsigned char flags0 = ctx->nonce.c[0]; + block128_f block = ctx->block; + void * key = ctx->key; + union { u64 u[2]; u8 c[16]; } scratch; + + if (!(flags0&0x40)) + (*block)(ctx->nonce.c,ctx->cmac.c,key), + ctx->blocks++; + + ctx->nonce.c[0] = L = flags0&7; + for (n=0,i=15-L;i<15;++i) { + n |= ctx->nonce.c[i]; + ctx->nonce.c[i]=0; + n <<= 8; + } + n |= ctx->nonce.c[15]; /* reconstructed length */ + ctx->nonce.c[15]=1; + + if (n!=len) return -1; /* length mismatch */ + + ctx->blocks += ((len+15)>>3)|1; + if (ctx->blocks > (U64(1)<<61)) return -2; /* too much data */ + + if ((n=len/16)) { + (*stream)(inp,out,n,key,ctx->nonce.c,ctx->cmac.c); + n *= 16; + inp += n; + out += n; + len -= n; + if (len) ctr64_add(ctx->nonce.c,n/16); + } + + if (len) { + for (i=0; icmac.c[i] ^= inp[i]; + (*block)(ctx->cmac.c,ctx->cmac.c,key); + (*block)(ctx->nonce.c,scratch.c,key); + for (i=0; inonce.c[i]=0; + + (*block)(ctx->nonce.c,scratch.c,key); + ctx->cmac.u[0] ^= scratch.u[0]; + ctx->cmac.u[1] ^= scratch.u[1]; + + ctx->nonce.c[0] = flags0; + + return 0; +} + +int CRYPTO_ccm128_decrypt_ccm64(CCM128_CONTEXT *ctx, + const unsigned char *inp, unsigned char *out, + size_t len,ccm128_f stream) +{ + size_t n; + unsigned int i,L; + unsigned char flags0 = ctx->nonce.c[0]; + block128_f block = ctx->block; + void * key = ctx->key; + union { u64 u[2]; u8 c[16]; } scratch; + + if (!(flags0&0x40)) + (*block)(ctx->nonce.c,ctx->cmac.c,key); + + ctx->nonce.c[0] = L = flags0&7; + for (n=0,i=15-L;i<15;++i) { + n |= ctx->nonce.c[i]; + ctx->nonce.c[i]=0; + n <<= 8; + } + n |= ctx->nonce.c[15]; /* reconstructed length */ + ctx->nonce.c[15]=1; + + if (n!=len) return -1; + + if ((n=len/16)) { + (*stream)(inp,out,n,key,ctx->nonce.c,ctx->cmac.c); + n *= 16; + inp += n; + out += n; + len -= n; + if (len) ctr64_add(ctx->nonce.c,n/16); + } + + if (len) { + (*block)(ctx->nonce.c,scratch.c,key); + for (i=0; icmac.c[i] ^= (out[i] = scratch.c[i]^inp[i]); + (*block)(ctx->cmac.c,ctx->cmac.c,key); + } + + for (i=15-L;i<16;++i) + ctx->nonce.c[i]=0; + + (*block)(ctx->nonce.c,scratch.c,key); + ctx->cmac.u[0] ^= scratch.u[0]; + ctx->cmac.u[1] ^= scratch.u[1]; + + ctx->nonce.c[0] = flags0; + + return 0; +} + +size_t CRYPTO_ccm128_tag(CCM128_CONTEXT *ctx,unsigned char *tag,size_t len) +{ unsigned int M = (ctx->nonce.c[0]>>3)&7; /* the M parameter */ + + M *= 2; M += 2; + if (len != M) return 0; + memcpy(tag,ctx->cmac.c,M); + return M; +} diff --git a/ext/libressl/crypto/modes/cfb128.c b/ext/libressl/crypto/modes/cfb128.c new file mode 100644 index 0000000..88bfbc4 --- /dev/null +++ b/ext/libressl/crypto/modes/cfb128.c @@ -0,0 +1,234 @@ +/* $OpenBSD: cfb128.c,v 1.4 2015/02/10 09:46:30 miod Exp $ */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + */ + +#include +#include "modes_lcl.h" +#include + +#ifndef MODES_DEBUG +# ifndef NDEBUG +# define NDEBUG +# endif +#endif + +/* The input and output encrypted as though 128bit cfb mode is being + * used. The extra state information to record how much of the + * 128bit block we have used is contained in *num; + */ +void CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const void *key, + unsigned char ivec[16], int *num, + int enc, block128_f block) +{ + unsigned int n; + size_t l = 0; + + n = *num; + + if (enc) { +#if !defined(OPENSSL_SMALL_FOOTPRINT) + if (16%sizeof(size_t) == 0) do { /* always true actually */ + while (n && len) { + *(out++) = ivec[n] ^= *(in++); + --len; + n = (n+1) % 16; + } +#ifdef __STRICT_ALIGNMENT + if (((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0) + break; +#endif + while (len>=16) { + (*block)(ivec, ivec, key); + for (; n<16; n+=sizeof(size_t)) { + *(size_t*)(out+n) = + *(size_t*)(ivec+n) ^= *(size_t*)(in+n); + } + len -= 16; + out += 16; + in += 16; + n = 0; + } + if (len) { + (*block)(ivec, ivec, key); + while (len--) { + out[n] = ivec[n] ^= in[n]; + ++n; + } + } + *num = n; + return; + } while (0); + /* the rest would be commonly eliminated by x86* compiler */ +#endif + while (l=16) { + (*block)(ivec, ivec, key); + for (; n<16; n+=sizeof(size_t)) { + size_t t = *(size_t*)(in+n); + *(size_t*)(out+n) = *(size_t*)(ivec+n) ^ t; + *(size_t*)(ivec+n) = t; + } + len -= 16; + out += 16; + in += 16; + n = 0; + } + if (len) { + (*block)(ivec, ivec, key); + while (len--) { + unsigned char c; + out[n] = ivec[n] ^ (c = in[n]); ivec[n] = c; + ++n; + } + } + *num = n; + return; + } while (0); + /* the rest would be commonly eliminated by x86* compiler */ +#endif + while (l128) return; + + /* fill in the first half of the new IV with the current IV */ + memcpy(ovec,ivec,16); + /* construct the new IV */ + (*block)(ivec,ivec,key); + num = (nbits+7)/8; + if (enc) /* encrypt the input */ + for(n=0 ; n < num ; ++n) + out[n] = (ovec[16+n] = in[n] ^ ivec[n]); + else /* decrypt the input */ + for(n=0 ; n < num ; ++n) + out[n] = (ovec[16+n] = in[n]) ^ ivec[n]; + /* shift ovec left... */ + rem = nbits%8; + num = nbits/8; + if(rem==0) + memcpy(ivec,ovec+num,16); + else + for(n=0 ; n < 16 ; ++n) + ivec[n] = ovec[n+num]<>(8-rem); + + /* it is not necessary to cleanse ovec, since the IV is not secret */ +} + +/* N.B. This expects the input to be packed, MS bit first */ +void CRYPTO_cfb128_1_encrypt(const unsigned char *in, unsigned char *out, + size_t bits, const void *key, + unsigned char ivec[16], int *num, + int enc, block128_f block) +{ + size_t n; + unsigned char c[1],d[1]; + + for(n=0 ; n> (unsigned int)(n%8)); + } +} + +void CRYPTO_cfb128_8_encrypt(const unsigned char *in, unsigned char *out, + size_t length, const void *key, + unsigned char ivec[16], int *num, + int enc, block128_f block) +{ + size_t n; + + for(n=0 ; n +#include "modes_lcl.h" +#include + +#ifndef MODES_DEBUG +# ifndef NDEBUG +# define NDEBUG +# endif +#endif +#include + +/* NOTE: the IV/counter CTR mode is big-endian. The code itself + * is endian-neutral. */ + +/* increment counter (128-bit int) by 1 */ +static void ctr128_inc(unsigned char *counter) { + u32 n=16; + u8 c; + + do { + --n; + c = counter[n]; + ++c; + counter[n] = c; + if (c) return; + } while (n); +} + +#if !defined(OPENSSL_SMALL_FOOTPRINT) +static void +ctr128_inc_aligned(unsigned char *counter) +{ +#if BYTE_ORDER == LITTLE_ENDIAN + ctr128_inc(counter); +#else + size_t *data, c, n; + data = (size_t *)counter; + n = 16 / sizeof(size_t); + do { + --n; + c = data[n]; + ++c; + data[n] = c; + if (c) + return; + } while (n); +#endif +} +#endif + +/* The input encrypted as though 128bit counter mode is being + * used. The extra state information to record how much of the + * 128bit block we have used is contained in *num, and the + * encrypted counter is kept in ecount_buf. Both *num and + * ecount_buf must be initialised with zeros before the first + * call to CRYPTO_ctr128_encrypt(). + * + * This algorithm assumes that the counter is in the x lower bits + * of the IV (ivec), and that the application has full control over + * overflow and the rest of the IV. This implementation takes NO + * responsability for checking that the counter doesn't overflow + * into the rest of the IV when incremented. + */ +void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const void *key, + unsigned char ivec[16], unsigned char ecount_buf[16], + unsigned int *num, block128_f block) +{ + unsigned int n; + size_t l=0; + + assert(*num < 16); + + n = *num; + +#if !defined(OPENSSL_SMALL_FOOTPRINT) + if (16%sizeof(size_t) == 0) do { /* always true actually */ + while (n && len) { + *(out++) = *(in++) ^ ecount_buf[n]; + --len; + n = (n+1) % 16; + } + +#ifdef __STRICT_ALIGNMENT + if (((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0) + break; +#endif + while (len>=16) { + (*block)(ivec, ecount_buf, key); + ctr128_inc_aligned(ivec); + for (; n<16; n+=sizeof(size_t)) + *(size_t *)(out+n) = + *(size_t *)(in+n) ^ *(size_t *)(ecount_buf+n); + len -= 16; + out += 16; + in += 16; + n = 0; + } + if (len) { + (*block)(ivec, ecount_buf, key); + ctr128_inc_aligned(ivec); + while (len--) { + out[n] = in[n] ^ ecount_buf[n]; + ++n; + } + } + *num = n; + return; + } while(0); + /* the rest would be commonly eliminated by x86* compiler */ +#endif + while (l=16) { + size_t blocks = len/16; + /* + * 1<<28 is just a not-so-small yet not-so-large number... + * Below condition is practically never met, but it has to + * be checked for code correctness. + */ + if (sizeof(size_t)>sizeof(unsigned int) && blocks>(1U<<28)) + blocks = (1U<<28); + /* + * As (*func) operates on 32-bit counter, caller + * has to handle overflow. 'if' below detects the + * overflow, which is then handled by limiting the + * amount of blocks to the exact overflow point... + */ + ctr32 += (u32)blocks; + if (ctr32 < blocks) { + blocks -= ctr32; + ctr32 = 0; + } + (*func)(in,out,blocks,key,ivec); + /* (*ctr) does not update ivec, caller does: */ + PUTU32(ivec+12,ctr32); + /* ... overflow was detected, propogate carry. */ + if (ctr32 == 0) ctr96_inc(ivec); + blocks *= 16; + len -= blocks; + out += blocks; + in += blocks; + } + if (len) { + memset(ecount_buf,0,16); + (*func)(ecount_buf,ecount_buf,1,key,ivec); + ++ctr32; + PUTU32(ivec+12,ctr32); + if (ctr32 == 0) ctr96_inc(ivec); + while (len--) { + out[n] = in[n] ^ ecount_buf[n]; + ++n; + } + } + + *num=n; +} diff --git a/ext/libressl/crypto/modes/cts128.c b/ext/libressl/crypto/modes/cts128.c new file mode 100644 index 0000000..b2f7174 --- /dev/null +++ b/ext/libressl/crypto/modes/cts128.c @@ -0,0 +1,267 @@ +/* $OpenBSD: cts128.c,v 1.5 2015/07/19 18:27:26 miod Exp $ */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Rights for redistribution and usage in source and binary + * forms are granted according to the OpenSSL license. + */ + +#include +#include "modes_lcl.h" +#include + +#ifndef MODES_DEBUG +# ifndef NDEBUG +# define NDEBUG +# endif +#endif + +/* + * Trouble with Ciphertext Stealing, CTS, mode is that there is no + * common official specification, but couple of cipher/application + * specific ones: RFC2040 and RFC3962. Then there is 'Proposal to + * Extend CBC Mode By "Ciphertext Stealing"' at NIST site, which + * deviates from mentioned RFCs. Most notably it allows input to be + * of block length and it doesn't flip the order of the last two + * blocks. CTS is being discussed even in ECB context, but it's not + * adopted for any known application. This implementation provides + * two interfaces: one compliant with above mentioned RFCs and one + * compliant with the NIST proposal, both extending CBC mode. + */ + +size_t CRYPTO_cts128_encrypt_block(const unsigned char *in, unsigned char *out, + size_t len, const void *key, + unsigned char ivec[16], block128_f block) +{ size_t residue, n; + + if (len <= 16) return 0; + + if ((residue=len%16) == 0) residue = 16; + + len -= residue; + + CRYPTO_cbc128_encrypt(in,out,len,key,ivec,block); + + in += len; + out += len; + + for (n=0; n +#include "modes_lcl.h" +#include + +#ifndef MODES_DEBUG +# ifndef NDEBUG +# define NDEBUG +# endif +#endif + +#if defined(BSWAP4) && defined(__STRICT_ALIGNMENT) +/* redefine, because alignment is ensured */ +#undef GETU32 +#define GETU32(p) BSWAP4(*(const u32 *)(p)) +#undef PUTU32 +#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v) +#endif + +#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16)) +#define REDUCE1BIT(V) \ + do { \ + if (sizeof(size_t)==8) { \ + u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \ + V.lo = (V.hi<<63)|(V.lo>>1); \ + V.hi = (V.hi>>1 )^T; \ + } else { \ + u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \ + V.lo = (V.hi<<63)|(V.lo>>1); \ + V.hi = (V.hi>>1 )^((u64)T<<32); \ + } \ + } while(0) + +/* + * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should + * never be set to 8. 8 is effectively reserved for testing purposes. + * TABLE_BITS>1 are lookup-table-driven implementations referred to as + * "Shoup's" in GCM specification. In other words OpenSSL does not cover + * whole spectrum of possible table driven implementations. Why? In + * non-"Shoup's" case memory access pattern is segmented in such manner, + * that it's trivial to see that cache timing information can reveal + * fair portion of intermediate hash value. Given that ciphertext is + * always available to attacker, it's possible for him to attempt to + * deduce secret parameter H and if successful, tamper with messages + * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's + * not as trivial, but there is no reason to believe that it's resistant + * to cache-timing attack. And the thing about "8-bit" implementation is + * that it consumes 16 (sixteen) times more memory, 4KB per individual + * key + 1KB shared. Well, on pros side it should be twice as fast as + * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version + * was observed to run ~75% faster, closer to 100% for commercial + * compilers... Yet "4-bit" procedure is preferred, because it's + * believed to provide better security-performance balance and adequate + * all-round performance. "All-round" refers to things like: + * + * - shorter setup time effectively improves overall timing for + * handling short messages; + * - larger table allocation can become unbearable because of VM + * subsystem penalties (for example on Windows large enough free + * results in VM working set trimming, meaning that consequent + * malloc would immediately incur working set expansion); + * - larger table has larger cache footprint, which can affect + * performance of other code paths (not necessarily even from same + * thread in Hyper-Threading world); + * + * Value of 1 is not appropriate for performance reasons. + */ +#if TABLE_BITS==8 + +static void gcm_init_8bit(u128 Htable[256], u64 H[2]) +{ + int i, j; + u128 V; + + Htable[0].hi = 0; + Htable[0].lo = 0; + V.hi = H[0]; + V.lo = H[1]; + + for (Htable[128]=V, i=64; i>0; i>>=1) { + REDUCE1BIT(V); + Htable[i] = V; + } + + for (i=2; i<256; i<<=1) { + u128 *Hi = Htable+i, H0 = *Hi; + for (j=1; j>8); + Z.hi = (Z.hi>>8); +#if SIZE_MAX == 0xffffffffffffffff + Z.hi ^= rem_8bit[rem]; +#else + Z.hi ^= (u64)rem_8bit[rem]<<32; +#endif + } + +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP8 + Xi[0] = BSWAP8(Z.hi); + Xi[1] = BSWAP8(Z.lo); +#else + u8 *p = (u8 *)Xi; + u32 v; + v = (u32)(Z.hi>>32); PUTU32(p,v); + v = (u32)(Z.hi); PUTU32(p+4,v); + v = (u32)(Z.lo>>32); PUTU32(p+8,v); + v = (u32)(Z.lo); PUTU32(p+12,v); +#endif +#else /* BIG_ENDIAN */ + Xi[0] = Z.hi; + Xi[1] = Z.lo; +#endif +} +#define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable) + +#elif TABLE_BITS==4 + +static void gcm_init_4bit(u128 Htable[16], u64 H[2]) +{ + u128 V; +#if defined(OPENSSL_SMALL_FOOTPRINT) + int i; +#endif + + Htable[0].hi = 0; + Htable[0].lo = 0; + V.hi = H[0]; + V.lo = H[1]; + +#if defined(OPENSSL_SMALL_FOOTPRINT) + for (Htable[8]=V, i=4; i>0; i>>=1) { + REDUCE1BIT(V); + Htable[i] = V; + } + + for (i=2; i<16; i<<=1) { + u128 *Hi = Htable+i; + int j; + for (V=*Hi, j=1; j>32; + Htable[j].lo = V.hi<<32|V.hi>>32; + } +#endif + } +#endif +} + +#ifndef GHASH_ASM +static const size_t rem_4bit[16] = { + PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460), + PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0), + PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560), + PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) }; + +static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) +{ + u128 Z; + int cnt = 15; + size_t rem, nlo, nhi; + + nlo = ((const u8 *)Xi)[15]; + nhi = nlo>>4; + nlo &= 0xf; + + Z.hi = Htable[nlo].hi; + Z.lo = Htable[nlo].lo; + + while (1) { + rem = (size_t)Z.lo&0xf; + Z.lo = (Z.hi<<60)|(Z.lo>>4); + Z.hi = (Z.hi>>4); +#if SIZE_MAX == 0xffffffffffffffff + Z.hi ^= rem_4bit[rem]; +#else + Z.hi ^= (u64)rem_4bit[rem]<<32; +#endif + Z.hi ^= Htable[nhi].hi; + Z.lo ^= Htable[nhi].lo; + + if (--cnt<0) break; + + nlo = ((const u8 *)Xi)[cnt]; + nhi = nlo>>4; + nlo &= 0xf; + + rem = (size_t)Z.lo&0xf; + Z.lo = (Z.hi<<60)|(Z.lo>>4); + Z.hi = (Z.hi>>4); +#if SIZE_MAX == 0xffffffffffffffff + Z.hi ^= rem_4bit[rem]; +#else + Z.hi ^= (u64)rem_4bit[rem]<<32; +#endif + Z.hi ^= Htable[nlo].hi; + Z.lo ^= Htable[nlo].lo; + } + +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP8 + Xi[0] = BSWAP8(Z.hi); + Xi[1] = BSWAP8(Z.lo); +#else + u8 *p = (u8 *)Xi; + u32 v; + v = (u32)(Z.hi>>32); PUTU32(p,v); + v = (u32)(Z.hi); PUTU32(p+4,v); + v = (u32)(Z.lo>>32); PUTU32(p+8,v); + v = (u32)(Z.lo); PUTU32(p+12,v); +#endif +#else /* BIG_ENDIAN */ + Xi[0] = Z.hi; + Xi[1] = Z.lo; +#endif +} + +#if !defined(OPENSSL_SMALL_FOOTPRINT) +/* + * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for + * details... Compiler-generated code doesn't seem to give any + * performance improvement, at least not on x86[_64]. It's here + * mostly as reference and a placeholder for possible future + * non-trivial optimization[s]... + */ +static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16], + const u8 *inp,size_t len) +{ + u128 Z; + int cnt; + size_t rem, nlo, nhi; + +#if 1 + do { + cnt = 15; + nlo = ((const u8 *)Xi)[15]; + nlo ^= inp[15]; + nhi = nlo>>4; + nlo &= 0xf; + + Z.hi = Htable[nlo].hi; + Z.lo = Htable[nlo].lo; + + while (1) { + rem = (size_t)Z.lo&0xf; + Z.lo = (Z.hi<<60)|(Z.lo>>4); + Z.hi = (Z.hi>>4); +#if SIZE_MAX == 0xffffffffffffffff + Z.hi ^= rem_4bit[rem]; +#else + Z.hi ^= (u64)rem_4bit[rem]<<32; +#endif + Z.hi ^= Htable[nhi].hi; + Z.lo ^= Htable[nhi].lo; + + if (--cnt<0) break; + + nlo = ((const u8 *)Xi)[cnt]; + nlo ^= inp[cnt]; + nhi = nlo>>4; + nlo &= 0xf; + + rem = (size_t)Z.lo&0xf; + Z.lo = (Z.hi<<60)|(Z.lo>>4); + Z.hi = (Z.hi>>4); +#if SIZE_MAX == 0xffffffffffffffff + Z.hi ^= rem_4bit[rem]; +#else + Z.hi ^= (u64)rem_4bit[rem]<<32; +#endif + Z.hi ^= Htable[nlo].hi; + Z.lo ^= Htable[nlo].lo; + } +#else + /* + * Extra 256+16 bytes per-key plus 512 bytes shared tables + * [should] give ~50% improvement... One could have PACK()-ed + * the rem_8bit even here, but the priority is to minimize + * cache footprint... + */ + u128 Hshr4[16]; /* Htable shifted right by 4 bits */ + u8 Hshl4[16]; /* Htable shifted left by 4 bits */ + static const unsigned short rem_8bit[256] = { + 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E, + 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E, + 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E, + 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E, + 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E, + 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E, + 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E, + 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E, + 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE, + 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE, + 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE, + 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE, + 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E, + 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E, + 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE, + 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE, + 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E, + 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E, + 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E, + 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E, + 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E, + 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E, + 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E, + 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E, + 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE, + 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE, + 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE, + 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE, + 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E, + 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E, + 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE, + 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE }; + /* + * This pre-processing phase slows down procedure by approximately + * same time as it makes each loop spin faster. In other words + * single block performance is approximately same as straightforward + * "4-bit" implementation, and then it goes only faster... + */ + for (cnt=0; cnt<16; ++cnt) { + Z.hi = Htable[cnt].hi; + Z.lo = Htable[cnt].lo; + Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4); + Hshr4[cnt].hi = (Z.hi>>4); + Hshl4[cnt] = (u8)(Z.lo<<4); + } + + do { + for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) { + nlo = ((const u8 *)Xi)[cnt]; + nlo ^= inp[cnt]; + nhi = nlo>>4; + nlo &= 0xf; + + Z.hi ^= Htable[nlo].hi; + Z.lo ^= Htable[nlo].lo; + + rem = (size_t)Z.lo&0xff; + + Z.lo = (Z.hi<<56)|(Z.lo>>8); + Z.hi = (Z.hi>>8); + + Z.hi ^= Hshr4[nhi].hi; + Z.lo ^= Hshr4[nhi].lo; + Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48; + } + + nlo = ((const u8 *)Xi)[0]; + nlo ^= inp[0]; + nhi = nlo>>4; + nlo &= 0xf; + + Z.hi ^= Htable[nlo].hi; + Z.lo ^= Htable[nlo].lo; + + rem = (size_t)Z.lo&0xf; + + Z.lo = (Z.hi<<60)|(Z.lo>>4); + Z.hi = (Z.hi>>4); + + Z.hi ^= Htable[nhi].hi; + Z.lo ^= Htable[nhi].lo; + Z.hi ^= ((u64)rem_8bit[rem<<4])<<48; +#endif + +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP8 + Xi[0] = BSWAP8(Z.hi); + Xi[1] = BSWAP8(Z.lo); +#else + u8 *p = (u8 *)Xi; + u32 v; + v = (u32)(Z.hi>>32); PUTU32(p,v); + v = (u32)(Z.hi); PUTU32(p+4,v); + v = (u32)(Z.lo>>32); PUTU32(p+8,v); + v = (u32)(Z.lo); PUTU32(p+12,v); +#endif +#else /* BIG_ENDIAN */ + Xi[0] = Z.hi; + Xi[1] = Z.lo; +#endif + } while (inp+=16, len-=16); +} +#endif +#else +void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]); +void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); +#endif + +#define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable) +#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT) +#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len) +/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache + * trashing effect. In other words idea is to hash data while it's + * still in L1 cache after encryption pass... */ +#define GHASH_CHUNK (3*1024) +#endif + +#else /* TABLE_BITS */ + +static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2]) +{ + u128 V,Z = { 0,0 }; + long X; + int i,j; + const long *xi = (const long *)Xi; + + V.hi = H[0]; /* H is in host byte order, no byte swapping */ + V.lo = H[1]; + + for (j=0; j<16/sizeof(long); ++j) { +#if BYTE_ORDER == LITTLE_ENDIAN +#if SIZE_MAX == 0xffffffffffffffff +#ifdef BSWAP8 + X = (long)(BSWAP8(xi[j])); +#else + const u8 *p = (const u8 *)(xi+j); + X = (long)((u64)GETU32(p)<<32|GETU32(p+4)); +#endif +#else + const u8 *p = (const u8 *)(xi+j); + X = (long)GETU32(p); +#endif +#else /* BIG_ENDIAN */ + X = xi[j]; +#endif + + for (i=0; i<8*sizeof(long); ++i, X<<=1) { + u64 M = (u64)(X>>(8*sizeof(long)-1)); + Z.hi ^= V.hi&M; + Z.lo ^= V.lo&M; + + REDUCE1BIT(V); + } + } + +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP8 + Xi[0] = BSWAP8(Z.hi); + Xi[1] = BSWAP8(Z.lo); +#else + u8 *p = (u8 *)Xi; + u32 v; + v = (u32)(Z.hi>>32); PUTU32(p,v); + v = (u32)(Z.hi); PUTU32(p+4,v); + v = (u32)(Z.lo>>32); PUTU32(p+8,v); + v = (u32)(Z.lo); PUTU32(p+12,v); +#endif +#else /* BIG_ENDIAN */ + Xi[0] = Z.hi; + Xi[1] = Z.lo; +#endif +} +#define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u) + +#endif + +#if defined(GHASH_ASM) && \ + (defined(__i386) || defined(__i386__) || \ + defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) +#include "x86_arch.h" +#endif + +#if TABLE_BITS==4 && defined(GHASH_ASM) +# if (defined(__i386) || defined(__i386__) || \ + defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) +# define GHASH_ASM_X86_OR_64 +# define GCM_FUNCREF_4BIT + +void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]); +void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]); +void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); + +# if defined(__i386) || defined(__i386__) || defined(_M_IX86) +# define GHASH_ASM_X86 +void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]); +void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); + +void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]); +void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); +# endif +# elif defined(__arm__) || defined(__arm) +# include "arm_arch.h" +# if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) +# define GHASH_ASM_ARM +# define GCM_FUNCREF_4BIT +void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]); +void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); +# endif +# endif +#endif + +#ifdef GCM_FUNCREF_4BIT +# undef GCM_MUL +# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable) +# ifdef GHASH +# undef GHASH +# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len) +# endif +#endif + +void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) +{ + memset(ctx,0,sizeof(*ctx)); + ctx->block = block; + ctx->key = key; + + (*block)(ctx->H.c,ctx->H.c,key); + +#if BYTE_ORDER == LITTLE_ENDIAN + /* H is stored in host byte order */ +#ifdef BSWAP8 + ctx->H.u[0] = BSWAP8(ctx->H.u[0]); + ctx->H.u[1] = BSWAP8(ctx->H.u[1]); +#else + u8 *p = ctx->H.c; + u64 hi,lo; + hi = (u64)GETU32(p) <<32|GETU32(p+4); + lo = (u64)GETU32(p+8)<<32|GETU32(p+12); + ctx->H.u[0] = hi; + ctx->H.u[1] = lo; +#endif +#endif + +#if TABLE_BITS==8 + gcm_init_8bit(ctx->Htable,ctx->H.u); +#elif TABLE_BITS==4 +# if defined(GHASH_ASM_X86_OR_64) +# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) + /* check FXSR and PCLMULQDQ bits */ + if ((OPENSSL_cpu_caps() & (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) == + (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) { + gcm_init_clmul(ctx->Htable,ctx->H.u); + ctx->gmult = gcm_gmult_clmul; + ctx->ghash = gcm_ghash_clmul; + return; + } +# endif + gcm_init_4bit(ctx->Htable,ctx->H.u); +# if defined(GHASH_ASM_X86) /* x86 only */ +# if defined(OPENSSL_IA32_SSE2) + if (OPENSSL_cpu_caps() & CPUCAP_MASK_SSE) { /* check SSE bit */ +# else + if (OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) { /* check MMX bit */ +# endif + ctx->gmult = gcm_gmult_4bit_mmx; + ctx->ghash = gcm_ghash_4bit_mmx; + } else { + ctx->gmult = gcm_gmult_4bit_x86; + ctx->ghash = gcm_ghash_4bit_x86; + } +# else + ctx->gmult = gcm_gmult_4bit; + ctx->ghash = gcm_ghash_4bit; +# endif +# elif defined(GHASH_ASM_ARM) + if (OPENSSL_armcap_P & ARMV7_NEON) { + ctx->gmult = gcm_gmult_neon; + ctx->ghash = gcm_ghash_neon; + } else { + gcm_init_4bit(ctx->Htable,ctx->H.u); + ctx->gmult = gcm_gmult_4bit; + ctx->ghash = gcm_ghash_4bit; + } +# else + gcm_init_4bit(ctx->Htable,ctx->H.u); +# endif +#endif +} + +void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len) +{ + unsigned int ctr; +#ifdef GCM_FUNCREF_4BIT + void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; +#endif + + ctx->Yi.u[0] = 0; + ctx->Yi.u[1] = 0; + ctx->Xi.u[0] = 0; + ctx->Xi.u[1] = 0; + ctx->len.u[0] = 0; /* AAD length */ + ctx->len.u[1] = 0; /* message length */ + ctx->ares = 0; + ctx->mres = 0; + + if (len==12) { + memcpy(ctx->Yi.c,iv,12); + ctx->Yi.c[15]=1; + ctr=1; + } + else { + size_t i; + u64 len0 = len; + + while (len>=16) { + for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i]; + GCM_MUL(ctx,Yi); + iv += 16; + len -= 16; + } + if (len) { + for (i=0; iYi.c[i] ^= iv[i]; + GCM_MUL(ctx,Yi); + } + len0 <<= 3; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP8 + ctx->Yi.u[1] ^= BSWAP8(len0); +#else + ctx->Yi.c[8] ^= (u8)(len0>>56); + ctx->Yi.c[9] ^= (u8)(len0>>48); + ctx->Yi.c[10] ^= (u8)(len0>>40); + ctx->Yi.c[11] ^= (u8)(len0>>32); + ctx->Yi.c[12] ^= (u8)(len0>>24); + ctx->Yi.c[13] ^= (u8)(len0>>16); + ctx->Yi.c[14] ^= (u8)(len0>>8); + ctx->Yi.c[15] ^= (u8)(len0); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.u[1] ^= len0; +#endif + + GCM_MUL(ctx,Yi); + +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctr = BSWAP4(ctx->Yi.d[3]); +#else + ctr = GETU32(ctx->Yi.c+12); +#endif +#else /* BIG_ENDIAN */ + ctr = ctx->Yi.d[3]; +#endif + } + + (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key); + ++ctr; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.d[3] = ctr; +#endif +} + +int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len) +{ + size_t i; + unsigned int n; + u64 alen = ctx->len.u[0]; +#ifdef GCM_FUNCREF_4BIT + void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; +# ifdef GHASH + void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16], + const u8 *inp,size_t len) = ctx->ghash; +# endif +#endif + + if (ctx->len.u[1]) return -2; + + alen += len; + if (alen>(U64(1)<<61) || (sizeof(len)==8 && alenlen.u[0] = alen; + + n = ctx->ares; + if (n) { + while (n && len) { + ctx->Xi.c[n] ^= *(aad++); + --len; + n = (n+1)%16; + } + if (n==0) GCM_MUL(ctx,Xi); + else { + ctx->ares = n; + return 0; + } + } + +#ifdef GHASH + if ((i = (len&(size_t)-16))) { + GHASH(ctx,aad,i); + aad += i; + len -= i; + } +#else + while (len>=16) { + for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i]; + GCM_MUL(ctx,Xi); + aad += 16; + len -= 16; + } +#endif + if (len) { + n = (unsigned int)len; + for (i=0; iXi.c[i] ^= aad[i]; + } + + ctx->ares = n; + return 0; +} + +int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, + const unsigned char *in, unsigned char *out, + size_t len) +{ + unsigned int n, ctr; + size_t i; + u64 mlen = ctx->len.u[1]; + block128_f block = ctx->block; + void *key = ctx->key; +#ifdef GCM_FUNCREF_4BIT + void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; +# ifdef GHASH + void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16], + const u8 *inp,size_t len) = ctx->ghash; +# endif +#endif + + mlen += len; + if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlenlen.u[1] = mlen; + + if (ctx->ares) { + /* First call to encrypt finalizes GHASH(AAD) */ + GCM_MUL(ctx,Xi); + ctx->ares = 0; + } + +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctr = BSWAP4(ctx->Yi.d[3]); +#else + ctr = GETU32(ctx->Yi.c+12); +#endif +#else /* BIG_ENDIAN */ + ctr = ctx->Yi.d[3]; +#endif + + n = ctx->mres; +#if !defined(OPENSSL_SMALL_FOOTPRINT) + if (16%sizeof(size_t) == 0) do { /* always true actually */ + if (n) { + while (n && len) { + ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n]; + --len; + n = (n+1)%16; + } + if (n==0) GCM_MUL(ctx,Xi); + else { + ctx->mres = n; + return 0; + } + } +#ifdef __STRICT_ALIGNMENT + if (((size_t)in|(size_t)out)%sizeof(size_t) != 0) + break; +#endif +#if defined(GHASH) && defined(GHASH_CHUNK) + while (len>=GHASH_CHUNK) { + size_t j=GHASH_CHUNK; + + while (j) { + size_t *out_t=(size_t *)out; + const size_t *in_t=(const size_t *)in; + + (*block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.d[3] = ctr; +#endif + for (i=0; i<16/sizeof(size_t); ++i) + out_t[i] = in_t[i] ^ ctx->EKi.t[i]; + out += 16; + in += 16; + j -= 16; + } + GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK); + len -= GHASH_CHUNK; + } + if ((i = (len&(size_t)-16))) { + size_t j=i; + + while (len>=16) { + size_t *out_t=(size_t *)out; + const size_t *in_t=(const size_t *)in; + + (*block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.d[3] = ctr; +#endif + for (i=0; i<16/sizeof(size_t); ++i) + out_t[i] = in_t[i] ^ ctx->EKi.t[i]; + out += 16; + in += 16; + len -= 16; + } + GHASH(ctx,out-j,j); + } +#else + while (len>=16) { + size_t *out_t=(size_t *)out; + const size_t *in_t=(const size_t *)in; + + (*block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.d[3] = ctr; +#endif + for (i=0; i<16/sizeof(size_t); ++i) + ctx->Xi.t[i] ^= + out_t[i] = in_t[i]^ctx->EKi.t[i]; + GCM_MUL(ctx,Xi); + out += 16; + in += 16; + len -= 16; + } +#endif + if (len) { + (*block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.d[3] = ctr; +#endif + while (len--) { + ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n]; + ++n; + } + } + + ctx->mres = n; + return 0; + } while(0); +#endif + for (i=0;iYi.c,ctx->EKi.c,key); + ++ctr; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.d[3] = ctr; +#endif + } + ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n]; + n = (n+1)%16; + if (n==0) + GCM_MUL(ctx,Xi); + } + + ctx->mres = n; + return 0; +} + +int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, + const unsigned char *in, unsigned char *out, + size_t len) +{ + unsigned int n, ctr; + size_t i; + u64 mlen = ctx->len.u[1]; + block128_f block = ctx->block; + void *key = ctx->key; +#ifdef GCM_FUNCREF_4BIT + void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; +# ifdef GHASH + void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16], + const u8 *inp,size_t len) = ctx->ghash; +# endif +#endif + + mlen += len; + if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlenlen.u[1] = mlen; + + if (ctx->ares) { + /* First call to decrypt finalizes GHASH(AAD) */ + GCM_MUL(ctx,Xi); + ctx->ares = 0; + } + +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctr = BSWAP4(ctx->Yi.d[3]); +#else + ctr = GETU32(ctx->Yi.c+12); +#endif +#else /* BIG_ENDIAN */ + ctr = ctx->Yi.d[3]; +#endif + + n = ctx->mres; +#if !defined(OPENSSL_SMALL_FOOTPRINT) + if (16%sizeof(size_t) == 0) do { /* always true actually */ + if (n) { + while (n && len) { + u8 c = *(in++); + *(out++) = c^ctx->EKi.c[n]; + ctx->Xi.c[n] ^= c; + --len; + n = (n+1)%16; + } + if (n==0) GCM_MUL (ctx,Xi); + else { + ctx->mres = n; + return 0; + } + } +#ifdef __STRICT_ALIGNMENT + if (((size_t)in|(size_t)out)%sizeof(size_t) != 0) + break; +#endif +#if defined(GHASH) && defined(GHASH_CHUNK) + while (len>=GHASH_CHUNK) { + size_t j=GHASH_CHUNK; + + GHASH(ctx,in,GHASH_CHUNK); + while (j) { + size_t *out_t=(size_t *)out; + const size_t *in_t=(const size_t *)in; + + (*block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.d[3] = ctr; +#endif + for (i=0; i<16/sizeof(size_t); ++i) + out_t[i] = in_t[i]^ctx->EKi.t[i]; + out += 16; + in += 16; + j -= 16; + } + len -= GHASH_CHUNK; + } + if ((i = (len&(size_t)-16))) { + GHASH(ctx,in,i); + while (len>=16) { + size_t *out_t=(size_t *)out; + const size_t *in_t=(const size_t *)in; + + (*block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.d[3] = ctr; +#endif + for (i=0; i<16/sizeof(size_t); ++i) + out_t[i] = in_t[i]^ctx->EKi.t[i]; + out += 16; + in += 16; + len -= 16; + } + } +#else + while (len>=16) { + size_t *out_t=(size_t *)out; + const size_t *in_t=(const size_t *)in; + + (*block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.d[3] = ctr; +#endif + for (i=0; i<16/sizeof(size_t); ++i) { + size_t c = in[i]; + out[i] = c^ctx->EKi.t[i]; + ctx->Xi.t[i] ^= c; + } + GCM_MUL(ctx,Xi); + out += 16; + in += 16; + len -= 16; + } +#endif + if (len) { + (*block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.d[3] = ctr; +#endif + while (len--) { + u8 c = in[n]; + ctx->Xi.c[n] ^= c; + out[n] = c^ctx->EKi.c[n]; + ++n; + } + } + + ctx->mres = n; + return 0; + } while(0); +#endif + for (i=0;iYi.c,ctx->EKi.c,key); + ++ctr; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.d[3] = ctr; +#endif + } + c = in[i]; + out[i] = c^ctx->EKi.c[n]; + ctx->Xi.c[n] ^= c; + n = (n+1)%16; + if (n==0) + GCM_MUL(ctx,Xi); + } + + ctx->mres = n; + return 0; +} + +int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, + const unsigned char *in, unsigned char *out, + size_t len, ctr128_f stream) +{ + unsigned int n, ctr; + size_t i; + u64 mlen = ctx->len.u[1]; + void *key = ctx->key; +#ifdef GCM_FUNCREF_4BIT + void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; +# ifdef GHASH + void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16], + const u8 *inp,size_t len) = ctx->ghash; +# endif +#endif + + mlen += len; + if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlenlen.u[1] = mlen; + + if (ctx->ares) { + /* First call to encrypt finalizes GHASH(AAD) */ + GCM_MUL(ctx,Xi); + ctx->ares = 0; + } + +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctr = BSWAP4(ctx->Yi.d[3]); +#else + ctr = GETU32(ctx->Yi.c+12); +#endif +#else /* BIG_ENDIAN */ + ctr = ctx->Yi.d[3]; +#endif + + n = ctx->mres; + if (n) { + while (n && len) { + ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n]; + --len; + n = (n+1)%16; + } + if (n==0) GCM_MUL(ctx,Xi); + else { + ctx->mres = n; + return 0; + } + } +#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + while (len>=GHASH_CHUNK) { + (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c); + ctr += GHASH_CHUNK/16; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.d[3] = ctr; +#endif + GHASH(ctx,out,GHASH_CHUNK); + out += GHASH_CHUNK; + in += GHASH_CHUNK; + len -= GHASH_CHUNK; + } +#endif + if ((i = (len&(size_t)-16))) { + size_t j=i/16; + + (*stream)(in,out,j,key,ctx->Yi.c); + ctr += (unsigned int)j; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.d[3] = ctr; +#endif + in += i; + len -= i; +#if defined(GHASH) + GHASH(ctx,out,i); + out += i; +#else + while (j--) { + for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i]; + GCM_MUL(ctx,Xi); + out += 16; + } +#endif + } + if (len) { + (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.d[3] = ctr; +#endif + while (len--) { + ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n]; + ++n; + } + } + + ctx->mres = n; + return 0; +} + +int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, + const unsigned char *in, unsigned char *out, + size_t len,ctr128_f stream) +{ + unsigned int n, ctr; + size_t i; + u64 mlen = ctx->len.u[1]; + void *key = ctx->key; +#ifdef GCM_FUNCREF_4BIT + void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; +# ifdef GHASH + void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16], + const u8 *inp,size_t len) = ctx->ghash; +# endif +#endif + + mlen += len; + if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlenlen.u[1] = mlen; + + if (ctx->ares) { + /* First call to decrypt finalizes GHASH(AAD) */ + GCM_MUL(ctx,Xi); + ctx->ares = 0; + } + +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctr = BSWAP4(ctx->Yi.d[3]); +#else + ctr = GETU32(ctx->Yi.c+12); +#endif +#else /* BIG_ENDIAN */ + ctr = ctx->Yi.d[3]; +#endif + + n = ctx->mres; + if (n) { + while (n && len) { + u8 c = *(in++); + *(out++) = c^ctx->EKi.c[n]; + ctx->Xi.c[n] ^= c; + --len; + n = (n+1)%16; + } + if (n==0) GCM_MUL (ctx,Xi); + else { + ctx->mres = n; + return 0; + } + } +#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) + while (len>=GHASH_CHUNK) { + GHASH(ctx,in,GHASH_CHUNK); + (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c); + ctr += GHASH_CHUNK/16; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.d[3] = ctr; +#endif + out += GHASH_CHUNK; + in += GHASH_CHUNK; + len -= GHASH_CHUNK; + } +#endif + if ((i = (len&(size_t)-16))) { + size_t j=i/16; + +#if defined(GHASH) + GHASH(ctx,in,i); +#else + while (j--) { + size_t k; + for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k]; + GCM_MUL(ctx,Xi); + in += 16; + } + j = i/16; + in -= i; +#endif + (*stream)(in,out,j,key,ctx->Yi.c); + ctr += (unsigned int)j; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.d[3] = ctr; +#endif + out += i; + in += i; + len -= i; + } + if (len) { + (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key); + ++ctr; +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP4 + ctx->Yi.d[3] = BSWAP4(ctr); +#else + PUTU32(ctx->Yi.c+12,ctr); +#endif +#else /* BIG_ENDIAN */ + ctx->Yi.d[3] = ctr; +#endif + while (len--) { + u8 c = in[n]; + ctx->Xi.c[n] ^= c; + out[n] = c^ctx->EKi.c[n]; + ++n; + } + } + + ctx->mres = n; + return 0; +} + +int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag, + size_t len) +{ + u64 alen = ctx->len.u[0]<<3; + u64 clen = ctx->len.u[1]<<3; +#ifdef GCM_FUNCREF_4BIT + void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; +#endif + + if (ctx->mres || ctx->ares) + GCM_MUL(ctx,Xi); + +#if BYTE_ORDER == LITTLE_ENDIAN +#ifdef BSWAP8 + alen = BSWAP8(alen); + clen = BSWAP8(clen); +#else + { + u8 *p = ctx->len.c; + + ctx->len.u[0] = alen; + ctx->len.u[1] = clen; + + alen = (u64)GETU32(p) <<32|GETU32(p+4); + clen = (u64)GETU32(p+8)<<32|GETU32(p+12); + } +#endif +#endif + + ctx->Xi.u[0] ^= alen; + ctx->Xi.u[1] ^= clen; + GCM_MUL(ctx,Xi); + + ctx->Xi.u[0] ^= ctx->EK0.u[0]; + ctx->Xi.u[1] ^= ctx->EK0.u[1]; + + if (tag && len<=sizeof(ctx->Xi)) + return memcmp(ctx->Xi.c,tag,len); + else + return -1; +} + +void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) +{ + CRYPTO_gcm128_finish(ctx, NULL, 0); + memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c)); +} + +#if 0 + +GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block) +{ + GCM128_CONTEXT *ret; + + if ((ret = malloc(sizeof(GCM128_CONTEXT)))) + CRYPTO_gcm128_init(ret,key,block); + + return ret; +} + +void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) +{ + freezero(ctx, sizeof(*ctx)); +} + +#endif diff --git a/ext/libressl/crypto/modes/ghash-elf-armv4.S b/ext/libressl/crypto/modes/ghash-elf-armv4.S new file mode 100644 index 0000000..af42593 --- /dev/null +++ b/ext/libressl/crypto/modes/ghash-elf-armv4.S @@ -0,0 +1,412 @@ +#include "arm_arch.h" + +.text +.syntax unified +.code 32 + +.type rem_4bit,%object +.align 5 +rem_4bit: +.short 0x0000,0x1C20,0x3840,0x2460 +.short 0x7080,0x6CA0,0x48C0,0x54E0 +.short 0xE100,0xFD20,0xD940,0xC560 +.short 0x9180,0x8DA0,0xA9C0,0xB5E0 +.size rem_4bit,.-rem_4bit + +.type rem_4bit_get,%function +rem_4bit_get: + sub r2,pc,#8 + sub r2,r2,#32 @ &rem_4bit + b .Lrem_4bit_got + nop +.size rem_4bit_get,.-rem_4bit_get + +.global gcm_ghash_4bit +.type gcm_ghash_4bit,%function +gcm_ghash_4bit: + sub r12,pc,#8 + add r3,r2,r3 @ r3 to point at the end + stmdb sp!,{r3-r11,lr} @ save r3/end too + sub r12,r12,#48 @ &rem_4bit + + ldmia r12,{r4-r11} @ copy rem_4bit ... + stmdb sp!,{r4-r11} @ ... to stack + + ldrb r12,[r2,#15] + ldrb r14,[r0,#15] +.Louter: + eor r12,r12,r14 + and r14,r12,#0xf0 + and r12,r12,#0x0f + mov r3,#14 + + add r7,r1,r12,lsl#4 + ldmia r7,{r4-r7} @ load Htbl[nlo] + add r11,r1,r14 + ldrb r12,[r2,#14] + + and r14,r4,#0xf @ rem + ldmia r11,{r8-r11} @ load Htbl[nhi] + add r14,r14,r14 + eor r4,r8,r4,lsr#4 + ldrh r8,[sp,r14] @ rem_4bit[rem] + eor r4,r4,r5,lsl#28 + ldrb r14,[r0,#14] + eor r5,r9,r5,lsr#4 + eor r5,r5,r6,lsl#28 + eor r6,r10,r6,lsr#4 + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + eor r12,r12,r14 + and r14,r12,#0xf0 + and r12,r12,#0x0f + eor r7,r7,r8,lsl#16 + +.Linner: + add r11,r1,r12,lsl#4 + and r12,r4,#0xf @ rem + subs r3,r3,#1 + add r12,r12,r12 + ldmia r11,{r8-r11} @ load Htbl[nlo] + eor r4,r8,r4,lsr#4 + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + eor r5,r5,r6,lsl#28 + ldrh r8,[sp,r12] @ rem_4bit[rem] + eor r6,r10,r6,lsr#4 + ldrbpl r12,[r2,r3] + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + + add r11,r1,r14 + and r14,r4,#0xf @ rem + eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem] + add r14,r14,r14 + ldmia r11,{r8-r11} @ load Htbl[nhi] + eor r4,r8,r4,lsr#4 + ldrbpl r8,[r0,r3] + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + ldrh r9,[sp,r14] + eor r5,r5,r6,lsl#28 + eor r6,r10,r6,lsr#4 + eor r6,r6,r7,lsl#28 + eorpl r12,r12,r8 + eor r7,r11,r7,lsr#4 + andpl r14,r12,#0xf0 + andpl r12,r12,#0x0f + eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem] + bpl .Linner + + ldr r3,[sp,#32] @ re-load r3/end + add r2,r2,#16 + mov r14,r4 +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r4,r4 + str r4,[r0,#12] +#elif defined(__ARMEB__) + str r4,[r0,#12] +#else + mov r9,r4,lsr#8 + strb r4,[r0,#12+3] + mov r10,r4,lsr#16 + strb r9,[r0,#12+2] + mov r11,r4,lsr#24 + strb r10,[r0,#12+1] + strb r11,[r0,#12] +#endif + cmp r2,r3 +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r5,r5 + str r5,[r0,#8] +#elif defined(__ARMEB__) + str r5,[r0,#8] +#else + mov r9,r5,lsr#8 + strb r5,[r0,#8+3] + mov r10,r5,lsr#16 + strb r9,[r0,#8+2] + mov r11,r5,lsr#24 + strb r10,[r0,#8+1] + strb r11,[r0,#8] +#endif + ldrbne r12,[r2,#15] +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r6,r6 + str r6,[r0,#4] +#elif defined(__ARMEB__) + str r6,[r0,#4] +#else + mov r9,r6,lsr#8 + strb r6,[r0,#4+3] + mov r10,r6,lsr#16 + strb r9,[r0,#4+2] + mov r11,r6,lsr#24 + strb r10,[r0,#4+1] + strb r11,[r0,#4] +#endif + +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r7,r7 + str r7,[r0,#0] +#elif defined(__ARMEB__) + str r7,[r0,#0] +#else + mov r9,r7,lsr#8 + strb r7,[r0,#0+3] + mov r10,r7,lsr#16 + strb r9,[r0,#0+2] + mov r11,r7,lsr#24 + strb r10,[r0,#0+1] + strb r11,[r0,#0] +#endif + + bne .Louter + + add sp,sp,#36 +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size gcm_ghash_4bit,.-gcm_ghash_4bit + +.global gcm_gmult_4bit +.type gcm_gmult_4bit,%function +gcm_gmult_4bit: + stmdb sp!,{r4-r11,lr} + ldrb r12,[r0,#15] + b rem_4bit_get +.Lrem_4bit_got: + and r14,r12,#0xf0 + and r12,r12,#0x0f + mov r3,#14 + + add r7,r1,r12,lsl#4 + ldmia r7,{r4-r7} @ load Htbl[nlo] + ldrb r12,[r0,#14] + + add r11,r1,r14 + and r14,r4,#0xf @ rem + ldmia r11,{r8-r11} @ load Htbl[nhi] + add r14,r14,r14 + eor r4,r8,r4,lsr#4 + ldrh r8,[r2,r14] @ rem_4bit[rem] + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + eor r5,r5,r6,lsl#28 + eor r6,r10,r6,lsr#4 + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + and r14,r12,#0xf0 + eor r7,r7,r8,lsl#16 + and r12,r12,#0x0f + +.Loop: + add r11,r1,r12,lsl#4 + and r12,r4,#0xf @ rem + subs r3,r3,#1 + add r12,r12,r12 + ldmia r11,{r8-r11} @ load Htbl[nlo] + eor r4,r8,r4,lsr#4 + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + eor r5,r5,r6,lsl#28 + ldrh r8,[r2,r12] @ rem_4bit[rem] + eor r6,r10,r6,lsr#4 + ldrbpl r12,[r0,r3] + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + + add r11,r1,r14 + and r14,r4,#0xf @ rem + eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem] + add r14,r14,r14 + ldmia r11,{r8-r11} @ load Htbl[nhi] + eor r4,r8,r4,lsr#4 + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + ldrh r8,[r2,r14] @ rem_4bit[rem] + eor r5,r5,r6,lsl#28 + eor r6,r10,r6,lsr#4 + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + andpl r14,r12,#0xf0 + andpl r12,r12,#0x0f + eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem] + bpl .Loop +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r4,r4 + str r4,[r0,#12] +#elif defined(__ARMEB__) + str r4,[r0,#12] +#else + mov r9,r4,lsr#8 + strb r4,[r0,#12+3] + mov r10,r4,lsr#16 + strb r9,[r0,#12+2] + mov r11,r4,lsr#24 + strb r10,[r0,#12+1] + strb r11,[r0,#12] +#endif + +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r5,r5 + str r5,[r0,#8] +#elif defined(__ARMEB__) + str r5,[r0,#8] +#else + mov r9,r5,lsr#8 + strb r5,[r0,#8+3] + mov r10,r5,lsr#16 + strb r9,[r0,#8+2] + mov r11,r5,lsr#24 + strb r10,[r0,#8+1] + strb r11,[r0,#8] +#endif + +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r6,r6 + str r6,[r0,#4] +#elif defined(__ARMEB__) + str r6,[r0,#4] +#else + mov r9,r6,lsr#8 + strb r6,[r0,#4+3] + mov r10,r6,lsr#16 + strb r9,[r0,#4+2] + mov r11,r6,lsr#24 + strb r10,[r0,#4+1] + strb r11,[r0,#4] +#endif + +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r7,r7 + str r7,[r0,#0] +#elif defined(__ARMEB__) + str r7,[r0,#0] +#else + mov r9,r7,lsr#8 + strb r7,[r0,#0+3] + mov r10,r7,lsr#16 + strb r9,[r0,#0+2] + mov r11,r7,lsr#24 + strb r10,[r0,#0+1] + strb r11,[r0,#0] +#endif + +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size gcm_gmult_4bit,.-gcm_gmult_4bit +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) +.fpu neon + +.global gcm_gmult_neon +.type gcm_gmult_neon,%function +.align 4 +gcm_gmult_neon: + sub r1,#16 @ point at H in GCM128_CTX + vld1.64 d29,[r0,:64]!@ load Xi + vmov.i32 d5,#0xe1 @ our irreducible polynomial + vld1.64 d28,[r0,:64]! + vshr.u64 d5,#32 + vldmia r1,{d0-d1} @ load H + veor q12,q12 +#ifdef __ARMEL__ + vrev64.8 q14,q14 +#endif + veor q13,q13 + veor q11,q11 + mov r1,#16 + veor q10,q10 + mov r3,#16 + veor d2,d2 + vdup.8 d4,d28[0] @ broadcast lowest byte + b .Linner_neon +.size gcm_gmult_neon,.-gcm_gmult_neon + +.global gcm_ghash_neon +.type gcm_ghash_neon,%function +.align 4 +gcm_ghash_neon: + vld1.64 d21,[r0,:64]! @ load Xi + vmov.i32 d5,#0xe1 @ our irreducible polynomial + vld1.64 d20,[r0,:64]! + vshr.u64 d5,#32 + vldmia r0,{d0-d1} @ load H + veor q12,q12 + nop +#ifdef __ARMEL__ + vrev64.8 q10,q10 +#endif +.Louter_neon: + vld1.64 d29,[r2]! @ load inp + veor q13,q13 + vld1.64 d28,[r2]! + veor q11,q11 + mov r1,#16 +#ifdef __ARMEL__ + vrev64.8 q14,q14 +#endif + veor d2,d2 + veor q14,q10 @ inp^=Xi + veor q10,q10 + vdup.8 d4,d28[0] @ broadcast lowest byte +.Linner_neon: + subs r1,r1,#1 + vmull.p8 q9,d1,d4 @ H.lo·Xi[i] + vmull.p8 q8,d0,d4 @ H.hi·Xi[i] + vext.8 q14,q12,#1 @ IN>>=8 + + veor q10,q13 @ modulo-scheduled part + vshl.i64 d22,#48 + vdup.8 d4,d28[0] @ broadcast lowest byte + veor d3,d18,d20 + + veor d21,d22 + vuzp.8 q9,q8 + vsli.8 d2,d3,#1 @ compose the "carry" byte + vext.8 q10,q12,#1 @ Z>>=8 + + vmull.p8 q11,d2,d5 @ "carry"·0xe1 + vshr.u8 d2,d3,#7 @ save Z's bottom bit + vext.8 q13,q9,q12,#1 @ Qlo>>=8 + veor q10,q8 + bne .Linner_neon + + veor q10,q13 @ modulo-scheduled artefact + vshl.i64 d22,#48 + veor d21,d22 + + @ finalization, normalize Z:Zo + vand d2,d5 @ suffices to mask the bit + vshr.u64 d3,d20,#63 + vshl.i64 q10,#1 + subs r3,#16 + vorr q10,q1 @ Z=Z:Zo<<1 + bne .Louter_neon + +#ifdef __ARMEL__ + vrev64.8 q10,q10 +#endif + sub r0,#16 + vst1.64 d21,[r0,:64]! @ write out Xi + vst1.64 d20,[r0,:64] + + .word 0xe12fff1e +.size gcm_ghash_neon,.-gcm_ghash_neon +#endif +.asciz "GHASH for ARMv4/NEON, CRYPTOGAMS by " +.align 2 +#if defined(HAVE_GNU_STACK) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/ext/libressl/crypto/modes/ghash-elf-x86_64.S b/ext/libressl/crypto/modes/ghash-elf-x86_64.S new file mode 100644 index 0000000..5f31626 --- /dev/null +++ b/ext/libressl/crypto/modes/ghash-elf-x86_64.S @@ -0,0 +1,1030 @@ +#include "x86_arch.h" +.text + +.globl gcm_gmult_4bit +.type gcm_gmult_4bit,@function +.align 16 +gcm_gmult_4bit: + pushq %rbx + pushq %rbp + pushq %r12 +.Lgmult_prologue: + + movzbq 15(%rdi),%r8 + leaq .Lrem_4bit(%rip),%r11 + xorq %rax,%rax + xorq %rbx,%rbx + movb %r8b,%al + movb %r8b,%bl + shlb $4,%al + movq $14,%rcx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + andb $240,%bl + movq %r8,%rdx + jmp .Loop1 + +.align 16 +.Loop1: + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + movb (%rdi,%rcx,1),%al + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + movb %al,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + shlb $4,%al + xorq %r10,%r8 + decq %rcx + js .Lbreak1 + + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + andb $240,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + jmp .Loop1 + +.align 16 +.Lbreak1: + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + andb $240,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + xorq %r10,%r8 + xorq (%r11,%rdx,8),%r9 + + bswapq %r8 + bswapq %r9 + movq %r8,8(%rdi) + movq %r9,(%rdi) + + movq 16(%rsp),%rbx + leaq 24(%rsp),%rsp +.Lgmult_epilogue: + retq +.size gcm_gmult_4bit,.-gcm_gmult_4bit +.globl gcm_ghash_4bit +.type gcm_ghash_4bit,@function +.align 16 +gcm_ghash_4bit: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $280,%rsp +.Lghash_prologue: + movq %rdx,%r14 + movq %rcx,%r15 + subq $-128,%rsi + leaq 16+128(%rsp),%rbp + xorl %edx,%edx + movq 0+0-128(%rsi),%r8 + movq 0+8-128(%rsi),%rax + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq 16+0-128(%rsi),%r9 + shlb $4,%dl + movq 16+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,0(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,0(%rbp) + movq 32+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,0-128(%rbp) + movq 32+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,1(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,8(%rbp) + movq 48+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,8-128(%rbp) + movq 48+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,2(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,16(%rbp) + movq 64+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,16-128(%rbp) + movq 64+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,3(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,24(%rbp) + movq 80+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,24-128(%rbp) + movq 80+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,4(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,32(%rbp) + movq 96+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,32-128(%rbp) + movq 96+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,5(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,40(%rbp) + movq 112+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,40-128(%rbp) + movq 112+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,6(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,48(%rbp) + movq 128+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,48-128(%rbp) + movq 128+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,7(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,56(%rbp) + movq 144+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,56-128(%rbp) + movq 144+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,8(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,64(%rbp) + movq 160+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,64-128(%rbp) + movq 160+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,9(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,72(%rbp) + movq 176+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,72-128(%rbp) + movq 176+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,10(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,80(%rbp) + movq 192+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,80-128(%rbp) + movq 192+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,11(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,88(%rbp) + movq 208+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,88-128(%rbp) + movq 208+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,12(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,96(%rbp) + movq 224+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,96-128(%rbp) + movq 224+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,13(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,104(%rbp) + movq 240+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,104-128(%rbp) + movq 240+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,14(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,112(%rbp) + shlb $4,%dl + movq %rax,112-128(%rbp) + shlq $60,%r10 + movb %dl,15(%rsp) + orq %r10,%rbx + movq %r9,120(%rbp) + movq %rbx,120-128(%rbp) + addq $-128,%rsi + movq 8(%rdi),%r8 + movq 0(%rdi),%r9 + addq %r14,%r15 + leaq .Lrem_8bit(%rip),%r11 + jmp .Louter_loop +.align 16 +.Louter_loop: + xorq (%r14),%r9 + movq 8(%r14),%rdx + leaq 16(%r14),%r14 + xorq %r8,%rdx + movq %r9,(%rdi) + movq %rdx,8(%rdi) + shrq $32,%rdx + xorq %rax,%rax + roll $8,%edx + movb %dl,%al + movzbl %dl,%ebx + shlb $4,%al + shrl $4,%ebx + roll $8,%edx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + movb %dl,%al + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + xorq %r8,%r12 + movq %r9,%r10 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 8(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 0(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + andl $240,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl -4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + movzwq (%r11,%r12,2),%r12 + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + shlq $48,%r12 + xorq %r10,%r8 + xorq %r12,%r9 + movzbq %r8b,%r13 + shrq $4,%r8 + movq %r9,%r10 + shlb $4,%r13b + shrq $4,%r9 + xorq 8(%rsi,%rcx,1),%r8 + movzwq (%r11,%r13,2),%r13 + shlq $60,%r10 + xorq (%rsi,%rcx,1),%r9 + xorq %r10,%r8 + shlq $48,%r13 + bswapq %r8 + xorq %r13,%r9 + bswapq %r9 + cmpq %r15,%r14 + jb .Louter_loop + movq %r8,8(%rdi) + movq %r9,(%rdi) + + leaq 280(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lghash_epilogue: + retq +.size gcm_ghash_4bit,.-gcm_ghash_4bit +.globl gcm_init_clmul +.type gcm_init_clmul,@function +.align 16 +gcm_init_clmul: + movdqu (%rsi),%xmm2 + pshufd $78,%xmm2,%xmm2 + + + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + + + pand .L0x1c2_polynomial(%rip),%xmm5 + pxor %xmm5,%xmm2 + + + movdqa %xmm2,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + movdqu %xmm2,(%rdi) + movdqu %xmm0,16(%rdi) + retq +.size gcm_init_clmul,.-gcm_init_clmul +.globl gcm_gmult_clmul +.type gcm_gmult_clmul,@function +.align 16 +gcm_gmult_clmul: + movdqu (%rdi),%xmm0 + movdqa .Lbswap_mask(%rip),%xmm5 + movdqu (%rsi),%xmm2 +.byte 102,15,56,0,197 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%rdi) + retq +.size gcm_gmult_clmul,.-gcm_gmult_clmul +.globl gcm_ghash_clmul +.type gcm_ghash_clmul,@function +.align 16 +gcm_ghash_clmul: + movdqa .Lbswap_mask(%rip),%xmm5 + + movdqu (%rdi),%xmm0 + movdqu (%rsi),%xmm2 +.byte 102,15,56,0,197 + + subq $16,%rcx + jz .Lodd_tail + + movdqu 16(%rsi),%xmm8 + + + + + + movdqu (%rdx),%xmm3 + movdqu 16(%rdx),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + pxor %xmm3,%xmm0 + movdqa %xmm6,%xmm7 + pshufd $78,%xmm6,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm6,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,242,0 +.byte 102,15,58,68,250,17 +.byte 102,15,58,68,220,0 + pxor %xmm6,%xmm3 + pxor %xmm7,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm7 + pxor %xmm4,%xmm6 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm8,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm8,%xmm4 + + leaq 32(%rdx),%rdx + subq $32,%rcx + jbe .Leven_tail + +.Lmod_loop: +.byte 102,65,15,58,68,192,0 +.byte 102,65,15,58,68,200,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqu (%rdx),%xmm3 + pxor %xmm6,%xmm0 + pxor %xmm7,%xmm1 + + movdqu 16(%rdx),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + + movdqa %xmm6,%xmm7 + pshufd $78,%xmm6,%xmm9 + pshufd $78,%xmm2,%xmm10 + pxor %xmm6,%xmm9 + pxor %xmm2,%xmm10 + pxor %xmm3,%xmm1 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 +.byte 102,15,58,68,242,0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + +.byte 102,15,58,68,250,17 + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + +.byte 102,69,15,58,68,202,0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm8,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm8,%xmm4 + + pxor %xmm6,%xmm9 + pxor %xmm7,%xmm9 + movdqa %xmm9,%xmm10 + psrldq $8,%xmm9 + pslldq $8,%xmm10 + pxor %xmm9,%xmm7 + pxor %xmm10,%xmm6 + + leaq 32(%rdx),%rdx + subq $32,%rcx + ja .Lmod_loop + +.Leven_tail: +.byte 102,65,15,58,68,192,0 +.byte 102,65,15,58,68,200,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm6,%xmm0 + pxor %xmm7,%xmm1 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + testq %rcx,%rcx + jnz .Ldone + +.Lodd_tail: + movdqu (%rdx),%xmm3 +.byte 102,15,56,0,221 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 +.Ldone: +.byte 102,15,56,0,197 + movdqu %xmm0,(%rdi) + retq +.LSEH_end_gcm_ghash_clmul: +.size gcm_ghash_clmul,.-gcm_ghash_clmul +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.L0x1c2_polynomial: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.align 64 +.type .Lrem_4bit,@object +.Lrem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 +.type .Lrem_8bit,@object +.Lrem_8bit: +.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E +.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E +.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E +.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E +.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E +.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E +.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E +.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E +.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE +.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE +.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE +.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE +.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E +.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E +.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE +.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE +.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E +.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E +.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E +.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E +.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E +.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E +.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E +.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E +.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE +.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE +.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE +.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE +.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E +.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E +.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE +.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE + +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +#if defined(HAVE_GNU_STACK) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/ext/libressl/crypto/modes/ghash-macosx-x86_64.S b/ext/libressl/crypto/modes/ghash-macosx-x86_64.S new file mode 100644 index 0000000..e6840a7 --- /dev/null +++ b/ext/libressl/crypto/modes/ghash-macosx-x86_64.S @@ -0,0 +1,1027 @@ +#include "x86_arch.h" +.text + +.globl _gcm_gmult_4bit + +.p2align 4 +_gcm_gmult_4bit: + pushq %rbx + pushq %rbp + pushq %r12 +L$gmult_prologue: + + movzbq 15(%rdi),%r8 + leaq L$rem_4bit(%rip),%r11 + xorq %rax,%rax + xorq %rbx,%rbx + movb %r8b,%al + movb %r8b,%bl + shlb $4,%al + movq $14,%rcx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + andb $240,%bl + movq %r8,%rdx + jmp L$oop1 + +.p2align 4 +L$oop1: + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + movb (%rdi,%rcx,1),%al + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + movb %al,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + shlb $4,%al + xorq %r10,%r8 + decq %rcx + js L$break1 + + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + andb $240,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + jmp L$oop1 + +.p2align 4 +L$break1: + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + andb $240,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + xorq %r10,%r8 + xorq (%r11,%rdx,8),%r9 + + bswapq %r8 + bswapq %r9 + movq %r8,8(%rdi) + movq %r9,(%rdi) + + movq 16(%rsp),%rbx + leaq 24(%rsp),%rsp +L$gmult_epilogue: + retq + +.globl _gcm_ghash_4bit + +.p2align 4 +_gcm_ghash_4bit: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $280,%rsp +L$ghash_prologue: + movq %rdx,%r14 + movq %rcx,%r15 + subq $-128,%rsi + leaq 16+128(%rsp),%rbp + xorl %edx,%edx + movq 0+0-128(%rsi),%r8 + movq 0+8-128(%rsi),%rax + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq 16+0-128(%rsi),%r9 + shlb $4,%dl + movq 16+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,0(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,0(%rbp) + movq 32+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,0-128(%rbp) + movq 32+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,1(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,8(%rbp) + movq 48+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,8-128(%rbp) + movq 48+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,2(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,16(%rbp) + movq 64+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,16-128(%rbp) + movq 64+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,3(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,24(%rbp) + movq 80+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,24-128(%rbp) + movq 80+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,4(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,32(%rbp) + movq 96+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,32-128(%rbp) + movq 96+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,5(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,40(%rbp) + movq 112+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,40-128(%rbp) + movq 112+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,6(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,48(%rbp) + movq 128+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,48-128(%rbp) + movq 128+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,7(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,56(%rbp) + movq 144+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,56-128(%rbp) + movq 144+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,8(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,64(%rbp) + movq 160+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,64-128(%rbp) + movq 160+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,9(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,72(%rbp) + movq 176+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,72-128(%rbp) + movq 176+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,10(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,80(%rbp) + movq 192+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,80-128(%rbp) + movq 192+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,11(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,88(%rbp) + movq 208+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,88-128(%rbp) + movq 208+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,12(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,96(%rbp) + movq 224+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,96-128(%rbp) + movq 224+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,13(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,104(%rbp) + movq 240+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,104-128(%rbp) + movq 240+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,14(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,112(%rbp) + shlb $4,%dl + movq %rax,112-128(%rbp) + shlq $60,%r10 + movb %dl,15(%rsp) + orq %r10,%rbx + movq %r9,120(%rbp) + movq %rbx,120-128(%rbp) + addq $-128,%rsi + movq 8(%rdi),%r8 + movq 0(%rdi),%r9 + addq %r14,%r15 + leaq L$rem_8bit(%rip),%r11 + jmp L$outer_loop +.p2align 4 +L$outer_loop: + xorq (%r14),%r9 + movq 8(%r14),%rdx + leaq 16(%r14),%r14 + xorq %r8,%rdx + movq %r9,(%rdi) + movq %rdx,8(%rdi) + shrq $32,%rdx + xorq %rax,%rax + roll $8,%edx + movb %dl,%al + movzbl %dl,%ebx + shlb $4,%al + shrl $4,%ebx + roll $8,%edx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + movb %dl,%al + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + xorq %r8,%r12 + movq %r9,%r10 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 8(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 0(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + andl $240,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl -4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + movzwq (%r11,%r12,2),%r12 + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + shlq $48,%r12 + xorq %r10,%r8 + xorq %r12,%r9 + movzbq %r8b,%r13 + shrq $4,%r8 + movq %r9,%r10 + shlb $4,%r13b + shrq $4,%r9 + xorq 8(%rsi,%rcx,1),%r8 + movzwq (%r11,%r13,2),%r13 + shlq $60,%r10 + xorq (%rsi,%rcx,1),%r9 + xorq %r10,%r8 + shlq $48,%r13 + bswapq %r8 + xorq %r13,%r9 + bswapq %r9 + cmpq %r15,%r14 + jb L$outer_loop + movq %r8,8(%rdi) + movq %r9,(%rdi) + + leaq 280(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$ghash_epilogue: + retq + +.globl _gcm_init_clmul + +.p2align 4 +_gcm_init_clmul: + movdqu (%rsi),%xmm2 + pshufd $78,%xmm2,%xmm2 + + + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + + + pand L$0x1c2_polynomial(%rip),%xmm5 + pxor %xmm5,%xmm2 + + + movdqa %xmm2,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + movdqu %xmm2,(%rdi) + movdqu %xmm0,16(%rdi) + retq + +.globl _gcm_gmult_clmul + +.p2align 4 +_gcm_gmult_clmul: + movdqu (%rdi),%xmm0 + movdqa L$bswap_mask(%rip),%xmm5 + movdqu (%rsi),%xmm2 +.byte 102,15,56,0,197 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%rdi) + retq + +.globl _gcm_ghash_clmul + +.p2align 4 +_gcm_ghash_clmul: + movdqa L$bswap_mask(%rip),%xmm5 + + movdqu (%rdi),%xmm0 + movdqu (%rsi),%xmm2 +.byte 102,15,56,0,197 + + subq $16,%rcx + jz L$odd_tail + + movdqu 16(%rsi),%xmm8 + + + + + + movdqu (%rdx),%xmm3 + movdqu 16(%rdx),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + pxor %xmm3,%xmm0 + movdqa %xmm6,%xmm7 + pshufd $78,%xmm6,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm6,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,242,0 +.byte 102,15,58,68,250,17 +.byte 102,15,58,68,220,0 + pxor %xmm6,%xmm3 + pxor %xmm7,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm7 + pxor %xmm4,%xmm6 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm8,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm8,%xmm4 + + leaq 32(%rdx),%rdx + subq $32,%rcx + jbe L$even_tail + +L$mod_loop: +.byte 102,65,15,58,68,192,0 +.byte 102,65,15,58,68,200,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqu (%rdx),%xmm3 + pxor %xmm6,%xmm0 + pxor %xmm7,%xmm1 + + movdqu 16(%rdx),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + + movdqa %xmm6,%xmm7 + pshufd $78,%xmm6,%xmm9 + pshufd $78,%xmm2,%xmm10 + pxor %xmm6,%xmm9 + pxor %xmm2,%xmm10 + pxor %xmm3,%xmm1 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 +.byte 102,15,58,68,242,0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + +.byte 102,15,58,68,250,17 + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + +.byte 102,69,15,58,68,202,0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm8,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm8,%xmm4 + + pxor %xmm6,%xmm9 + pxor %xmm7,%xmm9 + movdqa %xmm9,%xmm10 + psrldq $8,%xmm9 + pslldq $8,%xmm10 + pxor %xmm9,%xmm7 + pxor %xmm10,%xmm6 + + leaq 32(%rdx),%rdx + subq $32,%rcx + ja L$mod_loop + +L$even_tail: +.byte 102,65,15,58,68,192,0 +.byte 102,65,15,58,68,200,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm6,%xmm0 + pxor %xmm7,%xmm1 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + testq %rcx,%rcx + jnz L$done + +L$odd_tail: + movdqu (%rdx),%xmm3 +.byte 102,15,56,0,221 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 +L$done: +.byte 102,15,56,0,197 + movdqu %xmm0,(%rdi) + retq +L$SEH_end_gcm_ghash_clmul: + +.p2align 6 +L$bswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +L$0x1c2_polynomial: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.p2align 6 + +L$rem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 + +L$rem_8bit: +.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E +.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E +.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E +.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E +.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E +.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E +.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E +.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E +.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE +.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE +.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE +.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE +.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E +.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E +.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE +.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE +.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E +.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E +.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E +.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E +.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E +.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E +.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E +.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E +.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE +.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE +.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE +.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE +.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E +.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E +.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE +.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE + +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 diff --git a/ext/libressl/crypto/modes/ghash-masm-x86_64.S b/ext/libressl/crypto/modes/ghash-masm-x86_64.S new file mode 100644 index 0000000..ffdc1b5 --- /dev/null +++ b/ext/libressl/crypto/modes/ghash-masm-x86_64.S @@ -0,0 +1,1256 @@ +; 1 "crypto/modes/ghash-masm-x86_64.S.tmp" +; 1 "" 1 +; 1 "" 3 +; 340 "" 3 +; 1 "" 1 +; 1 "" 2 +; 1 "crypto/modes/ghash-masm-x86_64.S.tmp" 2 +OPTION DOTNAME + +; 1 "./crypto/x86_arch.h" 1 + + +; 16 "./crypto/x86_arch.h" + + + + + + + + + +; 40 "./crypto/x86_arch.h" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +; 3 "crypto/modes/ghash-masm-x86_64.S.tmp" 2 +.text$ SEGMENT ALIGN(64) 'CODE' + +PUBLIC gcm_gmult_4bit + +ALIGN 16 +gcm_gmult_4bit PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_gcm_gmult_4bit:: + mov rdi,rcx + mov rsi,rdx + + + push rbx + push rbp + push r12 +$L$gmult_prologue:: + + movzx r8,BYTE PTR[15+rdi] + lea r11,QWORD PTR[$L$rem_4bit] + xor rax,rax + xor rbx,rbx + mov al,r8b + mov bl,r8b + shl al,4 + mov rcx,14 + mov r8,QWORD PTR[8+rax*1+rsi] + mov r9,QWORD PTR[rax*1+rsi] + and bl,0f0h + mov rdx,r8 + jmp $L$oop1 + +ALIGN 16 +$L$oop1:: + shr r8,4 + and rdx,0fh + mov r10,r9 + mov al,BYTE PTR[rcx*1+rdi] + shr r9,4 + xor r8,QWORD PTR[8+rbx*1+rsi] + shl r10,60 + xor r9,QWORD PTR[rbx*1+rsi] + mov bl,al + xor r9,QWORD PTR[rdx*8+r11] + mov rdx,r8 + shl al,4 + xor r8,r10 + dec rcx + js $L$break1 + + shr r8,4 + and rdx,0fh + mov r10,r9 + shr r9,4 + xor r8,QWORD PTR[8+rax*1+rsi] + shl r10,60 + xor r9,QWORD PTR[rax*1+rsi] + and bl,0f0h + xor r9,QWORD PTR[rdx*8+r11] + mov rdx,r8 + xor r8,r10 + jmp $L$oop1 + +ALIGN 16 +$L$break1:: + shr r8,4 + and rdx,0fh + mov r10,r9 + shr r9,4 + xor r8,QWORD PTR[8+rax*1+rsi] + shl r10,60 + xor r9,QWORD PTR[rax*1+rsi] + and bl,0f0h + xor r9,QWORD PTR[rdx*8+r11] + mov rdx,r8 + xor r8,r10 + + shr r8,4 + and rdx,0fh + mov r10,r9 + shr r9,4 + xor r8,QWORD PTR[8+rbx*1+rsi] + shl r10,60 + xor r9,QWORD PTR[rbx*1+rsi] + xor r8,r10 + xor r9,QWORD PTR[rdx*8+r11] + + bswap r8 + bswap r9 + mov QWORD PTR[8+rdi],r8 + mov QWORD PTR[rdi],r9 + + mov rbx,QWORD PTR[16+rsp] + lea rsp,QWORD PTR[24+rsp] +$L$gmult_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_gcm_gmult_4bit:: +gcm_gmult_4bit ENDP +PUBLIC gcm_ghash_4bit + +ALIGN 16 +gcm_ghash_4bit PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_gcm_ghash_4bit:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + sub rsp,280 +$L$ghash_prologue:: + mov r14,rdx + mov r15,rcx + sub rsi,-128 + lea rbp,QWORD PTR[((16+128))+rsp] + xor edx,edx + mov r8,QWORD PTR[((0+0-128))+rsi] + mov rax,QWORD PTR[((0+8-128))+rsi] + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov r9,QWORD PTR[((16+0-128))+rsi] + shl dl,4 + mov rbx,QWORD PTR[((16+8-128))+rsi] + shl r10,60 + mov BYTE PTR[rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[rbp],r8 + mov r8,QWORD PTR[((32+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((0-128))+rbp],rax + mov rax,QWORD PTR[((32+8-128))+rsi] + shl r10,60 + mov BYTE PTR[1+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[8+rbp],r9 + mov r9,QWORD PTR[((48+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((8-128))+rbp],rbx + mov rbx,QWORD PTR[((48+8-128))+rsi] + shl r10,60 + mov BYTE PTR[2+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[16+rbp],r8 + mov r8,QWORD PTR[((64+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((16-128))+rbp],rax + mov rax,QWORD PTR[((64+8-128))+rsi] + shl r10,60 + mov BYTE PTR[3+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[24+rbp],r9 + mov r9,QWORD PTR[((80+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((24-128))+rbp],rbx + mov rbx,QWORD PTR[((80+8-128))+rsi] + shl r10,60 + mov BYTE PTR[4+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[32+rbp],r8 + mov r8,QWORD PTR[((96+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((32-128))+rbp],rax + mov rax,QWORD PTR[((96+8-128))+rsi] + shl r10,60 + mov BYTE PTR[5+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[40+rbp],r9 + mov r9,QWORD PTR[((112+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((40-128))+rbp],rbx + mov rbx,QWORD PTR[((112+8-128))+rsi] + shl r10,60 + mov BYTE PTR[6+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[48+rbp],r8 + mov r8,QWORD PTR[((128+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((48-128))+rbp],rax + mov rax,QWORD PTR[((128+8-128))+rsi] + shl r10,60 + mov BYTE PTR[7+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[56+rbp],r9 + mov r9,QWORD PTR[((144+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((56-128))+rbp],rbx + mov rbx,QWORD PTR[((144+8-128))+rsi] + shl r10,60 + mov BYTE PTR[8+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[64+rbp],r8 + mov r8,QWORD PTR[((160+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((64-128))+rbp],rax + mov rax,QWORD PTR[((160+8-128))+rsi] + shl r10,60 + mov BYTE PTR[9+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[72+rbp],r9 + mov r9,QWORD PTR[((176+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((72-128))+rbp],rbx + mov rbx,QWORD PTR[((176+8-128))+rsi] + shl r10,60 + mov BYTE PTR[10+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[80+rbp],r8 + mov r8,QWORD PTR[((192+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((80-128))+rbp],rax + mov rax,QWORD PTR[((192+8-128))+rsi] + shl r10,60 + mov BYTE PTR[11+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[88+rbp],r9 + mov r9,QWORD PTR[((208+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((88-128))+rbp],rbx + mov rbx,QWORD PTR[((208+8-128))+rsi] + shl r10,60 + mov BYTE PTR[12+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[96+rbp],r8 + mov r8,QWORD PTR[((224+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((96-128))+rbp],rax + mov rax,QWORD PTR[((224+8-128))+rsi] + shl r10,60 + mov BYTE PTR[13+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[104+rbp],r9 + mov r9,QWORD PTR[((240+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((104-128))+rbp],rbx + mov rbx,QWORD PTR[((240+8-128))+rsi] + shl r10,60 + mov BYTE PTR[14+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[112+rbp],r8 + shl dl,4 + mov QWORD PTR[((112-128))+rbp],rax + shl r10,60 + mov BYTE PTR[15+rsp],dl + or rbx,r10 + mov QWORD PTR[120+rbp],r9 + mov QWORD PTR[((120-128))+rbp],rbx + add rsi,-128 + mov r8,QWORD PTR[8+rdi] + mov r9,QWORD PTR[rdi] + add r15,r14 + lea r11,QWORD PTR[$L$rem_8bit] + jmp $L$outer_loop +ALIGN 16 +$L$outer_loop:: + xor r9,QWORD PTR[r14] + mov rdx,QWORD PTR[8+r14] + lea r14,QWORD PTR[16+r14] + xor rdx,r8 + mov QWORD PTR[rdi],r9 + mov QWORD PTR[8+rdi],rdx + shr rdx,32 + xor rax,rax + rol edx,8 + mov al,dl + movzx ebx,dl + shl al,4 + shr ebx,4 + rol edx,8 + mov r8,QWORD PTR[8+rax*1+rsi] + mov r9,QWORD PTR[rax*1+rsi] + mov al,dl + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + xor r12,r8 + mov r10,r9 + shr r8,8 + movzx r12,r12b + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + mov edx,DWORD PTR[8+rdi] + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + mov edx,DWORD PTR[4+rdi] + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + mov edx,DWORD PTR[rdi] + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + and ecx,240 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + mov edx,DWORD PTR[((-4))+rdi] + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + movzx r12,WORD PTR[r12*2+r11] + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + shl r12,48 + xor r8,r10 + xor r9,r12 + movzx r13,r8b + shr r8,4 + mov r10,r9 + shl r13b,4 + shr r9,4 + xor r8,QWORD PTR[8+rcx*1+rsi] + movzx r13,WORD PTR[r13*2+r11] + shl r10,60 + xor r9,QWORD PTR[rcx*1+rsi] + xor r8,r10 + shl r13,48 + bswap r8 + xor r9,r13 + bswap r9 + cmp r14,r15 + jb $L$outer_loop + mov QWORD PTR[8+rdi],r8 + mov QWORD PTR[rdi],r9 + + lea rsi,QWORD PTR[280+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$ghash_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_gcm_ghash_4bit:: +gcm_ghash_4bit ENDP +PUBLIC gcm_init_clmul + +ALIGN 16 +gcm_init_clmul PROC PUBLIC + movdqu xmm2,XMMWORD PTR[rdx] + pshufd xmm2,xmm2,78 + + + pshufd xmm4,xmm2,255 + movdqa xmm3,xmm2 + psllq xmm2,1 + pxor xmm5,xmm5 + psrlq xmm3,63 + pcmpgtd xmm5,xmm4 + pslldq xmm3,8 + por xmm2,xmm3 + + + pand xmm5,XMMWORD PTR[$L$0x1c2_polynomial] + pxor xmm2,xmm5 + + + movdqa xmm0,xmm2 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pshufd xmm4,xmm2,78 + pxor xmm3,xmm0 + pxor xmm4,xmm2 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,220,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,5 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm4,xmm0 + pslldq xmm0,8 + psrldq xmm4,8 + pxor xmm0,xmm3 + pxor xmm1,xmm4 + + + movdqa xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm4 + pxor xmm4,xmm1 + psrlq xmm0,1 + pxor xmm0,xmm4 + movdqu XMMWORD PTR[rcx],xmm2 + movdqu XMMWORD PTR[16+rcx],xmm0 + DB 0F3h,0C3h ;repret +gcm_init_clmul ENDP +PUBLIC gcm_gmult_clmul + +ALIGN 16 +gcm_gmult_clmul PROC PUBLIC + movdqu xmm0,XMMWORD PTR[rcx] + movdqa xmm5,XMMWORD PTR[$L$bswap_mask] + movdqu xmm2,XMMWORD PTR[rdx] +DB 102,15,56,0,197 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pshufd xmm4,xmm2,78 + pxor xmm3,xmm0 + pxor xmm4,xmm2 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,220,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,5 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm4,xmm0 + pslldq xmm0,8 + psrldq xmm4,8 + pxor xmm0,xmm3 + pxor xmm1,xmm4 + + + movdqa xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm4 + pxor xmm4,xmm1 + psrlq xmm0,1 + pxor xmm0,xmm4 +DB 102,15,56,0,197 + movdqu XMMWORD PTR[rcx],xmm0 + DB 0F3h,0C3h ;repret +gcm_gmult_clmul ENDP +PUBLIC gcm_ghash_clmul + +ALIGN 16 +gcm_ghash_clmul PROC PUBLIC +$L$SEH_begin_gcm_ghash_clmul:: + +DB 048h,083h,0ech,058h +DB 00fh,029h,034h,024h +DB 00fh,029h,07ch,024h,010h +DB 044h,00fh,029h,044h,024h,020h +DB 044h,00fh,029h,04ch,024h,030h +DB 044h,00fh,029h,054h,024h,040h + movdqa xmm5,XMMWORD PTR[$L$bswap_mask] + + movdqu xmm0,XMMWORD PTR[rcx] + movdqu xmm2,XMMWORD PTR[rdx] +DB 102,15,56,0,197 + + sub r9,010h + jz $L$odd_tail + + movdqu xmm8,XMMWORD PTR[16+rdx] + + + + + + movdqu xmm3,XMMWORD PTR[r8] + movdqu xmm6,XMMWORD PTR[16+r8] +DB 102,15,56,0,221 +DB 102,15,56,0,245 + pxor xmm0,xmm3 + movdqa xmm7,xmm6 + pshufd xmm3,xmm6,78 + pshufd xmm4,xmm2,78 + pxor xmm3,xmm6 + pxor xmm4,xmm2 +DB 102,15,58,68,242,0 +DB 102,15,58,68,250,17 +DB 102,15,58,68,220,0 + pxor xmm3,xmm6 + pxor xmm3,xmm7 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm7,xmm3 + pxor xmm6,xmm4 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pshufd xmm4,xmm8,78 + pxor xmm3,xmm0 + pxor xmm4,xmm8 + + lea r8,QWORD PTR[32+r8] + sub r9,020h + jbe $L$even_tail + +$L$mod_loop:: +DB 102,65,15,58,68,192,0 +DB 102,65,15,58,68,200,17 +DB 102,15,58,68,220,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + movdqu xmm3,XMMWORD PTR[r8] + pxor xmm0,xmm6 + pxor xmm1,xmm7 + + movdqu xmm6,XMMWORD PTR[16+r8] +DB 102,15,56,0,221 +DB 102,15,56,0,245 + + movdqa xmm7,xmm6 + pshufd xmm9,xmm6,78 + pshufd xmm10,xmm2,78 + pxor xmm9,xmm6 + pxor xmm10,xmm2 + pxor xmm1,xmm3 + + movdqa xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,5 + pxor xmm0,xmm3 +DB 102,15,58,68,242,0 + psllq xmm0,57 + movdqa xmm4,xmm0 + pslldq xmm0,8 + psrldq xmm4,8 + pxor xmm0,xmm3 + pxor xmm1,xmm4 + +DB 102,15,58,68,250,17 + movdqa xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm4 + pxor xmm4,xmm1 + psrlq xmm0,1 + pxor xmm0,xmm4 + +DB 102,69,15,58,68,202,0 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pshufd xmm4,xmm8,78 + pxor xmm3,xmm0 + pxor xmm4,xmm8 + + pxor xmm9,xmm6 + pxor xmm9,xmm7 + movdqa xmm10,xmm9 + psrldq xmm9,8 + pslldq xmm10,8 + pxor xmm7,xmm9 + pxor xmm6,xmm10 + + lea r8,QWORD PTR[32+r8] + sub r9,020h + ja $L$mod_loop + +$L$even_tail:: +DB 102,65,15,58,68,192,0 +DB 102,65,15,58,68,200,17 +DB 102,15,58,68,220,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + pxor xmm0,xmm6 + pxor xmm1,xmm7 + + movdqa xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,5 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm4,xmm0 + pslldq xmm0,8 + psrldq xmm4,8 + pxor xmm0,xmm3 + pxor xmm1,xmm4 + + + movdqa xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm4 + pxor xmm4,xmm1 + psrlq xmm0,1 + pxor xmm0,xmm4 + test r9,r9 + jnz $L$done + +$L$odd_tail:: + movdqu xmm3,XMMWORD PTR[r8] +DB 102,15,56,0,221 + pxor xmm0,xmm3 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pshufd xmm4,xmm2,78 + pxor xmm3,xmm0 + pxor xmm4,xmm2 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,220,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,5 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm4,xmm0 + pslldq xmm0,8 + psrldq xmm4,8 + pxor xmm0,xmm3 + pxor xmm1,xmm4 + + + movdqa xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm4 + pxor xmm4,xmm1 + psrlq xmm0,1 + pxor xmm0,xmm4 +$L$done:: +DB 102,15,56,0,197 + movdqu XMMWORD PTR[rcx],xmm0 + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + movaps xmm10,XMMWORD PTR[64+rsp] + add rsp,058h + DB 0F3h,0C3h ;repret +$L$SEH_end_gcm_ghash_clmul:: +gcm_ghash_clmul ENDP +ALIGN 64 +$L$bswap_mask:: +DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +$L$0x1c2_polynomial:: +DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0c2h +ALIGN 64 + +$L$rem_4bit:: + DD 0,0,0,471859200,0,943718400,0,610271232 + DD 0,1887436800,0,1822425088,0,1220542464,0,1423966208 + DD 0,3774873600,0,4246732800,0,3644850176,0,3311403008 + DD 0,2441084928,0,2376073216,0,2847932416,0,3051356160 + +$L$rem_8bit:: + DW 00000h,001C2h,00384h,00246h,00708h,006CAh,0048Ch,0054Eh + DW 00E10h,00FD2h,00D94h,00C56h,00918h,008DAh,00A9Ch,00B5Eh + DW 01C20h,01DE2h,01FA4h,01E66h,01B28h,01AEAh,018ACh,0196Eh + DW 01230h,013F2h,011B4h,01076h,01538h,014FAh,016BCh,0177Eh + DW 03840h,03982h,03BC4h,03A06h,03F48h,03E8Ah,03CCCh,03D0Eh + DW 03650h,03792h,035D4h,03416h,03158h,0309Ah,032DCh,0331Eh + DW 02460h,025A2h,027E4h,02626h,02368h,022AAh,020ECh,0212Eh + DW 02A70h,02BB2h,029F4h,02836h,02D78h,02CBAh,02EFCh,02F3Eh + DW 07080h,07142h,07304h,072C6h,07788h,0764Ah,0740Ch,075CEh + DW 07E90h,07F52h,07D14h,07CD6h,07998h,0785Ah,07A1Ch,07BDEh + DW 06CA0h,06D62h,06F24h,06EE6h,06BA8h,06A6Ah,0682Ch,069EEh + DW 062B0h,06372h,06134h,060F6h,065B8h,0647Ah,0663Ch,067FEh + DW 048C0h,04902h,04B44h,04A86h,04FC8h,04E0Ah,04C4Ch,04D8Eh + DW 046D0h,04712h,04554h,04496h,041D8h,0401Ah,0425Ch,0439Eh + DW 054E0h,05522h,05764h,056A6h,053E8h,0522Ah,0506Ch,051AEh + DW 05AF0h,05B32h,05974h,058B6h,05DF8h,05C3Ah,05E7Ch,05FBEh + DW 0E100h,0E0C2h,0E284h,0E346h,0E608h,0E7CAh,0E58Ch,0E44Eh + DW 0EF10h,0EED2h,0EC94h,0ED56h,0E818h,0E9DAh,0EB9Ch,0EA5Eh + DW 0FD20h,0FCE2h,0FEA4h,0FF66h,0FA28h,0FBEAh,0F9ACh,0F86Eh + DW 0F330h,0F2F2h,0F0B4h,0F176h,0F438h,0F5FAh,0F7BCh,0F67Eh + DW 0D940h,0D882h,0DAC4h,0DB06h,0DE48h,0DF8Ah,0DDCCh,0DC0Eh + DW 0D750h,0D692h,0D4D4h,0D516h,0D058h,0D19Ah,0D3DCh,0D21Eh + DW 0C560h,0C4A2h,0C6E4h,0C726h,0C268h,0C3AAh,0C1ECh,0C02Eh + DW 0CB70h,0CAB2h,0C8F4h,0C936h,0CC78h,0CDBAh,0CFFCh,0CE3Eh + DW 09180h,09042h,09204h,093C6h,09688h,0974Ah,0950Ch,094CEh + DW 09F90h,09E52h,09C14h,09DD6h,09898h,0995Ah,09B1Ch,09ADEh + DW 08DA0h,08C62h,08E24h,08FE6h,08AA8h,08B6Ah,0892Ch,088EEh + DW 083B0h,08272h,08034h,081F6h,084B8h,0857Ah,0873Ch,086FEh + DW 0A9C0h,0A802h,0AA44h,0AB86h,0AEC8h,0AF0Ah,0AD4Ch,0AC8Eh + DW 0A7D0h,0A612h,0A454h,0A596h,0A0D8h,0A11Ah,0A35Ch,0A29Eh + DW 0B5E0h,0B422h,0B664h,0B7A6h,0B2E8h,0B32Ah,0B16Ch,0B0AEh + DW 0BBF0h,0BA32h,0B874h,0B9B6h,0BCF8h,0BD3Ah,0BF7Ch,0BEBEh + +DB 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52 +DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 +DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 +DB 114,103,62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + lea rax,QWORD PTR[24+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_gcm_gmult_4bit + DD imagerel $L$SEH_end_gcm_gmult_4bit + DD imagerel $L$SEH_info_gcm_gmult_4bit + + DD imagerel $L$SEH_begin_gcm_ghash_4bit + DD imagerel $L$SEH_end_gcm_ghash_4bit + DD imagerel $L$SEH_info_gcm_ghash_4bit + + DD imagerel $L$SEH_begin_gcm_ghash_clmul + DD imagerel $L$SEH_end_gcm_ghash_clmul + DD imagerel $L$SEH_info_gcm_ghash_clmul + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_gcm_gmult_4bit:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$gmult_prologue,imagerel $L$gmult_epilogue +$L$SEH_info_gcm_ghash_4bit:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$ghash_prologue,imagerel $L$ghash_epilogue +$L$SEH_info_gcm_ghash_clmul:: +DB 001h,01fh,00bh,000h +DB 01fh,0a8h,004h,000h +DB 019h,098h,003h,000h +DB 013h,088h,002h,000h +DB 00dh,078h,001h,000h +DB 008h,068h,000h,000h +DB 004h,0a2h,000h,000h + +.xdata ENDS +END + diff --git a/ext/libressl/crypto/modes/ghash-mingw64-x86_64.S b/ext/libressl/crypto/modes/ghash-mingw64-x86_64.S new file mode 100644 index 0000000..cd0823b --- /dev/null +++ b/ext/libressl/crypto/modes/ghash-mingw64-x86_64.S @@ -0,0 +1,1175 @@ +#include "x86_arch.h" +.text + +.globl gcm_gmult_4bit +.def gcm_gmult_4bit; .scl 2; .type 32; .endef +.p2align 4 +gcm_gmult_4bit: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_gcm_gmult_4bit: + movq %rcx,%rdi + movq %rdx,%rsi + + pushq %rbx + pushq %rbp + pushq %r12 +.Lgmult_prologue: + + movzbq 15(%rdi),%r8 + leaq .Lrem_4bit(%rip),%r11 + xorq %rax,%rax + xorq %rbx,%rbx + movb %r8b,%al + movb %r8b,%bl + shlb $4,%al + movq $14,%rcx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + andb $240,%bl + movq %r8,%rdx + jmp .Loop1 + +.p2align 4 +.Loop1: + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + movb (%rdi,%rcx,1),%al + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + movb %al,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + shlb $4,%al + xorq %r10,%r8 + decq %rcx + js .Lbreak1 + + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + andb $240,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + jmp .Loop1 + +.p2align 4 +.Lbreak1: + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + andb $240,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + xorq %r10,%r8 + xorq (%r11,%rdx,8),%r9 + + bswapq %r8 + bswapq %r9 + movq %r8,8(%rdi) + movq %r9,(%rdi) + + movq 16(%rsp),%rbx + leaq 24(%rsp),%rsp +.Lgmult_epilogue: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_gcm_gmult_4bit: +.globl gcm_ghash_4bit +.def gcm_ghash_4bit; .scl 2; .type 32; .endef +.p2align 4 +gcm_ghash_4bit: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_gcm_ghash_4bit: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $280,%rsp +.Lghash_prologue: + movq %rdx,%r14 + movq %rcx,%r15 + subq $-128,%rsi + leaq 16+128(%rsp),%rbp + xorl %edx,%edx + movq 0+0-128(%rsi),%r8 + movq 0+8-128(%rsi),%rax + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq 16+0-128(%rsi),%r9 + shlb $4,%dl + movq 16+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,0(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,0(%rbp) + movq 32+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,0-128(%rbp) + movq 32+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,1(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,8(%rbp) + movq 48+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,8-128(%rbp) + movq 48+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,2(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,16(%rbp) + movq 64+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,16-128(%rbp) + movq 64+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,3(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,24(%rbp) + movq 80+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,24-128(%rbp) + movq 80+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,4(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,32(%rbp) + movq 96+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,32-128(%rbp) + movq 96+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,5(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,40(%rbp) + movq 112+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,40-128(%rbp) + movq 112+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,6(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,48(%rbp) + movq 128+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,48-128(%rbp) + movq 128+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,7(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,56(%rbp) + movq 144+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,56-128(%rbp) + movq 144+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,8(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,64(%rbp) + movq 160+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,64-128(%rbp) + movq 160+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,9(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,72(%rbp) + movq 176+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,72-128(%rbp) + movq 176+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,10(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,80(%rbp) + movq 192+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,80-128(%rbp) + movq 192+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,11(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,88(%rbp) + movq 208+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,88-128(%rbp) + movq 208+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,12(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,96(%rbp) + movq 224+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,96-128(%rbp) + movq 224+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,13(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,104(%rbp) + movq 240+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,104-128(%rbp) + movq 240+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,14(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,112(%rbp) + shlb $4,%dl + movq %rax,112-128(%rbp) + shlq $60,%r10 + movb %dl,15(%rsp) + orq %r10,%rbx + movq %r9,120(%rbp) + movq %rbx,120-128(%rbp) + addq $-128,%rsi + movq 8(%rdi),%r8 + movq 0(%rdi),%r9 + addq %r14,%r15 + leaq .Lrem_8bit(%rip),%r11 + jmp .Louter_loop +.p2align 4 +.Louter_loop: + xorq (%r14),%r9 + movq 8(%r14),%rdx + leaq 16(%r14),%r14 + xorq %r8,%rdx + movq %r9,(%rdi) + movq %rdx,8(%rdi) + shrq $32,%rdx + xorq %rax,%rax + roll $8,%edx + movb %dl,%al + movzbl %dl,%ebx + shlb $4,%al + shrl $4,%ebx + roll $8,%edx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + movb %dl,%al + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + xorq %r8,%r12 + movq %r9,%r10 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 8(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 0(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + andl $240,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl -4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + movzwq (%r11,%r12,2),%r12 + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + shlq $48,%r12 + xorq %r10,%r8 + xorq %r12,%r9 + movzbq %r8b,%r13 + shrq $4,%r8 + movq %r9,%r10 + shlb $4,%r13b + shrq $4,%r9 + xorq 8(%rsi,%rcx,1),%r8 + movzwq (%r11,%r13,2),%r13 + shlq $60,%r10 + xorq (%rsi,%rcx,1),%r9 + xorq %r10,%r8 + shlq $48,%r13 + bswapq %r8 + xorq %r13,%r9 + bswapq %r9 + cmpq %r15,%r14 + jb .Louter_loop + movq %r8,8(%rdi) + movq %r9,(%rdi) + + leaq 280(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lghash_epilogue: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_gcm_ghash_4bit: +.globl gcm_init_clmul +.def gcm_init_clmul; .scl 2; .type 32; .endef +.p2align 4 +gcm_init_clmul: + movdqu (%rdx),%xmm2 + pshufd $78,%xmm2,%xmm2 + + + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + + + pand .L0x1c2_polynomial(%rip),%xmm5 + pxor %xmm5,%xmm2 + + + movdqa %xmm2,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + movdqu %xmm2,(%rcx) + movdqu %xmm0,16(%rcx) + retq + +.globl gcm_gmult_clmul +.def gcm_gmult_clmul; .scl 2; .type 32; .endef +.p2align 4 +gcm_gmult_clmul: + movdqu (%rcx),%xmm0 + movdqa .Lbswap_mask(%rip),%xmm5 + movdqu (%rdx),%xmm2 +.byte 102,15,56,0,197 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%rcx) + retq + +.globl gcm_ghash_clmul +.def gcm_ghash_clmul; .scl 2; .type 32; .endef +.p2align 4 +gcm_ghash_clmul: +.LSEH_begin_gcm_ghash_clmul: + +.byte 0x48,0x83,0xec,0x58 +.byte 0x0f,0x29,0x34,0x24 +.byte 0x0f,0x29,0x7c,0x24,0x10 +.byte 0x44,0x0f,0x29,0x44,0x24,0x20 +.byte 0x44,0x0f,0x29,0x4c,0x24,0x30 +.byte 0x44,0x0f,0x29,0x54,0x24,0x40 + movdqa .Lbswap_mask(%rip),%xmm5 + + movdqu (%rcx),%xmm0 + movdqu (%rdx),%xmm2 +.byte 102,15,56,0,197 + + subq $16,%r9 + jz .Lodd_tail + + movdqu 16(%rdx),%xmm8 + + + + + + movdqu (%r8),%xmm3 + movdqu 16(%r8),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + pxor %xmm3,%xmm0 + movdqa %xmm6,%xmm7 + pshufd $78,%xmm6,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm6,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,242,0 +.byte 102,15,58,68,250,17 +.byte 102,15,58,68,220,0 + pxor %xmm6,%xmm3 + pxor %xmm7,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm7 + pxor %xmm4,%xmm6 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm8,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm8,%xmm4 + + leaq 32(%r8),%r8 + subq $32,%r9 + jbe .Leven_tail + +.Lmod_loop: +.byte 102,65,15,58,68,192,0 +.byte 102,65,15,58,68,200,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqu (%r8),%xmm3 + pxor %xmm6,%xmm0 + pxor %xmm7,%xmm1 + + movdqu 16(%r8),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + + movdqa %xmm6,%xmm7 + pshufd $78,%xmm6,%xmm9 + pshufd $78,%xmm2,%xmm10 + pxor %xmm6,%xmm9 + pxor %xmm2,%xmm10 + pxor %xmm3,%xmm1 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 +.byte 102,15,58,68,242,0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + +.byte 102,15,58,68,250,17 + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + +.byte 102,69,15,58,68,202,0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm8,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm8,%xmm4 + + pxor %xmm6,%xmm9 + pxor %xmm7,%xmm9 + movdqa %xmm9,%xmm10 + psrldq $8,%xmm9 + pslldq $8,%xmm10 + pxor %xmm9,%xmm7 + pxor %xmm10,%xmm6 + + leaq 32(%r8),%r8 + subq $32,%r9 + ja .Lmod_loop + +.Leven_tail: +.byte 102,65,15,58,68,192,0 +.byte 102,65,15,58,68,200,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm6,%xmm0 + pxor %xmm7,%xmm1 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + testq %r9,%r9 + jnz .Ldone + +.Lodd_tail: + movdqu (%r8),%xmm3 +.byte 102,15,56,0,221 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $5,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm4 + pslldq $8,%xmm0 + psrldq $8,%xmm4 + pxor %xmm3,%xmm0 + pxor %xmm4,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm0 +.Ldone: +.byte 102,15,56,0,197 + movdqu %xmm0,(%rcx) + movaps (%rsp),%xmm6 + movaps 16(%rsp),%xmm7 + movaps 32(%rsp),%xmm8 + movaps 48(%rsp),%xmm9 + movaps 64(%rsp),%xmm10 + addq $88,%rsp + retq +.LSEH_end_gcm_ghash_clmul: + +.p2align 6 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.L0x1c2_polynomial: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.p2align 6 + +.Lrem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 + +.Lrem_8bit: +.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E +.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E +.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E +.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E +.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E +.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E +.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E +.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E +.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE +.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE +.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE +.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE +.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E +.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E +.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE +.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE +.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E +.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E +.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E +.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E +.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E +.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E +.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E +.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E +.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE +.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE +.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE +.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE +.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E +.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E +.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE +.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE + +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 + +.def se_handler; .scl 3; .type 32; .endef +.p2align 4 +se_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + movq 8(%r9),%rsi + movq 56(%r9),%r11 + + movl 0(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jb .Lin_prologue + + movq 152(%r8),%rax + + movl 4(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jae .Lin_prologue + + leaq 24(%rax),%rax + + movq -8(%rax),%rbx + movq -16(%rax),%rbp + movq -24(%rax),%r12 + movq %rbx,144(%r8) + movq %rbp,160(%r8) + movq %r12,216(%r8) + +.Lin_prologue: + movq 8(%rax),%rdi + movq 16(%rax),%rsi + movq %rax,152(%r8) + movq %rsi,168(%r8) + movq %rdi,176(%r8) + + movq 40(%r9),%rdi + movq %r8,%rsi + movl $154,%ecx +.long 0xa548f3fc + + movq %r9,%rsi + xorq %rcx,%rcx + movq 8(%rsi),%rdx + movq 0(%rsi),%r8 + movq 16(%rsi),%r9 + movq 40(%rsi),%r10 + leaq 56(%rsi),%r11 + leaq 24(%rsi),%r12 + movq %r10,32(%rsp) + movq %r11,40(%rsp) + movq %r12,48(%rsp) + movq %rcx,56(%rsp) + call *__imp_RtlVirtualUnwind(%rip) + + movl $1,%eax + addq $64,%rsp + popfq + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + popq %rdi + popq %rsi + retq + + +.section .pdata +.p2align 2 +.rva .LSEH_begin_gcm_gmult_4bit +.rva .LSEH_end_gcm_gmult_4bit +.rva .LSEH_info_gcm_gmult_4bit + +.rva .LSEH_begin_gcm_ghash_4bit +.rva .LSEH_end_gcm_ghash_4bit +.rva .LSEH_info_gcm_ghash_4bit + +.rva .LSEH_begin_gcm_ghash_clmul +.rva .LSEH_end_gcm_ghash_clmul +.rva .LSEH_info_gcm_ghash_clmul + +.section .xdata +.p2align 3 +.LSEH_info_gcm_gmult_4bit: +.byte 9,0,0,0 +.rva se_handler +.rva .Lgmult_prologue,.Lgmult_epilogue +.LSEH_info_gcm_ghash_4bit: +.byte 9,0,0,0 +.rva se_handler +.rva .Lghash_prologue,.Lghash_epilogue +.LSEH_info_gcm_ghash_clmul: +.byte 0x01,0x1f,0x0b,0x00 +.byte 0x1f,0xa8,0x04,0x00 +.byte 0x19,0x98,0x03,0x00 +.byte 0x13,0x88,0x02,0x00 +.byte 0x0d,0x78,0x01,0x00 +.byte 0x08,0x68,0x00,0x00 +.byte 0x04,0xa2,0x00,0x00 diff --git a/ext/libressl/crypto/modes/modes_lcl.h b/ext/libressl/crypto/modes/modes_lcl.h new file mode 100644 index 0000000..bfea189 --- /dev/null +++ b/ext/libressl/crypto/modes/modes_lcl.h @@ -0,0 +1,113 @@ +/* $OpenBSD: modes_lcl.h,v 1.10 2016/12/21 15:49:29 jsing Exp $ */ +/* ==================================================================== + * Copyright (c) 2010 The OpenSSL Project. All rights reserved. + * + * Redistribution and use is governed by OpenSSL license. + * ==================================================================== + */ + +#include + +#include + +#include + +__BEGIN_HIDDEN_DECLS + +#if defined(_LP64) +typedef long i64; +typedef unsigned long u64; +#define U64(C) C##UL +#else +typedef long long i64; +typedef unsigned long long u64; +#define U64(C) C##ULL +#endif + +typedef unsigned int u32; +typedef unsigned char u8; + +#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) +#if defined(__GNUC__) && __GNUC__>=2 +# if defined(__x86_64) || defined(__x86_64__) +# define BSWAP8(x) ({ u64 ret=(x); \ + asm ("bswapq %0" \ + : "+r"(ret)); ret; }) +# define BSWAP4(x) ({ u32 ret=(x); \ + asm ("bswapl %0" \ + : "+r"(ret)); ret; }) +# elif (defined(__i386) || defined(__i386__)) +# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \ + asm ("bswapl %0; bswapl %1" \ + : "+r"(hi),"+r"(lo)); \ + (u64)hi<<32|lo; }) +# define BSWAP4(x) ({ u32 ret=(x); \ + asm ("bswapl %0" \ + : "+r"(ret)); ret; }) +# elif (defined(__arm__) || defined(__arm)) && !defined(__STRICT_ALIGNMENT) +# if (__ARM_ARCH >= 6) +# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \ + asm ("rev %0,%0; rev %1,%1" \ + : "+r"(hi),"+r"(lo)); \ + (u64)hi<<32|lo; }) +# define BSWAP4(x) ({ u32 ret; \ + asm ("rev %0,%1" \ + : "=r"(ret) : "r"((u32)(x))); \ + ret; }) +# endif +# endif +#endif +#endif + +#if defined(BSWAP4) && !defined(__STRICT_ALIGNMENT) +#define GETU32(p) BSWAP4(*(const u32 *)(p)) +#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v) +#else +#define GETU32(p) ((u32)(p)[0]<<24|(u32)(p)[1]<<16|(u32)(p)[2]<<8|(u32)(p)[3]) +#define PUTU32(p,v) ((p)[0]=(u8)((v)>>24),(p)[1]=(u8)((v)>>16),(p)[2]=(u8)((v)>>8),(p)[3]=(u8)(v)) +#endif + +/* GCM definitions */ + +typedef struct { u64 hi,lo; } u128; + +#ifdef TABLE_BITS +#undef TABLE_BITS +#endif +/* + * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should + * never be set to 8 [or 1]. For further information see gcm128.c. + */ +#define TABLE_BITS 4 + +struct gcm128_context { + /* Following 6 names follow names in GCM specification */ + union { u64 u[2]; u32 d[4]; u8 c[16]; size_t t[16/sizeof(size_t)]; } + Yi,EKi,EK0,len,Xi,H; + /* Relative position of Xi, H and pre-computed Htable is used + * in some assembler modules, i.e. don't change the order! */ +#if TABLE_BITS==8 + u128 Htable[256]; +#else + u128 Htable[16]; + void (*gmult)(u64 Xi[2],const u128 Htable[16]); + void (*ghash)(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); +#endif + unsigned int mres, ares; + block128_f block; + void *key; +}; + +struct xts128_context { + void *key1, *key2; + block128_f block1,block2; +}; + +struct ccm128_context { + union { u64 u[2]; u8 c[16]; } nonce, cmac; + u64 blocks; + block128_f block; + void *key; +}; + +__END_HIDDEN_DECLS diff --git a/ext/libressl/crypto/modes/ofb128.c b/ext/libressl/crypto/modes/ofb128.c new file mode 100644 index 0000000..c6ca67a --- /dev/null +++ b/ext/libressl/crypto/modes/ofb128.c @@ -0,0 +1,119 @@ +/* $OpenBSD: ofb128.c,v 1.4 2015/02/10 09:46:30 miod Exp $ */ +/* ==================================================================== + * Copyright (c) 2008 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + */ + +#include +#include "modes_lcl.h" +#include + +#ifndef MODES_DEBUG +# ifndef NDEBUG +# define NDEBUG +# endif +#endif + +/* The input and output encrypted as though 128bit ofb mode is being + * used. The extra state information to record how much of the + * 128bit block we have used is contained in *num; + */ +void CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const void *key, + unsigned char ivec[16], int *num, + block128_f block) +{ + unsigned int n; + size_t l=0; + + n = *num; + +#if !defined(OPENSSL_SMALL_FOOTPRINT) + if (16%sizeof(size_t) == 0) do { /* always true actually */ + while (n && len) { + *(out++) = *(in++) ^ ivec[n]; + --len; + n = (n+1) % 16; + } +#ifdef __STRICT_ALIGNMENT + if (((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0) + break; +#endif + while (len>=16) { + (*block)(ivec, ivec, key); + for (; n<16; n+=sizeof(size_t)) + *(size_t*)(out+n) = + *(size_t*)(in+n) ^ *(size_t*)(ivec+n); + len -= 16; + out += 16; + in += 16; + n = 0; + } + if (len) { + (*block)(ivec, ivec, key); + while (len--) { + out[n] = in[n] ^ ivec[n]; + ++n; + } + } + *num = n; + return; + } while(0); + /* the rest would be commonly eliminated by x86* compiler */ +#endif + while (l +#include +#include "modes_lcl.h" +#include + +#ifndef MODES_DEBUG +# ifndef NDEBUG +# define NDEBUG +# endif +#endif + +int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16], + const unsigned char *inp, unsigned char *out, + size_t len, int enc) +{ + union { u64 u[2]; u32 d[4]; u8 c[16]; } tweak, scratch; + unsigned int i; + + if (len<16) return -1; + + memcpy(tweak.c, iv, 16); + + (*ctx->block2)(tweak.c,tweak.c,ctx->key2); + + if (!enc && (len%16)) len-=16; + + while (len>=16) { +#ifdef __STRICT_ALIGNMENT + memcpy(scratch.c,inp,16); + scratch.u[0] ^= tweak.u[0]; + scratch.u[1] ^= tweak.u[1]; +#else + scratch.u[0] = ((u64*)inp)[0]^tweak.u[0]; + scratch.u[1] = ((u64*)inp)[1]^tweak.u[1]; +#endif + (*ctx->block1)(scratch.c,scratch.c,ctx->key1); +#ifdef __STRICT_ALIGNMENT + scratch.u[0] ^= tweak.u[0]; + scratch.u[1] ^= tweak.u[1]; + memcpy(out,scratch.c,16); +#else + ((u64*)out)[0] = scratch.u[0]^=tweak.u[0]; + ((u64*)out)[1] = scratch.u[1]^=tweak.u[1]; +#endif + inp += 16; + out += 16; + len -= 16; + + if (len==0) return 0; + +#if BYTE_ORDER == LITTLE_ENDIAN + unsigned int carry,res; + + res = 0x87&(((int)tweak.d[3])>>31); + carry = (unsigned int)(tweak.u[0]>>63); + tweak.u[0] = (tweak.u[0]<<1)^res; + tweak.u[1] = (tweak.u[1]<<1)|carry; +#else /* BIG_ENDIAN */ + size_t c; + + for (c=0,i=0;i<16;++i) { + /*+ substitutes for |, because c is 1 bit */ + c += ((size_t)tweak.c[i])<<1; + tweak.c[i] = (u8)c; + c = c>>8; + } + tweak.c[0] ^= (u8)(0x87&(0-c)); +#endif + } + if (enc) { + for (i=0;iblock1)(scratch.c,scratch.c,ctx->key1); + scratch.u[0] ^= tweak.u[0]; + scratch.u[1] ^= tweak.u[1]; + memcpy(out-16,scratch.c,16); + } + else { + union { u64 u[2]; u8 c[16]; } tweak1; + +#if BYTE_ORDER == LITTLE_ENDIAN + unsigned int carry,res; + + res = 0x87&(((int)tweak.d[3])>>31); + carry = (unsigned int)(tweak.u[0]>>63); + tweak1.u[0] = (tweak.u[0]<<1)^res; + tweak1.u[1] = (tweak.u[1]<<1)|carry; +#else + size_t c; + + for (c=0,i=0;i<16;++i) { + /*+ substitutes for |, because c is 1 bit */ + c += ((size_t)tweak.c[i])<<1; + tweak1.c[i] = (u8)c; + c = c>>8; + } + tweak1.c[0] ^= (u8)(0x87&(0-c)); +#endif +#ifdef __STRICT_ALIGNMENT + memcpy(scratch.c,inp,16); + scratch.u[0] ^= tweak1.u[0]; + scratch.u[1] ^= tweak1.u[1]; +#else + scratch.u[0] = ((u64*)inp)[0]^tweak1.u[0]; + scratch.u[1] = ((u64*)inp)[1]^tweak1.u[1]; +#endif + (*ctx->block1)(scratch.c,scratch.c,ctx->key1); + scratch.u[0] ^= tweak1.u[0]; + scratch.u[1] ^= tweak1.u[1]; + + for (i=0;iblock1)(scratch.c,scratch.c,ctx->key1); +#ifdef __STRICT_ALIGNMENT + scratch.u[0] ^= tweak.u[0]; + scratch.u[1] ^= tweak.u[1]; + memcpy (out,scratch.c,16); +#else + ((u64*)out)[0] = scratch.u[0]^tweak.u[0]; + ((u64*)out)[1] = scratch.u[1]^tweak.u[1]; +#endif + } + + return 0; +} diff --git a/ext/libressl/crypto/poly1305/Makefile b/ext/libressl/crypto/poly1305/Makefile new file mode 100644 index 0000000..94ceaf6 --- /dev/null +++ b/ext/libressl/crypto/poly1305/Makefile @@ -0,0 +1,13 @@ +include ../../ssl_common.mk + +obj = poly1305.o + + +all: $(obj) +dep: all + +%.o: %.c + $(CC) $(CFLAGS) -c $< + +clean: + rm -f *.o *.a diff --git a/ext/libressl/crypto/poly1305/poly1305-donna.c b/ext/libressl/crypto/poly1305/poly1305-donna.c new file mode 100644 index 0000000..773ea4e --- /dev/null +++ b/ext/libressl/crypto/poly1305/poly1305-donna.c @@ -0,0 +1,321 @@ +/* $OpenBSD: poly1305-donna.c,v 1.3 2014/06/12 15:49:30 deraadt Exp $ */ +/* + * Public Domain poly1305 from Andrew Moon + * Based on poly1305-donna.c, poly1305-donna-32.h and poly1305-donna.h from: + * https://github.com/floodyberry/poly1305-donna + */ + +#include + +static inline void poly1305_init(poly1305_context *ctx, + const unsigned char key[32]); +static inline void poly1305_update(poly1305_context *ctx, + const unsigned char *m, size_t bytes); +static inline void poly1305_finish(poly1305_context *ctx, + unsigned char mac[16]); + +/* + * poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication + * and 64 bit addition. + */ + +#define poly1305_block_size 16 + +/* 17 + sizeof(size_t) + 14*sizeof(unsigned long) */ +typedef struct poly1305_state_internal_t { + unsigned long r[5]; + unsigned long h[5]; + unsigned long pad[4]; + size_t leftover; + unsigned char buffer[poly1305_block_size]; + unsigned char final; +} poly1305_state_internal_t; + +/* interpret four 8 bit unsigned integers as a 32 bit unsigned integer in little endian */ +static unsigned long +U8TO32(const unsigned char *p) +{ + return (((unsigned long)(p[0] & 0xff)) | + ((unsigned long)(p[1] & 0xff) << 8) | + ((unsigned long)(p[2] & 0xff) << 16) | + ((unsigned long)(p[3] & 0xff) << 24)); +} + +/* store a 32 bit unsigned integer as four 8 bit unsigned integers in little endian */ +static void +U32TO8(unsigned char *p, unsigned long v) +{ + p[0] = (v) & 0xff; + p[1] = (v >> 8) & 0xff; + p[2] = (v >> 16) & 0xff; + p[3] = (v >> 24) & 0xff; +} + +static inline void +poly1305_init(poly1305_context *ctx, const unsigned char key[32]) +{ + poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; + + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ + st->r[0] = (U8TO32(&key[0])) & 0x3ffffff; + st->r[1] = (U8TO32(&key[3]) >> 2) & 0x3ffff03; + st->r[2] = (U8TO32(&key[6]) >> 4) & 0x3ffc0ff; + st->r[3] = (U8TO32(&key[9]) >> 6) & 0x3f03fff; + st->r[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff; + + /* h = 0 */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->h[3] = 0; + st->h[4] = 0; + + /* save pad for later */ + st->pad[0] = U8TO32(&key[16]); + st->pad[1] = U8TO32(&key[20]); + st->pad[2] = U8TO32(&key[24]); + st->pad[3] = U8TO32(&key[28]); + + st->leftover = 0; + st->final = 0; +} + +static void +poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, size_t bytes) +{ + const unsigned long hibit = (st->final) ? 0 : (1 << 24); /* 1 << 128 */ + unsigned long r0, r1, r2, r3, r4; + unsigned long s1, s2, s3, s4; + unsigned long h0, h1, h2, h3, h4; + unsigned long long d0, d1, d2, d3, d4; + unsigned long c; + + r0 = st->r[0]; + r1 = st->r[1]; + r2 = st->r[2]; + r3 = st->r[3]; + r4 = st->r[4]; + + s1 = r1 * 5; + s2 = r2 * 5; + s3 = r3 * 5; + s4 = r4 * 5; + + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; + + while (bytes >= poly1305_block_size) { + /* h += m[i] */ + h0 += (U8TO32(m + 0)) & 0x3ffffff; + h1 += (U8TO32(m + 3) >> 2) & 0x3ffffff; + h2 += (U8TO32(m + 6) >> 4) & 0x3ffffff; + h3 += (U8TO32(m + 9) >> 6) & 0x3ffffff; + h4 += (U8TO32(m + 12) >> 8) | hibit; + + /* h *= r */ + d0 = ((unsigned long long)h0 * r0) + + ((unsigned long long)h1 * s4) + + ((unsigned long long)h2 * s3) + + ((unsigned long long)h3 * s2) + + ((unsigned long long)h4 * s1); + d1 = ((unsigned long long)h0 * r1) + + ((unsigned long long)h1 * r0) + + ((unsigned long long)h2 * s4) + + ((unsigned long long)h3 * s3) + + ((unsigned long long)h4 * s2); + d2 = ((unsigned long long)h0 * r2) + + ((unsigned long long)h1 * r1) + + ((unsigned long long)h2 * r0) + + ((unsigned long long)h3 * s4) + + ((unsigned long long)h4 * s3); + d3 = ((unsigned long long)h0 * r3) + + ((unsigned long long)h1 * r2) + + ((unsigned long long)h2 * r1) + + ((unsigned long long)h3 * r0) + + ((unsigned long long)h4 * s4); + d4 = ((unsigned long long)h0 * r4) + + ((unsigned long long)h1 * r3) + + ((unsigned long long)h2 * r2) + + ((unsigned long long)h3 * r1) + + ((unsigned long long)h4 * r0); + + /* (partial) h %= p */ + c = (unsigned long)(d0 >> 26); + h0 = (unsigned long)d0 & 0x3ffffff; + d1 += c; + c = (unsigned long)(d1 >> 26); + h1 = (unsigned long)d1 & 0x3ffffff; + d2 += c; + c = (unsigned long)(d2 >> 26); + h2 = (unsigned long)d2 & 0x3ffffff; + d3 += c; + c = (unsigned long)(d3 >> 26); + h3 = (unsigned long)d3 & 0x3ffffff; + d4 += c; + c = (unsigned long)(d4 >> 26); + h4 = (unsigned long)d4 & 0x3ffffff; + h0 += c * 5; + c = (h0 >> 26); + h0 = h0 & 0x3ffffff; + h1 += c; + + m += poly1305_block_size; + bytes -= poly1305_block_size; + } + + st->h[0] = h0; + st->h[1] = h1; + st->h[2] = h2; + st->h[3] = h3; + st->h[4] = h4; +} + +static inline void +poly1305_update(poly1305_context *ctx, const unsigned char *m, size_t bytes) +{ + poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; + size_t i; + + /* handle leftover */ + if (st->leftover) { + size_t want = (poly1305_block_size - st->leftover); + if (want > bytes) + want = bytes; + for (i = 0; i < want; i++) + st->buffer[st->leftover + i] = m[i]; + bytes -= want; + m += want; + st->leftover += want; + if (st->leftover < poly1305_block_size) + return; + poly1305_blocks(st, st->buffer, poly1305_block_size); + st->leftover = 0; + } + + /* process full blocks */ + if (bytes >= poly1305_block_size) { + size_t want = (bytes & ~(poly1305_block_size - 1)); + poly1305_blocks(st, m, want); + m += want; + bytes -= want; + } + + /* store leftover */ + if (bytes) { + for (i = 0; i < bytes; i++) + st->buffer[st->leftover + i] = m[i]; + st->leftover += bytes; + } +} + +static inline void +poly1305_finish(poly1305_context *ctx, unsigned char mac[16]) +{ + poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; + unsigned long h0, h1, h2, h3, h4, c; + unsigned long g0, g1, g2, g3, g4; + unsigned long long f; + unsigned long mask; + + /* process the remaining block */ + if (st->leftover) { + size_t i = st->leftover; + st->buffer[i++] = 1; + for (; i < poly1305_block_size; i++) + st->buffer[i] = 0; + st->final = 1; + poly1305_blocks(st, st->buffer, poly1305_block_size); + } + + /* fully carry h */ + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; + + c = h1 >> 26; + h1 = h1 & 0x3ffffff; + h2 += c; + c = h2 >> 26; + h2 = h2 & 0x3ffffff; + h3 += c; + c = h3 >> 26; + h3 = h3 & 0x3ffffff; + h4 += c; + c = h4 >> 26; + h4 = h4 & 0x3ffffff; + h0 += c * 5; + c = h0 >> 26; + h0 = h0 & 0x3ffffff; + h1 += c; + + /* compute h + -p */ + g0 = h0 + 5; + c = g0 >> 26; + g0 &= 0x3ffffff; + g1 = h1 + c; + c = g1 >> 26; + g1 &= 0x3ffffff; + g2 = h2 + c; + c = g2 >> 26; + g2 &= 0x3ffffff; + g3 = h3 + c; + c = g3 >> 26; + g3 &= 0x3ffffff; + g4 = h4 + c - (1 << 26); + + /* select h if h < p, or h + -p if h >= p */ + mask = (g4 >> ((sizeof(unsigned long) * 8) - 1)) - 1; + g0 &= mask; + g1 &= mask; + g2 &= mask; + g3 &= mask; + g4 &= mask; + mask = ~mask; + h0 = (h0 & mask) | g0; + h1 = (h1 & mask) | g1; + h2 = (h2 & mask) | g2; + h3 = (h3 & mask) | g3; + h4 = (h4 & mask) | g4; + + /* h = h % (2^128) */ + h0 = ((h0) | (h1 << 26)) & 0xffffffff; + h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; + h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff; + h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; + + /* mac = (h + pad) % (2^128) */ + f = (unsigned long long)h0 + st->pad[0]; + h0 = (unsigned long)f; + f = (unsigned long long)h1 + st->pad[1] + (f >> 32); + h1 = (unsigned long)f; + f = (unsigned long long)h2 + st->pad[2] + (f >> 32); + h2 = (unsigned long)f; + f = (unsigned long long)h3 + st->pad[3] + (f >> 32); + h3 = (unsigned long)f; + + U32TO8(mac + 0, h0); + U32TO8(mac + 4, h1); + U32TO8(mac + 8, h2); + U32TO8(mac + 12, h3); + + /* zero out the state */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->h[3] = 0; + st->h[4] = 0; + st->r[0] = 0; + st->r[1] = 0; + st->r[2] = 0; + st->r[3] = 0; + st->r[4] = 0; + st->pad[0] = 0; + st->pad[1] = 0; + st->pad[2] = 0; + st->pad[3] = 0; +} diff --git a/ext/libressl/crypto/poly1305/poly1305.c b/ext/libressl/crypto/poly1305/poly1305.c new file mode 100644 index 0000000..75a34cc --- /dev/null +++ b/ext/libressl/crypto/poly1305/poly1305.c @@ -0,0 +1,38 @@ +/* $OpenBSD: poly1305.c,v 1.3 2014/06/12 15:49:30 deraadt Exp $ */ +/* + * Copyright (c) 2014 Joel Sing + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include "poly1305-donna.c" + +void +CRYPTO_poly1305_init(poly1305_context *ctx, const unsigned char key[32]) +{ + poly1305_init(ctx, key); +} + +void +CRYPTO_poly1305_update(poly1305_context *ctx, const unsigned char *in, + size_t len) +{ + poly1305_update(ctx, in, len); +} + +void +CRYPTO_poly1305_finish(poly1305_context *ctx, unsigned char mac[16]) +{ + poly1305_finish(ctx, mac); +} diff --git a/ext/libressl/crypto/sha/Makefile b/ext/libressl/crypto/sha/Makefile new file mode 100644 index 0000000..6eb0c20 --- /dev/null +++ b/ext/libressl/crypto/sha/Makefile @@ -0,0 +1,14 @@ +include ../../ssl_common.mk +CFLAGS += -D__BEGIN_HIDDEN_DECLS= -D__END_HIDDEN_DECLS= + +obj = sha1dgst.o sha1_one.o sha256.o sha512.o + + +all: $(obj) +dep: all + +%.o: %.c + $(CC) $(CFLAGS) -c $< + +clean: + rm -f *.o *.a diff --git a/ext/libressl/crypto/sha/sha1-elf-armv4.S b/ext/libressl/crypto/sha/sha1-elf-armv4.S new file mode 100644 index 0000000..5aeaf7c --- /dev/null +++ b/ext/libressl/crypto/sha/sha1-elf-armv4.S @@ -0,0 +1,455 @@ +#include "arm_arch.h" + +.text + +.global sha1_block_data_order +.type sha1_block_data_order,%function + +.align 2 +sha1_block_data_order: + stmdb sp!,{r4-r12,lr} + add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 + ldmia r0,{r3,r4,r5,r6,r7} +.Lloop: + ldr r8,.LK_00_19 + mov r14,sp + sub sp,sp,#15*4 + mov r5,r5,ror#30 + mov r6,r6,ror#30 + mov r7,r7,ror#30 @ [6] +.L_00_15: +#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT) + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r7,r8,r7,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 + eor r10,r5,r6 @ F_xx_xx + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r4,r10,ror#2 + add r7,r7,r9 @ E+=X[i] + eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r7,r7,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT) + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r6,r8,r6,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r4,r5 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r6,r8,r6,ror#2 @ E+=K_00_19 + eor r10,r4,r5 @ F_xx_xx + add r6,r6,r7,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r3,r10,ror#2 + add r6,r6,r9 @ E+=X[i] + eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r6,r6,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT) + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r5,r8,r5,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r3,r4 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r5,r8,r5,ror#2 @ E+=K_00_19 + eor r10,r3,r4 @ F_xx_xx + add r5,r5,r6,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r7,r10,ror#2 + add r5,r5,r9 @ E+=X[i] + eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r5,r5,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT) + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r4,r8,r4,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r7,r3 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r4,r8,r4,ror#2 @ E+=K_00_19 + eor r10,r7,r3 @ F_xx_xx + add r4,r4,r5,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r6,r10,ror#2 + add r4,r4,r9 @ E+=X[i] + eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r4,r4,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT) + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r3,r8,r3,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r6,r7 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r3,r8,r3,ror#2 @ E+=K_00_19 + eor r10,r6,r7 @ F_xx_xx + add r3,r3,r4,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r5,r10,ror#2 + add r3,r3,r9 @ E+=X[i] + eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r3,r3,r10 @ E+=F_00_19(B,C,D) + teq r14,sp + bne .L_00_15 @ [((11+4)*5+2)*3] + sub sp,sp,#25*4 +#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT) + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r7,r8,r7,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 + eor r10,r5,r6 @ F_xx_xx + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r4,r10,ror#2 + add r7,r7,r9 @ E+=X[i] + eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r7,r7,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r3,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) + add r6,r6,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r7,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) + add r5,r5,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r6,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) + add r4,r4,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r5,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) + add r3,r3,r10 @ E+=F_00_19(B,C,D) + + ldr r8,.LK_20_39 @ [+15+16*4] + cmn sp,#0 @ [+3], clear carry to denote 20_39 +.L_20_39_or_60_79: + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r7,r8,r7,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r5,r6 @ F_xx_xx + mov r9,r9,ror#31 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r4,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r7,r7,r9 @ E+=X[i] + add r7,r7,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r3,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + add r6,r6,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r7,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + add r5,r5,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r6,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + add r4,r4,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r5,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + add r3,r3,r10 @ E+=F_20_39(B,C,D) + teq r14,sp @ preserve carry + bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] + bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes + + ldr r8,.LK_40_59 + sub sp,sp,#20*4 @ [+2] +.L_40_59: + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r7,r8,r7,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r5,r6 @ F_xx_xx + mov r9,r9,ror#31 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r4,r10,ror#2 @ F_xx_xx + and r11,r5,r6 @ F_xx_xx + add r7,r7,r9 @ E+=X[i] + add r7,r7,r10 @ E+=F_40_59(B,C,D) + add r7,r7,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r3,r10,ror#2 @ F_xx_xx + and r11,r4,r5 @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + add r6,r6,r10 @ E+=F_40_59(B,C,D) + add r6,r6,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r7,r10,ror#2 @ F_xx_xx + and r11,r3,r4 @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + add r5,r5,r10 @ E+=F_40_59(B,C,D) + add r5,r5,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r6,r10,ror#2 @ F_xx_xx + and r11,r7,r3 @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + add r4,r4,r10 @ E+=F_40_59(B,C,D) + add r4,r4,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r5,r10,ror#2 @ F_xx_xx + and r11,r6,r7 @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + add r3,r3,r10 @ E+=F_40_59(B,C,D) + add r3,r3,r11,ror#2 + teq r14,sp + bne .L_40_59 @ [+((12+5)*5+2)*4] + + ldr r8,.LK_60_79 + sub sp,sp,#20*4 + cmp sp,#0 @ set carry to denote 60_79 + b .L_20_39_or_60_79 @ [+4], spare 300 bytes +.L_done: + add sp,sp,#80*4 @ "deallocate" stack frame + ldmia r0,{r8,r9,r10,r11,r12} + add r3,r8,r3 + add r4,r9,r4 + add r5,r10,r5,ror#2 + add r6,r11,r6,ror#2 + add r7,r12,r7,ror#2 + stmia r0,{r3,r4,r5,r6,r7} + teq r1,r2 + bne .Lloop @ [+18], total 1307 + +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.align 2 +.LK_00_19: .word 0x5a827999 +.LK_20_39: .word 0x6ed9eba1 +.LK_40_59: .word 0x8f1bbcdc +.LK_60_79: .word 0xca62c1d6 +.size sha1_block_data_order,.-sha1_block_data_order +.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by " +.align 2 +#if defined(HAVE_GNU_STACK) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/ext/libressl/crypto/sha/sha1-elf-x86_64.S b/ext/libressl/crypto/sha/sha1-elf-x86_64.S new file mode 100644 index 0000000..5a37019 --- /dev/null +++ b/ext/libressl/crypto/sha/sha1-elf-x86_64.S @@ -0,0 +1,2491 @@ +#include "x86_arch.h" +.text + +.hidden OPENSSL_ia32cap_P + +.globl sha1_block_data_order +.type sha1_block_data_order,@function +.align 16 +sha1_block_data_order: + movl OPENSSL_ia32cap_P+0(%rip),%r9d + movl OPENSSL_ia32cap_P+4(%rip),%r8d + testl $IA32CAP_MASK1_SSSE3,%r8d + jz .Lialu + jmp _ssse3_shortcut + +.align 16 +.Lialu: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + movq %rsp,%r11 + movq %rdi,%r8 + subq $72,%rsp + movq %rsi,%r9 + andq $-64,%rsp + movq %rdx,%r10 + movq %r11,64(%rsp) +.Lprologue: + + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp .Lloop + +.align 16 +.Lloop: + movl 0(%r9),%edx + bswapl %edx + movl %edx,0(%rsp) + movl %r11d,%eax + movl 4(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,4(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 8(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,8(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 12(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,12(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 16(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,16(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 20(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,20(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 24(%r9),%edx + movl %esi,%ecx + xorl %r12d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r13,1),%r13d + andl %edi,%eax + movl %edx,24(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 28(%r9),%ebp + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r12,1),%r12d + andl %esi,%eax + movl %ebp,28(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 32(%r9),%edx + movl %r12d,%ecx + xorl %edi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r11,1),%r11d + andl %r13d,%eax + movl %edx,32(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 36(%r9),%ebp + movl %r11d,%ecx + xorl %esi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rdi,1),%edi + andl %r12d,%eax + movl %ebp,36(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 40(%r9),%edx + movl %edi,%ecx + xorl %r13d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rsi,1),%esi + andl %r11d,%eax + movl %edx,40(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 44(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,44(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 48(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,48(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 52(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,52(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 56(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,56(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 60(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,60(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 0(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 32(%rsp),%edx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + xorl 52(%rsp),%edx + xorl %r12d,%eax + roll $1,%edx + addl %ecx,%r13d + roll $30,%edi + movl %edx,0(%rsp) + addl %eax,%r13d + movl 4(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 36(%rsp),%ebp + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + xorl 56(%rsp),%ebp + xorl %r11d,%eax + roll $1,%ebp + addl %ecx,%r12d + roll $30,%esi + movl %ebp,4(%rsp) + addl %eax,%r12d + movl 8(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + xorl 60(%rsp),%edx + xorl %edi,%eax + roll $1,%edx + addl %ecx,%r11d + roll $30,%r13d + movl %edx,8(%rsp) + addl %eax,%r11d + movl 12(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + xorl 0(%rsp),%ebp + xorl %esi,%eax + roll $1,%ebp + addl %ecx,%edi + roll $30,%r12d + movl %ebp,12(%rsp) + addl %eax,%edi + movl 16(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + xorl 4(%rsp),%edx + xorl %r13d,%eax + roll $1,%edx + addl %ecx,%esi + roll $30,%r11d + movl %edx,16(%rsp) + addl %eax,%esi + movl 20(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 28(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 52(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 8(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 32(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 56(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 12(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 36(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 60(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 16(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 0(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 20(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 4(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 24(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 48(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 8(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 28(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 52(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 12(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 32(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 56(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 16(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 36(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 20(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 40(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 24(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 44(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 28(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 48(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 32(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 52(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 36(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 56(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 40(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 60(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 44(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 0(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 48(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 4(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 52(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 8(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 56(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 12(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 60(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 16(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 0(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 20(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 44(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,36(%rsp) + addl %ecx,%r13d + movl 40(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 48(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,40(%rsp) + addl %ecx,%r12d + movl 44(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 52(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,44(%rsp) + addl %ecx,%r11d + movl 48(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,48(%rsp) + addl %ecx,%edi + movl 52(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 60(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 40(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,52(%rsp) + addl %ecx,%esi + movl 56(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 0(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 44(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,56(%rsp) + addl %ecx,%r13d + movl 60(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 4(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 48(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl %ebx,%r12d + roll $30,%esi + movl %ebp,60(%rsp) + addl %ecx,%r12d + movl 0(%rsp),%edx + movl %esi,%eax + movl %esi,%ebx + xorl 8(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 52(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,0(%rsp) + addl %ecx,%r11d + movl 4(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 12(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 56(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,4(%rsp) + addl %ecx,%edi + movl 8(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 16(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 60(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,8(%rsp) + addl %ecx,%esi + movl 12(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 20(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 44(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 0(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,12(%rsp) + addl %ecx,%r13d + movl 16(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 24(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 48(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 4(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,16(%rsp) + addl %ecx,%r12d + movl 20(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 28(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 8(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,20(%rsp) + addl %ecx,%r11d + movl 24(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 32(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 12(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,24(%rsp) + addl %ecx,%edi + movl 28(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 36(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 16(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,28(%rsp) + addl %ecx,%esi + movl 32(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 40(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 0(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 20(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,32(%rsp) + addl %ecx,%r13d + movl 36(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 44(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 4(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl %ebx,%r12d + roll $30,%esi + movl %ebp,36(%rsp) + addl %ecx,%r12d + movl 40(%rsp),%edx + movl %esi,%eax + movl %esi,%ebx + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,40(%rsp) + addl %ecx,%r11d + movl 44(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 52(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 12(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,44(%rsp) + addl %ecx,%edi + movl 48(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 56(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,48(%rsp) + addl %ecx,%esi + movl 52(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 20(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 40(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 0(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 24(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 44(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 4(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 28(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 48(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 32(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 52(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 36(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 56(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 40(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 60(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 44(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 0(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 48(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 4(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 28(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 52(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 8(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 32(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rsi,1),%esi + xorl 56(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 12(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 36(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 60(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 16(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 0(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 20(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 4(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 24(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 48(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 8(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 28(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 52(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 12(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 32(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 56(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 16(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 36(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 60(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 20(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 40(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl 56(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 24(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 44(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl 60(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 28(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 48(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl %r11d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r13d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz .Lloop + + movq 64(%rsp),%rsi + movq (%rsi),%r13 + movq 8(%rsi),%r12 + movq 16(%rsi),%rbp + movq 24(%rsi),%rbx + leaq 32(%rsi),%rsp +.Lepilogue: + retq +.size sha1_block_data_order,.-sha1_block_data_order +.type sha1_block_data_order_ssse3,@function +.align 16 +sha1_block_data_order_ssse3: +_ssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + leaq -64(%rsp),%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX(%rip),%r11 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm0 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp .Loop_ssse3 +.align 16 +.Loop_ssse3: + movdqa %xmm1,%xmm4 + addl 0(%rsp),%ebp + xorl %edx,%ecx + movdqa %xmm3,%xmm8 +.byte 102,15,58,15,224,8 + movl %eax,%edi + roll $5,%eax + paddd %xmm3,%xmm9 + andl %ecx,%esi + xorl %edx,%ecx + psrldq $4,%xmm8 + xorl %edx,%esi + addl %eax,%ebp + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%ebp + pxor %xmm2,%xmm8 + addl 4(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pxor %xmm8,%xmm4 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm9,48(%rsp) + xorl %ecx,%edi + addl %ebp,%edx + movdqa %xmm4,%xmm10 + movdqa %xmm4,%xmm8 + rorl $7,%eax + addl %edi,%edx + addl 8(%rsp),%ecx + xorl %ebx,%eax + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + roll $5,%edx + andl %eax,%esi + xorl %ebx,%eax + psrld $31,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + movdqa %xmm10,%xmm9 + rorl $7,%ebp + addl %esi,%ecx + psrld $30,%xmm10 + por %xmm8,%xmm4 + addl 12(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + andl %ebp,%edi + xorl %eax,%ebp + movdqa 0(%r11),%xmm10 + xorl %eax,%edi + addl %ecx,%ebx + pxor %xmm9,%xmm4 + rorl $7,%edx + addl %edi,%ebx + movdqa %xmm2,%xmm5 + addl 16(%rsp),%eax + xorl %ebp,%edx + movdqa %xmm4,%xmm9 +.byte 102,15,58,15,233,8 + movl %ebx,%edi + roll $5,%ebx + paddd %xmm4,%xmm10 + andl %edx,%esi + xorl %ebp,%edx + psrldq $4,%xmm9 + xorl %ebp,%esi + addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm3,%xmm9 + addl 20(%rsp),%ebp + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pxor %xmm9,%xmm5 + andl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm10,0(%rsp) + xorl %edx,%edi + addl %eax,%ebp + movdqa %xmm5,%xmm8 + movdqa %xmm5,%xmm9 + rorl $7,%ebx + addl %edi,%ebp + addl 24(%rsp),%edx + xorl %ecx,%ebx + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + roll $5,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + psrld $31,%xmm9 + xorl %ecx,%esi + addl %ebp,%edx + movdqa %xmm8,%xmm10 + rorl $7,%eax + addl %esi,%edx + psrld $30,%xmm8 + por %xmm9,%xmm5 + addl 28(%rsp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + andl %eax,%edi + xorl %ebx,%eax + movdqa 16(%r11),%xmm8 + xorl %ebx,%edi + addl %edx,%ecx + pxor %xmm10,%xmm5 + rorl $7,%ebp + addl %edi,%ecx + movdqa %xmm3,%xmm6 + addl 32(%rsp),%ebx + xorl %eax,%ebp + movdqa %xmm5,%xmm10 +.byte 102,15,58,15,242,8 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm5,%xmm8 + andl %ebp,%esi + xorl %eax,%ebp + psrldq $4,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm4,%xmm10 + addl 36(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + pxor %xmm10,%xmm6 + andl %edx,%edi + xorl %ebp,%edx + movdqa %xmm8,16(%rsp) + xorl %ebp,%edi + addl %ebx,%eax + movdqa %xmm6,%xmm9 + movdqa %xmm6,%xmm10 + rorl $7,%ecx + addl %edi,%eax + addl 40(%rsp),%ebp + xorl %edx,%ecx + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx + psrld $31,%xmm10 + xorl %edx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + psrld $30,%xmm9 + por %xmm10,%xmm6 + addl 44(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa 16(%r11),%xmm9 + xorl %ecx,%edi + addl %ebp,%edx + pxor %xmm8,%xmm6 + rorl $7,%eax + addl %edi,%edx + movdqa %xmm4,%xmm7 + addl 48(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm6,%xmm8 +.byte 102,15,58,15,251,8 + movl %edx,%edi + roll $5,%edx + paddd %xmm6,%xmm9 + andl %eax,%esi + xorl %ebx,%eax + psrldq $4,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm5,%xmm8 + addl 52(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + pxor %xmm8,%xmm7 + andl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm9,32(%rsp) + xorl %eax,%edi + addl %ecx,%ebx + movdqa %xmm7,%xmm10 + movdqa %xmm7,%xmm8 + rorl $7,%edx + addl %edi,%ebx + addl 56(%rsp),%eax + xorl %ebp,%edx + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + roll $5,%ebx + andl %edx,%esi + xorl %ebp,%edx + psrld $31,%xmm8 + xorl %ebp,%esi + addl %ebx,%eax + movdqa %xmm10,%xmm9 + rorl $7,%ecx + addl %esi,%eax + psrld $30,%xmm10 + por %xmm8,%xmm7 + addl 60(%rsp),%ebp + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + andl %ecx,%edi + xorl %edx,%ecx + movdqa 16(%r11),%xmm10 + xorl %edx,%edi + addl %eax,%ebp + pxor %xmm9,%xmm7 + rorl $7,%ebx + addl %edi,%ebp + movdqa %xmm7,%xmm9 + addl 0(%rsp),%edx + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,206,8 + xorl %ecx,%ebx + movl %ebp,%edi + roll $5,%ebp + pxor %xmm1,%xmm0 + andl %ebx,%esi + xorl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm7,%xmm10 + xorl %ecx,%esi + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm0,%xmm9 + movdqa %xmm10,48(%rsp) + movl %edx,%esi + roll $5,%edx + andl %eax,%edi + xorl %ebx,%eax + pslld $2,%xmm0 + xorl %ebx,%edi + addl %edx,%ecx + psrld $30,%xmm9 + rorl $7,%ebp + addl %edi,%ecx + addl 8(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%edi + roll $5,%ecx + por %xmm9,%xmm0 + andl %ebp,%esi + xorl %eax,%ebp + movdqa %xmm0,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + andl %edx,%edi + xorl %ebp,%edx + xorl %ebp,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 16(%rsp),%ebp + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,215,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm8,%xmm9 + paddd %xmm0,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + movdqa %xmm8,0(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm10 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm10,%xmm1 + addl 28(%rsp),%ebx + xorl %eax,%edi + movdqa %xmm1,%xmm8 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 32(%rsp),%eax + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,192,8 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + xorl %edx,%esi + addl %ebx,%eax + movdqa 32(%r11),%xmm10 + paddd %xmm1,%xmm9 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + movdqa %xmm9,16(%rsp) + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + pslld $2,%xmm2 + addl 40(%rsp),%edx + xorl %ecx,%esi + psrld $30,%xmm8 + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + por %xmm8,%xmm2 + addl 44(%rsp),%ecx + xorl %ebx,%edi + movdqa %xmm2,%xmm9 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 48(%rsp),%ebx + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,201,8 + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + xorl %ebp,%esi + addl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm2,%xmm10 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + movdqa %xmm10,32(%rsp) + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + pslld $2,%xmm3 + addl 56(%rsp),%ebp + xorl %edx,%esi + psrld $30,%xmm9 + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + por %xmm9,%xmm3 + addl 60(%rsp),%edx + xorl %ecx,%edi + movdqa %xmm3,%xmm10 + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 0(%rsp),%ecx + pxor %xmm0,%xmm4 +.byte 102,68,15,58,15,210,8 + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + xorl %eax,%esi + addl %edx,%ecx + movdqa %xmm8,%xmm9 + paddd %xmm3,%xmm8 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + movdqa %xmm8,48(%rsp) + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + pslld $2,%xmm4 + addl 8(%rsp),%eax + xorl %ebp,%esi + psrld $30,%xmm10 + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + por %xmm10,%xmm4 + addl 12(%rsp),%ebp + xorl %edx,%edi + movdqa %xmm4,%xmm8 + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 16(%rsp),%edx + pxor %xmm1,%xmm5 +.byte 102,68,15,58,15,195,8 + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + xorl %ebx,%esi + addl %ebp,%edx + movdqa %xmm9,%xmm10 + paddd %xmm4,%xmm9 + rorl $7,%eax + addl %esi,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + movdqa %xmm9,0(%rsp) + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + pslld $2,%xmm5 + addl 24(%rsp),%ebx + xorl %eax,%esi + psrld $30,%xmm8 + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + por %xmm8,%xmm5 + addl 28(%rsp),%eax + xorl %ebp,%edi + movdqa %xmm5,%xmm9 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movl %ecx,%edi + pxor %xmm2,%xmm6 +.byte 102,68,15,58,15,204,8 + xorl %edx,%ecx + addl 32(%rsp),%ebp + andl %edx,%edi + pxor %xmm7,%xmm6 + andl %ecx,%esi + rorl $7,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm5,%xmm10 + addl %edi,%ebp + movl %eax,%edi + pxor %xmm9,%xmm6 + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movdqa %xmm6,%xmm9 + movdqa %xmm10,16(%rsp) + movl %ebx,%esi + xorl %ecx,%ebx + addl 36(%rsp),%edx + andl %ecx,%esi + pslld $2,%xmm6 + andl %ebx,%edi + rorl $7,%eax + psrld $30,%xmm9 + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + por %xmm9,%xmm6 + movl %eax,%edi + xorl %ebx,%eax + movdqa %xmm6,%xmm10 + addl 40(%rsp),%ecx + andl %ebx,%edi + andl %eax,%esi + rorl $7,%ebp + addl %edi,%ecx + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%esi + xorl %eax,%ebp + addl 44(%rsp),%ebx + andl %eax,%esi + andl %ebp,%edi + rorl $7,%edx + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%edi + pxor %xmm3,%xmm7 +.byte 102,68,15,58,15,213,8 + xorl %ebp,%edx + addl 48(%rsp),%eax + andl %ebp,%edi + pxor %xmm0,%xmm7 + andl %edx,%esi + rorl $7,%ecx + movdqa 48(%r11),%xmm9 + paddd %xmm6,%xmm8 + addl %edi,%eax + movl %ebx,%edi + pxor %xmm10,%xmm7 + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movdqa %xmm7,%xmm10 + movdqa %xmm8,32(%rsp) + movl %ecx,%esi + xorl %edx,%ecx + addl 52(%rsp),%ebp + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%edi + rorl $7,%ebx + psrld $30,%xmm10 + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + por %xmm10,%xmm7 + movl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm7,%xmm8 + addl 56(%rsp),%edx + andl %ecx,%edi + andl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%esi + xorl %ebx,%eax + addl 60(%rsp),%ecx + andl %ebx,%esi + andl %eax,%edi + rorl $7,%ebp + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%edi + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,198,8 + xorl %eax,%ebp + addl 0(%rsp),%ebx + andl %eax,%edi + pxor %xmm1,%xmm0 + andl %ebp,%esi + rorl $7,%edx + movdqa %xmm9,%xmm10 + paddd %xmm7,%xmm9 + addl %edi,%ebx + movl %ecx,%edi + pxor %xmm8,%xmm0 + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movdqa %xmm0,%xmm8 + movdqa %xmm9,48(%rsp) + movl %edx,%esi + xorl %ebp,%edx + addl 4(%rsp),%eax + andl %ebp,%esi + pslld $2,%xmm0 + andl %edx,%edi + rorl $7,%ecx + psrld $30,%xmm8 + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + por %xmm8,%xmm0 + movl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm0,%xmm9 + addl 8(%rsp),%ebp + andl %edx,%edi + andl %ecx,%esi + rorl $7,%ebx + addl %edi,%ebp + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%esi + xorl %ecx,%ebx + addl 12(%rsp),%edx + andl %ecx,%esi + andl %ebx,%edi + rorl $7,%eax + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%edi + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,207,8 + xorl %ebx,%eax + addl 16(%rsp),%ecx + andl %ebx,%edi + pxor %xmm2,%xmm1 + andl %eax,%esi + rorl $7,%ebp + movdqa %xmm10,%xmm8 + paddd %xmm0,%xmm10 + addl %edi,%ecx + movl %edx,%edi + pxor %xmm9,%xmm1 + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movdqa %xmm1,%xmm9 + movdqa %xmm10,0(%rsp) + movl %ebp,%esi + xorl %eax,%ebp + addl 20(%rsp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %ebp,%edi + rorl $7,%edx + psrld $30,%xmm9 + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + por %xmm9,%xmm1 + movl %edx,%edi + xorl %ebp,%edx + movdqa %xmm1,%xmm10 + addl 24(%rsp),%eax + andl %ebp,%edi + andl %edx,%esi + rorl $7,%ecx + addl %edi,%eax + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movl %ecx,%esi + xorl %edx,%ecx + addl 28(%rsp),%ebp + andl %edx,%esi + andl %ecx,%edi + rorl $7,%ebx + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%edi + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,208,8 + xorl %ecx,%ebx + addl 32(%rsp),%edx + andl %ecx,%edi + pxor %xmm3,%xmm2 + andl %ebx,%esi + rorl $7,%eax + movdqa %xmm8,%xmm9 + paddd %xmm1,%xmm8 + addl %edi,%edx + movl %ebp,%edi + pxor %xmm10,%xmm2 + roll $5,%ebp + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movdqa %xmm2,%xmm10 + movdqa %xmm8,16(%rsp) + movl %eax,%esi + xorl %ebx,%eax + addl 36(%rsp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%edi + rorl $7,%ebp + psrld $30,%xmm10 + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + por %xmm10,%xmm2 + movl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm2,%xmm8 + addl 40(%rsp),%ebx + andl %eax,%edi + andl %ebp,%esi + rorl $7,%edx + addl %edi,%ebx + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%esi + xorl %ebp,%edx + addl 44(%rsp),%eax + andl %ebp,%esi + andl %edx,%edi + rorl $7,%ecx + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + addl 48(%rsp),%ebp + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,193,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm10 + paddd %xmm2,%xmm9 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + movdqa %xmm9,32(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm8 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm8,%xmm3 + addl 60(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 0(%rsp),%eax + paddd %xmm3,%xmm10 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + movdqa %xmm10,48(%rsp) + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 4(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 8(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 12(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + cmpq %r10,%r9 + je .Ldone_ssse3 + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %eax,%esi +.byte 102,15,56,0,206 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm9,%xmm0 + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + movdqa %xmm0,0(%rsp) + addl 20(%rsp),%eax + xorl %ebp,%edi + psubd %xmm9,%xmm0 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi +.byte 102,15,56,0,214 + movl %edx,%edi + roll $5,%edx + paddd %xmm9,%xmm1 + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + movdqa %xmm1,16(%rsp) + addl 36(%rsp),%ebx + xorl %eax,%edi + psubd %xmm9,%xmm1 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi +.byte 102,15,56,0,222 + movl %ebp,%edi + roll $5,%ebp + paddd %xmm9,%xmm2 + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + movdqa %xmm2,32(%rsp) + addl 52(%rsp),%ecx + xorl %ebx,%edi + psubd %xmm9,%xmm2 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + jmp .Loop_ssse3 + +.align 16 +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 20(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + addl 36(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 52(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + leaq 64(%rsp),%rsi + movq 0(%rsi),%r12 + movq 8(%rsi),%rbp + movq 16(%rsi),%rbx + leaq 24(%rsi),%rsp +.Lepilogue_ssse3: + retq +.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 +.align 64 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +#if defined(HAVE_GNU_STACK) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/ext/libressl/crypto/sha/sha1-macosx-x86_64.S b/ext/libressl/crypto/sha/sha1-macosx-x86_64.S new file mode 100644 index 0000000..04a8aff --- /dev/null +++ b/ext/libressl/crypto/sha/sha1-macosx-x86_64.S @@ -0,0 +1,2488 @@ +#include "x86_arch.h" +.text + +.private_extern _OPENSSL_ia32cap_P + +.globl _sha1_block_data_order + +.p2align 4 +_sha1_block_data_order: + movl _OPENSSL_ia32cap_P+0(%rip),%r9d + movl _OPENSSL_ia32cap_P+4(%rip),%r8d + testl $IA32CAP_MASK1_SSSE3,%r8d + jz L$ialu + jmp _ssse3_shortcut + +.p2align 4 +L$ialu: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + movq %rsp,%r11 + movq %rdi,%r8 + subq $72,%rsp + movq %rsi,%r9 + andq $-64,%rsp + movq %rdx,%r10 + movq %r11,64(%rsp) +L$prologue: + + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp L$loop + +.p2align 4 +L$loop: + movl 0(%r9),%edx + bswapl %edx + movl %edx,0(%rsp) + movl %r11d,%eax + movl 4(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,4(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 8(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,8(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 12(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,12(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 16(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,16(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 20(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,20(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 24(%r9),%edx + movl %esi,%ecx + xorl %r12d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r13,1),%r13d + andl %edi,%eax + movl %edx,24(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 28(%r9),%ebp + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r12,1),%r12d + andl %esi,%eax + movl %ebp,28(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 32(%r9),%edx + movl %r12d,%ecx + xorl %edi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r11,1),%r11d + andl %r13d,%eax + movl %edx,32(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 36(%r9),%ebp + movl %r11d,%ecx + xorl %esi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rdi,1),%edi + andl %r12d,%eax + movl %ebp,36(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 40(%r9),%edx + movl %edi,%ecx + xorl %r13d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rsi,1),%esi + andl %r11d,%eax + movl %edx,40(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 44(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,44(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 48(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,48(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 52(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,52(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 56(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,56(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 60(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,60(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 0(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 32(%rsp),%edx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + xorl 52(%rsp),%edx + xorl %r12d,%eax + roll $1,%edx + addl %ecx,%r13d + roll $30,%edi + movl %edx,0(%rsp) + addl %eax,%r13d + movl 4(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 36(%rsp),%ebp + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + xorl 56(%rsp),%ebp + xorl %r11d,%eax + roll $1,%ebp + addl %ecx,%r12d + roll $30,%esi + movl %ebp,4(%rsp) + addl %eax,%r12d + movl 8(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + xorl 60(%rsp),%edx + xorl %edi,%eax + roll $1,%edx + addl %ecx,%r11d + roll $30,%r13d + movl %edx,8(%rsp) + addl %eax,%r11d + movl 12(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + xorl 0(%rsp),%ebp + xorl %esi,%eax + roll $1,%ebp + addl %ecx,%edi + roll $30,%r12d + movl %ebp,12(%rsp) + addl %eax,%edi + movl 16(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + xorl 4(%rsp),%edx + xorl %r13d,%eax + roll $1,%edx + addl %ecx,%esi + roll $30,%r11d + movl %edx,16(%rsp) + addl %eax,%esi + movl 20(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 28(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 52(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 8(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 32(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 56(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 12(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 36(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 60(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 16(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 0(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 20(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 4(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 24(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 48(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 8(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 28(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 52(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 12(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 32(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 56(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 16(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 36(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 20(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 40(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 24(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 44(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 28(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 48(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 32(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 52(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 36(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 56(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 40(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 60(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 44(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 0(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 48(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 4(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 52(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 8(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 56(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 12(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 60(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 16(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 0(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 20(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 44(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,36(%rsp) + addl %ecx,%r13d + movl 40(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 48(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,40(%rsp) + addl %ecx,%r12d + movl 44(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 52(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,44(%rsp) + addl %ecx,%r11d + movl 48(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,48(%rsp) + addl %ecx,%edi + movl 52(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 60(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 40(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,52(%rsp) + addl %ecx,%esi + movl 56(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 0(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 44(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,56(%rsp) + addl %ecx,%r13d + movl 60(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 4(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 48(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl %ebx,%r12d + roll $30,%esi + movl %ebp,60(%rsp) + addl %ecx,%r12d + movl 0(%rsp),%edx + movl %esi,%eax + movl %esi,%ebx + xorl 8(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 52(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,0(%rsp) + addl %ecx,%r11d + movl 4(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 12(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 56(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,4(%rsp) + addl %ecx,%edi + movl 8(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 16(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 60(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,8(%rsp) + addl %ecx,%esi + movl 12(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 20(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 44(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 0(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,12(%rsp) + addl %ecx,%r13d + movl 16(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 24(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 48(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 4(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,16(%rsp) + addl %ecx,%r12d + movl 20(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 28(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 8(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,20(%rsp) + addl %ecx,%r11d + movl 24(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 32(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 12(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,24(%rsp) + addl %ecx,%edi + movl 28(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 36(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 16(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,28(%rsp) + addl %ecx,%esi + movl 32(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 40(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 0(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 20(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,32(%rsp) + addl %ecx,%r13d + movl 36(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 44(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 4(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl %ebx,%r12d + roll $30,%esi + movl %ebp,36(%rsp) + addl %ecx,%r12d + movl 40(%rsp),%edx + movl %esi,%eax + movl %esi,%ebx + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,40(%rsp) + addl %ecx,%r11d + movl 44(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 52(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 12(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,44(%rsp) + addl %ecx,%edi + movl 48(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 56(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,48(%rsp) + addl %ecx,%esi + movl 52(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 20(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 40(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 0(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 24(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 44(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 4(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 28(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 48(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 32(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 52(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 36(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 56(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 40(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 60(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 44(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 0(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 48(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 4(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 28(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 52(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 8(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 32(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rsi,1),%esi + xorl 56(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 12(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 36(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 60(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 16(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 0(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 20(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 4(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 24(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 48(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 8(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 28(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 52(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 12(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 32(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 56(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 16(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 36(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 60(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 20(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 40(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl 56(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 24(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 44(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl 60(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 28(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 48(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl %r11d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r13d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz L$loop + + movq 64(%rsp),%rsi + movq (%rsi),%r13 + movq 8(%rsi),%r12 + movq 16(%rsi),%rbp + movq 24(%rsi),%rbx + leaq 32(%rsi),%rsp +L$epilogue: + retq + + +.p2align 4 +sha1_block_data_order_ssse3: +_ssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + leaq -64(%rsp),%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX(%rip),%r11 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm0 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp L$oop_ssse3 +.p2align 4 +L$oop_ssse3: + movdqa %xmm1,%xmm4 + addl 0(%rsp),%ebp + xorl %edx,%ecx + movdqa %xmm3,%xmm8 +.byte 102,15,58,15,224,8 + movl %eax,%edi + roll $5,%eax + paddd %xmm3,%xmm9 + andl %ecx,%esi + xorl %edx,%ecx + psrldq $4,%xmm8 + xorl %edx,%esi + addl %eax,%ebp + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%ebp + pxor %xmm2,%xmm8 + addl 4(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pxor %xmm8,%xmm4 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm9,48(%rsp) + xorl %ecx,%edi + addl %ebp,%edx + movdqa %xmm4,%xmm10 + movdqa %xmm4,%xmm8 + rorl $7,%eax + addl %edi,%edx + addl 8(%rsp),%ecx + xorl %ebx,%eax + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + roll $5,%edx + andl %eax,%esi + xorl %ebx,%eax + psrld $31,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + movdqa %xmm10,%xmm9 + rorl $7,%ebp + addl %esi,%ecx + psrld $30,%xmm10 + por %xmm8,%xmm4 + addl 12(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + andl %ebp,%edi + xorl %eax,%ebp + movdqa 0(%r11),%xmm10 + xorl %eax,%edi + addl %ecx,%ebx + pxor %xmm9,%xmm4 + rorl $7,%edx + addl %edi,%ebx + movdqa %xmm2,%xmm5 + addl 16(%rsp),%eax + xorl %ebp,%edx + movdqa %xmm4,%xmm9 +.byte 102,15,58,15,233,8 + movl %ebx,%edi + roll $5,%ebx + paddd %xmm4,%xmm10 + andl %edx,%esi + xorl %ebp,%edx + psrldq $4,%xmm9 + xorl %ebp,%esi + addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm3,%xmm9 + addl 20(%rsp),%ebp + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pxor %xmm9,%xmm5 + andl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm10,0(%rsp) + xorl %edx,%edi + addl %eax,%ebp + movdqa %xmm5,%xmm8 + movdqa %xmm5,%xmm9 + rorl $7,%ebx + addl %edi,%ebp + addl 24(%rsp),%edx + xorl %ecx,%ebx + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + roll $5,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + psrld $31,%xmm9 + xorl %ecx,%esi + addl %ebp,%edx + movdqa %xmm8,%xmm10 + rorl $7,%eax + addl %esi,%edx + psrld $30,%xmm8 + por %xmm9,%xmm5 + addl 28(%rsp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + andl %eax,%edi + xorl %ebx,%eax + movdqa 16(%r11),%xmm8 + xorl %ebx,%edi + addl %edx,%ecx + pxor %xmm10,%xmm5 + rorl $7,%ebp + addl %edi,%ecx + movdqa %xmm3,%xmm6 + addl 32(%rsp),%ebx + xorl %eax,%ebp + movdqa %xmm5,%xmm10 +.byte 102,15,58,15,242,8 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm5,%xmm8 + andl %ebp,%esi + xorl %eax,%ebp + psrldq $4,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm4,%xmm10 + addl 36(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + pxor %xmm10,%xmm6 + andl %edx,%edi + xorl %ebp,%edx + movdqa %xmm8,16(%rsp) + xorl %ebp,%edi + addl %ebx,%eax + movdqa %xmm6,%xmm9 + movdqa %xmm6,%xmm10 + rorl $7,%ecx + addl %edi,%eax + addl 40(%rsp),%ebp + xorl %edx,%ecx + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx + psrld $31,%xmm10 + xorl %edx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + psrld $30,%xmm9 + por %xmm10,%xmm6 + addl 44(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa 16(%r11),%xmm9 + xorl %ecx,%edi + addl %ebp,%edx + pxor %xmm8,%xmm6 + rorl $7,%eax + addl %edi,%edx + movdqa %xmm4,%xmm7 + addl 48(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm6,%xmm8 +.byte 102,15,58,15,251,8 + movl %edx,%edi + roll $5,%edx + paddd %xmm6,%xmm9 + andl %eax,%esi + xorl %ebx,%eax + psrldq $4,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm5,%xmm8 + addl 52(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + pxor %xmm8,%xmm7 + andl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm9,32(%rsp) + xorl %eax,%edi + addl %ecx,%ebx + movdqa %xmm7,%xmm10 + movdqa %xmm7,%xmm8 + rorl $7,%edx + addl %edi,%ebx + addl 56(%rsp),%eax + xorl %ebp,%edx + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + roll $5,%ebx + andl %edx,%esi + xorl %ebp,%edx + psrld $31,%xmm8 + xorl %ebp,%esi + addl %ebx,%eax + movdqa %xmm10,%xmm9 + rorl $7,%ecx + addl %esi,%eax + psrld $30,%xmm10 + por %xmm8,%xmm7 + addl 60(%rsp),%ebp + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + andl %ecx,%edi + xorl %edx,%ecx + movdqa 16(%r11),%xmm10 + xorl %edx,%edi + addl %eax,%ebp + pxor %xmm9,%xmm7 + rorl $7,%ebx + addl %edi,%ebp + movdqa %xmm7,%xmm9 + addl 0(%rsp),%edx + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,206,8 + xorl %ecx,%ebx + movl %ebp,%edi + roll $5,%ebp + pxor %xmm1,%xmm0 + andl %ebx,%esi + xorl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm7,%xmm10 + xorl %ecx,%esi + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm0,%xmm9 + movdqa %xmm10,48(%rsp) + movl %edx,%esi + roll $5,%edx + andl %eax,%edi + xorl %ebx,%eax + pslld $2,%xmm0 + xorl %ebx,%edi + addl %edx,%ecx + psrld $30,%xmm9 + rorl $7,%ebp + addl %edi,%ecx + addl 8(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%edi + roll $5,%ecx + por %xmm9,%xmm0 + andl %ebp,%esi + xorl %eax,%ebp + movdqa %xmm0,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + andl %edx,%edi + xorl %ebp,%edx + xorl %ebp,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 16(%rsp),%ebp + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,215,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm8,%xmm9 + paddd %xmm0,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + movdqa %xmm8,0(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm10 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm10,%xmm1 + addl 28(%rsp),%ebx + xorl %eax,%edi + movdqa %xmm1,%xmm8 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 32(%rsp),%eax + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,192,8 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + xorl %edx,%esi + addl %ebx,%eax + movdqa 32(%r11),%xmm10 + paddd %xmm1,%xmm9 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + movdqa %xmm9,16(%rsp) + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + pslld $2,%xmm2 + addl 40(%rsp),%edx + xorl %ecx,%esi + psrld $30,%xmm8 + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + por %xmm8,%xmm2 + addl 44(%rsp),%ecx + xorl %ebx,%edi + movdqa %xmm2,%xmm9 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 48(%rsp),%ebx + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,201,8 + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + xorl %ebp,%esi + addl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm2,%xmm10 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + movdqa %xmm10,32(%rsp) + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + pslld $2,%xmm3 + addl 56(%rsp),%ebp + xorl %edx,%esi + psrld $30,%xmm9 + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + por %xmm9,%xmm3 + addl 60(%rsp),%edx + xorl %ecx,%edi + movdqa %xmm3,%xmm10 + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 0(%rsp),%ecx + pxor %xmm0,%xmm4 +.byte 102,68,15,58,15,210,8 + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + xorl %eax,%esi + addl %edx,%ecx + movdqa %xmm8,%xmm9 + paddd %xmm3,%xmm8 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + movdqa %xmm8,48(%rsp) + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + pslld $2,%xmm4 + addl 8(%rsp),%eax + xorl %ebp,%esi + psrld $30,%xmm10 + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + por %xmm10,%xmm4 + addl 12(%rsp),%ebp + xorl %edx,%edi + movdqa %xmm4,%xmm8 + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 16(%rsp),%edx + pxor %xmm1,%xmm5 +.byte 102,68,15,58,15,195,8 + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + xorl %ebx,%esi + addl %ebp,%edx + movdqa %xmm9,%xmm10 + paddd %xmm4,%xmm9 + rorl $7,%eax + addl %esi,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + movdqa %xmm9,0(%rsp) + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + pslld $2,%xmm5 + addl 24(%rsp),%ebx + xorl %eax,%esi + psrld $30,%xmm8 + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + por %xmm8,%xmm5 + addl 28(%rsp),%eax + xorl %ebp,%edi + movdqa %xmm5,%xmm9 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movl %ecx,%edi + pxor %xmm2,%xmm6 +.byte 102,68,15,58,15,204,8 + xorl %edx,%ecx + addl 32(%rsp),%ebp + andl %edx,%edi + pxor %xmm7,%xmm6 + andl %ecx,%esi + rorl $7,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm5,%xmm10 + addl %edi,%ebp + movl %eax,%edi + pxor %xmm9,%xmm6 + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movdqa %xmm6,%xmm9 + movdqa %xmm10,16(%rsp) + movl %ebx,%esi + xorl %ecx,%ebx + addl 36(%rsp),%edx + andl %ecx,%esi + pslld $2,%xmm6 + andl %ebx,%edi + rorl $7,%eax + psrld $30,%xmm9 + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + por %xmm9,%xmm6 + movl %eax,%edi + xorl %ebx,%eax + movdqa %xmm6,%xmm10 + addl 40(%rsp),%ecx + andl %ebx,%edi + andl %eax,%esi + rorl $7,%ebp + addl %edi,%ecx + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%esi + xorl %eax,%ebp + addl 44(%rsp),%ebx + andl %eax,%esi + andl %ebp,%edi + rorl $7,%edx + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%edi + pxor %xmm3,%xmm7 +.byte 102,68,15,58,15,213,8 + xorl %ebp,%edx + addl 48(%rsp),%eax + andl %ebp,%edi + pxor %xmm0,%xmm7 + andl %edx,%esi + rorl $7,%ecx + movdqa 48(%r11),%xmm9 + paddd %xmm6,%xmm8 + addl %edi,%eax + movl %ebx,%edi + pxor %xmm10,%xmm7 + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movdqa %xmm7,%xmm10 + movdqa %xmm8,32(%rsp) + movl %ecx,%esi + xorl %edx,%ecx + addl 52(%rsp),%ebp + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%edi + rorl $7,%ebx + psrld $30,%xmm10 + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + por %xmm10,%xmm7 + movl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm7,%xmm8 + addl 56(%rsp),%edx + andl %ecx,%edi + andl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%esi + xorl %ebx,%eax + addl 60(%rsp),%ecx + andl %ebx,%esi + andl %eax,%edi + rorl $7,%ebp + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%edi + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,198,8 + xorl %eax,%ebp + addl 0(%rsp),%ebx + andl %eax,%edi + pxor %xmm1,%xmm0 + andl %ebp,%esi + rorl $7,%edx + movdqa %xmm9,%xmm10 + paddd %xmm7,%xmm9 + addl %edi,%ebx + movl %ecx,%edi + pxor %xmm8,%xmm0 + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movdqa %xmm0,%xmm8 + movdqa %xmm9,48(%rsp) + movl %edx,%esi + xorl %ebp,%edx + addl 4(%rsp),%eax + andl %ebp,%esi + pslld $2,%xmm0 + andl %edx,%edi + rorl $7,%ecx + psrld $30,%xmm8 + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + por %xmm8,%xmm0 + movl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm0,%xmm9 + addl 8(%rsp),%ebp + andl %edx,%edi + andl %ecx,%esi + rorl $7,%ebx + addl %edi,%ebp + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%esi + xorl %ecx,%ebx + addl 12(%rsp),%edx + andl %ecx,%esi + andl %ebx,%edi + rorl $7,%eax + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%edi + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,207,8 + xorl %ebx,%eax + addl 16(%rsp),%ecx + andl %ebx,%edi + pxor %xmm2,%xmm1 + andl %eax,%esi + rorl $7,%ebp + movdqa %xmm10,%xmm8 + paddd %xmm0,%xmm10 + addl %edi,%ecx + movl %edx,%edi + pxor %xmm9,%xmm1 + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movdqa %xmm1,%xmm9 + movdqa %xmm10,0(%rsp) + movl %ebp,%esi + xorl %eax,%ebp + addl 20(%rsp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %ebp,%edi + rorl $7,%edx + psrld $30,%xmm9 + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + por %xmm9,%xmm1 + movl %edx,%edi + xorl %ebp,%edx + movdqa %xmm1,%xmm10 + addl 24(%rsp),%eax + andl %ebp,%edi + andl %edx,%esi + rorl $7,%ecx + addl %edi,%eax + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movl %ecx,%esi + xorl %edx,%ecx + addl 28(%rsp),%ebp + andl %edx,%esi + andl %ecx,%edi + rorl $7,%ebx + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%edi + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,208,8 + xorl %ecx,%ebx + addl 32(%rsp),%edx + andl %ecx,%edi + pxor %xmm3,%xmm2 + andl %ebx,%esi + rorl $7,%eax + movdqa %xmm8,%xmm9 + paddd %xmm1,%xmm8 + addl %edi,%edx + movl %ebp,%edi + pxor %xmm10,%xmm2 + roll $5,%ebp + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movdqa %xmm2,%xmm10 + movdqa %xmm8,16(%rsp) + movl %eax,%esi + xorl %ebx,%eax + addl 36(%rsp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%edi + rorl $7,%ebp + psrld $30,%xmm10 + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + por %xmm10,%xmm2 + movl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm2,%xmm8 + addl 40(%rsp),%ebx + andl %eax,%edi + andl %ebp,%esi + rorl $7,%edx + addl %edi,%ebx + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%esi + xorl %ebp,%edx + addl 44(%rsp),%eax + andl %ebp,%esi + andl %edx,%edi + rorl $7,%ecx + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + addl 48(%rsp),%ebp + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,193,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm10 + paddd %xmm2,%xmm9 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + movdqa %xmm9,32(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm8 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm8,%xmm3 + addl 60(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 0(%rsp),%eax + paddd %xmm3,%xmm10 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + movdqa %xmm10,48(%rsp) + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 4(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 8(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 12(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + cmpq %r10,%r9 + je L$done_ssse3 + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %eax,%esi +.byte 102,15,56,0,206 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm9,%xmm0 + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + movdqa %xmm0,0(%rsp) + addl 20(%rsp),%eax + xorl %ebp,%edi + psubd %xmm9,%xmm0 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi +.byte 102,15,56,0,214 + movl %edx,%edi + roll $5,%edx + paddd %xmm9,%xmm1 + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + movdqa %xmm1,16(%rsp) + addl 36(%rsp),%ebx + xorl %eax,%edi + psubd %xmm9,%xmm1 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi +.byte 102,15,56,0,222 + movl %ebp,%edi + roll $5,%ebp + paddd %xmm9,%xmm2 + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + movdqa %xmm2,32(%rsp) + addl 52(%rsp),%ecx + xorl %ebx,%edi + psubd %xmm9,%xmm2 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + jmp L$oop_ssse3 + +.p2align 4 +L$done_ssse3: + addl 16(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 20(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + addl 36(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 52(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + leaq 64(%rsp),%rsi + movq 0(%rsi),%r12 + movq 8(%rsi),%rbp + movq 16(%rsi),%rbx + leaq 24(%rsi),%rsp +L$epilogue_ssse3: + retq + +.p2align 6 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 diff --git a/ext/libressl/crypto/sha/sha1-masm-x86_64.S b/ext/libressl/crypto/sha/sha1-masm-x86_64.S new file mode 100644 index 0000000..36d8732 --- /dev/null +++ b/ext/libressl/crypto/sha/sha1-masm-x86_64.S @@ -0,0 +1,2746 @@ +; 1 "crypto/sha/sha1-masm-x86_64.S.tmp" +; 1 "" 1 +; 1 "" 3 +; 340 "" 3 +; 1 "" 1 +; 1 "" 2 +; 1 "crypto/sha/sha1-masm-x86_64.S.tmp" 2 +OPTION DOTNAME + +; 1 "./crypto/x86_arch.h" 1 + + +; 16 "./crypto/x86_arch.h" + + + + + + + + + +; 40 "./crypto/x86_arch.h" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +; 3 "crypto/sha/sha1-masm-x86_64.S.tmp" 2 +.text$ SEGMENT ALIGN(64) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR + + +PUBLIC sha1_block_data_order + +ALIGN 16 +sha1_block_data_order PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + mov r9d,DWORD PTR[((OPENSSL_ia32cap_P+0))] + mov r8d,DWORD PTR[((OPENSSL_ia32cap_P+4))] + test r8d,(1 SHL 9) + jz $L$ialu + jmp _ssse3_shortcut + +ALIGN 16 +$L$ialu:: + push rbx + push rbp + push r12 + push r13 + mov r11,rsp + mov r8,rdi + sub rsp,72 + mov r9,rsi + and rsp,-64 + mov r10,rdx + mov QWORD PTR[64+rsp],r11 +$L$prologue:: + + mov esi,DWORD PTR[r8] + mov edi,DWORD PTR[4+r8] + mov r11d,DWORD PTR[8+r8] + mov r12d,DWORD PTR[12+r8] + mov r13d,DWORD PTR[16+r8] + jmp $L$loop + +ALIGN 16 +$L$loop:: + mov edx,DWORD PTR[r9] + bswap edx + mov DWORD PTR[rsp],edx + mov eax,r11d + mov ebp,DWORD PTR[4+r9] + mov ecx,esi + xor eax,r12d + bswap ebp + rol ecx,5 + lea r13d,DWORD PTR[1518500249+r13*1+rdx] + and eax,edi + mov DWORD PTR[4+rsp],ebp + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov eax,edi + mov edx,DWORD PTR[8+r9] + mov ecx,r13d + xor eax,r11d + bswap edx + rol ecx,5 + lea r12d,DWORD PTR[1518500249+r12*1+rbp] + and eax,esi + mov DWORD PTR[8+rsp],edx + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov eax,esi + mov ebp,DWORD PTR[12+r9] + mov ecx,r12d + xor eax,edi + bswap ebp + rol ecx,5 + lea r11d,DWORD PTR[1518500249+r11*1+rdx] + and eax,r13d + mov DWORD PTR[12+rsp],ebp + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov eax,r13d + mov edx,DWORD PTR[16+r9] + mov ecx,r11d + xor eax,esi + bswap edx + rol ecx,5 + lea edi,DWORD PTR[1518500249+rdi*1+rbp] + and eax,r12d + mov DWORD PTR[16+rsp],edx + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov eax,r12d + mov ebp,DWORD PTR[20+r9] + mov ecx,edi + xor eax,r13d + bswap ebp + rol ecx,5 + lea esi,DWORD PTR[1518500249+rsi*1+rdx] + and eax,r11d + mov DWORD PTR[20+rsp],ebp + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + mov eax,r11d + mov edx,DWORD PTR[24+r9] + mov ecx,esi + xor eax,r12d + bswap edx + rol ecx,5 + lea r13d,DWORD PTR[1518500249+r13*1+rbp] + and eax,edi + mov DWORD PTR[24+rsp],edx + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov eax,edi + mov ebp,DWORD PTR[28+r9] + mov ecx,r13d + xor eax,r11d + bswap ebp + rol ecx,5 + lea r12d,DWORD PTR[1518500249+r12*1+rdx] + and eax,esi + mov DWORD PTR[28+rsp],ebp + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov eax,esi + mov edx,DWORD PTR[32+r9] + mov ecx,r12d + xor eax,edi + bswap edx + rol ecx,5 + lea r11d,DWORD PTR[1518500249+r11*1+rbp] + and eax,r13d + mov DWORD PTR[32+rsp],edx + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov eax,r13d + mov ebp,DWORD PTR[36+r9] + mov ecx,r11d + xor eax,esi + bswap ebp + rol ecx,5 + lea edi,DWORD PTR[1518500249+rdi*1+rdx] + and eax,r12d + mov DWORD PTR[36+rsp],ebp + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov eax,r12d + mov edx,DWORD PTR[40+r9] + mov ecx,edi + xor eax,r13d + bswap edx + rol ecx,5 + lea esi,DWORD PTR[1518500249+rsi*1+rbp] + and eax,r11d + mov DWORD PTR[40+rsp],edx + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + mov eax,r11d + mov ebp,DWORD PTR[44+r9] + mov ecx,esi + xor eax,r12d + bswap ebp + rol ecx,5 + lea r13d,DWORD PTR[1518500249+r13*1+rdx] + and eax,edi + mov DWORD PTR[44+rsp],ebp + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov eax,edi + mov edx,DWORD PTR[48+r9] + mov ecx,r13d + xor eax,r11d + bswap edx + rol ecx,5 + lea r12d,DWORD PTR[1518500249+r12*1+rbp] + and eax,esi + mov DWORD PTR[48+rsp],edx + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov eax,esi + mov ebp,DWORD PTR[52+r9] + mov ecx,r12d + xor eax,edi + bswap ebp + rol ecx,5 + lea r11d,DWORD PTR[1518500249+r11*1+rdx] + and eax,r13d + mov DWORD PTR[52+rsp],ebp + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov eax,r13d + mov edx,DWORD PTR[56+r9] + mov ecx,r11d + xor eax,esi + bswap edx + rol ecx,5 + lea edi,DWORD PTR[1518500249+rdi*1+rbp] + and eax,r12d + mov DWORD PTR[56+rsp],edx + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov eax,r12d + mov ebp,DWORD PTR[60+r9] + mov ecx,edi + xor eax,r13d + bswap ebp + rol ecx,5 + lea esi,DWORD PTR[1518500249+rsi*1+rdx] + and eax,r11d + mov DWORD PTR[60+rsp],ebp + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + mov edx,DWORD PTR[rsp] + mov eax,r11d + mov ecx,esi + xor edx,DWORD PTR[8+rsp] + xor eax,r12d + rol ecx,5 + xor edx,DWORD PTR[32+rsp] + and eax,edi + lea r13d,DWORD PTR[1518500249+r13*1+rbp] + xor edx,DWORD PTR[52+rsp] + xor eax,r12d + rol edx,1 + add r13d,ecx + rol edi,30 + mov DWORD PTR[rsp],edx + add r13d,eax + mov ebp,DWORD PTR[4+rsp] + mov eax,edi + mov ecx,r13d + xor ebp,DWORD PTR[12+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD PTR[36+rsp] + and eax,esi + lea r12d,DWORD PTR[1518500249+r12*1+rdx] + xor ebp,DWORD PTR[56+rsp] + xor eax,r11d + rol ebp,1 + add r12d,ecx + rol esi,30 + mov DWORD PTR[4+rsp],ebp + add r12d,eax + mov edx,DWORD PTR[8+rsp] + mov eax,esi + mov ecx,r12d + xor edx,DWORD PTR[16+rsp] + xor eax,edi + rol ecx,5 + xor edx,DWORD PTR[40+rsp] + and eax,r13d + lea r11d,DWORD PTR[1518500249+r11*1+rbp] + xor edx,DWORD PTR[60+rsp] + xor eax,edi + rol edx,1 + add r11d,ecx + rol r13d,30 + mov DWORD PTR[8+rsp],edx + add r11d,eax + mov ebp,DWORD PTR[12+rsp] + mov eax,r13d + mov ecx,r11d + xor ebp,DWORD PTR[20+rsp] + xor eax,esi + rol ecx,5 + xor ebp,DWORD PTR[44+rsp] + and eax,r12d + lea edi,DWORD PTR[1518500249+rdi*1+rdx] + xor ebp,DWORD PTR[rsp] + xor eax,esi + rol ebp,1 + add edi,ecx + rol r12d,30 + mov DWORD PTR[12+rsp],ebp + add edi,eax + mov edx,DWORD PTR[16+rsp] + mov eax,r12d + mov ecx,edi + xor edx,DWORD PTR[24+rsp] + xor eax,r13d + rol ecx,5 + xor edx,DWORD PTR[48+rsp] + and eax,r11d + lea esi,DWORD PTR[1518500249+rsi*1+rbp] + xor edx,DWORD PTR[4+rsp] + xor eax,r13d + rol edx,1 + add esi,ecx + rol r11d,30 + mov DWORD PTR[16+rsp],edx + add esi,eax + mov ebp,DWORD PTR[20+rsp] + mov eax,r11d + mov ecx,esi + xor ebp,DWORD PTR[28+rsp] + xor eax,edi + rol ecx,5 + lea r13d,DWORD PTR[1859775393+r13*1+rdx] + xor ebp,DWORD PTR[52+rsp] + xor eax,r12d + add r13d,ecx + xor ebp,DWORD PTR[8+rsp] + rol edi,30 + add r13d,eax + rol ebp,1 + mov DWORD PTR[20+rsp],ebp + mov edx,DWORD PTR[24+rsp] + mov eax,edi + mov ecx,r13d + xor edx,DWORD PTR[32+rsp] + xor eax,esi + rol ecx,5 + lea r12d,DWORD PTR[1859775393+r12*1+rbp] + xor edx,DWORD PTR[56+rsp] + xor eax,r11d + add r12d,ecx + xor edx,DWORD PTR[12+rsp] + rol esi,30 + add r12d,eax + rol edx,1 + mov DWORD PTR[24+rsp],edx + mov ebp,DWORD PTR[28+rsp] + mov eax,esi + mov ecx,r12d + xor ebp,DWORD PTR[36+rsp] + xor eax,r13d + rol ecx,5 + lea r11d,DWORD PTR[1859775393+r11*1+rdx] + xor ebp,DWORD PTR[60+rsp] + xor eax,edi + add r11d,ecx + xor ebp,DWORD PTR[16+rsp] + rol r13d,30 + add r11d,eax + rol ebp,1 + mov DWORD PTR[28+rsp],ebp + mov edx,DWORD PTR[32+rsp] + mov eax,r13d + mov ecx,r11d + xor edx,DWORD PTR[40+rsp] + xor eax,r12d + rol ecx,5 + lea edi,DWORD PTR[1859775393+rdi*1+rbp] + xor edx,DWORD PTR[rsp] + xor eax,esi + add edi,ecx + xor edx,DWORD PTR[20+rsp] + rol r12d,30 + add edi,eax + rol edx,1 + mov DWORD PTR[32+rsp],edx + mov ebp,DWORD PTR[36+rsp] + mov eax,r12d + mov ecx,edi + xor ebp,DWORD PTR[44+rsp] + xor eax,r11d + rol ecx,5 + lea esi,DWORD PTR[1859775393+rsi*1+rdx] + xor ebp,DWORD PTR[4+rsp] + xor eax,r13d + add esi,ecx + xor ebp,DWORD PTR[24+rsp] + rol r11d,30 + add esi,eax + rol ebp,1 + mov DWORD PTR[36+rsp],ebp + mov edx,DWORD PTR[40+rsp] + mov eax,r11d + mov ecx,esi + xor edx,DWORD PTR[48+rsp] + xor eax,edi + rol ecx,5 + lea r13d,DWORD PTR[1859775393+r13*1+rbp] + xor edx,DWORD PTR[8+rsp] + xor eax,r12d + add r13d,ecx + xor edx,DWORD PTR[28+rsp] + rol edi,30 + add r13d,eax + rol edx,1 + mov DWORD PTR[40+rsp],edx + mov ebp,DWORD PTR[44+rsp] + mov eax,edi + mov ecx,r13d + xor ebp,DWORD PTR[52+rsp] + xor eax,esi + rol ecx,5 + lea r12d,DWORD PTR[1859775393+r12*1+rdx] + xor ebp,DWORD PTR[12+rsp] + xor eax,r11d + add r12d,ecx + xor ebp,DWORD PTR[32+rsp] + rol esi,30 + add r12d,eax + rol ebp,1 + mov DWORD PTR[44+rsp],ebp + mov edx,DWORD PTR[48+rsp] + mov eax,esi + mov ecx,r12d + xor edx,DWORD PTR[56+rsp] + xor eax,r13d + rol ecx,5 + lea r11d,DWORD PTR[1859775393+r11*1+rbp] + xor edx,DWORD PTR[16+rsp] + xor eax,edi + add r11d,ecx + xor edx,DWORD PTR[36+rsp] + rol r13d,30 + add r11d,eax + rol edx,1 + mov DWORD PTR[48+rsp],edx + mov ebp,DWORD PTR[52+rsp] + mov eax,r13d + mov ecx,r11d + xor ebp,DWORD PTR[60+rsp] + xor eax,r12d + rol ecx,5 + lea edi,DWORD PTR[1859775393+rdi*1+rdx] + xor ebp,DWORD PTR[20+rsp] + xor eax,esi + add edi,ecx + xor ebp,DWORD PTR[40+rsp] + rol r12d,30 + add edi,eax + rol ebp,1 + mov DWORD PTR[52+rsp],ebp + mov edx,DWORD PTR[56+rsp] + mov eax,r12d + mov ecx,edi + xor edx,DWORD PTR[rsp] + xor eax,r11d + rol ecx,5 + lea esi,DWORD PTR[1859775393+rsi*1+rbp] + xor edx,DWORD PTR[24+rsp] + xor eax,r13d + add esi,ecx + xor edx,DWORD PTR[44+rsp] + rol r11d,30 + add esi,eax + rol edx,1 + mov DWORD PTR[56+rsp],edx + mov ebp,DWORD PTR[60+rsp] + mov eax,r11d + mov ecx,esi + xor ebp,DWORD PTR[4+rsp] + xor eax,edi + rol ecx,5 + lea r13d,DWORD PTR[1859775393+r13*1+rdx] + xor ebp,DWORD PTR[28+rsp] + xor eax,r12d + add r13d,ecx + xor ebp,DWORD PTR[48+rsp] + rol edi,30 + add r13d,eax + rol ebp,1 + mov DWORD PTR[60+rsp],ebp + mov edx,DWORD PTR[rsp] + mov eax,edi + mov ecx,r13d + xor edx,DWORD PTR[8+rsp] + xor eax,esi + rol ecx,5 + lea r12d,DWORD PTR[1859775393+r12*1+rbp] + xor edx,DWORD PTR[32+rsp] + xor eax,r11d + add r12d,ecx + xor edx,DWORD PTR[52+rsp] + rol esi,30 + add r12d,eax + rol edx,1 + mov DWORD PTR[rsp],edx + mov ebp,DWORD PTR[4+rsp] + mov eax,esi + mov ecx,r12d + xor ebp,DWORD PTR[12+rsp] + xor eax,r13d + rol ecx,5 + lea r11d,DWORD PTR[1859775393+r11*1+rdx] + xor ebp,DWORD PTR[36+rsp] + xor eax,edi + add r11d,ecx + xor ebp,DWORD PTR[56+rsp] + rol r13d,30 + add r11d,eax + rol ebp,1 + mov DWORD PTR[4+rsp],ebp + mov edx,DWORD PTR[8+rsp] + mov eax,r13d + mov ecx,r11d + xor edx,DWORD PTR[16+rsp] + xor eax,r12d + rol ecx,5 + lea edi,DWORD PTR[1859775393+rdi*1+rbp] + xor edx,DWORD PTR[40+rsp] + xor eax,esi + add edi,ecx + xor edx,DWORD PTR[60+rsp] + rol r12d,30 + add edi,eax + rol edx,1 + mov DWORD PTR[8+rsp],edx + mov ebp,DWORD PTR[12+rsp] + mov eax,r12d + mov ecx,edi + xor ebp,DWORD PTR[20+rsp] + xor eax,r11d + rol ecx,5 + lea esi,DWORD PTR[1859775393+rsi*1+rdx] + xor ebp,DWORD PTR[44+rsp] + xor eax,r13d + add esi,ecx + xor ebp,DWORD PTR[rsp] + rol r11d,30 + add esi,eax + rol ebp,1 + mov DWORD PTR[12+rsp],ebp + mov edx,DWORD PTR[16+rsp] + mov eax,r11d + mov ecx,esi + xor edx,DWORD PTR[24+rsp] + xor eax,edi + rol ecx,5 + lea r13d,DWORD PTR[1859775393+r13*1+rbp] + xor edx,DWORD PTR[48+rsp] + xor eax,r12d + add r13d,ecx + xor edx,DWORD PTR[4+rsp] + rol edi,30 + add r13d,eax + rol edx,1 + mov DWORD PTR[16+rsp],edx + mov ebp,DWORD PTR[20+rsp] + mov eax,edi + mov ecx,r13d + xor ebp,DWORD PTR[28+rsp] + xor eax,esi + rol ecx,5 + lea r12d,DWORD PTR[1859775393+r12*1+rdx] + xor ebp,DWORD PTR[52+rsp] + xor eax,r11d + add r12d,ecx + xor ebp,DWORD PTR[8+rsp] + rol esi,30 + add r12d,eax + rol ebp,1 + mov DWORD PTR[20+rsp],ebp + mov edx,DWORD PTR[24+rsp] + mov eax,esi + mov ecx,r12d + xor edx,DWORD PTR[32+rsp] + xor eax,r13d + rol ecx,5 + lea r11d,DWORD PTR[1859775393+r11*1+rbp] + xor edx,DWORD PTR[56+rsp] + xor eax,edi + add r11d,ecx + xor edx,DWORD PTR[12+rsp] + rol r13d,30 + add r11d,eax + rol edx,1 + mov DWORD PTR[24+rsp],edx + mov ebp,DWORD PTR[28+rsp] + mov eax,r13d + mov ecx,r11d + xor ebp,DWORD PTR[36+rsp] + xor eax,r12d + rol ecx,5 + lea edi,DWORD PTR[1859775393+rdi*1+rdx] + xor ebp,DWORD PTR[60+rsp] + xor eax,esi + add edi,ecx + xor ebp,DWORD PTR[16+rsp] + rol r12d,30 + add edi,eax + rol ebp,1 + mov DWORD PTR[28+rsp],ebp + mov edx,DWORD PTR[32+rsp] + mov eax,r12d + mov ecx,edi + xor edx,DWORD PTR[40+rsp] + xor eax,r11d + rol ecx,5 + lea esi,DWORD PTR[1859775393+rsi*1+rbp] + xor edx,DWORD PTR[rsp] + xor eax,r13d + add esi,ecx + xor edx,DWORD PTR[20+rsp] + rol r11d,30 + add esi,eax + rol edx,1 + mov DWORD PTR[32+rsp],edx + mov ebp,DWORD PTR[36+rsp] + mov eax,r11d + mov ebx,r11d + xor ebp,DWORD PTR[44+rsp] + and eax,r12d + mov ecx,esi + xor ebp,DWORD PTR[4+rsp] + xor ebx,r12d + lea r13d,DWORD PTR[((-1894007588))+r13*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[24+rsp] + add r13d,eax + and ebx,edi + rol ebp,1 + add r13d,ebx + rol edi,30 + mov DWORD PTR[36+rsp],ebp + add r13d,ecx + mov edx,DWORD PTR[40+rsp] + mov eax,edi + mov ebx,edi + xor edx,DWORD PTR[48+rsp] + and eax,r11d + mov ecx,r13d + xor edx,DWORD PTR[8+rsp] + xor ebx,r11d + lea r12d,DWORD PTR[((-1894007588))+r12*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[28+rsp] + add r12d,eax + and ebx,esi + rol edx,1 + add r12d,ebx + rol esi,30 + mov DWORD PTR[40+rsp],edx + add r12d,ecx + mov ebp,DWORD PTR[44+rsp] + mov eax,esi + mov ebx,esi + xor ebp,DWORD PTR[52+rsp] + and eax,edi + mov ecx,r12d + xor ebp,DWORD PTR[12+rsp] + xor ebx,edi + lea r11d,DWORD PTR[((-1894007588))+r11*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[32+rsp] + add r11d,eax + and ebx,r13d + rol ebp,1 + add r11d,ebx + rol r13d,30 + mov DWORD PTR[44+rsp],ebp + add r11d,ecx + mov edx,DWORD PTR[48+rsp] + mov eax,r13d + mov ebx,r13d + xor edx,DWORD PTR[56+rsp] + and eax,esi + mov ecx,r11d + xor edx,DWORD PTR[16+rsp] + xor ebx,esi + lea edi,DWORD PTR[((-1894007588))+rdi*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[36+rsp] + add edi,eax + and ebx,r12d + rol edx,1 + add edi,ebx + rol r12d,30 + mov DWORD PTR[48+rsp],edx + add edi,ecx + mov ebp,DWORD PTR[52+rsp] + mov eax,r12d + mov ebx,r12d + xor ebp,DWORD PTR[60+rsp] + and eax,r13d + mov ecx,edi + xor ebp,DWORD PTR[20+rsp] + xor ebx,r13d + lea esi,DWORD PTR[((-1894007588))+rsi*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[40+rsp] + add esi,eax + and ebx,r11d + rol ebp,1 + add esi,ebx + rol r11d,30 + mov DWORD PTR[52+rsp],ebp + add esi,ecx + mov edx,DWORD PTR[56+rsp] + mov eax,r11d + mov ebx,r11d + xor edx,DWORD PTR[rsp] + and eax,r12d + mov ecx,esi + xor edx,DWORD PTR[24+rsp] + xor ebx,r12d + lea r13d,DWORD PTR[((-1894007588))+r13*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[44+rsp] + add r13d,eax + and ebx,edi + rol edx,1 + add r13d,ebx + rol edi,30 + mov DWORD PTR[56+rsp],edx + add r13d,ecx + mov ebp,DWORD PTR[60+rsp] + mov eax,edi + mov ebx,edi + xor ebp,DWORD PTR[4+rsp] + and eax,r11d + mov ecx,r13d + xor ebp,DWORD PTR[28+rsp] + xor ebx,r11d + lea r12d,DWORD PTR[((-1894007588))+r12*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[48+rsp] + add r12d,eax + and ebx,esi + rol ebp,1 + add r12d,ebx + rol esi,30 + mov DWORD PTR[60+rsp],ebp + add r12d,ecx + mov edx,DWORD PTR[rsp] + mov eax,esi + mov ebx,esi + xor edx,DWORD PTR[8+rsp] + and eax,edi + mov ecx,r12d + xor edx,DWORD PTR[32+rsp] + xor ebx,edi + lea r11d,DWORD PTR[((-1894007588))+r11*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[52+rsp] + add r11d,eax + and ebx,r13d + rol edx,1 + add r11d,ebx + rol r13d,30 + mov DWORD PTR[rsp],edx + add r11d,ecx + mov ebp,DWORD PTR[4+rsp] + mov eax,r13d + mov ebx,r13d + xor ebp,DWORD PTR[12+rsp] + and eax,esi + mov ecx,r11d + xor ebp,DWORD PTR[36+rsp] + xor ebx,esi + lea edi,DWORD PTR[((-1894007588))+rdi*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[56+rsp] + add edi,eax + and ebx,r12d + rol ebp,1 + add edi,ebx + rol r12d,30 + mov DWORD PTR[4+rsp],ebp + add edi,ecx + mov edx,DWORD PTR[8+rsp] + mov eax,r12d + mov ebx,r12d + xor edx,DWORD PTR[16+rsp] + and eax,r13d + mov ecx,edi + xor edx,DWORD PTR[40+rsp] + xor ebx,r13d + lea esi,DWORD PTR[((-1894007588))+rsi*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[60+rsp] + add esi,eax + and ebx,r11d + rol edx,1 + add esi,ebx + rol r11d,30 + mov DWORD PTR[8+rsp],edx + add esi,ecx + mov ebp,DWORD PTR[12+rsp] + mov eax,r11d + mov ebx,r11d + xor ebp,DWORD PTR[20+rsp] + and eax,r12d + mov ecx,esi + xor ebp,DWORD PTR[44+rsp] + xor ebx,r12d + lea r13d,DWORD PTR[((-1894007588))+r13*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[rsp] + add r13d,eax + and ebx,edi + rol ebp,1 + add r13d,ebx + rol edi,30 + mov DWORD PTR[12+rsp],ebp + add r13d,ecx + mov edx,DWORD PTR[16+rsp] + mov eax,edi + mov ebx,edi + xor edx,DWORD PTR[24+rsp] + and eax,r11d + mov ecx,r13d + xor edx,DWORD PTR[48+rsp] + xor ebx,r11d + lea r12d,DWORD PTR[((-1894007588))+r12*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[4+rsp] + add r12d,eax + and ebx,esi + rol edx,1 + add r12d,ebx + rol esi,30 + mov DWORD PTR[16+rsp],edx + add r12d,ecx + mov ebp,DWORD PTR[20+rsp] + mov eax,esi + mov ebx,esi + xor ebp,DWORD PTR[28+rsp] + and eax,edi + mov ecx,r12d + xor ebp,DWORD PTR[52+rsp] + xor ebx,edi + lea r11d,DWORD PTR[((-1894007588))+r11*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[8+rsp] + add r11d,eax + and ebx,r13d + rol ebp,1 + add r11d,ebx + rol r13d,30 + mov DWORD PTR[20+rsp],ebp + add r11d,ecx + mov edx,DWORD PTR[24+rsp] + mov eax,r13d + mov ebx,r13d + xor edx,DWORD PTR[32+rsp] + and eax,esi + mov ecx,r11d + xor edx,DWORD PTR[56+rsp] + xor ebx,esi + lea edi,DWORD PTR[((-1894007588))+rdi*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[12+rsp] + add edi,eax + and ebx,r12d + rol edx,1 + add edi,ebx + rol r12d,30 + mov DWORD PTR[24+rsp],edx + add edi,ecx + mov ebp,DWORD PTR[28+rsp] + mov eax,r12d + mov ebx,r12d + xor ebp,DWORD PTR[36+rsp] + and eax,r13d + mov ecx,edi + xor ebp,DWORD PTR[60+rsp] + xor ebx,r13d + lea esi,DWORD PTR[((-1894007588))+rsi*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[16+rsp] + add esi,eax + and ebx,r11d + rol ebp,1 + add esi,ebx + rol r11d,30 + mov DWORD PTR[28+rsp],ebp + add esi,ecx + mov edx,DWORD PTR[32+rsp] + mov eax,r11d + mov ebx,r11d + xor edx,DWORD PTR[40+rsp] + and eax,r12d + mov ecx,esi + xor edx,DWORD PTR[rsp] + xor ebx,r12d + lea r13d,DWORD PTR[((-1894007588))+r13*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[20+rsp] + add r13d,eax + and ebx,edi + rol edx,1 + add r13d,ebx + rol edi,30 + mov DWORD PTR[32+rsp],edx + add r13d,ecx + mov ebp,DWORD PTR[36+rsp] + mov eax,edi + mov ebx,edi + xor ebp,DWORD PTR[44+rsp] + and eax,r11d + mov ecx,r13d + xor ebp,DWORD PTR[4+rsp] + xor ebx,r11d + lea r12d,DWORD PTR[((-1894007588))+r12*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[24+rsp] + add r12d,eax + and ebx,esi + rol ebp,1 + add r12d,ebx + rol esi,30 + mov DWORD PTR[36+rsp],ebp + add r12d,ecx + mov edx,DWORD PTR[40+rsp] + mov eax,esi + mov ebx,esi + xor edx,DWORD PTR[48+rsp] + and eax,edi + mov ecx,r12d + xor edx,DWORD PTR[8+rsp] + xor ebx,edi + lea r11d,DWORD PTR[((-1894007588))+r11*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[28+rsp] + add r11d,eax + and ebx,r13d + rol edx,1 + add r11d,ebx + rol r13d,30 + mov DWORD PTR[40+rsp],edx + add r11d,ecx + mov ebp,DWORD PTR[44+rsp] + mov eax,r13d + mov ebx,r13d + xor ebp,DWORD PTR[52+rsp] + and eax,esi + mov ecx,r11d + xor ebp,DWORD PTR[12+rsp] + xor ebx,esi + lea edi,DWORD PTR[((-1894007588))+rdi*1+rdx] + rol ecx,5 + xor ebp,DWORD PTR[32+rsp] + add edi,eax + and ebx,r12d + rol ebp,1 + add edi,ebx + rol r12d,30 + mov DWORD PTR[44+rsp],ebp + add edi,ecx + mov edx,DWORD PTR[48+rsp] + mov eax,r12d + mov ebx,r12d + xor edx,DWORD PTR[56+rsp] + and eax,r13d + mov ecx,edi + xor edx,DWORD PTR[16+rsp] + xor ebx,r13d + lea esi,DWORD PTR[((-1894007588))+rsi*1+rbp] + rol ecx,5 + xor edx,DWORD PTR[36+rsp] + add esi,eax + and ebx,r11d + rol edx,1 + add esi,ebx + rol r11d,30 + mov DWORD PTR[48+rsp],edx + add esi,ecx + mov ebp,DWORD PTR[52+rsp] + mov eax,r11d + mov ecx,esi + xor ebp,DWORD PTR[60+rsp] + xor eax,edi + rol ecx,5 + lea r13d,DWORD PTR[((-899497514))+r13*1+rdx] + xor ebp,DWORD PTR[20+rsp] + xor eax,r12d + add r13d,ecx + xor ebp,DWORD PTR[40+rsp] + rol edi,30 + add r13d,eax + rol ebp,1 + mov DWORD PTR[52+rsp],ebp + mov edx,DWORD PTR[56+rsp] + mov eax,edi + mov ecx,r13d + xor edx,DWORD PTR[rsp] + xor eax,esi + rol ecx,5 + lea r12d,DWORD PTR[((-899497514))+r12*1+rbp] + xor edx,DWORD PTR[24+rsp] + xor eax,r11d + add r12d,ecx + xor edx,DWORD PTR[44+rsp] + rol esi,30 + add r12d,eax + rol edx,1 + mov DWORD PTR[56+rsp],edx + mov ebp,DWORD PTR[60+rsp] + mov eax,esi + mov ecx,r12d + xor ebp,DWORD PTR[4+rsp] + xor eax,r13d + rol ecx,5 + lea r11d,DWORD PTR[((-899497514))+r11*1+rdx] + xor ebp,DWORD PTR[28+rsp] + xor eax,edi + add r11d,ecx + xor ebp,DWORD PTR[48+rsp] + rol r13d,30 + add r11d,eax + rol ebp,1 + mov DWORD PTR[60+rsp],ebp + mov edx,DWORD PTR[rsp] + mov eax,r13d + mov ecx,r11d + xor edx,DWORD PTR[8+rsp] + xor eax,r12d + rol ecx,5 + lea edi,DWORD PTR[((-899497514))+rdi*1+rbp] + xor edx,DWORD PTR[32+rsp] + xor eax,esi + add edi,ecx + xor edx,DWORD PTR[52+rsp] + rol r12d,30 + add edi,eax + rol edx,1 + mov DWORD PTR[rsp],edx + mov ebp,DWORD PTR[4+rsp] + mov eax,r12d + mov ecx,edi + xor ebp,DWORD PTR[12+rsp] + xor eax,r11d + rol ecx,5 + lea esi,DWORD PTR[((-899497514))+rsi*1+rdx] + xor ebp,DWORD PTR[36+rsp] + xor eax,r13d + add esi,ecx + xor ebp,DWORD PTR[56+rsp] + rol r11d,30 + add esi,eax + rol ebp,1 + mov DWORD PTR[4+rsp],ebp + mov edx,DWORD PTR[8+rsp] + mov eax,r11d + mov ecx,esi + xor edx,DWORD PTR[16+rsp] + xor eax,edi + rol ecx,5 + lea r13d,DWORD PTR[((-899497514))+r13*1+rbp] + xor edx,DWORD PTR[40+rsp] + xor eax,r12d + add r13d,ecx + xor edx,DWORD PTR[60+rsp] + rol edi,30 + add r13d,eax + rol edx,1 + mov DWORD PTR[8+rsp],edx + mov ebp,DWORD PTR[12+rsp] + mov eax,edi + mov ecx,r13d + xor ebp,DWORD PTR[20+rsp] + xor eax,esi + rol ecx,5 + lea r12d,DWORD PTR[((-899497514))+r12*1+rdx] + xor ebp,DWORD PTR[44+rsp] + xor eax,r11d + add r12d,ecx + xor ebp,DWORD PTR[rsp] + rol esi,30 + add r12d,eax + rol ebp,1 + mov DWORD PTR[12+rsp],ebp + mov edx,DWORD PTR[16+rsp] + mov eax,esi + mov ecx,r12d + xor edx,DWORD PTR[24+rsp] + xor eax,r13d + rol ecx,5 + lea r11d,DWORD PTR[((-899497514))+r11*1+rbp] + xor edx,DWORD PTR[48+rsp] + xor eax,edi + add r11d,ecx + xor edx,DWORD PTR[4+rsp] + rol r13d,30 + add r11d,eax + rol edx,1 + mov DWORD PTR[16+rsp],edx + mov ebp,DWORD PTR[20+rsp] + mov eax,r13d + mov ecx,r11d + xor ebp,DWORD PTR[28+rsp] + xor eax,r12d + rol ecx,5 + lea edi,DWORD PTR[((-899497514))+rdi*1+rdx] + xor ebp,DWORD PTR[52+rsp] + xor eax,esi + add edi,ecx + xor ebp,DWORD PTR[8+rsp] + rol r12d,30 + add edi,eax + rol ebp,1 + mov DWORD PTR[20+rsp],ebp + mov edx,DWORD PTR[24+rsp] + mov eax,r12d + mov ecx,edi + xor edx,DWORD PTR[32+rsp] + xor eax,r11d + rol ecx,5 + lea esi,DWORD PTR[((-899497514))+rsi*1+rbp] + xor edx,DWORD PTR[56+rsp] + xor eax,r13d + add esi,ecx + xor edx,DWORD PTR[12+rsp] + rol r11d,30 + add esi,eax + rol edx,1 + mov DWORD PTR[24+rsp],edx + mov ebp,DWORD PTR[28+rsp] + mov eax,r11d + mov ecx,esi + xor ebp,DWORD PTR[36+rsp] + xor eax,edi + rol ecx,5 + lea r13d,DWORD PTR[((-899497514))+r13*1+rdx] + xor ebp,DWORD PTR[60+rsp] + xor eax,r12d + add r13d,ecx + xor ebp,DWORD PTR[16+rsp] + rol edi,30 + add r13d,eax + rol ebp,1 + mov DWORD PTR[28+rsp],ebp + mov edx,DWORD PTR[32+rsp] + mov eax,edi + mov ecx,r13d + xor edx,DWORD PTR[40+rsp] + xor eax,esi + rol ecx,5 + lea r12d,DWORD PTR[((-899497514))+r12*1+rbp] + xor edx,DWORD PTR[rsp] + xor eax,r11d + add r12d,ecx + xor edx,DWORD PTR[20+rsp] + rol esi,30 + add r12d,eax + rol edx,1 + mov DWORD PTR[32+rsp],edx + mov ebp,DWORD PTR[36+rsp] + mov eax,esi + mov ecx,r12d + xor ebp,DWORD PTR[44+rsp] + xor eax,r13d + rol ecx,5 + lea r11d,DWORD PTR[((-899497514))+r11*1+rdx] + xor ebp,DWORD PTR[4+rsp] + xor eax,edi + add r11d,ecx + xor ebp,DWORD PTR[24+rsp] + rol r13d,30 + add r11d,eax + rol ebp,1 + mov DWORD PTR[36+rsp],ebp + mov edx,DWORD PTR[40+rsp] + mov eax,r13d + mov ecx,r11d + xor edx,DWORD PTR[48+rsp] + xor eax,r12d + rol ecx,5 + lea edi,DWORD PTR[((-899497514))+rdi*1+rbp] + xor edx,DWORD PTR[8+rsp] + xor eax,esi + add edi,ecx + xor edx,DWORD PTR[28+rsp] + rol r12d,30 + add edi,eax + rol edx,1 + mov DWORD PTR[40+rsp],edx + mov ebp,DWORD PTR[44+rsp] + mov eax,r12d + mov ecx,edi + xor ebp,DWORD PTR[52+rsp] + xor eax,r11d + rol ecx,5 + lea esi,DWORD PTR[((-899497514))+rsi*1+rdx] + xor ebp,DWORD PTR[12+rsp] + xor eax,r13d + add esi,ecx + xor ebp,DWORD PTR[32+rsp] + rol r11d,30 + add esi,eax + rol ebp,1 + mov DWORD PTR[44+rsp],ebp + mov edx,DWORD PTR[48+rsp] + mov eax,r11d + mov ecx,esi + xor edx,DWORD PTR[56+rsp] + xor eax,edi + rol ecx,5 + lea r13d,DWORD PTR[((-899497514))+r13*1+rbp] + xor edx,DWORD PTR[16+rsp] + xor eax,r12d + add r13d,ecx + xor edx,DWORD PTR[36+rsp] + rol edi,30 + add r13d,eax + rol edx,1 + mov DWORD PTR[48+rsp],edx + mov ebp,DWORD PTR[52+rsp] + mov eax,edi + mov ecx,r13d + xor ebp,DWORD PTR[60+rsp] + xor eax,esi + rol ecx,5 + lea r12d,DWORD PTR[((-899497514))+r12*1+rdx] + xor ebp,DWORD PTR[20+rsp] + xor eax,r11d + add r12d,ecx + xor ebp,DWORD PTR[40+rsp] + rol esi,30 + add r12d,eax + rol ebp,1 + mov edx,DWORD PTR[56+rsp] + mov eax,esi + mov ecx,r12d + xor edx,DWORD PTR[rsp] + xor eax,r13d + rol ecx,5 + lea r11d,DWORD PTR[((-899497514))+r11*1+rbp] + xor edx,DWORD PTR[24+rsp] + xor eax,edi + add r11d,ecx + xor edx,DWORD PTR[44+rsp] + rol r13d,30 + add r11d,eax + rol edx,1 + mov ebp,DWORD PTR[60+rsp] + mov eax,r13d + mov ecx,r11d + xor ebp,DWORD PTR[4+rsp] + xor eax,r12d + rol ecx,5 + lea edi,DWORD PTR[((-899497514))+rdi*1+rdx] + xor ebp,DWORD PTR[28+rsp] + xor eax,esi + add edi,ecx + xor ebp,DWORD PTR[48+rsp] + rol r12d,30 + add edi,eax + rol ebp,1 + mov eax,r12d + mov ecx,edi + xor eax,r11d + lea esi,DWORD PTR[((-899497514))+rsi*1+rbp] + rol ecx,5 + xor eax,r13d + add esi,ecx + rol r11d,30 + add esi,eax + add esi,DWORD PTR[r8] + add edi,DWORD PTR[4+r8] + add r11d,DWORD PTR[8+r8] + add r12d,DWORD PTR[12+r8] + add r13d,DWORD PTR[16+r8] + mov DWORD PTR[r8],esi + mov DWORD PTR[4+r8],edi + mov DWORD PTR[8+r8],r11d + mov DWORD PTR[12+r8],r12d + mov DWORD PTR[16+r8],r13d + + sub r10,1 + lea r9,QWORD PTR[64+r9] + jnz $L$loop + + mov rsi,QWORD PTR[64+rsp] + mov r13,QWORD PTR[rsi] + mov r12,QWORD PTR[8+rsi] + mov rbp,QWORD PTR[16+rsi] + mov rbx,QWORD PTR[24+rsi] + lea rsp,QWORD PTR[32+rsi] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_block_data_order:: +sha1_block_data_order ENDP + +ALIGN 16 +sha1_block_data_order_ssse3 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_ssse3:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_ssse3_shortcut:: + push rbx + push rbp + push r12 + lea rsp,QWORD PTR[((-144))+rsp] + movaps XMMWORD PTR[(64+0)+rsp],xmm6 + movaps XMMWORD PTR[(64+16)+rsp],xmm7 + movaps XMMWORD PTR[(64+32)+rsp],xmm8 + movaps XMMWORD PTR[(64+48)+rsp],xmm9 + movaps XMMWORD PTR[(64+64)+rsp],xmm10 +$L$prologue_ssse3:: + mov r8,rdi + mov r9,rsi + mov r10,rdx + + shl r10,6 + add r10,r9 + lea r11,QWORD PTR[K_XX_XX] + + mov eax,DWORD PTR[r8] + mov ebx,DWORD PTR[4+r8] + mov ecx,DWORD PTR[8+r8] + mov edx,DWORD PTR[12+r8] + mov esi,ebx + mov ebp,DWORD PTR[16+r8] + + movdqa xmm6,XMMWORD PTR[64+r11] + movdqa xmm9,XMMWORD PTR[r11] + movdqu xmm0,XMMWORD PTR[r9] + movdqu xmm1,XMMWORD PTR[16+r9] + movdqu xmm2,XMMWORD PTR[32+r9] + movdqu xmm3,XMMWORD PTR[48+r9] +DB 102,15,56,0,198 + add r9,64 +DB 102,15,56,0,206 +DB 102,15,56,0,214 +DB 102,15,56,0,222 + paddd xmm0,xmm9 + paddd xmm1,xmm9 + paddd xmm2,xmm9 + movdqa XMMWORD PTR[rsp],xmm0 + psubd xmm0,xmm9 + movdqa XMMWORD PTR[16+rsp],xmm1 + psubd xmm1,xmm9 + movdqa XMMWORD PTR[32+rsp],xmm2 + psubd xmm2,xmm9 + jmp $L$oop_ssse3 +ALIGN 16 +$L$oop_ssse3:: + movdqa xmm4,xmm1 + add ebp,DWORD PTR[rsp] + xor ecx,edx + movdqa xmm8,xmm3 +DB 102,15,58,15,224,8 + mov edi,eax + rol eax,5 + paddd xmm9,xmm3 + and esi,ecx + xor ecx,edx + psrldq xmm8,4 + xor esi,edx + add ebp,eax + pxor xmm4,xmm0 + ror ebx,2 + add ebp,esi + pxor xmm8,xmm2 + add edx,DWORD PTR[4+rsp] + xor ebx,ecx + mov esi,ebp + rol ebp,5 + pxor xmm4,xmm8 + and edi,ebx + xor ebx,ecx + movdqa XMMWORD PTR[48+rsp],xmm9 + xor edi,ecx + add edx,ebp + movdqa xmm10,xmm4 + movdqa xmm8,xmm4 + ror eax,7 + add edx,edi + add ecx,DWORD PTR[8+rsp] + xor eax,ebx + pslldq xmm10,12 + paddd xmm4,xmm4 + mov edi,edx + rol edx,5 + and esi,eax + xor eax,ebx + psrld xmm8,31 + xor esi,ebx + add ecx,edx + movdqa xmm9,xmm10 + ror ebp,7 + add ecx,esi + psrld xmm10,30 + por xmm4,xmm8 + add ebx,DWORD PTR[12+rsp] + xor ebp,eax + mov esi,ecx + rol ecx,5 + pslld xmm9,2 + pxor xmm4,xmm10 + and edi,ebp + xor ebp,eax + movdqa xmm10,XMMWORD PTR[r11] + xor edi,eax + add ebx,ecx + pxor xmm4,xmm9 + ror edx,7 + add ebx,edi + movdqa xmm5,xmm2 + add eax,DWORD PTR[16+rsp] + xor edx,ebp + movdqa xmm9,xmm4 +DB 102,15,58,15,233,8 + mov edi,ebx + rol ebx,5 + paddd xmm10,xmm4 + and esi,edx + xor edx,ebp + psrldq xmm9,4 + xor esi,ebp + add eax,ebx + pxor xmm5,xmm1 + ror ecx,7 + add eax,esi + pxor xmm9,xmm3 + add ebp,DWORD PTR[20+rsp] + xor ecx,edx + mov esi,eax + rol eax,5 + pxor xmm5,xmm9 + and edi,ecx + xor ecx,edx + movdqa XMMWORD PTR[rsp],xmm10 + xor edi,edx + add ebp,eax + movdqa xmm8,xmm5 + movdqa xmm9,xmm5 + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[24+rsp] + xor ebx,ecx + pslldq xmm8,12 + paddd xmm5,xmm5 + mov edi,ebp + rol ebp,5 + and esi,ebx + xor ebx,ecx + psrld xmm9,31 + xor esi,ecx + add edx,ebp + movdqa xmm10,xmm8 + ror eax,7 + add edx,esi + psrld xmm8,30 + por xmm5,xmm9 + add ecx,DWORD PTR[28+rsp] + xor eax,ebx + mov esi,edx + rol edx,5 + pslld xmm10,2 + pxor xmm5,xmm8 + and edi,eax + xor eax,ebx + movdqa xmm8,XMMWORD PTR[16+r11] + xor edi,ebx + add ecx,edx + pxor xmm5,xmm10 + ror ebp,7 + add ecx,edi + movdqa xmm6,xmm3 + add ebx,DWORD PTR[32+rsp] + xor ebp,eax + movdqa xmm10,xmm5 +DB 102,15,58,15,242,8 + mov edi,ecx + rol ecx,5 + paddd xmm8,xmm5 + and esi,ebp + xor ebp,eax + psrldq xmm10,4 + xor esi,eax + add ebx,ecx + pxor xmm6,xmm2 + ror edx,7 + add ebx,esi + pxor xmm10,xmm4 + add eax,DWORD PTR[36+rsp] + xor edx,ebp + mov esi,ebx + rol ebx,5 + pxor xmm6,xmm10 + and edi,edx + xor edx,ebp + movdqa XMMWORD PTR[16+rsp],xmm8 + xor edi,ebp + add eax,ebx + movdqa xmm9,xmm6 + movdqa xmm10,xmm6 + ror ecx,7 + add eax,edi + add ebp,DWORD PTR[40+rsp] + xor ecx,edx + pslldq xmm9,12 + paddd xmm6,xmm6 + mov edi,eax + rol eax,5 + and esi,ecx + xor ecx,edx + psrld xmm10,31 + xor esi,edx + add ebp,eax + movdqa xmm8,xmm9 + ror ebx,7 + add ebp,esi + psrld xmm9,30 + por xmm6,xmm10 + add edx,DWORD PTR[44+rsp] + xor ebx,ecx + mov esi,ebp + rol ebp,5 + pslld xmm8,2 + pxor xmm6,xmm9 + and edi,ebx + xor ebx,ecx + movdqa xmm9,XMMWORD PTR[16+r11] + xor edi,ecx + add edx,ebp + pxor xmm6,xmm8 + ror eax,7 + add edx,edi + movdqa xmm7,xmm4 + add ecx,DWORD PTR[48+rsp] + xor eax,ebx + movdqa xmm8,xmm6 +DB 102,15,58,15,251,8 + mov edi,edx + rol edx,5 + paddd xmm9,xmm6 + and esi,eax + xor eax,ebx + psrldq xmm8,4 + xor esi,ebx + add ecx,edx + pxor xmm7,xmm3 + ror ebp,7 + add ecx,esi + pxor xmm8,xmm5 + add ebx,DWORD PTR[52+rsp] + xor ebp,eax + mov esi,ecx + rol ecx,5 + pxor xmm7,xmm8 + and edi,ebp + xor ebp,eax + movdqa XMMWORD PTR[32+rsp],xmm9 + xor edi,eax + add ebx,ecx + movdqa xmm10,xmm7 + movdqa xmm8,xmm7 + ror edx,7 + add ebx,edi + add eax,DWORD PTR[56+rsp] + xor edx,ebp + pslldq xmm10,12 + paddd xmm7,xmm7 + mov edi,ebx + rol ebx,5 + and esi,edx + xor edx,ebp + psrld xmm8,31 + xor esi,ebp + add eax,ebx + movdqa xmm9,xmm10 + ror ecx,7 + add eax,esi + psrld xmm10,30 + por xmm7,xmm8 + add ebp,DWORD PTR[60+rsp] + xor ecx,edx + mov esi,eax + rol eax,5 + pslld xmm9,2 + pxor xmm7,xmm10 + and edi,ecx + xor ecx,edx + movdqa xmm10,XMMWORD PTR[16+r11] + xor edi,edx + add ebp,eax + pxor xmm7,xmm9 + ror ebx,7 + add ebp,edi + movdqa xmm9,xmm7 + add edx,DWORD PTR[rsp] + pxor xmm0,xmm4 +DB 102,68,15,58,15,206,8 + xor ebx,ecx + mov edi,ebp + rol ebp,5 + pxor xmm0,xmm1 + and esi,ebx + xor ebx,ecx + movdqa xmm8,xmm10 + paddd xmm10,xmm7 + xor esi,ecx + add edx,ebp + pxor xmm0,xmm9 + ror eax,7 + add edx,esi + add ecx,DWORD PTR[4+rsp] + xor eax,ebx + movdqa xmm9,xmm0 + movdqa XMMWORD PTR[48+rsp],xmm10 + mov esi,edx + rol edx,5 + and edi,eax + xor eax,ebx + pslld xmm0,2 + xor edi,ebx + add ecx,edx + psrld xmm9,30 + ror ebp,7 + add ecx,edi + add ebx,DWORD PTR[8+rsp] + xor ebp,eax + mov edi,ecx + rol ecx,5 + por xmm0,xmm9 + and esi,ebp + xor ebp,eax + movdqa xmm10,xmm0 + xor esi,eax + add ebx,ecx + ror edx,7 + add ebx,esi + add eax,DWORD PTR[12+rsp] + xor edx,ebp + mov esi,ebx + rol ebx,5 + and edi,edx + xor edx,ebp + xor edi,ebp + add eax,ebx + ror ecx,7 + add eax,edi + add ebp,DWORD PTR[16+rsp] + pxor xmm1,xmm5 +DB 102,68,15,58,15,215,8 + xor esi,edx + mov edi,eax + rol eax,5 + pxor xmm1,xmm2 + xor esi,ecx + add ebp,eax + movdqa xmm9,xmm8 + paddd xmm8,xmm0 + ror ebx,7 + add ebp,esi + pxor xmm1,xmm10 + add edx,DWORD PTR[20+rsp] + xor edi,ecx + mov esi,ebp + rol ebp,5 + movdqa xmm10,xmm1 + movdqa XMMWORD PTR[rsp],xmm8 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + pslld xmm1,2 + add ecx,DWORD PTR[24+rsp] + xor esi,ebx + psrld xmm10,30 + mov edi,edx + rol edx,5 + xor esi,eax + add ecx,edx + ror ebp,7 + add ecx,esi + por xmm1,xmm10 + add ebx,DWORD PTR[28+rsp] + xor edi,eax + movdqa xmm8,xmm1 + mov esi,ecx + rol ecx,5 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + add eax,DWORD PTR[32+rsp] + pxor xmm2,xmm6 +DB 102,68,15,58,15,192,8 + xor esi,ebp + mov edi,ebx + rol ebx,5 + pxor xmm2,xmm3 + xor esi,edx + add eax,ebx + movdqa xmm10,XMMWORD PTR[32+r11] + paddd xmm9,xmm1 + ror ecx,7 + add eax,esi + pxor xmm2,xmm8 + add ebp,DWORD PTR[36+rsp] + xor edi,edx + mov esi,eax + rol eax,5 + movdqa xmm8,xmm2 + movdqa XMMWORD PTR[16+rsp],xmm9 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + pslld xmm2,2 + add edx,DWORD PTR[40+rsp] + xor esi,ecx + psrld xmm8,30 + mov edi,ebp + rol ebp,5 + xor esi,ebx + add edx,ebp + ror eax,7 + add edx,esi + por xmm2,xmm8 + add ecx,DWORD PTR[44+rsp] + xor edi,ebx + movdqa xmm9,xmm2 + mov esi,edx + rol edx,5 + xor edi,eax + add ecx,edx + ror ebp,7 + add ecx,edi + add ebx,DWORD PTR[48+rsp] + pxor xmm3,xmm7 +DB 102,68,15,58,15,201,8 + xor esi,eax + mov edi,ecx + rol ecx,5 + pxor xmm3,xmm4 + xor esi,ebp + add ebx,ecx + movdqa xmm8,xmm10 + paddd xmm10,xmm2 + ror edx,7 + add ebx,esi + pxor xmm3,xmm9 + add eax,DWORD PTR[52+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + movdqa xmm9,xmm3 + movdqa XMMWORD PTR[32+rsp],xmm10 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + pslld xmm3,2 + add ebp,DWORD PTR[56+rsp] + xor esi,edx + psrld xmm9,30 + mov edi,eax + rol eax,5 + xor esi,ecx + add ebp,eax + ror ebx,7 + add ebp,esi + por xmm3,xmm9 + add edx,DWORD PTR[60+rsp] + xor edi,ecx + movdqa xmm10,xmm3 + mov esi,ebp + rol ebp,5 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + add ecx,DWORD PTR[rsp] + pxor xmm4,xmm0 +DB 102,68,15,58,15,210,8 + xor esi,ebx + mov edi,edx + rol edx,5 + pxor xmm4,xmm5 + xor esi,eax + add ecx,edx + movdqa xmm9,xmm8 + paddd xmm8,xmm3 + ror ebp,7 + add ecx,esi + pxor xmm4,xmm10 + add ebx,DWORD PTR[4+rsp] + xor edi,eax + mov esi,ecx + rol ecx,5 + movdqa xmm10,xmm4 + movdqa XMMWORD PTR[48+rsp],xmm8 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + pslld xmm4,2 + add eax,DWORD PTR[8+rsp] + xor esi,ebp + psrld xmm10,30 + mov edi,ebx + rol ebx,5 + xor esi,edx + add eax,ebx + ror ecx,7 + add eax,esi + por xmm4,xmm10 + add ebp,DWORD PTR[12+rsp] + xor edi,edx + movdqa xmm8,xmm4 + mov esi,eax + rol eax,5 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[16+rsp] + pxor xmm5,xmm1 +DB 102,68,15,58,15,195,8 + xor esi,ecx + mov edi,ebp + rol ebp,5 + pxor xmm5,xmm6 + xor esi,ebx + add edx,ebp + movdqa xmm10,xmm9 + paddd xmm9,xmm4 + ror eax,7 + add edx,esi + pxor xmm5,xmm8 + add ecx,DWORD PTR[20+rsp] + xor edi,ebx + mov esi,edx + rol edx,5 + movdqa xmm8,xmm5 + movdqa XMMWORD PTR[rsp],xmm9 + xor edi,eax + add ecx,edx + ror ebp,7 + add ecx,edi + pslld xmm5,2 + add ebx,DWORD PTR[24+rsp] + xor esi,eax + psrld xmm8,30 + mov edi,ecx + rol ecx,5 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + por xmm5,xmm8 + add eax,DWORD PTR[28+rsp] + xor edi,ebp + movdqa xmm9,xmm5 + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + mov edi,ecx + pxor xmm6,xmm2 +DB 102,68,15,58,15,204,8 + xor ecx,edx + add ebp,DWORD PTR[32+rsp] + and edi,edx + pxor xmm6,xmm7 + and esi,ecx + ror ebx,7 + movdqa xmm8,xmm10 + paddd xmm10,xmm5 + add ebp,edi + mov edi,eax + pxor xmm6,xmm9 + rol eax,5 + add ebp,esi + xor ecx,edx + add ebp,eax + movdqa xmm9,xmm6 + movdqa XMMWORD PTR[16+rsp],xmm10 + mov esi,ebx + xor ebx,ecx + add edx,DWORD PTR[36+rsp] + and esi,ecx + pslld xmm6,2 + and edi,ebx + ror eax,7 + psrld xmm9,30 + add edx,esi + mov esi,ebp + rol ebp,5 + add edx,edi + xor ebx,ecx + add edx,ebp + por xmm6,xmm9 + mov edi,eax + xor eax,ebx + movdqa xmm10,xmm6 + add ecx,DWORD PTR[40+rsp] + and edi,ebx + and esi,eax + ror ebp,7 + add ecx,edi + mov edi,edx + rol edx,5 + add ecx,esi + xor eax,ebx + add ecx,edx + mov esi,ebp + xor ebp,eax + add ebx,DWORD PTR[44+rsp] + and esi,eax + and edi,ebp + ror edx,7 + add ebx,esi + mov esi,ecx + rol ecx,5 + add ebx,edi + xor ebp,eax + add ebx,ecx + mov edi,edx + pxor xmm7,xmm3 +DB 102,68,15,58,15,213,8 + xor edx,ebp + add eax,DWORD PTR[48+rsp] + and edi,ebp + pxor xmm7,xmm0 + and esi,edx + ror ecx,7 + movdqa xmm9,XMMWORD PTR[48+r11] + paddd xmm8,xmm6 + add eax,edi + mov edi,ebx + pxor xmm7,xmm10 + rol ebx,5 + add eax,esi + xor edx,ebp + add eax,ebx + movdqa xmm10,xmm7 + movdqa XMMWORD PTR[32+rsp],xmm8 + mov esi,ecx + xor ecx,edx + add ebp,DWORD PTR[52+rsp] + and esi,edx + pslld xmm7,2 + and edi,ecx + ror ebx,7 + psrld xmm10,30 + add ebp,esi + mov esi,eax + rol eax,5 + add ebp,edi + xor ecx,edx + add ebp,eax + por xmm7,xmm10 + mov edi,ebx + xor ebx,ecx + movdqa xmm8,xmm7 + add edx,DWORD PTR[56+rsp] + and edi,ecx + and esi,ebx + ror eax,7 + add edx,edi + mov edi,ebp + rol ebp,5 + add edx,esi + xor ebx,ecx + add edx,ebp + mov esi,eax + xor eax,ebx + add ecx,DWORD PTR[60+rsp] + and esi,ebx + and edi,eax + ror ebp,7 + add ecx,esi + mov esi,edx + rol edx,5 + add ecx,edi + xor eax,ebx + add ecx,edx + mov edi,ebp + pxor xmm0,xmm4 +DB 102,68,15,58,15,198,8 + xor ebp,eax + add ebx,DWORD PTR[rsp] + and edi,eax + pxor xmm0,xmm1 + and esi,ebp + ror edx,7 + movdqa xmm10,xmm9 + paddd xmm9,xmm7 + add ebx,edi + mov edi,ecx + pxor xmm0,xmm8 + rol ecx,5 + add ebx,esi + xor ebp,eax + add ebx,ecx + movdqa xmm8,xmm0 + movdqa XMMWORD PTR[48+rsp],xmm9 + mov esi,edx + xor edx,ebp + add eax,DWORD PTR[4+rsp] + and esi,ebp + pslld xmm0,2 + and edi,edx + ror ecx,7 + psrld xmm8,30 + add eax,esi + mov esi,ebx + rol ebx,5 + add eax,edi + xor edx,ebp + add eax,ebx + por xmm0,xmm8 + mov edi,ecx + xor ecx,edx + movdqa xmm9,xmm0 + add ebp,DWORD PTR[8+rsp] + and edi,edx + and esi,ecx + ror ebx,7 + add ebp,edi + mov edi,eax + rol eax,5 + add ebp,esi + xor ecx,edx + add ebp,eax + mov esi,ebx + xor ebx,ecx + add edx,DWORD PTR[12+rsp] + and esi,ecx + and edi,ebx + ror eax,7 + add edx,esi + mov esi,ebp + rol ebp,5 + add edx,edi + xor ebx,ecx + add edx,ebp + mov edi,eax + pxor xmm1,xmm5 +DB 102,68,15,58,15,207,8 + xor eax,ebx + add ecx,DWORD PTR[16+rsp] + and edi,ebx + pxor xmm1,xmm2 + and esi,eax + ror ebp,7 + movdqa xmm8,xmm10 + paddd xmm10,xmm0 + add ecx,edi + mov edi,edx + pxor xmm1,xmm9 + rol edx,5 + add ecx,esi + xor eax,ebx + add ecx,edx + movdqa xmm9,xmm1 + movdqa XMMWORD PTR[rsp],xmm10 + mov esi,ebp + xor ebp,eax + add ebx,DWORD PTR[20+rsp] + and esi,eax + pslld xmm1,2 + and edi,ebp + ror edx,7 + psrld xmm9,30 + add ebx,esi + mov esi,ecx + rol ecx,5 + add ebx,edi + xor ebp,eax + add ebx,ecx + por xmm1,xmm9 + mov edi,edx + xor edx,ebp + movdqa xmm10,xmm1 + add eax,DWORD PTR[24+rsp] + and edi,ebp + and esi,edx + ror ecx,7 + add eax,edi + mov edi,ebx + rol ebx,5 + add eax,esi + xor edx,ebp + add eax,ebx + mov esi,ecx + xor ecx,edx + add ebp,DWORD PTR[28+rsp] + and esi,edx + and edi,ecx + ror ebx,7 + add ebp,esi + mov esi,eax + rol eax,5 + add ebp,edi + xor ecx,edx + add ebp,eax + mov edi,ebx + pxor xmm2,xmm6 +DB 102,68,15,58,15,208,8 + xor ebx,ecx + add edx,DWORD PTR[32+rsp] + and edi,ecx + pxor xmm2,xmm3 + and esi,ebx + ror eax,7 + movdqa xmm9,xmm8 + paddd xmm8,xmm1 + add edx,edi + mov edi,ebp + pxor xmm2,xmm10 + rol ebp,5 + add edx,esi + xor ebx,ecx + add edx,ebp + movdqa xmm10,xmm2 + movdqa XMMWORD PTR[16+rsp],xmm8 + mov esi,eax + xor eax,ebx + add ecx,DWORD PTR[36+rsp] + and esi,ebx + pslld xmm2,2 + and edi,eax + ror ebp,7 + psrld xmm10,30 + add ecx,esi + mov esi,edx + rol edx,5 + add ecx,edi + xor eax,ebx + add ecx,edx + por xmm2,xmm10 + mov edi,ebp + xor ebp,eax + movdqa xmm8,xmm2 + add ebx,DWORD PTR[40+rsp] + and edi,eax + and esi,ebp + ror edx,7 + add ebx,edi + mov edi,ecx + rol ecx,5 + add ebx,esi + xor ebp,eax + add ebx,ecx + mov esi,edx + xor edx,ebp + add eax,DWORD PTR[44+rsp] + and esi,ebp + and edi,edx + ror ecx,7 + add eax,esi + mov esi,ebx + rol ebx,5 + add eax,edi + xor edx,ebp + add eax,ebx + add ebp,DWORD PTR[48+rsp] + pxor xmm3,xmm7 +DB 102,68,15,58,15,193,8 + xor esi,edx + mov edi,eax + rol eax,5 + pxor xmm3,xmm4 + xor esi,ecx + add ebp,eax + movdqa xmm10,xmm9 + paddd xmm9,xmm2 + ror ebx,7 + add ebp,esi + pxor xmm3,xmm8 + add edx,DWORD PTR[52+rsp] + xor edi,ecx + mov esi,ebp + rol ebp,5 + movdqa xmm8,xmm3 + movdqa XMMWORD PTR[32+rsp],xmm9 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + pslld xmm3,2 + add ecx,DWORD PTR[56+rsp] + xor esi,ebx + psrld xmm8,30 + mov edi,edx + rol edx,5 + xor esi,eax + add ecx,edx + ror ebp,7 + add ecx,esi + por xmm3,xmm8 + add ebx,DWORD PTR[60+rsp] + xor edi,eax + mov esi,ecx + rol ecx,5 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + add eax,DWORD PTR[rsp] + paddd xmm10,xmm3 + xor esi,ebp + mov edi,ebx + rol ebx,5 + xor esi,edx + movdqa XMMWORD PTR[48+rsp],xmm10 + add eax,ebx + ror ecx,7 + add eax,esi + add ebp,DWORD PTR[4+rsp] + xor edi,edx + mov esi,eax + rol eax,5 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[8+rsp] + xor esi,ecx + mov edi,ebp + rol ebp,5 + xor esi,ebx + add edx,ebp + ror eax,7 + add edx,esi + add ecx,DWORD PTR[12+rsp] + xor edi,ebx + mov esi,edx + rol edx,5 + xor edi,eax + add ecx,edx + ror ebp,7 + add ecx,edi + cmp r9,r10 + je $L$done_ssse3 + movdqa xmm6,XMMWORD PTR[64+r11] + movdqa xmm9,XMMWORD PTR[r11] + movdqu xmm0,XMMWORD PTR[r9] + movdqu xmm1,XMMWORD PTR[16+r9] + movdqu xmm2,XMMWORD PTR[32+r9] + movdqu xmm3,XMMWORD PTR[48+r9] +DB 102,15,56,0,198 + add r9,64 + add ebx,DWORD PTR[16+rsp] + xor esi,eax +DB 102,15,56,0,206 + mov edi,ecx + rol ecx,5 + paddd xmm0,xmm9 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + movdqa XMMWORD PTR[rsp],xmm0 + add eax,DWORD PTR[20+rsp] + xor edi,ebp + psubd xmm0,xmm9 + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + add ebp,DWORD PTR[24+rsp] + xor esi,edx + mov edi,eax + rol eax,5 + xor esi,ecx + add ebp,eax + ror ebx,7 + add ebp,esi + add edx,DWORD PTR[28+rsp] + xor edi,ecx + mov esi,ebp + rol ebp,5 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + add ecx,DWORD PTR[32+rsp] + xor esi,ebx +DB 102,15,56,0,214 + mov edi,edx + rol edx,5 + paddd xmm1,xmm9 + xor esi,eax + add ecx,edx + ror ebp,7 + add ecx,esi + movdqa XMMWORD PTR[16+rsp],xmm1 + add ebx,DWORD PTR[36+rsp] + xor edi,eax + psubd xmm1,xmm9 + mov esi,ecx + rol ecx,5 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + add eax,DWORD PTR[40+rsp] + xor esi,ebp + mov edi,ebx + rol ebx,5 + xor esi,edx + add eax,ebx + ror ecx,7 + add eax,esi + add ebp,DWORD PTR[44+rsp] + xor edi,edx + mov esi,eax + rol eax,5 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[48+rsp] + xor esi,ecx +DB 102,15,56,0,222 + mov edi,ebp + rol ebp,5 + paddd xmm2,xmm9 + xor esi,ebx + add edx,ebp + ror eax,7 + add edx,esi + movdqa XMMWORD PTR[32+rsp],xmm2 + add ecx,DWORD PTR[52+rsp] + xor edi,ebx + psubd xmm2,xmm9 + mov esi,edx + rol edx,5 + xor edi,eax + add ecx,edx + ror ebp,7 + add ecx,edi + add ebx,DWORD PTR[56+rsp] + xor esi,eax + mov edi,ecx + rol ecx,5 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + add eax,DWORD PTR[60+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + add eax,DWORD PTR[r8] + add esi,DWORD PTR[4+r8] + add ecx,DWORD PTR[8+r8] + add edx,DWORD PTR[12+r8] + mov DWORD PTR[r8],eax + add ebp,DWORD PTR[16+r8] + mov DWORD PTR[4+r8],esi + mov ebx,esi + mov DWORD PTR[8+r8],ecx + mov DWORD PTR[12+r8],edx + mov DWORD PTR[16+r8],ebp + jmp $L$oop_ssse3 + +ALIGN 16 +$L$done_ssse3:: + add ebx,DWORD PTR[16+rsp] + xor esi,eax + mov edi,ecx + rol ecx,5 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + add eax,DWORD PTR[20+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + add ebp,DWORD PTR[24+rsp] + xor esi,edx + mov edi,eax + rol eax,5 + xor esi,ecx + add ebp,eax + ror ebx,7 + add ebp,esi + add edx,DWORD PTR[28+rsp] + xor edi,ecx + mov esi,ebp + rol ebp,5 + xor edi,ebx + add edx,ebp + ror eax,7 + add edx,edi + add ecx,DWORD PTR[32+rsp] + xor esi,ebx + mov edi,edx + rol edx,5 + xor esi,eax + add ecx,edx + ror ebp,7 + add ecx,esi + add ebx,DWORD PTR[36+rsp] + xor edi,eax + mov esi,ecx + rol ecx,5 + xor edi,ebp + add ebx,ecx + ror edx,7 + add ebx,edi + add eax,DWORD PTR[40+rsp] + xor esi,ebp + mov edi,ebx + rol ebx,5 + xor esi,edx + add eax,ebx + ror ecx,7 + add eax,esi + add ebp,DWORD PTR[44+rsp] + xor edi,edx + mov esi,eax + rol eax,5 + xor edi,ecx + add ebp,eax + ror ebx,7 + add ebp,edi + add edx,DWORD PTR[48+rsp] + xor esi,ecx + mov edi,ebp + rol ebp,5 + xor esi,ebx + add edx,ebp + ror eax,7 + add edx,esi + add ecx,DWORD PTR[52+rsp] + xor edi,ebx + mov esi,edx + rol edx,5 + xor edi,eax + add ecx,edx + ror ebp,7 + add ecx,edi + add ebx,DWORD PTR[56+rsp] + xor esi,eax + mov edi,ecx + rol ecx,5 + xor esi,ebp + add ebx,ecx + ror edx,7 + add ebx,esi + add eax,DWORD PTR[60+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + xor edi,edx + add eax,ebx + ror ecx,7 + add eax,edi + add eax,DWORD PTR[r8] + add esi,DWORD PTR[4+r8] + add ecx,DWORD PTR[8+r8] + mov DWORD PTR[r8],eax + add edx,DWORD PTR[12+r8] + mov DWORD PTR[4+r8],esi + add ebp,DWORD PTR[16+r8] + mov DWORD PTR[8+r8],ecx + mov DWORD PTR[12+r8],edx + mov DWORD PTR[16+r8],ebp + movaps xmm6,XMMWORD PTR[((64+0))+rsp] + movaps xmm7,XMMWORD PTR[((64+16))+rsp] + movaps xmm8,XMMWORD PTR[((64+32))+rsp] + movaps xmm9,XMMWORD PTR[((64+48))+rsp] + movaps xmm10,XMMWORD PTR[((64+64))+rsp] + lea rsi,QWORD PTR[144+rsp] + mov r12,QWORD PTR[rsi] + mov rbp,QWORD PTR[8+rsi] + mov rbx,QWORD PTR[16+rsi] + lea rsp,QWORD PTR[24+rsi] +$L$epilogue_ssse3:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_block_data_order_ssse3:: +sha1_block_data_order_ssse3 ENDP +ALIGN 64 +K_XX_XX:: + DD 05a827999h,05a827999h,05a827999h,05a827999h + DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h + DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch + DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh +DB 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +DB 102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44 +DB 32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60 +DB 97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114 +DB 103,62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$prologue] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jae $L$common_seh_tail + + mov rax,QWORD PTR[64+rax] + lea rax,QWORD PTR[32+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + + jmp $L$common_seh_tail +se_handler ENDP + + +ALIGN 16 +ssse3_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[64+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,10 + DD 0a548f3fch + lea rax,QWORD PTR[168+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +ssse3_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_sha1_block_data_order + DD imagerel $L$SEH_end_sha1_block_data_order + DD imagerel $L$SEH_info_sha1_block_data_order + DD imagerel $L$SEH_begin_sha1_block_data_order_ssse3 + DD imagerel $L$SEH_end_sha1_block_data_order_ssse3 + DD imagerel $L$SEH_info_sha1_block_data_order_ssse3 +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_sha1_block_data_order:: +DB 9,0,0,0 + DD imagerel se_handler +$L$SEH_info_sha1_block_data_order_ssse3:: +DB 9,0,0,0 + DD imagerel ssse3_handler + DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3 + +.xdata ENDS +END + diff --git a/ext/libressl/crypto/sha/sha1-mingw64-x86_64.S b/ext/libressl/crypto/sha/sha1-mingw64-x86_64.S new file mode 100644 index 0000000..3ce9fc9 --- /dev/null +++ b/ext/libressl/crypto/sha/sha1-mingw64-x86_64.S @@ -0,0 +1,2664 @@ +#include "x86_arch.h" +.text + + + +.globl sha1_block_data_order +.def sha1_block_data_order; .scl 2; .type 32; .endef +.p2align 4 +sha1_block_data_order: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_sha1_block_data_order: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + + movl OPENSSL_ia32cap_P+0(%rip),%r9d + movl OPENSSL_ia32cap_P+4(%rip),%r8d + testl $IA32CAP_MASK1_SSSE3,%r8d + jz .Lialu + jmp _ssse3_shortcut + +.p2align 4 +.Lialu: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + movq %rsp,%r11 + movq %rdi,%r8 + subq $72,%rsp + movq %rsi,%r9 + andq $-64,%rsp + movq %rdx,%r10 + movq %r11,64(%rsp) +.Lprologue: + + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp .Lloop + +.p2align 4 +.Lloop: + movl 0(%r9),%edx + bswapl %edx + movl %edx,0(%rsp) + movl %r11d,%eax + movl 4(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,4(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 8(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,8(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 12(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,12(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 16(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,16(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 20(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,20(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 24(%r9),%edx + movl %esi,%ecx + xorl %r12d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r13,1),%r13d + andl %edi,%eax + movl %edx,24(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 28(%r9),%ebp + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r12,1),%r12d + andl %esi,%eax + movl %ebp,28(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 32(%r9),%edx + movl %r12d,%ecx + xorl %edi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r11,1),%r11d + andl %r13d,%eax + movl %edx,32(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 36(%r9),%ebp + movl %r11d,%ecx + xorl %esi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rdi,1),%edi + andl %r12d,%eax + movl %ebp,36(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 40(%r9),%edx + movl %edi,%ecx + xorl %r13d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rsi,1),%esi + andl %r11d,%eax + movl %edx,40(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 44(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,44(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 48(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,48(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 52(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,52(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 56(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,56(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 60(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,60(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 0(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 32(%rsp),%edx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + xorl 52(%rsp),%edx + xorl %r12d,%eax + roll $1,%edx + addl %ecx,%r13d + roll $30,%edi + movl %edx,0(%rsp) + addl %eax,%r13d + movl 4(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 36(%rsp),%ebp + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + xorl 56(%rsp),%ebp + xorl %r11d,%eax + roll $1,%ebp + addl %ecx,%r12d + roll $30,%esi + movl %ebp,4(%rsp) + addl %eax,%r12d + movl 8(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + xorl 60(%rsp),%edx + xorl %edi,%eax + roll $1,%edx + addl %ecx,%r11d + roll $30,%r13d + movl %edx,8(%rsp) + addl %eax,%r11d + movl 12(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + xorl 0(%rsp),%ebp + xorl %esi,%eax + roll $1,%ebp + addl %ecx,%edi + roll $30,%r12d + movl %ebp,12(%rsp) + addl %eax,%edi + movl 16(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + xorl 4(%rsp),%edx + xorl %r13d,%eax + roll $1,%edx + addl %ecx,%esi + roll $30,%r11d + movl %edx,16(%rsp) + addl %eax,%esi + movl 20(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 28(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 52(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 8(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 32(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 56(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 12(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 36(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 60(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 16(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 0(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 20(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 4(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 24(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 48(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 8(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 28(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 52(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 12(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 32(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 56(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 16(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 36(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 20(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 40(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 24(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 44(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 28(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 48(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 32(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 52(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 36(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 56(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 40(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 60(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 44(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 0(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 48(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 4(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 52(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 8(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 56(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 12(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 60(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 16(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 0(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 20(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 44(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,36(%rsp) + addl %ecx,%r13d + movl 40(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 48(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,40(%rsp) + addl %ecx,%r12d + movl 44(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 52(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,44(%rsp) + addl %ecx,%r11d + movl 48(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,48(%rsp) + addl %ecx,%edi + movl 52(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 60(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 40(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,52(%rsp) + addl %ecx,%esi + movl 56(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 0(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 44(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,56(%rsp) + addl %ecx,%r13d + movl 60(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 4(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 48(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl %ebx,%r12d + roll $30,%esi + movl %ebp,60(%rsp) + addl %ecx,%r12d + movl 0(%rsp),%edx + movl %esi,%eax + movl %esi,%ebx + xorl 8(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 52(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,0(%rsp) + addl %ecx,%r11d + movl 4(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 12(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 56(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,4(%rsp) + addl %ecx,%edi + movl 8(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 16(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 60(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,8(%rsp) + addl %ecx,%esi + movl 12(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 20(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 44(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 0(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,12(%rsp) + addl %ecx,%r13d + movl 16(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 24(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 48(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 4(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,16(%rsp) + addl %ecx,%r12d + movl 20(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 28(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 8(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,20(%rsp) + addl %ecx,%r11d + movl 24(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 32(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 12(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,24(%rsp) + addl %ecx,%edi + movl 28(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 36(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 16(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,28(%rsp) + addl %ecx,%esi + movl 32(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 40(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 0(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 20(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,32(%rsp) + addl %ecx,%r13d + movl 36(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 44(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 4(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl %ebx,%r12d + roll $30,%esi + movl %ebp,36(%rsp) + addl %ecx,%r12d + movl 40(%rsp),%edx + movl %esi,%eax + movl %esi,%ebx + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,40(%rsp) + addl %ecx,%r11d + movl 44(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 52(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 12(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,44(%rsp) + addl %ecx,%edi + movl 48(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 56(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,48(%rsp) + addl %ecx,%esi + movl 52(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 20(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 40(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 0(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 24(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 44(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 4(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 28(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 48(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 32(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 52(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 36(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 56(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 40(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 60(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 44(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 0(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 48(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 4(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 28(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 52(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 8(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 32(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rsi,1),%esi + xorl 56(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 12(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 36(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 60(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 16(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 0(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 20(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 4(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 24(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 48(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 8(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 28(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 52(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 12(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 32(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 56(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 16(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 36(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 60(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 20(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 40(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl 56(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 24(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 44(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl 60(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 28(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 48(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl %r11d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r13d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz .Lloop + + movq 64(%rsp),%rsi + movq (%rsi),%r13 + movq 8(%rsi),%r12 + movq 16(%rsi),%rbp + movq 24(%rsi),%rbx + leaq 32(%rsi),%rsp +.Lepilogue: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_sha1_block_data_order: +.def sha1_block_data_order_ssse3; .scl 3; .type 32; .endef +.p2align 4 +sha1_block_data_order_ssse3: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_sha1_block_data_order_ssse3: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + +_ssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + leaq -144(%rsp),%rsp + movaps %xmm6,64+0(%rsp) + movaps %xmm7,64+16(%rsp) + movaps %xmm8,64+32(%rsp) + movaps %xmm9,64+48(%rsp) + movaps %xmm10,64+64(%rsp) +.Lprologue_ssse3: + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX(%rip),%r11 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm0 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp .Loop_ssse3 +.p2align 4 +.Loop_ssse3: + movdqa %xmm1,%xmm4 + addl 0(%rsp),%ebp + xorl %edx,%ecx + movdqa %xmm3,%xmm8 +.byte 102,15,58,15,224,8 + movl %eax,%edi + roll $5,%eax + paddd %xmm3,%xmm9 + andl %ecx,%esi + xorl %edx,%ecx + psrldq $4,%xmm8 + xorl %edx,%esi + addl %eax,%ebp + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%ebp + pxor %xmm2,%xmm8 + addl 4(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pxor %xmm8,%xmm4 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm9,48(%rsp) + xorl %ecx,%edi + addl %ebp,%edx + movdqa %xmm4,%xmm10 + movdqa %xmm4,%xmm8 + rorl $7,%eax + addl %edi,%edx + addl 8(%rsp),%ecx + xorl %ebx,%eax + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + roll $5,%edx + andl %eax,%esi + xorl %ebx,%eax + psrld $31,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + movdqa %xmm10,%xmm9 + rorl $7,%ebp + addl %esi,%ecx + psrld $30,%xmm10 + por %xmm8,%xmm4 + addl 12(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + andl %ebp,%edi + xorl %eax,%ebp + movdqa 0(%r11),%xmm10 + xorl %eax,%edi + addl %ecx,%ebx + pxor %xmm9,%xmm4 + rorl $7,%edx + addl %edi,%ebx + movdqa %xmm2,%xmm5 + addl 16(%rsp),%eax + xorl %ebp,%edx + movdqa %xmm4,%xmm9 +.byte 102,15,58,15,233,8 + movl %ebx,%edi + roll $5,%ebx + paddd %xmm4,%xmm10 + andl %edx,%esi + xorl %ebp,%edx + psrldq $4,%xmm9 + xorl %ebp,%esi + addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm3,%xmm9 + addl 20(%rsp),%ebp + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pxor %xmm9,%xmm5 + andl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm10,0(%rsp) + xorl %edx,%edi + addl %eax,%ebp + movdqa %xmm5,%xmm8 + movdqa %xmm5,%xmm9 + rorl $7,%ebx + addl %edi,%ebp + addl 24(%rsp),%edx + xorl %ecx,%ebx + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + roll $5,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + psrld $31,%xmm9 + xorl %ecx,%esi + addl %ebp,%edx + movdqa %xmm8,%xmm10 + rorl $7,%eax + addl %esi,%edx + psrld $30,%xmm8 + por %xmm9,%xmm5 + addl 28(%rsp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + andl %eax,%edi + xorl %ebx,%eax + movdqa 16(%r11),%xmm8 + xorl %ebx,%edi + addl %edx,%ecx + pxor %xmm10,%xmm5 + rorl $7,%ebp + addl %edi,%ecx + movdqa %xmm3,%xmm6 + addl 32(%rsp),%ebx + xorl %eax,%ebp + movdqa %xmm5,%xmm10 +.byte 102,15,58,15,242,8 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm5,%xmm8 + andl %ebp,%esi + xorl %eax,%ebp + psrldq $4,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm4,%xmm10 + addl 36(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + pxor %xmm10,%xmm6 + andl %edx,%edi + xorl %ebp,%edx + movdqa %xmm8,16(%rsp) + xorl %ebp,%edi + addl %ebx,%eax + movdqa %xmm6,%xmm9 + movdqa %xmm6,%xmm10 + rorl $7,%ecx + addl %edi,%eax + addl 40(%rsp),%ebp + xorl %edx,%ecx + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx + psrld $31,%xmm10 + xorl %edx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + psrld $30,%xmm9 + por %xmm10,%xmm6 + addl 44(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa 16(%r11),%xmm9 + xorl %ecx,%edi + addl %ebp,%edx + pxor %xmm8,%xmm6 + rorl $7,%eax + addl %edi,%edx + movdqa %xmm4,%xmm7 + addl 48(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm6,%xmm8 +.byte 102,15,58,15,251,8 + movl %edx,%edi + roll $5,%edx + paddd %xmm6,%xmm9 + andl %eax,%esi + xorl %ebx,%eax + psrldq $4,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm5,%xmm8 + addl 52(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + pxor %xmm8,%xmm7 + andl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm9,32(%rsp) + xorl %eax,%edi + addl %ecx,%ebx + movdqa %xmm7,%xmm10 + movdqa %xmm7,%xmm8 + rorl $7,%edx + addl %edi,%ebx + addl 56(%rsp),%eax + xorl %ebp,%edx + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + roll $5,%ebx + andl %edx,%esi + xorl %ebp,%edx + psrld $31,%xmm8 + xorl %ebp,%esi + addl %ebx,%eax + movdqa %xmm10,%xmm9 + rorl $7,%ecx + addl %esi,%eax + psrld $30,%xmm10 + por %xmm8,%xmm7 + addl 60(%rsp),%ebp + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + andl %ecx,%edi + xorl %edx,%ecx + movdqa 16(%r11),%xmm10 + xorl %edx,%edi + addl %eax,%ebp + pxor %xmm9,%xmm7 + rorl $7,%ebx + addl %edi,%ebp + movdqa %xmm7,%xmm9 + addl 0(%rsp),%edx + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,206,8 + xorl %ecx,%ebx + movl %ebp,%edi + roll $5,%ebp + pxor %xmm1,%xmm0 + andl %ebx,%esi + xorl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm7,%xmm10 + xorl %ecx,%esi + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm0,%xmm9 + movdqa %xmm10,48(%rsp) + movl %edx,%esi + roll $5,%edx + andl %eax,%edi + xorl %ebx,%eax + pslld $2,%xmm0 + xorl %ebx,%edi + addl %edx,%ecx + psrld $30,%xmm9 + rorl $7,%ebp + addl %edi,%ecx + addl 8(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%edi + roll $5,%ecx + por %xmm9,%xmm0 + andl %ebp,%esi + xorl %eax,%ebp + movdqa %xmm0,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + andl %edx,%edi + xorl %ebp,%edx + xorl %ebp,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 16(%rsp),%ebp + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,215,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm8,%xmm9 + paddd %xmm0,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + movdqa %xmm8,0(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm10 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm10,%xmm1 + addl 28(%rsp),%ebx + xorl %eax,%edi + movdqa %xmm1,%xmm8 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 32(%rsp),%eax + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,192,8 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + xorl %edx,%esi + addl %ebx,%eax + movdqa 32(%r11),%xmm10 + paddd %xmm1,%xmm9 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + movdqa %xmm9,16(%rsp) + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + pslld $2,%xmm2 + addl 40(%rsp),%edx + xorl %ecx,%esi + psrld $30,%xmm8 + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + por %xmm8,%xmm2 + addl 44(%rsp),%ecx + xorl %ebx,%edi + movdqa %xmm2,%xmm9 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 48(%rsp),%ebx + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,201,8 + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + xorl %ebp,%esi + addl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm2,%xmm10 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + movdqa %xmm10,32(%rsp) + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + pslld $2,%xmm3 + addl 56(%rsp),%ebp + xorl %edx,%esi + psrld $30,%xmm9 + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + por %xmm9,%xmm3 + addl 60(%rsp),%edx + xorl %ecx,%edi + movdqa %xmm3,%xmm10 + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 0(%rsp),%ecx + pxor %xmm0,%xmm4 +.byte 102,68,15,58,15,210,8 + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + xorl %eax,%esi + addl %edx,%ecx + movdqa %xmm8,%xmm9 + paddd %xmm3,%xmm8 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + movdqa %xmm8,48(%rsp) + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + pslld $2,%xmm4 + addl 8(%rsp),%eax + xorl %ebp,%esi + psrld $30,%xmm10 + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + por %xmm10,%xmm4 + addl 12(%rsp),%ebp + xorl %edx,%edi + movdqa %xmm4,%xmm8 + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 16(%rsp),%edx + pxor %xmm1,%xmm5 +.byte 102,68,15,58,15,195,8 + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + xorl %ebx,%esi + addl %ebp,%edx + movdqa %xmm9,%xmm10 + paddd %xmm4,%xmm9 + rorl $7,%eax + addl %esi,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + movdqa %xmm9,0(%rsp) + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + pslld $2,%xmm5 + addl 24(%rsp),%ebx + xorl %eax,%esi + psrld $30,%xmm8 + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + por %xmm8,%xmm5 + addl 28(%rsp),%eax + xorl %ebp,%edi + movdqa %xmm5,%xmm9 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movl %ecx,%edi + pxor %xmm2,%xmm6 +.byte 102,68,15,58,15,204,8 + xorl %edx,%ecx + addl 32(%rsp),%ebp + andl %edx,%edi + pxor %xmm7,%xmm6 + andl %ecx,%esi + rorl $7,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm5,%xmm10 + addl %edi,%ebp + movl %eax,%edi + pxor %xmm9,%xmm6 + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movdqa %xmm6,%xmm9 + movdqa %xmm10,16(%rsp) + movl %ebx,%esi + xorl %ecx,%ebx + addl 36(%rsp),%edx + andl %ecx,%esi + pslld $2,%xmm6 + andl %ebx,%edi + rorl $7,%eax + psrld $30,%xmm9 + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + por %xmm9,%xmm6 + movl %eax,%edi + xorl %ebx,%eax + movdqa %xmm6,%xmm10 + addl 40(%rsp),%ecx + andl %ebx,%edi + andl %eax,%esi + rorl $7,%ebp + addl %edi,%ecx + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%esi + xorl %eax,%ebp + addl 44(%rsp),%ebx + andl %eax,%esi + andl %ebp,%edi + rorl $7,%edx + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%edi + pxor %xmm3,%xmm7 +.byte 102,68,15,58,15,213,8 + xorl %ebp,%edx + addl 48(%rsp),%eax + andl %ebp,%edi + pxor %xmm0,%xmm7 + andl %edx,%esi + rorl $7,%ecx + movdqa 48(%r11),%xmm9 + paddd %xmm6,%xmm8 + addl %edi,%eax + movl %ebx,%edi + pxor %xmm10,%xmm7 + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movdqa %xmm7,%xmm10 + movdqa %xmm8,32(%rsp) + movl %ecx,%esi + xorl %edx,%ecx + addl 52(%rsp),%ebp + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%edi + rorl $7,%ebx + psrld $30,%xmm10 + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + por %xmm10,%xmm7 + movl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm7,%xmm8 + addl 56(%rsp),%edx + andl %ecx,%edi + andl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%esi + xorl %ebx,%eax + addl 60(%rsp),%ecx + andl %ebx,%esi + andl %eax,%edi + rorl $7,%ebp + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%edi + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,198,8 + xorl %eax,%ebp + addl 0(%rsp),%ebx + andl %eax,%edi + pxor %xmm1,%xmm0 + andl %ebp,%esi + rorl $7,%edx + movdqa %xmm9,%xmm10 + paddd %xmm7,%xmm9 + addl %edi,%ebx + movl %ecx,%edi + pxor %xmm8,%xmm0 + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movdqa %xmm0,%xmm8 + movdqa %xmm9,48(%rsp) + movl %edx,%esi + xorl %ebp,%edx + addl 4(%rsp),%eax + andl %ebp,%esi + pslld $2,%xmm0 + andl %edx,%edi + rorl $7,%ecx + psrld $30,%xmm8 + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + por %xmm8,%xmm0 + movl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm0,%xmm9 + addl 8(%rsp),%ebp + andl %edx,%edi + andl %ecx,%esi + rorl $7,%ebx + addl %edi,%ebp + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%esi + xorl %ecx,%ebx + addl 12(%rsp),%edx + andl %ecx,%esi + andl %ebx,%edi + rorl $7,%eax + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%edi + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,207,8 + xorl %ebx,%eax + addl 16(%rsp),%ecx + andl %ebx,%edi + pxor %xmm2,%xmm1 + andl %eax,%esi + rorl $7,%ebp + movdqa %xmm10,%xmm8 + paddd %xmm0,%xmm10 + addl %edi,%ecx + movl %edx,%edi + pxor %xmm9,%xmm1 + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movdqa %xmm1,%xmm9 + movdqa %xmm10,0(%rsp) + movl %ebp,%esi + xorl %eax,%ebp + addl 20(%rsp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %ebp,%edi + rorl $7,%edx + psrld $30,%xmm9 + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + por %xmm9,%xmm1 + movl %edx,%edi + xorl %ebp,%edx + movdqa %xmm1,%xmm10 + addl 24(%rsp),%eax + andl %ebp,%edi + andl %edx,%esi + rorl $7,%ecx + addl %edi,%eax + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movl %ecx,%esi + xorl %edx,%ecx + addl 28(%rsp),%ebp + andl %edx,%esi + andl %ecx,%edi + rorl $7,%ebx + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%edi + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,208,8 + xorl %ecx,%ebx + addl 32(%rsp),%edx + andl %ecx,%edi + pxor %xmm3,%xmm2 + andl %ebx,%esi + rorl $7,%eax + movdqa %xmm8,%xmm9 + paddd %xmm1,%xmm8 + addl %edi,%edx + movl %ebp,%edi + pxor %xmm10,%xmm2 + roll $5,%ebp + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movdqa %xmm2,%xmm10 + movdqa %xmm8,16(%rsp) + movl %eax,%esi + xorl %ebx,%eax + addl 36(%rsp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%edi + rorl $7,%ebp + psrld $30,%xmm10 + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + por %xmm10,%xmm2 + movl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm2,%xmm8 + addl 40(%rsp),%ebx + andl %eax,%edi + andl %ebp,%esi + rorl $7,%edx + addl %edi,%ebx + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%esi + xorl %ebp,%edx + addl 44(%rsp),%eax + andl %ebp,%esi + andl %edx,%edi + rorl $7,%ecx + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + addl 48(%rsp),%ebp + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,193,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm10 + paddd %xmm2,%xmm9 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + movdqa %xmm9,32(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm8 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm8,%xmm3 + addl 60(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 0(%rsp),%eax + paddd %xmm3,%xmm10 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + movdqa %xmm10,48(%rsp) + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 4(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 8(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 12(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + cmpq %r10,%r9 + je .Ldone_ssse3 + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %eax,%esi +.byte 102,15,56,0,206 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm9,%xmm0 + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + movdqa %xmm0,0(%rsp) + addl 20(%rsp),%eax + xorl %ebp,%edi + psubd %xmm9,%xmm0 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi +.byte 102,15,56,0,214 + movl %edx,%edi + roll $5,%edx + paddd %xmm9,%xmm1 + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + movdqa %xmm1,16(%rsp) + addl 36(%rsp),%ebx + xorl %eax,%edi + psubd %xmm9,%xmm1 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi +.byte 102,15,56,0,222 + movl %ebp,%edi + roll $5,%ebp + paddd %xmm9,%xmm2 + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + movdqa %xmm2,32(%rsp) + addl 52(%rsp),%ecx + xorl %ebx,%edi + psubd %xmm9,%xmm2 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + jmp .Loop_ssse3 + +.p2align 4 +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 20(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + addl 36(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 52(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + movaps 64+0(%rsp),%xmm6 + movaps 64+16(%rsp),%xmm7 + movaps 64+32(%rsp),%xmm8 + movaps 64+48(%rsp),%xmm9 + movaps 64+64(%rsp),%xmm10 + leaq 144(%rsp),%rsi + movq 0(%rsi),%r12 + movq 8(%rsi),%rbp + movq 16(%rsi),%rbx + leaq 24(%rsi),%rsp +.Lepilogue_ssse3: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_sha1_block_data_order_ssse3: +.p2align 6 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 + +.def se_handler; .scl 3; .type 32; .endef +.p2align 4 +se_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + leaq .Lprologue(%rip),%r10 + cmpq %r10,%rbx + jb .Lcommon_seh_tail + + movq 152(%r8),%rax + + leaq .Lepilogue(%rip),%r10 + cmpq %r10,%rbx + jae .Lcommon_seh_tail + + movq 64(%rax),%rax + leaq 32(%rax),%rax + + movq -8(%rax),%rbx + movq -16(%rax),%rbp + movq -24(%rax),%r12 + movq -32(%rax),%r13 + movq %rbx,144(%r8) + movq %rbp,160(%r8) + movq %r12,216(%r8) + movq %r13,224(%r8) + + jmp .Lcommon_seh_tail + + +.def ssse3_handler; .scl 3; .type 32; .endef +.p2align 4 +ssse3_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + movq 8(%r9),%rsi + movq 56(%r9),%r11 + + movl 0(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jb .Lcommon_seh_tail + + movq 152(%r8),%rax + + movl 4(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jae .Lcommon_seh_tail + + leaq 64(%rax),%rsi + leaq 512(%r8),%rdi + movl $10,%ecx +.long 0xa548f3fc + leaq 168(%rax),%rax + + movq -8(%rax),%rbx + movq -16(%rax),%rbp + movq -24(%rax),%r12 + movq %rbx,144(%r8) + movq %rbp,160(%r8) + movq %r12,216(%r8) + +.Lcommon_seh_tail: + movq 8(%rax),%rdi + movq 16(%rax),%rsi + movq %rax,152(%r8) + movq %rsi,168(%r8) + movq %rdi,176(%r8) + + movq 40(%r9),%rdi + movq %r8,%rsi + movl $154,%ecx +.long 0xa548f3fc + + movq %r9,%rsi + xorq %rcx,%rcx + movq 8(%rsi),%rdx + movq 0(%rsi),%r8 + movq 16(%rsi),%r9 + movq 40(%rsi),%r10 + leaq 56(%rsi),%r11 + leaq 24(%rsi),%r12 + movq %r10,32(%rsp) + movq %r11,40(%rsp) + movq %r12,48(%rsp) + movq %rcx,56(%rsp) + call *__imp_RtlVirtualUnwind(%rip) + + movl $1,%eax + addq $64,%rsp + popfq + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + popq %rdi + popq %rsi + retq + + +.section .pdata +.p2align 2 +.rva .LSEH_begin_sha1_block_data_order +.rva .LSEH_end_sha1_block_data_order +.rva .LSEH_info_sha1_block_data_order +.rva .LSEH_begin_sha1_block_data_order_ssse3 +.rva .LSEH_end_sha1_block_data_order_ssse3 +.rva .LSEH_info_sha1_block_data_order_ssse3 +.section .xdata +.p2align 3 +.LSEH_info_sha1_block_data_order: +.byte 9,0,0,0 +.rva se_handler +.LSEH_info_sha1_block_data_order_ssse3: +.byte 9,0,0,0 +.rva ssse3_handler +.rva .Lprologue_ssse3,.Lepilogue_ssse3 diff --git a/ext/libressl/crypto/sha/sha1_one.c b/ext/libressl/crypto/sha/sha1_one.c new file mode 100644 index 0000000..57e5220 --- /dev/null +++ b/ext/libressl/crypto/sha/sha1_one.c @@ -0,0 +1,81 @@ +/* $OpenBSD: sha1_one.c,v 1.12 2015/09/10 15:56:26 jsing Exp $ */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +#include +#include + +#include + +#include +#include + +#ifndef OPENSSL_NO_SHA1 +unsigned char *SHA1(const unsigned char *d, size_t n, unsigned char *md) + { + SHA_CTX c; + static unsigned char m[SHA_DIGEST_LENGTH]; + + if (md == NULL) md=m; + if (!SHA1_Init(&c)) + return NULL; + SHA1_Update(&c,d,n); + SHA1_Final(md,&c); + explicit_bzero(&c,sizeof(c)); + return(md); + } +#endif diff --git a/ext/libressl/crypto/sha/sha1dgst.c b/ext/libressl/crypto/sha/sha1dgst.c new file mode 100644 index 0000000..0c3df49 --- /dev/null +++ b/ext/libressl/crypto/sha/sha1dgst.c @@ -0,0 +1,72 @@ +/* $OpenBSD: sha1dgst.c,v 1.14 2015/09/13 21:09:56 doug Exp $ */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +#include + +#include + +#if !defined(OPENSSL_NO_SHA1) && !defined(OPENSSL_NO_SHA) + +#include + +/* The implementation is in ../md32_common.h */ + +#include "sha_locl.h" + +#endif + diff --git a/ext/libressl/crypto/sha/sha256-elf-armv4.S b/ext/libressl/crypto/sha/sha256-elf-armv4.S new file mode 100644 index 0000000..9b155c7 --- /dev/null +++ b/ext/libressl/crypto/sha/sha256-elf-armv4.S @@ -0,0 +1,1520 @@ +#include "arm_arch.h" + +.text +.code 32 + +.type K256,%object +.align 5 +K256: +.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.size K256,.-K256 + +.global sha256_block_data_order +.type sha256_block_data_order,%function +sha256_block_data_order: + sub r3,pc,#8 @ sha256_block_data_order + add r2,r1,r2,lsl#6 @ len to point at the end of inp + stmdb sp!,{r0,r1,r2,r4-r11,lr} + ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} + sub r14,r3,#256 @ K256 + sub sp,sp,#16*4 @ alloca(X[16]) +.Loop: +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) + ldr r3,[r1],#4 +#else + ldrb r3,[r1,#3] @ 0 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r8,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r8,ror#11 + eor r2,r9,r10 +#if 0>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 0==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r8,ror#25 @ Sigma1(e) + and r2,r2,r8 + str r3,[sp,#0*4] + add r3,r3,r0 + eor r2,r2,r10 @ Ch(e,f,g) + add r3,r3,r11 + mov r11,r4,ror#2 + add r3,r3,r2 + eor r11,r11,r4,ror#13 + add r3,r3,r12 + eor r11,r11,r4,ror#22 @ Sigma0(a) +#if 0>=15 + ldr r1,[sp,#2*4] @ from BODY_16_xx +#endif + orr r0,r4,r5 + and r2,r4,r5 + and r0,r0,r6 + add r11,r11,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r7,r7,r3 + add r11,r11,r0 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) + ldr r3,[r1],#4 +#else + ldrb r3,[r1,#3] @ 1 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r7,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r7,ror#11 + eor r2,r8,r9 +#if 1>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 1==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r7,ror#25 @ Sigma1(e) + and r2,r2,r7 + str r3,[sp,#1*4] + add r3,r3,r0 + eor r2,r2,r9 @ Ch(e,f,g) + add r3,r3,r10 + mov r10,r11,ror#2 + add r3,r3,r2 + eor r10,r10,r11,ror#13 + add r3,r3,r12 + eor r10,r10,r11,ror#22 @ Sigma0(a) +#if 1>=15 + ldr r1,[sp,#3*4] @ from BODY_16_xx +#endif + orr r0,r11,r4 + and r2,r11,r4 + and r0,r0,r5 + add r10,r10,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r6,r6,r3 + add r10,r10,r0 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) + ldr r3,[r1],#4 +#else + ldrb r3,[r1,#3] @ 2 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r6,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r6,ror#11 + eor r2,r7,r8 +#if 2>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 2==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r6,ror#25 @ Sigma1(e) + and r2,r2,r6 + str r3,[sp,#2*4] + add r3,r3,r0 + eor r2,r2,r8 @ Ch(e,f,g) + add r3,r3,r9 + mov r9,r10,ror#2 + add r3,r3,r2 + eor r9,r9,r10,ror#13 + add r3,r3,r12 + eor r9,r9,r10,ror#22 @ Sigma0(a) +#if 2>=15 + ldr r1,[sp,#4*4] @ from BODY_16_xx +#endif + orr r0,r10,r11 + and r2,r10,r11 + and r0,r0,r4 + add r9,r9,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r5,r5,r3 + add r9,r9,r0 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) + ldr r3,[r1],#4 +#else + ldrb r3,[r1,#3] @ 3 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r5,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r5,ror#11 + eor r2,r6,r7 +#if 3>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 3==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r5,ror#25 @ Sigma1(e) + and r2,r2,r5 + str r3,[sp,#3*4] + add r3,r3,r0 + eor r2,r2,r7 @ Ch(e,f,g) + add r3,r3,r8 + mov r8,r9,ror#2 + add r3,r3,r2 + eor r8,r8,r9,ror#13 + add r3,r3,r12 + eor r8,r8,r9,ror#22 @ Sigma0(a) +#if 3>=15 + ldr r1,[sp,#5*4] @ from BODY_16_xx +#endif + orr r0,r9,r10 + and r2,r9,r10 + and r0,r0,r11 + add r8,r8,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r4,r4,r3 + add r8,r8,r0 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) + ldr r3,[r1],#4 +#else + ldrb r3,[r1,#3] @ 4 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r4,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r4,ror#11 + eor r2,r5,r6 +#if 4>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 4==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r4,ror#25 @ Sigma1(e) + and r2,r2,r4 + str r3,[sp,#4*4] + add r3,r3,r0 + eor r2,r2,r6 @ Ch(e,f,g) + add r3,r3,r7 + mov r7,r8,ror#2 + add r3,r3,r2 + eor r7,r7,r8,ror#13 + add r3,r3,r12 + eor r7,r7,r8,ror#22 @ Sigma0(a) +#if 4>=15 + ldr r1,[sp,#6*4] @ from BODY_16_xx +#endif + orr r0,r8,r9 + and r2,r8,r9 + and r0,r0,r10 + add r7,r7,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r11,r11,r3 + add r7,r7,r0 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) + ldr r3,[r1],#4 +#else + ldrb r3,[r1,#3] @ 5 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r11,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r11,ror#11 + eor r2,r4,r5 +#if 5>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 5==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r11,ror#25 @ Sigma1(e) + and r2,r2,r11 + str r3,[sp,#5*4] + add r3,r3,r0 + eor r2,r2,r5 @ Ch(e,f,g) + add r3,r3,r6 + mov r6,r7,ror#2 + add r3,r3,r2 + eor r6,r6,r7,ror#13 + add r3,r3,r12 + eor r6,r6,r7,ror#22 @ Sigma0(a) +#if 5>=15 + ldr r1,[sp,#7*4] @ from BODY_16_xx +#endif + orr r0,r7,r8 + and r2,r7,r8 + and r0,r0,r9 + add r6,r6,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r10,r10,r3 + add r6,r6,r0 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) + ldr r3,[r1],#4 +#else + ldrb r3,[r1,#3] @ 6 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r10,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r10,ror#11 + eor r2,r11,r4 +#if 6>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 6==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r10,ror#25 @ Sigma1(e) + and r2,r2,r10 + str r3,[sp,#6*4] + add r3,r3,r0 + eor r2,r2,r4 @ Ch(e,f,g) + add r3,r3,r5 + mov r5,r6,ror#2 + add r3,r3,r2 + eor r5,r5,r6,ror#13 + add r3,r3,r12 + eor r5,r5,r6,ror#22 @ Sigma0(a) +#if 6>=15 + ldr r1,[sp,#8*4] @ from BODY_16_xx +#endif + orr r0,r6,r7 + and r2,r6,r7 + and r0,r0,r8 + add r5,r5,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r9,r9,r3 + add r5,r5,r0 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) + ldr r3,[r1],#4 +#else + ldrb r3,[r1,#3] @ 7 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r9,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r9,ror#11 + eor r2,r10,r11 +#if 7>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 7==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r9,ror#25 @ Sigma1(e) + and r2,r2,r9 + str r3,[sp,#7*4] + add r3,r3,r0 + eor r2,r2,r11 @ Ch(e,f,g) + add r3,r3,r4 + mov r4,r5,ror#2 + add r3,r3,r2 + eor r4,r4,r5,ror#13 + add r3,r3,r12 + eor r4,r4,r5,ror#22 @ Sigma0(a) +#if 7>=15 + ldr r1,[sp,#9*4] @ from BODY_16_xx +#endif + orr r0,r5,r6 + and r2,r5,r6 + and r0,r0,r7 + add r4,r4,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r8,r8,r3 + add r4,r4,r0 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) + ldr r3,[r1],#4 +#else + ldrb r3,[r1,#3] @ 8 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r8,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r8,ror#11 + eor r2,r9,r10 +#if 8>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 8==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r8,ror#25 @ Sigma1(e) + and r2,r2,r8 + str r3,[sp,#8*4] + add r3,r3,r0 + eor r2,r2,r10 @ Ch(e,f,g) + add r3,r3,r11 + mov r11,r4,ror#2 + add r3,r3,r2 + eor r11,r11,r4,ror#13 + add r3,r3,r12 + eor r11,r11,r4,ror#22 @ Sigma0(a) +#if 8>=15 + ldr r1,[sp,#10*4] @ from BODY_16_xx +#endif + orr r0,r4,r5 + and r2,r4,r5 + and r0,r0,r6 + add r11,r11,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r7,r7,r3 + add r11,r11,r0 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) + ldr r3,[r1],#4 +#else + ldrb r3,[r1,#3] @ 9 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r7,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r7,ror#11 + eor r2,r8,r9 +#if 9>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 9==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r7,ror#25 @ Sigma1(e) + and r2,r2,r7 + str r3,[sp,#9*4] + add r3,r3,r0 + eor r2,r2,r9 @ Ch(e,f,g) + add r3,r3,r10 + mov r10,r11,ror#2 + add r3,r3,r2 + eor r10,r10,r11,ror#13 + add r3,r3,r12 + eor r10,r10,r11,ror#22 @ Sigma0(a) +#if 9>=15 + ldr r1,[sp,#11*4] @ from BODY_16_xx +#endif + orr r0,r11,r4 + and r2,r11,r4 + and r0,r0,r5 + add r10,r10,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r6,r6,r3 + add r10,r10,r0 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) + ldr r3,[r1],#4 +#else + ldrb r3,[r1,#3] @ 10 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r6,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r6,ror#11 + eor r2,r7,r8 +#if 10>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 10==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r6,ror#25 @ Sigma1(e) + and r2,r2,r6 + str r3,[sp,#10*4] + add r3,r3,r0 + eor r2,r2,r8 @ Ch(e,f,g) + add r3,r3,r9 + mov r9,r10,ror#2 + add r3,r3,r2 + eor r9,r9,r10,ror#13 + add r3,r3,r12 + eor r9,r9,r10,ror#22 @ Sigma0(a) +#if 10>=15 + ldr r1,[sp,#12*4] @ from BODY_16_xx +#endif + orr r0,r10,r11 + and r2,r10,r11 + and r0,r0,r4 + add r9,r9,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r5,r5,r3 + add r9,r9,r0 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) + ldr r3,[r1],#4 +#else + ldrb r3,[r1,#3] @ 11 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r5,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r5,ror#11 + eor r2,r6,r7 +#if 11>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 11==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r5,ror#25 @ Sigma1(e) + and r2,r2,r5 + str r3,[sp,#11*4] + add r3,r3,r0 + eor r2,r2,r7 @ Ch(e,f,g) + add r3,r3,r8 + mov r8,r9,ror#2 + add r3,r3,r2 + eor r8,r8,r9,ror#13 + add r3,r3,r12 + eor r8,r8,r9,ror#22 @ Sigma0(a) +#if 11>=15 + ldr r1,[sp,#13*4] @ from BODY_16_xx +#endif + orr r0,r9,r10 + and r2,r9,r10 + and r0,r0,r11 + add r8,r8,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r4,r4,r3 + add r8,r8,r0 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) + ldr r3,[r1],#4 +#else + ldrb r3,[r1,#3] @ 12 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r4,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r4,ror#11 + eor r2,r5,r6 +#if 12>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 12==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r4,ror#25 @ Sigma1(e) + and r2,r2,r4 + str r3,[sp,#12*4] + add r3,r3,r0 + eor r2,r2,r6 @ Ch(e,f,g) + add r3,r3,r7 + mov r7,r8,ror#2 + add r3,r3,r2 + eor r7,r7,r8,ror#13 + add r3,r3,r12 + eor r7,r7,r8,ror#22 @ Sigma0(a) +#if 12>=15 + ldr r1,[sp,#14*4] @ from BODY_16_xx +#endif + orr r0,r8,r9 + and r2,r8,r9 + and r0,r0,r10 + add r7,r7,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r11,r11,r3 + add r7,r7,r0 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) + ldr r3,[r1],#4 +#else + ldrb r3,[r1,#3] @ 13 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r11,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r11,ror#11 + eor r2,r4,r5 +#if 13>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 13==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r11,ror#25 @ Sigma1(e) + and r2,r2,r11 + str r3,[sp,#13*4] + add r3,r3,r0 + eor r2,r2,r5 @ Ch(e,f,g) + add r3,r3,r6 + mov r6,r7,ror#2 + add r3,r3,r2 + eor r6,r6,r7,ror#13 + add r3,r3,r12 + eor r6,r6,r7,ror#22 @ Sigma0(a) +#if 13>=15 + ldr r1,[sp,#15*4] @ from BODY_16_xx +#endif + orr r0,r7,r8 + and r2,r7,r8 + and r0,r0,r9 + add r6,r6,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r10,r10,r3 + add r6,r6,r0 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) + ldr r3,[r1],#4 +#else + ldrb r3,[r1,#3] @ 14 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r10,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r10,ror#11 + eor r2,r11,r4 +#if 14>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 14==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r10,ror#25 @ Sigma1(e) + and r2,r2,r10 + str r3,[sp,#14*4] + add r3,r3,r0 + eor r2,r2,r4 @ Ch(e,f,g) + add r3,r3,r5 + mov r5,r6,ror#2 + add r3,r3,r2 + eor r5,r5,r6,ror#13 + add r3,r3,r12 + eor r5,r5,r6,ror#22 @ Sigma0(a) +#if 14>=15 + ldr r1,[sp,#0*4] @ from BODY_16_xx +#endif + orr r0,r6,r7 + and r2,r6,r7 + and r0,r0,r8 + add r5,r5,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r9,r9,r3 + add r5,r5,r0 +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) + ldr r3,[r1],#4 +#else + ldrb r3,[r1,#3] @ 15 + ldrb r12,[r1,#2] + ldrb r2,[r1,#1] + ldrb r0,[r1],#4 + orr r3,r3,r12,lsl#8 + orr r3,r3,r2,lsl#16 + orr r3,r3,r0,lsl#24 +#endif + mov r0,r9,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r9,ror#11 + eor r2,r10,r11 +#if 15>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 15==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r9,ror#25 @ Sigma1(e) + and r2,r2,r9 + str r3,[sp,#15*4] + add r3,r3,r0 + eor r2,r2,r11 @ Ch(e,f,g) + add r3,r3,r4 + mov r4,r5,ror#2 + add r3,r3,r2 + eor r4,r4,r5,ror#13 + add r3,r3,r12 + eor r4,r4,r5,ror#22 @ Sigma0(a) +#if 15>=15 + ldr r1,[sp,#1*4] @ from BODY_16_xx +#endif + orr r0,r5,r6 + and r2,r5,r6 + and r0,r0,r7 + add r4,r4,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r8,r8,r3 + add r4,r4,r0 +.Lrounds_16_xx: + @ ldr r1,[sp,#1*4] @ 16 + ldr r12,[sp,#14*4] + mov r0,r1,ror#7 + ldr r3,[sp,#0*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#9*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r8,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r8,ror#11 + eor r2,r9,r10 +#if 16>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 16==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r8,ror#25 @ Sigma1(e) + and r2,r2,r8 + str r3,[sp,#0*4] + add r3,r3,r0 + eor r2,r2,r10 @ Ch(e,f,g) + add r3,r3,r11 + mov r11,r4,ror#2 + add r3,r3,r2 + eor r11,r11,r4,ror#13 + add r3,r3,r12 + eor r11,r11,r4,ror#22 @ Sigma0(a) +#if 16>=15 + ldr r1,[sp,#2*4] @ from BODY_16_xx +#endif + orr r0,r4,r5 + and r2,r4,r5 + and r0,r0,r6 + add r11,r11,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r7,r7,r3 + add r11,r11,r0 + @ ldr r1,[sp,#2*4] @ 17 + ldr r12,[sp,#15*4] + mov r0,r1,ror#7 + ldr r3,[sp,#1*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#10*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r7,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r7,ror#11 + eor r2,r8,r9 +#if 17>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 17==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r7,ror#25 @ Sigma1(e) + and r2,r2,r7 + str r3,[sp,#1*4] + add r3,r3,r0 + eor r2,r2,r9 @ Ch(e,f,g) + add r3,r3,r10 + mov r10,r11,ror#2 + add r3,r3,r2 + eor r10,r10,r11,ror#13 + add r3,r3,r12 + eor r10,r10,r11,ror#22 @ Sigma0(a) +#if 17>=15 + ldr r1,[sp,#3*4] @ from BODY_16_xx +#endif + orr r0,r11,r4 + and r2,r11,r4 + and r0,r0,r5 + add r10,r10,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r6,r6,r3 + add r10,r10,r0 + @ ldr r1,[sp,#3*4] @ 18 + ldr r12,[sp,#0*4] + mov r0,r1,ror#7 + ldr r3,[sp,#2*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#11*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r6,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r6,ror#11 + eor r2,r7,r8 +#if 18>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 18==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r6,ror#25 @ Sigma1(e) + and r2,r2,r6 + str r3,[sp,#2*4] + add r3,r3,r0 + eor r2,r2,r8 @ Ch(e,f,g) + add r3,r3,r9 + mov r9,r10,ror#2 + add r3,r3,r2 + eor r9,r9,r10,ror#13 + add r3,r3,r12 + eor r9,r9,r10,ror#22 @ Sigma0(a) +#if 18>=15 + ldr r1,[sp,#4*4] @ from BODY_16_xx +#endif + orr r0,r10,r11 + and r2,r10,r11 + and r0,r0,r4 + add r9,r9,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r5,r5,r3 + add r9,r9,r0 + @ ldr r1,[sp,#4*4] @ 19 + ldr r12,[sp,#1*4] + mov r0,r1,ror#7 + ldr r3,[sp,#3*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#12*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r5,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r5,ror#11 + eor r2,r6,r7 +#if 19>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 19==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r5,ror#25 @ Sigma1(e) + and r2,r2,r5 + str r3,[sp,#3*4] + add r3,r3,r0 + eor r2,r2,r7 @ Ch(e,f,g) + add r3,r3,r8 + mov r8,r9,ror#2 + add r3,r3,r2 + eor r8,r8,r9,ror#13 + add r3,r3,r12 + eor r8,r8,r9,ror#22 @ Sigma0(a) +#if 19>=15 + ldr r1,[sp,#5*4] @ from BODY_16_xx +#endif + orr r0,r9,r10 + and r2,r9,r10 + and r0,r0,r11 + add r8,r8,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r4,r4,r3 + add r8,r8,r0 + @ ldr r1,[sp,#5*4] @ 20 + ldr r12,[sp,#2*4] + mov r0,r1,ror#7 + ldr r3,[sp,#4*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#13*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r4,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r4,ror#11 + eor r2,r5,r6 +#if 20>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 20==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r4,ror#25 @ Sigma1(e) + and r2,r2,r4 + str r3,[sp,#4*4] + add r3,r3,r0 + eor r2,r2,r6 @ Ch(e,f,g) + add r3,r3,r7 + mov r7,r8,ror#2 + add r3,r3,r2 + eor r7,r7,r8,ror#13 + add r3,r3,r12 + eor r7,r7,r8,ror#22 @ Sigma0(a) +#if 20>=15 + ldr r1,[sp,#6*4] @ from BODY_16_xx +#endif + orr r0,r8,r9 + and r2,r8,r9 + and r0,r0,r10 + add r7,r7,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r11,r11,r3 + add r7,r7,r0 + @ ldr r1,[sp,#6*4] @ 21 + ldr r12,[sp,#3*4] + mov r0,r1,ror#7 + ldr r3,[sp,#5*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#14*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r11,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r11,ror#11 + eor r2,r4,r5 +#if 21>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 21==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r11,ror#25 @ Sigma1(e) + and r2,r2,r11 + str r3,[sp,#5*4] + add r3,r3,r0 + eor r2,r2,r5 @ Ch(e,f,g) + add r3,r3,r6 + mov r6,r7,ror#2 + add r3,r3,r2 + eor r6,r6,r7,ror#13 + add r3,r3,r12 + eor r6,r6,r7,ror#22 @ Sigma0(a) +#if 21>=15 + ldr r1,[sp,#7*4] @ from BODY_16_xx +#endif + orr r0,r7,r8 + and r2,r7,r8 + and r0,r0,r9 + add r6,r6,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r10,r10,r3 + add r6,r6,r0 + @ ldr r1,[sp,#7*4] @ 22 + ldr r12,[sp,#4*4] + mov r0,r1,ror#7 + ldr r3,[sp,#6*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#15*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r10,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r10,ror#11 + eor r2,r11,r4 +#if 22>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 22==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r10,ror#25 @ Sigma1(e) + and r2,r2,r10 + str r3,[sp,#6*4] + add r3,r3,r0 + eor r2,r2,r4 @ Ch(e,f,g) + add r3,r3,r5 + mov r5,r6,ror#2 + add r3,r3,r2 + eor r5,r5,r6,ror#13 + add r3,r3,r12 + eor r5,r5,r6,ror#22 @ Sigma0(a) +#if 22>=15 + ldr r1,[sp,#8*4] @ from BODY_16_xx +#endif + orr r0,r6,r7 + and r2,r6,r7 + and r0,r0,r8 + add r5,r5,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r9,r9,r3 + add r5,r5,r0 + @ ldr r1,[sp,#8*4] @ 23 + ldr r12,[sp,#5*4] + mov r0,r1,ror#7 + ldr r3,[sp,#7*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#0*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r9,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r9,ror#11 + eor r2,r10,r11 +#if 23>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 23==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r9,ror#25 @ Sigma1(e) + and r2,r2,r9 + str r3,[sp,#7*4] + add r3,r3,r0 + eor r2,r2,r11 @ Ch(e,f,g) + add r3,r3,r4 + mov r4,r5,ror#2 + add r3,r3,r2 + eor r4,r4,r5,ror#13 + add r3,r3,r12 + eor r4,r4,r5,ror#22 @ Sigma0(a) +#if 23>=15 + ldr r1,[sp,#9*4] @ from BODY_16_xx +#endif + orr r0,r5,r6 + and r2,r5,r6 + and r0,r0,r7 + add r4,r4,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r8,r8,r3 + add r4,r4,r0 + @ ldr r1,[sp,#9*4] @ 24 + ldr r12,[sp,#6*4] + mov r0,r1,ror#7 + ldr r3,[sp,#8*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#1*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r8,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r8,ror#11 + eor r2,r9,r10 +#if 24>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 24==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r8,ror#25 @ Sigma1(e) + and r2,r2,r8 + str r3,[sp,#8*4] + add r3,r3,r0 + eor r2,r2,r10 @ Ch(e,f,g) + add r3,r3,r11 + mov r11,r4,ror#2 + add r3,r3,r2 + eor r11,r11,r4,ror#13 + add r3,r3,r12 + eor r11,r11,r4,ror#22 @ Sigma0(a) +#if 24>=15 + ldr r1,[sp,#10*4] @ from BODY_16_xx +#endif + orr r0,r4,r5 + and r2,r4,r5 + and r0,r0,r6 + add r11,r11,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r7,r7,r3 + add r11,r11,r0 + @ ldr r1,[sp,#10*4] @ 25 + ldr r12,[sp,#7*4] + mov r0,r1,ror#7 + ldr r3,[sp,#9*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#2*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r7,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r7,ror#11 + eor r2,r8,r9 +#if 25>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 25==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r7,ror#25 @ Sigma1(e) + and r2,r2,r7 + str r3,[sp,#9*4] + add r3,r3,r0 + eor r2,r2,r9 @ Ch(e,f,g) + add r3,r3,r10 + mov r10,r11,ror#2 + add r3,r3,r2 + eor r10,r10,r11,ror#13 + add r3,r3,r12 + eor r10,r10,r11,ror#22 @ Sigma0(a) +#if 25>=15 + ldr r1,[sp,#11*4] @ from BODY_16_xx +#endif + orr r0,r11,r4 + and r2,r11,r4 + and r0,r0,r5 + add r10,r10,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r6,r6,r3 + add r10,r10,r0 + @ ldr r1,[sp,#11*4] @ 26 + ldr r12,[sp,#8*4] + mov r0,r1,ror#7 + ldr r3,[sp,#10*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#3*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r6,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r6,ror#11 + eor r2,r7,r8 +#if 26>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 26==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r6,ror#25 @ Sigma1(e) + and r2,r2,r6 + str r3,[sp,#10*4] + add r3,r3,r0 + eor r2,r2,r8 @ Ch(e,f,g) + add r3,r3,r9 + mov r9,r10,ror#2 + add r3,r3,r2 + eor r9,r9,r10,ror#13 + add r3,r3,r12 + eor r9,r9,r10,ror#22 @ Sigma0(a) +#if 26>=15 + ldr r1,[sp,#12*4] @ from BODY_16_xx +#endif + orr r0,r10,r11 + and r2,r10,r11 + and r0,r0,r4 + add r9,r9,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r5,r5,r3 + add r9,r9,r0 + @ ldr r1,[sp,#12*4] @ 27 + ldr r12,[sp,#9*4] + mov r0,r1,ror#7 + ldr r3,[sp,#11*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#4*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r5,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r5,ror#11 + eor r2,r6,r7 +#if 27>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 27==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r5,ror#25 @ Sigma1(e) + and r2,r2,r5 + str r3,[sp,#11*4] + add r3,r3,r0 + eor r2,r2,r7 @ Ch(e,f,g) + add r3,r3,r8 + mov r8,r9,ror#2 + add r3,r3,r2 + eor r8,r8,r9,ror#13 + add r3,r3,r12 + eor r8,r8,r9,ror#22 @ Sigma0(a) +#if 27>=15 + ldr r1,[sp,#13*4] @ from BODY_16_xx +#endif + orr r0,r9,r10 + and r2,r9,r10 + and r0,r0,r11 + add r8,r8,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r4,r4,r3 + add r8,r8,r0 + @ ldr r1,[sp,#13*4] @ 28 + ldr r12,[sp,#10*4] + mov r0,r1,ror#7 + ldr r3,[sp,#12*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#5*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r4,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r4,ror#11 + eor r2,r5,r6 +#if 28>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 28==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r4,ror#25 @ Sigma1(e) + and r2,r2,r4 + str r3,[sp,#12*4] + add r3,r3,r0 + eor r2,r2,r6 @ Ch(e,f,g) + add r3,r3,r7 + mov r7,r8,ror#2 + add r3,r3,r2 + eor r7,r7,r8,ror#13 + add r3,r3,r12 + eor r7,r7,r8,ror#22 @ Sigma0(a) +#if 28>=15 + ldr r1,[sp,#14*4] @ from BODY_16_xx +#endif + orr r0,r8,r9 + and r2,r8,r9 + and r0,r0,r10 + add r7,r7,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r11,r11,r3 + add r7,r7,r0 + @ ldr r1,[sp,#14*4] @ 29 + ldr r12,[sp,#11*4] + mov r0,r1,ror#7 + ldr r3,[sp,#13*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#6*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r11,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r11,ror#11 + eor r2,r4,r5 +#if 29>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 29==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r11,ror#25 @ Sigma1(e) + and r2,r2,r11 + str r3,[sp,#13*4] + add r3,r3,r0 + eor r2,r2,r5 @ Ch(e,f,g) + add r3,r3,r6 + mov r6,r7,ror#2 + add r3,r3,r2 + eor r6,r6,r7,ror#13 + add r3,r3,r12 + eor r6,r6,r7,ror#22 @ Sigma0(a) +#if 29>=15 + ldr r1,[sp,#15*4] @ from BODY_16_xx +#endif + orr r0,r7,r8 + and r2,r7,r8 + and r0,r0,r9 + add r6,r6,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r10,r10,r3 + add r6,r6,r0 + @ ldr r1,[sp,#15*4] @ 30 + ldr r12,[sp,#12*4] + mov r0,r1,ror#7 + ldr r3,[sp,#14*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#7*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r10,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r10,ror#11 + eor r2,r11,r4 +#if 30>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 30==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r10,ror#25 @ Sigma1(e) + and r2,r2,r10 + str r3,[sp,#14*4] + add r3,r3,r0 + eor r2,r2,r4 @ Ch(e,f,g) + add r3,r3,r5 + mov r5,r6,ror#2 + add r3,r3,r2 + eor r5,r5,r6,ror#13 + add r3,r3,r12 + eor r5,r5,r6,ror#22 @ Sigma0(a) +#if 30>=15 + ldr r1,[sp,#0*4] @ from BODY_16_xx +#endif + orr r0,r6,r7 + and r2,r6,r7 + and r0,r0,r8 + add r5,r5,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r9,r9,r3 + add r5,r5,r0 + @ ldr r1,[sp,#0*4] @ 31 + ldr r12,[sp,#13*4] + mov r0,r1,ror#7 + ldr r3,[sp,#15*4] + eor r0,r0,r1,ror#18 + ldr r2,[sp,#8*4] + eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) + mov r1,r12,ror#17 + add r3,r3,r0 + eor r1,r1,r12,ror#19 + add r3,r3,r2 + eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) + @ add r3,r3,r1 + mov r0,r9,ror#6 + ldr r12,[r14],#4 @ *K256++ + eor r0,r0,r9,ror#11 + eor r2,r10,r11 +#if 31>=16 + add r3,r3,r1 @ from BODY_16_xx +#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT) + rev r3,r3 +#endif +#if 31==15 + str r1,[sp,#17*4] @ leave room for r1 +#endif + eor r0,r0,r9,ror#25 @ Sigma1(e) + and r2,r2,r9 + str r3,[sp,#15*4] + add r3,r3,r0 + eor r2,r2,r11 @ Ch(e,f,g) + add r3,r3,r4 + mov r4,r5,ror#2 + add r3,r3,r2 + eor r4,r4,r5,ror#13 + add r3,r3,r12 + eor r4,r4,r5,ror#22 @ Sigma0(a) +#if 31>=15 + ldr r1,[sp,#1*4] @ from BODY_16_xx +#endif + orr r0,r5,r6 + and r2,r5,r6 + and r0,r0,r7 + add r4,r4,r3 + orr r0,r0,r2 @ Maj(a,b,c) + add r8,r8,r3 + add r4,r4,r0 + and r12,r12,#0xff + cmp r12,#0xf2 + bne .Lrounds_16_xx + + ldr r3,[sp,#16*4] @ pull ctx + ldr r0,[r3,#0] + ldr r2,[r3,#4] + ldr r12,[r3,#8] + add r4,r4,r0 + ldr r0,[r3,#12] + add r5,r5,r2 + ldr r2,[r3,#16] + add r6,r6,r12 + ldr r12,[r3,#20] + add r7,r7,r0 + ldr r0,[r3,#24] + add r8,r8,r2 + ldr r2,[r3,#28] + add r9,r9,r12 + ldr r1,[sp,#17*4] @ pull inp + ldr r12,[sp,#18*4] @ pull inp+len + add r10,r10,r0 + add r11,r11,r2 + stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} + cmp r1,r12 + sub r14,r14,#256 @ rewind Ktbl + bne .Loop + + add sp,sp,#19*4 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size sha256_block_data_order,.-sha256_block_data_order +.asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by " +.align 2 +#if defined(HAVE_GNU_STACK) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/ext/libressl/crypto/sha/sha256-elf-x86_64.S b/ext/libressl/crypto/sha/sha256-elf-x86_64.S new file mode 100644 index 0000000..9eea6a7 --- /dev/null +++ b/ext/libressl/crypto/sha/sha256-elf-x86_64.S @@ -0,0 +1,1782 @@ +#include "x86_arch.h" +.text + +.globl sha256_block_data_order +.type sha256_block_data_order,@function +.align 16 +sha256_block_data_order: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $64+32,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +.Lprologue: + + leaq K256(%rip),%rbp + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp .Lloop + +.align 16 +.Lloop: + xorq %rdi,%rdi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,0(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,4(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,8(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,12(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,16(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,20(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,24(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,28(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,32(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,36(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,40(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,44(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,48(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,52(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,56(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,60(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 36(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r14d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,0(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 8(%rsp),%r13d + movl 60(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 40(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %r14d,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,4(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 12(%rsp),%r13d + movl 0(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 44(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r14d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,8(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 16(%rsp),%r13d + movl 4(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 48(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %r14d,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,12(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 20(%rsp),%r13d + movl 8(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 52(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r14d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,16(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 24(%rsp),%r13d + movl 12(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 56(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %r14d,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,20(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 28(%rsp),%r13d + movl 16(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 60(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r14d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,24(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 32(%rsp),%r13d + movl 20(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 0(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %r14d,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,28(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + movl 36(%rsp),%r13d + movl 24(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 4(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r14d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,32(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 40(%rsp),%r13d + movl 28(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 8(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %r14d,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,36(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 44(%rsp),%r13d + movl 32(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 12(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r14d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,40(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 48(%rsp),%r13d + movl 36(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 16(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %r14d,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,44(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 52(%rsp),%r13d + movl 40(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 20(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r14d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,48(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 56(%rsp),%r13d + movl 44(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 24(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %r14d,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,52(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 60(%rsp),%r13d + movl 48(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 28(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r14d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,56(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 0(%rsp),%r13d + movl 52(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 32(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %r14d,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,60(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + cmpq $64,%rdi + jb .Lrounds_16_xx + + movq 64+0(%rsp),%rdi + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue: + retq +.size sha256_block_data_order,.-sha256_block_data_order +.align 64 +.type K256,@object +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +#if defined(HAVE_GNU_STACK) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/ext/libressl/crypto/sha/sha256-macosx-x86_64.S b/ext/libressl/crypto/sha/sha256-macosx-x86_64.S new file mode 100644 index 0000000..4b468b7 --- /dev/null +++ b/ext/libressl/crypto/sha/sha256-macosx-x86_64.S @@ -0,0 +1,1779 @@ +#include "x86_arch.h" +.text + +.globl _sha256_block_data_order + +.p2align 4 +_sha256_block_data_order: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $64+32,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +L$prologue: + + leaq K256(%rip),%rbp + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp L$loop + +.p2align 4 +L$loop: + xorq %rdi,%rdi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,0(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,4(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,8(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,12(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,16(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,20(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,24(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,28(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,32(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,36(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,40(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,44(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,48(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,52(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,56(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,60(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + jmp L$rounds_16_xx +.p2align 4 +L$rounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 36(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r14d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,0(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 8(%rsp),%r13d + movl 60(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 40(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %r14d,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,4(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 12(%rsp),%r13d + movl 0(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 44(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r14d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,8(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 16(%rsp),%r13d + movl 4(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 48(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %r14d,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,12(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 20(%rsp),%r13d + movl 8(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 52(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r14d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,16(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 24(%rsp),%r13d + movl 12(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 56(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %r14d,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,20(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 28(%rsp),%r13d + movl 16(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 60(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r14d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,24(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 32(%rsp),%r13d + movl 20(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 0(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %r14d,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,28(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + movl 36(%rsp),%r13d + movl 24(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 4(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r14d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,32(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 40(%rsp),%r13d + movl 28(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 8(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %r14d,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,36(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 44(%rsp),%r13d + movl 32(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 12(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r14d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,40(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 48(%rsp),%r13d + movl 36(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 16(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %r14d,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,44(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 52(%rsp),%r13d + movl 40(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 20(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r14d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,48(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 56(%rsp),%r13d + movl 44(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 24(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %r14d,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,52(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 60(%rsp),%r13d + movl 48(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 28(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r14d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,56(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 0(%rsp),%r13d + movl 52(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 32(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %r14d,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,60(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + cmpq $64,%rdi + jb L$rounds_16_xx + + movq 64+0(%rsp),%rdi + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb L$loop + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue: + retq + +.p2align 6 + +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 diff --git a/ext/libressl/crypto/sha/sha256-masm-x86_64.S b/ext/libressl/crypto/sha/sha256-masm-x86_64.S new file mode 100644 index 0000000..33c705d --- /dev/null +++ b/ext/libressl/crypto/sha/sha256-masm-x86_64.S @@ -0,0 +1,1864 @@ +; 1 "crypto/sha/sha256-masm-x86_64.S.tmp" +; 1 "" 1 +; 1 "" 3 +; 340 "" 3 +; 1 "" 1 +; 1 "" 2 +; 1 "crypto/sha/sha256-masm-x86_64.S.tmp" 2 +OPTION DOTNAME + +; 1 "./crypto/x86_arch.h" 1 + + +; 16 "./crypto/x86_arch.h" + + + + + + + + + +; 40 "./crypto/x86_arch.h" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +; 3 "crypto/sha/sha256-masm-x86_64.S.tmp" 2 +.text$ SEGMENT ALIGN(64) 'CODE' + +PUBLIC sha256_block_data_order + +ALIGN 16 +sha256_block_data_order PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + mov r11,rsp + shl rdx,4 + sub rsp,16*4+4*8 + lea rdx,QWORD PTR[rdx*4+rsi] + and rsp,-64 + mov QWORD PTR[((64+0))+rsp],rdi + mov QWORD PTR[((64+8))+rsp],rsi + mov QWORD PTR[((64+16))+rsp],rdx + mov QWORD PTR[((64+24))+rsp],r11 +$L$prologue:: + + lea rbp,QWORD PTR[K256] + + mov eax,DWORD PTR[rdi] + mov ebx,DWORD PTR[4+rdi] + mov ecx,DWORD PTR[8+rdi] + mov edx,DWORD PTR[12+rdi] + mov r8d,DWORD PTR[16+rdi] + mov r9d,DWORD PTR[20+rdi] + mov r10d,DWORD PTR[24+rdi] + mov r11d,DWORD PTR[28+rdi] + jmp $L$loop + +ALIGN 16 +$L$loop:: + xor rdi,rdi + mov r12d,DWORD PTR[rsi] + mov r13d,r8d + mov r14d,eax + bswap r12d + ror r13d,14 + mov r15d,r9d + mov DWORD PTR[rsp],r12d + + ror r14d,9 + xor r13d,r8d + xor r15d,r10d + + ror r13d,5 + add r12d,r11d + xor r14d,eax + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,r8d + mov r11d,ebx + + ror r14d,11 + xor r13d,r8d + xor r15d,r10d + + xor r11d,ecx + xor r14d,eax + add r12d,r15d + mov r15d,ebx + + ror r13d,6 + and r11d,eax + and r15d,ecx + + ror r14d,2 + add r12d,r13d + add r11d,r15d + + add edx,r12d + add r11d,r12d + lea rdi,QWORD PTR[1+rdi] + add r11d,r14d + + mov r12d,DWORD PTR[4+rsi] + mov r13d,edx + mov r14d,r11d + bswap r12d + ror r13d,14 + mov r15d,r8d + mov DWORD PTR[4+rsp],r12d + + ror r14d,9 + xor r13d,edx + xor r15d,r9d + + ror r13d,5 + add r12d,r10d + xor r14d,r11d + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,edx + mov r10d,eax + + ror r14d,11 + xor r13d,edx + xor r15d,r9d + + xor r10d,ebx + xor r14d,r11d + add r12d,r15d + mov r15d,eax + + ror r13d,6 + and r10d,r11d + and r15d,ebx + + ror r14d,2 + add r12d,r13d + add r10d,r15d + + add ecx,r12d + add r10d,r12d + lea rdi,QWORD PTR[1+rdi] + add r10d,r14d + + mov r12d,DWORD PTR[8+rsi] + mov r13d,ecx + mov r14d,r10d + bswap r12d + ror r13d,14 + mov r15d,edx + mov DWORD PTR[8+rsp],r12d + + ror r14d,9 + xor r13d,ecx + xor r15d,r8d + + ror r13d,5 + add r12d,r9d + xor r14d,r10d + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,ecx + mov r9d,r11d + + ror r14d,11 + xor r13d,ecx + xor r15d,r8d + + xor r9d,eax + xor r14d,r10d + add r12d,r15d + mov r15d,r11d + + ror r13d,6 + and r9d,r10d + and r15d,eax + + ror r14d,2 + add r12d,r13d + add r9d,r15d + + add ebx,r12d + add r9d,r12d + lea rdi,QWORD PTR[1+rdi] + add r9d,r14d + + mov r12d,DWORD PTR[12+rsi] + mov r13d,ebx + mov r14d,r9d + bswap r12d + ror r13d,14 + mov r15d,ecx + mov DWORD PTR[12+rsp],r12d + + ror r14d,9 + xor r13d,ebx + xor r15d,edx + + ror r13d,5 + add r12d,r8d + xor r14d,r9d + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,ebx + mov r8d,r10d + + ror r14d,11 + xor r13d,ebx + xor r15d,edx + + xor r8d,r11d + xor r14d,r9d + add r12d,r15d + mov r15d,r10d + + ror r13d,6 + and r8d,r9d + and r15d,r11d + + ror r14d,2 + add r12d,r13d + add r8d,r15d + + add eax,r12d + add r8d,r12d + lea rdi,QWORD PTR[1+rdi] + add r8d,r14d + + mov r12d,DWORD PTR[16+rsi] + mov r13d,eax + mov r14d,r8d + bswap r12d + ror r13d,14 + mov r15d,ebx + mov DWORD PTR[16+rsp],r12d + + ror r14d,9 + xor r13d,eax + xor r15d,ecx + + ror r13d,5 + add r12d,edx + xor r14d,r8d + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,eax + mov edx,r9d + + ror r14d,11 + xor r13d,eax + xor r15d,ecx + + xor edx,r10d + xor r14d,r8d + add r12d,r15d + mov r15d,r9d + + ror r13d,6 + and edx,r8d + and r15d,r10d + + ror r14d,2 + add r12d,r13d + add edx,r15d + + add r11d,r12d + add edx,r12d + lea rdi,QWORD PTR[1+rdi] + add edx,r14d + + mov r12d,DWORD PTR[20+rsi] + mov r13d,r11d + mov r14d,edx + bswap r12d + ror r13d,14 + mov r15d,eax + mov DWORD PTR[20+rsp],r12d + + ror r14d,9 + xor r13d,r11d + xor r15d,ebx + + ror r13d,5 + add r12d,ecx + xor r14d,edx + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,r11d + mov ecx,r8d + + ror r14d,11 + xor r13d,r11d + xor r15d,ebx + + xor ecx,r9d + xor r14d,edx + add r12d,r15d + mov r15d,r8d + + ror r13d,6 + and ecx,edx + and r15d,r9d + + ror r14d,2 + add r12d,r13d + add ecx,r15d + + add r10d,r12d + add ecx,r12d + lea rdi,QWORD PTR[1+rdi] + add ecx,r14d + + mov r12d,DWORD PTR[24+rsi] + mov r13d,r10d + mov r14d,ecx + bswap r12d + ror r13d,14 + mov r15d,r11d + mov DWORD PTR[24+rsp],r12d + + ror r14d,9 + xor r13d,r10d + xor r15d,eax + + ror r13d,5 + add r12d,ebx + xor r14d,ecx + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,r10d + mov ebx,edx + + ror r14d,11 + xor r13d,r10d + xor r15d,eax + + xor ebx,r8d + xor r14d,ecx + add r12d,r15d + mov r15d,edx + + ror r13d,6 + and ebx,ecx + and r15d,r8d + + ror r14d,2 + add r12d,r13d + add ebx,r15d + + add r9d,r12d + add ebx,r12d + lea rdi,QWORD PTR[1+rdi] + add ebx,r14d + + mov r12d,DWORD PTR[28+rsi] + mov r13d,r9d + mov r14d,ebx + bswap r12d + ror r13d,14 + mov r15d,r10d + mov DWORD PTR[28+rsp],r12d + + ror r14d,9 + xor r13d,r9d + xor r15d,r11d + + ror r13d,5 + add r12d,eax + xor r14d,ebx + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,r9d + mov eax,ecx + + ror r14d,11 + xor r13d,r9d + xor r15d,r11d + + xor eax,edx + xor r14d,ebx + add r12d,r15d + mov r15d,ecx + + ror r13d,6 + and eax,ebx + and r15d,edx + + ror r14d,2 + add r12d,r13d + add eax,r15d + + add r8d,r12d + add eax,r12d + lea rdi,QWORD PTR[1+rdi] + add eax,r14d + + mov r12d,DWORD PTR[32+rsi] + mov r13d,r8d + mov r14d,eax + bswap r12d + ror r13d,14 + mov r15d,r9d + mov DWORD PTR[32+rsp],r12d + + ror r14d,9 + xor r13d,r8d + xor r15d,r10d + + ror r13d,5 + add r12d,r11d + xor r14d,eax + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,r8d + mov r11d,ebx + + ror r14d,11 + xor r13d,r8d + xor r15d,r10d + + xor r11d,ecx + xor r14d,eax + add r12d,r15d + mov r15d,ebx + + ror r13d,6 + and r11d,eax + and r15d,ecx + + ror r14d,2 + add r12d,r13d + add r11d,r15d + + add edx,r12d + add r11d,r12d + lea rdi,QWORD PTR[1+rdi] + add r11d,r14d + + mov r12d,DWORD PTR[36+rsi] + mov r13d,edx + mov r14d,r11d + bswap r12d + ror r13d,14 + mov r15d,r8d + mov DWORD PTR[36+rsp],r12d + + ror r14d,9 + xor r13d,edx + xor r15d,r9d + + ror r13d,5 + add r12d,r10d + xor r14d,r11d + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,edx + mov r10d,eax + + ror r14d,11 + xor r13d,edx + xor r15d,r9d + + xor r10d,ebx + xor r14d,r11d + add r12d,r15d + mov r15d,eax + + ror r13d,6 + and r10d,r11d + and r15d,ebx + + ror r14d,2 + add r12d,r13d + add r10d,r15d + + add ecx,r12d + add r10d,r12d + lea rdi,QWORD PTR[1+rdi] + add r10d,r14d + + mov r12d,DWORD PTR[40+rsi] + mov r13d,ecx + mov r14d,r10d + bswap r12d + ror r13d,14 + mov r15d,edx + mov DWORD PTR[40+rsp],r12d + + ror r14d,9 + xor r13d,ecx + xor r15d,r8d + + ror r13d,5 + add r12d,r9d + xor r14d,r10d + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,ecx + mov r9d,r11d + + ror r14d,11 + xor r13d,ecx + xor r15d,r8d + + xor r9d,eax + xor r14d,r10d + add r12d,r15d + mov r15d,r11d + + ror r13d,6 + and r9d,r10d + and r15d,eax + + ror r14d,2 + add r12d,r13d + add r9d,r15d + + add ebx,r12d + add r9d,r12d + lea rdi,QWORD PTR[1+rdi] + add r9d,r14d + + mov r12d,DWORD PTR[44+rsi] + mov r13d,ebx + mov r14d,r9d + bswap r12d + ror r13d,14 + mov r15d,ecx + mov DWORD PTR[44+rsp],r12d + + ror r14d,9 + xor r13d,ebx + xor r15d,edx + + ror r13d,5 + add r12d,r8d + xor r14d,r9d + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,ebx + mov r8d,r10d + + ror r14d,11 + xor r13d,ebx + xor r15d,edx + + xor r8d,r11d + xor r14d,r9d + add r12d,r15d + mov r15d,r10d + + ror r13d,6 + and r8d,r9d + and r15d,r11d + + ror r14d,2 + add r12d,r13d + add r8d,r15d + + add eax,r12d + add r8d,r12d + lea rdi,QWORD PTR[1+rdi] + add r8d,r14d + + mov r12d,DWORD PTR[48+rsi] + mov r13d,eax + mov r14d,r8d + bswap r12d + ror r13d,14 + mov r15d,ebx + mov DWORD PTR[48+rsp],r12d + + ror r14d,9 + xor r13d,eax + xor r15d,ecx + + ror r13d,5 + add r12d,edx + xor r14d,r8d + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,eax + mov edx,r9d + + ror r14d,11 + xor r13d,eax + xor r15d,ecx + + xor edx,r10d + xor r14d,r8d + add r12d,r15d + mov r15d,r9d + + ror r13d,6 + and edx,r8d + and r15d,r10d + + ror r14d,2 + add r12d,r13d + add edx,r15d + + add r11d,r12d + add edx,r12d + lea rdi,QWORD PTR[1+rdi] + add edx,r14d + + mov r12d,DWORD PTR[52+rsi] + mov r13d,r11d + mov r14d,edx + bswap r12d + ror r13d,14 + mov r15d,eax + mov DWORD PTR[52+rsp],r12d + + ror r14d,9 + xor r13d,r11d + xor r15d,ebx + + ror r13d,5 + add r12d,ecx + xor r14d,edx + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,r11d + mov ecx,r8d + + ror r14d,11 + xor r13d,r11d + xor r15d,ebx + + xor ecx,r9d + xor r14d,edx + add r12d,r15d + mov r15d,r8d + + ror r13d,6 + and ecx,edx + and r15d,r9d + + ror r14d,2 + add r12d,r13d + add ecx,r15d + + add r10d,r12d + add ecx,r12d + lea rdi,QWORD PTR[1+rdi] + add ecx,r14d + + mov r12d,DWORD PTR[56+rsi] + mov r13d,r10d + mov r14d,ecx + bswap r12d + ror r13d,14 + mov r15d,r11d + mov DWORD PTR[56+rsp],r12d + + ror r14d,9 + xor r13d,r10d + xor r15d,eax + + ror r13d,5 + add r12d,ebx + xor r14d,ecx + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,r10d + mov ebx,edx + + ror r14d,11 + xor r13d,r10d + xor r15d,eax + + xor ebx,r8d + xor r14d,ecx + add r12d,r15d + mov r15d,edx + + ror r13d,6 + and ebx,ecx + and r15d,r8d + + ror r14d,2 + add r12d,r13d + add ebx,r15d + + add r9d,r12d + add ebx,r12d + lea rdi,QWORD PTR[1+rdi] + add ebx,r14d + + mov r12d,DWORD PTR[60+rsi] + mov r13d,r9d + mov r14d,ebx + bswap r12d + ror r13d,14 + mov r15d,r10d + mov DWORD PTR[60+rsp],r12d + + ror r14d,9 + xor r13d,r9d + xor r15d,r11d + + ror r13d,5 + add r12d,eax + xor r14d,ebx + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,r9d + mov eax,ecx + + ror r14d,11 + xor r13d,r9d + xor r15d,r11d + + xor eax,edx + xor r14d,ebx + add r12d,r15d + mov r15d,ecx + + ror r13d,6 + and eax,ebx + and r15d,edx + + ror r14d,2 + add r12d,r13d + add eax,r15d + + add r8d,r12d + add eax,r12d + lea rdi,QWORD PTR[1+rdi] + add eax,r14d + + jmp $L$rounds_16_xx +ALIGN 16 +$L$rounds_16_xx:: + mov r13d,DWORD PTR[4+rsp] + mov r14d,DWORD PTR[56+rsp] + mov r12d,r13d + mov r15d,r14d + + ror r12d,11 + xor r12d,r13d + shr r13d,3 + + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[36+rsp] + + ror r15d,2 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + add r12d,r13d + xor r14d,r15d + + add r12d,DWORD PTR[rsp] + mov r13d,r8d + add r12d,r14d + mov r14d,eax + ror r13d,14 + mov r15d,r9d + mov DWORD PTR[rsp],r12d + + ror r14d,9 + xor r13d,r8d + xor r15d,r10d + + ror r13d,5 + add r12d,r11d + xor r14d,eax + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,r8d + mov r11d,ebx + + ror r14d,11 + xor r13d,r8d + xor r15d,r10d + + xor r11d,ecx + xor r14d,eax + add r12d,r15d + mov r15d,ebx + + ror r13d,6 + and r11d,eax + and r15d,ecx + + ror r14d,2 + add r12d,r13d + add r11d,r15d + + add edx,r12d + add r11d,r12d + lea rdi,QWORD PTR[1+rdi] + add r11d,r14d + + mov r13d,DWORD PTR[8+rsp] + mov r14d,DWORD PTR[60+rsp] + mov r12d,r13d + mov r15d,r14d + + ror r12d,11 + xor r12d,r13d + shr r13d,3 + + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[40+rsp] + + ror r15d,2 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + add r12d,r13d + xor r14d,r15d + + add r12d,DWORD PTR[4+rsp] + mov r13d,edx + add r12d,r14d + mov r14d,r11d + ror r13d,14 + mov r15d,r8d + mov DWORD PTR[4+rsp],r12d + + ror r14d,9 + xor r13d,edx + xor r15d,r9d + + ror r13d,5 + add r12d,r10d + xor r14d,r11d + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,edx + mov r10d,eax + + ror r14d,11 + xor r13d,edx + xor r15d,r9d + + xor r10d,ebx + xor r14d,r11d + add r12d,r15d + mov r15d,eax + + ror r13d,6 + and r10d,r11d + and r15d,ebx + + ror r14d,2 + add r12d,r13d + add r10d,r15d + + add ecx,r12d + add r10d,r12d + lea rdi,QWORD PTR[1+rdi] + add r10d,r14d + + mov r13d,DWORD PTR[12+rsp] + mov r14d,DWORD PTR[rsp] + mov r12d,r13d + mov r15d,r14d + + ror r12d,11 + xor r12d,r13d + shr r13d,3 + + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[44+rsp] + + ror r15d,2 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + add r12d,r13d + xor r14d,r15d + + add r12d,DWORD PTR[8+rsp] + mov r13d,ecx + add r12d,r14d + mov r14d,r10d + ror r13d,14 + mov r15d,edx + mov DWORD PTR[8+rsp],r12d + + ror r14d,9 + xor r13d,ecx + xor r15d,r8d + + ror r13d,5 + add r12d,r9d + xor r14d,r10d + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,ecx + mov r9d,r11d + + ror r14d,11 + xor r13d,ecx + xor r15d,r8d + + xor r9d,eax + xor r14d,r10d + add r12d,r15d + mov r15d,r11d + + ror r13d,6 + and r9d,r10d + and r15d,eax + + ror r14d,2 + add r12d,r13d + add r9d,r15d + + add ebx,r12d + add r9d,r12d + lea rdi,QWORD PTR[1+rdi] + add r9d,r14d + + mov r13d,DWORD PTR[16+rsp] + mov r14d,DWORD PTR[4+rsp] + mov r12d,r13d + mov r15d,r14d + + ror r12d,11 + xor r12d,r13d + shr r13d,3 + + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[48+rsp] + + ror r15d,2 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + add r12d,r13d + xor r14d,r15d + + add r12d,DWORD PTR[12+rsp] + mov r13d,ebx + add r12d,r14d + mov r14d,r9d + ror r13d,14 + mov r15d,ecx + mov DWORD PTR[12+rsp],r12d + + ror r14d,9 + xor r13d,ebx + xor r15d,edx + + ror r13d,5 + add r12d,r8d + xor r14d,r9d + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,ebx + mov r8d,r10d + + ror r14d,11 + xor r13d,ebx + xor r15d,edx + + xor r8d,r11d + xor r14d,r9d + add r12d,r15d + mov r15d,r10d + + ror r13d,6 + and r8d,r9d + and r15d,r11d + + ror r14d,2 + add r12d,r13d + add r8d,r15d + + add eax,r12d + add r8d,r12d + lea rdi,QWORD PTR[1+rdi] + add r8d,r14d + + mov r13d,DWORD PTR[20+rsp] + mov r14d,DWORD PTR[8+rsp] + mov r12d,r13d + mov r15d,r14d + + ror r12d,11 + xor r12d,r13d + shr r13d,3 + + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[52+rsp] + + ror r15d,2 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + add r12d,r13d + xor r14d,r15d + + add r12d,DWORD PTR[16+rsp] + mov r13d,eax + add r12d,r14d + mov r14d,r8d + ror r13d,14 + mov r15d,ebx + mov DWORD PTR[16+rsp],r12d + + ror r14d,9 + xor r13d,eax + xor r15d,ecx + + ror r13d,5 + add r12d,edx + xor r14d,r8d + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,eax + mov edx,r9d + + ror r14d,11 + xor r13d,eax + xor r15d,ecx + + xor edx,r10d + xor r14d,r8d + add r12d,r15d + mov r15d,r9d + + ror r13d,6 + and edx,r8d + and r15d,r10d + + ror r14d,2 + add r12d,r13d + add edx,r15d + + add r11d,r12d + add edx,r12d + lea rdi,QWORD PTR[1+rdi] + add edx,r14d + + mov r13d,DWORD PTR[24+rsp] + mov r14d,DWORD PTR[12+rsp] + mov r12d,r13d + mov r15d,r14d + + ror r12d,11 + xor r12d,r13d + shr r13d,3 + + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[56+rsp] + + ror r15d,2 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + add r12d,r13d + xor r14d,r15d + + add r12d,DWORD PTR[20+rsp] + mov r13d,r11d + add r12d,r14d + mov r14d,edx + ror r13d,14 + mov r15d,eax + mov DWORD PTR[20+rsp],r12d + + ror r14d,9 + xor r13d,r11d + xor r15d,ebx + + ror r13d,5 + add r12d,ecx + xor r14d,edx + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,r11d + mov ecx,r8d + + ror r14d,11 + xor r13d,r11d + xor r15d,ebx + + xor ecx,r9d + xor r14d,edx + add r12d,r15d + mov r15d,r8d + + ror r13d,6 + and ecx,edx + and r15d,r9d + + ror r14d,2 + add r12d,r13d + add ecx,r15d + + add r10d,r12d + add ecx,r12d + lea rdi,QWORD PTR[1+rdi] + add ecx,r14d + + mov r13d,DWORD PTR[28+rsp] + mov r14d,DWORD PTR[16+rsp] + mov r12d,r13d + mov r15d,r14d + + ror r12d,11 + xor r12d,r13d + shr r13d,3 + + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[60+rsp] + + ror r15d,2 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + add r12d,r13d + xor r14d,r15d + + add r12d,DWORD PTR[24+rsp] + mov r13d,r10d + add r12d,r14d + mov r14d,ecx + ror r13d,14 + mov r15d,r11d + mov DWORD PTR[24+rsp],r12d + + ror r14d,9 + xor r13d,r10d + xor r15d,eax + + ror r13d,5 + add r12d,ebx + xor r14d,ecx + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,r10d + mov ebx,edx + + ror r14d,11 + xor r13d,r10d + xor r15d,eax + + xor ebx,r8d + xor r14d,ecx + add r12d,r15d + mov r15d,edx + + ror r13d,6 + and ebx,ecx + and r15d,r8d + + ror r14d,2 + add r12d,r13d + add ebx,r15d + + add r9d,r12d + add ebx,r12d + lea rdi,QWORD PTR[1+rdi] + add ebx,r14d + + mov r13d,DWORD PTR[32+rsp] + mov r14d,DWORD PTR[20+rsp] + mov r12d,r13d + mov r15d,r14d + + ror r12d,11 + xor r12d,r13d + shr r13d,3 + + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[rsp] + + ror r15d,2 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + add r12d,r13d + xor r14d,r15d + + add r12d,DWORD PTR[28+rsp] + mov r13d,r9d + add r12d,r14d + mov r14d,ebx + ror r13d,14 + mov r15d,r10d + mov DWORD PTR[28+rsp],r12d + + ror r14d,9 + xor r13d,r9d + xor r15d,r11d + + ror r13d,5 + add r12d,eax + xor r14d,ebx + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,r9d + mov eax,ecx + + ror r14d,11 + xor r13d,r9d + xor r15d,r11d + + xor eax,edx + xor r14d,ebx + add r12d,r15d + mov r15d,ecx + + ror r13d,6 + and eax,ebx + and r15d,edx + + ror r14d,2 + add r12d,r13d + add eax,r15d + + add r8d,r12d + add eax,r12d + lea rdi,QWORD PTR[1+rdi] + add eax,r14d + + mov r13d,DWORD PTR[36+rsp] + mov r14d,DWORD PTR[24+rsp] + mov r12d,r13d + mov r15d,r14d + + ror r12d,11 + xor r12d,r13d + shr r13d,3 + + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[4+rsp] + + ror r15d,2 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + add r12d,r13d + xor r14d,r15d + + add r12d,DWORD PTR[32+rsp] + mov r13d,r8d + add r12d,r14d + mov r14d,eax + ror r13d,14 + mov r15d,r9d + mov DWORD PTR[32+rsp],r12d + + ror r14d,9 + xor r13d,r8d + xor r15d,r10d + + ror r13d,5 + add r12d,r11d + xor r14d,eax + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,r8d + mov r11d,ebx + + ror r14d,11 + xor r13d,r8d + xor r15d,r10d + + xor r11d,ecx + xor r14d,eax + add r12d,r15d + mov r15d,ebx + + ror r13d,6 + and r11d,eax + and r15d,ecx + + ror r14d,2 + add r12d,r13d + add r11d,r15d + + add edx,r12d + add r11d,r12d + lea rdi,QWORD PTR[1+rdi] + add r11d,r14d + + mov r13d,DWORD PTR[40+rsp] + mov r14d,DWORD PTR[28+rsp] + mov r12d,r13d + mov r15d,r14d + + ror r12d,11 + xor r12d,r13d + shr r13d,3 + + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[8+rsp] + + ror r15d,2 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + add r12d,r13d + xor r14d,r15d + + add r12d,DWORD PTR[36+rsp] + mov r13d,edx + add r12d,r14d + mov r14d,r11d + ror r13d,14 + mov r15d,r8d + mov DWORD PTR[36+rsp],r12d + + ror r14d,9 + xor r13d,edx + xor r15d,r9d + + ror r13d,5 + add r12d,r10d + xor r14d,r11d + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,edx + mov r10d,eax + + ror r14d,11 + xor r13d,edx + xor r15d,r9d + + xor r10d,ebx + xor r14d,r11d + add r12d,r15d + mov r15d,eax + + ror r13d,6 + and r10d,r11d + and r15d,ebx + + ror r14d,2 + add r12d,r13d + add r10d,r15d + + add ecx,r12d + add r10d,r12d + lea rdi,QWORD PTR[1+rdi] + add r10d,r14d + + mov r13d,DWORD PTR[44+rsp] + mov r14d,DWORD PTR[32+rsp] + mov r12d,r13d + mov r15d,r14d + + ror r12d,11 + xor r12d,r13d + shr r13d,3 + + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[12+rsp] + + ror r15d,2 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + add r12d,r13d + xor r14d,r15d + + add r12d,DWORD PTR[40+rsp] + mov r13d,ecx + add r12d,r14d + mov r14d,r10d + ror r13d,14 + mov r15d,edx + mov DWORD PTR[40+rsp],r12d + + ror r14d,9 + xor r13d,ecx + xor r15d,r8d + + ror r13d,5 + add r12d,r9d + xor r14d,r10d + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,ecx + mov r9d,r11d + + ror r14d,11 + xor r13d,ecx + xor r15d,r8d + + xor r9d,eax + xor r14d,r10d + add r12d,r15d + mov r15d,r11d + + ror r13d,6 + and r9d,r10d + and r15d,eax + + ror r14d,2 + add r12d,r13d + add r9d,r15d + + add ebx,r12d + add r9d,r12d + lea rdi,QWORD PTR[1+rdi] + add r9d,r14d + + mov r13d,DWORD PTR[48+rsp] + mov r14d,DWORD PTR[36+rsp] + mov r12d,r13d + mov r15d,r14d + + ror r12d,11 + xor r12d,r13d + shr r13d,3 + + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[16+rsp] + + ror r15d,2 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + add r12d,r13d + xor r14d,r15d + + add r12d,DWORD PTR[44+rsp] + mov r13d,ebx + add r12d,r14d + mov r14d,r9d + ror r13d,14 + mov r15d,ecx + mov DWORD PTR[44+rsp],r12d + + ror r14d,9 + xor r13d,ebx + xor r15d,edx + + ror r13d,5 + add r12d,r8d + xor r14d,r9d + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,ebx + mov r8d,r10d + + ror r14d,11 + xor r13d,ebx + xor r15d,edx + + xor r8d,r11d + xor r14d,r9d + add r12d,r15d + mov r15d,r10d + + ror r13d,6 + and r8d,r9d + and r15d,r11d + + ror r14d,2 + add r12d,r13d + add r8d,r15d + + add eax,r12d + add r8d,r12d + lea rdi,QWORD PTR[1+rdi] + add r8d,r14d + + mov r13d,DWORD PTR[52+rsp] + mov r14d,DWORD PTR[40+rsp] + mov r12d,r13d + mov r15d,r14d + + ror r12d,11 + xor r12d,r13d + shr r13d,3 + + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[20+rsp] + + ror r15d,2 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + add r12d,r13d + xor r14d,r15d + + add r12d,DWORD PTR[48+rsp] + mov r13d,eax + add r12d,r14d + mov r14d,r8d + ror r13d,14 + mov r15d,ebx + mov DWORD PTR[48+rsp],r12d + + ror r14d,9 + xor r13d,eax + xor r15d,ecx + + ror r13d,5 + add r12d,edx + xor r14d,r8d + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,eax + mov edx,r9d + + ror r14d,11 + xor r13d,eax + xor r15d,ecx + + xor edx,r10d + xor r14d,r8d + add r12d,r15d + mov r15d,r9d + + ror r13d,6 + and edx,r8d + and r15d,r10d + + ror r14d,2 + add r12d,r13d + add edx,r15d + + add r11d,r12d + add edx,r12d + lea rdi,QWORD PTR[1+rdi] + add edx,r14d + + mov r13d,DWORD PTR[56+rsp] + mov r14d,DWORD PTR[44+rsp] + mov r12d,r13d + mov r15d,r14d + + ror r12d,11 + xor r12d,r13d + shr r13d,3 + + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[24+rsp] + + ror r15d,2 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + add r12d,r13d + xor r14d,r15d + + add r12d,DWORD PTR[52+rsp] + mov r13d,r11d + add r12d,r14d + mov r14d,edx + ror r13d,14 + mov r15d,eax + mov DWORD PTR[52+rsp],r12d + + ror r14d,9 + xor r13d,r11d + xor r15d,ebx + + ror r13d,5 + add r12d,ecx + xor r14d,edx + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,r11d + mov ecx,r8d + + ror r14d,11 + xor r13d,r11d + xor r15d,ebx + + xor ecx,r9d + xor r14d,edx + add r12d,r15d + mov r15d,r8d + + ror r13d,6 + and ecx,edx + and r15d,r9d + + ror r14d,2 + add r12d,r13d + add ecx,r15d + + add r10d,r12d + add ecx,r12d + lea rdi,QWORD PTR[1+rdi] + add ecx,r14d + + mov r13d,DWORD PTR[60+rsp] + mov r14d,DWORD PTR[48+rsp] + mov r12d,r13d + mov r15d,r14d + + ror r12d,11 + xor r12d,r13d + shr r13d,3 + + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[28+rsp] + + ror r15d,2 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + add r12d,r13d + xor r14d,r15d + + add r12d,DWORD PTR[56+rsp] + mov r13d,r10d + add r12d,r14d + mov r14d,ecx + ror r13d,14 + mov r15d,r11d + mov DWORD PTR[56+rsp],r12d + + ror r14d,9 + xor r13d,r10d + xor r15d,eax + + ror r13d,5 + add r12d,ebx + xor r14d,ecx + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,r10d + mov ebx,edx + + ror r14d,11 + xor r13d,r10d + xor r15d,eax + + xor ebx,r8d + xor r14d,ecx + add r12d,r15d + mov r15d,edx + + ror r13d,6 + and ebx,ecx + and r15d,r8d + + ror r14d,2 + add r12d,r13d + add ebx,r15d + + add r9d,r12d + add ebx,r12d + lea rdi,QWORD PTR[1+rdi] + add ebx,r14d + + mov r13d,DWORD PTR[rsp] + mov r14d,DWORD PTR[52+rsp] + mov r12d,r13d + mov r15d,r14d + + ror r12d,11 + xor r12d,r13d + shr r13d,3 + + ror r12d,7 + xor r13d,r12d + mov r12d,DWORD PTR[32+rsp] + + ror r15d,2 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + add r12d,r13d + xor r14d,r15d + + add r12d,DWORD PTR[60+rsp] + mov r13d,r9d + add r12d,r14d + mov r14d,ebx + ror r13d,14 + mov r15d,r10d + mov DWORD PTR[60+rsp],r12d + + ror r14d,9 + xor r13d,r9d + xor r15d,r11d + + ror r13d,5 + add r12d,eax + xor r14d,ebx + + add r12d,DWORD PTR[rdi*4+rbp] + and r15d,r9d + mov eax,ecx + + ror r14d,11 + xor r13d,r9d + xor r15d,r11d + + xor eax,edx + xor r14d,ebx + add r12d,r15d + mov r15d,ecx + + ror r13d,6 + and eax,ebx + and r15d,edx + + ror r14d,2 + add r12d,r13d + add eax,r15d + + add r8d,r12d + add eax,r12d + lea rdi,QWORD PTR[1+rdi] + add eax,r14d + + cmp rdi,64 + jb $L$rounds_16_xx + + mov rdi,QWORD PTR[((64+0))+rsp] + lea rsi,QWORD PTR[64+rsi] + + add eax,DWORD PTR[rdi] + add ebx,DWORD PTR[4+rdi] + add ecx,DWORD PTR[8+rdi] + add edx,DWORD PTR[12+rdi] + add r8d,DWORD PTR[16+rdi] + add r9d,DWORD PTR[20+rdi] + add r10d,DWORD PTR[24+rdi] + add r11d,DWORD PTR[28+rdi] + + cmp rsi,QWORD PTR[((64+16))+rsp] + + mov DWORD PTR[rdi],eax + mov DWORD PTR[4+rdi],ebx + mov DWORD PTR[8+rdi],ecx + mov DWORD PTR[12+rdi],edx + mov DWORD PTR[16+rdi],r8d + mov DWORD PTR[20+rdi],r9d + mov DWORD PTR[24+rdi],r10d + mov DWORD PTR[28+rdi],r11d + jb $L$loop + + mov rsi,QWORD PTR[((64+24))+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha256_block_data_order:: +sha256_block_data_order ENDP +ALIGN 64 + +K256:: + DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h + DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h + DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h + DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h + DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch + DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah + DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h + DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h + DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h + DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h + DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h + DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h + DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h + DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h + DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h + DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h + +.text$ ENDS +END + diff --git a/ext/libressl/crypto/sha/sha256-mingw64-x86_64.S b/ext/libressl/crypto/sha/sha256-mingw64-x86_64.S new file mode 100644 index 0000000..3de981b --- /dev/null +++ b/ext/libressl/crypto/sha/sha256-mingw64-x86_64.S @@ -0,0 +1,1790 @@ +#include "x86_arch.h" +.text + +.globl sha256_block_data_order +.def sha256_block_data_order; .scl 2; .type 32; .endef +.p2align 4 +sha256_block_data_order: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_sha256_block_data_order: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $64+32,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +.Lprologue: + + leaq K256(%rip),%rbp + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp .Lloop + +.p2align 4 +.Lloop: + xorq %rdi,%rdi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,0(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,4(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,8(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,12(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,16(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,20(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,24(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,28(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,32(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,36(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,40(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,44(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,48(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,52(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,56(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,60(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + jmp .Lrounds_16_xx +.p2align 4 +.Lrounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 36(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r14d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,0(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 8(%rsp),%r13d + movl 60(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 40(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %r14d,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,4(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 12(%rsp),%r13d + movl 0(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 44(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r14d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,8(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 16(%rsp),%r13d + movl 4(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 48(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %r14d,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,12(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 20(%rsp),%r13d + movl 8(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 52(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r14d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,16(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 24(%rsp),%r13d + movl 12(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 56(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %r14d,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,20(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 28(%rsp),%r13d + movl 16(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 60(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r14d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,24(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 32(%rsp),%r13d + movl 20(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 0(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %r14d,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,28(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + movl 36(%rsp),%r13d + movl 24(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 4(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r14d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,32(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 40(%rsp),%r13d + movl 28(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 8(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %r14d,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,36(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 44(%rsp),%r13d + movl 32(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 12(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r14d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,40(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 48(%rsp),%r13d + movl 36(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 16(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %r14d,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,44(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 52(%rsp),%r13d + movl 40(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 20(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r14d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,48(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 56(%rsp),%r13d + movl 44(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 24(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %r14d,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,52(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 60(%rsp),%r13d + movl 48(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 28(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r14d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,56(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 0(%rsp),%r13d + movl 52(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 32(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %r14d,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,60(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + cmpq $64,%rdi + jb .Lrounds_16_xx + + movq 64+0(%rsp),%rdi + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_sha256_block_data_order: +.p2align 6 + +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 diff --git a/ext/libressl/crypto/sha/sha256.c b/ext/libressl/crypto/sha/sha256.c new file mode 100644 index 0000000..9c05d3b --- /dev/null +++ b/ext/libressl/crypto/sha/sha256.c @@ -0,0 +1,284 @@ +/* $OpenBSD: sha256.c,v 1.10 2019/01/21 23:20:31 jsg Exp $ */ +/* ==================================================================== + * Copyright (c) 2004 The OpenSSL Project. All rights reserved + * according to the OpenSSL license [found in ../../LICENSE]. + * ==================================================================== + */ + +#include + +#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA256) + +#include + +#include +#include + +#include +#include +#include + +int SHA224_Init(SHA256_CTX *c) + { + memset (c,0,sizeof(*c)); + c->h[0]=0xc1059ed8UL; c->h[1]=0x367cd507UL; + c->h[2]=0x3070dd17UL; c->h[3]=0xf70e5939UL; + c->h[4]=0xffc00b31UL; c->h[5]=0x68581511UL; + c->h[6]=0x64f98fa7UL; c->h[7]=0xbefa4fa4UL; + c->md_len=SHA224_DIGEST_LENGTH; + return 1; + } + +int SHA256_Init(SHA256_CTX *c) + { + memset (c,0,sizeof(*c)); + c->h[0]=0x6a09e667UL; c->h[1]=0xbb67ae85UL; + c->h[2]=0x3c6ef372UL; c->h[3]=0xa54ff53aUL; + c->h[4]=0x510e527fUL; c->h[5]=0x9b05688cUL; + c->h[6]=0x1f83d9abUL; c->h[7]=0x5be0cd19UL; + c->md_len=SHA256_DIGEST_LENGTH; + return 1; + } + +unsigned char *SHA224(const unsigned char *d, size_t n, unsigned char *md) + { + SHA256_CTX c; + static unsigned char m[SHA224_DIGEST_LENGTH]; + + if (md == NULL) md=m; + SHA224_Init(&c); + SHA256_Update(&c,d,n); + SHA256_Final(md,&c); + explicit_bzero(&c,sizeof(c)); + return(md); + } + +unsigned char *SHA256(const unsigned char *d, size_t n, unsigned char *md) + { + SHA256_CTX c; + static unsigned char m[SHA256_DIGEST_LENGTH]; + + if (md == NULL) md=m; + SHA256_Init(&c); + SHA256_Update(&c,d,n); + SHA256_Final(md,&c); + explicit_bzero(&c,sizeof(c)); + return(md); + } + +int SHA224_Update(SHA256_CTX *c, const void *data, size_t len) +{ return SHA256_Update (c,data,len); } +int SHA224_Final (unsigned char *md, SHA256_CTX *c) +{ return SHA256_Final (md,c); } + +#define DATA_ORDER_IS_BIG_ENDIAN + +#define HASH_LONG SHA_LONG +#define HASH_CTX SHA256_CTX +#define HASH_CBLOCK SHA_CBLOCK +/* + * Note that FIPS180-2 discusses "Truncation of the Hash Function Output." + * default: case below covers for it. It's not clear however if it's + * permitted to truncate to amount of bytes not divisible by 4. I bet not, + * but if it is, then default: case shall be extended. For reference. + * Idea behind separate cases for pre-defined lengths is to let the + * compiler decide if it's appropriate to unroll small loops. + */ +#define HASH_MAKE_STRING(c,s) do { \ + unsigned long ll; \ + unsigned int nn; \ + switch ((c)->md_len) \ + { case SHA224_DIGEST_LENGTH: \ + for (nn=0;nnh[nn]; HOST_l2c(ll,(s)); } \ + break; \ + case SHA256_DIGEST_LENGTH: \ + for (nn=0;nnh[nn]; HOST_l2c(ll,(s)); } \ + break; \ + default: \ + if ((c)->md_len > SHA256_DIGEST_LENGTH) \ + return 0; \ + for (nn=0;nn<(c)->md_len/4;nn++) \ + { ll=(c)->h[nn]; HOST_l2c(ll,(s)); } \ + break; \ + } \ + } while (0) + +#define HASH_UPDATE SHA256_Update +#define HASH_TRANSFORM SHA256_Transform +#define HASH_FINAL SHA256_Final +#define HASH_BLOCK_DATA_ORDER sha256_block_data_order +#ifndef SHA256_ASM +static +#endif +void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num); + +#include "md32_common.h" + +#ifndef SHA256_ASM +static const SHA_LONG K256[64] = { + 0x428a2f98UL,0x71374491UL,0xb5c0fbcfUL,0xe9b5dba5UL, + 0x3956c25bUL,0x59f111f1UL,0x923f82a4UL,0xab1c5ed5UL, + 0xd807aa98UL,0x12835b01UL,0x243185beUL,0x550c7dc3UL, + 0x72be5d74UL,0x80deb1feUL,0x9bdc06a7UL,0xc19bf174UL, + 0xe49b69c1UL,0xefbe4786UL,0x0fc19dc6UL,0x240ca1ccUL, + 0x2de92c6fUL,0x4a7484aaUL,0x5cb0a9dcUL,0x76f988daUL, + 0x983e5152UL,0xa831c66dUL,0xb00327c8UL,0xbf597fc7UL, + 0xc6e00bf3UL,0xd5a79147UL,0x06ca6351UL,0x14292967UL, + 0x27b70a85UL,0x2e1b2138UL,0x4d2c6dfcUL,0x53380d13UL, + 0x650a7354UL,0x766a0abbUL,0x81c2c92eUL,0x92722c85UL, + 0xa2bfe8a1UL,0xa81a664bUL,0xc24b8b70UL,0xc76c51a3UL, + 0xd192e819UL,0xd6990624UL,0xf40e3585UL,0x106aa070UL, + 0x19a4c116UL,0x1e376c08UL,0x2748774cUL,0x34b0bcb5UL, + 0x391c0cb3UL,0x4ed8aa4aUL,0x5b9cca4fUL,0x682e6ff3UL, + 0x748f82eeUL,0x78a5636fUL,0x84c87814UL,0x8cc70208UL, + 0x90befffaUL,0xa4506cebUL,0xbef9a3f7UL,0xc67178f2UL }; + +/* + * FIPS specification refers to right rotations, while our ROTATE macro + * is left one. This is why you might notice that rotation coefficients + * differ from those observed in FIPS document by 32-N... + */ +#define Sigma0(x) (ROTATE((x),30) ^ ROTATE((x),19) ^ ROTATE((x),10)) +#define Sigma1(x) (ROTATE((x),26) ^ ROTATE((x),21) ^ ROTATE((x),7)) +#define sigma0(x) (ROTATE((x),25) ^ ROTATE((x),14) ^ ((x)>>3)) +#define sigma1(x) (ROTATE((x),15) ^ ROTATE((x),13) ^ ((x)>>10)) + +#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z))) +#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + +#ifdef OPENSSL_SMALL_FOOTPRINT + +static void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num) + { + unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1,T2; + SHA_LONG X[16],l; + int i; + const unsigned char *data=in; + + while (num--) { + + a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3]; + e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7]; + + for (i=0;i<16;i++) + { + HOST_c2l(data,l); T1 = X[i] = l; + T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i]; + T2 = Sigma0(a) + Maj(a,b,c); + h = g; g = f; f = e; e = d + T1; + d = c; c = b; b = a; a = T1 + T2; + } + + for (;i<64;i++) + { + s0 = X[(i+1)&0x0f]; s0 = sigma0(s0); + s1 = X[(i+14)&0x0f]; s1 = sigma1(s1); + + T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf]; + T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i]; + T2 = Sigma0(a) + Maj(a,b,c); + h = g; g = f; f = e; e = d + T1; + d = c; c = b; b = a; a = T1 + T2; + } + + ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d; + ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h; + + } +} + +#else + +#define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \ + T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i]; \ + h = Sigma0(a) + Maj(a,b,c); \ + d += T1; h += T1; } while (0) + +#define ROUND_16_63(i,a,b,c,d,e,f,g,h,X) do { \ + s0 = X[(i+1)&0x0f]; s0 = sigma0(s0); \ + s1 = X[(i+14)&0x0f]; s1 = sigma1(s1); \ + T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f]; \ + ROUND_00_15(i,a,b,c,d,e,f,g,h); } while (0) + +static void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num) + { + unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1; + SHA_LONG X[16]; + int i; + const unsigned char *data=in; + + while (num--) { + + a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3]; + e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7]; + + if (BYTE_ORDER != LITTLE_ENDIAN && + sizeof(SHA_LONG)==4 && ((size_t)in%4)==0) + { + const SHA_LONG *W=(const SHA_LONG *)data; + + T1 = X[0] = W[0]; ROUND_00_15(0,a,b,c,d,e,f,g,h); + T1 = X[1] = W[1]; ROUND_00_15(1,h,a,b,c,d,e,f,g); + T1 = X[2] = W[2]; ROUND_00_15(2,g,h,a,b,c,d,e,f); + T1 = X[3] = W[3]; ROUND_00_15(3,f,g,h,a,b,c,d,e); + T1 = X[4] = W[4]; ROUND_00_15(4,e,f,g,h,a,b,c,d); + T1 = X[5] = W[5]; ROUND_00_15(5,d,e,f,g,h,a,b,c); + T1 = X[6] = W[6]; ROUND_00_15(6,c,d,e,f,g,h,a,b); + T1 = X[7] = W[7]; ROUND_00_15(7,b,c,d,e,f,g,h,a); + T1 = X[8] = W[8]; ROUND_00_15(8,a,b,c,d,e,f,g,h); + T1 = X[9] = W[9]; ROUND_00_15(9,h,a,b,c,d,e,f,g); + T1 = X[10] = W[10]; ROUND_00_15(10,g,h,a,b,c,d,e,f); + T1 = X[11] = W[11]; ROUND_00_15(11,f,g,h,a,b,c,d,e); + T1 = X[12] = W[12]; ROUND_00_15(12,e,f,g,h,a,b,c,d); + T1 = X[13] = W[13]; ROUND_00_15(13,d,e,f,g,h,a,b,c); + T1 = X[14] = W[14]; ROUND_00_15(14,c,d,e,f,g,h,a,b); + T1 = X[15] = W[15]; ROUND_00_15(15,b,c,d,e,f,g,h,a); + + data += SHA256_CBLOCK; + } + else + { + SHA_LONG l; + + HOST_c2l(data,l); T1 = X[0] = l; ROUND_00_15(0,a,b,c,d,e,f,g,h); + HOST_c2l(data,l); T1 = X[1] = l; ROUND_00_15(1,h,a,b,c,d,e,f,g); + HOST_c2l(data,l); T1 = X[2] = l; ROUND_00_15(2,g,h,a,b,c,d,e,f); + HOST_c2l(data,l); T1 = X[3] = l; ROUND_00_15(3,f,g,h,a,b,c,d,e); + HOST_c2l(data,l); T1 = X[4] = l; ROUND_00_15(4,e,f,g,h,a,b,c,d); + HOST_c2l(data,l); T1 = X[5] = l; ROUND_00_15(5,d,e,f,g,h,a,b,c); + HOST_c2l(data,l); T1 = X[6] = l; ROUND_00_15(6,c,d,e,f,g,h,a,b); + HOST_c2l(data,l); T1 = X[7] = l; ROUND_00_15(7,b,c,d,e,f,g,h,a); + HOST_c2l(data,l); T1 = X[8] = l; ROUND_00_15(8,a,b,c,d,e,f,g,h); + HOST_c2l(data,l); T1 = X[9] = l; ROUND_00_15(9,h,a,b,c,d,e,f,g); + HOST_c2l(data,l); T1 = X[10] = l; ROUND_00_15(10,g,h,a,b,c,d,e,f); + HOST_c2l(data,l); T1 = X[11] = l; ROUND_00_15(11,f,g,h,a,b,c,d,e); + HOST_c2l(data,l); T1 = X[12] = l; ROUND_00_15(12,e,f,g,h,a,b,c,d); + HOST_c2l(data,l); T1 = X[13] = l; ROUND_00_15(13,d,e,f,g,h,a,b,c); + HOST_c2l(data,l); T1 = X[14] = l; ROUND_00_15(14,c,d,e,f,g,h,a,b); + HOST_c2l(data,l); T1 = X[15] = l; ROUND_00_15(15,b,c,d,e,f,g,h,a); + } + + for (i=16;i<64;i+=8) + { + ROUND_16_63(i+0,a,b,c,d,e,f,g,h,X); + ROUND_16_63(i+1,h,a,b,c,d,e,f,g,X); + ROUND_16_63(i+2,g,h,a,b,c,d,e,f,X); + ROUND_16_63(i+3,f,g,h,a,b,c,d,e,X); + ROUND_16_63(i+4,e,f,g,h,a,b,c,d,X); + ROUND_16_63(i+5,d,e,f,g,h,a,b,c,X); + ROUND_16_63(i+6,c,d,e,f,g,h,a,b,X); + ROUND_16_63(i+7,b,c,d,e,f,g,h,a,X); + } + + ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d; + ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h; + + } + } + +#endif +#endif /* SHA256_ASM */ + +#endif /* OPENSSL_NO_SHA256 */ diff --git a/ext/libressl/crypto/sha/sha512-elf-armv4.S b/ext/libressl/crypto/sha/sha512-elf-armv4.S new file mode 100644 index 0000000..8abf8d5 --- /dev/null +++ b/ext/libressl/crypto/sha/sha512-elf-armv4.S @@ -0,0 +1,1786 @@ +#include "arm_arch.h" +#ifdef __ARMEL__ +# define LO 0 +# define HI 4 +# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1 +#else +# define HI 0 +# define LO 4 +# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1 +#endif + +.text +.code 32 +.type K512,%object +.align 5 +K512: +WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd) +WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc) +WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019) +WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118) +WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe) +WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2) +WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1) +WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694) +WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3) +WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65) +WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483) +WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5) +WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210) +WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4) +WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725) +WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70) +WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926) +WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df) +WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8) +WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b) +WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001) +WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30) +WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910) +WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8) +WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53) +WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8) +WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb) +WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3) +WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60) +WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec) +WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9) +WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b) +WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207) +WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178) +WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6) +WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b) +WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493) +WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) +WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) +WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) +.size K512,.-K512 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha512_block_data_order +.skip 32-4 + +.global sha512_block_data_order +.type sha512_block_data_order,%function +sha512_block_data_order: + sub r3,pc,#8 @ sha512_block_data_order + add r2,r1,r2,lsl#7 @ len to point at the end of inp +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#1 + bne .LNEON +#endif + stmdb sp!,{r4-r12,lr} + sub r14,r3,#672 @ K512 + sub sp,sp,#9*8 + + ldr r7,[r0,#32+LO] + ldr r8,[r0,#32+HI] + ldr r9, [r0,#48+LO] + ldr r10, [r0,#48+HI] + ldr r11, [r0,#56+LO] + ldr r12, [r0,#56+HI] +.Loop: + str r9, [sp,#48+0] + str r10, [sp,#48+4] + str r11, [sp,#56+0] + str r12, [sp,#56+4] + ldr r5,[r0,#0+LO] + ldr r6,[r0,#0+HI] + ldr r3,[r0,#8+LO] + ldr r4,[r0,#8+HI] + ldr r9, [r0,#16+LO] + ldr r10, [r0,#16+HI] + ldr r11, [r0,#24+LO] + ldr r12, [r0,#24+HI] + str r3,[sp,#8+0] + str r4,[sp,#8+4] + str r9, [sp,#16+0] + str r10, [sp,#16+4] + str r11, [sp,#24+0] + str r12, [sp,#24+4] + ldr r3,[r0,#40+LO] + ldr r4,[r0,#40+HI] + str r3,[sp,#40+0] + str r4,[sp,#40+4] + +.L00_15: +#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT) + ldrb r3,[r1,#7] + ldrb r9, [r1,#6] + ldrb r10, [r1,#5] + ldrb r11, [r1,#4] + ldrb r4,[r1,#3] + ldrb r12, [r1,#2] + orr r3,r3,r9,lsl#8 + ldrb r9, [r1,#1] + orr r3,r3,r10,lsl#16 + ldrb r10, [r1],#8 + orr r3,r3,r11,lsl#24 + orr r4,r4,r12,lsl#8 + orr r4,r4,r9,lsl#16 + orr r4,r4,r10,lsl#24 +#else + ldr r3,[r1,#4] + ldr r4,[r1],#8 +#ifdef __ARMEL__ + rev r3,r3 + rev r4,r4 +#endif +#endif + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 + mov r9,r7,lsr#14 + str r3,[sp,#64+0] + mov r10,r8,lsr#14 + str r4,[sp,#64+4] + eor r9,r9,r8,lsl#18 + ldr r11,[sp,#56+0] @ h.lo + eor r10,r10,r7,lsl#18 + ldr r12,[sp,#56+4] @ h.hi + eor r9,r9,r7,lsr#18 + eor r10,r10,r8,lsr#18 + eor r9,r9,r8,lsl#14 + eor r10,r10,r7,lsl#14 + eor r9,r9,r8,lsr#9 + eor r10,r10,r7,lsr#9 + eor r9,r9,r7,lsl#23 + eor r10,r10,r8,lsl#23 @ Sigma1(e) + adds r3,r3,r9 + ldr r9,[sp,#40+0] @ f.lo + adc r4,r4,r10 @ T += Sigma1(e) + ldr r10,[sp,#40+4] @ f.hi + adds r3,r3,r11 + ldr r11,[sp,#48+0] @ g.lo + adc r4,r4,r12 @ T += h + ldr r12,[sp,#48+4] @ g.hi + + eor r9,r9,r11 + str r7,[sp,#32+0] + eor r10,r10,r12 + str r8,[sp,#32+4] + and r9,r9,r7 + str r5,[sp,#0+0] + and r10,r10,r8 + str r6,[sp,#0+4] + eor r9,r9,r11 + ldr r11,[r14,#LO] @ K[i].lo + eor r10,r10,r12 @ Ch(e,f,g) + ldr r12,[r14,#HI] @ K[i].hi + + adds r3,r3,r9 + ldr r7,[sp,#24+0] @ d.lo + adc r4,r4,r10 @ T += Ch(e,f,g) + ldr r8,[sp,#24+4] @ d.hi + adds r3,r3,r11 + and r9,r11,#0xff + adc r4,r4,r12 @ T += K[i] + adds r7,r7,r3 + ldr r11,[sp,#8+0] @ b.lo + adc r8,r8,r4 @ d += T + teq r9,#148 + + ldr r12,[sp,#16+0] @ c.lo + orreq r14,r14,#1 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 + mov r9,r5,lsr#28 + mov r10,r6,lsr#28 + eor r9,r9,r6,lsl#4 + eor r10,r10,r5,lsl#4 + eor r9,r9,r6,lsr#2 + eor r10,r10,r5,lsr#2 + eor r9,r9,r5,lsl#30 + eor r10,r10,r6,lsl#30 + eor r9,r9,r6,lsr#7 + eor r10,r10,r5,lsr#7 + eor r9,r9,r5,lsl#25 + eor r10,r10,r6,lsl#25 @ Sigma0(a) + adds r3,r3,r9 + and r9,r5,r11 + adc r4,r4,r10 @ T += Sigma0(a) + + ldr r10,[sp,#8+4] @ b.hi + orr r5,r5,r11 + ldr r11,[sp,#16+4] @ c.hi + and r5,r5,r12 + and r12,r6,r10 + orr r6,r6,r10 + orr r5,r5,r9 @ Maj(a,b,c).lo + and r6,r6,r11 + adds r5,r5,r3 + orr r6,r6,r12 @ Maj(a,b,c).hi + sub sp,sp,#8 + adc r6,r6,r4 @ h += T + tst r14,#1 + add r14,r14,#8 + tst r14,#1 + beq .L00_15 + ldr r9,[sp,#184+0] + ldr r10,[sp,#184+4] + bic r14,r14,#1 +.L16_79: + @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) + @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 + @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 + mov r3,r9,lsr#1 + ldr r11,[sp,#80+0] + mov r4,r10,lsr#1 + ldr r12,[sp,#80+4] + eor r3,r3,r10,lsl#31 + eor r4,r4,r9,lsl#31 + eor r3,r3,r9,lsr#8 + eor r4,r4,r10,lsr#8 + eor r3,r3,r10,lsl#24 + eor r4,r4,r9,lsl#24 + eor r3,r3,r9,lsr#7 + eor r4,r4,r10,lsr#7 + eor r3,r3,r10,lsl#25 + + @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) + @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26 + @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6 + mov r9,r11,lsr#19 + mov r10,r12,lsr#19 + eor r9,r9,r12,lsl#13 + eor r10,r10,r11,lsl#13 + eor r9,r9,r12,lsr#29 + eor r10,r10,r11,lsr#29 + eor r9,r9,r11,lsl#3 + eor r10,r10,r12,lsl#3 + eor r9,r9,r11,lsr#6 + eor r10,r10,r12,lsr#6 + ldr r11,[sp,#120+0] + eor r9,r9,r12,lsl#26 + + ldr r12,[sp,#120+4] + adds r3,r3,r9 + ldr r9,[sp,#192+0] + adc r4,r4,r10 + + ldr r10,[sp,#192+4] + adds r3,r3,r11 + adc r4,r4,r12 + adds r3,r3,r9 + adc r4,r4,r10 + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 + mov r9,r7,lsr#14 + str r3,[sp,#64+0] + mov r10,r8,lsr#14 + str r4,[sp,#64+4] + eor r9,r9,r8,lsl#18 + ldr r11,[sp,#56+0] @ h.lo + eor r10,r10,r7,lsl#18 + ldr r12,[sp,#56+4] @ h.hi + eor r9,r9,r7,lsr#18 + eor r10,r10,r8,lsr#18 + eor r9,r9,r8,lsl#14 + eor r10,r10,r7,lsl#14 + eor r9,r9,r8,lsr#9 + eor r10,r10,r7,lsr#9 + eor r9,r9,r7,lsl#23 + eor r10,r10,r8,lsl#23 @ Sigma1(e) + adds r3,r3,r9 + ldr r9,[sp,#40+0] @ f.lo + adc r4,r4,r10 @ T += Sigma1(e) + ldr r10,[sp,#40+4] @ f.hi + adds r3,r3,r11 + ldr r11,[sp,#48+0] @ g.lo + adc r4,r4,r12 @ T += h + ldr r12,[sp,#48+4] @ g.hi + + eor r9,r9,r11 + str r7,[sp,#32+0] + eor r10,r10,r12 + str r8,[sp,#32+4] + and r9,r9,r7 + str r5,[sp,#0+0] + and r10,r10,r8 + str r6,[sp,#0+4] + eor r9,r9,r11 + ldr r11,[r14,#LO] @ K[i].lo + eor r10,r10,r12 @ Ch(e,f,g) + ldr r12,[r14,#HI] @ K[i].hi + + adds r3,r3,r9 + ldr r7,[sp,#24+0] @ d.lo + adc r4,r4,r10 @ T += Ch(e,f,g) + ldr r8,[sp,#24+4] @ d.hi + adds r3,r3,r11 + and r9,r11,#0xff + adc r4,r4,r12 @ T += K[i] + adds r7,r7,r3 + ldr r11,[sp,#8+0] @ b.lo + adc r8,r8,r4 @ d += T + teq r9,#23 + + ldr r12,[sp,#16+0] @ c.lo + orreq r14,r14,#1 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 + mov r9,r5,lsr#28 + mov r10,r6,lsr#28 + eor r9,r9,r6,lsl#4 + eor r10,r10,r5,lsl#4 + eor r9,r9,r6,lsr#2 + eor r10,r10,r5,lsr#2 + eor r9,r9,r5,lsl#30 + eor r10,r10,r6,lsl#30 + eor r9,r9,r6,lsr#7 + eor r10,r10,r5,lsr#7 + eor r9,r9,r5,lsl#25 + eor r10,r10,r6,lsl#25 @ Sigma0(a) + adds r3,r3,r9 + and r9,r5,r11 + adc r4,r4,r10 @ T += Sigma0(a) + + ldr r10,[sp,#8+4] @ b.hi + orr r5,r5,r11 + ldr r11,[sp,#16+4] @ c.hi + and r5,r5,r12 + and r12,r6,r10 + orr r6,r6,r10 + orr r5,r5,r9 @ Maj(a,b,c).lo + and r6,r6,r11 + adds r5,r5,r3 + orr r6,r6,r12 @ Maj(a,b,c).hi + sub sp,sp,#8 + adc r6,r6,r4 @ h += T + tst r14,#1 + add r14,r14,#8 + ldreq r9,[sp,#184+0] + ldreq r10,[sp,#184+4] + beq .L16_79 + bic r14,r14,#1 + + ldr r3,[sp,#8+0] + ldr r4,[sp,#8+4] + ldr r9, [r0,#0+LO] + ldr r10, [r0,#0+HI] + ldr r11, [r0,#8+LO] + ldr r12, [r0,#8+HI] + adds r9,r5,r9 + str r9, [r0,#0+LO] + adc r10,r6,r10 + str r10, [r0,#0+HI] + adds r11,r3,r11 + str r11, [r0,#8+LO] + adc r12,r4,r12 + str r12, [r0,#8+HI] + + ldr r5,[sp,#16+0] + ldr r6,[sp,#16+4] + ldr r3,[sp,#24+0] + ldr r4,[sp,#24+4] + ldr r9, [r0,#16+LO] + ldr r10, [r0,#16+HI] + ldr r11, [r0,#24+LO] + ldr r12, [r0,#24+HI] + adds r9,r5,r9 + str r9, [r0,#16+LO] + adc r10,r6,r10 + str r10, [r0,#16+HI] + adds r11,r3,r11 + str r11, [r0,#24+LO] + adc r12,r4,r12 + str r12, [r0,#24+HI] + + ldr r3,[sp,#40+0] + ldr r4,[sp,#40+4] + ldr r9, [r0,#32+LO] + ldr r10, [r0,#32+HI] + ldr r11, [r0,#40+LO] + ldr r12, [r0,#40+HI] + adds r7,r7,r9 + str r7,[r0,#32+LO] + adc r8,r8,r10 + str r8,[r0,#32+HI] + adds r11,r3,r11 + str r11, [r0,#40+LO] + adc r12,r4,r12 + str r12, [r0,#40+HI] + + ldr r5,[sp,#48+0] + ldr r6,[sp,#48+4] + ldr r3,[sp,#56+0] + ldr r4,[sp,#56+4] + ldr r9, [r0,#48+LO] + ldr r10, [r0,#48+HI] + ldr r11, [r0,#56+LO] + ldr r12, [r0,#56+HI] + adds r9,r5,r9 + str r9, [r0,#48+LO] + adc r10,r6,r10 + str r10, [r0,#48+HI] + adds r11,r3,r11 + str r11, [r0,#56+LO] + adc r12,r4,r12 + str r12, [r0,#56+HI] + + add sp,sp,#640 + sub r14,r14,#640 + + teq r1,r2 + bne .Loop + + add sp,sp,#8*9 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT) +.fpu neon + +.align 4 +.LNEON: + dmb @ errata #451034 on early Cortex A8 + vstmdb sp!,{d8-d15} @ ABI specification says so + sub r3,r3,#672 @ K512 + vldmia r0,{d16-d23} @ load context +.Loop_neon: + vshr.u64 d24,d20,#14 @ 0 +#if 0<16 + vld1.64 {d0},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d20,#18 + vshr.u64 d26,d20,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vsli.64 d26,d20,#23 +#if 0<16 && defined(__ARMEL__) + vrev64.8 d0,d0 +#endif + vadd.i64 d27,d28,d23 + veor d29,d21,d22 + veor d24,d25 + vand d29,d20 + veor d24,d26 @ Sigma1(e) + veor d29,d22 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d16,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d16,#34 + vshr.u64 d26,d16,#39 + vsli.64 d24,d16,#36 + vsli.64 d25,d16,#30 + vsli.64 d26,d16,#25 + vadd.i64 d27,d0 + vorr d30,d16,d18 + vand d29,d16,d18 + veor d23,d24,d25 + vand d30,d17 + veor d23,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d23,d27 + vadd.i64 d19,d27 + vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 1 +#if 1<16 + vld1.64 {d1},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vsli.64 d26,d19,#23 +#if 1<16 && defined(__ARMEL__) + vrev64.8 d1,d1 +#endif + vadd.i64 d27,d28,d22 + veor d29,d20,d21 + veor d24,d25 + vand d29,d19 + veor d24,d26 @ Sigma1(e) + veor d29,d21 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d23,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d23,#34 + vshr.u64 d26,d23,#39 + vsli.64 d24,d23,#36 + vsli.64 d25,d23,#30 + vsli.64 d26,d23,#25 + vadd.i64 d27,d1 + vorr d30,d23,d17 + vand d29,d23,d17 + veor d22,d24,d25 + vand d30,d16 + veor d22,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d22,d27 + vadd.i64 d18,d27 + vadd.i64 d22,d30 + vshr.u64 d24,d18,#14 @ 2 +#if 2<16 + vld1.64 {d2},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d18,#18 + vshr.u64 d26,d18,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vsli.64 d26,d18,#23 +#if 2<16 && defined(__ARMEL__) + vrev64.8 d2,d2 +#endif + vadd.i64 d27,d28,d21 + veor d29,d19,d20 + veor d24,d25 + vand d29,d18 + veor d24,d26 @ Sigma1(e) + veor d29,d20 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d22,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d22,#34 + vshr.u64 d26,d22,#39 + vsli.64 d24,d22,#36 + vsli.64 d25,d22,#30 + vsli.64 d26,d22,#25 + vadd.i64 d27,d2 + vorr d30,d22,d16 + vand d29,d22,d16 + veor d21,d24,d25 + vand d30,d23 + veor d21,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d21,d27 + vadd.i64 d17,d27 + vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 3 +#if 3<16 + vld1.64 {d3},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vsli.64 d26,d17,#23 +#if 3<16 && defined(__ARMEL__) + vrev64.8 d3,d3 +#endif + vadd.i64 d27,d28,d20 + veor d29,d18,d19 + veor d24,d25 + vand d29,d17 + veor d24,d26 @ Sigma1(e) + veor d29,d19 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d21,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d21,#34 + vshr.u64 d26,d21,#39 + vsli.64 d24,d21,#36 + vsli.64 d25,d21,#30 + vsli.64 d26,d21,#25 + vadd.i64 d27,d3 + vorr d30,d21,d23 + vand d29,d21,d23 + veor d20,d24,d25 + vand d30,d22 + veor d20,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d20,d27 + vadd.i64 d16,d27 + vadd.i64 d20,d30 + vshr.u64 d24,d16,#14 @ 4 +#if 4<16 + vld1.64 {d4},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d16,#18 + vshr.u64 d26,d16,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vsli.64 d26,d16,#23 +#if 4<16 && defined(__ARMEL__) + vrev64.8 d4,d4 +#endif + vadd.i64 d27,d28,d19 + veor d29,d17,d18 + veor d24,d25 + vand d29,d16 + veor d24,d26 @ Sigma1(e) + veor d29,d18 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d20,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d20,#34 + vshr.u64 d26,d20,#39 + vsli.64 d24,d20,#36 + vsli.64 d25,d20,#30 + vsli.64 d26,d20,#25 + vadd.i64 d27,d4 + vorr d30,d20,d22 + vand d29,d20,d22 + veor d19,d24,d25 + vand d30,d21 + veor d19,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d19,d27 + vadd.i64 d23,d27 + vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 5 +#if 5<16 + vld1.64 {d5},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vsli.64 d26,d23,#23 +#if 5<16 && defined(__ARMEL__) + vrev64.8 d5,d5 +#endif + vadd.i64 d27,d28,d18 + veor d29,d16,d17 + veor d24,d25 + vand d29,d23 + veor d24,d26 @ Sigma1(e) + veor d29,d17 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d19,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d19,#34 + vshr.u64 d26,d19,#39 + vsli.64 d24,d19,#36 + vsli.64 d25,d19,#30 + vsli.64 d26,d19,#25 + vadd.i64 d27,d5 + vorr d30,d19,d21 + vand d29,d19,d21 + veor d18,d24,d25 + vand d30,d20 + veor d18,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d18,d27 + vadd.i64 d22,d27 + vadd.i64 d18,d30 + vshr.u64 d24,d22,#14 @ 6 +#if 6<16 + vld1.64 {d6},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d22,#18 + vshr.u64 d26,d22,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vsli.64 d26,d22,#23 +#if 6<16 && defined(__ARMEL__) + vrev64.8 d6,d6 +#endif + vadd.i64 d27,d28,d17 + veor d29,d23,d16 + veor d24,d25 + vand d29,d22 + veor d24,d26 @ Sigma1(e) + veor d29,d16 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d18,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d18,#34 + vshr.u64 d26,d18,#39 + vsli.64 d24,d18,#36 + vsli.64 d25,d18,#30 + vsli.64 d26,d18,#25 + vadd.i64 d27,d6 + vorr d30,d18,d20 + vand d29,d18,d20 + veor d17,d24,d25 + vand d30,d19 + veor d17,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d17,d27 + vadd.i64 d21,d27 + vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 7 +#if 7<16 + vld1.64 {d7},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vsli.64 d26,d21,#23 +#if 7<16 && defined(__ARMEL__) + vrev64.8 d7,d7 +#endif + vadd.i64 d27,d28,d16 + veor d29,d22,d23 + veor d24,d25 + vand d29,d21 + veor d24,d26 @ Sigma1(e) + veor d29,d23 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d17,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d17,#34 + vshr.u64 d26,d17,#39 + vsli.64 d24,d17,#36 + vsli.64 d25,d17,#30 + vsli.64 d26,d17,#25 + vadd.i64 d27,d7 + vorr d30,d17,d19 + vand d29,d17,d19 + veor d16,d24,d25 + vand d30,d18 + veor d16,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d16,d27 + vadd.i64 d20,d27 + vadd.i64 d16,d30 + vshr.u64 d24,d20,#14 @ 8 +#if 8<16 + vld1.64 {d8},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d20,#18 + vshr.u64 d26,d20,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vsli.64 d26,d20,#23 +#if 8<16 && defined(__ARMEL__) + vrev64.8 d8,d8 +#endif + vadd.i64 d27,d28,d23 + veor d29,d21,d22 + veor d24,d25 + vand d29,d20 + veor d24,d26 @ Sigma1(e) + veor d29,d22 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d16,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d16,#34 + vshr.u64 d26,d16,#39 + vsli.64 d24,d16,#36 + vsli.64 d25,d16,#30 + vsli.64 d26,d16,#25 + vadd.i64 d27,d8 + vorr d30,d16,d18 + vand d29,d16,d18 + veor d23,d24,d25 + vand d30,d17 + veor d23,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d23,d27 + vadd.i64 d19,d27 + vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 9 +#if 9<16 + vld1.64 {d9},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vsli.64 d26,d19,#23 +#if 9<16 && defined(__ARMEL__) + vrev64.8 d9,d9 +#endif + vadd.i64 d27,d28,d22 + veor d29,d20,d21 + veor d24,d25 + vand d29,d19 + veor d24,d26 @ Sigma1(e) + veor d29,d21 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d23,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d23,#34 + vshr.u64 d26,d23,#39 + vsli.64 d24,d23,#36 + vsli.64 d25,d23,#30 + vsli.64 d26,d23,#25 + vadd.i64 d27,d9 + vorr d30,d23,d17 + vand d29,d23,d17 + veor d22,d24,d25 + vand d30,d16 + veor d22,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d22,d27 + vadd.i64 d18,d27 + vadd.i64 d22,d30 + vshr.u64 d24,d18,#14 @ 10 +#if 10<16 + vld1.64 {d10},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d18,#18 + vshr.u64 d26,d18,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vsli.64 d26,d18,#23 +#if 10<16 && defined(__ARMEL__) + vrev64.8 d10,d10 +#endif + vadd.i64 d27,d28,d21 + veor d29,d19,d20 + veor d24,d25 + vand d29,d18 + veor d24,d26 @ Sigma1(e) + veor d29,d20 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d22,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d22,#34 + vshr.u64 d26,d22,#39 + vsli.64 d24,d22,#36 + vsli.64 d25,d22,#30 + vsli.64 d26,d22,#25 + vadd.i64 d27,d10 + vorr d30,d22,d16 + vand d29,d22,d16 + veor d21,d24,d25 + vand d30,d23 + veor d21,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d21,d27 + vadd.i64 d17,d27 + vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 11 +#if 11<16 + vld1.64 {d11},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vsli.64 d26,d17,#23 +#if 11<16 && defined(__ARMEL__) + vrev64.8 d11,d11 +#endif + vadd.i64 d27,d28,d20 + veor d29,d18,d19 + veor d24,d25 + vand d29,d17 + veor d24,d26 @ Sigma1(e) + veor d29,d19 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d21,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d21,#34 + vshr.u64 d26,d21,#39 + vsli.64 d24,d21,#36 + vsli.64 d25,d21,#30 + vsli.64 d26,d21,#25 + vadd.i64 d27,d11 + vorr d30,d21,d23 + vand d29,d21,d23 + veor d20,d24,d25 + vand d30,d22 + veor d20,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d20,d27 + vadd.i64 d16,d27 + vadd.i64 d20,d30 + vshr.u64 d24,d16,#14 @ 12 +#if 12<16 + vld1.64 {d12},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d16,#18 + vshr.u64 d26,d16,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vsli.64 d26,d16,#23 +#if 12<16 && defined(__ARMEL__) + vrev64.8 d12,d12 +#endif + vadd.i64 d27,d28,d19 + veor d29,d17,d18 + veor d24,d25 + vand d29,d16 + veor d24,d26 @ Sigma1(e) + veor d29,d18 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d20,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d20,#34 + vshr.u64 d26,d20,#39 + vsli.64 d24,d20,#36 + vsli.64 d25,d20,#30 + vsli.64 d26,d20,#25 + vadd.i64 d27,d12 + vorr d30,d20,d22 + vand d29,d20,d22 + veor d19,d24,d25 + vand d30,d21 + veor d19,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d19,d27 + vadd.i64 d23,d27 + vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 13 +#if 13<16 + vld1.64 {d13},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vsli.64 d26,d23,#23 +#if 13<16 && defined(__ARMEL__) + vrev64.8 d13,d13 +#endif + vadd.i64 d27,d28,d18 + veor d29,d16,d17 + veor d24,d25 + vand d29,d23 + veor d24,d26 @ Sigma1(e) + veor d29,d17 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d19,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d19,#34 + vshr.u64 d26,d19,#39 + vsli.64 d24,d19,#36 + vsli.64 d25,d19,#30 + vsli.64 d26,d19,#25 + vadd.i64 d27,d13 + vorr d30,d19,d21 + vand d29,d19,d21 + veor d18,d24,d25 + vand d30,d20 + veor d18,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d18,d27 + vadd.i64 d22,d27 + vadd.i64 d18,d30 + vshr.u64 d24,d22,#14 @ 14 +#if 14<16 + vld1.64 {d14},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d22,#18 + vshr.u64 d26,d22,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vsli.64 d26,d22,#23 +#if 14<16 && defined(__ARMEL__) + vrev64.8 d14,d14 +#endif + vadd.i64 d27,d28,d17 + veor d29,d23,d16 + veor d24,d25 + vand d29,d22 + veor d24,d26 @ Sigma1(e) + veor d29,d16 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d18,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d18,#34 + vshr.u64 d26,d18,#39 + vsli.64 d24,d18,#36 + vsli.64 d25,d18,#30 + vsli.64 d26,d18,#25 + vadd.i64 d27,d14 + vorr d30,d18,d20 + vand d29,d18,d20 + veor d17,d24,d25 + vand d30,d19 + veor d17,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d17,d27 + vadd.i64 d21,d27 + vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 15 +#if 15<16 + vld1.64 {d15},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vsli.64 d26,d21,#23 +#if 15<16 && defined(__ARMEL__) + vrev64.8 d15,d15 +#endif + vadd.i64 d27,d28,d16 + veor d29,d22,d23 + veor d24,d25 + vand d29,d21 + veor d24,d26 @ Sigma1(e) + veor d29,d23 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d17,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d17,#34 + vshr.u64 d26,d17,#39 + vsli.64 d24,d17,#36 + vsli.64 d25,d17,#30 + vsli.64 d26,d17,#25 + vadd.i64 d27,d15 + vorr d30,d17,d19 + vand d29,d17,d19 + veor d16,d24,d25 + vand d30,d18 + veor d16,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d16,d27 + vadd.i64 d20,d27 + vadd.i64 d16,d30 + mov r12,#4 +.L16_79_neon: + subs r12,#1 + vshr.u64 q12,q7,#19 + vshr.u64 q13,q7,#61 + vshr.u64 q15,q7,#6 + vsli.64 q12,q7,#45 + vext.8 q14,q0,q1,#8 @ X[i+1] + vsli.64 q13,q7,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q0,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q4,q5,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d20,#14 @ from NEON_00_15 + vadd.i64 q0,q14 + vshr.u64 d25,d20,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d20,#41 @ from NEON_00_15 + vadd.i64 q0,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vsli.64 d26,d20,#23 +#if 16<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d23 + veor d29,d21,d22 + veor d24,d25 + vand d29,d20 + veor d24,d26 @ Sigma1(e) + veor d29,d22 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d16,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d16,#34 + vshr.u64 d26,d16,#39 + vsli.64 d24,d16,#36 + vsli.64 d25,d16,#30 + vsli.64 d26,d16,#25 + vadd.i64 d27,d0 + vorr d30,d16,d18 + vand d29,d16,d18 + veor d23,d24,d25 + vand d30,d17 + veor d23,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d23,d27 + vadd.i64 d19,d27 + vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 17 +#if 17<16 + vld1.64 {d1},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vsli.64 d26,d19,#23 +#if 17<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d22 + veor d29,d20,d21 + veor d24,d25 + vand d29,d19 + veor d24,d26 @ Sigma1(e) + veor d29,d21 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d23,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d23,#34 + vshr.u64 d26,d23,#39 + vsli.64 d24,d23,#36 + vsli.64 d25,d23,#30 + vsli.64 d26,d23,#25 + vadd.i64 d27,d1 + vorr d30,d23,d17 + vand d29,d23,d17 + veor d22,d24,d25 + vand d30,d16 + veor d22,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d22,d27 + vadd.i64 d18,d27 + vadd.i64 d22,d30 + vshr.u64 q12,q0,#19 + vshr.u64 q13,q0,#61 + vshr.u64 q15,q0,#6 + vsli.64 q12,q0,#45 + vext.8 q14,q1,q2,#8 @ X[i+1] + vsli.64 q13,q0,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q1,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q5,q6,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d18,#14 @ from NEON_00_15 + vadd.i64 q1,q14 + vshr.u64 d25,d18,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d18,#41 @ from NEON_00_15 + vadd.i64 q1,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vsli.64 d26,d18,#23 +#if 18<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d21 + veor d29,d19,d20 + veor d24,d25 + vand d29,d18 + veor d24,d26 @ Sigma1(e) + veor d29,d20 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d22,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d22,#34 + vshr.u64 d26,d22,#39 + vsli.64 d24,d22,#36 + vsli.64 d25,d22,#30 + vsli.64 d26,d22,#25 + vadd.i64 d27,d2 + vorr d30,d22,d16 + vand d29,d22,d16 + veor d21,d24,d25 + vand d30,d23 + veor d21,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d21,d27 + vadd.i64 d17,d27 + vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 19 +#if 19<16 + vld1.64 {d3},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vsli.64 d26,d17,#23 +#if 19<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d20 + veor d29,d18,d19 + veor d24,d25 + vand d29,d17 + veor d24,d26 @ Sigma1(e) + veor d29,d19 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d21,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d21,#34 + vshr.u64 d26,d21,#39 + vsli.64 d24,d21,#36 + vsli.64 d25,d21,#30 + vsli.64 d26,d21,#25 + vadd.i64 d27,d3 + vorr d30,d21,d23 + vand d29,d21,d23 + veor d20,d24,d25 + vand d30,d22 + veor d20,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d20,d27 + vadd.i64 d16,d27 + vadd.i64 d20,d30 + vshr.u64 q12,q1,#19 + vshr.u64 q13,q1,#61 + vshr.u64 q15,q1,#6 + vsli.64 q12,q1,#45 + vext.8 q14,q2,q3,#8 @ X[i+1] + vsli.64 q13,q1,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q2,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q6,q7,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d16,#14 @ from NEON_00_15 + vadd.i64 q2,q14 + vshr.u64 d25,d16,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d16,#41 @ from NEON_00_15 + vadd.i64 q2,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vsli.64 d26,d16,#23 +#if 20<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d19 + veor d29,d17,d18 + veor d24,d25 + vand d29,d16 + veor d24,d26 @ Sigma1(e) + veor d29,d18 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d20,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d20,#34 + vshr.u64 d26,d20,#39 + vsli.64 d24,d20,#36 + vsli.64 d25,d20,#30 + vsli.64 d26,d20,#25 + vadd.i64 d27,d4 + vorr d30,d20,d22 + vand d29,d20,d22 + veor d19,d24,d25 + vand d30,d21 + veor d19,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d19,d27 + vadd.i64 d23,d27 + vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 21 +#if 21<16 + vld1.64 {d5},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vsli.64 d26,d23,#23 +#if 21<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d18 + veor d29,d16,d17 + veor d24,d25 + vand d29,d23 + veor d24,d26 @ Sigma1(e) + veor d29,d17 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d19,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d19,#34 + vshr.u64 d26,d19,#39 + vsli.64 d24,d19,#36 + vsli.64 d25,d19,#30 + vsli.64 d26,d19,#25 + vadd.i64 d27,d5 + vorr d30,d19,d21 + vand d29,d19,d21 + veor d18,d24,d25 + vand d30,d20 + veor d18,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d18,d27 + vadd.i64 d22,d27 + vadd.i64 d18,d30 + vshr.u64 q12,q2,#19 + vshr.u64 q13,q2,#61 + vshr.u64 q15,q2,#6 + vsli.64 q12,q2,#45 + vext.8 q14,q3,q4,#8 @ X[i+1] + vsli.64 q13,q2,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q3,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q7,q0,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d22,#14 @ from NEON_00_15 + vadd.i64 q3,q14 + vshr.u64 d25,d22,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d22,#41 @ from NEON_00_15 + vadd.i64 q3,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vsli.64 d26,d22,#23 +#if 22<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d17 + veor d29,d23,d16 + veor d24,d25 + vand d29,d22 + veor d24,d26 @ Sigma1(e) + veor d29,d16 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d18,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d18,#34 + vshr.u64 d26,d18,#39 + vsli.64 d24,d18,#36 + vsli.64 d25,d18,#30 + vsli.64 d26,d18,#25 + vadd.i64 d27,d6 + vorr d30,d18,d20 + vand d29,d18,d20 + veor d17,d24,d25 + vand d30,d19 + veor d17,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d17,d27 + vadd.i64 d21,d27 + vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 23 +#if 23<16 + vld1.64 {d7},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vsli.64 d26,d21,#23 +#if 23<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d16 + veor d29,d22,d23 + veor d24,d25 + vand d29,d21 + veor d24,d26 @ Sigma1(e) + veor d29,d23 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d17,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d17,#34 + vshr.u64 d26,d17,#39 + vsli.64 d24,d17,#36 + vsli.64 d25,d17,#30 + vsli.64 d26,d17,#25 + vadd.i64 d27,d7 + vorr d30,d17,d19 + vand d29,d17,d19 + veor d16,d24,d25 + vand d30,d18 + veor d16,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d16,d27 + vadd.i64 d20,d27 + vadd.i64 d16,d30 + vshr.u64 q12,q3,#19 + vshr.u64 q13,q3,#61 + vshr.u64 q15,q3,#6 + vsli.64 q12,q3,#45 + vext.8 q14,q4,q5,#8 @ X[i+1] + vsli.64 q13,q3,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q4,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q0,q1,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d20,#14 @ from NEON_00_15 + vadd.i64 q4,q14 + vshr.u64 d25,d20,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d20,#41 @ from NEON_00_15 + vadd.i64 q4,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vsli.64 d26,d20,#23 +#if 24<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d23 + veor d29,d21,d22 + veor d24,d25 + vand d29,d20 + veor d24,d26 @ Sigma1(e) + veor d29,d22 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d16,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d16,#34 + vshr.u64 d26,d16,#39 + vsli.64 d24,d16,#36 + vsli.64 d25,d16,#30 + vsli.64 d26,d16,#25 + vadd.i64 d27,d8 + vorr d30,d16,d18 + vand d29,d16,d18 + veor d23,d24,d25 + vand d30,d17 + veor d23,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d23,d27 + vadd.i64 d19,d27 + vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 25 +#if 25<16 + vld1.64 {d9},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vsli.64 d26,d19,#23 +#if 25<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d22 + veor d29,d20,d21 + veor d24,d25 + vand d29,d19 + veor d24,d26 @ Sigma1(e) + veor d29,d21 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d23,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d23,#34 + vshr.u64 d26,d23,#39 + vsli.64 d24,d23,#36 + vsli.64 d25,d23,#30 + vsli.64 d26,d23,#25 + vadd.i64 d27,d9 + vorr d30,d23,d17 + vand d29,d23,d17 + veor d22,d24,d25 + vand d30,d16 + veor d22,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d22,d27 + vadd.i64 d18,d27 + vadd.i64 d22,d30 + vshr.u64 q12,q4,#19 + vshr.u64 q13,q4,#61 + vshr.u64 q15,q4,#6 + vsli.64 q12,q4,#45 + vext.8 q14,q5,q6,#8 @ X[i+1] + vsli.64 q13,q4,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q5,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q1,q2,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d18,#14 @ from NEON_00_15 + vadd.i64 q5,q14 + vshr.u64 d25,d18,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d18,#41 @ from NEON_00_15 + vadd.i64 q5,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vsli.64 d26,d18,#23 +#if 26<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d21 + veor d29,d19,d20 + veor d24,d25 + vand d29,d18 + veor d24,d26 @ Sigma1(e) + veor d29,d20 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d22,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d22,#34 + vshr.u64 d26,d22,#39 + vsli.64 d24,d22,#36 + vsli.64 d25,d22,#30 + vsli.64 d26,d22,#25 + vadd.i64 d27,d10 + vorr d30,d22,d16 + vand d29,d22,d16 + veor d21,d24,d25 + vand d30,d23 + veor d21,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d21,d27 + vadd.i64 d17,d27 + vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 27 +#if 27<16 + vld1.64 {d11},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vsli.64 d26,d17,#23 +#if 27<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d20 + veor d29,d18,d19 + veor d24,d25 + vand d29,d17 + veor d24,d26 @ Sigma1(e) + veor d29,d19 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d21,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d21,#34 + vshr.u64 d26,d21,#39 + vsli.64 d24,d21,#36 + vsli.64 d25,d21,#30 + vsli.64 d26,d21,#25 + vadd.i64 d27,d11 + vorr d30,d21,d23 + vand d29,d21,d23 + veor d20,d24,d25 + vand d30,d22 + veor d20,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d20,d27 + vadd.i64 d16,d27 + vadd.i64 d20,d30 + vshr.u64 q12,q5,#19 + vshr.u64 q13,q5,#61 + vshr.u64 q15,q5,#6 + vsli.64 q12,q5,#45 + vext.8 q14,q6,q7,#8 @ X[i+1] + vsli.64 q13,q5,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q6,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q2,q3,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d16,#14 @ from NEON_00_15 + vadd.i64 q6,q14 + vshr.u64 d25,d16,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d16,#41 @ from NEON_00_15 + vadd.i64 q6,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vsli.64 d26,d16,#23 +#if 28<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d19 + veor d29,d17,d18 + veor d24,d25 + vand d29,d16 + veor d24,d26 @ Sigma1(e) + veor d29,d18 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d20,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d20,#34 + vshr.u64 d26,d20,#39 + vsli.64 d24,d20,#36 + vsli.64 d25,d20,#30 + vsli.64 d26,d20,#25 + vadd.i64 d27,d12 + vorr d30,d20,d22 + vand d29,d20,d22 + veor d19,d24,d25 + vand d30,d21 + veor d19,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d19,d27 + vadd.i64 d23,d27 + vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 29 +#if 29<16 + vld1.64 {d13},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vsli.64 d26,d23,#23 +#if 29<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d18 + veor d29,d16,d17 + veor d24,d25 + vand d29,d23 + veor d24,d26 @ Sigma1(e) + veor d29,d17 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d19,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d19,#34 + vshr.u64 d26,d19,#39 + vsli.64 d24,d19,#36 + vsli.64 d25,d19,#30 + vsli.64 d26,d19,#25 + vadd.i64 d27,d13 + vorr d30,d19,d21 + vand d29,d19,d21 + veor d18,d24,d25 + vand d30,d20 + veor d18,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d18,d27 + vadd.i64 d22,d27 + vadd.i64 d18,d30 + vshr.u64 q12,q6,#19 + vshr.u64 q13,q6,#61 + vshr.u64 q15,q6,#6 + vsli.64 q12,q6,#45 + vext.8 q14,q7,q0,#8 @ X[i+1] + vsli.64 q13,q6,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q7,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q3,q4,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d22,#14 @ from NEON_00_15 + vadd.i64 q7,q14 + vshr.u64 d25,d22,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d22,#41 @ from NEON_00_15 + vadd.i64 q7,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vsli.64 d26,d22,#23 +#if 30<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d17 + veor d29,d23,d16 + veor d24,d25 + vand d29,d22 + veor d24,d26 @ Sigma1(e) + veor d29,d16 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d18,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d18,#34 + vshr.u64 d26,d18,#39 + vsli.64 d24,d18,#36 + vsli.64 d25,d18,#30 + vsli.64 d26,d18,#25 + vadd.i64 d27,d14 + vorr d30,d18,d20 + vand d29,d18,d20 + veor d17,d24,d25 + vand d30,d19 + veor d17,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d17,d27 + vadd.i64 d21,d27 + vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 31 +#if 31<16 + vld1.64 {d15},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vsli.64 d26,d21,#23 +#if 31<16 && defined(__ARMEL__) + vrev64.8 , +#endif + vadd.i64 d27,d28,d16 + veor d29,d22,d23 + veor d24,d25 + vand d29,d21 + veor d24,d26 @ Sigma1(e) + veor d29,d23 @ Ch(e,f,g) + vadd.i64 d27,d24 + vshr.u64 d24,d17,#28 + vadd.i64 d27,d29 + vshr.u64 d25,d17,#34 + vshr.u64 d26,d17,#39 + vsli.64 d24,d17,#36 + vsli.64 d25,d17,#30 + vsli.64 d26,d17,#25 + vadd.i64 d27,d15 + vorr d30,d17,d19 + vand d29,d17,d19 + veor d16,d24,d25 + vand d30,d18 + veor d16,d26 @ Sigma0(a) + vorr d30,d29 @ Maj(a,b,c) + vadd.i64 d16,d27 + vadd.i64 d20,d27 + vadd.i64 d16,d30 + bne .L16_79_neon + + vldmia r0,{d24-d31} @ load context to temp + vadd.i64 q8,q12 @ vectorized accumulate + vadd.i64 q9,q13 + vadd.i64 q10,q14 + vadd.i64 q11,q15 + vstmia r0,{d16-d23} @ save context + teq r1,r2 + sub r3,#640 @ rewind K512 + bne .Loop_neon + + vldmia sp!,{d8-d15} @ epilogue + .word 0xe12fff1e +#endif +.size sha512_block_data_order,.-sha512_block_data_order +.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by " +.align 2 +.comm OPENSSL_armcap_P,4,4 +#if defined(HAVE_GNU_STACK) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/ext/libressl/crypto/sha/sha512-elf-x86_64.S b/ext/libressl/crypto/sha/sha512-elf-x86_64.S new file mode 100644 index 0000000..1173407 --- /dev/null +++ b/ext/libressl/crypto/sha/sha512-elf-x86_64.S @@ -0,0 +1,1806 @@ +#include "x86_arch.h" +.text + +.globl sha512_block_data_order +.type sha512_block_data_order,@function +.align 16 +sha512_block_data_order: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $128+32,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +.Lprologue: + + leaq K512(%rip),%rbp + + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop + +.align 16 +.Lloop: + xorq %rdi,%rdi + movq 0(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,0(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 8(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,8(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 16(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,16(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 24(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,24(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 32(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,32(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 40(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,40(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 48(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,48(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 56(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,56(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + movq 64(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,64(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 72(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,72(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 80(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,80(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 88(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,88(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 96(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,96(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 104(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,104(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 112(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,112(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 120(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,120(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + movq 8(%rsp),%r13 + movq 112(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 72(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 0(%rsp),%r12 + movq %r8,%r13 + addq %r14,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,0(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 16(%rsp),%r13 + movq 120(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 80(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 8(%rsp),%r12 + movq %rdx,%r13 + addq %r14,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,8(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 24(%rsp),%r13 + movq 0(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 88(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 16(%rsp),%r12 + movq %rcx,%r13 + addq %r14,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,16(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 32(%rsp),%r13 + movq 8(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 96(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 24(%rsp),%r12 + movq %rbx,%r13 + addq %r14,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,24(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 40(%rsp),%r13 + movq 16(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 104(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 32(%rsp),%r12 + movq %rax,%r13 + addq %r14,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,32(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 48(%rsp),%r13 + movq 24(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 112(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 40(%rsp),%r12 + movq %r11,%r13 + addq %r14,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,40(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 56(%rsp),%r13 + movq 32(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 120(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 48(%rsp),%r12 + movq %r10,%r13 + addq %r14,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,48(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 64(%rsp),%r13 + movq 40(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 0(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 56(%rsp),%r12 + movq %r9,%r13 + addq %r14,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,56(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + movq 72(%rsp),%r13 + movq 48(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 8(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 64(%rsp),%r12 + movq %r8,%r13 + addq %r14,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,64(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 80(%rsp),%r13 + movq 56(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 16(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 72(%rsp),%r12 + movq %rdx,%r13 + addq %r14,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,72(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 88(%rsp),%r13 + movq 64(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 24(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 80(%rsp),%r12 + movq %rcx,%r13 + addq %r14,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,80(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 96(%rsp),%r13 + movq 72(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 32(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 88(%rsp),%r12 + movq %rbx,%r13 + addq %r14,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,88(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 104(%rsp),%r13 + movq 80(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 40(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 96(%rsp),%r12 + movq %rax,%r13 + addq %r14,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,96(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 112(%rsp),%r13 + movq 88(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 48(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 104(%rsp),%r12 + movq %r11,%r13 + addq %r14,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,104(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 120(%rsp),%r13 + movq 96(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 56(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 112(%rsp),%r12 + movq %r10,%r13 + addq %r14,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,112(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 0(%rsp),%r13 + movq 104(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 64(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 120(%rsp),%r12 + movq %r9,%r13 + addq %r14,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,120(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + cmpq $80,%rdi + jb .Lrounds_16_xx + + movq 128+0(%rsp),%rdi + leaq 128(%rsi),%rsi + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb .Lloop + + movq 128+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue: + retq +.size sha512_block_data_order,.-sha512_block_data_order +.align 64 +.type K512,@object +K512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +#if defined(HAVE_GNU_STACK) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/ext/libressl/crypto/sha/sha512-macosx-x86_64.S b/ext/libressl/crypto/sha/sha512-macosx-x86_64.S new file mode 100644 index 0000000..7581da4 --- /dev/null +++ b/ext/libressl/crypto/sha/sha512-macosx-x86_64.S @@ -0,0 +1,1803 @@ +#include "x86_arch.h" +.text + +.globl _sha512_block_data_order + +.p2align 4 +_sha512_block_data_order: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $128+32,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +L$prologue: + + leaq K512(%rip),%rbp + + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp L$loop + +.p2align 4 +L$loop: + xorq %rdi,%rdi + movq 0(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,0(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 8(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,8(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 16(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,16(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 24(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,24(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 32(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,32(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 40(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,40(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 48(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,48(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 56(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,56(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + movq 64(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,64(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 72(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,72(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 80(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,80(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 88(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,88(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 96(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,96(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 104(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,104(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 112(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,112(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 120(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,120(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + jmp L$rounds_16_xx +.p2align 4 +L$rounds_16_xx: + movq 8(%rsp),%r13 + movq 112(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 72(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 0(%rsp),%r12 + movq %r8,%r13 + addq %r14,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,0(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 16(%rsp),%r13 + movq 120(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 80(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 8(%rsp),%r12 + movq %rdx,%r13 + addq %r14,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,8(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 24(%rsp),%r13 + movq 0(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 88(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 16(%rsp),%r12 + movq %rcx,%r13 + addq %r14,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,16(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 32(%rsp),%r13 + movq 8(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 96(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 24(%rsp),%r12 + movq %rbx,%r13 + addq %r14,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,24(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 40(%rsp),%r13 + movq 16(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 104(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 32(%rsp),%r12 + movq %rax,%r13 + addq %r14,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,32(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 48(%rsp),%r13 + movq 24(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 112(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 40(%rsp),%r12 + movq %r11,%r13 + addq %r14,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,40(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 56(%rsp),%r13 + movq 32(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 120(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 48(%rsp),%r12 + movq %r10,%r13 + addq %r14,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,48(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 64(%rsp),%r13 + movq 40(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 0(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 56(%rsp),%r12 + movq %r9,%r13 + addq %r14,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,56(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + movq 72(%rsp),%r13 + movq 48(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 8(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 64(%rsp),%r12 + movq %r8,%r13 + addq %r14,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,64(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 80(%rsp),%r13 + movq 56(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 16(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 72(%rsp),%r12 + movq %rdx,%r13 + addq %r14,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,72(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 88(%rsp),%r13 + movq 64(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 24(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 80(%rsp),%r12 + movq %rcx,%r13 + addq %r14,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,80(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 96(%rsp),%r13 + movq 72(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 32(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 88(%rsp),%r12 + movq %rbx,%r13 + addq %r14,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,88(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 104(%rsp),%r13 + movq 80(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 40(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 96(%rsp),%r12 + movq %rax,%r13 + addq %r14,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,96(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 112(%rsp),%r13 + movq 88(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 48(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 104(%rsp),%r12 + movq %r11,%r13 + addq %r14,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,104(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 120(%rsp),%r13 + movq 96(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 56(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 112(%rsp),%r12 + movq %r10,%r13 + addq %r14,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,112(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 0(%rsp),%r13 + movq 104(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 64(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 120(%rsp),%r12 + movq %r9,%r13 + addq %r14,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,120(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + cmpq $80,%rdi + jb L$rounds_16_xx + + movq 128+0(%rsp),%rdi + leaq 128(%rsi),%rsi + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb L$loop + + movq 128+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue: + retq + +.p2align 6 + +K512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 diff --git a/ext/libressl/crypto/sha/sha512-masm-x86_64.S b/ext/libressl/crypto/sha/sha512-masm-x86_64.S new file mode 100644 index 0000000..4a2b9af --- /dev/null +++ b/ext/libressl/crypto/sha/sha512-masm-x86_64.S @@ -0,0 +1,1888 @@ +; 1 "crypto/sha/sha512-masm-x86_64.S.tmp" +; 1 "" 1 +; 1 "" 3 +; 340 "" 3 +; 1 "" 1 +; 1 "" 2 +; 1 "crypto/sha/sha512-masm-x86_64.S.tmp" 2 +OPTION DOTNAME + +; 1 "./crypto/x86_arch.h" 1 + + +; 16 "./crypto/x86_arch.h" + + + + + + + + + +; 40 "./crypto/x86_arch.h" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +; 3 "crypto/sha/sha512-masm-x86_64.S.tmp" 2 +.text$ SEGMENT ALIGN(64) 'CODE' + +PUBLIC sha512_block_data_order + +ALIGN 16 +sha512_block_data_order PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha512_block_data_order:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + mov r11,rsp + shl rdx,4 + sub rsp,16*8+4*8 + lea rdx,QWORD PTR[rdx*8+rsi] + and rsp,-64 + mov QWORD PTR[((128+0))+rsp],rdi + mov QWORD PTR[((128+8))+rsp],rsi + mov QWORD PTR[((128+16))+rsp],rdx + mov QWORD PTR[((128+24))+rsp],r11 +$L$prologue:: + + lea rbp,QWORD PTR[K512] + + mov rax,QWORD PTR[rdi] + mov rbx,QWORD PTR[8+rdi] + mov rcx,QWORD PTR[16+rdi] + mov rdx,QWORD PTR[24+rdi] + mov r8,QWORD PTR[32+rdi] + mov r9,QWORD PTR[40+rdi] + mov r10,QWORD PTR[48+rdi] + mov r11,QWORD PTR[56+rdi] + jmp $L$loop + +ALIGN 16 +$L$loop:: + xor rdi,rdi + mov r12,QWORD PTR[rsi] + mov r13,r8 + mov r14,rax + bswap r12 + ror r13,23 + mov r15,r9 + mov QWORD PTR[rsp],r12 + + ror r14,5 + xor r13,r8 + xor r15,r10 + + ror r13,4 + add r12,r11 + xor r14,rax + + add r12,QWORD PTR[rdi*8+rbp] + and r15,r8 + mov r11,rbx + + ror r14,6 + xor r13,r8 + xor r15,r10 + + xor r11,rcx + xor r14,rax + add r12,r15 + mov r15,rbx + + ror r13,14 + and r11,rax + and r15,rcx + + ror r14,28 + add r12,r13 + add r11,r15 + + add rdx,r12 + add r11,r12 + lea rdi,QWORD PTR[1+rdi] + add r11,r14 + + mov r12,QWORD PTR[8+rsi] + mov r13,rdx + mov r14,r11 + bswap r12 + ror r13,23 + mov r15,r8 + mov QWORD PTR[8+rsp],r12 + + ror r14,5 + xor r13,rdx + xor r15,r9 + + ror r13,4 + add r12,r10 + xor r14,r11 + + add r12,QWORD PTR[rdi*8+rbp] + and r15,rdx + mov r10,rax + + ror r14,6 + xor r13,rdx + xor r15,r9 + + xor r10,rbx + xor r14,r11 + add r12,r15 + mov r15,rax + + ror r13,14 + and r10,r11 + and r15,rbx + + ror r14,28 + add r12,r13 + add r10,r15 + + add rcx,r12 + add r10,r12 + lea rdi,QWORD PTR[1+rdi] + add r10,r14 + + mov r12,QWORD PTR[16+rsi] + mov r13,rcx + mov r14,r10 + bswap r12 + ror r13,23 + mov r15,rdx + mov QWORD PTR[16+rsp],r12 + + ror r14,5 + xor r13,rcx + xor r15,r8 + + ror r13,4 + add r12,r9 + xor r14,r10 + + add r12,QWORD PTR[rdi*8+rbp] + and r15,rcx + mov r9,r11 + + ror r14,6 + xor r13,rcx + xor r15,r8 + + xor r9,rax + xor r14,r10 + add r12,r15 + mov r15,r11 + + ror r13,14 + and r9,r10 + and r15,rax + + ror r14,28 + add r12,r13 + add r9,r15 + + add rbx,r12 + add r9,r12 + lea rdi,QWORD PTR[1+rdi] + add r9,r14 + + mov r12,QWORD PTR[24+rsi] + mov r13,rbx + mov r14,r9 + bswap r12 + ror r13,23 + mov r15,rcx + mov QWORD PTR[24+rsp],r12 + + ror r14,5 + xor r13,rbx + xor r15,rdx + + ror r13,4 + add r12,r8 + xor r14,r9 + + add r12,QWORD PTR[rdi*8+rbp] + and r15,rbx + mov r8,r10 + + ror r14,6 + xor r13,rbx + xor r15,rdx + + xor r8,r11 + xor r14,r9 + add r12,r15 + mov r15,r10 + + ror r13,14 + and r8,r9 + and r15,r11 + + ror r14,28 + add r12,r13 + add r8,r15 + + add rax,r12 + add r8,r12 + lea rdi,QWORD PTR[1+rdi] + add r8,r14 + + mov r12,QWORD PTR[32+rsi] + mov r13,rax + mov r14,r8 + bswap r12 + ror r13,23 + mov r15,rbx + mov QWORD PTR[32+rsp],r12 + + ror r14,5 + xor r13,rax + xor r15,rcx + + ror r13,4 + add r12,rdx + xor r14,r8 + + add r12,QWORD PTR[rdi*8+rbp] + and r15,rax + mov rdx,r9 + + ror r14,6 + xor r13,rax + xor r15,rcx + + xor rdx,r10 + xor r14,r8 + add r12,r15 + mov r15,r9 + + ror r13,14 + and rdx,r8 + and r15,r10 + + ror r14,28 + add r12,r13 + add rdx,r15 + + add r11,r12 + add rdx,r12 + lea rdi,QWORD PTR[1+rdi] + add rdx,r14 + + mov r12,QWORD PTR[40+rsi] + mov r13,r11 + mov r14,rdx + bswap r12 + ror r13,23 + mov r15,rax + mov QWORD PTR[40+rsp],r12 + + ror r14,5 + xor r13,r11 + xor r15,rbx + + ror r13,4 + add r12,rcx + xor r14,rdx + + add r12,QWORD PTR[rdi*8+rbp] + and r15,r11 + mov rcx,r8 + + ror r14,6 + xor r13,r11 + xor r15,rbx + + xor rcx,r9 + xor r14,rdx + add r12,r15 + mov r15,r8 + + ror r13,14 + and rcx,rdx + and r15,r9 + + ror r14,28 + add r12,r13 + add rcx,r15 + + add r10,r12 + add rcx,r12 + lea rdi,QWORD PTR[1+rdi] + add rcx,r14 + + mov r12,QWORD PTR[48+rsi] + mov r13,r10 + mov r14,rcx + bswap r12 + ror r13,23 + mov r15,r11 + mov QWORD PTR[48+rsp],r12 + + ror r14,5 + xor r13,r10 + xor r15,rax + + ror r13,4 + add r12,rbx + xor r14,rcx + + add r12,QWORD PTR[rdi*8+rbp] + and r15,r10 + mov rbx,rdx + + ror r14,6 + xor r13,r10 + xor r15,rax + + xor rbx,r8 + xor r14,rcx + add r12,r15 + mov r15,rdx + + ror r13,14 + and rbx,rcx + and r15,r8 + + ror r14,28 + add r12,r13 + add rbx,r15 + + add r9,r12 + add rbx,r12 + lea rdi,QWORD PTR[1+rdi] + add rbx,r14 + + mov r12,QWORD PTR[56+rsi] + mov r13,r9 + mov r14,rbx + bswap r12 + ror r13,23 + mov r15,r10 + mov QWORD PTR[56+rsp],r12 + + ror r14,5 + xor r13,r9 + xor r15,r11 + + ror r13,4 + add r12,rax + xor r14,rbx + + add r12,QWORD PTR[rdi*8+rbp] + and r15,r9 + mov rax,rcx + + ror r14,6 + xor r13,r9 + xor r15,r11 + + xor rax,rdx + xor r14,rbx + add r12,r15 + mov r15,rcx + + ror r13,14 + and rax,rbx + and r15,rdx + + ror r14,28 + add r12,r13 + add rax,r15 + + add r8,r12 + add rax,r12 + lea rdi,QWORD PTR[1+rdi] + add rax,r14 + + mov r12,QWORD PTR[64+rsi] + mov r13,r8 + mov r14,rax + bswap r12 + ror r13,23 + mov r15,r9 + mov QWORD PTR[64+rsp],r12 + + ror r14,5 + xor r13,r8 + xor r15,r10 + + ror r13,4 + add r12,r11 + xor r14,rax + + add r12,QWORD PTR[rdi*8+rbp] + and r15,r8 + mov r11,rbx + + ror r14,6 + xor r13,r8 + xor r15,r10 + + xor r11,rcx + xor r14,rax + add r12,r15 + mov r15,rbx + + ror r13,14 + and r11,rax + and r15,rcx + + ror r14,28 + add r12,r13 + add r11,r15 + + add rdx,r12 + add r11,r12 + lea rdi,QWORD PTR[1+rdi] + add r11,r14 + + mov r12,QWORD PTR[72+rsi] + mov r13,rdx + mov r14,r11 + bswap r12 + ror r13,23 + mov r15,r8 + mov QWORD PTR[72+rsp],r12 + + ror r14,5 + xor r13,rdx + xor r15,r9 + + ror r13,4 + add r12,r10 + xor r14,r11 + + add r12,QWORD PTR[rdi*8+rbp] + and r15,rdx + mov r10,rax + + ror r14,6 + xor r13,rdx + xor r15,r9 + + xor r10,rbx + xor r14,r11 + add r12,r15 + mov r15,rax + + ror r13,14 + and r10,r11 + and r15,rbx + + ror r14,28 + add r12,r13 + add r10,r15 + + add rcx,r12 + add r10,r12 + lea rdi,QWORD PTR[1+rdi] + add r10,r14 + + mov r12,QWORD PTR[80+rsi] + mov r13,rcx + mov r14,r10 + bswap r12 + ror r13,23 + mov r15,rdx + mov QWORD PTR[80+rsp],r12 + + ror r14,5 + xor r13,rcx + xor r15,r8 + + ror r13,4 + add r12,r9 + xor r14,r10 + + add r12,QWORD PTR[rdi*8+rbp] + and r15,rcx + mov r9,r11 + + ror r14,6 + xor r13,rcx + xor r15,r8 + + xor r9,rax + xor r14,r10 + add r12,r15 + mov r15,r11 + + ror r13,14 + and r9,r10 + and r15,rax + + ror r14,28 + add r12,r13 + add r9,r15 + + add rbx,r12 + add r9,r12 + lea rdi,QWORD PTR[1+rdi] + add r9,r14 + + mov r12,QWORD PTR[88+rsi] + mov r13,rbx + mov r14,r9 + bswap r12 + ror r13,23 + mov r15,rcx + mov QWORD PTR[88+rsp],r12 + + ror r14,5 + xor r13,rbx + xor r15,rdx + + ror r13,4 + add r12,r8 + xor r14,r9 + + add r12,QWORD PTR[rdi*8+rbp] + and r15,rbx + mov r8,r10 + + ror r14,6 + xor r13,rbx + xor r15,rdx + + xor r8,r11 + xor r14,r9 + add r12,r15 + mov r15,r10 + + ror r13,14 + and r8,r9 + and r15,r11 + + ror r14,28 + add r12,r13 + add r8,r15 + + add rax,r12 + add r8,r12 + lea rdi,QWORD PTR[1+rdi] + add r8,r14 + + mov r12,QWORD PTR[96+rsi] + mov r13,rax + mov r14,r8 + bswap r12 + ror r13,23 + mov r15,rbx + mov QWORD PTR[96+rsp],r12 + + ror r14,5 + xor r13,rax + xor r15,rcx + + ror r13,4 + add r12,rdx + xor r14,r8 + + add r12,QWORD PTR[rdi*8+rbp] + and r15,rax + mov rdx,r9 + + ror r14,6 + xor r13,rax + xor r15,rcx + + xor rdx,r10 + xor r14,r8 + add r12,r15 + mov r15,r9 + + ror r13,14 + and rdx,r8 + and r15,r10 + + ror r14,28 + add r12,r13 + add rdx,r15 + + add r11,r12 + add rdx,r12 + lea rdi,QWORD PTR[1+rdi] + add rdx,r14 + + mov r12,QWORD PTR[104+rsi] + mov r13,r11 + mov r14,rdx + bswap r12 + ror r13,23 + mov r15,rax + mov QWORD PTR[104+rsp],r12 + + ror r14,5 + xor r13,r11 + xor r15,rbx + + ror r13,4 + add r12,rcx + xor r14,rdx + + add r12,QWORD PTR[rdi*8+rbp] + and r15,r11 + mov rcx,r8 + + ror r14,6 + xor r13,r11 + xor r15,rbx + + xor rcx,r9 + xor r14,rdx + add r12,r15 + mov r15,r8 + + ror r13,14 + and rcx,rdx + and r15,r9 + + ror r14,28 + add r12,r13 + add rcx,r15 + + add r10,r12 + add rcx,r12 + lea rdi,QWORD PTR[1+rdi] + add rcx,r14 + + mov r12,QWORD PTR[112+rsi] + mov r13,r10 + mov r14,rcx + bswap r12 + ror r13,23 + mov r15,r11 + mov QWORD PTR[112+rsp],r12 + + ror r14,5 + xor r13,r10 + xor r15,rax + + ror r13,4 + add r12,rbx + xor r14,rcx + + add r12,QWORD PTR[rdi*8+rbp] + and r15,r10 + mov rbx,rdx + + ror r14,6 + xor r13,r10 + xor r15,rax + + xor rbx,r8 + xor r14,rcx + add r12,r15 + mov r15,rdx + + ror r13,14 + and rbx,rcx + and r15,r8 + + ror r14,28 + add r12,r13 + add rbx,r15 + + add r9,r12 + add rbx,r12 + lea rdi,QWORD PTR[1+rdi] + add rbx,r14 + + mov r12,QWORD PTR[120+rsi] + mov r13,r9 + mov r14,rbx + bswap r12 + ror r13,23 + mov r15,r10 + mov QWORD PTR[120+rsp],r12 + + ror r14,5 + xor r13,r9 + xor r15,r11 + + ror r13,4 + add r12,rax + xor r14,rbx + + add r12,QWORD PTR[rdi*8+rbp] + and r15,r9 + mov rax,rcx + + ror r14,6 + xor r13,r9 + xor r15,r11 + + xor rax,rdx + xor r14,rbx + add r12,r15 + mov r15,rcx + + ror r13,14 + and rax,rbx + and r15,rdx + + ror r14,28 + add r12,r13 + add rax,r15 + + add r8,r12 + add rax,r12 + lea rdi,QWORD PTR[1+rdi] + add rax,r14 + + jmp $L$rounds_16_xx +ALIGN 16 +$L$rounds_16_xx:: + mov r13,QWORD PTR[8+rsp] + mov r14,QWORD PTR[112+rsp] + mov r12,r13 + mov r15,r14 + + ror r12,7 + xor r12,r13 + shr r13,7 + + ror r12,1 + xor r13,r12 + mov r12,QWORD PTR[72+rsp] + + ror r15,42 + xor r15,r14 + shr r14,6 + + ror r15,19 + add r12,r13 + xor r14,r15 + + add r12,QWORD PTR[rsp] + mov r13,r8 + add r12,r14 + mov r14,rax + ror r13,23 + mov r15,r9 + mov QWORD PTR[rsp],r12 + + ror r14,5 + xor r13,r8 + xor r15,r10 + + ror r13,4 + add r12,r11 + xor r14,rax + + add r12,QWORD PTR[rdi*8+rbp] + and r15,r8 + mov r11,rbx + + ror r14,6 + xor r13,r8 + xor r15,r10 + + xor r11,rcx + xor r14,rax + add r12,r15 + mov r15,rbx + + ror r13,14 + and r11,rax + and r15,rcx + + ror r14,28 + add r12,r13 + add r11,r15 + + add rdx,r12 + add r11,r12 + lea rdi,QWORD PTR[1+rdi] + add r11,r14 + + mov r13,QWORD PTR[16+rsp] + mov r14,QWORD PTR[120+rsp] + mov r12,r13 + mov r15,r14 + + ror r12,7 + xor r12,r13 + shr r13,7 + + ror r12,1 + xor r13,r12 + mov r12,QWORD PTR[80+rsp] + + ror r15,42 + xor r15,r14 + shr r14,6 + + ror r15,19 + add r12,r13 + xor r14,r15 + + add r12,QWORD PTR[8+rsp] + mov r13,rdx + add r12,r14 + mov r14,r11 + ror r13,23 + mov r15,r8 + mov QWORD PTR[8+rsp],r12 + + ror r14,5 + xor r13,rdx + xor r15,r9 + + ror r13,4 + add r12,r10 + xor r14,r11 + + add r12,QWORD PTR[rdi*8+rbp] + and r15,rdx + mov r10,rax + + ror r14,6 + xor r13,rdx + xor r15,r9 + + xor r10,rbx + xor r14,r11 + add r12,r15 + mov r15,rax + + ror r13,14 + and r10,r11 + and r15,rbx + + ror r14,28 + add r12,r13 + add r10,r15 + + add rcx,r12 + add r10,r12 + lea rdi,QWORD PTR[1+rdi] + add r10,r14 + + mov r13,QWORD PTR[24+rsp] + mov r14,QWORD PTR[rsp] + mov r12,r13 + mov r15,r14 + + ror r12,7 + xor r12,r13 + shr r13,7 + + ror r12,1 + xor r13,r12 + mov r12,QWORD PTR[88+rsp] + + ror r15,42 + xor r15,r14 + shr r14,6 + + ror r15,19 + add r12,r13 + xor r14,r15 + + add r12,QWORD PTR[16+rsp] + mov r13,rcx + add r12,r14 + mov r14,r10 + ror r13,23 + mov r15,rdx + mov QWORD PTR[16+rsp],r12 + + ror r14,5 + xor r13,rcx + xor r15,r8 + + ror r13,4 + add r12,r9 + xor r14,r10 + + add r12,QWORD PTR[rdi*8+rbp] + and r15,rcx + mov r9,r11 + + ror r14,6 + xor r13,rcx + xor r15,r8 + + xor r9,rax + xor r14,r10 + add r12,r15 + mov r15,r11 + + ror r13,14 + and r9,r10 + and r15,rax + + ror r14,28 + add r12,r13 + add r9,r15 + + add rbx,r12 + add r9,r12 + lea rdi,QWORD PTR[1+rdi] + add r9,r14 + + mov r13,QWORD PTR[32+rsp] + mov r14,QWORD PTR[8+rsp] + mov r12,r13 + mov r15,r14 + + ror r12,7 + xor r12,r13 + shr r13,7 + + ror r12,1 + xor r13,r12 + mov r12,QWORD PTR[96+rsp] + + ror r15,42 + xor r15,r14 + shr r14,6 + + ror r15,19 + add r12,r13 + xor r14,r15 + + add r12,QWORD PTR[24+rsp] + mov r13,rbx + add r12,r14 + mov r14,r9 + ror r13,23 + mov r15,rcx + mov QWORD PTR[24+rsp],r12 + + ror r14,5 + xor r13,rbx + xor r15,rdx + + ror r13,4 + add r12,r8 + xor r14,r9 + + add r12,QWORD PTR[rdi*8+rbp] + and r15,rbx + mov r8,r10 + + ror r14,6 + xor r13,rbx + xor r15,rdx + + xor r8,r11 + xor r14,r9 + add r12,r15 + mov r15,r10 + + ror r13,14 + and r8,r9 + and r15,r11 + + ror r14,28 + add r12,r13 + add r8,r15 + + add rax,r12 + add r8,r12 + lea rdi,QWORD PTR[1+rdi] + add r8,r14 + + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[16+rsp] + mov r12,r13 + mov r15,r14 + + ror r12,7 + xor r12,r13 + shr r13,7 + + ror r12,1 + xor r13,r12 + mov r12,QWORD PTR[104+rsp] + + ror r15,42 + xor r15,r14 + shr r14,6 + + ror r15,19 + add r12,r13 + xor r14,r15 + + add r12,QWORD PTR[32+rsp] + mov r13,rax + add r12,r14 + mov r14,r8 + ror r13,23 + mov r15,rbx + mov QWORD PTR[32+rsp],r12 + + ror r14,5 + xor r13,rax + xor r15,rcx + + ror r13,4 + add r12,rdx + xor r14,r8 + + add r12,QWORD PTR[rdi*8+rbp] + and r15,rax + mov rdx,r9 + + ror r14,6 + xor r13,rax + xor r15,rcx + + xor rdx,r10 + xor r14,r8 + add r12,r15 + mov r15,r9 + + ror r13,14 + and rdx,r8 + and r15,r10 + + ror r14,28 + add r12,r13 + add rdx,r15 + + add r11,r12 + add rdx,r12 + lea rdi,QWORD PTR[1+rdi] + add rdx,r14 + + mov r13,QWORD PTR[48+rsp] + mov r14,QWORD PTR[24+rsp] + mov r12,r13 + mov r15,r14 + + ror r12,7 + xor r12,r13 + shr r13,7 + + ror r12,1 + xor r13,r12 + mov r12,QWORD PTR[112+rsp] + + ror r15,42 + xor r15,r14 + shr r14,6 + + ror r15,19 + add r12,r13 + xor r14,r15 + + add r12,QWORD PTR[40+rsp] + mov r13,r11 + add r12,r14 + mov r14,rdx + ror r13,23 + mov r15,rax + mov QWORD PTR[40+rsp],r12 + + ror r14,5 + xor r13,r11 + xor r15,rbx + + ror r13,4 + add r12,rcx + xor r14,rdx + + add r12,QWORD PTR[rdi*8+rbp] + and r15,r11 + mov rcx,r8 + + ror r14,6 + xor r13,r11 + xor r15,rbx + + xor rcx,r9 + xor r14,rdx + add r12,r15 + mov r15,r8 + + ror r13,14 + and rcx,rdx + and r15,r9 + + ror r14,28 + add r12,r13 + add rcx,r15 + + add r10,r12 + add rcx,r12 + lea rdi,QWORD PTR[1+rdi] + add rcx,r14 + + mov r13,QWORD PTR[56+rsp] + mov r14,QWORD PTR[32+rsp] + mov r12,r13 + mov r15,r14 + + ror r12,7 + xor r12,r13 + shr r13,7 + + ror r12,1 + xor r13,r12 + mov r12,QWORD PTR[120+rsp] + + ror r15,42 + xor r15,r14 + shr r14,6 + + ror r15,19 + add r12,r13 + xor r14,r15 + + add r12,QWORD PTR[48+rsp] + mov r13,r10 + add r12,r14 + mov r14,rcx + ror r13,23 + mov r15,r11 + mov QWORD PTR[48+rsp],r12 + + ror r14,5 + xor r13,r10 + xor r15,rax + + ror r13,4 + add r12,rbx + xor r14,rcx + + add r12,QWORD PTR[rdi*8+rbp] + and r15,r10 + mov rbx,rdx + + ror r14,6 + xor r13,r10 + xor r15,rax + + xor rbx,r8 + xor r14,rcx + add r12,r15 + mov r15,rdx + + ror r13,14 + and rbx,rcx + and r15,r8 + + ror r14,28 + add r12,r13 + add rbx,r15 + + add r9,r12 + add rbx,r12 + lea rdi,QWORD PTR[1+rdi] + add rbx,r14 + + mov r13,QWORD PTR[64+rsp] + mov r14,QWORD PTR[40+rsp] + mov r12,r13 + mov r15,r14 + + ror r12,7 + xor r12,r13 + shr r13,7 + + ror r12,1 + xor r13,r12 + mov r12,QWORD PTR[rsp] + + ror r15,42 + xor r15,r14 + shr r14,6 + + ror r15,19 + add r12,r13 + xor r14,r15 + + add r12,QWORD PTR[56+rsp] + mov r13,r9 + add r12,r14 + mov r14,rbx + ror r13,23 + mov r15,r10 + mov QWORD PTR[56+rsp],r12 + + ror r14,5 + xor r13,r9 + xor r15,r11 + + ror r13,4 + add r12,rax + xor r14,rbx + + add r12,QWORD PTR[rdi*8+rbp] + and r15,r9 + mov rax,rcx + + ror r14,6 + xor r13,r9 + xor r15,r11 + + xor rax,rdx + xor r14,rbx + add r12,r15 + mov r15,rcx + + ror r13,14 + and rax,rbx + and r15,rdx + + ror r14,28 + add r12,r13 + add rax,r15 + + add r8,r12 + add rax,r12 + lea rdi,QWORD PTR[1+rdi] + add rax,r14 + + mov r13,QWORD PTR[72+rsp] + mov r14,QWORD PTR[48+rsp] + mov r12,r13 + mov r15,r14 + + ror r12,7 + xor r12,r13 + shr r13,7 + + ror r12,1 + xor r13,r12 + mov r12,QWORD PTR[8+rsp] + + ror r15,42 + xor r15,r14 + shr r14,6 + + ror r15,19 + add r12,r13 + xor r14,r15 + + add r12,QWORD PTR[64+rsp] + mov r13,r8 + add r12,r14 + mov r14,rax + ror r13,23 + mov r15,r9 + mov QWORD PTR[64+rsp],r12 + + ror r14,5 + xor r13,r8 + xor r15,r10 + + ror r13,4 + add r12,r11 + xor r14,rax + + add r12,QWORD PTR[rdi*8+rbp] + and r15,r8 + mov r11,rbx + + ror r14,6 + xor r13,r8 + xor r15,r10 + + xor r11,rcx + xor r14,rax + add r12,r15 + mov r15,rbx + + ror r13,14 + and r11,rax + and r15,rcx + + ror r14,28 + add r12,r13 + add r11,r15 + + add rdx,r12 + add r11,r12 + lea rdi,QWORD PTR[1+rdi] + add r11,r14 + + mov r13,QWORD PTR[80+rsp] + mov r14,QWORD PTR[56+rsp] + mov r12,r13 + mov r15,r14 + + ror r12,7 + xor r12,r13 + shr r13,7 + + ror r12,1 + xor r13,r12 + mov r12,QWORD PTR[16+rsp] + + ror r15,42 + xor r15,r14 + shr r14,6 + + ror r15,19 + add r12,r13 + xor r14,r15 + + add r12,QWORD PTR[72+rsp] + mov r13,rdx + add r12,r14 + mov r14,r11 + ror r13,23 + mov r15,r8 + mov QWORD PTR[72+rsp],r12 + + ror r14,5 + xor r13,rdx + xor r15,r9 + + ror r13,4 + add r12,r10 + xor r14,r11 + + add r12,QWORD PTR[rdi*8+rbp] + and r15,rdx + mov r10,rax + + ror r14,6 + xor r13,rdx + xor r15,r9 + + xor r10,rbx + xor r14,r11 + add r12,r15 + mov r15,rax + + ror r13,14 + and r10,r11 + and r15,rbx + + ror r14,28 + add r12,r13 + add r10,r15 + + add rcx,r12 + add r10,r12 + lea rdi,QWORD PTR[1+rdi] + add r10,r14 + + mov r13,QWORD PTR[88+rsp] + mov r14,QWORD PTR[64+rsp] + mov r12,r13 + mov r15,r14 + + ror r12,7 + xor r12,r13 + shr r13,7 + + ror r12,1 + xor r13,r12 + mov r12,QWORD PTR[24+rsp] + + ror r15,42 + xor r15,r14 + shr r14,6 + + ror r15,19 + add r12,r13 + xor r14,r15 + + add r12,QWORD PTR[80+rsp] + mov r13,rcx + add r12,r14 + mov r14,r10 + ror r13,23 + mov r15,rdx + mov QWORD PTR[80+rsp],r12 + + ror r14,5 + xor r13,rcx + xor r15,r8 + + ror r13,4 + add r12,r9 + xor r14,r10 + + add r12,QWORD PTR[rdi*8+rbp] + and r15,rcx + mov r9,r11 + + ror r14,6 + xor r13,rcx + xor r15,r8 + + xor r9,rax + xor r14,r10 + add r12,r15 + mov r15,r11 + + ror r13,14 + and r9,r10 + and r15,rax + + ror r14,28 + add r12,r13 + add r9,r15 + + add rbx,r12 + add r9,r12 + lea rdi,QWORD PTR[1+rdi] + add r9,r14 + + mov r13,QWORD PTR[96+rsp] + mov r14,QWORD PTR[72+rsp] + mov r12,r13 + mov r15,r14 + + ror r12,7 + xor r12,r13 + shr r13,7 + + ror r12,1 + xor r13,r12 + mov r12,QWORD PTR[32+rsp] + + ror r15,42 + xor r15,r14 + shr r14,6 + + ror r15,19 + add r12,r13 + xor r14,r15 + + add r12,QWORD PTR[88+rsp] + mov r13,rbx + add r12,r14 + mov r14,r9 + ror r13,23 + mov r15,rcx + mov QWORD PTR[88+rsp],r12 + + ror r14,5 + xor r13,rbx + xor r15,rdx + + ror r13,4 + add r12,r8 + xor r14,r9 + + add r12,QWORD PTR[rdi*8+rbp] + and r15,rbx + mov r8,r10 + + ror r14,6 + xor r13,rbx + xor r15,rdx + + xor r8,r11 + xor r14,r9 + add r12,r15 + mov r15,r10 + + ror r13,14 + and r8,r9 + and r15,r11 + + ror r14,28 + add r12,r13 + add r8,r15 + + add rax,r12 + add r8,r12 + lea rdi,QWORD PTR[1+rdi] + add r8,r14 + + mov r13,QWORD PTR[104+rsp] + mov r14,QWORD PTR[80+rsp] + mov r12,r13 + mov r15,r14 + + ror r12,7 + xor r12,r13 + shr r13,7 + + ror r12,1 + xor r13,r12 + mov r12,QWORD PTR[40+rsp] + + ror r15,42 + xor r15,r14 + shr r14,6 + + ror r15,19 + add r12,r13 + xor r14,r15 + + add r12,QWORD PTR[96+rsp] + mov r13,rax + add r12,r14 + mov r14,r8 + ror r13,23 + mov r15,rbx + mov QWORD PTR[96+rsp],r12 + + ror r14,5 + xor r13,rax + xor r15,rcx + + ror r13,4 + add r12,rdx + xor r14,r8 + + add r12,QWORD PTR[rdi*8+rbp] + and r15,rax + mov rdx,r9 + + ror r14,6 + xor r13,rax + xor r15,rcx + + xor rdx,r10 + xor r14,r8 + add r12,r15 + mov r15,r9 + + ror r13,14 + and rdx,r8 + and r15,r10 + + ror r14,28 + add r12,r13 + add rdx,r15 + + add r11,r12 + add rdx,r12 + lea rdi,QWORD PTR[1+rdi] + add rdx,r14 + + mov r13,QWORD PTR[112+rsp] + mov r14,QWORD PTR[88+rsp] + mov r12,r13 + mov r15,r14 + + ror r12,7 + xor r12,r13 + shr r13,7 + + ror r12,1 + xor r13,r12 + mov r12,QWORD PTR[48+rsp] + + ror r15,42 + xor r15,r14 + shr r14,6 + + ror r15,19 + add r12,r13 + xor r14,r15 + + add r12,QWORD PTR[104+rsp] + mov r13,r11 + add r12,r14 + mov r14,rdx + ror r13,23 + mov r15,rax + mov QWORD PTR[104+rsp],r12 + + ror r14,5 + xor r13,r11 + xor r15,rbx + + ror r13,4 + add r12,rcx + xor r14,rdx + + add r12,QWORD PTR[rdi*8+rbp] + and r15,r11 + mov rcx,r8 + + ror r14,6 + xor r13,r11 + xor r15,rbx + + xor rcx,r9 + xor r14,rdx + add r12,r15 + mov r15,r8 + + ror r13,14 + and rcx,rdx + and r15,r9 + + ror r14,28 + add r12,r13 + add rcx,r15 + + add r10,r12 + add rcx,r12 + lea rdi,QWORD PTR[1+rdi] + add rcx,r14 + + mov r13,QWORD PTR[120+rsp] + mov r14,QWORD PTR[96+rsp] + mov r12,r13 + mov r15,r14 + + ror r12,7 + xor r12,r13 + shr r13,7 + + ror r12,1 + xor r13,r12 + mov r12,QWORD PTR[56+rsp] + + ror r15,42 + xor r15,r14 + shr r14,6 + + ror r15,19 + add r12,r13 + xor r14,r15 + + add r12,QWORD PTR[112+rsp] + mov r13,r10 + add r12,r14 + mov r14,rcx + ror r13,23 + mov r15,r11 + mov QWORD PTR[112+rsp],r12 + + ror r14,5 + xor r13,r10 + xor r15,rax + + ror r13,4 + add r12,rbx + xor r14,rcx + + add r12,QWORD PTR[rdi*8+rbp] + and r15,r10 + mov rbx,rdx + + ror r14,6 + xor r13,r10 + xor r15,rax + + xor rbx,r8 + xor r14,rcx + add r12,r15 + mov r15,rdx + + ror r13,14 + and rbx,rcx + and r15,r8 + + ror r14,28 + add r12,r13 + add rbx,r15 + + add r9,r12 + add rbx,r12 + lea rdi,QWORD PTR[1+rdi] + add rbx,r14 + + mov r13,QWORD PTR[rsp] + mov r14,QWORD PTR[104+rsp] + mov r12,r13 + mov r15,r14 + + ror r12,7 + xor r12,r13 + shr r13,7 + + ror r12,1 + xor r13,r12 + mov r12,QWORD PTR[64+rsp] + + ror r15,42 + xor r15,r14 + shr r14,6 + + ror r15,19 + add r12,r13 + xor r14,r15 + + add r12,QWORD PTR[120+rsp] + mov r13,r9 + add r12,r14 + mov r14,rbx + ror r13,23 + mov r15,r10 + mov QWORD PTR[120+rsp],r12 + + ror r14,5 + xor r13,r9 + xor r15,r11 + + ror r13,4 + add r12,rax + xor r14,rbx + + add r12,QWORD PTR[rdi*8+rbp] + and r15,r9 + mov rax,rcx + + ror r14,6 + xor r13,r9 + xor r15,r11 + + xor rax,rdx + xor r14,rbx + add r12,r15 + mov r15,rcx + + ror r13,14 + and rax,rbx + and r15,rdx + + ror r14,28 + add r12,r13 + add rax,r15 + + add r8,r12 + add rax,r12 + lea rdi,QWORD PTR[1+rdi] + add rax,r14 + + cmp rdi,80 + jb $L$rounds_16_xx + + mov rdi,QWORD PTR[((128+0))+rsp] + lea rsi,QWORD PTR[128+rsi] + + add rax,QWORD PTR[rdi] + add rbx,QWORD PTR[8+rdi] + add rcx,QWORD PTR[16+rdi] + add rdx,QWORD PTR[24+rdi] + add r8,QWORD PTR[32+rdi] + add r9,QWORD PTR[40+rdi] + add r10,QWORD PTR[48+rdi] + add r11,QWORD PTR[56+rdi] + + cmp rsi,QWORD PTR[((128+16))+rsp] + + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rbx + mov QWORD PTR[16+rdi],rcx + mov QWORD PTR[24+rdi],rdx + mov QWORD PTR[32+rdi],r8 + mov QWORD PTR[40+rdi],r9 + mov QWORD PTR[48+rdi],r10 + mov QWORD PTR[56+rdi],r11 + jb $L$loop + + mov rsi,QWORD PTR[((128+24))+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha512_block_data_order:: +sha512_block_data_order ENDP +ALIGN 64 + +K512:: + DQ 0428a2f98d728ae22h,07137449123ef65cdh + DQ 0b5c0fbcfec4d3b2fh,0e9b5dba58189dbbch + DQ 03956c25bf348b538h,059f111f1b605d019h + DQ 0923f82a4af194f9bh,0ab1c5ed5da6d8118h + DQ 0d807aa98a3030242h,012835b0145706fbeh + DQ 0243185be4ee4b28ch,0550c7dc3d5ffb4e2h + DQ 072be5d74f27b896fh,080deb1fe3b1696b1h + DQ 09bdc06a725c71235h,0c19bf174cf692694h + DQ 0e49b69c19ef14ad2h,0efbe4786384f25e3h + DQ 00fc19dc68b8cd5b5h,0240ca1cc77ac9c65h + DQ 02de92c6f592b0275h,04a7484aa6ea6e483h + DQ 05cb0a9dcbd41fbd4h,076f988da831153b5h + DQ 0983e5152ee66dfabh,0a831c66d2db43210h + DQ 0b00327c898fb213fh,0bf597fc7beef0ee4h + DQ 0c6e00bf33da88fc2h,0d5a79147930aa725h + DQ 006ca6351e003826fh,0142929670a0e6e70h + DQ 027b70a8546d22ffch,02e1b21385c26c926h + DQ 04d2c6dfc5ac42aedh,053380d139d95b3dfh + DQ 0650a73548baf63deh,0766a0abb3c77b2a8h + DQ 081c2c92e47edaee6h,092722c851482353bh + DQ 0a2bfe8a14cf10364h,0a81a664bbc423001h + DQ 0c24b8b70d0f89791h,0c76c51a30654be30h + DQ 0d192e819d6ef5218h,0d69906245565a910h + DQ 0f40e35855771202ah,0106aa07032bbd1b8h + DQ 019a4c116b8d2d0c8h,01e376c085141ab53h + DQ 02748774cdf8eeb99h,034b0bcb5e19b48a8h + DQ 0391c0cb3c5c95a63h,04ed8aa4ae3418acbh + DQ 05b9cca4f7763e373h,0682e6ff3d6b2b8a3h + DQ 0748f82ee5defb2fch,078a5636f43172f60h + DQ 084c87814a1f0ab72h,08cc702081a6439ech + DQ 090befffa23631e28h,0a4506cebde82bde9h + DQ 0bef9a3f7b2c67915h,0c67178f2e372532bh + DQ 0ca273eceea26619ch,0d186b8c721c0c207h + DQ 0eada7dd6cde0eb1eh,0f57d4f7fee6ed178h + DQ 006f067aa72176fbah,00a637dc5a2c898a6h + DQ 0113f9804bef90daeh,01b710b35131c471bh + DQ 028db77f523047d84h,032caab7b40c72493h + DQ 03c9ebe0a15c9bebch,0431d67c49c100d4ch + DQ 04cc5d4becb3e42b6h,0597f299cfc657e2ah + DQ 05fcb6fab3ad6faech,06c44198c4a475817h + +.text$ ENDS +END + diff --git a/ext/libressl/crypto/sha/sha512-mingw64-x86_64.S b/ext/libressl/crypto/sha/sha512-mingw64-x86_64.S new file mode 100644 index 0000000..5153952 --- /dev/null +++ b/ext/libressl/crypto/sha/sha512-mingw64-x86_64.S @@ -0,0 +1,1814 @@ +#include "x86_arch.h" +.text + +.globl sha512_block_data_order +.def sha512_block_data_order; .scl 2; .type 32; .endef +.p2align 4 +sha512_block_data_order: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_sha512_block_data_order: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + movq %r9,%rcx + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $128+32,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +.Lprologue: + + leaq K512(%rip),%rbp + + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop + +.p2align 4 +.Lloop: + xorq %rdi,%rdi + movq 0(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,0(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 8(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,8(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 16(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,16(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 24(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,24(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 32(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,32(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 40(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,40(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 48(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,48(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 56(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,56(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + movq 64(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,64(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 72(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,72(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 80(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,80(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 88(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,88(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 96(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,96(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 104(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,104(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 112(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,112(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 120(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,120(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + jmp .Lrounds_16_xx +.p2align 4 +.Lrounds_16_xx: + movq 8(%rsp),%r13 + movq 112(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 72(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 0(%rsp),%r12 + movq %r8,%r13 + addq %r14,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,0(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 16(%rsp),%r13 + movq 120(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 80(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 8(%rsp),%r12 + movq %rdx,%r13 + addq %r14,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,8(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 24(%rsp),%r13 + movq 0(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 88(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 16(%rsp),%r12 + movq %rcx,%r13 + addq %r14,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,16(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 32(%rsp),%r13 + movq 8(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 96(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 24(%rsp),%r12 + movq %rbx,%r13 + addq %r14,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,24(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 40(%rsp),%r13 + movq 16(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 104(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 32(%rsp),%r12 + movq %rax,%r13 + addq %r14,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,32(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 48(%rsp),%r13 + movq 24(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 112(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 40(%rsp),%r12 + movq %r11,%r13 + addq %r14,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,40(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 56(%rsp),%r13 + movq 32(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 120(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 48(%rsp),%r12 + movq %r10,%r13 + addq %r14,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,48(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 64(%rsp),%r13 + movq 40(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 0(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 56(%rsp),%r12 + movq %r9,%r13 + addq %r14,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,56(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + movq 72(%rsp),%r13 + movq 48(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 8(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 64(%rsp),%r12 + movq %r8,%r13 + addq %r14,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,64(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 80(%rsp),%r13 + movq 56(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 16(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 72(%rsp),%r12 + movq %rdx,%r13 + addq %r14,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,72(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 88(%rsp),%r13 + movq 64(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 24(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 80(%rsp),%r12 + movq %rcx,%r13 + addq %r14,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,80(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 96(%rsp),%r13 + movq 72(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 32(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 88(%rsp),%r12 + movq %rbx,%r13 + addq %r14,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,88(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 104(%rsp),%r13 + movq 80(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 40(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 96(%rsp),%r12 + movq %rax,%r13 + addq %r14,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,96(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 112(%rsp),%r13 + movq 88(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 48(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 104(%rsp),%r12 + movq %r11,%r13 + addq %r14,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,104(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 120(%rsp),%r13 + movq 96(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 56(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 112(%rsp),%r12 + movq %r10,%r13 + addq %r14,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,112(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 0(%rsp),%r13 + movq 104(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 64(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 120(%rsp),%r12 + movq %r9,%r13 + addq %r14,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,120(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + cmpq $80,%rdi + jb .Lrounds_16_xx + + movq 128+0(%rsp),%rdi + leaq 128(%rsi),%rsi + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb .Lloop + + movq 128+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + retq +.LSEH_end_sha512_block_data_order: +.p2align 6 + +K512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 diff --git a/ext/libressl/crypto/sha/sha512.c b/ext/libressl/crypto/sha/sha512.c new file mode 100644 index 0000000..6b95cfa --- /dev/null +++ b/ext/libressl/crypto/sha/sha512.c @@ -0,0 +1,547 @@ +/* $OpenBSD: sha512.c,v 1.15 2016/11/04 13:56:05 miod Exp $ */ +/* ==================================================================== + * Copyright (c) 2004 The OpenSSL Project. All rights reserved + * according to the OpenSSL license [found in ../../LICENSE]. + * ==================================================================== + */ + +#include + +#include +#include + +#include + +#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512) +/* + * IMPLEMENTATION NOTES. + * + * As you might have noticed 32-bit hash algorithms: + * + * - permit SHA_LONG to be wider than 32-bit (case on CRAY); + * - optimized versions implement two transform functions: one operating + * on [aligned] data in host byte order and one - on data in input + * stream byte order; + * - share common byte-order neutral collector and padding function + * implementations, ../md32_common.h; + * + * Neither of the above applies to this SHA-512 implementations. Reasons + * [in reverse order] are: + * + * - it's the only 64-bit hash algorithm for the moment of this writing, + * there is no need for common collector/padding implementation [yet]; + * - by supporting only one transform function [which operates on + * *aligned* data in input stream byte order, big-endian in this case] + * we minimize burden of maintenance in two ways: a) collector/padding + * function is simpler; b) only one transform function to stare at; + * - SHA_LONG64 is required to be exactly 64-bit in order to be able to + * apply a number of optimizations to mitigate potential performance + * penalties caused by previous design decision; + * + * Caveat lector. + * + * Implementation relies on the fact that "long long" is 64-bit on + * both 32- and 64-bit platforms. If some compiler vendor comes up + * with 128-bit long long, adjustment to sha.h would be required. + * As this implementation relies on 64-bit integer type, it's totally + * inappropriate for platforms which don't support it, most notably + * 16-bit platforms. + * + */ + +#include +#include +#include + +#if !defined(__STRICT_ALIGNMENT) || defined(SHA512_ASM) +#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA +#endif + +int SHA384_Init(SHA512_CTX *c) + { + c->h[0]=U64(0xcbbb9d5dc1059ed8); + c->h[1]=U64(0x629a292a367cd507); + c->h[2]=U64(0x9159015a3070dd17); + c->h[3]=U64(0x152fecd8f70e5939); + c->h[4]=U64(0x67332667ffc00b31); + c->h[5]=U64(0x8eb44a8768581511); + c->h[6]=U64(0xdb0c2e0d64f98fa7); + c->h[7]=U64(0x47b5481dbefa4fa4); + + c->Nl=0; c->Nh=0; + c->num=0; c->md_len=SHA384_DIGEST_LENGTH; + return 1; + } + +int SHA512_Init(SHA512_CTX *c) + { + c->h[0]=U64(0x6a09e667f3bcc908); + c->h[1]=U64(0xbb67ae8584caa73b); + c->h[2]=U64(0x3c6ef372fe94f82b); + c->h[3]=U64(0xa54ff53a5f1d36f1); + c->h[4]=U64(0x510e527fade682d1); + c->h[5]=U64(0x9b05688c2b3e6c1f); + c->h[6]=U64(0x1f83d9abfb41bd6b); + c->h[7]=U64(0x5be0cd19137e2179); + + c->Nl=0; c->Nh=0; + c->num=0; c->md_len=SHA512_DIGEST_LENGTH; + return 1; + } + +#ifndef SHA512_ASM +static +#endif +void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num); + +int SHA512_Final (unsigned char *md, SHA512_CTX *c) + { + unsigned char *p=(unsigned char *)c->u.p; + size_t n=c->num; + + p[n]=0x80; /* There always is a room for one */ + n++; + if (n > (sizeof(c->u)-16)) + memset (p+n,0,sizeof(c->u)-n), n=0, + sha512_block_data_order (c,p,1); + + memset (p+n,0,sizeof(c->u)-16-n); +#if BYTE_ORDER == BIG_ENDIAN + c->u.d[SHA_LBLOCK-2] = c->Nh; + c->u.d[SHA_LBLOCK-1] = c->Nl; +#else + p[sizeof(c->u)-1] = (unsigned char)(c->Nl); + p[sizeof(c->u)-2] = (unsigned char)(c->Nl>>8); + p[sizeof(c->u)-3] = (unsigned char)(c->Nl>>16); + p[sizeof(c->u)-4] = (unsigned char)(c->Nl>>24); + p[sizeof(c->u)-5] = (unsigned char)(c->Nl>>32); + p[sizeof(c->u)-6] = (unsigned char)(c->Nl>>40); + p[sizeof(c->u)-7] = (unsigned char)(c->Nl>>48); + p[sizeof(c->u)-8] = (unsigned char)(c->Nl>>56); + p[sizeof(c->u)-9] = (unsigned char)(c->Nh); + p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8); + p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16); + p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24); + p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32); + p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40); + p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48); + p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56); +#endif + + sha512_block_data_order (c,p,1); + + if (md==0) return 0; + + switch (c->md_len) + { + /* Let compiler decide if it's appropriate to unroll... */ + case SHA384_DIGEST_LENGTH: + for (n=0;nh[n]; + + *(md++) = (unsigned char)(t>>56); + *(md++) = (unsigned char)(t>>48); + *(md++) = (unsigned char)(t>>40); + *(md++) = (unsigned char)(t>>32); + *(md++) = (unsigned char)(t>>24); + *(md++) = (unsigned char)(t>>16); + *(md++) = (unsigned char)(t>>8); + *(md++) = (unsigned char)(t); + } + break; + case SHA512_DIGEST_LENGTH: + for (n=0;nh[n]; + + *(md++) = (unsigned char)(t>>56); + *(md++) = (unsigned char)(t>>48); + *(md++) = (unsigned char)(t>>40); + *(md++) = (unsigned char)(t>>32); + *(md++) = (unsigned char)(t>>24); + *(md++) = (unsigned char)(t>>16); + *(md++) = (unsigned char)(t>>8); + *(md++) = (unsigned char)(t); + } + break; + /* ... as well as make sure md_len is not abused. */ + default: return 0; + } + + return 1; + } + +int SHA384_Final (unsigned char *md,SHA512_CTX *c) +{ return SHA512_Final (md,c); } + +int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len) + { + SHA_LONG64 l; + unsigned char *p=c->u.p; + const unsigned char *data=(const unsigned char *)_data; + + if (len==0) return 1; + + l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff); + if (l < c->Nl) c->Nh++; + if (sizeof(len)>=8) c->Nh+=(((SHA_LONG64)len)>>61); + c->Nl=l; + + if (c->num != 0) + { + size_t n = sizeof(c->u) - c->num; + + if (len < n) + { + memcpy (p+c->num,data,len), c->num += (unsigned int)len; + return 1; + } + else { + memcpy (p+c->num,data,n), c->num = 0; + len-=n, data+=n; + sha512_block_data_order (c,p,1); + } + } + + if (len >= sizeof(c->u)) + { +#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA + if ((size_t)data%sizeof(c->u.d[0]) != 0) + while (len >= sizeof(c->u)) + memcpy (p,data,sizeof(c->u)), + sha512_block_data_order (c,p,1), + len -= sizeof(c->u), + data += sizeof(c->u); + else +#endif + sha512_block_data_order (c,data,len/sizeof(c->u)), + data += len, + len %= sizeof(c->u), + data -= len; + } + + if (len != 0) memcpy (p,data,len), c->num = (int)len; + + return 1; + } + +int SHA384_Update (SHA512_CTX *c, const void *data, size_t len) +{ return SHA512_Update (c,data,len); } + +void SHA512_Transform (SHA512_CTX *c, const unsigned char *data) + { +#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA + if ((size_t)data%sizeof(c->u.d[0]) != 0) + memcpy(c->u.p,data,sizeof(c->u.p)), + data = c->u.p; +#endif + sha512_block_data_order (c,data,1); + } + +unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md) + { + SHA512_CTX c; + static unsigned char m[SHA384_DIGEST_LENGTH]; + + if (md == NULL) md=m; + SHA384_Init(&c); + SHA512_Update(&c,d,n); + SHA512_Final(md,&c); + explicit_bzero(&c,sizeof(c)); + return(md); + } + +unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md) + { + SHA512_CTX c; + static unsigned char m[SHA512_DIGEST_LENGTH]; + + if (md == NULL) md=m; + SHA512_Init(&c); + SHA512_Update(&c,d,n); + SHA512_Final(md,&c); + explicit_bzero(&c,sizeof(c)); + return(md); + } + +#ifndef SHA512_ASM +static const SHA_LONG64 K512[80] = { + U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd), + U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc), + U64(0x3956c25bf348b538),U64(0x59f111f1b605d019), + U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118), + U64(0xd807aa98a3030242),U64(0x12835b0145706fbe), + U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2), + U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1), + U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694), + U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3), + U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65), + U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483), + U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5), + U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210), + U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4), + U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725), + U64(0x06ca6351e003826f),U64(0x142929670a0e6e70), + U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926), + U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df), + U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8), + U64(0x81c2c92e47edaee6),U64(0x92722c851482353b), + U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001), + U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30), + U64(0xd192e819d6ef5218),U64(0xd69906245565a910), + U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8), + U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53), + U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8), + U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb), + U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3), + U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60), + U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec), + U64(0x90befffa23631e28),U64(0xa4506cebde82bde9), + U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b), + U64(0xca273eceea26619c),U64(0xd186b8c721c0c207), + U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178), + U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6), + U64(0x113f9804bef90dae),U64(0x1b710b35131c471b), + U64(0x28db77f523047d84),U64(0x32caab7b40c72493), + U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c), + U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a), + U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) }; + +#if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) +# if defined(__x86_64) || defined(__x86_64__) +# define ROTR(a,n) ({ SHA_LONG64 ret; \ + asm ("rorq %1,%0" \ + : "=r"(ret) \ + : "J"(n),"0"(a) \ + : "cc"); ret; }) +# define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \ + asm ("bswapq %0" \ + : "=r"(ret) \ + : "0"(ret)); ret; }) +# elif (defined(__i386) || defined(__i386__)) +# define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\ + unsigned int hi=p[0],lo=p[1]; \ + asm ("bswapl %0; bswapl %1;" \ + : "=r"(lo),"=r"(hi) \ + : "0"(lo),"1"(hi)); \ + ((SHA_LONG64)hi)<<32|lo; }) +# elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64) +# define ROTR(a,n) ({ SHA_LONG64 ret; \ + asm ("rotrdi %0,%1,%2" \ + : "=r"(ret) \ + : "r"(a),"K"(n)); ret; }) +# endif +#endif + +#ifndef PULL64 +#define B(x,j) (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8)) +#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7)) +#endif + +#ifndef ROTR +#define ROTR(x,s) (((x)>>s) | (x)<<(64-s)) +#endif + +#define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) +#define Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) +#define sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) +#define sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) + +#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z))) +#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + + +#if defined(__i386) || defined(__i386__) || defined(_M_IX86) +/* + * This code should give better results on 32-bit CPU with less than + * ~24 registers, both size and performance wise... + */ +static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num) + { + const SHA_LONG64 *W=in; + SHA_LONG64 A,E,T; + SHA_LONG64 X[9+80],*F; + int i; + + while (num--) { + + F = X+80; + A = ctx->h[0]; F[1] = ctx->h[1]; + F[2] = ctx->h[2]; F[3] = ctx->h[3]; + E = ctx->h[4]; F[5] = ctx->h[5]; + F[6] = ctx->h[6]; F[7] = ctx->h[7]; + + for (i=0;i<16;i++,F--) + { + T = PULL64(W[i]); + F[0] = A; + F[4] = E; + F[8] = T; + T += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i]; + E = F[3] + T; + A = T + Sigma0(A) + Maj(A,F[1],F[2]); + } + + for (;i<80;i++,F--) + { + T = sigma0(F[8+16-1]); + T += sigma1(F[8+16-14]); + T += F[8+16] + F[8+16-9]; + + F[0] = A; + F[4] = E; + F[8] = T; + T += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i]; + E = F[3] + T; + A = T + Sigma0(A) + Maj(A,F[1],F[2]); + } + + ctx->h[0] += A; ctx->h[1] += F[1]; + ctx->h[2] += F[2]; ctx->h[3] += F[3]; + ctx->h[4] += E; ctx->h[5] += F[5]; + ctx->h[6] += F[6]; ctx->h[7] += F[7]; + + W+=SHA_LBLOCK; + } + } + +#elif defined(OPENSSL_SMALL_FOOTPRINT) + +static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num) + { + const SHA_LONG64 *W=in; + SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1,T2; + SHA_LONG64 X[16]; + int i; + + while (num--) { + + a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3]; + e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7]; + + for (i=0;i<16;i++) + { +#if BYTE_ORDER == BIG_ENDIAN + T1 = X[i] = W[i]; +#else + T1 = X[i] = PULL64(W[i]); +#endif + T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; + T2 = Sigma0(a) + Maj(a,b,c); + h = g; g = f; f = e; e = d + T1; + d = c; c = b; b = a; a = T1 + T2; + } + + for (;i<80;i++) + { + s0 = X[(i+1)&0x0f]; s0 = sigma0(s0); + s1 = X[(i+14)&0x0f]; s1 = sigma1(s1); + + T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf]; + T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; + T2 = Sigma0(a) + Maj(a,b,c); + h = g; g = f; f = e; e = d + T1; + d = c; c = b; b = a; a = T1 + T2; + } + + ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d; + ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h; + + W+=SHA_LBLOCK; + } + } + +#else + +#define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \ + T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \ + h = Sigma0(a) + Maj(a,b,c); \ + d += T1; h += T1; } while (0) + +#define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) do { \ + s0 = X[(j+1)&0x0f]; s0 = sigma0(s0); \ + s1 = X[(j+14)&0x0f]; s1 = sigma1(s1); \ + T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f]; \ + ROUND_00_15(i+j,a,b,c,d,e,f,g,h); } while (0) + +static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num) + { + const SHA_LONG64 *W=in; + SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1; + SHA_LONG64 X[16]; + int i; + + while (num--) { + + a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3]; + e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7]; + +#if BYTE_ORDER == BIG_ENDIAN + T1 = X[0] = W[0]; ROUND_00_15(0,a,b,c,d,e,f,g,h); + T1 = X[1] = W[1]; ROUND_00_15(1,h,a,b,c,d,e,f,g); + T1 = X[2] = W[2]; ROUND_00_15(2,g,h,a,b,c,d,e,f); + T1 = X[3] = W[3]; ROUND_00_15(3,f,g,h,a,b,c,d,e); + T1 = X[4] = W[4]; ROUND_00_15(4,e,f,g,h,a,b,c,d); + T1 = X[5] = W[5]; ROUND_00_15(5,d,e,f,g,h,a,b,c); + T1 = X[6] = W[6]; ROUND_00_15(6,c,d,e,f,g,h,a,b); + T1 = X[7] = W[7]; ROUND_00_15(7,b,c,d,e,f,g,h,a); + T1 = X[8] = W[8]; ROUND_00_15(8,a,b,c,d,e,f,g,h); + T1 = X[9] = W[9]; ROUND_00_15(9,h,a,b,c,d,e,f,g); + T1 = X[10] = W[10]; ROUND_00_15(10,g,h,a,b,c,d,e,f); + T1 = X[11] = W[11]; ROUND_00_15(11,f,g,h,a,b,c,d,e); + T1 = X[12] = W[12]; ROUND_00_15(12,e,f,g,h,a,b,c,d); + T1 = X[13] = W[13]; ROUND_00_15(13,d,e,f,g,h,a,b,c); + T1 = X[14] = W[14]; ROUND_00_15(14,c,d,e,f,g,h,a,b); + T1 = X[15] = W[15]; ROUND_00_15(15,b,c,d,e,f,g,h,a); +#else + T1 = X[0] = PULL64(W[0]); ROUND_00_15(0,a,b,c,d,e,f,g,h); + T1 = X[1] = PULL64(W[1]); ROUND_00_15(1,h,a,b,c,d,e,f,g); + T1 = X[2] = PULL64(W[2]); ROUND_00_15(2,g,h,a,b,c,d,e,f); + T1 = X[3] = PULL64(W[3]); ROUND_00_15(3,f,g,h,a,b,c,d,e); + T1 = X[4] = PULL64(W[4]); ROUND_00_15(4,e,f,g,h,a,b,c,d); + T1 = X[5] = PULL64(W[5]); ROUND_00_15(5,d,e,f,g,h,a,b,c); + T1 = X[6] = PULL64(W[6]); ROUND_00_15(6,c,d,e,f,g,h,a,b); + T1 = X[7] = PULL64(W[7]); ROUND_00_15(7,b,c,d,e,f,g,h,a); + T1 = X[8] = PULL64(W[8]); ROUND_00_15(8,a,b,c,d,e,f,g,h); + T1 = X[9] = PULL64(W[9]); ROUND_00_15(9,h,a,b,c,d,e,f,g); + T1 = X[10] = PULL64(W[10]); ROUND_00_15(10,g,h,a,b,c,d,e,f); + T1 = X[11] = PULL64(W[11]); ROUND_00_15(11,f,g,h,a,b,c,d,e); + T1 = X[12] = PULL64(W[12]); ROUND_00_15(12,e,f,g,h,a,b,c,d); + T1 = X[13] = PULL64(W[13]); ROUND_00_15(13,d,e,f,g,h,a,b,c); + T1 = X[14] = PULL64(W[14]); ROUND_00_15(14,c,d,e,f,g,h,a,b); + T1 = X[15] = PULL64(W[15]); ROUND_00_15(15,b,c,d,e,f,g,h,a); +#endif + + for (i=16;i<80;i+=16) + { + ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X); + ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X); + ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X); + ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X); + ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X); + ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X); + ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X); + ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X); + ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X); + ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X); + ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X); + ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X); + ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X); + ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X); + ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X); + ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X); + } + + ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d; + ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h; + + W+=SHA_LBLOCK; + } + } + +#endif + +#endif /* SHA512_ASM */ + +#endif /* !OPENSSL_NO_SHA512 */ diff --git a/ext/libressl/crypto/sha/sha_locl.h b/ext/libressl/crypto/sha/sha_locl.h new file mode 100644 index 0000000..46c9a39 --- /dev/null +++ b/ext/libressl/crypto/sha/sha_locl.h @@ -0,0 +1,419 @@ +/* $OpenBSD: sha_locl.h,v 1.23 2016/12/23 23:22:25 patrick Exp $ */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +#include +#include + +#include +#include + +#define DATA_ORDER_IS_BIG_ENDIAN + +#define HASH_LONG SHA_LONG +#define HASH_CTX SHA_CTX +#define HASH_CBLOCK SHA_CBLOCK +#define HASH_MAKE_STRING(c,s) do { \ + unsigned long ll; \ + ll=(c)->h0; HOST_l2c(ll,(s)); \ + ll=(c)->h1; HOST_l2c(ll,(s)); \ + ll=(c)->h2; HOST_l2c(ll,(s)); \ + ll=(c)->h3; HOST_l2c(ll,(s)); \ + ll=(c)->h4; HOST_l2c(ll,(s)); \ + } while (0) + +# define HASH_UPDATE SHA1_Update +# define HASH_TRANSFORM SHA1_Transform +# define HASH_FINAL SHA1_Final +# define HASH_INIT SHA1_Init +# define HASH_BLOCK_DATA_ORDER sha1_block_data_order +# define Xupdate(a,ix,ia,ib,ic,id) ( (a)=(ia^ib^ic^id), \ + ix=(a)=ROTATE((a),1) \ + ) + +__BEGIN_HIDDEN_DECLS + +#ifndef SHA1_ASM +static +#endif + +void sha1_block_data_order (SHA_CTX *c, const void *p,size_t num); + +__END_HIDDEN_DECLS + +#include "md32_common.h" + +#define INIT_DATA_h0 0x67452301UL +#define INIT_DATA_h1 0xefcdab89UL +#define INIT_DATA_h2 0x98badcfeUL +#define INIT_DATA_h3 0x10325476UL +#define INIT_DATA_h4 0xc3d2e1f0UL + +int SHA1_Init(SHA_CTX *c) + { + memset (c,0,sizeof(*c)); + c->h0=INIT_DATA_h0; + c->h1=INIT_DATA_h1; + c->h2=INIT_DATA_h2; + c->h3=INIT_DATA_h3; + c->h4=INIT_DATA_h4; + return 1; + } + +#define K_00_19 0x5a827999UL +#define K_20_39 0x6ed9eba1UL +#define K_40_59 0x8f1bbcdcUL +#define K_60_79 0xca62c1d6UL + +/* As pointed out by Wei Dai , F() below can be + * simplified to the code in F_00_19. Wei attributes these optimisations + * to Peter Gutmann's SHS code, and he attributes it to Rich Schroeppel. + * #define F(x,y,z) (((x) & (y)) | ((~(x)) & (z))) + * I've just become aware of another tweak to be made, again from Wei Dai, + * in F_40_59, (x&a)|(y&a) -> (x|y)&a + */ +#define F_00_19(b,c,d) ((((c) ^ (d)) & (b)) ^ (d)) +#define F_20_39(b,c,d) ((b) ^ (c) ^ (d)) +#define F_40_59(b,c,d) (((b) & (c)) | (((b)|(c)) & (d))) +#define F_60_79(b,c,d) F_20_39(b,c,d) + +#ifndef OPENSSL_SMALL_FOOTPRINT + +#define BODY_00_15(i,a,b,c,d,e,f,xi) \ + (f)=xi+(e)+K_00_19+ROTATE((a),5)+F_00_19((b),(c),(d)); \ + (b)=ROTATE((b),30); + +#define BODY_16_19(i,a,b,c,d,e,f,xi,xa,xb,xc,xd) \ + Xupdate(f,xi,xa,xb,xc,xd); \ + (f)+=(e)+K_00_19+ROTATE((a),5)+F_00_19((b),(c),(d)); \ + (b)=ROTATE((b),30); + +#define BODY_20_31(i,a,b,c,d,e,f,xi,xa,xb,xc,xd) \ + Xupdate(f,xi,xa,xb,xc,xd); \ + (f)+=(e)+K_20_39+ROTATE((a),5)+F_20_39((b),(c),(d)); \ + (b)=ROTATE((b),30); + +#define BODY_32_39(i,a,b,c,d,e,f,xa,xb,xc,xd) \ + Xupdate(f,xa,xa,xb,xc,xd); \ + (f)+=(e)+K_20_39+ROTATE((a),5)+F_20_39((b),(c),(d)); \ + (b)=ROTATE((b),30); + +#define BODY_40_59(i,a,b,c,d,e,f,xa,xb,xc,xd) \ + Xupdate(f,xa,xa,xb,xc,xd); \ + (f)+=(e)+K_40_59+ROTATE((a),5)+F_40_59((b),(c),(d)); \ + (b)=ROTATE((b),30); + +#define BODY_60_79(i,a,b,c,d,e,f,xa,xb,xc,xd) \ + Xupdate(f,xa,xa,xb,xc,xd); \ + (f)=xa+(e)+K_60_79+ROTATE((a),5)+F_60_79((b),(c),(d)); \ + (b)=ROTATE((b),30); + +#ifdef X +#undef X +#endif +#ifndef MD32_XARRAY + /* + * Originally X was an array. As it's automatic it's natural + * to expect RISC compiler to accommodate at least part of it in + * the register bank, isn't it? Unfortunately not all compilers + * "find" this expectation reasonable:-( On order to make such + * compilers generate better code I replace X[] with a bunch of + * X0, X1, etc. See the function body below... + * + */ +# define X(i) XX##i +#else + /* + * However! Some compilers (most notably HP C) get overwhelmed by + * that many local variables so that we have to have the way to + * fall down to the original behavior. + */ +# define X(i) XX[i] +#endif + +#if !defined(SHA1_ASM) +#include +static void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num) + { + const unsigned char *data=p; + unsigned MD32_REG_T A,B,C,D,E,T,l; +#ifndef MD32_XARRAY + unsigned MD32_REG_T XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7, + XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15; +#else + SHA_LONG XX[16]; +#endif + + A=c->h0; + B=c->h1; + C=c->h2; + D=c->h3; + E=c->h4; + + for (;;) + { + + if (BYTE_ORDER != LITTLE_ENDIAN && + sizeof(SHA_LONG)==4 && ((size_t)p%4)==0) + { + const SHA_LONG *W=(const SHA_LONG *)data; + + X( 0) = W[0]; X( 1) = W[ 1]; + BODY_00_15( 0,A,B,C,D,E,T,X( 0)); X( 2) = W[ 2]; + BODY_00_15( 1,T,A,B,C,D,E,X( 1)); X( 3) = W[ 3]; + BODY_00_15( 2,E,T,A,B,C,D,X( 2)); X( 4) = W[ 4]; + BODY_00_15( 3,D,E,T,A,B,C,X( 3)); X( 5) = W[ 5]; + BODY_00_15( 4,C,D,E,T,A,B,X( 4)); X( 6) = W[ 6]; + BODY_00_15( 5,B,C,D,E,T,A,X( 5)); X( 7) = W[ 7]; + BODY_00_15( 6,A,B,C,D,E,T,X( 6)); X( 8) = W[ 8]; + BODY_00_15( 7,T,A,B,C,D,E,X( 7)); X( 9) = W[ 9]; + BODY_00_15( 8,E,T,A,B,C,D,X( 8)); X(10) = W[10]; + BODY_00_15( 9,D,E,T,A,B,C,X( 9)); X(11) = W[11]; + BODY_00_15(10,C,D,E,T,A,B,X(10)); X(12) = W[12]; + BODY_00_15(11,B,C,D,E,T,A,X(11)); X(13) = W[13]; + BODY_00_15(12,A,B,C,D,E,T,X(12)); X(14) = W[14]; + BODY_00_15(13,T,A,B,C,D,E,X(13)); X(15) = W[15]; + BODY_00_15(14,E,T,A,B,C,D,X(14)); + BODY_00_15(15,D,E,T,A,B,C,X(15)); + + data += SHA_CBLOCK; + } + else + { + HOST_c2l(data,l); X( 0)=l; HOST_c2l(data,l); X( 1)=l; + BODY_00_15( 0,A,B,C,D,E,T,X( 0)); HOST_c2l(data,l); X( 2)=l; + BODY_00_15( 1,T,A,B,C,D,E,X( 1)); HOST_c2l(data,l); X( 3)=l; + BODY_00_15( 2,E,T,A,B,C,D,X( 2)); HOST_c2l(data,l); X( 4)=l; + BODY_00_15( 3,D,E,T,A,B,C,X( 3)); HOST_c2l(data,l); X( 5)=l; + BODY_00_15( 4,C,D,E,T,A,B,X( 4)); HOST_c2l(data,l); X( 6)=l; + BODY_00_15( 5,B,C,D,E,T,A,X( 5)); HOST_c2l(data,l); X( 7)=l; + BODY_00_15( 6,A,B,C,D,E,T,X( 6)); HOST_c2l(data,l); X( 8)=l; + BODY_00_15( 7,T,A,B,C,D,E,X( 7)); HOST_c2l(data,l); X( 9)=l; + BODY_00_15( 8,E,T,A,B,C,D,X( 8)); HOST_c2l(data,l); X(10)=l; + BODY_00_15( 9,D,E,T,A,B,C,X( 9)); HOST_c2l(data,l); X(11)=l; + BODY_00_15(10,C,D,E,T,A,B,X(10)); HOST_c2l(data,l); X(12)=l; + BODY_00_15(11,B,C,D,E,T,A,X(11)); HOST_c2l(data,l); X(13)=l; + BODY_00_15(12,A,B,C,D,E,T,X(12)); HOST_c2l(data,l); X(14)=l; + BODY_00_15(13,T,A,B,C,D,E,X(13)); HOST_c2l(data,l); X(15)=l; + BODY_00_15(14,E,T,A,B,C,D,X(14)); + BODY_00_15(15,D,E,T,A,B,C,X(15)); + } + + BODY_16_19(16,C,D,E,T,A,B,X( 0),X( 0),X( 2),X( 8),X(13)); + BODY_16_19(17,B,C,D,E,T,A,X( 1),X( 1),X( 3),X( 9),X(14)); + BODY_16_19(18,A,B,C,D,E,T,X( 2),X( 2),X( 4),X(10),X(15)); + BODY_16_19(19,T,A,B,C,D,E,X( 3),X( 3),X( 5),X(11),X( 0)); + + BODY_20_31(20,E,T,A,B,C,D,X( 4),X( 4),X( 6),X(12),X( 1)); + BODY_20_31(21,D,E,T,A,B,C,X( 5),X( 5),X( 7),X(13),X( 2)); + BODY_20_31(22,C,D,E,T,A,B,X( 6),X( 6),X( 8),X(14),X( 3)); + BODY_20_31(23,B,C,D,E,T,A,X( 7),X( 7),X( 9),X(15),X( 4)); + BODY_20_31(24,A,B,C,D,E,T,X( 8),X( 8),X(10),X( 0),X( 5)); + BODY_20_31(25,T,A,B,C,D,E,X( 9),X( 9),X(11),X( 1),X( 6)); + BODY_20_31(26,E,T,A,B,C,D,X(10),X(10),X(12),X( 2),X( 7)); + BODY_20_31(27,D,E,T,A,B,C,X(11),X(11),X(13),X( 3),X( 8)); + BODY_20_31(28,C,D,E,T,A,B,X(12),X(12),X(14),X( 4),X( 9)); + BODY_20_31(29,B,C,D,E,T,A,X(13),X(13),X(15),X( 5),X(10)); + BODY_20_31(30,A,B,C,D,E,T,X(14),X(14),X( 0),X( 6),X(11)); + BODY_20_31(31,T,A,B,C,D,E,X(15),X(15),X( 1),X( 7),X(12)); + + BODY_32_39(32,E,T,A,B,C,D,X( 0),X( 2),X( 8),X(13)); + BODY_32_39(33,D,E,T,A,B,C,X( 1),X( 3),X( 9),X(14)); + BODY_32_39(34,C,D,E,T,A,B,X( 2),X( 4),X(10),X(15)); + BODY_32_39(35,B,C,D,E,T,A,X( 3),X( 5),X(11),X( 0)); + BODY_32_39(36,A,B,C,D,E,T,X( 4),X( 6),X(12),X( 1)); + BODY_32_39(37,T,A,B,C,D,E,X( 5),X( 7),X(13),X( 2)); + BODY_32_39(38,E,T,A,B,C,D,X( 6),X( 8),X(14),X( 3)); + BODY_32_39(39,D,E,T,A,B,C,X( 7),X( 9),X(15),X( 4)); + + BODY_40_59(40,C,D,E,T,A,B,X( 8),X(10),X( 0),X( 5)); + BODY_40_59(41,B,C,D,E,T,A,X( 9),X(11),X( 1),X( 6)); + BODY_40_59(42,A,B,C,D,E,T,X(10),X(12),X( 2),X( 7)); + BODY_40_59(43,T,A,B,C,D,E,X(11),X(13),X( 3),X( 8)); + BODY_40_59(44,E,T,A,B,C,D,X(12),X(14),X( 4),X( 9)); + BODY_40_59(45,D,E,T,A,B,C,X(13),X(15),X( 5),X(10)); + BODY_40_59(46,C,D,E,T,A,B,X(14),X( 0),X( 6),X(11)); + BODY_40_59(47,B,C,D,E,T,A,X(15),X( 1),X( 7),X(12)); + BODY_40_59(48,A,B,C,D,E,T,X( 0),X( 2),X( 8),X(13)); + BODY_40_59(49,T,A,B,C,D,E,X( 1),X( 3),X( 9),X(14)); + BODY_40_59(50,E,T,A,B,C,D,X( 2),X( 4),X(10),X(15)); + BODY_40_59(51,D,E,T,A,B,C,X( 3),X( 5),X(11),X( 0)); + BODY_40_59(52,C,D,E,T,A,B,X( 4),X( 6),X(12),X( 1)); + BODY_40_59(53,B,C,D,E,T,A,X( 5),X( 7),X(13),X( 2)); + BODY_40_59(54,A,B,C,D,E,T,X( 6),X( 8),X(14),X( 3)); + BODY_40_59(55,T,A,B,C,D,E,X( 7),X( 9),X(15),X( 4)); + BODY_40_59(56,E,T,A,B,C,D,X( 8),X(10),X( 0),X( 5)); + BODY_40_59(57,D,E,T,A,B,C,X( 9),X(11),X( 1),X( 6)); + BODY_40_59(58,C,D,E,T,A,B,X(10),X(12),X( 2),X( 7)); + BODY_40_59(59,B,C,D,E,T,A,X(11),X(13),X( 3),X( 8)); + + BODY_60_79(60,A,B,C,D,E,T,X(12),X(14),X( 4),X( 9)); + BODY_60_79(61,T,A,B,C,D,E,X(13),X(15),X( 5),X(10)); + BODY_60_79(62,E,T,A,B,C,D,X(14),X( 0),X( 6),X(11)); + BODY_60_79(63,D,E,T,A,B,C,X(15),X( 1),X( 7),X(12)); + BODY_60_79(64,C,D,E,T,A,B,X( 0),X( 2),X( 8),X(13)); + BODY_60_79(65,B,C,D,E,T,A,X( 1),X( 3),X( 9),X(14)); + BODY_60_79(66,A,B,C,D,E,T,X( 2),X( 4),X(10),X(15)); + BODY_60_79(67,T,A,B,C,D,E,X( 3),X( 5),X(11),X( 0)); + BODY_60_79(68,E,T,A,B,C,D,X( 4),X( 6),X(12),X( 1)); + BODY_60_79(69,D,E,T,A,B,C,X( 5),X( 7),X(13),X( 2)); + BODY_60_79(70,C,D,E,T,A,B,X( 6),X( 8),X(14),X( 3)); + BODY_60_79(71,B,C,D,E,T,A,X( 7),X( 9),X(15),X( 4)); + BODY_60_79(72,A,B,C,D,E,T,X( 8),X(10),X( 0),X( 5)); + BODY_60_79(73,T,A,B,C,D,E,X( 9),X(11),X( 1),X( 6)); + BODY_60_79(74,E,T,A,B,C,D,X(10),X(12),X( 2),X( 7)); + BODY_60_79(75,D,E,T,A,B,C,X(11),X(13),X( 3),X( 8)); + BODY_60_79(76,C,D,E,T,A,B,X(12),X(14),X( 4),X( 9)); + BODY_60_79(77,B,C,D,E,T,A,X(13),X(15),X( 5),X(10)); + BODY_60_79(78,A,B,C,D,E,T,X(14),X( 0),X( 6),X(11)); + BODY_60_79(79,T,A,B,C,D,E,X(15),X( 1),X( 7),X(12)); + + c->h0=(c->h0+E)&0xffffffffL; + c->h1=(c->h1+T)&0xffffffffL; + c->h2=(c->h2+A)&0xffffffffL; + c->h3=(c->h3+B)&0xffffffffL; + c->h4=(c->h4+C)&0xffffffffL; + + if (--num == 0) break; + + A=c->h0; + B=c->h1; + C=c->h2; + D=c->h3; + E=c->h4; + + } + } +#endif + +#else /* OPENSSL_SMALL_FOOTPRINT */ + +#define BODY_00_15(xi) do { \ + T=E+K_00_19+F_00_19(B,C,D); \ + E=D, D=C, C=ROTATE(B,30), B=A; \ + A=ROTATE(A,5)+T+xi; } while(0) + +#define BODY_16_19(xa,xb,xc,xd) do { \ + Xupdate(T,xa,xa,xb,xc,xd); \ + T+=E+K_00_19+F_00_19(B,C,D); \ + E=D, D=C, C=ROTATE(B,30), B=A; \ + A=ROTATE(A,5)+T; } while(0) + +#define BODY_20_39(xa,xb,xc,xd) do { \ + Xupdate(T,xa,xa,xb,xc,xd); \ + T+=E+K_20_39+F_20_39(B,C,D); \ + E=D, D=C, C=ROTATE(B,30), B=A; \ + A=ROTATE(A,5)+T; } while(0) + +#define BODY_40_59(xa,xb,xc,xd) do { \ + Xupdate(T,xa,xa,xb,xc,xd); \ + T+=E+K_40_59+F_40_59(B,C,D); \ + E=D, D=C, C=ROTATE(B,30), B=A; \ + A=ROTATE(A,5)+T; } while(0) + +#define BODY_60_79(xa,xb,xc,xd) do { \ + Xupdate(T,xa,xa,xb,xc,xd); \ + T=E+K_60_79+F_60_79(B,C,D); \ + E=D, D=C, C=ROTATE(B,30), B=A; \ + A=ROTATE(A,5)+T+xa; } while(0) + +#if !defined(SHA1_ASM) +static void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num) + { + const unsigned char *data=p; + unsigned MD32_REG_T A,B,C,D,E,T,l; + int i; + SHA_LONG X[16]; + + A=c->h0; + B=c->h1; + C=c->h2; + D=c->h3; + E=c->h4; + + for (;;) + { + for (i=0;i<16;i++) + { HOST_c2l(data,l); X[i]=l; BODY_00_15(X[i]); } + for (i=0;i<4;i++) + { BODY_16_19(X[i], X[i+2], X[i+8], X[(i+13)&15]); } + for (;i<24;i++) + { BODY_20_39(X[i&15], X[(i+2)&15], X[(i+8)&15],X[(i+13)&15]); } + for (i=0;i<20;i++) + { BODY_40_59(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); } + for (i=4;i<24;i++) + { BODY_60_79(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); } + + c->h0=(c->h0+A)&0xffffffffL; + c->h1=(c->h1+B)&0xffffffffL; + c->h2=(c->h2+C)&0xffffffffL; + c->h3=(c->h3+D)&0xffffffffL; + c->h4=(c->h4+E)&0xffffffffL; + + if (--num == 0) break; + + A=c->h0; + B=c->h1; + C=c->h2; + D=c->h3; + E=c->h4; + + } + } +#endif + +#endif -- cgit v1.2.3