Diffstat (limited to 'ext')
-rw-r--r--  ext/Makefile | 13
-rw-r--r--  ext/aes/Makefile | 15
-rw-r--r--  ext/aes/aes.c | 492
-rw-r--r--  ext/aes/aes.h | 37
-rw-r--r--  ext/blowfish/Makefile | 15
-rw-r--r--  ext/blowfish/blowfish.c | 539
-rw-r--r--  ext/blowfish/blowfish.h | 38
-rw-r--r--  ext/crypto_obj.mk | 3
-rw-r--r--  ext/libressl/Makefile | 12
-rw-r--r--  ext/libressl/README | 1
-rw-r--r--  ext/libressl/crypto/aead/Makefile | 13
-rw-r--r--  ext/libressl/crypto/aead/e_chacha20poly1305.c | 395
-rw-r--r--  ext/libressl/crypto/aes/Makefile | 14
-rw-r--r--  ext/libressl/crypto/aes/aes-elf-armv4.S | 1074
-rw-r--r--  ext/libressl/crypto/aes/aes-elf-x86_64.S | 2547
-rw-r--r--  ext/libressl/crypto/aes/aes-macosx-x86_64.S | 2544
-rw-r--r--  ext/libressl/crypto/aes/aes-masm-x86_64.S | 2948
-rw-r--r--  ext/libressl/crypto/aes/aes-mingw64-x86_64.S | 2861
-rw-r--r--  ext/libressl/crypto/aes/aes_cbc.c | 65
-rw-r--r--  ext/libressl/crypto/aes/aes_cfb.c | 84
-rw-r--r--  ext/libressl/crypto/aes/aes_core.c | 1374
-rw-r--r--  ext/libressl/crypto/aes/aes_ctr.c | 62
-rw-r--r--  ext/libressl/crypto/aes/aes_ecb.c | 69
-rw-r--r--  ext/libressl/crypto/aes/aes_ige.c | 194
-rw-r--r--  ext/libressl/crypto/aes/aes_locl.h | 83
-rw-r--r--  ext/libressl/crypto/aes/aes_misc.c | 65
-rw-r--r--  ext/libressl/crypto/aes/aes_ofb.c | 61
-rw-r--r--  ext/libressl/crypto/aes/aes_wrap.c | 133
-rw-r--r--  ext/libressl/crypto/aes/aesni-elf-x86_64.S | 2539
-rw-r--r--  ext/libressl/crypto/aes/aesni-macosx-x86_64.S | 2536
-rw-r--r--  ext/libressl/crypto/aes/aesni-masm-x86_64.S | 3099
-rw-r--r--  ext/libressl/crypto/aes/aesni-mingw64-x86_64.S | 3008
-rw-r--r--  ext/libressl/crypto/aes/aesni-sha1-elf-x86_64.S | 1401
-rw-r--r--  ext/libressl/crypto/aes/aesni-sha1-macosx-x86_64.S | 1398
-rw-r--r--  ext/libressl/crypto/aes/aesni-sha1-masm-x86_64.S | 1616
-rw-r--r--  ext/libressl/crypto/aes/aesni-sha1-mingw64-x86_64.S | 1536
-rw-r--r--  ext/libressl/crypto/aes/bsaes-elf-x86_64.S | 2502
-rw-r--r--  ext/libressl/crypto/aes/bsaes-macosx-x86_64.S | 2499
-rw-r--r--  ext/libressl/crypto/aes/bsaes-masm-x86_64.S | 2803
-rw-r--r--  ext/libressl/crypto/aes/bsaes-mingw64-x86_64.S | 2725
-rw-r--r--  ext/libressl/crypto/aes/vpaes-elf-x86_64.S | 832
-rw-r--r--  ext/libressl/crypto/aes/vpaes-macosx-x86_64.S | 829
-rw-r--r--  ext/libressl/crypto/aes/vpaes-masm-x86_64.S | 1213
-rw-r--r--  ext/libressl/crypto/aes/vpaes-mingw64-x86_64.S | 1125
-rw-r--r--  ext/libressl/crypto/bf/Makefile | 14
-rw-r--r--  ext/libressl/crypto/bf/bf_cfb64.c | 121
-rw-r--r--  ext/libressl/crypto/bf/bf_ecb.c | 94
-rw-r--r--  ext/libressl/crypto/bf/bf_enc.c | 306
-rw-r--r--  ext/libressl/crypto/bf/bf_locl.h | 219
-rw-r--r--  ext/libressl/crypto/bf/bf_ofb64.c | 110
-rw-r--r--  ext/libressl/crypto/bf/bf_pi.h | 328
-rw-r--r--  ext/libressl/crypto/bf/bf_skey.c | 117
-rw-r--r--  ext/libressl/crypto/chacha/Makefile | 13
-rw-r--r--  ext/libressl/crypto/chacha/chacha-merged.c | 325
-rw-r--r--  ext/libressl/crypto/chacha/chacha.c | 87
-rw-r--r--  ext/libressl/crypto/compat/Makefile | 13
-rw-r--r--  ext/libressl/crypto/compat/arc4random.c | 216
-rw-r--r--  ext/libressl/crypto/compat/arc4random.h | 41
-rw-r--r--  ext/libressl/crypto/compat/arc4random_aix.h | 81
-rw-r--r--  ext/libressl/crypto/compat/arc4random_fe310.h | 27
-rw-r--r--  ext/libressl/crypto/compat/arc4random_freebsd.h | 87
-rw-r--r--  ext/libressl/crypto/compat/arc4random_hpux.h | 81
-rw-r--r--  ext/libressl/crypto/compat/arc4random_linux.h | 88
-rw-r--r--  ext/libressl/crypto/compat/arc4random_netbsd.h | 87
-rw-r--r--  ext/libressl/crypto/compat/arc4random_osx.h | 81
-rw-r--r--  ext/libressl/crypto/compat/arc4random_solaris.h | 81
-rw-r--r--  ext/libressl/crypto/compat/arc4random_uniform.c | 56
-rw-r--r--  ext/libressl/crypto/compat/arc4random_win.h | 78
-rw-r--r--  ext/libressl/crypto/compat/chacha_private.h | 222
-rw-r--r--  ext/libressl/crypto/compat/explicit_bzero.c | 19
-rw-r--r--  ext/libressl/crypto/compat/explicit_bzero_win.c | 13
-rw-r--r--  ext/libressl/crypto/compat/getentropy_aix.c | 402
-rw-r--r--  ext/libressl/crypto/compat/getentropy_freebsd.c | 60
-rw-r--r--  ext/libressl/crypto/compat/getentropy_hpux.c | 396
-rw-r--r--  ext/libressl/crypto/compat/getentropy_linux.c | 525
-rw-r--r--  ext/libressl/crypto/compat/getentropy_netbsd.c | 62
-rw-r--r--  ext/libressl/crypto/compat/getentropy_osx.c | 417
-rw-r--r--  ext/libressl/crypto/compat/getentropy_solaris.c | 422
-rw-r--r--  ext/libressl/crypto/compat/getentropy_win.c | 50
-rw-r--r--  ext/libressl/crypto/compat/timingsafe_bcmp.c | 29
-rw-r--r--  ext/libressl/crypto/compat/timingsafe_memcmp.c | 46
-rw-r--r--  ext/libressl/crypto/curve25519/Makefile | 14
-rw-r--r--  ext/libressl/crypto/curve25519/curve25519-generic.c | 34
-rw-r--r--  ext/libressl/crypto/curve25519/curve25519.c | 4935
-rw-r--r--  ext/libressl/crypto/curve25519/curve25519_internal.h | 99
-rw-r--r--  ext/libressl/crypto/modes/Makefile | 14
-rw-r--r--  ext/libressl/crypto/modes/cbc128.c | 202
-rw-r--r--  ext/libressl/crypto/modes/ccm128.c | 441
-rw-r--r--  ext/libressl/crypto/modes/cfb128.c | 234
-rw-r--r--  ext/libressl/crypto/modes/ctr128.c | 251
-rw-r--r--  ext/libressl/crypto/modes/cts128.c | 267
-rw-r--r--  ext/libressl/crypto/modes/gcm128.c | 1566
-rw-r--r--  ext/libressl/crypto/modes/ghash-elf-armv4.S | 412
-rw-r--r--  ext/libressl/crypto/modes/ghash-elf-x86_64.S | 1030
-rw-r--r--  ext/libressl/crypto/modes/ghash-macosx-x86_64.S | 1027
-rw-r--r--  ext/libressl/crypto/modes/ghash-masm-x86_64.S | 1256
-rw-r--r--  ext/libressl/crypto/modes/ghash-mingw64-x86_64.S | 1175
-rw-r--r--  ext/libressl/crypto/modes/modes_lcl.h | 113
-rw-r--r--  ext/libressl/crypto/modes/ofb128.c | 119
-rw-r--r--  ext/libressl/crypto/modes/xts128.c | 185
-rw-r--r--  ext/libressl/crypto/poly1305/Makefile | 13
-rw-r--r--  ext/libressl/crypto/poly1305/poly1305-donna.c | 321
-rw-r--r--  ext/libressl/crypto/poly1305/poly1305.c | 38
-rw-r--r--  ext/libressl/crypto/sha/Makefile | 14
-rw-r--r--  ext/libressl/crypto/sha/sha1-elf-armv4.S | 455
-rw-r--r--  ext/libressl/crypto/sha/sha1-elf-x86_64.S | 2491
-rw-r--r--  ext/libressl/crypto/sha/sha1-macosx-x86_64.S | 2488
-rw-r--r--  ext/libressl/crypto/sha/sha1-masm-x86_64.S | 2746
-rw-r--r--  ext/libressl/crypto/sha/sha1-mingw64-x86_64.S | 2664
-rw-r--r--  ext/libressl/crypto/sha/sha1_one.c | 81
-rw-r--r--  ext/libressl/crypto/sha/sha1dgst.c | 72
-rw-r--r--  ext/libressl/crypto/sha/sha256-elf-armv4.S | 1520
-rw-r--r--  ext/libressl/crypto/sha/sha256-elf-x86_64.S | 1782
-rw-r--r--  ext/libressl/crypto/sha/sha256-macosx-x86_64.S | 1779
-rw-r--r--  ext/libressl/crypto/sha/sha256-masm-x86_64.S | 1864
-rw-r--r--  ext/libressl/crypto/sha/sha256-mingw64-x86_64.S | 1790
-rw-r--r--  ext/libressl/crypto/sha/sha256.c | 284
-rw-r--r--  ext/libressl/crypto/sha/sha512-elf-armv4.S | 1786
-rw-r--r--  ext/libressl/crypto/sha/sha512-elf-x86_64.S | 1806
-rw-r--r--  ext/libressl/crypto/sha/sha512-macosx-x86_64.S | 1803
-rw-r--r--  ext/libressl/crypto/sha/sha512-masm-x86_64.S | 1888
-rw-r--r--  ext/libressl/crypto/sha/sha512-mingw64-x86_64.S | 1814
-rw-r--r--  ext/libressl/crypto/sha/sha512.c | 547
-rw-r--r--  ext/libressl/crypto/sha/sha_locl.h | 419
-rw-r--r--  ext/libressl/include/compat/string.h | 87
-rw-r--r--  ext/libressl/include/compat/unistd.h | 78
-rw-r--r--  ext/libressl/include/md32_common.h | 345
-rw-r--r--  ext/libressl/include/openssl/aes.h | 126
-rw-r--r--  ext/libressl/include/openssl/blowfish.h | 112
-rw-r--r--  ext/libressl/include/openssl/chacha.h | 58
-rw-r--r--  ext/libressl/include/openssl/crypto.h | 12
-rw-r--r--  ext/libressl/include/openssl/curve25519.h | 77
-rw-r--r--  ext/libressl/include/openssl/modes.h | 144
-rw-r--r--  ext/libressl/include/openssl/opensslconf.h | 153
-rw-r--r--  ext/libressl/include/openssl/opensslfeatures.h | 120
-rw-r--r--  ext/libressl/include/openssl/opensslv.h | 18
-rw-r--r--  ext/libressl/include/openssl/poly1305.h | 49
-rw-r--r--  ext/libressl/include/openssl/sha.h | 192
-rw-r--r--  ext/libressl/ssl_common.mk | 7
-rw-r--r--  ext/libressl/ssl_obj.mk | 8
-rw-r--r--  ext/sha/Makefile | 15
-rw-r--r--  ext/sha/sha1.c | 294
-rw-r--r--  ext/sha/sha1.h | 44
143 files changed, 98604 insertions, 0 deletions
diff --git a/ext/Makefile b/ext/Makefile
new file mode 100644
index 0000000..1168470
--- /dev/null
+++ b/ext/Makefile
@@ -0,0 +1,13 @@
+include common.mk
+include crypto_obj.mk
+
+all:
+ for i in $(subdirs); do \
+ (cd $$i && $(MAKE)) || exit; \
+ done
+
+clean:
+ for i in $(subdirs); do \
+ (cd $$i && $(MAKE) clean) || exit; \
+ done
+ rm -f *.o *.a
diff --git a/ext/aes/Makefile b/ext/aes/Makefile
new file mode 100644
index 0000000..463aed3
--- /dev/null
+++ b/ext/aes/Makefile
@@ -0,0 +1,15 @@
+include common.mk
+
+obj = aes.o
+
+
+%.o: %.c %.h
+ $(CC) $(CFLAGS) -c $<
+
+%.o: %.S
+ $(CC) $(CFLAGS) -c $<
+
+all: $(obj)
+
+clean:
+ rm -f *.o
diff --git a/ext/aes/aes.c b/ext/aes/aes.c
new file mode 100644
index 0000000..fc74d8d
--- /dev/null
+++ b/ext/aes/aes.c
@@ -0,0 +1,492 @@
+/*
+
+This is an implementation of the AES algorithm, specifically ECB and CBC mode.
+The key size can be chosen in aes.h - available choices are AES128, AES192, AES256.
+
+The implementation is verified against the test vectors in:
+ National Institute of Standards and Technology Special Publication 800-38A 2001 ED
+
+ECB-AES128
+----------
+
+ plain-text:
+ 6bc1bee22e409f96e93d7e117393172a
+ ae2d8a571e03ac9c9eb76fac45af8e51
+ 30c81c46a35ce411e5fbc1191a0a52ef
+ f69f2445df4f9b17ad2b417be66c3710
+
+ key:
+ 2b7e151628aed2a6abf7158809cf4f3c
+
+ resulting cipher
+ 3ad77bb40d7a3660a89ecaf32466ef97
+ f5d3d58503b9699de785895a96fdbaaf
+ 43b1cd7f598ece23881b00e3ed030688
+ 7b0c785e27e8ad3f8223207104725dd4
+
+
+NOTE: String length must be evenly divisible by 16 bytes (str_len % 16 == 0)
+ You should pad the end of the string with zeros if this is not the case.
+ For AES192/256 the key size is proportionally larger.
+
+*/
+
+
+/*****************************************************************************/
+/* Includes: */
+/*****************************************************************************/
+#include <string.h> // CBC mode, for memset
+#include "aes.h"
+
+/*****************************************************************************/
+/* Defines: */
+/*****************************************************************************/
+// The number of columns comprising a state in AES. This is a constant in AES. Value=4
+#define Nb 4
+
+#if defined(AES256) && (AES256 == 1)
+ #define Nk 8
+ #define Nr 14
+#elif defined(AES192) && (AES192 == 1)
+ #define Nk 6
+ #define Nr 12
+#else
+ #define Nk 4 // The number of 32 bit words in a key.
+ #define Nr 10 // The number of rounds in AES Cipher.
+#endif
+
+// jcallan@github points out that declaring Multiply as a function
+// reduces code size considerably with the Keil ARM compiler.
+// See this link for more information: https://github.com/kokke/tiny-AES-C/pull/3
+#ifndef MULTIPLY_AS_A_FUNCTION
+ #define MULTIPLY_AS_A_FUNCTION 0
+#endif
+
+
+
+
+/*****************************************************************************/
+/* Private variables: */
+/*****************************************************************************/
+// state - array holding the intermediate results during encryption/decryption.
+typedef uint8_t state_t[4][4];
+
+
+
+// The lookup-tables are marked const so they can be placed in read-only storage instead of RAM
+// The numbers below can be computed dynamically trading ROM for RAM -
+// This can be useful in (embedded) bootloader applications, where ROM is often limited.
+static const uint8_t sbox[256] = {
+ //0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 };
+
+static const uint8_t rsbox[256] = {
+ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d };
+
+// The round constant word array, rcon[i], contains the values given by
+// x raised to the power (i-1), where x is denoted {02}, in the field GF(2^8)
+static const uint8_t rcon[11] = {
+ 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 };
+
+/*
+ * Jordan Goulder points out in PR #12 (https://github.com/kokke/tiny-AES-C/pull/12),
+ * that you can remove most of the elements in the rcon array, because they are unused.
+ *
+ * From Wikipedia's article on the Rijndael key schedule @ https://en.wikipedia.org/wiki/Rijndael_key_schedule#rcon
+ *
+ * "Only the first some of these constants are actually used – up to rcon[10] for AES-128 (as 11 round keys are needed),
+ * up to rcon[8] for AES-192, up to rcon[7] for AES-256. rcon[0] is not used in AES algorithm."
+ */
+
+
+/*****************************************************************************/
+/* Private functions: */
+/*****************************************************************************/
+/*
+static uint8_t getSBoxValue(uint8_t num)
+{
+ return sbox[num];
+}
+*/
+#define getSBoxValue(num) (sbox[(num)])
+
+// This function produces Nb(Nr+1) round keys. The round keys are used in each round to encrypt/decrypt the state.
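+// For the default AES-128 build (Nk = 4, Nr = 10) that is 4 * (10 + 1) = 44 words,
+// i.e. 176 bytes (the AES_KEYEXPSIZE value in aes.h).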
+static void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key)
+{
+ unsigned i, j, k;
+ uint8_t tempa[4]; // Used for the column/row operations
+
+ // The first round key is the key itself.
+ for (i = 0; i < Nk; ++i)
+ {
+ RoundKey[(i * 4) + 0] = Key[(i * 4) + 0];
+ RoundKey[(i * 4) + 1] = Key[(i * 4) + 1];
+ RoundKey[(i * 4) + 2] = Key[(i * 4) + 2];
+ RoundKey[(i * 4) + 3] = Key[(i * 4) + 3];
+ }
+
+ // All other round keys are found from the previous round keys.
+ for (i = Nk; i < Nb * (Nr + 1); ++i)
+ {
+ {
+ k = (i - 1) * 4;
+ tempa[0]=RoundKey[k + 0];
+ tempa[1]=RoundKey[k + 1];
+ tempa[2]=RoundKey[k + 2];
+ tempa[3]=RoundKey[k + 3];
+
+ }
+
+ if (i % Nk == 0)
+ {
+ // This function shifts the 4 bytes in a word to the left once.
+ // [a0,a1,a2,a3] becomes [a1,a2,a3,a0]
+
+ // Function RotWord()
+ {
+ const uint8_t u8tmp = tempa[0];
+ tempa[0] = tempa[1];
+ tempa[1] = tempa[2];
+ tempa[2] = tempa[3];
+ tempa[3] = u8tmp;
+ }
+
+ // SubWord() is a function that takes a four-byte input word and
+ // applies the S-box to each of the four bytes to produce an output word.
+
+ // Function Subword()
+ {
+ tempa[0] = getSBoxValue(tempa[0]);
+ tempa[1] = getSBoxValue(tempa[1]);
+ tempa[2] = getSBoxValue(tempa[2]);
+ tempa[3] = getSBoxValue(tempa[3]);
+ }
+
+ tempa[0] = tempa[0] ^ rcon[i/Nk];
+ }
+#if defined(AES256) && (AES256 == 1)
+ if (i % Nk == 4)
+ {
+ // Function Subword()
+ {
+ tempa[0] = getSBoxValue(tempa[0]);
+ tempa[1] = getSBoxValue(tempa[1]);
+ tempa[2] = getSBoxValue(tempa[2]);
+ tempa[3] = getSBoxValue(tempa[3]);
+ }
+ }
+#endif
+ j = i * 4; k=(i - Nk) * 4;
+ RoundKey[j + 0] = RoundKey[k + 0] ^ tempa[0];
+ RoundKey[j + 1] = RoundKey[k + 1] ^ tempa[1];
+ RoundKey[j + 2] = RoundKey[k + 2] ^ tempa[2];
+ RoundKey[j + 3] = RoundKey[k + 3] ^ tempa[3];
+ }
+}
+
+// This function adds the round key to state.
+// The round key is added to the state by an XOR function.
+static void AddRoundKey(uint8_t round, state_t* state, const uint8_t* RoundKey)
+{
+ uint8_t i,j;
+ for (i = 0; i < 4; ++i)
+ {
+ for (j = 0; j < 4; ++j)
+ {
+ (*state)[i][j] ^= RoundKey[(round * Nb * 4) + (i * Nb) + j];
+ }
+ }
+}
+
+// The SubBytes Function Substitutes the values in the
+// state matrix with values in an S-box.
+static void SubBytes(state_t* state)
+{
+ uint8_t i, j;
+ for (i = 0; i < 4; ++i)
+ {
+ for (j = 0; j < 4; ++j)
+ {
+ (*state)[j][i] = getSBoxValue((*state)[j][i]);
+ }
+ }
+}
+
+// The ShiftRows() function shifts the rows in the state to the left.
+// Each row is shifted with a different offset.
+// Offset = Row number. So the first row is not shifted.
+static void ShiftRows(state_t* state)
+{
+ uint8_t temp;
+
+ // Rotate first row 1 columns to left
+ temp = (*state)[0][1];
+ (*state)[0][1] = (*state)[1][1];
+ (*state)[1][1] = (*state)[2][1];
+ (*state)[2][1] = (*state)[3][1];
+ (*state)[3][1] = temp;
+
+ // Rotate second row 2 columns to left
+ temp = (*state)[0][2];
+ (*state)[0][2] = (*state)[2][2];
+ (*state)[2][2] = temp;
+
+ temp = (*state)[1][2];
+ (*state)[1][2] = (*state)[3][2];
+ (*state)[3][2] = temp;
+
+ // Rotate third row 3 columns to left
+ temp = (*state)[0][3];
+ (*state)[0][3] = (*state)[3][3];
+ (*state)[3][3] = (*state)[2][3];
+ (*state)[2][3] = (*state)[1][3];
+ (*state)[1][3] = temp;
+}
+
+static uint8_t xtime(uint8_t x)
+{
+ return ((x<<1) ^ (((x>>7) & 1) * 0x1b));
+}
+
+// MixColumns function mixes the columns of the state matrix
+static void MixColumns(state_t* state)
+{
+ uint8_t i;
+ uint8_t Tmp, Tm, t;
+ for (i = 0; i < 4; ++i)
+ {
+ t = (*state)[i][0];
+ Tmp = (*state)[i][0] ^ (*state)[i][1] ^ (*state)[i][2] ^ (*state)[i][3] ;
+ Tm = (*state)[i][0] ^ (*state)[i][1] ; Tm = xtime(Tm); (*state)[i][0] ^= Tm ^ Tmp ;
+ Tm = (*state)[i][1] ^ (*state)[i][2] ; Tm = xtime(Tm); (*state)[i][1] ^= Tm ^ Tmp ;
+ Tm = (*state)[i][2] ^ (*state)[i][3] ; Tm = xtime(Tm); (*state)[i][2] ^= Tm ^ Tmp ;
+ Tm = (*state)[i][3] ^ t ; Tm = xtime(Tm); (*state)[i][3] ^= Tm ^ Tmp ;
+ }
+}
+
+// Multiply is used to multiply numbers in the field GF(2^8)
+// Note: The last call to xtime() is unneeded, but often ends up generating a smaller binary
+// The compiler seems to be able to vectorize the operation better this way.
+// See https://github.com/kokke/tiny-AES-c/pull/34
+#if MULTIPLY_AS_A_FUNCTION
+static uint8_t Multiply(uint8_t x, uint8_t y)
+{
+ return (((y & 1) * x) ^
+ ((y>>1 & 1) * xtime(x)) ^
+ ((y>>2 & 1) * xtime(xtime(x))) ^
+ ((y>>3 & 1) * xtime(xtime(xtime(x)))) ^
+ ((y>>4 & 1) * xtime(xtime(xtime(xtime(x)))))); /* this last call to xtime() can be omitted */
+ }
+#else
+#define Multiply(x, y) \
+ ( ((y & 1) * x) ^ \
+ ((y>>1 & 1) * xtime(x)) ^ \
+ ((y>>2 & 1) * xtime(xtime(x))) ^ \
+ ((y>>3 & 1) * xtime(xtime(xtime(x)))) ^ \
+ ((y>>4 & 1) * xtime(xtime(xtime(xtime(x)))))) \
+
+#endif
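+// Worked example: Multiply(0x57, 0x13) expands to
+//   0x57 ^ xtime(0x57) ^ xtime(xtime(xtime(xtime(0x57))))
+//   = 0x57 ^ 0xae ^ 0x07
+//   = 0xfe,
+// matching the {57}x{13} = {fe} example in FIPS-197, section 4.2.1.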
+
+/*
+static uint8_t getSBoxInvert(uint8_t num)
+{
+ return rsbox[num];
+}
+*/
+#define getSBoxInvert(num) (rsbox[(num)])
+
+// InvMixColumns mixes the columns of the state matrix using the inverse MixColumns transform.
+// The method used to multiply may be difficult to understand for the inexperienced.
+// Please use the references to gain more information.
+static void InvMixColumns(state_t* state)
+{
+ int i;
+ uint8_t a, b, c, d;
+ for (i = 0; i < 4; ++i)
+ {
+ a = (*state)[i][0];
+ b = (*state)[i][1];
+ c = (*state)[i][2];
+ d = (*state)[i][3];
+
+ (*state)[i][0] = Multiply(a, 0x0e) ^ Multiply(b, 0x0b) ^ Multiply(c, 0x0d) ^ Multiply(d, 0x09);
+ (*state)[i][1] = Multiply(a, 0x09) ^ Multiply(b, 0x0e) ^ Multiply(c, 0x0b) ^ Multiply(d, 0x0d);
+ (*state)[i][2] = Multiply(a, 0x0d) ^ Multiply(b, 0x09) ^ Multiply(c, 0x0e) ^ Multiply(d, 0x0b);
+ (*state)[i][3] = Multiply(a, 0x0b) ^ Multiply(b, 0x0d) ^ Multiply(c, 0x09) ^ Multiply(d, 0x0e);
+ }
+}
+
+
+// The InvSubBytes function substitutes the values in the
+// state matrix with values from the inverse S-box.
+static void InvSubBytes(state_t* state)
+{
+ uint8_t i, j;
+ for (i = 0; i < 4; ++i)
+ {
+ for (j = 0; j < 4; ++j)
+ {
+ (*state)[j][i] = getSBoxInvert((*state)[j][i]);
+ }
+ }
+}
+
+static void InvShiftRows(state_t* state)
+{
+ uint8_t temp;
+
+ // Rotate first row 1 columns to right
+ temp = (*state)[3][1];
+ (*state)[3][1] = (*state)[2][1];
+ (*state)[2][1] = (*state)[1][1];
+ (*state)[1][1] = (*state)[0][1];
+ (*state)[0][1] = temp;
+
+ // Rotate second row 2 columns to right
+ temp = (*state)[0][2];
+ (*state)[0][2] = (*state)[2][2];
+ (*state)[2][2] = temp;
+
+ temp = (*state)[1][2];
+ (*state)[1][2] = (*state)[3][2];
+ (*state)[3][2] = temp;
+
+ // Rotate third row 3 columns to right
+ temp = (*state)[0][3];
+ (*state)[0][3] = (*state)[1][3];
+ (*state)[1][3] = (*state)[2][3];
+ (*state)[2][3] = (*state)[3][3];
+ (*state)[3][3] = temp;
+}
+
+// Cipher is the main function that encrypts the PlainText.
+static void Cipher(state_t* state, const uint8_t* RoundKey)
+{
+ uint8_t round = 0;
+
+ // Add the First round key to the state before starting the rounds.
+ AddRoundKey(0, state, RoundKey);
+
+ // There will be Nr rounds.
+ // The first Nr-1 rounds are identical.
+ // These Nr rounds are executed in the loop below.
+ // Last one without MixColumns()
+ for (round = 1; ; ++round)
+ {
+ SubBytes(state);
+ ShiftRows(state);
+ if (round == Nr) {
+ break;
+ }
+ MixColumns(state);
+ AddRoundKey(round, state, RoundKey);
+ }
+ // Add round key to last round
+ AddRoundKey(Nr, state, RoundKey);
+}
+
+static void InvCipher(state_t* state, const uint8_t* RoundKey)
+{
+ uint8_t round = 0;
+
+ // Add the First round key to the state before starting the rounds.
+ AddRoundKey(Nr, state, RoundKey);
+
+ // There will be Nr rounds.
+ // The first Nr-1 rounds are identical.
+ // These Nr rounds are executed in the loop below.
+ // Last one without InvMixColumn()
+ for (round = (Nr - 1); ; --round)
+ {
+ InvShiftRows(state);
+ InvSubBytes(state);
+ AddRoundKey(round, state, RoundKey);
+ if (round == 0) {
+ break;
+ }
+ InvMixColumns(state);
+ }
+
+}
+
+static void XorBlock(uint8_t* buf, const uint8_t* block)
+{
+ uint8_t i;
+ for (i = 0; i < AES_BLOCKLEN; ++i) // The block in AES is always 128bit no matter the key size
+ {
+ buf[i] ^= block[i];
+ }
+}
+
+/*****************************************************************************/
+/* Public functions: */
+/*****************************************************************************/
+
+void aes_init(AESCtx *ctx, uint8_t *key) {
+ KeyExpansion(ctx->RoundKey, key);
+}
+
+void aes_ecb_encrypt(AESCtx *ctx, uint8_t *buf) {
+ Cipher((state_t*)buf, ctx->RoundKey);
+}
+
+void aes_ecb_decrypt(AESCtx *ctx, uint8_t *buf) {
+ InvCipher((state_t*)buf, ctx->RoundKey);
+}
+
+void aes_cbc_encrypt(AESCtx *ctx, uint8_t *iv, uint8_t *buf, size_t length) {
+ size_t i;
+ uint8_t *block = iv;
+
+ for (i = 0; i < length; i += AES_BLOCKLEN) {
+ XorBlock(buf, block);
+ Cipher((state_t*)buf, ctx->RoundKey);
+ block = buf;
+ buf += AES_BLOCKLEN;
+ }
+}
+
+void aes_cbc_decrypt(AESCtx *ctx, uint8_t *iv, uint8_t *buf, size_t length) {
+ size_t i;
+ uint8_t block[AES_BLOCKLEN];
+ uint8_t block_next[AES_BLOCKLEN];
+
+ memcpy(block, iv, AES_BLOCKLEN);
+ for (i = 0; i < length; i += AES_BLOCKLEN) {
+ memcpy(block_next, buf, AES_BLOCKLEN);
+ InvCipher((state_t*)buf, ctx->RoundKey);
+ XorBlock(buf, block);
+ memcpy(block, block_next, AES_BLOCKLEN);
+ buf += AES_BLOCKLEN;
+ }
+}
diff --git a/ext/aes/aes.h b/ext/aes/aes.h
new file mode 100644
index 0000000..f6dd079
--- /dev/null
+++ b/ext/aes/aes.h
@@ -0,0 +1,37 @@
+#ifndef _AES_H_
+#define _AES_H_
+
+#include <stdint.h>
+#include <stddef.h>
+
+#define AES128 1
+//#define AES192 1
+//#define AES256 1
+
+#define AES_BLOCKLEN 16 // Block length in bytes - AES is 128b block only
+
+#if defined(AES256) && (AES256 == 1)
+ #define AES_KEYLEN 32
+ #define AES_KEYEXPSIZE 240
+#elif defined(AES192) && (AES192 == 1)
+ #define AES_KEYLEN 24
+ #define AES_KEYEXPSIZE 208
+#else
+ #define AES_KEYLEN 16 // Key length in bytes
+ #define AES_KEYEXPSIZE 176
+#endif
+
+typedef struct
+{
+ uint8_t RoundKey[AES_KEYEXPSIZE];
+} AESCtx;
+
+void aes_init(AESCtx *ctx, uint8_t *key);
+// buffer size is exactly AES_BLOCKLEN bytes;
+void aes_ecb_encrypt(AESCtx *ctx, uint8_t *buf);
+void aes_ecb_decrypt(AESCtx *ctx, uint8_t *buf);
+// buffer size MUST be a multiple of AES_BLOCKLEN;
+void aes_cbc_encrypt(AESCtx *ctx, uint8_t *iv, uint8_t *buf, size_t length);
+void aes_cbc_decrypt(AESCtx *ctx, uint8_t *iv, uint8_t *buf, size_t length);
+
+#endif
\ No newline at end of file
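
A minimal caller for this API, sketched against the first ECB-AES128 vector
quoted in the aes.c header above (AES128 is the default selected in aes.h):

    #include "aes.h"

    int main(void)
    {
        /* Key and block 1 plaintext from NIST SP 800-38A. */
        uint8_t key[AES_KEYLEN] = {
            0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
            0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c
        };
        uint8_t buf[AES_BLOCKLEN] = {
            0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96,
            0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a
        };
        AESCtx ctx;

        aes_init(&ctx, key);
        aes_ecb_encrypt(&ctx, buf);  /* buf now holds 3ad77bb40d7a3660a89ecaf32466ef97 */
        aes_ecb_decrypt(&ctx, buf);  /* buf is the original plaintext again */
        return 0;
    }

For the CBC functions the buffer length must be a multiple of AES_BLOCKLEN; the
caller supplies a 16-byte IV, which these routines read but do not modify.
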
diff --git a/ext/blowfish/Makefile b/ext/blowfish/Makefile
new file mode 100644
index 0000000..fff41a0
--- /dev/null
+++ b/ext/blowfish/Makefile
@@ -0,0 +1,15 @@
+include common.mk
+
+obj = blowfish.o
+
+
+%.o: %.c %.h
+ $(CC) $(CFLAGS) -c $<
+
+%.o: %.S
+ $(CC) $(CFLAGS) -c $<
+
+all: $(obj)
+
+clean:
+ rm -f *.o
diff --git a/ext/blowfish/blowfish.c b/ext/blowfish/blowfish.c
new file mode 100644
index 0000000..ada71d3
--- /dev/null
+++ b/ext/blowfish/blowfish.c
@@ -0,0 +1,539 @@
+/*
+blowfish.c: C implementation of the Blowfish algorithm.
+
+Copyright (C) 1997 by Paul Kocher
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+
+
+COMMENTS ON USING THIS CODE:
+
+Normal usage is as follows:
+ [1] Allocate a BFCtx. (It may be too big for the stack.)
+ [2] Call Blowfish_Init with a pointer to your BFCtx, a pointer to
+ the key, and the number of bytes in the key.
+ [3] To encrypt a 64-bit block, call Blowfish_Encrypt with a pointer to
+ BFCtx, a pointer to the 32-bit left half of the plaintext
+ and a pointer to the 32-bit right half. The plaintext will be
+ overwritten with the ciphertext.
+ [4] Decryption is the same as encryption except that the plaintext and
+ ciphertext are reversed.
+
+Warning #1: The code does not check key lengths. (Caveat encryptor.)
+Warning #2: Beware that Blowfish keys repeat such that "ab" = "abab".
+Warning #3: It is normally a good idea to zeroize the BFCtx before
+ freeing it.
+Warning #4: Endianness conversions are the responsibility of the caller.
+ (To encrypt bytes on a little-endian platform, you'll probably want
+ to swap bytes around instead of just casting.)
+Warning #5: Make sure to use a reasonable mode of operation for your
+ application. (If you don't know what CBC mode is, see Warning #7.)
+Warning #6: This code is susceptible to timing attacks.
+Warning #7: Security engineering is risky and non-intuitive. Have someone
+ check your work. If you don't know what you are doing, get help.
+
+
+This code is fast enough for most applications, but is not optimized for
+speed.
+
+If you require this code under a license other than LGPL, please ask. (I
+can be located using your favorite search engine.) Unfortunately, I do not
+have time to provide unpaid support for everyone who uses this code.
+
+ -- Paul Kocher
+*/
+
+#include <string.h>
+#include "blowfish.h"
+
+#define N 16
+
+static const unsigned long ORIG_P[16 + 2] = {
+ 0x243F6A88L, 0x85A308D3L, 0x13198A2EL, 0x03707344L,
+ 0xA4093822L, 0x299F31D0L, 0x082EFA98L, 0xEC4E6C89L,
+ 0x452821E6L, 0x38D01377L, 0xBE5466CFL, 0x34E90C6CL,
+ 0xC0AC29B7L, 0xC97C50DDL, 0x3F84D5B5L, 0xB5470917L,
+ 0x9216D5D9L, 0x8979FB1BL
+};
+
+static const unsigned long ORIG_S[4][256] = {
+ { 0xD1310BA6L, 0x98DFB5ACL, 0x2FFD72DBL, 0xD01ADFB7L,
+ 0xB8E1AFEDL, 0x6A267E96L, 0xBA7C9045L, 0xF12C7F99L,
+ 0x24A19947L, 0xB3916CF7L, 0x0801F2E2L, 0x858EFC16L,
+ 0x636920D8L, 0x71574E69L, 0xA458FEA3L, 0xF4933D7EL,
+ 0x0D95748FL, 0x728EB658L, 0x718BCD58L, 0x82154AEEL,
+ 0x7B54A41DL, 0xC25A59B5L, 0x9C30D539L, 0x2AF26013L,
+ 0xC5D1B023L, 0x286085F0L, 0xCA417918L, 0xB8DB38EFL,
+ 0x8E79DCB0L, 0x603A180EL, 0x6C9E0E8BL, 0xB01E8A3EL,
+ 0xD71577C1L, 0xBD314B27L, 0x78AF2FDAL, 0x55605C60L,
+ 0xE65525F3L, 0xAA55AB94L, 0x57489862L, 0x63E81440L,
+ 0x55CA396AL, 0x2AAB10B6L, 0xB4CC5C34L, 0x1141E8CEL,
+ 0xA15486AFL, 0x7C72E993L, 0xB3EE1411L, 0x636FBC2AL,
+ 0x2BA9C55DL, 0x741831F6L, 0xCE5C3E16L, 0x9B87931EL,
+ 0xAFD6BA33L, 0x6C24CF5CL, 0x7A325381L, 0x28958677L,
+ 0x3B8F4898L, 0x6B4BB9AFL, 0xC4BFE81BL, 0x66282193L,
+ 0x61D809CCL, 0xFB21A991L, 0x487CAC60L, 0x5DEC8032L,
+ 0xEF845D5DL, 0xE98575B1L, 0xDC262302L, 0xEB651B88L,
+ 0x23893E81L, 0xD396ACC5L, 0x0F6D6FF3L, 0x83F44239L,
+ 0x2E0B4482L, 0xA4842004L, 0x69C8F04AL, 0x9E1F9B5EL,
+ 0x21C66842L, 0xF6E96C9AL, 0x670C9C61L, 0xABD388F0L,
+ 0x6A51A0D2L, 0xD8542F68L, 0x960FA728L, 0xAB5133A3L,
+ 0x6EEF0B6CL, 0x137A3BE4L, 0xBA3BF050L, 0x7EFB2A98L,
+ 0xA1F1651DL, 0x39AF0176L, 0x66CA593EL, 0x82430E88L,
+ 0x8CEE8619L, 0x456F9FB4L, 0x7D84A5C3L, 0x3B8B5EBEL,
+ 0xE06F75D8L, 0x85C12073L, 0x401A449FL, 0x56C16AA6L,
+ 0x4ED3AA62L, 0x363F7706L, 0x1BFEDF72L, 0x429B023DL,
+ 0x37D0D724L, 0xD00A1248L, 0xDB0FEAD3L, 0x49F1C09BL,
+ 0x075372C9L, 0x80991B7BL, 0x25D479D8L, 0xF6E8DEF7L,
+ 0xE3FE501AL, 0xB6794C3BL, 0x976CE0BDL, 0x04C006BAL,
+ 0xC1A94FB6L, 0x409F60C4L, 0x5E5C9EC2L, 0x196A2463L,
+ 0x68FB6FAFL, 0x3E6C53B5L, 0x1339B2EBL, 0x3B52EC6FL,
+ 0x6DFC511FL, 0x9B30952CL, 0xCC814544L, 0xAF5EBD09L,
+ 0xBEE3D004L, 0xDE334AFDL, 0x660F2807L, 0x192E4BB3L,
+ 0xC0CBA857L, 0x45C8740FL, 0xD20B5F39L, 0xB9D3FBDBL,
+ 0x5579C0BDL, 0x1A60320AL, 0xD6A100C6L, 0x402C7279L,
+ 0x679F25FEL, 0xFB1FA3CCL, 0x8EA5E9F8L, 0xDB3222F8L,
+ 0x3C7516DFL, 0xFD616B15L, 0x2F501EC8L, 0xAD0552ABL,
+ 0x323DB5FAL, 0xFD238760L, 0x53317B48L, 0x3E00DF82L,
+ 0x9E5C57BBL, 0xCA6F8CA0L, 0x1A87562EL, 0xDF1769DBL,
+ 0xD542A8F6L, 0x287EFFC3L, 0xAC6732C6L, 0x8C4F5573L,
+ 0x695B27B0L, 0xBBCA58C8L, 0xE1FFA35DL, 0xB8F011A0L,
+ 0x10FA3D98L, 0xFD2183B8L, 0x4AFCB56CL, 0x2DD1D35BL,
+ 0x9A53E479L, 0xB6F84565L, 0xD28E49BCL, 0x4BFB9790L,
+ 0xE1DDF2DAL, 0xA4CB7E33L, 0x62FB1341L, 0xCEE4C6E8L,
+ 0xEF20CADAL, 0x36774C01L, 0xD07E9EFEL, 0x2BF11FB4L,
+ 0x95DBDA4DL, 0xAE909198L, 0xEAAD8E71L, 0x6B93D5A0L,
+ 0xD08ED1D0L, 0xAFC725E0L, 0x8E3C5B2FL, 0x8E7594B7L,
+ 0x8FF6E2FBL, 0xF2122B64L, 0x8888B812L, 0x900DF01CL,
+ 0x4FAD5EA0L, 0x688FC31CL, 0xD1CFF191L, 0xB3A8C1ADL,
+ 0x2F2F2218L, 0xBE0E1777L, 0xEA752DFEL, 0x8B021FA1L,
+ 0xE5A0CC0FL, 0xB56F74E8L, 0x18ACF3D6L, 0xCE89E299L,
+ 0xB4A84FE0L, 0xFD13E0B7L, 0x7CC43B81L, 0xD2ADA8D9L,
+ 0x165FA266L, 0x80957705L, 0x93CC7314L, 0x211A1477L,
+ 0xE6AD2065L, 0x77B5FA86L, 0xC75442F5L, 0xFB9D35CFL,
+ 0xEBCDAF0CL, 0x7B3E89A0L, 0xD6411BD3L, 0xAE1E7E49L,
+ 0x00250E2DL, 0x2071B35EL, 0x226800BBL, 0x57B8E0AFL,
+ 0x2464369BL, 0xF009B91EL, 0x5563911DL, 0x59DFA6AAL,
+ 0x78C14389L, 0xD95A537FL, 0x207D5BA2L, 0x02E5B9C5L,
+ 0x83260376L, 0x6295CFA9L, 0x11C81968L, 0x4E734A41L,
+ 0xB3472DCAL, 0x7B14A94AL, 0x1B510052L, 0x9A532915L,
+ 0xD60F573FL, 0xBC9BC6E4L, 0x2B60A476L, 0x81E67400L,
+ 0x08BA6FB5L, 0x571BE91FL, 0xF296EC6BL, 0x2A0DD915L,
+ 0xB6636521L, 0xE7B9F9B6L, 0xFF34052EL, 0xC5855664L,
+ 0x53B02D5DL, 0xA99F8FA1L, 0x08BA4799L, 0x6E85076AL },
+ { 0x4B7A70E9L, 0xB5B32944L, 0xDB75092EL, 0xC4192623L,
+ 0xAD6EA6B0L, 0x49A7DF7DL, 0x9CEE60B8L, 0x8FEDB266L,
+ 0xECAA8C71L, 0x699A17FFL, 0x5664526CL, 0xC2B19EE1L,
+ 0x193602A5L, 0x75094C29L, 0xA0591340L, 0xE4183A3EL,
+ 0x3F54989AL, 0x5B429D65L, 0x6B8FE4D6L, 0x99F73FD6L,
+ 0xA1D29C07L, 0xEFE830F5L, 0x4D2D38E6L, 0xF0255DC1L,
+ 0x4CDD2086L, 0x8470EB26L, 0x6382E9C6L, 0x021ECC5EL,
+ 0x09686B3FL, 0x3EBAEFC9L, 0x3C971814L, 0x6B6A70A1L,
+ 0x687F3584L, 0x52A0E286L, 0xB79C5305L, 0xAA500737L,
+ 0x3E07841CL, 0x7FDEAE5CL, 0x8E7D44ECL, 0x5716F2B8L,
+ 0xB03ADA37L, 0xF0500C0DL, 0xF01C1F04L, 0x0200B3FFL,
+ 0xAE0CF51AL, 0x3CB574B2L, 0x25837A58L, 0xDC0921BDL,
+ 0xD19113F9L, 0x7CA92FF6L, 0x94324773L, 0x22F54701L,
+ 0x3AE5E581L, 0x37C2DADCL, 0xC8B57634L, 0x9AF3DDA7L,
+ 0xA9446146L, 0x0FD0030EL, 0xECC8C73EL, 0xA4751E41L,
+ 0xE238CD99L, 0x3BEA0E2FL, 0x3280BBA1L, 0x183EB331L,
+ 0x4E548B38L, 0x4F6DB908L, 0x6F420D03L, 0xF60A04BFL,
+ 0x2CB81290L, 0x24977C79L, 0x5679B072L, 0xBCAF89AFL,
+ 0xDE9A771FL, 0xD9930810L, 0xB38BAE12L, 0xDCCF3F2EL,
+ 0x5512721FL, 0x2E6B7124L, 0x501ADDE6L, 0x9F84CD87L,
+ 0x7A584718L, 0x7408DA17L, 0xBC9F9ABCL, 0xE94B7D8CL,
+ 0xEC7AEC3AL, 0xDB851DFAL, 0x63094366L, 0xC464C3D2L,
+ 0xEF1C1847L, 0x3215D908L, 0xDD433B37L, 0x24C2BA16L,
+ 0x12A14D43L, 0x2A65C451L, 0x50940002L, 0x133AE4DDL,
+ 0x71DFF89EL, 0x10314E55L, 0x81AC77D6L, 0x5F11199BL,
+ 0x043556F1L, 0xD7A3C76BL, 0x3C11183BL, 0x5924A509L,
+ 0xF28FE6EDL, 0x97F1FBFAL, 0x9EBABF2CL, 0x1E153C6EL,
+ 0x86E34570L, 0xEAE96FB1L, 0x860E5E0AL, 0x5A3E2AB3L,
+ 0x771FE71CL, 0x4E3D06FAL, 0x2965DCB9L, 0x99E71D0FL,
+ 0x803E89D6L, 0x5266C825L, 0x2E4CC978L, 0x9C10B36AL,
+ 0xC6150EBAL, 0x94E2EA78L, 0xA5FC3C53L, 0x1E0A2DF4L,
+ 0xF2F74EA7L, 0x361D2B3DL, 0x1939260FL, 0x19C27960L,
+ 0x5223A708L, 0xF71312B6L, 0xEBADFE6EL, 0xEAC31F66L,
+ 0xE3BC4595L, 0xA67BC883L, 0xB17F37D1L, 0x018CFF28L,
+ 0xC332DDEFL, 0xBE6C5AA5L, 0x65582185L, 0x68AB9802L,
+ 0xEECEA50FL, 0xDB2F953BL, 0x2AEF7DADL, 0x5B6E2F84L,
+ 0x1521B628L, 0x29076170L, 0xECDD4775L, 0x619F1510L,
+ 0x13CCA830L, 0xEB61BD96L, 0x0334FE1EL, 0xAA0363CFL,
+ 0xB5735C90L, 0x4C70A239L, 0xD59E9E0BL, 0xCBAADE14L,
+ 0xEECC86BCL, 0x60622CA7L, 0x9CAB5CABL, 0xB2F3846EL,
+ 0x648B1EAFL, 0x19BDF0CAL, 0xA02369B9L, 0x655ABB50L,
+ 0x40685A32L, 0x3C2AB4B3L, 0x319EE9D5L, 0xC021B8F7L,
+ 0x9B540B19L, 0x875FA099L, 0x95F7997EL, 0x623D7DA8L,
+ 0xF837889AL, 0x97E32D77L, 0x11ED935FL, 0x16681281L,
+ 0x0E358829L, 0xC7E61FD6L, 0x96DEDFA1L, 0x7858BA99L,
+ 0x57F584A5L, 0x1B227263L, 0x9B83C3FFL, 0x1AC24696L,
+ 0xCDB30AEBL, 0x532E3054L, 0x8FD948E4L, 0x6DBC3128L,
+ 0x58EBF2EFL, 0x34C6FFEAL, 0xFE28ED61L, 0xEE7C3C73L,
+ 0x5D4A14D9L, 0xE864B7E3L, 0x42105D14L, 0x203E13E0L,
+ 0x45EEE2B6L, 0xA3AAABEAL, 0xDB6C4F15L, 0xFACB4FD0L,
+ 0xC742F442L, 0xEF6ABBB5L, 0x654F3B1DL, 0x41CD2105L,
+ 0xD81E799EL, 0x86854DC7L, 0xE44B476AL, 0x3D816250L,
+ 0xCF62A1F2L, 0x5B8D2646L, 0xFC8883A0L, 0xC1C7B6A3L,
+ 0x7F1524C3L, 0x69CB7492L, 0x47848A0BL, 0x5692B285L,
+ 0x095BBF00L, 0xAD19489DL, 0x1462B174L, 0x23820E00L,
+ 0x58428D2AL, 0x0C55F5EAL, 0x1DADF43EL, 0x233F7061L,
+ 0x3372F092L, 0x8D937E41L, 0xD65FECF1L, 0x6C223BDBL,
+ 0x7CDE3759L, 0xCBEE7460L, 0x4085F2A7L, 0xCE77326EL,
+ 0xA6078084L, 0x19F8509EL, 0xE8EFD855L, 0x61D99735L,
+ 0xA969A7AAL, 0xC50C06C2L, 0x5A04ABFCL, 0x800BCADCL,
+ 0x9E447A2EL, 0xC3453484L, 0xFDD56705L, 0x0E1E9EC9L,
+ 0xDB73DBD3L, 0x105588CDL, 0x675FDA79L, 0xE3674340L,
+ 0xC5C43465L, 0x713E38D8L, 0x3D28F89EL, 0xF16DFF20L,
+ 0x153E21E7L, 0x8FB03D4AL, 0xE6E39F2BL, 0xDB83ADF7L },
+ { 0xE93D5A68L, 0x948140F7L, 0xF64C261CL, 0x94692934L,
+ 0x411520F7L, 0x7602D4F7L, 0xBCF46B2EL, 0xD4A20068L,
+ 0xD4082471L, 0x3320F46AL, 0x43B7D4B7L, 0x500061AFL,
+ 0x1E39F62EL, 0x97244546L, 0x14214F74L, 0xBF8B8840L,
+ 0x4D95FC1DL, 0x96B591AFL, 0x70F4DDD3L, 0x66A02F45L,
+ 0xBFBC09ECL, 0x03BD9785L, 0x7FAC6DD0L, 0x31CB8504L,
+ 0x96EB27B3L, 0x55FD3941L, 0xDA2547E6L, 0xABCA0A9AL,
+ 0x28507825L, 0x530429F4L, 0x0A2C86DAL, 0xE9B66DFBL,
+ 0x68DC1462L, 0xD7486900L, 0x680EC0A4L, 0x27A18DEEL,
+ 0x4F3FFEA2L, 0xE887AD8CL, 0xB58CE006L, 0x7AF4D6B6L,
+ 0xAACE1E7CL, 0xD3375FECL, 0xCE78A399L, 0x406B2A42L,
+ 0x20FE9E35L, 0xD9F385B9L, 0xEE39D7ABL, 0x3B124E8BL,
+ 0x1DC9FAF7L, 0x4B6D1856L, 0x26A36631L, 0xEAE397B2L,
+ 0x3A6EFA74L, 0xDD5B4332L, 0x6841E7F7L, 0xCA7820FBL,
+ 0xFB0AF54EL, 0xD8FEB397L, 0x454056ACL, 0xBA489527L,
+ 0x55533A3AL, 0x20838D87L, 0xFE6BA9B7L, 0xD096954BL,
+ 0x55A867BCL, 0xA1159A58L, 0xCCA92963L, 0x99E1DB33L,
+ 0xA62A4A56L, 0x3F3125F9L, 0x5EF47E1CL, 0x9029317CL,
+ 0xFDF8E802L, 0x04272F70L, 0x80BB155CL, 0x05282CE3L,
+ 0x95C11548L, 0xE4C66D22L, 0x48C1133FL, 0xC70F86DCL,
+ 0x07F9C9EEL, 0x41041F0FL, 0x404779A4L, 0x5D886E17L,
+ 0x325F51EBL, 0xD59BC0D1L, 0xF2BCC18FL, 0x41113564L,
+ 0x257B7834L, 0x602A9C60L, 0xDFF8E8A3L, 0x1F636C1BL,
+ 0x0E12B4C2L, 0x02E1329EL, 0xAF664FD1L, 0xCAD18115L,
+ 0x6B2395E0L, 0x333E92E1L, 0x3B240B62L, 0xEEBEB922L,
+ 0x85B2A20EL, 0xE6BA0D99L, 0xDE720C8CL, 0x2DA2F728L,
+ 0xD0127845L, 0x95B794FDL, 0x647D0862L, 0xE7CCF5F0L,
+ 0x5449A36FL, 0x877D48FAL, 0xC39DFD27L, 0xF33E8D1EL,
+ 0x0A476341L, 0x992EFF74L, 0x3A6F6EABL, 0xF4F8FD37L,
+ 0xA812DC60L, 0xA1EBDDF8L, 0x991BE14CL, 0xDB6E6B0DL,
+ 0xC67B5510L, 0x6D672C37L, 0x2765D43BL, 0xDCD0E804L,
+ 0xF1290DC7L, 0xCC00FFA3L, 0xB5390F92L, 0x690FED0BL,
+ 0x667B9FFBL, 0xCEDB7D9CL, 0xA091CF0BL, 0xD9155EA3L,
+ 0xBB132F88L, 0x515BAD24L, 0x7B9479BFL, 0x763BD6EBL,
+ 0x37392EB3L, 0xCC115979L, 0x8026E297L, 0xF42E312DL,
+ 0x6842ADA7L, 0xC66A2B3BL, 0x12754CCCL, 0x782EF11CL,
+ 0x6A124237L, 0xB79251E7L, 0x06A1BBE6L, 0x4BFB6350L,
+ 0x1A6B1018L, 0x11CAEDFAL, 0x3D25BDD8L, 0xE2E1C3C9L,
+ 0x44421659L, 0x0A121386L, 0xD90CEC6EL, 0xD5ABEA2AL,
+ 0x64AF674EL, 0xDA86A85FL, 0xBEBFE988L, 0x64E4C3FEL,
+ 0x9DBC8057L, 0xF0F7C086L, 0x60787BF8L, 0x6003604DL,
+ 0xD1FD8346L, 0xF6381FB0L, 0x7745AE04L, 0xD736FCCCL,
+ 0x83426B33L, 0xF01EAB71L, 0xB0804187L, 0x3C005E5FL,
+ 0x77A057BEL, 0xBDE8AE24L, 0x55464299L, 0xBF582E61L,
+ 0x4E58F48FL, 0xF2DDFDA2L, 0xF474EF38L, 0x8789BDC2L,
+ 0x5366F9C3L, 0xC8B38E74L, 0xB475F255L, 0x46FCD9B9L,
+ 0x7AEB2661L, 0x8B1DDF84L, 0x846A0E79L, 0x915F95E2L,
+ 0x466E598EL, 0x20B45770L, 0x8CD55591L, 0xC902DE4CL,
+ 0xB90BACE1L, 0xBB8205D0L, 0x11A86248L, 0x7574A99EL,
+ 0xB77F19B6L, 0xE0A9DC09L, 0x662D09A1L, 0xC4324633L,
+ 0xE85A1F02L, 0x09F0BE8CL, 0x4A99A025L, 0x1D6EFE10L,
+ 0x1AB93D1DL, 0x0BA5A4DFL, 0xA186F20FL, 0x2868F169L,
+ 0xDCB7DA83L, 0x573906FEL, 0xA1E2CE9BL, 0x4FCD7F52L,
+ 0x50115E01L, 0xA70683FAL, 0xA002B5C4L, 0x0DE6D027L,
+ 0x9AF88C27L, 0x773F8641L, 0xC3604C06L, 0x61A806B5L,
+ 0xF0177A28L, 0xC0F586E0L, 0x006058AAL, 0x30DC7D62L,
+ 0x11E69ED7L, 0x2338EA63L, 0x53C2DD94L, 0xC2C21634L,
+ 0xBBCBEE56L, 0x90BCB6DEL, 0xEBFC7DA1L, 0xCE591D76L,
+ 0x6F05E409L, 0x4B7C0188L, 0x39720A3DL, 0x7C927C24L,
+ 0x86E3725FL, 0x724D9DB9L, 0x1AC15BB4L, 0xD39EB8FCL,
+ 0xED545578L, 0x08FCA5B5L, 0xD83D7CD3L, 0x4DAD0FC4L,
+ 0x1E50EF5EL, 0xB161E6F8L, 0xA28514D9L, 0x6C51133CL,
+ 0x6FD5C7E7L, 0x56E14EC4L, 0x362ABFCEL, 0xDDC6C837L,
+ 0xD79A3234L, 0x92638212L, 0x670EFA8EL, 0x406000E0L },
+ { 0x3A39CE37L, 0xD3FAF5CFL, 0xABC27737L, 0x5AC52D1BL,
+ 0x5CB0679EL, 0x4FA33742L, 0xD3822740L, 0x99BC9BBEL,
+ 0xD5118E9DL, 0xBF0F7315L, 0xD62D1C7EL, 0xC700C47BL,
+ 0xB78C1B6BL, 0x21A19045L, 0xB26EB1BEL, 0x6A366EB4L,
+ 0x5748AB2FL, 0xBC946E79L, 0xC6A376D2L, 0x6549C2C8L,
+ 0x530FF8EEL, 0x468DDE7DL, 0xD5730A1DL, 0x4CD04DC6L,
+ 0x2939BBDBL, 0xA9BA4650L, 0xAC9526E8L, 0xBE5EE304L,
+ 0xA1FAD5F0L, 0x6A2D519AL, 0x63EF8CE2L, 0x9A86EE22L,
+ 0xC089C2B8L, 0x43242EF6L, 0xA51E03AAL, 0x9CF2D0A4L,
+ 0x83C061BAL, 0x9BE96A4DL, 0x8FE51550L, 0xBA645BD6L,
+ 0x2826A2F9L, 0xA73A3AE1L, 0x4BA99586L, 0xEF5562E9L,
+ 0xC72FEFD3L, 0xF752F7DAL, 0x3F046F69L, 0x77FA0A59L,
+ 0x80E4A915L, 0x87B08601L, 0x9B09E6ADL, 0x3B3EE593L,
+ 0xE990FD5AL, 0x9E34D797L, 0x2CF0B7D9L, 0x022B8B51L,
+ 0x96D5AC3AL, 0x017DA67DL, 0xD1CF3ED6L, 0x7C7D2D28L,
+ 0x1F9F25CFL, 0xADF2B89BL, 0x5AD6B472L, 0x5A88F54CL,
+ 0xE029AC71L, 0xE019A5E6L, 0x47B0ACFDL, 0xED93FA9BL,
+ 0xE8D3C48DL, 0x283B57CCL, 0xF8D56629L, 0x79132E28L,
+ 0x785F0191L, 0xED756055L, 0xF7960E44L, 0xE3D35E8CL,
+ 0x15056DD4L, 0x88F46DBAL, 0x03A16125L, 0x0564F0BDL,
+ 0xC3EB9E15L, 0x3C9057A2L, 0x97271AECL, 0xA93A072AL,
+ 0x1B3F6D9BL, 0x1E6321F5L, 0xF59C66FBL, 0x26DCF319L,
+ 0x7533D928L, 0xB155FDF5L, 0x03563482L, 0x8ABA3CBBL,
+ 0x28517711L, 0xC20AD9F8L, 0xABCC5167L, 0xCCAD925FL,
+ 0x4DE81751L, 0x3830DC8EL, 0x379D5862L, 0x9320F991L,
+ 0xEA7A90C2L, 0xFB3E7BCEL, 0x5121CE64L, 0x774FBE32L,
+ 0xA8B6E37EL, 0xC3293D46L, 0x48DE5369L, 0x6413E680L,
+ 0xA2AE0810L, 0xDD6DB224L, 0x69852DFDL, 0x09072166L,
+ 0xB39A460AL, 0x6445C0DDL, 0x586CDECFL, 0x1C20C8AEL,
+ 0x5BBEF7DDL, 0x1B588D40L, 0xCCD2017FL, 0x6BB4E3BBL,
+ 0xDDA26A7EL, 0x3A59FF45L, 0x3E350A44L, 0xBCB4CDD5L,
+ 0x72EACEA8L, 0xFA6484BBL, 0x8D6612AEL, 0xBF3C6F47L,
+ 0xD29BE463L, 0x542F5D9EL, 0xAEC2771BL, 0xF64E6370L,
+ 0x740E0D8DL, 0xE75B1357L, 0xF8721671L, 0xAF537D5DL,
+ 0x4040CB08L, 0x4EB4E2CCL, 0x34D2466AL, 0x0115AF84L,
+ 0xE1B00428L, 0x95983A1DL, 0x06B89FB4L, 0xCE6EA048L,
+ 0x6F3F3B82L, 0x3520AB82L, 0x011A1D4BL, 0x277227F8L,
+ 0x611560B1L, 0xE7933FDCL, 0xBB3A792BL, 0x344525BDL,
+ 0xA08839E1L, 0x51CE794BL, 0x2F32C9B7L, 0xA01FBAC9L,
+ 0xE01CC87EL, 0xBCC7D1F6L, 0xCF0111C3L, 0xA1E8AAC7L,
+ 0x1A908749L, 0xD44FBD9AL, 0xD0DADECBL, 0xD50ADA38L,
+ 0x0339C32AL, 0xC6913667L, 0x8DF9317CL, 0xE0B12B4FL,
+ 0xF79E59B7L, 0x43F5BB3AL, 0xF2D519FFL, 0x27D9459CL,
+ 0xBF97222CL, 0x15E6FC2AL, 0x0F91FC71L, 0x9B941525L,
+ 0xFAE59361L, 0xCEB69CEBL, 0xC2A86459L, 0x12BAA8D1L,
+ 0xB6C1075EL, 0xE3056A0CL, 0x10D25065L, 0xCB03A442L,
+ 0xE0EC6E0EL, 0x1698DB3BL, 0x4C98A0BEL, 0x3278E964L,
+ 0x9F1F9532L, 0xE0D392DFL, 0xD3A0342BL, 0x8971F21EL,
+ 0x1B0A7441L, 0x4BA3348CL, 0xC5BE7120L, 0xC37632D8L,
+ 0xDF359F8DL, 0x9B992F2EL, 0xE60B6F47L, 0x0FE3F11DL,
+ 0xE54CDA54L, 0x1EDAD891L, 0xCE6279CFL, 0xCD3E7E6FL,
+ 0x1618B166L, 0xFD2C1D05L, 0x848FD2C5L, 0xF6FB2299L,
+ 0xF523F357L, 0xA6327623L, 0x93A83531L, 0x56CCCD02L,
+ 0xACF08162L, 0x5A75EBB5L, 0x6E163697L, 0x88D273CCL,
+ 0xDE966292L, 0x81B949D0L, 0x4C50901BL, 0x71C65614L,
+ 0xE6C6C7BDL, 0x327A140AL, 0x45E1D006L, 0xC3F27B9AL,
+ 0xC9AA53FDL, 0x62A80F00L, 0xBB25BFE2L, 0x35BDD2F6L,
+ 0x71126905L, 0xB2040222L, 0xB6CBCF7CL, 0xCD769C2BL,
+ 0x53113EC0L, 0x1640E3D3L, 0x38ABBD60L, 0x2547ADF0L,
+ 0xBA38209CL, 0xF746CE76L, 0x77AFA1C5L, 0x20756060L,
+ 0x85CBFE4EL, 0x8AE88DD8L, 0x7AAAF9B0L, 0x4CF9AA7EL,
+ 0x1948C25CL, 0x02FB8A8CL, 0x01C36AE4L, 0xD6EBE1F9L,
+ 0x90D4F869L, 0xA65CDEA0L, 0x3F09252DL, 0xC208E69FL,
+ 0xB74E6132L, 0xCE77E25BL, 0x578FDFE3L, 0x3AC372E6L }
+};
+
+
+static unsigned long F(BFCtx *ctx, unsigned long x) {
+ unsigned short a, b, c, d;
+ unsigned long y;
+
+ d = (unsigned short)(x & 0xFF);
+ x >>= 8;
+ c = (unsigned short)(x & 0xFF);
+ x >>= 8;
+ b = (unsigned short)(x & 0xFF);
+ x >>= 8;
+ a = (unsigned short)(x & 0xFF);
+ y = ctx->S[0][a] + ctx->S[1][b];
+ y = y ^ ctx->S[2][c];
+ y = y + ctx->S[3][d];
+
+ return y;
+}
+
+
+static void Blowfish_Encrypt(BFCtx *ctx, unsigned long *xl, unsigned long *xr) {
+ unsigned long Xl;
+ unsigned long Xr;
+ unsigned long temp;
+ short i;
+
+ Xl = *xl;
+ Xr = *xr;
+
+ for (i = 0; i < N; ++i) {
+ Xl = Xl ^ ctx->P[i];
+ Xr = F(ctx, Xl) ^ Xr;
+
+ temp = Xl;
+ Xl = Xr;
+ Xr = temp;
+ }
+
+ temp = Xl;
+ Xl = Xr;
+ Xr = temp;
+
+ Xr = Xr ^ ctx->P[N];
+ Xl = Xl ^ ctx->P[N + 1];
+
+ *xl = Xl;
+ *xr = Xr;
+}
+
+
+static void Blowfish_Decrypt(BFCtx *ctx, unsigned long *xl, unsigned long *xr) {
+ unsigned long Xl;
+ unsigned long Xr;
+ unsigned long temp;
+ short i;
+
+ Xl = *xl;
+ Xr = *xr;
+
+ for (i = N + 1; i > 1; --i) {
+ Xl = Xl ^ ctx->P[i];
+ Xr = F(ctx, Xl) ^ Xr;
+
+ /* Exchange Xl and Xr */
+ temp = Xl;
+ Xl = Xr;
+ Xr = temp;
+ }
+
+ /* Exchange Xl and Xr */
+ temp = Xl;
+ Xl = Xr;
+ Xr = temp;
+
+ Xr = Xr ^ ctx->P[1];
+ Xl = Xl ^ ctx->P[0];
+
+ *xl = Xl;
+ *xr = Xr;
+}
+
+
+static void Blowfish_Init(BFCtx *ctx, unsigned char *key, int keyLen) {
+ int i, j, k;
+ unsigned long data, datal, datar;
+
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 256; j++)
+ ctx->S[i][j] = ORIG_S[i][j];
+ }
+
+ j = 0;
+ for (i = 0; i < N + 2; ++i) {
+ data = 0x00000000;
+ for (k = 0; k < 4; ++k) {
+ data = (data << 8) | key[j];
+ j = j + 1;
+ if (j >= keyLen)
+ j = 0;
+ }
+ ctx->P[i] = ORIG_P[i] ^ data;
+ }
+
+ datal = 0x00000000;
+ datar = 0x00000000;
+
+ for (i = 0; i < N + 2; i += 2) {
+ Blowfish_Encrypt(ctx, &datal, &datar);
+ ctx->P[i] = datal;
+ ctx->P[i + 1] = datar;
+ }
+
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 256; j += 2) {
+ Blowfish_Encrypt(ctx, &datal, &datar);
+ ctx->S[i][j] = datal;
+ ctx->S[i][j + 1] = datar;
+ }
+ }
+}
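+// Note that key setup runs Blowfish_Encrypt (16 + 2)/2 + 4 * 256/2 = 521 times,
+// so bf_init() is far more expensive than encrypting a block; set up the
+// context once and reuse it.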
+
+static void Xor_Block(unsigned char *buf, unsigned char *block) {
+ uint8_t i;
+
+ for (i = 0; i < BF_BLOCKLEN; ++i) {
+ buf[i] ^= block[i];
+ }
+}
+
+/*****************************************************************************/
+/* Public functions: */
+/*****************************************************************************/
+void bf_init(BFCtx *ctx, uint8_t *key) {
+ Blowfish_Init(ctx, key, BF_KEYLEN);
+}
+
+void bf_ecb_encrypt(BFCtx *ctx, uint8_t *buf) {
+ unsigned long xl, xr;
+
+ xl = (buf[0] << 24) |
+ (buf[1] << 16) |
+ (buf[2] << 8) |
+ (buf[3]);
+ xr = (buf[4] << 24) |
+ (buf[5] << 16) |
+ (buf[6] << 8) |
+ (buf[7]);
+
+ Blowfish_Encrypt(ctx, &xl, &xr);
+
+ buf[0] = (xl & 0xFF000000) >> 24;
+ buf[1] = (xl & 0x00FF0000) >> 16;
+ buf[2] = (xl & 0x0000FF00) >> 8;
+ buf[3] = (xl & 0x000000FF);
+ buf[4] = (xr & 0xFF000000) >> 24;
+ buf[5] = (xr & 0x00FF0000) >> 16;
+ buf[6] = (xr & 0x0000FF00) >> 8;
+ buf[7] = (xr & 0x000000FF);
+}
+
+void bf_ecb_decrypt(BFCtx *ctx, uint8_t *buf) {
+ unsigned long xl, xr;
+
+ xl = (buf[0] << 24) |
+ (buf[1] << 16) |
+ (buf[2] << 8) |
+ (buf[3]);
+ xr = (buf[4] << 24) |
+ (buf[5] << 16) |
+ (buf[6] << 8) |
+ (buf[7]);
+
+ Blowfish_Decrypt(ctx, &xl, &xr);
+
+ buf[0] = (xl & 0xFF000000) >> 24;
+ buf[1] = (xl & 0x00FF0000) >> 16;
+ buf[2] = (xl & 0x0000FF00) >> 8;
+ buf[3] = (xl & 0x000000FF);
+ buf[4] = (xr & 0xFF000000) >> 24;
+ buf[5] = (xr & 0x00FF0000) >> 16;
+ buf[6] = (xr & 0x0000FF00) >> 8;
+ buf[7] = (xr & 0x000000FF);
+}
+
+void bf_cbc_encrypt(BFCtx *ctx, uint8_t *iv, uint8_t *buf, size_t length) {
+ size_t i;
+ uint8_t *block = iv;
+
+ for (i = 0; i < length; i += BF_BLOCKLEN) {
+ Xor_Block(buf, block);
+ bf_ecb_encrypt(ctx, buf);
+ block = buf;
+ buf += BF_BLOCKLEN;
+ }
+}
+
+void bf_cbc_decrypt(BFCtx *ctx, uint8_t *iv, uint8_t *buf, size_t length) {
+ size_t i;
+ uint8_t block[BF_BLOCKLEN];
+ uint8_t block_next[BF_BLOCKLEN];
+
+ memcpy(block, iv, BF_BLOCKLEN);
+ for (i = 0; i < length; i += BF_BLOCKLEN) {
+ memcpy(block_next, buf, BF_BLOCKLEN);
+ bf_ecb_decrypt(ctx, buf);
+ Xor_Block(buf, block);
+ memcpy(block, block_next, BF_BLOCKLEN);
+ buf += BF_BLOCKLEN;
+ }
+}
diff --git a/ext/blowfish/blowfish.h b/ext/blowfish/blowfish.h
new file mode 100644
index 0000000..a317812
--- /dev/null
+++ b/ext/blowfish/blowfish.h
@@ -0,0 +1,38 @@
+/*
+blowfish.h: Header file for blowfish.c
+
+Copyright (C) 1997 by Paul Kocher
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+See blowfish.c for more information about this file.
+*/
+
+#include <stdint.h>
+#include <stddef.h>
+
+#define BF_KEYLEN 16
+#define BF_BLOCKLEN 8
+
+typedef struct {
+ unsigned long P[16 + 2];
+ unsigned long S[4][256];
+} BFCtx;
+
+void bf_init(BFCtx *ctx, uint8_t *key);
+void bf_ecb_encrypt(BFCtx *ctx, uint8_t *buf);
+void bf_ecb_decrypt(BFCtx *ctx, uint8_t *buf);
+void bf_cbc_decrypt(BFCtx *ctx, uint8_t *iv, uint8_t *buf, size_t length);
+void bf_cbc_encrypt(BFCtx *ctx, uint8_t *iv, uint8_t *buf, size_t length);
+
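A matching sketch for this wrapper; per usage note [1] in blowfish.c the BFCtx
(full P- and S-boxes) is kept off the stack here, and the key/data values are
just placeholders:

    #include "blowfish.h"

    static BFCtx ctx;

    int main(void)
    {
        uint8_t key[BF_KEYLEN] = "0123456789ABCDE";  /* bf_init always reads BF_KEYLEN (16) bytes */
        uint8_t block[BF_BLOCKLEN] = "plaintx";      /* one 8-byte block */

        bf_init(&ctx, key);
        bf_ecb_encrypt(&ctx, block);  /* block is overwritten with the ciphertext */
        bf_ecb_decrypt(&ctx, block);  /* and restored to the plaintext */
        return 0;
    }
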
diff --git a/ext/crypto_obj.mk b/ext/crypto_obj.mk
new file mode 100644
index 0000000..54c7092
--- /dev/null
+++ b/ext/crypto_obj.mk
@@ -0,0 +1,3 @@
+obj_dep = aes/aes.o blowfish/blowfish.o sha/sha1.o
+
+subdirs = aes blowfish sha
diff --git a/ext/libressl/Makefile b/ext/libressl/Makefile
new file mode 100644
index 0000000..56eabfc
--- /dev/null
+++ b/ext/libressl/Makefile
@@ -0,0 +1,12 @@
+include ssl_obj.mk
+
+all:
+ for i in $(subdirs); do \
+ (cd $$i && $(MAKE)) || exit; \
+ done
+
+clean:
+ for i in $(subdirs); do \
+ (cd $$i && $(MAKE) clean) || exit; \
+ done
+ rm -f *.o *.a
diff --git a/ext/libressl/README b/ext/libressl/README
new file mode 100644
index 0000000..43f2397
--- /dev/null
+++ b/ext/libressl/README
@@ -0,0 +1 @@
+Extracted from libressl 3.4.2
diff --git a/ext/libressl/crypto/aead/Makefile b/ext/libressl/crypto/aead/Makefile
new file mode 100644
index 0000000..6bf1ccf
--- /dev/null
+++ b/ext/libressl/crypto/aead/Makefile
@@ -0,0 +1,13 @@
+include ../../ssl_common.mk
+
+obj = e_chacha20poly1305.o
+
+
+all: $(obj)
+dep: all
+
+%.o: %.c
+ $(CC) $(CFLAGS) -c $<
+
+clean:
+ rm -f *.o *.a
diff --git a/ext/libressl/crypto/aead/e_chacha20poly1305.c b/ext/libressl/crypto/aead/e_chacha20poly1305.c
new file mode 100644
index 0000000..9d8291e
--- /dev/null
+++ b/ext/libressl/crypto/aead/e_chacha20poly1305.c
@@ -0,0 +1,395 @@
+/* $OpenBSD: e_chacha20poly1305.c,v 1.21 2019/03/27 15:34:01 jsing Exp $ */
+
+/*
+ * Copyright (c) 2015 Reyk Floter <reyk@openbsd.org>
+ * Copyright (c) 2014, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include <openssl/opensslconf.h>
+
+#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305)
+
+//#include <openssl/err.h>
+//#include <openssl/evp.h>
+#include <openssl/chacha.h>
+#include <openssl/poly1305.h>
+
+//#include "evp_locl.h"
+#define EVPerror(X) ;
+
+#define POLY1305_TAG_LEN 16
+
+#define CHACHA20_CONSTANT_LEN 4
+#define CHACHA20_IV_LEN 8
+#define CHACHA20_NONCE_LEN (CHACHA20_CONSTANT_LEN + CHACHA20_IV_LEN)
+#define XCHACHA20_NONCE_LEN 24
+
+#if 0
+struct aead_chacha20_poly1305_ctx {
+ unsigned char key[32];
+ unsigned char tag_len;
+};
+
+static int
+aead_chacha20_poly1305_init(EVP_AEAD_CTX *ctx, const unsigned char *key,
+ size_t key_len, size_t tag_len)
+{
+ struct aead_chacha20_poly1305_ctx *c20_ctx;
+
+ if (tag_len == 0)
+ tag_len = POLY1305_TAG_LEN;
+
+ if (tag_len > POLY1305_TAG_LEN) {
+ EVPerror(EVP_R_TOO_LARGE);
+ return 0;
+ }
+
+ /* Internal error - EVP_AEAD_CTX_init should catch this. */
+ if (key_len != sizeof(c20_ctx->key))
+ return 0;
+
+ c20_ctx = malloc(sizeof(struct aead_chacha20_poly1305_ctx));
+ if (c20_ctx == NULL)
+ return 0;
+
+ memcpy(&c20_ctx->key[0], key, key_len);
+ c20_ctx->tag_len = tag_len;
+ ctx->aead_state = c20_ctx;
+
+ return 1;
+}
+
+static void
+aead_chacha20_poly1305_cleanup(EVP_AEAD_CTX *ctx)
+{
+ struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state;
+
+ freezero(c20_ctx, sizeof(*c20_ctx));
+}
+
+#endif
+
+static void
+poly1305_update_with_length(poly1305_state *poly1305,
+ const unsigned char *data, size_t data_len)
+{
+ size_t j = data_len;
+ unsigned char length_bytes[8];
+ unsigned i;
+
+ for (i = 0; i < sizeof(length_bytes); i++) {
+ length_bytes[i] = j;
+ j >>= 8;
+ }
+
+ if (data != NULL)
+ CRYPTO_poly1305_update(poly1305, data, data_len);
+ CRYPTO_poly1305_update(poly1305, length_bytes, sizeof(length_bytes));
+}
+
+static void
+poly1305_update_with_pad16(poly1305_state *poly1305,
+ const unsigned char *data, size_t data_len)
+{
+ static const unsigned char zero_pad16[16];
+ size_t pad_len;
+
+ CRYPTO_poly1305_update(poly1305, data, data_len);
+
+ /* pad16() is defined in RFC 7539 2.8.1. */
+ if ((pad_len = data_len % 16) == 0)
+ return;
+
+ CRYPTO_poly1305_update(poly1305, zero_pad16, 16 - pad_len);
+}
+
+int
+aead_chacha20_poly1305_seal(unsigned char key[32], unsigned char tag_len,
+ unsigned char *out, size_t *out_len, size_t max_out_len,
+ const unsigned char *nonce, size_t nonce_len,
+ const unsigned char *in, size_t in_len,
+ const unsigned char *ad, size_t ad_len)
+{
+ unsigned char poly1305_key[32];
+ poly1305_state poly1305;
+ const unsigned char *iv;
+ const uint64_t in_len_64 = in_len;
+ uint64_t ctr;
+
+ /* The underlying ChaCha implementation may not overflow the block
+ * counter into the second counter word. Therefore we disallow
+ * individual operations that work on more than 256GB at a time.
+ * in_len_64 is needed because, on 32-bit platforms, size_t is only
+ * 32-bits and this produces a warning because it's always false.
+ * Casting to uint64_t inside the conditional is not sufficient to stop
+ * the warning. */
+ if (in_len_64 >= (1ULL << 32) * 64 - 64) {
+ EVPerror(EVP_R_TOO_LARGE);
+ return 0;
+ }
+
+ if (max_out_len < in_len + tag_len) {
+ EVPerror(EVP_R_BUFFER_TOO_SMALL);
+ return 0;
+ }
+
+ if (nonce_len != CHACHA20_NONCE_LEN) {
+ EVPerror(EVP_R_IV_TOO_LARGE);
+ return 0;
+ }
+
+ ctr = (uint64_t)((uint32_t)(nonce[0]) | (uint32_t)(nonce[1]) << 8 |
+ (uint32_t)(nonce[2]) << 16 | (uint32_t)(nonce[3]) << 24) << 32;
+ iv = nonce + CHACHA20_CONSTANT_LEN;
+
+ memset(poly1305_key, 0, sizeof(poly1305_key));
+ CRYPTO_chacha_20(poly1305_key, poly1305_key,
+ sizeof(poly1305_key), key, iv, ctr);
+
+ CRYPTO_poly1305_init(&poly1305, poly1305_key);
+ poly1305_update_with_pad16(&poly1305, ad, ad_len);
+ CRYPTO_chacha_20(out, in, in_len, key, iv, ctr + 1);
+ poly1305_update_with_pad16(&poly1305, out, in_len);
+ poly1305_update_with_length(&poly1305, NULL, ad_len);
+ poly1305_update_with_length(&poly1305, NULL, in_len);
+
+ if (tag_len != POLY1305_TAG_LEN) {
+ unsigned char tag[POLY1305_TAG_LEN];
+ CRYPTO_poly1305_finish(&poly1305, tag);
+ memcpy(out + in_len, tag, tag_len);
+ *out_len = in_len + tag_len;
+ return 1;
+ }
+
+ CRYPTO_poly1305_finish(&poly1305, out + in_len);
+ *out_len = in_len + POLY1305_TAG_LEN;
+ return 1;
+}
+
+int
+aead_chacha20_poly1305_open(unsigned char key[32], unsigned char tag_len,
+ unsigned char *out, size_t *out_len, size_t max_out_len,
+ const unsigned char *nonce, size_t nonce_len,
+ const unsigned char *in, size_t in_len,
+ const unsigned char *ad, size_t ad_len)
+{
+ unsigned char mac[POLY1305_TAG_LEN];
+ unsigned char poly1305_key[32];
+ const unsigned char *iv = nonce;
+ poly1305_state poly1305;
+ const uint64_t in_len_64 = in_len;
+ size_t plaintext_len;
+ uint64_t ctr = 0;
+
+ if (in_len < tag_len) {
+ EVPerror(EVP_R_BAD_DECRYPT);
+ return 0;
+ }
+
+ /* The underlying ChaCha implementation may not overflow the block
+ * counter into the second counter word. Therefore we disallow
+ * individual operations that work on more than 256GB at a time.
+ * in_len_64 is needed because, on 32-bit platforms, size_t is only
+ * 32-bits and this produces a warning because it's always false.
+ * Casting to uint64_t inside the conditional is not sufficient to stop
+ * the warning. */
+ if (in_len_64 >= (1ULL << 32) * 64 - 64) {
+ EVPerror(EVP_R_TOO_LARGE);
+ return 0;
+ }
+
+ if (nonce_len != CHACHA20_NONCE_LEN) {
+ EVPerror(EVP_R_IV_TOO_LARGE);
+ return 0;
+ }
+
+ plaintext_len = in_len - tag_len;
+
+ if (max_out_len < plaintext_len) {
+ EVPerror(EVP_R_BUFFER_TOO_SMALL);
+ return 0;
+ }
+
+ ctr = (uint64_t)((uint32_t)(nonce[0]) | (uint32_t)(nonce[1]) << 8 |
+ (uint32_t)(nonce[2]) << 16 | (uint32_t)(nonce[3]) << 24) << 32;
+ iv = nonce + CHACHA20_CONSTANT_LEN;
+
+ memset(poly1305_key, 0, sizeof(poly1305_key));
+ CRYPTO_chacha_20(poly1305_key, poly1305_key,
+ sizeof(poly1305_key), key, iv, ctr);
+
+ CRYPTO_poly1305_init(&poly1305, poly1305_key);
+ poly1305_update_with_pad16(&poly1305, ad, ad_len);
+ poly1305_update_with_pad16(&poly1305, in, plaintext_len);
+ poly1305_update_with_length(&poly1305, NULL, ad_len);
+ poly1305_update_with_length(&poly1305, NULL, plaintext_len);
+
+ CRYPTO_poly1305_finish(&poly1305, mac);
+
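+ /* Verify the tag in constant time before any plaintext is produced. */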
+ if (timingsafe_memcmp(mac, in + plaintext_len, tag_len) != 0) {
+ EVPerror(EVP_R_BAD_DECRYPT);
+ return 0;
+ }
+
+ CRYPTO_chacha_20(out, in, plaintext_len, key, iv, ctr + 1);
+ *out_len = plaintext_len;
+ return 1;
+}
+
+int
+aead_xchacha20_poly1305_seal(unsigned char key[32], unsigned char tag_len,
+ unsigned char *out, size_t *out_len, size_t max_out_len,
+ const unsigned char *nonce, size_t nonce_len,
+ const unsigned char *in, size_t in_len,
+ const unsigned char *ad, size_t ad_len)
+{
+ unsigned char poly1305_key[32];
+ unsigned char subkey[32];
+ poly1305_state poly1305;
+
+ if (max_out_len < in_len + tag_len) {
+ EVPerror(EVP_R_BUFFER_TOO_SMALL);
+ return 0;
+ }
+
+ if (nonce_len != XCHACHA20_NONCE_LEN) {
+ EVPerror(EVP_R_IV_TOO_LARGE);
+ return 0;
+ }
+
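+ /* XChaCha20: HChaCha20 derives a subkey from the key and the first 16
+ * nonce bytes; the remaining 8 nonce bytes then drive the regular
+ * construction, with block counter 1 for the payload and counter 0
+ * for the Poly1305 key. */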
+ CRYPTO_hchacha_20(subkey, key, nonce);
+
+ CRYPTO_chacha_20(out, in, in_len, subkey, nonce + 16, 1);
+
+ memset(poly1305_key, 0, sizeof(poly1305_key));
+ CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key),
+ subkey, nonce + 16, 0);
+
+ CRYPTO_poly1305_init(&poly1305, poly1305_key);
+ poly1305_update_with_pad16(&poly1305, ad, ad_len);
+ poly1305_update_with_pad16(&poly1305, out, in_len);
+ poly1305_update_with_length(&poly1305, NULL, ad_len);
+ poly1305_update_with_length(&poly1305, NULL, in_len);
+
+ if (tag_len != POLY1305_TAG_LEN) {
+ unsigned char tag[POLY1305_TAG_LEN];
+ CRYPTO_poly1305_finish(&poly1305, tag);
+ memcpy(out + in_len, tag, tag_len);
+ *out_len = in_len + tag_len;
+ return 1;
+ }
+
+ CRYPTO_poly1305_finish(&poly1305, out + in_len);
+ *out_len = in_len + POLY1305_TAG_LEN;
+ return 1;
+}
+
+int
+aead_xchacha20_poly1305_open(unsigned char key[32], unsigned char tag_len,
+ unsigned char *out, size_t *out_len, size_t max_out_len,
+ const unsigned char *nonce, size_t nonce_len,
+ const unsigned char *in, size_t in_len,
+ const unsigned char *ad, size_t ad_len)
+{
+ unsigned char mac[POLY1305_TAG_LEN];
+ unsigned char poly1305_key[32];
+ unsigned char subkey[32];
+ poly1305_state poly1305;
+ size_t plaintext_len;
+
+ if (in_len < tag_len) {
+ EVPerror(EVP_R_BAD_DECRYPT);
+ return 0;
+ }
+
+ if (nonce_len != XCHACHA20_NONCE_LEN) {
+ EVPerror(EVP_R_IV_TOO_LARGE);
+ return 0;
+ }
+
+ plaintext_len = in_len - tag_len;
+
+ if (max_out_len < plaintext_len) {
+ EVPerror(EVP_R_BUFFER_TOO_SMALL);
+ return 0;
+ }
+
+ CRYPTO_hchacha_20(subkey, key, nonce);
+
+ memset(poly1305_key, 0, sizeof(poly1305_key));
+ CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key),
+ subkey, nonce + 16, 0);
+
+ CRYPTO_poly1305_init(&poly1305, poly1305_key);
+ poly1305_update_with_pad16(&poly1305, ad, ad_len);
+ poly1305_update_with_pad16(&poly1305, in, plaintext_len);
+ poly1305_update_with_length(&poly1305, NULL, ad_len);
+ poly1305_update_with_length(&poly1305, NULL, plaintext_len);
+
+ CRYPTO_poly1305_finish(&poly1305, mac);
+ if (timingsafe_memcmp(mac, in + plaintext_len, tag_len) != 0) {
+ EVPerror(EVP_R_BAD_DECRYPT);
+ return 0;
+ }
+
+ CRYPTO_chacha_20(out, in, plaintext_len, subkey, nonce + 16, 1);
+
+ *out_len = plaintext_len;
+ return 1;
+}
+
+#if 0
+/* RFC 7539 */
+static const EVP_AEAD aead_chacha20_poly1305 = {
+ .key_len = 32,
+ .nonce_len = CHACHA20_NONCE_LEN,
+ .overhead = POLY1305_TAG_LEN,
+ .max_tag_len = POLY1305_TAG_LEN,
+
+ .init = aead_chacha20_poly1305_init,
+ .cleanup = aead_chacha20_poly1305_cleanup,
+ .seal = aead_chacha20_poly1305_seal,
+ .open = aead_chacha20_poly1305_open,
+};
+
+const EVP_AEAD *
+EVP_aead_chacha20_poly1305()
+{
+ return &aead_chacha20_poly1305;
+}
+
+static const EVP_AEAD aead_xchacha20_poly1305 = {
+ .key_len = 32,
+ .nonce_len = XCHACHA20_NONCE_LEN,
+ .overhead = POLY1305_TAG_LEN,
+ .max_tag_len = POLY1305_TAG_LEN,
+
+ .init = aead_chacha20_poly1305_init,
+ .cleanup = aead_chacha20_poly1305_cleanup,
+ .seal = aead_xchacha20_poly1305_seal,
+ .open = aead_xchacha20_poly1305_open,
+};
+
+const EVP_AEAD *
+EVP_aead_xchacha20_poly1305()
+{
+ return &aead_xchacha20_poly1305;
+}
+#endif
+
+#endif /* !OPENSSL_NO_CHACHA && !OPENSSL_NO_POLY1305 */
diff --git a/ext/libressl/crypto/aes/Makefile b/ext/libressl/crypto/aes/Makefile
new file mode 100644
index 0000000..2b3c04c
--- /dev/null
+++ b/ext/libressl/crypto/aes/Makefile
@@ -0,0 +1,14 @@
+include ../../ssl_common.mk
+CFLAGS+= -D__BEGIN_HIDDEN_DECLS= -D__END_HIDDEN_DECLS=
+
+obj = aes_core.o aes_ecb.o aes_cbc.o aes_cfb.o aes_ctr.o aes_ige.o aes_ofb.o
+
+
+all: $(obj)
+dep: all
+
+%.o: %.c
+ $(CC) $(CFLAGS) -c $<
+
+clean:
+ rm -f *.o *.a
diff --git a/ext/libressl/crypto/aes/aes-elf-armv4.S b/ext/libressl/crypto/aes/aes-elf-armv4.S
new file mode 100644
index 0000000..8164b53
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes-elf-armv4.S
@@ -0,0 +1,1074 @@
+#include "arm_arch.h"
+.text
+.code 32
+
+.type AES_Te,%object
+.align 5
+AES_Te:
+.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
+.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
+.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
+.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
+.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
+.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
+.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
+.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
+.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
+.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
+.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
+.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
+.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
+.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
+.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
+.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
+.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
+.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
+.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
+.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
+.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
+.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
+.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
+.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
+.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
+.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
+.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
+.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
+.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
+.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
+.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
+.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
+.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
+.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
+.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
+.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
+.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
+.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
+.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
+.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
+.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
+.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
+.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
+.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
+.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
+.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
+.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
+.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
+.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
+.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
+.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
+.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
+.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
+.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
+.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
+.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
+.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
+.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
+.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
+.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
+.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
+.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
+.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
+.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
+@ Te4[256]
+.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
+.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
+.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
+.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
+.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
+.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
+.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
+.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
+.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
+.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
+.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
+.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
+.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
+.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
+.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
+.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
+.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
+.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
+.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
+.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
+.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
+.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
+.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
+.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
+.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
+.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
+.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
+.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
+.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
+.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
+.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
+.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+@ rcon[]
+.word 0x01000000, 0x02000000, 0x04000000, 0x08000000
+.word 0x10000000, 0x20000000, 0x40000000, 0x80000000
+.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
+.size AES_Te,.-AES_Te
+
+@ void AES_encrypt(const unsigned char *in, unsigned char *out,
+@ const AES_KEY *key) {
+.global AES_encrypt
+.type AES_encrypt,%function
+.align 5
+AES_encrypt:
+ sub r3,pc,#8 @ AES_encrypt
+ stmdb sp!,{r1,r4-r12,lr}
+ mov r12,r0 @ inp
+ mov r11,r2
+ sub r10,r3,#AES_encrypt-AES_Te @ Te
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r0,[r12,#3] @ load input data in endian-neutral
+ ldrb r4,[r12,#2] @ manner...
+ ldrb r5,[r12,#1]
+ ldrb r6,[r12,#0]
+ orr r0,r0,r4,lsl#8
+ ldrb r1,[r12,#7]
+ orr r0,r0,r5,lsl#16
+ ldrb r4,[r12,#6]
+ orr r0,r0,r6,lsl#24
+ ldrb r5,[r12,#5]
+ ldrb r6,[r12,#4]
+ orr r1,r1,r4,lsl#8
+ ldrb r2,[r12,#11]
+ orr r1,r1,r5,lsl#16
+ ldrb r4,[r12,#10]
+ orr r1,r1,r6,lsl#24
+ ldrb r5,[r12,#9]
+ ldrb r6,[r12,#8]
+ orr r2,r2,r4,lsl#8
+ ldrb r3,[r12,#15]
+ orr r2,r2,r5,lsl#16
+ ldrb r4,[r12,#14]
+ orr r2,r2,r6,lsl#24
+ ldrb r5,[r12,#13]
+ ldrb r6,[r12,#12]
+ orr r3,r3,r4,lsl#8
+ orr r3,r3,r5,lsl#16
+ orr r3,r3,r6,lsl#24
+#else
+ ldr r0,[r12,#0]
+ ldr r1,[r12,#4]
+ ldr r2,[r12,#8]
+ ldr r3,[r12,#12]
+#ifdef __ARMEL__
+ rev r0,r0
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+#endif
+#endif
+ bl _armv4_AES_encrypt
+
+ ldr r12,[sp],#4 @ pop out
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+#ifdef __ARMEL__
+ rev r0,r0
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+#endif
+ str r0,[r12,#0]
+ str r1,[r12,#4]
+ str r2,[r12,#8]
+ str r3,[r12,#12]
+#else
+ mov r4,r0,lsr#24 @ write output in endian-neutral
+ mov r5,r0,lsr#16 @ manner...
+ mov r6,r0,lsr#8
+ strb r4,[r12,#0]
+ strb r5,[r12,#1]
+ mov r4,r1,lsr#24
+ strb r6,[r12,#2]
+ mov r5,r1,lsr#16
+ strb r0,[r12,#3]
+ mov r6,r1,lsr#8
+ strb r4,[r12,#4]
+ strb r5,[r12,#5]
+ mov r4,r2,lsr#24
+ strb r6,[r12,#6]
+ mov r5,r2,lsr#16
+ strb r1,[r12,#7]
+ mov r6,r2,lsr#8
+ strb r4,[r12,#8]
+ strb r5,[r12,#9]
+ mov r4,r3,lsr#24
+ strb r6,[r12,#10]
+ mov r5,r3,lsr#16
+ strb r2,[r12,#11]
+ mov r6,r3,lsr#8
+ strb r4,[r12,#12]
+ strb r5,[r12,#13]
+ strb r6,[r12,#14]
+ strb r3,[r12,#15]
+#endif
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r12,pc}
+#else
+ ldmia sp!,{r4-r12,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size AES_encrypt,.-AES_encrypt
+
+.type _armv4_AES_encrypt,%function
+.align 2
+_armv4_AES_encrypt:
+ str lr,[sp,#-4]! @ push lr
+ ldmia r11!,{r4-r7}
+ eor r0,r0,r4
+ ldr r12,[r11,#240-16]
+ eor r1,r1,r5
+ eor r2,r2,r6
+ eor r3,r3,r7
+ sub r12,r12,#1
+ mov lr,#255
+
+ and r7,lr,r0
+ and r8,lr,r0,lsr#8
+ and r9,lr,r0,lsr#16
+ mov r0,r0,lsr#24
+.Lenc_loop:
+ ldr r4,[r10,r7,lsl#2] @ Te3[s0>>0]
+ and r7,lr,r1,lsr#16 @ i0
+ ldr r5,[r10,r8,lsl#2] @ Te2[s0>>8]
+ and r8,lr,r1
+ ldr r6,[r10,r9,lsl#2] @ Te1[s0>>16]
+ and r9,lr,r1,lsr#8
+ ldr r0,[r10,r0,lsl#2] @ Te0[s0>>24]
+ mov r1,r1,lsr#24
+
+ ldr r7,[r10,r7,lsl#2] @ Te1[s1>>16]
+ ldr r8,[r10,r8,lsl#2] @ Te3[s1>>0]
+ ldr r9,[r10,r9,lsl#2] @ Te2[s1>>8]
+ eor r0,r0,r7,ror#8
+ ldr r1,[r10,r1,lsl#2] @ Te0[s1>>24]
+ and r7,lr,r2,lsr#8 @ i0
+ eor r5,r5,r8,ror#8
+ and r8,lr,r2,lsr#16 @ i1
+ eor r6,r6,r9,ror#8
+ and r9,lr,r2
+ ldr r7,[r10,r7,lsl#2] @ Te2[s2>>8]
+ eor r1,r1,r4,ror#24
+ ldr r8,[r10,r8,lsl#2] @ Te1[s2>>16]
+ mov r2,r2,lsr#24
+
+ ldr r9,[r10,r9,lsl#2] @ Te3[s2>>0]
+ eor r0,r0,r7,ror#16
+ ldr r2,[r10,r2,lsl#2] @ Te0[s2>>24]
+ and r7,lr,r3 @ i0
+ eor r1,r1,r8,ror#8
+ and r8,lr,r3,lsr#8 @ i1
+ eor r6,r6,r9,ror#16
+ and r9,lr,r3,lsr#16 @ i2
+ ldr r7,[r10,r7,lsl#2] @ Te3[s3>>0]
+ eor r2,r2,r5,ror#16
+ ldr r8,[r10,r8,lsl#2] @ Te2[s3>>8]
+ mov r3,r3,lsr#24
+
+ ldr r9,[r10,r9,lsl#2] @ Te1[s3>>16]
+ eor r0,r0,r7,ror#24
+ ldr r7,[r11],#16
+ eor r1,r1,r8,ror#16
+ ldr r3,[r10,r3,lsl#2] @ Te0[s3>>24]
+ eor r2,r2,r9,ror#8
+ ldr r4,[r11,#-12]
+ eor r3,r3,r6,ror#8
+
+ ldr r5,[r11,#-8]
+ eor r0,r0,r7
+ ldr r6,[r11,#-4]
+ and r7,lr,r0
+ eor r1,r1,r4
+ and r8,lr,r0,lsr#8
+ eor r2,r2,r5
+ and r9,lr,r0,lsr#16
+ eor r3,r3,r6
+ mov r0,r0,lsr#24
+
+ subs r12,r12,#1
+ bne .Lenc_loop
+
+ add r10,r10,#2
+
+ ldrb r4,[r10,r7,lsl#2] @ Te4[s0>>0]
+ and r7,lr,r1,lsr#16 @ i0
+ ldrb r5,[r10,r8,lsl#2] @ Te4[s0>>8]
+ and r8,lr,r1
+ ldrb r6,[r10,r9,lsl#2] @ Te4[s0>>16]
+ and r9,lr,r1,lsr#8
+ ldrb r0,[r10,r0,lsl#2] @ Te4[s0>>24]
+ mov r1,r1,lsr#24
+
+ ldrb r7,[r10,r7,lsl#2] @ Te4[s1>>16]
+ ldrb r8,[r10,r8,lsl#2] @ Te4[s1>>0]
+ ldrb r9,[r10,r9,lsl#2] @ Te4[s1>>8]
+ eor r0,r7,r0,lsl#8
+ ldrb r1,[r10,r1,lsl#2] @ Te4[s1>>24]
+ and r7,lr,r2,lsr#8 @ i0
+ eor r5,r8,r5,lsl#8
+ and r8,lr,r2,lsr#16 @ i1
+ eor r6,r9,r6,lsl#8
+ and r9,lr,r2
+ ldrb r7,[r10,r7,lsl#2] @ Te4[s2>>8]
+ eor r1,r4,r1,lsl#24
+ ldrb r8,[r10,r8,lsl#2] @ Te4[s2>>16]
+ mov r2,r2,lsr#24
+
+ ldrb r9,[r10,r9,lsl#2] @ Te4[s2>>0]
+ eor r0,r7,r0,lsl#8
+ ldrb r2,[r10,r2,lsl#2] @ Te4[s2>>24]
+ and r7,lr,r3 @ i0
+ eor r1,r1,r8,lsl#16
+ and r8,lr,r3,lsr#8 @ i1
+ eor r6,r9,r6,lsl#8
+ and r9,lr,r3,lsr#16 @ i2
+ ldrb r7,[r10,r7,lsl#2] @ Te4[s3>>0]
+ eor r2,r5,r2,lsl#24
+ ldrb r8,[r10,r8,lsl#2] @ Te4[s3>>8]
+ mov r3,r3,lsr#24
+
+ ldrb r9,[r10,r9,lsl#2] @ Te4[s3>>16]
+ eor r0,r7,r0,lsl#8
+ ldr r7,[r11,#0]
+ ldrb r3,[r10,r3,lsl#2] @ Te4[s3>>24]
+ eor r1,r1,r8,lsl#8
+ ldr r4,[r11,#4]
+ eor r2,r2,r9,lsl#16
+ ldr r5,[r11,#8]
+ eor r3,r6,r3,lsl#24
+ ldr r6,[r11,#12]
+
+ eor r0,r0,r7
+ eor r1,r1,r4
+ eor r2,r2,r5
+ eor r3,r3,r6
+
+ sub r10,r10,#2
+ ldr pc,[sp],#4 @ pop and return
+.size _armv4_AES_encrypt,.-_armv4_AES_encrypt
+
+.global AES_set_encrypt_key
+.type AES_set_encrypt_key,%function
+.align 5
+AES_set_encrypt_key:
+_armv4_AES_set_encrypt_key:
+ sub r3,pc,#8 @ AES_set_encrypt_key
+ teq r0,#0
+ moveq r0,#-1
+ beq .Labrt
+ teq r2,#0
+ moveq r0,#-1
+ beq .Labrt
+
+ teq r1,#128
+ beq .Lok
+ teq r1,#192
+ beq .Lok
+ teq r1,#256
+ movne r0,#-1
+ bne .Labrt
+
+.Lok: stmdb sp!,{r4-r12,lr}
+ sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4
+
+ mov r12,r0 @ inp
+ mov lr,r1 @ bits
+ mov r11,r2 @ key
+
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r0,[r12,#3] @ load input data in endian-neutral
+ ldrb r4,[r12,#2] @ manner...
+ ldrb r5,[r12,#1]
+ ldrb r6,[r12,#0]
+ orr r0,r0,r4,lsl#8
+ ldrb r1,[r12,#7]
+ orr r0,r0,r5,lsl#16
+ ldrb r4,[r12,#6]
+ orr r0,r0,r6,lsl#24
+ ldrb r5,[r12,#5]
+ ldrb r6,[r12,#4]
+ orr r1,r1,r4,lsl#8
+ ldrb r2,[r12,#11]
+ orr r1,r1,r5,lsl#16
+ ldrb r4,[r12,#10]
+ orr r1,r1,r6,lsl#24
+ ldrb r5,[r12,#9]
+ ldrb r6,[r12,#8]
+ orr r2,r2,r4,lsl#8
+ ldrb r3,[r12,#15]
+ orr r2,r2,r5,lsl#16
+ ldrb r4,[r12,#14]
+ orr r2,r2,r6,lsl#24
+ ldrb r5,[r12,#13]
+ ldrb r6,[r12,#12]
+ orr r3,r3,r4,lsl#8
+ str r0,[r11],#16
+ orr r3,r3,r5,lsl#16
+ str r1,[r11,#-12]
+ orr r3,r3,r6,lsl#24
+ str r2,[r11,#-8]
+ str r3,[r11,#-4]
+#else
+ ldr r0,[r12,#0]
+ ldr r1,[r12,#4]
+ ldr r2,[r12,#8]
+ ldr r3,[r12,#12]
+#ifdef __ARMEL__
+ rev r0,r0
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+#endif
+ str r0,[r11],#16
+ str r1,[r11,#-12]
+ str r2,[r11,#-8]
+ str r3,[r11,#-4]
+#endif
+
+ teq lr,#128
+ bne .Lnot128
+ mov r12,#10
+ str r12,[r11,#240-16]
+ add r6,r10,#256 @ rcon
+ mov lr,#255
+
+.L128_loop:
+ and r5,lr,r3,lsr#24
+ and r7,lr,r3,lsr#16
+ ldrb r5,[r10,r5]
+ and r8,lr,r3,lsr#8
+ ldrb r7,[r10,r7]
+ and r9,lr,r3
+ ldrb r8,[r10,r8]
+ orr r5,r5,r7,lsl#24
+ ldrb r9,[r10,r9]
+ orr r5,r5,r8,lsl#16
+ ldr r4,[r6],#4 @ rcon[i++]
+ orr r5,r5,r9,lsl#8
+ eor r5,r5,r4
+ eor r0,r0,r5 @ rk[4]=rk[0]^...
+ eor r1,r1,r0 @ rk[5]=rk[1]^rk[4]
+ str r0,[r11],#16
+ eor r2,r2,r1 @ rk[6]=rk[2]^rk[5]
+ str r1,[r11,#-12]
+ eor r3,r3,r2 @ rk[7]=rk[3]^rk[6]
+ str r2,[r11,#-8]
+ subs r12,r12,#1
+ str r3,[r11,#-4]
+ bne .L128_loop
+ sub r2,r11,#176
+ b .Ldone
+
+.Lnot128:
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r8,[r12,#19]
+ ldrb r4,[r12,#18]
+ ldrb r5,[r12,#17]
+ ldrb r6,[r12,#16]
+ orr r8,r8,r4,lsl#8
+ ldrb r9,[r12,#23]
+ orr r8,r8,r5,lsl#16
+ ldrb r4,[r12,#22]
+ orr r8,r8,r6,lsl#24
+ ldrb r5,[r12,#21]
+ ldrb r6,[r12,#20]
+ orr r9,r9,r4,lsl#8
+ orr r9,r9,r5,lsl#16
+ str r8,[r11],#8
+ orr r9,r9,r6,lsl#24
+ str r9,[r11,#-4]
+#else
+ ldr r8,[r12,#16]
+ ldr r9,[r12,#20]
+#ifdef __ARMEL__
+ rev r8,r8
+ rev r9,r9
+#endif
+ str r8,[r11],#8
+ str r9,[r11,#-4]
+#endif
+
+ teq lr,#192
+ bne .Lnot192
+ mov r12,#12
+ str r12,[r11,#240-24]
+ add r6,r10,#256 @ rcon
+ mov lr,#255
+ mov r12,#8
+
+.L192_loop:
+ and r5,lr,r9,lsr#24
+ and r7,lr,r9,lsr#16
+ ldrb r5,[r10,r5]
+ and r8,lr,r9,lsr#8
+ ldrb r7,[r10,r7]
+ and r9,lr,r9
+ ldrb r8,[r10,r8]
+ orr r5,r5,r7,lsl#24
+ ldrb r9,[r10,r9]
+ orr r5,r5,r8,lsl#16
+ ldr r4,[r6],#4 @ rcon[i++]
+ orr r5,r5,r9,lsl#8
+ eor r9,r5,r4
+ eor r0,r0,r9 @ rk[6]=rk[0]^...
+ eor r1,r1,r0 @ rk[7]=rk[1]^rk[6]
+ str r0,[r11],#24
+ eor r2,r2,r1 @ rk[8]=rk[2]^rk[7]
+ str r1,[r11,#-20]
+ eor r3,r3,r2 @ rk[9]=rk[3]^rk[8]
+ str r2,[r11,#-16]
+ subs r12,r12,#1
+ str r3,[r11,#-12]
+ subeq r2,r11,#216
+ beq .Ldone
+
+ ldr r7,[r11,#-32]
+ ldr r8,[r11,#-28]
+ eor r7,r7,r3 @ rk[10]=rk[4]^rk[9]
+ eor r9,r8,r7 @ rk[11]=rk[5]^rk[10]
+ str r7,[r11,#-8]
+ str r9,[r11,#-4]
+ b .L192_loop
+
+.Lnot192:
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r8,[r12,#27]
+ ldrb r4,[r12,#26]
+ ldrb r5,[r12,#25]
+ ldrb r6,[r12,#24]
+ orr r8,r8,r4,lsl#8
+ ldrb r9,[r12,#31]
+ orr r8,r8,r5,lsl#16
+ ldrb r4,[r12,#30]
+ orr r8,r8,r6,lsl#24
+ ldrb r5,[r12,#29]
+ ldrb r6,[r12,#28]
+ orr r9,r9,r4,lsl#8
+ orr r9,r9,r5,lsl#16
+ str r8,[r11],#8
+ orr r9,r9,r6,lsl#24
+ str r9,[r11,#-4]
+#else
+ ldr r8,[r12,#24]
+ ldr r9,[r12,#28]
+#ifdef __ARMEL__
+ rev r8,r8
+ rev r9,r9
+#endif
+ str r8,[r11],#8
+ str r9,[r11,#-4]
+#endif
+
+ mov r12,#14
+ str r12,[r11,#240-32]
+ add r6,r10,#256 @ rcon
+ mov lr,#255
+ mov r12,#7
+
+.L256_loop:
+ and r5,lr,r9,lsr#24
+ and r7,lr,r9,lsr#16
+ ldrb r5,[r10,r5]
+ and r8,lr,r9,lsr#8
+ ldrb r7,[r10,r7]
+ and r9,lr,r9
+ ldrb r8,[r10,r8]
+ orr r5,r5,r7,lsl#24
+ ldrb r9,[r10,r9]
+ orr r5,r5,r8,lsl#16
+ ldr r4,[r6],#4 @ rcon[i++]
+ orr r5,r5,r9,lsl#8
+ eor r9,r5,r4
+ eor r0,r0,r9 @ rk[8]=rk[0]^...
+ eor r1,r1,r0 @ rk[9]=rk[1]^rk[8]
+ str r0,[r11],#32
+ eor r2,r2,r1 @ rk[10]=rk[2]^rk[9]
+ str r1,[r11,#-28]
+ eor r3,r3,r2 @ rk[11]=rk[3]^rk[10]
+ str r2,[r11,#-24]
+ subs r12,r12,#1
+ str r3,[r11,#-20]
+ subeq r2,r11,#256
+ beq .Ldone
+
+ and r5,lr,r3
+ and r7,lr,r3,lsr#8
+ ldrb r5,[r10,r5]
+ and r8,lr,r3,lsr#16
+ ldrb r7,[r10,r7]
+ and r9,lr,r3,lsr#24
+ ldrb r8,[r10,r8]
+ orr r5,r5,r7,lsl#8
+ ldrb r9,[r10,r9]
+ orr r5,r5,r8,lsl#16
+ ldr r4,[r11,#-48]
+ orr r5,r5,r9,lsl#24
+
+ ldr r7,[r11,#-44]
+ ldr r8,[r11,#-40]
+ eor r4,r4,r5 @ rk[12]=rk[4]^...
+ ldr r9,[r11,#-36]
+ eor r7,r7,r4 @ rk[13]=rk[5]^rk[12]
+ str r4,[r11,#-16]
+ eor r8,r8,r7 @ rk[14]=rk[6]^rk[13]
+ str r7,[r11,#-12]
+ eor r9,r9,r8 @ rk[15]=rk[7]^rk[14]
+ str r8,[r11,#-8]
+ str r9,[r11,#-4]
+ b .L256_loop
+
+.Ldone: mov r0,#0
+ ldmia sp!,{r4-r12,lr}
+.Labrt: tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+.size AES_set_encrypt_key,.-AES_set_encrypt_key
+
+.global AES_set_decrypt_key
+.type AES_set_decrypt_key,%function
+.align 5
+AES_set_decrypt_key:
+ str lr,[sp,#-4]! @ push lr
+ bl _armv4_AES_set_encrypt_key
+ teq r0,#0
+ ldrne lr,[sp],#4 @ pop lr
+ bne .Labrt
+
+ stmdb sp!,{r4-r12}
+
+ ldr r12,[r2,#240] @ AES_set_encrypt_key preserves r2,
+ mov r11,r2 @ which is AES_KEY *key
+ mov r7,r2
+ add r8,r2,r12,lsl#4
+
+.Linv: ldr r0,[r7]
+ ldr r1,[r7,#4]
+ ldr r2,[r7,#8]
+ ldr r3,[r7,#12]
+ ldr r4,[r8]
+ ldr r5,[r8,#4]
+ ldr r6,[r8,#8]
+ ldr r9,[r8,#12]
+ str r0,[r8],#-16
+ str r1,[r8,#16+4]
+ str r2,[r8,#16+8]
+ str r3,[r8,#16+12]
+ str r4,[r7],#16
+ str r5,[r7,#-12]
+ str r6,[r7,#-8]
+ str r9,[r7,#-4]
+ teq r7,r8
+ bne .Linv
+ ldr r0,[r11,#16]! @ prefetch tp1
+ mov r7,#0x80
+ mov r8,#0x1b
+ orr r7,r7,#0x8000
+ orr r8,r8,#0x1b00
+ orr r7,r7,r7,lsl#16
+ orr r8,r8,r8,lsl#16
+ sub r12,r12,#1
+ mvn r9,r7
+ mov r12,r12,lsl#2 @ (rounds-1)*4
+
+.Lmix: and r4,r0,r7
+ and r1,r0,r9
+ sub r4,r4,r4,lsr#7
+ and r4,r4,r8
+ eor r1,r4,r1,lsl#1 @ tp2
+
+ and r4,r1,r7
+ and r2,r1,r9
+ sub r4,r4,r4,lsr#7
+ and r4,r4,r8
+ eor r2,r4,r2,lsl#1 @ tp4
+
+ and r4,r2,r7
+ and r3,r2,r9
+ sub r4,r4,r4,lsr#7
+ and r4,r4,r8
+ eor r3,r4,r3,lsl#1 @ tp8
+
+ eor r4,r1,r2
+ eor r5,r0,r3 @ tp9
+ eor r4,r4,r3 @ tpe
+ eor r4,r4,r1,ror#24
+ eor r4,r4,r5,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8)
+ eor r4,r4,r2,ror#16
+ eor r4,r4,r5,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16)
+ eor r4,r4,r5,ror#8 @ ^= ROTATE(tp9,24)
+
+ ldr r0,[r11,#4] @ prefetch tp1
+ str r4,[r11],#4
+ subs r12,r12,#1
+ bne .Lmix
+
+ mov r0,#0
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r12,pc}
+#else
+ ldmia sp!,{r4-r12,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size AES_set_decrypt_key,.-AES_set_decrypt_key
+
+.type AES_Td,%object
+.align 5
+AES_Td:
+.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
+.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
+.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
+.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
+.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
+.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
+.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
+.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
+.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
+.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
+.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
+.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
+.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
+.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
+.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
+.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
+.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
+.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
+.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
+.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
+.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
+.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
+.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
+.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
+.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
+.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
+.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
+.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
+.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
+.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
+.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
+.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
+.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
+.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
+.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
+.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
+.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
+.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
+.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
+.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
+.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
+.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
+.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
+.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
+.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
+.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
+.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
+.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
+.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
+.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
+.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
+.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
+.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
+.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
+.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
+.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
+.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
+.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
+.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
+.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
+.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
+.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
+.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
+.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
+@ Td4[256]
+.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+.size AES_Td,.-AES_Td
+
+@ void AES_decrypt(const unsigned char *in, unsigned char *out,
+@ const AES_KEY *key) {
+.global AES_decrypt
+.type AES_decrypt,%function
+.align 5
+AES_decrypt:
+ sub r3,pc,#8 @ AES_decrypt
+ stmdb sp!,{r1,r4-r12,lr}
+ mov r12,r0 @ inp
+ mov r11,r2
+ sub r10,r3,#AES_decrypt-AES_Td @ Td
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r0,[r12,#3] @ load input data in endian-neutral
+ ldrb r4,[r12,#2] @ manner...
+ ldrb r5,[r12,#1]
+ ldrb r6,[r12,#0]
+ orr r0,r0,r4,lsl#8
+ ldrb r1,[r12,#7]
+ orr r0,r0,r5,lsl#16
+ ldrb r4,[r12,#6]
+ orr r0,r0,r6,lsl#24
+ ldrb r5,[r12,#5]
+ ldrb r6,[r12,#4]
+ orr r1,r1,r4,lsl#8
+ ldrb r2,[r12,#11]
+ orr r1,r1,r5,lsl#16
+ ldrb r4,[r12,#10]
+ orr r1,r1,r6,lsl#24
+ ldrb r5,[r12,#9]
+ ldrb r6,[r12,#8]
+ orr r2,r2,r4,lsl#8
+ ldrb r3,[r12,#15]
+ orr r2,r2,r5,lsl#16
+ ldrb r4,[r12,#14]
+ orr r2,r2,r6,lsl#24
+ ldrb r5,[r12,#13]
+ ldrb r6,[r12,#12]
+ orr r3,r3,r4,lsl#8
+ orr r3,r3,r5,lsl#16
+ orr r3,r3,r6,lsl#24
+#else
+ ldr r0,[r12,#0]
+ ldr r1,[r12,#4]
+ ldr r2,[r12,#8]
+ ldr r3,[r12,#12]
+#ifdef __ARMEL__
+ rev r0,r0
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+#endif
+#endif
+ bl _armv4_AES_decrypt
+
+ ldr r12,[sp],#4 @ pop out
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+#ifdef __ARMEL__
+ rev r0,r0
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+#endif
+ str r0,[r12,#0]
+ str r1,[r12,#4]
+ str r2,[r12,#8]
+ str r3,[r12,#12]
+#else
+ mov r4,r0,lsr#24 @ write output in endian-neutral
+ mov r5,r0,lsr#16 @ manner...
+ mov r6,r0,lsr#8
+ strb r4,[r12,#0]
+ strb r5,[r12,#1]
+ mov r4,r1,lsr#24
+ strb r6,[r12,#2]
+ mov r5,r1,lsr#16
+ strb r0,[r12,#3]
+ mov r6,r1,lsr#8
+ strb r4,[r12,#4]
+ strb r5,[r12,#5]
+ mov r4,r2,lsr#24
+ strb r6,[r12,#6]
+ mov r5,r2,lsr#16
+ strb r1,[r12,#7]
+ mov r6,r2,lsr#8
+ strb r4,[r12,#8]
+ strb r5,[r12,#9]
+ mov r4,r3,lsr#24
+ strb r6,[r12,#10]
+ mov r5,r3,lsr#16
+ strb r2,[r12,#11]
+ mov r6,r3,lsr#8
+ strb r4,[r12,#12]
+ strb r5,[r12,#13]
+ strb r6,[r12,#14]
+ strb r3,[r12,#15]
+#endif
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r12,pc}
+#else
+ ldmia sp!,{r4-r12,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size AES_decrypt,.-AES_decrypt
+
+.type _armv4_AES_decrypt,%function
+.align 2
+_armv4_AES_decrypt:
+ str lr,[sp,#-4]! @ push lr
+ ldmia r11!,{r4-r7}
+ eor r0,r0,r4
+ ldr r12,[r11,#240-16]
+ eor r1,r1,r5
+ eor r2,r2,r6
+ eor r3,r3,r7
+ sub r12,r12,#1
+ mov lr,#255
+
+ and r7,lr,r0,lsr#16
+ and r8,lr,r0,lsr#8
+ and r9,lr,r0
+ mov r0,r0,lsr#24
+.Ldec_loop:
+ ldr r4,[r10,r7,lsl#2] @ Td1[s0>>16]
+ and r7,lr,r1 @ i0
+ ldr r5,[r10,r8,lsl#2] @ Td2[s0>>8]
+ and r8,lr,r1,lsr#16
+ ldr r6,[r10,r9,lsl#2] @ Td3[s0>>0]
+ and r9,lr,r1,lsr#8
+ ldr r0,[r10,r0,lsl#2] @ Td0[s0>>24]
+ mov r1,r1,lsr#24
+
+ ldr r7,[r10,r7,lsl#2] @ Td3[s1>>0]
+ ldr r8,[r10,r8,lsl#2] @ Td1[s1>>16]
+ ldr r9,[r10,r9,lsl#2] @ Td2[s1>>8]
+ eor r0,r0,r7,ror#24
+ ldr r1,[r10,r1,lsl#2] @ Td0[s1>>24]
+ and r7,lr,r2,lsr#8 @ i0
+ eor r5,r8,r5,ror#8
+ and r8,lr,r2 @ i1
+ eor r6,r9,r6,ror#8
+ and r9,lr,r2,lsr#16
+ ldr r7,[r10,r7,lsl#2] @ Td2[s2>>8]
+ eor r1,r1,r4,ror#8
+ ldr r8,[r10,r8,lsl#2] @ Td3[s2>>0]
+ mov r2,r2,lsr#24
+
+ ldr r9,[r10,r9,lsl#2] @ Td1[s2>>16]
+ eor r0,r0,r7,ror#16
+ ldr r2,[r10,r2,lsl#2] @ Td0[s2>>24]
+ and r7,lr,r3,lsr#16 @ i0
+ eor r1,r1,r8,ror#24
+ and r8,lr,r3,lsr#8 @ i1
+ eor r6,r9,r6,ror#8
+ and r9,lr,r3 @ i2
+ ldr r7,[r10,r7,lsl#2] @ Td1[s3>>16]
+ eor r2,r2,r5,ror#8
+ ldr r8,[r10,r8,lsl#2] @ Td2[s3>>8]
+ mov r3,r3,lsr#24
+
+ ldr r9,[r10,r9,lsl#2] @ Td3[s3>>0]
+ eor r0,r0,r7,ror#8
+ ldr r7,[r11],#16
+ eor r1,r1,r8,ror#16
+ ldr r3,[r10,r3,lsl#2] @ Td0[s3>>24]
+ eor r2,r2,r9,ror#24
+
+ ldr r4,[r11,#-12]
+ eor r0,r0,r7
+ ldr r5,[r11,#-8]
+ eor r3,r3,r6,ror#8
+ ldr r6,[r11,#-4]
+ and r7,lr,r0,lsr#16
+ eor r1,r1,r4
+ and r8,lr,r0,lsr#8
+ eor r2,r2,r5
+ and r9,lr,r0
+ eor r3,r3,r6
+ mov r0,r0,lsr#24
+
+ subs r12,r12,#1
+ bne .Ldec_loop
+
+ add r10,r10,#1024
+
+ ldr r5,[r10,#0] @ prefetch Td4
+ ldr r6,[r10,#32]
+ ldr r4,[r10,#64]
+ ldr r5,[r10,#96]
+ ldr r6,[r10,#128]
+ ldr r4,[r10,#160]
+ ldr r5,[r10,#192]
+ ldr r6,[r10,#224]
+
+ ldrb r0,[r10,r0] @ Td4[s0>>24]
+ ldrb r4,[r10,r7] @ Td4[s0>>16]
+ and r7,lr,r1 @ i0
+ ldrb r5,[r10,r8] @ Td4[s0>>8]
+ and r8,lr,r1,lsr#16
+ ldrb r6,[r10,r9] @ Td4[s0>>0]
+ and r9,lr,r1,lsr#8
+
+ ldrb r7,[r10,r7] @ Td4[s1>>0]
+ ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24]
+ ldrb r8,[r10,r8] @ Td4[s1>>16]
+ eor r0,r7,r0,lsl#24
+ ldrb r9,[r10,r9] @ Td4[s1>>8]
+ eor r1,r4,r1,lsl#8
+ and r7,lr,r2,lsr#8 @ i0
+ eor r5,r5,r8,lsl#8
+ and r8,lr,r2 @ i1
+ ldrb r7,[r10,r7] @ Td4[s2>>8]
+ eor r6,r6,r9,lsl#8
+ ldrb r8,[r10,r8] @ Td4[s2>>0]
+ and r9,lr,r2,lsr#16
+
+ ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24]
+ eor r0,r0,r7,lsl#8
+ ldrb r9,[r10,r9] @ Td4[s2>>16]
+ eor r1,r8,r1,lsl#16
+ and r7,lr,r3,lsr#16 @ i0
+ eor r2,r5,r2,lsl#16
+ and r8,lr,r3,lsr#8 @ i1
+ ldrb r7,[r10,r7] @ Td4[s3>>16]
+ eor r6,r6,r9,lsl#16
+ ldrb r8,[r10,r8] @ Td4[s3>>8]
+ and r9,lr,r3 @ i2
+
+ ldrb r9,[r10,r9] @ Td4[s3>>0]
+ ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24]
+ eor r0,r0,r7,lsl#16
+ ldr r7,[r11,#0]
+ eor r1,r1,r8,lsl#8
+ ldr r4,[r11,#4]
+ eor r2,r9,r2,lsl#8
+ ldr r5,[r11,#8]
+ eor r3,r6,r3,lsl#24
+ ldr r6,[r11,#12]
+
+ eor r0,r0,r7
+ eor r1,r1,r4
+ eor r2,r2,r5
+ eor r3,r3,r6
+
+ sub r10,r10,#1024
+ ldr pc,[sp],#4 @ pop and return
+.size _armv4_AES_decrypt,.-_armv4_AES_decrypt
+.asciz "AES for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
+.align 2
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/aes/aes-elf-x86_64.S b/ext/libressl/crypto/aes/aes-elf-x86_64.S
new file mode 100644
index 0000000..83c0053
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes-elf-x86_64.S
@@ -0,0 +1,2547 @@
+#include "x86_arch.h"
+
+.text
+.type _x86_64_AES_encrypt,@function
+.align 16
+_x86_64_AES_encrypt:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+
+ movl 240(%r15),%r13d
+ subl $1,%r13d
+ jmp .Lenc_loop
+.align 16
+.Lenc_loop:
+
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+
+ movzbl %dh,%esi
+ shrl $16,%ecx
+ movzbl %ah,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+
+ shrl $16,%ebx
+ leaq 16(%r15),%r15
+ shrl $16,%eax
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+
+ movl 12(%r15),%edx
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rdi,8),%r12d
+ xorl 1(%r14,%rbp,8),%r8d
+
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ subl $1,%r13d
+ jnz .Lenc_loop
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl 2(%r14,%rsi,8),%r10d
+ movzbl 2(%r14,%rdi,8),%r11d
+ movzbl 2(%r14,%rbp,8),%r12d
+
+ movzbl %dl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl 2(%r14,%rsi,8),%r8d
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+
+ andl $65280,%edi
+ andl $65280,%ebp
+
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%ecx
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ shrl $16,%edx
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+
+ andl $65280,%esi
+ andl $65280,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%eax
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+
+ andl $16711680,%esi
+ andl $16711680,%edi
+ andl $16711680,%ebp
+
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+
+ movzbl %bl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 2(%r14,%rbp,8),%ebp
+
+ andl $16711680,%esi
+ andl $4278190080,%edi
+ andl $4278190080,%ebp
+
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movl 16+12(%r15),%edx
+ movl 2(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 16+0(%r15),%eax
+
+ andl $4278190080,%esi
+ andl $4278190080,%edi
+
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq
+.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt
+.type _x86_64_AES_encrypt_compact,@function
+.align 16
+_x86_64_AES_encrypt_compact:
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp .Lenc_loop_compact
+.align 16
+.Lenc_loop_compact:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15
+ movzbl %al,%r10d
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+
+ movzbl %dl,%r8d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+
+ movzbl %dh,%ebp
+ movzbl %ah,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+
+ movzbl %cl,%edi
+ shll $8,%r9d
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+
+ movzbl %bl,%ebp
+ movzbl %dh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+
+ movzbl %ah,%edi
+ shrl $8,%ecx
+ shrl $8,%ebx
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rcx,1),%edx
+ movzbl (%r14,%rbx,1),%ecx
+ shll $16,%r9d
+ shll $16,%r13d
+ shll $16,%ebp
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+ xorl %ebp,%r8d
+
+ shll $24,%esi
+ shll $24,%edi
+ shll $24,%edx
+ xorl %esi,%r10d
+ shll $24,%ecx
+ xorl %edi,%r11d
+ movl %r10d,%eax
+ movl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ cmpq 16(%rsp),%r15
+ je .Lenc_compact_done
+ movl %eax,%esi
+ movl %ebx,%edi
+ andl $2155905152,%esi
+ andl $2155905152,%edi
+ movl %esi,%r10d
+ movl %edi,%r11d
+ shrl $7,%r10d
+ leal (%rax,%rax,1),%r8d
+ shrl $7,%r11d
+ leal (%rbx,%rbx,1),%r9d
+ subl %r10d,%esi
+ subl %r11d,%edi
+ andl $4278124286,%r8d
+ andl $4278124286,%r9d
+ andl $454761243,%esi
+ andl $454761243,%edi
+ movl %eax,%r10d
+ movl %ebx,%r11d
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movl %ecx,%esi
+ movl %edx,%edi
+ roll $24,%eax
+ roll $24,%ebx
+ andl $2155905152,%esi
+ andl $2155905152,%edi
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movl %esi,%r12d
+ movl %edi,%ebp
+ rorl $16,%r10d
+ rorl $16,%r11d
+ shrl $7,%r12d
+ leal (%rcx,%rcx,1),%r8d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ shrl $7,%ebp
+ leal (%rdx,%rdx,1),%r9d
+ rorl $8,%r10d
+ rorl $8,%r11d
+ subl %r12d,%esi
+ subl %ebp,%edi
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+
+ andl $4278124286,%r8d
+ andl $4278124286,%r9d
+ andl $454761243,%esi
+ andl $454761243,%edi
+ movl %ecx,%r12d
+ movl %edx,%ebp
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ roll $24,%ecx
+ roll $24,%edx
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ movl 0(%r14),%esi
+ rorl $16,%r12d
+ rorl $16,%ebp
+ movl 64(%r14),%edi
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ movl 128(%r14),%r8d
+ rorl $8,%r12d
+ rorl $8,%ebp
+ movl 192(%r14),%r9d
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ jmp .Lenc_loop_compact
+.align 16
+.Lenc_compact_done:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq
+.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
+.globl AES_encrypt
+.type AES_encrypt,@function
+.align 16
+.globl asm_AES_encrypt
+.hidden asm_AES_encrypt
+asm_AES_encrypt:
+AES_encrypt:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+ movq %rsp,%r10
+ leaq -63(%rdx),%rcx
+ andq $-64,%rsp
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx
+ subq %rcx,%rsp
+ subq $32,%rsp
+
+ movq %rsi,16(%rsp)
+ movq %r10,24(%rsp)
+.Lenc_prologue:
+
+ movq %rdx,%r15
+ movl 240(%r15),%r13d
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+
+ shll $4,%r13d
+ leaq (%r15,%r13,1),%rbp
+ movq %r15,(%rsp)
+ movq %rbp,8(%rsp)
+
+
+ leaq .LAES_Te+2048(%rip),%r14
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp
+ leaq (%r14,%rbp,1),%r14
+
+ call _x86_64_AES_encrypt_compact
+
+ movq 16(%rsp),%r9
+ movq 24(%rsp),%rsi
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lenc_epilogue:
+ retq
+.size AES_encrypt,.-AES_encrypt
+.type _x86_64_AES_decrypt,@function
+.align 16
+_x86_64_AES_decrypt:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+
+ movl 240(%r15),%r13d
+ subl $1,%r13d
+ jmp .Ldec_loop
+.align 16
+.Ldec_loop:
+
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+
+ movzbl %bh,%esi
+ shrl $16,%eax
+ movzbl %ch,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+
+ shrl $16,%ebx
+ leaq 16(%r15),%r15
+ shrl $16,%ecx
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+
+ movzbl %dh,%esi
+ movl 12(%r15),%edx
+ movzbl %ah,%ebp
+ xorl 1(%r14,%rsi,8),%r12d
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rbp,8),%r8d
+
+ xorl %r10d,%eax
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r12d,%ecx
+ xorl %r11d,%ebx
+ xorl %r8d,%edx
+ subl $1,%r13d
+ jnz .Ldec_loop
+ leaq 2048(%r14),%r14
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl (%r14,%rsi,1),%r10d
+ movzbl (%r14,%rdi,1),%r11d
+ movzbl (%r14,%rbp,1),%r12d
+
+ movzbl %dl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movzbl (%r14,%rsi,1),%r8d
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $8,%edi
+ shll $8,%ebp
+
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%edx
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ shrl $16,%eax
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+
+ shll $8,%esi
+ shll $8,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%ecx
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $16,%esi
+ shll $16,%edi
+ shll $16,%ebp
+
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+
+ movzbl %bl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $16,%esi
+ shll $24,%edi
+ shll $24,%ebp
+
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movl 16+12(%r15),%edx
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movl 16+0(%r15),%eax
+
+ shll $24,%esi
+ shll $24,%edi
+
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ leaq -2048(%r14),%r14
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq
+.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt
+.type _x86_64_AES_decrypt_compact,@function
+.align 16
+_x86_64_AES_decrypt_compact:
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp .Ldec_loop_compact
+
+.align 16
+.Ldec_loop_compact:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15
+ movzbl %al,%r10d
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+
+ movzbl %dl,%r8d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+
+ movzbl %bh,%ebp
+ movzbl %ch,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+
+ movzbl %cl,%edi
+ shll $8,%r9d
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+
+ movzbl %bl,%ebp
+ movzbl %bh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+
+ movzbl %ch,%edi
+ shll $16,%r9d
+ shll $16,%r13d
+ movzbl (%r14,%rdi,1),%ebx
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+
+ movzbl %dh,%edi
+ shrl $8,%eax
+ shll $16,%ebp
+ movzbl (%r14,%rdi,1),%ecx
+ movzbl (%r14,%rax,1),%edx
+ xorl %ebp,%r8d
+
+ shll $24,%esi
+ shll $24,%ebx
+ shll $24,%ecx
+ xorl %esi,%r10d
+ shll $24,%edx
+ xorl %r11d,%ebx
+ movl %r10d,%eax
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ cmpq 16(%rsp),%r15
+ je .Ldec_compact_done
+
+ movq 256+0(%r14),%rsi
+ shlq $32,%rbx
+ shlq $32,%rdx
+ movq 256+8(%r14),%rdi
+ orq %rbx,%rax
+ orq %rdx,%rcx
+ movq 256+16(%r14),%rbp
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8
+ movq %rdx,%r11
+
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10
+ xorq %rdx,%r13
+
+ xorq %r10,%rax
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq 0(%r14),%rsi
+ shrq $32,%r8
+ shrq $32,%r11
+ movq 64(%r14),%rdi
+ roll $16,%r9d
+ roll $16,%r12d
+ movq 128(%r14),%rbp
+ roll $16,%r8d
+ roll $16,%r11d
+ movq 192(%r14),%r10
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+ movq 256(%r14),%r13
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ jmp .Ldec_loop_compact
+.align 16
+.Ldec_compact_done:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq
+.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
+.globl AES_decrypt
+.type AES_decrypt,@function
+.align 16
+.globl asm_AES_decrypt
+.hidden asm_AES_decrypt
+asm_AES_decrypt:
+AES_decrypt:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+ movq %rsp,%r10
+ leaq -63(%rdx),%rcx
+ andq $-64,%rsp
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx
+ subq %rcx,%rsp
+ subq $32,%rsp
+
+ movq %rsi,16(%rsp)
+ movq %r10,24(%rsp)
+.Ldec_prologue:
+
+ movq %rdx,%r15
+ movl 240(%r15),%r13d
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+
+ shll $4,%r13d
+ leaq (%r15,%r13,1),%rbp
+ movq %r15,(%rsp)
+ movq %rbp,8(%rsp)
+
+
+ leaq .LAES_Td+2048(%rip),%r14
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp
+ leaq (%r14,%rbp,1),%r14
+ shrq $3,%rbp
+ addq %rbp,%r14
+
+ call _x86_64_AES_decrypt_compact
+
+ movq 16(%rsp),%r9
+ movq 24(%rsp),%rsi
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Ldec_epilogue:
+ retq
+.size AES_decrypt,.-AES_decrypt
+.globl AES_set_encrypt_key
+.type AES_set_encrypt_key,@function
+.align 16
+AES_set_encrypt_key:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $8,%rsp
+.Lenc_key_prologue:
+
+ call _x86_64_AES_set_encrypt_key
+
+ movq 8(%rsp),%r15
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp
+.Lenc_key_epilogue:
+ retq
+.size AES_set_encrypt_key,.-AES_set_encrypt_key
+
+.type _x86_64_AES_set_encrypt_key,@function
+.align 16
+_x86_64_AES_set_encrypt_key:
+ movl %esi,%ecx
+ movq %rdi,%rsi
+ movq %rdx,%rdi
+
+ testq $-1,%rsi
+ jz .Lbadpointer
+ testq $-1,%rdi
+ jz .Lbadpointer
+
+ leaq .LAES_Te(%rip),%rbp
+ leaq 2048+128(%rbp),%rbp
+
+
+ movl 0-128(%rbp),%eax
+ movl 32-128(%rbp),%ebx
+ movl 64-128(%rbp),%r8d
+ movl 96-128(%rbp),%edx
+ movl 128-128(%rbp),%eax
+ movl 160-128(%rbp),%ebx
+ movl 192-128(%rbp),%r8d
+ movl 224-128(%rbp),%edx
+
+ cmpl $128,%ecx
+ je .L10rounds
+ cmpl $192,%ecx
+ je .L12rounds
+ cmpl $256,%ecx
+ je .L14rounds
+ movq $-2,%rax
+ jmp .Lexit
+
+.L10rounds:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rdx,8(%rdi)
+
+ shrq $32,%rdx
+ xorl %ecx,%ecx
+ jmp .L10shortcut
+.align 4
+.L10loop:
+ movl 0(%rdi),%eax
+ movl 12(%rdi),%edx
+.L10shortcut:
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,16(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,20(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,24(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,28(%rdi)
+ addl $1,%ecx
+ leaq 16(%rdi),%rdi
+ cmpl $10,%ecx
+ jl .L10loop
+
+ movl $10,80(%rdi)
+ xorq %rax,%rax
+ jmp .Lexit
+
+.L12rounds:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rdx,16(%rdi)
+
+ shrq $32,%rdx
+ xorl %ecx,%ecx
+ jmp .L12shortcut
+.align 4
+.L12loop:
+ movl 0(%rdi),%eax
+ movl 20(%rdi),%edx
+.L12shortcut:
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,24(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,28(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,32(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,36(%rdi)
+
+ cmpl $7,%ecx
+ je .L12break
+ addl $1,%ecx
+
+ xorl 16(%rdi),%eax
+ movl %eax,40(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,44(%rdi)
+
+ leaq 24(%rdi),%rdi
+ jmp .L12loop
+.L12break:
+ movl $12,72(%rdi)
+ xorq %rax,%rax
+ jmp .Lexit
+
+.L14rounds:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rcx
+ movq 24(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+
+ shrq $32,%rdx
+ xorl %ecx,%ecx
+ jmp .L14shortcut
+.align 4
+.L14loop:
+ movl 0(%rdi),%eax
+ movl 28(%rdi),%edx
+.L14shortcut:
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,32(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,36(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,40(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,44(%rdi)
+
+ cmpl $6,%ecx
+ je .L14break
+ addl $1,%ecx
+
+ movl %eax,%edx
+ movl 16(%rdi),%eax
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ shll $8,%ebx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movl %eax,48(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,52(%rdi)
+ xorl 24(%rdi),%eax
+ movl %eax,56(%rdi)
+ xorl 28(%rdi),%eax
+ movl %eax,60(%rdi)
+
+ leaq 32(%rdi),%rdi
+ jmp .L14loop
+.L14break:
+ movl $14,48(%rdi)
+ xorq %rax,%rax
+ jmp .Lexit
+
+.Lbadpointer:
+ movq $-1,%rax
+.Lexit:
+ retq
+.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
+.globl AES_set_decrypt_key
+.type AES_set_decrypt_key,@function
+.align 16
+AES_set_decrypt_key:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rdx
+.Ldec_key_prologue:
+
+ call _x86_64_AES_set_encrypt_key
+ movq (%rsp),%r8
+ cmpl $0,%eax
+ jne .Labort
+
+ movl 240(%r8),%r14d
+ xorq %rdi,%rdi
+ leaq (%rdi,%r14,4),%rcx
+ movq %r8,%rsi
+ leaq (%r8,%rcx,4),%rdi
+.align 4
+.Linvert:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 0(%rdi),%rcx
+ movq 8(%rdi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,0(%rsi)
+ movq %rdx,8(%rsi)
+ leaq 16(%rsi),%rsi
+ leaq -16(%rdi),%rdi
+ cmpq %rsi,%rdi
+ jne .Linvert
+
+ leaq .LAES_Te+2048+1024(%rip),%rax
+
+ movq 40(%rax),%rsi
+ movq 48(%rax),%rdi
+ movq 56(%rax),%rbp
+
+ movq %r8,%r15
+ subl $1,%r14d
+.align 4
+.Lpermute:
+ leaq 16(%r15),%r15
+ movq 0(%r15),%rax
+ movq 8(%r15),%rcx
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8
+ movq %rdx,%r11
+
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10
+ xorq %rdx,%r13
+
+ xorq %r10,%rax
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+
+ shrq $32,%r8
+ shrq $32,%r11
+
+ roll $16,%r9d
+ roll $16,%r12d
+
+ roll $16,%r8d
+ roll $16,%r11d
+
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ movl %eax,0(%r15)
+ movl %ebx,4(%r15)
+ movl %ecx,8(%r15)
+ movl %edx,12(%r15)
+ subl $1,%r14d
+ jnz .Lpermute
+
+ xorq %rax,%rax
+.Labort:
+ movq 8(%rsp),%r15
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp
+.Ldec_key_epilogue:
+ retq
+.size AES_set_decrypt_key,.-AES_set_decrypt_key
+.globl AES_cbc_encrypt
+.type AES_cbc_encrypt,@function
+.align 16
+
+.hidden OPENSSL_ia32cap_P
+.globl asm_AES_cbc_encrypt
+.hidden asm_AES_cbc_encrypt
+asm_AES_cbc_encrypt:
+AES_cbc_encrypt:
+ cmpq $0,%rdx
+ je .Lcbc_epilogue
+ pushfq
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+.Lcbc_prologue:
+
+ cld
+ movl %r9d,%r9d
+
+ leaq .LAES_Te(%rip),%r14
+ cmpq $0,%r9
+ jne .Lcbc_picked_te
+ leaq .LAES_Td(%rip),%r14
+.Lcbc_picked_te:
+
+ movl OPENSSL_ia32cap_P(%rip),%r10d
+ cmpq $512,%rdx
+ jb .Lcbc_slow_prologue
+ testq $15,%rdx
+ jnz .Lcbc_slow_prologue
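+	# the large stack-resident tables are avoided on hyper-threaded CPUs,
+	# where a sibling thread sharing the cache could observe lookups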
+ btl $IA32CAP_BIT0_HT,%r10d
+ jc .Lcbc_slow_prologue
+
+
+ leaq -88-248(%rsp),%r15
+ andq $-64,%r15
+
+
+ movq %r14,%r10
+ leaq 2304(%r14),%r11
+ movq %r15,%r12
+ andq $4095,%r10
+ andq $4095,%r11
+ andq $4095,%r12
+
+ cmpq %r11,%r12
+ jb .Lcbc_te_break_out
+ subq %r11,%r12
+ subq %r12,%r15
+ jmp .Lcbc_te_ok
+.Lcbc_te_break_out:
+ subq %r10,%r12
+ andq $4095,%r12
+ addq $320,%r12
+ subq %r12,%r15
+.align 4
+.Lcbc_te_ok:
+
+ xchgq %rsp,%r15
+
+ movq %r15,16(%rsp)
+.Lcbc_fast_body:
+ movq %rdi,24(%rsp)
+ movq %rsi,32(%rsp)
+ movq %rdx,40(%rsp)
+ movq %rcx,48(%rsp)
+ movq %r8,56(%rsp)
+ movl $0,80+240(%rsp)
+ movq %r8,%rbp
+ movq %r9,%rbx
+ movq %rsi,%r9
+ movq %rdi,%r8
+ movq %rcx,%r15
+
+ movl 240(%r15),%eax
+
+ movq %r15,%r10
+ subq %r14,%r10
+ andq $4095,%r10
+ cmpq $2304,%r10
+ jb .Lcbc_do_ecopy
+ cmpq $4096-248,%r10
+ jb .Lcbc_skip_ecopy
+.align 4
+.Lcbc_do_ecopy:
+ movq %r15,%rsi
+ leaq 80(%rsp),%rdi
+ leaq 80(%rsp),%r15
+ movl $30,%ecx
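+	# 0x90A548F3 hand-encodes "rep movsq; nop": copy the 30-qword (240-byte)
+	# key schedule onto the stack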
+.long 0x90A548F3
+ movl %eax,(%rdi)
+.Lcbc_skip_ecopy:
+ movq %r15,0(%rsp)
+
+ movl $18,%ecx
+.align 4
+.Lcbc_prefetch_te:
+ movq 0(%r14),%r10
+ movq 32(%r14),%r11
+ movq 64(%r14),%r12
+ movq 96(%r14),%r13
+ leaq 128(%r14),%r14
+ subl $1,%ecx
+ jnz .Lcbc_prefetch_te
+ leaq -2304(%r14),%r14
+
+ cmpq $0,%rbx
+ je .LFAST_DECRYPT
+
+
+ movl 0(%rbp),%eax
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+
+.align 4
+.Lcbc_fast_enc_loop:
+ xorl 0(%r8),%eax
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_encrypt
+
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10
+ movq %r10,40(%rsp)
+ jnz .Lcbc_fast_enc_loop
+ movq 56(%rsp),%rbp
+ movl %eax,0(%rbp)
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+
+ jmp .Lcbc_fast_cleanup
+
+
+.align 16
+.LFAST_DECRYPT:
+ cmpq %r8,%r9
+ je .Lcbc_fast_dec_in_place
+
+ movq %rbp,64(%rsp)
+.align 4
+.Lcbc_fast_dec_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_decrypt
+
+ movq 64(%rsp),%rbp
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0(%rbp),%eax
+ xorl 4(%rbp),%ebx
+ xorl 8(%rbp),%ecx
+ xorl 12(%rbp),%edx
+ movq %r8,%rbp
+
+ subq $16,%r10
+ movq %r10,40(%rsp)
+ movq %rbp,64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jnz .Lcbc_fast_dec_loop
+ movq 56(%rsp),%r12
+ movq 0(%rbp),%r10
+ movq 8(%rbp),%r11
+ movq %r10,0(%r12)
+ movq %r11,8(%r12)
+ jmp .Lcbc_fast_cleanup
+
+.align 16
+.Lcbc_fast_dec_in_place:
+ movq 0(%rbp),%r10
+ movq 8(%rbp),%r11
+ movq %r10,0+64(%rsp)
+ movq %r11,8+64(%rsp)
+.align 4
+.Lcbc_fast_dec_in_place_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_decrypt
+
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+
+ movq 0(%r8),%r11
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jz .Lcbc_fast_dec_in_place_done
+
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ movq %r10,40(%rsp)
+ jmp .Lcbc_fast_dec_in_place_loop
+.Lcbc_fast_dec_in_place_done:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+.align 4
+.Lcbc_fast_cleanup:
+ cmpl $0,80+240(%rsp)
+ leaq 80(%rsp),%rdi
+ je .Lcbc_exit
+ movl $30,%ecx
+ xorq %rax,%rax
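+	# 0x90AB48F3 hand-encodes "rep stosq; nop": wipe the on-stack key schedule copy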
+.long 0x90AB48F3
+
+ jmp .Lcbc_exit
+
+
+.align 16
+.Lcbc_slow_prologue:
+
+ leaq -88(%rsp),%rbp
+ andq $-64,%rbp
+
+ leaq -88-63(%rcx),%r10
+ subq %rbp,%r10
+ negq %r10
+ andq $960,%r10
+ subq %r10,%rbp
+
+ xchgq %rsp,%rbp
+
+ movq %rbp,16(%rsp)
+.Lcbc_slow_body:
+
+
+
+
+ movq %r8,56(%rsp)
+ movq %r8,%rbp
+ movq %r9,%rbx
+ movq %rsi,%r9
+ movq %rdi,%r8
+ movq %rcx,%r15
+ movq %rdx,%r10
+
+ movl 240(%r15),%eax
+ movq %r15,0(%rsp)
+ shll $4,%eax
+ leaq (%r15,%rax,1),%rax
+ movq %rax,8(%rsp)
+
+
+ leaq 2048(%r14),%r14
+ leaq 768-8(%rsp),%rax
+ subq %r14,%rax
+ andq $768,%rax
+ leaq (%r14,%rax,1),%r14
+
+ cmpq $0,%rbx
+ je .LSLOW_DECRYPT
+
+
+ testq $-16,%r10
+ movl 0(%rbp),%eax
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+ jz .Lcbc_slow_enc_tail
+
+.align 4
+.Lcbc_slow_enc_loop:
+ xorl 0(%r8),%eax
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+
+ call _x86_64_AES_encrypt_compact
+
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10
+ jnz .Lcbc_slow_enc_loop
+ testq $15,%r10
+ jnz .Lcbc_slow_enc_tail
+ movq 56(%rsp),%rbp
+ movl %eax,0(%rbp)
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+
+ jmp .Lcbc_exit
+
+.align 4
+.Lcbc_slow_enc_tail:
+ movq %rax,%r11
+ movq %rcx,%r12
+ movq %r10,%rcx
+ movq %r8,%rsi
+ movq %r9,%rdi
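+	# 0x9066A4F3 hand-encodes "rep movsb; nop": copy the %r10 remaining input bytes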
+.long 0x9066A4F3
+ movq $16,%rcx
+ subq %r10,%rcx
+ xorq %rax,%rax
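+	# 0x9066AAF3 hand-encodes "rep stosb; nop": zero-pad the block out to 16 bytes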
+.long 0x9066AAF3
+ movq %r9,%r8
+ movq $16,%r10
+ movq %r11,%rax
+ movq %r12,%rcx
+ jmp .Lcbc_slow_enc_loop
+
+.align 16
+.LSLOW_DECRYPT:
+ shrq $3,%rax
+ addq %rax,%r14
+
+ movq 0(%rbp),%r11
+ movq 8(%rbp),%r12
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+.align 4
+.Lcbc_slow_dec_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+
+ call _x86_64_AES_decrypt_compact
+
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+
+ movq 0(%r8),%r11
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jc .Lcbc_slow_dec_partial
+ jz .Lcbc_slow_dec_done
+
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jmp .Lcbc_slow_dec_loop
+.Lcbc_slow_dec_done:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ jmp .Lcbc_exit
+
+.align 4
+.Lcbc_slow_dec_partial:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+
+ movl %eax,0+64(%rsp)
+ movl %ebx,4+64(%rsp)
+ movl %ecx,8+64(%rsp)
+ movl %edx,12+64(%rsp)
+
+ movq %r9,%rdi
+ leaq 64(%rsp),%rsi
+ leaq 16(%r10),%rcx
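+	# 0x9066A4F3 hand-encodes "rep movsb; nop": emit the final partial plaintext block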
+.long 0x9066A4F3
+ jmp .Lcbc_exit
+
+.align 16
+.Lcbc_exit:
+ movq 16(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lcbc_popfq:
+ popfq
+.Lcbc_epilogue:
+ retq
+.size AES_cbc_encrypt,.-AES_cbc_encrypt
+.align 64
+.LAES_Te:
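+	# Te: AES encryption T-table; each 32-bit entry is stored twice so
+	# byte-offset loads at 1..3 yield its rotated variants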
+.long 0xa56363c6,0xa56363c6
+.long 0x847c7cf8,0x847c7cf8
+.long 0x997777ee,0x997777ee
+.long 0x8d7b7bf6,0x8d7b7bf6
+.long 0x0df2f2ff,0x0df2f2ff
+.long 0xbd6b6bd6,0xbd6b6bd6
+.long 0xb16f6fde,0xb16f6fde
+.long 0x54c5c591,0x54c5c591
+.long 0x50303060,0x50303060
+.long 0x03010102,0x03010102
+.long 0xa96767ce,0xa96767ce
+.long 0x7d2b2b56,0x7d2b2b56
+.long 0x19fefee7,0x19fefee7
+.long 0x62d7d7b5,0x62d7d7b5
+.long 0xe6abab4d,0xe6abab4d
+.long 0x9a7676ec,0x9a7676ec
+.long 0x45caca8f,0x45caca8f
+.long 0x9d82821f,0x9d82821f
+.long 0x40c9c989,0x40c9c989
+.long 0x877d7dfa,0x877d7dfa
+.long 0x15fafaef,0x15fafaef
+.long 0xeb5959b2,0xeb5959b2
+.long 0xc947478e,0xc947478e
+.long 0x0bf0f0fb,0x0bf0f0fb
+.long 0xecadad41,0xecadad41
+.long 0x67d4d4b3,0x67d4d4b3
+.long 0xfda2a25f,0xfda2a25f
+.long 0xeaafaf45,0xeaafaf45
+.long 0xbf9c9c23,0xbf9c9c23
+.long 0xf7a4a453,0xf7a4a453
+.long 0x967272e4,0x967272e4
+.long 0x5bc0c09b,0x5bc0c09b
+.long 0xc2b7b775,0xc2b7b775
+.long 0x1cfdfde1,0x1cfdfde1
+.long 0xae93933d,0xae93933d
+.long 0x6a26264c,0x6a26264c
+.long 0x5a36366c,0x5a36366c
+.long 0x413f3f7e,0x413f3f7e
+.long 0x02f7f7f5,0x02f7f7f5
+.long 0x4fcccc83,0x4fcccc83
+.long 0x5c343468,0x5c343468
+.long 0xf4a5a551,0xf4a5a551
+.long 0x34e5e5d1,0x34e5e5d1
+.long 0x08f1f1f9,0x08f1f1f9
+.long 0x937171e2,0x937171e2
+.long 0x73d8d8ab,0x73d8d8ab
+.long 0x53313162,0x53313162
+.long 0x3f15152a,0x3f15152a
+.long 0x0c040408,0x0c040408
+.long 0x52c7c795,0x52c7c795
+.long 0x65232346,0x65232346
+.long 0x5ec3c39d,0x5ec3c39d
+.long 0x28181830,0x28181830
+.long 0xa1969637,0xa1969637
+.long 0x0f05050a,0x0f05050a
+.long 0xb59a9a2f,0xb59a9a2f
+.long 0x0907070e,0x0907070e
+.long 0x36121224,0x36121224
+.long 0x9b80801b,0x9b80801b
+.long 0x3de2e2df,0x3de2e2df
+.long 0x26ebebcd,0x26ebebcd
+.long 0x6927274e,0x6927274e
+.long 0xcdb2b27f,0xcdb2b27f
+.long 0x9f7575ea,0x9f7575ea
+.long 0x1b090912,0x1b090912
+.long 0x9e83831d,0x9e83831d
+.long 0x742c2c58,0x742c2c58
+.long 0x2e1a1a34,0x2e1a1a34
+.long 0x2d1b1b36,0x2d1b1b36
+.long 0xb26e6edc,0xb26e6edc
+.long 0xee5a5ab4,0xee5a5ab4
+.long 0xfba0a05b,0xfba0a05b
+.long 0xf65252a4,0xf65252a4
+.long 0x4d3b3b76,0x4d3b3b76
+.long 0x61d6d6b7,0x61d6d6b7
+.long 0xceb3b37d,0xceb3b37d
+.long 0x7b292952,0x7b292952
+.long 0x3ee3e3dd,0x3ee3e3dd
+.long 0x712f2f5e,0x712f2f5e
+.long 0x97848413,0x97848413
+.long 0xf55353a6,0xf55353a6
+.long 0x68d1d1b9,0x68d1d1b9
+.long 0x00000000,0x00000000
+.long 0x2cededc1,0x2cededc1
+.long 0x60202040,0x60202040
+.long 0x1ffcfce3,0x1ffcfce3
+.long 0xc8b1b179,0xc8b1b179
+.long 0xed5b5bb6,0xed5b5bb6
+.long 0xbe6a6ad4,0xbe6a6ad4
+.long 0x46cbcb8d,0x46cbcb8d
+.long 0xd9bebe67,0xd9bebe67
+.long 0x4b393972,0x4b393972
+.long 0xde4a4a94,0xde4a4a94
+.long 0xd44c4c98,0xd44c4c98
+.long 0xe85858b0,0xe85858b0
+.long 0x4acfcf85,0x4acfcf85
+.long 0x6bd0d0bb,0x6bd0d0bb
+.long 0x2aefefc5,0x2aefefc5
+.long 0xe5aaaa4f,0xe5aaaa4f
+.long 0x16fbfbed,0x16fbfbed
+.long 0xc5434386,0xc5434386
+.long 0xd74d4d9a,0xd74d4d9a
+.long 0x55333366,0x55333366
+.long 0x94858511,0x94858511
+.long 0xcf45458a,0xcf45458a
+.long 0x10f9f9e9,0x10f9f9e9
+.long 0x06020204,0x06020204
+.long 0x817f7ffe,0x817f7ffe
+.long 0xf05050a0,0xf05050a0
+.long 0x443c3c78,0x443c3c78
+.long 0xba9f9f25,0xba9f9f25
+.long 0xe3a8a84b,0xe3a8a84b
+.long 0xf35151a2,0xf35151a2
+.long 0xfea3a35d,0xfea3a35d
+.long 0xc0404080,0xc0404080
+.long 0x8a8f8f05,0x8a8f8f05
+.long 0xad92923f,0xad92923f
+.long 0xbc9d9d21,0xbc9d9d21
+.long 0x48383870,0x48383870
+.long 0x04f5f5f1,0x04f5f5f1
+.long 0xdfbcbc63,0xdfbcbc63
+.long 0xc1b6b677,0xc1b6b677
+.long 0x75dadaaf,0x75dadaaf
+.long 0x63212142,0x63212142
+.long 0x30101020,0x30101020
+.long 0x1affffe5,0x1affffe5
+.long 0x0ef3f3fd,0x0ef3f3fd
+.long 0x6dd2d2bf,0x6dd2d2bf
+.long 0x4ccdcd81,0x4ccdcd81
+.long 0x140c0c18,0x140c0c18
+.long 0x35131326,0x35131326
+.long 0x2fececc3,0x2fececc3
+.long 0xe15f5fbe,0xe15f5fbe
+.long 0xa2979735,0xa2979735
+.long 0xcc444488,0xcc444488
+.long 0x3917172e,0x3917172e
+.long 0x57c4c493,0x57c4c493
+.long 0xf2a7a755,0xf2a7a755
+.long 0x827e7efc,0x827e7efc
+.long 0x473d3d7a,0x473d3d7a
+.long 0xac6464c8,0xac6464c8
+.long 0xe75d5dba,0xe75d5dba
+.long 0x2b191932,0x2b191932
+.long 0x957373e6,0x957373e6
+.long 0xa06060c0,0xa06060c0
+.long 0x98818119,0x98818119
+.long 0xd14f4f9e,0xd14f4f9e
+.long 0x7fdcdca3,0x7fdcdca3
+.long 0x66222244,0x66222244
+.long 0x7e2a2a54,0x7e2a2a54
+.long 0xab90903b,0xab90903b
+.long 0x8388880b,0x8388880b
+.long 0xca46468c,0xca46468c
+.long 0x29eeeec7,0x29eeeec7
+.long 0xd3b8b86b,0xd3b8b86b
+.long 0x3c141428,0x3c141428
+.long 0x79dedea7,0x79dedea7
+.long 0xe25e5ebc,0xe25e5ebc
+.long 0x1d0b0b16,0x1d0b0b16
+.long 0x76dbdbad,0x76dbdbad
+.long 0x3be0e0db,0x3be0e0db
+.long 0x56323264,0x56323264
+.long 0x4e3a3a74,0x4e3a3a74
+.long 0x1e0a0a14,0x1e0a0a14
+.long 0xdb494992,0xdb494992
+.long 0x0a06060c,0x0a06060c
+.long 0x6c242448,0x6c242448
+.long 0xe45c5cb8,0xe45c5cb8
+.long 0x5dc2c29f,0x5dc2c29f
+.long 0x6ed3d3bd,0x6ed3d3bd
+.long 0xefacac43,0xefacac43
+.long 0xa66262c4,0xa66262c4
+.long 0xa8919139,0xa8919139
+.long 0xa4959531,0xa4959531
+.long 0x37e4e4d3,0x37e4e4d3
+.long 0x8b7979f2,0x8b7979f2
+.long 0x32e7e7d5,0x32e7e7d5
+.long 0x43c8c88b,0x43c8c88b
+.long 0x5937376e,0x5937376e
+.long 0xb76d6dda,0xb76d6dda
+.long 0x8c8d8d01,0x8c8d8d01
+.long 0x64d5d5b1,0x64d5d5b1
+.long 0xd24e4e9c,0xd24e4e9c
+.long 0xe0a9a949,0xe0a9a949
+.long 0xb46c6cd8,0xb46c6cd8
+.long 0xfa5656ac,0xfa5656ac
+.long 0x07f4f4f3,0x07f4f4f3
+.long 0x25eaeacf,0x25eaeacf
+.long 0xaf6565ca,0xaf6565ca
+.long 0x8e7a7af4,0x8e7a7af4
+.long 0xe9aeae47,0xe9aeae47
+.long 0x18080810,0x18080810
+.long 0xd5baba6f,0xd5baba6f
+.long 0x887878f0,0x887878f0
+.long 0x6f25254a,0x6f25254a
+.long 0x722e2e5c,0x722e2e5c
+.long 0x241c1c38,0x241c1c38
+.long 0xf1a6a657,0xf1a6a657
+.long 0xc7b4b473,0xc7b4b473
+.long 0x51c6c697,0x51c6c697
+.long 0x23e8e8cb,0x23e8e8cb
+.long 0x7cdddda1,0x7cdddda1
+.long 0x9c7474e8,0x9c7474e8
+.long 0x211f1f3e,0x211f1f3e
+.long 0xdd4b4b96,0xdd4b4b96
+.long 0xdcbdbd61,0xdcbdbd61
+.long 0x868b8b0d,0x868b8b0d
+.long 0x858a8a0f,0x858a8a0f
+.long 0x907070e0,0x907070e0
+.long 0x423e3e7c,0x423e3e7c
+.long 0xc4b5b571,0xc4b5b571
+.long 0xaa6666cc,0xaa6666cc
+.long 0xd8484890,0xd8484890
+.long 0x05030306,0x05030306
+.long 0x01f6f6f7,0x01f6f6f7
+.long 0x120e0e1c,0x120e0e1c
+.long 0xa36161c2,0xa36161c2
+.long 0x5f35356a,0x5f35356a
+.long 0xf95757ae,0xf95757ae
+.long 0xd0b9b969,0xd0b9b969
+.long 0x91868617,0x91868617
+.long 0x58c1c199,0x58c1c199
+.long 0x271d1d3a,0x271d1d3a
+.long 0xb99e9e27,0xb99e9e27
+.long 0x38e1e1d9,0x38e1e1d9
+.long 0x13f8f8eb,0x13f8f8eb
+.long 0xb398982b,0xb398982b
+.long 0x33111122,0x33111122
+.long 0xbb6969d2,0xbb6969d2
+.long 0x70d9d9a9,0x70d9d9a9
+.long 0x898e8e07,0x898e8e07
+.long 0xa7949433,0xa7949433
+.long 0xb69b9b2d,0xb69b9b2d
+.long 0x221e1e3c,0x221e1e3c
+.long 0x92878715,0x92878715
+.long 0x20e9e9c9,0x20e9e9c9
+.long 0x49cece87,0x49cece87
+.long 0xff5555aa,0xff5555aa
+.long 0x78282850,0x78282850
+.long 0x7adfdfa5,0x7adfdfa5
+.long 0x8f8c8c03,0x8f8c8c03
+.long 0xf8a1a159,0xf8a1a159
+.long 0x80898909,0x80898909
+.long 0x170d0d1a,0x170d0d1a
+.long 0xdabfbf65,0xdabfbf65
+.long 0x31e6e6d7,0x31e6e6d7
+.long 0xc6424284,0xc6424284
+.long 0xb86868d0,0xb86868d0
+.long 0xc3414182,0xc3414182
+.long 0xb0999929,0xb0999929
+.long 0x772d2d5a,0x772d2d5a
+.long 0x110f0f1e,0x110f0f1e
+.long 0xcbb0b07b,0xcbb0b07b
+.long 0xfc5454a8,0xfc5454a8
+.long 0xd6bbbb6d,0xd6bbbb6d
+.long 0x3a16162c,0x3a16162c
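+	# Te4: the AES S-box, replicated four times; the compact path selects one
+	# copy per call to vary its cache placement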
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
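+	# round-constant (rcon) words, then the 0x80808080/0xfefefefe/0x1b1b1b1b xtime masks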
+.long 0x00000001, 0x00000002, 0x00000004, 0x00000008
+.long 0x00000010, 0x00000020, 0x00000040, 0x00000080
+.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080
+.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
+.align 64
+.LAES_Td:
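+	# Td: AES decryption T-table, same doubled-entry layout as Te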
+.long 0x50a7f451,0x50a7f451
+.long 0x5365417e,0x5365417e
+.long 0xc3a4171a,0xc3a4171a
+.long 0x965e273a,0x965e273a
+.long 0xcb6bab3b,0xcb6bab3b
+.long 0xf1459d1f,0xf1459d1f
+.long 0xab58faac,0xab58faac
+.long 0x9303e34b,0x9303e34b
+.long 0x55fa3020,0x55fa3020
+.long 0xf66d76ad,0xf66d76ad
+.long 0x9176cc88,0x9176cc88
+.long 0x254c02f5,0x254c02f5
+.long 0xfcd7e54f,0xfcd7e54f
+.long 0xd7cb2ac5,0xd7cb2ac5
+.long 0x80443526,0x80443526
+.long 0x8fa362b5,0x8fa362b5
+.long 0x495ab1de,0x495ab1de
+.long 0x671bba25,0x671bba25
+.long 0x980eea45,0x980eea45
+.long 0xe1c0fe5d,0xe1c0fe5d
+.long 0x02752fc3,0x02752fc3
+.long 0x12f04c81,0x12f04c81
+.long 0xa397468d,0xa397468d
+.long 0xc6f9d36b,0xc6f9d36b
+.long 0xe75f8f03,0xe75f8f03
+.long 0x959c9215,0x959c9215
+.long 0xeb7a6dbf,0xeb7a6dbf
+.long 0xda595295,0xda595295
+.long 0x2d83bed4,0x2d83bed4
+.long 0xd3217458,0xd3217458
+.long 0x2969e049,0x2969e049
+.long 0x44c8c98e,0x44c8c98e
+.long 0x6a89c275,0x6a89c275
+.long 0x78798ef4,0x78798ef4
+.long 0x6b3e5899,0x6b3e5899
+.long 0xdd71b927,0xdd71b927
+.long 0xb64fe1be,0xb64fe1be
+.long 0x17ad88f0,0x17ad88f0
+.long 0x66ac20c9,0x66ac20c9
+.long 0xb43ace7d,0xb43ace7d
+.long 0x184adf63,0x184adf63
+.long 0x82311ae5,0x82311ae5
+.long 0x60335197,0x60335197
+.long 0x457f5362,0x457f5362
+.long 0xe07764b1,0xe07764b1
+.long 0x84ae6bbb,0x84ae6bbb
+.long 0x1ca081fe,0x1ca081fe
+.long 0x942b08f9,0x942b08f9
+.long 0x58684870,0x58684870
+.long 0x19fd458f,0x19fd458f
+.long 0x876cde94,0x876cde94
+.long 0xb7f87b52,0xb7f87b52
+.long 0x23d373ab,0x23d373ab
+.long 0xe2024b72,0xe2024b72
+.long 0x578f1fe3,0x578f1fe3
+.long 0x2aab5566,0x2aab5566
+.long 0x0728ebb2,0x0728ebb2
+.long 0x03c2b52f,0x03c2b52f
+.long 0x9a7bc586,0x9a7bc586
+.long 0xa50837d3,0xa50837d3
+.long 0xf2872830,0xf2872830
+.long 0xb2a5bf23,0xb2a5bf23
+.long 0xba6a0302,0xba6a0302
+.long 0x5c8216ed,0x5c8216ed
+.long 0x2b1ccf8a,0x2b1ccf8a
+.long 0x92b479a7,0x92b479a7
+.long 0xf0f207f3,0xf0f207f3
+.long 0xa1e2694e,0xa1e2694e
+.long 0xcdf4da65,0xcdf4da65
+.long 0xd5be0506,0xd5be0506
+.long 0x1f6234d1,0x1f6234d1
+.long 0x8afea6c4,0x8afea6c4
+.long 0x9d532e34,0x9d532e34
+.long 0xa055f3a2,0xa055f3a2
+.long 0x32e18a05,0x32e18a05
+.long 0x75ebf6a4,0x75ebf6a4
+.long 0x39ec830b,0x39ec830b
+.long 0xaaef6040,0xaaef6040
+.long 0x069f715e,0x069f715e
+.long 0x51106ebd,0x51106ebd
+.long 0xf98a213e,0xf98a213e
+.long 0x3d06dd96,0x3d06dd96
+.long 0xae053edd,0xae053edd
+.long 0x46bde64d,0x46bde64d
+.long 0xb58d5491,0xb58d5491
+.long 0x055dc471,0x055dc471
+.long 0x6fd40604,0x6fd40604
+.long 0xff155060,0xff155060
+.long 0x24fb9819,0x24fb9819
+.long 0x97e9bdd6,0x97e9bdd6
+.long 0xcc434089,0xcc434089
+.long 0x779ed967,0x779ed967
+.long 0xbd42e8b0,0xbd42e8b0
+.long 0x888b8907,0x888b8907
+.long 0x385b19e7,0x385b19e7
+.long 0xdbeec879,0xdbeec879
+.long 0x470a7ca1,0x470a7ca1
+.long 0xe90f427c,0xe90f427c
+.long 0xc91e84f8,0xc91e84f8
+.long 0x00000000,0x00000000
+.long 0x83868009,0x83868009
+.long 0x48ed2b32,0x48ed2b32
+.long 0xac70111e,0xac70111e
+.long 0x4e725a6c,0x4e725a6c
+.long 0xfbff0efd,0xfbff0efd
+.long 0x5638850f,0x5638850f
+.long 0x1ed5ae3d,0x1ed5ae3d
+.long 0x27392d36,0x27392d36
+.long 0x64d90f0a,0x64d90f0a
+.long 0x21a65c68,0x21a65c68
+.long 0xd1545b9b,0xd1545b9b
+.long 0x3a2e3624,0x3a2e3624
+.long 0xb1670a0c,0xb1670a0c
+.long 0x0fe75793,0x0fe75793
+.long 0xd296eeb4,0xd296eeb4
+.long 0x9e919b1b,0x9e919b1b
+.long 0x4fc5c080,0x4fc5c080
+.long 0xa220dc61,0xa220dc61
+.long 0x694b775a,0x694b775a
+.long 0x161a121c,0x161a121c
+.long 0x0aba93e2,0x0aba93e2
+.long 0xe52aa0c0,0xe52aa0c0
+.long 0x43e0223c,0x43e0223c
+.long 0x1d171b12,0x1d171b12
+.long 0x0b0d090e,0x0b0d090e
+.long 0xadc78bf2,0xadc78bf2
+.long 0xb9a8b62d,0xb9a8b62d
+.long 0xc8a91e14,0xc8a91e14
+.long 0x8519f157,0x8519f157
+.long 0x4c0775af,0x4c0775af
+.long 0xbbdd99ee,0xbbdd99ee
+.long 0xfd607fa3,0xfd607fa3
+.long 0x9f2601f7,0x9f2601f7
+.long 0xbcf5725c,0xbcf5725c
+.long 0xc53b6644,0xc53b6644
+.long 0x347efb5b,0x347efb5b
+.long 0x7629438b,0x7629438b
+.long 0xdcc623cb,0xdcc623cb
+.long 0x68fcedb6,0x68fcedb6
+.long 0x63f1e4b8,0x63f1e4b8
+.long 0xcadc31d7,0xcadc31d7
+.long 0x10856342,0x10856342
+.long 0x40229713,0x40229713
+.long 0x2011c684,0x2011c684
+.long 0x7d244a85,0x7d244a85
+.long 0xf83dbbd2,0xf83dbbd2
+.long 0x1132f9ae,0x1132f9ae
+.long 0x6da129c7,0x6da129c7
+.long 0x4b2f9e1d,0x4b2f9e1d
+.long 0xf330b2dc,0xf330b2dc
+.long 0xec52860d,0xec52860d
+.long 0xd0e3c177,0xd0e3c177
+.long 0x6c16b32b,0x6c16b32b
+.long 0x99b970a9,0x99b970a9
+.long 0xfa489411,0xfa489411
+.long 0x2264e947,0x2264e947
+.long 0xc48cfca8,0xc48cfca8
+.long 0x1a3ff0a0,0x1a3ff0a0
+.long 0xd82c7d56,0xd82c7d56
+.long 0xef903322,0xef903322
+.long 0xc74e4987,0xc74e4987
+.long 0xc1d138d9,0xc1d138d9
+.long 0xfea2ca8c,0xfea2ca8c
+.long 0x360bd498,0x360bd498
+.long 0xcf81f5a6,0xcf81f5a6
+.long 0x28de7aa5,0x28de7aa5
+.long 0x268eb7da,0x268eb7da
+.long 0xa4bfad3f,0xa4bfad3f
+.long 0xe49d3a2c,0xe49d3a2c
+.long 0x0d927850,0x0d927850
+.long 0x9bcc5f6a,0x9bcc5f6a
+.long 0x62467e54,0x62467e54
+.long 0xc2138df6,0xc2138df6
+.long 0xe8b8d890,0xe8b8d890
+.long 0x5ef7392e,0x5ef7392e
+.long 0xf5afc382,0xf5afc382
+.long 0xbe805d9f,0xbe805d9f
+.long 0x7c93d069,0x7c93d069
+.long 0xa92dd56f,0xa92dd56f
+.long 0xb31225cf,0xb31225cf
+.long 0x3b99acc8,0x3b99acc8
+.long 0xa77d1810,0xa77d1810
+.long 0x6e639ce8,0x6e639ce8
+.long 0x7bbb3bdb,0x7bbb3bdb
+.long 0x097826cd,0x097826cd
+.long 0xf418596e,0xf418596e
+.long 0x01b79aec,0x01b79aec
+.long 0xa89a4f83,0xa89a4f83
+.long 0x656e95e6,0x656e95e6
+.long 0x7ee6ffaa,0x7ee6ffaa
+.long 0x08cfbc21,0x08cfbc21
+.long 0xe6e815ef,0xe6e815ef
+.long 0xd99be7ba,0xd99be7ba
+.long 0xce366f4a,0xce366f4a
+.long 0xd4099fea,0xd4099fea
+.long 0xd67cb029,0xd67cb029
+.long 0xafb2a431,0xafb2a431
+.long 0x31233f2a,0x31233f2a
+.long 0x3094a5c6,0x3094a5c6
+.long 0xc066a235,0xc066a235
+.long 0x37bc4e74,0x37bc4e74
+.long 0xa6ca82fc,0xa6ca82fc
+.long 0xb0d090e0,0xb0d090e0
+.long 0x15d8a733,0x15d8a733
+.long 0x4a9804f1,0x4a9804f1
+.long 0xf7daec41,0xf7daec41
+.long 0x0e50cd7f,0x0e50cd7f
+.long 0x2ff69117,0x2ff69117
+.long 0x8dd64d76,0x8dd64d76
+.long 0x4db0ef43,0x4db0ef43
+.long 0x544daacc,0x544daacc
+.long 0xdf0496e4,0xdf0496e4
+.long 0xe3b5d19e,0xe3b5d19e
+.long 0x1b886a4c,0x1b886a4c
+.long 0xb81f2cc1,0xb81f2cc1
+.long 0x7f516546,0x7f516546
+.long 0x04ea5e9d,0x04ea5e9d
+.long 0x5d358c01,0x5d358c01
+.long 0x737487fa,0x737487fa
+.long 0x2e410bfb,0x2e410bfb
+.long 0x5a1d67b3,0x5a1d67b3
+.long 0x52d2db92,0x52d2db92
+.long 0x335610e9,0x335610e9
+.long 0x1347d66d,0x1347d66d
+.long 0x8c61d79a,0x8c61d79a
+.long 0x7a0ca137,0x7a0ca137
+.long 0x8e14f859,0x8e14f859
+.long 0x893c13eb,0x893c13eb
+.long 0xee27a9ce,0xee27a9ce
+.long 0x35c961b7,0x35c961b7
+.long 0xede51ce1,0xede51ce1
+.long 0x3cb1477a,0x3cb1477a
+.long 0x59dfd29c,0x59dfd29c
+.long 0x3f73f255,0x3f73f255
+.long 0x79ce1418,0x79ce1418
+.long 0xbf37c773,0xbf37c773
+.long 0xeacdf753,0xeacdf753
+.long 0x5baafd5f,0x5baafd5f
+.long 0x146f3ddf,0x146f3ddf
+.long 0x86db4478,0x86db4478
+.long 0x81f3afca,0x81f3afca
+.long 0x3ec468b9,0x3ec468b9
+.long 0x2c342438,0x2c342438
+.long 0x5f40a3c2,0x5f40a3c2
+.long 0x72c31d16,0x72c31d16
+.long 0x0c25e2bc,0x0c25e2bc
+.long 0x8b493c28,0x8b493c28
+.long 0x41950dff,0x41950dff
+.long 0x7101a839,0x7101a839
+.long 0xdeb30c08,0xdeb30c08
+.long 0x9ce4b4d8,0x9ce4b4d8
+.long 0x90c15664,0x90c15664
+.long 0x6184cb7b,0x6184cb7b
+.long 0x70b632d5,0x70b632d5
+.long 0x745c6c48,0x745c6c48
+.long 0x4257b8d0,0x4257b8d0
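+	# Td4: the inverse S-box, replicated four times, each copy followed by its xtime mask words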
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
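+	# the .byte string below spells "AES for x86_64, CRYPTOGAMS by <appro@openssl.org>"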
+.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/aes/aes-macosx-x86_64.S b/ext/libressl/crypto/aes/aes-macosx-x86_64.S
new file mode 100644
index 0000000..8a9c36e
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes-macosx-x86_64.S
@@ -0,0 +1,2544 @@
+#include "x86_arch.h"
+
+.text
+
+.p2align 4
+_x86_64_AES_encrypt:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+
+ movl 240(%r15),%r13d
+ subl $1,%r13d
+ jmp L$enc_loop
+.p2align 4
+L$enc_loop:
+
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+
+ movzbl %dh,%esi
+ shrl $16,%ecx
+ movzbl %ah,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+
+ shrl $16,%ebx
+ leaq 16(%r15),%r15
+ shrl $16,%eax
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+
+ movl 12(%r15),%edx
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rdi,8),%r12d
+ xorl 1(%r14,%rbp,8),%r8d
+
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ subl $1,%r13d
+ jnz L$enc_loop
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl 2(%r14,%rsi,8),%r10d
+ movzbl 2(%r14,%rdi,8),%r11d
+ movzbl 2(%r14,%rbp,8),%r12d
+
+ movzbl %dl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl 2(%r14,%rsi,8),%r8d
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+
+ andl $65280,%edi
+ andl $65280,%ebp
+
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%ecx
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ shrl $16,%edx
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+
+ andl $65280,%esi
+ andl $65280,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%eax
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+
+ andl $16711680,%esi
+ andl $16711680,%edi
+ andl $16711680,%ebp
+
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+
+ movzbl %bl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 2(%r14,%rbp,8),%ebp
+
+ andl $16711680,%esi
+ andl $4278190080,%edi
+ andl $4278190080,%ebp
+
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movl 16+12(%r15),%edx
+ movl 2(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 16+0(%r15),%eax
+
+ andl $4278190080,%esi
+ andl $4278190080,%edi
+
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq
+
+
+.p2align 4
+_x86_64_AES_encrypt_compact:
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp L$enc_loop_compact
+.p2align 4
+L$enc_loop_compact:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15
+ movzbl %al,%r10d
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+
+ movzbl %dl,%r8d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+
+ movzbl %dh,%ebp
+ movzbl %ah,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+
+ movzbl %cl,%edi
+ shll $8,%r9d
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+
+ movzbl %bl,%ebp
+ movzbl %dh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+
+ movzbl %ah,%edi
+ shrl $8,%ecx
+ shrl $8,%ebx
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rcx,1),%edx
+ movzbl (%r14,%rbx,1),%ecx
+ shll $16,%r9d
+ shll $16,%r13d
+ shll $16,%ebp
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+ xorl %ebp,%r8d
+
+ shll $24,%esi
+ shll $24,%edi
+ shll $24,%edx
+ xorl %esi,%r10d
+ shll $24,%ecx
+ xorl %edi,%r11d
+ movl %r10d,%eax
+ movl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ cmpq 16(%rsp),%r15
+ je L$enc_compact_done
+ movl %eax,%esi
+ movl %ebx,%edi
+ andl $2155905152,%esi
+ andl $2155905152,%edi
+ movl %esi,%r10d
+ movl %edi,%r11d
+ shrl $7,%r10d
+ leal (%rax,%rax,1),%r8d
+ shrl $7,%r11d
+ leal (%rbx,%rbx,1),%r9d
+ subl %r10d,%esi
+ subl %r11d,%edi
+ andl $4278124286,%r8d
+ andl $4278124286,%r9d
+ andl $454761243,%esi
+ andl $454761243,%edi
+ movl %eax,%r10d
+ movl %ebx,%r11d
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movl %ecx,%esi
+ movl %edx,%edi
+ roll $24,%eax
+ roll $24,%ebx
+ andl $2155905152,%esi
+ andl $2155905152,%edi
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movl %esi,%r12d
+ movl %edi,%ebp
+ rorl $16,%r10d
+ rorl $16,%r11d
+ shrl $7,%r12d
+ leal (%rcx,%rcx,1),%r8d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ shrl $7,%ebp
+ leal (%rdx,%rdx,1),%r9d
+ rorl $8,%r10d
+ rorl $8,%r11d
+ subl %r12d,%esi
+ subl %ebp,%edi
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+
+ andl $4278124286,%r8d
+ andl $4278124286,%r9d
+ andl $454761243,%esi
+ andl $454761243,%edi
+ movl %ecx,%r12d
+ movl %edx,%ebp
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ roll $24,%ecx
+ roll $24,%edx
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ movl 0(%r14),%esi
+ rorl $16,%r12d
+ rorl $16,%ebp
+ movl 64(%r14),%edi
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ movl 128(%r14),%r8d
+ rorl $8,%r12d
+ rorl $8,%ebp
+ movl 192(%r14),%r9d
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ jmp L$enc_loop_compact
+.p2align 4
+L$enc_compact_done:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq
+
+.globl _AES_encrypt
+
+.p2align 4
+.globl _asm_AES_encrypt
+.private_extern _asm_AES_encrypt
+_asm_AES_encrypt:
+_AES_encrypt:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+ movq %rsp,%r10
+ leaq -63(%rdx),%rcx
+ andq $-64,%rsp
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx
+ subq %rcx,%rsp
+ subq $32,%rsp
+
+ movq %rsi,16(%rsp)
+ movq %r10,24(%rsp)
+L$enc_prologue:
+
+ movq %rdx,%r15
+ movl 240(%r15),%r13d
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+
+ shll $4,%r13d
+ leaq (%r15,%r13,1),%rbp
+ movq %r15,(%rsp)
+ movq %rbp,8(%rsp)
+
+
+ leaq L$AES_Te+2048(%rip),%r14
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp
+ leaq (%r14,%rbp,1),%r14
+
+ call _x86_64_AES_encrypt_compact
+
+ movq 16(%rsp),%r9
+ movq 24(%rsp),%rsi
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+L$enc_epilogue:
+ retq
+
+
+.p2align 4
+_x86_64_AES_decrypt:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+
+ movl 240(%r15),%r13d
+ subl $1,%r13d
+ jmp L$dec_loop
+.p2align 4
+L$dec_loop:
+
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+
+ movzbl %bh,%esi
+ shrl $16,%eax
+ movzbl %ch,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+
+ shrl $16,%ebx
+ leaq 16(%r15),%r15
+ shrl $16,%ecx
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+
+ movzbl %dh,%esi
+ movl 12(%r15),%edx
+ movzbl %ah,%ebp
+ xorl 1(%r14,%rsi,8),%r12d
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rbp,8),%r8d
+
+ xorl %r10d,%eax
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r12d,%ecx
+ xorl %r11d,%ebx
+ xorl %r8d,%edx
+ subl $1,%r13d
+ jnz L$dec_loop
+ leaq 2048(%r14),%r14
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl (%r14,%rsi,1),%r10d
+ movzbl (%r14,%rdi,1),%r11d
+ movzbl (%r14,%rbp,1),%r12d
+
+ movzbl %dl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movzbl (%r14,%rsi,1),%r8d
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $8,%edi
+ shll $8,%ebp
+
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%edx
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ shrl $16,%eax
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+
+ shll $8,%esi
+ shll $8,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%ecx
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $16,%esi
+ shll $16,%edi
+ shll $16,%ebp
+
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+
+ movzbl %bl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $16,%esi
+ shll $24,%edi
+ shll $24,%ebp
+
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movl 16+12(%r15),%edx
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movl 16+0(%r15),%eax
+
+ shll $24,%esi
+ shll $24,%edi
+
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ leaq -2048(%r14),%r14
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq
+
+
+.p2align 4
+_x86_64_AES_decrypt_compact:
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp L$dec_loop_compact
+
+.p2align 4
+L$dec_loop_compact:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15
+ movzbl %al,%r10d
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+
+ movzbl %dl,%r8d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+
+ movzbl %bh,%ebp
+ movzbl %ch,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+
+ movzbl %cl,%edi
+ shll $8,%r9d
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+
+ movzbl %bl,%ebp
+ movzbl %bh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+
+ movzbl %ch,%edi
+ shll $16,%r9d
+ shll $16,%r13d
+ movzbl (%r14,%rdi,1),%ebx
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+
+ movzbl %dh,%edi
+ shrl $8,%eax
+ shll $16,%ebp
+ movzbl (%r14,%rdi,1),%ecx
+ movzbl (%r14,%rax,1),%edx
+ xorl %ebp,%r8d
+
+ shll $24,%esi
+ shll $24,%ebx
+ shll $24,%ecx
+ xorl %esi,%r10d
+ shll $24,%edx
+ xorl %r11d,%ebx
+ movl %r10d,%eax
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ cmpq 16(%rsp),%r15
+ je L$dec_compact_done
+
+ movq 256+0(%r14),%rsi
+ shlq $32,%rbx
+ shlq $32,%rdx
+ movq 256+8(%r14),%rdi
+ orq %rbx,%rax
+ orq %rdx,%rcx
+ movq 256+16(%r14),%rbp
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8
+ movq %rdx,%r11
+
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10
+ xorq %rdx,%r13
+
+ xorq %r10,%rax
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq 0(%r14),%rsi
+ shrq $32,%r8
+ shrq $32,%r11
+ movq 64(%r14),%rdi
+ roll $16,%r9d
+ roll $16,%r12d
+ movq 128(%r14),%rbp
+ roll $16,%r8d
+ roll $16,%r11d
+ movq 192(%r14),%r10
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+ movq 256(%r14),%r13
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ jmp L$dec_loop_compact
+.p2align 4
+L$dec_compact_done:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq
+
+.globl _AES_decrypt
+
+.p2align 4
+.globl _asm_AES_decrypt
+.private_extern _asm_AES_decrypt
+_asm_AES_decrypt:
+_AES_decrypt:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+ movq %rsp,%r10
+ leaq -63(%rdx),%rcx
+ andq $-64,%rsp
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx
+ subq %rcx,%rsp
+ subq $32,%rsp
+
+ movq %rsi,16(%rsp)
+ movq %r10,24(%rsp)
+L$dec_prologue:
+
+ movq %rdx,%r15
+ movl 240(%r15),%r13d
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+
+ shll $4,%r13d
+ leaq (%r15,%r13,1),%rbp
+ movq %r15,(%rsp)
+ movq %rbp,8(%rsp)
+
+
+ leaq L$AES_Td+2048(%rip),%r14
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp
+ leaq (%r14,%rbp,1),%r14
+ shrq $3,%rbp
+ addq %rbp,%r14
+
+ call _x86_64_AES_decrypt_compact
+
+ movq 16(%rsp),%r9
+ movq 24(%rsp),%rsi
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+L$dec_epilogue:
+ retq
+
+.globl _AES_set_encrypt_key
+
+.p2align 4
+_AES_set_encrypt_key:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $8,%rsp
+L$enc_key_prologue:
+
+ call _x86_64_AES_set_encrypt_key
+
+ movq 8(%rsp),%r15
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp
+L$enc_key_epilogue:
+ retq
+
+
+
+.p2align 4
+_x86_64_AES_set_encrypt_key:
+ movl %esi,%ecx
+ movq %rdi,%rsi
+ movq %rdx,%rdi
+
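+	# testq $-1 sets ZF only for a NULL pointer: reject NULL key/schedule arguments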
+ testq $-1,%rsi
+ jz L$badpointer
+ testq $-1,%rdi
+ jz L$badpointer
+
+ leaq L$AES_Te(%rip),%rbp
+ leaq 2048+128(%rbp),%rbp
+
+
+ movl 0-128(%rbp),%eax
+ movl 32-128(%rbp),%ebx
+ movl 64-128(%rbp),%r8d
+ movl 96-128(%rbp),%edx
+ movl 128-128(%rbp),%eax
+ movl 160-128(%rbp),%ebx
+ movl 192-128(%rbp),%r8d
+ movl 224-128(%rbp),%edx
+
+ cmpl $128,%ecx
+ je L$10rounds
+ cmpl $192,%ecx
+ je L$12rounds
+ cmpl $256,%ecx
+ je L$14rounds
+ movq $-2,%rax
+ jmp L$exit
+
+L$10rounds:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rdx,8(%rdi)
+
+ shrq $32,%rdx
+ xorl %ecx,%ecx
+ jmp L$10shortcut
+.p2align 2
+L$10loop:
+ movl 0(%rdi),%eax
+ movl 12(%rdi),%edx
+L$10shortcut:
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,16(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,20(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,24(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,28(%rdi)
+ addl $1,%ecx
+ leaq 16(%rdi),%rdi
+ cmpl $10,%ecx
+ jl L$10loop
+
+ movl $10,80(%rdi)
+ xorq %rax,%rax
+ jmp L$exit
+
+L$12rounds:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rdx,16(%rdi)
+
+ shrq $32,%rdx
+ xorl %ecx,%ecx
+ jmp L$12shortcut
+.p2align 2
+L$12loop:
+ movl 0(%rdi),%eax
+ movl 20(%rdi),%edx
+L$12shortcut:
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,24(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,28(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,32(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,36(%rdi)
+
+ cmpl $7,%ecx
+ je L$12break
+ addl $1,%ecx
+
+ xorl 16(%rdi),%eax
+ movl %eax,40(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,44(%rdi)
+
+ leaq 24(%rdi),%rdi
+ jmp L$12loop
+L$12break:
+ movl $12,72(%rdi)
+ xorq %rax,%rax
+ jmp L$exit
+
+L$14rounds:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rcx
+ movq 24(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+
+ shrq $32,%rdx
+ xorl %ecx,%ecx
+ jmp L$14shortcut
+.p2align 2
+L$14loop:
+ movl 0(%rdi),%eax
+ movl 28(%rdi),%edx
+L$14shortcut:
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,32(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,36(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,40(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,44(%rdi)
+
+ cmpl $6,%ecx
+ je L$14break
+ addl $1,%ecx
+
+ movl %eax,%edx
+ movl 16(%rdi),%eax
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ shll $8,%ebx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movl %eax,48(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,52(%rdi)
+ xorl 24(%rdi),%eax
+ movl %eax,56(%rdi)
+ xorl 28(%rdi),%eax
+ movl %eax,60(%rdi)
+
+ leaq 32(%rdi),%rdi
+ jmp L$14loop
+L$14break:
+ movl $14,48(%rdi)
+ xorq %rax,%rax
+ jmp L$exit
+
+L$badpointer:
+ movq $-1,%rax
+L$exit:
+ retq
+
+.globl _AES_set_decrypt_key
+
+.p2align 4
+_AES_set_decrypt_key:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rdx
+L$dec_key_prologue:
+
+ call _x86_64_AES_set_encrypt_key
+ movq (%rsp),%r8
+ cmpl $0,%eax
+ jne L$abort
+
+ movl 240(%r8),%r14d
+ xorq %rdi,%rdi
+ leaq (%rdi,%r14,4),%rcx
+ movq %r8,%rsi
+ leaq (%r8,%rcx,4),%rdi
+.p2align 2
+L$invert:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 0(%rdi),%rcx
+ movq 8(%rdi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,0(%rsi)
+ movq %rdx,8(%rsi)
+ leaq 16(%rsi),%rsi
+ leaq -16(%rdi),%rdi
+ cmpq %rsi,%rdi
+ jne L$invert
+
+ leaq L$AES_Te+2048+1024(%rip),%rax
+
+ movq 40(%rax),%rsi
+ movq 48(%rax),%rdi
+ movq 56(%rax),%rbp
+
+ movq %r8,%r15
+ subl $1,%r14d
+.p2align 2
+L$permute:
+ leaq 16(%r15),%r15
+ movq 0(%r15),%rax
+ movq 8(%r15),%rcx
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8
+ movq %rdx,%r11
+
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10
+ xorq %rdx,%r13
+
+ xorq %r10,%rax
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+
+ shrq $32,%r8
+ shrq $32,%r11
+
+ roll $16,%r9d
+ roll $16,%r12d
+
+ roll $16,%r8d
+ roll $16,%r11d
+
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ movl %eax,0(%r15)
+ movl %ebx,4(%r15)
+ movl %ecx,8(%r15)
+ movl %edx,12(%r15)
+ subl $1,%r14d
+ jnz L$permute
+
+ xorq %rax,%rax
+L$abort:
+ movq 8(%rsp),%r15
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp
+L$dec_key_epilogue:
+ retq
+
+.globl _AES_cbc_encrypt
+
+.p2align 4
+
+.private_extern _OPENSSL_ia32cap_P
+.globl _asm_AES_cbc_encrypt
+.private_extern _asm_AES_cbc_encrypt
+_asm_AES_cbc_encrypt:
+_AES_cbc_encrypt:
+ cmpq $0,%rdx
+ je L$cbc_epilogue
+ pushfq
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+L$cbc_prologue:
+
+ cld
+ movl %r9d,%r9d
+
+ leaq L$AES_Te(%rip),%r14
+ cmpq $0,%r9
+ jne L$cbc_picked_te
+ leaq L$AES_Td(%rip),%r14
+L$cbc_picked_te:
+
+ movl _OPENSSL_ia32cap_P(%rip),%r10d
+ cmpq $512,%rdx
+ jb L$cbc_slow_prologue
+ testq $15,%rdx
+ jnz L$cbc_slow_prologue
+ btl $IA32CAP_BIT0_HT,%r10d
+ jc L$cbc_slow_prologue
+
+
+ leaq -88-248(%rsp),%r15
+ andq $-64,%r15
+
+
+ movq %r14,%r10
+ leaq 2304(%r14),%r11
+ movq %r15,%r12
+ andq $4095,%r10
+ andq $4095,%r11
+ andq $4095,%r12
+
+ cmpq %r11,%r12
+ jb L$cbc_te_break_out
+ subq %r11,%r12
+ subq %r12,%r15
+ jmp L$cbc_te_ok
+L$cbc_te_break_out:
+ subq %r10,%r12
+ andq $4095,%r12
+ addq $320,%r12
+ subq %r12,%r15
+.p2align 2
+L$cbc_te_ok:
+
+ xchgq %rsp,%r15
+
+ movq %r15,16(%rsp)
+L$cbc_fast_body:
+ movq %rdi,24(%rsp)
+ movq %rsi,32(%rsp)
+ movq %rdx,40(%rsp)
+ movq %rcx,48(%rsp)
+ movq %r8,56(%rsp)
+ movl $0,80+240(%rsp)
+ movq %r8,%rbp
+ movq %r9,%rbx
+ movq %rsi,%r9
+ movq %rdi,%r8
+ movq %rcx,%r15
+
+ movl 240(%r15),%eax
+
+ movq %r15,%r10
+ subq %r14,%r10
+ andq $4095,%r10
+ cmpq $2304,%r10
+ jb L$cbc_do_ecopy
+ cmpq $4096-248,%r10
+ jb L$cbc_skip_ecopy
+.p2align 2
+L$cbc_do_ecopy:
+ movq %r15,%rsi
+ leaq 80(%rsp),%rdi
+ leaq 80(%rsp),%r15
+ movl $30,%ecx
+.long 0x90A548F3
+ movl %eax,(%rdi)
+L$cbc_skip_ecopy:
+ movq %r15,0(%rsp)
+
+ movl $18,%ecx
+.p2align 2
+L$cbc_prefetch_te:
+ movq 0(%r14),%r10
+ movq 32(%r14),%r11
+ movq 64(%r14),%r12
+ movq 96(%r14),%r13
+ leaq 128(%r14),%r14
+ subl $1,%ecx
+ jnz L$cbc_prefetch_te
+ leaq -2304(%r14),%r14
+
+ cmpq $0,%rbx
+ je L$FAST_DECRYPT
+
+
+ movl 0(%rbp),%eax
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+
+.p2align 2
+L$cbc_fast_enc_loop:
+ xorl 0(%r8),%eax
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_encrypt
+
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10
+ movq %r10,40(%rsp)
+ jnz L$cbc_fast_enc_loop
+ movq 56(%rsp),%rbp
+ movl %eax,0(%rbp)
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+
+ jmp L$cbc_fast_cleanup
+
+
+.p2align 4
+L$FAST_DECRYPT:
+ cmpq %r8,%r9
+ je L$cbc_fast_dec_in_place
+
+ movq %rbp,64(%rsp)
+.p2align 2
+L$cbc_fast_dec_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_decrypt
+
+ movq 64(%rsp),%rbp
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0(%rbp),%eax
+ xorl 4(%rbp),%ebx
+ xorl 8(%rbp),%ecx
+ xorl 12(%rbp),%edx
+ movq %r8,%rbp
+
+ subq $16,%r10
+ movq %r10,40(%rsp)
+ movq %rbp,64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jnz L$cbc_fast_dec_loop
+ movq 56(%rsp),%r12
+ movq 0(%rbp),%r10
+ movq 8(%rbp),%r11
+ movq %r10,0(%r12)
+ movq %r11,8(%r12)
+ jmp L$cbc_fast_cleanup
+
+.p2align 4
+L$cbc_fast_dec_in_place:
+ movq 0(%rbp),%r10
+ movq 8(%rbp),%r11
+ movq %r10,0+64(%rsp)
+ movq %r11,8+64(%rsp)
+.p2align 2
+L$cbc_fast_dec_in_place_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_decrypt
+
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+
+ movq 0(%r8),%r11
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jz L$cbc_fast_dec_in_place_done
+
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ movq %r10,40(%rsp)
+ jmp L$cbc_fast_dec_in_place_loop
+L$cbc_fast_dec_in_place_done:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+.p2align 2
+L$cbc_fast_cleanup:
+ cmpl $0,80+240(%rsp)
+ leaq 80(%rsp),%rdi
+ je L$cbc_exit
+ movl $30,%ecx
+ xorq %rax,%rax
+.long 0x90AB48F3
+
+ jmp L$cbc_exit
+
+
+.p2align 4
+L$cbc_slow_prologue:
+
+ leaq -88(%rsp),%rbp
+ andq $-64,%rbp
+
+ leaq -88-63(%rcx),%r10
+ subq %rbp,%r10
+ negq %r10
+ andq $960,%r10
+ subq %r10,%rbp
+
+ xchgq %rsp,%rbp
+
+ movq %rbp,16(%rsp)
+L$cbc_slow_body:
+
+
+
+
+ movq %r8,56(%rsp)
+ movq %r8,%rbp
+ movq %r9,%rbx
+ movq %rsi,%r9
+ movq %rdi,%r8
+ movq %rcx,%r15
+ movq %rdx,%r10
+
+ movl 240(%r15),%eax
+ movq %r15,0(%rsp)
+ shll $4,%eax
+ leaq (%r15,%rax,1),%rax
+ movq %rax,8(%rsp)
+
+
+ leaq 2048(%r14),%r14
+ leaq 768-8(%rsp),%rax
+ subq %r14,%rax
+ andq $768,%rax
+ leaq (%r14,%rax,1),%r14
+
+ cmpq $0,%rbx
+ je L$SLOW_DECRYPT
+
+
+ testq $-16,%r10
+ movl 0(%rbp),%eax
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+ jz L$cbc_slow_enc_tail
+
+.p2align 2
+L$cbc_slow_enc_loop:
+ xorl 0(%r8),%eax
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+
+ call _x86_64_AES_encrypt_compact
+
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10
+ jnz L$cbc_slow_enc_loop
+ testq $15,%r10
+ jnz L$cbc_slow_enc_tail
+ movq 56(%rsp),%rbp
+ movl %eax,0(%rbp)
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+
+ jmp L$cbc_exit
+
+.p2align 2
+L$cbc_slow_enc_tail:
+ movq %rax,%r11
+ movq %rcx,%r12
+ movq %r10,%rcx
+ movq %r8,%rsi
+ movq %r9,%rdi
+.long 0x9066A4F3
+ movq $16,%rcx
+ subq %r10,%rcx
+ xorq %rax,%rax
+.long 0x9066AAF3
+ movq %r9,%r8
+ movq $16,%r10
+ movq %r11,%rax
+ movq %r12,%rcx
+ jmp L$cbc_slow_enc_loop
+
+.p2align 4
+L$SLOW_DECRYPT:
+ shrq $3,%rax
+ addq %rax,%r14
+
+ movq 0(%rbp),%r11
+ movq 8(%rbp),%r12
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+.p2align 2
+L$cbc_slow_dec_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+
+ call _x86_64_AES_decrypt_compact
+
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+
+ movq 0(%r8),%r11
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jc L$cbc_slow_dec_partial
+ jz L$cbc_slow_dec_done
+
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jmp L$cbc_slow_dec_loop
+L$cbc_slow_dec_done:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ jmp L$cbc_exit
+
+.p2align 2
+L$cbc_slow_dec_partial:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+
+ movl %eax,0+64(%rsp)
+ movl %ebx,4+64(%rsp)
+ movl %ecx,8+64(%rsp)
+ movl %edx,12+64(%rsp)
+
+ movq %r9,%rdi
+ leaq 64(%rsp),%rsi
+ leaq 16(%r10),%rcx
+.long 0x9066A4F3
+ jmp L$cbc_exit
+
+.p2align 4
+L$cbc_exit:
+ movq 16(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+L$cbc_popfq:
+ popfq
+L$cbc_epilogue:
+ retq
+
+.p2align 6
+L$AES_Te:
+.long 0xa56363c6,0xa56363c6
+.long 0x847c7cf8,0x847c7cf8
+.long 0x997777ee,0x997777ee
+.long 0x8d7b7bf6,0x8d7b7bf6
+.long 0x0df2f2ff,0x0df2f2ff
+.long 0xbd6b6bd6,0xbd6b6bd6
+.long 0xb16f6fde,0xb16f6fde
+.long 0x54c5c591,0x54c5c591
+.long 0x50303060,0x50303060
+.long 0x03010102,0x03010102
+.long 0xa96767ce,0xa96767ce
+.long 0x7d2b2b56,0x7d2b2b56
+.long 0x19fefee7,0x19fefee7
+.long 0x62d7d7b5,0x62d7d7b5
+.long 0xe6abab4d,0xe6abab4d
+.long 0x9a7676ec,0x9a7676ec
+.long 0x45caca8f,0x45caca8f
+.long 0x9d82821f,0x9d82821f
+.long 0x40c9c989,0x40c9c989
+.long 0x877d7dfa,0x877d7dfa
+.long 0x15fafaef,0x15fafaef
+.long 0xeb5959b2,0xeb5959b2
+.long 0xc947478e,0xc947478e
+.long 0x0bf0f0fb,0x0bf0f0fb
+.long 0xecadad41,0xecadad41
+.long 0x67d4d4b3,0x67d4d4b3
+.long 0xfda2a25f,0xfda2a25f
+.long 0xeaafaf45,0xeaafaf45
+.long 0xbf9c9c23,0xbf9c9c23
+.long 0xf7a4a453,0xf7a4a453
+.long 0x967272e4,0x967272e4
+.long 0x5bc0c09b,0x5bc0c09b
+.long 0xc2b7b775,0xc2b7b775
+.long 0x1cfdfde1,0x1cfdfde1
+.long 0xae93933d,0xae93933d
+.long 0x6a26264c,0x6a26264c
+.long 0x5a36366c,0x5a36366c
+.long 0x413f3f7e,0x413f3f7e
+.long 0x02f7f7f5,0x02f7f7f5
+.long 0x4fcccc83,0x4fcccc83
+.long 0x5c343468,0x5c343468
+.long 0xf4a5a551,0xf4a5a551
+.long 0x34e5e5d1,0x34e5e5d1
+.long 0x08f1f1f9,0x08f1f1f9
+.long 0x937171e2,0x937171e2
+.long 0x73d8d8ab,0x73d8d8ab
+.long 0x53313162,0x53313162
+.long 0x3f15152a,0x3f15152a
+.long 0x0c040408,0x0c040408
+.long 0x52c7c795,0x52c7c795
+.long 0x65232346,0x65232346
+.long 0x5ec3c39d,0x5ec3c39d
+.long 0x28181830,0x28181830
+.long 0xa1969637,0xa1969637
+.long 0x0f05050a,0x0f05050a
+.long 0xb59a9a2f,0xb59a9a2f
+.long 0x0907070e,0x0907070e
+.long 0x36121224,0x36121224
+.long 0x9b80801b,0x9b80801b
+.long 0x3de2e2df,0x3de2e2df
+.long 0x26ebebcd,0x26ebebcd
+.long 0x6927274e,0x6927274e
+.long 0xcdb2b27f,0xcdb2b27f
+.long 0x9f7575ea,0x9f7575ea
+.long 0x1b090912,0x1b090912
+.long 0x9e83831d,0x9e83831d
+.long 0x742c2c58,0x742c2c58
+.long 0x2e1a1a34,0x2e1a1a34
+.long 0x2d1b1b36,0x2d1b1b36
+.long 0xb26e6edc,0xb26e6edc
+.long 0xee5a5ab4,0xee5a5ab4
+.long 0xfba0a05b,0xfba0a05b
+.long 0xf65252a4,0xf65252a4
+.long 0x4d3b3b76,0x4d3b3b76
+.long 0x61d6d6b7,0x61d6d6b7
+.long 0xceb3b37d,0xceb3b37d
+.long 0x7b292952,0x7b292952
+.long 0x3ee3e3dd,0x3ee3e3dd
+.long 0x712f2f5e,0x712f2f5e
+.long 0x97848413,0x97848413
+.long 0xf55353a6,0xf55353a6
+.long 0x68d1d1b9,0x68d1d1b9
+.long 0x00000000,0x00000000
+.long 0x2cededc1,0x2cededc1
+.long 0x60202040,0x60202040
+.long 0x1ffcfce3,0x1ffcfce3
+.long 0xc8b1b179,0xc8b1b179
+.long 0xed5b5bb6,0xed5b5bb6
+.long 0xbe6a6ad4,0xbe6a6ad4
+.long 0x46cbcb8d,0x46cbcb8d
+.long 0xd9bebe67,0xd9bebe67
+.long 0x4b393972,0x4b393972
+.long 0xde4a4a94,0xde4a4a94
+.long 0xd44c4c98,0xd44c4c98
+.long 0xe85858b0,0xe85858b0
+.long 0x4acfcf85,0x4acfcf85
+.long 0x6bd0d0bb,0x6bd0d0bb
+.long 0x2aefefc5,0x2aefefc5
+.long 0xe5aaaa4f,0xe5aaaa4f
+.long 0x16fbfbed,0x16fbfbed
+.long 0xc5434386,0xc5434386
+.long 0xd74d4d9a,0xd74d4d9a
+.long 0x55333366,0x55333366
+.long 0x94858511,0x94858511
+.long 0xcf45458a,0xcf45458a
+.long 0x10f9f9e9,0x10f9f9e9
+.long 0x06020204,0x06020204
+.long 0x817f7ffe,0x817f7ffe
+.long 0xf05050a0,0xf05050a0
+.long 0x443c3c78,0x443c3c78
+.long 0xba9f9f25,0xba9f9f25
+.long 0xe3a8a84b,0xe3a8a84b
+.long 0xf35151a2,0xf35151a2
+.long 0xfea3a35d,0xfea3a35d
+.long 0xc0404080,0xc0404080
+.long 0x8a8f8f05,0x8a8f8f05
+.long 0xad92923f,0xad92923f
+.long 0xbc9d9d21,0xbc9d9d21
+.long 0x48383870,0x48383870
+.long 0x04f5f5f1,0x04f5f5f1
+.long 0xdfbcbc63,0xdfbcbc63
+.long 0xc1b6b677,0xc1b6b677
+.long 0x75dadaaf,0x75dadaaf
+.long 0x63212142,0x63212142
+.long 0x30101020,0x30101020
+.long 0x1affffe5,0x1affffe5
+.long 0x0ef3f3fd,0x0ef3f3fd
+.long 0x6dd2d2bf,0x6dd2d2bf
+.long 0x4ccdcd81,0x4ccdcd81
+.long 0x140c0c18,0x140c0c18
+.long 0x35131326,0x35131326
+.long 0x2fececc3,0x2fececc3
+.long 0xe15f5fbe,0xe15f5fbe
+.long 0xa2979735,0xa2979735
+.long 0xcc444488,0xcc444488
+.long 0x3917172e,0x3917172e
+.long 0x57c4c493,0x57c4c493
+.long 0xf2a7a755,0xf2a7a755
+.long 0x827e7efc,0x827e7efc
+.long 0x473d3d7a,0x473d3d7a
+.long 0xac6464c8,0xac6464c8
+.long 0xe75d5dba,0xe75d5dba
+.long 0x2b191932,0x2b191932
+.long 0x957373e6,0x957373e6
+.long 0xa06060c0,0xa06060c0
+.long 0x98818119,0x98818119
+.long 0xd14f4f9e,0xd14f4f9e
+.long 0x7fdcdca3,0x7fdcdca3
+.long 0x66222244,0x66222244
+.long 0x7e2a2a54,0x7e2a2a54
+.long 0xab90903b,0xab90903b
+.long 0x8388880b,0x8388880b
+.long 0xca46468c,0xca46468c
+.long 0x29eeeec7,0x29eeeec7
+.long 0xd3b8b86b,0xd3b8b86b
+.long 0x3c141428,0x3c141428
+.long 0x79dedea7,0x79dedea7
+.long 0xe25e5ebc,0xe25e5ebc
+.long 0x1d0b0b16,0x1d0b0b16
+.long 0x76dbdbad,0x76dbdbad
+.long 0x3be0e0db,0x3be0e0db
+.long 0x56323264,0x56323264
+.long 0x4e3a3a74,0x4e3a3a74
+.long 0x1e0a0a14,0x1e0a0a14
+.long 0xdb494992,0xdb494992
+.long 0x0a06060c,0x0a06060c
+.long 0x6c242448,0x6c242448
+.long 0xe45c5cb8,0xe45c5cb8
+.long 0x5dc2c29f,0x5dc2c29f
+.long 0x6ed3d3bd,0x6ed3d3bd
+.long 0xefacac43,0xefacac43
+.long 0xa66262c4,0xa66262c4
+.long 0xa8919139,0xa8919139
+.long 0xa4959531,0xa4959531
+.long 0x37e4e4d3,0x37e4e4d3
+.long 0x8b7979f2,0x8b7979f2
+.long 0x32e7e7d5,0x32e7e7d5
+.long 0x43c8c88b,0x43c8c88b
+.long 0x5937376e,0x5937376e
+.long 0xb76d6dda,0xb76d6dda
+.long 0x8c8d8d01,0x8c8d8d01
+.long 0x64d5d5b1,0x64d5d5b1
+.long 0xd24e4e9c,0xd24e4e9c
+.long 0xe0a9a949,0xe0a9a949
+.long 0xb46c6cd8,0xb46c6cd8
+.long 0xfa5656ac,0xfa5656ac
+.long 0x07f4f4f3,0x07f4f4f3
+.long 0x25eaeacf,0x25eaeacf
+.long 0xaf6565ca,0xaf6565ca
+.long 0x8e7a7af4,0x8e7a7af4
+.long 0xe9aeae47,0xe9aeae47
+.long 0x18080810,0x18080810
+.long 0xd5baba6f,0xd5baba6f
+.long 0x887878f0,0x887878f0
+.long 0x6f25254a,0x6f25254a
+.long 0x722e2e5c,0x722e2e5c
+.long 0x241c1c38,0x241c1c38
+.long 0xf1a6a657,0xf1a6a657
+.long 0xc7b4b473,0xc7b4b473
+.long 0x51c6c697,0x51c6c697
+.long 0x23e8e8cb,0x23e8e8cb
+.long 0x7cdddda1,0x7cdddda1
+.long 0x9c7474e8,0x9c7474e8
+.long 0x211f1f3e,0x211f1f3e
+.long 0xdd4b4b96,0xdd4b4b96
+.long 0xdcbdbd61,0xdcbdbd61
+.long 0x868b8b0d,0x868b8b0d
+.long 0x858a8a0f,0x858a8a0f
+.long 0x907070e0,0x907070e0
+.long 0x423e3e7c,0x423e3e7c
+.long 0xc4b5b571,0xc4b5b571
+.long 0xaa6666cc,0xaa6666cc
+.long 0xd8484890,0xd8484890
+.long 0x05030306,0x05030306
+.long 0x01f6f6f7,0x01f6f6f7
+.long 0x120e0e1c,0x120e0e1c
+.long 0xa36161c2,0xa36161c2
+.long 0x5f35356a,0x5f35356a
+.long 0xf95757ae,0xf95757ae
+.long 0xd0b9b969,0xd0b9b969
+.long 0x91868617,0x91868617
+.long 0x58c1c199,0x58c1c199
+.long 0x271d1d3a,0x271d1d3a
+.long 0xb99e9e27,0xb99e9e27
+.long 0x38e1e1d9,0x38e1e1d9
+.long 0x13f8f8eb,0x13f8f8eb
+.long 0xb398982b,0xb398982b
+.long 0x33111122,0x33111122
+.long 0xbb6969d2,0xbb6969d2
+.long 0x70d9d9a9,0x70d9d9a9
+.long 0x898e8e07,0x898e8e07
+.long 0xa7949433,0xa7949433
+.long 0xb69b9b2d,0xb69b9b2d
+.long 0x221e1e3c,0x221e1e3c
+.long 0x92878715,0x92878715
+.long 0x20e9e9c9,0x20e9e9c9
+.long 0x49cece87,0x49cece87
+.long 0xff5555aa,0xff5555aa
+.long 0x78282850,0x78282850
+.long 0x7adfdfa5,0x7adfdfa5
+.long 0x8f8c8c03,0x8f8c8c03
+.long 0xf8a1a159,0xf8a1a159
+.long 0x80898909,0x80898909
+.long 0x170d0d1a,0x170d0d1a
+.long 0xdabfbf65,0xdabfbf65
+.long 0x31e6e6d7,0x31e6e6d7
+.long 0xc6424284,0xc6424284
+.long 0xb86868d0,0xb86868d0
+.long 0xc3414182,0xc3414182
+.long 0xb0999929,0xb0999929
+.long 0x772d2d5a,0x772d2d5a
+.long 0x110f0f1e,0x110f0f1e
+.long 0xcbb0b07b,0xcbb0b07b
+.long 0xfc5454a8,0xfc5454a8
+.long 0xd6bbbb6d,0xd6bbbb6d
+.long 0x3a16162c,0x3a16162c
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.long 0x00000001, 0x00000002, 0x00000004, 0x00000008
+.long 0x00000010, 0x00000020, 0x00000040, 0x00000080
+.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080
+.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
+.p2align 6
+L$AES_Td:
+.long 0x50a7f451,0x50a7f451
+.long 0x5365417e,0x5365417e
+.long 0xc3a4171a,0xc3a4171a
+.long 0x965e273a,0x965e273a
+.long 0xcb6bab3b,0xcb6bab3b
+.long 0xf1459d1f,0xf1459d1f
+.long 0xab58faac,0xab58faac
+.long 0x9303e34b,0x9303e34b
+.long 0x55fa3020,0x55fa3020
+.long 0xf66d76ad,0xf66d76ad
+.long 0x9176cc88,0x9176cc88
+.long 0x254c02f5,0x254c02f5
+.long 0xfcd7e54f,0xfcd7e54f
+.long 0xd7cb2ac5,0xd7cb2ac5
+.long 0x80443526,0x80443526
+.long 0x8fa362b5,0x8fa362b5
+.long 0x495ab1de,0x495ab1de
+.long 0x671bba25,0x671bba25
+.long 0x980eea45,0x980eea45
+.long 0xe1c0fe5d,0xe1c0fe5d
+.long 0x02752fc3,0x02752fc3
+.long 0x12f04c81,0x12f04c81
+.long 0xa397468d,0xa397468d
+.long 0xc6f9d36b,0xc6f9d36b
+.long 0xe75f8f03,0xe75f8f03
+.long 0x959c9215,0x959c9215
+.long 0xeb7a6dbf,0xeb7a6dbf
+.long 0xda595295,0xda595295
+.long 0x2d83bed4,0x2d83bed4
+.long 0xd3217458,0xd3217458
+.long 0x2969e049,0x2969e049
+.long 0x44c8c98e,0x44c8c98e
+.long 0x6a89c275,0x6a89c275
+.long 0x78798ef4,0x78798ef4
+.long 0x6b3e5899,0x6b3e5899
+.long 0xdd71b927,0xdd71b927
+.long 0xb64fe1be,0xb64fe1be
+.long 0x17ad88f0,0x17ad88f0
+.long 0x66ac20c9,0x66ac20c9
+.long 0xb43ace7d,0xb43ace7d
+.long 0x184adf63,0x184adf63
+.long 0x82311ae5,0x82311ae5
+.long 0x60335197,0x60335197
+.long 0x457f5362,0x457f5362
+.long 0xe07764b1,0xe07764b1
+.long 0x84ae6bbb,0x84ae6bbb
+.long 0x1ca081fe,0x1ca081fe
+.long 0x942b08f9,0x942b08f9
+.long 0x58684870,0x58684870
+.long 0x19fd458f,0x19fd458f
+.long 0x876cde94,0x876cde94
+.long 0xb7f87b52,0xb7f87b52
+.long 0x23d373ab,0x23d373ab
+.long 0xe2024b72,0xe2024b72
+.long 0x578f1fe3,0x578f1fe3
+.long 0x2aab5566,0x2aab5566
+.long 0x0728ebb2,0x0728ebb2
+.long 0x03c2b52f,0x03c2b52f
+.long 0x9a7bc586,0x9a7bc586
+.long 0xa50837d3,0xa50837d3
+.long 0xf2872830,0xf2872830
+.long 0xb2a5bf23,0xb2a5bf23
+.long 0xba6a0302,0xba6a0302
+.long 0x5c8216ed,0x5c8216ed
+.long 0x2b1ccf8a,0x2b1ccf8a
+.long 0x92b479a7,0x92b479a7
+.long 0xf0f207f3,0xf0f207f3
+.long 0xa1e2694e,0xa1e2694e
+.long 0xcdf4da65,0xcdf4da65
+.long 0xd5be0506,0xd5be0506
+.long 0x1f6234d1,0x1f6234d1
+.long 0x8afea6c4,0x8afea6c4
+.long 0x9d532e34,0x9d532e34
+.long 0xa055f3a2,0xa055f3a2
+.long 0x32e18a05,0x32e18a05
+.long 0x75ebf6a4,0x75ebf6a4
+.long 0x39ec830b,0x39ec830b
+.long 0xaaef6040,0xaaef6040
+.long 0x069f715e,0x069f715e
+.long 0x51106ebd,0x51106ebd
+.long 0xf98a213e,0xf98a213e
+.long 0x3d06dd96,0x3d06dd96
+.long 0xae053edd,0xae053edd
+.long 0x46bde64d,0x46bde64d
+.long 0xb58d5491,0xb58d5491
+.long 0x055dc471,0x055dc471
+.long 0x6fd40604,0x6fd40604
+.long 0xff155060,0xff155060
+.long 0x24fb9819,0x24fb9819
+.long 0x97e9bdd6,0x97e9bdd6
+.long 0xcc434089,0xcc434089
+.long 0x779ed967,0x779ed967
+.long 0xbd42e8b0,0xbd42e8b0
+.long 0x888b8907,0x888b8907
+.long 0x385b19e7,0x385b19e7
+.long 0xdbeec879,0xdbeec879
+.long 0x470a7ca1,0x470a7ca1
+.long 0xe90f427c,0xe90f427c
+.long 0xc91e84f8,0xc91e84f8
+.long 0x00000000,0x00000000
+.long 0x83868009,0x83868009
+.long 0x48ed2b32,0x48ed2b32
+.long 0xac70111e,0xac70111e
+.long 0x4e725a6c,0x4e725a6c
+.long 0xfbff0efd,0xfbff0efd
+.long 0x5638850f,0x5638850f
+.long 0x1ed5ae3d,0x1ed5ae3d
+.long 0x27392d36,0x27392d36
+.long 0x64d90f0a,0x64d90f0a
+.long 0x21a65c68,0x21a65c68
+.long 0xd1545b9b,0xd1545b9b
+.long 0x3a2e3624,0x3a2e3624
+.long 0xb1670a0c,0xb1670a0c
+.long 0x0fe75793,0x0fe75793
+.long 0xd296eeb4,0xd296eeb4
+.long 0x9e919b1b,0x9e919b1b
+.long 0x4fc5c080,0x4fc5c080
+.long 0xa220dc61,0xa220dc61
+.long 0x694b775a,0x694b775a
+.long 0x161a121c,0x161a121c
+.long 0x0aba93e2,0x0aba93e2
+.long 0xe52aa0c0,0xe52aa0c0
+.long 0x43e0223c,0x43e0223c
+.long 0x1d171b12,0x1d171b12
+.long 0x0b0d090e,0x0b0d090e
+.long 0xadc78bf2,0xadc78bf2
+.long 0xb9a8b62d,0xb9a8b62d
+.long 0xc8a91e14,0xc8a91e14
+.long 0x8519f157,0x8519f157
+.long 0x4c0775af,0x4c0775af
+.long 0xbbdd99ee,0xbbdd99ee
+.long 0xfd607fa3,0xfd607fa3
+.long 0x9f2601f7,0x9f2601f7
+.long 0xbcf5725c,0xbcf5725c
+.long 0xc53b6644,0xc53b6644
+.long 0x347efb5b,0x347efb5b
+.long 0x7629438b,0x7629438b
+.long 0xdcc623cb,0xdcc623cb
+.long 0x68fcedb6,0x68fcedb6
+.long 0x63f1e4b8,0x63f1e4b8
+.long 0xcadc31d7,0xcadc31d7
+.long 0x10856342,0x10856342
+.long 0x40229713,0x40229713
+.long 0x2011c684,0x2011c684
+.long 0x7d244a85,0x7d244a85
+.long 0xf83dbbd2,0xf83dbbd2
+.long 0x1132f9ae,0x1132f9ae
+.long 0x6da129c7,0x6da129c7
+.long 0x4b2f9e1d,0x4b2f9e1d
+.long 0xf330b2dc,0xf330b2dc
+.long 0xec52860d,0xec52860d
+.long 0xd0e3c177,0xd0e3c177
+.long 0x6c16b32b,0x6c16b32b
+.long 0x99b970a9,0x99b970a9
+.long 0xfa489411,0xfa489411
+.long 0x2264e947,0x2264e947
+.long 0xc48cfca8,0xc48cfca8
+.long 0x1a3ff0a0,0x1a3ff0a0
+.long 0xd82c7d56,0xd82c7d56
+.long 0xef903322,0xef903322
+.long 0xc74e4987,0xc74e4987
+.long 0xc1d138d9,0xc1d138d9
+.long 0xfea2ca8c,0xfea2ca8c
+.long 0x360bd498,0x360bd498
+.long 0xcf81f5a6,0xcf81f5a6
+.long 0x28de7aa5,0x28de7aa5
+.long 0x268eb7da,0x268eb7da
+.long 0xa4bfad3f,0xa4bfad3f
+.long 0xe49d3a2c,0xe49d3a2c
+.long 0x0d927850,0x0d927850
+.long 0x9bcc5f6a,0x9bcc5f6a
+.long 0x62467e54,0x62467e54
+.long 0xc2138df6,0xc2138df6
+.long 0xe8b8d890,0xe8b8d890
+.long 0x5ef7392e,0x5ef7392e
+.long 0xf5afc382,0xf5afc382
+.long 0xbe805d9f,0xbe805d9f
+.long 0x7c93d069,0x7c93d069
+.long 0xa92dd56f,0xa92dd56f
+.long 0xb31225cf,0xb31225cf
+.long 0x3b99acc8,0x3b99acc8
+.long 0xa77d1810,0xa77d1810
+.long 0x6e639ce8,0x6e639ce8
+.long 0x7bbb3bdb,0x7bbb3bdb
+.long 0x097826cd,0x097826cd
+.long 0xf418596e,0xf418596e
+.long 0x01b79aec,0x01b79aec
+.long 0xa89a4f83,0xa89a4f83
+.long 0x656e95e6,0x656e95e6
+.long 0x7ee6ffaa,0x7ee6ffaa
+.long 0x08cfbc21,0x08cfbc21
+.long 0xe6e815ef,0xe6e815ef
+.long 0xd99be7ba,0xd99be7ba
+.long 0xce366f4a,0xce366f4a
+.long 0xd4099fea,0xd4099fea
+.long 0xd67cb029,0xd67cb029
+.long 0xafb2a431,0xafb2a431
+.long 0x31233f2a,0x31233f2a
+.long 0x3094a5c6,0x3094a5c6
+.long 0xc066a235,0xc066a235
+.long 0x37bc4e74,0x37bc4e74
+.long 0xa6ca82fc,0xa6ca82fc
+.long 0xb0d090e0,0xb0d090e0
+.long 0x15d8a733,0x15d8a733
+.long 0x4a9804f1,0x4a9804f1
+.long 0xf7daec41,0xf7daec41
+.long 0x0e50cd7f,0x0e50cd7f
+.long 0x2ff69117,0x2ff69117
+.long 0x8dd64d76,0x8dd64d76
+.long 0x4db0ef43,0x4db0ef43
+.long 0x544daacc,0x544daacc
+.long 0xdf0496e4,0xdf0496e4
+.long 0xe3b5d19e,0xe3b5d19e
+.long 0x1b886a4c,0x1b886a4c
+.long 0xb81f2cc1,0xb81f2cc1
+.long 0x7f516546,0x7f516546
+.long 0x04ea5e9d,0x04ea5e9d
+.long 0x5d358c01,0x5d358c01
+.long 0x737487fa,0x737487fa
+.long 0x2e410bfb,0x2e410bfb
+.long 0x5a1d67b3,0x5a1d67b3
+.long 0x52d2db92,0x52d2db92
+.long 0x335610e9,0x335610e9
+.long 0x1347d66d,0x1347d66d
+.long 0x8c61d79a,0x8c61d79a
+.long 0x7a0ca137,0x7a0ca137
+.long 0x8e14f859,0x8e14f859
+.long 0x893c13eb,0x893c13eb
+.long 0xee27a9ce,0xee27a9ce
+.long 0x35c961b7,0x35c961b7
+.long 0xede51ce1,0xede51ce1
+.long 0x3cb1477a,0x3cb1477a
+.long 0x59dfd29c,0x59dfd29c
+.long 0x3f73f255,0x3f73f255
+.long 0x79ce1418,0x79ce1418
+.long 0xbf37c773,0xbf37c773
+.long 0xeacdf753,0xeacdf753
+.long 0x5baafd5f,0x5baafd5f
+.long 0x146f3ddf,0x146f3ddf
+.long 0x86db4478,0x86db4478
+.long 0x81f3afca,0x81f3afca
+.long 0x3ec468b9,0x3ec468b9
+.long 0x2c342438,0x2c342438
+.long 0x5f40a3c2,0x5f40a3c2
+.long 0x72c31d16,0x72c31d16
+.long 0x0c25e2bc,0x0c25e2bc
+.long 0x8b493c28,0x8b493c28
+.long 0x41950dff,0x41950dff
+.long 0x7101a839,0x7101a839
+.long 0xdeb30c08,0xdeb30c08
+.long 0x9ce4b4d8,0x9ce4b4d8
+.long 0x90c15664,0x90c15664
+.long 0x6184cb7b,0x6184cb7b
+.long 0x70b632d5,0x70b632d5
+.long 0x745c6c48,0x745c6c48
+.long 0x4257b8d0,0x4257b8d0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align 6
diff --git a/ext/libressl/crypto/aes/aes-masm-x86_64.S b/ext/libressl/crypto/aes/aes-masm-x86_64.S
new file mode 100644
index 0000000..9094c72
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes-masm-x86_64.S
@@ -0,0 +1,2948 @@
+; 1 "crypto/aes/aes-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/aes/aes-masm-x86_64.S.tmp" 2
+OPTION DOTNAME
+
+; 1 "./crypto/x86_arch.h" 1
+
+
+; 16 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+; 40 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 "crypto/aes/aes-masm-x86_64.S.tmp" 2
+
+.text$ SEGMENT ALIGN(64) 'CODE'
+
+ALIGN 16
+_x86_64_AES_encrypt PROC PRIVATE
+ xor eax,DWORD PTR[r15]
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+
+ mov r13d,DWORD PTR[240+r15]
+ sub r13d,1
+ jmp $L$enc_loop
+ALIGN 16
+$L$enc_loop::
+
+ movzx esi,al
+ movzx edi,bl
+ movzx ebp,cl
+ mov r10d,DWORD PTR[rsi*8+r14]
+ mov r11d,DWORD PTR[rdi*8+r14]
+ mov r12d,DWORD PTR[rbp*8+r14]
+
+ movzx esi,bh
+ movzx edi,ch
+ movzx ebp,dl
+ xor r10d,DWORD PTR[3+rsi*8+r14]
+ xor r11d,DWORD PTR[3+rdi*8+r14]
+ mov r8d,DWORD PTR[rbp*8+r14]
+
+ movzx esi,dh
+ shr ecx,16
+ movzx ebp,ah
+ xor r12d,DWORD PTR[3+rsi*8+r14]
+ shr edx,16
+ xor r8d,DWORD PTR[3+rbp*8+r14]
+
+ shr ebx,16
+ lea r15,QWORD PTR[16+r15]
+ shr eax,16
+
+ movzx esi,cl
+ movzx edi,dl
+ movzx ebp,al
+ xor r10d,DWORD PTR[2+rsi*8+r14]
+ xor r11d,DWORD PTR[2+rdi*8+r14]
+ xor r12d,DWORD PTR[2+rbp*8+r14]
+
+ movzx esi,dh
+ movzx edi,ah
+ movzx ebp,bl
+ xor r10d,DWORD PTR[1+rsi*8+r14]
+ xor r11d,DWORD PTR[1+rdi*8+r14]
+ xor r8d,DWORD PTR[2+rbp*8+r14]
+
+ mov edx,DWORD PTR[12+r15]
+ movzx edi,bh
+ movzx ebp,ch
+ mov eax,DWORD PTR[r15]
+ xor r12d,DWORD PTR[1+rdi*8+r14]
+ xor r8d,DWORD PTR[1+rbp*8+r14]
+
+ mov ebx,DWORD PTR[4+r15]
+ mov ecx,DWORD PTR[8+r15]
+ xor eax,r10d
+ xor ebx,r11d
+ xor ecx,r12d
+ xor edx,r8d
+ sub r13d,1
+ jnz $L$enc_loop
+ movzx esi,al
+ movzx edi,bl
+ movzx ebp,cl
+ movzx r10d,BYTE PTR[2+rsi*8+r14]
+ movzx r11d,BYTE PTR[2+rdi*8+r14]
+ movzx r12d,BYTE PTR[2+rbp*8+r14]
+
+ movzx esi,dl
+ movzx edi,bh
+ movzx ebp,ch
+ movzx r8d,BYTE PTR[2+rsi*8+r14]
+ mov edi,DWORD PTR[rdi*8+r14]
+ mov ebp,DWORD PTR[rbp*8+r14]
+
+ and edi,00000ff00h
+ and ebp,00000ff00h
+
+ xor r10d,edi
+ xor r11d,ebp
+ shr ecx,16
+
+ movzx esi,dh
+ movzx edi,ah
+ shr edx,16
+ mov esi,DWORD PTR[rsi*8+r14]
+ mov edi,DWORD PTR[rdi*8+r14]
+
+ and esi,00000ff00h
+ and edi,00000ff00h
+ shr ebx,16
+ xor r12d,esi
+ xor r8d,edi
+ shr eax,16
+
+ movzx esi,cl
+ movzx edi,dl
+ movzx ebp,al
+ mov esi,DWORD PTR[rsi*8+r14]
+ mov edi,DWORD PTR[rdi*8+r14]
+ mov ebp,DWORD PTR[rbp*8+r14]
+
+ and esi,000ff0000h
+ and edi,000ff0000h
+ and ebp,000ff0000h
+
+ xor r10d,esi
+ xor r11d,edi
+ xor r12d,ebp
+
+ movzx esi,bl
+ movzx edi,dh
+ movzx ebp,ah
+ mov esi,DWORD PTR[rsi*8+r14]
+ mov edi,DWORD PTR[2+rdi*8+r14]
+ mov ebp,DWORD PTR[2+rbp*8+r14]
+
+ and esi,000ff0000h
+ and edi,0ff000000h
+ and ebp,0ff000000h
+
+ xor r8d,esi
+ xor r10d,edi
+ xor r11d,ebp
+
+ movzx esi,bh
+ movzx edi,ch
+ mov edx,DWORD PTR[((16+12))+r15]
+ mov esi,DWORD PTR[2+rsi*8+r14]
+ mov edi,DWORD PTR[2+rdi*8+r14]
+ mov eax,DWORD PTR[((16+0))+r15]
+
+ and esi,0ff000000h
+ and edi,0ff000000h
+
+ xor r12d,esi
+ xor r8d,edi
+
+ mov ebx,DWORD PTR[((16+4))+r15]
+ mov ecx,DWORD PTR[((16+8))+r15]
+ xor eax,r10d
+ xor ebx,r11d
+ xor ecx,r12d
+ xor edx,r8d
+ DB 0F3h,0C3h ;repret
+_x86_64_AES_encrypt ENDP
+
+ALIGN 16
+_x86_64_AES_encrypt_compact PROC PRIVATE
+ lea r8,QWORD PTR[128+r14]
+ mov edi,DWORD PTR[((0-128))+r8]
+ mov ebp,DWORD PTR[((32-128))+r8]
+ mov r10d,DWORD PTR[((64-128))+r8]
+ mov r11d,DWORD PTR[((96-128))+r8]
+ mov edi,DWORD PTR[((128-128))+r8]
+ mov ebp,DWORD PTR[((160-128))+r8]
+ mov r10d,DWORD PTR[((192-128))+r8]
+ mov r11d,DWORD PTR[((224-128))+r8]
+ jmp $L$enc_loop_compact
+ALIGN 16
+$L$enc_loop_compact::
+ xor eax,DWORD PTR[r15]
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+ lea r15,QWORD PTR[16+r15]
+ movzx r10d,al
+ movzx r11d,bl
+ movzx r12d,cl
+ movzx r10d,BYTE PTR[r10*1+r14]
+ movzx r11d,BYTE PTR[r11*1+r14]
+ movzx r12d,BYTE PTR[r12*1+r14]
+
+ movzx r8d,dl
+ movzx esi,bh
+ movzx edi,ch
+ movzx r8d,BYTE PTR[r8*1+r14]
+ movzx r9d,BYTE PTR[rsi*1+r14]
+ movzx r13d,BYTE PTR[rdi*1+r14]
+
+ movzx ebp,dh
+ movzx esi,ah
+ shr ecx,16
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ movzx esi,BYTE PTR[rsi*1+r14]
+ shr edx,16
+
+ movzx edi,cl
+ shl r9d,8
+ shl r13d,8
+ movzx edi,BYTE PTR[rdi*1+r14]
+ xor r10d,r9d
+ xor r11d,r13d
+
+ movzx r9d,dl
+ shr eax,16
+ shr ebx,16
+ movzx r13d,al
+ shl ebp,8
+ shl esi,8
+ movzx r9d,BYTE PTR[r9*1+r14]
+ movzx r13d,BYTE PTR[r13*1+r14]
+ xor r12d,ebp
+ xor r8d,esi
+
+ movzx ebp,bl
+ movzx esi,dh
+ shl edi,16
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ movzx esi,BYTE PTR[rsi*1+r14]
+ xor r10d,edi
+
+ movzx edi,ah
+ shr ecx,8
+ shr ebx,8
+ movzx edi,BYTE PTR[rdi*1+r14]
+ movzx edx,BYTE PTR[rcx*1+r14]
+ movzx ecx,BYTE PTR[rbx*1+r14]
+ shl r9d,16
+ shl r13d,16
+ shl ebp,16
+ xor r11d,r9d
+ xor r12d,r13d
+ xor r8d,ebp
+
+ shl esi,24
+ shl edi,24
+ shl edx,24
+ xor r10d,esi
+ shl ecx,24
+ xor r11d,edi
+ mov eax,r10d
+ mov ebx,r11d
+ xor ecx,r12d
+ xor edx,r8d
+ cmp r15,QWORD PTR[16+rsp]
+ je $L$enc_compact_done
+ mov esi,eax
+ mov edi,ebx
+ and esi,080808080h
+ and edi,080808080h
+ mov r10d,esi
+ mov r11d,edi
+ shr r10d,7
+ lea r8d,DWORD PTR[rax*1+rax]
+ shr r11d,7
+ lea r9d,DWORD PTR[rbx*1+rbx]
+ sub esi,r10d
+ sub edi,r11d
+ and r8d,0fefefefeh
+ and r9d,0fefefefeh
+ and esi,01b1b1b1bh
+ and edi,01b1b1b1bh
+ mov r10d,eax
+ mov r11d,ebx
+ xor r8d,esi
+ xor r9d,edi
+
+ xor eax,r8d
+ xor ebx,r9d
+ mov esi,ecx
+ mov edi,edx
+ rol eax,24
+ rol ebx,24
+ and esi,080808080h
+ and edi,080808080h
+ xor eax,r8d
+ xor ebx,r9d
+ mov r12d,esi
+ mov ebp,edi
+ ror r10d,16
+ ror r11d,16
+ shr r12d,7
+ lea r8d,DWORD PTR[rcx*1+rcx]
+ xor eax,r10d
+ xor ebx,r11d
+ shr ebp,7
+ lea r9d,DWORD PTR[rdx*1+rdx]
+ ror r10d,8
+ ror r11d,8
+ sub esi,r12d
+ sub edi,ebp
+ xor eax,r10d
+ xor ebx,r11d
+
+ and r8d,0fefefefeh
+ and r9d,0fefefefeh
+ and esi,01b1b1b1bh
+ and edi,01b1b1b1bh
+ mov r12d,ecx
+ mov ebp,edx
+ xor r8d,esi
+ xor r9d,edi
+
+ xor ecx,r8d
+ xor edx,r9d
+ rol ecx,24
+ rol edx,24
+ xor ecx,r8d
+ xor edx,r9d
+ mov esi,DWORD PTR[r14]
+ ror r12d,16
+ ror ebp,16
+ mov edi,DWORD PTR[64+r14]
+ xor ecx,r12d
+ xor edx,ebp
+ mov r8d,DWORD PTR[128+r14]
+ ror r12d,8
+ ror ebp,8
+ mov r9d,DWORD PTR[192+r14]
+ xor ecx,r12d
+ xor edx,ebp
+ jmp $L$enc_loop_compact
+ALIGN 16
+$L$enc_compact_done::
+ xor eax,DWORD PTR[r15]
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+ DB 0F3h,0C3h ;repret
+_x86_64_AES_encrypt_compact ENDP
+PUBLIC AES_encrypt
+
+ALIGN 16
+PUBLIC asm_AES_encrypt
+
+asm_AES_encrypt::
+AES_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_AES_encrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+
+
+ mov r10,rsp
+ lea rcx,QWORD PTR[((-63))+rdx]
+ and rsp,-64
+ sub rcx,rsp
+ neg rcx
+ and rcx,03c0h
+ sub rsp,rcx
+ sub rsp,32
+
+ mov QWORD PTR[16+rsp],rsi
+ mov QWORD PTR[24+rsp],r10
+$L$enc_prologue::
+
+ mov r15,rdx
+ mov r13d,DWORD PTR[240+r15]
+
+ mov eax,DWORD PTR[rdi]
+ mov ebx,DWORD PTR[4+rdi]
+ mov ecx,DWORD PTR[8+rdi]
+ mov edx,DWORD PTR[12+rdi]
+
+ shl r13d,4
+ lea rbp,QWORD PTR[r13*1+r15]
+ mov QWORD PTR[rsp],r15
+ mov QWORD PTR[8+rsp],rbp
+
+
+ lea r14,QWORD PTR[(($L$AES_Te+2048))]
+ lea rbp,QWORD PTR[768+rsp]
+ sub rbp,r14
+ and rbp,0300h
+ lea r14,QWORD PTR[rbp*1+r14]
+
+ call _x86_64_AES_encrypt_compact
+
+ mov r9,QWORD PTR[16+rsp]
+ mov rsi,QWORD PTR[24+rsp]
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$enc_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_AES_encrypt::
+AES_encrypt ENDP
+
+ALIGN 16
+_x86_64_AES_decrypt PROC PRIVATE
+ xor eax,DWORD PTR[r15]
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+
+ mov r13d,DWORD PTR[240+r15]
+ sub r13d,1
+ jmp $L$dec_loop
+ALIGN 16
+$L$dec_loop::
+
+ movzx esi,al
+ movzx edi,bl
+ movzx ebp,cl
+ mov r10d,DWORD PTR[rsi*8+r14]
+ mov r11d,DWORD PTR[rdi*8+r14]
+ mov r12d,DWORD PTR[rbp*8+r14]
+
+ movzx esi,dh
+ movzx edi,ah
+ movzx ebp,dl
+ xor r10d,DWORD PTR[3+rsi*8+r14]
+ xor r11d,DWORD PTR[3+rdi*8+r14]
+ mov r8d,DWORD PTR[rbp*8+r14]
+
+ movzx esi,bh
+ shr eax,16
+ movzx ebp,ch
+ xor r12d,DWORD PTR[3+rsi*8+r14]
+ shr edx,16
+ xor r8d,DWORD PTR[3+rbp*8+r14]
+
+ shr ebx,16
+ lea r15,QWORD PTR[16+r15]
+ shr ecx,16
+
+ movzx esi,cl
+ movzx edi,dl
+ movzx ebp,al
+ xor r10d,DWORD PTR[2+rsi*8+r14]
+ xor r11d,DWORD PTR[2+rdi*8+r14]
+ xor r12d,DWORD PTR[2+rbp*8+r14]
+
+ movzx esi,bh
+ movzx edi,ch
+ movzx ebp,bl
+ xor r10d,DWORD PTR[1+rsi*8+r14]
+ xor r11d,DWORD PTR[1+rdi*8+r14]
+ xor r8d,DWORD PTR[2+rbp*8+r14]
+
+ movzx esi,dh
+ mov edx,DWORD PTR[12+r15]
+ movzx ebp,ah
+ xor r12d,DWORD PTR[1+rsi*8+r14]
+ mov eax,DWORD PTR[r15]
+ xor r8d,DWORD PTR[1+rbp*8+r14]
+
+ xor eax,r10d
+ mov ebx,DWORD PTR[4+r15]
+ mov ecx,DWORD PTR[8+r15]
+ xor ecx,r12d
+ xor ebx,r11d
+ xor edx,r8d
+ sub r13d,1
+ jnz $L$dec_loop
+ lea r14,QWORD PTR[2048+r14]
+ movzx esi,al
+ movzx edi,bl
+ movzx ebp,cl
+ movzx r10d,BYTE PTR[rsi*1+r14]
+ movzx r11d,BYTE PTR[rdi*1+r14]
+ movzx r12d,BYTE PTR[rbp*1+r14]
+
+ movzx esi,dl
+ movzx edi,dh
+ movzx ebp,ah
+ movzx r8d,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ movzx ebp,BYTE PTR[rbp*1+r14]
+
+ shl edi,8
+ shl ebp,8
+
+ xor r10d,edi
+ xor r11d,ebp
+ shr edx,16
+
+ movzx esi,bh
+ movzx edi,ch
+ shr eax,16
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+
+ shl esi,8
+ shl edi,8
+ shr ebx,16
+ xor r12d,esi
+ xor r8d,edi
+ shr ecx,16
+
+ movzx esi,cl
+ movzx edi,dl
+ movzx ebp,al
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ movzx ebp,BYTE PTR[rbp*1+r14]
+
+ shl esi,16
+ shl edi,16
+ shl ebp,16
+
+ xor r10d,esi
+ xor r11d,edi
+ xor r12d,ebp
+
+ movzx esi,bl
+ movzx edi,bh
+ movzx ebp,ch
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ movzx ebp,BYTE PTR[rbp*1+r14]
+
+ shl esi,16
+ shl edi,24
+ shl ebp,24
+
+ xor r8d,esi
+ xor r10d,edi
+ xor r11d,ebp
+
+ movzx esi,dh
+ movzx edi,ah
+ mov edx,DWORD PTR[((16+12))+r15]
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ mov eax,DWORD PTR[((16+0))+r15]
+
+ shl esi,24
+ shl edi,24
+
+ xor r12d,esi
+ xor r8d,edi
+
+ mov ebx,DWORD PTR[((16+4))+r15]
+ mov ecx,DWORD PTR[((16+8))+r15]
+ lea r14,QWORD PTR[((-2048))+r14]
+ xor eax,r10d
+ xor ebx,r11d
+ xor ecx,r12d
+ xor edx,r8d
+ DB 0F3h,0C3h ;repret
+_x86_64_AES_decrypt ENDP
+
+ALIGN 16
+_x86_64_AES_decrypt_compact PROC PRIVATE
+ lea r8,QWORD PTR[128+r14]
+ mov edi,DWORD PTR[((0-128))+r8]
+ mov ebp,DWORD PTR[((32-128))+r8]
+ mov r10d,DWORD PTR[((64-128))+r8]
+ mov r11d,DWORD PTR[((96-128))+r8]
+ mov edi,DWORD PTR[((128-128))+r8]
+ mov ebp,DWORD PTR[((160-128))+r8]
+ mov r10d,DWORD PTR[((192-128))+r8]
+ mov r11d,DWORD PTR[((224-128))+r8]
+ jmp $L$dec_loop_compact
+
+ALIGN 16
+$L$dec_loop_compact::
+ xor eax,DWORD PTR[r15]
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+ lea r15,QWORD PTR[16+r15]
+ movzx r10d,al
+ movzx r11d,bl
+ movzx r12d,cl
+ movzx r10d,BYTE PTR[r10*1+r14]
+ movzx r11d,BYTE PTR[r11*1+r14]
+ movzx r12d,BYTE PTR[r12*1+r14]
+
+ movzx r8d,dl
+ movzx esi,dh
+ movzx edi,ah
+ movzx r8d,BYTE PTR[r8*1+r14]
+ movzx r9d,BYTE PTR[rsi*1+r14]
+ movzx r13d,BYTE PTR[rdi*1+r14]
+
+ movzx ebp,bh
+ movzx esi,ch
+ shr ecx,16
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ movzx esi,BYTE PTR[rsi*1+r14]
+ shr edx,16
+
+ movzx edi,cl
+ shl r9d,8
+ shl r13d,8
+ movzx edi,BYTE PTR[rdi*1+r14]
+ xor r10d,r9d
+ xor r11d,r13d
+
+ movzx r9d,dl
+ shr eax,16
+ shr ebx,16
+ movzx r13d,al
+ shl ebp,8
+ shl esi,8
+ movzx r9d,BYTE PTR[r9*1+r14]
+ movzx r13d,BYTE PTR[r13*1+r14]
+ xor r12d,ebp
+ xor r8d,esi
+
+ movzx ebp,bl
+ movzx esi,bh
+ shl edi,16
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ movzx esi,BYTE PTR[rsi*1+r14]
+ xor r10d,edi
+
+ movzx edi,ch
+ shl r9d,16
+ shl r13d,16
+ movzx ebx,BYTE PTR[rdi*1+r14]
+ xor r11d,r9d
+ xor r12d,r13d
+
+ movzx edi,dh
+ shr eax,8
+ shl ebp,16
+ movzx ecx,BYTE PTR[rdi*1+r14]
+ movzx edx,BYTE PTR[rax*1+r14]
+ xor r8d,ebp
+
+ shl esi,24
+ shl ebx,24
+ shl ecx,24
+ xor r10d,esi
+ shl edx,24
+ xor ebx,r11d
+ mov eax,r10d
+ xor ecx,r12d
+ xor edx,r8d
+ cmp r15,QWORD PTR[16+rsp]
+ je $L$dec_compact_done
+
+ mov rsi,QWORD PTR[((256+0))+r14]
+ shl rbx,32
+ shl rdx,32
+ mov rdi,QWORD PTR[((256+8))+r14]
+ or rax,rbx
+ or rcx,rdx
+ mov rbp,QWORD PTR[((256+16))+r14]
+ mov rbx,rax
+ mov rdx,rcx
+ and rbx,rsi
+ and rdx,rsi
+ mov r9,rbx
+ mov r12,rdx
+ shr r9,7
+ lea r8,QWORD PTR[rax*1+rax]
+ shr r12,7
+ lea r11,QWORD PTR[rcx*1+rcx]
+ sub rbx,r9
+ sub rdx,r12
+ and r8,rdi
+ and r11,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor rbx,r8
+ xor rdx,r11
+ mov r8,rbx
+ mov r11,rdx
+
+ and rbx,rsi
+ and rdx,rsi
+ mov r10,rbx
+ mov r13,rdx
+ shr r10,7
+ lea r9,QWORD PTR[r8*1+r8]
+ shr r13,7
+ lea r12,QWORD PTR[r11*1+r11]
+ sub rbx,r10
+ sub rdx,r13
+ and r9,rdi
+ and r12,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor rbx,r9
+ xor rdx,r12
+ mov r9,rbx
+ mov r12,rdx
+
+ and rbx,rsi
+ and rdx,rsi
+ mov r10,rbx
+ mov r13,rdx
+ shr r10,7
+ xor r8,rax
+ shr r13,7
+ xor r11,rcx
+ sub rbx,r10
+ sub rdx,r13
+ lea r10,QWORD PTR[r9*1+r9]
+ lea r13,QWORD PTR[r12*1+r12]
+ xor r9,rax
+ xor r12,rcx
+ and r10,rdi
+ and r13,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor r10,rbx
+ xor r13,rdx
+
+ xor rax,r10
+ xor rcx,r13
+ xor r8,r10
+ xor r11,r13
+ mov rbx,rax
+ mov rdx,rcx
+ xor r9,r10
+ xor r12,r13
+ shr rbx,32
+ shr rdx,32
+ xor r10,r8
+ xor r13,r11
+ rol eax,8
+ rol ecx,8
+ xor r10,r9
+ xor r13,r12
+
+ rol ebx,8
+ rol edx,8
+ xor eax,r10d
+ xor ecx,r13d
+ shr r10,32
+ shr r13,32
+ xor ebx,r10d
+ xor edx,r13d
+
+ mov r10,r8
+ mov r13,r11
+ shr r10,32
+ shr r13,32
+ rol r8d,24
+ rol r11d,24
+ rol r10d,24
+ rol r13d,24
+ xor eax,r8d
+ xor ecx,r11d
+ mov r8,r9
+ mov r11,r12
+ xor ebx,r10d
+ xor edx,r13d
+
+ mov rsi,QWORD PTR[r14]
+ shr r8,32
+ shr r11,32
+ mov rdi,QWORD PTR[64+r14]
+ rol r9d,16
+ rol r12d,16
+ mov rbp,QWORD PTR[128+r14]
+ rol r8d,16
+ rol r11d,16
+ mov r10,QWORD PTR[192+r14]
+ xor eax,r9d
+ xor ecx,r12d
+ mov r13,QWORD PTR[256+r14]
+ xor ebx,r8d
+ xor edx,r11d
+ jmp $L$dec_loop_compact
+ALIGN 16
+$L$dec_compact_done::
+ xor eax,DWORD PTR[r15]
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+ DB 0F3h,0C3h ;repret
+_x86_64_AES_decrypt_compact ENDP
+PUBLIC AES_decrypt
+
+ALIGN 16
+PUBLIC asm_AES_decrypt
+
+asm_AES_decrypt::
+AES_decrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_AES_decrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+
+
+ mov r10,rsp
+ lea rcx,QWORD PTR[((-63))+rdx]
+ and rsp,-64
+ sub rcx,rsp
+ neg rcx
+ and rcx,03c0h
+ sub rsp,rcx
+ sub rsp,32
+
+ mov QWORD PTR[16+rsp],rsi
+ mov QWORD PTR[24+rsp],r10
+$L$dec_prologue::
+
+ mov r15,rdx
+ mov r13d,DWORD PTR[240+r15]
+
+ mov eax,DWORD PTR[rdi]
+ mov ebx,DWORD PTR[4+rdi]
+ mov ecx,DWORD PTR[8+rdi]
+ mov edx,DWORD PTR[12+rdi]
+
+ shl r13d,4
+ lea rbp,QWORD PTR[r13*1+r15]
+ mov QWORD PTR[rsp],r15
+ mov QWORD PTR[8+rsp],rbp
+
+
+ lea r14,QWORD PTR[(($L$AES_Td+2048))]
+ lea rbp,QWORD PTR[768+rsp]
+ sub rbp,r14
+ and rbp,0300h
+ lea r14,QWORD PTR[rbp*1+r14]
+ shr rbp,3
+ add r14,rbp
+
+ call _x86_64_AES_decrypt_compact
+
+ mov r9,QWORD PTR[16+rsp]
+ mov rsi,QWORD PTR[24+rsp]
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$dec_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_AES_decrypt::
+AES_decrypt ENDP
+PUBLIC AES_set_encrypt_key
+
+ALIGN 16
+AES_set_encrypt_key PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_AES_set_encrypt_key::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp,8
+$L$enc_key_prologue::
+
+ call _x86_64_AES_set_encrypt_key
+
+ mov r15,QWORD PTR[8+rsp]
+ mov r14,QWORD PTR[16+rsp]
+ mov r13,QWORD PTR[24+rsp]
+ mov r12,QWORD PTR[32+rsp]
+ mov rbp,QWORD PTR[40+rsp]
+ mov rbx,QWORD PTR[48+rsp]
+ add rsp,56
+$L$enc_key_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_AES_set_encrypt_key::
+AES_set_encrypt_key ENDP
+
+
+ALIGN 16
+_x86_64_AES_set_encrypt_key PROC PRIVATE
+ mov ecx,esi
+ mov rsi,rdi
+ mov rdi,rdx
+
+ test rsi,-1
+ jz $L$badpointer
+ test rdi,-1
+ jz $L$badpointer
+
+ lea rbp,QWORD PTR[$L$AES_Te]
+ lea rbp,QWORD PTR[((2048+128))+rbp]
+
+
+ mov eax,DWORD PTR[((0-128))+rbp]
+ mov ebx,DWORD PTR[((32-128))+rbp]
+ mov r8d,DWORD PTR[((64-128))+rbp]
+ mov edx,DWORD PTR[((96-128))+rbp]
+ mov eax,DWORD PTR[((128-128))+rbp]
+ mov ebx,DWORD PTR[((160-128))+rbp]
+ mov r8d,DWORD PTR[((192-128))+rbp]
+ mov edx,DWORD PTR[((224-128))+rbp]
+
+ cmp ecx,128
+ je $L$10rounds
+ cmp ecx,192
+ je $L$12rounds
+ cmp ecx,256
+ je $L$14rounds
+ mov rax,-2
+ jmp $L$exit
+
+$L$10rounds::
+ mov rax,QWORD PTR[rsi]
+ mov rdx,QWORD PTR[8+rsi]
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rdx
+
+ shr rdx,32
+ xor ecx,ecx
+ jmp $L$10shortcut
+ALIGN 4
+$L$10loop::
+ mov eax,DWORD PTR[rdi]
+ mov edx,DWORD PTR[12+rdi]
+$L$10shortcut::
+ movzx esi,dl
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,24
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shr edx,16
+ movzx esi,dl
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,8
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shl ebx,16
+ xor eax,ebx
+
+ xor eax,DWORD PTR[((1024-128))+rcx*4+rbp]
+ mov DWORD PTR[16+rdi],eax
+ xor eax,DWORD PTR[4+rdi]
+ mov DWORD PTR[20+rdi],eax
+ xor eax,DWORD PTR[8+rdi]
+ mov DWORD PTR[24+rdi],eax
+ xor eax,DWORD PTR[12+rdi]
+ mov DWORD PTR[28+rdi],eax
+ add ecx,1
+ lea rdi,QWORD PTR[16+rdi]
+ cmp ecx,10
+ jl $L$10loop
+
+ mov DWORD PTR[80+rdi],10
+ xor rax,rax
+ jmp $L$exit
+
+$L$12rounds::
+ mov rax,QWORD PTR[rsi]
+ mov rbx,QWORD PTR[8+rsi]
+ mov rdx,QWORD PTR[16+rsi]
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rbx
+ mov QWORD PTR[16+rdi],rdx
+
+ shr rdx,32
+ xor ecx,ecx
+ jmp $L$12shortcut
+ALIGN 4
+$L$12loop::
+ mov eax,DWORD PTR[rdi]
+ mov edx,DWORD PTR[20+rdi]
+$L$12shortcut::
+ movzx esi,dl
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,24
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shr edx,16
+ movzx esi,dl
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,8
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shl ebx,16
+ xor eax,ebx
+
+ xor eax,DWORD PTR[((1024-128))+rcx*4+rbp]
+ mov DWORD PTR[24+rdi],eax
+ xor eax,DWORD PTR[4+rdi]
+ mov DWORD PTR[28+rdi],eax
+ xor eax,DWORD PTR[8+rdi]
+ mov DWORD PTR[32+rdi],eax
+ xor eax,DWORD PTR[12+rdi]
+ mov DWORD PTR[36+rdi],eax
+
+ cmp ecx,7
+ je $L$12break
+ add ecx,1
+
+ xor eax,DWORD PTR[16+rdi]
+ mov DWORD PTR[40+rdi],eax
+ xor eax,DWORD PTR[20+rdi]
+ mov DWORD PTR[44+rdi],eax
+
+ lea rdi,QWORD PTR[24+rdi]
+ jmp $L$12loop
+$L$12break::
+ mov DWORD PTR[72+rdi],12
+ xor rax,rax
+ jmp $L$exit
+
+$L$14rounds::
+ mov rax,QWORD PTR[rsi]
+ mov rbx,QWORD PTR[8+rsi]
+ mov rcx,QWORD PTR[16+rsi]
+ mov rdx,QWORD PTR[24+rsi]
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rbx
+ mov QWORD PTR[16+rdi],rcx
+ mov QWORD PTR[24+rdi],rdx
+
+ shr rdx,32
+ xor ecx,ecx
+ jmp $L$14shortcut
+ALIGN 4
+$L$14loop::
+ mov eax,DWORD PTR[rdi]
+ mov edx,DWORD PTR[28+rdi]
+$L$14shortcut::
+ movzx esi,dl
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,24
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shr edx,16
+ movzx esi,dl
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,8
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shl ebx,16
+ xor eax,ebx
+
+ xor eax,DWORD PTR[((1024-128))+rcx*4+rbp]
+ mov DWORD PTR[32+rdi],eax
+ xor eax,DWORD PTR[4+rdi]
+ mov DWORD PTR[36+rdi],eax
+ xor eax,DWORD PTR[8+rdi]
+ mov DWORD PTR[40+rdi],eax
+ xor eax,DWORD PTR[12+rdi]
+ mov DWORD PTR[44+rdi],eax
+
+ cmp ecx,6
+ je $L$14break
+ add ecx,1
+
+ mov edx,eax
+ mov eax,DWORD PTR[16+rdi]
+ movzx esi,dl
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shr edx,16
+ shl ebx,8
+ movzx esi,dl
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,16
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shl ebx,24
+ xor eax,ebx
+
+ mov DWORD PTR[48+rdi],eax
+ xor eax,DWORD PTR[20+rdi]
+ mov DWORD PTR[52+rdi],eax
+ xor eax,DWORD PTR[24+rdi]
+ mov DWORD PTR[56+rdi],eax
+ xor eax,DWORD PTR[28+rdi]
+ mov DWORD PTR[60+rdi],eax
+
+ lea rdi,QWORD PTR[32+rdi]
+ jmp $L$14loop
+$L$14break::
+ mov DWORD PTR[48+rdi],14
+ xor rax,rax
+ jmp $L$exit
+
+$L$badpointer::
+ mov rax,-1
+$L$exit::
+ DB 0F3h,0C3h ;repret
+_x86_64_AES_set_encrypt_key ENDP
+PUBLIC AES_set_decrypt_key
+
+ALIGN 16
+AES_set_decrypt_key PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_AES_set_decrypt_key::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ push rdx
+$L$dec_key_prologue::
+
+ call _x86_64_AES_set_encrypt_key
+ mov r8,QWORD PTR[rsp]
+ cmp eax,0
+ jne $L$abort
+
+ mov r14d,DWORD PTR[240+r8]
+ xor rdi,rdi
+ lea rcx,QWORD PTR[r14*4+rdi]
+ mov rsi,r8
+ lea rdi,QWORD PTR[rcx*4+r8]
+ALIGN 4
+$L$invert::
+ mov rax,QWORD PTR[rsi]
+ mov rbx,QWORD PTR[8+rsi]
+ mov rcx,QWORD PTR[rdi]
+ mov rdx,QWORD PTR[8+rdi]
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rbx
+ mov QWORD PTR[rsi],rcx
+ mov QWORD PTR[8+rsi],rdx
+ lea rsi,QWORD PTR[16+rsi]
+ lea rdi,QWORD PTR[((-16))+rdi]
+ cmp rdi,rsi
+ jne $L$invert
+
+ lea rax,QWORD PTR[(($L$AES_Te+2048+1024))]
+
+ mov rsi,QWORD PTR[40+rax]
+ mov rdi,QWORD PTR[48+rax]
+ mov rbp,QWORD PTR[56+rax]
+
+ mov r15,r8
+ sub r14d,1
+ALIGN 4
+$L$permute::
+ lea r15,QWORD PTR[16+r15]
+ mov rax,QWORD PTR[r15]
+ mov rcx,QWORD PTR[8+r15]
+ mov rbx,rax
+ mov rdx,rcx
+ and rbx,rsi
+ and rdx,rsi
+ mov r9,rbx
+ mov r12,rdx
+ shr r9,7
+ lea r8,QWORD PTR[rax*1+rax]
+ shr r12,7
+ lea r11,QWORD PTR[rcx*1+rcx]
+ sub rbx,r9
+ sub rdx,r12
+ and r8,rdi
+ and r11,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor rbx,r8
+ xor rdx,r11
+ mov r8,rbx
+ mov r11,rdx
+
+ and rbx,rsi
+ and rdx,rsi
+ mov r10,rbx
+ mov r13,rdx
+ shr r10,7
+ lea r9,QWORD PTR[r8*1+r8]
+ shr r13,7
+ lea r12,QWORD PTR[r11*1+r11]
+ sub rbx,r10
+ sub rdx,r13
+ and r9,rdi
+ and r12,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor rbx,r9
+ xor rdx,r12
+ mov r9,rbx
+ mov r12,rdx
+
+ and rbx,rsi
+ and rdx,rsi
+ mov r10,rbx
+ mov r13,rdx
+ shr r10,7
+ xor r8,rax
+ shr r13,7
+ xor r11,rcx
+ sub rbx,r10
+ sub rdx,r13
+ lea r10,QWORD PTR[r9*1+r9]
+ lea r13,QWORD PTR[r12*1+r12]
+ xor r9,rax
+ xor r12,rcx
+ and r10,rdi
+ and r13,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor r10,rbx
+ xor r13,rdx
+
+ xor rax,r10
+ xor rcx,r13
+ xor r8,r10
+ xor r11,r13
+ mov rbx,rax
+ mov rdx,rcx
+ xor r9,r10
+ xor r12,r13
+ shr rbx,32
+ shr rdx,32
+ xor r10,r8
+ xor r13,r11
+ rol eax,8
+ rol ecx,8
+ xor r10,r9
+ xor r13,r12
+
+ rol ebx,8
+ rol edx,8
+ xor eax,r10d
+ xor ecx,r13d
+ shr r10,32
+ shr r13,32
+ xor ebx,r10d
+ xor edx,r13d
+
+ mov r10,r8
+ mov r13,r11
+ shr r10,32
+ shr r13,32
+ rol r8d,24
+ rol r11d,24
+ rol r10d,24
+ rol r13d,24
+ xor eax,r8d
+ xor ecx,r11d
+ mov r8,r9
+ mov r11,r12
+ xor ebx,r10d
+ xor edx,r13d
+
+
+ shr r8,32
+ shr r11,32
+
+ rol r9d,16
+ rol r12d,16
+
+ rol r8d,16
+ rol r11d,16
+
+ xor eax,r9d
+ xor ecx,r12d
+
+ xor ebx,r8d
+ xor edx,r11d
+ mov DWORD PTR[r15],eax
+ mov DWORD PTR[4+r15],ebx
+ mov DWORD PTR[8+r15],ecx
+ mov DWORD PTR[12+r15],edx
+ sub r14d,1
+ jnz $L$permute
+
+ xor rax,rax
+$L$abort::
+ mov r15,QWORD PTR[8+rsp]
+ mov r14,QWORD PTR[16+rsp]
+ mov r13,QWORD PTR[24+rsp]
+ mov r12,QWORD PTR[32+rsp]
+ mov rbp,QWORD PTR[40+rsp]
+ mov rbx,QWORD PTR[48+rsp]
+ add rsp,56
+$L$dec_key_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_AES_set_decrypt_key::
+AES_set_decrypt_key ENDP
+PUBLIC AES_cbc_encrypt
+
+ALIGN 16
+EXTERN OPENSSL_ia32cap_P:NEAR
+
+PUBLIC asm_AES_cbc_encrypt
+
+asm_AES_cbc_encrypt::
+AES_cbc_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_AES_cbc_encrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ cmp rdx,0
+ je $L$cbc_epilogue
+ pushfq
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+$L$cbc_prologue::
+
+ cld
+ mov r9d,r9d
+
+ lea r14,QWORD PTR[$L$AES_Te]
+ cmp r9,0
+ jne $L$cbc_picked_te
+ lea r14,QWORD PTR[$L$AES_Td]
+$L$cbc_picked_te::
+
+ mov r10d,DWORD PTR[OPENSSL_ia32cap_P]
+ cmp rdx,512
+ jb $L$cbc_slow_prologue
+ test rdx,15
+ jnz $L$cbc_slow_prologue
+ bt r10d,28
+ jc $L$cbc_slow_prologue
+
+
+ lea r15,QWORD PTR[((-88-248))+rsp]
+ and r15,-64
+
+
+ mov r10,r14
+ lea r11,QWORD PTR[2304+r14]
+ mov r12,r15
+ and r10,0FFFh
+ and r11,0FFFh
+ and r12,0FFFh
+
+ cmp r12,r11
+ jb $L$cbc_te_break_out
+ sub r12,r11
+ sub r15,r12
+ jmp $L$cbc_te_ok
+$L$cbc_te_break_out::
+ sub r12,r10
+ and r12,0FFFh
+ add r12,320
+ sub r15,r12
+ALIGN 4
+$L$cbc_te_ok::
+
+ xchg r15,rsp
+
+ mov QWORD PTR[16+rsp],r15
+$L$cbc_fast_body::
+ mov QWORD PTR[24+rsp],rdi
+ mov QWORD PTR[32+rsp],rsi
+ mov QWORD PTR[40+rsp],rdx
+ mov QWORD PTR[48+rsp],rcx
+ mov QWORD PTR[56+rsp],r8
+ mov DWORD PTR[((80+240))+rsp],0
+ mov rbp,r8
+ mov rbx,r9
+ mov r9,rsi
+ mov r8,rdi
+ mov r15,rcx
+
+ mov eax,DWORD PTR[240+r15]
+
+ mov r10,r15
+ sub r10,r14
+ and r10,0fffh
+ cmp r10,2304
+ jb $L$cbc_do_ecopy
+ cmp r10,4096-248
+ jb $L$cbc_skip_ecopy
+ALIGN 4
+$L$cbc_do_ecopy::
+ mov rsi,r15
+ lea rdi,QWORD PTR[80+rsp]
+ lea r15,QWORD PTR[80+rsp]
+ mov ecx,240/8
+ DD 090A548F3h
+ mov DWORD PTR[rdi],eax
+$L$cbc_skip_ecopy::
+ mov QWORD PTR[rsp],r15
+
+ mov ecx,18
+ALIGN 4
+$L$cbc_prefetch_te::
+ mov r10,QWORD PTR[r14]
+ mov r11,QWORD PTR[32+r14]
+ mov r12,QWORD PTR[64+r14]
+ mov r13,QWORD PTR[96+r14]
+ lea r14,QWORD PTR[128+r14]
+ sub ecx,1
+ jnz $L$cbc_prefetch_te
+ lea r14,QWORD PTR[((-2304))+r14]
+
+ cmp rbx,0
+ je $L$FAST_DECRYPT
+
+
+ mov eax,DWORD PTR[rbp]
+ mov ebx,DWORD PTR[4+rbp]
+ mov ecx,DWORD PTR[8+rbp]
+ mov edx,DWORD PTR[12+rbp]
+
+ALIGN 4
+$L$cbc_fast_enc_loop::
+ xor eax,DWORD PTR[r8]
+ xor ebx,DWORD PTR[4+r8]
+ xor ecx,DWORD PTR[8+r8]
+ xor edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8
+
+ call _x86_64_AES_encrypt
+
+ mov r8,QWORD PTR[24+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ sub r10,16
+ test r10,-16
+ mov QWORD PTR[40+rsp],r10
+ jnz $L$cbc_fast_enc_loop
+ mov rbp,QWORD PTR[56+rsp]
+ mov DWORD PTR[rbp],eax
+ mov DWORD PTR[4+rbp],ebx
+ mov DWORD PTR[8+rbp],ecx
+ mov DWORD PTR[12+rbp],edx
+
+ jmp $L$cbc_fast_cleanup
+
+
+ALIGN 16
+$L$FAST_DECRYPT::
+ cmp r9,r8
+ je $L$cbc_fast_dec_in_place
+
+ mov QWORD PTR[64+rsp],rbp
+ALIGN 4
+$L$cbc_fast_dec_loop::
+ mov eax,DWORD PTR[r8]
+ mov ebx,DWORD PTR[4+r8]
+ mov ecx,DWORD PTR[8+r8]
+ mov edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8
+
+ call _x86_64_AES_decrypt
+
+ mov rbp,QWORD PTR[64+rsp]
+ mov r8,QWORD PTR[24+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ xor eax,DWORD PTR[rbp]
+ xor ebx,DWORD PTR[4+rbp]
+ xor ecx,DWORD PTR[8+rbp]
+ xor edx,DWORD PTR[12+rbp]
+ mov rbp,r8
+
+ sub r10,16
+ mov QWORD PTR[40+rsp],r10
+ mov QWORD PTR[64+rsp],rbp
+
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ jnz $L$cbc_fast_dec_loop
+ mov r12,QWORD PTR[56+rsp]
+ mov r10,QWORD PTR[rbp]
+ mov r11,QWORD PTR[8+rbp]
+ mov QWORD PTR[r12],r10
+ mov QWORD PTR[8+r12],r11
+ jmp $L$cbc_fast_cleanup
+
+ALIGN 16
+$L$cbc_fast_dec_in_place::
+ mov r10,QWORD PTR[rbp]
+ mov r11,QWORD PTR[8+rbp]
+ mov QWORD PTR[((0+64))+rsp],r10
+ mov QWORD PTR[((8+64))+rsp],r11
+ALIGN 4
+$L$cbc_fast_dec_in_place_loop::
+ mov eax,DWORD PTR[r8]
+ mov ebx,DWORD PTR[4+r8]
+ mov ecx,DWORD PTR[8+r8]
+ mov edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8
+
+ call _x86_64_AES_decrypt
+
+ mov r8,QWORD PTR[24+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ xor eax,DWORD PTR[((0+64))+rsp]
+ xor ebx,DWORD PTR[((4+64))+rsp]
+ xor ecx,DWORD PTR[((8+64))+rsp]
+ xor edx,DWORD PTR[((12+64))+rsp]
+
+ mov r11,QWORD PTR[r8]
+ mov r12,QWORD PTR[8+r8]
+ sub r10,16
+ jz $L$cbc_fast_dec_in_place_done
+
+ mov QWORD PTR[((0+64))+rsp],r11
+ mov QWORD PTR[((8+64))+rsp],r12
+
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ mov QWORD PTR[40+rsp],r10
+ jmp $L$cbc_fast_dec_in_place_loop
+$L$cbc_fast_dec_in_place_done::
+ mov rdi,QWORD PTR[56+rsp]
+ mov QWORD PTR[rdi],r11
+ mov QWORD PTR[8+rdi],r12
+
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ALIGN 4
+$L$cbc_fast_cleanup::
+ cmp DWORD PTR[((80+240))+rsp],0
+ lea rdi,QWORD PTR[80+rsp]
+ je $L$cbc_exit
+ mov ecx,240/8
+ xor rax,rax
+ DD 090AB48F3h
+
+ jmp $L$cbc_exit
+
+
+ALIGN 16
+$L$cbc_slow_prologue::
+
+ lea rbp,QWORD PTR[((-88))+rsp]
+ and rbp,-64
+
+ lea r10,QWORD PTR[((-88-63))+rcx]
+ sub r10,rbp
+ neg r10
+ and r10,03c0h
+ sub rbp,r10
+
+ xchg rbp,rsp
+
+ mov QWORD PTR[16+rsp],rbp
+$L$cbc_slow_body::
+
+
+
+
+ mov QWORD PTR[56+rsp],r8
+ mov rbp,r8
+ mov rbx,r9
+ mov r9,rsi
+ mov r8,rdi
+ mov r15,rcx
+ mov r10,rdx
+
+ mov eax,DWORD PTR[240+r15]
+ mov QWORD PTR[rsp],r15
+ shl eax,4
+ lea rax,QWORD PTR[rax*1+r15]
+ mov QWORD PTR[8+rsp],rax
+
+
+ lea r14,QWORD PTR[2048+r14]
+ lea rax,QWORD PTR[((768-8))+rsp]
+ sub rax,r14
+ and rax,0300h
+ lea r14,QWORD PTR[rax*1+r14]
+
+ cmp rbx,0
+ je $L$SLOW_DECRYPT
+
+
+ test r10,-16
+ mov eax,DWORD PTR[rbp]
+ mov ebx,DWORD PTR[4+rbp]
+ mov ecx,DWORD PTR[8+rbp]
+ mov edx,DWORD PTR[12+rbp]
+ jz $L$cbc_slow_enc_tail
+
+ALIGN 4
+$L$cbc_slow_enc_loop::
+ xor eax,DWORD PTR[r8]
+ xor ebx,DWORD PTR[4+r8]
+ xor ecx,DWORD PTR[8+r8]
+ xor edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8
+ mov QWORD PTR[32+rsp],r9
+ mov QWORD PTR[40+rsp],r10
+
+ call _x86_64_AES_encrypt_compact
+
+ mov r8,QWORD PTR[24+rsp]
+ mov r9,QWORD PTR[32+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ sub r10,16
+ test r10,-16
+ jnz $L$cbc_slow_enc_loop
+ test r10,15
+ jnz $L$cbc_slow_enc_tail
+ mov rbp,QWORD PTR[56+rsp]
+ mov DWORD PTR[rbp],eax
+ mov DWORD PTR[4+rbp],ebx
+ mov DWORD PTR[8+rbp],ecx
+ mov DWORD PTR[12+rbp],edx
+
+ jmp $L$cbc_exit
+
+ALIGN 4
+$L$cbc_slow_enc_tail::
+ mov r11,rax
+ mov r12,rcx
+ mov rcx,r10
+ mov rsi,r8
+ mov rdi,r9
+ DD 09066A4F3h
+ mov rcx,16
+ sub rcx,r10
+ xor rax,rax
+ DD 09066AAF3h
+ mov r8,r9
+ mov r10,16
+ mov rax,r11
+ mov rcx,r12
+ jmp $L$cbc_slow_enc_loop
+
+ALIGN 16
+$L$SLOW_DECRYPT::
+ shr rax,3
+ add r14,rax
+
+ mov r11,QWORD PTR[rbp]
+ mov r12,QWORD PTR[8+rbp]
+ mov QWORD PTR[((0+64))+rsp],r11
+ mov QWORD PTR[((8+64))+rsp],r12
+
+ALIGN 4
+$L$cbc_slow_dec_loop::
+ mov eax,DWORD PTR[r8]
+ mov ebx,DWORD PTR[4+r8]
+ mov ecx,DWORD PTR[8+r8]
+ mov edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8
+ mov QWORD PTR[32+rsp],r9
+ mov QWORD PTR[40+rsp],r10
+
+ call _x86_64_AES_decrypt_compact
+
+ mov r8,QWORD PTR[24+rsp]
+ mov r9,QWORD PTR[32+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ xor eax,DWORD PTR[((0+64))+rsp]
+ xor ebx,DWORD PTR[((4+64))+rsp]
+ xor ecx,DWORD PTR[((8+64))+rsp]
+ xor edx,DWORD PTR[((12+64))+rsp]
+
+ mov r11,QWORD PTR[r8]
+ mov r12,QWORD PTR[8+r8]
+ sub r10,16
+ jc $L$cbc_slow_dec_partial
+ jz $L$cbc_slow_dec_done
+
+ mov QWORD PTR[((0+64))+rsp],r11
+ mov QWORD PTR[((8+64))+rsp],r12
+
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ jmp $L$cbc_slow_dec_loop
+$L$cbc_slow_dec_done::
+ mov rdi,QWORD PTR[56+rsp]
+ mov QWORD PTR[rdi],r11
+ mov QWORD PTR[8+rdi],r12
+
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ jmp $L$cbc_exit
+
+ALIGN 4
+$L$cbc_slow_dec_partial::
+ mov rdi,QWORD PTR[56+rsp]
+ mov QWORD PTR[rdi],r11
+ mov QWORD PTR[8+rdi],r12
+
+ mov DWORD PTR[((0+64))+rsp],eax
+ mov DWORD PTR[((4+64))+rsp],ebx
+ mov DWORD PTR[((8+64))+rsp],ecx
+ mov DWORD PTR[((12+64))+rsp],edx
+
+ mov rdi,r9
+ lea rsi,QWORD PTR[64+rsp]
+ lea rcx,QWORD PTR[16+r10]
+ DD 09066A4F3h
+ jmp $L$cbc_exit
+
+ALIGN 16
+$L$cbc_exit::
+ mov rsi,QWORD PTR[16+rsp]
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$cbc_popfq::
+ popfq
+$L$cbc_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_AES_cbc_encrypt::
+AES_cbc_encrypt ENDP
+ALIGN 64
+$L$AES_Te::
+ DD 0a56363c6h,0a56363c6h
+ DD 0847c7cf8h,0847c7cf8h
+ DD 0997777eeh,0997777eeh
+ DD 08d7b7bf6h,08d7b7bf6h
+ DD 00df2f2ffh,00df2f2ffh
+ DD 0bd6b6bd6h,0bd6b6bd6h
+ DD 0b16f6fdeh,0b16f6fdeh
+ DD 054c5c591h,054c5c591h
+ DD 050303060h,050303060h
+ DD 003010102h,003010102h
+ DD 0a96767ceh,0a96767ceh
+ DD 07d2b2b56h,07d2b2b56h
+ DD 019fefee7h,019fefee7h
+ DD 062d7d7b5h,062d7d7b5h
+ DD 0e6abab4dh,0e6abab4dh
+ DD 09a7676ech,09a7676ech
+ DD 045caca8fh,045caca8fh
+ DD 09d82821fh,09d82821fh
+ DD 040c9c989h,040c9c989h
+ DD 0877d7dfah,0877d7dfah
+ DD 015fafaefh,015fafaefh
+ DD 0eb5959b2h,0eb5959b2h
+ DD 0c947478eh,0c947478eh
+ DD 00bf0f0fbh,00bf0f0fbh
+ DD 0ecadad41h,0ecadad41h
+ DD 067d4d4b3h,067d4d4b3h
+ DD 0fda2a25fh,0fda2a25fh
+ DD 0eaafaf45h,0eaafaf45h
+ DD 0bf9c9c23h,0bf9c9c23h
+ DD 0f7a4a453h,0f7a4a453h
+ DD 0967272e4h,0967272e4h
+ DD 05bc0c09bh,05bc0c09bh
+ DD 0c2b7b775h,0c2b7b775h
+ DD 01cfdfde1h,01cfdfde1h
+ DD 0ae93933dh,0ae93933dh
+ DD 06a26264ch,06a26264ch
+ DD 05a36366ch,05a36366ch
+ DD 0413f3f7eh,0413f3f7eh
+ DD 002f7f7f5h,002f7f7f5h
+ DD 04fcccc83h,04fcccc83h
+ DD 05c343468h,05c343468h
+ DD 0f4a5a551h,0f4a5a551h
+ DD 034e5e5d1h,034e5e5d1h
+ DD 008f1f1f9h,008f1f1f9h
+ DD 0937171e2h,0937171e2h
+ DD 073d8d8abh,073d8d8abh
+ DD 053313162h,053313162h
+ DD 03f15152ah,03f15152ah
+ DD 00c040408h,00c040408h
+ DD 052c7c795h,052c7c795h
+ DD 065232346h,065232346h
+ DD 05ec3c39dh,05ec3c39dh
+ DD 028181830h,028181830h
+ DD 0a1969637h,0a1969637h
+ DD 00f05050ah,00f05050ah
+ DD 0b59a9a2fh,0b59a9a2fh
+ DD 00907070eh,00907070eh
+ DD 036121224h,036121224h
+ DD 09b80801bh,09b80801bh
+ DD 03de2e2dfh,03de2e2dfh
+ DD 026ebebcdh,026ebebcdh
+ DD 06927274eh,06927274eh
+ DD 0cdb2b27fh,0cdb2b27fh
+ DD 09f7575eah,09f7575eah
+ DD 01b090912h,01b090912h
+ DD 09e83831dh,09e83831dh
+ DD 0742c2c58h,0742c2c58h
+ DD 02e1a1a34h,02e1a1a34h
+ DD 02d1b1b36h,02d1b1b36h
+ DD 0b26e6edch,0b26e6edch
+ DD 0ee5a5ab4h,0ee5a5ab4h
+ DD 0fba0a05bh,0fba0a05bh
+ DD 0f65252a4h,0f65252a4h
+ DD 04d3b3b76h,04d3b3b76h
+ DD 061d6d6b7h,061d6d6b7h
+ DD 0ceb3b37dh,0ceb3b37dh
+ DD 07b292952h,07b292952h
+ DD 03ee3e3ddh,03ee3e3ddh
+ DD 0712f2f5eh,0712f2f5eh
+ DD 097848413h,097848413h
+ DD 0f55353a6h,0f55353a6h
+ DD 068d1d1b9h,068d1d1b9h
+ DD 000000000h,000000000h
+ DD 02cededc1h,02cededc1h
+ DD 060202040h,060202040h
+ DD 01ffcfce3h,01ffcfce3h
+ DD 0c8b1b179h,0c8b1b179h
+ DD 0ed5b5bb6h,0ed5b5bb6h
+ DD 0be6a6ad4h,0be6a6ad4h
+ DD 046cbcb8dh,046cbcb8dh
+ DD 0d9bebe67h,0d9bebe67h
+ DD 04b393972h,04b393972h
+ DD 0de4a4a94h,0de4a4a94h
+ DD 0d44c4c98h,0d44c4c98h
+ DD 0e85858b0h,0e85858b0h
+ DD 04acfcf85h,04acfcf85h
+ DD 06bd0d0bbh,06bd0d0bbh
+ DD 02aefefc5h,02aefefc5h
+ DD 0e5aaaa4fh,0e5aaaa4fh
+ DD 016fbfbedh,016fbfbedh
+ DD 0c5434386h,0c5434386h
+ DD 0d74d4d9ah,0d74d4d9ah
+ DD 055333366h,055333366h
+ DD 094858511h,094858511h
+ DD 0cf45458ah,0cf45458ah
+ DD 010f9f9e9h,010f9f9e9h
+ DD 006020204h,006020204h
+ DD 0817f7ffeh,0817f7ffeh
+ DD 0f05050a0h,0f05050a0h
+ DD 0443c3c78h,0443c3c78h
+ DD 0ba9f9f25h,0ba9f9f25h
+ DD 0e3a8a84bh,0e3a8a84bh
+ DD 0f35151a2h,0f35151a2h
+ DD 0fea3a35dh,0fea3a35dh
+ DD 0c0404080h,0c0404080h
+ DD 08a8f8f05h,08a8f8f05h
+ DD 0ad92923fh,0ad92923fh
+ DD 0bc9d9d21h,0bc9d9d21h
+ DD 048383870h,048383870h
+ DD 004f5f5f1h,004f5f5f1h
+ DD 0dfbcbc63h,0dfbcbc63h
+ DD 0c1b6b677h,0c1b6b677h
+ DD 075dadaafh,075dadaafh
+ DD 063212142h,063212142h
+ DD 030101020h,030101020h
+ DD 01affffe5h,01affffe5h
+ DD 00ef3f3fdh,00ef3f3fdh
+ DD 06dd2d2bfh,06dd2d2bfh
+ DD 04ccdcd81h,04ccdcd81h
+ DD 0140c0c18h,0140c0c18h
+ DD 035131326h,035131326h
+ DD 02fececc3h,02fececc3h
+ DD 0e15f5fbeh,0e15f5fbeh
+ DD 0a2979735h,0a2979735h
+ DD 0cc444488h,0cc444488h
+ DD 03917172eh,03917172eh
+ DD 057c4c493h,057c4c493h
+ DD 0f2a7a755h,0f2a7a755h
+ DD 0827e7efch,0827e7efch
+ DD 0473d3d7ah,0473d3d7ah
+ DD 0ac6464c8h,0ac6464c8h
+ DD 0e75d5dbah,0e75d5dbah
+ DD 02b191932h,02b191932h
+ DD 0957373e6h,0957373e6h
+ DD 0a06060c0h,0a06060c0h
+ DD 098818119h,098818119h
+ DD 0d14f4f9eh,0d14f4f9eh
+ DD 07fdcdca3h,07fdcdca3h
+ DD 066222244h,066222244h
+ DD 07e2a2a54h,07e2a2a54h
+ DD 0ab90903bh,0ab90903bh
+ DD 08388880bh,08388880bh
+ DD 0ca46468ch,0ca46468ch
+ DD 029eeeec7h,029eeeec7h
+ DD 0d3b8b86bh,0d3b8b86bh
+ DD 03c141428h,03c141428h
+ DD 079dedea7h,079dedea7h
+ DD 0e25e5ebch,0e25e5ebch
+ DD 01d0b0b16h,01d0b0b16h
+ DD 076dbdbadh,076dbdbadh
+ DD 03be0e0dbh,03be0e0dbh
+ DD 056323264h,056323264h
+ DD 04e3a3a74h,04e3a3a74h
+ DD 01e0a0a14h,01e0a0a14h
+ DD 0db494992h,0db494992h
+ DD 00a06060ch,00a06060ch
+ DD 06c242448h,06c242448h
+ DD 0e45c5cb8h,0e45c5cb8h
+ DD 05dc2c29fh,05dc2c29fh
+ DD 06ed3d3bdh,06ed3d3bdh
+ DD 0efacac43h,0efacac43h
+ DD 0a66262c4h,0a66262c4h
+ DD 0a8919139h,0a8919139h
+ DD 0a4959531h,0a4959531h
+ DD 037e4e4d3h,037e4e4d3h
+ DD 08b7979f2h,08b7979f2h
+ DD 032e7e7d5h,032e7e7d5h
+ DD 043c8c88bh,043c8c88bh
+ DD 05937376eh,05937376eh
+ DD 0b76d6ddah,0b76d6ddah
+ DD 08c8d8d01h,08c8d8d01h
+ DD 064d5d5b1h,064d5d5b1h
+ DD 0d24e4e9ch,0d24e4e9ch
+ DD 0e0a9a949h,0e0a9a949h
+ DD 0b46c6cd8h,0b46c6cd8h
+ DD 0fa5656ach,0fa5656ach
+ DD 007f4f4f3h,007f4f4f3h
+ DD 025eaeacfh,025eaeacfh
+ DD 0af6565cah,0af6565cah
+ DD 08e7a7af4h,08e7a7af4h
+ DD 0e9aeae47h,0e9aeae47h
+ DD 018080810h,018080810h
+ DD 0d5baba6fh,0d5baba6fh
+ DD 0887878f0h,0887878f0h
+ DD 06f25254ah,06f25254ah
+ DD 0722e2e5ch,0722e2e5ch
+ DD 0241c1c38h,0241c1c38h
+ DD 0f1a6a657h,0f1a6a657h
+ DD 0c7b4b473h,0c7b4b473h
+ DD 051c6c697h,051c6c697h
+ DD 023e8e8cbh,023e8e8cbh
+ DD 07cdddda1h,07cdddda1h
+ DD 09c7474e8h,09c7474e8h
+ DD 0211f1f3eh,0211f1f3eh
+ DD 0dd4b4b96h,0dd4b4b96h
+ DD 0dcbdbd61h,0dcbdbd61h
+ DD 0868b8b0dh,0868b8b0dh
+ DD 0858a8a0fh,0858a8a0fh
+ DD 0907070e0h,0907070e0h
+ DD 0423e3e7ch,0423e3e7ch
+ DD 0c4b5b571h,0c4b5b571h
+ DD 0aa6666cch,0aa6666cch
+ DD 0d8484890h,0d8484890h
+ DD 005030306h,005030306h
+ DD 001f6f6f7h,001f6f6f7h
+ DD 0120e0e1ch,0120e0e1ch
+ DD 0a36161c2h,0a36161c2h
+ DD 05f35356ah,05f35356ah
+ DD 0f95757aeh,0f95757aeh
+ DD 0d0b9b969h,0d0b9b969h
+ DD 091868617h,091868617h
+ DD 058c1c199h,058c1c199h
+ DD 0271d1d3ah,0271d1d3ah
+ DD 0b99e9e27h,0b99e9e27h
+ DD 038e1e1d9h,038e1e1d9h
+ DD 013f8f8ebh,013f8f8ebh
+ DD 0b398982bh,0b398982bh
+ DD 033111122h,033111122h
+ DD 0bb6969d2h,0bb6969d2h
+ DD 070d9d9a9h,070d9d9a9h
+ DD 0898e8e07h,0898e8e07h
+ DD 0a7949433h,0a7949433h
+ DD 0b69b9b2dh,0b69b9b2dh
+ DD 0221e1e3ch,0221e1e3ch
+ DD 092878715h,092878715h
+ DD 020e9e9c9h,020e9e9c9h
+ DD 049cece87h,049cece87h
+ DD 0ff5555aah,0ff5555aah
+ DD 078282850h,078282850h
+ DD 07adfdfa5h,07adfdfa5h
+ DD 08f8c8c03h,08f8c8c03h
+ DD 0f8a1a159h,0f8a1a159h
+ DD 080898909h,080898909h
+ DD 0170d0d1ah,0170d0d1ah
+ DD 0dabfbf65h,0dabfbf65h
+ DD 031e6e6d7h,031e6e6d7h
+ DD 0c6424284h,0c6424284h
+ DD 0b86868d0h,0b86868d0h
+ DD 0c3414182h,0c3414182h
+ DD 0b0999929h,0b0999929h
+ DD 0772d2d5ah,0772d2d5ah
+ DD 0110f0f1eh,0110f0f1eh
+ DD 0cbb0b07bh,0cbb0b07bh
+ DD 0fc5454a8h,0fc5454a8h
+ DD 0d6bbbb6dh,0d6bbbb6dh
+ DD 03a16162ch,03a16162ch
+DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h
+DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h
+DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h
+DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h
+DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch
+DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h
+DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah
+DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h
+DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h
+DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h
+DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh
+DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh
+DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h
+DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h
+DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h
+DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h
+DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h
+DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h
+DB 060h,081h,04fh,0dch,022h,02ah,090h,088h
+DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh
+DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch
+DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h
+DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h
+DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h
+DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h
+DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah
+DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh
+DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh
+DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h
+DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh
+DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h
+DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h
+DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h
+DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h
+DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h
+DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h
+DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch
+DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h
+DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah
+DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h
+DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h
+DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h
+DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh
+DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh
+DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h
+DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h
+DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h
+DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h
+DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h
+DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h
+DB 060h,081h,04fh,0dch,022h,02ah,090h,088h
+DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh
+DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch
+DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h
+DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h
+DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h
+DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h
+DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah
+DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh
+DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh
+DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h
+DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh
+DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h
+DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h
+DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h
+DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h
+DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h
+DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h
+DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch
+DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h
+DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah
+DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h
+DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h
+DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h
+DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh
+DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh
+DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h
+DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h
+DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h
+DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h
+DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h
+DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h
+DB 060h,081h,04fh,0dch,022h,02ah,090h,088h
+DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh
+DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch
+DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h
+DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h
+DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h
+DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h
+DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah
+DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh
+DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh
+DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h
+DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh
+DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h
+DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h
+DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h
+DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h
+DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h
+DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h
+DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch
+DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h
+DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah
+DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h
+DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h
+DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h
+DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh
+DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh
+DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h
+DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h
+DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h
+DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h
+DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h
+DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h
+DB 060h,081h,04fh,0dch,022h,02ah,090h,088h
+DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh
+DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch
+DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h
+DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h
+DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h
+DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h
+DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah
+DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh
+DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh
+DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h
+DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh
+DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h
+DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h
+ DD 000000001h,000000002h,000000004h,000000008h
+ DD 000000010h,000000020h,000000040h,000000080h
+ DD 00000001bh,000000036h,080808080h,080808080h
+ DD 0fefefefeh,0fefefefeh,01b1b1b1bh,01b1b1b1bh
+ALIGN 64
+$L$AES_Td::
+ DD 050a7f451h,050a7f451h
+ DD 05365417eh,05365417eh
+ DD 0c3a4171ah,0c3a4171ah
+ DD 0965e273ah,0965e273ah
+ DD 0cb6bab3bh,0cb6bab3bh
+ DD 0f1459d1fh,0f1459d1fh
+ DD 0ab58faach,0ab58faach
+ DD 09303e34bh,09303e34bh
+ DD 055fa3020h,055fa3020h
+ DD 0f66d76adh,0f66d76adh
+ DD 09176cc88h,09176cc88h
+ DD 0254c02f5h,0254c02f5h
+ DD 0fcd7e54fh,0fcd7e54fh
+ DD 0d7cb2ac5h,0d7cb2ac5h
+ DD 080443526h,080443526h
+ DD 08fa362b5h,08fa362b5h
+ DD 0495ab1deh,0495ab1deh
+ DD 0671bba25h,0671bba25h
+ DD 0980eea45h,0980eea45h
+ DD 0e1c0fe5dh,0e1c0fe5dh
+ DD 002752fc3h,002752fc3h
+ DD 012f04c81h,012f04c81h
+ DD 0a397468dh,0a397468dh
+ DD 0c6f9d36bh,0c6f9d36bh
+ DD 0e75f8f03h,0e75f8f03h
+ DD 0959c9215h,0959c9215h
+ DD 0eb7a6dbfh,0eb7a6dbfh
+ DD 0da595295h,0da595295h
+ DD 02d83bed4h,02d83bed4h
+ DD 0d3217458h,0d3217458h
+ DD 02969e049h,02969e049h
+ DD 044c8c98eh,044c8c98eh
+ DD 06a89c275h,06a89c275h
+ DD 078798ef4h,078798ef4h
+ DD 06b3e5899h,06b3e5899h
+ DD 0dd71b927h,0dd71b927h
+ DD 0b64fe1beh,0b64fe1beh
+ DD 017ad88f0h,017ad88f0h
+ DD 066ac20c9h,066ac20c9h
+ DD 0b43ace7dh,0b43ace7dh
+ DD 0184adf63h,0184adf63h
+ DD 082311ae5h,082311ae5h
+ DD 060335197h,060335197h
+ DD 0457f5362h,0457f5362h
+ DD 0e07764b1h,0e07764b1h
+ DD 084ae6bbbh,084ae6bbbh
+ DD 01ca081feh,01ca081feh
+ DD 0942b08f9h,0942b08f9h
+ DD 058684870h,058684870h
+ DD 019fd458fh,019fd458fh
+ DD 0876cde94h,0876cde94h
+ DD 0b7f87b52h,0b7f87b52h
+ DD 023d373abh,023d373abh
+ DD 0e2024b72h,0e2024b72h
+ DD 0578f1fe3h,0578f1fe3h
+ DD 02aab5566h,02aab5566h
+ DD 00728ebb2h,00728ebb2h
+ DD 003c2b52fh,003c2b52fh
+ DD 09a7bc586h,09a7bc586h
+ DD 0a50837d3h,0a50837d3h
+ DD 0f2872830h,0f2872830h
+ DD 0b2a5bf23h,0b2a5bf23h
+ DD 0ba6a0302h,0ba6a0302h
+ DD 05c8216edh,05c8216edh
+ DD 02b1ccf8ah,02b1ccf8ah
+ DD 092b479a7h,092b479a7h
+ DD 0f0f207f3h,0f0f207f3h
+ DD 0a1e2694eh,0a1e2694eh
+ DD 0cdf4da65h,0cdf4da65h
+ DD 0d5be0506h,0d5be0506h
+ DD 01f6234d1h,01f6234d1h
+ DD 08afea6c4h,08afea6c4h
+ DD 09d532e34h,09d532e34h
+ DD 0a055f3a2h,0a055f3a2h
+ DD 032e18a05h,032e18a05h
+ DD 075ebf6a4h,075ebf6a4h
+ DD 039ec830bh,039ec830bh
+ DD 0aaef6040h,0aaef6040h
+ DD 0069f715eh,0069f715eh
+ DD 051106ebdh,051106ebdh
+ DD 0f98a213eh,0f98a213eh
+ DD 03d06dd96h,03d06dd96h
+ DD 0ae053eddh,0ae053eddh
+ DD 046bde64dh,046bde64dh
+ DD 0b58d5491h,0b58d5491h
+ DD 0055dc471h,0055dc471h
+ DD 06fd40604h,06fd40604h
+ DD 0ff155060h,0ff155060h
+ DD 024fb9819h,024fb9819h
+ DD 097e9bdd6h,097e9bdd6h
+ DD 0cc434089h,0cc434089h
+ DD 0779ed967h,0779ed967h
+ DD 0bd42e8b0h,0bd42e8b0h
+ DD 0888b8907h,0888b8907h
+ DD 0385b19e7h,0385b19e7h
+ DD 0dbeec879h,0dbeec879h
+ DD 0470a7ca1h,0470a7ca1h
+ DD 0e90f427ch,0e90f427ch
+ DD 0c91e84f8h,0c91e84f8h
+ DD 000000000h,000000000h
+ DD 083868009h,083868009h
+ DD 048ed2b32h,048ed2b32h
+ DD 0ac70111eh,0ac70111eh
+ DD 04e725a6ch,04e725a6ch
+ DD 0fbff0efdh,0fbff0efdh
+ DD 05638850fh,05638850fh
+ DD 01ed5ae3dh,01ed5ae3dh
+ DD 027392d36h,027392d36h
+ DD 064d90f0ah,064d90f0ah
+ DD 021a65c68h,021a65c68h
+ DD 0d1545b9bh,0d1545b9bh
+ DD 03a2e3624h,03a2e3624h
+ DD 0b1670a0ch,0b1670a0ch
+ DD 00fe75793h,00fe75793h
+ DD 0d296eeb4h,0d296eeb4h
+ DD 09e919b1bh,09e919b1bh
+ DD 04fc5c080h,04fc5c080h
+ DD 0a220dc61h,0a220dc61h
+ DD 0694b775ah,0694b775ah
+ DD 0161a121ch,0161a121ch
+ DD 00aba93e2h,00aba93e2h
+ DD 0e52aa0c0h,0e52aa0c0h
+ DD 043e0223ch,043e0223ch
+ DD 01d171b12h,01d171b12h
+ DD 00b0d090eh,00b0d090eh
+ DD 0adc78bf2h,0adc78bf2h
+ DD 0b9a8b62dh,0b9a8b62dh
+ DD 0c8a91e14h,0c8a91e14h
+ DD 08519f157h,08519f157h
+ DD 04c0775afh,04c0775afh
+ DD 0bbdd99eeh,0bbdd99eeh
+ DD 0fd607fa3h,0fd607fa3h
+ DD 09f2601f7h,09f2601f7h
+ DD 0bcf5725ch,0bcf5725ch
+ DD 0c53b6644h,0c53b6644h
+ DD 0347efb5bh,0347efb5bh
+ DD 07629438bh,07629438bh
+ DD 0dcc623cbh,0dcc623cbh
+ DD 068fcedb6h,068fcedb6h
+ DD 063f1e4b8h,063f1e4b8h
+ DD 0cadc31d7h,0cadc31d7h
+ DD 010856342h,010856342h
+ DD 040229713h,040229713h
+ DD 02011c684h,02011c684h
+ DD 07d244a85h,07d244a85h
+ DD 0f83dbbd2h,0f83dbbd2h
+ DD 01132f9aeh,01132f9aeh
+ DD 06da129c7h,06da129c7h
+ DD 04b2f9e1dh,04b2f9e1dh
+ DD 0f330b2dch,0f330b2dch
+ DD 0ec52860dh,0ec52860dh
+ DD 0d0e3c177h,0d0e3c177h
+ DD 06c16b32bh,06c16b32bh
+ DD 099b970a9h,099b970a9h
+ DD 0fa489411h,0fa489411h
+ DD 02264e947h,02264e947h
+ DD 0c48cfca8h,0c48cfca8h
+ DD 01a3ff0a0h,01a3ff0a0h
+ DD 0d82c7d56h,0d82c7d56h
+ DD 0ef903322h,0ef903322h
+ DD 0c74e4987h,0c74e4987h
+ DD 0c1d138d9h,0c1d138d9h
+ DD 0fea2ca8ch,0fea2ca8ch
+ DD 0360bd498h,0360bd498h
+ DD 0cf81f5a6h,0cf81f5a6h
+ DD 028de7aa5h,028de7aa5h
+ DD 0268eb7dah,0268eb7dah
+ DD 0a4bfad3fh,0a4bfad3fh
+ DD 0e49d3a2ch,0e49d3a2ch
+ DD 00d927850h,00d927850h
+ DD 09bcc5f6ah,09bcc5f6ah
+ DD 062467e54h,062467e54h
+ DD 0c2138df6h,0c2138df6h
+ DD 0e8b8d890h,0e8b8d890h
+ DD 05ef7392eh,05ef7392eh
+ DD 0f5afc382h,0f5afc382h
+ DD 0be805d9fh,0be805d9fh
+ DD 07c93d069h,07c93d069h
+ DD 0a92dd56fh,0a92dd56fh
+ DD 0b31225cfh,0b31225cfh
+ DD 03b99acc8h,03b99acc8h
+ DD 0a77d1810h,0a77d1810h
+ DD 06e639ce8h,06e639ce8h
+ DD 07bbb3bdbh,07bbb3bdbh
+ DD 0097826cdh,0097826cdh
+ DD 0f418596eh,0f418596eh
+ DD 001b79aech,001b79aech
+ DD 0a89a4f83h,0a89a4f83h
+ DD 0656e95e6h,0656e95e6h
+ DD 07ee6ffaah,07ee6ffaah
+ DD 008cfbc21h,008cfbc21h
+ DD 0e6e815efh,0e6e815efh
+ DD 0d99be7bah,0d99be7bah
+ DD 0ce366f4ah,0ce366f4ah
+ DD 0d4099feah,0d4099feah
+ DD 0d67cb029h,0d67cb029h
+ DD 0afb2a431h,0afb2a431h
+ DD 031233f2ah,031233f2ah
+ DD 03094a5c6h,03094a5c6h
+ DD 0c066a235h,0c066a235h
+ DD 037bc4e74h,037bc4e74h
+ DD 0a6ca82fch,0a6ca82fch
+ DD 0b0d090e0h,0b0d090e0h
+ DD 015d8a733h,015d8a733h
+ DD 04a9804f1h,04a9804f1h
+ DD 0f7daec41h,0f7daec41h
+ DD 00e50cd7fh,00e50cd7fh
+ DD 02ff69117h,02ff69117h
+ DD 08dd64d76h,08dd64d76h
+ DD 04db0ef43h,04db0ef43h
+ DD 0544daacch,0544daacch
+ DD 0df0496e4h,0df0496e4h
+ DD 0e3b5d19eh,0e3b5d19eh
+ DD 01b886a4ch,01b886a4ch
+ DD 0b81f2cc1h,0b81f2cc1h
+ DD 07f516546h,07f516546h
+ DD 004ea5e9dh,004ea5e9dh
+ DD 05d358c01h,05d358c01h
+ DD 0737487fah,0737487fah
+ DD 02e410bfbh,02e410bfbh
+ DD 05a1d67b3h,05a1d67b3h
+ DD 052d2db92h,052d2db92h
+ DD 0335610e9h,0335610e9h
+ DD 01347d66dh,01347d66dh
+ DD 08c61d79ah,08c61d79ah
+ DD 07a0ca137h,07a0ca137h
+ DD 08e14f859h,08e14f859h
+ DD 0893c13ebh,0893c13ebh
+ DD 0ee27a9ceh,0ee27a9ceh
+ DD 035c961b7h,035c961b7h
+ DD 0ede51ce1h,0ede51ce1h
+ DD 03cb1477ah,03cb1477ah
+ DD 059dfd29ch,059dfd29ch
+ DD 03f73f255h,03f73f255h
+ DD 079ce1418h,079ce1418h
+ DD 0bf37c773h,0bf37c773h
+ DD 0eacdf753h,0eacdf753h
+ DD 05baafd5fh,05baafd5fh
+ DD 0146f3ddfh,0146f3ddfh
+ DD 086db4478h,086db4478h
+ DD 081f3afcah,081f3afcah
+ DD 03ec468b9h,03ec468b9h
+ DD 02c342438h,02c342438h
+ DD 05f40a3c2h,05f40a3c2h
+ DD 072c31d16h,072c31d16h
+ DD 00c25e2bch,00c25e2bch
+ DD 08b493c28h,08b493c28h
+ DD 041950dffh,041950dffh
+ DD 07101a839h,07101a839h
+ DD 0deb30c08h,0deb30c08h
+ DD 09ce4b4d8h,09ce4b4d8h
+ DD 090c15664h,090c15664h
+ DD 06184cb7bh,06184cb7bh
+ DD 070b632d5h,070b632d5h
+ DD 0745c6c48h,0745c6c48h
+ DD 04257b8d0h,04257b8d0h
+DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h
+DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh
+DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h
+DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh
+DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh
+DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh
+DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h
+DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h
+DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h
+DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h
+DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah
+DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h
+DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah
+DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h
+DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h
+DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh
+DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah
+DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h
+DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h
+DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh
+DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h
+DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh
+DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h
+DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h
+DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h
+DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh
+DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh
+DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh
+DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h
+DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h
+DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h
+DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh
+ DD 080808080h,080808080h,0fefefefeh,0fefefefeh
+ DD 01b1b1b1bh,01b1b1b1bh,0,0
+DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h
+DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh
+DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h
+DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh
+DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh
+DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh
+DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h
+DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h
+DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h
+DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h
+DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah
+DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h
+DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah
+DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h
+DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h
+DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh
+DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah
+DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h
+DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h
+DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh
+DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h
+DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh
+DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h
+DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h
+DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h
+DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh
+DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh
+DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh
+DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h
+DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h
+DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h
+DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh
+ DD 080808080h,080808080h,0fefefefeh,0fefefefeh
+ DD 01b1b1b1bh,01b1b1b1bh,0,0
+DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h
+DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh
+DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h
+DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh
+DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh
+DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh
+DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h
+DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h
+DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h
+DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h
+DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah
+DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h
+DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah
+DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h
+DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h
+DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh
+DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah
+DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h
+DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h
+DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh
+DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h
+DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh
+DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h
+DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h
+DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h
+DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh
+DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh
+DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh
+DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h
+DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h
+DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h
+DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh
+ DD 080808080h,080808080h,0fefefefeh,0fefefefeh
+ DD 01b1b1b1bh,01b1b1b1bh,0,0
+DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h
+DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh
+DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h
+DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh
+DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh
+DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh
+DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h
+DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h
+DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h
+DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h
+DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah
+DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h
+DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah
+DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h
+DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h
+DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh
+DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah
+DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h
+DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h
+DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh
+DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h
+DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh
+DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h
+DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h
+DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h
+DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh
+DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh
+DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh
+DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h
+DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h
+DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h
+DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh
+ DD 080808080h,080808080h,0fefefefeh,0fefefefeh
+ DD 01b1b1b1bh,01b1b1b1bh,0,0
+DB 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32
+DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+DB 62,0
+ALIGN 64
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+block_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$in_block_prologue
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_block_prologue
+
+ mov rax,QWORD PTR[24+rax]
+ lea rax,QWORD PTR[48+rax]
+
+ mov rbx,QWORD PTR[((-8))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+$L$in_block_prologue::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ jmp $L$common_seh_exit
+block_se_handler ENDP
+
+
+ALIGN 16
+key_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$in_key_prologue
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_key_prologue
+
+ lea rax,QWORD PTR[56+rax]
+
+ mov rbx,QWORD PTR[((-8))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+$L$in_key_prologue::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ jmp $L$common_seh_exit
+key_se_handler ENDP
+
+
+ALIGN 16
+cbc_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ lea r10,QWORD PTR[$L$cbc_prologue]
+ cmp rbx,r10
+ jb $L$in_cbc_prologue
+
+ lea r10,QWORD PTR[$L$cbc_fast_body]
+ cmp rbx,r10
+ jb $L$in_cbc_frame_setup
+
+ lea r10,QWORD PTR[$L$cbc_slow_prologue]
+ cmp rbx,r10
+ jb $L$in_cbc_body
+
+ lea r10,QWORD PTR[$L$cbc_slow_body]
+ cmp rbx,r10
+ jb $L$in_cbc_frame_setup
+
+$L$in_cbc_body::
+ mov rax,QWORD PTR[152+r8]
+
+ lea r10,QWORD PTR[$L$cbc_epilogue]
+ cmp rbx,r10
+ jae $L$in_cbc_prologue
+
+ lea rax,QWORD PTR[8+rax]
+
+ lea r10,QWORD PTR[$L$cbc_popfq]
+ cmp rbx,r10
+ jae $L$in_cbc_prologue
+
+ mov rax,QWORD PTR[8+rax]
+ lea rax,QWORD PTR[56+rax]
+
+$L$in_cbc_frame_setup::
+ mov rbx,QWORD PTR[((-16))+rax]
+ mov rbp,QWORD PTR[((-24))+rax]
+ mov r12,QWORD PTR[((-32))+rax]
+ mov r13,QWORD PTR[((-40))+rax]
+ mov r14,QWORD PTR[((-48))+rax]
+ mov r15,QWORD PTR[((-56))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+$L$in_cbc_prologue::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+$L$common_seh_exit::
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+cbc_se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_AES_encrypt
+ DD imagerel $L$SEH_end_AES_encrypt
+ DD imagerel $L$SEH_info_AES_encrypt
+
+ DD imagerel $L$SEH_begin_AES_decrypt
+ DD imagerel $L$SEH_end_AES_decrypt
+ DD imagerel $L$SEH_info_AES_decrypt
+
+ DD imagerel $L$SEH_begin_AES_set_encrypt_key
+ DD imagerel $L$SEH_end_AES_set_encrypt_key
+ DD imagerel $L$SEH_info_AES_set_encrypt_key
+
+ DD imagerel $L$SEH_begin_AES_set_decrypt_key
+ DD imagerel $L$SEH_end_AES_set_decrypt_key
+ DD imagerel $L$SEH_info_AES_set_decrypt_key
+
+ DD imagerel $L$SEH_begin_AES_cbc_encrypt
+ DD imagerel $L$SEH_end_AES_cbc_encrypt
+ DD imagerel $L$SEH_info_AES_cbc_encrypt
+
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_AES_encrypt::
+DB 9,0,0,0
+ DD imagerel block_se_handler
+ DD imagerel $L$enc_prologue,imagerel $L$enc_epilogue
+$L$SEH_info_AES_decrypt::
+DB 9,0,0,0
+ DD imagerel block_se_handler
+ DD imagerel $L$dec_prologue,imagerel $L$dec_epilogue
+$L$SEH_info_AES_set_encrypt_key::
+DB 9,0,0,0
+ DD imagerel key_se_handler
+ DD imagerel $L$enc_key_prologue,imagerel $L$enc_key_epilogue
+$L$SEH_info_AES_set_decrypt_key::
+DB 9,0,0,0
+ DD imagerel key_se_handler
+ DD imagerel $L$dec_key_prologue,imagerel $L$dec_key_epilogue
+$L$SEH_info_AES_cbc_encrypt::
+DB 9,0,0,0
+ DD imagerel cbc_se_handler
+
+.xdata ENDS
+END
+
diff --git a/ext/libressl/crypto/aes/aes-mingw64-x86_64.S b/ext/libressl/crypto/aes/aes-mingw64-x86_64.S
new file mode 100644
index 0000000..ca2d60f
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes-mingw64-x86_64.S
@@ -0,0 +1,2861 @@
+#include "x86_arch.h"
+
+.text
+.def _x86_64_AES_encrypt; .scl 3; .type 32; .endef
+.p2align 4
+_x86_64_AES_encrypt:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+
+ movl 240(%r15),%r13d
+ subl $1,%r13d
+ jmp .Lenc_loop
+.p2align 4
+.Lenc_loop:
+
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+
+ movzbl %dh,%esi
+ shrl $16,%ecx
+ movzbl %ah,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+
+ shrl $16,%ebx
+ leaq 16(%r15),%r15
+ shrl $16,%eax
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+
+ movl 12(%r15),%edx
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rdi,8),%r12d
+ xorl 1(%r14,%rbp,8),%r8d
+
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ subl $1,%r13d
+ jnz .Lenc_loop
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl 2(%r14,%rsi,8),%r10d
+ movzbl 2(%r14,%rdi,8),%r11d
+ movzbl 2(%r14,%rbp,8),%r12d
+
+ movzbl %dl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl 2(%r14,%rsi,8),%r8d
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+
+ andl $65280,%edi
+ andl $65280,%ebp
+
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%ecx
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ shrl $16,%edx
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+
+ andl $65280,%esi
+ andl $65280,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%eax
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+
+ andl $16711680,%esi
+ andl $16711680,%edi
+ andl $16711680,%ebp
+
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+
+ movzbl %bl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 2(%r14,%rbp,8),%ebp
+
+ andl $16711680,%esi
+ andl $4278190080,%edi
+ andl $4278190080,%ebp
+
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movl 16+12(%r15),%edx
+ movl 2(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 16+0(%r15),%eax
+
+ andl $4278190080,%esi
+ andl $4278190080,%edi
+
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq
+
+.def _x86_64_AES_encrypt_compact; .scl 3; .type 32; .endef
+.p2align 4
+_x86_64_AES_encrypt_compact:
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp .Lenc_loop_compact
+.p2align 4
+.Lenc_loop_compact:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15
+ movzbl %al,%r10d
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+
+ movzbl %dl,%r8d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+
+ movzbl %dh,%ebp
+ movzbl %ah,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+
+ movzbl %cl,%edi
+ shll $8,%r9d
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+
+ movzbl %bl,%ebp
+ movzbl %dh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+
+ movzbl %ah,%edi
+ shrl $8,%ecx
+ shrl $8,%ebx
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rcx,1),%edx
+ movzbl (%r14,%rbx,1),%ecx
+ shll $16,%r9d
+ shll $16,%r13d
+ shll $16,%ebp
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+ xorl %ebp,%r8d
+
+ shll $24,%esi
+ shll $24,%edi
+ shll $24,%edx
+ xorl %esi,%r10d
+ shll $24,%ecx
+ xorl %edi,%r11d
+ movl %r10d,%eax
+ movl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ cmpq 16(%rsp),%r15
+ je .Lenc_compact_done
+ movl %eax,%esi
+ movl %ebx,%edi
+ andl $2155905152,%esi
+ andl $2155905152,%edi
+ movl %esi,%r10d
+ movl %edi,%r11d
+ shrl $7,%r10d
+ leal (%rax,%rax,1),%r8d
+ shrl $7,%r11d
+ leal (%rbx,%rbx,1),%r9d
+ subl %r10d,%esi
+ subl %r11d,%edi
+ andl $4278124286,%r8d
+ andl $4278124286,%r9d
+ andl $454761243,%esi
+ andl $454761243,%edi
+ movl %eax,%r10d
+ movl %ebx,%r11d
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movl %ecx,%esi
+ movl %edx,%edi
+ roll $24,%eax
+ roll $24,%ebx
+ andl $2155905152,%esi
+ andl $2155905152,%edi
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movl %esi,%r12d
+ movl %edi,%ebp
+ rorl $16,%r10d
+ rorl $16,%r11d
+ shrl $7,%r12d
+ leal (%rcx,%rcx,1),%r8d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ shrl $7,%ebp
+ leal (%rdx,%rdx,1),%r9d
+ rorl $8,%r10d
+ rorl $8,%r11d
+ subl %r12d,%esi
+ subl %ebp,%edi
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+
+ andl $4278124286,%r8d
+ andl $4278124286,%r9d
+ andl $454761243,%esi
+ andl $454761243,%edi
+ movl %ecx,%r12d
+ movl %edx,%ebp
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ roll $24,%ecx
+ roll $24,%edx
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ movl 0(%r14),%esi
+ rorl $16,%r12d
+ rorl $16,%ebp
+ movl 64(%r14),%edi
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ movl 128(%r14),%r8d
+ rorl $8,%r12d
+ rorl $8,%ebp
+ movl 192(%r14),%r9d
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ jmp .Lenc_loop_compact
+.p2align 4
+.Lenc_compact_done:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq
+
+.globl AES_encrypt
+.def AES_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+.globl asm_AES_encrypt
+
+asm_AES_encrypt:
+AES_encrypt:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_AES_encrypt:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+ movq %rsp,%r10
+ leaq -63(%rdx),%rcx
+ andq $-64,%rsp
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx
+ subq %rcx,%rsp
+ subq $32,%rsp
+
+ movq %rsi,16(%rsp)
+ movq %r10,24(%rsp)
+.Lenc_prologue:
+
+ movq %rdx,%r15
+ movl 240(%r15),%r13d
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+
+ shll $4,%r13d
+ leaq (%r15,%r13,1),%rbp
+ movq %r15,(%rsp)
+ movq %rbp,8(%rsp)
+
+
+ leaq .LAES_Te+2048(%rip),%r14
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp
+ leaq (%r14,%rbp,1),%r14
+
+ call _x86_64_AES_encrypt_compact
+
+ movq 16(%rsp),%r9
+ movq 24(%rsp),%rsi
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lenc_epilogue:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_AES_encrypt:
+.def _x86_64_AES_decrypt; .scl 3; .type 32; .endef
+.p2align 4
+_x86_64_AES_decrypt:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+
+ movl 240(%r15),%r13d
+ subl $1,%r13d
+ jmp .Ldec_loop
+.p2align 4
+.Ldec_loop:
+
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+
+ movzbl %bh,%esi
+ shrl $16,%eax
+ movzbl %ch,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+
+ shrl $16,%ebx
+ leaq 16(%r15),%r15
+ shrl $16,%ecx
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+
+ movzbl %dh,%esi
+ movl 12(%r15),%edx
+ movzbl %ah,%ebp
+ xorl 1(%r14,%rsi,8),%r12d
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rbp,8),%r8d
+
+ xorl %r10d,%eax
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r12d,%ecx
+ xorl %r11d,%ebx
+ xorl %r8d,%edx
+ subl $1,%r13d
+ jnz .Ldec_loop
+ leaq 2048(%r14),%r14
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl (%r14,%rsi,1),%r10d
+ movzbl (%r14,%rdi,1),%r11d
+ movzbl (%r14,%rbp,1),%r12d
+
+ movzbl %dl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movzbl (%r14,%rsi,1),%r8d
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $8,%edi
+ shll $8,%ebp
+
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%edx
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ shrl $16,%eax
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+
+ shll $8,%esi
+ shll $8,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%ecx
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $16,%esi
+ shll $16,%edi
+ shll $16,%ebp
+
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+
+ movzbl %bl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $16,%esi
+ shll $24,%edi
+ shll $24,%ebp
+
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movl 16+12(%r15),%edx
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movl 16+0(%r15),%eax
+
+ shll $24,%esi
+ shll $24,%edi
+
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ leaq -2048(%r14),%r14
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq
+
+.def _x86_64_AES_decrypt_compact; .scl 3; .type 32; .endef
+.p2align 4
+_x86_64_AES_decrypt_compact:
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp .Ldec_loop_compact
+
+.p2align 4
+.Ldec_loop_compact:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15
+ movzbl %al,%r10d
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+
+ movzbl %dl,%r8d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+
+ movzbl %bh,%ebp
+ movzbl %ch,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+
+ movzbl %cl,%edi
+ shll $8,%r9d
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+
+ movzbl %bl,%ebp
+ movzbl %bh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+
+ movzbl %ch,%edi
+ shll $16,%r9d
+ shll $16,%r13d
+ movzbl (%r14,%rdi,1),%ebx
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+
+ movzbl %dh,%edi
+ shrl $8,%eax
+ shll $16,%ebp
+ movzbl (%r14,%rdi,1),%ecx
+ movzbl (%r14,%rax,1),%edx
+ xorl %ebp,%r8d
+
+ shll $24,%esi
+ shll $24,%ebx
+ shll $24,%ecx
+ xorl %esi,%r10d
+ shll $24,%edx
+ xorl %r11d,%ebx
+ movl %r10d,%eax
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ cmpq 16(%rsp),%r15
+ je .Ldec_compact_done
+
+ movq 256+0(%r14),%rsi
+ shlq $32,%rbx
+ shlq $32,%rdx
+ movq 256+8(%r14),%rdi
+ orq %rbx,%rax
+ orq %rdx,%rcx
+ movq 256+16(%r14),%rbp
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8
+ movq %rdx,%r11
+
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10
+ xorq %rdx,%r13
+
+ xorq %r10,%rax
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq 0(%r14),%rsi
+ shrq $32,%r8
+ shrq $32,%r11
+ movq 64(%r14),%rdi
+ roll $16,%r9d
+ roll $16,%r12d
+ movq 128(%r14),%rbp
+ roll $16,%r8d
+ roll $16,%r11d
+ movq 192(%r14),%r10
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+ movq 256(%r14),%r13
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ jmp .Ldec_loop_compact
+.p2align 4
+.Ldec_compact_done:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq
+
+.globl AES_decrypt
+.def AES_decrypt; .scl 2; .type 32; .endef
+.p2align 4
+.globl asm_AES_decrypt
+
+asm_AES_decrypt:
+AES_decrypt:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_AES_decrypt:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+ movq %rsp,%r10
+ leaq -63(%rdx),%rcx
+ andq $-64,%rsp
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx
+ subq %rcx,%rsp
+ subq $32,%rsp
+
+ movq %rsi,16(%rsp)
+ movq %r10,24(%rsp)
+.Ldec_prologue:
+
+ movq %rdx,%r15
+ movl 240(%r15),%r13d
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+
+ shll $4,%r13d
+ leaq (%r15,%r13,1),%rbp
+ movq %r15,(%rsp)
+ movq %rbp,8(%rsp)
+
+
+ leaq .LAES_Td+2048(%rip),%r14
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp
+ leaq (%r14,%rbp,1),%r14
+ shrq $3,%rbp
+ addq %rbp,%r14
+
+ call _x86_64_AES_decrypt_compact
+
+ movq 16(%rsp),%r9
+ movq 24(%rsp),%rsi
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Ldec_epilogue:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_AES_decrypt:
+.globl AES_set_encrypt_key
+.def AES_set_encrypt_key; .scl 2; .type 32; .endef
+.p2align 4
+AES_set_encrypt_key:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_AES_set_encrypt_key:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $8,%rsp
+.Lenc_key_prologue:
+
+ call _x86_64_AES_set_encrypt_key
+
+ movq 8(%rsp),%r15
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp
+.Lenc_key_epilogue:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_AES_set_encrypt_key:
+
+.def _x86_64_AES_set_encrypt_key; .scl 3; .type 32; .endef
+.p2align 4
+_x86_64_AES_set_encrypt_key:
+ movl %esi,%ecx
+ movq %rdi,%rsi
+ movq %rdx,%rdi
+
+ testq $-1,%rsi
+ jz .Lbadpointer
+ testq $-1,%rdi
+ jz .Lbadpointer
+
+ leaq .LAES_Te(%rip),%rbp
+ leaq 2048+128(%rbp),%rbp
+
+
+ movl 0-128(%rbp),%eax
+ movl 32-128(%rbp),%ebx
+ movl 64-128(%rbp),%r8d
+ movl 96-128(%rbp),%edx
+ movl 128-128(%rbp),%eax
+ movl 160-128(%rbp),%ebx
+ movl 192-128(%rbp),%r8d
+ movl 224-128(%rbp),%edx
+
+ cmpl $128,%ecx
+ je .L10rounds
+ cmpl $192,%ecx
+ je .L12rounds
+ cmpl $256,%ecx
+ je .L14rounds
+ movq $-2,%rax
+ jmp .Lexit
+
+.L10rounds:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rdx,8(%rdi)
+
+ shrq $32,%rdx
+ xorl %ecx,%ecx
+ jmp .L10shortcut
+.p2align 2
+.L10loop:
+ movl 0(%rdi),%eax
+ movl 12(%rdi),%edx
+.L10shortcut:
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,16(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,20(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,24(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,28(%rdi)
+ addl $1,%ecx
+ leaq 16(%rdi),%rdi
+ cmpl $10,%ecx
+ jl .L10loop
+
+ movl $10,80(%rdi)
+ xorq %rax,%rax
+ jmp .Lexit
+
+.L12rounds:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rdx,16(%rdi)
+
+ shrq $32,%rdx
+ xorl %ecx,%ecx
+ jmp .L12shortcut
+.p2align 2
+.L12loop:
+ movl 0(%rdi),%eax
+ movl 20(%rdi),%edx
+.L12shortcut:
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,24(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,28(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,32(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,36(%rdi)
+
+ cmpl $7,%ecx
+ je .L12break
+ addl $1,%ecx
+
+ xorl 16(%rdi),%eax
+ movl %eax,40(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,44(%rdi)
+
+ leaq 24(%rdi),%rdi
+ jmp .L12loop
+.L12break:
+ movl $12,72(%rdi)
+ xorq %rax,%rax
+ jmp .Lexit
+
+.L14rounds:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rcx
+ movq 24(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+
+ shrq $32,%rdx
+ xorl %ecx,%ecx
+ jmp .L14shortcut
+.p2align 2
+.L14loop:
+ movl 0(%rdi),%eax
+ movl 28(%rdi),%edx
+.L14shortcut:
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,32(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,36(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,40(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,44(%rdi)
+
+ cmpl $6,%ecx
+ je .L14break
+ addl $1,%ecx
+
+ movl %eax,%edx
+ movl 16(%rdi),%eax
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ shll $8,%ebx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movl %eax,48(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,52(%rdi)
+ xorl 24(%rdi),%eax
+ movl %eax,56(%rdi)
+ xorl 28(%rdi),%eax
+ movl %eax,60(%rdi)
+
+ leaq 32(%rdi),%rdi
+ jmp .L14loop
+.L14break:
+ movl $14,48(%rdi)
+ xorq %rax,%rax
+ jmp .Lexit
+
+.Lbadpointer:
+ movq $-1,%rax
+.Lexit:
+ retq
+
+.globl AES_set_decrypt_key
+.def AES_set_decrypt_key; .scl 2; .type 32; .endef
+.p2align 4
+AES_set_decrypt_key:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_AES_set_decrypt_key:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rdx
+.Ldec_key_prologue:
+
+ call _x86_64_AES_set_encrypt_key
+ movq (%rsp),%r8
+ cmpl $0,%eax
+ jne .Labort
+
+ movl 240(%r8),%r14d
+ xorq %rdi,%rdi
+ leaq (%rdi,%r14,4),%rcx
+ movq %r8,%rsi
+ leaq (%r8,%rcx,4),%rdi
+.p2align 2
+.Linvert:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 0(%rdi),%rcx
+ movq 8(%rdi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,0(%rsi)
+ movq %rdx,8(%rsi)
+ leaq 16(%rsi),%rsi
+ leaq -16(%rdi),%rdi
+ cmpq %rsi,%rdi
+ jne .Linvert
+
+ leaq .LAES_Te+2048+1024(%rip),%rax
+
+ movq 40(%rax),%rsi
+ movq 48(%rax),%rdi
+ movq 56(%rax),%rbp
+
+ movq %r8,%r15
+ subl $1,%r14d
+.p2align 2
+.Lpermute:
+ leaq 16(%r15),%r15
+ movq 0(%r15),%rax
+ movq 8(%r15),%rcx
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8
+ movq %rdx,%r11
+
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10
+ xorq %rdx,%r13
+
+ xorq %r10,%rax
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+
+ shrq $32,%r8
+ shrq $32,%r11
+
+ roll $16,%r9d
+ roll $16,%r12d
+
+ roll $16,%r8d
+ roll $16,%r11d
+
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ movl %eax,0(%r15)
+ movl %ebx,4(%r15)
+ movl %ecx,8(%r15)
+ movl %edx,12(%r15)
+ subl $1,%r14d
+ jnz .Lpermute
+
+ xorq %rax,%rax
+.Labort:
+ movq 8(%rsp),%r15
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp
+.Ldec_key_epilogue:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_AES_set_decrypt_key:
+.globl AES_cbc_encrypt
+.def AES_cbc_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+
+
+.globl asm_AES_cbc_encrypt
+
+asm_AES_cbc_encrypt:
+AES_cbc_encrypt:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_AES_cbc_encrypt:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+
+ cmpq $0,%rdx
+ je .Lcbc_epilogue
+ pushfq
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+.Lcbc_prologue:
+
+ cld
+ movl %r9d,%r9d
+
+ leaq .LAES_Te(%rip),%r14
+ cmpq $0,%r9
+ jne .Lcbc_picked_te
+ leaq .LAES_Td(%rip),%r14
+.Lcbc_picked_te:
+
+ movl OPENSSL_ia32cap_P(%rip),%r10d
+ cmpq $512,%rdx
+ jb .Lcbc_slow_prologue
+ testq $15,%rdx
+ jnz .Lcbc_slow_prologue
+ btl $IA32CAP_BIT0_HT,%r10d
+ jc .Lcbc_slow_prologue
+
+
+ leaq -88-248(%rsp),%r15
+ andq $-64,%r15
+
+
+ movq %r14,%r10
+ leaq 2304(%r14),%r11
+ movq %r15,%r12
+ andq $4095,%r10
+ andq $4095,%r11
+ andq $4095,%r12
+
+ cmpq %r11,%r12
+ jb .Lcbc_te_break_out
+ subq %r11,%r12
+ subq %r12,%r15
+ jmp .Lcbc_te_ok
+.Lcbc_te_break_out:
+ subq %r10,%r12
+ andq $4095,%r12
+ addq $320,%r12
+ subq %r12,%r15
+.p2align 2
+.Lcbc_te_ok:
+
+ xchgq %rsp,%r15
+
+ movq %r15,16(%rsp)
+.Lcbc_fast_body:
+ movq %rdi,24(%rsp)
+ movq %rsi,32(%rsp)
+ movq %rdx,40(%rsp)
+ movq %rcx,48(%rsp)
+ movq %r8,56(%rsp)
+ movl $0,80+240(%rsp)
+ movq %r8,%rbp
+ movq %r9,%rbx
+ movq %rsi,%r9
+ movq %rdi,%r8
+ movq %rcx,%r15
+
+ movl 240(%r15),%eax
+
+ movq %r15,%r10
+ subq %r14,%r10
+ andq $4095,%r10
+ cmpq $2304,%r10
+ jb .Lcbc_do_ecopy
+ cmpq $4096-248,%r10
+ jb .Lcbc_skip_ecopy
+.p2align 2
+.Lcbc_do_ecopy:
+ movq %r15,%rsi
+ leaq 80(%rsp),%rdi
+ leaq 80(%rsp),%r15
+ movl $30,%ecx
+.long 0x90A548F3
+ movl %eax,(%rdi)
+.Lcbc_skip_ecopy:
+ movq %r15,0(%rsp)
+
+ movl $18,%ecx
+.p2align 2
+.Lcbc_prefetch_te:
+ movq 0(%r14),%r10
+ movq 32(%r14),%r11
+ movq 64(%r14),%r12
+ movq 96(%r14),%r13
+ leaq 128(%r14),%r14
+ subl $1,%ecx
+ jnz .Lcbc_prefetch_te
+ leaq -2304(%r14),%r14
+
+ cmpq $0,%rbx
+ je .LFAST_DECRYPT
+
+
+ movl 0(%rbp),%eax
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+
+.p2align 2
+.Lcbc_fast_enc_loop:
+ xorl 0(%r8),%eax
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_encrypt
+
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10
+ movq %r10,40(%rsp)
+ jnz .Lcbc_fast_enc_loop
+ movq 56(%rsp),%rbp
+ movl %eax,0(%rbp)
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+
+ jmp .Lcbc_fast_cleanup
+
+
+.p2align 4
+.LFAST_DECRYPT:
+ cmpq %r8,%r9
+ je .Lcbc_fast_dec_in_place
+
+ movq %rbp,64(%rsp)
+.p2align 2
+.Lcbc_fast_dec_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_decrypt
+
+ movq 64(%rsp),%rbp
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0(%rbp),%eax
+ xorl 4(%rbp),%ebx
+ xorl 8(%rbp),%ecx
+ xorl 12(%rbp),%edx
+ movq %r8,%rbp
+
+ subq $16,%r10
+ movq %r10,40(%rsp)
+ movq %rbp,64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jnz .Lcbc_fast_dec_loop
+ movq 56(%rsp),%r12
+ movq 0(%rbp),%r10
+ movq 8(%rbp),%r11
+ movq %r10,0(%r12)
+ movq %r11,8(%r12)
+ jmp .Lcbc_fast_cleanup
+
+.p2align 4
+.Lcbc_fast_dec_in_place:
+ movq 0(%rbp),%r10
+ movq 8(%rbp),%r11
+ movq %r10,0+64(%rsp)
+ movq %r11,8+64(%rsp)
+.p2align 2
+.Lcbc_fast_dec_in_place_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_decrypt
+
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+
+ movq 0(%r8),%r11
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jz .Lcbc_fast_dec_in_place_done
+
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ movq %r10,40(%rsp)
+ jmp .Lcbc_fast_dec_in_place_loop
+.Lcbc_fast_dec_in_place_done:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+.p2align 2
+.Lcbc_fast_cleanup:
+ cmpl $0,80+240(%rsp)
+ leaq 80(%rsp),%rdi
+ je .Lcbc_exit
+ movl $30,%ecx
+ xorq %rax,%rax
+.long 0x90AB48F3
+
+ jmp .Lcbc_exit
+
+
+.p2align 4
+.Lcbc_slow_prologue:
+
+ leaq -88(%rsp),%rbp
+ andq $-64,%rbp
+
+ leaq -88-63(%rcx),%r10
+ subq %rbp,%r10
+ negq %r10
+ andq $960,%r10
+ subq %r10,%rbp
+
+ xchgq %rsp,%rbp
+
+ movq %rbp,16(%rsp)
+.Lcbc_slow_body:
+
+
+
+
+ movq %r8,56(%rsp)
+ movq %r8,%rbp
+ movq %r9,%rbx
+ movq %rsi,%r9
+ movq %rdi,%r8
+ movq %rcx,%r15
+ movq %rdx,%r10
+
+ movl 240(%r15),%eax
+ movq %r15,0(%rsp)
+ shll $4,%eax
+ leaq (%r15,%rax,1),%rax
+ movq %rax,8(%rsp)
+
+
+ leaq 2048(%r14),%r14
+ leaq 768-8(%rsp),%rax
+ subq %r14,%rax
+ andq $768,%rax
+ leaq (%r14,%rax,1),%r14
+
+ cmpq $0,%rbx
+ je .LSLOW_DECRYPT
+
+
+ testq $-16,%r10
+ movl 0(%rbp),%eax
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+ jz .Lcbc_slow_enc_tail
+
+.p2align 2
+.Lcbc_slow_enc_loop:
+ xorl 0(%r8),%eax
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+
+ call _x86_64_AES_encrypt_compact
+
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10
+ jnz .Lcbc_slow_enc_loop
+ testq $15,%r10
+ jnz .Lcbc_slow_enc_tail
+ movq 56(%rsp),%rbp
+ movl %eax,0(%rbp)
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+
+ jmp .Lcbc_exit
+
+.p2align 2
+.Lcbc_slow_enc_tail:
+ movq %rax,%r11
+ movq %rcx,%r12
+ movq %r10,%rcx
+ movq %r8,%rsi
+ movq %r9,%rdi
+.long 0x9066A4F3
+ movq $16,%rcx
+ subq %r10,%rcx
+ xorq %rax,%rax
+.long 0x9066AAF3
+ movq %r9,%r8
+ movq $16,%r10
+ movq %r11,%rax
+ movq %r12,%rcx
+ jmp .Lcbc_slow_enc_loop
+
+.p2align 4
+.LSLOW_DECRYPT:
+ shrq $3,%rax
+ addq %rax,%r14
+
+ movq 0(%rbp),%r11
+ movq 8(%rbp),%r12
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+.p2align 2
+.Lcbc_slow_dec_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+
+ call _x86_64_AES_decrypt_compact
+
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+
+ movq 0(%r8),%r11
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jc .Lcbc_slow_dec_partial
+ jz .Lcbc_slow_dec_done
+
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jmp .Lcbc_slow_dec_loop
+.Lcbc_slow_dec_done:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ jmp .Lcbc_exit
+
+.p2align 2
+.Lcbc_slow_dec_partial:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+
+ movl %eax,0+64(%rsp)
+ movl %ebx,4+64(%rsp)
+ movl %ecx,8+64(%rsp)
+ movl %edx,12+64(%rsp)
+
+ movq %r9,%rdi
+ leaq 64(%rsp),%rsi
+ leaq 16(%r10),%rcx
+.long 0x9066A4F3
+ jmp .Lcbc_exit
+
+.p2align 4
+.Lcbc_exit:
+ movq 16(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lcbc_popfq:
+ popfq
+.Lcbc_epilogue:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_AES_cbc_encrypt:
+.p2align 6
+.LAES_Te:
+.long 0xa56363c6,0xa56363c6
+.long 0x847c7cf8,0x847c7cf8
+.long 0x997777ee,0x997777ee
+.long 0x8d7b7bf6,0x8d7b7bf6
+.long 0x0df2f2ff,0x0df2f2ff
+.long 0xbd6b6bd6,0xbd6b6bd6
+.long 0xb16f6fde,0xb16f6fde
+.long 0x54c5c591,0x54c5c591
+.long 0x50303060,0x50303060
+.long 0x03010102,0x03010102
+.long 0xa96767ce,0xa96767ce
+.long 0x7d2b2b56,0x7d2b2b56
+.long 0x19fefee7,0x19fefee7
+.long 0x62d7d7b5,0x62d7d7b5
+.long 0xe6abab4d,0xe6abab4d
+.long 0x9a7676ec,0x9a7676ec
+.long 0x45caca8f,0x45caca8f
+.long 0x9d82821f,0x9d82821f
+.long 0x40c9c989,0x40c9c989
+.long 0x877d7dfa,0x877d7dfa
+.long 0x15fafaef,0x15fafaef
+.long 0xeb5959b2,0xeb5959b2
+.long 0xc947478e,0xc947478e
+.long 0x0bf0f0fb,0x0bf0f0fb
+.long 0xecadad41,0xecadad41
+.long 0x67d4d4b3,0x67d4d4b3
+.long 0xfda2a25f,0xfda2a25f
+.long 0xeaafaf45,0xeaafaf45
+.long 0xbf9c9c23,0xbf9c9c23
+.long 0xf7a4a453,0xf7a4a453
+.long 0x967272e4,0x967272e4
+.long 0x5bc0c09b,0x5bc0c09b
+.long 0xc2b7b775,0xc2b7b775
+.long 0x1cfdfde1,0x1cfdfde1
+.long 0xae93933d,0xae93933d
+.long 0x6a26264c,0x6a26264c
+.long 0x5a36366c,0x5a36366c
+.long 0x413f3f7e,0x413f3f7e
+.long 0x02f7f7f5,0x02f7f7f5
+.long 0x4fcccc83,0x4fcccc83
+.long 0x5c343468,0x5c343468
+.long 0xf4a5a551,0xf4a5a551
+.long 0x34e5e5d1,0x34e5e5d1
+.long 0x08f1f1f9,0x08f1f1f9
+.long 0x937171e2,0x937171e2
+.long 0x73d8d8ab,0x73d8d8ab
+.long 0x53313162,0x53313162
+.long 0x3f15152a,0x3f15152a
+.long 0x0c040408,0x0c040408
+.long 0x52c7c795,0x52c7c795
+.long 0x65232346,0x65232346
+.long 0x5ec3c39d,0x5ec3c39d
+.long 0x28181830,0x28181830
+.long 0xa1969637,0xa1969637
+.long 0x0f05050a,0x0f05050a
+.long 0xb59a9a2f,0xb59a9a2f
+.long 0x0907070e,0x0907070e
+.long 0x36121224,0x36121224
+.long 0x9b80801b,0x9b80801b
+.long 0x3de2e2df,0x3de2e2df
+.long 0x26ebebcd,0x26ebebcd
+.long 0x6927274e,0x6927274e
+.long 0xcdb2b27f,0xcdb2b27f
+.long 0x9f7575ea,0x9f7575ea
+.long 0x1b090912,0x1b090912
+.long 0x9e83831d,0x9e83831d
+.long 0x742c2c58,0x742c2c58
+.long 0x2e1a1a34,0x2e1a1a34
+.long 0x2d1b1b36,0x2d1b1b36
+.long 0xb26e6edc,0xb26e6edc
+.long 0xee5a5ab4,0xee5a5ab4
+.long 0xfba0a05b,0xfba0a05b
+.long 0xf65252a4,0xf65252a4
+.long 0x4d3b3b76,0x4d3b3b76
+.long 0x61d6d6b7,0x61d6d6b7
+.long 0xceb3b37d,0xceb3b37d
+.long 0x7b292952,0x7b292952
+.long 0x3ee3e3dd,0x3ee3e3dd
+.long 0x712f2f5e,0x712f2f5e
+.long 0x97848413,0x97848413
+.long 0xf55353a6,0xf55353a6
+.long 0x68d1d1b9,0x68d1d1b9
+.long 0x00000000,0x00000000
+.long 0x2cededc1,0x2cededc1
+.long 0x60202040,0x60202040
+.long 0x1ffcfce3,0x1ffcfce3
+.long 0xc8b1b179,0xc8b1b179
+.long 0xed5b5bb6,0xed5b5bb6
+.long 0xbe6a6ad4,0xbe6a6ad4
+.long 0x46cbcb8d,0x46cbcb8d
+.long 0xd9bebe67,0xd9bebe67
+.long 0x4b393972,0x4b393972
+.long 0xde4a4a94,0xde4a4a94
+.long 0xd44c4c98,0xd44c4c98
+.long 0xe85858b0,0xe85858b0
+.long 0x4acfcf85,0x4acfcf85
+.long 0x6bd0d0bb,0x6bd0d0bb
+.long 0x2aefefc5,0x2aefefc5
+.long 0xe5aaaa4f,0xe5aaaa4f
+.long 0x16fbfbed,0x16fbfbed
+.long 0xc5434386,0xc5434386
+.long 0xd74d4d9a,0xd74d4d9a
+.long 0x55333366,0x55333366
+.long 0x94858511,0x94858511
+.long 0xcf45458a,0xcf45458a
+.long 0x10f9f9e9,0x10f9f9e9
+.long 0x06020204,0x06020204
+.long 0x817f7ffe,0x817f7ffe
+.long 0xf05050a0,0xf05050a0
+.long 0x443c3c78,0x443c3c78
+.long 0xba9f9f25,0xba9f9f25
+.long 0xe3a8a84b,0xe3a8a84b
+.long 0xf35151a2,0xf35151a2
+.long 0xfea3a35d,0xfea3a35d
+.long 0xc0404080,0xc0404080
+.long 0x8a8f8f05,0x8a8f8f05
+.long 0xad92923f,0xad92923f
+.long 0xbc9d9d21,0xbc9d9d21
+.long 0x48383870,0x48383870
+.long 0x04f5f5f1,0x04f5f5f1
+.long 0xdfbcbc63,0xdfbcbc63
+.long 0xc1b6b677,0xc1b6b677
+.long 0x75dadaaf,0x75dadaaf
+.long 0x63212142,0x63212142
+.long 0x30101020,0x30101020
+.long 0x1affffe5,0x1affffe5
+.long 0x0ef3f3fd,0x0ef3f3fd
+.long 0x6dd2d2bf,0x6dd2d2bf
+.long 0x4ccdcd81,0x4ccdcd81
+.long 0x140c0c18,0x140c0c18
+.long 0x35131326,0x35131326
+.long 0x2fececc3,0x2fececc3
+.long 0xe15f5fbe,0xe15f5fbe
+.long 0xa2979735,0xa2979735
+.long 0xcc444488,0xcc444488
+.long 0x3917172e,0x3917172e
+.long 0x57c4c493,0x57c4c493
+.long 0xf2a7a755,0xf2a7a755
+.long 0x827e7efc,0x827e7efc
+.long 0x473d3d7a,0x473d3d7a
+.long 0xac6464c8,0xac6464c8
+.long 0xe75d5dba,0xe75d5dba
+.long 0x2b191932,0x2b191932
+.long 0x957373e6,0x957373e6
+.long 0xa06060c0,0xa06060c0
+.long 0x98818119,0x98818119
+.long 0xd14f4f9e,0xd14f4f9e
+.long 0x7fdcdca3,0x7fdcdca3
+.long 0x66222244,0x66222244
+.long 0x7e2a2a54,0x7e2a2a54
+.long 0xab90903b,0xab90903b
+.long 0x8388880b,0x8388880b
+.long 0xca46468c,0xca46468c
+.long 0x29eeeec7,0x29eeeec7
+.long 0xd3b8b86b,0xd3b8b86b
+.long 0x3c141428,0x3c141428
+.long 0x79dedea7,0x79dedea7
+.long 0xe25e5ebc,0xe25e5ebc
+.long 0x1d0b0b16,0x1d0b0b16
+.long 0x76dbdbad,0x76dbdbad
+.long 0x3be0e0db,0x3be0e0db
+.long 0x56323264,0x56323264
+.long 0x4e3a3a74,0x4e3a3a74
+.long 0x1e0a0a14,0x1e0a0a14
+.long 0xdb494992,0xdb494992
+.long 0x0a06060c,0x0a06060c
+.long 0x6c242448,0x6c242448
+.long 0xe45c5cb8,0xe45c5cb8
+.long 0x5dc2c29f,0x5dc2c29f
+.long 0x6ed3d3bd,0x6ed3d3bd
+.long 0xefacac43,0xefacac43
+.long 0xa66262c4,0xa66262c4
+.long 0xa8919139,0xa8919139
+.long 0xa4959531,0xa4959531
+.long 0x37e4e4d3,0x37e4e4d3
+.long 0x8b7979f2,0x8b7979f2
+.long 0x32e7e7d5,0x32e7e7d5
+.long 0x43c8c88b,0x43c8c88b
+.long 0x5937376e,0x5937376e
+.long 0xb76d6dda,0xb76d6dda
+.long 0x8c8d8d01,0x8c8d8d01
+.long 0x64d5d5b1,0x64d5d5b1
+.long 0xd24e4e9c,0xd24e4e9c
+.long 0xe0a9a949,0xe0a9a949
+.long 0xb46c6cd8,0xb46c6cd8
+.long 0xfa5656ac,0xfa5656ac
+.long 0x07f4f4f3,0x07f4f4f3
+.long 0x25eaeacf,0x25eaeacf
+.long 0xaf6565ca,0xaf6565ca
+.long 0x8e7a7af4,0x8e7a7af4
+.long 0xe9aeae47,0xe9aeae47
+.long 0x18080810,0x18080810
+.long 0xd5baba6f,0xd5baba6f
+.long 0x887878f0,0x887878f0
+.long 0x6f25254a,0x6f25254a
+.long 0x722e2e5c,0x722e2e5c
+.long 0x241c1c38,0x241c1c38
+.long 0xf1a6a657,0xf1a6a657
+.long 0xc7b4b473,0xc7b4b473
+.long 0x51c6c697,0x51c6c697
+.long 0x23e8e8cb,0x23e8e8cb
+.long 0x7cdddda1,0x7cdddda1
+.long 0x9c7474e8,0x9c7474e8
+.long 0x211f1f3e,0x211f1f3e
+.long 0xdd4b4b96,0xdd4b4b96
+.long 0xdcbdbd61,0xdcbdbd61
+.long 0x868b8b0d,0x868b8b0d
+.long 0x858a8a0f,0x858a8a0f
+.long 0x907070e0,0x907070e0
+.long 0x423e3e7c,0x423e3e7c
+.long 0xc4b5b571,0xc4b5b571
+.long 0xaa6666cc,0xaa6666cc
+.long 0xd8484890,0xd8484890
+.long 0x05030306,0x05030306
+.long 0x01f6f6f7,0x01f6f6f7
+.long 0x120e0e1c,0x120e0e1c
+.long 0xa36161c2,0xa36161c2
+.long 0x5f35356a,0x5f35356a
+.long 0xf95757ae,0xf95757ae
+.long 0xd0b9b969,0xd0b9b969
+.long 0x91868617,0x91868617
+.long 0x58c1c199,0x58c1c199
+.long 0x271d1d3a,0x271d1d3a
+.long 0xb99e9e27,0xb99e9e27
+.long 0x38e1e1d9,0x38e1e1d9
+.long 0x13f8f8eb,0x13f8f8eb
+.long 0xb398982b,0xb398982b
+.long 0x33111122,0x33111122
+.long 0xbb6969d2,0xbb6969d2
+.long 0x70d9d9a9,0x70d9d9a9
+.long 0x898e8e07,0x898e8e07
+.long 0xa7949433,0xa7949433
+.long 0xb69b9b2d,0xb69b9b2d
+.long 0x221e1e3c,0x221e1e3c
+.long 0x92878715,0x92878715
+.long 0x20e9e9c9,0x20e9e9c9
+.long 0x49cece87,0x49cece87
+.long 0xff5555aa,0xff5555aa
+.long 0x78282850,0x78282850
+.long 0x7adfdfa5,0x7adfdfa5
+.long 0x8f8c8c03,0x8f8c8c03
+.long 0xf8a1a159,0xf8a1a159
+.long 0x80898909,0x80898909
+.long 0x170d0d1a,0x170d0d1a
+.long 0xdabfbf65,0xdabfbf65
+.long 0x31e6e6d7,0x31e6e6d7
+.long 0xc6424284,0xc6424284
+.long 0xb86868d0,0xb86868d0
+.long 0xc3414182,0xc3414182
+.long 0xb0999929,0xb0999929
+.long 0x772d2d5a,0x772d2d5a
+.long 0x110f0f1e,0x110f0f1e
+.long 0xcbb0b07b,0xcbb0b07b
+.long 0xfc5454a8,0xfc5454a8
+.long 0xd6bbbb6d,0xd6bbbb6d
+.long 0x3a16162c,0x3a16162c
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.long 0x00000001, 0x00000002, 0x00000004, 0x00000008
+.long 0x00000010, 0x00000020, 0x00000040, 0x00000080
+.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080
+.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
+.p2align 6
+.LAES_Td:
+.long 0x50a7f451,0x50a7f451
+.long 0x5365417e,0x5365417e
+.long 0xc3a4171a,0xc3a4171a
+.long 0x965e273a,0x965e273a
+.long 0xcb6bab3b,0xcb6bab3b
+.long 0xf1459d1f,0xf1459d1f
+.long 0xab58faac,0xab58faac
+.long 0x9303e34b,0x9303e34b
+.long 0x55fa3020,0x55fa3020
+.long 0xf66d76ad,0xf66d76ad
+.long 0x9176cc88,0x9176cc88
+.long 0x254c02f5,0x254c02f5
+.long 0xfcd7e54f,0xfcd7e54f
+.long 0xd7cb2ac5,0xd7cb2ac5
+.long 0x80443526,0x80443526
+.long 0x8fa362b5,0x8fa362b5
+.long 0x495ab1de,0x495ab1de
+.long 0x671bba25,0x671bba25
+.long 0x980eea45,0x980eea45
+.long 0xe1c0fe5d,0xe1c0fe5d
+.long 0x02752fc3,0x02752fc3
+.long 0x12f04c81,0x12f04c81
+.long 0xa397468d,0xa397468d
+.long 0xc6f9d36b,0xc6f9d36b
+.long 0xe75f8f03,0xe75f8f03
+.long 0x959c9215,0x959c9215
+.long 0xeb7a6dbf,0xeb7a6dbf
+.long 0xda595295,0xda595295
+.long 0x2d83bed4,0x2d83bed4
+.long 0xd3217458,0xd3217458
+.long 0x2969e049,0x2969e049
+.long 0x44c8c98e,0x44c8c98e
+.long 0x6a89c275,0x6a89c275
+.long 0x78798ef4,0x78798ef4
+.long 0x6b3e5899,0x6b3e5899
+.long 0xdd71b927,0xdd71b927
+.long 0xb64fe1be,0xb64fe1be
+.long 0x17ad88f0,0x17ad88f0
+.long 0x66ac20c9,0x66ac20c9
+.long 0xb43ace7d,0xb43ace7d
+.long 0x184adf63,0x184adf63
+.long 0x82311ae5,0x82311ae5
+.long 0x60335197,0x60335197
+.long 0x457f5362,0x457f5362
+.long 0xe07764b1,0xe07764b1
+.long 0x84ae6bbb,0x84ae6bbb
+.long 0x1ca081fe,0x1ca081fe
+.long 0x942b08f9,0x942b08f9
+.long 0x58684870,0x58684870
+.long 0x19fd458f,0x19fd458f
+.long 0x876cde94,0x876cde94
+.long 0xb7f87b52,0xb7f87b52
+.long 0x23d373ab,0x23d373ab
+.long 0xe2024b72,0xe2024b72
+.long 0x578f1fe3,0x578f1fe3
+.long 0x2aab5566,0x2aab5566
+.long 0x0728ebb2,0x0728ebb2
+.long 0x03c2b52f,0x03c2b52f
+.long 0x9a7bc586,0x9a7bc586
+.long 0xa50837d3,0xa50837d3
+.long 0xf2872830,0xf2872830
+.long 0xb2a5bf23,0xb2a5bf23
+.long 0xba6a0302,0xba6a0302
+.long 0x5c8216ed,0x5c8216ed
+.long 0x2b1ccf8a,0x2b1ccf8a
+.long 0x92b479a7,0x92b479a7
+.long 0xf0f207f3,0xf0f207f3
+.long 0xa1e2694e,0xa1e2694e
+.long 0xcdf4da65,0xcdf4da65
+.long 0xd5be0506,0xd5be0506
+.long 0x1f6234d1,0x1f6234d1
+.long 0x8afea6c4,0x8afea6c4
+.long 0x9d532e34,0x9d532e34
+.long 0xa055f3a2,0xa055f3a2
+.long 0x32e18a05,0x32e18a05
+.long 0x75ebf6a4,0x75ebf6a4
+.long 0x39ec830b,0x39ec830b
+.long 0xaaef6040,0xaaef6040
+.long 0x069f715e,0x069f715e
+.long 0x51106ebd,0x51106ebd
+.long 0xf98a213e,0xf98a213e
+.long 0x3d06dd96,0x3d06dd96
+.long 0xae053edd,0xae053edd
+.long 0x46bde64d,0x46bde64d
+.long 0xb58d5491,0xb58d5491
+.long 0x055dc471,0x055dc471
+.long 0x6fd40604,0x6fd40604
+.long 0xff155060,0xff155060
+.long 0x24fb9819,0x24fb9819
+.long 0x97e9bdd6,0x97e9bdd6
+.long 0xcc434089,0xcc434089
+.long 0x779ed967,0x779ed967
+.long 0xbd42e8b0,0xbd42e8b0
+.long 0x888b8907,0x888b8907
+.long 0x385b19e7,0x385b19e7
+.long 0xdbeec879,0xdbeec879
+.long 0x470a7ca1,0x470a7ca1
+.long 0xe90f427c,0xe90f427c
+.long 0xc91e84f8,0xc91e84f8
+.long 0x00000000,0x00000000
+.long 0x83868009,0x83868009
+.long 0x48ed2b32,0x48ed2b32
+.long 0xac70111e,0xac70111e
+.long 0x4e725a6c,0x4e725a6c
+.long 0xfbff0efd,0xfbff0efd
+.long 0x5638850f,0x5638850f
+.long 0x1ed5ae3d,0x1ed5ae3d
+.long 0x27392d36,0x27392d36
+.long 0x64d90f0a,0x64d90f0a
+.long 0x21a65c68,0x21a65c68
+.long 0xd1545b9b,0xd1545b9b
+.long 0x3a2e3624,0x3a2e3624
+.long 0xb1670a0c,0xb1670a0c
+.long 0x0fe75793,0x0fe75793
+.long 0xd296eeb4,0xd296eeb4
+.long 0x9e919b1b,0x9e919b1b
+.long 0x4fc5c080,0x4fc5c080
+.long 0xa220dc61,0xa220dc61
+.long 0x694b775a,0x694b775a
+.long 0x161a121c,0x161a121c
+.long 0x0aba93e2,0x0aba93e2
+.long 0xe52aa0c0,0xe52aa0c0
+.long 0x43e0223c,0x43e0223c
+.long 0x1d171b12,0x1d171b12
+.long 0x0b0d090e,0x0b0d090e
+.long 0xadc78bf2,0xadc78bf2
+.long 0xb9a8b62d,0xb9a8b62d
+.long 0xc8a91e14,0xc8a91e14
+.long 0x8519f157,0x8519f157
+.long 0x4c0775af,0x4c0775af
+.long 0xbbdd99ee,0xbbdd99ee
+.long 0xfd607fa3,0xfd607fa3
+.long 0x9f2601f7,0x9f2601f7
+.long 0xbcf5725c,0xbcf5725c
+.long 0xc53b6644,0xc53b6644
+.long 0x347efb5b,0x347efb5b
+.long 0x7629438b,0x7629438b
+.long 0xdcc623cb,0xdcc623cb
+.long 0x68fcedb6,0x68fcedb6
+.long 0x63f1e4b8,0x63f1e4b8
+.long 0xcadc31d7,0xcadc31d7
+.long 0x10856342,0x10856342
+.long 0x40229713,0x40229713
+.long 0x2011c684,0x2011c684
+.long 0x7d244a85,0x7d244a85
+.long 0xf83dbbd2,0xf83dbbd2
+.long 0x1132f9ae,0x1132f9ae
+.long 0x6da129c7,0x6da129c7
+.long 0x4b2f9e1d,0x4b2f9e1d
+.long 0xf330b2dc,0xf330b2dc
+.long 0xec52860d,0xec52860d
+.long 0xd0e3c177,0xd0e3c177
+.long 0x6c16b32b,0x6c16b32b
+.long 0x99b970a9,0x99b970a9
+.long 0xfa489411,0xfa489411
+.long 0x2264e947,0x2264e947
+.long 0xc48cfca8,0xc48cfca8
+.long 0x1a3ff0a0,0x1a3ff0a0
+.long 0xd82c7d56,0xd82c7d56
+.long 0xef903322,0xef903322
+.long 0xc74e4987,0xc74e4987
+.long 0xc1d138d9,0xc1d138d9
+.long 0xfea2ca8c,0xfea2ca8c
+.long 0x360bd498,0x360bd498
+.long 0xcf81f5a6,0xcf81f5a6
+.long 0x28de7aa5,0x28de7aa5
+.long 0x268eb7da,0x268eb7da
+.long 0xa4bfad3f,0xa4bfad3f
+.long 0xe49d3a2c,0xe49d3a2c
+.long 0x0d927850,0x0d927850
+.long 0x9bcc5f6a,0x9bcc5f6a
+.long 0x62467e54,0x62467e54
+.long 0xc2138df6,0xc2138df6
+.long 0xe8b8d890,0xe8b8d890
+.long 0x5ef7392e,0x5ef7392e
+.long 0xf5afc382,0xf5afc382
+.long 0xbe805d9f,0xbe805d9f
+.long 0x7c93d069,0x7c93d069
+.long 0xa92dd56f,0xa92dd56f
+.long 0xb31225cf,0xb31225cf
+.long 0x3b99acc8,0x3b99acc8
+.long 0xa77d1810,0xa77d1810
+.long 0x6e639ce8,0x6e639ce8
+.long 0x7bbb3bdb,0x7bbb3bdb
+.long 0x097826cd,0x097826cd
+.long 0xf418596e,0xf418596e
+.long 0x01b79aec,0x01b79aec
+.long 0xa89a4f83,0xa89a4f83
+.long 0x656e95e6,0x656e95e6
+.long 0x7ee6ffaa,0x7ee6ffaa
+.long 0x08cfbc21,0x08cfbc21
+.long 0xe6e815ef,0xe6e815ef
+.long 0xd99be7ba,0xd99be7ba
+.long 0xce366f4a,0xce366f4a
+.long 0xd4099fea,0xd4099fea
+.long 0xd67cb029,0xd67cb029
+.long 0xafb2a431,0xafb2a431
+.long 0x31233f2a,0x31233f2a
+.long 0x3094a5c6,0x3094a5c6
+.long 0xc066a235,0xc066a235
+.long 0x37bc4e74,0x37bc4e74
+.long 0xa6ca82fc,0xa6ca82fc
+.long 0xb0d090e0,0xb0d090e0
+.long 0x15d8a733,0x15d8a733
+.long 0x4a9804f1,0x4a9804f1
+.long 0xf7daec41,0xf7daec41
+.long 0x0e50cd7f,0x0e50cd7f
+.long 0x2ff69117,0x2ff69117
+.long 0x8dd64d76,0x8dd64d76
+.long 0x4db0ef43,0x4db0ef43
+.long 0x544daacc,0x544daacc
+.long 0xdf0496e4,0xdf0496e4
+.long 0xe3b5d19e,0xe3b5d19e
+.long 0x1b886a4c,0x1b886a4c
+.long 0xb81f2cc1,0xb81f2cc1
+.long 0x7f516546,0x7f516546
+.long 0x04ea5e9d,0x04ea5e9d
+.long 0x5d358c01,0x5d358c01
+.long 0x737487fa,0x737487fa
+.long 0x2e410bfb,0x2e410bfb
+.long 0x5a1d67b3,0x5a1d67b3
+.long 0x52d2db92,0x52d2db92
+.long 0x335610e9,0x335610e9
+.long 0x1347d66d,0x1347d66d
+.long 0x8c61d79a,0x8c61d79a
+.long 0x7a0ca137,0x7a0ca137
+.long 0x8e14f859,0x8e14f859
+.long 0x893c13eb,0x893c13eb
+.long 0xee27a9ce,0xee27a9ce
+.long 0x35c961b7,0x35c961b7
+.long 0xede51ce1,0xede51ce1
+.long 0x3cb1477a,0x3cb1477a
+.long 0x59dfd29c,0x59dfd29c
+.long 0x3f73f255,0x3f73f255
+.long 0x79ce1418,0x79ce1418
+.long 0xbf37c773,0xbf37c773
+.long 0xeacdf753,0xeacdf753
+.long 0x5baafd5f,0x5baafd5f
+.long 0x146f3ddf,0x146f3ddf
+.long 0x86db4478,0x86db4478
+.long 0x81f3afca,0x81f3afca
+.long 0x3ec468b9,0x3ec468b9
+.long 0x2c342438,0x2c342438
+.long 0x5f40a3c2,0x5f40a3c2
+.long 0x72c31d16,0x72c31d16
+.long 0x0c25e2bc,0x0c25e2bc
+.long 0x8b493c28,0x8b493c28
+.long 0x41950dff,0x41950dff
+.long 0x7101a839,0x7101a839
+.long 0xdeb30c08,0xdeb30c08
+.long 0x9ce4b4d8,0x9ce4b4d8
+.long 0x90c15664,0x90c15664
+.long 0x6184cb7b,0x6184cb7b
+.long 0x70b632d5,0x70b632d5
+.long 0x745c6c48,0x745c6c48
+.long 0x4257b8d0,0x4257b8d0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align 6
+
+.def block_se_handler; .scl 3; .type 32; .endef
+.p2align 4
+block_se_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax
+ movq 248(%r8),%rbx
+
+ movq 8(%r9),%rsi
+ movq 56(%r9),%r11
+
+ movl 0(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jb .Lin_block_prologue
+
+ movq 152(%r8),%rax
+
+ movl 4(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lin_block_prologue
+
+ movq 24(%rax),%rax
+ leaq 48(%rax),%rax
+
+ movq -8(%rax),%rbx
+ movq -16(%rax),%rbp
+ movq -24(%rax),%r12
+ movq -32(%rax),%r13
+ movq -40(%rax),%r14
+ movq -48(%rax),%r15
+ movq %rbx,144(%r8)
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+ movq %r13,224(%r8)
+ movq %r14,232(%r8)
+ movq %r15,240(%r8)
+
+.Lin_block_prologue:
+ movq 8(%rax),%rdi
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8)
+ movq %rsi,168(%r8)
+ movq %rdi,176(%r8)
+
+ jmp .Lcommon_seh_exit
+
+
+.def key_se_handler; .scl 3; .type 32; .endef
+.p2align 4
+key_se_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax
+ movq 248(%r8),%rbx
+
+ movq 8(%r9),%rsi
+ movq 56(%r9),%r11
+
+ movl 0(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jb .Lin_key_prologue
+
+ movq 152(%r8),%rax
+
+ movl 4(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lin_key_prologue
+
+ leaq 56(%rax),%rax
+
+ movq -8(%rax),%rbx
+ movq -16(%rax),%rbp
+ movq -24(%rax),%r12
+ movq -32(%rax),%r13
+ movq -40(%rax),%r14
+ movq -48(%rax),%r15
+ movq %rbx,144(%r8)
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+ movq %r13,224(%r8)
+ movq %r14,232(%r8)
+ movq %r15,240(%r8)
+
+.Lin_key_prologue:
+ movq 8(%rax),%rdi
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8)
+ movq %rsi,168(%r8)
+ movq %rdi,176(%r8)
+
+ jmp .Lcommon_seh_exit
+
+
+.def cbc_se_handler; .scl 3; .type 32; .endef
+.p2align 4
+cbc_se_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax
+ movq 248(%r8),%rbx
+
+ leaq .Lcbc_prologue(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lin_cbc_prologue
+
+ leaq .Lcbc_fast_body(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lin_cbc_frame_setup
+
+ leaq .Lcbc_slow_prologue(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lin_cbc_body
+
+ leaq .Lcbc_slow_body(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lin_cbc_frame_setup
+
+.Lin_cbc_body:
+ movq 152(%r8),%rax
+
+ leaq .Lcbc_epilogue(%rip),%r10
+ cmpq %r10,%rbx
+ jae .Lin_cbc_prologue
+
+ leaq 8(%rax),%rax
+
+ leaq .Lcbc_popfq(%rip),%r10
+ cmpq %r10,%rbx
+ jae .Lin_cbc_prologue
+
+ movq 8(%rax),%rax
+ leaq 56(%rax),%rax
+
+.Lin_cbc_frame_setup:
+ movq -16(%rax),%rbx
+ movq -24(%rax),%rbp
+ movq -32(%rax),%r12
+ movq -40(%rax),%r13
+ movq -48(%rax),%r14
+ movq -56(%rax),%r15
+ movq %rbx,144(%r8)
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+ movq %r13,224(%r8)
+ movq %r14,232(%r8)
+ movq %r15,240(%r8)
+
+.Lin_cbc_prologue:
+ movq 8(%rax),%rdi
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8)
+ movq %rsi,168(%r8)
+ movq %rdi,176(%r8)
+
+.Lcommon_seh_exit:
+
+ movq 40(%r9),%rdi
+ movq %r8,%rsi
+ movl $154,%ecx
+.long 0xa548f3fc
+
+ movq %r9,%rsi
+ xorq %rcx,%rcx
+ movq 8(%rsi),%rdx
+ movq 0(%rsi),%r8
+ movq 16(%rsi),%r9
+ movq 40(%rsi),%r10
+ leaq 56(%rsi),%r11
+ leaq 24(%rsi),%r12
+ movq %r10,32(%rsp)
+ movq %r11,40(%rsp)
+ movq %r12,48(%rsp)
+ movq %rcx,56(%rsp)
+ call *__imp_RtlVirtualUnwind(%rip)
+
+ movl $1,%eax
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+
+
+.section .pdata
+.p2align 2
+.rva .LSEH_begin_AES_encrypt
+.rva .LSEH_end_AES_encrypt
+.rva .LSEH_info_AES_encrypt
+
+.rva .LSEH_begin_AES_decrypt
+.rva .LSEH_end_AES_decrypt
+.rva .LSEH_info_AES_decrypt
+
+.rva .LSEH_begin_AES_set_encrypt_key
+.rva .LSEH_end_AES_set_encrypt_key
+.rva .LSEH_info_AES_set_encrypt_key
+
+.rva .LSEH_begin_AES_set_decrypt_key
+.rva .LSEH_end_AES_set_decrypt_key
+.rva .LSEH_info_AES_set_decrypt_key
+
+.rva .LSEH_begin_AES_cbc_encrypt
+.rva .LSEH_end_AES_cbc_encrypt
+.rva .LSEH_info_AES_cbc_encrypt
+
+.section .xdata
+.p2align 3
+.LSEH_info_AES_encrypt:
+.byte 9,0,0,0
+.rva block_se_handler
+.rva .Lenc_prologue,.Lenc_epilogue
+.LSEH_info_AES_decrypt:
+.byte 9,0,0,0
+.rva block_se_handler
+.rva .Ldec_prologue,.Ldec_epilogue
+.LSEH_info_AES_set_encrypt_key:
+.byte 9,0,0,0
+.rva key_se_handler
+.rva .Lenc_key_prologue,.Lenc_key_epilogue
+.LSEH_info_AES_set_decrypt_key:
+.byte 9,0,0,0
+.rva key_se_handler
+.rva .Ldec_key_prologue,.Ldec_key_epilogue
+.LSEH_info_AES_cbc_encrypt:
+.byte 9,0,0,0
+.rva cbc_se_handler
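
For reference, the Win64 assembly above exports the standard <openssl/aes.h> entry points (the asm_AES_* labels are aliases for the same code), and each .LSEH_begin_* prologue moves the Microsoft-ABI arguments in %rcx, %rdx, %r8, %r9 and the stack into the System V registers the generated body expects. The corresponding C prototypes are:

    int  AES_set_encrypt_key(const unsigned char *userKey, const int bits,
             AES_KEY *key);
    int  AES_set_decrypt_key(const unsigned char *userKey, const int bits,
             AES_KEY *key);
    void AES_encrypt(const unsigned char *in, unsigned char *out,
             const AES_KEY *key);
    void AES_decrypt(const unsigned char *in, unsigned char *out,
             const AES_KEY *key);
    void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
             size_t length, const AES_KEY *key, unsigned char *ivec,
             const int enc);
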
diff --git a/ext/libressl/crypto/aes/aes_cbc.c b/ext/libressl/crypto/aes/aes_cbc.c
new file mode 100644
index 0000000..5e76f6e
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_cbc.c
@@ -0,0 +1,65 @@
+/* $OpenBSD: aes_cbc.c,v 1.12 2014/06/12 15:49:27 deraadt Exp $ */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include <openssl/aes.h>
+#include <openssl/modes.h>
+
+void
+AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const AES_KEY *key, unsigned char *ivec, const int enc)
+{
+ if (enc)
+ CRYPTO_cbc128_encrypt(in, out, len, key, ivec,
+ (block128_f)AES_encrypt);
+ else
+ CRYPTO_cbc128_decrypt(in, out, len, key, ivec,
+ (block128_f)AES_decrypt);
+}
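
A minimal round-trip sketch for the wrapper above (the function name, key and message bytes are hypothetical placeholders): encryption takes a schedule from AES_set_encrypt_key, decryption takes one from AES_set_decrypt_key, len should be a multiple of the 16-byte block, and ivec is updated in place so consecutive calls continue one CBC stream.

    #include <string.h>

    #include <openssl/aes.h>

    /* Sketch only; key and message contents are placeholders. */
    static int
    aes_cbc_roundtrip(void)
    {
            static const unsigned char key[16] = "0123456789abcde";
            unsigned char iv_enc[16] = { 0 }, iv_dec[16] = { 0 };
            unsigned char msg[32] = "two 16-byte blocks of plaintext";
            unsigned char ct[32], pt[32];
            AES_KEY enc_ks, dec_ks;

            AES_set_encrypt_key(key, 128, &enc_ks);
            AES_set_decrypt_key(key, 128, &dec_ks);

            /* ivec is modified in place; each stream needs its own copy. */
            AES_cbc_encrypt(msg, ct, sizeof(msg), &enc_ks, iv_enc, AES_ENCRYPT);
            AES_cbc_encrypt(ct, pt, sizeof(ct), &dec_ks, iv_dec, AES_DECRYPT);

            return memcmp(msg, pt, sizeof(msg)) == 0;
    }
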
diff --git a/ext/libressl/crypto/aes/aes_cfb.c b/ext/libressl/crypto/aes/aes_cfb.c
new file mode 100644
index 0000000..a6384f9
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_cfb.c
@@ -0,0 +1,84 @@
+/* $OpenBSD: aes_cfb.c,v 1.8 2014/06/12 15:49:27 deraadt Exp $ */
+/* ====================================================================
+ * Copyright (c) 2002-2006 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include <openssl/aes.h>
+#include <openssl/modes.h>
+
+/* The input and output are encrypted as though 128-bit CFB mode is
+ * being used.  The extra state information recording how much of the
+ * 128-bit block has been consumed so far is kept in *num.
+ */
+
+void
+AES_cfb128_encrypt(const unsigned char *in, unsigned char *out, size_t length,
+ const AES_KEY *key, unsigned char *ivec, int *num, const int enc)
+{
+ CRYPTO_cfb128_encrypt(in, out, length, key, ivec, num, enc,
+ (block128_f)AES_encrypt);
+}
+
+/* N.B. This expects the input to be packed, MS bit first */
+void
+AES_cfb1_encrypt(const unsigned char *in, unsigned char *out, size_t length,
+ const AES_KEY *key, unsigned char *ivec, int *num, const int enc)
+{
+ CRYPTO_cfb128_1_encrypt(in, out, length, key, ivec, num, enc,
+ (block128_f)AES_encrypt);
+}
+
+void
+AES_cfb8_encrypt(const unsigned char *in, unsigned char *out, size_t length,
+ const AES_KEY *key, unsigned char *ivec, int *num, const int enc)
+{
+ CRYPTO_cfb128_8_encrypt(in, out, length, key, ivec, num, enc,
+ (block128_f)AES_encrypt);
+}
+
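
A short sketch (editorial note, not part of the imported file) of why *num matters: the generic CFB routine keeps the offset into the current 16-byte keystream block in *num, so a message can be fed to AES_cfb128_encrypt in arbitrary-sized fragments and still produce the same output as a single call. CFB uses the encryption key schedule for both directions. The helper name cfb_fragmented is hypothetical.

	#include <string.h>
	#include <openssl/aes.h>

	/* Encrypt len bytes in two fragments; num carries the partial-block
	 * keystream offset across the two calls. */
	static void
	cfb_fragmented(const unsigned char key[16], const unsigned char iv[16],
	    const unsigned char *msg, size_t len, unsigned char *out)
	{
		AES_KEY enc;
		unsigned char ivec[16];
		int num = 0;		/* offset into current keystream block */
		size_t first = len / 2;

		AES_set_encrypt_key(key, 128, &enc);	/* encrypt schedule, even for decryption */
		memcpy(ivec, iv, sizeof(ivec));
		AES_cfb128_encrypt(msg, out, first, &enc, ivec, &num, AES_ENCRYPT);
		AES_cfb128_encrypt(msg + first, out + first, len - first, &enc,
		    ivec, &num, AES_ENCRYPT);
	}
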
diff --git a/ext/libressl/crypto/aes/aes_core.c b/ext/libressl/crypto/aes/aes_core.c
new file mode 100644
index 0000000..1b8a24c
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_core.c
@@ -0,0 +1,1374 @@
+/* $OpenBSD: aes_core.c,v 1.13 2015/11/05 21:59:13 miod Exp $ */
+/**
+ * rijndael-alg-fst.c
+ *
+ * @version 3.0 (December 2000)
+ *
+ * Optimised ANSI C code for the Rijndael cipher (now AES)
+ *
+ * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
+ * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
+ * @author Paulo Barreto <paulo.barreto@terra.com.br>
+ *
+ * This code is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Note: rewritten a little bit to provide error control and an OpenSSL-
+ compatible API */
+
+#ifndef AES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#endif
+
+#include <stdlib.h>
+#include <openssl/aes.h>
+#include "aes_locl.h"
+
+#ifndef AES_ASM
+/*
+Te0[x] = S [x].[02, 01, 01, 03];
+Te1[x] = S [x].[03, 02, 01, 01];
+Te2[x] = S [x].[01, 03, 02, 01];
+Te3[x] = S [x].[01, 01, 03, 02];
+
+Td0[x] = Si[x].[0e, 09, 0d, 0b];
+Td1[x] = Si[x].[0b, 0e, 09, 0d];
+Td2[x] = Si[x].[0d, 0b, 0e, 09];
+Td3[x] = Si[x].[09, 0d, 0b, 0e];
+Td4[x] = Si[x].[01];
+*/
+
+static const u32 Te0[256] = {
+ 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
+ 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
+ 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
+ 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
+ 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
+ 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
+ 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
+ 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
+ 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
+ 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
+ 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
+ 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
+ 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
+ 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
+ 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
+ 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
+ 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
+ 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
+ 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
+ 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
+ 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
+ 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
+ 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
+ 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
+ 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
+ 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
+ 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
+ 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
+ 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
+ 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
+ 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
+ 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
+ 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
+ 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
+ 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
+ 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
+ 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
+ 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
+ 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
+ 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
+ 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
+ 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
+ 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
+ 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
+ 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
+ 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
+ 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
+ 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
+ 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
+ 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
+ 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
+ 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
+ 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
+ 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
+ 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
+ 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
+ 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
+ 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
+ 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
+ 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
+ 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
+ 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
+ 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
+ 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
+};
+static const u32 Te1[256] = {
+ 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
+ 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
+ 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
+ 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
+ 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
+ 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
+ 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
+ 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
+ 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
+ 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
+ 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
+ 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
+ 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
+ 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
+ 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
+ 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
+ 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
+ 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
+ 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
+ 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
+ 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
+ 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
+ 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
+ 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
+ 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
+ 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
+ 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
+ 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
+ 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
+ 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
+ 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
+ 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
+ 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
+ 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
+ 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
+ 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
+ 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
+ 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
+ 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
+ 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
+ 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
+ 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
+ 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
+ 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
+ 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
+ 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
+ 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
+ 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
+ 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
+ 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
+ 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
+ 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
+ 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
+ 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
+ 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
+ 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
+ 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
+ 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
+ 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
+ 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
+ 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
+ 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
+ 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
+ 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
+};
+static const u32 Te2[256] = {
+ 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
+ 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
+ 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
+ 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
+ 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
+ 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
+ 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
+ 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
+ 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
+ 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
+ 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
+ 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
+ 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
+ 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
+ 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
+ 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
+ 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
+ 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
+ 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
+ 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
+ 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
+ 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
+ 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
+ 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
+ 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
+ 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
+ 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
+ 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
+ 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
+ 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
+ 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
+ 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
+ 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
+ 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
+ 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
+ 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
+ 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
+ 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
+ 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
+ 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
+ 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
+ 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
+ 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
+ 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
+ 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
+ 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
+ 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
+ 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
+ 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
+ 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
+ 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
+ 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
+ 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
+ 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
+ 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
+ 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
+ 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
+ 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
+ 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
+ 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
+ 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
+ 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
+ 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
+ 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
+};
+static const u32 Te3[256] = {
+ 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
+ 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
+ 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
+ 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
+ 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
+ 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
+ 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
+ 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
+ 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
+ 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
+ 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
+ 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
+ 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
+ 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
+ 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
+ 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
+ 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
+ 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
+ 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
+ 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
+ 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
+ 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
+ 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
+ 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
+ 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
+ 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
+ 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
+ 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
+ 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
+ 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
+ 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
+ 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
+ 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
+ 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
+ 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
+ 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
+ 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
+ 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
+ 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
+ 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
+ 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
+ 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
+ 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
+ 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
+ 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
+ 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
+ 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
+ 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
+ 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
+ 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
+ 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
+ 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
+ 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
+ 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
+ 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
+ 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
+ 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
+ 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
+ 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
+ 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
+ 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
+ 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
+ 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
+ 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
+};
+
+static const u32 Td0[256] = {
+ 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
+ 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
+ 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
+ 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
+ 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
+ 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
+ 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
+ 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
+ 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
+ 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
+ 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
+ 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
+ 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
+ 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
+ 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
+ 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
+ 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
+ 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
+ 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
+ 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
+ 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
+ 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
+ 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
+ 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
+ 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
+ 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
+ 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
+ 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
+ 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
+ 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
+ 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
+ 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
+ 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
+ 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
+ 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
+ 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
+ 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
+ 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
+ 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
+ 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
+ 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
+ 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
+ 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
+ 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
+ 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
+ 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
+ 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
+ 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
+ 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
+ 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
+ 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
+ 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
+ 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
+ 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
+ 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
+ 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
+ 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
+ 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
+ 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
+ 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
+ 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
+ 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
+ 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
+ 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
+};
+static const u32 Td1[256] = {
+ 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
+ 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
+ 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
+ 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
+ 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
+ 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
+ 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
+ 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
+ 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
+ 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
+ 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
+ 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
+ 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
+ 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
+ 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
+ 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
+ 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
+ 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
+ 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
+ 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
+ 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
+ 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
+ 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
+ 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
+ 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
+ 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
+ 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
+ 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
+ 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
+ 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
+ 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
+ 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
+ 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
+ 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
+ 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
+ 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
+ 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
+ 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
+ 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
+ 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
+ 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
+ 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
+ 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
+ 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
+ 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
+ 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
+ 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
+ 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
+ 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
+ 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
+ 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
+ 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
+ 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
+ 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
+ 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
+ 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
+ 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
+ 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
+ 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
+ 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
+ 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
+ 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
+ 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
+ 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
+};
+static const u32 Td2[256] = {
+ 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
+ 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
+ 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
+ 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
+ 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
+ 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
+ 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
+ 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
+ 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
+ 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
+ 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
+ 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
+ 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
+ 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
+ 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
+ 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
+ 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
+ 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
+ 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
+ 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
+ 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
+ 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
+ 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
+ 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
+ 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
+ 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
+ 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
+ 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
+ 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
+ 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
+ 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
+ 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
+ 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
+ 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
+ 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
+ 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
+ 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
+ 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
+ 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
+ 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
+ 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
+ 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
+ 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
+ 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
+ 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
+ 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
+ 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
+ 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
+ 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
+ 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
+ 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
+ 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
+ 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
+ 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
+ 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
+ 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
+ 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
+ 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
+ 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
+ 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
+ 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
+ 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
+ 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
+ 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
+};
+static const u32 Td3[256] = {
+ 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
+ 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
+ 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
+ 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
+ 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
+ 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
+ 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
+ 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
+ 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
+ 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
+ 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
+ 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
+ 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
+ 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
+ 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
+ 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
+ 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
+ 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
+ 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
+ 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
+ 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
+ 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
+ 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
+ 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
+ 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
+ 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
+ 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
+ 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
+ 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
+ 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
+ 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
+ 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
+ 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
+ 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
+ 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
+ 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
+ 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
+ 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
+ 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
+ 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
+ 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
+ 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
+ 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
+ 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
+ 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
+ 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
+ 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
+ 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
+ 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
+ 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
+ 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
+ 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
+ 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
+ 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
+ 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
+ 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
+ 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
+ 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
+ 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
+ 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
+ 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
+ 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
+ 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
+ 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
+};
+static const u8 Td4[256] = {
+ 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
+ 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
+ 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
+ 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
+ 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
+ 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
+ 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
+ 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
+ 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
+ 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
+ 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
+ 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
+ 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
+ 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
+ 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
+ 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
+ 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
+ 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
+ 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
+ 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
+ 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
+ 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
+ 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
+ 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
+ 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
+ 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
+ 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
+ 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
+ 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
+ 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
+ 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
+ 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
+};
+static const u32 rcon[] = {
+ 0x01000000, 0x02000000, 0x04000000, 0x08000000,
+ 0x10000000, 0x20000000, 0x40000000, 0x80000000,
+ 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+};
+
+/**
+ * Expand the cipher key into the encryption key schedule.
+ */
+int
+AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)
+{
+ u32 *rk;
+ int i = 0;
+ u32 temp;
+
+ if (!userKey || !key)
+ return -1;
+ if (bits != 128 && bits != 192 && bits != 256)
+ return -2;
+
+ rk = key->rd_key;
+
+ if (bits == 128)
+ key->rounds = 10;
+ else if (bits == 192)
+ key->rounds = 12;
+ else
+ key->rounds = 14;
+
+ rk[0] = GETU32(userKey);
+ rk[1] = GETU32(userKey + 4);
+ rk[2] = GETU32(userKey + 8);
+ rk[3] = GETU32(userKey + 12);
+ if (bits == 128) {
+ while (1) {
+ temp = rk[3];
+ rk[4] = rk[0] ^
+ (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te0[(temp) & 0xff] & 0x0000ff00) ^
+ (Te1[(temp >> 24)] & 0x000000ff) ^
+ rcon[i];
+ rk[5] = rk[1] ^ rk[4];
+ rk[6] = rk[2] ^ rk[5];
+ rk[7] = rk[3] ^ rk[6];
+ if (++i == 10) {
+ return 0;
+ }
+ rk += 4;
+ }
+ }
+ rk[4] = GETU32(userKey + 16);
+ rk[5] = GETU32(userKey + 20);
+ if (bits == 192) {
+ while (1) {
+ temp = rk[5];
+ rk[6] = rk[ 0] ^
+ (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te0[(temp) & 0xff] & 0x0000ff00) ^
+ (Te1[(temp >> 24)] & 0x000000ff) ^
+ rcon[i];
+ rk[7] = rk[1] ^ rk[6];
+ rk[8] = rk[2] ^ rk[7];
+ rk[9] = rk[3] ^ rk[8];
+ if (++i == 8) {
+ return 0;
+ }
+ rk[10] = rk[4] ^ rk[9];
+ rk[11] = rk[5] ^ rk[10];
+ rk += 6;
+ }
+ }
+ rk[6] = GETU32(userKey + 24);
+ rk[7] = GETU32(userKey + 28);
+ if (bits == 256) {
+ while (1) {
+ temp = rk[7];
+ rk[8] = rk[0] ^
+ (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te0[(temp) & 0xff] & 0x0000ff00) ^
+ (Te1[(temp >> 24)] & 0x000000ff) ^
+ rcon[i];
+ rk[9] = rk[1] ^ rk[8];
+ rk[10] = rk[2] ^ rk[9];
+ rk[11] = rk[3] ^ rk[10];
+ if (++i == 7) {
+ return 0;
+ }
+ temp = rk[11];
+ rk[12] = rk[4] ^
+ (Te2[(temp >> 24)] & 0xff000000) ^
+ (Te3[(temp >> 16) & 0xff] & 0x00ff0000) ^
+ (Te0[(temp >> 8) & 0xff] & 0x0000ff00) ^
+ (Te1[(temp) & 0xff] & 0x000000ff);
+ rk[13] = rk[5] ^ rk[12];
+ rk[14] = rk[6] ^ rk[13];
+ rk[15] = rk[7] ^ rk[14];
+
+ rk += 8;
+ }
+ }
+ return 0;
+}
+
+/**
+ * Expand the cipher key into the decryption key schedule.
+ */
+int
+AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)
+{
+ u32 *rk;
+ int i, j, status;
+ u32 temp;
+
+ /* first, start with an encryption schedule */
+ status = AES_set_encrypt_key(userKey, bits, key);
+ if (status < 0)
+ return status;
+
+ rk = key->rd_key;
+
+ /* invert the order of the round keys: */
+ for (i = 0, j = 4 * (key->rounds); i < j; i += 4, j -= 4) {
+ temp = rk[i];
+ rk[i] = rk[j];
+ rk[j] = temp;
+ temp = rk[i + 1];
+ rk[i + 1] = rk[j + 1];
+ rk[j + 1] = temp;
+ temp = rk[i + 2];
+ rk[i + 2] = rk[j + 2];
+ rk[j + 2] = temp;
+ temp = rk[i + 3];
+ rk[i + 3] = rk[j + 3];
+ rk[j + 3] = temp;
+ }
+ /* apply the inverse MixColumn transform to all round keys but the first and the last: */
+ for (i = 1; i < (key->rounds); i++) {
+ rk += 4;
+ rk[0] =
+ Td0[Te1[(rk[0] >> 24)] & 0xff] ^
+ Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
+ Td2[Te1[(rk[0] >> 8) & 0xff] & 0xff] ^
+ Td3[Te1[(rk[0]) & 0xff] & 0xff];
+ rk[1] =
+ Td0[Te1[(rk[1] >> 24)] & 0xff] ^
+ Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
+ Td2[Te1[(rk[1] >> 8) & 0xff] & 0xff] ^
+ Td3[Te1[(rk[1]) & 0xff] & 0xff];
+ rk[2] =
+ Td0[Te1[(rk[2] >> 24)] & 0xff] ^
+ Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
+ Td2[Te1[(rk[2] >> 8) & 0xff] & 0xff] ^
+ Td3[Te1[(rk[2]) & 0xff] & 0xff];
+ rk[3] =
+ Td0[Te1[(rk[3] >> 24)] & 0xff] ^
+ Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
+ Td2[Te1[(rk[3] >> 8) & 0xff] & 0xff] ^
+ Td3[Te1[(rk[3]) & 0xff] & 0xff];
+ }
+ return 0;
+}
+
+/*
+ * Encrypt a single block
+ * in and out can overlap
+ */
+void
+AES_encrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key)
+{
+ const u32 *rk;
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+
+ rk = key->rd_key;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(in ) ^ rk[0];
+ s1 = GETU32(in + 4) ^ rk[1];
+ s2 = GETU32(in + 8) ^ rk[2];
+ s3 = GETU32(in + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+ /* round 1: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
+ if (key->rounds > 10) {
+ /* round 10: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
+ if (key->rounds > 12) {
+ /* round 12: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
+ }
+ }
+ rk += key->rounds << 2;
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ r = key->rounds >> 1;
+ for (;;) {
+ t0 =
+ Te0[(s0 >> 24)] ^
+ Te1[(s1 >> 16) & 0xff] ^
+ Te2[(s2 >> 8) & 0xff] ^
+ Te3[(s3) & 0xff] ^
+ rk[4];
+ t1 =
+ Te0[(s1 >> 24)] ^
+ Te1[(s2 >> 16) & 0xff] ^
+ Te2[(s3 >> 8) & 0xff] ^
+ Te3[(s0) & 0xff] ^
+ rk[5];
+ t2 =
+ Te0[(s2 >> 24)] ^
+ Te1[(s3 >> 16) & 0xff] ^
+ Te2[(s0 >> 8) & 0xff] ^
+ Te3[(s1) & 0xff] ^
+ rk[6];
+ t3 =
+ Te0[(s3 >> 24)] ^
+ Te1[(s0 >> 16) & 0xff] ^
+ Te2[(s1 >> 8) & 0xff] ^
+ Te3[(s2) & 0xff] ^
+ rk[7];
+
+ rk += 8;
+ if (--r == 0) {
+ break;
+ }
+
+ s0 =
+ Te0[(t0 >> 24)] ^
+ Te1[(t1 >> 16) & 0xff] ^
+ Te2[(t2 >> 8) & 0xff] ^
+ Te3[(t3) & 0xff] ^
+ rk[0];
+ s1 =
+ Te0[(t1 >> 24)] ^
+ Te1[(t2 >> 16) & 0xff] ^
+ Te2[(t3 >> 8) & 0xff] ^
+ Te3[(t0) & 0xff] ^
+ rk[1];
+ s2 =
+ Te0[(t2 >> 24)] ^
+ Te1[(t3 >> 16) & 0xff] ^
+ Te2[(t0 >> 8) & 0xff] ^
+ Te3[(t1) & 0xff] ^
+ rk[2];
+ s3 =
+ Te0[(t3 >> 24)] ^
+ Te1[(t0 >> 16) & 0xff] ^
+ Te2[(t1 >> 8) & 0xff] ^
+ Te3[(t2) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ s0 =
+ (Te2[(t0 >> 24)] & 0xff000000) ^
+ (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te0[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te1[(t3) & 0xff] & 0x000000ff) ^
+ rk[0];
+ PUTU32(out, s0);
+ s1 =
+ (Te2[(t1 >> 24)] & 0xff000000) ^
+ (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te0[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te1[(t0) & 0xff] & 0x000000ff) ^
+ rk[1];
+ PUTU32(out + 4, s1);
+ s2 =
+ (Te2[(t2 >> 24)] & 0xff000000) ^
+ (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te0[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te1[(t1) & 0xff] & 0x000000ff) ^
+ rk[2];
+ PUTU32(out + 8, s2);
+ s3 =
+ (Te2[(t3 >> 24)] & 0xff000000) ^
+ (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te0[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te1[(t2) & 0xff] & 0x000000ff) ^
+ rk[3];
+ PUTU32(out + 12, s3);
+}
+
+/*
+ * Decrypt a single block
+ * in and out can overlap
+ */
+void
+AES_decrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key)
+{
+ const u32 *rk;
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+
+ rk = key->rd_key;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(in) ^ rk[0];
+ s1 = GETU32(in + 4) ^ rk[1];
+ s2 = GETU32(in + 8) ^ rk[2];
+ s3 = GETU32(in + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+ /* round 1: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
+ if (key->rounds > 10) {
+ /* round 10: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
+ if (key->rounds > 12) {
+ /* round 12: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
+ }
+ }
+ rk += key->rounds << 2;
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ r = key->rounds >> 1;
+ for (;;) {
+ t0 =
+ Td0[(s0 >> 24)] ^
+ Td1[(s3 >> 16) & 0xff] ^
+ Td2[(s2 >> 8) & 0xff] ^
+ Td3[(s1) & 0xff] ^
+ rk[4];
+ t1 =
+ Td0[(s1 >> 24)] ^
+ Td1[(s0 >> 16) & 0xff] ^
+ Td2[(s3 >> 8) & 0xff] ^
+ Td3[(s2) & 0xff] ^
+ rk[5];
+ t2 =
+ Td0[(s2 >> 24)] ^
+ Td1[(s1 >> 16) & 0xff] ^
+ Td2[(s0 >> 8) & 0xff] ^
+ Td3[(s3) & 0xff] ^
+ rk[6];
+ t3 =
+ Td0[(s3 >> 24)] ^
+ Td1[(s2 >> 16) & 0xff] ^
+ Td2[(s1 >> 8) & 0xff] ^
+ Td3[(s0) & 0xff] ^
+ rk[7];
+
+ rk += 8;
+ if (--r == 0) {
+ break;
+ }
+
+ s0 =
+ Td0[(t0 >> 24)] ^
+ Td1[(t3 >> 16) & 0xff] ^
+ Td2[(t2 >> 8) & 0xff] ^
+ Td3[(t1) & 0xff] ^
+ rk[0];
+ s1 =
+ Td0[(t1 >> 24)] ^
+ Td1[(t0 >> 16) & 0xff] ^
+ Td2[(t3 >> 8) & 0xff] ^
+ Td3[(t2) & 0xff] ^
+ rk[1];
+ s2 =
+ Td0[(t2 >> 24)] ^
+ Td1[(t1 >> 16) & 0xff] ^
+ Td2[(t0 >> 8) & 0xff] ^
+ Td3[(t3) & 0xff] ^
+ rk[2];
+ s3 =
+ Td0[(t3 >> 24)] ^
+ Td1[(t2 >> 16) & 0xff] ^
+ Td2[(t1 >> 8) & 0xff] ^
+ Td3[(t0) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ s0 =
+ (((uint32_t)Td4[(t0 >> 24)]) << 24) ^
+ (Td4[(t3 >> 16) & 0xff] << 16) ^
+ (Td4[(t2 >> 8) & 0xff] << 8) ^
+ (Td4[(t1) & 0xff]) ^
+ rk[0];
+ PUTU32(out, s0);
+ s1 =
+ (((uint32_t)Td4[(t1 >> 24)]) << 24) ^
+ (Td4[(t0 >> 16) & 0xff] << 16) ^
+ (Td4[(t3 >> 8) & 0xff] << 8) ^
+ (Td4[(t2) & 0xff]) ^
+ rk[1];
+ PUTU32(out + 4, s1);
+ s2 =
+ (((uint32_t)Td4[(t2 >> 24)]) << 24) ^
+ (Td4[(t1 >> 16) & 0xff] << 16) ^
+ (Td4[(t0 >> 8) & 0xff] << 8) ^
+ (Td4[(t3) & 0xff]) ^
+ rk[2];
+ PUTU32(out + 8, s2);
+ s3 =
+ (((uint32_t)Td4[(t3 >> 24)]) << 24) ^
+ (Td4[(t2 >> 16) & 0xff] << 16) ^
+ (Td4[(t1 >> 8) & 0xff] << 8) ^
+ (Td4[(t0) & 0xff]) ^
+ rk[3];
+ PUTU32(out + 12, s3);
+}
+
+#else /* AES_ASM */
+
+static const u8 Te4[256] = {
+ 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
+ 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
+ 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
+ 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
+ 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
+ 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
+ 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
+ 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
+ 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
+ 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
+ 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
+ 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
+ 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
+ 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
+ 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
+ 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
+ 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
+ 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
+ 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
+ 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
+ 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
+ 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
+ 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
+ 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
+ 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
+ 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
+ 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
+ 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
+ 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
+ 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
+ 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
+ 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
+};
+static const u32 rcon[] = {
+ 0x01000000, 0x02000000, 0x04000000, 0x08000000,
+ 0x10000000, 0x20000000, 0x40000000, 0x80000000,
+ 0x1B000000, 0x36000000,
+ /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+};
+
+/**
+ * Expand the cipher key into the encryption key schedule.
+ */
+int
+AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)
+{
+ u32 *rk;
+ int i = 0;
+ u32 temp;
+
+ if (!userKey || !key)
+ return -1;
+ if (bits != 128 && bits != 192 && bits != 256)
+ return -2;
+
+ rk = key->rd_key;
+
+ if (bits == 128)
+ key->rounds = 10;
+ else if (bits == 192)
+ key->rounds = 12;
+ else
+ key->rounds = 14;
+
+ rk[0] = GETU32(userKey);
+ rk[1] = GETU32(userKey + 4);
+ rk[2] = GETU32(userKey + 8);
+ rk[3] = GETU32(userKey + 12);
+ if (bits == 128) {
+ while (1) {
+ temp = rk[3];
+ rk[4] = rk[0] ^
+ (Te4[(temp >> 16) & 0xff] << 24) ^
+ (Te4[(temp >> 8) & 0xff] << 16) ^
+ (Te4[(temp) & 0xff] << 8) ^
+ (Te4[(temp >> 24)]) ^
+ rcon[i];
+ rk[5] = rk[1] ^ rk[4];
+ rk[6] = rk[2] ^ rk[5];
+ rk[7] = rk[3] ^ rk[6];
+ if (++i == 10) {
+ return 0;
+ }
+ rk += 4;
+ }
+ }
+ rk[4] = GETU32(userKey + 16);
+ rk[5] = GETU32(userKey + 20);
+ if (bits == 192) {
+ while (1) {
+ temp = rk[5];
+ rk[6] = rk[0] ^
+ (Te4[(temp >> 16) & 0xff] << 24) ^
+ (Te4[(temp >> 8) & 0xff] << 16) ^
+ (Te4[(temp) & 0xff] << 8) ^
+ (Te4[(temp >> 24)]) ^
+ rcon[i];
+ rk[7] = rk[1] ^ rk[6];
+ rk[8] = rk[2] ^ rk[7];
+ rk[9] = rk[3] ^ rk[8];
+ if (++i == 8) {
+ return 0;
+ }
+ rk[10] = rk[4] ^ rk[9];
+ rk[11] = rk[5] ^ rk[10];
+ rk += 6;
+ }
+ }
+ rk[6] = GETU32(userKey + 24);
+ rk[7] = GETU32(userKey + 28);
+ if (bits == 256) {
+ while (1) {
+ temp = rk[7];
+ rk[8] = rk[0] ^
+ (Te4[(temp >> 16) & 0xff] << 24) ^
+ (Te4[(temp >> 8) & 0xff] << 16) ^
+ (Te4[(temp) & 0xff] << 8) ^
+ (Te4[(temp >> 24)]) ^
+ rcon[i];
+ rk[9] = rk[1] ^ rk[8];
+ rk[10] = rk[2] ^ rk[9];
+ rk[11] = rk[3] ^ rk[10];
+ if (++i == 7) {
+ return 0;
+ }
+ temp = rk[11];
+ rk[12] = rk[4] ^
+ (Te4[(temp >> 24)] << 24) ^
+ (Te4[(temp >> 16) & 0xff] << 16) ^
+ (Te4[(temp >> 8) & 0xff] << 8) ^
+ (Te4[(temp) & 0xff]);
+ rk[13] = rk[5] ^ rk[12];
+ rk[14] = rk[6] ^ rk[13];
+ rk[15] = rk[7] ^ rk[14];
+
+ rk += 8;
+ }
+ }
+ return 0;
+}
+
+/**
+ * Expand the cipher key into the decryption key schedule.
+ */
+int
+AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key)
+{
+ u32 *rk;
+ int i, j, status;
+ u32 temp;
+
+ /* first, start with an encryption schedule */
+ status = AES_set_encrypt_key(userKey, bits, key);
+ if (status < 0)
+ return status;
+
+ rk = key->rd_key;
+
+ /* invert the order of the round keys: */
+ for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
+ temp = rk[i];
+ rk[i] = rk[j];
+ rk[j] = temp;
+ temp = rk[i + 1];
+ rk[i + 1] = rk[j + 1];
+ rk[j + 1] = temp;
+ temp = rk[i + 2];
+ rk[i + 2] = rk[j + 2];
+ rk[j + 2] = temp;
+ temp = rk[i + 3];
+ rk[i + 3] = rk[j + 3];
+ rk[j + 3] = temp;
+ }
+ /* apply the inverse MixColumn transform to all round keys but the first and the last: */
+ for (i = 1; i < (key->rounds); i++) {
+ rk += 4;
+ for (j = 0; j < 4; j++) {
+ u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
+
+ tp1 = rk[j];
+ m = tp1 & 0x80808080;
+ tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
+ ((m - (m >> 7)) & 0x1b1b1b1b);
+ m = tp2 & 0x80808080;
+ tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
+ ((m - (m >> 7)) & 0x1b1b1b1b);
+ m = tp4 & 0x80808080;
+ tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
+ ((m - (m >> 7)) & 0x1b1b1b1b);
+ tp9 = tp8 ^ tp1;
+ tpb = tp9 ^ tp2;
+ tpd = tp9 ^ tp4;
+ tpe = tp8 ^ tp4 ^ tp2;
+#if defined(ROTATE)
+ rk[j] = tpe ^ ROTATE(tpd, 16) ^
+ ROTATE(tp9, 24) ^ ROTATE(tpb, 8);
+#else
+ rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
+ (tp9 >> 8) ^ (tp9 << 24) ^
+ (tpb >> 24) ^ (tpb << 8);
+#endif
+ }
+ }
+ return 0;
+}
+
+#endif /* AES_ASM */
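
The key-schedule routines above are the portable C fallback compiled when AES_ASM is not defined. As a quick orientation, here is a minimal sketch (not part of the imported sources; the helper name and the key/plaintext bytes are made up for illustration) of how they are normally driven through the public <openssl/aes.h> interface: expand the key once per direction, then run AES_encrypt()/AES_decrypt() on single 16-byte blocks.

#include <assert.h>
#include <string.h>

#include <openssl/aes.h>

static void
aes_block_roundtrip(void)
{
	static const unsigned char key[16] = {
		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
	};
	static const unsigned char pt[AES_BLOCK_SIZE] = {
		'p', 'l', 'a', 'i', 'n', 't', 'e', 'x',
		't', ' ', 'b', 'l', 'o', 'c', 'k', '.'
	};
	unsigned char ct[AES_BLOCK_SIZE], out[AES_BLOCK_SIZE];
	AES_KEY enc, dec;

	/* Both setters return 0 on success and a negative value on error. */
	assert(AES_set_encrypt_key(key, 128, &enc) == 0);
	assert(AES_set_decrypt_key(key, 128, &dec) == 0);

	AES_encrypt(pt, ct, &enc);	/* one-block primitive, no chaining */
	AES_decrypt(ct, out, &dec);

	assert(memcmp(pt, out, AES_BLOCK_SIZE) == 0);
}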
diff --git a/ext/libressl/crypto/aes/aes_ctr.c b/ext/libressl/crypto/aes/aes_ctr.c
new file mode 100644
index 0000000..6079145
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_ctr.c
@@ -0,0 +1,62 @@
+/* $OpenBSD: aes_ctr.c,v 1.9 2014/06/12 15:49:27 deraadt Exp $ */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include <openssl/aes.h>
+#include <openssl/modes.h>
+
+void
+AES_ctr128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const AES_KEY *key, unsigned char ivec[AES_BLOCK_SIZE],
+ unsigned char ecount_buf[AES_BLOCK_SIZE], unsigned int *num)
+{
+ CRYPTO_ctr128_encrypt(in, out, length, key, ivec, ecount_buf, num,
+ (block128_f)AES_encrypt);
+}
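
AES_ctr128_encrypt() is a thin wrapper around CRYPTO_ctr128_encrypt(), and all of the counter-mode state lives in the caller-supplied ivec, ecount_buf and num. A usage sketch (illustrative only; the helper name is invented): ecount_buf must be zeroed and num set to 0 before the first call, after which the same state can encrypt a message in arbitrary chunks.

#include <string.h>

#include <openssl/aes.h>

static void
ctr_encrypt_chunks(const unsigned char key_bytes[16],
    unsigned char iv[AES_BLOCK_SIZE],		/* advanced in place */
    const unsigned char *msg, size_t msg_len, unsigned char *out)
{
	AES_KEY key;
	unsigned char ecount[AES_BLOCK_SIZE];
	unsigned int num = 0;
	size_t half = msg_len / 2;

	AES_set_encrypt_key(key_bytes, 128, &key);
	memset(ecount, 0, sizeof(ecount));	/* fresh keystream state */

	/* Two calls give the same output as one call over msg_len bytes. */
	AES_ctr128_encrypt(msg, out, half, &key, iv, ecount, &num);
	AES_ctr128_encrypt(msg + half, out + half, msg_len - half,
	    &key, iv, ecount, &num);
}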
diff --git a/ext/libressl/crypto/aes/aes_ecb.c b/ext/libressl/crypto/aes/aes_ecb.c
new file mode 100644
index 0000000..b05e539
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_ecb.c
@@ -0,0 +1,69 @@
+/* $OpenBSD: aes_ecb.c,v 1.6 2015/02/10 09:46:30 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#ifndef AES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#endif
+
+#include <openssl/aes.h>
+#include "aes_locl.h"
+
+void
+AES_ecb_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key, const int enc)
+{
+ if (AES_ENCRYPT == enc)
+ AES_encrypt(in, out, key);
+ else
+ AES_decrypt(in, out, key);
+}
diff --git a/ext/libressl/crypto/aes/aes_ige.c b/ext/libressl/crypto/aes/aes_ige.c
new file mode 100644
index 0000000..85b7f69
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_ige.c
@@ -0,0 +1,194 @@
+/* $OpenBSD: aes_ige.c,v 1.7 2015/02/10 09:46:30 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 2006 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include <openssl/aes.h>
+#include <openssl/crypto.h>
+
+#include "aes_locl.h"
+
+#define N_WORDS (AES_BLOCK_SIZE / sizeof(unsigned long))
+typedef struct {
+ unsigned long data[N_WORDS];
+} aes_block_t;
+
+/* XXX: probably some better way to do this */
+#if defined(__i386__) || defined(__x86_64__)
+#define UNALIGNED_MEMOPS_ARE_FAST 1
+#else
+#define UNALIGNED_MEMOPS_ARE_FAST 0
+#endif
+
+#if UNALIGNED_MEMOPS_ARE_FAST
+#define load_block(d, s) (d) = *(const aes_block_t *)(s)
+#define store_block(d, s) *(aes_block_t *)(d) = (s)
+#else
+#define load_block(d, s) memcpy((d).data, (s), AES_BLOCK_SIZE)
+#define store_block(d, s) memcpy((d), (s).data, AES_BLOCK_SIZE)
+#endif
+
+/* N.B. The IV for this mode is _twice_ the block size */
+
+void
+AES_ige_encrypt(const unsigned char *in, unsigned char *out, size_t length,
+ const AES_KEY *key, unsigned char *ivec, const int enc)
+{
+ size_t n;
+ size_t len;
+
+ OPENSSL_assert((length % AES_BLOCK_SIZE) == 0);
+
+ len = length / AES_BLOCK_SIZE;
+
+ if (AES_ENCRYPT == enc) {
+ if (in != out && (UNALIGNED_MEMOPS_ARE_FAST ||
+ ((size_t)in|(size_t)out|(size_t)ivec) %
+ sizeof(long) == 0)) {
+ aes_block_t *ivp = (aes_block_t *)ivec;
+ aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE);
+
+ while (len) {
+ aes_block_t *inp = (aes_block_t *)in;
+ aes_block_t *outp = (aes_block_t *)out;
+
+ for (n = 0; n < N_WORDS; ++n)
+ outp->data[n] = inp->data[n] ^ ivp->data[n];
+ AES_encrypt((unsigned char *)outp->data, (unsigned char *)outp->data, key);
+ for (n = 0; n < N_WORDS; ++n)
+ outp->data[n] ^= iv2p->data[n];
+ ivp = outp;
+ iv2p = inp;
+ --len;
+ in += AES_BLOCK_SIZE;
+ out += AES_BLOCK_SIZE;
+ }
+ memcpy(ivec, ivp->data, AES_BLOCK_SIZE);
+ memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE);
+ } else {
+ aes_block_t tmp, tmp2;
+ aes_block_t iv;
+ aes_block_t iv2;
+
+ load_block(iv, ivec);
+ load_block(iv2, ivec + AES_BLOCK_SIZE);
+
+ while (len) {
+ load_block(tmp, in);
+ for (n = 0; n < N_WORDS; ++n)
+ tmp2.data[n] = tmp.data[n] ^ iv.data[n];
+ AES_encrypt((unsigned char *)tmp2.data,
+ (unsigned char *)tmp2.data, key);
+ for (n = 0; n < N_WORDS; ++n)
+ tmp2.data[n] ^= iv2.data[n];
+ store_block(out, tmp2);
+ iv = tmp2;
+ iv2 = tmp;
+ --len;
+ in += AES_BLOCK_SIZE;
+ out += AES_BLOCK_SIZE;
+ }
+ memcpy(ivec, iv.data, AES_BLOCK_SIZE);
+ memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
+ }
+ } else {
+ if (in != out && (UNALIGNED_MEMOPS_ARE_FAST ||
+ ((size_t)in|(size_t)out|(size_t)ivec) %
+ sizeof(long) == 0)) {
+ aes_block_t *ivp = (aes_block_t *)ivec;
+ aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE);
+
+ while (len) {
+ aes_block_t tmp;
+ aes_block_t *inp = (aes_block_t *)in;
+ aes_block_t *outp = (aes_block_t *)out;
+
+ for (n = 0; n < N_WORDS; ++n)
+ tmp.data[n] = inp->data[n] ^ iv2p->data[n];
+ AES_decrypt((unsigned char *)tmp.data,
+ (unsigned char *)outp->data, key);
+ for (n = 0; n < N_WORDS; ++n)
+ outp->data[n] ^= ivp->data[n];
+ ivp = inp;
+ iv2p = outp;
+ --len;
+ in += AES_BLOCK_SIZE;
+ out += AES_BLOCK_SIZE;
+ }
+ memcpy(ivec, ivp->data, AES_BLOCK_SIZE);
+ memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE);
+ } else {
+ aes_block_t tmp, tmp2;
+ aes_block_t iv;
+ aes_block_t iv2;
+
+ load_block(iv, ivec);
+ load_block(iv2, ivec + AES_BLOCK_SIZE);
+
+ while (len) {
+ load_block(tmp, in);
+ tmp2 = tmp;
+ for (n = 0; n < N_WORDS; ++n)
+ tmp.data[n] ^= iv2.data[n];
+ AES_decrypt((unsigned char *)tmp.data,
+ (unsigned char *)tmp.data, key);
+ for (n = 0; n < N_WORDS; ++n)
+ tmp.data[n] ^= iv.data[n];
+ store_block(out, tmp);
+ iv = tmp2;
+ iv2 = tmp;
+ --len;
+ in += AES_BLOCK_SIZE;
+ out += AES_BLOCK_SIZE;
+ }
+ memcpy(ivec, iv.data, AES_BLOCK_SIZE);
+ memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
+ }
+ }
+}
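
As the N.B. above says, IGE chains on both the previous ciphertext and the previous plaintext block, which is why ivec must hold two AES blocks (32 bytes) and is rewritten on return so that processing can continue across calls. A round-trip sketch under those assumptions (helper name invented; not part of the imported sources):

#include <string.h>

#include <openssl/aes.h>

static void
ige_roundtrip(const unsigned char key_bytes[16],
    const unsigned char iv[2 * AES_BLOCK_SIZE],
    unsigned char *buf, size_t len)	/* len must be a multiple of 16 */
{
	AES_KEY enc, dec;
	unsigned char iv_enc[2 * AES_BLOCK_SIZE], iv_dec[2 * AES_BLOCK_SIZE];

	AES_set_encrypt_key(key_bytes, 128, &enc);
	AES_set_decrypt_key(key_bytes, 128, &dec);	/* decryption path calls AES_decrypt() */

	/* Work on copies: AES_ige_encrypt() updates ivec as it goes. */
	memcpy(iv_enc, iv, sizeof(iv_enc));
	memcpy(iv_dec, iv, sizeof(iv_dec));

	AES_ige_encrypt(buf, buf, len, &enc, iv_enc, AES_ENCRYPT);
	AES_ige_encrypt(buf, buf, len, &dec, iv_dec, AES_DECRYPT);
	/* buf now holds the original plaintext again. */
}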
diff --git a/ext/libressl/crypto/aes/aes_locl.h b/ext/libressl/crypto/aes/aes_locl.h
new file mode 100644
index 0000000..c47f65d
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_locl.h
@@ -0,0 +1,83 @@
+/* $OpenBSD: aes_locl.h,v 1.11 2016/12/21 15:49:29 jsing Exp $ */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#ifndef HEADER_AES_LOCL_H
+#define HEADER_AES_LOCL_H
+
+#include <openssl/opensslconf.h>
+
+#ifdef OPENSSL_NO_AES
+#error AES is disabled.
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+__BEGIN_HIDDEN_DECLS
+
+#define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3]))
+#define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); }
+
+typedef unsigned int u32;
+typedef unsigned short u16;
+typedef unsigned char u8;
+
+#define MAXKC (256/32)
+#define MAXKB (256/8)
+#define MAXNR 14
+
+/* This controls loop-unrolling in aes_core.c */
+#undef FULL_UNROLL
+
+__END_HIDDEN_DECLS
+
+#endif /* !HEADER_AES_LOCL_H */
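
The GETU32()/PUTU32() macros above treat the byte stream as big endian: the first byte becomes the most significant byte of the 32-bit word. A small self-contained check (the macros are copied here only so the example stands alone; the function name is made up):

#include <assert.h>

typedef unsigned int u32;
typedef unsigned char u8;

#define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ \
    ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3]))
#define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); \
    (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); }

static void
getu32_example(void)
{
	const u8 in[4] = { 0xde, 0xad, 0xbe, 0xef };
	u8 out[4];
	u32 w = GETU32(in);

	assert(w == 0xdeadbeefU);	/* big-endian load */
	PUTU32(out, w);			/* big-endian store */
	assert(out[0] == 0xde && out[3] == 0xef);
}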
diff --git a/ext/libressl/crypto/aes/aes_misc.c b/ext/libressl/crypto/aes/aes_misc.c
new file mode 100644
index 0000000..6c1506d
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_misc.c
@@ -0,0 +1,65 @@
+/* $OpenBSD: aes_misc.c,v 1.10 2014/07/09 11:10:50 bcook Exp $ */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include <openssl/opensslv.h>
+#include <openssl/crypto.h>
+#include <openssl/aes.h>
+#include "aes_locl.h"
+
+const char *
+AES_options(void)
+{
+#ifdef FULL_UNROLL
+ return "aes(full)";
+#else
+ return "aes(partial)";
+#endif
+}
diff --git a/ext/libressl/crypto/aes/aes_ofb.c b/ext/libressl/crypto/aes/aes_ofb.c
new file mode 100644
index 0000000..f8dc03a
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_ofb.c
@@ -0,0 +1,61 @@
+/* $OpenBSD: aes_ofb.c,v 1.6 2014/06/12 15:49:27 deraadt Exp $ */
+/* ====================================================================
+ * Copyright (c) 2002-2006 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include <openssl/aes.h>
+#include <openssl/modes.h>
+
+void
+AES_ofb128_encrypt(const unsigned char *in, unsigned char *out, size_t length,
+ const AES_KEY *key, unsigned char *ivec, int *num)
+{
+ CRYPTO_ofb128_encrypt(in, out, length, key, ivec, num,
+ (block128_f)AES_encrypt);
+}
diff --git a/ext/libressl/crypto/aes/aes_wrap.c b/ext/libressl/crypto/aes/aes_wrap.c
new file mode 100644
index 0000000..b30630f
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_wrap.c
@@ -0,0 +1,133 @@
+/* $OpenBSD: aes_wrap.c,v 1.12 2018/11/07 18:31:16 tb Exp $ */
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
+ * project.
+ */
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ */
+
+#include <string.h>
+
+#include <openssl/aes.h>
+#include <openssl/bio.h>
+
+static const unsigned char default_iv[] = {
+ 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6,
+};
+
+int
+AES_wrap_key(AES_KEY *key, const unsigned char *iv, unsigned char *out,
+ const unsigned char *in, unsigned int inlen)
+{
+ unsigned char *A, B[16], *R;
+ unsigned int i, j, t;
+
+ if ((inlen & 0x7) || (inlen < 16))
+ return -1;
+ A = B;
+ t = 1;
+ memmove(out + 8, in, inlen);
+ if (!iv)
+ iv = default_iv;
+
+ memcpy(A, iv, 8);
+
+ for (j = 0; j < 6; j++) {
+ R = out + 8;
+ for (i = 0; i < inlen; i += 8, t++, R += 8) {
+ memcpy(B + 8, R, 8);
+ AES_encrypt(B, B, key);
+ A[7] ^= (unsigned char)(t & 0xff);
+ if (t > 0xff) {
+ A[6] ^= (unsigned char)((t >> 8) & 0xff);
+ A[5] ^= (unsigned char)((t >> 16) & 0xff);
+ A[4] ^= (unsigned char)((t >> 24) & 0xff);
+ }
+ memcpy(R, B + 8, 8);
+ }
+ }
+ memcpy(out, A, 8);
+ return inlen + 8;
+}
+
+int
+AES_unwrap_key(AES_KEY *key, const unsigned char *iv, unsigned char *out,
+ const unsigned char *in, unsigned int inlen)
+{
+ unsigned char *A, B[16], *R;
+ unsigned int i, j, t;
+
+ if ((inlen & 0x7) || (inlen < 24))
+ return -1;
+ inlen -= 8;
+ A = B;
+ t = 6 * (inlen >> 3);
+ memcpy(A, in, 8);
+ memmove(out, in + 8, inlen);
+ for (j = 0; j < 6; j++) {
+ R = out + inlen - 8;
+ for (i = 0; i < inlen; i += 8, t--, R -= 8) {
+ A[7] ^= (unsigned char)(t & 0xff);
+ if (t > 0xff) {
+ A[6] ^= (unsigned char)((t >> 8) & 0xff);
+ A[5] ^= (unsigned char)((t >> 16) & 0xff);
+ A[4] ^= (unsigned char)((t >> 24) & 0xff);
+ }
+ memcpy(B + 8, R, 8);
+ AES_decrypt(B, B, key);
+ memcpy(R, B + 8, 8);
+ }
+ }
+ if (!iv)
+ iv = default_iv;
+ if (memcmp(A, iv, 8)) {
+ explicit_bzero(out, inlen);
+ return 0;
+ }
+ return inlen;
+}
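
AES_wrap_key() and AES_unwrap_key() implement the RFC 3394 style key wrap: wrapping prepends an 8-byte integrity value, so the output is inlen + 8 bytes, and unwrapping returns the recovered length on success or 0 when the integrity check against the IV fails (a NULL iv selects the default 0xA6..A6 constant above). A sketch of typical use, with invented helper and variable names:

#include <assert.h>

#include <openssl/aes.h>

static void
wrap_unwrap_example(const unsigned char kek_bytes[16],
    const unsigned char secret[32])
{
	AES_KEY wctx, uctx;
	unsigned char wrapped[32 + 8];
	unsigned char recovered[32];
	int wlen, ulen;

	AES_set_encrypt_key(kek_bytes, 128, &wctx);	/* wrapping encrypts */
	AES_set_decrypt_key(kek_bytes, 128, &uctx);	/* unwrapping decrypts */

	wlen = AES_wrap_key(&wctx, NULL, wrapped, secret, 32);
	assert(wlen == 32 + 8);

	ulen = AES_unwrap_key(&uctx, NULL, recovered, wrapped,
	    (unsigned int)wlen);
	assert(ulen == 32);
}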
diff --git a/ext/libressl/crypto/aes/aesni-elf-x86_64.S b/ext/libressl/crypto/aes/aesni-elf-x86_64.S
new file mode 100644
index 0000000..3b3dabf
--- /dev/null
+++ b/ext/libressl/crypto/aes/aesni-elf-x86_64.S
@@ -0,0 +1,2539 @@
+#include "x86_arch.h"
+.text
+.globl aesni_encrypt
+.type aesni_encrypt,@function
+.align 16
+aesni_encrypt:
+ movups (%rdi),%xmm2
+ movl 240(%rdx),%eax
+ movups (%rdx),%xmm0
+ movups 16(%rdx),%xmm1
+ leaq 32(%rdx),%rdx
+ xorps %xmm0,%xmm2
+.Loop_enc1_1:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rdx),%xmm1
+ leaq 16(%rdx),%rdx
+ jnz .Loop_enc1_1
+ aesenclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ retq
+.size aesni_encrypt,.-aesni_encrypt
+
+.globl aesni_decrypt
+.type aesni_decrypt,@function
+.align 16
+aesni_decrypt:
+ movups (%rdi),%xmm2
+ movl 240(%rdx),%eax
+ movups (%rdx),%xmm0
+ movups 16(%rdx),%xmm1
+ leaq 32(%rdx),%rdx
+ xorps %xmm0,%xmm2
+.Loop_dec1_2:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rdx),%xmm1
+ leaq 16(%rdx),%rdx
+ jnz .Loop_dec1_2
+ aesdeclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ retq
+.size aesni_decrypt, .-aesni_decrypt
+.type _aesni_encrypt3,@function
+.align 16
+_aesni_encrypt3:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+
+.Lenc_loop3:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop3
+
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ retq
+.size _aesni_encrypt3,.-_aesni_encrypt3
+.type _aesni_decrypt3,@function
+.align 16
+_aesni_decrypt3:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+
+.Ldec_loop3:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop3
+
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ retq
+.size _aesni_decrypt3,.-_aesni_decrypt3
+.type _aesni_encrypt4,@function
+.align 16
+_aesni_encrypt4:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+
+.Lenc_loop4:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop4
+
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ retq
+.size _aesni_encrypt4,.-_aesni_encrypt4
+.type _aesni_decrypt4,@function
+.align 16
+_aesni_decrypt4:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+
+.Ldec_loop4:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop4
+
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ retq
+.size _aesni_decrypt4,.-_aesni_decrypt4
+.type _aesni_encrypt6,@function
+.align 16
+_aesni_encrypt6:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesenc %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm7
+ jmp .Lenc_loop6_enter
+.align 16
+.Lenc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+.Lenc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop6
+
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ retq
+.size _aesni_encrypt6,.-_aesni_encrypt6
+.type _aesni_decrypt6,@function
+.align 16
+_aesni_decrypt6:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm7
+ jmp .Ldec_loop6_enter
+.align 16
+.Ldec_loop6:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+.Ldec_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop6
+
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ retq
+.size _aesni_decrypt6,.-_aesni_decrypt6
+.type _aesni_encrypt8,@function
+.align 16
+_aesni_encrypt8:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesenc %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesenc %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp .Lenc_loop8_enter
+.align 16
+.Lenc_loop8:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+.Lenc_loop8_enter:
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ aesenc %xmm0,%xmm8
+ aesenc %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop8
+
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ aesenclast %xmm0,%xmm8
+ aesenclast %xmm0,%xmm9
+ retq
+.size _aesni_encrypt8,.-_aesni_encrypt8
+.type _aesni_decrypt8,@function
+.align 16
+_aesni_decrypt8:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp .Ldec_loop8_enter
+.align 16
+.Ldec_loop8:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+.Ldec_loop8_enter:
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ aesdec %xmm0,%xmm8
+ aesdec %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop8
+
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ aesdeclast %xmm0,%xmm8
+ aesdeclast %xmm0,%xmm9
+ retq
+.size _aesni_decrypt8,.-_aesni_decrypt8
+.globl aesni_ecb_encrypt
+.type aesni_ecb_encrypt,@function
+.align 16
+aesni_ecb_encrypt:
+ andq $-16,%rdx
+ jz .Lecb_ret
+
+ movl 240(%rcx),%eax
+ movups (%rcx),%xmm0
+ movq %rcx,%r11
+ movl %eax,%r10d
+ testl %r8d,%r8d
+ jz .Lecb_decrypt
+
+ cmpq $128,%rdx
+ jb .Lecb_enc_tail
+
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp .Lecb_enc_loop8_enter
+.align 16
+.Lecb_enc_loop8:
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+.Lecb_enc_loop8_enter:
+
+ call _aesni_encrypt8
+
+ subq $128,%rdx
+ jnc .Lecb_enc_loop8
+
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx
+ jz .Lecb_ret
+
+.Lecb_enc_tail:
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb .Lecb_enc_one
+ movups 16(%rdi),%xmm3
+ je .Lecb_enc_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb .Lecb_enc_three
+ movups 48(%rdi),%xmm5
+ je .Lecb_enc_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb .Lecb_enc_five
+ movups 80(%rdi),%xmm7
+ je .Lecb_enc_six
+ movdqu 96(%rdi),%xmm8
+ call _aesni_encrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_3:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_3
+ aesenclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_two:
+ xorps %xmm4,%xmm4
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_three:
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_four:
+ call _aesni_encrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_five:
+ xorps %xmm7,%xmm7
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_six:
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ jmp .Lecb_ret
+
+.align 16
+.Lecb_decrypt:
+ cmpq $128,%rdx
+ jb .Lecb_dec_tail
+
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp .Lecb_dec_loop8_enter
+.align 16
+.Lecb_dec_loop8:
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+.Lecb_dec_loop8_enter:
+
+ call _aesni_decrypt8
+
+ movups (%r11),%xmm0
+ subq $128,%rdx
+ jnc .Lecb_dec_loop8
+
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx
+ jz .Lecb_ret
+
+.Lecb_dec_tail:
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb .Lecb_dec_one
+ movups 16(%rdi),%xmm3
+ je .Lecb_dec_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb .Lecb_dec_three
+ movups 48(%rdi),%xmm5
+ je .Lecb_dec_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb .Lecb_dec_five
+ movups 80(%rdi),%xmm7
+ je .Lecb_dec_six
+ movups 96(%rdi),%xmm8
+ movups (%rcx),%xmm0
+ call _aesni_decrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_4:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_4
+ aesdeclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_two:
+ xorps %xmm4,%xmm4
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_three:
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_four:
+ call _aesni_decrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_five:
+ xorps %xmm7,%xmm7
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_six:
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+
+.Lecb_ret:
+ retq
+.size aesni_ecb_encrypt,.-aesni_ecb_encrypt
+.globl aesni_ccm64_encrypt_blocks
+.type aesni_ccm64_encrypt_blocks,@function
+.align 16
+aesni_ccm64_encrypt_blocks:
+ movl 240(%rcx),%eax
+ movdqu (%r8),%xmm9
+ movdqa .Lincrement64(%rip),%xmm6
+ movdqa .Lbswap_mask(%rip),%xmm7
+
+ shrl $1,%eax
+ leaq 0(%rcx),%r11
+ movdqu (%r9),%xmm3
+ movdqa %xmm9,%xmm2
+ movl %eax,%r10d
+.byte 102,68,15,56,0,207
+ jmp .Lccm64_enc_outer
+.align 16
+.Lccm64_enc_outer:
+ movups (%r11),%xmm0
+ movl %r10d,%eax
+ movups (%rdi),%xmm8
+
+ xorps %xmm0,%xmm2
+ movups 16(%r11),%xmm1
+ xorps %xmm8,%xmm0
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm3
+ movups (%rcx),%xmm0
+
+.Lccm64_enc2_loop:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz .Lccm64_enc2_loop
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ paddq %xmm6,%xmm9
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+
+ decq %rdx
+ leaq 16(%rdi),%rdi
+ xorps %xmm2,%xmm8
+ movdqa %xmm9,%xmm2
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215
+ jnz .Lccm64_enc_outer
+
+ movups %xmm3,(%r9)
+ retq
+.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
+.globl aesni_ccm64_decrypt_blocks
+.type aesni_ccm64_decrypt_blocks,@function
+.align 16
+aesni_ccm64_decrypt_blocks:
+ movl 240(%rcx),%eax
+ movups (%r8),%xmm9
+ movdqu (%r9),%xmm3
+ movdqa .Lincrement64(%rip),%xmm6
+ movdqa .Lbswap_mask(%rip),%xmm7
+
+ movaps %xmm9,%xmm2
+ movl %eax,%r10d
+ movq %rcx,%r11
+.byte 102,68,15,56,0,207
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_5:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_5
+ aesenclast %xmm1,%xmm2
+ movups (%rdi),%xmm8
+ paddq %xmm6,%xmm9
+ leaq 16(%rdi),%rdi
+ jmp .Lccm64_dec_outer
+.align 16
+.Lccm64_dec_outer:
+ xorps %xmm2,%xmm8
+ movdqa %xmm9,%xmm2
+ movl %r10d,%eax
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215
+
+ subq $1,%rdx
+ jz .Lccm64_dec_break
+
+ movups (%r11),%xmm0
+ shrl $1,%eax
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm8,%xmm3
+ movups (%rcx),%xmm0
+
+.Lccm64_dec2_loop:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz .Lccm64_dec2_loop
+ movups (%rdi),%xmm8
+ paddq %xmm6,%xmm9
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ leaq 16(%rdi),%rdi
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ jmp .Lccm64_dec_outer
+
+.align 16
+.Lccm64_dec_break:
+
+ movups (%r11),%xmm0
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%r11
+ xorps %xmm8,%xmm3
+.Loop_enc1_6:
+ aesenc %xmm1,%xmm3
+ decl %eax
+ movups (%r11),%xmm1
+ leaq 16(%r11),%r11
+ jnz .Loop_enc1_6
+ aesenclast %xmm1,%xmm3
+ movups %xmm3,(%r9)
+ retq
+.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
+.globl aesni_ctr32_encrypt_blocks
+.type aesni_ctr32_encrypt_blocks,@function
+.align 16
+aesni_ctr32_encrypt_blocks:
+ cmpq $1,%rdx
+ je .Lctr32_one_shortcut
+
+ movdqu (%r8),%xmm14
+ movdqa .Lbswap_mask(%rip),%xmm15
+ xorl %eax,%eax
+.byte 102,69,15,58,22,242,3
+.byte 102,68,15,58,34,240,3
+
+ movl 240(%rcx),%eax
+ bswapl %r10d
+ pxor %xmm12,%xmm12
+ pxor %xmm13,%xmm13
+.byte 102,69,15,58,34,226,0
+ leaq 3(%r10),%r11
+.byte 102,69,15,58,34,235,0
+ incl %r10d
+.byte 102,69,15,58,34,226,1
+ incq %r11
+.byte 102,69,15,58,34,235,1
+ incl %r10d
+.byte 102,69,15,58,34,226,2
+ incq %r11
+.byte 102,69,15,58,34,235,2
+ movdqa %xmm12,-40(%rsp)
+.byte 102,69,15,56,0,231
+ movdqa %xmm13,-24(%rsp)
+.byte 102,69,15,56,0,239
+
+ pshufd $192,%xmm12,%xmm2
+ pshufd $128,%xmm12,%xmm3
+ pshufd $64,%xmm12,%xmm4
+ cmpq $6,%rdx
+ jb .Lctr32_tail
+ shrl $1,%eax
+ movq %rcx,%r11
+ movl %eax,%r10d
+ subq $6,%rdx
+ jmp .Lctr32_loop6
+
+.align 16
+.Lctr32_loop6:
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm2
+ movups (%r11),%xmm0
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm3
+ movups 16(%r11),%xmm1
+ pshufd $64,%xmm13,%xmm7
+ por %xmm14,%xmm4
+ por %xmm14,%xmm5
+ xorps %xmm0,%xmm2
+ por %xmm14,%xmm6
+ por %xmm14,%xmm7
+
+
+
+
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ movdqa .Lincrement32(%rip),%xmm13
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ movdqa -40(%rsp),%xmm12
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ jmp .Lctr32_enc_loop6_enter
+.align 16
+.Lctr32_enc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+.Lctr32_enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lctr32_enc_loop6
+
+ aesenc %xmm1,%xmm2
+ paddd %xmm13,%xmm12
+ aesenc %xmm1,%xmm3
+ paddd -24(%rsp),%xmm13
+ aesenc %xmm1,%xmm4
+ movdqa %xmm12,-40(%rsp)
+ aesenc %xmm1,%xmm5
+ movdqa %xmm13,-24(%rsp)
+ aesenc %xmm1,%xmm6
+.byte 102,69,15,56,0,231
+ aesenc %xmm1,%xmm7
+.byte 102,69,15,56,0,239
+
+ aesenclast %xmm0,%xmm2
+ movups (%rdi),%xmm8
+ aesenclast %xmm0,%xmm3
+ movups 16(%rdi),%xmm9
+ aesenclast %xmm0,%xmm4
+ movups 32(%rdi),%xmm10
+ aesenclast %xmm0,%xmm5
+ movups 48(%rdi),%xmm11
+ aesenclast %xmm0,%xmm6
+ movups 64(%rdi),%xmm1
+ aesenclast %xmm0,%xmm7
+ movups 80(%rdi),%xmm0
+ leaq 96(%rdi),%rdi
+
+ xorps %xmm2,%xmm8
+ pshufd $192,%xmm12,%xmm2
+ xorps %xmm3,%xmm9
+ pshufd $128,%xmm12,%xmm3
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ pshufd $64,%xmm12,%xmm4
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ xorps %xmm7,%xmm0
+ movups %xmm1,64(%rsi)
+ movups %xmm0,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movl %r10d,%eax
+ subq $6,%rdx
+ jnc .Lctr32_loop6
+
+ addq $6,%rdx
+ jz .Lctr32_done
+ movq %r11,%rcx
+ leal 1(%rax,%rax,1),%eax
+
+.Lctr32_tail:
+ por %xmm14,%xmm2
+ movups (%rdi),%xmm8
+ cmpq $2,%rdx
+ jb .Lctr32_one
+
+ por %xmm14,%xmm3
+ movups 16(%rdi),%xmm9
+ je .Lctr32_two
+
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm4
+ movups 32(%rdi),%xmm10
+ cmpq $4,%rdx
+ jb .Lctr32_three
+
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm5
+ movups 48(%rdi),%xmm11
+ je .Lctr32_four
+
+ por %xmm14,%xmm6
+ xorps %xmm7,%xmm7
+
+ call _aesni_encrypt6
+
+ movups 64(%rdi),%xmm1
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ movups %xmm1,64(%rsi)
+ jmp .Lctr32_done
+
+.align 16
+.Lctr32_one_shortcut:
+ movups (%r8),%xmm2
+ movups (%rdi),%xmm8
+ movl 240(%rcx),%eax
+.Lctr32_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_7:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_7
+ aesenclast %xmm1,%xmm2
+ xorps %xmm2,%xmm8
+ movups %xmm8,(%rsi)
+ jmp .Lctr32_done
+
+.align 16
+.Lctr32_two:
+ xorps %xmm4,%xmm4
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ movups %xmm9,16(%rsi)
+ jmp .Lctr32_done
+
+.align 16
+.Lctr32_three:
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ movups %xmm10,32(%rsi)
+ jmp .Lctr32_done
+
+.align 16
+.Lctr32_four:
+ call _aesni_encrypt4
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ movups %xmm11,48(%rsi)
+
+.Lctr32_done:
+ retq
+.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
+.globl aesni_xts_encrypt
+.type aesni_xts_encrypt,@function
+.align 16
+aesni_xts_encrypt:
+ leaq -104(%rsp),%rsp
+ movups (%r9),%xmm15
+ movl 240(%r8),%eax
+ movl 240(%rcx),%r10d
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+.Loop_enc1_8:
+ aesenc %xmm1,%xmm15
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_8
+ aesenclast %xmm1,%xmm15
+ movq %rcx,%r11
+ movl %r10d,%eax
+ movq %rdx,%r9
+ andq $-16,%rdx
+
+ movdqa .Lxts_magic(%rip),%xmm8
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ subq $96,%rdx
+ jc .Lxts_enc_short
+
+ shrl $1,%eax
+ subl $1,%eax
+ movl %eax,%r10d
+ jmp .Lxts_enc_grandloop
+
+.align 16
+.Lxts_enc_grandloop:
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+
+
+
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp)
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+ aesenc %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesenc %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp .Lxts_enc_loop6_enter
+
+.align 16
+.Lxts_enc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+.Lxts_enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lxts_enc_loop6
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ aesenc %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups 32(%rcx),%xmm0
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ aesenclast %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesenclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc .Lxts_enc_grandloop
+
+ leal 3(%rax,%rax,1),%eax
+ movq %r11,%rcx
+ movl %eax,%r10d
+
+.Lxts_enc_short:
+ addq $96,%rdx
+ jz .Lxts_enc_done
+
+ cmpq $32,%rdx
+ jb .Lxts_enc_one
+ je .Lxts_enc_two
+
+ cmpq $64,%rdx
+ jb .Lxts_enc_three
+ je .Lxts_enc_four
+
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+
+ call _aesni_encrypt6
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ movdqu %xmm5,48(%rsi)
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_one:
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_9:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_9
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10
+ movups %xmm2,(%rsi)
+ leaq 16(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_two:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+
+ call _aesni_encrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10
+ xorps %xmm11,%xmm3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_three:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+
+ call _aesni_encrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_four:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+
+ call _aesni_encrypt4
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_done:
+ andq $15,%r9
+ jz .Lxts_enc_ret
+ movq %r9,%rdx
+
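+# Ciphertext stealing: swap the tail input bytes with the end of the last
+# full ciphertext block, then re-encrypt that block with tweak %xmm10.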
+.Lxts_enc_steal:
+ movzbl (%rdi),%eax
+ movzbl -16(%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,-16(%rsi)
+ movb %cl,0(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz .Lxts_enc_steal
+
+ subq %r9,%rsi
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups -16(%rsi),%xmm2
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_10:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_10
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,-16(%rsi)
+
+.Lxts_enc_ret:
+ leaq 104(%rsp),%rsp
+.Lxts_enc_epilogue:
+ retq
+.size aesni_xts_encrypt,.-aesni_xts_encrypt
+.globl aesni_xts_decrypt
+.type aesni_xts_decrypt,@function
+.align 16
+aesni_xts_decrypt:
+ leaq -104(%rsp),%rsp
+ movups (%r9),%xmm15
+ movl 240(%r8),%eax
+ movl 240(%rcx),%r10d
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+.Loop_enc1_11:
+ aesenc %xmm1,%xmm15
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_11
+ aesenclast %xmm1,%xmm15
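+# If the length is not block-aligned, hold back one full block so the
+# ciphertext-stealing tail below can borrow from it.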
+ xorl %eax,%eax
+ testq $15,%rdx
+ setnz %al
+ shlq $4,%rax
+ subq %rax,%rdx
+
+ movq %rcx,%r11
+ movl %r10d,%eax
+ movq %rdx,%r9
+ andq $-16,%rdx
+
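+# Precompute the first four tweaks into %xmm10-%xmm13, leaving the fifth in
+# %xmm15: each group below multiplies the running tweak by x in GF(2^128),
+# folding the carry back in via the 0x87 constant loaded into %xmm8.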
+ movdqa .Lxts_magic(%rip),%xmm8
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ subq $96,%rdx
+ jc .Lxts_dec_short
+
+ shrl $1,%eax
+ subl $1,%eax
+ movl %eax,%r10d
+ jmp .Lxts_dec_grandloop
+
+.align 16
+.Lxts_dec_grandloop:
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+
+
+
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp)
+ aesdec %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+ aesdec %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesdec %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp .Lxts_dec_loop6_enter
+
+.align 16
+.Lxts_dec_loop6:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+.Lxts_dec_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lxts_dec_loop6
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ aesdec %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups 32(%rcx),%xmm0
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ aesdeclast %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesdeclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdeclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc .Lxts_dec_grandloop
+
+ leal 3(%rax,%rax,1),%eax
+ movq %r11,%rcx
+ movl %eax,%r10d
+
+.Lxts_dec_short:
+ addq $96,%rdx
+ jz .Lxts_dec_done
+
+ cmpq $32,%rdx
+ jb .Lxts_dec_one
+ je .Lxts_dec_two
+
+ cmpq $64,%rdx
+ jb .Lxts_dec_three
+ je .Lxts_dec_four
+
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+
+ call _aesni_decrypt6
+
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ pxor %xmm14,%xmm14
+ movdqu %xmm5,48(%rsi)
+ pcmpgtd %xmm15,%xmm14
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ pshufd $19,%xmm14,%xmm11
+ andq $15,%r9
+ jz .Lxts_dec_ret
+
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm11
+ pxor %xmm15,%xmm11
+ jmp .Lxts_dec_done2
+
+.align 16
+.Lxts_dec_one:
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_12:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_12
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10
+ movups %xmm2,(%rsi)
+ movdqa %xmm12,%xmm11
+ leaq 16(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.align 16
+.Lxts_dec_two:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+
+ call _aesni_decrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm13,%xmm11
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.align 16
+.Lxts_dec_three:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+
+ call _aesni_decrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.align 16
+.Lxts_dec_four:
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movups (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movups 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+
+ call _aesni_decrypt4
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm14,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.align 16
+.Lxts_dec_done:
+ andq $15,%r9
+ jz .Lxts_dec_ret
+.Lxts_dec_done2:
+ movq %r9,%rdx
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups (%rdi),%xmm2
+ xorps %xmm11,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_13:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_13
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm11,%xmm2
+ movups %xmm2,(%rsi)
+
+.Lxts_dec_steal:
+ movzbl 16(%rdi),%eax
+ movzbl (%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,(%rsi)
+ movb %cl,16(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz .Lxts_dec_steal
+
+ subq %r9,%rsi
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups (%rsi),%xmm2
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_14:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_14
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,(%rsi)
+
+.Lxts_dec_ret:
+ leaq 104(%rsp),%rsp
+.Lxts_dec_epilogue:
+ retq
+.size aesni_xts_decrypt,.-aesni_xts_decrypt
+.globl aesni_cbc_encrypt
+.type aesni_cbc_encrypt,@function
+.align 16
+aesni_cbc_encrypt:
+ testq %rdx,%rdx
+ jz .Lcbc_ret
+
+ movl 240(%rcx),%r10d
+ movq %rcx,%r11
+ testl %r9d,%r9d
+ jz .Lcbc_decrypt
+
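+# CBC encryption is serial: one block per loop pass, chaining through %xmm2.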
+ movups (%r8),%xmm2
+ movl %r10d,%eax
+ cmpq $16,%rdx
+ jb .Lcbc_enc_tail
+ subq $16,%rdx
+ jmp .Lcbc_enc_loop
+.align 16
+.Lcbc_enc_loop:
+ movups (%rdi),%xmm3
+ leaq 16(%rdi),%rdi
+
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ xorps %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ xorps %xmm3,%xmm2
+.Loop_enc1_15:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_15
+ aesenclast %xmm1,%xmm2
+ movl %r10d,%eax
+ movq %r11,%rcx
+ movups %xmm2,0(%rsi)
+ leaq 16(%rsi),%rsi
+ subq $16,%rdx
+ jnc .Lcbc_enc_loop
+ addq $16,%rdx
+ jnz .Lcbc_enc_tail
+ movups %xmm2,(%r8)
+ jmp .Lcbc_ret
+
+.Lcbc_enc_tail:
+ movq %rdx,%rcx
+ xchgq %rdi,%rsi
+.long 0x9066A4F3
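+# 0x9066A4F3 = "rep movsb" plus a two-byte nop: copy the short tail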
+ movl $16,%ecx
+ subq %rdx,%rcx
+ xorl %eax,%eax
+.long 0x9066AAF3
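+# 0x9066AAF3 = "rep stosb" plus a two-byte nop: zero-pad to a full block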
+ leaq -16(%rdi),%rdi
+ movl %r10d,%eax
+ movq %rdi,%rsi
+ movq %r11,%rcx
+ xorq %rdx,%rdx
+ jmp .Lcbc_enc_loop
+
+.align 16
+.Lcbc_decrypt:
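+# Decryption has no chaining dependency, so it runs eight blocks per
+# iteration; the IV / last ciphertext block is parked at -24(%rsp) in the
+# 128-byte red zone.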
+ movups (%r8),%xmm9
+ movl %r10d,%eax
+ cmpq $112,%rdx
+ jbe .Lcbc_dec_tail
+ shrl $1,%r10d
+ subq $112,%rdx
+ movl %r10d,%eax
+ movaps %xmm9,-24(%rsp)
+ jmp .Lcbc_dec_loop8_enter
+.align 16
+.Lcbc_dec_loop8:
+ movaps %xmm0,-24(%rsp)
+ movups %xmm9,(%rsi)
+ leaq 16(%rsi),%rsi
+.Lcbc_dec_loop8_enter:
+ movups (%rcx),%xmm0
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 16(%rcx),%xmm1
+
+ leaq 32(%rcx),%rcx
+ movdqu 32(%rdi),%xmm4
+ xorps %xmm0,%xmm2
+ movdqu 48(%rdi),%xmm5
+ xorps %xmm0,%xmm3
+ movdqu 64(%rdi),%xmm6
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ movdqu 80(%rdi),%xmm7
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqu 96(%rdi),%xmm8
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqu 112(%rdi),%xmm9
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+
+ call .Ldec_loop8_enter
+
+ movups (%rdi),%xmm1
+ movups 16(%rdi),%xmm0
+ xorps -24(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm1
+ xorps %xmm0,%xmm8
+ movups 112(%rdi),%xmm0
+ xorps %xmm1,%xmm9
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movq %r11,%rcx
+ movups %xmm7,80(%rsi)
+ leaq 128(%rdi),%rdi
+ movups %xmm8,96(%rsi)
+ leaq 112(%rsi),%rsi
+ subq $128,%rdx
+ ja .Lcbc_dec_loop8
+
+ movaps %xmm9,%xmm2
+ movaps %xmm0,%xmm9
+ addq $112,%rdx
+ jle .Lcbc_dec_tail_collected
+ movups %xmm2,(%rsi)
+ leal 1(%r10,%r10,1),%eax
+ leaq 16(%rsi),%rsi
+.Lcbc_dec_tail:
+ movups (%rdi),%xmm2
+ movaps %xmm2,%xmm8
+ cmpq $16,%rdx
+ jbe .Lcbc_dec_one
+
+ movups 16(%rdi),%xmm3
+ movaps %xmm3,%xmm7
+ cmpq $32,%rdx
+ jbe .Lcbc_dec_two
+
+ movups 32(%rdi),%xmm4
+ movaps %xmm4,%xmm6
+ cmpq $48,%rdx
+ jbe .Lcbc_dec_three
+
+ movups 48(%rdi),%xmm5
+ cmpq $64,%rdx
+ jbe .Lcbc_dec_four
+
+ movups 64(%rdi),%xmm6
+ cmpq $80,%rdx
+ jbe .Lcbc_dec_five
+
+ movups 80(%rdi),%xmm7
+ cmpq $96,%rdx
+ jbe .Lcbc_dec_six
+
+ movups 96(%rdi),%xmm8
+ movaps %xmm9,-24(%rsp)
+ call _aesni_decrypt8
+ movups (%rdi),%xmm1
+ movups 16(%rdi),%xmm0
+ xorps -24(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm9
+ xorps %xmm0,%xmm8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movaps %xmm8,%xmm2
+ subq $112,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_16:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_16
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm9,%xmm2
+ movaps %xmm8,%xmm9
+ subq $16,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_two:
+ xorps %xmm4,%xmm4
+ call _aesni_decrypt3
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ movaps %xmm7,%xmm9
+ movaps %xmm3,%xmm2
+ leaq 16(%rsi),%rsi
+ subq $32,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_three:
+ call _aesni_decrypt3
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ movaps %xmm6,%xmm9
+ movaps %xmm4,%xmm2
+ leaq 32(%rsi),%rsi
+ subq $48,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_four:
+ call _aesni_decrypt4
+ xorps %xmm9,%xmm2
+ movups 48(%rdi),%xmm9
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ xorps %xmm6,%xmm5
+ movups %xmm4,32(%rsi)
+ movaps %xmm5,%xmm2
+ leaq 48(%rsi),%rsi
+ subq $64,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_five:
+ xorps %xmm7,%xmm7
+ call _aesni_decrypt6
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm9
+ xorps %xmm1,%xmm6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ movaps %xmm6,%xmm2
+ subq $80,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_six:
+ call _aesni_decrypt6
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm0
+ xorps %xmm1,%xmm6
+ movups 80(%rdi),%xmm9
+ xorps %xmm0,%xmm7
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ movaps %xmm7,%xmm2
+ subq $96,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_tail_collected:
+ andq $15,%rdx
+ movups %xmm9,(%r8)
+ jnz .Lcbc_dec_tail_partial
+ movups %xmm2,(%rsi)
+ jmp .Lcbc_dec_ret
+.align 16
+.Lcbc_dec_tail_partial:
+ movaps %xmm2,-24(%rsp)
+ movq $16,%rcx
+ movq %rsi,%rdi
+ subq %rdx,%rcx
+ leaq -24(%rsp),%rsi
+.long 0x9066A4F3
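+# 0x9066A4F3 = "rep movsb" plus a two-byte nop: copy the partial block out
+# of the stack buffer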
+
+.Lcbc_dec_ret:
+.Lcbc_ret:
+ retq
+.size aesni_cbc_encrypt,.-aesni_cbc_encrypt
+.globl aesni_set_decrypt_key
+.type aesni_set_decrypt_key,@function
+.align 16
+aesni_set_decrypt_key:
+ subq $8,%rsp
+ call __aesni_set_encrypt_key
+ shll $4,%esi
+ testl %eax,%eax
+ jnz .Ldec_key_ret
+ leaq 16(%rdx,%rsi,1),%rdi
+
+ movups (%rdx),%xmm0
+ movups (%rdi),%xmm1
+ movups %xmm0,(%rdi)
+ movups %xmm1,(%rdx)
+ leaq 16(%rdx),%rdx
+ leaq -16(%rdi),%rdi
+
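+# Turn the encryption schedule into a decryption one: swap the round keys
+# end-for-end and run aesimc (InvMixColumns) on all but the first and last.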
+.Ldec_key_inverse:
+ movups (%rdx),%xmm0
+ movups (%rdi),%xmm1
+ aesimc %xmm0,%xmm0
+ aesimc %xmm1,%xmm1
+ leaq 16(%rdx),%rdx
+ leaq -16(%rdi),%rdi
+ movups %xmm0,16(%rdi)
+ movups %xmm1,-16(%rdx)
+ cmpq %rdx,%rdi
+ ja .Ldec_key_inverse
+
+ movups (%rdx),%xmm0
+ aesimc %xmm0,%xmm0
+ movups %xmm0,(%rdi)
+.Ldec_key_ret:
+ addq $8,%rsp
+ retq
+.LSEH_end_set_decrypt_key:
+.size aesni_set_decrypt_key,.-aesni_set_decrypt_key
+.globl aesni_set_encrypt_key
+.type aesni_set_encrypt_key,@function
+.align 16
+aesni_set_encrypt_key:
+__aesni_set_encrypt_key:
+ subq $8,%rsp
+ movq $-1,%rax
+ testq %rdi,%rdi
+ jz .Lenc_key_ret
+ testq %rdx,%rdx
+ jz .Lenc_key_ret
+
+ movups (%rdi),%xmm0
+ xorps %xmm4,%xmm4
+ leaq 16(%rdx),%rax
+ cmpl $256,%esi
+ je .L14rounds
+ cmpl $192,%esi
+ je .L12rounds
+ cmpl $128,%esi
+ jne .Lbad_keybits
+
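+# AES-128 schedule: ten aeskeygenassist steps with round constants
+# 1,2,4,...,128,0x1b,0x36 (27 and 54 below); the stored count of 9 is the
+# number of aesenc rounds before the closing aesenclast.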
+.L10rounds:
+ movl $9,%esi
+ movups %xmm0,(%rdx)
+ aeskeygenassist $1,%xmm0,%xmm1
+ call .Lkey_expansion_128_cold
+ aeskeygenassist $2,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $4,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $8,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $16,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $32,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $64,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $128,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $27,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $54,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ movups %xmm0,(%rax)
+ movl %esi,80(%rax)
+ xorl %eax,%eax
+ jmp .Lenc_key_ret
+
+.align 16
+.L12rounds:
+ movq 16(%rdi),%xmm2
+ movl $11,%esi
+ movups %xmm0,(%rdx)
+ aeskeygenassist $1,%xmm2,%xmm1
+ call .Lkey_expansion_192a_cold
+ aeskeygenassist $2,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $8,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $32,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $128,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ movups %xmm0,(%rax)
+ movl %esi,48(%rax)
+ xorq %rax,%rax
+ jmp .Lenc_key_ret
+
+.align 16
+.L14rounds:
+ movups 16(%rdi),%xmm2
+ movl $13,%esi
+ leaq 16(%rax),%rax
+ movups %xmm0,(%rdx)
+ movups %xmm2,16(%rdx)
+ aeskeygenassist $1,%xmm2,%xmm1
+ call .Lkey_expansion_256a_cold
+ aeskeygenassist $1,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $2,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $2,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $4,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $8,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $8,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $16,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $32,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $32,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ movups %xmm0,(%rax)
+ movl %esi,16(%rax)
+ xorq %rax,%rax
+ jmp .Lenc_key_ret
+
+.align 16
+.Lbad_keybits:
+ movq $-2,%rax
+.Lenc_key_ret:
+ addq $8,%rsp
+ retq
+.LSEH_end_set_encrypt_key:
+
+.align 16
+.Lkey_expansion_128:
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_128_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ retq
+
+.align 16
+.Lkey_expansion_192a:
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_192a_cold:
+ movaps %xmm2,%xmm5
+.Lkey_expansion_192b_warm:
+ shufps $16,%xmm0,%xmm4
+ movdqa %xmm2,%xmm3
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ pslldq $4,%xmm3
+ xorps %xmm4,%xmm0
+ pshufd $85,%xmm1,%xmm1
+ pxor %xmm3,%xmm2
+ pxor %xmm1,%xmm0
+ pshufd $255,%xmm0,%xmm3
+ pxor %xmm3,%xmm2
+ retq
+
+.align 16
+.Lkey_expansion_192b:
+ movaps %xmm0,%xmm3
+ shufps $68,%xmm0,%xmm5
+ movups %xmm5,(%rax)
+ shufps $78,%xmm2,%xmm3
+ movups %xmm3,16(%rax)
+ leaq 32(%rax),%rax
+ jmp .Lkey_expansion_192b_warm
+
+.align 16
+.Lkey_expansion_256a:
+ movups %xmm2,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_256a_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ retq
+
+.align 16
+.Lkey_expansion_256b:
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+
+ shufps $16,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $140,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $170,%xmm1,%xmm1
+ xorps %xmm1,%xmm2
+ retq
+.size aesni_set_encrypt_key,.-aesni_set_encrypt_key
+.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
+.align 64
+.Lbswap_mask:
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.Lincrement32:
+.long 6,6,6,0
+.Lincrement64:
+.long 1,0,0,0
+.Lxts_magic:
+.long 0x87,0,1,0
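+# 0x87 = x^7+x^2+x+1, the reduction term for doubling an XTS tweak in
+# GF(2^128); the 1 in the third dword propagates the low-half carry.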
+
+.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
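+# ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"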
+.align 64
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/aes/aesni-macosx-x86_64.S b/ext/libressl/crypto/aes/aesni-macosx-x86_64.S
new file mode 100644
index 0000000..6b3216b
--- /dev/null
+++ b/ext/libressl/crypto/aes/aesni-macosx-x86_64.S
@@ -0,0 +1,2536 @@
+#include "x86_arch.h"
+.text
+.globl _aesni_encrypt
+
+.p2align 4
+_aesni_encrypt:
+ movups (%rdi),%xmm2
+ movl 240(%rdx),%eax
+ movups (%rdx),%xmm0
+ movups 16(%rdx),%xmm1
+ leaq 32(%rdx),%rdx
+ xorps %xmm0,%xmm2
+L$oop_enc1_1:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rdx),%xmm1
+ leaq 16(%rdx),%rdx
+ jnz L$oop_enc1_1
+ aesenclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ retq
+
+
+.globl _aesni_decrypt
+
+.p2align 4
+_aesni_decrypt:
+ movups (%rdi),%xmm2
+ movl 240(%rdx),%eax
+ movups (%rdx),%xmm0
+ movups 16(%rdx),%xmm1
+ leaq 32(%rdx),%rdx
+ xorps %xmm0,%xmm2
+L$oop_dec1_2:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rdx),%xmm1
+ leaq 16(%rdx),%rdx
+ jnz L$oop_dec1_2
+ aesdeclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ retq
+
+
+.p2align 4
+_aesni_encrypt3:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+
+L$enc_loop3:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz L$enc_loop3
+
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ retq
+
+
+.p2align 4
+_aesni_decrypt3:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+
+L$dec_loop3:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz L$dec_loop3
+
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ retq
+
+
+.p2align 4
+_aesni_encrypt4:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+
+L$enc_loop4:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz L$enc_loop4
+
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ retq
+
+
+.p2align 4
+_aesni_decrypt4:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+
+L$dec_loop4:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz L$dec_loop4
+
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ retq
+
+
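+# Six-block parallel encrypt: the key-whitening XORs are interleaved with
+# the first aesenc round; %eax arrives as the stored round count and is
+# halved, two rounds per loop pass.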
+.p2align 4
+_aesni_encrypt6:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesenc %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm7
+ jmp L$enc_loop6_enter
+.p2align 4
+L$enc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+L$enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz L$enc_loop6
+
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ retq
+
+
+.p2align 4
+_aesni_decrypt6:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm7
+ jmp L$dec_loop6_enter
+.p2align 4
+L$dec_loop6:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+L$dec_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz L$dec_loop6
+
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ retq
+
+
+.p2align 4
+_aesni_encrypt8:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesenc %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesenc %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp L$enc_loop8_enter
+.p2align 4
+L$enc_loop8:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+L$enc_loop8_enter:
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ aesenc %xmm0,%xmm8
+ aesenc %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ jnz L$enc_loop8
+
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ aesenclast %xmm0,%xmm8
+ aesenclast %xmm0,%xmm9
+ retq
+
+
+.p2align 4
+_aesni_decrypt8:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp L$dec_loop8_enter
+.p2align 4
+L$dec_loop8:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+L$dec_loop8_enter:
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ aesdec %xmm0,%xmm8
+ aesdec %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ jnz L$dec_loop8
+
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ aesdeclast %xmm0,%xmm8
+ aesdeclast %xmm0,%xmm9
+ retq
+
+.globl _aesni_ecb_encrypt
+
+.p2align 4
+_aesni_ecb_encrypt:
+ andq $-16,%rdx
+ jz L$ecb_ret
+
+ movl 240(%rcx),%eax
+ movups (%rcx),%xmm0
+ movq %rcx,%r11
+ movl %eax,%r10d
+ testl %r8d,%r8d
+ jz L$ecb_decrypt
+
+ cmpq $128,%rdx
+ jb L$ecb_enc_tail
+
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp L$ecb_enc_loop8_enter
+.p2align 4
+L$ecb_enc_loop8:
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+L$ecb_enc_loop8_enter:
+
+ call _aesni_encrypt8
+
+ subq $128,%rdx
+ jnc L$ecb_enc_loop8
+
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx
+ jz L$ecb_ret
+
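+# 1-7 leftover blocks: load what remains, then branch to the matching width
+# (odd widths zero an unused register and reuse the wider routine).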
+L$ecb_enc_tail:
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb L$ecb_enc_one
+ movups 16(%rdi),%xmm3
+ je L$ecb_enc_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb L$ecb_enc_three
+ movups 48(%rdi),%xmm5
+ je L$ecb_enc_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb L$ecb_enc_five
+ movups 80(%rdi),%xmm7
+ je L$ecb_enc_six
+ movdqu 96(%rdi),%xmm8
+ call _aesni_encrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_enc1_3:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_3
+ aesenclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_two:
+ xorps %xmm4,%xmm4
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_three:
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_four:
+ call _aesni_encrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_five:
+ xorps %xmm7,%xmm7
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_six:
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ jmp L$ecb_ret
+
+.p2align 4
+L$ecb_decrypt:
+ cmpq $128,%rdx
+ jb L$ecb_dec_tail
+
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp L$ecb_dec_loop8_enter
+.p2align 4
+L$ecb_dec_loop8:
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+L$ecb_dec_loop8_enter:
+
+ call _aesni_decrypt8
+
+ movups (%r11),%xmm0
+ subq $128,%rdx
+ jnc L$ecb_dec_loop8
+
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx
+ jz L$ecb_ret
+
+L$ecb_dec_tail:
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb L$ecb_dec_one
+ movups 16(%rdi),%xmm3
+ je L$ecb_dec_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb L$ecb_dec_three
+ movups 48(%rdi),%xmm5
+ je L$ecb_dec_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb L$ecb_dec_five
+ movups 80(%rdi),%xmm7
+ je L$ecb_dec_six
+ movups 96(%rdi),%xmm8
+ movups (%rcx),%xmm0
+ call _aesni_decrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_dec1_4:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_dec1_4
+ aesdeclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_two:
+ xorps %xmm4,%xmm4
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_three:
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_four:
+ call _aesni_decrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_five:
+ xorps %xmm7,%xmm7
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_six:
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+
+L$ecb_ret:
+ retq
+
+.globl _aesni_ccm64_encrypt_blocks
+
+.p2align 4
+_aesni_ccm64_encrypt_blocks:
+ movl 240(%rcx),%eax
+ movdqu (%r8),%xmm9
+ movdqa L$increment64(%rip),%xmm6
+ movdqa L$bswap_mask(%rip),%xmm7
+
+ shrl $1,%eax
+ leaq 0(%rcx),%r11
+ movdqu (%r9),%xmm3
+ movdqa %xmm9,%xmm2
+ movl %eax,%r10d
+.byte 102,68,15,56,0,207
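+# .byte sequence = pshufb %xmm7,%xmm9: byte-swap the counter block through
+# L$bswap_mask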
+ jmp L$ccm64_enc_outer
+.p2align 4
+L$ccm64_enc_outer:
+ movups (%r11),%xmm0
+ movl %r10d,%eax
+ movups (%rdi),%xmm8
+
+ xorps %xmm0,%xmm2
+ movups 16(%r11),%xmm1
+ xorps %xmm8,%xmm0
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm3
+ movups (%rcx),%xmm0
+
+L$ccm64_enc2_loop:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz L$ccm64_enc2_loop
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ paddq %xmm6,%xmm9
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+
+ decq %rdx
+ leaq 16(%rdi),%rdi
+ xorps %xmm2,%xmm8
+ movdqa %xmm9,%xmm2
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215
+ jnz L$ccm64_enc_outer
+
+ movups %xmm3,(%r9)
+ retq
+
+.globl _aesni_ccm64_decrypt_blocks
+
+.p2align 4
+_aesni_ccm64_decrypt_blocks:
+ movl 240(%rcx),%eax
+ movups (%r8),%xmm9
+ movdqu (%r9),%xmm3
+ movdqa L$increment64(%rip),%xmm6
+ movdqa L$bswap_mask(%rip),%xmm7
+
+ movaps %xmm9,%xmm2
+ movl %eax,%r10d
+ movq %rcx,%r11
+.byte 102,68,15,56,0,207
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_enc1_5:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_5
+ aesenclast %xmm1,%xmm2
+ movups (%rdi),%xmm8
+ paddq %xmm6,%xmm9
+ leaq 16(%rdi),%rdi
+ jmp L$ccm64_dec_outer
+.p2align 4
+L$ccm64_dec_outer:
+ xorps %xmm2,%xmm8
+ movdqa %xmm9,%xmm2
+ movl %r10d,%eax
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215
+
+ subq $1,%rdx
+ jz L$ccm64_dec_break
+
+ movups (%r11),%xmm0
+ shrl $1,%eax
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm8,%xmm3
+ movups (%rcx),%xmm0
+
+L$ccm64_dec2_loop:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz L$ccm64_dec2_loop
+ movups (%rdi),%xmm8
+ paddq %xmm6,%xmm9
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ leaq 16(%rdi),%rdi
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ jmp L$ccm64_dec_outer
+
+.p2align 4
+L$ccm64_dec_break:
+
+ movups (%r11),%xmm0
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%r11
+ xorps %xmm8,%xmm3
+L$oop_enc1_6:
+ aesenc %xmm1,%xmm3
+ decl %eax
+ movups (%r11),%xmm1
+ leaq 16(%r11),%r11
+ jnz L$oop_enc1_6
+ aesenclast %xmm1,%xmm3
+ movups %xmm3,(%r9)
+ retq
+
+.globl _aesni_ctr32_encrypt_blocks
+
+.p2align 4
+_aesni_ctr32_encrypt_blocks:
+ cmpq $1,%rdx
+ je L$ctr32_one_shortcut
+
+ movdqu (%r8),%xmm14
+ movdqa L$bswap_mask(%rip),%xmm15
+ xorl %eax,%eax
+.byte 102,69,15,58,22,242,3
+.byte 102,68,15,58,34,240,3
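+# The .byte pairs are pextrd/pinsrd encodings: lift the low 32-bit counter
+# out of the IV in %xmm14 and splice incremented copies into %xmm12/%xmm13.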
+
+ movl 240(%rcx),%eax
+ bswapl %r10d
+ pxor %xmm12,%xmm12
+ pxor %xmm13,%xmm13
+.byte 102,69,15,58,34,226,0
+ leaq 3(%r10),%r11
+.byte 102,69,15,58,34,235,0
+ incl %r10d
+.byte 102,69,15,58,34,226,1
+ incq %r11
+.byte 102,69,15,58,34,235,1
+ incl %r10d
+.byte 102,69,15,58,34,226,2
+ incq %r11
+.byte 102,69,15,58,34,235,2
+ movdqa %xmm12,-40(%rsp)
+.byte 102,69,15,56,0,231
+ movdqa %xmm13,-24(%rsp)
+.byte 102,69,15,56,0,239
+
+ pshufd $192,%xmm12,%xmm2
+ pshufd $128,%xmm12,%xmm3
+ pshufd $64,%xmm12,%xmm4
+ cmpq $6,%rdx
+ jb L$ctr32_tail
+ shrl $1,%eax
+ movq %rcx,%r11
+ movl %eax,%r10d
+ subq $6,%rdx
+ jmp L$ctr32_loop6
+
+.p2align 4
+L$ctr32_loop6:
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm2
+ movups (%r11),%xmm0
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm3
+ movups 16(%r11),%xmm1
+ pshufd $64,%xmm13,%xmm7
+ por %xmm14,%xmm4
+ por %xmm14,%xmm5
+ xorps %xmm0,%xmm2
+ por %xmm14,%xmm6
+ por %xmm14,%xmm7
+
+
+
+
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ movdqa L$increment32(%rip),%xmm13
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ movdqa -40(%rsp),%xmm12
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ jmp L$ctr32_enc_loop6_enter
+.p2align 4
+L$ctr32_enc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+L$ctr32_enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz L$ctr32_enc_loop6
+
+ aesenc %xmm1,%xmm2
+ paddd %xmm13,%xmm12
+ aesenc %xmm1,%xmm3
+ paddd -24(%rsp),%xmm13
+ aesenc %xmm1,%xmm4
+ movdqa %xmm12,-40(%rsp)
+ aesenc %xmm1,%xmm5
+ movdqa %xmm13,-24(%rsp)
+ aesenc %xmm1,%xmm6
+.byte 102,69,15,56,0,231
+ aesenc %xmm1,%xmm7
+.byte 102,69,15,56,0,239
+
+ aesenclast %xmm0,%xmm2
+ movups (%rdi),%xmm8
+ aesenclast %xmm0,%xmm3
+ movups 16(%rdi),%xmm9
+ aesenclast %xmm0,%xmm4
+ movups 32(%rdi),%xmm10
+ aesenclast %xmm0,%xmm5
+ movups 48(%rdi),%xmm11
+ aesenclast %xmm0,%xmm6
+ movups 64(%rdi),%xmm1
+ aesenclast %xmm0,%xmm7
+ movups 80(%rdi),%xmm0
+ leaq 96(%rdi),%rdi
+
+ xorps %xmm2,%xmm8
+ pshufd $192,%xmm12,%xmm2
+ xorps %xmm3,%xmm9
+ pshufd $128,%xmm12,%xmm3
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ pshufd $64,%xmm12,%xmm4
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ xorps %xmm7,%xmm0
+ movups %xmm1,64(%rsi)
+ movups %xmm0,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movl %r10d,%eax
+ subq $6,%rdx
+ jnc L$ctr32_loop6
+
+ addq $6,%rdx
+ jz L$ctr32_done
+ movq %r11,%rcx
+ leal 1(%rax,%rax,1),%eax
+
+L$ctr32_tail:
+ por %xmm14,%xmm2
+ movups (%rdi),%xmm8
+ cmpq $2,%rdx
+ jb L$ctr32_one
+
+ por %xmm14,%xmm3
+ movups 16(%rdi),%xmm9
+ je L$ctr32_two
+
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm4
+ movups 32(%rdi),%xmm10
+ cmpq $4,%rdx
+ jb L$ctr32_three
+
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm5
+ movups 48(%rdi),%xmm11
+ je L$ctr32_four
+
+ por %xmm14,%xmm6
+ xorps %xmm7,%xmm7
+
+ call _aesni_encrypt6
+
+ movups 64(%rdi),%xmm1
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ movups %xmm1,64(%rsi)
+ jmp L$ctr32_done
+
+.p2align 4
+L$ctr32_one_shortcut:
+ movups (%r8),%xmm2
+ movups (%rdi),%xmm8
+ movl 240(%rcx),%eax
+L$ctr32_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_enc1_7:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_7
+ aesenclast %xmm1,%xmm2
+ xorps %xmm2,%xmm8
+ movups %xmm8,(%rsi)
+ jmp L$ctr32_done
+
+.p2align 4
+L$ctr32_two:
+ xorps %xmm4,%xmm4
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ movups %xmm9,16(%rsi)
+ jmp L$ctr32_done
+
+.p2align 4
+L$ctr32_three:
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ movups %xmm10,32(%rsi)
+ jmp L$ctr32_done
+
+.p2align 4
+L$ctr32_four:
+ call _aesni_encrypt4
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ movups %xmm11,48(%rsi)
+
+L$ctr32_done:
+ retq
+
+.globl _aesni_xts_encrypt
+
+.p2align 4
+_aesni_xts_encrypt:
+ leaq -104(%rsp),%rsp
+ movups (%r9),%xmm15
+ movl 240(%r8),%eax
+ movl 240(%rcx),%r10d
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+L$oop_enc1_8:
+ aesenc %xmm1,%xmm15
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz L$oop_enc1_8
+ aesenclast %xmm1,%xmm15
+ movq %rcx,%r11
+ movl %r10d,%eax
+ movq %rdx,%r9
+ andq $-16,%rdx
+
+ movdqa L$xts_magic(%rip),%xmm8
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ subq $96,%rdx
+ jc L$xts_enc_short
+
+ shrl $1,%eax
+ subl $1,%eax
+ movl %eax,%r10d
+ jmp L$xts_enc_grandloop
+
+.p2align 4
+L$xts_enc_grandloop:
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+
+
+
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp)
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+ aesenc %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesenc %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp L$xts_enc_loop6_enter
+
+.p2align 4
+L$xts_enc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+L$xts_enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz L$xts_enc_loop6
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ aesenc %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups 32(%rcx),%xmm0
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ aesenclast %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesenclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc L$xts_enc_grandloop
+
+ leal 3(%rax,%rax,1),%eax
+ movq %r11,%rcx
+ movl %eax,%r10d
+
+L$xts_enc_short:
+ addq $96,%rdx
+ jz L$xts_enc_done
+
+ cmpq $32,%rdx
+ jb L$xts_enc_one
+ je L$xts_enc_two
+
+ cmpq $64,%rdx
+ jb L$xts_enc_three
+ je L$xts_enc_four
+
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+
+ call _aesni_encrypt6
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ movdqu %xmm5,48(%rsi)
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ jmp L$xts_enc_done
+
+.p2align 4
+L$xts_enc_one:
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_enc1_9:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_9
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10
+ movups %xmm2,(%rsi)
+ leaq 16(%rsi),%rsi
+ jmp L$xts_enc_done
+
+.p2align 4
+L$xts_enc_two:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+
+ call _aesni_encrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10
+ xorps %xmm11,%xmm3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp L$xts_enc_done
+
+.p2align 4
+L$xts_enc_three:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+
+ call _aesni_encrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp L$xts_enc_done
+
+.p2align 4
+L$xts_enc_four:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+
+ call _aesni_encrypt4
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp L$xts_enc_done
+
+.p2align 4
+L$xts_enc_done:
+ andq $15,%r9
+ jz L$xts_enc_ret
+ movq %r9,%rdx
+
+L$xts_enc_steal:
+ movzbl (%rdi),%eax
+ movzbl -16(%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,-16(%rsi)
+ movb %cl,0(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz L$xts_enc_steal
+
+ subq %r9,%rsi
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups -16(%rsi),%xmm2
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_enc1_10:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_10
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,-16(%rsi)
+
+L$xts_enc_ret:
+ leaq 104(%rsp),%rsp
+L$xts_enc_epilogue:
+ retq
+
+.globl _aesni_xts_decrypt
+
+.p2align 4
+_aesni_xts_decrypt:
+ leaq -104(%rsp),%rsp
+ movups (%r9),%xmm15
+ movl 240(%r8),%eax
+ movl 240(%rcx),%r10d
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+L$oop_enc1_11:
+ aesenc %xmm1,%xmm15
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz L$oop_enc1_11
+ aesenclast %xmm1,%xmm15
+ xorl %eax,%eax
+ testq $15,%rdx
+ setnz %al
+ shlq $4,%rax
+ subq %rax,%rdx
+
+ movq %rcx,%r11
+ movl %r10d,%eax
+ movq %rdx,%r9
+ andq $-16,%rdx
+
+ movdqa L$xts_magic(%rip),%xmm8
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ subq $96,%rdx
+ jc L$xts_dec_short
+
+ shrl $1,%eax
+ subl $1,%eax
+ movl %eax,%r10d
+ jmp L$xts_dec_grandloop
+
+.p2align 4
+L$xts_dec_grandloop:
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+
+
+
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp)
+ aesdec %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+ aesdec %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesdec %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp L$xts_dec_loop6_enter
+
+.p2align 4
+L$xts_dec_loop6:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+L$xts_dec_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz L$xts_dec_loop6
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ aesdec %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups 32(%rcx),%xmm0
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ aesdeclast %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesdeclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdeclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc L$xts_dec_grandloop
+
+ leal 3(%rax,%rax,1),%eax
+ movq %r11,%rcx
+ movl %eax,%r10d
+
+L$xts_dec_short:
+ addq $96,%rdx
+ jz L$xts_dec_done
+
+ cmpq $32,%rdx
+ jb L$xts_dec_one
+ je L$xts_dec_two
+
+ cmpq $64,%rdx
+ jb L$xts_dec_three
+ je L$xts_dec_four
+
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+
+ call _aesni_decrypt6
+
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ pxor %xmm14,%xmm14
+ movdqu %xmm5,48(%rsi)
+ pcmpgtd %xmm15,%xmm14
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ pshufd $19,%xmm14,%xmm11
+ andq $15,%r9
+ jz L$xts_dec_ret
+
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm11
+ pxor %xmm15,%xmm11
+ jmp L$xts_dec_done2
+
+.p2align 4
+L$xts_dec_one:
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_dec1_12:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_dec1_12
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10
+ movups %xmm2,(%rsi)
+ movdqa %xmm12,%xmm11
+ leaq 16(%rsi),%rsi
+ jmp L$xts_dec_done
+
+.p2align 4
+L$xts_dec_two:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+
+ call _aesni_decrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm13,%xmm11
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp L$xts_dec_done
+
+.p2align 4
+L$xts_dec_three:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+
+ call _aesni_decrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp L$xts_dec_done
+
+.p2align 4
+L$xts_dec_four:
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movups (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movups 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+
+ call _aesni_decrypt4
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm14,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp L$xts_dec_done
+
+.p2align 4
+L$xts_dec_done:
+ andq $15,%r9
+ jz L$xts_dec_ret
+L$xts_dec_done2:
+ movq %r9,%rdx
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups (%rdi),%xmm2
+ xorps %xmm11,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_dec1_13:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_dec1_13
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm11,%xmm2
+ movups %xmm2,(%rsi)
+
+L$xts_dec_steal:
+ movzbl 16(%rdi),%eax
+ movzbl (%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,(%rsi)
+ movb %cl,16(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz L$xts_dec_steal
+
+ subq %r9,%rsi
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups (%rsi),%xmm2
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_dec1_14:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_dec1_14
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,(%rsi)
+
+L$xts_dec_ret:
+ leaq 104(%rsp),%rsp
+L$xts_dec_epilogue:
+ retq
+
+.globl _aesni_cbc_encrypt
+
+.p2align 4
+_aesni_cbc_encrypt:
+ testq %rdx,%rdx
+ jz L$cbc_ret
+
+ movl 240(%rcx),%r10d
+ movq %rcx,%r11
+ testl %r9d,%r9d
+ jz L$cbc_decrypt
+
+ movups (%r8),%xmm2
+ movl %r10d,%eax
+ cmpq $16,%rdx
+ jb L$cbc_enc_tail
+ subq $16,%rdx
+ jmp L$cbc_enc_loop
+.p2align 4
+L$cbc_enc_loop:
+ movups (%rdi),%xmm3
+ leaq 16(%rdi),%rdi
+
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ xorps %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ xorps %xmm3,%xmm2
+L$oop_enc1_15:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_15
+ aesenclast %xmm1,%xmm2
+ movl %r10d,%eax
+ movq %r11,%rcx
+ movups %xmm2,0(%rsi)
+ leaq 16(%rsi),%rsi
+ subq $16,%rdx
+ jnc L$cbc_enc_loop
+ addq $16,%rdx
+ jnz L$cbc_enc_tail
+ movups %xmm2,(%r8)
+ jmp L$cbc_ret
+
+L$cbc_enc_tail:
+ movq %rdx,%rcx
+ xchgq %rdi,%rsi
+.long 0x9066A4F3
+ movl $16,%ecx
+ subq %rdx,%rcx
+ xorl %eax,%eax
+.long 0x9066AAF3
+ leaq -16(%rdi),%rdi
+ movl %r10d,%eax
+ movq %rdi,%rsi
+ movq %r11,%rcx
+ xorq %rdx,%rdx
+ jmp L$cbc_enc_loop
+
+.p2align 4
+L$cbc_decrypt:
+ movups (%r8),%xmm9
+ movl %r10d,%eax
+ cmpq $112,%rdx
+ jbe L$cbc_dec_tail
+ shrl $1,%r10d
+ subq $112,%rdx
+ movl %r10d,%eax
+ movaps %xmm9,-24(%rsp)
+ jmp L$cbc_dec_loop8_enter
+.p2align 4
+L$cbc_dec_loop8:
+ movaps %xmm0,-24(%rsp)
+ movups %xmm9,(%rsi)
+ leaq 16(%rsi),%rsi
+L$cbc_dec_loop8_enter:
+ movups (%rcx),%xmm0
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 16(%rcx),%xmm1
+
+ leaq 32(%rcx),%rcx
+ movdqu 32(%rdi),%xmm4
+ xorps %xmm0,%xmm2
+ movdqu 48(%rdi),%xmm5
+ xorps %xmm0,%xmm3
+ movdqu 64(%rdi),%xmm6
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ movdqu 80(%rdi),%xmm7
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqu 96(%rdi),%xmm8
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqu 112(%rdi),%xmm9
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+
+ call L$dec_loop8_enter
+
+ movups (%rdi),%xmm1
+ movups 16(%rdi),%xmm0
+ xorps -24(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm1
+ xorps %xmm0,%xmm8
+ movups 112(%rdi),%xmm0
+ xorps %xmm1,%xmm9
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movq %r11,%rcx
+ movups %xmm7,80(%rsi)
+ leaq 128(%rdi),%rdi
+ movups %xmm8,96(%rsi)
+ leaq 112(%rsi),%rsi
+ subq $128,%rdx
+ ja L$cbc_dec_loop8
+
+ movaps %xmm9,%xmm2
+ movaps %xmm0,%xmm9
+ addq $112,%rdx
+ jle L$cbc_dec_tail_collected
+ movups %xmm2,(%rsi)
+ leal 1(%r10,%r10,1),%eax
+ leaq 16(%rsi),%rsi
+L$cbc_dec_tail:
+ movups (%rdi),%xmm2
+ movaps %xmm2,%xmm8
+ cmpq $16,%rdx
+ jbe L$cbc_dec_one
+
+ movups 16(%rdi),%xmm3
+ movaps %xmm3,%xmm7
+ cmpq $32,%rdx
+ jbe L$cbc_dec_two
+
+ movups 32(%rdi),%xmm4
+ movaps %xmm4,%xmm6
+ cmpq $48,%rdx
+ jbe L$cbc_dec_three
+
+ movups 48(%rdi),%xmm5
+ cmpq $64,%rdx
+ jbe L$cbc_dec_four
+
+ movups 64(%rdi),%xmm6
+ cmpq $80,%rdx
+ jbe L$cbc_dec_five
+
+ movups 80(%rdi),%xmm7
+ cmpq $96,%rdx
+ jbe L$cbc_dec_six
+
+ movups 96(%rdi),%xmm8
+ movaps %xmm9,-24(%rsp)
+ call _aesni_decrypt8
+ movups (%rdi),%xmm1
+ movups 16(%rdi),%xmm0
+ xorps -24(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm9
+ xorps %xmm0,%xmm8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movaps %xmm8,%xmm2
+ subq $112,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+L$cbc_dec_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_dec1_16:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_dec1_16
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm9,%xmm2
+ movaps %xmm8,%xmm9
+ subq $16,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+L$cbc_dec_two:
+ xorps %xmm4,%xmm4
+ call _aesni_decrypt3
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ movaps %xmm7,%xmm9
+ movaps %xmm3,%xmm2
+ leaq 16(%rsi),%rsi
+ subq $32,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+L$cbc_dec_three:
+ call _aesni_decrypt3
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ movaps %xmm6,%xmm9
+ movaps %xmm4,%xmm2
+ leaq 32(%rsi),%rsi
+ subq $48,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+L$cbc_dec_four:
+ call _aesni_decrypt4
+ xorps %xmm9,%xmm2
+ movups 48(%rdi),%xmm9
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ xorps %xmm6,%xmm5
+ movups %xmm4,32(%rsi)
+ movaps %xmm5,%xmm2
+ leaq 48(%rsi),%rsi
+ subq $64,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+L$cbc_dec_five:
+ xorps %xmm7,%xmm7
+ call _aesni_decrypt6
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm9
+ xorps %xmm1,%xmm6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ movaps %xmm6,%xmm2
+ subq $80,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+L$cbc_dec_six:
+ call _aesni_decrypt6
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm0
+ xorps %xmm1,%xmm6
+ movups 80(%rdi),%xmm9
+ xorps %xmm0,%xmm7
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ movaps %xmm7,%xmm2
+ subq $96,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+L$cbc_dec_tail_collected:
+ andq $15,%rdx
+ movups %xmm9,(%r8)
+ jnz L$cbc_dec_tail_partial
+ movups %xmm2,(%rsi)
+ jmp L$cbc_dec_ret
+.p2align 4
+L$cbc_dec_tail_partial:
+ movaps %xmm2,-24(%rsp)
+ movq $16,%rcx
+ movq %rsi,%rdi
+ subq %rdx,%rcx
+ leaq -24(%rsp),%rsi
+.long 0x9066A4F3
+
+L$cbc_dec_ret:
+L$cbc_ret:
+ retq
+
+.globl _aesni_set_decrypt_key
+
+.p2align 4
+_aesni_set_decrypt_key:
+ subq $8,%rsp
+ call __aesni_set_encrypt_key
+ shll $4,%esi
+ testl %eax,%eax
+ jnz L$dec_key_ret
+ leaq 16(%rdx,%rsi,1),%rdi
+
+ movups (%rdx),%xmm0
+ movups (%rdi),%xmm1
+ movups %xmm0,(%rdi)
+ movups %xmm1,(%rdx)
+ leaq 16(%rdx),%rdx
+ leaq -16(%rdi),%rdi
+
+L$dec_key_inverse:
+ movups (%rdx),%xmm0
+ movups (%rdi),%xmm1
+ aesimc %xmm0,%xmm0
+ aesimc %xmm1,%xmm1
+ leaq 16(%rdx),%rdx
+ leaq -16(%rdi),%rdi
+ movups %xmm0,16(%rdi)
+ movups %xmm1,-16(%rdx)
+ cmpq %rdx,%rdi
+ ja L$dec_key_inverse
+
+ movups (%rdx),%xmm0
+ aesimc %xmm0,%xmm0
+ movups %xmm0,(%rdi)
+L$dec_key_ret:
+ addq $8,%rsp
+ retq
+L$SEH_end_set_decrypt_key:
+
+.globl _aesni_set_encrypt_key
+
+.p2align 4
+_aesni_set_encrypt_key:
+__aesni_set_encrypt_key:
+ subq $8,%rsp
+ movq $-1,%rax
+ testq %rdi,%rdi
+ jz L$enc_key_ret
+ testq %rdx,%rdx
+ jz L$enc_key_ret
+
+ movups (%rdi),%xmm0
+ xorps %xmm4,%xmm4
+ leaq 16(%rdx),%rax
+ cmpl $256,%esi
+ je L$14rounds
+ cmpl $192,%esi
+ je L$12rounds
+ cmpl $128,%esi
+ jne L$bad_keybits
+
+L$10rounds:
+ movl $9,%esi
+ movups %xmm0,(%rdx)
+ aeskeygenassist $1,%xmm0,%xmm1
+ call L$key_expansion_128_cold
+ aeskeygenassist $2,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $4,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $8,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $16,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $32,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $64,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $128,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $27,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $54,%xmm0,%xmm1
+ call L$key_expansion_128
+ movups %xmm0,(%rax)
+ movl %esi,80(%rax)
+ xorl %eax,%eax
+ jmp L$enc_key_ret
+
+.p2align 4
+L$12rounds:
+ movq 16(%rdi),%xmm2
+ movl $11,%esi
+ movups %xmm0,(%rdx)
+ aeskeygenassist $1,%xmm2,%xmm1
+ call L$key_expansion_192a_cold
+ aeskeygenassist $2,%xmm2,%xmm1
+ call L$key_expansion_192b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call L$key_expansion_192a
+ aeskeygenassist $8,%xmm2,%xmm1
+ call L$key_expansion_192b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call L$key_expansion_192a
+ aeskeygenassist $32,%xmm2,%xmm1
+ call L$key_expansion_192b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call L$key_expansion_192a
+ aeskeygenassist $128,%xmm2,%xmm1
+ call L$key_expansion_192b
+ movups %xmm0,(%rax)
+ movl %esi,48(%rax)
+ xorq %rax,%rax
+ jmp L$enc_key_ret
+
+.p2align 4
+L$14rounds:
+ movups 16(%rdi),%xmm2
+ movl $13,%esi
+ leaq 16(%rax),%rax
+ movups %xmm0,(%rdx)
+ movups %xmm2,16(%rdx)
+ aeskeygenassist $1,%xmm2,%xmm1
+ call L$key_expansion_256a_cold
+ aeskeygenassist $1,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $2,%xmm2,%xmm1
+ call L$key_expansion_256a
+ aeskeygenassist $2,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call L$key_expansion_256a
+ aeskeygenassist $4,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $8,%xmm2,%xmm1
+ call L$key_expansion_256a
+ aeskeygenassist $8,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call L$key_expansion_256a
+ aeskeygenassist $16,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $32,%xmm2,%xmm1
+ call L$key_expansion_256a
+ aeskeygenassist $32,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call L$key_expansion_256a
+ movups %xmm0,(%rax)
+ movl %esi,16(%rax)
+ xorq %rax,%rax
+ jmp L$enc_key_ret
+
+.p2align 4
+L$bad_keybits:
+ movq $-2,%rax
+L$enc_key_ret:
+ addq $8,%rsp
+ retq
+L$SEH_end_set_encrypt_key:
+
+.p2align 4
+L$key_expansion_128:
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+L$key_expansion_128_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ retq
+
+.p2align 4
+L$key_expansion_192a:
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+L$key_expansion_192a_cold:
+ movaps %xmm2,%xmm5
+L$key_expansion_192b_warm:
+ shufps $16,%xmm0,%xmm4
+ movdqa %xmm2,%xmm3
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ pslldq $4,%xmm3
+ xorps %xmm4,%xmm0
+ pshufd $85,%xmm1,%xmm1
+ pxor %xmm3,%xmm2
+ pxor %xmm1,%xmm0
+ pshufd $255,%xmm0,%xmm3
+ pxor %xmm3,%xmm2
+ retq
+
+.p2align 4
+L$key_expansion_192b:
+ movaps %xmm0,%xmm3
+ shufps $68,%xmm0,%xmm5
+ movups %xmm5,(%rax)
+ shufps $78,%xmm2,%xmm3
+ movups %xmm3,16(%rax)
+ leaq 32(%rax),%rax
+ jmp L$key_expansion_192b_warm
+
+.p2align 4
+L$key_expansion_256a:
+ movups %xmm2,(%rax)
+ leaq 16(%rax),%rax
+L$key_expansion_256a_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ retq
+
+.p2align 4
+L$key_expansion_256b:
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+
+ shufps $16,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $140,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $170,%xmm1,%xmm1
+ xorps %xmm1,%xmm2
+ retq
+
+
+.p2align 6
+L$bswap_mask:
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+L$increment32:
+.long 6,6,6,0
+L$increment64:
+.long 1,0,0,0
+L$xts_magic:
+.long 0x87,0,1,0
+
+.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align 6
diff --git a/ext/libressl/crypto/aes/aesni-masm-x86_64.S b/ext/libressl/crypto/aes/aesni-masm-x86_64.S
new file mode 100644
index 0000000..f2a2490
--- /dev/null
+++ b/ext/libressl/crypto/aes/aesni-masm-x86_64.S
@@ -0,0 +1,3099 @@
+; 1 "crypto/aes/aesni-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/aes/aesni-masm-x86_64.S.tmp" 2
+OPTION DOTNAME
+
+; 1 "./crypto/x86_arch.h" 1
+
+
+; 16 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+; 40 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 "crypto/aes/aesni-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+PUBLIC aesni_encrypt
+
+ALIGN 16
+aesni_encrypt PROC PUBLIC
+ movups xmm2,XMMWORD PTR[rcx]
+ mov eax,DWORD PTR[240+r8]
+ movups xmm0,XMMWORD PTR[r8]
+ movups xmm1,XMMWORD PTR[16+r8]
+ lea r8,QWORD PTR[32+r8]
+ xorps xmm2,xmm0
+$L$oop_enc1_1::
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[r8]
+ lea r8,QWORD PTR[16+r8]
+ jnz $L$oop_enc1_1
+ aesenclast xmm2,xmm1
+ movups XMMWORD PTR[rdx],xmm2
+ DB 0F3h,0C3h ;repret
+aesni_encrypt ENDP
+
+PUBLIC aesni_decrypt
+
+ALIGN 16
+aesni_decrypt PROC PUBLIC
+ movups xmm2,XMMWORD PTR[rcx]
+ mov eax,DWORD PTR[240+r8]
+ movups xmm0,XMMWORD PTR[r8]
+ movups xmm1,XMMWORD PTR[16+r8]
+ lea r8,QWORD PTR[32+r8]
+ xorps xmm2,xmm0
+$L$oop_dec1_2::
+ aesdec xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[r8]
+ lea r8,QWORD PTR[16+r8]
+ jnz $L$oop_dec1_2
+ aesdeclast xmm2,xmm1
+ movups XMMWORD PTR[rdx],xmm2
+ DB 0F3h,0C3h ;repret
+aesni_decrypt ENDP
+
+ALIGN 16
+_aesni_encrypt3 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shr eax,1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+
+$L$enc_loop3::
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ dec eax
+ aesenc xmm4,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$enc_loop3
+
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ aesenc xmm4,xmm1
+ aesenclast xmm2,xmm0
+ aesenclast xmm3,xmm0
+ aesenclast xmm4,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_encrypt3 ENDP
+
+ALIGN 16
+_aesni_decrypt3 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shr eax,1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+
+$L$dec_loop3::
+ aesdec xmm2,xmm1
+ aesdec xmm3,xmm1
+ dec eax
+ aesdec xmm4,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesdec xmm2,xmm0
+ aesdec xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesdec xmm4,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$dec_loop3
+
+ aesdec xmm2,xmm1
+ aesdec xmm3,xmm1
+ aesdec xmm4,xmm1
+ aesdeclast xmm2,xmm0
+ aesdeclast xmm3,xmm0
+ aesdeclast xmm4,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_decrypt3 ENDP
+
+ALIGN 16
+_aesni_encrypt4 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shr eax,1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ xorps xmm5,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+
+$L$enc_loop4::
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ dec eax
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ aesenc xmm5,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$enc_loop4
+
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenclast xmm2,xmm0
+ aesenclast xmm3,xmm0
+ aesenclast xmm4,xmm0
+ aesenclast xmm5,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_encrypt4 ENDP
+
+ALIGN 16
+_aesni_decrypt4 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shr eax,1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ xorps xmm5,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+
+$L$dec_loop4::
+ aesdec xmm2,xmm1
+ aesdec xmm3,xmm1
+ dec eax
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesdec xmm2,xmm0
+ aesdec xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesdec xmm4,xmm0
+ aesdec xmm5,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$dec_loop4
+
+ aesdec xmm2,xmm1
+ aesdec xmm3,xmm1
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdeclast xmm2,xmm0
+ aesdeclast xmm3,xmm0
+ aesdeclast xmm4,xmm0
+ aesdeclast xmm5,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_decrypt4 ENDP
+
+ALIGN 16
+_aesni_encrypt6 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shr eax,1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ pxor xmm3,xmm0
+ aesenc xmm2,xmm1
+ pxor xmm4,xmm0
+ aesenc xmm3,xmm1
+ pxor xmm5,xmm0
+ aesenc xmm4,xmm1
+ pxor xmm6,xmm0
+ aesenc xmm5,xmm1
+ pxor xmm7,xmm0
+ dec eax
+ aesenc xmm6,xmm1
+ movups xmm0,XMMWORD PTR[rcx]
+ aesenc xmm7,xmm1
+ jmp $L$enc_loop6_enter
+ALIGN 16
+$L$enc_loop6::
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ dec eax
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+$L$enc_loop6_enter::
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ aesenc xmm5,xmm0
+ aesenc xmm6,xmm0
+ aesenc xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$enc_loop6
+
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ aesenclast xmm2,xmm0
+ aesenclast xmm3,xmm0
+ aesenclast xmm4,xmm0
+ aesenclast xmm5,xmm0
+ aesenclast xmm6,xmm0
+ aesenclast xmm7,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_encrypt6 ENDP
+
+ALIGN 16
+_aesni_decrypt6 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shr eax,1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ pxor xmm3,xmm0
+ aesdec xmm2,xmm1
+ pxor xmm4,xmm0
+ aesdec xmm3,xmm1
+ pxor xmm5,xmm0
+ aesdec xmm4,xmm1
+ pxor xmm6,xmm0
+ aesdec xmm5,xmm1
+ pxor xmm7,xmm0
+ dec eax
+ aesdec xmm6,xmm1
+ movups xmm0,XMMWORD PTR[rcx]
+ aesdec xmm7,xmm1
+ jmp $L$dec_loop6_enter
+ALIGN 16
+$L$dec_loop6::
+ aesdec xmm2,xmm1
+ aesdec xmm3,xmm1
+ dec eax
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+$L$dec_loop6_enter::
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesdec xmm2,xmm0
+ aesdec xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesdec xmm4,xmm0
+ aesdec xmm5,xmm0
+ aesdec xmm6,xmm0
+ aesdec xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$dec_loop6
+
+ aesdec xmm2,xmm1
+ aesdec xmm3,xmm1
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+ aesdeclast xmm2,xmm0
+ aesdeclast xmm3,xmm0
+ aesdeclast xmm4,xmm0
+ aesdeclast xmm5,xmm0
+ aesdeclast xmm6,xmm0
+ aesdeclast xmm7,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_decrypt6 ENDP
+
+ALIGN 16
+_aesni_encrypt8 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shr eax,1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ aesenc xmm2,xmm1
+ pxor xmm4,xmm0
+ aesenc xmm3,xmm1
+ pxor xmm5,xmm0
+ aesenc xmm4,xmm1
+ pxor xmm6,xmm0
+ aesenc xmm5,xmm1
+ pxor xmm7,xmm0
+ dec eax
+ aesenc xmm6,xmm1
+ pxor xmm8,xmm0
+ aesenc xmm7,xmm1
+ pxor xmm9,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ aesenc xmm8,xmm1
+ aesenc xmm9,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ jmp $L$enc_loop8_enter
+ALIGN 16
+$L$enc_loop8::
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ dec eax
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ aesenc xmm8,xmm1
+ aesenc xmm9,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+$L$enc_loop8_enter::
+ aesenc xmm2,xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ aesenc xmm5,xmm0
+ aesenc xmm6,xmm0
+ aesenc xmm7,xmm0
+ aesenc xmm8,xmm0
+ aesenc xmm9,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$enc_loop8
+
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ aesenc xmm8,xmm1
+ aesenc xmm9,xmm1
+ aesenclast xmm2,xmm0
+ aesenclast xmm3,xmm0
+ aesenclast xmm4,xmm0
+ aesenclast xmm5,xmm0
+ aesenclast xmm6,xmm0
+ aesenclast xmm7,xmm0
+ aesenclast xmm8,xmm0
+ aesenclast xmm9,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_encrypt8 ENDP
+
+ALIGN 16
+_aesni_decrypt8 PROC PRIVATE
+ movups xmm0,XMMWORD PTR[rcx]
+ shr eax,1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm0
+ aesdec xmm2,xmm1
+ pxor xmm4,xmm0
+ aesdec xmm3,xmm1
+ pxor xmm5,xmm0
+ aesdec xmm4,xmm1
+ pxor xmm6,xmm0
+ aesdec xmm5,xmm1
+ pxor xmm7,xmm0
+ dec eax
+ aesdec xmm6,xmm1
+ pxor xmm8,xmm0
+ aesdec xmm7,xmm1
+ pxor xmm9,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ aesdec xmm8,xmm1
+ aesdec xmm9,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ jmp $L$dec_loop8_enter
+ALIGN 16
+$L$dec_loop8::
+ aesdec xmm2,xmm1
+ aesdec xmm3,xmm1
+ dec eax
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+ aesdec xmm8,xmm1
+ aesdec xmm9,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+$L$dec_loop8_enter::
+ aesdec xmm2,xmm0
+ aesdec xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesdec xmm4,xmm0
+ aesdec xmm5,xmm0
+ aesdec xmm6,xmm0
+ aesdec xmm7,xmm0
+ aesdec xmm8,xmm0
+ aesdec xmm9,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$dec_loop8
+
+ aesdec xmm2,xmm1
+ aesdec xmm3,xmm1
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+ aesdec xmm8,xmm1
+ aesdec xmm9,xmm1
+ aesdeclast xmm2,xmm0
+ aesdeclast xmm3,xmm0
+ aesdeclast xmm4,xmm0
+ aesdeclast xmm5,xmm0
+ aesdeclast xmm6,xmm0
+ aesdeclast xmm7,xmm0
+ aesdeclast xmm8,xmm0
+ aesdeclast xmm9,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_decrypt8 ENDP
+PUBLIC aesni_ecb_encrypt
+
+ALIGN 16
+aesni_ecb_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ecb_encrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+
+
+ and rdx,-16
+ jz $L$ecb_ret
+
+ mov eax,DWORD PTR[240+rcx]
+ movups xmm0,XMMWORD PTR[rcx]
+ mov r11,rcx
+ mov r10d,eax
+ test r8d,r8d
+ jz $L$ecb_decrypt
+
+ cmp rdx,080h
+ jb $L$ecb_enc_tail
+
+ movdqu xmm2,XMMWORD PTR[rdi]
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ lea rdi,QWORD PTR[128+rdi]
+ sub rdx,080h
+ jmp $L$ecb_enc_loop8_enter
+ALIGN 16
+$L$ecb_enc_loop8::
+ movups XMMWORD PTR[rsi],xmm2
+ mov rcx,r11
+ movdqu xmm2,XMMWORD PTR[rdi]
+ mov eax,r10d
+ movups XMMWORD PTR[16+rsi],xmm3
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movups XMMWORD PTR[32+rsi],xmm4
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movups XMMWORD PTR[48+rsi],xmm5
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movups XMMWORD PTR[64+rsi],xmm6
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ movups XMMWORD PTR[80+rsi],xmm7
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movups XMMWORD PTR[96+rsi],xmm8
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ movups XMMWORD PTR[112+rsi],xmm9
+ lea rsi,QWORD PTR[128+rsi]
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ lea rdi,QWORD PTR[128+rdi]
+$L$ecb_enc_loop8_enter::
+
+ call _aesni_encrypt8
+
+ sub rdx,080h
+ jnc $L$ecb_enc_loop8
+
+ movups XMMWORD PTR[rsi],xmm2
+ mov rcx,r11
+ movups XMMWORD PTR[16+rsi],xmm3
+ mov eax,r10d
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ movups XMMWORD PTR[96+rsi],xmm8
+ movups XMMWORD PTR[112+rsi],xmm9
+ lea rsi,QWORD PTR[128+rsi]
+ add rdx,080h
+ jz $L$ecb_ret
+
+$L$ecb_enc_tail::
+ movups xmm2,XMMWORD PTR[rdi]
+ cmp rdx,020h
+ jb $L$ecb_enc_one
+ movups xmm3,XMMWORD PTR[16+rdi]
+ je $L$ecb_enc_two
+ movups xmm4,XMMWORD PTR[32+rdi]
+ cmp rdx,040h
+ jb $L$ecb_enc_three
+ movups xmm5,XMMWORD PTR[48+rdi]
+ je $L$ecb_enc_four
+ movups xmm6,XMMWORD PTR[64+rdi]
+ cmp rdx,060h
+ jb $L$ecb_enc_five
+ movups xmm7,XMMWORD PTR[80+rdi]
+ je $L$ecb_enc_six
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ call _aesni_encrypt8
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ movups XMMWORD PTR[96+rsi],xmm8
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_one::
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_3::
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_3
+ aesenclast xmm2,xmm1
+ movups XMMWORD PTR[rsi],xmm2
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_two::
+ xorps xmm4,xmm4
+ call _aesni_encrypt3
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_three::
+ call _aesni_encrypt3
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_four::
+ call _aesni_encrypt4
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_five::
+ xorps xmm7,xmm7
+ call _aesni_encrypt6
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_six::
+ call _aesni_encrypt6
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ jmp $L$ecb_ret
+
+ALIGN 16
+$L$ecb_decrypt::
+ cmp rdx,080h
+ jb $L$ecb_dec_tail
+
+ movdqu xmm2,XMMWORD PTR[rdi]
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ lea rdi,QWORD PTR[128+rdi]
+ sub rdx,080h
+ jmp $L$ecb_dec_loop8_enter
+ALIGN 16
+$L$ecb_dec_loop8::
+ movups XMMWORD PTR[rsi],xmm2
+ mov rcx,r11
+ movdqu xmm2,XMMWORD PTR[rdi]
+ mov eax,r10d
+ movups XMMWORD PTR[16+rsi],xmm3
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movups XMMWORD PTR[32+rsi],xmm4
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movups XMMWORD PTR[48+rsi],xmm5
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movups XMMWORD PTR[64+rsi],xmm6
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ movups XMMWORD PTR[80+rsi],xmm7
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movups XMMWORD PTR[96+rsi],xmm8
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ movups XMMWORD PTR[112+rsi],xmm9
+ lea rsi,QWORD PTR[128+rsi]
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ lea rdi,QWORD PTR[128+rdi]
+$L$ecb_dec_loop8_enter::
+
+ call _aesni_decrypt8
+
+ movups xmm0,XMMWORD PTR[r11]
+ sub rdx,080h
+ jnc $L$ecb_dec_loop8
+
+ movups XMMWORD PTR[rsi],xmm2
+ mov rcx,r11
+ movups XMMWORD PTR[16+rsi],xmm3
+ mov eax,r10d
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ movups XMMWORD PTR[96+rsi],xmm8
+ movups XMMWORD PTR[112+rsi],xmm9
+ lea rsi,QWORD PTR[128+rsi]
+ add rdx,080h
+ jz $L$ecb_ret
+
+$L$ecb_dec_tail::
+ movups xmm2,XMMWORD PTR[rdi]
+ cmp rdx,020h
+ jb $L$ecb_dec_one
+ movups xmm3,XMMWORD PTR[16+rdi]
+ je $L$ecb_dec_two
+ movups xmm4,XMMWORD PTR[32+rdi]
+ cmp rdx,040h
+ jb $L$ecb_dec_three
+ movups xmm5,XMMWORD PTR[48+rdi]
+ je $L$ecb_dec_four
+ movups xmm6,XMMWORD PTR[64+rdi]
+ cmp rdx,060h
+ jb $L$ecb_dec_five
+ movups xmm7,XMMWORD PTR[80+rdi]
+ je $L$ecb_dec_six
+ movups xmm8,XMMWORD PTR[96+rdi]
+ movups xmm0,XMMWORD PTR[rcx]
+ call _aesni_decrypt8
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ movups XMMWORD PTR[96+rsi],xmm8
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_one::
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_4::
+ aesdec xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_4
+ aesdeclast xmm2,xmm1
+ movups XMMWORD PTR[rsi],xmm2
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_two::
+ xorps xmm4,xmm4
+ call _aesni_decrypt3
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_three::
+ call _aesni_decrypt3
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_four::
+ call _aesni_decrypt4
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_five::
+ xorps xmm7,xmm7
+ call _aesni_decrypt6
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_six::
+ call _aesni_decrypt6
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+
+$L$ecb_ret::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ecb_encrypt::
+aesni_ecb_encrypt ENDP
+PUBLIC aesni_ccm64_encrypt_blocks
+
+ALIGN 16
+aesni_ccm64_encrypt_blocks PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ccm64_encrypt_blocks::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ lea rsp,QWORD PTR[((-88))+rsp]
+ movaps XMMWORD PTR[rsp],xmm6
+ movaps XMMWORD PTR[16+rsp],xmm7
+ movaps XMMWORD PTR[32+rsp],xmm8
+ movaps XMMWORD PTR[48+rsp],xmm9
+$L$ccm64_enc_body::
+ mov eax,DWORD PTR[240+rcx]
+ movdqu xmm9,XMMWORD PTR[r8]
+ movdqa xmm6,XMMWORD PTR[$L$increment64]
+ movdqa xmm7,XMMWORD PTR[$L$bswap_mask]
+
+ shr eax,1
+ lea r11,QWORD PTR[rcx]
+ movdqu xmm3,XMMWORD PTR[r9]
+ movdqa xmm2,xmm9
+ mov r10d,eax
+DB 102,68,15,56,0,207
+ jmp $L$ccm64_enc_outer
+ALIGN 16
+$L$ccm64_enc_outer::
+ movups xmm0,XMMWORD PTR[r11]
+ mov eax,r10d
+ movups xmm8,XMMWORD PTR[rdi]
+
+ xorps xmm2,xmm0
+ movups xmm1,XMMWORD PTR[16+r11]
+ xorps xmm0,xmm8
+ lea rcx,QWORD PTR[32+r11]
+ xorps xmm3,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+
+$L$ccm64_enc2_loop::
+ aesenc xmm2,xmm1
+ dec eax
+ aesenc xmm3,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm3,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$ccm64_enc2_loop
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ paddq xmm9,xmm6
+ aesenclast xmm2,xmm0
+ aesenclast xmm3,xmm0
+
+ dec rdx
+ lea rdi,QWORD PTR[16+rdi]
+ xorps xmm8,xmm2
+ movdqa xmm2,xmm9
+ movups XMMWORD PTR[rsi],xmm8
+ lea rsi,QWORD PTR[16+rsi]
+DB 102,15,56,0,215
+ jnz $L$ccm64_enc_outer
+
+ movups XMMWORD PTR[r9],xmm3
+ movaps xmm6,XMMWORD PTR[rsp]
+ movaps xmm7,XMMWORD PTR[16+rsp]
+ movaps xmm8,XMMWORD PTR[32+rsp]
+ movaps xmm9,XMMWORD PTR[48+rsp]
+ lea rsp,QWORD PTR[88+rsp]
+$L$ccm64_enc_ret::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ccm64_encrypt_blocks::
+aesni_ccm64_encrypt_blocks ENDP
+PUBLIC aesni_ccm64_decrypt_blocks
+
+ALIGN 16
+aesni_ccm64_decrypt_blocks PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ccm64_decrypt_blocks::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ lea rsp,QWORD PTR[((-88))+rsp]
+ movaps XMMWORD PTR[rsp],xmm6
+ movaps XMMWORD PTR[16+rsp],xmm7
+ movaps XMMWORD PTR[32+rsp],xmm8
+ movaps XMMWORD PTR[48+rsp],xmm9
+$L$ccm64_dec_body::
+ mov eax,DWORD PTR[240+rcx]
+ movups xmm9,XMMWORD PTR[r8]
+ movdqu xmm3,XMMWORD PTR[r9]
+ movdqa xmm6,XMMWORD PTR[$L$increment64]
+ movdqa xmm7,XMMWORD PTR[$L$bswap_mask]
+
+ movaps xmm2,xmm9
+ mov r10d,eax
+ mov r11,rcx
+DB 102,68,15,56,0,207
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_5::
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_5
+ aesenclast xmm2,xmm1
+ movups xmm8,XMMWORD PTR[rdi]
+ paddq xmm9,xmm6
+ lea rdi,QWORD PTR[16+rdi]
+ jmp $L$ccm64_dec_outer
+ALIGN 16
+$L$ccm64_dec_outer::
+ xorps xmm8,xmm2
+ movdqa xmm2,xmm9
+ mov eax,r10d
+ movups XMMWORD PTR[rsi],xmm8
+ lea rsi,QWORD PTR[16+rsi]
+DB 102,15,56,0,215
+
+ sub rdx,1
+ jz $L$ccm64_dec_break
+
+ movups xmm0,XMMWORD PTR[r11]
+ shr eax,1
+ movups xmm1,XMMWORD PTR[16+r11]
+ xorps xmm8,xmm0
+ lea rcx,QWORD PTR[32+r11]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm8
+ movups xmm0,XMMWORD PTR[rcx]
+
+$L$ccm64_dec2_loop::
+ aesenc xmm2,xmm1
+ dec eax
+ aesenc xmm3,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm3,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$ccm64_dec2_loop
+ movups xmm8,XMMWORD PTR[rdi]
+ paddq xmm9,xmm6
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ lea rdi,QWORD PTR[16+rdi]
+ aesenclast xmm2,xmm0
+ aesenclast xmm3,xmm0
+ jmp $L$ccm64_dec_outer
+
+ALIGN 16
+$L$ccm64_dec_break::
+
+ movups xmm0,XMMWORD PTR[r11]
+ movups xmm1,XMMWORD PTR[16+r11]
+ xorps xmm8,xmm0
+ lea r11,QWORD PTR[32+r11]
+ xorps xmm3,xmm8
+$L$oop_enc1_6::
+ aesenc xmm3,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[r11]
+ lea r11,QWORD PTR[16+r11]
+ jnz $L$oop_enc1_6
+ aesenclast xmm3,xmm1
+ movups XMMWORD PTR[r9],xmm3
+ movaps xmm6,XMMWORD PTR[rsp]
+ movaps xmm7,XMMWORD PTR[16+rsp]
+ movaps xmm8,XMMWORD PTR[32+rsp]
+ movaps xmm9,XMMWORD PTR[48+rsp]
+ lea rsp,QWORD PTR[88+rsp]
+$L$ccm64_dec_ret::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ccm64_decrypt_blocks::
+aesni_ccm64_decrypt_blocks ENDP
+PUBLIC aesni_ctr32_encrypt_blocks
+
+ALIGN 16
+aesni_ctr32_encrypt_blocks PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ctr32_encrypt_blocks::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+
+
+ lea rsp,QWORD PTR[((-200))+rsp]
+ movaps XMMWORD PTR[32+rsp],xmm6
+ movaps XMMWORD PTR[48+rsp],xmm7
+ movaps XMMWORD PTR[64+rsp],xmm8
+ movaps XMMWORD PTR[80+rsp],xmm9
+ movaps XMMWORD PTR[96+rsp],xmm10
+ movaps XMMWORD PTR[112+rsp],xmm11
+ movaps XMMWORD PTR[128+rsp],xmm12
+ movaps XMMWORD PTR[144+rsp],xmm13
+ movaps XMMWORD PTR[160+rsp],xmm14
+ movaps XMMWORD PTR[176+rsp],xmm15
+$L$ctr32_body::
+ cmp rdx,1
+ je $L$ctr32_one_shortcut
+
+ movdqu xmm14,XMMWORD PTR[r8]
+ movdqa xmm15,XMMWORD PTR[$L$bswap_mask]
+ xor eax,eax
+DB 102,69,15,58,22,242,3
+DB 102,68,15,58,34,240,3
+
+ mov eax,DWORD PTR[240+rcx]
+ bswap r10d
+ pxor xmm12,xmm12
+ pxor xmm13,xmm13
+DB 102,69,15,58,34,226,0
+ lea r11,QWORD PTR[3+r10]
+DB 102,69,15,58,34,235,0
+ inc r10d
+DB 102,69,15,58,34,226,1
+ inc r11
+DB 102,69,15,58,34,235,1
+ inc r10d
+DB 102,69,15,58,34,226,2
+ inc r11
+DB 102,69,15,58,34,235,2
+ movdqa XMMWORD PTR[rsp],xmm12
+DB 102,69,15,56,0,231
+ movdqa XMMWORD PTR[16+rsp],xmm13
+DB 102,69,15,56,0,239
+
+ pshufd xmm2,xmm12,192
+ pshufd xmm3,xmm12,128
+ pshufd xmm4,xmm12,64
+ cmp rdx,6
+ jb $L$ctr32_tail
+ shr eax,1
+ mov r11,rcx
+ mov r10d,eax
+ sub rdx,6
+ jmp $L$ctr32_loop6
+
+ALIGN 16
+$L$ctr32_loop6::
+ pshufd xmm5,xmm13,192
+ por xmm2,xmm14
+ movups xmm0,XMMWORD PTR[r11]
+ pshufd xmm6,xmm13,128
+ por xmm3,xmm14
+ movups xmm1,XMMWORD PTR[16+r11]
+ pshufd xmm7,xmm13,64
+ por xmm4,xmm14
+ por xmm5,xmm14
+ xorps xmm2,xmm0
+ por xmm6,xmm14
+ por xmm7,xmm14
+
+
+
+
+ pxor xmm3,xmm0
+ aesenc xmm2,xmm1
+ lea rcx,QWORD PTR[32+r11]
+ pxor xmm4,xmm0
+ aesenc xmm3,xmm1
+ movdqa xmm13,XMMWORD PTR[$L$increment32]
+ pxor xmm5,xmm0
+ aesenc xmm4,xmm1
+ movdqa xmm12,XMMWORD PTR[rsp]
+ pxor xmm6,xmm0
+ aesenc xmm5,xmm1
+ pxor xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ dec eax
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ jmp $L$ctr32_enc_loop6_enter
+ALIGN 16
+$L$ctr32_enc_loop6::
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ dec eax
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+$L$ctr32_enc_loop6_enter::
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ aesenc xmm5,xmm0
+ aesenc xmm6,xmm0
+ aesenc xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$ctr32_enc_loop6
+
+ aesenc xmm2,xmm1
+ paddd xmm12,xmm13
+ aesenc xmm3,xmm1
+ paddd xmm13,XMMWORD PTR[16+rsp]
+ aesenc xmm4,xmm1
+ movdqa XMMWORD PTR[rsp],xmm12
+ aesenc xmm5,xmm1
+ movdqa XMMWORD PTR[16+rsp],xmm13
+ aesenc xmm6,xmm1
+DB 102,69,15,56,0,231
+ aesenc xmm7,xmm1
+DB 102,69,15,56,0,239
+
+ aesenclast xmm2,xmm0
+ movups xmm8,XMMWORD PTR[rdi]
+ aesenclast xmm3,xmm0
+ movups xmm9,XMMWORD PTR[16+rdi]
+ aesenclast xmm4,xmm0
+ movups xmm10,XMMWORD PTR[32+rdi]
+ aesenclast xmm5,xmm0
+ movups xmm11,XMMWORD PTR[48+rdi]
+ aesenclast xmm6,xmm0
+ movups xmm1,XMMWORD PTR[64+rdi]
+ aesenclast xmm7,xmm0
+ movups xmm0,XMMWORD PTR[80+rdi]
+ lea rdi,QWORD PTR[96+rdi]
+
+ xorps xmm8,xmm2
+ pshufd xmm2,xmm12,192
+ xorps xmm9,xmm3
+ pshufd xmm3,xmm12,128
+ movups XMMWORD PTR[rsi],xmm8
+ xorps xmm10,xmm4
+ pshufd xmm4,xmm12,64
+ movups XMMWORD PTR[16+rsi],xmm9
+ xorps xmm11,xmm5
+ movups XMMWORD PTR[32+rsi],xmm10
+ xorps xmm1,xmm6
+ movups XMMWORD PTR[48+rsi],xmm11
+ xorps xmm0,xmm7
+ movups XMMWORD PTR[64+rsi],xmm1
+ movups XMMWORD PTR[80+rsi],xmm0
+ lea rsi,QWORD PTR[96+rsi]
+ mov eax,r10d
+ sub rdx,6
+ jnc $L$ctr32_loop6
+
+ add rdx,6
+ jz $L$ctr32_done
+ mov rcx,r11
+ lea eax,DWORD PTR[1+rax*1+rax]
+
+$L$ctr32_tail::
+ por xmm2,xmm14
+ movups xmm8,XMMWORD PTR[rdi]
+ cmp rdx,2
+ jb $L$ctr32_one
+
+ por xmm3,xmm14
+ movups xmm9,XMMWORD PTR[16+rdi]
+ je $L$ctr32_two
+
+ pshufd xmm5,xmm13,192
+ por xmm4,xmm14
+ movups xmm10,XMMWORD PTR[32+rdi]
+ cmp rdx,4
+ jb $L$ctr32_three
+
+ pshufd xmm6,xmm13,128
+ por xmm5,xmm14
+ movups xmm11,XMMWORD PTR[48+rdi]
+ je $L$ctr32_four
+
+ por xmm6,xmm14
+ xorps xmm7,xmm7
+
+ call _aesni_encrypt6
+
+ movups xmm1,XMMWORD PTR[64+rdi]
+ xorps xmm8,xmm2
+ xorps xmm9,xmm3
+ movups XMMWORD PTR[rsi],xmm8
+ xorps xmm10,xmm4
+ movups XMMWORD PTR[16+rsi],xmm9
+ xorps xmm11,xmm5
+ movups XMMWORD PTR[32+rsi],xmm10
+ xorps xmm1,xmm6
+ movups XMMWORD PTR[48+rsi],xmm11
+ movups XMMWORD PTR[64+rsi],xmm1
+ jmp $L$ctr32_done
+
+ALIGN 16
+$L$ctr32_one_shortcut::
+ movups xmm2,XMMWORD PTR[r8]
+ movups xmm8,XMMWORD PTR[rdi]
+ mov eax,DWORD PTR[240+rcx]
+$L$ctr32_one::
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_7::
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_7
+ aesenclast xmm2,xmm1
+ xorps xmm8,xmm2
+ movups XMMWORD PTR[rsi],xmm8
+ jmp $L$ctr32_done
+
+ALIGN 16
+$L$ctr32_two::
+ xorps xmm4,xmm4
+ call _aesni_encrypt3
+ xorps xmm8,xmm2
+ xorps xmm9,xmm3
+ movups XMMWORD PTR[rsi],xmm8
+ movups XMMWORD PTR[16+rsi],xmm9
+ jmp $L$ctr32_done
+
+ALIGN 16
+$L$ctr32_three::
+ call _aesni_encrypt3
+ xorps xmm8,xmm2
+ xorps xmm9,xmm3
+ movups XMMWORD PTR[rsi],xmm8
+ xorps xmm10,xmm4
+ movups XMMWORD PTR[16+rsi],xmm9
+ movups XMMWORD PTR[32+rsi],xmm10
+ jmp $L$ctr32_done
+
+ALIGN 16
+$L$ctr32_four::
+ call _aesni_encrypt4
+ xorps xmm8,xmm2
+ xorps xmm9,xmm3
+ movups XMMWORD PTR[rsi],xmm8
+ xorps xmm10,xmm4
+ movups XMMWORD PTR[16+rsi],xmm9
+ xorps xmm11,xmm5
+ movups XMMWORD PTR[32+rsi],xmm10
+ movups XMMWORD PTR[48+rsi],xmm11
+
+$L$ctr32_done::
+ movaps xmm6,XMMWORD PTR[32+rsp]
+ movaps xmm7,XMMWORD PTR[48+rsp]
+ movaps xmm8,XMMWORD PTR[64+rsp]
+ movaps xmm9,XMMWORD PTR[80+rsp]
+ movaps xmm10,XMMWORD PTR[96+rsp]
+ movaps xmm11,XMMWORD PTR[112+rsp]
+ movaps xmm12,XMMWORD PTR[128+rsp]
+ movaps xmm13,XMMWORD PTR[144+rsp]
+ movaps xmm14,XMMWORD PTR[160+rsp]
+ movaps xmm15,XMMWORD PTR[176+rsp]
+ lea rsp,QWORD PTR[200+rsp]
+$L$ctr32_ret::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ctr32_encrypt_blocks::
+aesni_ctr32_encrypt_blocks ENDP
+PUBLIC aesni_xts_encrypt
+
+ALIGN 16
+aesni_xts_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_xts_encrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ lea rsp,QWORD PTR[((-264))+rsp]
+ movaps XMMWORD PTR[96+rsp],xmm6
+ movaps XMMWORD PTR[112+rsp],xmm7
+ movaps XMMWORD PTR[128+rsp],xmm8
+ movaps XMMWORD PTR[144+rsp],xmm9
+ movaps XMMWORD PTR[160+rsp],xmm10
+ movaps XMMWORD PTR[176+rsp],xmm11
+ movaps XMMWORD PTR[192+rsp],xmm12
+ movaps XMMWORD PTR[208+rsp],xmm13
+ movaps XMMWORD PTR[224+rsp],xmm14
+ movaps XMMWORD PTR[240+rsp],xmm15
+$L$xts_enc_body::
+ movups xmm15,XMMWORD PTR[r9]
+ mov eax,DWORD PTR[240+r8]
+ mov r10d,DWORD PTR[240+rcx]
+ movups xmm0,XMMWORD PTR[r8]
+ movups xmm1,XMMWORD PTR[16+r8]
+ lea r8,QWORD PTR[32+r8]
+ xorps xmm15,xmm0
+$L$oop_enc1_8::
+ aesenc xmm15,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[r8]
+ lea r8,QWORD PTR[16+r8]
+ jnz $L$oop_enc1_8
+ aesenclast xmm15,xmm1
+ mov r11,rcx
+ mov eax,r10d
+ mov r9,rdx
+ and rdx,-16
+
+ movdqa xmm8,XMMWORD PTR[$L$xts_magic]
+ pxor xmm14,xmm14
+ pcmpgtd xmm14,xmm15
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm10,xmm15
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm11,xmm15
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm12,xmm15
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm13,xmm15
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ sub rdx,16*6
+ jc $L$xts_enc_short
+
+ shr eax,1
+ sub eax,1
+ mov r10d,eax
+ jmp $L$xts_enc_grandloop
+
+ALIGN 16
+$L$xts_enc_grandloop::
+ pshufd xmm9,xmm14,013h
+ movdqa xmm14,xmm15
+ paddq xmm15,xmm15
+ movdqu xmm2,XMMWORD PTR[rdi]
+ pand xmm9,xmm8
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ pxor xmm15,xmm9
+
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ pxor xmm3,xmm11
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ pxor xmm4,xmm12
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ lea rdi,QWORD PTR[96+rdi]
+ pxor xmm5,xmm13
+ movups xmm0,XMMWORD PTR[r11]
+ pxor xmm6,xmm14
+ pxor xmm7,xmm15
+
+
+
+ movups xmm1,XMMWORD PTR[16+r11]
+ pxor xmm2,xmm0
+ pxor xmm3,xmm0
+ movdqa XMMWORD PTR[rsp],xmm10
+ aesenc xmm2,xmm1
+ lea rcx,QWORD PTR[32+r11]
+ pxor xmm4,xmm0
+ movdqa XMMWORD PTR[16+rsp],xmm11
+ aesenc xmm3,xmm1
+ pxor xmm5,xmm0
+ movdqa XMMWORD PTR[32+rsp],xmm12
+ aesenc xmm4,xmm1
+ pxor xmm6,xmm0
+ movdqa XMMWORD PTR[48+rsp],xmm13
+ aesenc xmm5,xmm1
+ pxor xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ dec eax
+ movdqa XMMWORD PTR[64+rsp],xmm14
+ aesenc xmm6,xmm1
+ movdqa XMMWORD PTR[80+rsp],xmm15
+ aesenc xmm7,xmm1
+ pxor xmm14,xmm14
+ pcmpgtd xmm14,xmm15
+ jmp $L$xts_enc_loop6_enter
+
+ALIGN 16
+$L$xts_enc_loop6::
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ dec eax
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+$L$xts_enc_loop6_enter::
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ aesenc xmm5,xmm0
+ aesenc xmm6,xmm0
+ aesenc xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$xts_enc_loop6
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ paddq xmm15,xmm15
+ aesenc xmm2,xmm1
+ pand xmm9,xmm8
+ aesenc xmm3,xmm1
+ pcmpgtd xmm14,xmm15
+ aesenc xmm4,xmm1
+ pxor xmm15,xmm9
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm10,xmm15
+ paddq xmm15,xmm15
+ aesenc xmm2,xmm0
+ pand xmm9,xmm8
+ aesenc xmm3,xmm0
+ pcmpgtd xmm14,xmm15
+ aesenc xmm4,xmm0
+ pxor xmm15,xmm9
+ aesenc xmm5,xmm0
+ aesenc xmm6,xmm0
+ aesenc xmm7,xmm0
+ movups xmm0,XMMWORD PTR[32+rcx]
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm11,xmm15
+ paddq xmm15,xmm15
+ aesenc xmm2,xmm1
+ pand xmm9,xmm8
+ aesenc xmm3,xmm1
+ pcmpgtd xmm14,xmm15
+ aesenc xmm4,xmm1
+ pxor xmm15,xmm9
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm12,xmm15
+ paddq xmm15,xmm15
+ aesenclast xmm2,xmm0
+ pand xmm9,xmm8
+ aesenclast xmm3,xmm0
+ pcmpgtd xmm14,xmm15
+ aesenclast xmm4,xmm0
+ pxor xmm15,xmm9
+ aesenclast xmm5,xmm0
+ aesenclast xmm6,xmm0
+ aesenclast xmm7,xmm0
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm13,xmm15
+ paddq xmm15,xmm15
+ xorps xmm2,XMMWORD PTR[rsp]
+ pand xmm9,xmm8
+ xorps xmm3,XMMWORD PTR[16+rsp]
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+
+ xorps xmm4,XMMWORD PTR[32+rsp]
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm5,XMMWORD PTR[48+rsp]
+ movups XMMWORD PTR[16+rsi],xmm3
+ xorps xmm6,XMMWORD PTR[64+rsp]
+ movups XMMWORD PTR[32+rsi],xmm4
+ xorps xmm7,XMMWORD PTR[80+rsp]
+ movups XMMWORD PTR[48+rsi],xmm5
+ mov eax,r10d
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ lea rsi,QWORD PTR[96+rsi]
+ sub rdx,16*6
+ jnc $L$xts_enc_grandloop
+
+ lea eax,DWORD PTR[3+rax*1+rax]
+ mov rcx,r11
+ mov r10d,eax
+
+$L$xts_enc_short::
+ add rdx,16*6
+ jz $L$xts_enc_done
+
+ cmp rdx,020h
+ jb $L$xts_enc_one
+ je $L$xts_enc_two
+
+ cmp rdx,040h
+ jb $L$xts_enc_three
+ je $L$xts_enc_four
+
+ pshufd xmm9,xmm14,013h
+ movdqa xmm14,xmm15
+ paddq xmm15,xmm15
+ movdqu xmm2,XMMWORD PTR[rdi]
+ pand xmm9,xmm8
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ pxor xmm15,xmm9
+
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ pxor xmm3,xmm11
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ lea rdi,QWORD PTR[80+rdi]
+ pxor xmm4,xmm12
+ pxor xmm5,xmm13
+ pxor xmm6,xmm14
+
+ call _aesni_encrypt6
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm15
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movdqu XMMWORD PTR[rsi],xmm2
+ xorps xmm5,xmm13
+ movdqu XMMWORD PTR[16+rsi],xmm3
+ xorps xmm6,xmm14
+ movdqu XMMWORD PTR[32+rsi],xmm4
+ movdqu XMMWORD PTR[48+rsi],xmm5
+ movdqu XMMWORD PTR[64+rsi],xmm6
+ lea rsi,QWORD PTR[80+rsi]
+ jmp $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_one::
+ movups xmm2,XMMWORD PTR[rdi]
+ lea rdi,QWORD PTR[16+rdi]
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_9::
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_9
+ aesenclast xmm2,xmm1
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm11
+ movups XMMWORD PTR[rsi],xmm2
+ lea rsi,QWORD PTR[16+rsi]
+ jmp $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_two::
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ lea rdi,QWORD PTR[32+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+
+ call _aesni_encrypt3
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm12
+ xorps xmm3,xmm11
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ lea rsi,QWORD PTR[32+rsi]
+ jmp $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_three::
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movups xmm4,XMMWORD PTR[32+rdi]
+ lea rdi,QWORD PTR[48+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+
+ call _aesni_encrypt3
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm13
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ lea rsi,QWORD PTR[48+rsi]
+ jmp $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_four::
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movups xmm4,XMMWORD PTR[32+rdi]
+ xorps xmm2,xmm10
+ movups xmm5,XMMWORD PTR[48+rdi]
+ lea rdi,QWORD PTR[64+rdi]
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ xorps xmm5,xmm13
+
+ call _aesni_encrypt4
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm15
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm5,xmm13
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ lea rsi,QWORD PTR[64+rsi]
+ jmp $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_done::
+ and r9,15
+ jz $L$xts_enc_ret
+ mov rdx,r9
+
+$L$xts_enc_steal::
+ movzx eax,BYTE PTR[rdi]
+ movzx ecx,BYTE PTR[((-16))+rsi]
+ lea rdi,QWORD PTR[1+rdi]
+ mov BYTE PTR[((-16))+rsi],al
+ mov BYTE PTR[rsi],cl
+ lea rsi,QWORD PTR[1+rsi]
+ sub rdx,1
+ jnz $L$xts_enc_steal
+
+ sub rsi,r9
+ mov rcx,r11
+ mov eax,r10d
+
+ movups xmm2,XMMWORD PTR[((-16))+rsi]
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_10::
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_10
+ aesenclast xmm2,xmm1
+ xorps xmm2,xmm10
+ movups XMMWORD PTR[(-16)+rsi],xmm2
+
+$L$xts_enc_ret::
+ movaps xmm6,XMMWORD PTR[96+rsp]
+ movaps xmm7,XMMWORD PTR[112+rsp]
+ movaps xmm8,XMMWORD PTR[128+rsp]
+ movaps xmm9,XMMWORD PTR[144+rsp]
+ movaps xmm10,XMMWORD PTR[160+rsp]
+ movaps xmm11,XMMWORD PTR[176+rsp]
+ movaps xmm12,XMMWORD PTR[192+rsp]
+ movaps xmm13,XMMWORD PTR[208+rsp]
+ movaps xmm14,XMMWORD PTR[224+rsp]
+ movaps xmm15,XMMWORD PTR[240+rsp]
+ lea rsp,QWORD PTR[264+rsp]
+$L$xts_enc_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_xts_encrypt::
+aesni_xts_encrypt ENDP
+PUBLIC aesni_xts_decrypt
+
+ALIGN 16
+aesni_xts_decrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_xts_decrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ lea rsp,QWORD PTR[((-264))+rsp]
+ movaps XMMWORD PTR[96+rsp],xmm6
+ movaps XMMWORD PTR[112+rsp],xmm7
+ movaps XMMWORD PTR[128+rsp],xmm8
+ movaps XMMWORD PTR[144+rsp],xmm9
+ movaps XMMWORD PTR[160+rsp],xmm10
+ movaps XMMWORD PTR[176+rsp],xmm11
+ movaps XMMWORD PTR[192+rsp],xmm12
+ movaps XMMWORD PTR[208+rsp],xmm13
+ movaps XMMWORD PTR[224+rsp],xmm14
+ movaps XMMWORD PTR[240+rsp],xmm15
+$L$xts_dec_body::
+ movups xmm15,XMMWORD PTR[r9]
+ mov eax,DWORD PTR[240+r8]
+ mov r10d,DWORD PTR[240+rcx]
+ movups xmm0,XMMWORD PTR[r8]
+ movups xmm1,XMMWORD PTR[16+r8]
+ lea r8,QWORD PTR[32+r8]
+ xorps xmm15,xmm0
+$L$oop_enc1_11::
+ aesenc xmm15,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[r8]
+ lea r8,QWORD PTR[16+r8]
+ jnz $L$oop_enc1_11
+ aesenclast xmm15,xmm1
+ xor eax,eax
+ test rdx,15
+ setnz al
+ shl rax,4
+ sub rdx,rax
+
+ mov r11,rcx
+ mov eax,r10d
+ mov r9,rdx
+ and rdx,-16
+
+ movdqa xmm8,XMMWORD PTR[$L$xts_magic]
+ pxor xmm14,xmm14
+ pcmpgtd xmm14,xmm15
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm10,xmm15
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm11,xmm15
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm12,xmm15
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm13,xmm15
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ sub rdx,16*6
+ jc $L$xts_dec_short
+
+ shr eax,1
+ sub eax,1
+ mov r10d,eax
+ jmp $L$xts_dec_grandloop
+
+ALIGN 16
+$L$xts_dec_grandloop::
+ pshufd xmm9,xmm14,013h
+ movdqa xmm14,xmm15
+ paddq xmm15,xmm15
+ movdqu xmm2,XMMWORD PTR[rdi]
+ pand xmm9,xmm8
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ pxor xmm15,xmm9
+
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ pxor xmm3,xmm11
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ pxor xmm4,xmm12
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ lea rdi,QWORD PTR[96+rdi]
+ pxor xmm5,xmm13
+ movups xmm0,XMMWORD PTR[r11]
+ pxor xmm6,xmm14
+ pxor xmm7,xmm15
+
+
+
+ movups xmm1,XMMWORD PTR[16+r11]
+ pxor xmm2,xmm0
+ pxor xmm3,xmm0
+ movdqa XMMWORD PTR[rsp],xmm10
+ aesdec xmm2,xmm1
+ lea rcx,QWORD PTR[32+r11]
+ pxor xmm4,xmm0
+ movdqa XMMWORD PTR[16+rsp],xmm11
+ aesdec xmm3,xmm1
+ pxor xmm5,xmm0
+ movdqa XMMWORD PTR[32+rsp],xmm12
+ aesdec xmm4,xmm1
+ pxor xmm6,xmm0
+ movdqa XMMWORD PTR[48+rsp],xmm13
+ aesdec xmm5,xmm1
+ pxor xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ dec eax
+ movdqa XMMWORD PTR[64+rsp],xmm14
+ aesdec xmm6,xmm1
+ movdqa XMMWORD PTR[80+rsp],xmm15
+ aesdec xmm7,xmm1
+ pxor xmm14,xmm14
+ pcmpgtd xmm14,xmm15
+ jmp $L$xts_dec_loop6_enter
+
+ALIGN 16
+$L$xts_dec_loop6::
+ aesdec xmm2,xmm1
+ aesdec xmm3,xmm1
+ dec eax
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+$L$xts_dec_loop6_enter::
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesdec xmm2,xmm0
+ aesdec xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesdec xmm4,xmm0
+ aesdec xmm5,xmm0
+ aesdec xmm6,xmm0
+ aesdec xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$xts_dec_loop6
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ paddq xmm15,xmm15
+ aesdec xmm2,xmm1
+ pand xmm9,xmm8
+ aesdec xmm3,xmm1
+ pcmpgtd xmm14,xmm15
+ aesdec xmm4,xmm1
+ pxor xmm15,xmm9
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm10,xmm15
+ paddq xmm15,xmm15
+ aesdec xmm2,xmm0
+ pand xmm9,xmm8
+ aesdec xmm3,xmm0
+ pcmpgtd xmm14,xmm15
+ aesdec xmm4,xmm0
+ pxor xmm15,xmm9
+ aesdec xmm5,xmm0
+ aesdec xmm6,xmm0
+ aesdec xmm7,xmm0
+ movups xmm0,XMMWORD PTR[32+rcx]
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm11,xmm15
+ paddq xmm15,xmm15
+ aesdec xmm2,xmm1
+ pand xmm9,xmm8
+ aesdec xmm3,xmm1
+ pcmpgtd xmm14,xmm15
+ aesdec xmm4,xmm1
+ pxor xmm15,xmm9
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm12,xmm15
+ paddq xmm15,xmm15
+ aesdeclast xmm2,xmm0
+ pand xmm9,xmm8
+ aesdeclast xmm3,xmm0
+ pcmpgtd xmm14,xmm15
+ aesdeclast xmm4,xmm0
+ pxor xmm15,xmm9
+ aesdeclast xmm5,xmm0
+ aesdeclast xmm6,xmm0
+ aesdeclast xmm7,xmm0
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm13,xmm15
+ paddq xmm15,xmm15
+ xorps xmm2,XMMWORD PTR[rsp]
+ pand xmm9,xmm8
+ xorps xmm3,XMMWORD PTR[16+rsp]
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+
+ xorps xmm4,XMMWORD PTR[32+rsp]
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm5,XMMWORD PTR[48+rsp]
+ movups XMMWORD PTR[16+rsi],xmm3
+ xorps xmm6,XMMWORD PTR[64+rsp]
+ movups XMMWORD PTR[32+rsi],xmm4
+ xorps xmm7,XMMWORD PTR[80+rsp]
+ movups XMMWORD PTR[48+rsi],xmm5
+ mov eax,r10d
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ lea rsi,QWORD PTR[96+rsi]
+ sub rdx,16*6
+ jnc $L$xts_dec_grandloop
+
+ lea eax,DWORD PTR[3+rax*1+rax]
+ mov rcx,r11
+ mov r10d,eax
+
+$L$xts_dec_short::
+ add rdx,16*6
+ jz $L$xts_dec_done
+
+ cmp rdx,020h
+ jb $L$xts_dec_one
+ je $L$xts_dec_two
+
+ cmp rdx,040h
+ jb $L$xts_dec_three
+ je $L$xts_dec_four
+
+ pshufd xmm9,xmm14,013h
+ movdqa xmm14,xmm15
+ paddq xmm15,xmm15
+ movdqu xmm2,XMMWORD PTR[rdi]
+ pand xmm9,xmm8
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ pxor xmm15,xmm9
+
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ pxor xmm3,xmm11
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ lea rdi,QWORD PTR[80+rdi]
+ pxor xmm4,xmm12
+ pxor xmm5,xmm13
+ pxor xmm6,xmm14
+
+ call _aesni_decrypt6
+
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movdqu XMMWORD PTR[rsi],xmm2
+ xorps xmm5,xmm13
+ movdqu XMMWORD PTR[16+rsi],xmm3
+ xorps xmm6,xmm14
+ movdqu XMMWORD PTR[32+rsi],xmm4
+ pxor xmm14,xmm14
+ movdqu XMMWORD PTR[48+rsi],xmm5
+ pcmpgtd xmm14,xmm15
+ movdqu XMMWORD PTR[64+rsi],xmm6
+ lea rsi,QWORD PTR[80+rsi]
+ pshufd xmm11,xmm14,013h
+ and r9,15
+ jz $L$xts_dec_ret
+
+ movdqa xmm10,xmm15
+ paddq xmm15,xmm15
+ pand xmm11,xmm8
+ pxor xmm11,xmm15
+ jmp $L$xts_dec_done2
+
+ALIGN 16
+$L$xts_dec_one::
+ movups xmm2,XMMWORD PTR[rdi]
+ lea rdi,QWORD PTR[16+rdi]
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_12::
+ aesdec xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_12
+ aesdeclast xmm2,xmm1
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm11
+ movups XMMWORD PTR[rsi],xmm2
+ movdqa xmm11,xmm12
+ lea rsi,QWORD PTR[16+rsi]
+ jmp $L$xts_dec_done
+
+ALIGN 16
+$L$xts_dec_two::
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ lea rdi,QWORD PTR[32+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+
+ call _aesni_decrypt3
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm12
+ xorps xmm3,xmm11
+ movdqa xmm11,xmm13
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ lea rsi,QWORD PTR[32+rsi]
+ jmp $L$xts_dec_done
+
+ALIGN 16
+$L$xts_dec_three::
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movups xmm4,XMMWORD PTR[32+rdi]
+ lea rdi,QWORD PTR[48+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+
+ call _aesni_decrypt3
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm13
+ xorps xmm3,xmm11
+ movdqa xmm11,xmm15
+ xorps xmm4,xmm12
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ lea rsi,QWORD PTR[48+rsi]
+ jmp $L$xts_dec_done
+
+ALIGN 16
+$L$xts_dec_four::
+ pshufd xmm9,xmm14,013h
+ movdqa xmm14,xmm15
+ paddq xmm15,xmm15
+ movups xmm2,XMMWORD PTR[rdi]
+ pand xmm9,xmm8
+ movups xmm3,XMMWORD PTR[16+rdi]
+ pxor xmm15,xmm9
+
+ movups xmm4,XMMWORD PTR[32+rdi]
+ xorps xmm2,xmm10
+ movups xmm5,XMMWORD PTR[48+rdi]
+ lea rdi,QWORD PTR[64+rdi]
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ xorps xmm5,xmm13
+
+ call _aesni_decrypt4
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm14
+ xorps xmm3,xmm11
+ movdqa xmm11,xmm15
+ xorps xmm4,xmm12
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm5,xmm13
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ lea rsi,QWORD PTR[64+rsi]
+ jmp $L$xts_dec_done
+
+ALIGN 16
+$L$xts_dec_done::
+ and r9,15
+ jz $L$xts_dec_ret
+$L$xts_dec_done2::
+ mov rdx,r9
+ mov rcx,r11
+ mov eax,r10d
+
+ movups xmm2,XMMWORD PTR[rdi]
+ xorps xmm2,xmm11
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_13::
+ aesdec xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_13
+ aesdeclast xmm2,xmm1
+ xorps xmm2,xmm11
+ movups XMMWORD PTR[rsi],xmm2
+
+$L$xts_dec_steal::
+ movzx eax,BYTE PTR[16+rdi]
+ movzx ecx,BYTE PTR[rsi]
+ lea rdi,QWORD PTR[1+rdi]
+ mov BYTE PTR[rsi],al
+ mov BYTE PTR[16+rsi],cl
+ lea rsi,QWORD PTR[1+rsi]
+ sub rdx,1
+ jnz $L$xts_dec_steal
+
+ sub rsi,r9
+ mov rcx,r11
+ mov eax,r10d
+
+ movups xmm2,XMMWORD PTR[rsi]
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_14::
+ aesdec xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_14
+ aesdeclast xmm2,xmm1
+ xorps xmm2,xmm10
+ movups XMMWORD PTR[rsi],xmm2
+
+$L$xts_dec_ret::
+ movaps xmm6,XMMWORD PTR[96+rsp]
+ movaps xmm7,XMMWORD PTR[112+rsp]
+ movaps xmm8,XMMWORD PTR[128+rsp]
+ movaps xmm9,XMMWORD PTR[144+rsp]
+ movaps xmm10,XMMWORD PTR[160+rsp]
+ movaps xmm11,XMMWORD PTR[176+rsp]
+ movaps xmm12,XMMWORD PTR[192+rsp]
+ movaps xmm13,XMMWORD PTR[208+rsp]
+ movaps xmm14,XMMWORD PTR[224+rsp]
+ movaps xmm15,XMMWORD PTR[240+rsp]
+ lea rsp,QWORD PTR[264+rsp]
+$L$xts_dec_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_xts_decrypt::
+aesni_xts_decrypt ENDP
+PUBLIC aesni_cbc_encrypt
+
+ALIGN 16
+aesni_cbc_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_cbc_encrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ test rdx,rdx
+ jz $L$cbc_ret
+
+ mov r10d,DWORD PTR[240+rcx]
+ mov r11,rcx
+ test r9d,r9d
+ jz $L$cbc_decrypt
+
+ movups xmm2,XMMWORD PTR[r8]
+ mov eax,r10d
+ cmp rdx,16
+ jb $L$cbc_enc_tail
+ sub rdx,16
+ jmp $L$cbc_enc_loop
+ALIGN 16
+$L$cbc_enc_loop::
+ movups xmm3,XMMWORD PTR[rdi]
+ lea rdi,QWORD PTR[16+rdi]
+
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ xorps xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm3
+$L$oop_enc1_15::
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_15
+ aesenclast xmm2,xmm1
+ mov eax,r10d
+ mov rcx,r11
+ movups XMMWORD PTR[rsi],xmm2
+ lea rsi,QWORD PTR[16+rsi]
+ sub rdx,16
+ jnc $L$cbc_enc_loop
+ add rdx,16
+ jnz $L$cbc_enc_tail
+ movups XMMWORD PTR[r8],xmm2
+ jmp $L$cbc_ret
+
+$L$cbc_enc_tail::
+ mov rcx,rdx
+ xchg rsi,rdi
+ DD 09066A4F3h
+ mov ecx,16
+ sub rcx,rdx
+ xor eax,eax
+ DD 09066AAF3h
+ lea rdi,QWORD PTR[((-16))+rdi]
+ mov eax,r10d
+ mov rsi,rdi
+ mov rcx,r11
+ xor rdx,rdx
+ jmp $L$cbc_enc_loop
+
+ALIGN 16
+$L$cbc_decrypt::
+ lea rsp,QWORD PTR[((-88))+rsp]
+ movaps XMMWORD PTR[rsp],xmm6
+ movaps XMMWORD PTR[16+rsp],xmm7
+ movaps XMMWORD PTR[32+rsp],xmm8
+ movaps XMMWORD PTR[48+rsp],xmm9
+$L$cbc_decrypt_body::
+ movups xmm9,XMMWORD PTR[r8]
+ mov eax,r10d
+ cmp rdx,070h
+ jbe $L$cbc_dec_tail
+ shr r10d,1
+ sub rdx,070h
+ mov eax,r10d
+ movaps XMMWORD PTR[64+rsp],xmm9
+ jmp $L$cbc_dec_loop8_enter
+ALIGN 16
+$L$cbc_dec_loop8::
+ movaps XMMWORD PTR[64+rsp],xmm0
+ movups XMMWORD PTR[rsi],xmm9
+ lea rsi,QWORD PTR[16+rsi]
+$L$cbc_dec_loop8_enter::
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movups xmm1,XMMWORD PTR[16+rcx]
+
+ lea rcx,QWORD PTR[32+rcx]
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ xorps xmm2,xmm0
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ xorps xmm3,xmm0
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ aesdec xmm2,xmm1
+ pxor xmm4,xmm0
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ aesdec xmm3,xmm1
+ pxor xmm5,xmm0
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ aesdec xmm4,xmm1
+ pxor xmm6,xmm0
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ aesdec xmm5,xmm1
+ pxor xmm7,xmm0
+ dec eax
+ aesdec xmm6,xmm1
+ pxor xmm8,xmm0
+ aesdec xmm7,xmm1
+ pxor xmm9,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ aesdec xmm8,xmm1
+ aesdec xmm9,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+
+ call $L$dec_loop8_enter
+
+ movups xmm1,XMMWORD PTR[rdi]
+ movups xmm0,XMMWORD PTR[16+rdi]
+ xorps xmm2,XMMWORD PTR[64+rsp]
+ xorps xmm3,xmm1
+ movups xmm1,XMMWORD PTR[32+rdi]
+ xorps xmm4,xmm0
+ movups xmm0,XMMWORD PTR[48+rdi]
+ xorps xmm5,xmm1
+ movups xmm1,XMMWORD PTR[64+rdi]
+ xorps xmm6,xmm0
+ movups xmm0,XMMWORD PTR[80+rdi]
+ xorps xmm7,xmm1
+ movups xmm1,XMMWORD PTR[96+rdi]
+ xorps xmm8,xmm0
+ movups xmm0,XMMWORD PTR[112+rdi]
+ xorps xmm9,xmm1
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ mov eax,r10d
+ movups XMMWORD PTR[64+rsi],xmm6
+ mov rcx,r11
+ movups XMMWORD PTR[80+rsi],xmm7
+ lea rdi,QWORD PTR[128+rdi]
+ movups XMMWORD PTR[96+rsi],xmm8
+ lea rsi,QWORD PTR[112+rsi]
+ sub rdx,080h
+ ja $L$cbc_dec_loop8
+
+ movaps xmm2,xmm9
+ movaps xmm9,xmm0
+ add rdx,070h
+ jle $L$cbc_dec_tail_collected
+ movups XMMWORD PTR[rsi],xmm2
+ lea eax,DWORD PTR[1+r10*1+r10]
+ lea rsi,QWORD PTR[16+rsi]
+$L$cbc_dec_tail::
+ movups xmm2,XMMWORD PTR[rdi]
+ movaps xmm8,xmm2
+ cmp rdx,010h
+ jbe $L$cbc_dec_one
+
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movaps xmm7,xmm3
+ cmp rdx,020h
+ jbe $L$cbc_dec_two
+
+ movups xmm4,XMMWORD PTR[32+rdi]
+ movaps xmm6,xmm4
+ cmp rdx,030h
+ jbe $L$cbc_dec_three
+
+ movups xmm5,XMMWORD PTR[48+rdi]
+ cmp rdx,040h
+ jbe $L$cbc_dec_four
+
+ movups xmm6,XMMWORD PTR[64+rdi]
+ cmp rdx,050h
+ jbe $L$cbc_dec_five
+
+ movups xmm7,XMMWORD PTR[80+rdi]
+ cmp rdx,060h
+ jbe $L$cbc_dec_six
+
+ movups xmm8,XMMWORD PTR[96+rdi]
+ movaps XMMWORD PTR[64+rsp],xmm9
+ call _aesni_decrypt8
+ movups xmm1,XMMWORD PTR[rdi]
+ movups xmm0,XMMWORD PTR[16+rdi]
+ xorps xmm2,XMMWORD PTR[64+rsp]
+ xorps xmm3,xmm1
+ movups xmm1,XMMWORD PTR[32+rdi]
+ xorps xmm4,xmm0
+ movups xmm0,XMMWORD PTR[48+rdi]
+ xorps xmm5,xmm1
+ movups xmm1,XMMWORD PTR[64+rdi]
+ xorps xmm6,xmm0
+ movups xmm0,XMMWORD PTR[80+rdi]
+ xorps xmm7,xmm1
+ movups xmm9,XMMWORD PTR[96+rdi]
+ xorps xmm8,xmm0
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ lea rsi,QWORD PTR[96+rsi]
+ movaps xmm2,xmm8
+ sub rdx,070h
+ jmp $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_one::
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_16::
+ aesdec xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_16
+ aesdeclast xmm2,xmm1
+ xorps xmm2,xmm9
+ movaps xmm9,xmm8
+ sub rdx,010h
+ jmp $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_two::
+ xorps xmm4,xmm4
+ call _aesni_decrypt3
+ xorps xmm2,xmm9
+ xorps xmm3,xmm8
+ movups XMMWORD PTR[rsi],xmm2
+ movaps xmm9,xmm7
+ movaps xmm2,xmm3
+ lea rsi,QWORD PTR[16+rsi]
+ sub rdx,020h
+ jmp $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_three::
+ call _aesni_decrypt3
+ xorps xmm2,xmm9
+ xorps xmm3,xmm8
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm4,xmm7
+ movups XMMWORD PTR[16+rsi],xmm3
+ movaps xmm9,xmm6
+ movaps xmm2,xmm4
+ lea rsi,QWORD PTR[32+rsi]
+ sub rdx,030h
+ jmp $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_four::
+ call _aesni_decrypt4
+ xorps xmm2,xmm9
+ movups xmm9,XMMWORD PTR[48+rdi]
+ xorps xmm3,xmm8
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm4,xmm7
+ movups XMMWORD PTR[16+rsi],xmm3
+ xorps xmm5,xmm6
+ movups XMMWORD PTR[32+rsi],xmm4
+ movaps xmm2,xmm5
+ lea rsi,QWORD PTR[48+rsi]
+ sub rdx,040h
+ jmp $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_five::
+ xorps xmm7,xmm7
+ call _aesni_decrypt6
+ movups xmm1,XMMWORD PTR[16+rdi]
+ movups xmm0,XMMWORD PTR[32+rdi]
+ xorps xmm2,xmm9
+ xorps xmm3,xmm8
+ xorps xmm4,xmm1
+ movups xmm1,XMMWORD PTR[48+rdi]
+ xorps xmm5,xmm0
+ movups xmm9,XMMWORD PTR[64+rdi]
+ xorps xmm6,xmm1
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ lea rsi,QWORD PTR[64+rsi]
+ movaps xmm2,xmm6
+ sub rdx,050h
+ jmp $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_six::
+ call _aesni_decrypt6
+ movups xmm1,XMMWORD PTR[16+rdi]
+ movups xmm0,XMMWORD PTR[32+rdi]
+ xorps xmm2,xmm9
+ xorps xmm3,xmm8
+ xorps xmm4,xmm1
+ movups xmm1,XMMWORD PTR[48+rdi]
+ xorps xmm5,xmm0
+ movups xmm0,XMMWORD PTR[64+rdi]
+ xorps xmm6,xmm1
+ movups xmm9,XMMWORD PTR[80+rdi]
+ xorps xmm7,xmm0
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ lea rsi,QWORD PTR[80+rsi]
+ movaps xmm2,xmm7
+ sub rdx,060h
+ jmp $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_tail_collected::
+ and rdx,15
+ movups XMMWORD PTR[r8],xmm9
+ jnz $L$cbc_dec_tail_partial
+ movups XMMWORD PTR[rsi],xmm2
+ jmp $L$cbc_dec_ret
+ALIGN 16
+$L$cbc_dec_tail_partial::
+ movaps XMMWORD PTR[64+rsp],xmm2
+ mov rcx,16
+ mov rdi,rsi
+ sub rcx,rdx
+ lea rsi,QWORD PTR[64+rsp]
+ DD 09066A4F3h
+
+$L$cbc_dec_ret::
+ movaps xmm6,XMMWORD PTR[rsp]
+ movaps xmm7,XMMWORD PTR[16+rsp]
+ movaps xmm8,XMMWORD PTR[32+rsp]
+ movaps xmm9,XMMWORD PTR[48+rsp]
+ lea rsp,QWORD PTR[88+rsp]
+$L$cbc_ret::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_cbc_encrypt::
+aesni_cbc_encrypt ENDP
+PUBLIC aesni_set_decrypt_key
+
+ALIGN 16
+aesni_set_decrypt_key PROC PUBLIC
+ sub rsp,8
+ call __aesni_set_encrypt_key
+ shl edx,4
+ test eax,eax
+ jnz $L$dec_key_ret
+ lea rcx,QWORD PTR[16+rdx*1+r8]
+
+ movups xmm0,XMMWORD PTR[r8]
+ movups xmm1,XMMWORD PTR[rcx]
+ movups XMMWORD PTR[rcx],xmm0
+ movups XMMWORD PTR[r8],xmm1
+ lea r8,QWORD PTR[16+r8]
+ lea rcx,QWORD PTR[((-16))+rcx]
+
+$L$dec_key_inverse::
+ movups xmm0,XMMWORD PTR[r8]
+ movups xmm1,XMMWORD PTR[rcx]
+ aesimc xmm0,xmm0
+ aesimc xmm1,xmm1
+ lea r8,QWORD PTR[16+r8]
+ lea rcx,QWORD PTR[((-16))+rcx]
+ movups XMMWORD PTR[16+rcx],xmm0
+ movups XMMWORD PTR[(-16)+r8],xmm1
+ cmp rcx,r8
+ ja $L$dec_key_inverse
+
+ movups xmm0,XMMWORD PTR[r8]
+ aesimc xmm0,xmm0
+ movups XMMWORD PTR[rcx],xmm0
+$L$dec_key_ret::
+ add rsp,8
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_set_decrypt_key::
+aesni_set_decrypt_key ENDP
+PUBLIC aesni_set_encrypt_key
+
+ALIGN 16
+aesni_set_encrypt_key PROC PUBLIC
+__aesni_set_encrypt_key::
+ sub rsp,8
+ mov rax,-1
+ test rcx,rcx
+ jz $L$enc_key_ret
+ test r8,r8
+ jz $L$enc_key_ret
+
+ movups xmm0,XMMWORD PTR[rcx]
+ xorps xmm4,xmm4
+ lea rax,QWORD PTR[16+r8]
+ cmp edx,256
+ je $L$14rounds
+ cmp edx,192
+ je $L$12rounds
+ cmp edx,128
+ jne $L$bad_keybits
+
+$L$10rounds::
+ mov edx,9
+ movups XMMWORD PTR[r8],xmm0
+ aeskeygenassist xmm1,xmm0,01h
+ call $L$key_expansion_128_cold
+ aeskeygenassist xmm1,xmm0,02h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,04h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,08h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,010h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,020h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,040h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,080h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,01bh
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,036h
+ call $L$key_expansion_128
+ movups XMMWORD PTR[rax],xmm0
+ mov DWORD PTR[80+rax],edx
+ xor eax,eax
+ jmp $L$enc_key_ret
+
+ALIGN 16
+$L$12rounds::
+ movq xmm2,QWORD PTR[16+rcx]
+ mov edx,11
+ movups XMMWORD PTR[r8],xmm0
+ aeskeygenassist xmm1,xmm2,01h
+ call $L$key_expansion_192a_cold
+ aeskeygenassist xmm1,xmm2,02h
+ call $L$key_expansion_192b
+ aeskeygenassist xmm1,xmm2,04h
+ call $L$key_expansion_192a
+ aeskeygenassist xmm1,xmm2,08h
+ call $L$key_expansion_192b
+ aeskeygenassist xmm1,xmm2,010h
+ call $L$key_expansion_192a
+ aeskeygenassist xmm1,xmm2,020h
+ call $L$key_expansion_192b
+ aeskeygenassist xmm1,xmm2,040h
+ call $L$key_expansion_192a
+ aeskeygenassist xmm1,xmm2,080h
+ call $L$key_expansion_192b
+ movups XMMWORD PTR[rax],xmm0
+ mov DWORD PTR[48+rax],edx
+ xor rax,rax
+ jmp $L$enc_key_ret
+
+ALIGN 16
+$L$14rounds::
+ movups xmm2,XMMWORD PTR[16+rcx]
+ mov edx,13
+ lea rax,QWORD PTR[16+rax]
+ movups XMMWORD PTR[r8],xmm0
+ movups XMMWORD PTR[16+r8],xmm2
+ aeskeygenassist xmm1,xmm2,01h
+ call $L$key_expansion_256a_cold
+ aeskeygenassist xmm1,xmm0,01h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,02h
+ call $L$key_expansion_256a
+ aeskeygenassist xmm1,xmm0,02h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,04h
+ call $L$key_expansion_256a
+ aeskeygenassist xmm1,xmm0,04h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,08h
+ call $L$key_expansion_256a
+ aeskeygenassist xmm1,xmm0,08h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,010h
+ call $L$key_expansion_256a
+ aeskeygenassist xmm1,xmm0,010h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,020h
+ call $L$key_expansion_256a
+ aeskeygenassist xmm1,xmm0,020h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,040h
+ call $L$key_expansion_256a
+ movups XMMWORD PTR[rax],xmm0
+ mov DWORD PTR[16+rax],edx
+ xor rax,rax
+ jmp $L$enc_key_ret
+
+ALIGN 16
+$L$bad_keybits::
+ mov rax,-2
+$L$enc_key_ret::
+ add rsp,8
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_set_encrypt_key::
+
+ALIGN 16
+$L$key_expansion_128::
+ movups XMMWORD PTR[rax],xmm0
+ lea rax,QWORD PTR[16+rax]
+$L$key_expansion_128_cold::
+ shufps xmm4,xmm0,16
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ xorps xmm0,xmm4
+ shufps xmm1,xmm1,255
+ xorps xmm0,xmm1
+ DB 0F3h,0C3h ;repret
+
+ALIGN 16
+$L$key_expansion_192a::
+ movups XMMWORD PTR[rax],xmm0
+ lea rax,QWORD PTR[16+rax]
+$L$key_expansion_192a_cold::
+ movaps xmm5,xmm2
+$L$key_expansion_192b_warm::
+ shufps xmm4,xmm0,16
+ movdqa xmm3,xmm2
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ pslldq xmm3,4
+ xorps xmm0,xmm4
+ pshufd xmm1,xmm1,85
+ pxor xmm2,xmm3
+ pxor xmm0,xmm1
+ pshufd xmm3,xmm0,255
+ pxor xmm2,xmm3
+ DB 0F3h,0C3h ;repret
+
+ALIGN 16
+$L$key_expansion_192b::
+ movaps xmm3,xmm0
+ shufps xmm5,xmm0,68
+ movups XMMWORD PTR[rax],xmm5
+ shufps xmm3,xmm2,78
+ movups XMMWORD PTR[16+rax],xmm3
+ lea rax,QWORD PTR[32+rax]
+ jmp $L$key_expansion_192b_warm
+
+ALIGN 16
+$L$key_expansion_256a::
+ movups XMMWORD PTR[rax],xmm2
+ lea rax,QWORD PTR[16+rax]
+$L$key_expansion_256a_cold::
+ shufps xmm4,xmm0,16
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ xorps xmm0,xmm4
+ shufps xmm1,xmm1,255
+ xorps xmm0,xmm1
+ DB 0F3h,0C3h ;repret
+
+ALIGN 16
+$L$key_expansion_256b::
+ movups XMMWORD PTR[rax],xmm0
+ lea rax,QWORD PTR[16+rax]
+
+ shufps xmm4,xmm2,16
+ xorps xmm2,xmm4
+ shufps xmm4,xmm2,140
+ xorps xmm2,xmm4
+ shufps xmm1,xmm1,170
+ xorps xmm2,xmm1
+ DB 0F3h,0C3h ;repret
+aesni_set_encrypt_key ENDP
+
+ALIGN 64
+$L$bswap_mask::
+DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+$L$increment32::
+ DD 6,6,6,0
+$L$increment64::
+ DD 1,0,0,0
+$L$xts_magic::
+ DD 087h,0,1,0
+
+DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
+DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+DB 115,108,46,111,114,103,62,0
+ALIGN 64
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+ecb_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[152+r8]
+
+ jmp $L$common_seh_tail
+ecb_se_handler ENDP
+
+
+ALIGN 16
+ccm64_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_seh_tail
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$common_seh_tail
+
+ lea rsi,QWORD PTR[rax]
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,8
+ DD 0a548f3fch
+ lea rax,QWORD PTR[88+rax]
+
+ jmp $L$common_seh_tail
+ccm64_se_handler ENDP
+
+
+ALIGN 16
+ctr32_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ lea r10,QWORD PTR[$L$ctr32_body]
+ cmp rbx,r10
+ jb $L$common_seh_tail
+
+ mov rax,QWORD PTR[152+r8]
+
+ lea r10,QWORD PTR[$L$ctr32_ret]
+ cmp rbx,r10
+ jae $L$common_seh_tail
+
+ lea rsi,QWORD PTR[32+rax]
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,20
+ DD 0a548f3fch
+ lea rax,QWORD PTR[200+rax]
+
+ jmp $L$common_seh_tail
+ctr32_se_handler ENDP
+
+
+ALIGN 16
+xts_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_seh_tail
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$common_seh_tail
+
+ lea rsi,QWORD PTR[96+rax]
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,20
+ DD 0a548f3fch
+ lea rax,QWORD PTR[((104+160))+rax]
+
+ jmp $L$common_seh_tail
+xts_se_handler ENDP
+
+ALIGN 16
+cbc_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[152+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ lea r10,QWORD PTR[$L$cbc_decrypt]
+ cmp rbx,r10
+ jb $L$common_seh_tail
+
+ lea r10,QWORD PTR[$L$cbc_decrypt_body]
+ cmp rbx,r10
+ jb $L$restore_cbc_rax
+
+ lea r10,QWORD PTR[$L$cbc_ret]
+ cmp rbx,r10
+ jae $L$common_seh_tail
+
+ lea rsi,QWORD PTR[rax]
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,8
+ DD 0a548f3fch
+ lea rax,QWORD PTR[88+rax]
+ jmp $L$common_seh_tail
+
+$L$restore_cbc_rax::
+ mov rax,QWORD PTR[120+r8]
+
+$L$common_seh_tail::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+cbc_se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_aesni_ecb_encrypt
+ DD imagerel $L$SEH_end_aesni_ecb_encrypt
+ DD imagerel $L$SEH_info_ecb
+
+ DD imagerel $L$SEH_begin_aesni_ccm64_encrypt_blocks
+ DD imagerel $L$SEH_end_aesni_ccm64_encrypt_blocks
+ DD imagerel $L$SEH_info_ccm64_enc
+
+ DD imagerel $L$SEH_begin_aesni_ccm64_decrypt_blocks
+ DD imagerel $L$SEH_end_aesni_ccm64_decrypt_blocks
+ DD imagerel $L$SEH_info_ccm64_dec
+
+ DD imagerel $L$SEH_begin_aesni_ctr32_encrypt_blocks
+ DD imagerel $L$SEH_end_aesni_ctr32_encrypt_blocks
+ DD imagerel $L$SEH_info_ctr32
+
+ DD imagerel $L$SEH_begin_aesni_xts_encrypt
+ DD imagerel $L$SEH_end_aesni_xts_encrypt
+ DD imagerel $L$SEH_info_xts_enc
+
+ DD imagerel $L$SEH_begin_aesni_xts_decrypt
+ DD imagerel $L$SEH_end_aesni_xts_decrypt
+ DD imagerel $L$SEH_info_xts_dec
+ DD imagerel $L$SEH_begin_aesni_cbc_encrypt
+ DD imagerel $L$SEH_end_aesni_cbc_encrypt
+ DD imagerel $L$SEH_info_cbc
+
+ DD imagerel aesni_set_decrypt_key
+ DD imagerel $L$SEH_end_set_decrypt_key
+ DD imagerel $L$SEH_info_key
+
+ DD imagerel aesni_set_encrypt_key
+ DD imagerel $L$SEH_end_set_encrypt_key
+ DD imagerel $L$SEH_info_key
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_ecb::
+DB 9,0,0,0
+ DD imagerel ecb_se_handler
+$L$SEH_info_ccm64_enc::
+DB 9,0,0,0
+ DD imagerel ccm64_se_handler
+ DD imagerel $L$ccm64_enc_body,imagerel $L$ccm64_enc_ret
+$L$SEH_info_ccm64_dec::
+DB 9,0,0,0
+ DD imagerel ccm64_se_handler
+ DD imagerel $L$ccm64_dec_body,imagerel $L$ccm64_dec_ret
+$L$SEH_info_ctr32::
+DB 9,0,0,0
+ DD imagerel ctr32_se_handler
+$L$SEH_info_xts_enc::
+DB 9,0,0,0
+ DD imagerel xts_se_handler
+ DD imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue
+$L$SEH_info_xts_dec::
+DB 9,0,0,0
+ DD imagerel xts_se_handler
+ DD imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue
+$L$SEH_info_cbc::
+DB 9,0,0,0
+ DD imagerel cbc_se_handler
+$L$SEH_info_key::
+DB 001h,004h,001h,000h
+DB 004h,002h,000h,000h
+
+.xdata ENDS
+END
+
diff --git a/ext/libressl/crypto/aes/aesni-mingw64-x86_64.S b/ext/libressl/crypto/aes/aesni-mingw64-x86_64.S
new file mode 100644
index 0000000..0a82a10
--- /dev/null
+++ b/ext/libressl/crypto/aes/aesni-mingw64-x86_64.S
@@ -0,0 +1,3008 @@
+#include "x86_arch.h"
+.text
+.globl aesni_encrypt
+.def aesni_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+aesni_encrypt:
+ movups (%rcx),%xmm2
+ movl 240(%r8),%eax
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm2
+.Loop_enc1_1:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_1
+ aesenclast %xmm1,%xmm2
+ movups %xmm2,(%rdx)
+ retq
+
+
+.globl aesni_decrypt
+.def aesni_decrypt; .scl 2; .type 32; .endef
+.p2align 4
+aesni_decrypt:
+ movups (%rcx),%xmm2
+ movl 240(%r8),%eax
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm2
+.Loop_dec1_2:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_dec1_2
+ aesdeclast %xmm1,%xmm2
+ movups %xmm2,(%rdx)
+ retq
+
+.def _aesni_encrypt3; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_encrypt3:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+
+.Lenc_loop3:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop3
+
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ retq
+
+.def _aesni_decrypt3; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_decrypt3:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+
+.Ldec_loop3:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop3
+
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ retq
+
+.def _aesni_encrypt4; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_encrypt4:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+
+.Lenc_loop4:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop4
+
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ retq
+
+.def _aesni_decrypt4; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_decrypt4:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+
+.Ldec_loop4:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop4
+
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ retq
+
+.def _aesni_encrypt6; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_encrypt6:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesenc %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm7
+ jmp .Lenc_loop6_enter
+.p2align 4
+.Lenc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+.Lenc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop6
+
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ retq
+
+.def _aesni_decrypt6; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_decrypt6:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm7
+ jmp .Ldec_loop6_enter
+.p2align 4
+.Ldec_loop6:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+.Ldec_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop6
+
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ retq
+
+.def _aesni_encrypt8; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_encrypt8:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesenc %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesenc %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp .Lenc_loop8_enter
+.p2align 4
+.Lenc_loop8:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+.Lenc_loop8_enter:
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ aesenc %xmm0,%xmm8
+ aesenc %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop8
+
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ aesenclast %xmm0,%xmm8
+ aesenclast %xmm0,%xmm9
+ retq
+
+.def _aesni_decrypt8; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_decrypt8:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp .Ldec_loop8_enter
+.p2align 4
+.Ldec_loop8:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+.Ldec_loop8_enter:
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ aesdec %xmm0,%xmm8
+ aesdec %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop8
+
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ aesdeclast %xmm0,%xmm8
+ aesdeclast %xmm0,%xmm9
+ retq
+
+.globl aesni_ecb_encrypt
+.def aesni_ecb_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+aesni_ecb_encrypt:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_aesni_ecb_encrypt:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+
+ andq $-16,%rdx
+ jz .Lecb_ret
+
+ movl 240(%rcx),%eax
+ movups (%rcx),%xmm0
+ movq %rcx,%r11
+ movl %eax,%r10d
+ testl %r8d,%r8d
+ jz .Lecb_decrypt
+
+ cmpq $128,%rdx
+ jb .Lecb_enc_tail
+
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp .Lecb_enc_loop8_enter
+.p2align 4
+.Lecb_enc_loop8:
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+.Lecb_enc_loop8_enter:
+
+ call _aesni_encrypt8
+
+ subq $128,%rdx
+ jnc .Lecb_enc_loop8
+
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx
+ jz .Lecb_ret
+
+.Lecb_enc_tail:
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb .Lecb_enc_one
+ movups 16(%rdi),%xmm3
+ je .Lecb_enc_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb .Lecb_enc_three
+ movups 48(%rdi),%xmm5
+ je .Lecb_enc_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb .Lecb_enc_five
+ movups 80(%rdi),%xmm7
+ je .Lecb_enc_six
+ movdqu 96(%rdi),%xmm8
+ call _aesni_encrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+.Lecb_enc_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_3:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_3
+ aesenclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+.Lecb_enc_two:
+ xorps %xmm4,%xmm4
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+.Lecb_enc_three:
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+.Lecb_enc_four:
+ call _aesni_encrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+.Lecb_enc_five:
+ xorps %xmm7,%xmm7
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+.Lecb_enc_six:
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ jmp .Lecb_ret
+
+.p2align 4
+.Lecb_decrypt:
+ cmpq $128,%rdx
+ jb .Lecb_dec_tail
+
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp .Lecb_dec_loop8_enter
+.p2align 4
+.Lecb_dec_loop8:
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+.Lecb_dec_loop8_enter:
+
+ call _aesni_decrypt8
+
+ movups (%r11),%xmm0
+ subq $128,%rdx
+ jnc .Lecb_dec_loop8
+
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx
+ jz .Lecb_ret
+
+.Lecb_dec_tail:
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb .Lecb_dec_one
+ movups 16(%rdi),%xmm3
+ je .Lecb_dec_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb .Lecb_dec_three
+ movups 48(%rdi),%xmm5
+ je .Lecb_dec_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb .Lecb_dec_five
+ movups 80(%rdi),%xmm7
+ je .Lecb_dec_six
+ movups 96(%rdi),%xmm8
+ movups (%rcx),%xmm0
+ call _aesni_decrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+.Lecb_dec_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_4:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_4
+ aesdeclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+.Lecb_dec_two:
+ xorps %xmm4,%xmm4
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+.Lecb_dec_three:
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+.Lecb_dec_four:
+ call _aesni_decrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+.Lecb_dec_five:
+ xorps %xmm7,%xmm7
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+.Lecb_dec_six:
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+
+.Lecb_ret:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_aesni_ecb_encrypt:
+.globl aesni_ccm64_encrypt_blocks
+.def aesni_ccm64_encrypt_blocks; .scl 2; .type 32; .endef
+.p2align 4
+aesni_ccm64_encrypt_blocks:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_aesni_ccm64_encrypt_blocks:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+
+ leaq -88(%rsp),%rsp
+ movaps %xmm6,(%rsp)
+ movaps %xmm7,16(%rsp)
+ movaps %xmm8,32(%rsp)
+ movaps %xmm9,48(%rsp)
+.Lccm64_enc_body:
+ movl 240(%rcx),%eax
+ movdqu (%r8),%xmm9
+ movdqa .Lincrement64(%rip),%xmm6
+ movdqa .Lbswap_mask(%rip),%xmm7
+
+ shrl $1,%eax
+ leaq 0(%rcx),%r11
+ movdqu (%r9),%xmm3
+ movdqa %xmm9,%xmm2
+ movl %eax,%r10d
+.byte 102,68,15,56,0,207
+ jmp .Lccm64_enc_outer
+.p2align 4
+.Lccm64_enc_outer:
+ movups (%r11),%xmm0
+ movl %r10d,%eax
+ movups (%rdi),%xmm8
+
+ xorps %xmm0,%xmm2
+ movups 16(%r11),%xmm1
+ xorps %xmm8,%xmm0
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm3
+ movups (%rcx),%xmm0
+
+.Lccm64_enc2_loop:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz .Lccm64_enc2_loop
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ paddq %xmm6,%xmm9
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+
+ decq %rdx
+ leaq 16(%rdi),%rdi
+ xorps %xmm2,%xmm8
+ movdqa %xmm9,%xmm2
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215
+ jnz .Lccm64_enc_outer
+
+ movups %xmm3,(%r9)
+ movaps (%rsp),%xmm6
+ movaps 16(%rsp),%xmm7
+ movaps 32(%rsp),%xmm8
+ movaps 48(%rsp),%xmm9
+ leaq 88(%rsp),%rsp
+.Lccm64_enc_ret:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_aesni_ccm64_encrypt_blocks:
+.globl aesni_ccm64_decrypt_blocks
+.def aesni_ccm64_decrypt_blocks; .scl 2; .type 32; .endef
+.p2align 4
+aesni_ccm64_decrypt_blocks:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_aesni_ccm64_decrypt_blocks:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+
+ leaq -88(%rsp),%rsp
+ movaps %xmm6,(%rsp)
+ movaps %xmm7,16(%rsp)
+ movaps %xmm8,32(%rsp)
+ movaps %xmm9,48(%rsp)
+.Lccm64_dec_body:
+ movl 240(%rcx),%eax
+ movups (%r8),%xmm9
+ movdqu (%r9),%xmm3
+ movdqa .Lincrement64(%rip),%xmm6
+ movdqa .Lbswap_mask(%rip),%xmm7
+
+ movaps %xmm9,%xmm2
+ movl %eax,%r10d
+ movq %rcx,%r11
+.byte 102,68,15,56,0,207
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_5:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_5
+ aesenclast %xmm1,%xmm2
+ movups (%rdi),%xmm8
+ paddq %xmm6,%xmm9
+ leaq 16(%rdi),%rdi
+ jmp .Lccm64_dec_outer
+.p2align 4
+.Lccm64_dec_outer:
+ xorps %xmm2,%xmm8
+ movdqa %xmm9,%xmm2
+ movl %r10d,%eax
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215
+
+ subq $1,%rdx
+ jz .Lccm64_dec_break
+
+ movups (%r11),%xmm0
+ shrl $1,%eax
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm8,%xmm3
+ movups (%rcx),%xmm0
+
+.Lccm64_dec2_loop:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz .Lccm64_dec2_loop
+ movups (%rdi),%xmm8
+ paddq %xmm6,%xmm9
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ leaq 16(%rdi),%rdi
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ jmp .Lccm64_dec_outer
+
+.p2align 4
+.Lccm64_dec_break:
+
+ movups (%r11),%xmm0
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%r11
+ xorps %xmm8,%xmm3
+.Loop_enc1_6:
+ aesenc %xmm1,%xmm3
+ decl %eax
+ movups (%r11),%xmm1
+ leaq 16(%r11),%r11
+ jnz .Loop_enc1_6
+ aesenclast %xmm1,%xmm3
+ movups %xmm3,(%r9)
+ movaps (%rsp),%xmm6
+ movaps 16(%rsp),%xmm7
+ movaps 32(%rsp),%xmm8
+ movaps 48(%rsp),%xmm9
+ leaq 88(%rsp),%rsp
+.Lccm64_dec_ret:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_aesni_ccm64_decrypt_blocks:
+.globl aesni_ctr32_encrypt_blocks
+.def aesni_ctr32_encrypt_blocks; .scl 2; .type 32; .endef
+.p2align 4
+aesni_ctr32_encrypt_blocks:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_aesni_ctr32_encrypt_blocks:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+
+ leaq -200(%rsp),%rsp
+ movaps %xmm6,32(%rsp)
+ movaps %xmm7,48(%rsp)
+ movaps %xmm8,64(%rsp)
+ movaps %xmm9,80(%rsp)
+ movaps %xmm10,96(%rsp)
+ movaps %xmm11,112(%rsp)
+ movaps %xmm12,128(%rsp)
+ movaps %xmm13,144(%rsp)
+ movaps %xmm14,160(%rsp)
+ movaps %xmm15,176(%rsp)
+.Lctr32_body:
+ cmpq $1,%rdx
+ je .Lctr32_one_shortcut
+
+ movdqu (%r8),%xmm14
+ movdqa .Lbswap_mask(%rip),%xmm15
+ xorl %eax,%eax
+.byte 102,69,15,58,22,242,3
+.byte 102,68,15,58,34,240,3
+
+ movl 240(%rcx),%eax
+ bswapl %r10d
+ pxor %xmm12,%xmm12
+ pxor %xmm13,%xmm13
+.byte 102,69,15,58,34,226,0
+ leaq 3(%r10),%r11
+.byte 102,69,15,58,34,235,0
+ incl %r10d
+.byte 102,69,15,58,34,226,1
+ incq %r11
+.byte 102,69,15,58,34,235,1
+ incl %r10d
+.byte 102,69,15,58,34,226,2
+ incq %r11
+.byte 102,69,15,58,34,235,2
+ movdqa %xmm12,0(%rsp)
+.byte 102,69,15,56,0,231
+ movdqa %xmm13,16(%rsp)
+.byte 102,69,15,56,0,239
+
+ pshufd $192,%xmm12,%xmm2
+ pshufd $128,%xmm12,%xmm3
+ pshufd $64,%xmm12,%xmm4
+ cmpq $6,%rdx
+ jb .Lctr32_tail
+ shrl $1,%eax
+ movq %rcx,%r11
+ movl %eax,%r10d
+ subq $6,%rdx
+ jmp .Lctr32_loop6
+
+.p2align 4
+.Lctr32_loop6:
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm2
+ movups (%r11),%xmm0
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm3
+ movups 16(%r11),%xmm1
+ pshufd $64,%xmm13,%xmm7
+ por %xmm14,%xmm4
+ por %xmm14,%xmm5
+ xorps %xmm0,%xmm2
+ por %xmm14,%xmm6
+ por %xmm14,%xmm7
+
+
+
+
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ movdqa .Lincrement32(%rip),%xmm13
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ movdqa 0(%rsp),%xmm12
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ jmp .Lctr32_enc_loop6_enter
+.p2align 4
+.Lctr32_enc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+.Lctr32_enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lctr32_enc_loop6
+
+ aesenc %xmm1,%xmm2
+ paddd %xmm13,%xmm12
+ aesenc %xmm1,%xmm3
+ paddd 16(%rsp),%xmm13
+ aesenc %xmm1,%xmm4
+ movdqa %xmm12,0(%rsp)
+ aesenc %xmm1,%xmm5
+ movdqa %xmm13,16(%rsp)
+ aesenc %xmm1,%xmm6
+.byte 102,69,15,56,0,231
+ aesenc %xmm1,%xmm7
+.byte 102,69,15,56,0,239
+
+ aesenclast %xmm0,%xmm2
+ movups (%rdi),%xmm8
+ aesenclast %xmm0,%xmm3
+ movups 16(%rdi),%xmm9
+ aesenclast %xmm0,%xmm4
+ movups 32(%rdi),%xmm10
+ aesenclast %xmm0,%xmm5
+ movups 48(%rdi),%xmm11
+ aesenclast %xmm0,%xmm6
+ movups 64(%rdi),%xmm1
+ aesenclast %xmm0,%xmm7
+ movups 80(%rdi),%xmm0
+ leaq 96(%rdi),%rdi
+
+ xorps %xmm2,%xmm8
+ pshufd $192,%xmm12,%xmm2
+ xorps %xmm3,%xmm9
+ pshufd $128,%xmm12,%xmm3
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ pshufd $64,%xmm12,%xmm4
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ xorps %xmm7,%xmm0
+ movups %xmm1,64(%rsi)
+ movups %xmm0,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movl %r10d,%eax
+ subq $6,%rdx
+ jnc .Lctr32_loop6
+
+ addq $6,%rdx
+ jz .Lctr32_done
+ movq %r11,%rcx
+ leal 1(%rax,%rax,1),%eax
+
+.Lctr32_tail:
+ por %xmm14,%xmm2
+ movups (%rdi),%xmm8
+ cmpq $2,%rdx
+ jb .Lctr32_one
+
+ por %xmm14,%xmm3
+ movups 16(%rdi),%xmm9
+ je .Lctr32_two
+
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm4
+ movups 32(%rdi),%xmm10
+ cmpq $4,%rdx
+ jb .Lctr32_three
+
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm5
+ movups 48(%rdi),%xmm11
+ je .Lctr32_four
+
+ por %xmm14,%xmm6
+ xorps %xmm7,%xmm7
+
+ call _aesni_encrypt6
+
+ movups 64(%rdi),%xmm1
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ movups %xmm1,64(%rsi)
+ jmp .Lctr32_done
+
+.p2align 4
+.Lctr32_one_shortcut:
+ movups (%r8),%xmm2
+ movups (%rdi),%xmm8
+ movl 240(%rcx),%eax
+.Lctr32_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_7:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_7
+ aesenclast %xmm1,%xmm2
+ xorps %xmm2,%xmm8
+ movups %xmm8,(%rsi)
+ jmp .Lctr32_done
+
+.p2align 4
+.Lctr32_two:
+ xorps %xmm4,%xmm4
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ movups %xmm9,16(%rsi)
+ jmp .Lctr32_done
+
+.p2align 4
+.Lctr32_three:
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ movups %xmm10,32(%rsi)
+ jmp .Lctr32_done
+
+.p2align 4
+.Lctr32_four:
+ call _aesni_encrypt4
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ movups %xmm11,48(%rsi)
+
+.Lctr32_done:
+ movaps 32(%rsp),%xmm6
+ movaps 48(%rsp),%xmm7
+ movaps 64(%rsp),%xmm8
+ movaps 80(%rsp),%xmm9
+ movaps 96(%rsp),%xmm10
+ movaps 112(%rsp),%xmm11
+ movaps 128(%rsp),%xmm12
+ movaps 144(%rsp),%xmm13
+ movaps 160(%rsp),%xmm14
+ movaps 176(%rsp),%xmm15
+ leaq 200(%rsp),%rsp
+.Lctr32_ret:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_aesni_ctr32_encrypt_blocks:
+.globl aesni_xts_encrypt
+.def aesni_xts_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+aesni_xts_encrypt:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_aesni_xts_encrypt:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+
+ leaq -264(%rsp),%rsp
+ movaps %xmm6,96(%rsp)
+ movaps %xmm7,112(%rsp)
+ movaps %xmm8,128(%rsp)
+ movaps %xmm9,144(%rsp)
+ movaps %xmm10,160(%rsp)
+ movaps %xmm11,176(%rsp)
+ movaps %xmm12,192(%rsp)
+ movaps %xmm13,208(%rsp)
+ movaps %xmm14,224(%rsp)
+ movaps %xmm15,240(%rsp)
+.Lxts_enc_body:
+ movups (%r9),%xmm15
+ movl 240(%r8),%eax
+ movl 240(%rcx),%r10d
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+.Loop_enc1_8:
+ aesenc %xmm1,%xmm15
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_8
+ aesenclast %xmm1,%xmm15
+ movq %rcx,%r11
+ movl %r10d,%eax
+ movq %rdx,%r9
+ andq $-16,%rdx
+
+ movdqa .Lxts_magic(%rip),%xmm8
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ subq $96,%rdx
+ jc .Lxts_enc_short
+
+ shrl $1,%eax
+ subl $1,%eax
+ movl %eax,%r10d
+ jmp .Lxts_enc_grandloop
+
+.p2align 4
+.Lxts_enc_grandloop:
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+
+
+
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp)
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+ aesenc %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesenc %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp .Lxts_enc_loop6_enter
+
+.p2align 4
+.Lxts_enc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+.Lxts_enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lxts_enc_loop6
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ aesenc %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups 32(%rcx),%xmm0
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ aesenclast %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesenclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc .Lxts_enc_grandloop
+
+ leal 3(%rax,%rax,1),%eax
+ movq %r11,%rcx
+ movl %eax,%r10d
+
+.Lxts_enc_short:
+ addq $96,%rdx
+ jz .Lxts_enc_done
+
+ cmpq $32,%rdx
+ jb .Lxts_enc_one
+ je .Lxts_enc_two
+
+ cmpq $64,%rdx
+ jb .Lxts_enc_three
+ je .Lxts_enc_four
+
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+
+ call _aesni_encrypt6
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ movdqu %xmm5,48(%rsi)
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.p2align 4
+.Lxts_enc_one:
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_9:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_9
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10
+ movups %xmm2,(%rsi)
+ leaq 16(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.p2align 4
+.Lxts_enc_two:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+
+ call _aesni_encrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10
+ xorps %xmm11,%xmm3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.p2align 4
+.Lxts_enc_three:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+
+ call _aesni_encrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.p2align 4
+.Lxts_enc_four:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+
+ call _aesni_encrypt4
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.p2align 4
+.Lxts_enc_done:
+ andq $15,%r9
+ jz .Lxts_enc_ret
+ movq %r9,%rdx
+
+.Lxts_enc_steal:
+ movzbl (%rdi),%eax
+ movzbl -16(%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,-16(%rsi)
+ movb %cl,0(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz .Lxts_enc_steal
+
+ subq %r9,%rsi
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups -16(%rsi),%xmm2
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_10:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_10
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,-16(%rsi)
+
+.Lxts_enc_ret:
+ movaps 96(%rsp),%xmm6
+ movaps 112(%rsp),%xmm7
+ movaps 128(%rsp),%xmm8
+ movaps 144(%rsp),%xmm9
+ movaps 160(%rsp),%xmm10
+ movaps 176(%rsp),%xmm11
+ movaps 192(%rsp),%xmm12
+ movaps 208(%rsp),%xmm13
+ movaps 224(%rsp),%xmm14
+ movaps 240(%rsp),%xmm15
+ leaq 264(%rsp),%rsp
+.Lxts_enc_epilogue:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_aesni_xts_encrypt:
+.globl aesni_xts_decrypt
+.def aesni_xts_decrypt; .scl 2; .type 32; .endef
+.p2align 4
+aesni_xts_decrypt:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_aesni_xts_decrypt:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+
+ leaq -264(%rsp),%rsp
+ movaps %xmm6,96(%rsp)
+ movaps %xmm7,112(%rsp)
+ movaps %xmm8,128(%rsp)
+ movaps %xmm9,144(%rsp)
+ movaps %xmm10,160(%rsp)
+ movaps %xmm11,176(%rsp)
+ movaps %xmm12,192(%rsp)
+ movaps %xmm13,208(%rsp)
+ movaps %xmm14,224(%rsp)
+ movaps %xmm15,240(%rsp)
+.Lxts_dec_body:
+ movups (%r9),%xmm15
+ movl 240(%r8),%eax
+ movl 240(%rcx),%r10d
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+.Loop_enc1_11:
+ aesenc %xmm1,%xmm15
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_11
+ aesenclast %xmm1,%xmm15
+ xorl %eax,%eax
+ testq $15,%rdx
+ setnz %al
+ shlq $4,%rax
+ subq %rax,%rdx
+
+ movq %rcx,%r11
+ movl %r10d,%eax
+ movq %rdx,%r9
+ andq $-16,%rdx
+
+ movdqa .Lxts_magic(%rip),%xmm8
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ subq $96,%rdx
+ jc .Lxts_dec_short
+
+ shrl $1,%eax
+ subl $1,%eax
+ movl %eax,%r10d
+ jmp .Lxts_dec_grandloop
+
+.p2align 4
+.Lxts_dec_grandloop:
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+
+
+
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp)
+ aesdec %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+ aesdec %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesdec %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp .Lxts_dec_loop6_enter
+
+.p2align 4
+.Lxts_dec_loop6:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+.Lxts_dec_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lxts_dec_loop6
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ aesdec %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups 32(%rcx),%xmm0
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ aesdeclast %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesdeclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdeclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc .Lxts_dec_grandloop
+
+ leal 3(%rax,%rax,1),%eax
+ movq %r11,%rcx
+ movl %eax,%r10d
+
+.Lxts_dec_short:
+ addq $96,%rdx
+ jz .Lxts_dec_done
+
+ cmpq $32,%rdx
+ jb .Lxts_dec_one
+ je .Lxts_dec_two
+
+ cmpq $64,%rdx
+ jb .Lxts_dec_three
+ je .Lxts_dec_four
+
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+
+ call _aesni_decrypt6
+
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ pxor %xmm14,%xmm14
+ movdqu %xmm5,48(%rsi)
+ pcmpgtd %xmm15,%xmm14
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ pshufd $19,%xmm14,%xmm11
+ andq $15,%r9
+ jz .Lxts_dec_ret
+
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm11
+ pxor %xmm15,%xmm11
+ jmp .Lxts_dec_done2
+
+.p2align 4
+.Lxts_dec_one:
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_12:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_12
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10
+ movups %xmm2,(%rsi)
+ movdqa %xmm12,%xmm11
+ leaq 16(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.p2align 4
+.Lxts_dec_two:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+
+ call _aesni_decrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm13,%xmm11
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.p2align 4
+.Lxts_dec_three:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+
+ call _aesni_decrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.p2align 4
+.Lxts_dec_four:
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movups (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movups 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+
+ call _aesni_decrypt4
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm14,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.p2align 4
+.Lxts_dec_done:
+ andq $15,%r9
+ jz .Lxts_dec_ret
+.Lxts_dec_done2:
+ movq %r9,%rdx
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups (%rdi),%xmm2
+ xorps %xmm11,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_13:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_13
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm11,%xmm2
+ movups %xmm2,(%rsi)
+
+.Lxts_dec_steal:
+ movzbl 16(%rdi),%eax
+ movzbl (%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,(%rsi)
+ movb %cl,16(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz .Lxts_dec_steal
+
+ subq %r9,%rsi
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups (%rsi),%xmm2
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_14:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_14
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,(%rsi)
+
+.Lxts_dec_ret:
+ movaps 96(%rsp),%xmm6
+ movaps 112(%rsp),%xmm7
+ movaps 128(%rsp),%xmm8
+ movaps 144(%rsp),%xmm9
+ movaps 160(%rsp),%xmm10
+ movaps 176(%rsp),%xmm11
+ movaps 192(%rsp),%xmm12
+ movaps 208(%rsp),%xmm13
+ movaps 224(%rsp),%xmm14
+ movaps 240(%rsp),%xmm15
+ leaq 264(%rsp),%rsp
+.Lxts_dec_epilogue:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_aesni_xts_decrypt:
+.globl aesni_cbc_encrypt
+.def aesni_cbc_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+aesni_cbc_encrypt:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_aesni_cbc_encrypt:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+
+ testq %rdx,%rdx
+ jz .Lcbc_ret
+
+ movl 240(%rcx),%r10d
+ movq %rcx,%r11
+ testl %r9d,%r9d
+ jz .Lcbc_decrypt
+
+ movups (%r8),%xmm2
+ movl %r10d,%eax
+ cmpq $16,%rdx
+ jb .Lcbc_enc_tail
+ subq $16,%rdx
+ jmp .Lcbc_enc_loop
+.p2align 4
+.Lcbc_enc_loop:
+ movups (%rdi),%xmm3
+ leaq 16(%rdi),%rdi
+
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ xorps %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ xorps %xmm3,%xmm2
+.Loop_enc1_15:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_15
+ aesenclast %xmm1,%xmm2
+ movl %r10d,%eax
+ movq %r11,%rcx
+ movups %xmm2,0(%rsi)
+ leaq 16(%rsi),%rsi
+ subq $16,%rdx
+ jnc .Lcbc_enc_loop
+ addq $16,%rdx
+ jnz .Lcbc_enc_tail
+ movups %xmm2,(%r8)
+ jmp .Lcbc_ret
+
+.Lcbc_enc_tail:
+ movq %rdx,%rcx
+ xchgq %rdi,%rsi
+.long 0x9066A4F3
+ movl $16,%ecx
+ subq %rdx,%rcx
+ xorl %eax,%eax
+.long 0x9066AAF3
+ leaq -16(%rdi),%rdi
+ movl %r10d,%eax
+ movq %rdi,%rsi
+ movq %r11,%rcx
+ xorq %rdx,%rdx
+ jmp .Lcbc_enc_loop
+
+.p2align 4
+.Lcbc_decrypt:
+ leaq -88(%rsp),%rsp
+ movaps %xmm6,(%rsp)
+ movaps %xmm7,16(%rsp)
+ movaps %xmm8,32(%rsp)
+ movaps %xmm9,48(%rsp)
+.Lcbc_decrypt_body:
+ movups (%r8),%xmm9
+ movl %r10d,%eax
+ cmpq $112,%rdx
+ jbe .Lcbc_dec_tail
+ shrl $1,%r10d
+ subq $112,%rdx
+ movl %r10d,%eax
+ movaps %xmm9,64(%rsp)
+ jmp .Lcbc_dec_loop8_enter
+.p2align 4
+.Lcbc_dec_loop8:
+ movaps %xmm0,64(%rsp)
+ movups %xmm9,(%rsi)
+ leaq 16(%rsi),%rsi
+.Lcbc_dec_loop8_enter:
+ movups (%rcx),%xmm0
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 16(%rcx),%xmm1
+
+ leaq 32(%rcx),%rcx
+ movdqu 32(%rdi),%xmm4
+ xorps %xmm0,%xmm2
+ movdqu 48(%rdi),%xmm5
+ xorps %xmm0,%xmm3
+ movdqu 64(%rdi),%xmm6
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ movdqu 80(%rdi),%xmm7
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqu 96(%rdi),%xmm8
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqu 112(%rdi),%xmm9
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+
+ call .Ldec_loop8_enter
+
+ movups (%rdi),%xmm1
+ movups 16(%rdi),%xmm0
+ xorps 64(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm1
+ xorps %xmm0,%xmm8
+ movups 112(%rdi),%xmm0
+ xorps %xmm1,%xmm9
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movq %r11,%rcx
+ movups %xmm7,80(%rsi)
+ leaq 128(%rdi),%rdi
+ movups %xmm8,96(%rsi)
+ leaq 112(%rsi),%rsi
+ subq $128,%rdx
+ ja .Lcbc_dec_loop8
+
+ movaps %xmm9,%xmm2
+ movaps %xmm0,%xmm9
+ addq $112,%rdx
+ jle .Lcbc_dec_tail_collected
+ movups %xmm2,(%rsi)
+ leal 1(%r10,%r10,1),%eax
+ leaq 16(%rsi),%rsi
+.Lcbc_dec_tail:
+ movups (%rdi),%xmm2
+ movaps %xmm2,%xmm8
+ cmpq $16,%rdx
+ jbe .Lcbc_dec_one
+
+ movups 16(%rdi),%xmm3
+ movaps %xmm3,%xmm7
+ cmpq $32,%rdx
+ jbe .Lcbc_dec_two
+
+ movups 32(%rdi),%xmm4
+ movaps %xmm4,%xmm6
+ cmpq $48,%rdx
+ jbe .Lcbc_dec_three
+
+ movups 48(%rdi),%xmm5
+ cmpq $64,%rdx
+ jbe .Lcbc_dec_four
+
+ movups 64(%rdi),%xmm6
+ cmpq $80,%rdx
+ jbe .Lcbc_dec_five
+
+ movups 80(%rdi),%xmm7
+ cmpq $96,%rdx
+ jbe .Lcbc_dec_six
+
+ movups 96(%rdi),%xmm8
+ movaps %xmm9,64(%rsp)
+ call _aesni_decrypt8
+ movups (%rdi),%xmm1
+ movups 16(%rdi),%xmm0
+ xorps 64(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm9
+ xorps %xmm0,%xmm8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movaps %xmm8,%xmm2
+ subq $112,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+.Lcbc_dec_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_16:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_16
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm9,%xmm2
+ movaps %xmm8,%xmm9
+ subq $16,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+.Lcbc_dec_two:
+ xorps %xmm4,%xmm4
+ call _aesni_decrypt3
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ movaps %xmm7,%xmm9
+ movaps %xmm3,%xmm2
+ leaq 16(%rsi),%rsi
+ subq $32,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+.Lcbc_dec_three:
+ call _aesni_decrypt3
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ movaps %xmm6,%xmm9
+ movaps %xmm4,%xmm2
+ leaq 32(%rsi),%rsi
+ subq $48,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+.Lcbc_dec_four:
+ call _aesni_decrypt4
+ xorps %xmm9,%xmm2
+ movups 48(%rdi),%xmm9
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ xorps %xmm6,%xmm5
+ movups %xmm4,32(%rsi)
+ movaps %xmm5,%xmm2
+ leaq 48(%rsi),%rsi
+ subq $64,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+.Lcbc_dec_five:
+ xorps %xmm7,%xmm7
+ call _aesni_decrypt6
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm9
+ xorps %xmm1,%xmm6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ movaps %xmm6,%xmm2
+ subq $80,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+.Lcbc_dec_six:
+ call _aesni_decrypt6
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm0
+ xorps %xmm1,%xmm6
+ movups 80(%rdi),%xmm9
+ xorps %xmm0,%xmm7
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ movaps %xmm7,%xmm2
+ subq $96,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+.Lcbc_dec_tail_collected:
+ andq $15,%rdx
+ movups %xmm9,(%r8)
+ jnz .Lcbc_dec_tail_partial
+ movups %xmm2,(%rsi)
+ jmp .Lcbc_dec_ret
+.p2align 4
+.Lcbc_dec_tail_partial:
+ movaps %xmm2,64(%rsp)
+ movq $16,%rcx
+ movq %rsi,%rdi
+ subq %rdx,%rcx
+ leaq 64(%rsp),%rsi
+.long 0x9066A4F3
+
+.Lcbc_dec_ret:
+ movaps (%rsp),%xmm6
+ movaps 16(%rsp),%xmm7
+ movaps 32(%rsp),%xmm8
+ movaps 48(%rsp),%xmm9
+ leaq 88(%rsp),%rsp
+.Lcbc_ret:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_aesni_cbc_encrypt:
+.globl aesni_set_decrypt_key
+.def aesni_set_decrypt_key; .scl 2; .type 32; .endef
+.p2align 4
+aesni_set_decrypt_key:
+ subq $8,%rsp
+ call __aesni_set_encrypt_key
+ shll $4,%edx
+ testl %eax,%eax
+ jnz .Ldec_key_ret
+ leaq 16(%r8,%rdx,1),%rcx
+
+ movups (%r8),%xmm0
+ movups (%rcx),%xmm1
+ movups %xmm0,(%rcx)
+ movups %xmm1,(%r8)
+ leaq 16(%r8),%r8
+ leaq -16(%rcx),%rcx
+
+.Ldec_key_inverse:
+ movups (%r8),%xmm0
+ movups (%rcx),%xmm1
+ aesimc %xmm0,%xmm0
+ aesimc %xmm1,%xmm1
+ leaq 16(%r8),%r8
+ leaq -16(%rcx),%rcx
+ movups %xmm0,16(%rcx)
+ movups %xmm1,-16(%r8)
+ cmpq %r8,%rcx
+ ja .Ldec_key_inverse
+
+ movups (%r8),%xmm0
+ aesimc %xmm0,%xmm0
+ movups %xmm0,(%rcx)
+.Ldec_key_ret:
+ addq $8,%rsp
+ retq
+.LSEH_end_set_decrypt_key:
+
+.globl aesni_set_encrypt_key
+.def aesni_set_encrypt_key; .scl 2; .type 32; .endef
+.p2align 4
+aesni_set_encrypt_key:
+__aesni_set_encrypt_key:
+ subq $8,%rsp
+ movq $-1,%rax
+ testq %rcx,%rcx
+ jz .Lenc_key_ret
+ testq %r8,%r8
+ jz .Lenc_key_ret
+
+ movups (%rcx),%xmm0
+ xorps %xmm4,%xmm4
+ leaq 16(%r8),%rax
+ cmpl $256,%edx
+ je .L14rounds
+ cmpl $192,%edx
+ je .L12rounds
+ cmpl $128,%edx
+ jne .Lbad_keybits
+
+.L10rounds:
+ movl $9,%edx
+ movups %xmm0,(%r8)
+ aeskeygenassist $1,%xmm0,%xmm1
+ call .Lkey_expansion_128_cold
+ aeskeygenassist $2,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $4,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $8,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $16,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $32,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $64,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $128,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $27,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $54,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ movups %xmm0,(%rax)
+ movl %edx,80(%rax)
+ xorl %eax,%eax
+ jmp .Lenc_key_ret
+
+.p2align 4
+.L12rounds:
+ movq 16(%rcx),%xmm2
+ movl $11,%edx
+ movups %xmm0,(%r8)
+ aeskeygenassist $1,%xmm2,%xmm1
+ call .Lkey_expansion_192a_cold
+ aeskeygenassist $2,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $8,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $32,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $128,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ movups %xmm0,(%rax)
+ movl %edx,48(%rax)
+ xorq %rax,%rax
+ jmp .Lenc_key_ret
+
+.p2align 4
+.L14rounds:
+ movups 16(%rcx),%xmm2
+ movl $13,%edx
+ leaq 16(%rax),%rax
+ movups %xmm0,(%r8)
+ movups %xmm2,16(%r8)
+ aeskeygenassist $1,%xmm2,%xmm1
+ call .Lkey_expansion_256a_cold
+ aeskeygenassist $1,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $2,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $2,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $4,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $8,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $8,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $16,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $32,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $32,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ movups %xmm0,(%rax)
+ movl %edx,16(%rax)
+ xorq %rax,%rax
+ jmp .Lenc_key_ret
+
+.p2align 4
+.Lbad_keybits:
+ movq $-2,%rax
+.Lenc_key_ret:
+ addq $8,%rsp
+ retq
+.LSEH_end_set_encrypt_key:
+
+.p2align 4
+.Lkey_expansion_128:
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_128_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ retq
+
+.p2align 4
+.Lkey_expansion_192a:
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_192a_cold:
+ movaps %xmm2,%xmm5
+.Lkey_expansion_192b_warm:
+ shufps $16,%xmm0,%xmm4
+ movdqa %xmm2,%xmm3
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ pslldq $4,%xmm3
+ xorps %xmm4,%xmm0
+ pshufd $85,%xmm1,%xmm1
+ pxor %xmm3,%xmm2
+ pxor %xmm1,%xmm0
+ pshufd $255,%xmm0,%xmm3
+ pxor %xmm3,%xmm2
+ retq
+
+.p2align 4
+.Lkey_expansion_192b:
+ movaps %xmm0,%xmm3
+ shufps $68,%xmm0,%xmm5
+ movups %xmm5,(%rax)
+ shufps $78,%xmm2,%xmm3
+ movups %xmm3,16(%rax)
+ leaq 32(%rax),%rax
+ jmp .Lkey_expansion_192b_warm
+
+.p2align 4
+.Lkey_expansion_256a:
+ movups %xmm2,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_256a_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ retq
+
+.p2align 4
+.Lkey_expansion_256b:
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+
+ shufps $16,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $140,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $170,%xmm1,%xmm1
+ xorps %xmm1,%xmm2
+ retq
+
+
+.p2align 6
+.Lbswap_mask:
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.Lincrement32:
+.long 6,6,6,0
+.Lincrement64:
+.long 1,0,0,0
+.Lxts_magic:
+.long 0x87,0,1,0
+
+.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align 6
+
+.def ecb_se_handler; .scl 3; .type 32; .endef
+.p2align 4
+ecb_se_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 152(%r8),%rax
+
+ jmp .Lcommon_seh_tail
+
+
+.def ccm64_se_handler; .scl 3; .type 32; .endef
+.p2align 4
+ccm64_se_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax
+ movq 248(%r8),%rbx
+
+ movq 8(%r9),%rsi
+ movq 56(%r9),%r11
+
+ movl 0(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jb .Lcommon_seh_tail
+
+ movq 152(%r8),%rax
+
+ movl 4(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lcommon_seh_tail
+
+ leaq 0(%rax),%rsi
+ leaq 512(%r8),%rdi
+ movl $8,%ecx
+.long 0xa548f3fc
+ leaq 88(%rax),%rax
+
+ jmp .Lcommon_seh_tail
+
+
+.def ctr32_se_handler; .scl 3; .type 32; .endef
+.p2align 4
+ctr32_se_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax
+ movq 248(%r8),%rbx
+
+ leaq .Lctr32_body(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lcommon_seh_tail
+
+ movq 152(%r8),%rax
+
+ leaq .Lctr32_ret(%rip),%r10
+ cmpq %r10,%rbx
+ jae .Lcommon_seh_tail
+
+ leaq 32(%rax),%rsi
+ leaq 512(%r8),%rdi
+ movl $20,%ecx
+.long 0xa548f3fc
+ leaq 200(%rax),%rax
+
+ jmp .Lcommon_seh_tail
+
+
+.def xts_se_handler; .scl 3; .type 32; .endef
+.p2align 4
+xts_se_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax
+ movq 248(%r8),%rbx
+
+ movq 8(%r9),%rsi
+ movq 56(%r9),%r11
+
+ movl 0(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jb .Lcommon_seh_tail
+
+ movq 152(%r8),%rax
+
+ movl 4(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lcommon_seh_tail
+
+ leaq 96(%rax),%rsi
+ leaq 512(%r8),%rdi
+ movl $20,%ecx
+.long 0xa548f3fc
+ leaq 104+160(%rax),%rax
+
+ jmp .Lcommon_seh_tail
+
+.def cbc_se_handler; .scl 3; .type 32; .endef
+.p2align 4
+cbc_se_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 152(%r8),%rax
+ movq 248(%r8),%rbx
+
+ leaq .Lcbc_decrypt(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lcommon_seh_tail
+
+ leaq .Lcbc_decrypt_body(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lrestore_cbc_rax
+
+ leaq .Lcbc_ret(%rip),%r10
+ cmpq %r10,%rbx
+ jae .Lcommon_seh_tail
+
+ leaq 0(%rax),%rsi
+ leaq 512(%r8),%rdi
+ movl $8,%ecx
+.long 0xa548f3fc
+ leaq 88(%rax),%rax
+ jmp .Lcommon_seh_tail
+
+.Lrestore_cbc_rax:
+ movq 120(%r8),%rax
+
+.Lcommon_seh_tail:
+ movq 8(%rax),%rdi
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8)
+ movq %rsi,168(%r8)
+ movq %rdi,176(%r8)
+
+ movq 40(%r9),%rdi
+ movq %r8,%rsi
+ movl $154,%ecx
+.long 0xa548f3fc
+
+ movq %r9,%rsi
+ xorq %rcx,%rcx
+ movq 8(%rsi),%rdx
+ movq 0(%rsi),%r8
+ movq 16(%rsi),%r9
+ movq 40(%rsi),%r10
+ leaq 56(%rsi),%r11
+ leaq 24(%rsi),%r12
+ movq %r10,32(%rsp)
+ movq %r11,40(%rsp)
+ movq %r12,48(%rsp)
+ movq %rcx,56(%rsp)
+ call *__imp_RtlVirtualUnwind(%rip)
+
+ movl $1,%eax
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+
+
+.section .pdata
+.p2align 2
+.rva .LSEH_begin_aesni_ecb_encrypt
+.rva .LSEH_end_aesni_ecb_encrypt
+.rva .LSEH_info_ecb
+
+.rva .LSEH_begin_aesni_ccm64_encrypt_blocks
+.rva .LSEH_end_aesni_ccm64_encrypt_blocks
+.rva .LSEH_info_ccm64_enc
+
+.rva .LSEH_begin_aesni_ccm64_decrypt_blocks
+.rva .LSEH_end_aesni_ccm64_decrypt_blocks
+.rva .LSEH_info_ccm64_dec
+
+.rva .LSEH_begin_aesni_ctr32_encrypt_blocks
+.rva .LSEH_end_aesni_ctr32_encrypt_blocks
+.rva .LSEH_info_ctr32
+
+.rva .LSEH_begin_aesni_xts_encrypt
+.rva .LSEH_end_aesni_xts_encrypt
+.rva .LSEH_info_xts_enc
+
+.rva .LSEH_begin_aesni_xts_decrypt
+.rva .LSEH_end_aesni_xts_decrypt
+.rva .LSEH_info_xts_dec
+.rva .LSEH_begin_aesni_cbc_encrypt
+.rva .LSEH_end_aesni_cbc_encrypt
+.rva .LSEH_info_cbc
+
+.rva aesni_set_decrypt_key
+.rva .LSEH_end_set_decrypt_key
+.rva .LSEH_info_key
+
+.rva aesni_set_encrypt_key
+.rva .LSEH_end_set_encrypt_key
+.rva .LSEH_info_key
+.section .xdata
+.p2align 3
+.LSEH_info_ecb:
+.byte 9,0,0,0
+.rva ecb_se_handler
+.LSEH_info_ccm64_enc:
+.byte 9,0,0,0
+.rva ccm64_se_handler
+.rva .Lccm64_enc_body,.Lccm64_enc_ret
+.LSEH_info_ccm64_dec:
+.byte 9,0,0,0
+.rva ccm64_se_handler
+.rva .Lccm64_dec_body,.Lccm64_dec_ret
+.LSEH_info_ctr32:
+.byte 9,0,0,0
+.rva ctr32_se_handler
+.LSEH_info_xts_enc:
+.byte 9,0,0,0
+.rva xts_se_handler
+.rva .Lxts_enc_body,.Lxts_enc_epilogue
+.LSEH_info_xts_dec:
+.byte 9,0,0,0
+.rva xts_se_handler
+.rva .Lxts_dec_body,.Lxts_dec_epilogue
+.LSEH_info_cbc:
+.byte 9,0,0,0
+.rva cbc_se_handler
+.LSEH_info_key:
+.byte 0x01,0x04,0x01,0x00
+.byte 0x04,0x02,0x00,0x00
diff --git a/ext/libressl/crypto/aes/aesni-sha1-elf-x86_64.S b/ext/libressl/crypto/aes/aesni-sha1-elf-x86_64.S
new file mode 100644
index 0000000..c0b3e5f
--- /dev/null
+++ b/ext/libressl/crypto/aes/aesni-sha1-elf-x86_64.S
@@ -0,0 +1,1401 @@
+#include "x86_arch.h"
+.text
+
+.hidden OPENSSL_ia32cap_P
+
+.globl aesni_cbc_sha1_enc
+.type aesni_cbc_sha1_enc,@function
+.align 16
+aesni_cbc_sha1_enc:
+
+ movl OPENSSL_ia32cap_P+0(%rip),%r10d
+ movl OPENSSL_ia32cap_P+4(%rip),%r11d
+ jmp aesni_cbc_sha1_enc_ssse3
+ retq
+.size aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc
+.type aesni_cbc_sha1_enc_ssse3,@function
+.align 16
+aesni_cbc_sha1_enc_ssse3:
+ movq 8(%rsp),%r10
+
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -104(%rsp),%rsp
+
+
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+ movdqu (%r8),%xmm11
+ movq %r8,88(%rsp)
+ shlq $6,%r14
+ subq %r12,%r13
+ movl 240(%r15),%r8d
+ addq %r10,%r14
+
+ leaq K_XX_XX(%rip),%r11
+ movl 0(%r9),%eax
+ movl 4(%r9),%ebx
+ movl 8(%r9),%ecx
+ movl 12(%r9),%edx
+ movl %ebx,%esi
+ movl 16(%r9),%ebp
+
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ movups (%r15),%xmm13
+ movups 16(%r15),%xmm14
+ jmp .Loop_ssse3
+.align 16
+.Loop_ssse3:
+ movdqa %xmm1,%xmm4
+ addl 0(%rsp),%ebp
+ movups 0(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ roll $5,%eax
+ paddd %xmm3,%xmm9
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%ebp
+ pxor %xmm2,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pxor %xmm8,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm9,48(%rsp)
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm10,%xmm9
+ rorl $7,%ebp
+ addl %esi,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa 0(%r11),%xmm10
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm4
+ rorl $7,%edx
+ addl %edi,%ebx
+ movdqa %xmm2,%xmm5
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ roll $5,%ebx
+ paddd %xmm4,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrldq $4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm9
+ addl 20(%rsp),%ebp
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm9,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm10,0(%rsp)
+ xorl %edx,%edi
+ addl %eax,%ebp
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ roll $5,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm9
+ xorl %ecx,%esi
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ addl %ebp,%edx
+ movdqa %xmm8,%xmm10
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ movdqa 16(%r11),%xmm8
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ pxor %xmm10,%xmm5
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movdqa %xmm3,%xmm6
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ paddd %xmm5,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ psrldq $4,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm10,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm8,16(%rsp)
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm10
+ xorl %edx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa 16(%r11),%xmm9
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %ebp,%edx
+ pxor %xmm8,%xmm6
+ rorl $7,%eax
+ addl %edi,%edx
+ movdqa %xmm4,%xmm7
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm6,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm5,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ pxor %xmm8,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm9,32(%rsp)
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrld $31,%xmm8
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ movdqa %xmm10,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ addl 60(%rsp),%ebp
+ cmpl $11,%r8d
+ jb .Laesenclast1
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast1
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+.Laesenclast1:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa 16(%r11),%xmm10
+ xorl %edx,%edi
+ addl %eax,%ebp
+ pxor %xmm9,%xmm7
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movdqa %xmm7,%xmm9
+ addl 0(%rsp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm1,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ xorl %ecx,%esi
+ movups 16(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,0(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ addl %ebp,%edx
+ pxor %xmm9,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ psrld $30,%xmm9
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ por %xmm9,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ movdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 16(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 32(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast2
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast2
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+.Laesenclast2:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ xorl %ebp,%edi
+ movdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ecx,%edi
+ movups 32(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,16(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp
+ andl %edx,%edi
+ pxor %xmm7,%xmm6
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ pxor %xmm9,%xmm6
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%edi
+ rorl $7,%eax
+ psrld $30,%xmm9
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ por %xmm9,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%edi
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ pxor %xmm0,%xmm7
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 48(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ pxor %xmm10,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ movl %ecx,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%edi
+ rorl $7,%ebx
+ psrld $30,%xmm10
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ por %xmm10,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%edi
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx
+ andl %eax,%edi
+ pxor %xmm1,%xmm0
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ rorl $7,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ pxor %xmm8,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ pslld $2,%xmm0
+ andl %edx,%edi
+ rorl $7,%ecx
+ psrld $30,%xmm8
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ por %xmm8,%xmm0
+ movl %ecx,%edi
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%edi
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ pxor %xmm2,%xmm1
+ andl %eax,%esi
+ rorl $7,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ pxor %xmm9,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ rorl $7,%edx
+ psrld $30,%xmm9
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ por %xmm9,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax
+ andl %ebp,%edi
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ cmpl $11,%r8d
+ jb .Laesenclast3
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast3
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+.Laesenclast3:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp
+ andl %edx,%esi
+ andl %ecx,%edi
+ rorl $7,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%edi
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ pxor %xmm3,%xmm2
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ pxor %xmm10,%xmm2
+ roll $5,%ebp
+ movups 48(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,32(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%edi
+ rorl $7,%ebp
+ psrld $30,%xmm10
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm10,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ rorl $7,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax
+ andl %ebp,%esi
+ andl %edx,%edi
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ cmpq %r14,%r10
+ je .Ldone_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,0(%rsp)
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ psubd %xmm9,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm9,%xmm1
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movdqa %xmm1,16(%rsp)
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ psubd %xmm9,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ paddd %xmm9,%xmm2
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%rsp)
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ psubd %xmm9,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast4
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast4
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+.Laesenclast4:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ leaq 64(%r12),%r12
+
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ addl 12(%r9),%edx
+ movl %eax,0(%r9)
+ addl 16(%r9),%ebp
+ movl %esi,4(%r9)
+ movl %esi,%ebx
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ jmp .Loop_ssse3
+
+.align 16
+.Ldone_ssse3:
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast5
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast5
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+.Laesenclast5:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ movq 88(%rsp),%r8
+
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ movl %eax,0(%r9)
+ addl 12(%r9),%edx
+ movl %esi,4(%r9)
+ addl 16(%r9),%ebp
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ movups %xmm11,(%r8)
+ leaq 104(%rsp),%rsi
+ movq 0(%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue_ssse3:
+ retq
+.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3
+.align 64
+K_XX_XX:
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+
+.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/aes/aesni-sha1-macosx-x86_64.S b/ext/libressl/crypto/aes/aesni-sha1-macosx-x86_64.S
new file mode 100644
index 0000000..3e88b1a
--- /dev/null
+++ b/ext/libressl/crypto/aes/aesni-sha1-macosx-x86_64.S
@@ -0,0 +1,1398 @@
+#include "x86_arch.h"
+.text
+
+.private_extern _OPENSSL_ia32cap_P
+
+.globl _aesni_cbc_sha1_enc
+
+.p2align 4
+_aesni_cbc_sha1_enc:
+
+ movl _OPENSSL_ia32cap_P+0(%rip),%r10d
+ movl _OPENSSL_ia32cap_P+4(%rip),%r11d
+ jmp aesni_cbc_sha1_enc_ssse3
+ retq
+
+
+.p2align 4
+aesni_cbc_sha1_enc_ssse3:
+ movq 8(%rsp),%r10
+
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -104(%rsp),%rsp
+
+
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+ movdqu (%r8),%xmm11
+ movq %r8,88(%rsp)
+ shlq $6,%r14
+ subq %r12,%r13
+ movl 240(%r15),%r8d
+ addq %r10,%r14
+
+ leaq K_XX_XX(%rip),%r11
+ movl 0(%r9),%eax
+ movl 4(%r9),%ebx
+ movl 8(%r9),%ecx
+ movl 12(%r9),%edx
+ movl %ebx,%esi
+ movl 16(%r9),%ebp
+
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ movups (%r15),%xmm13
+ movups 16(%r15),%xmm14
+ jmp L$oop_ssse3
+.p2align 4
+L$oop_ssse3:
+ movdqa %xmm1,%xmm4
+ addl 0(%rsp),%ebp
+ movups 0(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ roll $5,%eax
+ paddd %xmm3,%xmm9
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%ebp
+ pxor %xmm2,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pxor %xmm8,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm9,48(%rsp)
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm10,%xmm9
+ rorl $7,%ebp
+ addl %esi,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa 0(%r11),%xmm10
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm4
+ rorl $7,%edx
+ addl %edi,%ebx
+ movdqa %xmm2,%xmm5
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ roll $5,%ebx
+ paddd %xmm4,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrldq $4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm9
+ addl 20(%rsp),%ebp
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm9,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm10,0(%rsp)
+ xorl %edx,%edi
+ addl %eax,%ebp
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ roll $5,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm9
+ xorl %ecx,%esi
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ addl %ebp,%edx
+ movdqa %xmm8,%xmm10
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ movdqa 16(%r11),%xmm8
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ pxor %xmm10,%xmm5
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movdqa %xmm3,%xmm6
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ paddd %xmm5,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ psrldq $4,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm10,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm8,16(%rsp)
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm10
+ xorl %edx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa 16(%r11),%xmm9
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %ebp,%edx
+ pxor %xmm8,%xmm6
+ rorl $7,%eax
+ addl %edi,%edx
+ movdqa %xmm4,%xmm7
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm6,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm5,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ pxor %xmm8,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm9,32(%rsp)
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrld $31,%xmm8
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ movdqa %xmm10,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ addl 60(%rsp),%ebp
+ cmpl $11,%r8d
+ jb L$aesenclast1
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je L$aesenclast1
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+L$aesenclast1:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa 16(%r11),%xmm10
+ xorl %edx,%edi
+ addl %eax,%ebp
+ pxor %xmm9,%xmm7
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movdqa %xmm7,%xmm9
+ addl 0(%rsp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm1,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ xorl %ecx,%esi
+ movups 16(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,0(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ addl %ebp,%edx
+ pxor %xmm9,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ psrld $30,%xmm9
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ por %xmm9,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ movdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 16(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 32(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb L$aesenclast2
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je L$aesenclast2
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+L$aesenclast2:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ xorl %ebp,%edi
+ movdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ecx,%edi
+ movups 32(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,16(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp
+ andl %edx,%edi
+ pxor %xmm7,%xmm6
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ pxor %xmm9,%xmm6
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%edi
+ rorl $7,%eax
+ psrld $30,%xmm9
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ por %xmm9,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%edi
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ pxor %xmm0,%xmm7
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 48(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ pxor %xmm10,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ movl %ecx,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%edi
+ rorl $7,%ebx
+ psrld $30,%xmm10
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ por %xmm10,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%edi
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx
+ andl %eax,%edi
+ pxor %xmm1,%xmm0
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ rorl $7,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ pxor %xmm8,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ pslld $2,%xmm0
+ andl %edx,%edi
+ rorl $7,%ecx
+ psrld $30,%xmm8
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ por %xmm8,%xmm0
+ movl %ecx,%edi
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%edi
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ pxor %xmm2,%xmm1
+ andl %eax,%esi
+ rorl $7,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ pxor %xmm9,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ rorl $7,%edx
+ psrld $30,%xmm9
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ por %xmm9,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax
+ andl %ebp,%edi
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ cmpl $11,%r8d
+ jb L$aesenclast3
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je L$aesenclast3
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+L$aesenclast3:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp
+ andl %edx,%esi
+ andl %ecx,%edi
+ rorl $7,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%edi
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ pxor %xmm3,%xmm2
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ pxor %xmm10,%xmm2
+ roll $5,%ebp
+ movups 48(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,32(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%edi
+ rorl $7,%ebp
+ psrld $30,%xmm10
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm10,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ rorl $7,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax
+ andl %ebp,%esi
+ andl %edx,%edi
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ cmpq %r14,%r10
+ je L$done_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,0(%rsp)
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ psubd %xmm9,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm9,%xmm1
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movdqa %xmm1,16(%rsp)
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ psubd %xmm9,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ paddd %xmm9,%xmm2
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%rsp)
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ psubd %xmm9,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb L$aesenclast4
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je L$aesenclast4
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+L$aesenclast4:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ leaq 64(%r12),%r12
+
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ addl 12(%r9),%edx
+ movl %eax,0(%r9)
+ addl 16(%r9),%ebp
+ movl %esi,4(%r9)
+ movl %esi,%ebx
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ jmp L$oop_ssse3
+
+.p2align 4
+L$done_ssse3:
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb L$aesenclast5
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je L$aesenclast5
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+L$aesenclast5:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ movq 88(%rsp),%r8
+
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ movl %eax,0(%r9)
+ addl 12(%r9),%edx
+ movl %esi,4(%r9)
+ addl 16(%r9),%ebp
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ movups %xmm11,(%r8)
+ leaq 104(%rsp),%rsi
+ movq 0(%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+L$epilogue_ssse3:
+ retq
+
+.p2align 6
+K_XX_XX:
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+
+.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align 6
diff --git a/ext/libressl/crypto/aes/aesni-sha1-masm-x86_64.S b/ext/libressl/crypto/aes/aesni-sha1-masm-x86_64.S
new file mode 100644
index 0000000..db95881
--- /dev/null
+++ b/ext/libressl/crypto/aes/aesni-sha1-masm-x86_64.S
@@ -0,0 +1,1616 @@
+; 1 "crypto/aes/aesni-sha1-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/aes/aesni-sha1-masm-x86_64.S.tmp" 2
+OPTION DOTNAME
+
+; 1 "./crypto/x86_arch.h" 1
+
+
+; 16 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+; 40 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 "crypto/aes/aesni-sha1-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+EXTERN OPENSSL_ia32cap_P:NEAR
+
+
+PUBLIC aesni_cbc_sha1_enc
+
+ALIGN 16
+aesni_cbc_sha1_enc PROC PUBLIC
+
+ mov r10d,DWORD PTR[((OPENSSL_ia32cap_P+0))]
+ mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+4))]
+ jmp aesni_cbc_sha1_enc_ssse3
+ DB 0F3h,0C3h ;repret
+aesni_cbc_sha1_enc ENDP
+
+ALIGN 16
+aesni_cbc_sha1_enc_ssse3 PROC PRIVATE
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_cbc_sha1_enc_ssse3::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ mov r10,QWORD PTR[56+rsp]
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-264))+rsp]
+
+
+ movaps XMMWORD PTR[(96+0)+rsp],xmm6
+ movaps XMMWORD PTR[(96+16)+rsp],xmm7
+ movaps XMMWORD PTR[(96+32)+rsp],xmm8
+ movaps XMMWORD PTR[(96+48)+rsp],xmm9
+ movaps XMMWORD PTR[(96+64)+rsp],xmm10
+ movaps XMMWORD PTR[(96+80)+rsp],xmm11
+ movaps XMMWORD PTR[(96+96)+rsp],xmm12
+ movaps XMMWORD PTR[(96+112)+rsp],xmm13
+ movaps XMMWORD PTR[(96+128)+rsp],xmm14
+ movaps XMMWORD PTR[(96+144)+rsp],xmm15
+$L$prologue_ssse3::
+ mov r12,rdi
+ mov r13,rsi
+ mov r14,rdx
+ mov r15,rcx
+ movdqu xmm11,XMMWORD PTR[r8]
+ mov QWORD PTR[88+rsp],r8
+ shl r14,6
+ sub r13,r12
+ mov r8d,DWORD PTR[240+r15]
+ add r14,r10
+
+ lea r11,QWORD PTR[K_XX_XX]
+ mov eax,DWORD PTR[r9]
+ mov ebx,DWORD PTR[4+r9]
+ mov ecx,DWORD PTR[8+r9]
+ mov edx,DWORD PTR[12+r9]
+ mov esi,ebx
+ mov ebp,DWORD PTR[16+r9]
+
+ movdqa xmm6,XMMWORD PTR[64+r11]
+ movdqa xmm9,XMMWORD PTR[r11]
+ movdqu xmm0,XMMWORD PTR[r10]
+ movdqu xmm1,XMMWORD PTR[16+r10]
+ movdqu xmm2,XMMWORD PTR[32+r10]
+ movdqu xmm3,XMMWORD PTR[48+r10]
+DB 102,15,56,0,198
+ add r10,64
+DB 102,15,56,0,206
+DB 102,15,56,0,214
+DB 102,15,56,0,222
+ paddd xmm0,xmm9
+ paddd xmm1,xmm9
+ paddd xmm2,xmm9
+ movdqa XMMWORD PTR[rsp],xmm0
+ psubd xmm0,xmm9
+ movdqa XMMWORD PTR[16+rsp],xmm1
+ psubd xmm1,xmm9
+ movdqa XMMWORD PTR[32+rsp],xmm2
+ psubd xmm2,xmm9
+ movups xmm13,XMMWORD PTR[r15]
+ movups xmm14,XMMWORD PTR[16+r15]
+ jmp $L$oop_ssse3
+ALIGN 16
+$L$oop_ssse3::
+ movdqa xmm4,xmm1
+ add ebp,DWORD PTR[rsp]
+ movups xmm12,XMMWORD PTR[r12]
+ xorps xmm12,xmm13
+ xorps xmm11,xmm12
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[32+r15]
+ xor ecx,edx
+ movdqa xmm8,xmm3
+DB 102,15,58,15,224,8
+ mov edi,eax
+ rol eax,5
+ paddd xmm9,xmm3
+ and esi,ecx
+ xor ecx,edx
+ psrldq xmm8,4
+ xor esi,edx
+ add ebp,eax
+ pxor xmm4,xmm0
+ ror ebx,2
+ add ebp,esi
+ pxor xmm8,xmm2
+ add edx,DWORD PTR[4+rsp]
+ xor ebx,ecx
+ mov esi,ebp
+ rol ebp,5
+ pxor xmm4,xmm8
+ and edi,ebx
+ xor ebx,ecx
+ movdqa XMMWORD PTR[48+rsp],xmm9
+ xor edi,ecx
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[48+r15]
+ add edx,ebp
+ movdqa xmm10,xmm4
+ movdqa xmm8,xmm4
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD PTR[8+rsp]
+ xor eax,ebx
+ pslldq xmm10,12
+ paddd xmm4,xmm4
+ mov edi,edx
+ rol edx,5
+ and esi,eax
+ xor eax,ebx
+ psrld xmm8,31
+ xor esi,ebx
+ add ecx,edx
+ movdqa xmm9,xmm10
+ ror ebp,7
+ add ecx,esi
+ psrld xmm10,30
+ por xmm4,xmm8
+ add ebx,DWORD PTR[12+rsp]
+ xor ebp,eax
+ mov esi,ecx
+ rol ecx,5
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[64+r15]
+ pslld xmm9,2
+ pxor xmm4,xmm10
+ and edi,ebp
+ xor ebp,eax
+ movdqa xmm10,XMMWORD PTR[r11]
+ xor edi,eax
+ add ebx,ecx
+ pxor xmm4,xmm9
+ ror edx,7
+ add ebx,edi
+ movdqa xmm5,xmm2
+ add eax,DWORD PTR[16+rsp]
+ xor edx,ebp
+ movdqa xmm9,xmm4
+DB 102,15,58,15,233,8
+ mov edi,ebx
+ rol ebx,5
+ paddd xmm10,xmm4
+ and esi,edx
+ xor edx,ebp
+ psrldq xmm9,4
+ xor esi,ebp
+ add eax,ebx
+ pxor xmm5,xmm1
+ ror ecx,7
+ add eax,esi
+ pxor xmm9,xmm3
+ add ebp,DWORD PTR[20+rsp]
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[80+r15]
+ xor ecx,edx
+ mov esi,eax
+ rol eax,5
+ pxor xmm5,xmm9
+ and edi,ecx
+ xor ecx,edx
+ movdqa XMMWORD PTR[rsp],xmm10
+ xor edi,edx
+ add ebp,eax
+ movdqa xmm8,xmm5
+ movdqa xmm9,xmm5
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[24+rsp]
+ xor ebx,ecx
+ pslldq xmm8,12
+ paddd xmm5,xmm5
+ mov edi,ebp
+ rol ebp,5
+ and esi,ebx
+ xor ebx,ecx
+ psrld xmm9,31
+ xor esi,ecx
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[96+r15]
+ add edx,ebp
+ movdqa xmm10,xmm8
+ ror eax,7
+ add edx,esi
+ psrld xmm8,30
+ por xmm5,xmm9
+ add ecx,DWORD PTR[28+rsp]
+ xor eax,ebx
+ mov esi,edx
+ rol edx,5
+ pslld xmm10,2
+ pxor xmm5,xmm8
+ and edi,eax
+ xor eax,ebx
+ movdqa xmm8,XMMWORD PTR[16+r11]
+ xor edi,ebx
+ add ecx,edx
+ pxor xmm5,xmm10
+ ror ebp,7
+ add ecx,edi
+ movdqa xmm6,xmm3
+ add ebx,DWORD PTR[32+rsp]
+ xor ebp,eax
+ movdqa xmm10,xmm5
+DB 102,15,58,15,242,8
+ mov edi,ecx
+ rol ecx,5
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[112+r15]
+ paddd xmm8,xmm5
+ and esi,ebp
+ xor ebp,eax
+ psrldq xmm10,4
+ xor esi,eax
+ add ebx,ecx
+ pxor xmm6,xmm2
+ ror edx,7
+ add ebx,esi
+ pxor xmm10,xmm4
+ add eax,DWORD PTR[36+rsp]
+ xor edx,ebp
+ mov esi,ebx
+ rol ebx,5
+ pxor xmm6,xmm10
+ and edi,edx
+ xor edx,ebp
+ movdqa XMMWORD PTR[16+rsp],xmm8
+ xor edi,ebp
+ add eax,ebx
+ movdqa xmm9,xmm6
+ movdqa xmm10,xmm6
+ ror ecx,7
+ add eax,edi
+ add ebp,DWORD PTR[40+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[128+r15]
+ xor ecx,edx
+ pslldq xmm9,12
+ paddd xmm6,xmm6
+ mov edi,eax
+ rol eax,5
+ and esi,ecx
+ xor ecx,edx
+ psrld xmm10,31
+ xor esi,edx
+ add ebp,eax
+ movdqa xmm8,xmm9
+ ror ebx,7
+ add ebp,esi
+ psrld xmm9,30
+ por xmm6,xmm10
+ add edx,DWORD PTR[44+rsp]
+ xor ebx,ecx
+ mov esi,ebp
+ rol ebp,5
+ pslld xmm8,2
+ pxor xmm6,xmm9
+ and edi,ebx
+ xor ebx,ecx
+ movdqa xmm9,XMMWORD PTR[16+r11]
+ xor edi,ecx
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[144+r15]
+ add edx,ebp
+ pxor xmm6,xmm8
+ ror eax,7
+ add edx,edi
+ movdqa xmm7,xmm4
+ add ecx,DWORD PTR[48+rsp]
+ xor eax,ebx
+ movdqa xmm8,xmm6
+DB 102,15,58,15,251,8
+ mov edi,edx
+ rol edx,5
+ paddd xmm9,xmm6
+ and esi,eax
+ xor eax,ebx
+ psrldq xmm8,4
+ xor esi,ebx
+ add ecx,edx
+ pxor xmm7,xmm3
+ ror ebp,7
+ add ecx,esi
+ pxor xmm8,xmm5
+ add ebx,DWORD PTR[52+rsp]
+ xor ebp,eax
+ mov esi,ecx
+ rol ecx,5
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[160+r15]
+ pxor xmm7,xmm8
+ and edi,ebp
+ xor ebp,eax
+ movdqa XMMWORD PTR[32+rsp],xmm9
+ xor edi,eax
+ add ebx,ecx
+ movdqa xmm10,xmm7
+ movdqa xmm8,xmm7
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[56+rsp]
+ xor edx,ebp
+ pslldq xmm10,12
+ paddd xmm7,xmm7
+ mov edi,ebx
+ rol ebx,5
+ and esi,edx
+ xor edx,ebp
+ psrld xmm8,31
+ xor esi,ebp
+ add eax,ebx
+ movdqa xmm9,xmm10
+ ror ecx,7
+ add eax,esi
+ psrld xmm10,30
+ por xmm7,xmm8
+ add ebp,DWORD PTR[60+rsp]
+ cmp r8d,11
+ jb $L$aesenclast1
+ movups xmm14,XMMWORD PTR[176+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[192+r15]
+ aesenc xmm11,xmm14
+ je $L$aesenclast1
+ movups xmm14,XMMWORD PTR[208+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[224+r15]
+ aesenc xmm11,xmm14
+$L$aesenclast1::
+ aesenclast xmm11,xmm15
+ movups xmm14,XMMWORD PTR[16+r15]
+ xor ecx,edx
+ mov esi,eax
+ rol eax,5
+ pslld xmm9,2
+ pxor xmm7,xmm10
+ and edi,ecx
+ xor ecx,edx
+ movdqa xmm10,XMMWORD PTR[16+r11]
+ xor edi,edx
+ add ebp,eax
+ pxor xmm7,xmm9
+ ror ebx,7
+ add ebp,edi
+ movdqa xmm9,xmm7
+ add edx,DWORD PTR[rsp]
+ pxor xmm0,xmm4
+DB 102,68,15,58,15,206,8
+ xor ebx,ecx
+ mov edi,ebp
+ rol ebp,5
+ pxor xmm0,xmm1
+ and esi,ebx
+ xor ebx,ecx
+ movdqa xmm8,xmm10
+ paddd xmm10,xmm7
+ xor esi,ecx
+ movups xmm12,XMMWORD PTR[16+r12]
+ xorps xmm12,xmm13
+ movups XMMWORD PTR[r12*1+r13],xmm11
+ xorps xmm11,xmm12
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[32+r15]
+ add edx,ebp
+ pxor xmm0,xmm9
+ ror eax,7
+ add edx,esi
+ add ecx,DWORD PTR[4+rsp]
+ xor eax,ebx
+ movdqa xmm9,xmm0
+ movdqa XMMWORD PTR[48+rsp],xmm10
+ mov esi,edx
+ rol edx,5
+ and edi,eax
+ xor eax,ebx
+ pslld xmm0,2
+ xor edi,ebx
+ add ecx,edx
+ psrld xmm9,30
+ ror ebp,7
+ add ecx,edi
+ add ebx,DWORD PTR[8+rsp]
+ xor ebp,eax
+ mov edi,ecx
+ rol ecx,5
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[48+r15]
+ por xmm0,xmm9
+ and esi,ebp
+ xor ebp,eax
+ movdqa xmm10,xmm0
+ xor esi,eax
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ add eax,DWORD PTR[12+rsp]
+ xor edx,ebp
+ mov esi,ebx
+ rol ebx,5
+ and edi,edx
+ xor edx,ebp
+ xor edi,ebp
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ add ebp,DWORD PTR[16+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[64+r15]
+ pxor xmm1,xmm5
+DB 102,68,15,58,15,215,8
+ xor esi,edx
+ mov edi,eax
+ rol eax,5
+ pxor xmm1,xmm2
+ xor esi,ecx
+ add ebp,eax
+ movdqa xmm9,xmm8
+ paddd xmm8,xmm0
+ ror ebx,7
+ add ebp,esi
+ pxor xmm1,xmm10
+ add edx,DWORD PTR[20+rsp]
+ xor edi,ecx
+ mov esi,ebp
+ rol ebp,5
+ movdqa xmm10,xmm1
+ movdqa XMMWORD PTR[rsp],xmm8
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ pslld xmm1,2
+ add ecx,DWORD PTR[24+rsp]
+ xor esi,ebx
+ psrld xmm10,30
+ mov edi,edx
+ rol edx,5
+ xor esi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[80+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,esi
+ por xmm1,xmm10
+ add ebx,DWORD PTR[28+rsp]
+ xor edi,eax
+ movdqa xmm8,xmm1
+ mov esi,ecx
+ rol ecx,5
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[32+rsp]
+ pxor xmm2,xmm6
+DB 102,68,15,58,15,192,8
+ xor esi,ebp
+ mov edi,ebx
+ rol ebx,5
+ pxor xmm2,xmm3
+ xor esi,edx
+ add eax,ebx
+ movdqa xmm10,XMMWORD PTR[32+r11]
+ paddd xmm9,xmm1
+ ror ecx,7
+ add eax,esi
+ pxor xmm2,xmm8
+ add ebp,DWORD PTR[36+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[96+r15]
+ xor edi,edx
+ mov esi,eax
+ rol eax,5
+ movdqa xmm8,xmm2
+ movdqa XMMWORD PTR[16+rsp],xmm9
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ pslld xmm2,2
+ add edx,DWORD PTR[40+rsp]
+ xor esi,ecx
+ psrld xmm8,30
+ mov edi,ebp
+ rol ebp,5
+ xor esi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,esi
+ por xmm2,xmm8
+ add ecx,DWORD PTR[44+rsp]
+ xor edi,ebx
+ movdqa xmm9,xmm2
+ mov esi,edx
+ rol edx,5
+ xor edi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[112+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ add ebx,DWORD PTR[48+rsp]
+ pxor xmm3,xmm7
+DB 102,68,15,58,15,201,8
+ xor esi,eax
+ mov edi,ecx
+ rol ecx,5
+ pxor xmm3,xmm4
+ xor esi,ebp
+ add ebx,ecx
+ movdqa xmm8,xmm10
+ paddd xmm10,xmm2
+ ror edx,7
+ add ebx,esi
+ pxor xmm3,xmm9
+ add eax,DWORD PTR[52+rsp]
+ xor edi,ebp
+ mov esi,ebx
+ rol ebx,5
+ movdqa xmm9,xmm3
+ movdqa XMMWORD PTR[32+rsp],xmm10
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ pslld xmm3,2
+ add ebp,DWORD PTR[56+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[128+r15]
+ xor esi,edx
+ psrld xmm9,30
+ mov edi,eax
+ rol eax,5
+ xor esi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,esi
+ por xmm3,xmm9
+ add edx,DWORD PTR[60+rsp]
+ xor edi,ecx
+ movdqa xmm10,xmm3
+ mov esi,ebp
+ rol ebp,5
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD PTR[rsp]
+ pxor xmm4,xmm0
+DB 102,68,15,58,15,210,8
+ xor esi,ebx
+ mov edi,edx
+ rol edx,5
+ pxor xmm4,xmm5
+ xor esi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[144+r15]
+ add ecx,edx
+ movdqa xmm9,xmm8
+ paddd xmm8,xmm3
+ ror ebp,7
+ add ecx,esi
+ pxor xmm4,xmm10
+ add ebx,DWORD PTR[4+rsp]
+ xor edi,eax
+ mov esi,ecx
+ rol ecx,5
+ movdqa xmm10,xmm4
+ movdqa XMMWORD PTR[48+rsp],xmm8
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ pslld xmm4,2
+ add eax,DWORD PTR[8+rsp]
+ xor esi,ebp
+ psrld xmm10,30
+ mov edi,ebx
+ rol ebx,5
+ xor esi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,esi
+ por xmm4,xmm10
+ add ebp,DWORD PTR[12+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[160+r15]
+ xor edi,edx
+ movdqa xmm8,xmm4
+ mov esi,eax
+ rol eax,5
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[16+rsp]
+ pxor xmm5,xmm1
+DB 102,68,15,58,15,195,8
+ xor esi,ecx
+ mov edi,ebp
+ rol ebp,5
+ pxor xmm5,xmm6
+ xor esi,ebx
+ add edx,ebp
+ movdqa xmm10,xmm9
+ paddd xmm9,xmm4
+ ror eax,7
+ add edx,esi
+ pxor xmm5,xmm8
+ add ecx,DWORD PTR[20+rsp]
+ xor edi,ebx
+ mov esi,edx
+ rol edx,5
+ movdqa xmm8,xmm5
+ movdqa XMMWORD PTR[rsp],xmm9
+ xor edi,eax
+ cmp r8d,11
+ jb $L$aesenclast2
+ movups xmm14,XMMWORD PTR[176+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[192+r15]
+ aesenc xmm11,xmm14
+ je $L$aesenclast2
+ movups xmm14,XMMWORD PTR[208+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[224+r15]
+ aesenc xmm11,xmm14
+$L$aesenclast2::
+ aesenclast xmm11,xmm15
+ movups xmm14,XMMWORD PTR[16+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ pslld xmm5,2
+ add ebx,DWORD PTR[24+rsp]
+ xor esi,eax
+ psrld xmm8,30
+ mov edi,ecx
+ rol ecx,5
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ por xmm5,xmm8
+ add eax,DWORD PTR[28+rsp]
+ xor edi,ebp
+ movdqa xmm9,xmm5
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ mov edi,ecx
+ movups xmm12,XMMWORD PTR[32+r12]
+ xorps xmm12,xmm13
+ movups XMMWORD PTR[16+r12*1+r13],xmm11
+ xorps xmm11,xmm12
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[32+r15]
+ pxor xmm6,xmm2
+DB 102,68,15,58,15,204,8
+ xor ecx,edx
+ add ebp,DWORD PTR[32+rsp]
+ and edi,edx
+ pxor xmm6,xmm7
+ and esi,ecx
+ ror ebx,7
+ movdqa xmm8,xmm10
+ paddd xmm10,xmm5
+ add ebp,edi
+ mov edi,eax
+ pxor xmm6,xmm9
+ rol eax,5
+ add ebp,esi
+ xor ecx,edx
+ add ebp,eax
+ movdqa xmm9,xmm6
+ movdqa XMMWORD PTR[16+rsp],xmm10
+ mov esi,ebx
+ xor ebx,ecx
+ add edx,DWORD PTR[36+rsp]
+ and esi,ecx
+ pslld xmm6,2
+ and edi,ebx
+ ror eax,7
+ psrld xmm9,30
+ add edx,esi
+ mov esi,ebp
+ rol ebp,5
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[48+r15]
+ add edx,edi
+ xor ebx,ecx
+ add edx,ebp
+ por xmm6,xmm9
+ mov edi,eax
+ xor eax,ebx
+ movdqa xmm10,xmm6
+ add ecx,DWORD PTR[40+rsp]
+ and edi,ebx
+ and esi,eax
+ ror ebp,7
+ add ecx,edi
+ mov edi,edx
+ rol edx,5
+ add ecx,esi
+ xor eax,ebx
+ add ecx,edx
+ mov esi,ebp
+ xor ebp,eax
+ add ebx,DWORD PTR[44+rsp]
+ and esi,eax
+ and edi,ebp
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[64+r15]
+ ror edx,7
+ add ebx,esi
+ mov esi,ecx
+ rol ecx,5
+ add ebx,edi
+ xor ebp,eax
+ add ebx,ecx
+ mov edi,edx
+ pxor xmm7,xmm3
+DB 102,68,15,58,15,213,8
+ xor edx,ebp
+ add eax,DWORD PTR[48+rsp]
+ and edi,ebp
+ pxor xmm7,xmm0
+ and esi,edx
+ ror ecx,7
+ movdqa xmm9,XMMWORD PTR[48+r11]
+ paddd xmm8,xmm6
+ add eax,edi
+ mov edi,ebx
+ pxor xmm7,xmm10
+ rol ebx,5
+ add eax,esi
+ xor edx,ebp
+ add eax,ebx
+ movdqa xmm10,xmm7
+ movdqa XMMWORD PTR[32+rsp],xmm8
+ mov esi,ecx
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[80+r15]
+ xor ecx,edx
+ add ebp,DWORD PTR[52+rsp]
+ and esi,edx
+ pslld xmm7,2
+ and edi,ecx
+ ror ebx,7
+ psrld xmm10,30
+ add ebp,esi
+ mov esi,eax
+ rol eax,5
+ add ebp,edi
+ xor ecx,edx
+ add ebp,eax
+ por xmm7,xmm10
+ mov edi,ebx
+ xor ebx,ecx
+ movdqa xmm8,xmm7
+ add edx,DWORD PTR[56+rsp]
+ and edi,ecx
+ and esi,ebx
+ ror eax,7
+ add edx,edi
+ mov edi,ebp
+ rol ebp,5
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[96+r15]
+ add edx,esi
+ xor ebx,ecx
+ add edx,ebp
+ mov esi,eax
+ xor eax,ebx
+ add ecx,DWORD PTR[60+rsp]
+ and esi,ebx
+ and edi,eax
+ ror ebp,7
+ add ecx,esi
+ mov esi,edx
+ rol edx,5
+ add ecx,edi
+ xor eax,ebx
+ add ecx,edx
+ mov edi,ebp
+ pxor xmm0,xmm4
+DB 102,68,15,58,15,198,8
+ xor ebp,eax
+ add ebx,DWORD PTR[rsp]
+ and edi,eax
+ pxor xmm0,xmm1
+ and esi,ebp
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[112+r15]
+ ror edx,7
+ movdqa xmm10,xmm9
+ paddd xmm9,xmm7
+ add ebx,edi
+ mov edi,ecx
+ pxor xmm0,xmm8
+ rol ecx,5
+ add ebx,esi
+ xor ebp,eax
+ add ebx,ecx
+ movdqa xmm8,xmm0
+ movdqa XMMWORD PTR[48+rsp],xmm9
+ mov esi,edx
+ xor edx,ebp
+ add eax,DWORD PTR[4+rsp]
+ and esi,ebp
+ pslld xmm0,2
+ and edi,edx
+ ror ecx,7
+ psrld xmm8,30
+ add eax,esi
+ mov esi,ebx
+ rol ebx,5
+ add eax,edi
+ xor edx,ebp
+ add eax,ebx
+ por xmm0,xmm8
+ mov edi,ecx
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[128+r15]
+ xor ecx,edx
+ movdqa xmm9,xmm0
+ add ebp,DWORD PTR[8+rsp]
+ and edi,edx
+ and esi,ecx
+ ror ebx,7
+ add ebp,edi
+ mov edi,eax
+ rol eax,5
+ add ebp,esi
+ xor ecx,edx
+ add ebp,eax
+ mov esi,ebx
+ xor ebx,ecx
+ add edx,DWORD PTR[12+rsp]
+ and esi,ecx
+ and edi,ebx
+ ror eax,7
+ add edx,esi
+ mov esi,ebp
+ rol ebp,5
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[144+r15]
+ add edx,edi
+ xor ebx,ecx
+ add edx,ebp
+ mov edi,eax
+ pxor xmm1,xmm5
+DB 102,68,15,58,15,207,8
+ xor eax,ebx
+ add ecx,DWORD PTR[16+rsp]
+ and edi,ebx
+ pxor xmm1,xmm2
+ and esi,eax
+ ror ebp,7
+ movdqa xmm8,xmm10
+ paddd xmm10,xmm0
+ add ecx,edi
+ mov edi,edx
+ pxor xmm1,xmm9
+ rol edx,5
+ add ecx,esi
+ xor eax,ebx
+ add ecx,edx
+ movdqa xmm9,xmm1
+ movdqa XMMWORD PTR[rsp],xmm10
+ mov esi,ebp
+ xor ebp,eax
+ add ebx,DWORD PTR[20+rsp]
+ and esi,eax
+ pslld xmm1,2
+ and edi,ebp
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[160+r15]
+ ror edx,7
+ psrld xmm9,30
+ add ebx,esi
+ mov esi,ecx
+ rol ecx,5
+ add ebx,edi
+ xor ebp,eax
+ add ebx,ecx
+ por xmm1,xmm9
+ mov edi,edx
+ xor edx,ebp
+ movdqa xmm10,xmm1
+ add eax,DWORD PTR[24+rsp]
+ and edi,ebp
+ and esi,edx
+ ror ecx,7
+ add eax,edi
+ mov edi,ebx
+ rol ebx,5
+ add eax,esi
+ xor edx,ebp
+ add eax,ebx
+ mov esi,ecx
+ cmp r8d,11
+ jb $L$aesenclast3
+ movups xmm14,XMMWORD PTR[176+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[192+r15]
+ aesenc xmm11,xmm14
+ je $L$aesenclast3
+ movups xmm14,XMMWORD PTR[208+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[224+r15]
+ aesenc xmm11,xmm14
+$L$aesenclast3::
+ aesenclast xmm11,xmm15
+ movups xmm14,XMMWORD PTR[16+r15]
+ xor ecx,edx
+ add ebp,DWORD PTR[28+rsp]
+ and esi,edx
+ and edi,ecx
+ ror ebx,7
+ add ebp,esi
+ mov esi,eax
+ rol eax,5
+ add ebp,edi
+ xor ecx,edx
+ add ebp,eax
+ mov edi,ebx
+ pxor xmm2,xmm6
+DB 102,68,15,58,15,208,8
+ xor ebx,ecx
+ add edx,DWORD PTR[32+rsp]
+ and edi,ecx
+ pxor xmm2,xmm3
+ and esi,ebx
+ ror eax,7
+ movdqa xmm9,xmm8
+ paddd xmm8,xmm1
+ add edx,edi
+ mov edi,ebp
+ pxor xmm2,xmm10
+ rol ebp,5
+ movups xmm12,XMMWORD PTR[48+r12]
+ xorps xmm12,xmm13
+ movups XMMWORD PTR[32+r12*1+r13],xmm11
+ xorps xmm11,xmm12
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[32+r15]
+ add edx,esi
+ xor ebx,ecx
+ add edx,ebp
+ movdqa xmm10,xmm2
+ movdqa XMMWORD PTR[16+rsp],xmm8
+ mov esi,eax
+ xor eax,ebx
+ add ecx,DWORD PTR[36+rsp]
+ and esi,ebx
+ pslld xmm2,2
+ and edi,eax
+ ror ebp,7
+ psrld xmm10,30
+ add ecx,esi
+ mov esi,edx
+ rol edx,5
+ add ecx,edi
+ xor eax,ebx
+ add ecx,edx
+ por xmm2,xmm10
+ mov edi,ebp
+ xor ebp,eax
+ movdqa xmm8,xmm2
+ add ebx,DWORD PTR[40+rsp]
+ and edi,eax
+ and esi,ebp
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[48+r15]
+ ror edx,7
+ add ebx,edi
+ mov edi,ecx
+ rol ecx,5
+ add ebx,esi
+ xor ebp,eax
+ add ebx,ecx
+ mov esi,edx
+ xor edx,ebp
+ add eax,DWORD PTR[44+rsp]
+ and esi,ebp
+ and edi,edx
+ ror ecx,7
+ add eax,esi
+ mov esi,ebx
+ rol ebx,5
+ add eax,edi
+ xor edx,ebp
+ add eax,ebx
+ add ebp,DWORD PTR[48+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[64+r15]
+ pxor xmm3,xmm7
+DB 102,68,15,58,15,193,8
+ xor esi,edx
+ mov edi,eax
+ rol eax,5
+ pxor xmm3,xmm4
+ xor esi,ecx
+ add ebp,eax
+ movdqa xmm10,xmm9
+ paddd xmm9,xmm2
+ ror ebx,7
+ add ebp,esi
+ pxor xmm3,xmm8
+ add edx,DWORD PTR[52+rsp]
+ xor edi,ecx
+ mov esi,ebp
+ rol ebp,5
+ movdqa xmm8,xmm3
+ movdqa XMMWORD PTR[32+rsp],xmm9
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ pslld xmm3,2
+ add ecx,DWORD PTR[56+rsp]
+ xor esi,ebx
+ psrld xmm8,30
+ mov edi,edx
+ rol edx,5
+ xor esi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[80+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,esi
+ por xmm3,xmm8
+ add ebx,DWORD PTR[60+rsp]
+ xor edi,eax
+ mov esi,ecx
+ rol ecx,5
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[rsp]
+ paddd xmm10,xmm3
+ xor esi,ebp
+ mov edi,ebx
+ rol ebx,5
+ xor esi,edx
+ movdqa XMMWORD PTR[48+rsp],xmm10
+ add eax,ebx
+ ror ecx,7
+ add eax,esi
+ add ebp,DWORD PTR[4+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[96+r15]
+ xor edi,edx
+ mov esi,eax
+ rol eax,5
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[8+rsp]
+ xor esi,ecx
+ mov edi,ebp
+ rol ebp,5
+ xor esi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,esi
+ add ecx,DWORD PTR[12+rsp]
+ xor edi,ebx
+ mov esi,edx
+ rol edx,5
+ xor edi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[112+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ cmp r10,r14
+ je $L$done_ssse3
+ movdqa xmm6,XMMWORD PTR[64+r11]
+ movdqa xmm9,XMMWORD PTR[r11]
+ movdqu xmm0,XMMWORD PTR[r10]
+ movdqu xmm1,XMMWORD PTR[16+r10]
+ movdqu xmm2,XMMWORD PTR[32+r10]
+ movdqu xmm3,XMMWORD PTR[48+r10]
+DB 102,15,56,0,198
+ add r10,64
+ add ebx,DWORD PTR[16+rsp]
+ xor esi,eax
+DB 102,15,56,0,206
+ mov edi,ecx
+ rol ecx,5
+ paddd xmm0,xmm9
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ movdqa XMMWORD PTR[rsp],xmm0
+ add eax,DWORD PTR[20+rsp]
+ xor edi,ebp
+ psubd xmm0,xmm9
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ add ebp,DWORD PTR[24+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[128+r15]
+ xor esi,edx
+ mov edi,eax
+ rol eax,5
+ xor esi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,esi
+ add edx,DWORD PTR[28+rsp]
+ xor edi,ecx
+ mov esi,ebp
+ rol ebp,5
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD PTR[32+rsp]
+ xor esi,ebx
+DB 102,15,56,0,214
+ mov edi,edx
+ rol edx,5
+ paddd xmm1,xmm9
+ xor esi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[144+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,esi
+ movdqa XMMWORD PTR[16+rsp],xmm1
+ add ebx,DWORD PTR[36+rsp]
+ xor edi,eax
+ psubd xmm1,xmm9
+ mov esi,ecx
+ rol ecx,5
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[40+rsp]
+ xor esi,ebp
+ mov edi,ebx
+ rol ebx,5
+ xor esi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,esi
+ add ebp,DWORD PTR[44+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[160+r15]
+ xor edi,edx
+ mov esi,eax
+ rol eax,5
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[48+rsp]
+ xor esi,ecx
+DB 102,15,56,0,222
+ mov edi,ebp
+ rol ebp,5
+ paddd xmm2,xmm9
+ xor esi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,esi
+ movdqa XMMWORD PTR[32+rsp],xmm2
+ add ecx,DWORD PTR[52+rsp]
+ xor edi,ebx
+ psubd xmm2,xmm9
+ mov esi,edx
+ rol edx,5
+ xor edi,eax
+ cmp r8d,11
+ jb $L$aesenclast4
+ movups xmm14,XMMWORD PTR[176+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[192+r15]
+ aesenc xmm11,xmm14
+ je $L$aesenclast4
+ movups xmm14,XMMWORD PTR[208+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[224+r15]
+ aesenc xmm11,xmm14
+$L$aesenclast4::
+ aesenclast xmm11,xmm15
+ movups xmm14,XMMWORD PTR[16+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ add ebx,DWORD PTR[56+rsp]
+ xor esi,eax
+ mov edi,ecx
+ rol ecx,5
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ add eax,DWORD PTR[60+rsp]
+ xor edi,ebp
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ movups XMMWORD PTR[48+r12*1+r13],xmm11
+ lea r12,QWORD PTR[64+r12]
+
+ add eax,DWORD PTR[r9]
+ add esi,DWORD PTR[4+r9]
+ add ecx,DWORD PTR[8+r9]
+ add edx,DWORD PTR[12+r9]
+ mov DWORD PTR[r9],eax
+ add ebp,DWORD PTR[16+r9]
+ mov DWORD PTR[4+r9],esi
+ mov ebx,esi
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+ mov DWORD PTR[16+r9],ebp
+ jmp $L$oop_ssse3
+
+ALIGN 16
+$L$done_ssse3::
+ add ebx,DWORD PTR[16+rsp]
+ xor esi,eax
+ mov edi,ecx
+ rol ecx,5
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ add eax,DWORD PTR[20+rsp]
+ xor edi,ebp
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ add ebp,DWORD PTR[24+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[128+r15]
+ xor esi,edx
+ mov edi,eax
+ rol eax,5
+ xor esi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,esi
+ add edx,DWORD PTR[28+rsp]
+ xor edi,ecx
+ mov esi,ebp
+ rol ebp,5
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD PTR[32+rsp]
+ xor esi,ebx
+ mov edi,edx
+ rol edx,5
+ xor esi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[144+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,esi
+ add ebx,DWORD PTR[36+rsp]
+ xor edi,eax
+ mov esi,ecx
+ rol ecx,5
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[40+rsp]
+ xor esi,ebp
+ mov edi,ebx
+ rol ebx,5
+ xor esi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,esi
+ add ebp,DWORD PTR[44+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[160+r15]
+ xor edi,edx
+ mov esi,eax
+ rol eax,5
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[48+rsp]
+ xor esi,ecx
+ mov edi,ebp
+ rol ebp,5
+ xor esi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,esi
+ add ecx,DWORD PTR[52+rsp]
+ xor edi,ebx
+ mov esi,edx
+ rol edx,5
+ xor edi,eax
+ cmp r8d,11
+ jb $L$aesenclast5
+ movups xmm14,XMMWORD PTR[176+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[192+r15]
+ aesenc xmm11,xmm14
+ je $L$aesenclast5
+ movups xmm14,XMMWORD PTR[208+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[224+r15]
+ aesenc xmm11,xmm14
+$L$aesenclast5::
+ aesenclast xmm11,xmm15
+ movups xmm14,XMMWORD PTR[16+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ add ebx,DWORD PTR[56+rsp]
+ xor esi,eax
+ mov edi,ecx
+ rol ecx,5
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ add eax,DWORD PTR[60+rsp]
+ xor edi,ebp
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ movups XMMWORD PTR[48+r12*1+r13],xmm11
+ mov r8,QWORD PTR[88+rsp]
+
+ add eax,DWORD PTR[r9]
+ add esi,DWORD PTR[4+r9]
+ add ecx,DWORD PTR[8+r9]
+ mov DWORD PTR[r9],eax
+ add edx,DWORD PTR[12+r9]
+ mov DWORD PTR[4+r9],esi
+ add ebp,DWORD PTR[16+r9]
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+ mov DWORD PTR[16+r9],ebp
+ movups XMMWORD PTR[r8],xmm11
+ movaps xmm6,XMMWORD PTR[((96+0))+rsp]
+ movaps xmm7,XMMWORD PTR[((96+16))+rsp]
+ movaps xmm8,XMMWORD PTR[((96+32))+rsp]
+ movaps xmm9,XMMWORD PTR[((96+48))+rsp]
+ movaps xmm10,XMMWORD PTR[((96+64))+rsp]
+ movaps xmm11,XMMWORD PTR[((96+80))+rsp]
+ movaps xmm12,XMMWORD PTR[((96+96))+rsp]
+ movaps xmm13,XMMWORD PTR[((96+112))+rsp]
+ movaps xmm14,XMMWORD PTR[((96+128))+rsp]
+ movaps xmm15,XMMWORD PTR[((96+144))+rsp]
+ lea rsi,QWORD PTR[264+rsp]
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$epilogue_ssse3::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_cbc_sha1_enc_ssse3::
+aesni_cbc_sha1_enc_ssse3 ENDP
+ALIGN 64
+K_XX_XX::
+ DD 05a827999h,05a827999h,05a827999h,05a827999h
+ DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h
+ DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch
+ DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h
+ DD 000010203h,004050607h,008090a0bh,00c0d0e0fh
+
+DB 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115
+DB 116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52
+DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
+DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
+DB 114,103,62,0
+ALIGN 64
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+ssse3_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_seh_tail
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$common_seh_tail
+
+ lea rsi,QWORD PTR[96+rax]
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,20
+ DD 0a548f3fch
+ lea rax,QWORD PTR[264+rax]
+
+ mov r15,QWORD PTR[rax]
+ mov r14,QWORD PTR[8+rax]
+ mov r13,QWORD PTR[16+rax]
+ mov r12,QWORD PTR[24+rax]
+ mov rbp,QWORD PTR[32+rax]
+ mov rbx,QWORD PTR[40+rax]
+ lea rax,QWORD PTR[48+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+$L$common_seh_tail::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+ssse3_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_aesni_cbc_sha1_enc_ssse3
+ DD imagerel $L$SEH_end_aesni_cbc_sha1_enc_ssse3
+ DD imagerel $L$SEH_info_aesni_cbc_sha1_enc_ssse3
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_aesni_cbc_sha1_enc_ssse3::
+DB 9,0,0,0
+ DD imagerel ssse3_handler
+ DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3
+
+.xdata ENDS
+END
+
diff --git a/ext/libressl/crypto/aes/aesni-sha1-mingw64-x86_64.S b/ext/libressl/crypto/aes/aesni-sha1-mingw64-x86_64.S
new file mode 100644
index 0000000..c7a2d5c
--- /dev/null
+++ b/ext/libressl/crypto/aes/aesni-sha1-mingw64-x86_64.S
@@ -0,0 +1,1536 @@
+#include "x86_arch.h"
+.text
+
+
+
+.globl aesni_cbc_sha1_enc
+.def aesni_cbc_sha1_enc; .scl 2; .type 32; .endef
+.p2align 4
+aesni_cbc_sha1_enc:
+
+ movl OPENSSL_ia32cap_P+0(%rip),%r10d
+ movl OPENSSL_ia32cap_P+4(%rip),%r11d
+ jmp aesni_cbc_sha1_enc_ssse3
+ retq
+
+.def aesni_cbc_sha1_enc_ssse3; .scl 3; .type 32; .endef
+.p2align 4
+aesni_cbc_sha1_enc_ssse3:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_aesni_cbc_sha1_enc_ssse3:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+
+ movq 56(%rsp),%r10
+
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -264(%rsp),%rsp
+
+
+ movaps %xmm6,96+0(%rsp)
+ movaps %xmm7,96+16(%rsp)
+ movaps %xmm8,96+32(%rsp)
+ movaps %xmm9,96+48(%rsp)
+ movaps %xmm10,96+64(%rsp)
+ movaps %xmm11,96+80(%rsp)
+ movaps %xmm12,96+96(%rsp)
+ movaps %xmm13,96+112(%rsp)
+ movaps %xmm14,96+128(%rsp)
+ movaps %xmm15,96+144(%rsp)
+.Lprologue_ssse3:
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+ movdqu (%r8),%xmm11
+ movq %r8,88(%rsp)
+ shlq $6,%r14
+ subq %r12,%r13
+ movl 240(%r15),%r8d
+ addq %r10,%r14
+
+ leaq K_XX_XX(%rip),%r11
+ movl 0(%r9),%eax
+ movl 4(%r9),%ebx
+ movl 8(%r9),%ecx
+ movl 12(%r9),%edx
+ movl %ebx,%esi
+ movl 16(%r9),%ebp
+
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ movups (%r15),%xmm13
+ movups 16(%r15),%xmm14
+ jmp .Loop_ssse3
+.p2align 4
+.Loop_ssse3:
+ movdqa %xmm1,%xmm4
+ addl 0(%rsp),%ebp
+ movups 0(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ roll $5,%eax
+ paddd %xmm3,%xmm9
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%ebp
+ pxor %xmm2,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pxor %xmm8,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm9,48(%rsp)
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm10,%xmm9
+ rorl $7,%ebp
+ addl %esi,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa 0(%r11),%xmm10
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm4
+ rorl $7,%edx
+ addl %edi,%ebx
+ movdqa %xmm2,%xmm5
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ roll $5,%ebx
+ paddd %xmm4,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrldq $4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm9
+ addl 20(%rsp),%ebp
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm9,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm10,0(%rsp)
+ xorl %edx,%edi
+ addl %eax,%ebp
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ roll $5,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm9
+ xorl %ecx,%esi
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ addl %ebp,%edx
+ movdqa %xmm8,%xmm10
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ movdqa 16(%r11),%xmm8
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ pxor %xmm10,%xmm5
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movdqa %xmm3,%xmm6
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ paddd %xmm5,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ psrldq $4,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm10,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm8,16(%rsp)
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm10
+ xorl %edx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa 16(%r11),%xmm9
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %ebp,%edx
+ pxor %xmm8,%xmm6
+ rorl $7,%eax
+ addl %edi,%edx
+ movdqa %xmm4,%xmm7
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm6,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm5,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ pxor %xmm8,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm9,32(%rsp)
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrld $31,%xmm8
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ movdqa %xmm10,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ addl 60(%rsp),%ebp
+ cmpl $11,%r8d
+ jb .Laesenclast1
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast1
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+.Laesenclast1:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa 16(%r11),%xmm10
+ xorl %edx,%edi
+ addl %eax,%ebp
+ pxor %xmm9,%xmm7
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movdqa %xmm7,%xmm9
+ addl 0(%rsp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm1,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ xorl %ecx,%esi
+ movups 16(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,0(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ addl %ebp,%edx
+ pxor %xmm9,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ psrld $30,%xmm9
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ por %xmm9,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ movdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 16(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 32(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast2
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast2
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+.Laesenclast2:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ xorl %ebp,%edi
+ movdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ecx,%edi
+ movups 32(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,16(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp
+ andl %edx,%edi
+ pxor %xmm7,%xmm6
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ pxor %xmm9,%xmm6
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%edi
+ rorl $7,%eax
+ psrld $30,%xmm9
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ por %xmm9,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%edi
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ pxor %xmm0,%xmm7
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 48(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ pxor %xmm10,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ movl %ecx,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%edi
+ rorl $7,%ebx
+ psrld $30,%xmm10
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ por %xmm10,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%edi
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx
+ andl %eax,%edi
+ pxor %xmm1,%xmm0
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ rorl $7,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ pxor %xmm8,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ pslld $2,%xmm0
+ andl %edx,%edi
+ rorl $7,%ecx
+ psrld $30,%xmm8
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ por %xmm8,%xmm0
+ movl %ecx,%edi
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%edi
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ pxor %xmm2,%xmm1
+ andl %eax,%esi
+ rorl $7,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ pxor %xmm9,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ rorl $7,%edx
+ psrld $30,%xmm9
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ por %xmm9,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax
+ andl %ebp,%edi
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ cmpl $11,%r8d
+ jb .Laesenclast3
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast3
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+.Laesenclast3:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp
+ andl %edx,%esi
+ andl %ecx,%edi
+ rorl $7,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%edi
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ pxor %xmm3,%xmm2
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ pxor %xmm10,%xmm2
+ roll $5,%ebp
+ movups 48(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,32(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%edi
+ rorl $7,%ebp
+ psrld $30,%xmm10
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm10,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ rorl $7,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax
+ andl %ebp,%esi
+ andl %edx,%edi
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ cmpq %r14,%r10
+ je .Ldone_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,0(%rsp)
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ psubd %xmm9,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm9,%xmm1
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movdqa %xmm1,16(%rsp)
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ psubd %xmm9,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ paddd %xmm9,%xmm2
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%rsp)
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ psubd %xmm9,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast4
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast4
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+.Laesenclast4:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ leaq 64(%r12),%r12
+
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ addl 12(%r9),%edx
+ movl %eax,0(%r9)
+ addl 16(%r9),%ebp
+ movl %esi,4(%r9)
+ movl %esi,%ebx
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ jmp .Loop_ssse3
+
+.p2align 4
+.Ldone_ssse3:
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast5
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast5
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+.Laesenclast5:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ movq 88(%rsp),%r8
+
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ movl %eax,0(%r9)
+ addl 12(%r9),%edx
+ movl %esi,4(%r9)
+ addl 16(%r9),%ebp
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ movups %xmm11,(%r8)
+ movaps 96+0(%rsp),%xmm6
+ movaps 96+16(%rsp),%xmm7
+ movaps 96+32(%rsp),%xmm8
+ movaps 96+48(%rsp),%xmm9
+ movaps 96+64(%rsp),%xmm10
+ movaps 96+80(%rsp),%xmm11
+ movaps 96+96(%rsp),%xmm12
+ movaps 96+112(%rsp),%xmm13
+ movaps 96+128(%rsp),%xmm14
+ movaps 96+144(%rsp),%xmm15
+ leaq 264(%rsp),%rsi
+ movq 0(%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue_ssse3:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_aesni_cbc_sha1_enc_ssse3:
+.p2align 6
+K_XX_XX:
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+
+.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align 6
+
+.def ssse3_handler; .scl 3; .type 32; .endef
+.p2align 4
+ssse3_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax
+ movq 248(%r8),%rbx
+
+ movq 8(%r9),%rsi
+ movq 56(%r9),%r11
+
+ movl 0(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jb .Lcommon_seh_tail
+
+ movq 152(%r8),%rax
+
+ movl 4(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lcommon_seh_tail
+
+ leaq 96(%rax),%rsi
+ leaq 512(%r8),%rdi
+ movl $20,%ecx
+.long 0xa548f3fc
+ leaq 264(%rax),%rax
+
+ movq 0(%rax),%r15
+ movq 8(%rax),%r14
+ movq 16(%rax),%r13
+ movq 24(%rax),%r12
+ movq 32(%rax),%rbp
+ movq 40(%rax),%rbx
+ leaq 48(%rax),%rax
+ movq %rbx,144(%r8)
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+ movq %r13,224(%r8)
+ movq %r14,232(%r8)
+ movq %r15,240(%r8)
+
+.Lcommon_seh_tail:
+ movq 8(%rax),%rdi
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8)
+ movq %rsi,168(%r8)
+ movq %rdi,176(%r8)
+
+ movq 40(%r9),%rdi
+ movq %r8,%rsi
+ movl $154,%ecx
+.long 0xa548f3fc
+
+ movq %r9,%rsi
+ xorq %rcx,%rcx
+ movq 8(%rsi),%rdx
+ movq 0(%rsi),%r8
+ movq 16(%rsi),%r9
+ movq 40(%rsi),%r10
+ leaq 56(%rsi),%r11
+ leaq 24(%rsi),%r12
+ movq %r10,32(%rsp)
+ movq %r11,40(%rsp)
+ movq %r12,48(%rsp)
+ movq %rcx,56(%rsp)
+ call *__imp_RtlVirtualUnwind(%rip)
+
+ movl $1,%eax
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+
+
+.section .pdata
+.p2align 2
+.rva .LSEH_begin_aesni_cbc_sha1_enc_ssse3
+.rva .LSEH_end_aesni_cbc_sha1_enc_ssse3
+.rva .LSEH_info_aesni_cbc_sha1_enc_ssse3
+.section .xdata
+.p2align 3
+.LSEH_info_aesni_cbc_sha1_enc_ssse3:
+.byte 9,0,0,0
+.rva ssse3_handler
+.rva .Lprologue_ssse3,.Lepilogue_ssse3
diff --git a/ext/libressl/crypto/aes/bsaes-elf-x86_64.S b/ext/libressl/crypto/aes/bsaes-elf-x86_64.S
new file mode 100644
index 0000000..903e374
--- /dev/null
+++ b/ext/libressl/crypto/aes/bsaes-elf-x86_64.S
@@ -0,0 +1,2502 @@
+#include "x86_arch.h"
+.text
+
+
+
+
+.type _bsaes_encrypt8,@function
+.align 64
+_bsaes_encrypt8:
+ leaq .LBS0(%rip),%r11
+
+ movdqa (%rax),%xmm8
+ leaq 16(%rax),%rax
+ movdqa 80(%r11),%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+.byte 102,15,56,0,247
+_bsaes_encrypt8_bitslice:
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm4,%xmm9
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d
+ jmp .Lenc_sbox
+.align 16
+.Lenc_loop:
+ pxor 0(%rax),%xmm15
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239
+ leaq 128(%rax),%rax
+.byte 102,15,56,0,247
+.Lenc_sbox:
+ pxor %xmm5,%xmm4
+ pxor %xmm0,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm1,%xmm5
+ pxor %xmm15,%xmm4
+
+ pxor %xmm2,%xmm5
+ pxor %xmm6,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm3,%xmm2
+ pxor %xmm4,%xmm3
+ pxor %xmm0,%xmm2
+
+ pxor %xmm6,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm6,%xmm10
+ movdqa %xmm0,%xmm9
+ movdqa %xmm4,%xmm8
+ movdqa %xmm1,%xmm12
+ movdqa %xmm5,%xmm11
+
+ pxor %xmm3,%xmm10
+ pxor %xmm1,%xmm9
+ pxor %xmm2,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm3,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm15,%xmm11
+ movdqa %xmm10,%xmm14
+
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm2,%xmm11
+ pxor %xmm15,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm6,%xmm12
+ movdqa %xmm4,%xmm11
+ pxor %xmm0,%xmm12
+ pxor %xmm5,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm1,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm3,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm0,%xmm13
+ pand %xmm2,%xmm11
+ movdqa %xmm6,%xmm14
+ pand %xmm15,%xmm12
+ pand %xmm4,%xmm13
+ por %xmm5,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+
+
+
+
+
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+
+ pxor %xmm9,%xmm10
+
+ pand %xmm10,%xmm12
+
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+
+ pand %xmm7,%xmm9
+
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+
+ pand %xmm14,%xmm13
+
+ pxor %xmm11,%xmm13
+ movdqa %xmm5,%xmm11
+ movdqa %xmm4,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm5,%xmm9
+ pxor %xmm4,%xmm5
+ pand %xmm14,%xmm4
+ pand %xmm13,%xmm5
+ pxor %xmm4,%xmm5
+ pxor %xmm9,%xmm4
+ pxor %xmm15,%xmm11
+ pxor %xmm2,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm2,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm2
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm2,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm2
+ pxor %xmm11,%xmm5
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm2
+
+ movdqa %xmm6,%xmm11
+ movdqa %xmm0,%xmm7
+ pxor %xmm3,%xmm11
+ pxor %xmm1,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm3,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm1,%xmm3
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm1
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm3
+ pxor %xmm11,%xmm7
+ pxor %xmm1,%xmm3
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm1
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm6,%xmm10
+ pxor %xmm0,%xmm6
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm6
+ pxor %xmm0,%xmm6
+ pxor %xmm10,%xmm0
+ pxor %xmm11,%xmm6
+ pxor %xmm11,%xmm3
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm1
+ pxor %xmm15,%xmm6
+ pxor %xmm5,%xmm0
+ pxor %xmm6,%xmm3
+ pxor %xmm15,%xmm5
+ pxor %xmm0,%xmm15
+
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ pxor %xmm2,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm4,%xmm3
+
+ pxor %xmm2,%xmm5
+ decl %r10d
+ jl .Lenc_done
+ pshufd $147,%xmm15,%xmm7
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm3,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm5,%xmm10
+ pxor %xmm9,%xmm3
+ pshufd $147,%xmm2,%xmm11
+ pxor %xmm10,%xmm5
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm2
+ pshufd $147,%xmm1,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm1
+ pxor %xmm14,%xmm4
+
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm2,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm5,%xmm11
+ pshufd $78,%xmm2,%xmm7
+ pxor %xmm1,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm3,%xmm10
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm1,%xmm5
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm12,%xmm8
+ pxor %xmm10,%xmm2
+ pxor %xmm14,%xmm6
+ pxor %xmm13,%xmm5
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm1
+ movdqa %xmm8,%xmm4
+ movdqa 48(%r11),%xmm7
+ jnz .Lenc_loop
+ movdqa 64(%r11),%xmm7
+ jmp .Lenc_loop
+.align 16
+.Lenc_done:
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm2,%xmm10
+ psrlq $1,%xmm2
+ pxor %xmm4,%xmm1
+ pxor %xmm6,%xmm2
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm2
+ pxor %xmm1,%xmm4
+ psllq $1,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $1,%xmm2
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm2
+ movdqa %xmm3,%xmm9
+ psrlq $1,%xmm3
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm5,%xmm3
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm15
+ pxor %xmm3,%xmm5
+ psllq $1,%xmm3
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9
+ psrlq $2,%xmm6
+ movdqa %xmm2,%xmm10
+ psrlq $2,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm1,%xmm2
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm2
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm2,%xmm1
+ psllq $2,%xmm2
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm2
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm5,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm5
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm5,%xmm9
+ psrlq $4,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $4,%xmm3
+ pxor %xmm4,%xmm5
+ pxor %xmm1,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm4
+ psllq $4,%xmm5
+ pxor %xmm3,%xmm1
+ psllq $4,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm2,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm2
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq
+.size _bsaes_encrypt8,.-_bsaes_encrypt8
+
+.type _bsaes_decrypt8,@function
+.align 64
+_bsaes_decrypt8:
+ leaq .LBS0(%rip),%r11
+
+ movdqa (%rax),%xmm8
+ leaq 16(%rax),%rax
+ movdqa -48(%r11),%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+.byte 102,15,56,0,247
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm4,%xmm9
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d
+ jmp .Ldec_sbox
+.align 16
+.Ldec_loop:
+ pxor 0(%rax),%xmm15
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239
+ leaq 128(%rax),%rax
+.byte 102,15,56,0,247
+.Ldec_sbox:
+ pxor %xmm3,%xmm2
+
+ pxor %xmm6,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm3,%xmm5
+ pxor %xmm5,%xmm6
+ pxor %xmm6,%xmm0
+
+ pxor %xmm0,%xmm15
+ pxor %xmm4,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm15,%xmm4
+ pxor %xmm2,%xmm0
+ movdqa %xmm2,%xmm10
+ movdqa %xmm6,%xmm9
+ movdqa %xmm0,%xmm8
+ movdqa %xmm3,%xmm12
+ movdqa %xmm4,%xmm11
+
+ pxor %xmm15,%xmm10
+ pxor %xmm3,%xmm9
+ pxor %xmm5,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm15,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm1,%xmm11
+ movdqa %xmm10,%xmm14
+
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm5,%xmm11
+ pxor %xmm1,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm2,%xmm12
+ movdqa %xmm0,%xmm11
+ pxor %xmm6,%xmm12
+ pxor %xmm4,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm3,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm15,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm6,%xmm13
+ pand %xmm5,%xmm11
+ movdqa %xmm2,%xmm14
+ pand %xmm1,%xmm12
+ pand %xmm0,%xmm13
+ por %xmm4,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+
+
+
+
+
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+
+ pxor %xmm9,%xmm10
+
+ pand %xmm10,%xmm12
+
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+
+ pand %xmm7,%xmm9
+
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+
+ pand %xmm14,%xmm13
+
+ pxor %xmm11,%xmm13
+ movdqa %xmm4,%xmm11
+ movdqa %xmm0,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm4,%xmm9
+ pxor %xmm0,%xmm4
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm4
+ pxor %xmm0,%xmm4
+ pxor %xmm9,%xmm0
+ pxor %xmm1,%xmm11
+ pxor %xmm5,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm1,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm5,%xmm1
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm5
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm1
+ pxor %xmm11,%xmm7
+ pxor %xmm5,%xmm1
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm5
+ pxor %xmm11,%xmm4
+ pxor %xmm11,%xmm1
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm5
+
+ movdqa %xmm2,%xmm11
+ movdqa %xmm6,%xmm7
+ pxor %xmm15,%xmm11
+ pxor %xmm3,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm3,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm3
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm3,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm3
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm2,%xmm10
+ pxor %xmm6,%xmm2
+ pand %xmm14,%xmm6
+ pand %xmm13,%xmm2
+ pxor %xmm6,%xmm2
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm3
+ pxor %xmm6,%xmm0
+ pxor %xmm4,%xmm5
+
+ pxor %xmm0,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm6,%xmm4
+ pxor %xmm1,%xmm3
+ pxor %xmm15,%xmm6
+ pxor %xmm4,%xmm3
+ pxor %xmm5,%xmm2
+ pxor %xmm0,%xmm5
+ pxor %xmm3,%xmm2
+
+ pxor %xmm15,%xmm3
+ pxor %xmm2,%xmm6
+ decl %r10d
+ jl .Ldec_done
+
+ pshufd $78,%xmm15,%xmm7
+ pshufd $78,%xmm2,%xmm13
+ pxor %xmm15,%xmm7
+ pshufd $78,%xmm4,%xmm14
+ pxor %xmm2,%xmm13
+ pshufd $78,%xmm0,%xmm8
+ pxor %xmm4,%xmm14
+ pshufd $78,%xmm5,%xmm9
+ pxor %xmm0,%xmm8
+ pshufd $78,%xmm3,%xmm10
+ pxor %xmm5,%xmm9
+ pxor %xmm13,%xmm15
+ pxor %xmm13,%xmm0
+ pshufd $78,%xmm1,%xmm11
+ pxor %xmm3,%xmm10
+ pxor %xmm7,%xmm5
+ pxor %xmm8,%xmm3
+ pshufd $78,%xmm6,%xmm12
+ pxor %xmm1,%xmm11
+ pxor %xmm14,%xmm0
+ pxor %xmm9,%xmm1
+ pxor %xmm6,%xmm12
+
+ pxor %xmm14,%xmm5
+ pxor %xmm13,%xmm3
+ pxor %xmm13,%xmm1
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm14,%xmm1
+ pxor %xmm14,%xmm6
+ pxor %xmm12,%xmm4
+ pshufd $147,%xmm15,%xmm7
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm5,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm3,%xmm10
+ pxor %xmm9,%xmm5
+ pshufd $147,%xmm1,%xmm11
+ pxor %xmm10,%xmm3
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm1
+ pshufd $147,%xmm2,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm2
+ pxor %xmm14,%xmm4
+
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm1,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm3,%xmm11
+ pshufd $78,%xmm1,%xmm7
+ pxor %xmm2,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm5,%xmm10
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm2,%xmm3
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm12,%xmm8
+ pxor %xmm1,%xmm10
+ pxor %xmm14,%xmm6
+ pxor %xmm3,%xmm13
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm2
+ movdqa %xmm13,%xmm5
+ movdqa %xmm8,%xmm4
+ movdqa %xmm2,%xmm1
+ movdqa %xmm10,%xmm2
+ movdqa -16(%r11),%xmm7
+ jnz .Ldec_loop
+ movdqa -32(%r11),%xmm7
+ jmp .Ldec_loop
+.align 16
+.Ldec_done:
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm2,%xmm9
+ psrlq $1,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $1,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm6,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm4
+ psllq $1,%xmm2
+ pxor %xmm1,%xmm6
+ psllq $1,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm3,%xmm5
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm15
+ pxor %xmm5,%xmm3
+ psllq $1,%xmm5
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9
+ psrlq $2,%xmm6
+ movdqa %xmm1,%xmm10
+ psrlq $2,%xmm1
+ pxor %xmm4,%xmm6
+ pxor %xmm2,%xmm1
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm1
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm1,%xmm2
+ psllq $2,%xmm1
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm3,%xmm0
+ pxor %xmm5,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm3
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm5
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm3,%xmm9
+ psrlq $4,%xmm3
+ movdqa %xmm5,%xmm10
+ psrlq $4,%xmm5
+ pxor %xmm4,%xmm3
+ pxor %xmm2,%xmm5
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $4,%xmm3
+ pxor %xmm5,%xmm2
+ psllq $4,%xmm5
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm5
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq
+.size _bsaes_decrypt8,.-_bsaes_decrypt8
+.type _bsaes_key_convert,@function
+.align 16
+_bsaes_key_convert:
+ leaq .Lmasks(%rip),%r11
+ movdqu (%rcx),%xmm7
+ leaq 16(%rcx),%rcx
+ movdqa 0(%r11),%xmm0
+ movdqa 16(%r11),%xmm1
+ movdqa 32(%r11),%xmm2
+ movdqa 48(%r11),%xmm3
+ movdqa 64(%r11),%xmm4
+ pcmpeqd %xmm5,%xmm5
+
+ movdqu (%rcx),%xmm6
+ movdqa %xmm7,(%rax)
+ leaq 16(%rax),%rax
+ decl %r10d
+ jmp .Lkey_loop
+.align 16
+.Lkey_loop:
+.byte 102,15,56,0,244
+
+ movdqa %xmm0,%xmm8
+ movdqa %xmm1,%xmm9
+
+ pand %xmm6,%xmm8
+ pand %xmm6,%xmm9
+ movdqa %xmm2,%xmm10
+ pcmpeqb %xmm0,%xmm8
+ psllq $4,%xmm0
+ movdqa %xmm3,%xmm11
+ pcmpeqb %xmm1,%xmm9
+ psllq $4,%xmm1
+
+ pand %xmm6,%xmm10
+ pand %xmm6,%xmm11
+ movdqa %xmm0,%xmm12
+ pcmpeqb %xmm2,%xmm10
+ psllq $4,%xmm2
+ movdqa %xmm1,%xmm13
+ pcmpeqb %xmm3,%xmm11
+ psllq $4,%xmm3
+
+ movdqa %xmm2,%xmm14
+ movdqa %xmm3,%xmm15
+ pxor %xmm5,%xmm8
+ pxor %xmm5,%xmm9
+
+ pand %xmm6,%xmm12
+ pand %xmm6,%xmm13
+ movdqa %xmm8,0(%rax)
+ pcmpeqb %xmm0,%xmm12
+ psrlq $4,%xmm0
+ movdqa %xmm9,16(%rax)
+ pcmpeqb %xmm1,%xmm13
+ psrlq $4,%xmm1
+ leaq 16(%rcx),%rcx
+
+ pand %xmm6,%xmm14
+ pand %xmm6,%xmm15
+ movdqa %xmm10,32(%rax)
+ pcmpeqb %xmm2,%xmm14
+ psrlq $4,%xmm2
+ movdqa %xmm11,48(%rax)
+ pcmpeqb %xmm3,%xmm15
+ psrlq $4,%xmm3
+ movdqu (%rcx),%xmm6
+
+ pxor %xmm5,%xmm13
+ pxor %xmm5,%xmm14
+ movdqa %xmm12,64(%rax)
+ movdqa %xmm13,80(%rax)
+ movdqa %xmm14,96(%rax)
+ movdqa %xmm15,112(%rax)
+ leaq 128(%rax),%rax
+ decl %r10d
+ jnz .Lkey_loop
+
+ movdqa 80(%r11),%xmm7
+
+ retq
+.size _bsaes_key_convert,.-_bsaes_key_convert
+
+.globl bsaes_cbc_encrypt
+.type bsaes_cbc_encrypt,@function
+.align 16
+bsaes_cbc_encrypt:
+ cmpl $0,%r9d
+ jne asm_AES_cbc_encrypt
+ cmpq $128,%rdx
+ jb asm_AES_cbc_encrypt
+
+ movq %rsp,%rax
+.Lcbc_dec_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movl 240(%rcx),%eax
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+ movq %r8,%rbx
+ shrq $4,%r14
+
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor (%rsp),%xmm7
+ movdqa %xmm6,(%rax)
+ movdqa %xmm7,(%rsp)
+
+ movdqu (%rbx),%xmm14
+ subq $8,%r14
+.Lcbc_dec_loop:
+ movdqu 0(%r12),%xmm15
+ movdqu 16(%r12),%xmm0
+ movdqu 32(%r12),%xmm1
+ movdqu 48(%r12),%xmm2
+ movdqu 64(%r12),%xmm3
+ movdqu 80(%r12),%xmm4
+ movq %rsp,%rax
+ movdqu 96(%r12),%xmm5
+ movl %edx,%r10d
+ movdqu 112(%r12),%xmm6
+ movdqa %xmm14,32(%rbp)
+
+ call _bsaes_decrypt8
+
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm2
+ movdqu 112(%r12),%xmm14
+ pxor %xmm13,%xmm4
+ movdqu %xmm15,0(%r13)
+ leaq 128(%r12),%r12
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ subq $8,%r14
+ jnc .Lcbc_dec_loop
+
+ addq $8,%r14
+ jz .Lcbc_dec_done
+
+ movdqu 0(%r12),%xmm15
+ movq %rsp,%rax
+ movl %edx,%r10d
+ cmpq $2,%r14
+ jb .Lcbc_dec_one
+ movdqu 16(%r12),%xmm0
+ je .Lcbc_dec_two
+ movdqu 32(%r12),%xmm1
+ cmpq $4,%r14
+ jb .Lcbc_dec_three
+ movdqu 48(%r12),%xmm2
+ je .Lcbc_dec_four
+ movdqu 64(%r12),%xmm3
+ cmpq $6,%r14
+ jb .Lcbc_dec_five
+ movdqu 80(%r12),%xmm4
+ je .Lcbc_dec_six
+ movdqu 96(%r12),%xmm5
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm14
+ pxor %xmm12,%xmm2
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_six:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm14
+ pxor %xmm11,%xmm6
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_five:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm14
+ pxor %xmm10,%xmm1
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_four:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm14
+ pxor %xmm9,%xmm3
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_three:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm14
+ pxor %xmm8,%xmm5
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_two:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm14
+ pxor %xmm7,%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_one:
+ leaq (%r12),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm14
+ movdqu %xmm14,(%r13)
+ movdqa %xmm15,%xmm14
+
+.Lcbc_dec_done:
+ movdqu %xmm14,(%rbx)
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lcbc_dec_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lcbc_dec_bzero
+
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lcbc_dec_epilogue:
+ retq
+.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
+
+.globl bsaes_ctr32_encrypt_blocks
+.type bsaes_ctr32_encrypt_blocks,@function
+.align 16
+bsaes_ctr32_encrypt_blocks:
+ movq %rsp,%rax
+.Lctr_enc_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movdqu (%r8),%xmm0
+ movl 240(%rcx),%eax
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+ movdqa %xmm0,32(%rbp)
+ cmpq $8,%rdx
+ jb .Lctr_enc_short
+
+ movl %eax,%ebx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %ebx,%r10d
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7
+ movdqa %xmm7,(%rax)
+
+ movdqa (%rsp),%xmm8
+ leaq .LADD1(%rip),%r11
+ movdqa 32(%rbp),%xmm15
+ movdqa -32(%r11),%xmm7
+.byte 102,68,15,56,0,199
+.byte 102,68,15,56,0,255
+ movdqa %xmm8,(%rsp)
+ jmp .Lctr_enc_loop
+.align 16
+.Lctr_enc_loop:
+ movdqa %xmm15,32(%rbp)
+ movdqa %xmm15,%xmm0
+ movdqa %xmm15,%xmm1
+ paddd 0(%r11),%xmm0
+ movdqa %xmm15,%xmm2
+ paddd 16(%r11),%xmm1
+ movdqa %xmm15,%xmm3
+ paddd 32(%r11),%xmm2
+ movdqa %xmm15,%xmm4
+ paddd 48(%r11),%xmm3
+ movdqa %xmm15,%xmm5
+ paddd 64(%r11),%xmm4
+ movdqa %xmm15,%xmm6
+ paddd 80(%r11),%xmm5
+ paddd 96(%r11),%xmm6
+
+
+
+ movdqa (%rsp),%xmm8
+ leaq 16(%rsp),%rax
+ movdqa -16(%r11),%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+ leaq .LBS0(%rip),%r11
+.byte 102,15,56,0,247
+ movl %ebx,%r10d
+
+ call _bsaes_encrypt8_bitslice
+
+ subq $8,%r14
+ jc .Lctr_enc_loop_done
+
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ movdqu 32(%r12),%xmm9
+ movdqu 48(%r12),%xmm10
+ movdqu 64(%r12),%xmm11
+ movdqu 80(%r12),%xmm12
+ movdqu 96(%r12),%xmm13
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ pxor %xmm15,%xmm7
+ movdqa 32(%rbp),%xmm15
+ pxor %xmm8,%xmm0
+ movdqu %xmm7,0(%r13)
+ pxor %xmm9,%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor %xmm10,%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor %xmm11,%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor %xmm12,%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor %xmm13,%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor %xmm14,%xmm4
+ movdqu %xmm1,96(%r13)
+ leaq .LADD1(%rip),%r11
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ paddd 112(%r11),%xmm15
+ jnz .Lctr_enc_loop
+
+ jmp .Lctr_enc_done
+.align 16
+.Lctr_enc_loop_done:
+ addq $8,%r14
+ movdqu 0(%r12),%xmm7
+ pxor %xmm7,%xmm15
+ movdqu %xmm15,0(%r13)
+ cmpq $2,%r14
+ jb .Lctr_enc_done
+ movdqu 16(%r12),%xmm8
+ pxor %xmm8,%xmm0
+ movdqu %xmm0,16(%r13)
+ je .Lctr_enc_done
+ movdqu 32(%r12),%xmm9
+ pxor %xmm9,%xmm3
+ movdqu %xmm3,32(%r13)
+ cmpq $4,%r14
+ jb .Lctr_enc_done
+ movdqu 48(%r12),%xmm10
+ pxor %xmm10,%xmm5
+ movdqu %xmm5,48(%r13)
+ je .Lctr_enc_done
+ movdqu 64(%r12),%xmm11
+ pxor %xmm11,%xmm2
+ movdqu %xmm2,64(%r13)
+ cmpq $6,%r14
+ jb .Lctr_enc_done
+ movdqu 80(%r12),%xmm12
+ pxor %xmm12,%xmm6
+ movdqu %xmm6,80(%r13)
+ je .Lctr_enc_done
+ movdqu 96(%r12),%xmm13
+ pxor %xmm13,%xmm1
+ movdqu %xmm1,96(%r13)
+ jmp .Lctr_enc_done
+
+.align 16
+.Lctr_enc_short:
+ leaq 32(%rbp),%rdi
+ leaq 48(%rbp),%rsi
+ leaq (%r15),%rdx
+ call asm_AES_encrypt
+ movdqu (%r12),%xmm0
+ leaq 16(%r12),%r12
+ movl 44(%rbp),%eax
+ bswapl %eax
+ pxor 48(%rbp),%xmm0
+ incl %eax
+ movdqu %xmm0,(%r13)
+ bswapl %eax
+ leaq 16(%r13),%r13
+ movl %eax,44(%rsp)
+ decq %r14
+ jnz .Lctr_enc_short
+
+.Lctr_enc_done:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lctr_enc_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lctr_enc_bzero
+
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lctr_enc_epilogue:
+ retq
+.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
+.globl bsaes_xts_encrypt
+.type bsaes_xts_encrypt,@function
+.align 16
+bsaes_xts_encrypt:
+ movq %rsp,%rax
+.Lxts_enc_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+
+ leaq (%r9),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r8),%rdx
+ call asm_AES_encrypt
+
+ movl 240(%r15),%eax
+ movq %r14,%rbx
+
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7
+ movdqa %xmm7,(%rax)
+
+ andq $-16,%r14
+ subq $128,%rsp
+ movdqa 32(%rbp),%xmm6
+
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+
+ subq $128,%r14
+ jc .Lxts_enc_short
+ jmp .Lxts_enc_loop
+
+.align 16
+.Lxts_enc_loop:
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp)
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ pxor %xmm14,%xmm6
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm1,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+
+ subq $128,%r14
+ jnc .Lxts_enc_loop
+
+.Lxts_enc_short:
+ addq $128,%r14
+ jz .Lxts_enc_done
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je .Lxts_enc_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je .Lxts_enc_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je .Lxts_enc_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je .Lxts_enc_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je .Lxts_enc_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je .Lxts_enc_6
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp)
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm1,96(%r13)
+ leaq 112(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_6:
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+
+ movdqa 96(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_5:
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ movdqu %xmm2,64(%r13)
+ leaq 80(%r13),%r13
+
+ movdqa 80(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_4:
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ movdqu %xmm5,48(%r13)
+ leaq 64(%r13),%r13
+
+ movdqa 64(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_3:
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm3,32(%r13)
+ leaq 48(%r13),%r13
+
+ movdqa 48(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_2:
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+
+ movdqa 32(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_1:
+ pxor %xmm15,%xmm7
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx
+ call asm_AES_encrypt
+ pxor 32(%rbp),%xmm15
+
+
+
+
+
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+
+ movdqa 16(%rsp),%xmm6
+
+.Lxts_enc_done:
+ andl $15,%ebx
+ jz .Lxts_enc_ret
+ movq %r13,%rdx
+
+.Lxts_enc_steal:
+ movzbl (%r12),%eax
+ movzbl -16(%rdx),%ecx
+ leaq 1(%r12),%r12
+ movb %al,-16(%rdx)
+ movb %cl,0(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz .Lxts_enc_steal
+
+ movdqu -16(%r13),%xmm15
+ leaq 32(%rbp),%rdi
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call asm_AES_encrypt
+ pxor 32(%rbp),%xmm6
+ movdqu %xmm6,-16(%r13)
+
+.Lxts_enc_ret:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lxts_enc_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lxts_enc_bzero
+
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lxts_enc_epilogue:
+ retq
+.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
+
+.globl bsaes_xts_decrypt
+.type bsaes_xts_decrypt,@function
+.align 16
+bsaes_xts_decrypt:
+ movq %rsp,%rax
+.Lxts_dec_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+
+ leaq (%r9),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r8),%rdx
+ call asm_AES_encrypt
+
+ movl 240(%r15),%eax
+ movq %r14,%rbx
+
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor (%rsp),%xmm7
+ movdqa %xmm6,(%rax)
+ movdqa %xmm7,(%rsp)
+
+ xorl %eax,%eax
+ andq $-16,%r14
+ testl $15,%ebx
+ setnz %al
+ shlq $4,%rax
+ subq %rax,%r14
+
+ subq $128,%rsp
+ movdqa 32(%rbp),%xmm6
+
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+
+ subq $128,%r14
+ jc .Lxts_dec_short
+ jmp .Lxts_dec_loop
+
+.align 16
+.Lxts_dec_loop:
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp)
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ pxor %xmm14,%xmm6
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+
+ subq $128,%r14
+ jnc .Lxts_dec_loop
+
+.Lxts_dec_short:
+ addq $128,%r14
+ jz .Lxts_dec_done
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je .Lxts_dec_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je .Lxts_dec_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je .Lxts_dec_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je .Lxts_dec_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je .Lxts_dec_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je .Lxts_dec_6
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp)
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ leaq 112(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_6:
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+
+ movdqa 96(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_5:
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ leaq 80(%r13),%r13
+
+ movdqa 80(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_4:
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ leaq 64(%r13),%r13
+
+ movdqa 64(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_3:
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ leaq 48(%r13),%r13
+
+ movdqa 48(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_2:
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+
+ movdqa 32(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_1:
+ pxor %xmm15,%xmm7
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm15
+
+
+
+
+
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+
+ movdqa 16(%rsp),%xmm6
+
+.Lxts_dec_done:
+ andl $15,%ebx
+ jz .Lxts_dec_ret
+
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ movdqa %xmm6,%xmm5
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ movdqu (%r12),%xmm15
+ pxor %xmm13,%xmm6
+
+ leaq 32(%rbp),%rdi
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm6
+ movq %r13,%rdx
+ movdqu %xmm6,(%r13)
+
+.Lxts_dec_steal:
+ movzbl 16(%r12),%eax
+ movzbl (%rdx),%ecx
+ leaq 1(%r12),%r12
+ movb %al,(%rdx)
+ movb %cl,16(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz .Lxts_dec_steal
+
+ movdqu (%r13),%xmm15
+ leaq 32(%rbp),%rdi
+ pxor %xmm5,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm5
+ movdqu %xmm5,(%r13)
+
+.Lxts_dec_ret:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lxts_dec_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lxts_dec_bzero
+
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lxts_dec_epilogue:
+ retq
+.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
+.type _bsaes_const,@object
+.align 64
+_bsaes_const:
+.LM0ISR:
+.quad 0x0a0e0206070b0f03, 0x0004080c0d010509
+.LISRM0:
+.quad 0x01040b0e0205080f, 0x0306090c00070a0d
+.LISR:
+.quad 0x0504070602010003, 0x0f0e0d0c080b0a09
+.LBS0:
+.quad 0x5555555555555555, 0x5555555555555555
+.LBS1:
+.quad 0x3333333333333333, 0x3333333333333333
+.LBS2:
+.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+.LSR:
+.quad 0x0504070600030201, 0x0f0e0d0c0a09080b
+.LSRM0:
+.quad 0x0304090e00050a0f, 0x01060b0c0207080d
+.LM0SR:
+.quad 0x0a0e02060f03070b, 0x0004080c05090d01
+.LSWPUP:
+.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
+.LSWPUPM0SR:
+.quad 0x0a0d02060c03070b, 0x0004080f05090e01
+.LADD1:
+.quad 0x0000000000000000, 0x0000000100000000
+.LADD2:
+.quad 0x0000000000000000, 0x0000000200000000
+.LADD3:
+.quad 0x0000000000000000, 0x0000000300000000
+.LADD4:
+.quad 0x0000000000000000, 0x0000000400000000
+.LADD5:
+.quad 0x0000000000000000, 0x0000000500000000
+.LADD6:
+.quad 0x0000000000000000, 0x0000000600000000
+.LADD7:
+.quad 0x0000000000000000, 0x0000000700000000
+.LADD8:
+.quad 0x0000000000000000, 0x0000000800000000
+.Lxts_magic:
+.long 0x87,0,1,0
+.Lmasks:
+.quad 0x0101010101010101, 0x0101010101010101
+.quad 0x0202020202020202, 0x0202020202020202
+.quad 0x0404040404040404, 0x0404040404040404
+.quad 0x0808080808080808, 0x0808080808080808
+.LM0:
+.quad 0x02060a0e03070b0f, 0x0004080c0105090d
+.L63:
+.quad 0x6363636363636363, 0x6363636363636363
+.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0
+.align 64
+.size _bsaes_const,.-_bsaes_const
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/aes/bsaes-macosx-x86_64.S b/ext/libressl/crypto/aes/bsaes-macosx-x86_64.S
new file mode 100644
index 0000000..5f780f0
--- /dev/null
+++ b/ext/libressl/crypto/aes/bsaes-macosx-x86_64.S
@@ -0,0 +1,2499 @@
+#include "x86_arch.h"
+.text
+
+
+
+
+
+.p2align 6
+_bsaes_encrypt8:
+ leaq L$BS0(%rip),%r11
+
+ movdqa (%rax),%xmm8
+ leaq 16(%rax),%rax
+ movdqa 80(%r11),%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+.byte 102,15,56,0,247
+_bsaes_encrypt8_bitslice:
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm4,%xmm9
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d
+ jmp L$enc_sbox
+.p2align 4
+L$enc_loop:
+ pxor 0(%rax),%xmm15
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239
+ leaq 128(%rax),%rax
+.byte 102,15,56,0,247
+L$enc_sbox:
+ pxor %xmm5,%xmm4
+ pxor %xmm0,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm1,%xmm5
+ pxor %xmm15,%xmm4
+
+ pxor %xmm2,%xmm5
+ pxor %xmm6,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm3,%xmm2
+ pxor %xmm4,%xmm3
+ pxor %xmm0,%xmm2
+
+ pxor %xmm6,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm6,%xmm10
+ movdqa %xmm0,%xmm9
+ movdqa %xmm4,%xmm8
+ movdqa %xmm1,%xmm12
+ movdqa %xmm5,%xmm11
+
+ pxor %xmm3,%xmm10
+ pxor %xmm1,%xmm9
+ pxor %xmm2,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm3,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm15,%xmm11
+ movdqa %xmm10,%xmm14
+
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm2,%xmm11
+ pxor %xmm15,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm6,%xmm12
+ movdqa %xmm4,%xmm11
+ pxor %xmm0,%xmm12
+ pxor %xmm5,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm1,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm3,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm0,%xmm13
+ pand %xmm2,%xmm11
+ movdqa %xmm6,%xmm14
+ pand %xmm15,%xmm12
+ pand %xmm4,%xmm13
+ por %xmm5,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+
+
+
+
+
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+
+ pxor %xmm9,%xmm10
+
+ pand %xmm10,%xmm12
+
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+
+ pand %xmm7,%xmm9
+
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+
+ pand %xmm14,%xmm13
+
+ pxor %xmm11,%xmm13
+ movdqa %xmm5,%xmm11
+ movdqa %xmm4,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm5,%xmm9
+ pxor %xmm4,%xmm5
+ pand %xmm14,%xmm4
+ pand %xmm13,%xmm5
+ pxor %xmm4,%xmm5
+ pxor %xmm9,%xmm4
+ pxor %xmm15,%xmm11
+ pxor %xmm2,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm2,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm2
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm2,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm2
+ pxor %xmm11,%xmm5
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm2
+
+ movdqa %xmm6,%xmm11
+ movdqa %xmm0,%xmm7
+ pxor %xmm3,%xmm11
+ pxor %xmm1,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm3,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm1,%xmm3
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm1
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm3
+ pxor %xmm11,%xmm7
+ pxor %xmm1,%xmm3
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm1
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm6,%xmm10
+ pxor %xmm0,%xmm6
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm6
+ pxor %xmm0,%xmm6
+ pxor %xmm10,%xmm0
+ pxor %xmm11,%xmm6
+ pxor %xmm11,%xmm3
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm1
+ pxor %xmm15,%xmm6
+ pxor %xmm5,%xmm0
+ pxor %xmm6,%xmm3
+ pxor %xmm15,%xmm5
+ pxor %xmm0,%xmm15
+
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ pxor %xmm2,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm4,%xmm3
+
+ pxor %xmm2,%xmm5
+ decl %r10d
+ jl L$enc_done
+ pshufd $147,%xmm15,%xmm7
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm3,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm5,%xmm10
+ pxor %xmm9,%xmm3
+ pshufd $147,%xmm2,%xmm11
+ pxor %xmm10,%xmm5
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm2
+ pshufd $147,%xmm1,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm1
+ pxor %xmm14,%xmm4
+
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm2,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm5,%xmm11
+ pshufd $78,%xmm2,%xmm7
+ pxor %xmm1,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm3,%xmm10
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm1,%xmm5
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm12,%xmm8
+ pxor %xmm10,%xmm2
+ pxor %xmm14,%xmm6
+ pxor %xmm13,%xmm5
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm1
+ movdqa %xmm8,%xmm4
+ movdqa 48(%r11),%xmm7
+ jnz L$enc_loop
+ movdqa 64(%r11),%xmm7
+ jmp L$enc_loop
+.p2align 4
+L$enc_done:
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm2,%xmm10
+ psrlq $1,%xmm2
+ pxor %xmm4,%xmm1
+ pxor %xmm6,%xmm2
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm2
+ pxor %xmm1,%xmm4
+ psllq $1,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $1,%xmm2
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm2
+ movdqa %xmm3,%xmm9
+ psrlq $1,%xmm3
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm5,%xmm3
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm15
+ pxor %xmm3,%xmm5
+ psllq $1,%xmm3
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9
+ psrlq $2,%xmm6
+ movdqa %xmm2,%xmm10
+ psrlq $2,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm1,%xmm2
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm2
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm2,%xmm1
+ psllq $2,%xmm2
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm2
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm5,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm5
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm5,%xmm9
+ psrlq $4,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $4,%xmm3
+ pxor %xmm4,%xmm5
+ pxor %xmm1,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm4
+ psllq $4,%xmm5
+ pxor %xmm3,%xmm1
+ psllq $4,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm2,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm2
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq
+
+
+
+.p2align 6
+_bsaes_decrypt8:
+ leaq L$BS0(%rip),%r11
+
+ movdqa (%rax),%xmm8
+ leaq 16(%rax),%rax
+ movdqa -48(%r11),%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+.byte 102,15,56,0,247
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm4,%xmm9
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d
+ jmp L$dec_sbox
+.p2align 4
+L$dec_loop:
+ pxor 0(%rax),%xmm15
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239
+ leaq 128(%rax),%rax
+.byte 102,15,56,0,247
+L$dec_sbox:
+ pxor %xmm3,%xmm2
+
+ pxor %xmm6,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm3,%xmm5
+ pxor %xmm5,%xmm6
+ pxor %xmm6,%xmm0
+
+ pxor %xmm0,%xmm15
+ pxor %xmm4,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm15,%xmm4
+ pxor %xmm2,%xmm0
+ movdqa %xmm2,%xmm10
+ movdqa %xmm6,%xmm9
+ movdqa %xmm0,%xmm8
+ movdqa %xmm3,%xmm12
+ movdqa %xmm4,%xmm11
+
+ pxor %xmm15,%xmm10
+ pxor %xmm3,%xmm9
+ pxor %xmm5,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm15,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm1,%xmm11
+ movdqa %xmm10,%xmm14
+
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm5,%xmm11
+ pxor %xmm1,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm2,%xmm12
+ movdqa %xmm0,%xmm11
+ pxor %xmm6,%xmm12
+ pxor %xmm4,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm3,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm15,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm6,%xmm13
+ pand %xmm5,%xmm11
+ movdqa %xmm2,%xmm14
+ pand %xmm1,%xmm12
+ pand %xmm0,%xmm13
+ por %xmm4,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+
+
+
+
+
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+
+ pxor %xmm9,%xmm10
+
+ pand %xmm10,%xmm12
+
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+
+ pand %xmm7,%xmm9
+
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+
+ pand %xmm14,%xmm13
+
+ pxor %xmm11,%xmm13
+ movdqa %xmm4,%xmm11
+ movdqa %xmm0,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm4,%xmm9
+ pxor %xmm0,%xmm4
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm4
+ pxor %xmm0,%xmm4
+ pxor %xmm9,%xmm0
+ pxor %xmm1,%xmm11
+ pxor %xmm5,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm1,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm5,%xmm1
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm5
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm1
+ pxor %xmm11,%xmm7
+ pxor %xmm5,%xmm1
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm5
+ pxor %xmm11,%xmm4
+ pxor %xmm11,%xmm1
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm5
+
+ movdqa %xmm2,%xmm11
+ movdqa %xmm6,%xmm7
+ pxor %xmm15,%xmm11
+ pxor %xmm3,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm3,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm3
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm3,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm3
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm2,%xmm10
+ pxor %xmm6,%xmm2
+ pand %xmm14,%xmm6
+ pand %xmm13,%xmm2
+ pxor %xmm6,%xmm2
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm3
+ pxor %xmm6,%xmm0
+ pxor %xmm4,%xmm5
+
+ pxor %xmm0,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm6,%xmm4
+ pxor %xmm1,%xmm3
+ pxor %xmm15,%xmm6
+ pxor %xmm4,%xmm3
+ pxor %xmm5,%xmm2
+ pxor %xmm0,%xmm5
+ pxor %xmm3,%xmm2
+
+ pxor %xmm15,%xmm3
+ pxor %xmm2,%xmm6
+ decl %r10d
+ jl L$dec_done
+
+ pshufd $78,%xmm15,%xmm7
+ pshufd $78,%xmm2,%xmm13
+ pxor %xmm15,%xmm7
+ pshufd $78,%xmm4,%xmm14
+ pxor %xmm2,%xmm13
+ pshufd $78,%xmm0,%xmm8
+ pxor %xmm4,%xmm14
+ pshufd $78,%xmm5,%xmm9
+ pxor %xmm0,%xmm8
+ pshufd $78,%xmm3,%xmm10
+ pxor %xmm5,%xmm9
+ pxor %xmm13,%xmm15
+ pxor %xmm13,%xmm0
+ pshufd $78,%xmm1,%xmm11
+ pxor %xmm3,%xmm10
+ pxor %xmm7,%xmm5
+ pxor %xmm8,%xmm3
+ pshufd $78,%xmm6,%xmm12
+ pxor %xmm1,%xmm11
+ pxor %xmm14,%xmm0
+ pxor %xmm9,%xmm1
+ pxor %xmm6,%xmm12
+
+ pxor %xmm14,%xmm5
+ pxor %xmm13,%xmm3
+ pxor %xmm13,%xmm1
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm14,%xmm1
+ pxor %xmm14,%xmm6
+ pxor %xmm12,%xmm4
+ pshufd $147,%xmm15,%xmm7
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm5,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm3,%xmm10
+ pxor %xmm9,%xmm5
+ pshufd $147,%xmm1,%xmm11
+ pxor %xmm10,%xmm3
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm1
+ pshufd $147,%xmm2,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm2
+ pxor %xmm14,%xmm4
+
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm1,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm3,%xmm11
+ pshufd $78,%xmm1,%xmm7
+ pxor %xmm2,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm5,%xmm10
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm2,%xmm3
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm12,%xmm8
+ pxor %xmm1,%xmm10
+ pxor %xmm14,%xmm6
+ pxor %xmm3,%xmm13
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm2
+ movdqa %xmm13,%xmm5
+ movdqa %xmm8,%xmm4
+ movdqa %xmm2,%xmm1
+ movdqa %xmm10,%xmm2
+ movdqa -16(%r11),%xmm7
+ jnz L$dec_loop
+ movdqa -32(%r11),%xmm7
+ jmp L$dec_loop
+.p2align 4
+L$dec_done:
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm2,%xmm9
+ psrlq $1,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $1,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm6,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm4
+ psllq $1,%xmm2
+ pxor %xmm1,%xmm6
+ psllq $1,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm3,%xmm5
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm15
+ pxor %xmm5,%xmm3
+ psllq $1,%xmm5
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9
+ psrlq $2,%xmm6
+ movdqa %xmm1,%xmm10
+ psrlq $2,%xmm1
+ pxor %xmm4,%xmm6
+ pxor %xmm2,%xmm1
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm1
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm1,%xmm2
+ psllq $2,%xmm1
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm3,%xmm0
+ pxor %xmm5,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm3
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm5
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm3,%xmm9
+ psrlq $4,%xmm3
+ movdqa %xmm5,%xmm10
+ psrlq $4,%xmm5
+ pxor %xmm4,%xmm3
+ pxor %xmm2,%xmm5
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $4,%xmm3
+ pxor %xmm5,%xmm2
+ psllq $4,%xmm5
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm5
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq
+
+
+.p2align 4
+_bsaes_key_convert:
+ leaq L$masks(%rip),%r11
+ movdqu (%rcx),%xmm7
+ leaq 16(%rcx),%rcx
+ movdqa 0(%r11),%xmm0
+ movdqa 16(%r11),%xmm1
+ movdqa 32(%r11),%xmm2
+ movdqa 48(%r11),%xmm3
+ movdqa 64(%r11),%xmm4
+ pcmpeqd %xmm5,%xmm5
+
+ movdqu (%rcx),%xmm6
+ movdqa %xmm7,(%rax)
+ leaq 16(%rax),%rax
+ decl %r10d
+ jmp L$key_loop
+.p2align 4
+L$key_loop:
+.byte 102,15,56,0,244
+
+ movdqa %xmm0,%xmm8
+ movdqa %xmm1,%xmm9
+
+ pand %xmm6,%xmm8
+ pand %xmm6,%xmm9
+ movdqa %xmm2,%xmm10
+ pcmpeqb %xmm0,%xmm8
+ psllq $4,%xmm0
+ movdqa %xmm3,%xmm11
+ pcmpeqb %xmm1,%xmm9
+ psllq $4,%xmm1
+
+ pand %xmm6,%xmm10
+ pand %xmm6,%xmm11
+ movdqa %xmm0,%xmm12
+ pcmpeqb %xmm2,%xmm10
+ psllq $4,%xmm2
+ movdqa %xmm1,%xmm13
+ pcmpeqb %xmm3,%xmm11
+ psllq $4,%xmm3
+
+ movdqa %xmm2,%xmm14
+ movdqa %xmm3,%xmm15
+ pxor %xmm5,%xmm8
+ pxor %xmm5,%xmm9
+
+ pand %xmm6,%xmm12
+ pand %xmm6,%xmm13
+ movdqa %xmm8,0(%rax)
+ pcmpeqb %xmm0,%xmm12
+ psrlq $4,%xmm0
+ movdqa %xmm9,16(%rax)
+ pcmpeqb %xmm1,%xmm13
+ psrlq $4,%xmm1
+ leaq 16(%rcx),%rcx
+
+ pand %xmm6,%xmm14
+ pand %xmm6,%xmm15
+ movdqa %xmm10,32(%rax)
+ pcmpeqb %xmm2,%xmm14
+ psrlq $4,%xmm2
+ movdqa %xmm11,48(%rax)
+ pcmpeqb %xmm3,%xmm15
+ psrlq $4,%xmm3
+ movdqu (%rcx),%xmm6
+
+ pxor %xmm5,%xmm13
+ pxor %xmm5,%xmm14
+ movdqa %xmm12,64(%rax)
+ movdqa %xmm13,80(%rax)
+ movdqa %xmm14,96(%rax)
+ movdqa %xmm15,112(%rax)
+ leaq 128(%rax),%rax
+ decl %r10d
+ jnz L$key_loop
+
+ movdqa 80(%r11),%xmm7
+
+ retq
+
+
+.globl _bsaes_cbc_encrypt
+
+.p2align 4
+_bsaes_cbc_encrypt:
+ cmpl $0,%r9d
+ jne _asm_AES_cbc_encrypt
+ cmpq $128,%rdx
+ jb _asm_AES_cbc_encrypt
+
+ movq %rsp,%rax
+L$cbc_dec_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movl 240(%rcx),%eax
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+ movq %r8,%rbx
+ shrq $4,%r14
+
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor (%rsp),%xmm7
+ movdqa %xmm6,(%rax)
+ movdqa %xmm7,(%rsp)
+
+ movdqu (%rbx),%xmm14
+ subq $8,%r14
+L$cbc_dec_loop:
+ movdqu 0(%r12),%xmm15
+ movdqu 16(%r12),%xmm0
+ movdqu 32(%r12),%xmm1
+ movdqu 48(%r12),%xmm2
+ movdqu 64(%r12),%xmm3
+ movdqu 80(%r12),%xmm4
+ movq %rsp,%rax
+ movdqu 96(%r12),%xmm5
+ movl %edx,%r10d
+ movdqu 112(%r12),%xmm6
+ movdqa %xmm14,32(%rbp)
+
+ call _bsaes_decrypt8
+
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm2
+ movdqu 112(%r12),%xmm14
+ pxor %xmm13,%xmm4
+ movdqu %xmm15,0(%r13)
+ leaq 128(%r12),%r12
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ subq $8,%r14
+ jnc L$cbc_dec_loop
+
+ addq $8,%r14
+ jz L$cbc_dec_done
+
+ movdqu 0(%r12),%xmm15
+ movq %rsp,%rax
+ movl %edx,%r10d
+ cmpq $2,%r14
+ jb L$cbc_dec_one
+ movdqu 16(%r12),%xmm0
+ je L$cbc_dec_two
+ movdqu 32(%r12),%xmm1
+ cmpq $4,%r14
+ jb L$cbc_dec_three
+ movdqu 48(%r12),%xmm2
+ je L$cbc_dec_four
+ movdqu 64(%r12),%xmm3
+ cmpq $6,%r14
+ jb L$cbc_dec_five
+ movdqu 80(%r12),%xmm4
+ je L$cbc_dec_six
+ movdqu 96(%r12),%xmm5
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm14
+ pxor %xmm12,%xmm2
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ jmp L$cbc_dec_done
+.p2align 4
+L$cbc_dec_six:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm14
+ pxor %xmm11,%xmm6
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ jmp L$cbc_dec_done
+.p2align 4
+L$cbc_dec_five:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm14
+ pxor %xmm10,%xmm1
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ jmp L$cbc_dec_done
+.p2align 4
+L$cbc_dec_four:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm14
+ pxor %xmm9,%xmm3
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ jmp L$cbc_dec_done
+.p2align 4
+L$cbc_dec_three:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm14
+ pxor %xmm8,%xmm5
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ jmp L$cbc_dec_done
+.p2align 4
+L$cbc_dec_two:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm14
+ pxor %xmm7,%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ jmp L$cbc_dec_done
+.p2align 4
+L$cbc_dec_one:
+ leaq (%r12),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx
+ call _asm_AES_decrypt
+ pxor 32(%rbp),%xmm14
+ movdqu %xmm14,(%r13)
+ movdqa %xmm15,%xmm14
+
+L$cbc_dec_done:
+ movdqu %xmm14,(%rbx)
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+L$cbc_dec_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja L$cbc_dec_bzero
+
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+L$cbc_dec_epilogue:
+ retq
+
+
+.globl _bsaes_ctr32_encrypt_blocks
+
+.p2align 4
+_bsaes_ctr32_encrypt_blocks:
+ movq %rsp,%rax
+L$ctr_enc_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movdqu (%r8),%xmm0
+ movl 240(%rcx),%eax
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+ movdqa %xmm0,32(%rbp)
+ cmpq $8,%rdx
+ jb L$ctr_enc_short
+
+ movl %eax,%ebx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %ebx,%r10d
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7
+ movdqa %xmm7,(%rax)
+
+ movdqa (%rsp),%xmm8
+ leaq L$ADD1(%rip),%r11
+ movdqa 32(%rbp),%xmm15
+ movdqa -32(%r11),%xmm7
+.byte 102,68,15,56,0,199
+.byte 102,68,15,56,0,255
+ movdqa %xmm8,(%rsp)
+ jmp L$ctr_enc_loop
+.p2align 4
+L$ctr_enc_loop:
+ movdqa %xmm15,32(%rbp)
+ movdqa %xmm15,%xmm0
+ movdqa %xmm15,%xmm1
+ paddd 0(%r11),%xmm0
+ movdqa %xmm15,%xmm2
+ paddd 16(%r11),%xmm1
+ movdqa %xmm15,%xmm3
+ paddd 32(%r11),%xmm2
+ movdqa %xmm15,%xmm4
+ paddd 48(%r11),%xmm3
+ movdqa %xmm15,%xmm5
+ paddd 64(%r11),%xmm4
+ movdqa %xmm15,%xmm6
+ paddd 80(%r11),%xmm5
+ paddd 96(%r11),%xmm6
+
+
+
+ movdqa (%rsp),%xmm8
+ leaq 16(%rsp),%rax
+ movdqa -16(%r11),%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+ leaq L$BS0(%rip),%r11
+.byte 102,15,56,0,247
+ movl %ebx,%r10d
+
+ call _bsaes_encrypt8_bitslice
+
+ subq $8,%r14
+ jc L$ctr_enc_loop_done
+
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ movdqu 32(%r12),%xmm9
+ movdqu 48(%r12),%xmm10
+ movdqu 64(%r12),%xmm11
+ movdqu 80(%r12),%xmm12
+ movdqu 96(%r12),%xmm13
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ pxor %xmm15,%xmm7
+ movdqa 32(%rbp),%xmm15
+ pxor %xmm8,%xmm0
+ movdqu %xmm7,0(%r13)
+ pxor %xmm9,%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor %xmm10,%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor %xmm11,%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor %xmm12,%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor %xmm13,%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor %xmm14,%xmm4
+ movdqu %xmm1,96(%r13)
+ leaq L$ADD1(%rip),%r11
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ paddd 112(%r11),%xmm15
+ jnz L$ctr_enc_loop
+
+ jmp L$ctr_enc_done
+.p2align 4
+L$ctr_enc_loop_done:
+ addq $8,%r14
+ movdqu 0(%r12),%xmm7
+ pxor %xmm7,%xmm15
+ movdqu %xmm15,0(%r13)
+ cmpq $2,%r14
+ jb L$ctr_enc_done
+ movdqu 16(%r12),%xmm8
+ pxor %xmm8,%xmm0
+ movdqu %xmm0,16(%r13)
+ je L$ctr_enc_done
+ movdqu 32(%r12),%xmm9
+ pxor %xmm9,%xmm3
+ movdqu %xmm3,32(%r13)
+ cmpq $4,%r14
+ jb L$ctr_enc_done
+ movdqu 48(%r12),%xmm10
+ pxor %xmm10,%xmm5
+ movdqu %xmm5,48(%r13)
+ je L$ctr_enc_done
+ movdqu 64(%r12),%xmm11
+ pxor %xmm11,%xmm2
+ movdqu %xmm2,64(%r13)
+ cmpq $6,%r14
+ jb L$ctr_enc_done
+ movdqu 80(%r12),%xmm12
+ pxor %xmm12,%xmm6
+ movdqu %xmm6,80(%r13)
+ je L$ctr_enc_done
+ movdqu 96(%r12),%xmm13
+ pxor %xmm13,%xmm1
+ movdqu %xmm1,96(%r13)
+ jmp L$ctr_enc_done
+
+.p2align 4
+L$ctr_enc_short:
+ leaq 32(%rbp),%rdi
+ leaq 48(%rbp),%rsi
+ leaq (%r15),%rdx
+ call _asm_AES_encrypt
+ movdqu (%r12),%xmm0
+ leaq 16(%r12),%r12
+ movl 44(%rbp),%eax
+ bswapl %eax
+ pxor 48(%rbp),%xmm0
+ incl %eax
+ movdqu %xmm0,(%r13)
+ bswapl %eax
+ leaq 16(%r13),%r13
+ movl %eax,44(%rsp)
+ decq %r14
+ jnz L$ctr_enc_short
+
+L$ctr_enc_done:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+L$ctr_enc_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja L$ctr_enc_bzero
+
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+L$ctr_enc_epilogue:
+ retq
+
+.globl _bsaes_xts_encrypt
+
+.p2align 4
+_bsaes_xts_encrypt:
+ movq %rsp,%rax
+L$xts_enc_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+
+ leaq (%r9),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r8),%rdx
+ call _asm_AES_encrypt
+
+ movl 240(%r15),%eax
+ movq %r14,%rbx
+
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7
+ movdqa %xmm7,(%rax)
+
+ andq $-16,%r14
+ subq $128,%rsp
+ movdqa 32(%rbp),%xmm6
+
+ pxor %xmm14,%xmm14
+ movdqa L$xts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+
+ subq $128,%r14
+ jc L$xts_enc_short
+ jmp L$xts_enc_loop
+
+.p2align 4
+L$xts_enc_loop:
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp)
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ pxor %xmm14,%xmm6
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm1,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa L$xts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+
+ subq $128,%r14
+ jnc L$xts_enc_loop
+
+L$xts_enc_short:
+ addq $128,%r14
+ jz L$xts_enc_done
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je L$xts_enc_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je L$xts_enc_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je L$xts_enc_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je L$xts_enc_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je L$xts_enc_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je L$xts_enc_6
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp)
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm1,96(%r13)
+ leaq 112(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ jmp L$xts_enc_done
+.p2align 4
+L$xts_enc_6:
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+
+ movdqa 96(%rsp),%xmm6
+ jmp L$xts_enc_done
+.p2align 4
+L$xts_enc_5:
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ movdqu %xmm2,64(%r13)
+ leaq 80(%r13),%r13
+
+ movdqa 80(%rsp),%xmm6
+ jmp L$xts_enc_done
+.p2align 4
+L$xts_enc_4:
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ movdqu %xmm5,48(%r13)
+ leaq 64(%r13),%r13
+
+ movdqa 64(%rsp),%xmm6
+ jmp L$xts_enc_done
+.p2align 4
+L$xts_enc_3:
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm3,32(%r13)
+ leaq 48(%r13),%r13
+
+ movdqa 48(%rsp),%xmm6
+ jmp L$xts_enc_done
+.p2align 4
+L$xts_enc_2:
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+
+ movdqa 32(%rsp),%xmm6
+ jmp L$xts_enc_done
+.p2align 4
+L$xts_enc_1:
+ pxor %xmm15,%xmm7
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx
+ call _asm_AES_encrypt
+ pxor 32(%rbp),%xmm15
+
+
+
+
+
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+
+ movdqa 16(%rsp),%xmm6
+
+L$xts_enc_done:
+ andl $15,%ebx
+ jz L$xts_enc_ret
+ movq %r13,%rdx
+
+L$xts_enc_steal:
+ movzbl (%r12),%eax
+ movzbl -16(%rdx),%ecx
+ leaq 1(%r12),%r12
+ movb %al,-16(%rdx)
+ movb %cl,0(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz L$xts_enc_steal
+
+ movdqu -16(%r13),%xmm15
+ leaq 32(%rbp),%rdi
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call _asm_AES_encrypt
+ pxor 32(%rbp),%xmm6
+ movdqu %xmm6,-16(%r13)
+
+L$xts_enc_ret:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+L$xts_enc_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja L$xts_enc_bzero
+
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+L$xts_enc_epilogue:
+ retq
+
+
+.globl _bsaes_xts_decrypt
+
+.p2align 4
+_bsaes_xts_decrypt:
+ movq %rsp,%rax
+L$xts_dec_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+
+ leaq (%r9),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r8),%rdx
+ call _asm_AES_encrypt
+
+ movl 240(%r15),%eax
+ movq %r14,%rbx
+
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor (%rsp),%xmm7
+ movdqa %xmm6,(%rax)
+ movdqa %xmm7,(%rsp)
+
+ xorl %eax,%eax
+ andq $-16,%r14
+ testl $15,%ebx
+ setnz %al
+ shlq $4,%rax
+ subq %rax,%r14
+
+ subq $128,%rsp
+ movdqa 32(%rbp),%xmm6
+
+ pxor %xmm14,%xmm14
+ movdqa L$xts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+
+ subq $128,%r14
+ jc L$xts_dec_short
+ jmp L$xts_dec_loop
+
+.p2align 4
+L$xts_dec_loop:
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp)
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ pxor %xmm14,%xmm6
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa L$xts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+
+ subq $128,%r14
+ jnc L$xts_dec_loop
+
+L$xts_dec_short:
+ addq $128,%r14
+ jz L$xts_dec_done
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je L$xts_dec_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je L$xts_dec_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je L$xts_dec_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je L$xts_dec_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je L$xts_dec_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je L$xts_dec_6
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp)
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ leaq 112(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ jmp L$xts_dec_done
+.p2align 4
+L$xts_dec_6:
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+
+ movdqa 96(%rsp),%xmm6
+ jmp L$xts_dec_done
+.p2align 4
+L$xts_dec_5:
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ leaq 80(%r13),%r13
+
+ movdqa 80(%rsp),%xmm6
+ jmp L$xts_dec_done
+.p2align 4
+L$xts_dec_4:
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ leaq 64(%r13),%r13
+
+ movdqa 64(%rsp),%xmm6
+ jmp L$xts_dec_done
+.p2align 4
+L$xts_dec_3:
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ leaq 48(%r13),%r13
+
+ movdqa 48(%rsp),%xmm6
+ jmp L$xts_dec_done
+.p2align 4
+L$xts_dec_2:
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+
+ movdqa 32(%rsp),%xmm6
+ jmp L$xts_dec_done
+.p2align 4
+L$xts_dec_1:
+ pxor %xmm15,%xmm7
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx
+ call _asm_AES_decrypt
+ pxor 32(%rbp),%xmm15
+
+
+
+
+
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+
+ movdqa 16(%rsp),%xmm6
+
+L$xts_dec_done:
+ andl $15,%ebx
+ jz L$xts_dec_ret
+
+ pxor %xmm14,%xmm14
+ movdqa L$xts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ movdqa %xmm6,%xmm5
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ movdqu (%r12),%xmm15
+ pxor %xmm13,%xmm6
+
+ leaq 32(%rbp),%rdi
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call _asm_AES_decrypt
+ pxor 32(%rbp),%xmm6
+ movq %r13,%rdx
+ movdqu %xmm6,(%r13)
+
+L$xts_dec_steal:
+ movzbl 16(%r12),%eax
+ movzbl (%rdx),%ecx
+ leaq 1(%r12),%r12
+ movb %al,(%rdx)
+ movb %cl,16(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz L$xts_dec_steal
+
+ movdqu (%r13),%xmm15
+ leaq 32(%rbp),%rdi
+ pxor %xmm5,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call _asm_AES_decrypt
+ pxor 32(%rbp),%xmm5
+ movdqu %xmm5,(%r13)
+
+L$xts_dec_ret:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+L$xts_dec_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja L$xts_dec_bzero
+
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+L$xts_dec_epilogue:
+ retq
+
+
+.p2align 6
+_bsaes_const:
+L$M0ISR:
+.quad 0x0a0e0206070b0f03, 0x0004080c0d010509
+L$ISRM0:
+.quad 0x01040b0e0205080f, 0x0306090c00070a0d
+L$ISR:
+.quad 0x0504070602010003, 0x0f0e0d0c080b0a09
+L$BS0:
+.quad 0x5555555555555555, 0x5555555555555555
+L$BS1:
+.quad 0x3333333333333333, 0x3333333333333333
+L$BS2:
+.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+L$SR:
+.quad 0x0504070600030201, 0x0f0e0d0c0a09080b
+L$SRM0:
+.quad 0x0304090e00050a0f, 0x01060b0c0207080d
+L$M0SR:
+.quad 0x0a0e02060f03070b, 0x0004080c05090d01
+L$SWPUP:
+.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
+L$SWPUPM0SR:
+.quad 0x0a0d02060c03070b, 0x0004080f05090e01
+L$ADD1:
+.quad 0x0000000000000000, 0x0000000100000000
+L$ADD2:
+.quad 0x0000000000000000, 0x0000000200000000
+L$ADD3:
+.quad 0x0000000000000000, 0x0000000300000000
+L$ADD4:
+.quad 0x0000000000000000, 0x0000000400000000
+L$ADD5:
+.quad 0x0000000000000000, 0x0000000500000000
+L$ADD6:
+.quad 0x0000000000000000, 0x0000000600000000
+L$ADD7:
+.quad 0x0000000000000000, 0x0000000700000000
+L$ADD8:
+.quad 0x0000000000000000, 0x0000000800000000
+L$xts_magic:
+.long 0x87,0,1,0
+L$masks:
+.quad 0x0101010101010101, 0x0101010101010101
+.quad 0x0202020202020202, 0x0202020202020202
+.quad 0x0404040404040404, 0x0404040404040404
+.quad 0x0808080808080808, 0x0808080808080808
+L$M0:
+.quad 0x02060a0e03070b0f, 0x0004080c0105090d
+L$63:
+.quad 0x6363636363636363, 0x6363636363636363
+.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0
+.p2align 6
+
diff --git a/ext/libressl/crypto/aes/bsaes-masm-x86_64.S b/ext/libressl/crypto/aes/bsaes-masm-x86_64.S
new file mode 100644
index 0000000..6b1a97d
--- /dev/null
+++ b/ext/libressl/crypto/aes/bsaes-masm-x86_64.S
@@ -0,0 +1,2803 @@
+; 1 "crypto/aes/bsaes-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/aes/bsaes-masm-x86_64.S.tmp" 2
+OPTION DOTNAME
+
+; 1 "./crypto/x86_arch.h" 1
+
+
+; 16 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+; 40 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 "crypto/aes/bsaes-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+
+EXTERN asm_AES_encrypt:NEAR
+EXTERN asm_AES_decrypt:NEAR
+
+
+ALIGN 64
+_bsaes_encrypt8 PROC PRIVATE
+ lea r11,QWORD PTR[$L$BS0]
+
+ movdqa xmm8,XMMWORD PTR[rax]
+ lea rax,QWORD PTR[16+rax]
+ movdqa xmm7,XMMWORD PTR[80+r11]
+ pxor xmm15,xmm8
+ pxor xmm0,xmm8
+DB 102,68,15,56,0,255
+ pxor xmm1,xmm8
+DB 102,15,56,0,199
+ pxor xmm2,xmm8
+DB 102,15,56,0,207
+ pxor xmm3,xmm8
+DB 102,15,56,0,215
+ pxor xmm4,xmm8
+DB 102,15,56,0,223
+ pxor xmm5,xmm8
+DB 102,15,56,0,231
+ pxor xmm6,xmm8
+DB 102,15,56,0,239
+DB 102,15,56,0,247
+_bsaes_encrypt8_bitslice::
+ movdqa xmm7,XMMWORD PTR[r11]
+ movdqa xmm8,XMMWORD PTR[16+r11]
+ movdqa xmm9,xmm5
+ psrlq xmm5,1
+ movdqa xmm10,xmm3
+ psrlq xmm3,1
+ pxor xmm5,xmm6
+ pxor xmm3,xmm4
+ pand xmm5,xmm7
+ pand xmm3,xmm7
+ pxor xmm6,xmm5
+ psllq xmm5,1
+ pxor xmm4,xmm3
+ psllq xmm3,1
+ pxor xmm5,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm1
+ psrlq xmm1,1
+ movdqa xmm10,xmm15
+ psrlq xmm15,1
+ pxor xmm1,xmm2
+ pxor xmm15,xmm0
+ pand xmm1,xmm7
+ pand xmm15,xmm7
+ pxor xmm2,xmm1
+ psllq xmm1,1
+ pxor xmm0,xmm15
+ psllq xmm15,1
+ pxor xmm1,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[32+r11]
+ movdqa xmm9,xmm4
+ psrlq xmm4,2
+ movdqa xmm10,xmm3
+ psrlq xmm3,2
+ pxor xmm4,xmm6
+ pxor xmm3,xmm5
+ pand xmm4,xmm8
+ pand xmm3,xmm8
+ pxor xmm6,xmm4
+ psllq xmm4,2
+ pxor xmm5,xmm3
+ psllq xmm3,2
+ pxor xmm4,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,2
+ movdqa xmm10,xmm15
+ psrlq xmm15,2
+ pxor xmm0,xmm2
+ pxor xmm15,xmm1
+ pand xmm0,xmm8
+ pand xmm15,xmm8
+ pxor xmm2,xmm0
+ psllq xmm0,2
+ pxor xmm1,xmm15
+ psllq xmm15,2
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm9,xmm2
+ psrlq xmm2,4
+ movdqa xmm10,xmm1
+ psrlq xmm1,4
+ pxor xmm2,xmm6
+ pxor xmm1,xmm5
+ pand xmm2,xmm7
+ pand xmm1,xmm7
+ pxor xmm6,xmm2
+ psllq xmm2,4
+ pxor xmm5,xmm1
+ psllq xmm1,4
+ pxor xmm2,xmm9
+ pxor xmm1,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,4
+ movdqa xmm10,xmm15
+ psrlq xmm15,4
+ pxor xmm0,xmm4
+ pxor xmm15,xmm3
+ pand xmm0,xmm7
+ pand xmm15,xmm7
+ pxor xmm4,xmm0
+ psllq xmm0,4
+ pxor xmm3,xmm15
+ psllq xmm15,4
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ dec r10d
+ jmp $L$enc_sbox
+ALIGN 16
+$L$enc_loop::
+ pxor xmm15,XMMWORD PTR[rax]
+ pxor xmm0,XMMWORD PTR[16+rax]
+DB 102,68,15,56,0,255
+ pxor xmm1,XMMWORD PTR[32+rax]
+DB 102,15,56,0,199
+ pxor xmm2,XMMWORD PTR[48+rax]
+DB 102,15,56,0,207
+ pxor xmm3,XMMWORD PTR[64+rax]
+DB 102,15,56,0,215
+ pxor xmm4,XMMWORD PTR[80+rax]
+DB 102,15,56,0,223
+ pxor xmm5,XMMWORD PTR[96+rax]
+DB 102,15,56,0,231
+ pxor xmm6,XMMWORD PTR[112+rax]
+DB 102,15,56,0,239
+ lea rax,QWORD PTR[128+rax]
+DB 102,15,56,0,247
+$L$enc_sbox::
+ pxor xmm4,xmm5
+ pxor xmm1,xmm0
+ pxor xmm2,xmm15
+ pxor xmm5,xmm1
+ pxor xmm4,xmm15
+
+ pxor xmm5,xmm2
+ pxor xmm2,xmm6
+ pxor xmm6,xmm4
+ pxor xmm2,xmm3
+ pxor xmm3,xmm4
+ pxor xmm2,xmm0
+
+ pxor xmm1,xmm6
+ pxor xmm0,xmm4
+ movdqa xmm10,xmm6
+ movdqa xmm9,xmm0
+ movdqa xmm8,xmm4
+ movdqa xmm12,xmm1
+ movdqa xmm11,xmm5
+
+ pxor xmm10,xmm3
+ pxor xmm9,xmm1
+ pxor xmm8,xmm2
+ movdqa xmm13,xmm10
+ pxor xmm12,xmm3
+ movdqa xmm7,xmm9
+ pxor xmm11,xmm15
+ movdqa xmm14,xmm10
+
+ por xmm9,xmm8
+ por xmm10,xmm11
+ pxor xmm14,xmm7
+ pand xmm13,xmm11
+ pxor xmm11,xmm8
+ pand xmm7,xmm8
+ pand xmm14,xmm11
+ movdqa xmm11,xmm2
+ pxor xmm11,xmm15
+ pand xmm12,xmm11
+ pxor xmm10,xmm12
+ pxor xmm9,xmm12
+ movdqa xmm12,xmm6
+ movdqa xmm11,xmm4
+ pxor xmm12,xmm0
+ pxor xmm11,xmm5
+ movdqa xmm8,xmm12
+ pand xmm12,xmm11
+ por xmm8,xmm11
+ pxor xmm7,xmm12
+ pxor xmm10,xmm14
+ pxor xmm9,xmm13
+ pxor xmm8,xmm14
+ movdqa xmm11,xmm1
+ pxor xmm7,xmm13
+ movdqa xmm12,xmm3
+ pxor xmm8,xmm13
+ movdqa xmm13,xmm0
+ pand xmm11,xmm2
+ movdqa xmm14,xmm6
+ pand xmm12,xmm15
+ pand xmm13,xmm4
+ por xmm14,xmm5
+ pxor xmm10,xmm11
+ pxor xmm9,xmm12
+ pxor xmm8,xmm13
+ pxor xmm7,xmm14
+
+
+
+
+
+ movdqa xmm11,xmm10
+ pand xmm10,xmm8
+ pxor xmm11,xmm9
+
+ movdqa xmm13,xmm7
+ movdqa xmm14,xmm11
+ pxor xmm13,xmm10
+ pand xmm14,xmm13
+
+ movdqa xmm12,xmm8
+ pxor xmm14,xmm9
+ pxor xmm12,xmm7
+
+ pxor xmm10,xmm9
+
+ pand xmm12,xmm10
+
+ movdqa xmm9,xmm13
+ pxor xmm12,xmm7
+
+ pxor xmm9,xmm12
+ pxor xmm8,xmm12
+
+ pand xmm9,xmm7
+
+ pxor xmm13,xmm9
+ pxor xmm8,xmm9
+
+ pand xmm13,xmm14
+
+ pxor xmm13,xmm11
+ movdqa xmm11,xmm5
+ movdqa xmm7,xmm4
+ movdqa xmm9,xmm14
+ pxor xmm9,xmm13
+ pand xmm9,xmm5
+ pxor xmm5,xmm4
+ pand xmm4,xmm14
+ pand xmm5,xmm13
+ pxor xmm5,xmm4
+ pxor xmm4,xmm9
+ pxor xmm11,xmm15
+ pxor xmm7,xmm2
+ pxor xmm14,xmm12
+ pxor xmm13,xmm8
+ movdqa xmm10,xmm14
+ movdqa xmm9,xmm12
+ pxor xmm10,xmm13
+ pxor xmm9,xmm8
+ pand xmm10,xmm11
+ pand xmm9,xmm15
+ pxor xmm11,xmm7
+ pxor xmm15,xmm2
+ pand xmm7,xmm14
+ pand xmm2,xmm12
+ pand xmm11,xmm13
+ pand xmm15,xmm8
+ pxor xmm7,xmm11
+ pxor xmm15,xmm2
+ pxor xmm11,xmm10
+ pxor xmm2,xmm9
+ pxor xmm5,xmm11
+ pxor xmm15,xmm11
+ pxor xmm4,xmm7
+ pxor xmm2,xmm7
+
+ movdqa xmm11,xmm6
+ movdqa xmm7,xmm0
+ pxor xmm11,xmm3
+ pxor xmm7,xmm1
+ movdqa xmm10,xmm14
+ movdqa xmm9,xmm12
+ pxor xmm10,xmm13
+ pxor xmm9,xmm8
+ pand xmm10,xmm11
+ pand xmm9,xmm3
+ pxor xmm11,xmm7
+ pxor xmm3,xmm1
+ pand xmm7,xmm14
+ pand xmm1,xmm12
+ pand xmm11,xmm13
+ pand xmm3,xmm8
+ pxor xmm7,xmm11
+ pxor xmm3,xmm1
+ pxor xmm11,xmm10
+ pxor xmm1,xmm9
+ pxor xmm14,xmm12
+ pxor xmm13,xmm8
+ movdqa xmm10,xmm14
+ pxor xmm10,xmm13
+ pand xmm10,xmm6
+ pxor xmm6,xmm0
+ pand xmm0,xmm14
+ pand xmm6,xmm13
+ pxor xmm6,xmm0
+ pxor xmm0,xmm10
+ pxor xmm6,xmm11
+ pxor xmm3,xmm11
+ pxor xmm0,xmm7
+ pxor xmm1,xmm7
+ pxor xmm6,xmm15
+ pxor xmm0,xmm5
+ pxor xmm3,xmm6
+ pxor xmm5,xmm15
+ pxor xmm15,xmm0
+
+ pxor xmm0,xmm4
+ pxor xmm4,xmm1
+ pxor xmm1,xmm2
+ pxor xmm2,xmm4
+ pxor xmm3,xmm4
+
+ pxor xmm5,xmm2
+ dec r10d
+ jl $L$enc_done
+ pshufd xmm7,xmm15,093h
+ pshufd xmm8,xmm0,093h
+ pxor xmm15,xmm7
+ pshufd xmm9,xmm3,093h
+ pxor xmm0,xmm8
+ pshufd xmm10,xmm5,093h
+ pxor xmm3,xmm9
+ pshufd xmm11,xmm2,093h
+ pxor xmm5,xmm10
+ pshufd xmm12,xmm6,093h
+ pxor xmm2,xmm11
+ pshufd xmm13,xmm1,093h
+ pxor xmm6,xmm12
+ pshufd xmm14,xmm4,093h
+ pxor xmm1,xmm13
+ pxor xmm4,xmm14
+
+ pxor xmm8,xmm15
+ pxor xmm7,xmm4
+ pxor xmm8,xmm4
+ pshufd xmm15,xmm15,04Eh
+ pxor xmm9,xmm0
+ pshufd xmm0,xmm0,04Eh
+ pxor xmm12,xmm2
+ pxor xmm15,xmm7
+ pxor xmm13,xmm6
+ pxor xmm0,xmm8
+ pxor xmm11,xmm5
+ pshufd xmm7,xmm2,04Eh
+ pxor xmm14,xmm1
+ pshufd xmm8,xmm6,04Eh
+ pxor xmm10,xmm3
+ pshufd xmm2,xmm5,04Eh
+ pxor xmm10,xmm4
+ pshufd xmm6,xmm4,04Eh
+ pxor xmm11,xmm4
+ pshufd xmm5,xmm1,04Eh
+ pxor xmm7,xmm11
+ pshufd xmm1,xmm3,04Eh
+ pxor xmm8,xmm12
+ pxor xmm2,xmm10
+ pxor xmm6,xmm14
+ pxor xmm5,xmm13
+ movdqa xmm3,xmm7
+ pxor xmm1,xmm9
+ movdqa xmm4,xmm8
+ movdqa xmm7,XMMWORD PTR[48+r11]
+ jnz $L$enc_loop
+ movdqa xmm7,XMMWORD PTR[64+r11]
+ jmp $L$enc_loop
+ALIGN 16
+$L$enc_done::
+ movdqa xmm7,XMMWORD PTR[r11]
+ movdqa xmm8,XMMWORD PTR[16+r11]
+ movdqa xmm9,xmm1
+ psrlq xmm1,1
+ movdqa xmm10,xmm2
+ psrlq xmm2,1
+ pxor xmm1,xmm4
+ pxor xmm2,xmm6
+ pand xmm1,xmm7
+ pand xmm2,xmm7
+ pxor xmm4,xmm1
+ psllq xmm1,1
+ pxor xmm6,xmm2
+ psllq xmm2,1
+ pxor xmm1,xmm9
+ pxor xmm2,xmm10
+ movdqa xmm9,xmm3
+ psrlq xmm3,1
+ movdqa xmm10,xmm15
+ psrlq xmm15,1
+ pxor xmm3,xmm5
+ pxor xmm15,xmm0
+ pand xmm3,xmm7
+ pand xmm15,xmm7
+ pxor xmm5,xmm3
+ psllq xmm3,1
+ pxor xmm0,xmm15
+ psllq xmm15,1
+ pxor xmm3,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[32+r11]
+ movdqa xmm9,xmm6
+ psrlq xmm6,2
+ movdqa xmm10,xmm2
+ psrlq xmm2,2
+ pxor xmm6,xmm4
+ pxor xmm2,xmm1
+ pand xmm6,xmm8
+ pand xmm2,xmm8
+ pxor xmm4,xmm6
+ psllq xmm6,2
+ pxor xmm1,xmm2
+ psllq xmm2,2
+ pxor xmm6,xmm9
+ pxor xmm2,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,2
+ movdqa xmm10,xmm15
+ psrlq xmm15,2
+ pxor xmm0,xmm5
+ pxor xmm15,xmm3
+ pand xmm0,xmm8
+ pand xmm15,xmm8
+ pxor xmm5,xmm0
+ psllq xmm0,2
+ pxor xmm3,xmm15
+ psllq xmm15,2
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm9,xmm5
+ psrlq xmm5,4
+ movdqa xmm10,xmm3
+ psrlq xmm3,4
+ pxor xmm5,xmm4
+ pxor xmm3,xmm1
+ pand xmm5,xmm7
+ pand xmm3,xmm7
+ pxor xmm4,xmm5
+ psllq xmm5,4
+ pxor xmm1,xmm3
+ psllq xmm3,4
+ pxor xmm5,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,4
+ movdqa xmm10,xmm15
+ psrlq xmm15,4
+ pxor xmm0,xmm6
+ pxor xmm15,xmm2
+ pand xmm0,xmm7
+ pand xmm15,xmm7
+ pxor xmm6,xmm0
+ psllq xmm0,4
+ pxor xmm2,xmm15
+ psllq xmm15,4
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[rax]
+ pxor xmm3,xmm7
+ pxor xmm5,xmm7
+ pxor xmm2,xmm7
+ pxor xmm6,xmm7
+ pxor xmm1,xmm7
+ pxor xmm4,xmm7
+ pxor xmm15,xmm7
+ pxor xmm0,xmm7
+ DB 0F3h,0C3h ;repret
+_bsaes_encrypt8 ENDP
+
+
+ALIGN 64
+_bsaes_decrypt8 PROC PRIVATE
+ lea r11,QWORD PTR[$L$BS0]
+
+ movdqa xmm8,XMMWORD PTR[rax]
+ lea rax,QWORD PTR[16+rax]
+ movdqa xmm7,XMMWORD PTR[((-48))+r11]
+ pxor xmm15,xmm8
+ pxor xmm0,xmm8
+DB 102,68,15,56,0,255
+ pxor xmm1,xmm8
+DB 102,15,56,0,199
+ pxor xmm2,xmm8
+DB 102,15,56,0,207
+ pxor xmm3,xmm8
+DB 102,15,56,0,215
+ pxor xmm4,xmm8
+DB 102,15,56,0,223
+ pxor xmm5,xmm8
+DB 102,15,56,0,231
+ pxor xmm6,xmm8
+DB 102,15,56,0,239
+DB 102,15,56,0,247
+ movdqa xmm7,XMMWORD PTR[r11]
+ movdqa xmm8,XMMWORD PTR[16+r11]
+ movdqa xmm9,xmm5
+ psrlq xmm5,1
+ movdqa xmm10,xmm3
+ psrlq xmm3,1
+ pxor xmm5,xmm6
+ pxor xmm3,xmm4
+ pand xmm5,xmm7
+ pand xmm3,xmm7
+ pxor xmm6,xmm5
+ psllq xmm5,1
+ pxor xmm4,xmm3
+ psllq xmm3,1
+ pxor xmm5,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm1
+ psrlq xmm1,1
+ movdqa xmm10,xmm15
+ psrlq xmm15,1
+ pxor xmm1,xmm2
+ pxor xmm15,xmm0
+ pand xmm1,xmm7
+ pand xmm15,xmm7
+ pxor xmm2,xmm1
+ psllq xmm1,1
+ pxor xmm0,xmm15
+ psllq xmm15,1
+ pxor xmm1,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[32+r11]
+ movdqa xmm9,xmm4
+ psrlq xmm4,2
+ movdqa xmm10,xmm3
+ psrlq xmm3,2
+ pxor xmm4,xmm6
+ pxor xmm3,xmm5
+ pand xmm4,xmm8
+ pand xmm3,xmm8
+ pxor xmm6,xmm4
+ psllq xmm4,2
+ pxor xmm5,xmm3
+ psllq xmm3,2
+ pxor xmm4,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,2
+ movdqa xmm10,xmm15
+ psrlq xmm15,2
+ pxor xmm0,xmm2
+ pxor xmm15,xmm1
+ pand xmm0,xmm8
+ pand xmm15,xmm8
+ pxor xmm2,xmm0
+ psllq xmm0,2
+ pxor xmm1,xmm15
+ psllq xmm15,2
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm9,xmm2
+ psrlq xmm2,4
+ movdqa xmm10,xmm1
+ psrlq xmm1,4
+ pxor xmm2,xmm6
+ pxor xmm1,xmm5
+ pand xmm2,xmm7
+ pand xmm1,xmm7
+ pxor xmm6,xmm2
+ psllq xmm2,4
+ pxor xmm5,xmm1
+ psllq xmm1,4
+ pxor xmm2,xmm9
+ pxor xmm1,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,4
+ movdqa xmm10,xmm15
+ psrlq xmm15,4
+ pxor xmm0,xmm4
+ pxor xmm15,xmm3
+ pand xmm0,xmm7
+ pand xmm15,xmm7
+ pxor xmm4,xmm0
+ psllq xmm0,4
+ pxor xmm3,xmm15
+ psllq xmm15,4
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ dec r10d
+ jmp $L$dec_sbox
+ALIGN 16
+$L$dec_loop::
+ pxor xmm15,XMMWORD PTR[rax]
+ pxor xmm0,XMMWORD PTR[16+rax]
+DB 102,68,15,56,0,255
+ pxor xmm1,XMMWORD PTR[32+rax]
+DB 102,15,56,0,199
+ pxor xmm2,XMMWORD PTR[48+rax]
+DB 102,15,56,0,207
+ pxor xmm3,XMMWORD PTR[64+rax]
+DB 102,15,56,0,215
+ pxor xmm4,XMMWORD PTR[80+rax]
+DB 102,15,56,0,223
+ pxor xmm5,XMMWORD PTR[96+rax]
+DB 102,15,56,0,231
+ pxor xmm6,XMMWORD PTR[112+rax]
+DB 102,15,56,0,239
+ lea rax,QWORD PTR[128+rax]
+DB 102,15,56,0,247
+$L$dec_sbox::
+ pxor xmm2,xmm3
+
+ pxor xmm3,xmm6
+ pxor xmm1,xmm6
+ pxor xmm5,xmm3
+ pxor xmm6,xmm5
+ pxor xmm0,xmm6
+
+ pxor xmm15,xmm0
+ pxor xmm1,xmm4
+ pxor xmm2,xmm15
+ pxor xmm4,xmm15
+ pxor xmm0,xmm2
+ movdqa xmm10,xmm2
+ movdqa xmm9,xmm6
+ movdqa xmm8,xmm0
+ movdqa xmm12,xmm3
+ movdqa xmm11,xmm4
+
+ pxor xmm10,xmm15
+ pxor xmm9,xmm3
+ pxor xmm8,xmm5
+ movdqa xmm13,xmm10
+ pxor xmm12,xmm15
+ movdqa xmm7,xmm9
+ pxor xmm11,xmm1
+ movdqa xmm14,xmm10
+
+ por xmm9,xmm8
+ por xmm10,xmm11
+ pxor xmm14,xmm7
+ pand xmm13,xmm11
+ pxor xmm11,xmm8
+ pand xmm7,xmm8
+ pand xmm14,xmm11
+ movdqa xmm11,xmm5
+ pxor xmm11,xmm1
+ pand xmm12,xmm11
+ pxor xmm10,xmm12
+ pxor xmm9,xmm12
+ movdqa xmm12,xmm2
+ movdqa xmm11,xmm0
+ pxor xmm12,xmm6
+ pxor xmm11,xmm4
+ movdqa xmm8,xmm12
+ pand xmm12,xmm11
+ por xmm8,xmm11
+ pxor xmm7,xmm12
+ pxor xmm10,xmm14
+ pxor xmm9,xmm13
+ pxor xmm8,xmm14
+ movdqa xmm11,xmm3
+ pxor xmm7,xmm13
+ movdqa xmm12,xmm15
+ pxor xmm8,xmm13
+ movdqa xmm13,xmm6
+ pand xmm11,xmm5
+ movdqa xmm14,xmm2
+ pand xmm12,xmm1
+ pand xmm13,xmm0
+ por xmm14,xmm4
+ pxor xmm10,xmm11
+ pxor xmm9,xmm12
+ pxor xmm8,xmm13
+ pxor xmm7,xmm14
+
+
+
+
+
+ movdqa xmm11,xmm10
+ pand xmm10,xmm8
+ pxor xmm11,xmm9
+
+ movdqa xmm13,xmm7
+ movdqa xmm14,xmm11
+ pxor xmm13,xmm10
+ pand xmm14,xmm13
+
+ movdqa xmm12,xmm8
+ pxor xmm14,xmm9
+ pxor xmm12,xmm7
+
+ pxor xmm10,xmm9
+
+ pand xmm12,xmm10
+
+ movdqa xmm9,xmm13
+ pxor xmm12,xmm7
+
+ pxor xmm9,xmm12
+ pxor xmm8,xmm12
+
+ pand xmm9,xmm7
+
+ pxor xmm13,xmm9
+ pxor xmm8,xmm9
+
+ pand xmm13,xmm14
+
+ pxor xmm13,xmm11
+ movdqa xmm11,xmm4
+ movdqa xmm7,xmm0
+ movdqa xmm9,xmm14
+ pxor xmm9,xmm13
+ pand xmm9,xmm4
+ pxor xmm4,xmm0
+ pand xmm0,xmm14
+ pand xmm4,xmm13
+ pxor xmm4,xmm0
+ pxor xmm0,xmm9
+ pxor xmm11,xmm1
+ pxor xmm7,xmm5
+ pxor xmm14,xmm12
+ pxor xmm13,xmm8
+ movdqa xmm10,xmm14
+ movdqa xmm9,xmm12
+ pxor xmm10,xmm13
+ pxor xmm9,xmm8
+ pand xmm10,xmm11
+ pand xmm9,xmm1
+ pxor xmm11,xmm7
+ pxor xmm1,xmm5
+ pand xmm7,xmm14
+ pand xmm5,xmm12
+ pand xmm11,xmm13
+ pand xmm1,xmm8
+ pxor xmm7,xmm11
+ pxor xmm1,xmm5
+ pxor xmm11,xmm10
+ pxor xmm5,xmm9
+ pxor xmm4,xmm11
+ pxor xmm1,xmm11
+ pxor xmm0,xmm7
+ pxor xmm5,xmm7
+
+ movdqa xmm11,xmm2
+ movdqa xmm7,xmm6
+ pxor xmm11,xmm15
+ pxor xmm7,xmm3
+ movdqa xmm10,xmm14
+ movdqa xmm9,xmm12
+ pxor xmm10,xmm13
+ pxor xmm9,xmm8
+ pand xmm10,xmm11
+ pand xmm9,xmm15
+ pxor xmm11,xmm7
+ pxor xmm15,xmm3
+ pand xmm7,xmm14
+ pand xmm3,xmm12
+ pand xmm11,xmm13
+ pand xmm15,xmm8
+ pxor xmm7,xmm11
+ pxor xmm15,xmm3
+ pxor xmm11,xmm10
+ pxor xmm3,xmm9
+ pxor xmm14,xmm12
+ pxor xmm13,xmm8
+ movdqa xmm10,xmm14
+ pxor xmm10,xmm13
+ pand xmm10,xmm2
+ pxor xmm2,xmm6
+ pand xmm6,xmm14
+ pand xmm2,xmm13
+ pxor xmm2,xmm6
+ pxor xmm6,xmm10
+ pxor xmm2,xmm11
+ pxor xmm15,xmm11
+ pxor xmm6,xmm7
+ pxor xmm3,xmm7
+ pxor xmm0,xmm6
+ pxor xmm5,xmm4
+
+ pxor xmm3,xmm0
+ pxor xmm1,xmm6
+ pxor xmm4,xmm6
+ pxor xmm3,xmm1
+ pxor xmm6,xmm15
+ pxor xmm3,xmm4
+ pxor xmm2,xmm5
+ pxor xmm5,xmm0
+ pxor xmm2,xmm3
+
+ pxor xmm3,xmm15
+ pxor xmm6,xmm2
+ dec r10d
+ jl $L$dec_done
+
+ pshufd xmm7,xmm15,04Eh
+ pshufd xmm13,xmm2,04Eh
+ pxor xmm7,xmm15
+ pshufd xmm14,xmm4,04Eh
+ pxor xmm13,xmm2
+ pshufd xmm8,xmm0,04Eh
+ pxor xmm14,xmm4
+ pshufd xmm9,xmm5,04Eh
+ pxor xmm8,xmm0
+ pshufd xmm10,xmm3,04Eh
+ pxor xmm9,xmm5
+ pxor xmm15,xmm13
+ pxor xmm0,xmm13
+ pshufd xmm11,xmm1,04Eh
+ pxor xmm10,xmm3
+ pxor xmm5,xmm7
+ pxor xmm3,xmm8
+ pshufd xmm12,xmm6,04Eh
+ pxor xmm11,xmm1
+ pxor xmm0,xmm14
+ pxor xmm1,xmm9
+ pxor xmm12,xmm6
+
+ pxor xmm5,xmm14
+ pxor xmm3,xmm13
+ pxor xmm1,xmm13
+ pxor xmm6,xmm10
+ pxor xmm2,xmm11
+ pxor xmm1,xmm14
+ pxor xmm6,xmm14
+ pxor xmm4,xmm12
+ pshufd xmm7,xmm15,093h
+ pshufd xmm8,xmm0,093h
+ pxor xmm15,xmm7
+ pshufd xmm9,xmm5,093h
+ pxor xmm0,xmm8
+ pshufd xmm10,xmm3,093h
+ pxor xmm5,xmm9
+ pshufd xmm11,xmm1,093h
+ pxor xmm3,xmm10
+ pshufd xmm12,xmm6,093h
+ pxor xmm1,xmm11
+ pshufd xmm13,xmm2,093h
+ pxor xmm6,xmm12
+ pshufd xmm14,xmm4,093h
+ pxor xmm2,xmm13
+ pxor xmm4,xmm14
+
+ pxor xmm8,xmm15
+ pxor xmm7,xmm4
+ pxor xmm8,xmm4
+ pshufd xmm15,xmm15,04Eh
+ pxor xmm9,xmm0
+ pshufd xmm0,xmm0,04Eh
+ pxor xmm12,xmm1
+ pxor xmm15,xmm7
+ pxor xmm13,xmm6
+ pxor xmm0,xmm8
+ pxor xmm11,xmm3
+ pshufd xmm7,xmm1,04Eh
+ pxor xmm14,xmm2
+ pshufd xmm8,xmm6,04Eh
+ pxor xmm10,xmm5
+ pshufd xmm1,xmm3,04Eh
+ pxor xmm10,xmm4
+ pshufd xmm6,xmm4,04Eh
+ pxor xmm11,xmm4
+ pshufd xmm3,xmm2,04Eh
+ pxor xmm7,xmm11
+ pshufd xmm2,xmm5,04Eh
+ pxor xmm8,xmm12
+ pxor xmm10,xmm1
+ pxor xmm6,xmm14
+ pxor xmm13,xmm3
+ movdqa xmm3,xmm7
+ pxor xmm2,xmm9
+ movdqa xmm5,xmm13
+ movdqa xmm4,xmm8
+ movdqa xmm1,xmm2
+ movdqa xmm2,xmm10
+ movdqa xmm7,XMMWORD PTR[((-16))+r11]
+ jnz $L$dec_loop
+ movdqa xmm7,XMMWORD PTR[((-32))+r11]
+ jmp $L$dec_loop
+ALIGN 16
+$L$dec_done::
+ movdqa xmm7,XMMWORD PTR[r11]
+ movdqa xmm8,XMMWORD PTR[16+r11]
+ movdqa xmm9,xmm2
+ psrlq xmm2,1
+ movdqa xmm10,xmm1
+ psrlq xmm1,1
+ pxor xmm2,xmm4
+ pxor xmm1,xmm6
+ pand xmm2,xmm7
+ pand xmm1,xmm7
+ pxor xmm4,xmm2
+ psllq xmm2,1
+ pxor xmm6,xmm1
+ psllq xmm1,1
+ pxor xmm2,xmm9
+ pxor xmm1,xmm10
+ movdqa xmm9,xmm5
+ psrlq xmm5,1
+ movdqa xmm10,xmm15
+ psrlq xmm15,1
+ pxor xmm5,xmm3
+ pxor xmm15,xmm0
+ pand xmm5,xmm7
+ pand xmm15,xmm7
+ pxor xmm3,xmm5
+ psllq xmm5,1
+ pxor xmm0,xmm15
+ psllq xmm15,1
+ pxor xmm5,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[32+r11]
+ movdqa xmm9,xmm6
+ psrlq xmm6,2
+ movdqa xmm10,xmm1
+ psrlq xmm1,2
+ pxor xmm6,xmm4
+ pxor xmm1,xmm2
+ pand xmm6,xmm8
+ pand xmm1,xmm8
+ pxor xmm4,xmm6
+ psllq xmm6,2
+ pxor xmm2,xmm1
+ psllq xmm1,2
+ pxor xmm6,xmm9
+ pxor xmm1,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,2
+ movdqa xmm10,xmm15
+ psrlq xmm15,2
+ pxor xmm0,xmm3
+ pxor xmm15,xmm5
+ pand xmm0,xmm8
+ pand xmm15,xmm8
+ pxor xmm3,xmm0
+ psllq xmm0,2
+ pxor xmm5,xmm15
+ psllq xmm15,2
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm9,xmm3
+ psrlq xmm3,4
+ movdqa xmm10,xmm5
+ psrlq xmm5,4
+ pxor xmm3,xmm4
+ pxor xmm5,xmm2
+ pand xmm3,xmm7
+ pand xmm5,xmm7
+ pxor xmm4,xmm3
+ psllq xmm3,4
+ pxor xmm2,xmm5
+ psllq xmm5,4
+ pxor xmm3,xmm9
+ pxor xmm5,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,4
+ movdqa xmm10,xmm15
+ psrlq xmm15,4
+ pxor xmm0,xmm6
+ pxor xmm15,xmm1
+ pand xmm0,xmm7
+ pand xmm15,xmm7
+ pxor xmm6,xmm0
+ psllq xmm0,4
+ pxor xmm1,xmm15
+ psllq xmm15,4
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[rax]
+ pxor xmm5,xmm7
+ pxor xmm3,xmm7
+ pxor xmm1,xmm7
+ pxor xmm6,xmm7
+ pxor xmm2,xmm7
+ pxor xmm4,xmm7
+ pxor xmm15,xmm7
+ pxor xmm0,xmm7
+ DB 0F3h,0C3h ;repret
+_bsaes_decrypt8 ENDP
+
+ALIGN 16
+_bsaes_key_convert PROC PRIVATE
+ lea r11,QWORD PTR[$L$masks]
+ movdqu xmm7,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ movdqa xmm0,XMMWORD PTR[r11]
+ movdqa xmm1,XMMWORD PTR[16+r11]
+ movdqa xmm2,XMMWORD PTR[32+r11]
+ movdqa xmm3,XMMWORD PTR[48+r11]
+ movdqa xmm4,XMMWORD PTR[64+r11]
+ pcmpeqd xmm5,xmm5
+
+ movdqu xmm6,XMMWORD PTR[rcx]
+ movdqa XMMWORD PTR[rax],xmm7
+ lea rax,QWORD PTR[16+rax]
+ dec r10d
+ jmp $L$key_loop
+ALIGN 16
+$L$key_loop::
+DB 102,15,56,0,244
+
+ movdqa xmm8,xmm0
+ movdqa xmm9,xmm1
+
+ pand xmm8,xmm6
+ pand xmm9,xmm6
+ movdqa xmm10,xmm2
+ pcmpeqb xmm8,xmm0
+ psllq xmm0,4
+ movdqa xmm11,xmm3
+ pcmpeqb xmm9,xmm1
+ psllq xmm1,4
+
+ pand xmm10,xmm6
+ pand xmm11,xmm6
+ movdqa xmm12,xmm0
+ pcmpeqb xmm10,xmm2
+ psllq xmm2,4
+ movdqa xmm13,xmm1
+ pcmpeqb xmm11,xmm3
+ psllq xmm3,4
+
+ movdqa xmm14,xmm2
+ movdqa xmm15,xmm3
+ pxor xmm8,xmm5
+ pxor xmm9,xmm5
+
+ pand xmm12,xmm6
+ pand xmm13,xmm6
+ movdqa XMMWORD PTR[rax],xmm8
+ pcmpeqb xmm12,xmm0
+ psrlq xmm0,4
+ movdqa XMMWORD PTR[16+rax],xmm9
+ pcmpeqb xmm13,xmm1
+ psrlq xmm1,4
+ lea rcx,QWORD PTR[16+rcx]
+
+ pand xmm14,xmm6
+ pand xmm15,xmm6
+ movdqa XMMWORD PTR[32+rax],xmm10
+ pcmpeqb xmm14,xmm2
+ psrlq xmm2,4
+ movdqa XMMWORD PTR[48+rax],xmm11
+ pcmpeqb xmm15,xmm3
+ psrlq xmm3,4
+ movdqu xmm6,XMMWORD PTR[rcx]
+
+ pxor xmm13,xmm5
+ pxor xmm14,xmm5
+ movdqa XMMWORD PTR[64+rax],xmm12
+ movdqa XMMWORD PTR[80+rax],xmm13
+ movdqa XMMWORD PTR[96+rax],xmm14
+ movdqa XMMWORD PTR[112+rax],xmm15
+ lea rax,QWORD PTR[128+rax]
+ dec r10d
+ jnz $L$key_loop
+
+ movdqa xmm7,XMMWORD PTR[80+r11]
+
+ DB 0F3h,0C3h ;repret
+_bsaes_key_convert ENDP
+EXTERN asm_AES_cbc_encrypt:NEAR
+PUBLIC bsaes_cbc_encrypt
+
+ALIGN 16
+bsaes_cbc_encrypt PROC PUBLIC
+ mov r11d,DWORD PTR[48+rsp]
+ cmp r11d,0
+ jne asm_AES_cbc_encrypt
+ cmp r8,128
+ jb asm_AES_cbc_encrypt
+
+ mov rax,rsp
+$L$cbc_dec_prologue::
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-72))+rsp]
+ mov r10,QWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[((-160))+rsp]
+ movaps XMMWORD PTR[64+rsp],xmm6
+ movaps XMMWORD PTR[80+rsp],xmm7
+ movaps XMMWORD PTR[96+rsp],xmm8
+ movaps XMMWORD PTR[112+rsp],xmm9
+ movaps XMMWORD PTR[128+rsp],xmm10
+ movaps XMMWORD PTR[144+rsp],xmm11
+ movaps XMMWORD PTR[160+rsp],xmm12
+ movaps XMMWORD PTR[176+rsp],xmm13
+ movaps XMMWORD PTR[192+rsp],xmm14
+ movaps XMMWORD PTR[208+rsp],xmm15
+$L$cbc_dec_body::
+ mov rbp,rsp
+ mov eax,DWORD PTR[240+r9]
+ mov r12,rcx
+ mov r13,rdx
+ mov r14,r8
+ mov r15,r9
+ mov rbx,r10
+ shr r14,4
+
+ mov edx,eax
+ shl rax,7
+ sub rax,96
+ sub rsp,rax
+
+ mov rax,rsp
+ mov rcx,r15
+ mov r10d,edx
+ call _bsaes_key_convert
+ pxor xmm7,XMMWORD PTR[rsp]
+ movdqa XMMWORD PTR[rax],xmm6
+ movdqa XMMWORD PTR[rsp],xmm7
+
+ movdqu xmm14,XMMWORD PTR[rbx]
+ sub r14,8
+$L$cbc_dec_loop::
+ movdqu xmm15,XMMWORD PTR[r12]
+ movdqu xmm0,XMMWORD PTR[16+r12]
+ movdqu xmm1,XMMWORD PTR[32+r12]
+ movdqu xmm2,XMMWORD PTR[48+r12]
+ movdqu xmm3,XMMWORD PTR[64+r12]
+ movdqu xmm4,XMMWORD PTR[80+r12]
+ mov rax,rsp
+ movdqu xmm5,XMMWORD PTR[96+r12]
+ mov r10d,edx
+ movdqu xmm6,XMMWORD PTR[112+r12]
+ movdqa XMMWORD PTR[32+rbp],xmm14
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm1,xmm10
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ pxor xmm6,xmm11
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm2,xmm12
+ movdqu xmm14,XMMWORD PTR[112+r12]
+ pxor xmm4,xmm13
+ movdqu XMMWORD PTR[r13],xmm15
+ lea r12,QWORD PTR[128+r12]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ movdqu XMMWORD PTR[80+r13],xmm6
+ movdqu XMMWORD PTR[96+r13],xmm2
+ movdqu XMMWORD PTR[112+r13],xmm4
+ lea r13,QWORD PTR[128+r13]
+ sub r14,8
+ jnc $L$cbc_dec_loop
+
+ add r14,8
+ jz $L$cbc_dec_done
+
+ movdqu xmm15,XMMWORD PTR[r12]
+ mov rax,rsp
+ mov r10d,edx
+ cmp r14,2
+ jb $L$cbc_dec_one
+ movdqu xmm0,XMMWORD PTR[16+r12]
+ je $L$cbc_dec_two
+ movdqu xmm1,XMMWORD PTR[32+r12]
+ cmp r14,4
+ jb $L$cbc_dec_three
+ movdqu xmm2,XMMWORD PTR[48+r12]
+ je $L$cbc_dec_four
+ movdqu xmm3,XMMWORD PTR[64+r12]
+ cmp r14,6
+ jb $L$cbc_dec_five
+ movdqu xmm4,XMMWORD PTR[80+r12]
+ je $L$cbc_dec_six
+ movdqu xmm5,XMMWORD PTR[96+r12]
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm1,xmm10
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ pxor xmm6,xmm11
+ movdqu xmm14,XMMWORD PTR[96+r12]
+ pxor xmm2,xmm12
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ movdqu XMMWORD PTR[80+r13],xmm6
+ movdqu XMMWORD PTR[96+r13],xmm2
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_six::
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm1,xmm10
+ movdqu xmm14,XMMWORD PTR[80+r12]
+ pxor xmm6,xmm11
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ movdqu XMMWORD PTR[80+r13],xmm6
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_five::
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu xmm14,XMMWORD PTR[64+r12]
+ pxor xmm1,xmm10
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_four::
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm14,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_three::
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm14,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_two::
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm14,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_one::
+ lea rcx,QWORD PTR[r12]
+ lea rdx,QWORD PTR[32+rbp]
+ lea r8,QWORD PTR[r15]
+ call asm_AES_decrypt
+ pxor xmm14,XMMWORD PTR[32+rbp]
+ movdqu XMMWORD PTR[r13],xmm14
+ movdqa xmm14,xmm15
+
+$L$cbc_dec_done::
+ movdqu XMMWORD PTR[rbx],xmm14
+ lea rax,QWORD PTR[rsp]
+ pxor xmm0,xmm0
+$L$cbc_dec_bzero::
+ movdqa XMMWORD PTR[rax],xmm0
+ movdqa XMMWORD PTR[16+rax],xmm0
+ lea rax,QWORD PTR[32+rax]
+ cmp rbp,rax
+ ja $L$cbc_dec_bzero
+
+ lea rsp,QWORD PTR[rbp]
+ movaps xmm6,XMMWORD PTR[64+rbp]
+ movaps xmm7,XMMWORD PTR[80+rbp]
+ movaps xmm8,XMMWORD PTR[96+rbp]
+ movaps xmm9,XMMWORD PTR[112+rbp]
+ movaps xmm10,XMMWORD PTR[128+rbp]
+ movaps xmm11,XMMWORD PTR[144+rbp]
+ movaps xmm12,XMMWORD PTR[160+rbp]
+ movaps xmm13,XMMWORD PTR[176+rbp]
+ movaps xmm14,XMMWORD PTR[192+rbp]
+ movaps xmm15,XMMWORD PTR[208+rbp]
+ lea rsp,QWORD PTR[160+rbp]
+ mov r15,QWORD PTR[72+rsp]
+ mov r14,QWORD PTR[80+rsp]
+ mov r13,QWORD PTR[88+rsp]
+ mov r12,QWORD PTR[96+rsp]
+ mov rbx,QWORD PTR[104+rsp]
+ mov rax,QWORD PTR[112+rsp]
+ lea rsp,QWORD PTR[120+rsp]
+ mov rbp,rax
+$L$cbc_dec_epilogue::
+ DB 0F3h,0C3h ;repret
+bsaes_cbc_encrypt ENDP
+
+PUBLIC bsaes_ctr32_encrypt_blocks
+
+ALIGN 16
+bsaes_ctr32_encrypt_blocks PROC PUBLIC
+ mov rax,rsp
+$L$ctr_enc_prologue::
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-72))+rsp]
+ mov r10,QWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[((-160))+rsp]
+ movaps XMMWORD PTR[64+rsp],xmm6
+ movaps XMMWORD PTR[80+rsp],xmm7
+ movaps XMMWORD PTR[96+rsp],xmm8
+ movaps XMMWORD PTR[112+rsp],xmm9
+ movaps XMMWORD PTR[128+rsp],xmm10
+ movaps XMMWORD PTR[144+rsp],xmm11
+ movaps XMMWORD PTR[160+rsp],xmm12
+ movaps XMMWORD PTR[176+rsp],xmm13
+ movaps XMMWORD PTR[192+rsp],xmm14
+ movaps XMMWORD PTR[208+rsp],xmm15
+$L$ctr_enc_body::
+ mov rbp,rsp
+ movdqu xmm0,XMMWORD PTR[r10]
+ mov eax,DWORD PTR[240+r9]
+ mov r12,rcx
+ mov r13,rdx
+ mov r14,r8
+ mov r15,r9
+ movdqa XMMWORD PTR[32+rbp],xmm0
+ cmp r8,8
+ jb $L$ctr_enc_short
+
+ mov ebx,eax
+ shl rax,7
+ sub rax,96
+ sub rsp,rax
+
+ mov rax,rsp
+ mov rcx,r15
+ mov r10d,ebx
+ call _bsaes_key_convert
+ pxor xmm7,xmm6
+ movdqa XMMWORD PTR[rax],xmm7
+
+ movdqa xmm8,XMMWORD PTR[rsp]
+ lea r11,QWORD PTR[$L$ADD1]
+ movdqa xmm15,XMMWORD PTR[32+rbp]
+ movdqa xmm7,XMMWORD PTR[((-32))+r11]
+DB 102,68,15,56,0,199
+DB 102,68,15,56,0,255
+ movdqa XMMWORD PTR[rsp],xmm8
+ jmp $L$ctr_enc_loop
+ALIGN 16
+$L$ctr_enc_loop::
+ movdqa XMMWORD PTR[32+rbp],xmm15
+ movdqa xmm0,xmm15
+ movdqa xmm1,xmm15
+ paddd xmm0,XMMWORD PTR[r11]
+ movdqa xmm2,xmm15
+ paddd xmm1,XMMWORD PTR[16+r11]
+ movdqa xmm3,xmm15
+ paddd xmm2,XMMWORD PTR[32+r11]
+ movdqa xmm4,xmm15
+ paddd xmm3,XMMWORD PTR[48+r11]
+ movdqa xmm5,xmm15
+ paddd xmm4,XMMWORD PTR[64+r11]
+ movdqa xmm6,xmm15
+ paddd xmm5,XMMWORD PTR[80+r11]
+ paddd xmm6,XMMWORD PTR[96+r11]
+
+
+
+ movdqa xmm8,XMMWORD PTR[rsp]
+ lea rax,QWORD PTR[16+rsp]
+ movdqa xmm7,XMMWORD PTR[((-16))+r11]
+ pxor xmm15,xmm8
+ pxor xmm0,xmm8
+DB 102,68,15,56,0,255
+ pxor xmm1,xmm8
+DB 102,15,56,0,199
+ pxor xmm2,xmm8
+DB 102,15,56,0,207
+ pxor xmm3,xmm8
+DB 102,15,56,0,215
+ pxor xmm4,xmm8
+DB 102,15,56,0,223
+ pxor xmm5,xmm8
+DB 102,15,56,0,231
+ pxor xmm6,xmm8
+DB 102,15,56,0,239
+ lea r11,QWORD PTR[$L$BS0]
+DB 102,15,56,0,247
+ mov r10d,ebx
+
+ call _bsaes_encrypt8_bitslice
+
+ sub r14,8
+ jc $L$ctr_enc_loop_done
+
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ movdqu xmm14,XMMWORD PTR[112+r12]
+ lea r12,QWORD PTR[128+r12]
+ pxor xmm7,xmm15
+ movdqa xmm15,XMMWORD PTR[32+rbp]
+ pxor xmm0,xmm8
+ movdqu XMMWORD PTR[r13],xmm7
+ pxor xmm3,xmm9
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,xmm10
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,xmm11
+ movdqu XMMWORD PTR[48+r13],xmm5
+ pxor xmm6,xmm12
+ movdqu XMMWORD PTR[64+r13],xmm2
+ pxor xmm1,xmm13
+ movdqu XMMWORD PTR[80+r13],xmm6
+ pxor xmm4,xmm14
+ movdqu XMMWORD PTR[96+r13],xmm1
+ lea r11,QWORD PTR[$L$ADD1]
+ movdqu XMMWORD PTR[112+r13],xmm4
+ lea r13,QWORD PTR[128+r13]
+ paddd xmm15,XMMWORD PTR[112+r11]
+ jnz $L$ctr_enc_loop
+
+ jmp $L$ctr_enc_done
+ALIGN 16
+$L$ctr_enc_loop_done::
+ add r14,8
+ movdqu xmm7,XMMWORD PTR[r12]
+ pxor xmm15,xmm7
+ movdqu XMMWORD PTR[r13],xmm15
+ cmp r14,2
+ jb $L$ctr_enc_done
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm8
+ movdqu XMMWORD PTR[16+r13],xmm0
+ je $L$ctr_enc_done
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm3,xmm9
+ movdqu XMMWORD PTR[32+r13],xmm3
+ cmp r14,4
+ jb $L$ctr_enc_done
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm5,xmm10
+ movdqu XMMWORD PTR[48+r13],xmm5
+ je $L$ctr_enc_done
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm2,xmm11
+ movdqu XMMWORD PTR[64+r13],xmm2
+ cmp r14,6
+ jb $L$ctr_enc_done
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ pxor xmm6,xmm12
+ movdqu XMMWORD PTR[80+r13],xmm6
+ je $L$ctr_enc_done
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm1,xmm13
+ movdqu XMMWORD PTR[96+r13],xmm1
+ jmp $L$ctr_enc_done
+
+ALIGN 16
+$L$ctr_enc_short::
+ lea rcx,QWORD PTR[32+rbp]
+ lea rdx,QWORD PTR[48+rbp]
+ lea r8,QWORD PTR[r15]
+ call asm_AES_encrypt
+ movdqu xmm0,XMMWORD PTR[r12]
+ lea r12,QWORD PTR[16+r12]
+ mov eax,DWORD PTR[44+rbp]
+ bswap eax
+ pxor xmm0,XMMWORD PTR[48+rbp]
+ inc eax
+ movdqu XMMWORD PTR[r13],xmm0
+ bswap eax
+ lea r13,QWORD PTR[16+r13]
+ mov DWORD PTR[44+rsp],eax
+ dec r14
+ jnz $L$ctr_enc_short
+
+$L$ctr_enc_done::
+ lea rax,QWORD PTR[rsp]
+ pxor xmm0,xmm0
+$L$ctr_enc_bzero::
+ movdqa XMMWORD PTR[rax],xmm0
+ movdqa XMMWORD PTR[16+rax],xmm0
+ lea rax,QWORD PTR[32+rax]
+ cmp rbp,rax
+ ja $L$ctr_enc_bzero
+
+ lea rsp,QWORD PTR[rbp]
+ movaps xmm6,XMMWORD PTR[64+rbp]
+ movaps xmm7,XMMWORD PTR[80+rbp]
+ movaps xmm8,XMMWORD PTR[96+rbp]
+ movaps xmm9,XMMWORD PTR[112+rbp]
+ movaps xmm10,XMMWORD PTR[128+rbp]
+ movaps xmm11,XMMWORD PTR[144+rbp]
+ movaps xmm12,XMMWORD PTR[160+rbp]
+ movaps xmm13,XMMWORD PTR[176+rbp]
+ movaps xmm14,XMMWORD PTR[192+rbp]
+ movaps xmm15,XMMWORD PTR[208+rbp]
+ lea rsp,QWORD PTR[160+rbp]
+ mov r15,QWORD PTR[72+rsp]
+ mov r14,QWORD PTR[80+rsp]
+ mov r13,QWORD PTR[88+rsp]
+ mov r12,QWORD PTR[96+rsp]
+ mov rbx,QWORD PTR[104+rsp]
+ mov rax,QWORD PTR[112+rsp]
+ lea rsp,QWORD PTR[120+rsp]
+ mov rbp,rax
+$L$ctr_enc_epilogue::
+ DB 0F3h,0C3h ;repret
+bsaes_ctr32_encrypt_blocks ENDP
+PUBLIC bsaes_xts_encrypt
+
+ALIGN 16
+bsaes_xts_encrypt PROC PUBLIC
+ mov rax,rsp
+$L$xts_enc_prologue::
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-72))+rsp]
+ mov r10,QWORD PTR[160+rsp]
+ mov r11,QWORD PTR[168+rsp]
+ lea rsp,QWORD PTR[((-160))+rsp]
+ movaps XMMWORD PTR[64+rsp],xmm6
+ movaps XMMWORD PTR[80+rsp],xmm7
+ movaps XMMWORD PTR[96+rsp],xmm8
+ movaps XMMWORD PTR[112+rsp],xmm9
+ movaps XMMWORD PTR[128+rsp],xmm10
+ movaps XMMWORD PTR[144+rsp],xmm11
+ movaps XMMWORD PTR[160+rsp],xmm12
+ movaps XMMWORD PTR[176+rsp],xmm13
+ movaps XMMWORD PTR[192+rsp],xmm14
+ movaps XMMWORD PTR[208+rsp],xmm15
+$L$xts_enc_body::
+ mov rbp,rsp
+ mov r12,rcx
+ mov r13,rdx
+ mov r14,r8
+ mov r15,r9
+
+ lea rcx,QWORD PTR[r11]
+ lea rdx,QWORD PTR[32+rbp]
+ lea r8,QWORD PTR[r10]
+ call asm_AES_encrypt
+
+ mov eax,DWORD PTR[240+r15]
+ mov rbx,r14
+
+ mov edx,eax
+ shl rax,7
+ sub rax,96
+ sub rsp,rax
+
+ mov rax,rsp
+ mov rcx,r15
+ mov r10d,edx
+ call _bsaes_key_convert
+ pxor xmm7,xmm6
+ movdqa XMMWORD PTR[rax],xmm7
+
+ and r14,-16
+ sub rsp,080h
+ movdqa xmm6,XMMWORD PTR[32+rbp]
+
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic]
+ pcmpgtd xmm14,xmm6
+
+ sub r14,080h
+ jc $L$xts_enc_short
+ jmp $L$xts_enc_loop
+
+ALIGN 16
+$L$xts_enc_loop::
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm15,xmm6
+ movdqa XMMWORD PTR[rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm0,xmm6
+ movdqa XMMWORD PTR[16+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm7,XMMWORD PTR[r12]
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm1,xmm6
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm15,xmm7
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm2,xmm6
+ movdqa XMMWORD PTR[48+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm0,xmm8
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm3,xmm6
+ movdqa XMMWORD PTR[64+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm1,xmm9
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm4,xmm6
+ movdqa XMMWORD PTR[80+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm2,xmm10
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm5,xmm6
+ movdqa XMMWORD PTR[96+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ pxor xmm3,xmm11
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ movdqu xmm14,XMMWORD PTR[112+r12]
+ lea r12,QWORD PTR[128+r12]
+ movdqa XMMWORD PTR[112+rsp],xmm6
+ pxor xmm5,xmm13
+ lea rax,QWORD PTR[128+rsp]
+ pxor xmm6,xmm14
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm5
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm2
+ pxor xmm1,XMMWORD PTR[96+rsp]
+ movdqu XMMWORD PTR[80+r13],xmm6
+ pxor xmm4,XMMWORD PTR[112+rsp]
+ movdqu XMMWORD PTR[96+r13],xmm1
+ movdqu XMMWORD PTR[112+r13],xmm4
+ lea r13,QWORD PTR[128+r13]
+
+ movdqa xmm6,XMMWORD PTR[112+rsp]
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic]
+ pcmpgtd xmm14,xmm6
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+
+ sub r14,080h
+ jnc $L$xts_enc_loop
+
+$L$xts_enc_short::
+ add r14,080h
+ jz $L$xts_enc_done
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm15,xmm6
+ movdqa XMMWORD PTR[rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm0,xmm6
+ movdqa XMMWORD PTR[16+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm7,XMMWORD PTR[r12]
+ cmp r14,16
+ je $L$xts_enc_1
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm1,xmm6
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ cmp r14,32
+ je $L$xts_enc_2
+ pxor xmm15,xmm7
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm2,xmm6
+ movdqa XMMWORD PTR[48+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ cmp r14,48
+ je $L$xts_enc_3
+ pxor xmm0,xmm8
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm3,xmm6
+ movdqa XMMWORD PTR[64+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ cmp r14,64
+ je $L$xts_enc_4
+ pxor xmm1,xmm9
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm4,xmm6
+ movdqa XMMWORD PTR[80+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ cmp r14,80
+ je $L$xts_enc_5
+ pxor xmm2,xmm10
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm5,xmm6
+ movdqa XMMWORD PTR[96+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ cmp r14,96
+ je $L$xts_enc_6
+ pxor xmm3,xmm11
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ movdqa XMMWORD PTR[112+rsp],xmm6
+ lea r12,QWORD PTR[112+r12]
+ pxor xmm5,xmm13
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm5
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm2
+ pxor xmm1,XMMWORD PTR[96+rsp]
+ movdqu XMMWORD PTR[80+r13],xmm6
+ movdqu XMMWORD PTR[96+r13],xmm1
+ lea r13,QWORD PTR[112+r13]
+
+ movdqa xmm6,XMMWORD PTR[112+rsp]
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_6::
+ pxor xmm3,xmm11
+ lea r12,QWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm5
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm2
+ movdqu XMMWORD PTR[80+r13],xmm6
+ lea r13,QWORD PTR[96+r13]
+
+ movdqa xmm6,XMMWORD PTR[96+rsp]
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_5::
+ pxor xmm2,xmm10
+ lea r12,QWORD PTR[80+r12]
+ pxor xmm3,xmm11
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm5
+ movdqu XMMWORD PTR[64+r13],xmm2
+ lea r13,QWORD PTR[80+r13]
+
+ movdqa xmm6,XMMWORD PTR[80+rsp]
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_4::
+ pxor xmm1,xmm9
+ lea r12,QWORD PTR[64+r12]
+ pxor xmm2,xmm10
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ movdqu XMMWORD PTR[48+r13],xmm5
+ lea r13,QWORD PTR[64+r13]
+
+ movdqa xmm6,XMMWORD PTR[64+rsp]
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_3::
+ pxor xmm0,xmm8
+ lea r12,QWORD PTR[48+r12]
+ pxor xmm1,xmm9
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm3
+ lea r13,QWORD PTR[48+r13]
+
+ movdqa xmm6,XMMWORD PTR[48+rsp]
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_2::
+ pxor xmm15,xmm7
+ lea r12,QWORD PTR[32+r12]
+ pxor xmm0,xmm8
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ lea r13,QWORD PTR[32+r13]
+
+ movdqa xmm6,XMMWORD PTR[32+rsp]
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_1::
+ pxor xmm7,xmm15
+ lea r12,QWORD PTR[16+r12]
+ movdqa XMMWORD PTR[32+rbp],xmm7
+ lea rcx,QWORD PTR[32+rbp]
+ lea rdx,QWORD PTR[32+rbp]
+ lea r8,QWORD PTR[r15]
+ call asm_AES_encrypt
+ pxor xmm15,XMMWORD PTR[32+rbp]
+
+
+
+
+
+ movdqu XMMWORD PTR[r13],xmm15
+ lea r13,QWORD PTR[16+r13]
+
+ movdqa xmm6,XMMWORD PTR[16+rsp]
+
+$L$xts_enc_done::
+ and ebx,15
+ jz $L$xts_enc_ret
+ mov rdx,r13
+
+$L$xts_enc_steal::
+ movzx eax,BYTE PTR[r12]
+ movzx ecx,BYTE PTR[((-16))+rdx]
+ lea r12,QWORD PTR[1+r12]
+ mov BYTE PTR[((-16))+rdx],al
+ mov BYTE PTR[rdx],cl
+ lea rdx,QWORD PTR[1+rdx]
+ sub ebx,1
+ jnz $L$xts_enc_steal
+
+ movdqu xmm15,XMMWORD PTR[((-16))+r13]
+ lea rcx,QWORD PTR[32+rbp]
+ pxor xmm15,xmm6
+ lea rdx,QWORD PTR[32+rbp]
+ movdqa XMMWORD PTR[32+rbp],xmm15
+ lea r8,QWORD PTR[r15]
+ call asm_AES_encrypt
+ pxor xmm6,XMMWORD PTR[32+rbp]
+ movdqu XMMWORD PTR[(-16)+r13],xmm6
+
+$L$xts_enc_ret::
+ lea rax,QWORD PTR[rsp]
+ pxor xmm0,xmm0
+$L$xts_enc_bzero::
+ movdqa XMMWORD PTR[rax],xmm0
+ movdqa XMMWORD PTR[16+rax],xmm0
+ lea rax,QWORD PTR[32+rax]
+ cmp rbp,rax
+ ja $L$xts_enc_bzero
+
+ lea rsp,QWORD PTR[rbp]
+ movaps xmm6,XMMWORD PTR[64+rbp]
+ movaps xmm7,XMMWORD PTR[80+rbp]
+ movaps xmm8,XMMWORD PTR[96+rbp]
+ movaps xmm9,XMMWORD PTR[112+rbp]
+ movaps xmm10,XMMWORD PTR[128+rbp]
+ movaps xmm11,XMMWORD PTR[144+rbp]
+ movaps xmm12,XMMWORD PTR[160+rbp]
+ movaps xmm13,XMMWORD PTR[176+rbp]
+ movaps xmm14,XMMWORD PTR[192+rbp]
+ movaps xmm15,XMMWORD PTR[208+rbp]
+ lea rsp,QWORD PTR[160+rbp]
+ mov r15,QWORD PTR[72+rsp]
+ mov r14,QWORD PTR[80+rsp]
+ mov r13,QWORD PTR[88+rsp]
+ mov r12,QWORD PTR[96+rsp]
+ mov rbx,QWORD PTR[104+rsp]
+ mov rax,QWORD PTR[112+rsp]
+ lea rsp,QWORD PTR[120+rsp]
+ mov rbp,rax
+$L$xts_enc_epilogue::
+ DB 0F3h,0C3h ;repret
+bsaes_xts_encrypt ENDP
+
+PUBLIC bsaes_xts_decrypt
+
+ALIGN 16
+bsaes_xts_decrypt PROC PUBLIC
+ mov rax,rsp
+$L$xts_dec_prologue::
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-72))+rsp]
+ mov r10,QWORD PTR[160+rsp]
+ mov r11,QWORD PTR[168+rsp]
+ lea rsp,QWORD PTR[((-160))+rsp]
+ movaps XMMWORD PTR[64+rsp],xmm6
+ movaps XMMWORD PTR[80+rsp],xmm7
+ movaps XMMWORD PTR[96+rsp],xmm8
+ movaps XMMWORD PTR[112+rsp],xmm9
+ movaps XMMWORD PTR[128+rsp],xmm10
+ movaps XMMWORD PTR[144+rsp],xmm11
+ movaps XMMWORD PTR[160+rsp],xmm12
+ movaps XMMWORD PTR[176+rsp],xmm13
+ movaps XMMWORD PTR[192+rsp],xmm14
+ movaps XMMWORD PTR[208+rsp],xmm15
+$L$xts_dec_body::
+ mov rbp,rsp
+ mov r12,rcx
+ mov r13,rdx
+ mov r14,r8
+ mov r15,r9
+
+ lea rcx,QWORD PTR[r11]
+ lea rdx,QWORD PTR[32+rbp]
+ lea r8,QWORD PTR[r10]
+ call asm_AES_encrypt
+
+ mov eax,DWORD PTR[240+r15]
+ mov rbx,r14
+
+ mov edx,eax
+ shl rax,7
+ sub rax,96
+ sub rsp,rax
+
+ mov rax,rsp
+ mov rcx,r15
+ mov r10d,edx
+ call _bsaes_key_convert
+ pxor xmm7,XMMWORD PTR[rsp]
+ movdqa XMMWORD PTR[rax],xmm6
+ movdqa XMMWORD PTR[rsp],xmm7
+
+ xor eax,eax
+ and r14,-16
+ test ebx,15
+ setnz al
+ shl rax,4
+ sub r14,rax
+
+ sub rsp,080h
+ movdqa xmm6,XMMWORD PTR[32+rbp]
+
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic]
+ pcmpgtd xmm14,xmm6
+
+ sub r14,080h
+ jc $L$xts_dec_short
+ jmp $L$xts_dec_loop
+
+ALIGN 16
+$L$xts_dec_loop::
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm15,xmm6
+ movdqa XMMWORD PTR[rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm0,xmm6
+ movdqa XMMWORD PTR[16+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm7,XMMWORD PTR[r12]
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm1,xmm6
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm15,xmm7
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm2,xmm6
+ movdqa XMMWORD PTR[48+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm0,xmm8
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm3,xmm6
+ movdqa XMMWORD PTR[64+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm1,xmm9
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm4,xmm6
+ movdqa XMMWORD PTR[80+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm2,xmm10
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm5,xmm6
+ movdqa XMMWORD PTR[96+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ pxor xmm3,xmm11
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ movdqu xmm14,XMMWORD PTR[112+r12]
+ lea r12,QWORD PTR[128+r12]
+ movdqa XMMWORD PTR[112+rsp],xmm6
+ pxor xmm5,xmm13
+ lea rax,QWORD PTR[128+rsp]
+ pxor xmm6,xmm14
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ pxor xmm1,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm3
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm1
+ pxor xmm2,XMMWORD PTR[96+rsp]
+ movdqu XMMWORD PTR[80+r13],xmm6
+ pxor xmm4,XMMWORD PTR[112+rsp]
+ movdqu XMMWORD PTR[96+r13],xmm2
+ movdqu XMMWORD PTR[112+r13],xmm4
+ lea r13,QWORD PTR[128+r13]
+
+ movdqa xmm6,XMMWORD PTR[112+rsp]
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic]
+ pcmpgtd xmm14,xmm6
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+
+ sub r14,080h
+ jnc $L$xts_dec_loop
+
+$L$xts_dec_short::
+ add r14,080h
+ jz $L$xts_dec_done
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm15,xmm6
+ movdqa XMMWORD PTR[rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm0,xmm6
+ movdqa XMMWORD PTR[16+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm7,XMMWORD PTR[r12]
+ cmp r14,16
+ je $L$xts_dec_1
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm1,xmm6
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ cmp r14,32
+ je $L$xts_dec_2
+ pxor xmm15,xmm7
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm2,xmm6
+ movdqa XMMWORD PTR[48+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ cmp r14,48
+ je $L$xts_dec_3
+ pxor xmm0,xmm8
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm3,xmm6
+ movdqa XMMWORD PTR[64+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ cmp r14,64
+ je $L$xts_dec_4
+ pxor xmm1,xmm9
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm4,xmm6
+ movdqa XMMWORD PTR[80+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ cmp r14,80
+ je $L$xts_dec_5
+ pxor xmm2,xmm10
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm5,xmm6
+ movdqa XMMWORD PTR[96+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ cmp r14,96
+ je $L$xts_dec_6
+ pxor xmm3,xmm11
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ movdqa XMMWORD PTR[112+rsp],xmm6
+ lea r12,QWORD PTR[112+r12]
+ pxor xmm5,xmm13
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ pxor xmm1,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm3
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm1
+ pxor xmm2,XMMWORD PTR[96+rsp]
+ movdqu XMMWORD PTR[80+r13],xmm6
+ movdqu XMMWORD PTR[96+r13],xmm2
+ lea r13,QWORD PTR[112+r13]
+
+ movdqa xmm6,XMMWORD PTR[112+rsp]
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_6::
+ pxor xmm3,xmm11
+ lea r12,QWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ pxor xmm1,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm3
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm1
+ movdqu XMMWORD PTR[80+r13],xmm6
+ lea r13,QWORD PTR[96+r13]
+
+ movdqa xmm6,XMMWORD PTR[96+rsp]
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_5::
+ pxor xmm2,xmm10
+ lea r12,QWORD PTR[80+r12]
+ pxor xmm3,xmm11
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ pxor xmm1,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ lea r13,QWORD PTR[80+r13]
+
+ movdqa xmm6,XMMWORD PTR[80+rsp]
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_4::
+ pxor xmm1,xmm9
+ lea r12,QWORD PTR[64+r12]
+ pxor xmm2,xmm10
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ lea r13,QWORD PTR[64+r13]
+
+ movdqa xmm6,XMMWORD PTR[64+rsp]
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_3::
+ pxor xmm0,xmm8
+ lea r12,QWORD PTR[48+r12]
+ pxor xmm1,xmm9
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ lea r13,QWORD PTR[48+r13]
+
+ movdqa xmm6,XMMWORD PTR[48+rsp]
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_2::
+ pxor xmm15,xmm7
+ lea r12,QWORD PTR[32+r12]
+ pxor xmm0,xmm8
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ lea r13,QWORD PTR[32+r13]
+
+ movdqa xmm6,XMMWORD PTR[32+rsp]
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_1::
+ pxor xmm7,xmm15
+ lea r12,QWORD PTR[16+r12]
+ movdqa XMMWORD PTR[32+rbp],xmm7
+ lea rcx,QWORD PTR[32+rbp]
+ lea rdx,QWORD PTR[32+rbp]
+ lea r8,QWORD PTR[r15]
+ call asm_AES_decrypt
+ pxor xmm15,XMMWORD PTR[32+rbp]
+
+
+
+
+
+ movdqu XMMWORD PTR[r13],xmm15
+ lea r13,QWORD PTR[16+r13]
+
+ movdqa xmm6,XMMWORD PTR[16+rsp]
+
+$L$xts_dec_done::
+ and ebx,15
+ jz $L$xts_dec_ret
+
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic]
+ pcmpgtd xmm14,xmm6
+ pshufd xmm13,xmm14,013h
+ movdqa xmm5,xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ movdqu xmm15,XMMWORD PTR[r12]
+ pxor xmm6,xmm13
+
+ lea rcx,QWORD PTR[32+rbp]
+ pxor xmm15,xmm6
+ lea rdx,QWORD PTR[32+rbp]
+ movdqa XMMWORD PTR[32+rbp],xmm15
+ lea r8,QWORD PTR[r15]
+ call asm_AES_decrypt
+ pxor xmm6,XMMWORD PTR[32+rbp]
+ mov rdx,r13
+ movdqu XMMWORD PTR[r13],xmm6
+
+$L$xts_dec_steal::
+ movzx eax,BYTE PTR[16+r12]
+ movzx ecx,BYTE PTR[rdx]
+ lea r12,QWORD PTR[1+r12]
+ mov BYTE PTR[rdx],al
+ mov BYTE PTR[16+rdx],cl
+ lea rdx,QWORD PTR[1+rdx]
+ sub ebx,1
+ jnz $L$xts_dec_steal
+
+ movdqu xmm15,XMMWORD PTR[r13]
+ lea rcx,QWORD PTR[32+rbp]
+ pxor xmm15,xmm5
+ lea rdx,QWORD PTR[32+rbp]
+ movdqa XMMWORD PTR[32+rbp],xmm15
+ lea r8,QWORD PTR[r15]
+ call asm_AES_decrypt
+ pxor xmm5,XMMWORD PTR[32+rbp]
+ movdqu XMMWORD PTR[r13],xmm5
+
+$L$xts_dec_ret::
+ lea rax,QWORD PTR[rsp]
+ pxor xmm0,xmm0
+$L$xts_dec_bzero::
+ movdqa XMMWORD PTR[rax],xmm0
+ movdqa XMMWORD PTR[16+rax],xmm0
+ lea rax,QWORD PTR[32+rax]
+ cmp rbp,rax
+ ja $L$xts_dec_bzero
+
+ lea rsp,QWORD PTR[rbp]
+ movaps xmm6,XMMWORD PTR[64+rbp]
+ movaps xmm7,XMMWORD PTR[80+rbp]
+ movaps xmm8,XMMWORD PTR[96+rbp]
+ movaps xmm9,XMMWORD PTR[112+rbp]
+ movaps xmm10,XMMWORD PTR[128+rbp]
+ movaps xmm11,XMMWORD PTR[144+rbp]
+ movaps xmm12,XMMWORD PTR[160+rbp]
+ movaps xmm13,XMMWORD PTR[176+rbp]
+ movaps xmm14,XMMWORD PTR[192+rbp]
+ movaps xmm15,XMMWORD PTR[208+rbp]
+ lea rsp,QWORD PTR[160+rbp]
+ mov r15,QWORD PTR[72+rsp]
+ mov r14,QWORD PTR[80+rsp]
+ mov r13,QWORD PTR[88+rsp]
+ mov r12,QWORD PTR[96+rsp]
+ mov rbx,QWORD PTR[104+rsp]
+ mov rax,QWORD PTR[112+rsp]
+ lea rsp,QWORD PTR[120+rsp]
+ mov rbp,rax
+$L$xts_dec_epilogue::
+ DB 0F3h,0C3h ;repret
+bsaes_xts_decrypt ENDP
+
+ALIGN 64
+_bsaes_const::
+$L$M0ISR::
+ DQ 00a0e0206070b0f03h,00004080c0d010509h
+$L$ISRM0::
+ DQ 001040b0e0205080fh,00306090c00070a0dh
+$L$ISR::
+ DQ 00504070602010003h,00f0e0d0c080b0a09h
+$L$BS0::
+ DQ 05555555555555555h,05555555555555555h
+$L$BS1::
+ DQ 03333333333333333h,03333333333333333h
+$L$BS2::
+ DQ 00f0f0f0f0f0f0f0fh,00f0f0f0f0f0f0f0fh
+$L$SR::
+ DQ 00504070600030201h,00f0e0d0c0a09080bh
+$L$SRM0::
+ DQ 00304090e00050a0fh,001060b0c0207080dh
+$L$M0SR::
+ DQ 00a0e02060f03070bh,00004080c05090d01h
+$L$SWPUP::
+ DQ 00706050403020100h,00c0d0e0f0b0a0908h
+$L$SWPUPM0SR::
+ DQ 00a0d02060c03070bh,00004080f05090e01h
+$L$ADD1::
+ DQ 00000000000000000h,00000000100000000h
+$L$ADD2::
+ DQ 00000000000000000h,00000000200000000h
+$L$ADD3::
+ DQ 00000000000000000h,00000000300000000h
+$L$ADD4::
+ DQ 00000000000000000h,00000000400000000h
+$L$ADD5::
+ DQ 00000000000000000h,00000000500000000h
+$L$ADD6::
+ DQ 00000000000000000h,00000000600000000h
+$L$ADD7::
+ DQ 00000000000000000h,00000000700000000h
+$L$ADD8::
+ DQ 00000000000000000h,00000000800000000h
+$L$xts_magic::
+ DD 087h,0,1,0
+$L$masks::
+ DQ 00101010101010101h,00101010101010101h
+ DQ 00202020202020202h,00202020202020202h
+ DQ 00404040404040404h,00404040404040404h
+ DQ 00808080808080808h,00808080808080808h
+$L$M0::
+ DQ 002060a0e03070b0fh,00004080c0105090dh
+$L$63::
+ DQ 06363636363636363h,06363636363636363h
+DB 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102
+DB 111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44
+DB 32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44
+DB 32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32
+DB 65,110,100,121,32,80,111,108,121,97,107,111,118,0
+ALIGN 64
+
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$in_prologue
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_prologue
+
+ mov rax,QWORD PTR[160+r8]
+
+ lea rsi,QWORD PTR[64+rax]
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,20
+ DD 0a548f3fch
+ lea rax,QWORD PTR[160+rax]
+
+ mov rbp,QWORD PTR[112+rax]
+ mov rbx,QWORD PTR[104+rax]
+ mov r12,QWORD PTR[96+rax]
+ mov r13,QWORD PTR[88+rax]
+ mov r14,QWORD PTR[80+rax]
+ mov r15,QWORD PTR[72+rax]
+ lea rax,QWORD PTR[120+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+$L$in_prologue::
+ mov QWORD PTR[152+r8],rax
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$cbc_dec_prologue
+ DD imagerel $L$cbc_dec_epilogue
+ DD imagerel $L$cbc_dec_info
+
+ DD imagerel $L$ctr_enc_prologue
+ DD imagerel $L$ctr_enc_epilogue
+ DD imagerel $L$ctr_enc_info
+
+ DD imagerel $L$xts_enc_prologue
+ DD imagerel $L$xts_enc_epilogue
+ DD imagerel $L$xts_enc_info
+
+ DD imagerel $L$xts_dec_prologue
+ DD imagerel $L$xts_dec_epilogue
+ DD imagerel $L$xts_dec_info
+
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$cbc_dec_info::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$cbc_dec_body,imagerel $L$cbc_dec_epilogue
+$L$ctr_enc_info::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$ctr_enc_body,imagerel $L$ctr_enc_epilogue
+$L$xts_enc_info::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue
+$L$xts_dec_info::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue
+
+.xdata ENDS
+END
+
diff --git a/ext/libressl/crypto/aes/bsaes-mingw64-x86_64.S b/ext/libressl/crypto/aes/bsaes-mingw64-x86_64.S
new file mode 100644
index 0000000..f0b07cb
--- /dev/null
+++ b/ext/libressl/crypto/aes/bsaes-mingw64-x86_64.S
@@ -0,0 +1,2725 @@
+#include "x86_arch.h"
+.text
+
+
+
+
+.def _bsaes_encrypt8; .scl 3; .type 32; .endef
+.p2align 6
+_bsaes_encrypt8:
+ leaq .LBS0(%rip),%r11
+
+ movdqa (%rax),%xmm8
+ leaq 16(%rax),%rax
+ movdqa 80(%r11),%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+.byte 102,15,56,0,247
+_bsaes_encrypt8_bitslice:
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm4,%xmm9
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d
+ jmp .Lenc_sbox
+.p2align 4
+.Lenc_loop:
+ pxor 0(%rax),%xmm15
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239
+ leaq 128(%rax),%rax
+.byte 102,15,56,0,247
+.Lenc_sbox:
+ pxor %xmm5,%xmm4
+ pxor %xmm0,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm1,%xmm5
+ pxor %xmm15,%xmm4
+
+ pxor %xmm2,%xmm5
+ pxor %xmm6,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm3,%xmm2
+ pxor %xmm4,%xmm3
+ pxor %xmm0,%xmm2
+
+ pxor %xmm6,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm6,%xmm10
+ movdqa %xmm0,%xmm9
+ movdqa %xmm4,%xmm8
+ movdqa %xmm1,%xmm12
+ movdqa %xmm5,%xmm11
+
+ pxor %xmm3,%xmm10
+ pxor %xmm1,%xmm9
+ pxor %xmm2,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm3,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm15,%xmm11
+ movdqa %xmm10,%xmm14
+
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm2,%xmm11
+ pxor %xmm15,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm6,%xmm12
+ movdqa %xmm4,%xmm11
+ pxor %xmm0,%xmm12
+ pxor %xmm5,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm1,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm3,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm0,%xmm13
+ pand %xmm2,%xmm11
+ movdqa %xmm6,%xmm14
+ pand %xmm15,%xmm12
+ pand %xmm4,%xmm13
+ por %xmm5,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+
+
+
+
+
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+
+ pxor %xmm9,%xmm10
+
+ pand %xmm10,%xmm12
+
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+
+ pand %xmm7,%xmm9
+
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+
+ pand %xmm14,%xmm13
+
+ pxor %xmm11,%xmm13
+ movdqa %xmm5,%xmm11
+ movdqa %xmm4,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm5,%xmm9
+ pxor %xmm4,%xmm5
+ pand %xmm14,%xmm4
+ pand %xmm13,%xmm5
+ pxor %xmm4,%xmm5
+ pxor %xmm9,%xmm4
+ pxor %xmm15,%xmm11
+ pxor %xmm2,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm2,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm2
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm2,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm2
+ pxor %xmm11,%xmm5
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm2
+
+ movdqa %xmm6,%xmm11
+ movdqa %xmm0,%xmm7
+ pxor %xmm3,%xmm11
+ pxor %xmm1,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm3,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm1,%xmm3
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm1
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm3
+ pxor %xmm11,%xmm7
+ pxor %xmm1,%xmm3
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm1
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm6,%xmm10
+ pxor %xmm0,%xmm6
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm6
+ pxor %xmm0,%xmm6
+ pxor %xmm10,%xmm0
+ pxor %xmm11,%xmm6
+ pxor %xmm11,%xmm3
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm1
+ pxor %xmm15,%xmm6
+ pxor %xmm5,%xmm0
+ pxor %xmm6,%xmm3
+ pxor %xmm15,%xmm5
+ pxor %xmm0,%xmm15
+
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ pxor %xmm2,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm4,%xmm3
+
+ pxor %xmm2,%xmm5
+ decl %r10d
+ jl .Lenc_done
+ pshufd $147,%xmm15,%xmm7
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm3,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm5,%xmm10
+ pxor %xmm9,%xmm3
+ pshufd $147,%xmm2,%xmm11
+ pxor %xmm10,%xmm5
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm2
+ pshufd $147,%xmm1,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm1
+ pxor %xmm14,%xmm4
+
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm2,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm5,%xmm11
+ pshufd $78,%xmm2,%xmm7
+ pxor %xmm1,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm3,%xmm10
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm1,%xmm5
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm12,%xmm8
+ pxor %xmm10,%xmm2
+ pxor %xmm14,%xmm6
+ pxor %xmm13,%xmm5
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm1
+ movdqa %xmm8,%xmm4
+ movdqa 48(%r11),%xmm7
+ jnz .Lenc_loop
+ movdqa 64(%r11),%xmm7
+ jmp .Lenc_loop
+.p2align 4
+.Lenc_done:
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm2,%xmm10
+ psrlq $1,%xmm2
+ pxor %xmm4,%xmm1
+ pxor %xmm6,%xmm2
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm2
+ pxor %xmm1,%xmm4
+ psllq $1,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $1,%xmm2
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm2
+ movdqa %xmm3,%xmm9
+ psrlq $1,%xmm3
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm5,%xmm3
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm15
+ pxor %xmm3,%xmm5
+ psllq $1,%xmm3
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9
+ psrlq $2,%xmm6
+ movdqa %xmm2,%xmm10
+ psrlq $2,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm1,%xmm2
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm2
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm2,%xmm1
+ psllq $2,%xmm2
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm2
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm5,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm5
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm5,%xmm9
+ psrlq $4,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $4,%xmm3
+ pxor %xmm4,%xmm5
+ pxor %xmm1,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm4
+ psllq $4,%xmm5
+ pxor %xmm3,%xmm1
+ psllq $4,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm2,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm2
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq
+
+
+.def _bsaes_decrypt8; .scl 3; .type 32; .endef
+.p2align 6
+_bsaes_decrypt8:
+ leaq .LBS0(%rip),%r11
+
+ movdqa (%rax),%xmm8
+ leaq 16(%rax),%rax
+ movdqa -48(%r11),%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+.byte 102,15,56,0,247
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm4,%xmm9
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d
+ jmp .Ldec_sbox
+.p2align 4
+.Ldec_loop:
+ pxor 0(%rax),%xmm15
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239
+ leaq 128(%rax),%rax
+.byte 102,15,56,0,247
+.Ldec_sbox:
+ pxor %xmm3,%xmm2
+
+ pxor %xmm6,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm3,%xmm5
+ pxor %xmm5,%xmm6
+ pxor %xmm6,%xmm0
+
+ pxor %xmm0,%xmm15
+ pxor %xmm4,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm15,%xmm4
+ pxor %xmm2,%xmm0
+ movdqa %xmm2,%xmm10
+ movdqa %xmm6,%xmm9
+ movdqa %xmm0,%xmm8
+ movdqa %xmm3,%xmm12
+ movdqa %xmm4,%xmm11
+
+ pxor %xmm15,%xmm10
+ pxor %xmm3,%xmm9
+ pxor %xmm5,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm15,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm1,%xmm11
+ movdqa %xmm10,%xmm14
+
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm5,%xmm11
+ pxor %xmm1,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm2,%xmm12
+ movdqa %xmm0,%xmm11
+ pxor %xmm6,%xmm12
+ pxor %xmm4,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm3,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm15,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm6,%xmm13
+ pand %xmm5,%xmm11
+ movdqa %xmm2,%xmm14
+ pand %xmm1,%xmm12
+ pand %xmm0,%xmm13
+ por %xmm4,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+
+
+
+
+
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+
+ pxor %xmm9,%xmm10
+
+ pand %xmm10,%xmm12
+
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+
+ pand %xmm7,%xmm9
+
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+
+ pand %xmm14,%xmm13
+
+ pxor %xmm11,%xmm13
+ movdqa %xmm4,%xmm11
+ movdqa %xmm0,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm4,%xmm9
+ pxor %xmm0,%xmm4
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm4
+ pxor %xmm0,%xmm4
+ pxor %xmm9,%xmm0
+ pxor %xmm1,%xmm11
+ pxor %xmm5,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm1,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm5,%xmm1
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm5
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm1
+ pxor %xmm11,%xmm7
+ pxor %xmm5,%xmm1
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm5
+ pxor %xmm11,%xmm4
+ pxor %xmm11,%xmm1
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm5
+
+ movdqa %xmm2,%xmm11
+ movdqa %xmm6,%xmm7
+ pxor %xmm15,%xmm11
+ pxor %xmm3,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm3,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm3
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm3,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm3
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm2,%xmm10
+ pxor %xmm6,%xmm2
+ pand %xmm14,%xmm6
+ pand %xmm13,%xmm2
+ pxor %xmm6,%xmm2
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm3
+ pxor %xmm6,%xmm0
+ pxor %xmm4,%xmm5
+
+ pxor %xmm0,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm6,%xmm4
+ pxor %xmm1,%xmm3
+ pxor %xmm15,%xmm6
+ pxor %xmm4,%xmm3
+ pxor %xmm5,%xmm2
+ pxor %xmm0,%xmm5
+ pxor %xmm3,%xmm2
+
+ pxor %xmm15,%xmm3
+ pxor %xmm2,%xmm6
+ decl %r10d
+ jl .Ldec_done
+
+ pshufd $78,%xmm15,%xmm7
+ pshufd $78,%xmm2,%xmm13
+ pxor %xmm15,%xmm7
+ pshufd $78,%xmm4,%xmm14
+ pxor %xmm2,%xmm13
+ pshufd $78,%xmm0,%xmm8
+ pxor %xmm4,%xmm14
+ pshufd $78,%xmm5,%xmm9
+ pxor %xmm0,%xmm8
+ pshufd $78,%xmm3,%xmm10
+ pxor %xmm5,%xmm9
+ pxor %xmm13,%xmm15
+ pxor %xmm13,%xmm0
+ pshufd $78,%xmm1,%xmm11
+ pxor %xmm3,%xmm10
+ pxor %xmm7,%xmm5
+ pxor %xmm8,%xmm3
+ pshufd $78,%xmm6,%xmm12
+ pxor %xmm1,%xmm11
+ pxor %xmm14,%xmm0
+ pxor %xmm9,%xmm1
+ pxor %xmm6,%xmm12
+
+ pxor %xmm14,%xmm5
+ pxor %xmm13,%xmm3
+ pxor %xmm13,%xmm1
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm14,%xmm1
+ pxor %xmm14,%xmm6
+ pxor %xmm12,%xmm4
+ pshufd $147,%xmm15,%xmm7
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm5,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm3,%xmm10
+ pxor %xmm9,%xmm5
+ pshufd $147,%xmm1,%xmm11
+ pxor %xmm10,%xmm3
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm1
+ pshufd $147,%xmm2,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm2
+ pxor %xmm14,%xmm4
+
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm1,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm3,%xmm11
+ pshufd $78,%xmm1,%xmm7
+ pxor %xmm2,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm5,%xmm10
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm2,%xmm3
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm12,%xmm8
+ pxor %xmm1,%xmm10
+ pxor %xmm14,%xmm6
+ pxor %xmm3,%xmm13
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm2
+ movdqa %xmm13,%xmm5
+ movdqa %xmm8,%xmm4
+ movdqa %xmm2,%xmm1
+ movdqa %xmm10,%xmm2
+ movdqa -16(%r11),%xmm7
+ jnz .Ldec_loop
+ movdqa -32(%r11),%xmm7
+ jmp .Ldec_loop
+.p2align 4
+.Ldec_done:
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm2,%xmm9
+ psrlq $1,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $1,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm6,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm4
+ psllq $1,%xmm2
+ pxor %xmm1,%xmm6
+ psllq $1,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm3,%xmm5
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm15
+ pxor %xmm5,%xmm3
+ psllq $1,%xmm5
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9
+ psrlq $2,%xmm6
+ movdqa %xmm1,%xmm10
+ psrlq $2,%xmm1
+ pxor %xmm4,%xmm6
+ pxor %xmm2,%xmm1
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm1
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm1,%xmm2
+ psllq $2,%xmm1
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm3,%xmm0
+ pxor %xmm5,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm3
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm5
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm3,%xmm9
+ psrlq $4,%xmm3
+ movdqa %xmm5,%xmm10
+ psrlq $4,%xmm5
+ pxor %xmm4,%xmm3
+ pxor %xmm2,%xmm5
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $4,%xmm3
+ pxor %xmm5,%xmm2
+ psllq $4,%xmm5
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm5
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq
+
+.def _bsaes_key_convert; .scl 3; .type 32; .endef
+.p2align 4
+_bsaes_key_convert:
+ leaq .Lmasks(%rip),%r11
+ movdqu (%rcx),%xmm7
+ leaq 16(%rcx),%rcx
+ movdqa 0(%r11),%xmm0
+ movdqa 16(%r11),%xmm1
+ movdqa 32(%r11),%xmm2
+ movdqa 48(%r11),%xmm3
+ movdqa 64(%r11),%xmm4
+ pcmpeqd %xmm5,%xmm5
+
+ movdqu (%rcx),%xmm6
+ movdqa %xmm7,(%rax)
+ leaq 16(%rax),%rax
+ decl %r10d
+ jmp .Lkey_loop
+.p2align 4
+.Lkey_loop:
+.byte 102,15,56,0,244
+
+ movdqa %xmm0,%xmm8
+ movdqa %xmm1,%xmm9
+
+ pand %xmm6,%xmm8
+ pand %xmm6,%xmm9
+ movdqa %xmm2,%xmm10
+ pcmpeqb %xmm0,%xmm8
+ psllq $4,%xmm0
+ movdqa %xmm3,%xmm11
+ pcmpeqb %xmm1,%xmm9
+ psllq $4,%xmm1
+
+ pand %xmm6,%xmm10
+ pand %xmm6,%xmm11
+ movdqa %xmm0,%xmm12
+ pcmpeqb %xmm2,%xmm10
+ psllq $4,%xmm2
+ movdqa %xmm1,%xmm13
+ pcmpeqb %xmm3,%xmm11
+ psllq $4,%xmm3
+
+ movdqa %xmm2,%xmm14
+ movdqa %xmm3,%xmm15
+ pxor %xmm5,%xmm8
+ pxor %xmm5,%xmm9
+
+ pand %xmm6,%xmm12
+ pand %xmm6,%xmm13
+ movdqa %xmm8,0(%rax)
+ pcmpeqb %xmm0,%xmm12
+ psrlq $4,%xmm0
+ movdqa %xmm9,16(%rax)
+ pcmpeqb %xmm1,%xmm13
+ psrlq $4,%xmm1
+ leaq 16(%rcx),%rcx
+
+ pand %xmm6,%xmm14
+ pand %xmm6,%xmm15
+ movdqa %xmm10,32(%rax)
+ pcmpeqb %xmm2,%xmm14
+ psrlq $4,%xmm2
+ movdqa %xmm11,48(%rax)
+ pcmpeqb %xmm3,%xmm15
+ psrlq $4,%xmm3
+ movdqu (%rcx),%xmm6
+
+ pxor %xmm5,%xmm13
+ pxor %xmm5,%xmm14
+ movdqa %xmm12,64(%rax)
+ movdqa %xmm13,80(%rax)
+ movdqa %xmm14,96(%rax)
+ movdqa %xmm15,112(%rax)
+ leaq 128(%rax),%rax
+ decl %r10d
+ jnz .Lkey_loop
+
+ movdqa 80(%r11),%xmm7
+
+ retq
+
+
+.globl bsaes_cbc_encrypt
+.def bsaes_cbc_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+bsaes_cbc_encrypt:
+ movl 48(%rsp),%r11d
+ cmpl $0,%r11d
+ jne asm_AES_cbc_encrypt
+ cmpq $128,%r8
+ jb asm_AES_cbc_encrypt
+
+ movq %rsp,%rax
+.Lcbc_dec_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq 160(%rsp),%r10
+ leaq -160(%rsp),%rsp
+ movaps %xmm6,64(%rsp)
+ movaps %xmm7,80(%rsp)
+ movaps %xmm8,96(%rsp)
+ movaps %xmm9,112(%rsp)
+ movaps %xmm10,128(%rsp)
+ movaps %xmm11,144(%rsp)
+ movaps %xmm12,160(%rsp)
+ movaps %xmm13,176(%rsp)
+ movaps %xmm14,192(%rsp)
+ movaps %xmm15,208(%rsp)
+.Lcbc_dec_body:
+ movq %rsp,%rbp
+ movl 240(%r9),%eax
+ movq %rcx,%r12
+ movq %rdx,%r13
+ movq %r8,%r14
+ movq %r9,%r15
+ movq %r10,%rbx
+ shrq $4,%r14
+
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor (%rsp),%xmm7
+ movdqa %xmm6,(%rax)
+ movdqa %xmm7,(%rsp)
+
+ movdqu (%rbx),%xmm14
+ subq $8,%r14
+.Lcbc_dec_loop:
+ movdqu 0(%r12),%xmm15
+ movdqu 16(%r12),%xmm0
+ movdqu 32(%r12),%xmm1
+ movdqu 48(%r12),%xmm2
+ movdqu 64(%r12),%xmm3
+ movdqu 80(%r12),%xmm4
+ movq %rsp,%rax
+ movdqu 96(%r12),%xmm5
+ movl %edx,%r10d
+ movdqu 112(%r12),%xmm6
+ movdqa %xmm14,32(%rbp)
+
+ call _bsaes_decrypt8
+
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm2
+ movdqu 112(%r12),%xmm14
+ pxor %xmm13,%xmm4
+ movdqu %xmm15,0(%r13)
+ leaq 128(%r12),%r12
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ subq $8,%r14
+ jnc .Lcbc_dec_loop
+
+ addq $8,%r14
+ jz .Lcbc_dec_done
+
+ movdqu 0(%r12),%xmm15
+ movq %rsp,%rax
+ movl %edx,%r10d
+ cmpq $2,%r14
+ jb .Lcbc_dec_one
+ movdqu 16(%r12),%xmm0
+ je .Lcbc_dec_two
+ movdqu 32(%r12),%xmm1
+ cmpq $4,%r14
+ jb .Lcbc_dec_three
+ movdqu 48(%r12),%xmm2
+ je .Lcbc_dec_four
+ movdqu 64(%r12),%xmm3
+ cmpq $6,%r14
+ jb .Lcbc_dec_five
+ movdqu 80(%r12),%xmm4
+ je .Lcbc_dec_six
+ movdqu 96(%r12),%xmm5
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm14
+ pxor %xmm12,%xmm2
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4
+.Lcbc_dec_six:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm14
+ pxor %xmm11,%xmm6
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4
+.Lcbc_dec_five:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm14
+ pxor %xmm10,%xmm1
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4
+.Lcbc_dec_four:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm14
+ pxor %xmm9,%xmm3
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4
+.Lcbc_dec_three:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm14
+ pxor %xmm8,%xmm5
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4
+.Lcbc_dec_two:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm14
+ pxor %xmm7,%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4
+.Lcbc_dec_one:
+ leaq (%r12),%rcx
+ leaq 32(%rbp),%rdx
+ leaq (%r15),%r8
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm14
+ movdqu %xmm14,(%r13)
+ movdqa %xmm15,%xmm14
+
+.Lcbc_dec_done:
+ movdqu %xmm14,(%rbx)
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lcbc_dec_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lcbc_dec_bzero
+
+ leaq (%rbp),%rsp
+ movaps 64(%rbp),%xmm6
+ movaps 80(%rbp),%xmm7
+ movaps 96(%rbp),%xmm8
+ movaps 112(%rbp),%xmm9
+ movaps 128(%rbp),%xmm10
+ movaps 144(%rbp),%xmm11
+ movaps 160(%rbp),%xmm12
+ movaps 176(%rbp),%xmm13
+ movaps 192(%rbp),%xmm14
+ movaps 208(%rbp),%xmm15
+ leaq 160(%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lcbc_dec_epilogue:
+ retq
+
+
+.globl bsaes_ctr32_encrypt_blocks
+.def bsaes_ctr32_encrypt_blocks; .scl 2; .type 32; .endef
+.p2align 4
+bsaes_ctr32_encrypt_blocks:
+ movq %rsp,%rax
+.Lctr_enc_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq 160(%rsp),%r10
+ leaq -160(%rsp),%rsp
+ movaps %xmm6,64(%rsp)
+ movaps %xmm7,80(%rsp)
+ movaps %xmm8,96(%rsp)
+ movaps %xmm9,112(%rsp)
+ movaps %xmm10,128(%rsp)
+ movaps %xmm11,144(%rsp)
+ movaps %xmm12,160(%rsp)
+ movaps %xmm13,176(%rsp)
+ movaps %xmm14,192(%rsp)
+ movaps %xmm15,208(%rsp)
+.Lctr_enc_body:
+ movq %rsp,%rbp
+ movdqu (%r10),%xmm0
+ movl 240(%r9),%eax
+ movq %rcx,%r12
+ movq %rdx,%r13
+ movq %r8,%r14
+ movq %r9,%r15
+ movdqa %xmm0,32(%rbp)
+ cmpq $8,%r8
+ jb .Lctr_enc_short
+
+ movl %eax,%ebx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %ebx,%r10d
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7
+ movdqa %xmm7,(%rax)
+
+ movdqa (%rsp),%xmm8
+ leaq .LADD1(%rip),%r11
+ movdqa 32(%rbp),%xmm15
+ movdqa -32(%r11),%xmm7
+.byte 102,68,15,56,0,199
+.byte 102,68,15,56,0,255
+ movdqa %xmm8,(%rsp)
+ jmp .Lctr_enc_loop
+.p2align 4
+.Lctr_enc_loop:
+ movdqa %xmm15,32(%rbp)
+ movdqa %xmm15,%xmm0
+ movdqa %xmm15,%xmm1
+ paddd 0(%r11),%xmm0
+ movdqa %xmm15,%xmm2
+ paddd 16(%r11),%xmm1
+ movdqa %xmm15,%xmm3
+ paddd 32(%r11),%xmm2
+ movdqa %xmm15,%xmm4
+ paddd 48(%r11),%xmm3
+ movdqa %xmm15,%xmm5
+ paddd 64(%r11),%xmm4
+ movdqa %xmm15,%xmm6
+ paddd 80(%r11),%xmm5
+ paddd 96(%r11),%xmm6
+
+
+
+ movdqa (%rsp),%xmm8
+ leaq 16(%rsp),%rax
+ movdqa -16(%r11),%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+ leaq .LBS0(%rip),%r11
+.byte 102,15,56,0,247
+ movl %ebx,%r10d
+
+ call _bsaes_encrypt8_bitslice
+
+ subq $8,%r14
+ jc .Lctr_enc_loop_done
+
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ movdqu 32(%r12),%xmm9
+ movdqu 48(%r12),%xmm10
+ movdqu 64(%r12),%xmm11
+ movdqu 80(%r12),%xmm12
+ movdqu 96(%r12),%xmm13
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ pxor %xmm15,%xmm7
+ movdqa 32(%rbp),%xmm15
+ pxor %xmm8,%xmm0
+ movdqu %xmm7,0(%r13)
+ pxor %xmm9,%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor %xmm10,%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor %xmm11,%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor %xmm12,%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor %xmm13,%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor %xmm14,%xmm4
+ movdqu %xmm1,96(%r13)
+ leaq .LADD1(%rip),%r11
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ paddd 112(%r11),%xmm15
+ jnz .Lctr_enc_loop
+
+ jmp .Lctr_enc_done
+.p2align 4
+.Lctr_enc_loop_done:
+ addq $8,%r14
+ movdqu 0(%r12),%xmm7
+ pxor %xmm7,%xmm15
+ movdqu %xmm15,0(%r13)
+ cmpq $2,%r14
+ jb .Lctr_enc_done
+ movdqu 16(%r12),%xmm8
+ pxor %xmm8,%xmm0
+ movdqu %xmm0,16(%r13)
+ je .Lctr_enc_done
+ movdqu 32(%r12),%xmm9
+ pxor %xmm9,%xmm3
+ movdqu %xmm3,32(%r13)
+ cmpq $4,%r14
+ jb .Lctr_enc_done
+ movdqu 48(%r12),%xmm10
+ pxor %xmm10,%xmm5
+ movdqu %xmm5,48(%r13)
+ je .Lctr_enc_done
+ movdqu 64(%r12),%xmm11
+ pxor %xmm11,%xmm2
+ movdqu %xmm2,64(%r13)
+ cmpq $6,%r14
+ jb .Lctr_enc_done
+ movdqu 80(%r12),%xmm12
+ pxor %xmm12,%xmm6
+ movdqu %xmm6,80(%r13)
+ je .Lctr_enc_done
+ movdqu 96(%r12),%xmm13
+ pxor %xmm13,%xmm1
+ movdqu %xmm1,96(%r13)
+ jmp .Lctr_enc_done
+
+.p2align 4
+.Lctr_enc_short:
+ leaq 32(%rbp),%rcx
+ leaq 48(%rbp),%rdx
+ leaq (%r15),%r8
+ call asm_AES_encrypt
+ movdqu (%r12),%xmm0
+ leaq 16(%r12),%r12
+ movl 44(%rbp),%eax
+ bswapl %eax
+ pxor 48(%rbp),%xmm0
+ incl %eax
+ movdqu %xmm0,(%r13)
+ bswapl %eax
+ leaq 16(%r13),%r13
+ movl %eax,44(%rsp)
+ decq %r14
+ jnz .Lctr_enc_short
+
+.Lctr_enc_done:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lctr_enc_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lctr_enc_bzero
+
+ leaq (%rbp),%rsp
+ movaps 64(%rbp),%xmm6
+ movaps 80(%rbp),%xmm7
+ movaps 96(%rbp),%xmm8
+ movaps 112(%rbp),%xmm9
+ movaps 128(%rbp),%xmm10
+ movaps 144(%rbp),%xmm11
+ movaps 160(%rbp),%xmm12
+ movaps 176(%rbp),%xmm13
+ movaps 192(%rbp),%xmm14
+ movaps 208(%rbp),%xmm15
+ leaq 160(%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lctr_enc_epilogue:
+ retq
+
+.globl bsaes_xts_encrypt
+.def bsaes_xts_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+bsaes_xts_encrypt:
+ movq %rsp,%rax
+.Lxts_enc_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq 160(%rsp),%r10
+ movq 168(%rsp),%r11
+ leaq -160(%rsp),%rsp
+ movaps %xmm6,64(%rsp)
+ movaps %xmm7,80(%rsp)
+ movaps %xmm8,96(%rsp)
+ movaps %xmm9,112(%rsp)
+ movaps %xmm10,128(%rsp)
+ movaps %xmm11,144(%rsp)
+ movaps %xmm12,160(%rsp)
+ movaps %xmm13,176(%rsp)
+ movaps %xmm14,192(%rsp)
+ movaps %xmm15,208(%rsp)
+.Lxts_enc_body:
+ movq %rsp,%rbp
+ movq %rcx,%r12
+ movq %rdx,%r13
+ movq %r8,%r14
+ movq %r9,%r15
+
+ leaq (%r11),%rcx
+ leaq 32(%rbp),%rdx
+ leaq (%r10),%r8
+ call asm_AES_encrypt
+
+ movl 240(%r15),%eax
+ movq %r14,%rbx
+
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7
+ movdqa %xmm7,(%rax)
+
+ andq $-16,%r14
+ subq $128,%rsp
+ movdqa 32(%rbp),%xmm6
+
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+
+ subq $128,%r14
+ jc .Lxts_enc_short
+ jmp .Lxts_enc_loop
+
+.p2align 4
+.Lxts_enc_loop:
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp)
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ pxor %xmm14,%xmm6
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm1,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+
+ subq $128,%r14
+ jnc .Lxts_enc_loop
+
+.Lxts_enc_short:
+ addq $128,%r14
+ jz .Lxts_enc_done
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je .Lxts_enc_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je .Lxts_enc_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je .Lxts_enc_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je .Lxts_enc_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je .Lxts_enc_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je .Lxts_enc_6
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp)
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm1,96(%r13)
+ leaq 112(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.p2align 4
+.Lxts_enc_6:
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+
+ movdqa 96(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.p2align 4
+.Lxts_enc_5:
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ movdqu %xmm2,64(%r13)
+ leaq 80(%r13),%r13
+
+ movdqa 80(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.p2align 4
+.Lxts_enc_4:
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ movdqu %xmm5,48(%r13)
+ leaq 64(%r13),%r13
+
+ movdqa 64(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.p2align 4
+.Lxts_enc_3:
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm3,32(%r13)
+ leaq 48(%r13),%r13
+
+ movdqa 48(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.p2align 4
+.Lxts_enc_2:
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+
+ movdqa 32(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.p2align 4
+.Lxts_enc_1:
+ pxor %xmm15,%xmm7
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rcx
+ leaq 32(%rbp),%rdx
+ leaq (%r15),%r8
+ call asm_AES_encrypt
+ pxor 32(%rbp),%xmm15
+
+
+
+
+
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+
+ movdqa 16(%rsp),%xmm6
+
+.Lxts_enc_done:
+ andl $15,%ebx
+ jz .Lxts_enc_ret
+ movq %r13,%rdx
+
+.Lxts_enc_steal:
+ movzbl (%r12),%eax
+ movzbl -16(%rdx),%ecx
+ leaq 1(%r12),%r12
+ movb %al,-16(%rdx)
+ movb %cl,0(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz .Lxts_enc_steal
+
+ movdqu -16(%r13),%xmm15
+ leaq 32(%rbp),%rcx
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rdx
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%r8
+ call asm_AES_encrypt
+ pxor 32(%rbp),%xmm6
+ movdqu %xmm6,-16(%r13)
+
+.Lxts_enc_ret:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lxts_enc_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lxts_enc_bzero
+
+ leaq (%rbp),%rsp
+ movaps 64(%rbp),%xmm6
+ movaps 80(%rbp),%xmm7
+ movaps 96(%rbp),%xmm8
+ movaps 112(%rbp),%xmm9
+ movaps 128(%rbp),%xmm10
+ movaps 144(%rbp),%xmm11
+ movaps 160(%rbp),%xmm12
+ movaps 176(%rbp),%xmm13
+ movaps 192(%rbp),%xmm14
+ movaps 208(%rbp),%xmm15
+ leaq 160(%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lxts_enc_epilogue:
+ retq
+
+
+.globl bsaes_xts_decrypt
+.def bsaes_xts_decrypt; .scl 2; .type 32; .endef
+.p2align 4
+bsaes_xts_decrypt:
+ movq %rsp,%rax
+.Lxts_dec_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq 160(%rsp),%r10
+ movq 168(%rsp),%r11
+ leaq -160(%rsp),%rsp
+ movaps %xmm6,64(%rsp)
+ movaps %xmm7,80(%rsp)
+ movaps %xmm8,96(%rsp)
+ movaps %xmm9,112(%rsp)
+ movaps %xmm10,128(%rsp)
+ movaps %xmm11,144(%rsp)
+ movaps %xmm12,160(%rsp)
+ movaps %xmm13,176(%rsp)
+ movaps %xmm14,192(%rsp)
+ movaps %xmm15,208(%rsp)
+.Lxts_dec_body:
+ movq %rsp,%rbp
+ movq %rcx,%r12
+ movq %rdx,%r13
+ movq %r8,%r14
+ movq %r9,%r15
+
+ leaq (%r11),%rcx
+ leaq 32(%rbp),%rdx
+ leaq (%r10),%r8
+ call asm_AES_encrypt
+
+ movl 240(%r15),%eax
+ movq %r14,%rbx
+
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor (%rsp),%xmm7
+ movdqa %xmm6,(%rax)
+ movdqa %xmm7,(%rsp)
+
+ xorl %eax,%eax
+ andq $-16,%r14
+ testl $15,%ebx
+ setnz %al
+ shlq $4,%rax
+ subq %rax,%r14
+
+ subq $128,%rsp
+ movdqa 32(%rbp),%xmm6
+
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+
+ subq $128,%r14
+ jc .Lxts_dec_short
+ jmp .Lxts_dec_loop
+
+.p2align 4
+.Lxts_dec_loop:
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp)
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ pxor %xmm14,%xmm6
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+
+ subq $128,%r14
+ jnc .Lxts_dec_loop
+
+.Lxts_dec_short:
+ addq $128,%r14
+ jz .Lxts_dec_done
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je .Lxts_dec_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je .Lxts_dec_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je .Lxts_dec_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je .Lxts_dec_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je .Lxts_dec_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je .Lxts_dec_6
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp)
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ leaq 112(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+.Lxts_dec_6:
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+
+ movdqa 96(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+.Lxts_dec_5:
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ leaq 80(%r13),%r13
+
+ movdqa 80(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+.Lxts_dec_4:
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ leaq 64(%r13),%r13
+
+ movdqa 64(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+.Lxts_dec_3:
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ leaq 48(%r13),%r13
+
+ movdqa 48(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+.Lxts_dec_2:
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+
+ movdqa 32(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+.Lxts_dec_1:
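+/* A single block remains; use the table-based asm_AES_decrypt rather
+   than spinning up the eight-way bit-sliced kernel for one block. */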
+ pxor %xmm15,%xmm7
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rcx
+ leaq 32(%rbp),%rdx
+ leaq (%r15),%r8
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm15
+
+
+
+
+
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+
+ movdqa 16(%rsp),%xmm6
+
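+/* Handle a trailing partial block with XTS ciphertext stealing: decrypt
+   the last full block under the next tweak, splice in the leftover tail
+   bytes, then decrypt the spliced block under the saved tweak (%xmm5). */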
+.Lxts_dec_done:
+ andl $15,%ebx
+ jz .Lxts_dec_ret
+
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ movdqa %xmm6,%xmm5
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ movdqu (%r12),%xmm15
+ pxor %xmm13,%xmm6
+
+ leaq 32(%rbp),%rcx
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rdx
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%r8
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm6
+ movq %r13,%rdx
+ movdqu %xmm6,(%r13)
+
+.Lxts_dec_steal:
+ movzbl 16(%r12),%eax
+ movzbl (%rdx),%ecx
+ leaq 1(%r12),%r12
+ movb %al,(%rdx)
+ movb %cl,16(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz .Lxts_dec_steal
+
+ movdqu (%r13),%xmm15
+ leaq 32(%rbp),%rcx
+ pxor %xmm5,%xmm15
+ leaq 32(%rbp),%rdx
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%r8
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm5
+ movdqu %xmm5,(%r13)
+
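+/* Zero the stack scratch area (tweak values and expanded key material)
+   before returning. */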
+.Lxts_dec_ret:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lxts_dec_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lxts_dec_bzero
+
+ leaq (%rbp),%rsp
+ movaps 64(%rbp),%xmm6
+ movaps 80(%rbp),%xmm7
+ movaps 96(%rbp),%xmm8
+ movaps 112(%rbp),%xmm9
+ movaps 128(%rbp),%xmm10
+ movaps 144(%rbp),%xmm11
+ movaps 160(%rbp),%xmm12
+ movaps 176(%rbp),%xmm13
+ movaps 192(%rbp),%xmm14
+ movaps 208(%rbp),%xmm15
+ leaq 160(%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lxts_dec_epilogue:
+ retq
+
+
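+/* Constant pool: bit-slice/ShiftRows shuffle masks, per-lane counter
+   increments for CTR mode, and the XTS feedback word (.Lxts_magic). */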
+.p2align 6
+_bsaes_const:
+.LM0ISR:
+.quad 0x0a0e0206070b0f03, 0x0004080c0d010509
+.LISRM0:
+.quad 0x01040b0e0205080f, 0x0306090c00070a0d
+.LISR:
+.quad 0x0504070602010003, 0x0f0e0d0c080b0a09
+.LBS0:
+.quad 0x5555555555555555, 0x5555555555555555
+.LBS1:
+.quad 0x3333333333333333, 0x3333333333333333
+.LBS2:
+.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+.LSR:
+.quad 0x0504070600030201, 0x0f0e0d0c0a09080b
+.LSRM0:
+.quad 0x0304090e00050a0f, 0x01060b0c0207080d
+.LM0SR:
+.quad 0x0a0e02060f03070b, 0x0004080c05090d01
+.LSWPUP:
+.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
+.LSWPUPM0SR:
+.quad 0x0a0d02060c03070b, 0x0004080f05090e01
+.LADD1:
+.quad 0x0000000000000000, 0x0000000100000000
+.LADD2:
+.quad 0x0000000000000000, 0x0000000200000000
+.LADD3:
+.quad 0x0000000000000000, 0x0000000300000000
+.LADD4:
+.quad 0x0000000000000000, 0x0000000400000000
+.LADD5:
+.quad 0x0000000000000000, 0x0000000500000000
+.LADD6:
+.quad 0x0000000000000000, 0x0000000600000000
+.LADD7:
+.quad 0x0000000000000000, 0x0000000700000000
+.LADD8:
+.quad 0x0000000000000000, 0x0000000800000000
+.Lxts_magic:
+.long 0x87,0,1,0
+.Lmasks:
+.quad 0x0101010101010101, 0x0101010101010101
+.quad 0x0202020202020202, 0x0202020202020202
+.quad 0x0404040404040404, 0x0404040404040404
+.quad 0x0808080808080808, 0x0808080808080808
+.LM0:
+.quad 0x02060a0e03070b0f, 0x0004080c0105090d
+.L63:
+.quad 0x6363636363636363, 0x6363636363636363
+.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0
+.p2align 6
+
+
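+/* Win64 SEH unwind handler: if the fault lies between prologue and
+   epilogue it restores the saved nonvolatile registers from the frame
+   (.long 0xa548f3fc encodes cld; rep movsq) before resuming the unwind. */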
+.def se_handler; .scl 3; .type 32; .endef
+.p2align 4
+se_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax
+ movq 248(%r8),%rbx
+
+ movq 8(%r9),%rsi
+ movq 56(%r9),%r11
+
+ movl 0(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jb .Lin_prologue
+
+ movq 152(%r8),%rax
+
+ movl 4(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lin_prologue
+
+ movq 160(%r8),%rax
+
+ leaq 64(%rax),%rsi
+ leaq 512(%r8),%rdi
+ movl $20,%ecx
+.long 0xa548f3fc
+ leaq 160(%rax),%rax
+
+ movq 112(%rax),%rbp
+ movq 104(%rax),%rbx
+ movq 96(%rax),%r12
+ movq 88(%rax),%r13
+ movq 80(%rax),%r14
+ movq 72(%rax),%r15
+ leaq 120(%rax),%rax
+ movq %rbx,144(%r8)
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+ movq %r13,224(%r8)
+ movq %r14,232(%r8)
+ movq %r15,240(%r8)
+
+.Lin_prologue:
+ movq %rax,152(%r8)
+
+ movq 40(%r9),%rdi
+ movq %r8,%rsi
+ movl $154,%ecx
+.long 0xa548f3fc
+
+ movq %r9,%rsi
+ xorq %rcx,%rcx
+ movq 8(%rsi),%rdx
+ movq 0(%rsi),%r8
+ movq 16(%rsi),%r9
+ movq 40(%rsi),%r10
+ leaq 56(%rsi),%r11
+ leaq 24(%rsi),%r12
+ movq %r10,32(%rsp)
+ movq %r11,40(%rsp)
+ movq %r12,48(%rsp)
+ movq %rcx,56(%rsp)
+ call *__imp_RtlVirtualUnwind(%rip)
+
+ movl $1,%eax
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+
+
+.section .pdata
+.p2align 2
+.rva .Lcbc_dec_prologue
+.rva .Lcbc_dec_epilogue
+.rva .Lcbc_dec_info
+
+.rva .Lctr_enc_prologue
+.rva .Lctr_enc_epilogue
+.rva .Lctr_enc_info
+
+.rva .Lxts_enc_prologue
+.rva .Lxts_enc_epilogue
+.rva .Lxts_enc_info
+
+.rva .Lxts_dec_prologue
+.rva .Lxts_dec_epilogue
+.rva .Lxts_dec_info
+
+.section .xdata
+.p2align 3
+.Lcbc_dec_info:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lcbc_dec_body,.Lcbc_dec_epilogue
+.Lctr_enc_info:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lctr_enc_body,.Lctr_enc_epilogue
+.Lxts_enc_info:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lxts_enc_body,.Lxts_enc_epilogue
+.Lxts_dec_info:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lxts_dec_body,.Lxts_dec_epilogue
diff --git a/ext/libressl/crypto/aes/vpaes-elf-x86_64.S b/ext/libressl/crypto/aes/vpaes-elf-x86_64.S
new file mode 100644
index 0000000..1e1a6e8
--- /dev/null
+++ b/ext/libressl/crypto/aes/vpaes-elf-x86_64.S
@@ -0,0 +1,832 @@
+#include "x86_arch.h"
+.text
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.type _vpaes_encrypt_core,@function
+.align 16
+_vpaes_encrypt_core:
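+/* Encrypt one block with the vector-permute (SSSE3) technique:
+   %rdx -> expanded key, %xmm0 = state in/out, %eax counts rounds,
+   %r11/%r10 walk the mc_backward shuffle tables. */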
+ movq %rdx,%r9
+ movq $16,%r11
+ movl 240(%rdx),%eax
+ movdqa %xmm9,%xmm1
+ movdqa .Lk_ipt(%rip),%xmm2
+ pandn %xmm0,%xmm1
+ movdqu (%r9),%xmm5
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
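+/* The .byte runs below are hand-assembled SSSE3 instructions (this one,
+   102,15,56,0,208, is pshufb %xmm0,%xmm2), emitted as raw bytes so
+   assemblers without SSSE3 support can still build the file. */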
+.byte 102,15,56,0,208
+ movdqa .Lk_ipt+16(%rip),%xmm0
+.byte 102,15,56,0,193
+ pxor %xmm5,%xmm2
+ pxor %xmm2,%xmm0
+ addq $16,%r9
+ leaq .Lk_mc_backward(%rip),%r10
+ jmp .Lenc_entry
+
+.align 16
+.Lenc_loop:
+
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm5,%xmm4
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+ movdqa %xmm15,%xmm5
+.byte 102,15,56,0,234
+ movdqa -64(%r11,%r10,1),%xmm1
+ movdqa %xmm14,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm5,%xmm2
+ movdqa (%r11,%r10,1),%xmm4
+ movdqa %xmm0,%xmm3
+.byte 102,15,56,0,193
+ addq $16,%r9
+ pxor %xmm2,%xmm0
+.byte 102,15,56,0,220
+ addq $16,%r11
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,193
+ andq $48,%r11
+ pxor %xmm3,%xmm0
+ subq $1,%rax
+
+.Lenc_entry:
+
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm5
+.byte 102,15,56,0,232
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm5,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224
+ pxor %xmm5,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+ movdqu (%r9),%xmm5
+.byte 102,15,56,0,220
+ pxor %xmm1,%xmm3
+ jnz .Lenc_loop
+
+
+ movdqa -96(%r10),%xmm4
+ movdqa -80(%r10),%xmm0
+.byte 102,15,56,0,226
+ pxor %xmm5,%xmm4
+.byte 102,15,56,0,195
+ movdqa 64(%r11,%r10,1),%xmm1
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,193
+ retq
+.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
+
+
+
+
+
+
+.type _vpaes_decrypt_core,@function
+.align 16
+_vpaes_decrypt_core:
+ movq %rdx,%r9
+ movl 240(%rdx),%eax
+ movdqa %xmm9,%xmm1
+ movdqa .Lk_dipt(%rip),%xmm2
+ pandn %xmm0,%xmm1
+ movq %rax,%r11
+ psrld $4,%xmm1
+ movdqu (%r9),%xmm5
+ shlq $4,%r11
+ pand %xmm9,%xmm0
+.byte 102,15,56,0,208
+ movdqa .Lk_dipt+16(%rip),%xmm0
+ xorq $48,%r11
+ leaq .Lk_dsbd(%rip),%r10
+.byte 102,15,56,0,193
+ andq $48,%r11
+ pxor %xmm5,%xmm2
+ movdqa .Lk_mc_forward+48(%rip),%xmm5
+ pxor %xmm2,%xmm0
+ addq $16,%r9
+ addq %r10,%r11
+ jmp .Ldec_entry
+
+.align 16
+.Ldec_loop:
+
+
+
+ movdqa -32(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa -16(%r10),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+ addq $16,%r9
+
+.byte 102,15,56,0,197
+ movdqa 0(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 16(%r10),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+ subq $1,%rax
+
+.byte 102,15,56,0,197
+ movdqa 32(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 48(%r10),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+
+.byte 102,15,56,0,197
+ movdqa 64(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 80(%r10),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+
+.byte 102,15,58,15,237,12
+
+.Ldec_entry:
+
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220
+ pxor %xmm1,%xmm3
+ movdqu (%r9),%xmm0
+ jnz .Ldec_loop
+
+
+ movdqa 96(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 112(%r10),%xmm0
+ movdqa -352(%r11),%xmm2
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,194
+ retq
+.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
+
+
+
+
+
+
+.type _vpaes_schedule_core,@function
+.align 16
+_vpaes_schedule_core:
+
+
+
+
+
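+/* Key-schedule driver: %rcx selects encrypting (0) or decrypting
+   (non-zero) schedules; %esi holds the key size in bits and steers the
+   128/192/256-bit paths below. */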
+ call _vpaes_preheat
+ movdqa .Lk_rcon(%rip),%xmm8
+ movdqu (%rdi),%xmm0
+
+
+ movdqa %xmm0,%xmm3
+ leaq .Lk_ipt(%rip),%r11
+ call _vpaes_schedule_transform
+ movdqa %xmm0,%xmm7
+
+ leaq .Lk_sr(%rip),%r10
+ testq %rcx,%rcx
+ jnz .Lschedule_am_decrypting
+
+
+ movdqu %xmm0,(%rdx)
+ jmp .Lschedule_go
+
+.Lschedule_am_decrypting:
+
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,217
+ movdqu %xmm3,(%rdx)
+ xorq $48,%r8
+
+.Lschedule_go:
+ cmpl $192,%esi
+ ja .Lschedule_256
+ je .Lschedule_192
+
+
+
+
+
+
+
+
+
+
+.Lschedule_128:
+ movl $10,%esi
+
+.Loop_schedule_128:
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last
+ call _vpaes_schedule_mangle
+ jmp .Loop_schedule_128
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.align 16
+.Lschedule_192:
+ movdqu 8(%rdi),%xmm0
+ call _vpaes_schedule_transform
+ movdqa %xmm0,%xmm6
+ pxor %xmm4,%xmm4
+ movhlps %xmm4,%xmm6
+ movl $4,%esi
+
+.Loop_schedule_192:
+ call _vpaes_schedule_round
+.byte 102,15,58,15,198,8
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ jmp .Loop_schedule_192
+
+
+
+
+
+
+
+
+
+
+
+.align 16
+.Lschedule_256:
+ movdqu 16(%rdi),%xmm0
+ call _vpaes_schedule_transform
+ movl $7,%esi
+
+.Loop_schedule_256:
+ call _vpaes_schedule_mangle
+ movdqa %xmm0,%xmm6
+
+
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last
+ call _vpaes_schedule_mangle
+
+
+ pshufd $255,%xmm0,%xmm0
+ movdqa %xmm7,%xmm5
+ movdqa %xmm6,%xmm7
+ call _vpaes_schedule_low_round
+ movdqa %xmm5,%xmm7
+
+ jmp .Loop_schedule_256
+
+
+
+
+
+
+
+
+
+
+
+
+.align 16
+.Lschedule_mangle_last:
+
+ leaq .Lk_deskew(%rip),%r11
+ testq %rcx,%rcx
+ jnz .Lschedule_mangle_last_dec
+
+
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,193
+ leaq .Lk_opt(%rip),%r11
+ addq $32,%rdx
+
+.Lschedule_mangle_last_dec:
+ addq $-16,%rdx
+ pxor .Lk_s63(%rip),%xmm0
+ call _vpaes_schedule_transform
+ movdqu %xmm0,(%rdx)
+
+
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+ retq
+.size _vpaes_schedule_core,.-_vpaes_schedule_core
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.type _vpaes_schedule_192_smear,@function
+.align 16
+_vpaes_schedule_192_smear:
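+/* Helper for the 192-bit schedule: folds the carried-over half block in
+   %xmm7 into %xmm6 so the 24-byte key material fills out the 16-byte
+   round-key slots (see the .Loop_schedule_192 call sites). */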
+ pshufd $128,%xmm6,%xmm0
+ pxor %xmm0,%xmm6
+ pshufd $254,%xmm7,%xmm0
+ pxor %xmm0,%xmm6
+ movdqa %xmm6,%xmm0
+ pxor %xmm1,%xmm1
+ movhlps %xmm1,%xmm6
+ retq
+.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.type _vpaes_schedule_round,@function
+.align 16
+_vpaes_schedule_round:
+
+ pxor %xmm1,%xmm1
+.byte 102,65,15,58,15,200,15
+.byte 102,69,15,58,15,192,15
+ pxor %xmm1,%xmm7
+
+
+ pshufd $255,%xmm0,%xmm0
+.byte 102,15,58,15,192,1
+
+
+
+
+_vpaes_schedule_low_round:
+
+ movdqa %xmm7,%xmm1
+ pslldq $4,%xmm7
+ pxor %xmm1,%xmm7
+ movdqa %xmm7,%xmm1
+ pslldq $8,%xmm7
+ pxor %xmm1,%xmm7
+ pxor .Lk_s63(%rip),%xmm7
+
+
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220
+ pxor %xmm1,%xmm3
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+
+
+ pxor %xmm7,%xmm0
+ movdqa %xmm0,%xmm7
+ retq
+.size _vpaes_schedule_round,.-_vpaes_schedule_round
+
+
+
+
+
+
+
+
+
+
+.type _vpaes_schedule_transform,@function
+.align 16
+_vpaes_schedule_transform:
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa (%r11),%xmm2
+.byte 102,15,56,0,208
+ movdqa 16(%r11),%xmm0
+.byte 102,15,56,0,193
+ pxor %xmm2,%xmm0
+ retq
+.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.type _vpaes_schedule_mangle,@function
+.align 16
+_vpaes_schedule_mangle:
+ movdqa %xmm0,%xmm4
+ movdqa .Lk_mc_forward(%rip),%xmm5
+ testq %rcx,%rcx
+ jnz .Lschedule_mangle_dec
+
+
+ addq $16,%rdx
+ pxor .Lk_s63(%rip),%xmm4
+.byte 102,15,56,0,229
+ movdqa %xmm4,%xmm3
+.byte 102,15,56,0,229
+ pxor %xmm4,%xmm3
+.byte 102,15,56,0,229
+ pxor %xmm4,%xmm3
+
+ jmp .Lschedule_mangle_both
+.align 16
+.Lschedule_mangle_dec:
+
+ leaq .Lk_dksd(%rip),%r11
+ movdqa %xmm9,%xmm1
+ pandn %xmm4,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm4
+
+ movdqa 0(%r11),%xmm2
+.byte 102,15,56,0,212
+ movdqa 16(%r11),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221
+
+ movdqa 32(%r11),%xmm2
+.byte 102,15,56,0,212
+ pxor %xmm3,%xmm2
+ movdqa 48(%r11),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221
+
+ movdqa 64(%r11),%xmm2
+.byte 102,15,56,0,212
+ pxor %xmm3,%xmm2
+ movdqa 80(%r11),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221
+
+ movdqa 96(%r11),%xmm2
+.byte 102,15,56,0,212
+ pxor %xmm3,%xmm2
+ movdqa 112(%r11),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+
+ addq $-16,%rdx
+
+.Lschedule_mangle_both:
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,217
+ addq $-16,%r8
+ andq $48,%r8
+ movdqu %xmm3,(%rdx)
+ retq
+.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
+
+
+
+
+.globl vpaes_set_encrypt_key
+.type vpaes_set_encrypt_key,@function
+.align 16
+vpaes_set_encrypt_key:
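+/* rounds = keybits/32 + 5 (vpaes convention), stored in the AES_KEY
+   rounds field at offset 240. */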
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax
+ movl %eax,240(%rdx)
+
+ movl $0,%ecx
+ movl $48,%r8d
+ call _vpaes_schedule_core
+ xorl %eax,%eax
+ retq
+.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
+
+.globl vpaes_set_decrypt_key
+.type vpaes_set_decrypt_key,@function
+.align 16
+vpaes_set_decrypt_key:
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax
+ movl %eax,240(%rdx)
+ shll $4,%eax
+ leaq 16(%rdx,%rax,1),%rdx
+
+ movl $1,%ecx
+ movl %esi,%r8d
+ shrl $1,%r8d
+ andl $32,%r8d
+ xorl $32,%r8d
+ call _vpaes_schedule_core
+ xorl %eax,%eax
+ retq
+.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
+
+.globl vpaes_encrypt
+.type vpaes_encrypt,@function
+.align 16
+vpaes_encrypt:
+ movdqu (%rdi),%xmm0
+ call _vpaes_preheat
+ call _vpaes_encrypt_core
+ movdqu %xmm0,(%rsi)
+ retq
+.size vpaes_encrypt,.-vpaes_encrypt
+
+.globl vpaes_decrypt
+.type vpaes_decrypt,@function
+.align 16
+vpaes_decrypt:
+ movdqu (%rdi),%xmm0
+ call _vpaes_preheat
+ call _vpaes_decrypt_core
+ movdqu %xmm0,(%rsi)
+ retq
+.size vpaes_decrypt,.-vpaes_decrypt
+.globl vpaes_cbc_encrypt
+.type vpaes_cbc_encrypt,@function
+.align 16
+vpaes_cbc_encrypt:
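+/* CBC entry: %r9d non-zero selects encryption; %rsi is rebased relative
+   to %rdi so one pointer increment advances input and output together. */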
+ xchgq %rcx,%rdx
+ subq $16,%rcx
+ jc .Lcbc_abort
+ movdqu (%r8),%xmm6
+ subq %rdi,%rsi
+ call _vpaes_preheat
+ cmpl $0,%r9d
+ je .Lcbc_dec_loop
+ jmp .Lcbc_enc_loop
+.align 16
+.Lcbc_enc_loop:
+ movdqu (%rdi),%xmm0
+ pxor %xmm6,%xmm0
+ call _vpaes_encrypt_core
+ movdqa %xmm0,%xmm6
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi
+ subq $16,%rcx
+ jnc .Lcbc_enc_loop
+ jmp .Lcbc_done
+.align 16
+.Lcbc_dec_loop:
+ movdqu (%rdi),%xmm0
+ movdqa %xmm0,%xmm7
+ call _vpaes_decrypt_core
+ pxor %xmm6,%xmm0
+ movdqa %xmm7,%xmm6
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi
+ subq $16,%rcx
+ jnc .Lcbc_dec_loop
+.Lcbc_done:
+ movdqu %xmm6,(%r8)
+.Lcbc_abort:
+ retq
+.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
+
+
+
+
+
+
+.type _vpaes_preheat,@function
+.align 16
+_vpaes_preheat:
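+/* Preload the inversion and S-box lookup constants into %xmm9-%xmm15
+   once, so the core loops never reload them from memory. */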
+ leaq .Lk_s0F(%rip),%r10
+ movdqa -32(%r10),%xmm10
+ movdqa -16(%r10),%xmm11
+ movdqa 0(%r10),%xmm9
+ movdqa 48(%r10),%xmm13
+ movdqa 64(%r10),%xmm12
+ movdqa 80(%r10),%xmm15
+ movdqa 96(%r10),%xmm14
+ retq
+.size _vpaes_preheat,.-_vpaes_preheat
+
+
+
+
+
+.type _vpaes_consts,@object
+.align 64
+_vpaes_consts:
+.Lk_inv:
+.quad 0x0E05060F0D080180, 0x040703090A0B0C02
+.quad 0x01040A060F0B0780, 0x030D0E0C02050809
+
+.Lk_s0F:
+.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
+
+.Lk_ipt:
+.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
+.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
+
+.Lk_sb1:
+.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
+.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
+.Lk_sb2:
+.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
+.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
+.Lk_sbo:
+.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
+.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
+
+.Lk_mc_forward:
+.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
+.quad 0x080B0A0904070605, 0x000302010C0F0E0D
+.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
+.quad 0x000302010C0F0E0D, 0x080B0A0904070605
+
+.Lk_mc_backward:
+.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
+.quad 0x020100030E0D0C0F, 0x0A09080B06050407
+.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
+.quad 0x0A09080B06050407, 0x020100030E0D0C0F
+
+.Lk_sr:
+.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
+.quad 0x030E09040F0A0500, 0x0B06010C07020D08
+.quad 0x0F060D040B020900, 0x070E050C030A0108
+.quad 0x0B0E0104070A0D00, 0x0306090C0F020508
+
+.Lk_rcon:
+.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
+
+.Lk_s63:
+.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
+
+.Lk_opt:
+.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
+.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
+
+.Lk_deskew:
+.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
+.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
+
+
+
+
+
+.Lk_dksd:
+.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
+.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
+.Lk_dksb:
+.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
+.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
+.Lk_dkse:
+.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
+.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
+.Lk_dks9:
+.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
+.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
+
+
+
+
+
+.Lk_dipt:
+.quad 0x0F505B040B545F00, 0x154A411E114E451A
+.quad 0x86E383E660056500, 0x12771772F491F194
+
+.Lk_dsb9:
+.quad 0x851C03539A86D600, 0xCAD51F504F994CC9
+.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
+.Lk_dsbd:
+.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
+.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
+.Lk_dsbb:
+.quad 0xD022649296B44200, 0x602646F6B0F2D404
+.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
+.Lk_dsbe:
+.quad 0x46F2929626D4D000, 0x2242600464B4F6B0
+.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
+.Lk_dsbo:
+.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
+.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
+.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
+.align 64
+.size _vpaes_consts,.-_vpaes_consts
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/aes/vpaes-macosx-x86_64.S b/ext/libressl/crypto/aes/vpaes-macosx-x86_64.S
new file mode 100644
index 0000000..0a892a9
--- /dev/null
+++ b/ext/libressl/crypto/aes/vpaes-macosx-x86_64.S
@@ -0,0 +1,829 @@
+#include "x86_arch.h"
+.text
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.p2align 4
+_vpaes_encrypt_core:
+ movq %rdx,%r9
+ movq $16,%r11
+ movl 240(%rdx),%eax
+ movdqa %xmm9,%xmm1
+ movdqa L$k_ipt(%rip),%xmm2
+ pandn %xmm0,%xmm1
+ movdqu (%r9),%xmm5
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+.byte 102,15,56,0,208
+ movdqa L$k_ipt+16(%rip),%xmm0
+.byte 102,15,56,0,193
+ pxor %xmm5,%xmm2
+ pxor %xmm2,%xmm0
+ addq $16,%r9
+ leaq L$k_mc_backward(%rip),%r10
+ jmp L$enc_entry
+
+.p2align 4
+L$enc_loop:
+
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm5,%xmm4
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+ movdqa %xmm15,%xmm5
+.byte 102,15,56,0,234
+ movdqa -64(%r11,%r10,1),%xmm1
+ movdqa %xmm14,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm5,%xmm2
+ movdqa (%r11,%r10,1),%xmm4
+ movdqa %xmm0,%xmm3
+.byte 102,15,56,0,193
+ addq $16,%r9
+ pxor %xmm2,%xmm0
+.byte 102,15,56,0,220
+ addq $16,%r11
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,193
+ andq $48,%r11
+ pxor %xmm3,%xmm0
+ subq $1,%rax
+
+L$enc_entry:
+
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm5
+.byte 102,15,56,0,232
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm5,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224
+ pxor %xmm5,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+ movdqu (%r9),%xmm5
+.byte 102,15,56,0,220
+ pxor %xmm1,%xmm3
+ jnz L$enc_loop
+
+
+ movdqa -96(%r10),%xmm4
+ movdqa -80(%r10),%xmm0
+.byte 102,15,56,0,226
+ pxor %xmm5,%xmm4
+.byte 102,15,56,0,195
+ movdqa 64(%r11,%r10,1),%xmm1
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,193
+ retq
+
+
+
+
+
+
+
+
+.p2align 4
+_vpaes_decrypt_core:
+ movq %rdx,%r9
+ movl 240(%rdx),%eax
+ movdqa %xmm9,%xmm1
+ movdqa L$k_dipt(%rip),%xmm2
+ pandn %xmm0,%xmm1
+ movq %rax,%r11
+ psrld $4,%xmm1
+ movdqu (%r9),%xmm5
+ shlq $4,%r11
+ pand %xmm9,%xmm0
+.byte 102,15,56,0,208
+ movdqa L$k_dipt+16(%rip),%xmm0
+ xorq $48,%r11
+ leaq L$k_dsbd(%rip),%r10
+.byte 102,15,56,0,193
+ andq $48,%r11
+ pxor %xmm5,%xmm2
+ movdqa L$k_mc_forward+48(%rip),%xmm5
+ pxor %xmm2,%xmm0
+ addq $16,%r9
+ addq %r10,%r11
+ jmp L$dec_entry
+
+.p2align 4
+L$dec_loop:
+
+
+
+ movdqa -32(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa -16(%r10),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+ addq $16,%r9
+
+.byte 102,15,56,0,197
+ movdqa 0(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 16(%r10),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+ subq $1,%rax
+
+.byte 102,15,56,0,197
+ movdqa 32(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 48(%r10),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+
+.byte 102,15,56,0,197
+ movdqa 64(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 80(%r10),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+
+.byte 102,15,58,15,237,12
+
+L$dec_entry:
+
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220
+ pxor %xmm1,%xmm3
+ movdqu (%r9),%xmm0
+ jnz L$dec_loop
+
+
+ movdqa 96(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 112(%r10),%xmm0
+ movdqa -352(%r11),%xmm2
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,194
+ retq
+
+
+
+
+
+
+
+
+.p2align 4
+_vpaes_schedule_core:
+
+
+
+
+
+ call _vpaes_preheat
+ movdqa L$k_rcon(%rip),%xmm8
+ movdqu (%rdi),%xmm0
+
+
+ movdqa %xmm0,%xmm3
+ leaq L$k_ipt(%rip),%r11
+ call _vpaes_schedule_transform
+ movdqa %xmm0,%xmm7
+
+ leaq L$k_sr(%rip),%r10
+ testq %rcx,%rcx
+ jnz L$schedule_am_decrypting
+
+
+ movdqu %xmm0,(%rdx)
+ jmp L$schedule_go
+
+L$schedule_am_decrypting:
+
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,217
+ movdqu %xmm3,(%rdx)
+ xorq $48,%r8
+
+L$schedule_go:
+ cmpl $192,%esi
+ ja L$schedule_256
+ je L$schedule_192
+
+
+
+
+
+
+
+
+
+
+L$schedule_128:
+ movl $10,%esi
+
+L$oop_schedule_128:
+ call _vpaes_schedule_round
+ decq %rsi
+ jz L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+ jmp L$oop_schedule_128
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.p2align 4
+L$schedule_192:
+ movdqu 8(%rdi),%xmm0
+ call _vpaes_schedule_transform
+ movdqa %xmm0,%xmm6
+ pxor %xmm4,%xmm4
+ movhlps %xmm4,%xmm6
+ movl $4,%esi
+
+L$oop_schedule_192:
+ call _vpaes_schedule_round
+.byte 102,15,58,15,198,8
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_round
+ decq %rsi
+ jz L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ jmp L$oop_schedule_192
+
+
+
+
+
+
+
+
+
+
+
+.p2align 4
+L$schedule_256:
+ movdqu 16(%rdi),%xmm0
+ call _vpaes_schedule_transform
+ movl $7,%esi
+
+L$oop_schedule_256:
+ call _vpaes_schedule_mangle
+ movdqa %xmm0,%xmm6
+
+
+ call _vpaes_schedule_round
+ decq %rsi
+ jz L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+
+
+ pshufd $255,%xmm0,%xmm0
+ movdqa %xmm7,%xmm5
+ movdqa %xmm6,%xmm7
+ call _vpaes_schedule_low_round
+ movdqa %xmm5,%xmm7
+
+ jmp L$oop_schedule_256
+
+
+
+
+
+
+
+
+
+
+
+
+.p2align 4
+L$schedule_mangle_last:
+
+ leaq L$k_deskew(%rip),%r11
+ testq %rcx,%rcx
+ jnz L$schedule_mangle_last_dec
+
+
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,193
+ leaq L$k_opt(%rip),%r11
+ addq $32,%rdx
+
+L$schedule_mangle_last_dec:
+ addq $-16,%rdx
+ pxor L$k_s63(%rip),%xmm0
+ call _vpaes_schedule_transform
+ movdqu %xmm0,(%rdx)
+
+
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+ retq
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.p2align 4
+_vpaes_schedule_192_smear:
+ pshufd $128,%xmm6,%xmm0
+ pxor %xmm0,%xmm6
+ pshufd $254,%xmm7,%xmm0
+ pxor %xmm0,%xmm6
+ movdqa %xmm6,%xmm0
+ pxor %xmm1,%xmm1
+ movhlps %xmm1,%xmm6
+ retq
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.p2align 4
+_vpaes_schedule_round:
+
+ pxor %xmm1,%xmm1
+.byte 102,65,15,58,15,200,15
+.byte 102,69,15,58,15,192,15
+ pxor %xmm1,%xmm7
+
+
+ pshufd $255,%xmm0,%xmm0
+.byte 102,15,58,15,192,1
+
+
+
+
+_vpaes_schedule_low_round:
+
+ movdqa %xmm7,%xmm1
+ pslldq $4,%xmm7
+ pxor %xmm1,%xmm7
+ movdqa %xmm7,%xmm1
+ pslldq $8,%xmm7
+ pxor %xmm1,%xmm7
+ pxor L$k_s63(%rip),%xmm7
+
+
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220
+ pxor %xmm1,%xmm3
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+
+
+ pxor %xmm7,%xmm0
+ movdqa %xmm0,%xmm7
+ retq
+
+
+
+
+
+
+
+
+
+
+
+
+.p2align 4
+_vpaes_schedule_transform:
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa (%r11),%xmm2
+.byte 102,15,56,0,208
+ movdqa 16(%r11),%xmm0
+.byte 102,15,56,0,193
+ pxor %xmm2,%xmm0
+ retq
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.p2align 4
+_vpaes_schedule_mangle:
+ movdqa %xmm0,%xmm4
+ movdqa L$k_mc_forward(%rip),%xmm5
+ testq %rcx,%rcx
+ jnz L$schedule_mangle_dec
+
+
+ addq $16,%rdx
+ pxor L$k_s63(%rip),%xmm4
+.byte 102,15,56,0,229
+ movdqa %xmm4,%xmm3
+.byte 102,15,56,0,229
+ pxor %xmm4,%xmm3
+.byte 102,15,56,0,229
+ pxor %xmm4,%xmm3
+
+ jmp L$schedule_mangle_both
+.p2align 4
+L$schedule_mangle_dec:
+
+ leaq L$k_dksd(%rip),%r11
+ movdqa %xmm9,%xmm1
+ pandn %xmm4,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm4
+
+ movdqa 0(%r11),%xmm2
+.byte 102,15,56,0,212
+ movdqa 16(%r11),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221
+
+ movdqa 32(%r11),%xmm2
+.byte 102,15,56,0,212
+ pxor %xmm3,%xmm2
+ movdqa 48(%r11),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221
+
+ movdqa 64(%r11),%xmm2
+.byte 102,15,56,0,212
+ pxor %xmm3,%xmm2
+ movdqa 80(%r11),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221
+
+ movdqa 96(%r11),%xmm2
+.byte 102,15,56,0,212
+ pxor %xmm3,%xmm2
+ movdqa 112(%r11),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+
+ addq $-16,%rdx
+
+L$schedule_mangle_both:
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,217
+ addq $-16,%r8
+ andq $48,%r8
+ movdqu %xmm3,(%rdx)
+ retq
+
+
+
+
+
+.globl _vpaes_set_encrypt_key
+
+.p2align 4
+_vpaes_set_encrypt_key:
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax
+ movl %eax,240(%rdx)
+
+ movl $0,%ecx
+ movl $48,%r8d
+ call _vpaes_schedule_core
+ xorl %eax,%eax
+ retq
+
+
+.globl _vpaes_set_decrypt_key
+
+.p2align 4
+_vpaes_set_decrypt_key:
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax
+ movl %eax,240(%rdx)
+ shll $4,%eax
+ leaq 16(%rdx,%rax,1),%rdx
+
+ movl $1,%ecx
+ movl %esi,%r8d
+ shrl $1,%r8d
+ andl $32,%r8d
+ xorl $32,%r8d
+ call _vpaes_schedule_core
+ xorl %eax,%eax
+ retq
+
+
+.globl _vpaes_encrypt
+
+.p2align 4
+_vpaes_encrypt:
+ movdqu (%rdi),%xmm0
+ call _vpaes_preheat
+ call _vpaes_encrypt_core
+ movdqu %xmm0,(%rsi)
+ retq
+
+
+.globl _vpaes_decrypt
+
+.p2align 4
+_vpaes_decrypt:
+ movdqu (%rdi),%xmm0
+ call _vpaes_preheat
+ call _vpaes_decrypt_core
+ movdqu %xmm0,(%rsi)
+ retq
+
+.globl _vpaes_cbc_encrypt
+
+.p2align 4
+_vpaes_cbc_encrypt:
+ xchgq %rcx,%rdx
+ subq $16,%rcx
+ jc L$cbc_abort
+ movdqu (%r8),%xmm6
+ subq %rdi,%rsi
+ call _vpaes_preheat
+ cmpl $0,%r9d
+ je L$cbc_dec_loop
+ jmp L$cbc_enc_loop
+.p2align 4
+L$cbc_enc_loop:
+ movdqu (%rdi),%xmm0
+ pxor %xmm6,%xmm0
+ call _vpaes_encrypt_core
+ movdqa %xmm0,%xmm6
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi
+ subq $16,%rcx
+ jnc L$cbc_enc_loop
+ jmp L$cbc_done
+.p2align 4
+L$cbc_dec_loop:
+ movdqu (%rdi),%xmm0
+ movdqa %xmm0,%xmm7
+ call _vpaes_decrypt_core
+ pxor %xmm6,%xmm0
+ movdqa %xmm7,%xmm6
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi
+ subq $16,%rcx
+ jnc L$cbc_dec_loop
+L$cbc_done:
+ movdqu %xmm6,(%r8)
+L$cbc_abort:
+ retq
+
+
+
+
+
+
+
+
+.p2align 4
+_vpaes_preheat:
+ leaq L$k_s0F(%rip),%r10
+ movdqa -32(%r10),%xmm10
+ movdqa -16(%r10),%xmm11
+ movdqa 0(%r10),%xmm9
+ movdqa 48(%r10),%xmm13
+ movdqa 64(%r10),%xmm12
+ movdqa 80(%r10),%xmm15
+ movdqa 96(%r10),%xmm14
+ retq
+
+
+
+
+
+
+
+.p2align 6
+_vpaes_consts:
+L$k_inv:
+.quad 0x0E05060F0D080180, 0x040703090A0B0C02
+.quad 0x01040A060F0B0780, 0x030D0E0C02050809
+
+L$k_s0F:
+.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
+
+L$k_ipt:
+.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
+.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
+
+L$k_sb1:
+.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
+.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
+L$k_sb2:
+.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
+.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
+L$k_sbo:
+.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
+.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
+
+L$k_mc_forward:
+.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
+.quad 0x080B0A0904070605, 0x000302010C0F0E0D
+.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
+.quad 0x000302010C0F0E0D, 0x080B0A0904070605
+
+L$k_mc_backward:
+.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
+.quad 0x020100030E0D0C0F, 0x0A09080B06050407
+.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
+.quad 0x0A09080B06050407, 0x020100030E0D0C0F
+
+L$k_sr:
+.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
+.quad 0x030E09040F0A0500, 0x0B06010C07020D08
+.quad 0x0F060D040B020900, 0x070E050C030A0108
+.quad 0x0B0E0104070A0D00, 0x0306090C0F020508
+
+L$k_rcon:
+.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
+
+L$k_s63:
+.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
+
+L$k_opt:
+.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
+.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
+
+L$k_deskew:
+.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
+.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
+
+
+
+
+
+L$k_dksd:
+.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
+.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
+L$k_dksb:
+.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
+.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
+L$k_dkse:
+.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
+.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
+L$k_dks9:
+.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
+.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
+
+
+
+
+
+L$k_dipt:
+.quad 0x0F505B040B545F00, 0x154A411E114E451A
+.quad 0x86E383E660056500, 0x12771772F491F194
+
+L$k_dsb9:
+.quad 0x851C03539A86D600, 0xCAD51F504F994CC9
+.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
+L$k_dsbd:
+.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
+.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
+L$k_dsbb:
+.quad 0xD022649296B44200, 0x602646F6B0F2D404
+.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
+L$k_dsbe:
+.quad 0x46F2929626D4D000, 0x2242600464B4F6B0
+.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
+L$k_dsbo:
+.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
+.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
+.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
+.p2align 6
+
diff --git a/ext/libressl/crypto/aes/vpaes-masm-x86_64.S b/ext/libressl/crypto/aes/vpaes-masm-x86_64.S
new file mode 100644
index 0000000..e10d98d
--- /dev/null
+++ b/ext/libressl/crypto/aes/vpaes-masm-x86_64.S
@@ -0,0 +1,1213 @@
+; 1 "crypto/aes/vpaes-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/aes/vpaes-masm-x86_64.S.tmp" 2
+OPTION DOTNAME
+
+; 1 "./crypto/x86_arch.h" 1
+
+
+; 16 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+; 40 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 "crypto/aes/vpaes-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+_vpaes_encrypt_core PROC PRIVATE
+ mov r9,rdx
+ mov r11,16
+ mov eax,DWORD PTR[240+rdx]
+ movdqa xmm1,xmm9
+ movdqa xmm2,XMMWORD PTR[$L$k_ipt]
+ pandn xmm1,xmm0
+ movdqu xmm5,XMMWORD PTR[r9]
+ psrld xmm1,4
+ pand xmm0,xmm9
+DB 102,15,56,0,208
+ movdqa xmm0,XMMWORD PTR[(($L$k_ipt+16))]
+DB 102,15,56,0,193
+ pxor xmm2,xmm5
+ pxor xmm0,xmm2
+ add r9,16
+ lea r10,QWORD PTR[$L$k_mc_backward]
+ jmp $L$enc_entry
+
+ALIGN 16
+$L$enc_loop::
+
+ movdqa xmm4,xmm13
+DB 102,15,56,0,226
+ pxor xmm4,xmm5
+ movdqa xmm0,xmm12
+DB 102,15,56,0,195
+ pxor xmm0,xmm4
+ movdqa xmm5,xmm15
+DB 102,15,56,0,234
+ movdqa xmm1,XMMWORD PTR[((-64))+r10*1+r11]
+ movdqa xmm2,xmm14
+DB 102,15,56,0,211
+ pxor xmm2,xmm5
+ movdqa xmm4,XMMWORD PTR[r10*1+r11]
+ movdqa xmm3,xmm0
+DB 102,15,56,0,193
+ add r9,16
+ pxor xmm0,xmm2
+DB 102,15,56,0,220
+ add r11,16
+ pxor xmm3,xmm0
+DB 102,15,56,0,193
+ and r11,030h
+ pxor xmm0,xmm3
+ sub rax,1
+
+$L$enc_entry::
+
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm0
+ psrld xmm1,4
+ pand xmm0,xmm9
+ movdqa xmm5,xmm11
+DB 102,15,56,0,232
+ pxor xmm0,xmm1
+ movdqa xmm3,xmm10
+DB 102,15,56,0,217
+ pxor xmm3,xmm5
+ movdqa xmm4,xmm10
+DB 102,15,56,0,224
+ pxor xmm4,xmm5
+ movdqa xmm2,xmm10
+DB 102,15,56,0,211
+ pxor xmm2,xmm0
+ movdqa xmm3,xmm10
+ movdqu xmm5,XMMWORD PTR[r9]
+DB 102,15,56,0,220
+ pxor xmm3,xmm1
+ jnz $L$enc_loop
+
+
+ movdqa xmm4,XMMWORD PTR[((-96))+r10]
+ movdqa xmm0,XMMWORD PTR[((-80))+r10]
+DB 102,15,56,0,226
+ pxor xmm4,xmm5
+DB 102,15,56,0,195
+ movdqa xmm1,XMMWORD PTR[64+r10*1+r11]
+ pxor xmm0,xmm4
+DB 102,15,56,0,193
+ DB 0F3h,0C3h ;repret
+_vpaes_encrypt_core ENDP
+
+
+
+
+
+
+
+ALIGN 16
+_vpaes_decrypt_core PROC PRIVATE
+ mov r9,rdx
+ mov eax,DWORD PTR[240+rdx]
+ movdqa xmm1,xmm9
+ movdqa xmm2,XMMWORD PTR[$L$k_dipt]
+ pandn xmm1,xmm0
+ mov r11,rax
+ psrld xmm1,4
+ movdqu xmm5,XMMWORD PTR[r9]
+ shl r11,4
+ pand xmm0,xmm9
+DB 102,15,56,0,208
+ movdqa xmm0,XMMWORD PTR[(($L$k_dipt+16))]
+ xor r11,030h
+ lea r10,QWORD PTR[$L$k_dsbd]
+DB 102,15,56,0,193
+ and r11,030h
+ pxor xmm2,xmm5
+ movdqa xmm5,XMMWORD PTR[(($L$k_mc_forward+48))]
+ pxor xmm0,xmm2
+ add r9,16
+ add r11,r10
+ jmp $L$dec_entry
+
+ALIGN 16
+$L$dec_loop::
+
+
+
+ movdqa xmm4,XMMWORD PTR[((-32))+r10]
+DB 102,15,56,0,226
+ pxor xmm4,xmm0
+ movdqa xmm0,XMMWORD PTR[((-16))+r10]
+DB 102,15,56,0,195
+ pxor xmm0,xmm4
+ add r9,16
+
+DB 102,15,56,0,197
+ movdqa xmm4,XMMWORD PTR[r10]
+DB 102,15,56,0,226
+ pxor xmm4,xmm0
+ movdqa xmm0,XMMWORD PTR[16+r10]
+DB 102,15,56,0,195
+ pxor xmm0,xmm4
+ sub rax,1
+
+DB 102,15,56,0,197
+ movdqa xmm4,XMMWORD PTR[32+r10]
+DB 102,15,56,0,226
+ pxor xmm4,xmm0
+ movdqa xmm0,XMMWORD PTR[48+r10]
+DB 102,15,56,0,195
+ pxor xmm0,xmm4
+
+DB 102,15,56,0,197
+ movdqa xmm4,XMMWORD PTR[64+r10]
+DB 102,15,56,0,226
+ pxor xmm4,xmm0
+ movdqa xmm0,XMMWORD PTR[80+r10]
+DB 102,15,56,0,195
+ pxor xmm0,xmm4
+
+DB 102,15,58,15,237,12
+
+$L$dec_entry::
+
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm0
+ psrld xmm1,4
+ pand xmm0,xmm9
+ movdqa xmm2,xmm11
+DB 102,15,56,0,208
+ pxor xmm0,xmm1
+ movdqa xmm3,xmm10
+DB 102,15,56,0,217
+ pxor xmm3,xmm2
+ movdqa xmm4,xmm10
+DB 102,15,56,0,224
+ pxor xmm4,xmm2
+ movdqa xmm2,xmm10
+DB 102,15,56,0,211
+ pxor xmm2,xmm0
+ movdqa xmm3,xmm10
+DB 102,15,56,0,220
+ pxor xmm3,xmm1
+ movdqu xmm0,XMMWORD PTR[r9]
+ jnz $L$dec_loop
+
+
+ movdqa xmm4,XMMWORD PTR[96+r10]
+DB 102,15,56,0,226
+ pxor xmm4,xmm0
+ movdqa xmm0,XMMWORD PTR[112+r10]
+ movdqa xmm2,XMMWORD PTR[((-352))+r11]
+DB 102,15,56,0,195
+ pxor xmm0,xmm4
+DB 102,15,56,0,194
+ DB 0F3h,0C3h ;repret
+_vpaes_decrypt_core ENDP
+
+
+
+
+
+
+
+ALIGN 16
+_vpaes_schedule_core PROC PRIVATE
+
+
+
+
+
+ call _vpaes_preheat
+ movdqa xmm8,XMMWORD PTR[$L$k_rcon]
+ movdqu xmm0,XMMWORD PTR[rdi]
+
+
+ movdqa xmm3,xmm0
+ lea r11,QWORD PTR[$L$k_ipt]
+ call _vpaes_schedule_transform
+ movdqa xmm7,xmm0
+
+ lea r10,QWORD PTR[$L$k_sr]
+ test rcx,rcx
+ jnz $L$schedule_am_decrypting
+
+
+ movdqu XMMWORD PTR[rdx],xmm0
+ jmp $L$schedule_go
+
+$L$schedule_am_decrypting::
+
+ movdqa xmm1,XMMWORD PTR[r10*1+r8]
+DB 102,15,56,0,217
+ movdqu XMMWORD PTR[rdx],xmm3
+ xor r8,030h
+
+$L$schedule_go::
+ cmp esi,192
+ ja $L$schedule_256
+ je $L$schedule_192
+
+
+
+
+
+
+
+
+
+
+$L$schedule_128::
+ mov esi,10
+
+$L$oop_schedule_128::
+ call _vpaes_schedule_round
+ dec rsi
+ jz $L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+ jmp $L$oop_schedule_128
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+$L$schedule_192::
+ movdqu xmm0,XMMWORD PTR[8+rdi]
+ call _vpaes_schedule_transform
+ movdqa xmm6,xmm0
+ pxor xmm4,xmm4
+ movhlps xmm6,xmm4
+ mov esi,4
+
+$L$oop_schedule_192::
+ call _vpaes_schedule_round
+DB 102,15,58,15,198,8
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_round
+ dec rsi
+ jz $L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ jmp $L$oop_schedule_192
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+$L$schedule_256::
+ movdqu xmm0,XMMWORD PTR[16+rdi]
+ call _vpaes_schedule_transform
+ mov esi,7
+
+$L$oop_schedule_256::
+ call _vpaes_schedule_mangle
+ movdqa xmm6,xmm0
+
+
+ call _vpaes_schedule_round
+ dec rsi
+ jz $L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+
+
+ pshufd xmm0,xmm0,0FFh
+ movdqa xmm5,xmm7
+ movdqa xmm7,xmm6
+ call _vpaes_schedule_low_round
+ movdqa xmm7,xmm5
+
+ jmp $L$oop_schedule_256
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+$L$schedule_mangle_last::
+
+ lea r11,QWORD PTR[$L$k_deskew]
+ test rcx,rcx
+ jnz $L$schedule_mangle_last_dec
+
+
+ movdqa xmm1,XMMWORD PTR[r10*1+r8]
+DB 102,15,56,0,193
+ lea r11,QWORD PTR[$L$k_opt]
+ add rdx,32
+
+$L$schedule_mangle_last_dec::
+ add rdx,-16
+ pxor xmm0,XMMWORD PTR[$L$k_s63]
+ call _vpaes_schedule_transform
+ movdqu XMMWORD PTR[rdx],xmm0
+
+
+ pxor xmm0,xmm0
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ pxor xmm6,xmm6
+ pxor xmm7,xmm7
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_core ENDP
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+_vpaes_schedule_192_smear PROC PRIVATE
+ pshufd xmm0,xmm6,080h
+ pxor xmm6,xmm0
+ pshufd xmm0,xmm7,0FEh
+ pxor xmm6,xmm0
+ movdqa xmm0,xmm6
+ pxor xmm1,xmm1
+ movhlps xmm6,xmm1
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_192_smear ENDP
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+_vpaes_schedule_round PROC PRIVATE
+
+ pxor xmm1,xmm1
+DB 102,65,15,58,15,200,15
+DB 102,69,15,58,15,192,15
+ pxor xmm7,xmm1
+
+
+ pshufd xmm0,xmm0,0FFh
+DB 102,15,58,15,192,1
+
+
+
+
+_vpaes_schedule_low_round::
+
+ movdqa xmm1,xmm7
+ pslldq xmm7,4
+ pxor xmm7,xmm1
+ movdqa xmm1,xmm7
+ pslldq xmm7,8
+ pxor xmm7,xmm1
+ pxor xmm7,XMMWORD PTR[$L$k_s63]
+
+
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm0
+ psrld xmm1,4
+ pand xmm0,xmm9
+ movdqa xmm2,xmm11
+DB 102,15,56,0,208
+ pxor xmm0,xmm1
+ movdqa xmm3,xmm10
+DB 102,15,56,0,217
+ pxor xmm3,xmm2
+ movdqa xmm4,xmm10
+DB 102,15,56,0,224
+ pxor xmm4,xmm2
+ movdqa xmm2,xmm10
+DB 102,15,56,0,211
+ pxor xmm2,xmm0
+ movdqa xmm3,xmm10
+DB 102,15,56,0,220
+ pxor xmm3,xmm1
+ movdqa xmm4,xmm13
+DB 102,15,56,0,226
+ movdqa xmm0,xmm12
+DB 102,15,56,0,195
+ pxor xmm0,xmm4
+
+
+ pxor xmm0,xmm7
+ movdqa xmm7,xmm0
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_round ENDP
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+_vpaes_schedule_transform PROC PRIVATE
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm0
+ psrld xmm1,4
+ pand xmm0,xmm9
+ movdqa xmm2,XMMWORD PTR[r11]
+DB 102,15,56,0,208
+ movdqa xmm0,XMMWORD PTR[16+r11]
+DB 102,15,56,0,193
+ pxor xmm0,xmm2
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_transform ENDP
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+_vpaes_schedule_mangle PROC PRIVATE
+ movdqa xmm4,xmm0
+ movdqa xmm5,XMMWORD PTR[$L$k_mc_forward]
+ test rcx,rcx
+ jnz $L$schedule_mangle_dec
+
+
+ add rdx,16
+ pxor xmm4,XMMWORD PTR[$L$k_s63]
+DB 102,15,56,0,229
+ movdqa xmm3,xmm4
+DB 102,15,56,0,229
+ pxor xmm3,xmm4
+DB 102,15,56,0,229
+ pxor xmm3,xmm4
+
+ jmp $L$schedule_mangle_both
+ALIGN 16
+$L$schedule_mangle_dec::
+
+ lea r11,QWORD PTR[$L$k_dksd]
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm4
+ psrld xmm1,4
+ pand xmm4,xmm9
+
+ movdqa xmm2,XMMWORD PTR[r11]
+DB 102,15,56,0,212
+ movdqa xmm3,XMMWORD PTR[16+r11]
+DB 102,15,56,0,217
+ pxor xmm3,xmm2
+DB 102,15,56,0,221
+
+ movdqa xmm2,XMMWORD PTR[32+r11]
+DB 102,15,56,0,212
+ pxor xmm2,xmm3
+ movdqa xmm3,XMMWORD PTR[48+r11]
+DB 102,15,56,0,217
+ pxor xmm3,xmm2
+DB 102,15,56,0,221
+
+ movdqa xmm2,XMMWORD PTR[64+r11]
+DB 102,15,56,0,212
+ pxor xmm2,xmm3
+ movdqa xmm3,XMMWORD PTR[80+r11]
+DB 102,15,56,0,217
+ pxor xmm3,xmm2
+DB 102,15,56,0,221
+
+ movdqa xmm2,XMMWORD PTR[96+r11]
+DB 102,15,56,0,212
+ pxor xmm2,xmm3
+ movdqa xmm3,XMMWORD PTR[112+r11]
+DB 102,15,56,0,217
+ pxor xmm3,xmm2
+
+ add rdx,-16
+
+$L$schedule_mangle_both::
+ movdqa xmm1,XMMWORD PTR[r10*1+r8]
+DB 102,15,56,0,217
+ add r8,-16
+ and r8,030h
+ movdqu XMMWORD PTR[rdx],xmm3
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_mangle ENDP
+
+
+
+
+PUBLIC vpaes_set_encrypt_key
+
+ALIGN 16
+vpaes_set_encrypt_key PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_vpaes_set_encrypt_key::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ lea rsp,QWORD PTR[((-184))+rsp]
+ movaps XMMWORD PTR[16+rsp],xmm6
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+$L$enc_key_body::
+ mov eax,esi
+ shr eax,5
+ add eax,5
+ mov DWORD PTR[240+rdx],eax
+
+ mov ecx,0
+ mov r8d,030h
+ call _vpaes_schedule_core
+ movaps xmm6,XMMWORD PTR[16+rsp]
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+$L$enc_key_epilogue::
+ xor eax,eax
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_vpaes_set_encrypt_key::
+vpaes_set_encrypt_key ENDP
+
+PUBLIC vpaes_set_decrypt_key
+
+ALIGN 16
+vpaes_set_decrypt_key PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_vpaes_set_decrypt_key::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ lea rsp,QWORD PTR[((-184))+rsp]
+ movaps XMMWORD PTR[16+rsp],xmm6
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+$L$dec_key_body::
+ mov eax,esi
+ shr eax,5
+ add eax,5
+ mov DWORD PTR[240+rdx],eax
+ shl eax,4
+ lea rdx,QWORD PTR[16+rax*1+rdx]
+
+ mov ecx,1
+ mov r8d,esi
+ shr r8d,1
+ and r8d,32
+ xor r8d,32
+ call _vpaes_schedule_core
+ movaps xmm6,XMMWORD PTR[16+rsp]
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+$L$dec_key_epilogue::
+ xor eax,eax
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_vpaes_set_decrypt_key::
+vpaes_set_decrypt_key ENDP
+
+PUBLIC vpaes_encrypt
+
+ALIGN 16
+vpaes_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_vpaes_encrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ lea rsp,QWORD PTR[((-184))+rsp]
+ movaps XMMWORD PTR[16+rsp],xmm6
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+$L$enc_body::
+ movdqu xmm0,XMMWORD PTR[rdi]
+ call _vpaes_preheat
+ call _vpaes_encrypt_core
+ movdqu XMMWORD PTR[rsi],xmm0
+ movaps xmm6,XMMWORD PTR[16+rsp]
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+$L$enc_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_vpaes_encrypt::
+vpaes_encrypt ENDP
+
+PUBLIC vpaes_decrypt
+
+ALIGN 16
+vpaes_decrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_vpaes_decrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ lea rsp,QWORD PTR[((-184))+rsp]
+ movaps XMMWORD PTR[16+rsp],xmm6
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+$L$dec_body::
+ movdqu xmm0,XMMWORD PTR[rdi]
+ call _vpaes_preheat
+ call _vpaes_decrypt_core
+ movdqu XMMWORD PTR[rsi],xmm0
+ movaps xmm6,XMMWORD PTR[16+rsp]
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+$L$dec_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_vpaes_decrypt::
+vpaes_decrypt ENDP
+PUBLIC vpaes_cbc_encrypt
+
+ALIGN 16
+vpaes_cbc_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_vpaes_cbc_encrypt::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ xchg rdx,rcx
+ sub rcx,16
+ jc $L$cbc_abort
+ lea rsp,QWORD PTR[((-184))+rsp]
+ movaps XMMWORD PTR[16+rsp],xmm6
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+$L$cbc_body::
+ movdqu xmm6,XMMWORD PTR[r8]
+ sub rsi,rdi
+ call _vpaes_preheat
+ cmp r9d,0
+ je $L$cbc_dec_loop
+ jmp $L$cbc_enc_loop
+ALIGN 16
+$L$cbc_enc_loop::
+ movdqu xmm0,XMMWORD PTR[rdi]
+ pxor xmm0,xmm6
+ call _vpaes_encrypt_core
+ movdqa xmm6,xmm0
+ movdqu XMMWORD PTR[rdi*1+rsi],xmm0
+ lea rdi,QWORD PTR[16+rdi]
+ sub rcx,16
+ jnc $L$cbc_enc_loop
+ jmp $L$cbc_done
+ALIGN 16
+$L$cbc_dec_loop::
+ movdqu xmm0,XMMWORD PTR[rdi]
+ movdqa xmm7,xmm0
+ call _vpaes_decrypt_core
+ pxor xmm0,xmm6
+ movdqa xmm6,xmm7
+ movdqu XMMWORD PTR[rdi*1+rsi],xmm0
+ lea rdi,QWORD PTR[16+rdi]
+ sub rcx,16
+ jnc $L$cbc_dec_loop
+$L$cbc_done::
+ movdqu XMMWORD PTR[r8],xmm6
+ movaps xmm6,XMMWORD PTR[16+rsp]
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+$L$cbc_epilogue::
+$L$cbc_abort::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_vpaes_cbc_encrypt::
+vpaes_cbc_encrypt ENDP
+
+
+
+
+
+
+
+ALIGN 16
+_vpaes_preheat PROC PRIVATE
+ lea r10,QWORD PTR[$L$k_s0F]
+ movdqa xmm10,XMMWORD PTR[((-32))+r10]
+ movdqa xmm11,XMMWORD PTR[((-16))+r10]
+ movdqa xmm9,XMMWORD PTR[r10]
+ movdqa xmm13,XMMWORD PTR[48+r10]
+ movdqa xmm12,XMMWORD PTR[64+r10]
+ movdqa xmm15,XMMWORD PTR[80+r10]
+ movdqa xmm14,XMMWORD PTR[96+r10]
+ DB 0F3h,0C3h ;repret
+_vpaes_preheat ENDP
+
+
+
+
+
+
+ALIGN 64
+_vpaes_consts::
+$L$k_inv::
+ DQ 00E05060F0D080180h,0040703090A0B0C02h
+ DQ 001040A060F0B0780h,0030D0E0C02050809h
+
+$L$k_s0F::
+ DQ 00F0F0F0F0F0F0F0Fh,00F0F0F0F0F0F0F0Fh
+
+$L$k_ipt::
+ DQ 0C2B2E8985A2A7000h,0CABAE09052227808h
+ DQ 04C01307D317C4D00h,0CD80B1FCB0FDCC81h
+
+$L$k_sb1::
+ DQ 0B19BE18FCB503E00h,0A5DF7A6E142AF544h
+ DQ 03618D415FAE22300h,03BF7CCC10D2ED9EFh
+$L$k_sb2::
+ DQ 0E27A93C60B712400h,05EB7E955BC982FCDh
+ DQ 069EB88400AE12900h,0C2A163C8AB82234Ah
+$L$k_sbo::
+ DQ 0D0D26D176FBDC700h,015AABF7AC502A878h
+ DQ 0CFE474A55FBB6A00h,08E1E90D1412B35FAh
+
+$L$k_mc_forward::
+ DQ 00407060500030201h,00C0F0E0D080B0A09h
+ DQ 0080B0A0904070605h,0000302010C0F0E0Dh
+ DQ 00C0F0E0D080B0A09h,00407060500030201h
+ DQ 0000302010C0F0E0Dh,0080B0A0904070605h
+
+$L$k_mc_backward::
+ DQ 00605040702010003h,00E0D0C0F0A09080Bh
+ DQ 0020100030E0D0C0Fh,00A09080B06050407h
+ DQ 00E0D0C0F0A09080Bh,00605040702010003h
+ DQ 00A09080B06050407h,0020100030E0D0C0Fh
+
+$L$k_sr::
+ DQ 00706050403020100h,00F0E0D0C0B0A0908h
+ DQ 0030E09040F0A0500h,00B06010C07020D08h
+ DQ 00F060D040B020900h,0070E050C030A0108h
+ DQ 00B0E0104070A0D00h,00306090C0F020508h
+
+$L$k_rcon::
+ DQ 01F8391B9AF9DEEB6h,0702A98084D7C7D81h
+
+$L$k_s63::
+ DQ 05B5B5B5B5B5B5B5Bh,05B5B5B5B5B5B5B5Bh
+
+$L$k_opt::
+ DQ 0FF9F4929D6B66000h,0F7974121DEBE6808h
+ DQ 001EDBD5150BCEC00h,0E10D5DB1B05C0CE0h
+
+$L$k_deskew::
+ DQ 007E4A34047A4E300h,01DFEB95A5DBEF91Ah
+ DQ 05F36B5DC83EA6900h,02841C2ABF49D1E77h
+
+
+
+
+
+$L$k_dksd::
+ DQ 0FEB91A5DA3E44700h,00740E3A45A1DBEF9h
+ DQ 041C277F4B5368300h,05FDC69EAAB289D1Eh
+$L$k_dksb::
+ DQ 09A4FCA1F8550D500h,003D653861CC94C99h
+ DQ 0115BEDA7B6FC4A00h,0D993256F7E3482C8h
+$L$k_dkse::
+ DQ 0D5031CCA1FC9D600h,053859A4C994F5086h
+ DQ 0A23196054FDC7BE8h,0CD5EF96A20B31487h
+$L$k_dks9::
+ DQ 0B6116FC87ED9A700h,04AED933482255BFCh
+ DQ 04576516227143300h,08BB89FACE9DAFDCEh
+
+
+
+
+
+$L$k_dipt::
+ DQ 00F505B040B545F00h,0154A411E114E451Ah
+ DQ 086E383E660056500h,012771772F491F194h
+
+$L$k_dsb9::
+ DQ 0851C03539A86D600h,0CAD51F504F994CC9h
+ DQ 0C03B1789ECD74900h,0725E2C9EB2FBA565h
+$L$k_dsbd::
+ DQ 07D57CCDFE6B1A200h,0F56E9B13882A4439h
+ DQ 03CE2FAF724C6CB00h,02931180D15DEEFD3h
+$L$k_dsbb::
+ DQ 0D022649296B44200h,0602646F6B0F2D404h
+ DQ 0C19498A6CD596700h,0F3FF0C3E3255AA6Bh
+$L$k_dsbe::
+ DQ 046F2929626D4D000h,02242600464B4F6B0h
+ DQ 00C55A6CDFFAAC100h,09467F36B98593E32h
+$L$k_dsbo::
+ DQ 01387EA537EF94000h,0C7AA6DB9D4943E2Dh
+ DQ 012D7560F93441D00h,0CA4B8159D8C58E9Ch
+DB 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
+DB 111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54
+DB 52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97
+DB 109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32
+DB 85,110,105,118,101,114,115,105,116,121,41,0
+ALIGN 64
+
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$in_prologue
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_prologue
+
+ lea rsi,QWORD PTR[16+rax]
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,20
+ DD 0a548f3fch
+ lea rax,QWORD PTR[184+rax]
+
+$L$in_prologue::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_vpaes_set_encrypt_key
+ DD imagerel $L$SEH_end_vpaes_set_encrypt_key
+ DD imagerel $L$SEH_info_vpaes_set_encrypt_key
+
+ DD imagerel $L$SEH_begin_vpaes_set_decrypt_key
+ DD imagerel $L$SEH_end_vpaes_set_decrypt_key
+ DD imagerel $L$SEH_info_vpaes_set_decrypt_key
+
+ DD imagerel $L$SEH_begin_vpaes_encrypt
+ DD imagerel $L$SEH_end_vpaes_encrypt
+ DD imagerel $L$SEH_info_vpaes_encrypt
+
+ DD imagerel $L$SEH_begin_vpaes_decrypt
+ DD imagerel $L$SEH_end_vpaes_decrypt
+ DD imagerel $L$SEH_info_vpaes_decrypt
+
+ DD imagerel $L$SEH_begin_vpaes_cbc_encrypt
+ DD imagerel $L$SEH_end_vpaes_cbc_encrypt
+ DD imagerel $L$SEH_info_vpaes_cbc_encrypt
+
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_vpaes_set_encrypt_key::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$enc_key_body,imagerel $L$enc_key_epilogue
+$L$SEH_info_vpaes_set_decrypt_key::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$dec_key_body,imagerel $L$dec_key_epilogue
+$L$SEH_info_vpaes_encrypt::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$enc_body,imagerel $L$enc_epilogue
+$L$SEH_info_vpaes_decrypt::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$dec_body,imagerel $L$dec_epilogue
+$L$SEH_info_vpaes_cbc_encrypt::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$cbc_body,imagerel $L$cbc_epilogue
+
+.xdata ENDS
+END
+
diff --git a/ext/libressl/crypto/aes/vpaes-mingw64-x86_64.S b/ext/libressl/crypto/aes/vpaes-mingw64-x86_64.S
new file mode 100644
index 0000000..d6cb860
--- /dev/null
+++ b/ext/libressl/crypto/aes/vpaes-mingw64-x86_64.S
@@ -0,0 +1,1125 @@
+#include "x86_arch.h"
+.text
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.def _vpaes_encrypt_core; .scl 3; .type 32; .endef
+.p2align 4
+_vpaes_encrypt_core:
+ movq %rdx,%r9
+ movq $16,%r11
+ movl 240(%rdx),%eax
+ movdqa %xmm9,%xmm1
+ movdqa .Lk_ipt(%rip),%xmm2
+ pandn %xmm0,%xmm1
+ movdqu (%r9),%xmm5
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+.byte 102,15,56,0,208
+ movdqa .Lk_ipt+16(%rip),%xmm0
+.byte 102,15,56,0,193
+ pxor %xmm5,%xmm2
+ pxor %xmm2,%xmm0
+ addq $16,%r9
+ leaq .Lk_mc_backward(%rip),%r10
+ jmp .Lenc_entry
+
+.p2align 4
+.Lenc_loop:
+
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm5,%xmm4
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+ movdqa %xmm15,%xmm5
+.byte 102,15,56,0,234
+ movdqa -64(%r11,%r10,1),%xmm1
+ movdqa %xmm14,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm5,%xmm2
+ movdqa (%r11,%r10,1),%xmm4
+ movdqa %xmm0,%xmm3
+.byte 102,15,56,0,193
+ addq $16,%r9
+ pxor %xmm2,%xmm0
+.byte 102,15,56,0,220
+ addq $16,%r11
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,193
+ andq $48,%r11
+ pxor %xmm3,%xmm0
+ subq $1,%rax
+
+.Lenc_entry:
+
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm5
+.byte 102,15,56,0,232
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm5,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224
+ pxor %xmm5,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+ movdqu (%r9),%xmm5
+.byte 102,15,56,0,220
+ pxor %xmm1,%xmm3
+ jnz .Lenc_loop
+
+
+ movdqa -96(%r10),%xmm4
+ movdqa -80(%r10),%xmm0
+.byte 102,15,56,0,226
+ pxor %xmm5,%xmm4
+.byte 102,15,56,0,195
+ movdqa 64(%r11,%r10,1),%xmm1
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,193
+ retq
+
+
+
+
+
+
+
+.def _vpaes_decrypt_core; .scl 3; .type 32; .endef
+.p2align 4
+_vpaes_decrypt_core:
+ movq %rdx,%r9
+ movl 240(%rdx),%eax
+ movdqa %xmm9,%xmm1
+ movdqa .Lk_dipt(%rip),%xmm2
+ pandn %xmm0,%xmm1
+ movq %rax,%r11
+ psrld $4,%xmm1
+ movdqu (%r9),%xmm5
+ shlq $4,%r11
+ pand %xmm9,%xmm0
+.byte 102,15,56,0,208
+ movdqa .Lk_dipt+16(%rip),%xmm0
+ xorq $48,%r11
+ leaq .Lk_dsbd(%rip),%r10
+.byte 102,15,56,0,193
+ andq $48,%r11
+ pxor %xmm5,%xmm2
+ movdqa .Lk_mc_forward+48(%rip),%xmm5
+ pxor %xmm2,%xmm0
+ addq $16,%r9
+ addq %r10,%r11
+ jmp .Ldec_entry
+
+.p2align 4
+.Ldec_loop:
+
+
+
+ movdqa -32(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa -16(%r10),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+ addq $16,%r9
+
+.byte 102,15,56,0,197
+ movdqa 0(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 16(%r10),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+ subq $1,%rax
+
+.byte 102,15,56,0,197
+ movdqa 32(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 48(%r10),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+
+.byte 102,15,56,0,197
+ movdqa 64(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 80(%r10),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+
+.byte 102,15,58,15,237,12
+
+.Ldec_entry:
+
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220
+ pxor %xmm1,%xmm3
+ movdqu (%r9),%xmm0
+ jnz .Ldec_loop
+
+
+ movdqa 96(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 112(%r10),%xmm0
+ movdqa -352(%r11),%xmm2
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,194
+ retq
+
+
+
+
+
+
+
+.def _vpaes_schedule_core; .scl 3; .type 32; .endef
+.p2align 4
+_vpaes_schedule_core:
+
+
+
+
+
+ call _vpaes_preheat
+ movdqa .Lk_rcon(%rip),%xmm8
+ movdqu (%rdi),%xmm0
+
+
+ movdqa %xmm0,%xmm3
+ leaq .Lk_ipt(%rip),%r11
+ call _vpaes_schedule_transform
+ movdqa %xmm0,%xmm7
+
+ leaq .Lk_sr(%rip),%r10
+ testq %rcx,%rcx
+ jnz .Lschedule_am_decrypting
+
+
+ movdqu %xmm0,(%rdx)
+ jmp .Lschedule_go
+
+.Lschedule_am_decrypting:
+
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,217
+ movdqu %xmm3,(%rdx)
+ xorq $48,%r8
+
+.Lschedule_go:
+ cmpl $192,%esi
+ ja .Lschedule_256
+ je .Lschedule_192
+
+
+
+
+
+
+
+
+
+
+.Lschedule_128:
+ movl $10,%esi
+
+.Loop_schedule_128:
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last
+ call _vpaes_schedule_mangle
+ jmp .Loop_schedule_128
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.p2align 4
+.Lschedule_192:
+ movdqu 8(%rdi),%xmm0
+ call _vpaes_schedule_transform
+ movdqa %xmm0,%xmm6
+ pxor %xmm4,%xmm4
+ movhlps %xmm4,%xmm6
+ movl $4,%esi
+
+.Loop_schedule_192:
+ call _vpaes_schedule_round
+.byte 102,15,58,15,198,8
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ jmp .Loop_schedule_192
+
+
+
+
+
+
+
+
+
+
+
+.p2align 4
+.Lschedule_256:
+ movdqu 16(%rdi),%xmm0
+ call _vpaes_schedule_transform
+ movl $7,%esi
+
+.Loop_schedule_256:
+ call _vpaes_schedule_mangle
+ movdqa %xmm0,%xmm6
+
+
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last
+ call _vpaes_schedule_mangle
+
+
+ pshufd $255,%xmm0,%xmm0
+ movdqa %xmm7,%xmm5
+ movdqa %xmm6,%xmm7
+ call _vpaes_schedule_low_round
+ movdqa %xmm5,%xmm7
+
+ jmp .Loop_schedule_256
+
+
+
+
+
+
+
+
+
+
+
+
+.p2align 4
+.Lschedule_mangle_last:
+
+ leaq .Lk_deskew(%rip),%r11
+ testq %rcx,%rcx
+ jnz .Lschedule_mangle_last_dec
+
+
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,193
+ leaq .Lk_opt(%rip),%r11
+ addq $32,%rdx
+
+.Lschedule_mangle_last_dec:
+ addq $-16,%rdx
+ pxor .Lk_s63(%rip),%xmm0
+ call _vpaes_schedule_transform
+ movdqu %xmm0,(%rdx)
+
+
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+ retq
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.def _vpaes_schedule_192_smear; .scl 3; .type 32; .endef
+.p2align 4
+_vpaes_schedule_192_smear:
+ pshufd $128,%xmm6,%xmm0
+ pxor %xmm0,%xmm6
+ pshufd $254,%xmm7,%xmm0
+ pxor %xmm0,%xmm6
+ movdqa %xmm6,%xmm0
+ pxor %xmm1,%xmm1
+ movhlps %xmm1,%xmm6
+ retq
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.def _vpaes_schedule_round; .scl 3; .type 32; .endef
+.p2align 4
+_vpaes_schedule_round:
+
+ pxor %xmm1,%xmm1
+.byte 102,65,15,58,15,200,15
+.byte 102,69,15,58,15,192,15
+ pxor %xmm1,%xmm7
+
+
+ pshufd $255,%xmm0,%xmm0
+.byte 102,15,58,15,192,1
+
+
+
+
+_vpaes_schedule_low_round:
+
+ movdqa %xmm7,%xmm1
+ pslldq $4,%xmm7
+ pxor %xmm1,%xmm7
+ movdqa %xmm7,%xmm1
+ pslldq $8,%xmm7
+ pxor %xmm1,%xmm7
+ pxor .Lk_s63(%rip),%xmm7
+
+
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220
+ pxor %xmm1,%xmm3
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+
+
+ pxor %xmm7,%xmm0
+ movdqa %xmm0,%xmm7
+ retq
+
+
+
+
+
+
+
+
+
+
+
+.def _vpaes_schedule_transform; .scl 3; .type 32; .endef
+.p2align 4
+_vpaes_schedule_transform:
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa (%r11),%xmm2
+.byte 102,15,56,0,208
+ movdqa 16(%r11),%xmm0
+.byte 102,15,56,0,193
+ pxor %xmm2,%xmm0
+ retq
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.def _vpaes_schedule_mangle; .scl 3; .type 32; .endef
+.p2align 4
+_vpaes_schedule_mangle:
+ movdqa %xmm0,%xmm4
+ movdqa .Lk_mc_forward(%rip),%xmm5
+ testq %rcx,%rcx
+ jnz .Lschedule_mangle_dec
+
+
+ addq $16,%rdx
+ pxor .Lk_s63(%rip),%xmm4
+.byte 102,15,56,0,229
+ movdqa %xmm4,%xmm3
+.byte 102,15,56,0,229
+ pxor %xmm4,%xmm3
+.byte 102,15,56,0,229
+ pxor %xmm4,%xmm3
+
+ jmp .Lschedule_mangle_both
+.p2align 4
+.Lschedule_mangle_dec:
+
+ leaq .Lk_dksd(%rip),%r11
+ movdqa %xmm9,%xmm1
+ pandn %xmm4,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm4
+
+ movdqa 0(%r11),%xmm2
+.byte 102,15,56,0,212
+ movdqa 16(%r11),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221
+
+ movdqa 32(%r11),%xmm2
+.byte 102,15,56,0,212
+ pxor %xmm3,%xmm2
+ movdqa 48(%r11),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221
+
+ movdqa 64(%r11),%xmm2
+.byte 102,15,56,0,212
+ pxor %xmm3,%xmm2
+ movdqa 80(%r11),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221
+
+ movdqa 96(%r11),%xmm2
+.byte 102,15,56,0,212
+ pxor %xmm3,%xmm2
+ movdqa 112(%r11),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+
+ addq $-16,%rdx
+
+.Lschedule_mangle_both:
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,217
+ addq $-16,%r8
+ andq $48,%r8
+ movdqu %xmm3,(%rdx)
+ retq
+
+
+
+
+
+.globl vpaes_set_encrypt_key
+.def vpaes_set_encrypt_key; .scl 2; .type 32; .endef
+.p2align 4
+vpaes_set_encrypt_key:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_vpaes_set_encrypt_key:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+ leaq -184(%rsp),%rsp
+ movaps %xmm6,16(%rsp)
+ movaps %xmm7,32(%rsp)
+ movaps %xmm8,48(%rsp)
+ movaps %xmm9,64(%rsp)
+ movaps %xmm10,80(%rsp)
+ movaps %xmm11,96(%rsp)
+ movaps %xmm12,112(%rsp)
+ movaps %xmm13,128(%rsp)
+ movaps %xmm14,144(%rsp)
+ movaps %xmm15,160(%rsp)
+.Lenc_key_body:
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax
+ movl %eax,240(%rdx)
+
+ movl $0,%ecx
+ movl $48,%r8d
+ call _vpaes_schedule_core
+ movaps 16(%rsp),%xmm6
+ movaps 32(%rsp),%xmm7
+ movaps 48(%rsp),%xmm8
+ movaps 64(%rsp),%xmm9
+ movaps 80(%rsp),%xmm10
+ movaps 96(%rsp),%xmm11
+ movaps 112(%rsp),%xmm12
+ movaps 128(%rsp),%xmm13
+ movaps 144(%rsp),%xmm14
+ movaps 160(%rsp),%xmm15
+ leaq 184(%rsp),%rsp
+.Lenc_key_epilogue:
+ xorl %eax,%eax
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_vpaes_set_encrypt_key:
+
+.globl vpaes_set_decrypt_key
+.def vpaes_set_decrypt_key; .scl 2; .type 32; .endef
+.p2align 4
+vpaes_set_decrypt_key:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_vpaes_set_decrypt_key:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+ leaq -184(%rsp),%rsp
+ movaps %xmm6,16(%rsp)
+ movaps %xmm7,32(%rsp)
+ movaps %xmm8,48(%rsp)
+ movaps %xmm9,64(%rsp)
+ movaps %xmm10,80(%rsp)
+ movaps %xmm11,96(%rsp)
+ movaps %xmm12,112(%rsp)
+ movaps %xmm13,128(%rsp)
+ movaps %xmm14,144(%rsp)
+ movaps %xmm15,160(%rsp)
+.Ldec_key_body:
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax
+ movl %eax,240(%rdx)
+ shll $4,%eax
+ leaq 16(%rdx,%rax,1),%rdx
+
+ movl $1,%ecx
+ movl %esi,%r8d
+ shrl $1,%r8d
+ andl $32,%r8d
+ xorl $32,%r8d
+ call _vpaes_schedule_core
+ movaps 16(%rsp),%xmm6
+ movaps 32(%rsp),%xmm7
+ movaps 48(%rsp),%xmm8
+ movaps 64(%rsp),%xmm9
+ movaps 80(%rsp),%xmm10
+ movaps 96(%rsp),%xmm11
+ movaps 112(%rsp),%xmm12
+ movaps 128(%rsp),%xmm13
+ movaps 144(%rsp),%xmm14
+ movaps 160(%rsp),%xmm15
+ leaq 184(%rsp),%rsp
+.Ldec_key_epilogue:
+ xorl %eax,%eax
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_vpaes_set_decrypt_key:
+
+.globl vpaes_encrypt
+.def vpaes_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+vpaes_encrypt:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_vpaes_encrypt:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+ leaq -184(%rsp),%rsp
+ movaps %xmm6,16(%rsp)
+ movaps %xmm7,32(%rsp)
+ movaps %xmm8,48(%rsp)
+ movaps %xmm9,64(%rsp)
+ movaps %xmm10,80(%rsp)
+ movaps %xmm11,96(%rsp)
+ movaps %xmm12,112(%rsp)
+ movaps %xmm13,128(%rsp)
+ movaps %xmm14,144(%rsp)
+ movaps %xmm15,160(%rsp)
+.Lenc_body:
+ movdqu (%rdi),%xmm0
+ call _vpaes_preheat
+ call _vpaes_encrypt_core
+ movdqu %xmm0,(%rsi)
+ movaps 16(%rsp),%xmm6
+ movaps 32(%rsp),%xmm7
+ movaps 48(%rsp),%xmm8
+ movaps 64(%rsp),%xmm9
+ movaps 80(%rsp),%xmm10
+ movaps 96(%rsp),%xmm11
+ movaps 112(%rsp),%xmm12
+ movaps 128(%rsp),%xmm13
+ movaps 144(%rsp),%xmm14
+ movaps 160(%rsp),%xmm15
+ leaq 184(%rsp),%rsp
+.Lenc_epilogue:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_vpaes_encrypt:
+
+.globl vpaes_decrypt
+.def vpaes_decrypt; .scl 2; .type 32; .endef
+.p2align 4
+vpaes_decrypt:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_vpaes_decrypt:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+ leaq -184(%rsp),%rsp
+ movaps %xmm6,16(%rsp)
+ movaps %xmm7,32(%rsp)
+ movaps %xmm8,48(%rsp)
+ movaps %xmm9,64(%rsp)
+ movaps %xmm10,80(%rsp)
+ movaps %xmm11,96(%rsp)
+ movaps %xmm12,112(%rsp)
+ movaps %xmm13,128(%rsp)
+ movaps %xmm14,144(%rsp)
+ movaps %xmm15,160(%rsp)
+.Ldec_body:
+ movdqu (%rdi),%xmm0
+ call _vpaes_preheat
+ call _vpaes_decrypt_core
+ movdqu %xmm0,(%rsi)
+ movaps 16(%rsp),%xmm6
+ movaps 32(%rsp),%xmm7
+ movaps 48(%rsp),%xmm8
+ movaps 64(%rsp),%xmm9
+ movaps 80(%rsp),%xmm10
+ movaps 96(%rsp),%xmm11
+ movaps 112(%rsp),%xmm12
+ movaps 128(%rsp),%xmm13
+ movaps 144(%rsp),%xmm14
+ movaps 160(%rsp),%xmm15
+ leaq 184(%rsp),%rsp
+.Ldec_epilogue:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_vpaes_decrypt:
+.globl vpaes_cbc_encrypt
+.def vpaes_cbc_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+vpaes_cbc_encrypt:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_vpaes_cbc_encrypt:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+
+ xchgq %rcx,%rdx
+ subq $16,%rcx
+ jc .Lcbc_abort
+ leaq -184(%rsp),%rsp
+ movaps %xmm6,16(%rsp)
+ movaps %xmm7,32(%rsp)
+ movaps %xmm8,48(%rsp)
+ movaps %xmm9,64(%rsp)
+ movaps %xmm10,80(%rsp)
+ movaps %xmm11,96(%rsp)
+ movaps %xmm12,112(%rsp)
+ movaps %xmm13,128(%rsp)
+ movaps %xmm14,144(%rsp)
+ movaps %xmm15,160(%rsp)
+.Lcbc_body:
+ movdqu (%r8),%xmm6
+ subq %rdi,%rsi
+ call _vpaes_preheat
+ cmpl $0,%r9d
+ je .Lcbc_dec_loop
+ jmp .Lcbc_enc_loop
+.p2align 4
+.Lcbc_enc_loop:
+ movdqu (%rdi),%xmm0
+ pxor %xmm6,%xmm0
+ call _vpaes_encrypt_core
+ movdqa %xmm0,%xmm6
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi
+ subq $16,%rcx
+ jnc .Lcbc_enc_loop
+ jmp .Lcbc_done
+.p2align 4
+.Lcbc_dec_loop:
+ movdqu (%rdi),%xmm0
+ movdqa %xmm0,%xmm7
+ call _vpaes_decrypt_core
+ pxor %xmm6,%xmm0
+ movdqa %xmm7,%xmm6
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi
+ subq $16,%rcx
+ jnc .Lcbc_dec_loop
+.Lcbc_done:
+ movdqu %xmm6,(%r8)
+ movaps 16(%rsp),%xmm6
+ movaps 32(%rsp),%xmm7
+ movaps 48(%rsp),%xmm8
+ movaps 64(%rsp),%xmm9
+ movaps 80(%rsp),%xmm10
+ movaps 96(%rsp),%xmm11
+ movaps 112(%rsp),%xmm12
+ movaps 128(%rsp),%xmm13
+ movaps 144(%rsp),%xmm14
+ movaps 160(%rsp),%xmm15
+ leaq 184(%rsp),%rsp
+.Lcbc_epilogue:
+.Lcbc_abort:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_vpaes_cbc_encrypt:
+
+
+
+
+
+
+.def _vpaes_preheat; .scl 3; .type 32; .endef
+.p2align 4
+_vpaes_preheat:
+ leaq .Lk_s0F(%rip),%r10
+ movdqa -32(%r10),%xmm10
+ movdqa -16(%r10),%xmm11
+ movdqa 0(%r10),%xmm9
+ movdqa 48(%r10),%xmm13
+ movdqa 64(%r10),%xmm12
+ movdqa 80(%r10),%xmm15
+ movdqa 96(%r10),%xmm14
+ retq
+
+
+
+
+
+
+
+.p2align 6
+_vpaes_consts:
+.Lk_inv:
+.quad 0x0E05060F0D080180, 0x040703090A0B0C02
+.quad 0x01040A060F0B0780, 0x030D0E0C02050809
+
+.Lk_s0F:
+.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
+
+.Lk_ipt:
+.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
+.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
+
+.Lk_sb1:
+.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
+.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
+.Lk_sb2:
+.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
+.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
+.Lk_sbo:
+.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
+.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
+
+.Lk_mc_forward:
+.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
+.quad 0x080B0A0904070605, 0x000302010C0F0E0D
+.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
+.quad 0x000302010C0F0E0D, 0x080B0A0904070605
+
+.Lk_mc_backward:
+.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
+.quad 0x020100030E0D0C0F, 0x0A09080B06050407
+.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
+.quad 0x0A09080B06050407, 0x020100030E0D0C0F
+
+.Lk_sr:
+.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
+.quad 0x030E09040F0A0500, 0x0B06010C07020D08
+.quad 0x0F060D040B020900, 0x070E050C030A0108
+.quad 0x0B0E0104070A0D00, 0x0306090C0F020508
+
+.Lk_rcon:
+.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
+
+.Lk_s63:
+.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
+
+.Lk_opt:
+.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
+.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
+
+.Lk_deskew:
+.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
+.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
+
+
+
+
+
+.Lk_dksd:
+.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
+.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
+.Lk_dksb:
+.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
+.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
+.Lk_dkse:
+.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
+.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
+.Lk_dks9:
+.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
+.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
+
+
+
+
+
+.Lk_dipt:
+.quad 0x0F505B040B545F00, 0x154A411E114E451A
+.quad 0x86E383E660056500, 0x12771772F491F194
+
+.Lk_dsb9:
+.quad 0x851C03539A86D600, 0xCAD51F504F994CC9
+.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
+.Lk_dsbd:
+.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
+.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
+.Lk_dsbb:
+.quad 0xD022649296B44200, 0x602646F6B0F2D404
+.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
+.Lk_dsbe:
+.quad 0x46F2929626D4D000, 0x2242600464B4F6B0
+.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
+.Lk_dsbo:
+.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
+.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
+.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
+.p2align 6
+
+
+.def se_handler; .scl 3; .type 32; .endef
+.p2align 4
+se_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax
+ movq 248(%r8),%rbx
+
+ movq 8(%r9),%rsi
+ movq 56(%r9),%r11
+
+ movl 0(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jb .Lin_prologue
+
+ movq 152(%r8),%rax
+
+ movl 4(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lin_prologue
+
+ leaq 16(%rax),%rsi
+ leaq 512(%r8),%rdi
+ movl $20,%ecx
+.long 0xa548f3fc
+ leaq 184(%rax),%rax
+
+.Lin_prologue:
+ movq 8(%rax),%rdi
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8)
+ movq %rsi,168(%r8)
+ movq %rdi,176(%r8)
+
+ movq 40(%r9),%rdi
+ movq %r8,%rsi
+ movl $154,%ecx
+.long 0xa548f3fc
+
+ movq %r9,%rsi
+ xorq %rcx,%rcx
+ movq 8(%rsi),%rdx
+ movq 0(%rsi),%r8
+ movq 16(%rsi),%r9
+ movq 40(%rsi),%r10
+ leaq 56(%rsi),%r11
+ leaq 24(%rsi),%r12
+ movq %r10,32(%rsp)
+ movq %r11,40(%rsp)
+ movq %r12,48(%rsp)
+ movq %rcx,56(%rsp)
+ call *__imp_RtlVirtualUnwind(%rip)
+
+ movl $1,%eax
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+
+
+.section .pdata
+.p2align 2
+.rva .LSEH_begin_vpaes_set_encrypt_key
+.rva .LSEH_end_vpaes_set_encrypt_key
+.rva .LSEH_info_vpaes_set_encrypt_key
+
+.rva .LSEH_begin_vpaes_set_decrypt_key
+.rva .LSEH_end_vpaes_set_decrypt_key
+.rva .LSEH_info_vpaes_set_decrypt_key
+
+.rva .LSEH_begin_vpaes_encrypt
+.rva .LSEH_end_vpaes_encrypt
+.rva .LSEH_info_vpaes_encrypt
+
+.rva .LSEH_begin_vpaes_decrypt
+.rva .LSEH_end_vpaes_decrypt
+.rva .LSEH_info_vpaes_decrypt
+
+.rva .LSEH_begin_vpaes_cbc_encrypt
+.rva .LSEH_end_vpaes_cbc_encrypt
+.rva .LSEH_info_vpaes_cbc_encrypt
+
+.section .xdata
+.p2align 3
+.LSEH_info_vpaes_set_encrypt_key:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lenc_key_body,.Lenc_key_epilogue
+.LSEH_info_vpaes_set_decrypt_key:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Ldec_key_body,.Ldec_key_epilogue
+.LSEH_info_vpaes_encrypt:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lenc_body,.Lenc_epilogue
+.LSEH_info_vpaes_decrypt:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Ldec_body,.Ldec_epilogue
+.LSEH_info_vpaes_cbc_encrypt:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lcbc_body,.Lcbc_epilogue
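
The .Lcbc_enc_loop and .Lcbc_dec_loop bodies above are ordinary CBC chaining wrapped around the vpaes block routines. For readers who would rather not trace the SSE registers, here is a minimal portable-C sketch of the same chaining; cbc_sketch and the block_fn callbacks are hypothetical stand-ins, not the actual vpaes entry points, and, as in the assembly, only whole 16-byte blocks are processed (the length is truncated, never padded).

#include <stddef.h>
#include <string.h>

typedef void (*block_fn)(unsigned char block[16], const void *key);

/* enc != 0: CBC encrypt; enc == 0: CBC decrypt. */
static void cbc_sketch(const unsigned char *in, unsigned char *out,
    size_t len, const void *key, unsigned char ivec[16], int enc,
    block_fn encrypt, block_fn decrypt)
{
	unsigned char iv[16], buf[16], c[16];
	size_t i;

	memcpy(iv, ivec, 16);
	for (; len >= 16; len -= 16, in += 16, out += 16) {
		if (enc) {
			for (i = 0; i < 16; i++)
				buf[i] = in[i] ^ iv[i];	/* P xor IV */
			encrypt(buf, key);		/* C = E_k(P xor IV) */
			memcpy(out, buf, 16);
			memcpy(iv, buf, 16);		/* chain the ciphertext */
		} else {
			memcpy(c, in, 16);		/* keep C for chaining */
			memcpy(buf, c, 16);
			decrypt(buf, key);		/* D_k(C) */
			for (i = 0; i < 16; i++)
				out[i] = buf[i] ^ iv[i];/* xor previous C (or IV) */
			memcpy(iv, c, 16);
		}
	}
	memcpy(ivec, iv, 16);				/* write back the running IV */
}
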
diff --git a/ext/libressl/crypto/bf/Makefile b/ext/libressl/crypto/bf/Makefile
new file mode 100644
index 0000000..dac4aba
--- /dev/null
+++ b/ext/libressl/crypto/bf/Makefile
@@ -0,0 +1,14 @@
+include ../../ssl_common.mk
+CFLAGS+= -D__BEGIN_HIDDEN_DECLS= -D__END_HIDDEN_DECLS=
+
+obj = bf_skey.o bf_enc.o bf_ecb.o bf_cfb64.o bf_ofb64.o
+
+
+all: $(obj)
+dep: all
+
+%.o: %.c
+ $(CC) $(CFLAGS) -c $<
+
+clean:
+ rm -f *.o *.a
diff --git a/ext/libressl/crypto/bf/bf_cfb64.c b/ext/libressl/crypto/bf/bf_cfb64.c
new file mode 100644
index 0000000..6cc0bb9
--- /dev/null
+++ b/ext/libressl/crypto/bf/bf_cfb64.c
@@ -0,0 +1,121 @@
+/* $OpenBSD: bf_cfb64.c,v 1.5 2014/10/28 07:35:58 jsg Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <openssl/blowfish.h>
+#include "bf_locl.h"
+
+/* The input and output are encrypted as though 64-bit CFB mode is being
+ * used. The extra state information recording how much of the
+ * 64-bit block has been consumed so far is kept in *num.
+ */
+
+void BF_cfb64_encrypt(const unsigned char *in, unsigned char *out, long length,
+ const BF_KEY *schedule, unsigned char *ivec, int *num, int encrypt)
+ {
+ BF_LONG v0,v1,t;
+ int n= *num;
+ long l=length;
+ BF_LONG ti[2];
+ unsigned char *iv,c,cc;
+
+ iv=(unsigned char *)ivec;
+ if (encrypt)
+ {
+ while (l--)
+ {
+ if (n == 0)
+ {
+ n2l(iv,v0); ti[0]=v0;
+ n2l(iv,v1); ti[1]=v1;
+ BF_encrypt((BF_LONG *)ti,schedule);
+ iv=(unsigned char *)ivec;
+ t=ti[0]; l2n(t,iv);
+ t=ti[1]; l2n(t,iv);
+ iv=(unsigned char *)ivec;
+ }
+ c= *(in++)^iv[n];
+ *(out++)=c;
+ iv[n]=c;
+ n=(n+1)&0x07;
+ }
+ }
+ else
+ {
+ while (l--)
+ {
+ if (n == 0)
+ {
+ n2l(iv,v0); ti[0]=v0;
+ n2l(iv,v1); ti[1]=v1;
+ BF_encrypt((BF_LONG *)ti,schedule);
+ iv=(unsigned char *)ivec;
+ t=ti[0]; l2n(t,iv);
+ t=ti[1]; l2n(t,iv);
+ iv=(unsigned char *)ivec;
+ }
+ cc= *(in++);
+ c=iv[n];
+ iv[n]=cc;
+ *(out++)=c^cc;
+ n=(n+1)&0x07;
+ }
+ }
+ v0=v1=ti[0]=ti[1]=t=c=cc=0;
+ *num=n;
+ }
+
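
The comment above is the whole contract: one routine handles both directions, the IV buffer is updated in place, and *num carries the offset into the current keystream block so successive calls can split the data at arbitrary byte boundaries. A minimal usage sketch, assuming only the usual <openssl/blowfish.h> declarations for BF_set_key and the BF_ENCRYPT/BF_DECRYPT constants; the key and message are made up for illustration.

#include <string.h>
#include <openssl/blowfish.h>

static void cfb64_roundtrip(void)
{
	static const unsigned char key[16] = "0123456789abcdef";
	const char *msg = "cfb64 keeps its state in ivec and *num";
	unsigned char ct[64], pt[64], ivec[8];
	long len = (long)strlen(msg);
	BF_KEY ks;
	int num;

	BF_set_key(&ks, sizeof(key), key);

	/* Encrypt in two unequal chunks; the keystream offset lives in num. */
	memset(ivec, 0, sizeof(ivec));
	num = 0;
	BF_cfb64_encrypt((const unsigned char *)msg, ct, 5,
	    &ks, ivec, &num, BF_ENCRYPT);
	BF_cfb64_encrypt((const unsigned char *)msg + 5, ct + 5, len - 5,
	    &ks, ivec, &num, BF_ENCRYPT);

	/* Decrypt in one call; restart from the original IV with num = 0. */
	memset(ivec, 0, sizeof(ivec));
	num = 0;
	BF_cfb64_encrypt(ct, pt, len, &ks, ivec, &num, BF_DECRYPT);
	/* pt now matches msg byte for byte. */
}
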
diff --git a/ext/libressl/crypto/bf/bf_ecb.c b/ext/libressl/crypto/bf/bf_ecb.c
new file mode 100644
index 0000000..305bd78
--- /dev/null
+++ b/ext/libressl/crypto/bf/bf_ecb.c
@@ -0,0 +1,94 @@
+/* $OpenBSD: bf_ecb.c,v 1.6 2014/07/09 11:10:50 bcook Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <openssl/blowfish.h>
+#include "bf_locl.h"
+#include <openssl/opensslv.h>
+
+/* Blowfish as implemented from 'Blowfish: Springer-Verlag paper'
+ * (From LECTURE NOTES IN COMPUTER SCIENCE 809, FAST SOFTWARE ENCRYPTION,
+ * CAMBRIDGE SECURITY WORKSHOP, CAMBRIDGE, U.K., DECEMBER 9-11, 1993)
+ */
+
+const char *BF_options(void)
+ {
+#ifdef BF_PTR
+ return("blowfish(ptr)");
+#elif defined(BF_PTR2)
+ return("blowfish(ptr2)");
+#else
+ return("blowfish(idx)");
+#endif
+ }
+
+void BF_ecb_encrypt(const unsigned char *in, unsigned char *out,
+ const BF_KEY *key, int encrypt)
+ {
+ BF_LONG l,d[2];
+
+ n2l(in,l); d[0]=l;
+ n2l(in,l); d[1]=l;
+ if (encrypt)
+ BF_encrypt(d,key);
+ else
+ BF_decrypt(d,key);
+ l=d[0]; l2n(l,out);
+ l=d[1]; l2n(l,out);
+ l=d[0]=d[1]=0;
+ }
+
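
BF_ecb_encrypt above handles exactly one 8-byte block per call: no IV, no length argument, no chaining. A minimal sketch under the same assumptions as the CFB example (key and data are illustrative):

#include <openssl/blowfish.h>

static void ecb_single_block(void)
{
	static const unsigned char key[16] = "0123456789abcdef";
	unsigned char pt[8] = "8 bytes", ct[8], back[8];
	BF_KEY ks;

	BF_set_key(&ks, sizeof(key), key);
	BF_ecb_encrypt(pt, ct, &ks, BF_ENCRYPT);	/* one block in, one block out */
	BF_ecb_encrypt(ct, back, &ks, BF_DECRYPT);
	/* back now equals pt; longer messages need one call per 8-byte block. */
}
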
diff --git a/ext/libressl/crypto/bf/bf_enc.c b/ext/libressl/crypto/bf/bf_enc.c
new file mode 100644
index 0000000..2cf1c86
--- /dev/null
+++ b/ext/libressl/crypto/bf/bf_enc.c
@@ -0,0 +1,306 @@
+/* $OpenBSD: bf_enc.c,v 1.6 2014/10/28 07:35:58 jsg Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <openssl/blowfish.h>
+#include "bf_locl.h"
+
+/* Blowfish as implemented from 'Blowfish: Springer-Verlag paper'
+ * (From LECTURE NOTES IN COMPUTER SCIENCE 809, FAST SOFTWARE ENCRYPTION,
+ * CAMBRIDGE SECURITY WORKSHOP, CAMBRIDGE, U.K., DECEMBER 9-11, 1993)
+ */
+
+#if (BF_ROUNDS != 16) && (BF_ROUNDS != 20)
+#error If you set BF_ROUNDS to some value other than 16 or 20, you will have \
+to modify the code.
+#endif
+
+void BF_encrypt(BF_LONG *data, const BF_KEY *key)
+ {
+#ifndef BF_PTR2
+ BF_LONG l,r;
+ const BF_LONG *p,*s;
+
+ p=key->P;
+ s= &(key->S[0]);
+ l=data[0];
+ r=data[1];
+
+ l^=p[0];
+ BF_ENC(r,l,s,p[ 1]);
+ BF_ENC(l,r,s,p[ 2]);
+ BF_ENC(r,l,s,p[ 3]);
+ BF_ENC(l,r,s,p[ 4]);
+ BF_ENC(r,l,s,p[ 5]);
+ BF_ENC(l,r,s,p[ 6]);
+ BF_ENC(r,l,s,p[ 7]);
+ BF_ENC(l,r,s,p[ 8]);
+ BF_ENC(r,l,s,p[ 9]);
+ BF_ENC(l,r,s,p[10]);
+ BF_ENC(r,l,s,p[11]);
+ BF_ENC(l,r,s,p[12]);
+ BF_ENC(r,l,s,p[13]);
+ BF_ENC(l,r,s,p[14]);
+ BF_ENC(r,l,s,p[15]);
+ BF_ENC(l,r,s,p[16]);
+#if BF_ROUNDS == 20
+ BF_ENC(r,l,s,p[17]);
+ BF_ENC(l,r,s,p[18]);
+ BF_ENC(r,l,s,p[19]);
+ BF_ENC(l,r,s,p[20]);
+#endif
+ r^=p[BF_ROUNDS+1];
+
+ data[1]=l&0xffffffffL;
+ data[0]=r&0xffffffffL;
+#else
+ BF_LONG l,r,t,*k;
+
+ l=data[0];
+ r=data[1];
+ k=(BF_LONG*)key;
+
+ l^=k[0];
+ BF_ENC(r,l,k, 1);
+ BF_ENC(l,r,k, 2);
+ BF_ENC(r,l,k, 3);
+ BF_ENC(l,r,k, 4);
+ BF_ENC(r,l,k, 5);
+ BF_ENC(l,r,k, 6);
+ BF_ENC(r,l,k, 7);
+ BF_ENC(l,r,k, 8);
+ BF_ENC(r,l,k, 9);
+ BF_ENC(l,r,k,10);
+ BF_ENC(r,l,k,11);
+ BF_ENC(l,r,k,12);
+ BF_ENC(r,l,k,13);
+ BF_ENC(l,r,k,14);
+ BF_ENC(r,l,k,15);
+ BF_ENC(l,r,k,16);
+#if BF_ROUNDS == 20
+ BF_ENC(r,l,k,17);
+ BF_ENC(l,r,k,18);
+ BF_ENC(r,l,k,19);
+ BF_ENC(l,r,k,20);
+#endif
+ r^=k[BF_ROUNDS+1];
+
+ data[1]=l&0xffffffffL;
+ data[0]=r&0xffffffffL;
+#endif
+ }
+
+#ifndef BF_DEFAULT_OPTIONS
+
+void BF_decrypt(BF_LONG *data, const BF_KEY *key)
+ {
+#ifndef BF_PTR2
+ BF_LONG l,r;
+ const BF_LONG *p,*s;
+
+ p=key->P;
+ s= &(key->S[0]);
+ l=data[0];
+ r=data[1];
+
+ l^=p[BF_ROUNDS+1];
+#if BF_ROUNDS == 20
+ BF_ENC(r,l,s,p[20]);
+ BF_ENC(l,r,s,p[19]);
+ BF_ENC(r,l,s,p[18]);
+ BF_ENC(l,r,s,p[17]);
+#endif
+ BF_ENC(r,l,s,p[16]);
+ BF_ENC(l,r,s,p[15]);
+ BF_ENC(r,l,s,p[14]);
+ BF_ENC(l,r,s,p[13]);
+ BF_ENC(r,l,s,p[12]);
+ BF_ENC(l,r,s,p[11]);
+ BF_ENC(r,l,s,p[10]);
+ BF_ENC(l,r,s,p[ 9]);
+ BF_ENC(r,l,s,p[ 8]);
+ BF_ENC(l,r,s,p[ 7]);
+ BF_ENC(r,l,s,p[ 6]);
+ BF_ENC(l,r,s,p[ 5]);
+ BF_ENC(r,l,s,p[ 4]);
+ BF_ENC(l,r,s,p[ 3]);
+ BF_ENC(r,l,s,p[ 2]);
+ BF_ENC(l,r,s,p[ 1]);
+ r^=p[0];
+
+ data[1]=l&0xffffffffL;
+ data[0]=r&0xffffffffL;
+#else
+ BF_LONG l,r,t,*k;
+
+ l=data[0];
+ r=data[1];
+ k=(BF_LONG *)key;
+
+ l^=k[BF_ROUNDS+1];
+#if BF_ROUNDS == 20
+ BF_ENC(r,l,k,20);
+ BF_ENC(l,r,k,19);
+ BF_ENC(r,l,k,18);
+ BF_ENC(l,r,k,17);
+#endif
+ BF_ENC(r,l,k,16);
+ BF_ENC(l,r,k,15);
+ BF_ENC(r,l,k,14);
+ BF_ENC(l,r,k,13);
+ BF_ENC(r,l,k,12);
+ BF_ENC(l,r,k,11);
+ BF_ENC(r,l,k,10);
+ BF_ENC(l,r,k, 9);
+ BF_ENC(r,l,k, 8);
+ BF_ENC(l,r,k, 7);
+ BF_ENC(r,l,k, 6);
+ BF_ENC(l,r,k, 5);
+ BF_ENC(r,l,k, 4);
+ BF_ENC(l,r,k, 3);
+ BF_ENC(r,l,k, 2);
+ BF_ENC(l,r,k, 1);
+ r^=k[0];
+
+ data[1]=l&0xffffffffL;
+ data[0]=r&0xffffffffL;
+#endif
+ }
+
+void BF_cbc_encrypt(const unsigned char *in, unsigned char *out, long length,
+ const BF_KEY *schedule, unsigned char *ivec, int encrypt)
+ {
+ BF_LONG tin0,tin1;
+ BF_LONG tout0,tout1,xor0,xor1;
+ long l=length;
+ BF_LONG tin[2];
+
+ if (encrypt)
+ {
+ n2l(ivec,tout0);
+ n2l(ivec,tout1);
+ ivec-=8;
+ for (l-=8; l>=0; l-=8)
+ {
+ n2l(in,tin0);
+ n2l(in,tin1);
+ tin0^=tout0;
+ tin1^=tout1;
+ tin[0]=tin0;
+ tin[1]=tin1;
+ BF_encrypt(tin,schedule);
+ tout0=tin[0];
+ tout1=tin[1];
+ l2n(tout0,out);
+ l2n(tout1,out);
+ }
+ if (l != -8)
+ {
+ n2ln(in,tin0,tin1,l+8);
+ tin0^=tout0;
+ tin1^=tout1;
+ tin[0]=tin0;
+ tin[1]=tin1;
+ BF_encrypt(tin,schedule);
+ tout0=tin[0];
+ tout1=tin[1];
+ l2n(tout0,out);
+ l2n(tout1,out);
+ }
+ l2n(tout0,ivec);
+ l2n(tout1,ivec);
+ }
+ else
+ {
+ n2l(ivec,xor0);
+ n2l(ivec,xor1);
+ ivec-=8;
+ for (l-=8; l>=0; l-=8)
+ {
+ n2l(in,tin0);
+ n2l(in,tin1);
+ tin[0]=tin0;
+ tin[1]=tin1;
+ BF_decrypt(tin,schedule);
+ tout0=tin[0]^xor0;
+ tout1=tin[1]^xor1;
+ l2n(tout0,out);
+ l2n(tout1,out);
+ xor0=tin0;
+ xor1=tin1;
+ }
+ if (l != -8)
+ {
+ n2l(in,tin0);
+ n2l(in,tin1);
+ tin[0]=tin0;
+ tin[1]=tin1;
+ BF_decrypt(tin,schedule);
+ tout0=tin[0]^xor0;
+ tout1=tin[1]^xor1;
+ l2nn(tout0,tout1,out,l+8);
+ xor0=tin0;
+ xor1=tin1;
+ }
+ l2n(xor0,ivec);
+ l2n(xor1,ivec);
+ }
+ tin0=tin1=tout0=tout1=xor0=xor1=0;
+ tin[0]=tin[1]=0;
+ }
+
+#endif
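
The l != -8 branches above are the only subtle part of BF_cbc_encrypt: a trailing partial block is zero-padded (via n2ln) before encryption, so the ciphertext is always a whole number of 8-byte blocks, while decryption writes back only the original byte count (via l2nn). A small sketch under the same assumptions as the earlier examples, using a 20-byte message, i.e. two full blocks plus a 4-byte tail:

#include <string.h>
#include <openssl/blowfish.h>

static void cbc_partial_tail(void)
{
	static const unsigned char key[16] = "0123456789abcdef";
	unsigned char pt[20] = "partial-tail demo!!";	/* 20 bytes incl. NUL */
	unsigned char ct[24], back[24], ivec[8];	/* ciphertext rounds up to 24 */
	BF_KEY ks;

	BF_set_key(&ks, sizeof(key), key);

	memset(ivec, 0, sizeof(ivec));
	BF_cbc_encrypt(pt, ct, sizeof(pt), &ks, ivec, BF_ENCRYPT);

	memset(ivec, 0, sizeof(ivec));
	memset(back, 0, sizeof(back));
	BF_cbc_encrypt(ct, back, sizeof(pt), &ks, ivec, BF_DECRYPT);
	/* back[0..19] == pt[0..19]; back[20..23] were never written. */
}
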
diff --git a/ext/libressl/crypto/bf/bf_locl.h b/ext/libressl/crypto/bf/bf_locl.h
new file mode 100644
index 0000000..0b66362
--- /dev/null
+++ b/ext/libressl/crypto/bf/bf_locl.h
@@ -0,0 +1,219 @@
+/* $OpenBSD: bf_locl.h,v 1.3 2014/06/12 15:49:28 deraadt Exp $ */
+/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#ifndef HEADER_BF_LOCL_H
+#define HEADER_BF_LOCL_H
+#include <openssl/opensslconf.h> /* BF_PTR, BF_PTR2 */
+
+#undef c2l
+#define c2l(c,l) (l =((unsigned long)(*((c)++))) , \
+ l|=((unsigned long)(*((c)++)))<< 8L, \
+ l|=((unsigned long)(*((c)++)))<<16L, \
+ l|=((unsigned long)(*((c)++)))<<24L)
+
+/* NOTE - c is not incremented as per c2l */
+#undef c2ln
+#define c2ln(c,l1,l2,n) { \
+ c+=n; \
+ l1=l2=0; \
+ switch (n) { \
+ case 8: l2 =((unsigned long)(*(--(c))))<<24L; \
+ case 7: l2|=((unsigned long)(*(--(c))))<<16L; \
+ case 6: l2|=((unsigned long)(*(--(c))))<< 8L; \
+ case 5: l2|=((unsigned long)(*(--(c)))); \
+ case 4: l1 =((unsigned long)(*(--(c))))<<24L; \
+ case 3: l1|=((unsigned long)(*(--(c))))<<16L; \
+ case 2: l1|=((unsigned long)(*(--(c))))<< 8L; \
+ case 1: l1|=((unsigned long)(*(--(c)))); \
+ } \
+ }
+
+#undef l2c
+#define l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
+ *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \
+ *((c)++)=(unsigned char)(((l)>>16L)&0xff), \
+ *((c)++)=(unsigned char)(((l)>>24L)&0xff))
+
+/* NOTE - c is not incremented as per l2c */
+#undef l2cn
+#define l2cn(l1,l2,c,n) { \
+ c+=n; \
+ switch (n) { \
+ case 8: *(--(c))=(unsigned char)(((l2)>>24L)&0xff); \
+ case 7: *(--(c))=(unsigned char)(((l2)>>16L)&0xff); \
+ case 6: *(--(c))=(unsigned char)(((l2)>> 8L)&0xff); \
+ case 5: *(--(c))=(unsigned char)(((l2) )&0xff); \
+ case 4: *(--(c))=(unsigned char)(((l1)>>24L)&0xff); \
+ case 3: *(--(c))=(unsigned char)(((l1)>>16L)&0xff); \
+ case 2: *(--(c))=(unsigned char)(((l1)>> 8L)&0xff); \
+ case 1: *(--(c))=(unsigned char)(((l1) )&0xff); \
+ } \
+ }
+
+/* NOTE - c is not incremented as per n2l */
+#define n2ln(c,l1,l2,n) { \
+ c+=n; \
+ l1=l2=0; \
+ switch (n) { \
+ case 8: l2 =((unsigned long)(*(--(c)))) ; \
+ case 7: l2|=((unsigned long)(*(--(c))))<< 8; \
+ case 6: l2|=((unsigned long)(*(--(c))))<<16; \
+ case 5: l2|=((unsigned long)(*(--(c))))<<24; \
+ case 4: l1 =((unsigned long)(*(--(c)))) ; \
+ case 3: l1|=((unsigned long)(*(--(c))))<< 8; \
+ case 2: l1|=((unsigned long)(*(--(c))))<<16; \
+ case 1: l1|=((unsigned long)(*(--(c))))<<24; \
+ } \
+ }
+
+/* NOTE - c is not incremented as per l2n */
+#define l2nn(l1,l2,c,n) { \
+ c+=n; \
+ switch (n) { \
+ case 8: *(--(c))=(unsigned char)(((l2) )&0xff); \
+ case 7: *(--(c))=(unsigned char)(((l2)>> 8)&0xff); \
+ case 6: *(--(c))=(unsigned char)(((l2)>>16)&0xff); \
+ case 5: *(--(c))=(unsigned char)(((l2)>>24)&0xff); \
+ case 4: *(--(c))=(unsigned char)(((l1) )&0xff); \
+ case 3: *(--(c))=(unsigned char)(((l1)>> 8)&0xff); \
+ case 2: *(--(c))=(unsigned char)(((l1)>>16)&0xff); \
+ case 1: *(--(c))=(unsigned char)(((l1)>>24)&0xff); \
+ } \
+ }
+
+#undef n2l
+#define n2l(c,l) (l =((unsigned long)(*((c)++)))<<24L, \
+ l|=((unsigned long)(*((c)++)))<<16L, \
+ l|=((unsigned long)(*((c)++)))<< 8L, \
+ l|=((unsigned long)(*((c)++))))
+
+#undef l2n
+#define l2n(l,c) (*((c)++)=(unsigned char)(((l)>>24L)&0xff), \
+ *((c)++)=(unsigned char)(((l)>>16L)&0xff), \
+ *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \
+ *((c)++)=(unsigned char)(((l) )&0xff))
+
+/* This is actually a big endian algorithm: the most significant byte
+ * is used to look up array 0 */
+
+#if defined(BF_PTR2)
+
+/*
+ * This is basically a special Intel version. The point is that Intel
+ * doesn't have many registers, but offers a rich choice of addressing
+ * modes. So we spare some registers by traversing the BF_KEY structure
+ * directly and using the most decorated addressing mode. The code
+ * generated by EGCS is *perfectly* competitive with the assembler
+ * implementation!
+ */
+#define BF_ENC(LL,R,KEY,Pi) (\
+ LL^=KEY[Pi], \
+ t= KEY[BF_ROUNDS+2 + 0 + ((R>>24)&0xFF)], \
+ t+= KEY[BF_ROUNDS+2 + 256 + ((R>>16)&0xFF)], \
+ t^= KEY[BF_ROUNDS+2 + 512 + ((R>>8 )&0xFF)], \
+ t+= KEY[BF_ROUNDS+2 + 768 + ((R )&0xFF)], \
+ LL^=t \
+ )
+
+#elif defined(BF_PTR)
+
+#ifndef BF_LONG_LOG2
+#define BF_LONG_LOG2 2 /* default to BF_LONG being 32 bits */
+#endif
+#define BF_M (0xFF<<BF_LONG_LOG2)
+#define BF_0 (24-BF_LONG_LOG2)
+#define BF_1 (16-BF_LONG_LOG2)
+#define BF_2 ( 8-BF_LONG_LOG2)
+#define BF_3 BF_LONG_LOG2 /* left shift */
+
+/*
+ * This is normally very good on RISC platforms, where you usually
+ * have to explicitly "multiply" the array index by sizeof(BF_LONG)
+ * in order to calculate the effective address. This implementation
+ * spares the CPU that extra work. Power[PC] users should have the most
+ * fun, as (R>>BF_i)&BF_M gets folded into a single instruction, namely
+ * rlwinm. So let'em double-check whether their compiler does it.
+ */
+
+#define BF_ENC(LL,R,S,P) ( \
+ LL^=P, \
+ LL^= (((*(BF_LONG *)((unsigned char *)&(S[ 0])+((R>>BF_0)&BF_M))+ \
+ *(BF_LONG *)((unsigned char *)&(S[256])+((R>>BF_1)&BF_M)))^ \
+ *(BF_LONG *)((unsigned char *)&(S[512])+((R>>BF_2)&BF_M)))+ \
+ *(BF_LONG *)((unsigned char *)&(S[768])+((R<<BF_3)&BF_M))) \
+ )
+#else
+
+/*
+ * This is a *generic* version. It seems to perform best on platforms
+ * that offer explicit support for extracting individual bytes, preferably
+ * complemented with "multiplying" of the array index by sizeof(BF_LONG).
+ * At the time of this writing the list comprises the Alpha CPU, featuring
+ * the extbl and s[48]addq instructions.
+ */
+
+#define BF_ENC(LL,R,S,P) ( \
+ LL^=P, \
+ LL^=((( S[ ((int)(R>>24)&0xff)] + \
+ S[0x0100+((int)(R>>16)&0xff)])^ \
+ S[0x0200+((int)(R>> 8)&0xff)])+ \
+ S[0x0300+((int)(R )&0xff)])&0xffffffffL \
+ )
+#endif
+
+#endif
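
All three BF_ENC variants above compute the same Feistel half-round; they differ only in how the S-box lookups are addressed. A plain-C restatement for reference, with bf_round_sketch as our own name (S is the flat 4x256-entry S-box array from the key schedule, P the round subkey); bytes of R are consumed most significant first, matching the "big endian" note above:

#include <stdint.h>

static uint32_t bf_round_sketch(uint32_t L, uint32_t R,
    const uint32_t S[4 * 256], uint32_t P)
{
	uint32_t f;

	f  = S[0 * 256 + ((R >> 24) & 0xff)];	/* most significant byte -> box 0 */
	f += S[1 * 256 + ((R >> 16) & 0xff)];
	f ^= S[2 * 256 + ((R >>  8) & 0xff)];
	f += S[3 * 256 + ( R        & 0xff)];
	return (L ^ P) ^ f;			/* new left half */
}
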
diff --git a/ext/libressl/crypto/bf/bf_ofb64.c b/ext/libressl/crypto/bf/bf_ofb64.c
new file mode 100644
index 0000000..9e33162
--- /dev/null
+++ b/ext/libressl/crypto/bf/bf_ofb64.c
@@ -0,0 +1,110 @@
+/* $OpenBSD: bf_ofb64.c,v 1.5 2014/10/28 07:35:58 jsg Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <openssl/blowfish.h>
+#include "bf_locl.h"
+
+/* The input and output are encrypted as though 64-bit OFB mode is being
+ * used. The extra state information recording how much of the
+ * 64-bit block has been consumed so far is kept in *num.
+ */
+void BF_ofb64_encrypt(const unsigned char *in, unsigned char *out, long length,
+ const BF_KEY *schedule, unsigned char *ivec, int *num)
+ {
+ BF_LONG v0,v1,t;
+ int n= *num;
+ long l=length;
+ unsigned char d[8];
+ char *dp;
+ BF_LONG ti[2];
+ unsigned char *iv;
+ int save=0;
+
+ iv=(unsigned char *)ivec;
+ n2l(iv,v0);
+ n2l(iv,v1);
+ ti[0]=v0;
+ ti[1]=v1;
+ dp=(char *)d;
+ l2n(v0,dp);
+ l2n(v1,dp);
+ while (l--)
+ {
+ if (n == 0)
+ {
+ BF_encrypt((BF_LONG *)ti,schedule);
+ dp=(char *)d;
+ t=ti[0]; l2n(t,dp);
+ t=ti[1]; l2n(t,dp);
+ save++;
+ }
+ *(out++)= *(in++)^d[n];
+ n=(n+1)&0x07;
+ }
+ if (save)
+ {
+ v0=ti[0];
+ v1=ti[1];
+ iv=(unsigned char *)ivec;
+ l2n(v0,iv);
+ l2n(v1,iv);
+ }
+ t=v0=v1=ti[0]=ti[1]=0;
+ *num=n;
+ }
+
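
Because OFB derives its keystream from the IV alone, the same call performs both encryption and decryption; unlike the CFB routine there is no direction flag. A short sketch under the same assumptions as the earlier examples:

#include <string.h>
#include <openssl/blowfish.h>

static void ofb64_roundtrip(void)
{
	static const unsigned char key[16] = "0123456789abcdef";
	const char *msg = "ofb64: apply twice to get the plaintext back";
	unsigned char ct[64], pt[64], ivec[8];
	long len = (long)strlen(msg);
	BF_KEY ks;
	int num;

	BF_set_key(&ks, sizeof(key), key);

	memset(ivec, 0, sizeof(ivec));
	num = 0;
	BF_ofb64_encrypt((const unsigned char *)msg, ct, len, &ks, ivec, &num);

	memset(ivec, 0, sizeof(ivec));	/* same IV, same keystream */
	num = 0;
	BF_ofb64_encrypt(ct, pt, len, &ks, ivec, &num);
	/* pt now matches msg. */
}
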
diff --git a/ext/libressl/crypto/bf/bf_pi.h b/ext/libressl/crypto/bf/bf_pi.h
new file mode 100644
index 0000000..ce4843a
--- /dev/null
+++ b/ext/libressl/crypto/bf/bf_pi.h
@@ -0,0 +1,328 @@
+/* $OpenBSD: bf_pi.h,v 1.4 2016/12/21 15:49:29 jsing Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+__BEGIN_HIDDEN_DECLS
+
+static const BF_KEY bf_init= {
+ {
+ 0x243f6a88L, 0x85a308d3L, 0x13198a2eL, 0x03707344L,
+ 0xa4093822L, 0x299f31d0L, 0x082efa98L, 0xec4e6c89L,
+ 0x452821e6L, 0x38d01377L, 0xbe5466cfL, 0x34e90c6cL,
+ 0xc0ac29b7L, 0xc97c50ddL, 0x3f84d5b5L, 0xb5470917L,
+ 0x9216d5d9L, 0x8979fb1b
+ },{
+ 0xd1310ba6L, 0x98dfb5acL, 0x2ffd72dbL, 0xd01adfb7L,
+ 0xb8e1afedL, 0x6a267e96L, 0xba7c9045L, 0xf12c7f99L,
+ 0x24a19947L, 0xb3916cf7L, 0x0801f2e2L, 0x858efc16L,
+ 0x636920d8L, 0x71574e69L, 0xa458fea3L, 0xf4933d7eL,
+ 0x0d95748fL, 0x728eb658L, 0x718bcd58L, 0x82154aeeL,
+ 0x7b54a41dL, 0xc25a59b5L, 0x9c30d539L, 0x2af26013L,
+ 0xc5d1b023L, 0x286085f0L, 0xca417918L, 0xb8db38efL,
+ 0x8e79dcb0L, 0x603a180eL, 0x6c9e0e8bL, 0xb01e8a3eL,
+ 0xd71577c1L, 0xbd314b27L, 0x78af2fdaL, 0x55605c60L,
+ 0xe65525f3L, 0xaa55ab94L, 0x57489862L, 0x63e81440L,
+ 0x55ca396aL, 0x2aab10b6L, 0xb4cc5c34L, 0x1141e8ceL,
+ 0xa15486afL, 0x7c72e993L, 0xb3ee1411L, 0x636fbc2aL,
+ 0x2ba9c55dL, 0x741831f6L, 0xce5c3e16L, 0x9b87931eL,
+ 0xafd6ba33L, 0x6c24cf5cL, 0x7a325381L, 0x28958677L,
+ 0x3b8f4898L, 0x6b4bb9afL, 0xc4bfe81bL, 0x66282193L,
+ 0x61d809ccL, 0xfb21a991L, 0x487cac60L, 0x5dec8032L,
+ 0xef845d5dL, 0xe98575b1L, 0xdc262302L, 0xeb651b88L,
+ 0x23893e81L, 0xd396acc5L, 0x0f6d6ff3L, 0x83f44239L,
+ 0x2e0b4482L, 0xa4842004L, 0x69c8f04aL, 0x9e1f9b5eL,
+ 0x21c66842L, 0xf6e96c9aL, 0x670c9c61L, 0xabd388f0L,
+ 0x6a51a0d2L, 0xd8542f68L, 0x960fa728L, 0xab5133a3L,
+ 0x6eef0b6cL, 0x137a3be4L, 0xba3bf050L, 0x7efb2a98L,
+ 0xa1f1651dL, 0x39af0176L, 0x66ca593eL, 0x82430e88L,
+ 0x8cee8619L, 0x456f9fb4L, 0x7d84a5c3L, 0x3b8b5ebeL,
+ 0xe06f75d8L, 0x85c12073L, 0x401a449fL, 0x56c16aa6L,
+ 0x4ed3aa62L, 0x363f7706L, 0x1bfedf72L, 0x429b023dL,
+ 0x37d0d724L, 0xd00a1248L, 0xdb0fead3L, 0x49f1c09bL,
+ 0x075372c9L, 0x80991b7bL, 0x25d479d8L, 0xf6e8def7L,
+ 0xe3fe501aL, 0xb6794c3bL, 0x976ce0bdL, 0x04c006baL,
+ 0xc1a94fb6L, 0x409f60c4L, 0x5e5c9ec2L, 0x196a2463L,
+ 0x68fb6fafL, 0x3e6c53b5L, 0x1339b2ebL, 0x3b52ec6fL,
+ 0x6dfc511fL, 0x9b30952cL, 0xcc814544L, 0xaf5ebd09L,
+ 0xbee3d004L, 0xde334afdL, 0x660f2807L, 0x192e4bb3L,
+ 0xc0cba857L, 0x45c8740fL, 0xd20b5f39L, 0xb9d3fbdbL,
+ 0x5579c0bdL, 0x1a60320aL, 0xd6a100c6L, 0x402c7279L,
+ 0x679f25feL, 0xfb1fa3ccL, 0x8ea5e9f8L, 0xdb3222f8L,
+ 0x3c7516dfL, 0xfd616b15L, 0x2f501ec8L, 0xad0552abL,
+ 0x323db5faL, 0xfd238760L, 0x53317b48L, 0x3e00df82L,
+ 0x9e5c57bbL, 0xca6f8ca0L, 0x1a87562eL, 0xdf1769dbL,
+ 0xd542a8f6L, 0x287effc3L, 0xac6732c6L, 0x8c4f5573L,
+ 0x695b27b0L, 0xbbca58c8L, 0xe1ffa35dL, 0xb8f011a0L,
+ 0x10fa3d98L, 0xfd2183b8L, 0x4afcb56cL, 0x2dd1d35bL,
+ 0x9a53e479L, 0xb6f84565L, 0xd28e49bcL, 0x4bfb9790L,
+ 0xe1ddf2daL, 0xa4cb7e33L, 0x62fb1341L, 0xcee4c6e8L,
+ 0xef20cadaL, 0x36774c01L, 0xd07e9efeL, 0x2bf11fb4L,
+ 0x95dbda4dL, 0xae909198L, 0xeaad8e71L, 0x6b93d5a0L,
+ 0xd08ed1d0L, 0xafc725e0L, 0x8e3c5b2fL, 0x8e7594b7L,
+ 0x8ff6e2fbL, 0xf2122b64L, 0x8888b812L, 0x900df01cL,
+ 0x4fad5ea0L, 0x688fc31cL, 0xd1cff191L, 0xb3a8c1adL,
+ 0x2f2f2218L, 0xbe0e1777L, 0xea752dfeL, 0x8b021fa1L,
+ 0xe5a0cc0fL, 0xb56f74e8L, 0x18acf3d6L, 0xce89e299L,
+ 0xb4a84fe0L, 0xfd13e0b7L, 0x7cc43b81L, 0xd2ada8d9L,
+ 0x165fa266L, 0x80957705L, 0x93cc7314L, 0x211a1477L,
+ 0xe6ad2065L, 0x77b5fa86L, 0xc75442f5L, 0xfb9d35cfL,
+ 0xebcdaf0cL, 0x7b3e89a0L, 0xd6411bd3L, 0xae1e7e49L,
+ 0x00250e2dL, 0x2071b35eL, 0x226800bbL, 0x57b8e0afL,
+ 0x2464369bL, 0xf009b91eL, 0x5563911dL, 0x59dfa6aaL,
+ 0x78c14389L, 0xd95a537fL, 0x207d5ba2L, 0x02e5b9c5L,
+ 0x83260376L, 0x6295cfa9L, 0x11c81968L, 0x4e734a41L,
+ 0xb3472dcaL, 0x7b14a94aL, 0x1b510052L, 0x9a532915L,
+ 0xd60f573fL, 0xbc9bc6e4L, 0x2b60a476L, 0x81e67400L,
+ 0x08ba6fb5L, 0x571be91fL, 0xf296ec6bL, 0x2a0dd915L,
+ 0xb6636521L, 0xe7b9f9b6L, 0xff34052eL, 0xc5855664L,
+ 0x53b02d5dL, 0xa99f8fa1L, 0x08ba4799L, 0x6e85076aL,
+ 0x4b7a70e9L, 0xb5b32944L, 0xdb75092eL, 0xc4192623L,
+ 0xad6ea6b0L, 0x49a7df7dL, 0x9cee60b8L, 0x8fedb266L,
+ 0xecaa8c71L, 0x699a17ffL, 0x5664526cL, 0xc2b19ee1L,
+ 0x193602a5L, 0x75094c29L, 0xa0591340L, 0xe4183a3eL,
+ 0x3f54989aL, 0x5b429d65L, 0x6b8fe4d6L, 0x99f73fd6L,
+ 0xa1d29c07L, 0xefe830f5L, 0x4d2d38e6L, 0xf0255dc1L,
+ 0x4cdd2086L, 0x8470eb26L, 0x6382e9c6L, 0x021ecc5eL,
+ 0x09686b3fL, 0x3ebaefc9L, 0x3c971814L, 0x6b6a70a1L,
+ 0x687f3584L, 0x52a0e286L, 0xb79c5305L, 0xaa500737L,
+ 0x3e07841cL, 0x7fdeae5cL, 0x8e7d44ecL, 0x5716f2b8L,
+ 0xb03ada37L, 0xf0500c0dL, 0xf01c1f04L, 0x0200b3ffL,
+ 0xae0cf51aL, 0x3cb574b2L, 0x25837a58L, 0xdc0921bdL,
+ 0xd19113f9L, 0x7ca92ff6L, 0x94324773L, 0x22f54701L,
+ 0x3ae5e581L, 0x37c2dadcL, 0xc8b57634L, 0x9af3dda7L,
+ 0xa9446146L, 0x0fd0030eL, 0xecc8c73eL, 0xa4751e41L,
+ 0xe238cd99L, 0x3bea0e2fL, 0x3280bba1L, 0x183eb331L,
+ 0x4e548b38L, 0x4f6db908L, 0x6f420d03L, 0xf60a04bfL,
+ 0x2cb81290L, 0x24977c79L, 0x5679b072L, 0xbcaf89afL,
+ 0xde9a771fL, 0xd9930810L, 0xb38bae12L, 0xdccf3f2eL,
+ 0x5512721fL, 0x2e6b7124L, 0x501adde6L, 0x9f84cd87L,
+ 0x7a584718L, 0x7408da17L, 0xbc9f9abcL, 0xe94b7d8cL,
+ 0xec7aec3aL, 0xdb851dfaL, 0x63094366L, 0xc464c3d2L,
+ 0xef1c1847L, 0x3215d908L, 0xdd433b37L, 0x24c2ba16L,
+ 0x12a14d43L, 0x2a65c451L, 0x50940002L, 0x133ae4ddL,
+ 0x71dff89eL, 0x10314e55L, 0x81ac77d6L, 0x5f11199bL,
+ 0x043556f1L, 0xd7a3c76bL, 0x3c11183bL, 0x5924a509L,
+ 0xf28fe6edL, 0x97f1fbfaL, 0x9ebabf2cL, 0x1e153c6eL,
+ 0x86e34570L, 0xeae96fb1L, 0x860e5e0aL, 0x5a3e2ab3L,
+ 0x771fe71cL, 0x4e3d06faL, 0x2965dcb9L, 0x99e71d0fL,
+ 0x803e89d6L, 0x5266c825L, 0x2e4cc978L, 0x9c10b36aL,
+ 0xc6150ebaL, 0x94e2ea78L, 0xa5fc3c53L, 0x1e0a2df4L,
+ 0xf2f74ea7L, 0x361d2b3dL, 0x1939260fL, 0x19c27960L,
+ 0x5223a708L, 0xf71312b6L, 0xebadfe6eL, 0xeac31f66L,
+ 0xe3bc4595L, 0xa67bc883L, 0xb17f37d1L, 0x018cff28L,
+ 0xc332ddefL, 0xbe6c5aa5L, 0x65582185L, 0x68ab9802L,
+ 0xeecea50fL, 0xdb2f953bL, 0x2aef7dadL, 0x5b6e2f84L,
+ 0x1521b628L, 0x29076170L, 0xecdd4775L, 0x619f1510L,
+ 0x13cca830L, 0xeb61bd96L, 0x0334fe1eL, 0xaa0363cfL,
+ 0xb5735c90L, 0x4c70a239L, 0xd59e9e0bL, 0xcbaade14L,
+ 0xeecc86bcL, 0x60622ca7L, 0x9cab5cabL, 0xb2f3846eL,
+ 0x648b1eafL, 0x19bdf0caL, 0xa02369b9L, 0x655abb50L,
+ 0x40685a32L, 0x3c2ab4b3L, 0x319ee9d5L, 0xc021b8f7L,
+ 0x9b540b19L, 0x875fa099L, 0x95f7997eL, 0x623d7da8L,
+ 0xf837889aL, 0x97e32d77L, 0x11ed935fL, 0x16681281L,
+ 0x0e358829L, 0xc7e61fd6L, 0x96dedfa1L, 0x7858ba99L,
+ 0x57f584a5L, 0x1b227263L, 0x9b83c3ffL, 0x1ac24696L,
+ 0xcdb30aebL, 0x532e3054L, 0x8fd948e4L, 0x6dbc3128L,
+ 0x58ebf2efL, 0x34c6ffeaL, 0xfe28ed61L, 0xee7c3c73L,
+ 0x5d4a14d9L, 0xe864b7e3L, 0x42105d14L, 0x203e13e0L,
+ 0x45eee2b6L, 0xa3aaabeaL, 0xdb6c4f15L, 0xfacb4fd0L,
+ 0xc742f442L, 0xef6abbb5L, 0x654f3b1dL, 0x41cd2105L,
+ 0xd81e799eL, 0x86854dc7L, 0xe44b476aL, 0x3d816250L,
+ 0xcf62a1f2L, 0x5b8d2646L, 0xfc8883a0L, 0xc1c7b6a3L,
+ 0x7f1524c3L, 0x69cb7492L, 0x47848a0bL, 0x5692b285L,
+ 0x095bbf00L, 0xad19489dL, 0x1462b174L, 0x23820e00L,
+ 0x58428d2aL, 0x0c55f5eaL, 0x1dadf43eL, 0x233f7061L,
+ 0x3372f092L, 0x8d937e41L, 0xd65fecf1L, 0x6c223bdbL,
+ 0x7cde3759L, 0xcbee7460L, 0x4085f2a7L, 0xce77326eL,
+ 0xa6078084L, 0x19f8509eL, 0xe8efd855L, 0x61d99735L,
+ 0xa969a7aaL, 0xc50c06c2L, 0x5a04abfcL, 0x800bcadcL,
+ 0x9e447a2eL, 0xc3453484L, 0xfdd56705L, 0x0e1e9ec9L,
+ 0xdb73dbd3L, 0x105588cdL, 0x675fda79L, 0xe3674340L,
+ 0xc5c43465L, 0x713e38d8L, 0x3d28f89eL, 0xf16dff20L,
+ 0x153e21e7L, 0x8fb03d4aL, 0xe6e39f2bL, 0xdb83adf7L,
+ 0xe93d5a68L, 0x948140f7L, 0xf64c261cL, 0x94692934L,
+ 0x411520f7L, 0x7602d4f7L, 0xbcf46b2eL, 0xd4a20068L,
+ 0xd4082471L, 0x3320f46aL, 0x43b7d4b7L, 0x500061afL,
+ 0x1e39f62eL, 0x97244546L, 0x14214f74L, 0xbf8b8840L,
+ 0x4d95fc1dL, 0x96b591afL, 0x70f4ddd3L, 0x66a02f45L,
+ 0xbfbc09ecL, 0x03bd9785L, 0x7fac6dd0L, 0x31cb8504L,
+ 0x96eb27b3L, 0x55fd3941L, 0xda2547e6L, 0xabca0a9aL,
+ 0x28507825L, 0x530429f4L, 0x0a2c86daL, 0xe9b66dfbL,
+ 0x68dc1462L, 0xd7486900L, 0x680ec0a4L, 0x27a18deeL,
+ 0x4f3ffea2L, 0xe887ad8cL, 0xb58ce006L, 0x7af4d6b6L,
+ 0xaace1e7cL, 0xd3375fecL, 0xce78a399L, 0x406b2a42L,
+ 0x20fe9e35L, 0xd9f385b9L, 0xee39d7abL, 0x3b124e8bL,
+ 0x1dc9faf7L, 0x4b6d1856L, 0x26a36631L, 0xeae397b2L,
+ 0x3a6efa74L, 0xdd5b4332L, 0x6841e7f7L, 0xca7820fbL,
+ 0xfb0af54eL, 0xd8feb397L, 0x454056acL, 0xba489527L,
+ 0x55533a3aL, 0x20838d87L, 0xfe6ba9b7L, 0xd096954bL,
+ 0x55a867bcL, 0xa1159a58L, 0xcca92963L, 0x99e1db33L,
+ 0xa62a4a56L, 0x3f3125f9L, 0x5ef47e1cL, 0x9029317cL,
+ 0xfdf8e802L, 0x04272f70L, 0x80bb155cL, 0x05282ce3L,
+ 0x95c11548L, 0xe4c66d22L, 0x48c1133fL, 0xc70f86dcL,
+ 0x07f9c9eeL, 0x41041f0fL, 0x404779a4L, 0x5d886e17L,
+ 0x325f51ebL, 0xd59bc0d1L, 0xf2bcc18fL, 0x41113564L,
+ 0x257b7834L, 0x602a9c60L, 0xdff8e8a3L, 0x1f636c1bL,
+ 0x0e12b4c2L, 0x02e1329eL, 0xaf664fd1L, 0xcad18115L,
+ 0x6b2395e0L, 0x333e92e1L, 0x3b240b62L, 0xeebeb922L,
+ 0x85b2a20eL, 0xe6ba0d99L, 0xde720c8cL, 0x2da2f728L,
+ 0xd0127845L, 0x95b794fdL, 0x647d0862L, 0xe7ccf5f0L,
+ 0x5449a36fL, 0x877d48faL, 0xc39dfd27L, 0xf33e8d1eL,
+ 0x0a476341L, 0x992eff74L, 0x3a6f6eabL, 0xf4f8fd37L,
+ 0xa812dc60L, 0xa1ebddf8L, 0x991be14cL, 0xdb6e6b0dL,
+ 0xc67b5510L, 0x6d672c37L, 0x2765d43bL, 0xdcd0e804L,
+ 0xf1290dc7L, 0xcc00ffa3L, 0xb5390f92L, 0x690fed0bL,
+ 0x667b9ffbL, 0xcedb7d9cL, 0xa091cf0bL, 0xd9155ea3L,
+ 0xbb132f88L, 0x515bad24L, 0x7b9479bfL, 0x763bd6ebL,
+ 0x37392eb3L, 0xcc115979L, 0x8026e297L, 0xf42e312dL,
+ 0x6842ada7L, 0xc66a2b3bL, 0x12754cccL, 0x782ef11cL,
+ 0x6a124237L, 0xb79251e7L, 0x06a1bbe6L, 0x4bfb6350L,
+ 0x1a6b1018L, 0x11caedfaL, 0x3d25bdd8L, 0xe2e1c3c9L,
+ 0x44421659L, 0x0a121386L, 0xd90cec6eL, 0xd5abea2aL,
+ 0x64af674eL, 0xda86a85fL, 0xbebfe988L, 0x64e4c3feL,
+ 0x9dbc8057L, 0xf0f7c086L, 0x60787bf8L, 0x6003604dL,
+ 0xd1fd8346L, 0xf6381fb0L, 0x7745ae04L, 0xd736fcccL,
+ 0x83426b33L, 0xf01eab71L, 0xb0804187L, 0x3c005e5fL,
+ 0x77a057beL, 0xbde8ae24L, 0x55464299L, 0xbf582e61L,
+ 0x4e58f48fL, 0xf2ddfda2L, 0xf474ef38L, 0x8789bdc2L,
+ 0x5366f9c3L, 0xc8b38e74L, 0xb475f255L, 0x46fcd9b9L,
+ 0x7aeb2661L, 0x8b1ddf84L, 0x846a0e79L, 0x915f95e2L,
+ 0x466e598eL, 0x20b45770L, 0x8cd55591L, 0xc902de4cL,
+ 0xb90bace1L, 0xbb8205d0L, 0x11a86248L, 0x7574a99eL,
+ 0xb77f19b6L, 0xe0a9dc09L, 0x662d09a1L, 0xc4324633L,
+ 0xe85a1f02L, 0x09f0be8cL, 0x4a99a025L, 0x1d6efe10L,
+ 0x1ab93d1dL, 0x0ba5a4dfL, 0xa186f20fL, 0x2868f169L,
+ 0xdcb7da83L, 0x573906feL, 0xa1e2ce9bL, 0x4fcd7f52L,
+ 0x50115e01L, 0xa70683faL, 0xa002b5c4L, 0x0de6d027L,
+ 0x9af88c27L, 0x773f8641L, 0xc3604c06L, 0x61a806b5L,
+ 0xf0177a28L, 0xc0f586e0L, 0x006058aaL, 0x30dc7d62L,
+ 0x11e69ed7L, 0x2338ea63L, 0x53c2dd94L, 0xc2c21634L,
+ 0xbbcbee56L, 0x90bcb6deL, 0xebfc7da1L, 0xce591d76L,
+ 0x6f05e409L, 0x4b7c0188L, 0x39720a3dL, 0x7c927c24L,
+ 0x86e3725fL, 0x724d9db9L, 0x1ac15bb4L, 0xd39eb8fcL,
+ 0xed545578L, 0x08fca5b5L, 0xd83d7cd3L, 0x4dad0fc4L,
+ 0x1e50ef5eL, 0xb161e6f8L, 0xa28514d9L, 0x6c51133cL,
+ 0x6fd5c7e7L, 0x56e14ec4L, 0x362abfceL, 0xddc6c837L,
+ 0xd79a3234L, 0x92638212L, 0x670efa8eL, 0x406000e0L,
+ 0x3a39ce37L, 0xd3faf5cfL, 0xabc27737L, 0x5ac52d1bL,
+ 0x5cb0679eL, 0x4fa33742L, 0xd3822740L, 0x99bc9bbeL,
+ 0xd5118e9dL, 0xbf0f7315L, 0xd62d1c7eL, 0xc700c47bL,
+ 0xb78c1b6bL, 0x21a19045L, 0xb26eb1beL, 0x6a366eb4L,
+ 0x5748ab2fL, 0xbc946e79L, 0xc6a376d2L, 0x6549c2c8L,
+ 0x530ff8eeL, 0x468dde7dL, 0xd5730a1dL, 0x4cd04dc6L,
+ 0x2939bbdbL, 0xa9ba4650L, 0xac9526e8L, 0xbe5ee304L,
+ 0xa1fad5f0L, 0x6a2d519aL, 0x63ef8ce2L, 0x9a86ee22L,
+ 0xc089c2b8L, 0x43242ef6L, 0xa51e03aaL, 0x9cf2d0a4L,
+ 0x83c061baL, 0x9be96a4dL, 0x8fe51550L, 0xba645bd6L,
+ 0x2826a2f9L, 0xa73a3ae1L, 0x4ba99586L, 0xef5562e9L,
+ 0xc72fefd3L, 0xf752f7daL, 0x3f046f69L, 0x77fa0a59L,
+ 0x80e4a915L, 0x87b08601L, 0x9b09e6adL, 0x3b3ee593L,
+ 0xe990fd5aL, 0x9e34d797L, 0x2cf0b7d9L, 0x022b8b51L,
+ 0x96d5ac3aL, 0x017da67dL, 0xd1cf3ed6L, 0x7c7d2d28L,
+ 0x1f9f25cfL, 0xadf2b89bL, 0x5ad6b472L, 0x5a88f54cL,
+ 0xe029ac71L, 0xe019a5e6L, 0x47b0acfdL, 0xed93fa9bL,
+ 0xe8d3c48dL, 0x283b57ccL, 0xf8d56629L, 0x79132e28L,
+ 0x785f0191L, 0xed756055L, 0xf7960e44L, 0xe3d35e8cL,
+ 0x15056dd4L, 0x88f46dbaL, 0x03a16125L, 0x0564f0bdL,
+ 0xc3eb9e15L, 0x3c9057a2L, 0x97271aecL, 0xa93a072aL,
+ 0x1b3f6d9bL, 0x1e6321f5L, 0xf59c66fbL, 0x26dcf319L,
+ 0x7533d928L, 0xb155fdf5L, 0x03563482L, 0x8aba3cbbL,
+ 0x28517711L, 0xc20ad9f8L, 0xabcc5167L, 0xccad925fL,
+ 0x4de81751L, 0x3830dc8eL, 0x379d5862L, 0x9320f991L,
+ 0xea7a90c2L, 0xfb3e7bceL, 0x5121ce64L, 0x774fbe32L,
+ 0xa8b6e37eL, 0xc3293d46L, 0x48de5369L, 0x6413e680L,
+ 0xa2ae0810L, 0xdd6db224L, 0x69852dfdL, 0x09072166L,
+ 0xb39a460aL, 0x6445c0ddL, 0x586cdecfL, 0x1c20c8aeL,
+ 0x5bbef7ddL, 0x1b588d40L, 0xccd2017fL, 0x6bb4e3bbL,
+ 0xdda26a7eL, 0x3a59ff45L, 0x3e350a44L, 0xbcb4cdd5L,
+ 0x72eacea8L, 0xfa6484bbL, 0x8d6612aeL, 0xbf3c6f47L,
+ 0xd29be463L, 0x542f5d9eL, 0xaec2771bL, 0xf64e6370L,
+ 0x740e0d8dL, 0xe75b1357L, 0xf8721671L, 0xaf537d5dL,
+ 0x4040cb08L, 0x4eb4e2ccL, 0x34d2466aL, 0x0115af84L,
+ 0xe1b00428L, 0x95983a1dL, 0x06b89fb4L, 0xce6ea048L,
+ 0x6f3f3b82L, 0x3520ab82L, 0x011a1d4bL, 0x277227f8L,
+ 0x611560b1L, 0xe7933fdcL, 0xbb3a792bL, 0x344525bdL,
+ 0xa08839e1L, 0x51ce794bL, 0x2f32c9b7L, 0xa01fbac9L,
+ 0xe01cc87eL, 0xbcc7d1f6L, 0xcf0111c3L, 0xa1e8aac7L,
+ 0x1a908749L, 0xd44fbd9aL, 0xd0dadecbL, 0xd50ada38L,
+ 0x0339c32aL, 0xc6913667L, 0x8df9317cL, 0xe0b12b4fL,
+ 0xf79e59b7L, 0x43f5bb3aL, 0xf2d519ffL, 0x27d9459cL,
+ 0xbf97222cL, 0x15e6fc2aL, 0x0f91fc71L, 0x9b941525L,
+ 0xfae59361L, 0xceb69cebL, 0xc2a86459L, 0x12baa8d1L,
+ 0xb6c1075eL, 0xe3056a0cL, 0x10d25065L, 0xcb03a442L,
+ 0xe0ec6e0eL, 0x1698db3bL, 0x4c98a0beL, 0x3278e964L,
+ 0x9f1f9532L, 0xe0d392dfL, 0xd3a0342bL, 0x8971f21eL,
+ 0x1b0a7441L, 0x4ba3348cL, 0xc5be7120L, 0xc37632d8L,
+ 0xdf359f8dL, 0x9b992f2eL, 0xe60b6f47L, 0x0fe3f11dL,
+ 0xe54cda54L, 0x1edad891L, 0xce6279cfL, 0xcd3e7e6fL,
+ 0x1618b166L, 0xfd2c1d05L, 0x848fd2c5L, 0xf6fb2299L,
+ 0xf523f357L, 0xa6327623L, 0x93a83531L, 0x56cccd02L,
+ 0xacf08162L, 0x5a75ebb5L, 0x6e163697L, 0x88d273ccL,
+ 0xde966292L, 0x81b949d0L, 0x4c50901bL, 0x71c65614L,
+ 0xe6c6c7bdL, 0x327a140aL, 0x45e1d006L, 0xc3f27b9aL,
+ 0xc9aa53fdL, 0x62a80f00L, 0xbb25bfe2L, 0x35bdd2f6L,
+ 0x71126905L, 0xb2040222L, 0xb6cbcf7cL, 0xcd769c2bL,
+ 0x53113ec0L, 0x1640e3d3L, 0x38abbd60L, 0x2547adf0L,
+ 0xba38209cL, 0xf746ce76L, 0x77afa1c5L, 0x20756060L,
+ 0x85cbfe4eL, 0x8ae88dd8L, 0x7aaaf9b0L, 0x4cf9aa7eL,
+ 0x1948c25cL, 0x02fb8a8cL, 0x01c36ae4L, 0xd6ebe1f9L,
+ 0x90d4f869L, 0xa65cdea0L, 0x3f09252dL, 0xc208e69fL,
+ 0xb74e6132L, 0xce77e25bL, 0x578fdfe3L, 0x3ac372e6L,
+ }
+ };
+
+__END_HIDDEN_DECLS
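The bf_init table above seeds the Blowfish P-array and S-boxes with the fractional hexadecimal digits of pi. A small standalone sketch (not part of the diff) reproduces the first P entry directly from pi:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const double pi = 3.14159265358979323846;
	/* First 32 fractional bits of pi: floor(frac(pi) * 2^32). */
	uint32_t p0 = (uint32_t)((pi - 3.0) * 4294967296.0);

	printf("%08x\n", p0);	/* prints 243f6a88, matching bf_init P[0] */
	return 0;
}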
diff --git a/ext/libressl/crypto/bf/bf_skey.c b/ext/libressl/crypto/bf/bf_skey.c
new file mode 100644
index 0000000..8191d17
--- /dev/null
+++ b/ext/libressl/crypto/bf/bf_skey.c
@@ -0,0 +1,117 @@
+/* $OpenBSD: bf_skey.c,v 1.12 2014/06/12 15:49:28 deraadt Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <openssl/crypto.h>
+#include <openssl/blowfish.h>
+#include "bf_locl.h"
+#include "bf_pi.h"
+
+void BF_set_key(BF_KEY *key, int len, const unsigned char *data)
+ {
+ int i;
+ BF_LONG *p,ri,in[2];
+ const unsigned char *d,*end;
+
+
+ memcpy(key,&bf_init,sizeof(BF_KEY));
+ p=key->P;
+
+ if (len > ((BF_ROUNDS+2)*4)) len=(BF_ROUNDS+2)*4;
+
+ d=data;
+ end= &(data[len]);
+ for (i=0; i<(BF_ROUNDS+2); i++)
+ {
+ ri= *(d++);
+ if (d >= end) d=data;
+
+ ri<<=8;
+ ri|= *(d++);
+ if (d >= end) d=data;
+
+ ri<<=8;
+ ri|= *(d++);
+ if (d >= end) d=data;
+
+ ri<<=8;
+ ri|= *(d++);
+ if (d >= end) d=data;
+
+ p[i]^=ri;
+ }
+
+ in[0]=0L;
+ in[1]=0L;
+ for (i=0; i<(BF_ROUNDS+2); i+=2)
+ {
+ BF_encrypt(in,key);
+ p[i ]=in[0];
+ p[i+1]=in[1];
+ }
+
+ p=key->S;
+ for (i=0; i<4*256; i+=2)
+ {
+ BF_encrypt(in,key);
+ p[i ]=in[0];
+ p[i+1]=in[1];
+ }
+ }
+
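BF_set_key() above copies the pi-derived tables, XORs the key bytes into the P-array (cycling through the key when it is shorter than 72 bytes), then repeatedly encrypts an all-zero block to refill P and the S-boxes. A minimal sketch (not part of the diff) of driving it together with the raw block function; the key and plaintext are placeholders:

#include <openssl/blowfish.h>
#include <stdio.h>

int main(void)
{
	static const unsigned char key[] = "0123456789abcdef"; /* placeholder key */
	BF_LONG block[2] = { 0x01234567UL, 0x89abcdefUL };      /* one 64-bit block */
	BF_KEY bf;

	BF_set_key(&bf, 16, key);
	BF_encrypt(block, &bf);		/* in-place, host-order 32-bit halves */
	printf("%08lx %08lx\n", (unsigned long)block[0], (unsigned long)block[1]);
	return 0;
}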
diff --git a/ext/libressl/crypto/chacha/Makefile b/ext/libressl/crypto/chacha/Makefile
new file mode 100644
index 0000000..2eb56cb
--- /dev/null
+++ b/ext/libressl/crypto/chacha/Makefile
@@ -0,0 +1,13 @@
+include ../../ssl_common.mk
+
+obj = chacha.o
+
+
+all: $(obj)
+dep: all
+
+%.o: %.c
+ $(CC) $(CFLAGS) -c $<
+
+clean:
+ rm -f *.o *.a
diff --git a/ext/libressl/crypto/chacha/chacha-merged.c b/ext/libressl/crypto/chacha/chacha-merged.c
new file mode 100644
index 0000000..67508f2
--- /dev/null
+++ b/ext/libressl/crypto/chacha/chacha-merged.c
@@ -0,0 +1,325 @@
+/* $OpenBSD: chacha-merged.c,v 1.9 2019/01/22 00:59:21 dlg Exp $ */
+/*
+chacha-merged.c version 20080118
+D. J. Bernstein
+Public domain.
+*/
+
+#include <sys/types.h>
+
+#include <stdint.h>
+
+#define CHACHA_MINKEYLEN 16
+#define CHACHA_NONCELEN 8
+#define CHACHA_CTRLEN 8
+#define CHACHA_STATELEN (CHACHA_NONCELEN+CHACHA_CTRLEN)
+#define CHACHA_BLOCKLEN 64
+
+struct chacha_ctx {
+ u_int input[16];
+ uint8_t ks[CHACHA_BLOCKLEN];
+ uint8_t unused;
+};
+
+static inline void chacha_keysetup(struct chacha_ctx *x, const u_char *k,
+ u_int kbits)
+ __attribute__((__bounded__(__minbytes__, 2, CHACHA_MINKEYLEN)));
+static inline void chacha_ivsetup(struct chacha_ctx *x, const u_char *iv,
+ const u_char *ctr)
+ __attribute__((__bounded__(__minbytes__, 2, CHACHA_NONCELEN)))
+ __attribute__((__bounded__(__minbytes__, 3, CHACHA_CTRLEN)));
+static inline void chacha_encrypt_bytes(struct chacha_ctx *x, const u_char *m,
+ u_char *c, u_int bytes)
+ __attribute__((__bounded__(__buffer__, 2, 4)))
+ __attribute__((__bounded__(__buffer__, 3, 4)));
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+
+typedef struct chacha_ctx chacha_ctx;
+
+#define U8C(v) (v##U)
+#define U32C(v) (v##U)
+
+#define U8V(v) ((u8)(v) & U8C(0xFF))
+#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
+
+#define ROTL32(v, n) \
+ (U32V((v) << (n)) | ((v) >> (32 - (n))))
+
+#define U8TO32_LITTLE(p) \
+ (((u32)((p)[0])) | \
+ ((u32)((p)[1]) << 8) | \
+ ((u32)((p)[2]) << 16) | \
+ ((u32)((p)[3]) << 24))
+
+#define U32TO8_LITTLE(p, v) \
+ do { \
+ (p)[0] = U8V((v)); \
+ (p)[1] = U8V((v) >> 8); \
+ (p)[2] = U8V((v) >> 16); \
+ (p)[3] = U8V((v) >> 24); \
+ } while (0)
+
+#define ROTATE(v,c) (ROTL32(v,c))
+#define XOR(v,w) ((v) ^ (w))
+#define PLUS(v,w) (U32V((v) + (w)))
+#define PLUSONE(v) (PLUS((v),1))
+
+#define QUARTERROUND(a,b,c,d) \
+ a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
+ c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
+ a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
+ c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
+
+/* Initialise with "expand 32-byte k". */
+static const char sigma[16] = {
+ 0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x33,
+ 0x32, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
+};
+
+/* Initialise with "expand 16-byte k". */
+static const char tau[16] = {
+ 0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x31,
+ 0x36, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
+};
+
+static inline void
+chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits)
+{
+ const char *constants;
+
+ x->input[4] = U8TO32_LITTLE(k + 0);
+ x->input[5] = U8TO32_LITTLE(k + 4);
+ x->input[6] = U8TO32_LITTLE(k + 8);
+ x->input[7] = U8TO32_LITTLE(k + 12);
+ if (kbits == 256) { /* recommended */
+ k += 16;
+ constants = sigma;
+ } else { /* kbits == 128 */
+ constants = tau;
+ }
+ x->input[8] = U8TO32_LITTLE(k + 0);
+ x->input[9] = U8TO32_LITTLE(k + 4);
+ x->input[10] = U8TO32_LITTLE(k + 8);
+ x->input[11] = U8TO32_LITTLE(k + 12);
+ x->input[0] = U8TO32_LITTLE(constants + 0);
+ x->input[1] = U8TO32_LITTLE(constants + 4);
+ x->input[2] = U8TO32_LITTLE(constants + 8);
+ x->input[3] = U8TO32_LITTLE(constants + 12);
+}
+
+static inline void
+chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
+{
+ x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
+ x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
+ x->input[14] = U8TO32_LITTLE(iv + 0);
+ x->input[15] = U8TO32_LITTLE(iv + 4);
+}
+
+static inline void
+chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes)
+{
+ u32 x0, x1, x2, x3, x4, x5, x6, x7;
+ u32 x8, x9, x10, x11, x12, x13, x14, x15;
+ u32 j0, j1, j2, j3, j4, j5, j6, j7;
+ u32 j8, j9, j10, j11, j12, j13, j14, j15;
+ u8 *ctarget = NULL;
+ u8 tmp[64];
+ u_int i;
+
+ if (!bytes)
+ return;
+
+ j0 = x->input[0];
+ j1 = x->input[1];
+ j2 = x->input[2];
+ j3 = x->input[3];
+ j4 = x->input[4];
+ j5 = x->input[5];
+ j6 = x->input[6];
+ j7 = x->input[7];
+ j8 = x->input[8];
+ j9 = x->input[9];
+ j10 = x->input[10];
+ j11 = x->input[11];
+ j12 = x->input[12];
+ j13 = x->input[13];
+ j14 = x->input[14];
+ j15 = x->input[15];
+
+ for (;;) {
+ if (bytes < 64) {
+ for (i = 0; i < bytes; ++i)
+ tmp[i] = m[i];
+ m = tmp;
+ ctarget = c;
+ c = tmp;
+ }
+ x0 = j0;
+ x1 = j1;
+ x2 = j2;
+ x3 = j3;
+ x4 = j4;
+ x5 = j5;
+ x6 = j6;
+ x7 = j7;
+ x8 = j8;
+ x9 = j9;
+ x10 = j10;
+ x11 = j11;
+ x12 = j12;
+ x13 = j13;
+ x14 = j14;
+ x15 = j15;
+ for (i = 20; i > 0; i -= 2) {
+ QUARTERROUND(x0, x4, x8, x12)
+ QUARTERROUND(x1, x5, x9, x13)
+ QUARTERROUND(x2, x6, x10, x14)
+ QUARTERROUND(x3, x7, x11, x15)
+ QUARTERROUND(x0, x5, x10, x15)
+ QUARTERROUND(x1, x6, x11, x12)
+ QUARTERROUND(x2, x7, x8, x13)
+ QUARTERROUND(x3, x4, x9, x14)
+ }
+ x0 = PLUS(x0, j0);
+ x1 = PLUS(x1, j1);
+ x2 = PLUS(x2, j2);
+ x3 = PLUS(x3, j3);
+ x4 = PLUS(x4, j4);
+ x5 = PLUS(x5, j5);
+ x6 = PLUS(x6, j6);
+ x7 = PLUS(x7, j7);
+ x8 = PLUS(x8, j8);
+ x9 = PLUS(x9, j9);
+ x10 = PLUS(x10, j10);
+ x11 = PLUS(x11, j11);
+ x12 = PLUS(x12, j12);
+ x13 = PLUS(x13, j13);
+ x14 = PLUS(x14, j14);
+ x15 = PLUS(x15, j15);
+
+ if (bytes < 64) {
+ U32TO8_LITTLE(x->ks + 0, x0);
+ U32TO8_LITTLE(x->ks + 4, x1);
+ U32TO8_LITTLE(x->ks + 8, x2);
+ U32TO8_LITTLE(x->ks + 12, x3);
+ U32TO8_LITTLE(x->ks + 16, x4);
+ U32TO8_LITTLE(x->ks + 20, x5);
+ U32TO8_LITTLE(x->ks + 24, x6);
+ U32TO8_LITTLE(x->ks + 28, x7);
+ U32TO8_LITTLE(x->ks + 32, x8);
+ U32TO8_LITTLE(x->ks + 36, x9);
+ U32TO8_LITTLE(x->ks + 40, x10);
+ U32TO8_LITTLE(x->ks + 44, x11);
+ U32TO8_LITTLE(x->ks + 48, x12);
+ U32TO8_LITTLE(x->ks + 52, x13);
+ U32TO8_LITTLE(x->ks + 56, x14);
+ U32TO8_LITTLE(x->ks + 60, x15);
+ }
+
+ x0 = XOR(x0, U8TO32_LITTLE(m + 0));
+ x1 = XOR(x1, U8TO32_LITTLE(m + 4));
+ x2 = XOR(x2, U8TO32_LITTLE(m + 8));
+ x3 = XOR(x3, U8TO32_LITTLE(m + 12));
+ x4 = XOR(x4, U8TO32_LITTLE(m + 16));
+ x5 = XOR(x5, U8TO32_LITTLE(m + 20));
+ x6 = XOR(x6, U8TO32_LITTLE(m + 24));
+ x7 = XOR(x7, U8TO32_LITTLE(m + 28));
+ x8 = XOR(x8, U8TO32_LITTLE(m + 32));
+ x9 = XOR(x9, U8TO32_LITTLE(m + 36));
+ x10 = XOR(x10, U8TO32_LITTLE(m + 40));
+ x11 = XOR(x11, U8TO32_LITTLE(m + 44));
+ x12 = XOR(x12, U8TO32_LITTLE(m + 48));
+ x13 = XOR(x13, U8TO32_LITTLE(m + 52));
+ x14 = XOR(x14, U8TO32_LITTLE(m + 56));
+ x15 = XOR(x15, U8TO32_LITTLE(m + 60));
+
+ j12 = PLUSONE(j12);
+ if (!j12) {
+ j13 = PLUSONE(j13);
+ /*
+ * Stopping at 2^70 bytes per nonce is the user's
+ * responsibility.
+ */
+ }
+
+ U32TO8_LITTLE(c + 0, x0);
+ U32TO8_LITTLE(c + 4, x1);
+ U32TO8_LITTLE(c + 8, x2);
+ U32TO8_LITTLE(c + 12, x3);
+ U32TO8_LITTLE(c + 16, x4);
+ U32TO8_LITTLE(c + 20, x5);
+ U32TO8_LITTLE(c + 24, x6);
+ U32TO8_LITTLE(c + 28, x7);
+ U32TO8_LITTLE(c + 32, x8);
+ U32TO8_LITTLE(c + 36, x9);
+ U32TO8_LITTLE(c + 40, x10);
+ U32TO8_LITTLE(c + 44, x11);
+ U32TO8_LITTLE(c + 48, x12);
+ U32TO8_LITTLE(c + 52, x13);
+ U32TO8_LITTLE(c + 56, x14);
+ U32TO8_LITTLE(c + 60, x15);
+
+ if (bytes <= 64) {
+ if (bytes < 64) {
+ for (i = 0; i < bytes; ++i)
+ ctarget[i] = c[i];
+ }
+ x->input[12] = j12;
+ x->input[13] = j13;
+ x->unused = 64 - bytes;
+ return;
+ }
+ bytes -= 64;
+ c += 64;
+ m += 64;
+ }
+}
+
+void
+CRYPTO_hchacha_20(unsigned char subkey[32], const unsigned char key[32],
+ const unsigned char nonce[16])
+{
+ uint32_t x[16];
+ int i;
+
+ x[0] = U8TO32_LITTLE(sigma + 0);
+ x[1] = U8TO32_LITTLE(sigma + 4);
+ x[2] = U8TO32_LITTLE(sigma + 8);
+ x[3] = U8TO32_LITTLE(sigma + 12);
+ x[4] = U8TO32_LITTLE(key + 0);
+ x[5] = U8TO32_LITTLE(key + 4);
+ x[6] = U8TO32_LITTLE(key + 8);
+ x[7] = U8TO32_LITTLE(key + 12);
+ x[8] = U8TO32_LITTLE(key + 16);
+ x[9] = U8TO32_LITTLE(key + 20);
+ x[10] = U8TO32_LITTLE(key + 24);
+ x[11] = U8TO32_LITTLE(key + 28);
+ x[12] = U8TO32_LITTLE(nonce + 0);
+ x[13] = U8TO32_LITTLE(nonce + 4);
+ x[14] = U8TO32_LITTLE(nonce + 8);
+ x[15] = U8TO32_LITTLE(nonce + 12);
+
+ for (i = 20; i > 0; i -= 2) {
+ QUARTERROUND(x[0], x[4], x[8], x[12])
+ QUARTERROUND(x[1], x[5], x[9], x[13])
+ QUARTERROUND(x[2], x[6], x[10], x[14])
+ QUARTERROUND(x[3], x[7], x[11], x[15])
+ QUARTERROUND(x[0], x[5], x[10], x[15])
+ QUARTERROUND(x[1], x[6], x[11], x[12])
+ QUARTERROUND(x[2], x[7], x[8], x[13])
+ QUARTERROUND(x[3], x[4], x[9], x[14])
+ }
+
+ U32TO8_LITTLE(subkey + 0, x[0]);
+ U32TO8_LITTLE(subkey + 4, x[1]);
+ U32TO8_LITTLE(subkey + 8, x[2]);
+ U32TO8_LITTLE(subkey + 12, x[3]);
+
+ U32TO8_LITTLE(subkey + 16, x[12]);
+ U32TO8_LITTLE(subkey + 20, x[13]);
+ U32TO8_LITTLE(subkey + 24, x[14]);
+ U32TO8_LITTLE(subkey + 28, x[15]);
+}
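The QUARTERROUND macro above is the cipher's entire inner step: add, xor, rotate by 16/12/8/7. A standalone sketch (not part of the diff) applying one quarter round to the test vector from RFC 8439 section 2.1.1:

#include <stdint.h>
#include <stdio.h>

#define ROTL(v, n) (((v) << (n)) | ((v) >> (32 - (n))))

int main(void)
{
	uint32_t a = 0x11111111, b = 0x01020304, c = 0x9b8d6f43, d = 0x01234567;

	a += b; d = ROTL(d ^ a, 16);
	c += d; b = ROTL(b ^ c, 12);
	a += b; d = ROTL(d ^ a, 8);
	c += d; b = ROTL(b ^ c, 7);

	/* Expected per RFC 8439, section 2.1.1:
	 * a=ea2a92f4 b=cb1cf8ce c=4581472e d=5881c4bb */
	printf("%08x %08x %08x %08x\n", a, b, c, d);
	return 0;
}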
diff --git a/ext/libressl/crypto/chacha/chacha.c b/ext/libressl/crypto/chacha/chacha.c
new file mode 100644
index 0000000..f62a84d
--- /dev/null
+++ b/ext/libressl/crypto/chacha/chacha.c
@@ -0,0 +1,87 @@
+/* $OpenBSD: chacha.c,v 1.8 2019/01/22 00:59:21 dlg Exp $ */
+/*
+ * Copyright (c) 2014 Joel Sing <jsing@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdint.h>
+
+#include <openssl/chacha.h>
+
+#include "chacha-merged.c"
+
+void
+ChaCha_set_key(ChaCha_ctx *ctx, const unsigned char *key, unsigned int keybits)
+{
+ chacha_keysetup((chacha_ctx *)ctx, key, keybits);
+ ctx->unused = 0;
+}
+
+void
+ChaCha_set_iv(ChaCha_ctx *ctx, const unsigned char *iv,
+ const unsigned char *counter)
+{
+ chacha_ivsetup((chacha_ctx *)ctx, iv, counter);
+ ctx->unused = 0;
+}
+
+void
+ChaCha(ChaCha_ctx *ctx, unsigned char *out, const unsigned char *in, size_t len)
+{
+ unsigned char *k;
+ int i, l;
+
+ /* Consume remaining keystream, if any exists. */
+ if (ctx->unused > 0) {
+ k = ctx->ks + 64 - ctx->unused;
+ l = (len > ctx->unused) ? ctx->unused : len;
+ for (i = 0; i < l; i++)
+ *(out++) = *(in++) ^ *(k++);
+ ctx->unused -= l;
+ len -= l;
+ }
+
+ chacha_encrypt_bytes((chacha_ctx *)ctx, in, out, (uint32_t)len);
+}
+
+void
+CRYPTO_chacha_20(unsigned char *out, const unsigned char *in, size_t len,
+ const unsigned char key[32], const unsigned char iv[8], uint64_t counter)
+{
+ struct chacha_ctx ctx;
+
+ /*
+ * chacha_ivsetup expects the counter as a little-endian u8 array.
+ * Rather than converting the uint64_t counter to u8 and back again,
+ * pass a NULL counter and assign the two 32-bit words afterwards.
+ */
+ chacha_keysetup(&ctx, key, 256);
+ chacha_ivsetup(&ctx, iv, NULL);
+ if (counter != 0) {
+ ctx.input[12] = (uint32_t)counter;
+ ctx.input[13] = (uint32_t)(counter >> 32);
+ }
+
+ chacha_encrypt_bytes(&ctx, in, out, (uint32_t)len);
+}
+
+void
+CRYPTO_xchacha_20(unsigned char *out, const unsigned char *in, size_t len,
+ const unsigned char key[32], const unsigned char iv[24])
+{
+ uint8_t subkey[32];
+
+ CRYPTO_hchacha_20(subkey, key, iv);
+ CRYPTO_chacha_20(out, in, len, subkey, iv + 16, 0);
+}
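A usage sketch (not part of the diff) for the one-shot interfaces defined above, assuming the declarations in <openssl/chacha.h>; key and nonce here are placeholder zero values:

#include <openssl/chacha.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char key[32] = { 0 };		/* placeholder key */
	unsigned char iv[8] = { 0 };		/* placeholder nonce */
	unsigned char msg[] = "attack at dawn";
	unsigned char ct[sizeof(msg)], pt[sizeof(msg)];

	CRYPTO_chacha_20(ct, msg, sizeof(msg), key, iv, 0);

	/* Decryption is the same operation with the same key, nonce, counter. */
	CRYPTO_chacha_20(pt, ct, sizeof(ct), key, iv, 0);
	printf("%s\n", memcmp(pt, msg, sizeof(msg)) == 0 ? "round-trip ok" : "mismatch");
	return 0;
}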
diff --git a/ext/libressl/crypto/compat/Makefile b/ext/libressl/crypto/compat/Makefile
new file mode 100644
index 0000000..00e3a67
--- /dev/null
+++ b/ext/libressl/crypto/compat/Makefile
@@ -0,0 +1,13 @@
+include ../../ssl_common.mk
+
+obj = arc4random.o explicit_bzero.o timingsafe_bcmp.o timingsafe_memcmp.o
+
+
+all: $(obj)
+dep: all
+
+%.o: %.c
+ $(CC) $(CFLAGS) -c $<
+
+clean:
+ rm -f *.o *.a
diff --git a/ext/libressl/crypto/compat/arc4random.c b/ext/libressl/crypto/compat/arc4random.c
new file mode 100644
index 0000000..67a47f6
--- /dev/null
+++ b/ext/libressl/crypto/compat/arc4random.c
@@ -0,0 +1,216 @@
+/* $OpenBSD: arc4random.c,v 1.55 2019/03/24 17:56:54 deraadt Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ * Copyright (c) 2014, Theo de Raadt <deraadt@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * ChaCha based random number generator for OpenBSD.
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/time.h>
+
+#define KEYSTREAM_ONLY
+#include "chacha_private.h"
+
+#define minimum(a, b) ((a) < (b) ? (a) : (b))
+
+#if defined(__GNUC__) || defined(_MSC_VER)
+#define inline __inline
+#else /* __GNUC__ || _MSC_VER */
+#define inline
+#endif /* !__GNUC__ && !_MSC_VER */
+
+#define KEYSZ 32
+#define IVSZ 8
+#define BLOCKSZ 64
+#if defined(__FE310__)
+#define RSBLKS 16
+#define RSBUFSZ (BLOCKSZ)
+#else
+#define RSBUFSZ (16*BLOCKSZ)
+#endif
+
+/* Marked MAP_INHERIT_ZERO, so zero'd out in fork children. */
+static struct _rs {
+ size_t rs_have; /* valid bytes at end of rs_buf */
+ size_t rs_count; /* bytes till reseed */
+#if defined(__FE310__)
+ size_t rs_blocks; /* blocks till rekey */
+#endif
+} *rs;
+
+/* Maybe be preserved in fork children, if _rs_allocate() decides. */
+static struct _rsx {
+ chacha_ctx rs_chacha; /* chacha context for random keystream */
+ u_char rs_buf[RSBUFSZ]; /* keystream blocks */
+} *rsx;
+
+static inline int _rs_allocate(struct _rs **, struct _rsx **);
+static inline void _rs_forkdetect(void);
+#include "arc4random.h"
+
+static inline void _rs_rekey(u_char *dat, size_t datlen);
+
+static inline void
+_rs_init(u_char *buf, size_t n)
+{
+ if (n < KEYSZ + IVSZ)
+ return;
+
+ if (rs == NULL) {
+ if (_rs_allocate(&rs, &rsx) == -1)
+ _exit(1);
+#if defined(__FE310__)
+ rs->rs_blocks = (RSBLKS - 1);
+#endif
+ }
+
+ chacha_keysetup(&rsx->rs_chacha, buf, KEYSZ * 8, 0);
+ chacha_ivsetup(&rsx->rs_chacha, buf + KEYSZ);
+}
+
+static void
+_rs_stir(void)
+{
+ u_char rnd[KEYSZ + IVSZ];
+
+ if (getentropy(rnd, sizeof rnd) == -1)
+ _getentropy_fail();
+
+ if (!rs)
+ _rs_init(rnd, sizeof(rnd));
+ else
+ _rs_rekey(rnd, sizeof(rnd));
+ explicit_bzero(rnd, sizeof(rnd)); /* discard source seed */
+
+ /* invalidate rs_buf */
+ rs->rs_have = 0;
+ memset(rsx->rs_buf, 0, sizeof(rsx->rs_buf));
+
+ rs->rs_count = 1600000;
+}
+
+static inline void
+_rs_stir_if_needed(size_t len)
+{
+ _rs_forkdetect();
+ if (!rs || rs->rs_count <= len)
+ _rs_stir();
+ if (rs->rs_count <= len)
+ rs->rs_count = 0;
+ else
+ rs->rs_count -= len;
+}
+
+static inline void
+_rs_rekey(u_char *dat, size_t datlen)
+{
+#ifndef KEYSTREAM_ONLY
+ memset(rsx->rs_buf, 0, sizeof(rsx->rs_buf));
+#endif
+ /* fill rs_buf with the keystream */
+ chacha_encrypt_bytes(&rsx->rs_chacha, rsx->rs_buf,
+ rsx->rs_buf, sizeof(rsx->rs_buf));
+ /* mix in optional user provided data */
+ if (dat) {
+ size_t i, m;
+
+ m = minimum(datlen, KEYSZ + IVSZ);
+ for (i = 0; i < m; i++)
+ rsx->rs_buf[i] ^= dat[i];
+ }
+#if defined(__FE310__)
+ if (dat || (rs->rs_blocks == 0)) {
+ rs->rs_blocks = (RSBLKS - 1);
+ } else {
+ rs->rs_blocks--;
+ rs->rs_have = sizeof(rsx->rs_buf);
+ return;
+ }
+#endif
+ /* immediately reinit for backtracking resistance */
+ _rs_init(rsx->rs_buf, KEYSZ + IVSZ);
+ memset(rsx->rs_buf, 0, KEYSZ + IVSZ);
+ rs->rs_have = sizeof(rsx->rs_buf) - KEYSZ - IVSZ;
+}
+
+static inline void
+_rs_random_buf(void *_buf, size_t n)
+{
+ u_char *buf = (u_char *)_buf;
+ u_char *keystream;
+ size_t m;
+
+ _rs_stir_if_needed(n);
+ while (n > 0) {
+ if (rs->rs_have > 0) {
+ m = minimum(n, rs->rs_have);
+ keystream = rsx->rs_buf + sizeof(rsx->rs_buf)
+ - rs->rs_have;
+ memcpy(buf, keystream, m);
+ memset(keystream, 0, m);
+ buf += m;
+ n -= m;
+ rs->rs_have -= m;
+ }
+ if (rs->rs_have == 0)
+ _rs_rekey(NULL, 0);
+ }
+}
+
+static inline void
+_rs_random_u32(uint32_t *val)
+{
+ u_char *keystream;
+
+ _rs_stir_if_needed(sizeof(*val));
+ if (rs->rs_have < sizeof(*val))
+ _rs_rekey(NULL, 0);
+ keystream = rsx->rs_buf + sizeof(rsx->rs_buf) - rs->rs_have;
+ memcpy(val, keystream, sizeof(*val));
+ memset(keystream, 0, sizeof(*val));
+ rs->rs_have -= sizeof(*val);
+}
+
+uint32_t
+arc4random(void)
+{
+ uint32_t val;
+
+ _ARC4_LOCK();
+ _rs_random_u32(&val);
+ _ARC4_UNLOCK();
+ return val;
+}
+
+void
+arc4random_buf(void *buf, size_t n)
+{
+ _ARC4_LOCK();
+ _rs_random_buf(buf, n);
+ _ARC4_UNLOCK();
+}
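arc4random() and arc4random_buf() above are the only public entry points in this file; both take the lock, lazily seed via getentropy(), and copy bytes out of the ChaCha keystream buffer. A caller-side sketch (not part of the diff); declarations normally come from <stdlib.h> on systems that ship arc4random natively, or from LibreSSL's compat headers otherwise:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned char buf[16];
	uint32_t r = arc4random();		/* one uniform 32-bit value */

	arc4random_buf(buf, sizeof(buf));	/* fill a buffer with random bytes */
	printf("%u %02x\n", r, buf[0]);
	return 0;
}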
diff --git a/ext/libressl/crypto/compat/arc4random.h b/ext/libressl/crypto/compat/arc4random.h
new file mode 100644
index 0000000..8a308a9
--- /dev/null
+++ b/ext/libressl/crypto/compat/arc4random.h
@@ -0,0 +1,41 @@
+#ifndef LIBCRYPTOCOMPAT_ARC4RANDOM_H
+#define LIBCRYPTOCOMPAT_ARC4RANDOM_H
+
+#include <sys/param.h>
+
+#if defined(_AIX)
+#include "arc4random_aix.h"
+
+#elif defined(__FreeBSD__)
+#include "arc4random_freebsd.h"
+
+#elif defined(__hpux)
+#include "arc4random_hpux.h"
+
+#elif defined(__linux__)
+#include "arc4random_linux.h"
+
+#elif defined(__midipix__)
+#include "arc4random_linux.h"
+
+#elif defined(__NetBSD__)
+#include "arc4random_netbsd.h"
+
+#elif defined(__APPLE__)
+#include "arc4random_osx.h"
+
+#elif defined(__sun)
+#include "arc4random_solaris.h"
+
+#elif defined(_WIN32)
+#include "arc4random_win.h"
+
+#elif defined(__FE310__)
+#include "arc4random_fe310.h"
+
+#else
+#error "No arc4random hooks defined for this platform."
+
+#endif
+
+#endif
diff --git a/ext/libressl/crypto/compat/arc4random_aix.h b/ext/libressl/crypto/compat/arc4random_aix.h
new file mode 100644
index 0000000..3142a1f
--- /dev/null
+++ b/ext/libressl/crypto/compat/arc4random_aix.h
@@ -0,0 +1,81 @@
+/* $OpenBSD: arc4random_aix.h,v 1.2 2016/06/30 12:19:51 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ * Copyright (c) 2014, Theo de Raadt <deraadt@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <sys/mman.h>
+
+#include <pthread.h>
+#include <signal.h>
+
+static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
+#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx)
+#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx)
+
+#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f))
+
+static inline void
+_getentropy_fail(void)
+{
+ raise(SIGKILL);
+}
+
+static volatile sig_atomic_t _rs_forked;
+
+static inline void
+_rs_forkhandler(void)
+{
+ _rs_forked = 1;
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+ static pid_t _rs_pid = 0;
+ pid_t pid = getpid();
+
+ if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) {
+ _rs_pid = pid;
+ _rs_forked = 0;
+ if (rs)
+ memset(rs, 0, sizeof(*rs));
+ }
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
+ return (-1);
+
+ if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+ munmap(*rsp, sizeof(**rsp));
+ *rsp = NULL;
+ return (-1);
+ }
+
+ _ARC4_ATFORK(_rs_forkhandler);
+ return (0);
+}
diff --git a/ext/libressl/crypto/compat/arc4random_fe310.h b/ext/libressl/crypto/compat/arc4random_fe310.h
new file mode 100644
index 0000000..131b0d3
--- /dev/null
+++ b/ext/libressl/crypto/compat/arc4random_fe310.h
@@ -0,0 +1,27 @@
+#define _ARC4_LOCK() ;
+#define _ARC4_UNLOCK() ;
+
+static inline void
+_getentropy_fail(void)
+{
+ _exit(1);
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+}
+
+int arc4random_alloc(void **rsp, size_t rsp_size, void **rsxp, size_t rsxp_size);
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ return arc4random_alloc((void **)rsp, sizeof(**rsp), (void **)rsxp, sizeof(**rsxp));
+}
+
+void arc4random_close(void)
+{
+ rs = NULL;
+ rsx = NULL;
+}
\ No newline at end of file
diff --git a/ext/libressl/crypto/compat/arc4random_freebsd.h b/ext/libressl/crypto/compat/arc4random_freebsd.h
new file mode 100644
index 0000000..3faa5e4
--- /dev/null
+++ b/ext/libressl/crypto/compat/arc4random_freebsd.h
@@ -0,0 +1,87 @@
+/* $OpenBSD: arc4random_freebsd.h,v 1.4 2016/06/30 12:19:51 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ * Copyright (c) 2014, Theo de Raadt <deraadt@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <sys/mman.h>
+
+#include <pthread.h>
+#include <signal.h>
+
+static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
+#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx)
+#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx)
+
+/*
+ * Unfortunately, pthread_atfork() is broken on FreeBSD (at least 9 and 10) if
+ * a program does not link to -lthr. Callbacks registered with pthread_atfork()
+ * appear to fail silently. So, it is not always possible to detect a PID
+ * wraparound.
+ */
+#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f))
+
+static inline void
+_getentropy_fail(void)
+{
+ raise(SIGKILL);
+}
+
+static volatile sig_atomic_t _rs_forked;
+
+static inline void
+_rs_forkhandler(void)
+{
+ _rs_forked = 1;
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+ static pid_t _rs_pid = 0;
+ pid_t pid = getpid();
+
+ if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) {
+ _rs_pid = pid;
+ _rs_forked = 0;
+ if (rs)
+ memset(rs, 0, sizeof(*rs));
+ }
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
+ return (-1);
+
+ if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+ munmap(*rsp, sizeof(**rsp));
+ *rsp = NULL;
+ return (-1);
+ }
+
+ _ARC4_ATFORK(_rs_forkhandler);
+ return (0);
+}
diff --git a/ext/libressl/crypto/compat/arc4random_hpux.h b/ext/libressl/crypto/compat/arc4random_hpux.h
new file mode 100644
index 0000000..2a3fe8c
--- /dev/null
+++ b/ext/libressl/crypto/compat/arc4random_hpux.h
@@ -0,0 +1,81 @@
+/* $OpenBSD: arc4random_hpux.h,v 1.3 2016/06/30 12:19:51 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ * Copyright (c) 2014, Theo de Raadt <deraadt@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <sys/mman.h>
+
+#include <pthread.h>
+#include <signal.h>
+
+static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
+#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx)
+#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx)
+
+#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f))
+
+static inline void
+_getentropy_fail(void)
+{
+ raise(SIGKILL);
+}
+
+static volatile sig_atomic_t _rs_forked;
+
+static inline void
+_rs_forkhandler(void)
+{
+ _rs_forked = 1;
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+ static pid_t _rs_pid = 0;
+ pid_t pid = getpid();
+
+ if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) {
+ _rs_pid = pid;
+ _rs_forked = 0;
+ if (rs)
+ memset(rs, 0, sizeof(*rs));
+ }
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
+ return (-1);
+
+ if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+ munmap(*rsp, sizeof(**rsp));
+ *rsp = NULL;
+ return (-1);
+ }
+
+ _ARC4_ATFORK(_rs_forkhandler);
+ return (0);
+}
diff --git a/ext/libressl/crypto/compat/arc4random_linux.h b/ext/libressl/crypto/compat/arc4random_linux.h
new file mode 100644
index 0000000..5e1cf34
--- /dev/null
+++ b/ext/libressl/crypto/compat/arc4random_linux.h
@@ -0,0 +1,88 @@
+/* $OpenBSD: arc4random_linux.h,v 1.12 2019/07/11 10:37:28 inoguchi Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ * Copyright (c) 2014, Theo de Raadt <deraadt@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <sys/mman.h>
+
+#include <pthread.h>
+#include <signal.h>
+
+static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
+#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx)
+#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx)
+
+#if defined(__GLIBC__) && !(defined(__UCLIBC__) && !defined(__ARCH_USE_MMU__))
+extern void *__dso_handle;
+extern int __register_atfork(void (*)(void), void(*)(void), void (*)(void), void *);
+#define _ARC4_ATFORK(f) __register_atfork(NULL, NULL, (f), __dso_handle)
+#else
+#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f))
+#endif
+
+static inline void
+_getentropy_fail(void)
+{
+ raise(SIGKILL);
+}
+
+static volatile sig_atomic_t _rs_forked;
+
+static inline void
+_rs_forkhandler(void)
+{
+ _rs_forked = 1;
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+ static pid_t _rs_pid = 0;
+ pid_t pid = getpid();
+
+ /* XXX unusual calls to clone() can bypass checks */
+ if (_rs_pid == 0 || _rs_pid == 1 || _rs_pid != pid || _rs_forked) {
+ _rs_pid = pid;
+ _rs_forked = 0;
+ if (rs)
+ memset(rs, 0, sizeof(*rs));
+ }
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
+ return (-1);
+
+ if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+ munmap(*rsp, sizeof(**rsp));
+ *rsp = NULL;
+ return (-1);
+ }
+
+ _ARC4_ATFORK(_rs_forkhandler);
+ return (0);
+}
diff --git a/ext/libressl/crypto/compat/arc4random_netbsd.h b/ext/libressl/crypto/compat/arc4random_netbsd.h
new file mode 100644
index 0000000..611997d
--- /dev/null
+++ b/ext/libressl/crypto/compat/arc4random_netbsd.h
@@ -0,0 +1,87 @@
+/* $OpenBSD: arc4random_netbsd.h,v 1.3 2016/06/30 12:19:51 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ * Copyright (c) 2014, Theo de Raadt <deraadt@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <sys/mman.h>
+
+#include <pthread.h>
+#include <signal.h>
+
+static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
+#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx)
+#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx)
+
+/*
+ * Unfortunately, pthread_atfork() is broken on FreeBSD (at least 9 and 10) if
+ * a program does not link to -lthr. Callbacks registered with pthread_atfork()
+ * appear to fail silently. So, it is not always possible to detect a PID
+ * wraparound.
+ */
+#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f))
+
+static inline void
+_getentropy_fail(void)
+{
+ raise(SIGKILL);
+}
+
+static volatile sig_atomic_t _rs_forked;
+
+static inline void
+_rs_forkhandler(void)
+{
+ _rs_forked = 1;
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+ static pid_t _rs_pid = 0;
+ pid_t pid = getpid();
+
+ if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) {
+ _rs_pid = pid;
+ _rs_forked = 0;
+ if (rs)
+ memset(rs, 0, sizeof(*rs));
+ }
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
+ return (-1);
+
+ if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+ munmap(*rsp, sizeof(**rsp));
+ *rsp = NULL;
+ return (-1);
+ }
+
+ _ARC4_ATFORK(_rs_forkhandler);
+ return (0);
+}
diff --git a/ext/libressl/crypto/compat/arc4random_osx.h b/ext/libressl/crypto/compat/arc4random_osx.h
new file mode 100644
index 0000000..818ae6b
--- /dev/null
+++ b/ext/libressl/crypto/compat/arc4random_osx.h
@@ -0,0 +1,81 @@
+/* $OpenBSD: arc4random_osx.h,v 1.11 2016/06/30 12:19:51 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ * Copyright (c) 2014, Theo de Raadt <deraadt@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <sys/mman.h>
+
+#include <pthread.h>
+#include <signal.h>
+
+static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
+#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx)
+#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx)
+
+#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f))
+
+static inline void
+_getentropy_fail(void)
+{
+ raise(SIGKILL);
+}
+
+static volatile sig_atomic_t _rs_forked;
+
+static inline void
+_rs_forkhandler(void)
+{
+ _rs_forked = 1;
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+ static pid_t _rs_pid = 0;
+ pid_t pid = getpid();
+
+ if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) {
+ _rs_pid = pid;
+ _rs_forked = 0;
+ if (rs)
+ memset(rs, 0, sizeof(*rs));
+ }
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
+ return (-1);
+
+ if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+ munmap(*rsp, sizeof(**rsp));
+ *rsp = NULL;
+ return (-1);
+ }
+
+ _ARC4_ATFORK(_rs_forkhandler);
+ return (0);
+}
diff --git a/ext/libressl/crypto/compat/arc4random_solaris.h b/ext/libressl/crypto/compat/arc4random_solaris.h
new file mode 100644
index 0000000..b1084cd
--- /dev/null
+++ b/ext/libressl/crypto/compat/arc4random_solaris.h
@@ -0,0 +1,81 @@
+/* $OpenBSD: arc4random_solaris.h,v 1.10 2016/06/30 12:19:51 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ * Copyright (c) 2014, Theo de Raadt <deraadt@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <sys/mman.h>
+
+#include <pthread.h>
+#include <signal.h>
+
+static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
+#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx)
+#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx)
+
+#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f))
+
+static inline void
+_getentropy_fail(void)
+{
+ raise(SIGKILL);
+}
+
+static volatile sig_atomic_t _rs_forked;
+
+static inline void
+_rs_forkhandler(void)
+{
+ _rs_forked = 1;
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+ static pid_t _rs_pid = 0;
+ pid_t pid = getpid();
+
+ if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) {
+ _rs_pid = pid;
+ _rs_forked = 0;
+ if (rs)
+ memset(rs, 0, sizeof(*rs));
+ }
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
+ return (-1);
+
+ if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+ munmap(*rsp, sizeof(**rsp));
+ *rsp = NULL;
+ return (-1);
+ }
+
+ _ARC4_ATFORK(_rs_forkhandler);
+ return (0);
+}
diff --git a/ext/libressl/crypto/compat/arc4random_uniform.c b/ext/libressl/crypto/compat/arc4random_uniform.c
new file mode 100644
index 0000000..06cd29c
--- /dev/null
+++ b/ext/libressl/crypto/compat/arc4random_uniform.c
@@ -0,0 +1,56 @@
+/* $OpenBSD: arc4random_uniform.c,v 1.3 2019/01/20 02:59:07 bcook Exp $ */
+
+/*
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/*
+ * Calculate a uniformly distributed random number less than upper_bound
+ * avoiding "modulo bias".
+ *
+ * Uniformity is achieved by generating new random numbers until the one
+ * returned is outside the range [0, 2**32 % upper_bound). This
+ * guarantees the selected random number will be inside
+ * [2**32 % upper_bound, 2**32) which maps back to [0, upper_bound)
+ * after reduction modulo upper_bound.
+ */
+uint32_t
+arc4random_uniform(uint32_t upper_bound)
+{
+ uint32_t r, min;
+
+ if (upper_bound < 2)
+ return 0;
+
+ /* 2**32 % x == (2**32 - x) % x */
+ min = -upper_bound % upper_bound;
+
+ /*
+ * This could theoretically loop forever but each retry has
+ * p > 0.5 (worst case, usually far better) of selecting a
+ * number inside the range we need, so it should rarely need
+ * to re-roll.
+ */
+ for (;;) {
+ r = arc4random();
+ if (r >= min)
+ break;
+ }
+
+ return r % upper_bound;
+}
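
The rejection bound in the comment above rests on the inline identity 2**32 % x == (2**32 - x) % x, which holds because unary minus on a uint32_t wraps modulo 2**32. A minimal standalone check of that identity (illustrative only, not part of the imported file):

    /* Illustrative check of the rejection bound; not part of the diff. */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint32_t ub;

        for (ub = 2; ub < 1000; ub++) {
            uint32_t min = -ub % ub;    /* wraps: equals (2**32 - ub) % ub */

            assert(min == (uint32_t)((1ULL << 32) % ub));
        }
        printf("rejection bound matches 2**32 %% upper_bound\n");
        return 0;
    }

With that bound in place, arc4random_uniform(6) yields an unbiased value in [0, 6), whereas a bare arc4random() % 6 would slightly favour the smaller residues.
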
diff --git a/ext/libressl/crypto/compat/arc4random_win.h b/ext/libressl/crypto/compat/arc4random_win.h
new file mode 100644
index 0000000..deec8a1
--- /dev/null
+++ b/ext/libressl/crypto/compat/arc4random_win.h
@@ -0,0 +1,78 @@
+/* $OpenBSD: arc4random_win.h,v 1.6 2016/06/30 12:17:29 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ * Copyright (c) 2014, Theo de Raadt <deraadt@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <windows.h>
+
+static volatile HANDLE arc4random_mtx = NULL;
+
+/*
+ * Initialize the mutex on the first lock attempt. On collision, each thread
+ * will attempt to allocate a mutex and compare-and-swap it into place as the
+ * global mutex. On failure to swap in the global mutex, the mutex is closed.
+ */
+#define _ARC4_LOCK() { \
+ if (!arc4random_mtx) { \
+ HANDLE p = CreateMutex(NULL, FALSE, NULL); \
+ if (InterlockedCompareExchangePointer((void **)&arc4random_mtx, (void *)p, NULL)) \
+ CloseHandle(p); \
+ } \
+ WaitForSingleObject(arc4random_mtx, INFINITE); \
+} \
+
+#define _ARC4_UNLOCK() ReleaseMutex(arc4random_mtx)
+
+static inline void
+_getentropy_fail(void)
+{
+ TerminateProcess(GetCurrentProcess(), 0);
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ *rsp = VirtualAlloc(NULL, sizeof(**rsp),
+ MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+ if (*rsp == NULL)
+ return (-1);
+
+ *rsxp = VirtualAlloc(NULL, sizeof(**rsxp),
+ MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+ if (*rsxp == NULL) {
+ VirtualFree(*rsp, 0, MEM_RELEASE);
+ *rsp = NULL;
+ return (-1);
+ }
+ return (0);
+}
+
+static inline void
+_rs_forkhandler(void)
+{
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+}
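
The comment above describes a lazily initialised global mutex. Written out of macro form, _ARC4_LOCK() behaves roughly like the following sketch (illustrative only; the real code stays a macro so it drops into the shared arc4random.c unchanged):

    /* Illustrative expansion of _ARC4_LOCK(); not part of the diff. */
    static void
    arc4_lock_sketch(void)
    {
        if (!arc4random_mtx) {
            /* Each racing thread creates a candidate mutex... */
            HANDLE p = CreateMutex(NULL, FALSE, NULL);

            /* ...but only the first compare-and-swap wins. */
            if (InterlockedCompareExchangePointer(
                (void **)&arc4random_mtx, (void *)p, NULL) != NULL)
                CloseHandle(p);    /* lost the race: discard our handle */
        }
        WaitForSingleObject(arc4random_mtx, INFINITE);
    }

InterlockedCompareExchangePointer returns the previous value of arc4random_mtx, so it is NULL only for the one thread whose handle was actually installed; every other thread closes its spare handle and then waits on the winner's mutex.
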
diff --git a/ext/libressl/crypto/compat/chacha_private.h b/ext/libressl/crypto/compat/chacha_private.h
new file mode 100644
index 0000000..7c3680f
--- /dev/null
+++ b/ext/libressl/crypto/compat/chacha_private.h
@@ -0,0 +1,222 @@
+/*
+chacha-merged.c version 20080118
+D. J. Bernstein
+Public domain.
+*/
+
+/* $OpenBSD: chacha_private.h,v 1.2 2013/10/04 07:02:27 djm Exp $ */
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+
+typedef struct
+{
+ u32 input[16]; /* could be compressed */
+} chacha_ctx;
+
+#define U8C(v) (v##U)
+#define U32C(v) (v##U)
+
+#define U8V(v) ((u8)(v) & U8C(0xFF))
+#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
+
+#define ROTL32(v, n) \
+ (U32V((v) << (n)) | ((v) >> (32 - (n))))
+
+#define U8TO32_LITTLE(p) \
+ (((u32)((p)[0]) ) | \
+ ((u32)((p)[1]) << 8) | \
+ ((u32)((p)[2]) << 16) | \
+ ((u32)((p)[3]) << 24))
+
+#define U32TO8_LITTLE(p, v) \
+ do { \
+ (p)[0] = U8V((v) ); \
+ (p)[1] = U8V((v) >> 8); \
+ (p)[2] = U8V((v) >> 16); \
+ (p)[3] = U8V((v) >> 24); \
+ } while (0)
+
+#define ROTATE(v,c) (ROTL32(v,c))
+#define XOR(v,w) ((v) ^ (w))
+#define PLUS(v,w) (U32V((v) + (w)))
+#define PLUSONE(v) (PLUS((v),1))
+
+#define QUARTERROUND(a,b,c,d) \
+ a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
+ c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
+ a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
+ c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
+
+static const char sigma[16] = "expand 32-byte k";
+static const char tau[16] = "expand 16-byte k";
+
+static void
+chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits,u32 ivbits)
+{
+ const char *constants;
+
+ x->input[4] = U8TO32_LITTLE(k + 0);
+ x->input[5] = U8TO32_LITTLE(k + 4);
+ x->input[6] = U8TO32_LITTLE(k + 8);
+ x->input[7] = U8TO32_LITTLE(k + 12);
+ if (kbits == 256) { /* recommended */
+ k += 16;
+ constants = sigma;
+ } else { /* kbits == 128 */
+ constants = tau;
+ }
+ x->input[8] = U8TO32_LITTLE(k + 0);
+ x->input[9] = U8TO32_LITTLE(k + 4);
+ x->input[10] = U8TO32_LITTLE(k + 8);
+ x->input[11] = U8TO32_LITTLE(k + 12);
+ x->input[0] = U8TO32_LITTLE(constants + 0);
+ x->input[1] = U8TO32_LITTLE(constants + 4);
+ x->input[2] = U8TO32_LITTLE(constants + 8);
+ x->input[3] = U8TO32_LITTLE(constants + 12);
+}
+
+static void
+chacha_ivsetup(chacha_ctx *x,const u8 *iv)
+{
+ x->input[12] = 0;
+ x->input[13] = 0;
+ x->input[14] = U8TO32_LITTLE(iv + 0);
+ x->input[15] = U8TO32_LITTLE(iv + 4);
+}
+
+static void
+chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
+{
+ u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+ u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+ u8 *ctarget = NULL;
+ u8 tmp[64];
+ u_int i;
+
+ if (!bytes) return;
+
+ j0 = x->input[0];
+ j1 = x->input[1];
+ j2 = x->input[2];
+ j3 = x->input[3];
+ j4 = x->input[4];
+ j5 = x->input[5];
+ j6 = x->input[6];
+ j7 = x->input[7];
+ j8 = x->input[8];
+ j9 = x->input[9];
+ j10 = x->input[10];
+ j11 = x->input[11];
+ j12 = x->input[12];
+ j13 = x->input[13];
+ j14 = x->input[14];
+ j15 = x->input[15];
+
+ for (;;) {
+ if (bytes < 64) {
+ for (i = 0;i < bytes;++i) tmp[i] = m[i];
+ m = tmp;
+ ctarget = c;
+ c = tmp;
+ }
+ x0 = j0;
+ x1 = j1;
+ x2 = j2;
+ x3 = j3;
+ x4 = j4;
+ x5 = j5;
+ x6 = j6;
+ x7 = j7;
+ x8 = j8;
+ x9 = j9;
+ x10 = j10;
+ x11 = j11;
+ x12 = j12;
+ x13 = j13;
+ x14 = j14;
+ x15 = j15;
+ for (i = 20;i > 0;i -= 2) {
+ QUARTERROUND( x0, x4, x8,x12)
+ QUARTERROUND( x1, x5, x9,x13)
+ QUARTERROUND( x2, x6,x10,x14)
+ QUARTERROUND( x3, x7,x11,x15)
+ QUARTERROUND( x0, x5,x10,x15)
+ QUARTERROUND( x1, x6,x11,x12)
+ QUARTERROUND( x2, x7, x8,x13)
+ QUARTERROUND( x3, x4, x9,x14)
+ }
+ x0 = PLUS(x0,j0);
+ x1 = PLUS(x1,j1);
+ x2 = PLUS(x2,j2);
+ x3 = PLUS(x3,j3);
+ x4 = PLUS(x4,j4);
+ x5 = PLUS(x5,j5);
+ x6 = PLUS(x6,j6);
+ x7 = PLUS(x7,j7);
+ x8 = PLUS(x8,j8);
+ x9 = PLUS(x9,j9);
+ x10 = PLUS(x10,j10);
+ x11 = PLUS(x11,j11);
+ x12 = PLUS(x12,j12);
+ x13 = PLUS(x13,j13);
+ x14 = PLUS(x14,j14);
+ x15 = PLUS(x15,j15);
+
+#ifndef KEYSTREAM_ONLY
+ x0 = XOR(x0,U8TO32_LITTLE(m + 0));
+ x1 = XOR(x1,U8TO32_LITTLE(m + 4));
+ x2 = XOR(x2,U8TO32_LITTLE(m + 8));
+ x3 = XOR(x3,U8TO32_LITTLE(m + 12));
+ x4 = XOR(x4,U8TO32_LITTLE(m + 16));
+ x5 = XOR(x5,U8TO32_LITTLE(m + 20));
+ x6 = XOR(x6,U8TO32_LITTLE(m + 24));
+ x7 = XOR(x7,U8TO32_LITTLE(m + 28));
+ x8 = XOR(x8,U8TO32_LITTLE(m + 32));
+ x9 = XOR(x9,U8TO32_LITTLE(m + 36));
+ x10 = XOR(x10,U8TO32_LITTLE(m + 40));
+ x11 = XOR(x11,U8TO32_LITTLE(m + 44));
+ x12 = XOR(x12,U8TO32_LITTLE(m + 48));
+ x13 = XOR(x13,U8TO32_LITTLE(m + 52));
+ x14 = XOR(x14,U8TO32_LITTLE(m + 56));
+ x15 = XOR(x15,U8TO32_LITTLE(m + 60));
+#endif
+
+ j12 = PLUSONE(j12);
+ if (!j12) {
+ j13 = PLUSONE(j13);
+ /* stopping at 2^70 bytes per nonce is user's responsibility */
+ }
+
+ U32TO8_LITTLE(c + 0,x0);
+ U32TO8_LITTLE(c + 4,x1);
+ U32TO8_LITTLE(c + 8,x2);
+ U32TO8_LITTLE(c + 12,x3);
+ U32TO8_LITTLE(c + 16,x4);
+ U32TO8_LITTLE(c + 20,x5);
+ U32TO8_LITTLE(c + 24,x6);
+ U32TO8_LITTLE(c + 28,x7);
+ U32TO8_LITTLE(c + 32,x8);
+ U32TO8_LITTLE(c + 36,x9);
+ U32TO8_LITTLE(c + 40,x10);
+ U32TO8_LITTLE(c + 44,x11);
+ U32TO8_LITTLE(c + 48,x12);
+ U32TO8_LITTLE(c + 52,x13);
+ U32TO8_LITTLE(c + 56,x14);
+ U32TO8_LITTLE(c + 60,x15);
+
+ if (bytes <= 64) {
+ if (bytes < 64) {
+ for (i = 0;i < bytes;++i) ctarget[i] = c[i];
+ }
+ x->input[12] = j12;
+ x->input[13] = j13;
+ return;
+ }
+ bytes -= 64;
+ c += 64;
+#ifndef KEYSTREAM_ONLY
+ m += 64;
+#endif
+ }
+}
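
A minimal caller of the three routines above, sketched under the assumption that KEYSTREAM_ONLY is not defined (so the input is XORed with the keystream); this is illustrative only and not part of the imported header:

    /* Illustrative use of chacha_keysetup/chacha_ivsetup/chacha_encrypt_bytes. */
    static void
    chacha_demo(void)
    {
        static const unsigned char key[32] = { 0 };    /* 256-bit key (demo value) */
        static const unsigned char iv[8] = { 0 };      /* 64-bit nonce (demo value) */
        unsigned char msg[64] = "attack at dawn";
        unsigned char ct[64];
        chacha_ctx ctx;

        chacha_keysetup(&ctx, key, 256, 0);    /* ivbits argument is ignored */
        chacha_ivsetup(&ctx, iv);              /* also resets the block counter */
        chacha_encrypt_bytes(&ctx, msg, ct, sizeof(msg));

        /* Decryption is the same operation with the same key, nonce and counter. */
        chacha_ivsetup(&ctx, iv);
        chacha_encrypt_bytes(&ctx, ct, msg, sizeof(ct));
    }

The block counter lives in input[12] and input[13], which is why chacha_ivsetup() zeroes those two words; the 2**70-byte limit mentioned in the code is simply the point at which that 64-bit counter would wrap (2**64 blocks of 64 bytes).
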
diff --git a/ext/libressl/crypto/compat/explicit_bzero.c b/ext/libressl/crypto/compat/explicit_bzero.c
new file mode 100644
index 0000000..5dd0103
--- /dev/null
+++ b/ext/libressl/crypto/compat/explicit_bzero.c
@@ -0,0 +1,19 @@
+/* $OpenBSD: explicit_bzero.c,v 1.4 2015/08/31 02:53:57 guenther Exp $ */
+/*
+ * Public domain.
+ * Written by Matthew Dempsky.
+ */
+
+#include <string.h>
+
+__attribute__((weak)) void
+__explicit_bzero_hook(void *buf, size_t len)
+{
+}
+
+void
+explicit_bzero(void *buf, size_t len)
+{
+ memset(buf, 0, len);
+ __explicit_bzero_hook(buf, len);
+}
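
The weak, empty __explicit_bzero_hook is what stops the compiler from treating the memset as a dead store: since the hook could be overridden at link time, buf must really be zeroed before the call. A hedged illustration of the problem it solves (get_secret and use_secret are hypothetical placeholders, not real functions):

    /* Illustrative only; not part of the diff. */
    static void
    handle_secret(void)
    {
        char buf[64];

        get_secret(buf, sizeof(buf));    /* hypothetical producer */
        use_secret(buf, sizeof(buf));    /* hypothetical consumer */

        /* A plain memset here may be optimized away, since buf is never
         * read again; explicit_bzero(buf, sizeof(buf)) survives. */
        memset(buf, 0, sizeof(buf));
    }
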
diff --git a/ext/libressl/crypto/compat/explicit_bzero_win.c b/ext/libressl/crypto/compat/explicit_bzero_win.c
new file mode 100644
index 0000000..0d09d90
--- /dev/null
+++ b/ext/libressl/crypto/compat/explicit_bzero_win.c
@@ -0,0 +1,13 @@
+/*
+ * Public domain.
+ * Win32 explicit_bzero compatibility shim.
+ */
+
+#include <windows.h>
+#include <string.h>
+
+void
+explicit_bzero(void *buf, size_t len)
+{
+ SecureZeroMemory(buf, len);
+}
diff --git a/ext/libressl/crypto/compat/getentropy_aix.c b/ext/libressl/crypto/compat/getentropy_aix.c
new file mode 100644
index 0000000..422e685
--- /dev/null
+++ b/ext/libressl/crypto/compat/getentropy_aix.c
@@ -0,0 +1,402 @@
+/* $OpenBSD: getentropy_aix.c,v 1.7 2020/05/17 14:44:20 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2015 Michael Felt <aixtools@gmail.com>
+ * Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org>
+ * Copyright (c) 2014 Bob Beck <beck@obtuse.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://man.openbsd.org/getentropy.2
+ */
+/*
+ * -lperfstat is needed for the pseudo entropy data
+ */
+
+#include <sys/mman.h>
+#include <sys/procfs.h>
+#include <sys/protosw.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <sys/timers.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <termios.h>
+
+#include <openssl/sha.h>
+
+#include <libperfstat.h>
+
+#define REPEAT 5
+#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
+
+#define HX(a, b) \
+ do { \
+ if ((a)) \
+ HD(errno); \
+ else \
+ HD(b); \
+ } while (0)
+
+#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l)))
+#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x)))
+#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*)))
+
+int getentropy(void *buf, size_t len);
+
+static int getentropy_urandom(void *buf, size_t len, const char *path,
+ int devfscheck);
+static int getentropy_fallback(void *buf, size_t len);
+
+int
+getentropy(void *buf, size_t len)
+{
+ int ret = -1;
+
+ if (len > 256) {
+ errno = EIO;
+ return (-1);
+ }
+
+ /*
+ * Try to get entropy with /dev/urandom
+ */
+ ret = getentropy_urandom(buf, len, "/dev/urandom", 0);
+ if (ret != -1)
+ return (ret);
+
+ /*
+ * Entropy collection via /dev/urandom has failed.
+ *
+ * No other API exists for collecting entropy, and we have
+ * no failsafe way to get it on AIX that is not sensitive
+ * to resource exhaustion.
+ *
+ * We have very few options:
+ * - Even syslog_r is unsafe to call at this low level, so
+ * there is no way to alert the user or program.
+ * - Cannot call abort() because some systems have unsafe
+ * corefiles.
+ * - Could raise(SIGKILL) resulting in silent program termination.
+ * - Return EIO, to hint that arc4random's stir function
+ * should raise(SIGKILL)
+ * - Do the best under the circumstances....
+ *
+ * This code path exists to bring light to the issue that AIX
+ * does not provide a failsafe API for entropy collection.
+ *
+ * We hope this demonstrates that AIX should consider
+ * providing a new failsafe API which works in a chroot or
+ * when file descriptors are exhausted.
+ */
+#undef FAIL_INSTEAD_OF_TRYING_FALLBACK
+#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK
+ raise(SIGKILL);
+#endif
+ ret = getentropy_fallback(buf, len);
+ if (ret != -1)
+ return (ret);
+
+ errno = EIO;
+ return (ret);
+}
+
+static int
+getentropy_urandom(void *buf, size_t len, const char *path, int devfscheck)
+{
+ struct stat st;
+ size_t i;
+ int fd, flags;
+ int save_errno = errno;
+
+start:
+
+ flags = O_RDONLY;
+#ifdef O_NOFOLLOW
+ flags |= O_NOFOLLOW;
+#endif
+#ifdef O_CLOEXEC
+ flags |= O_CLOEXEC;
+#endif
+ fd = open(path, flags, 0);
+ if (fd == -1) {
+ if (errno == EINTR)
+ goto start;
+ goto nodevrandom;
+ }
+#ifndef O_CLOEXEC
+ fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+#endif
+
+ /* Lightly verify that the device node looks sane */
+ if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode)) {
+ close(fd);
+ goto nodevrandom;
+ }
+ for (i = 0; i < len; ) {
+ size_t wanted = len - i;
+ ssize_t ret = read(fd, (char *)buf + i, wanted);
+
+ if (ret == -1) {
+ if (errno == EAGAIN || errno == EINTR)
+ continue;
+ close(fd);
+ goto nodevrandom;
+ }
+ i += ret;
+ }
+ close(fd);
+ errno = save_errno;
+ return (0); /* satisfied */
+nodevrandom:
+ errno = EIO;
+ return (-1);
+}
+
+static const int cl[] = {
+ CLOCK_REALTIME,
+#ifdef CLOCK_MONOTONIC
+ CLOCK_MONOTONIC,
+#endif
+#ifdef CLOCK_MONOTONIC_RAW
+ CLOCK_MONOTONIC_RAW,
+#endif
+#ifdef CLOCK_TAI
+ CLOCK_TAI,
+#endif
+#ifdef CLOCK_VIRTUAL
+ CLOCK_VIRTUAL,
+#endif
+#ifdef CLOCK_UPTIME
+ CLOCK_UPTIME,
+#endif
+#ifdef CLOCK_PROCESS_CPUTIME_ID
+ CLOCK_PROCESS_CPUTIME_ID,
+#endif
+#ifdef CLOCK_THREAD_CPUTIME_ID
+ CLOCK_THREAD_CPUTIME_ID,
+#endif
+};
+
+static int
+getentropy_fallback(void *buf, size_t len)
+{
+ uint8_t results[SHA512_DIGEST_LENGTH];
+ int save_errno = errno, e, pgs = sysconf(_SC_PAGESIZE), faster = 0, repeat;
+ static int cnt;
+ struct timespec ts;
+ struct timeval tv;
+ perfstat_cpu_total_t cpustats;
+#ifdef _AIX61
+ perfstat_cpu_total_wpar_t cpustats_wpar;
+#endif
+ perfstat_partition_total_t lparstats;
+ perfstat_disk_total_t diskinfo;
+ perfstat_netinterface_total_t netinfo;
+ struct rusage ru;
+ sigset_t sigset;
+ struct stat st;
+ SHA512_CTX ctx;
+ static pid_t lastpid;
+ pid_t pid;
+ size_t i, ii, m;
+ char *p;
+
+ pid = getpid();
+ if (lastpid == pid) {
+ faster = 1;
+ repeat = 2;
+ } else {
+ faster = 0;
+ lastpid = pid;
+ repeat = REPEAT;
+ }
+ for (i = 0; i < len; ) {
+ int j;
+ SHA512_Init(&ctx);
+ for (j = 0; j < repeat; j++) {
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ HX(perfstat_cpu_total(NULL, &cpustats,
+ sizeof(cpustats), 1) == -1, cpustats);
+
+#ifdef _AIX61
+ HX(perfstat_cpu_total_wpar(NULL, &cpustats_wpar,
+ sizeof(cpustats_wpar), 1) == -1, cpustats_wpar);
+#endif
+
+ HX(perfstat_partition_total(NULL, &lparstats,
+ sizeof(lparstats), 1) == -1, lparstats);
+
+ HX(perfstat_disk_total(NULL, &diskinfo,
+ sizeof(diskinfo), 1) == -1, diskinfo);
+
+ HX(perfstat_netinterface_total(NULL, &netinfo,
+ sizeof(netinfo), 1) == -1, netinfo);
+
+ for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); ii++)
+ HX(clock_gettime(cl[ii], &ts) == -1, ts);
+
+ HX((pid = getpid()) == -1, pid);
+ HX((pid = getsid(pid)) == -1, pid);
+ HX((pid = getppid()) == -1, pid);
+ HX((pid = getpgid(0)) == -1, pid);
+ HX((e = getpriority(0, 0)) == -1, e);
+
+ if (!faster) {
+ ts.tv_sec = 0;
+ ts.tv_nsec = 1;
+ (void) nanosleep(&ts, NULL);
+ }
+
+ HX(sigpending(&sigset) == -1, sigset);
+ HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1,
+ sigset);
+
+ HF(getentropy); /* an addr in this library */
+ HF(printf); /* an addr in libc */
+ p = (char *)&p;
+ HD(p); /* an addr on stack */
+ p = (char *)&errno;
+ HD(p); /* the addr of errno */
+
+ if (i == 0) {
+ struct sockaddr_storage ss;
+ struct statvfs stvfs;
+ struct termios tios;
+ socklen_t ssl;
+ off_t off;
+
+ /*
+ * Prime-sized mappings encourage fragmentation;
+ * thus exposing some address entropy.
+ */
+ struct mm {
+ size_t npg;
+ void *p;
+ } mm[] = {
+ { 17, MAP_FAILED }, { 3, MAP_FAILED },
+ { 11, MAP_FAILED }, { 2, MAP_FAILED },
+ { 5, MAP_FAILED }, { 3, MAP_FAILED },
+ { 7, MAP_FAILED }, { 1, MAP_FAILED },
+ { 57, MAP_FAILED }, { 3, MAP_FAILED },
+ { 131, MAP_FAILED }, { 1, MAP_FAILED },
+ };
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ HX(mm[m].p = mmap(NULL,
+ mm[m].npg * pgs,
+ PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON, -1,
+ (off_t)0), mm[m].p);
+ if (mm[m].p != MAP_FAILED) {
+ size_t mo;
+
+ /* Touch some memory... */
+ p = mm[m].p;
+ mo = cnt %
+ (mm[m].npg * pgs - 1);
+ p[mo] = 1;
+ cnt += (int)((long)(mm[m].p)
+ / pgs);
+ }
+
+ /* Check cnts and times... */
+ for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]);
+ ii++) {
+ HX((e = clock_gettime(cl[ii],
+ &ts)) == -1, ts);
+ if (e != -1)
+ cnt += (int)ts.tv_nsec;
+ }
+
+ HX((e = getrusage(RUSAGE_SELF,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ }
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ if (mm[m].p != MAP_FAILED)
+ munmap(mm[m].p, mm[m].npg * pgs);
+ mm[m].p = MAP_FAILED;
+ }
+
+ HX(stat(".", &st) == -1, st);
+ HX(statvfs(".", &stvfs) == -1, stvfs);
+
+ HX(stat("/", &st) == -1, st);
+ HX(statvfs("/", &stvfs) == -1, stvfs);
+
+ HX((e = fstat(0, &st)) == -1, st);
+ if (e == -1) {
+ if (S_ISREG(st.st_mode) ||
+ S_ISFIFO(st.st_mode) ||
+ S_ISSOCK(st.st_mode)) {
+ HX(fstatvfs(0, &stvfs) == -1,
+ stvfs);
+ HX((off = lseek(0, (off_t)0,
+ SEEK_CUR)) < 0, off);
+ }
+ if (S_ISCHR(st.st_mode)) {
+ HX(tcgetattr(0, &tios) == -1,
+ tios);
+ } else if (S_ISSOCK(st.st_mode)) {
+ memset(&ss, 0, sizeof ss);
+ ssl = sizeof(ss);
+ HX(getpeername(0,
+ (void *)&ss, &ssl) == -1,
+ ss);
+ }
+ }
+
+ HX((e = getrusage(RUSAGE_CHILDREN,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ } else {
+ /* Subsequent hashes absorb previous result */
+ HD(results);
+ }
+
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ HD(cnt);
+ }
+ SHA512_Final(results, &ctx);
+ memcpy((char *)buf + i, results, MINIMUM(sizeof(results), len - i));
+ i += MINIMUM(sizeof(results), len - i);
+ }
+ explicit_bzero(&ctx, sizeof ctx);
+ explicit_bzero(results, sizeof results);
+ errno = save_errno;
+ return (0); /* satisfied */
+}
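
Nearly everything in the fallback path above funnels through the HX/HD/HF macros, which hash each syscall result (or errno on failure) into the running SHA-512 context. Expanded by hand, one representative call reads roughly as follows (illustrative only; ctx is the SHA512_CTX declared in getentropy_fallback):

    /* HX((e = gettimeofday(&tv, NULL)) == -1, tv); expands to approximately: */
    if ((e = gettimeofday(&tv, NULL)) == -1)
        SHA512_Update(&ctx, (char *)&errno, sizeof(errno));    /* HD(errno) */
    else
        SHA512_Update(&ctx, (char *)&tv, sizeof(tv));          /* HD(tv) */

HD hashes any in-scope object by value, and HR hashes an explicit byte range; once enough material has been mixed in, SHA512_Final produces the 64-byte block that is copied into the caller's buffer.
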
diff --git a/ext/libressl/crypto/compat/getentropy_freebsd.c b/ext/libressl/crypto/compat/getentropy_freebsd.c
new file mode 100644
index 0000000..ea90ffe
--- /dev/null
+++ b/ext/libressl/crypto/compat/getentropy_freebsd.c
@@ -0,0 +1,60 @@
+/* $OpenBSD: getentropy_freebsd.c,v 1.4 2020/10/12 22:08:33 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2014 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * Copyright (c) 2014 Brent Cook <bcook@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://man.openbsd.org/getentropy.2
+ */
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+#include <errno.h>
+#include <stddef.h>
+
+/*
+ * Derived from lib/libc/gen/arc4random.c from FreeBSD.
+ */
+static size_t
+getentropy_sysctl(u_char *buf, size_t size)
+{
+ const int mib[2] = { CTL_KERN, KERN_ARND };
+ size_t len, done;
+
+ done = 0;
+
+ do {
+ len = size;
+ if (sysctl(mib, 2, buf, &len, NULL, 0) == -1)
+ return (done);
+ done += len;
+ buf += len;
+ size -= len;
+ } while (size > 0);
+
+ return (done);
+}
+
+int
+getentropy(void *buf, size_t len)
+{
+ if (len <= 256 && getentropy_sysctl(buf, len) == len)
+ return (0);
+
+ errno = EIO;
+ return (-1);
+}
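
Whatever the backend, these emulations keep the documented getentropy(2) contract: at most 256 bytes per request, returning 0 on success and -1 with errno set to EIO otherwise. A minimal caller, sketched assuming the getentropy prototype above is in scope:

    /* Illustrative caller; not part of the diff. */
    static int
    seed_key(unsigned char key[32])
    {
        if (getentropy(key, 32) == -1)
            return (-1);    /* errno is EIO in these compat versions */
        return (0);
    }

Requests larger than 256 bytes fail up front; callers needing more typically use getentropy only to seed a local generator such as arc4random.
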
diff --git a/ext/libressl/crypto/compat/getentropy_hpux.c b/ext/libressl/crypto/compat/getentropy_hpux.c
new file mode 100644
index 0000000..c981880
--- /dev/null
+++ b/ext/libressl/crypto/compat/getentropy_hpux.c
@@ -0,0 +1,396 @@
+/* $OpenBSD: getentropy_hpux.c,v 1.7 2020/05/17 14:44:20 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org>
+ * Copyright (c) 2014 Bob Beck <beck@obtuse.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://man.openbsd.org/getentropy.2
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/statvfs.h>
+#include <sys/socket.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <termios.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <openssl/sha.h>
+
+#include <sys/vfs.h>
+
+#include <sys/pstat.h>
+
+#define REPEAT 5
+#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
+
+#define HX(a, b) \
+ do { \
+ if ((a)) \
+ HD(errno); \
+ else \
+ HD(b); \
+ } while (0)
+
+#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l)))
+#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x)))
+#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*)))
+
+int getentropy(void *buf, size_t len);
+
+static int getentropy_urandom(void *buf, size_t len, const char *path,
+ int devfscheck);
+static int getentropy_fallback(void *buf, size_t len);
+
+int
+getentropy(void *buf, size_t len)
+{
+ int ret = -1;
+
+ if (len > 256) {
+ errno = EIO;
+ return (-1);
+ }
+
+ /*
+ * Try to get entropy with /dev/urandom
+ */
+ ret = getentropy_urandom(buf, len, "/dev/urandom", 0);
+ if (ret != -1)
+ return (ret);
+
+ /*
+ * Entropy collection via /dev/urandom has failed.
+ *
+ * No other API exists for collecting entropy, and we have
+ * no failsafe way to get it on hpux that is not sensitive
+ * to resource exhaustion.
+ *
+ * We have very few options:
+ * - Even syslog_r is unsafe to call at this low level, so
+ * there is no way to alert the user or program.
+ * - Cannot call abort() because some systems have unsafe
+ * corefiles.
+ * - Could raise(SIGKILL) resulting in silent program termination.
+ * - Return EIO, to hint that arc4random's stir function
+ * should raise(SIGKILL)
+ * - Do the best under the circumstances....
+ *
+ * This code path exists to bring light to the issue that hpux
+ * does not provide a failsafe API for entropy collection.
+ *
+ * We hope this demonstrates that hpux should consider
+ * providing a new failsafe API which works in a chroot or
+ * when file descriptors are exhausted.
+ */
+#undef FAIL_INSTEAD_OF_TRYING_FALLBACK
+#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK
+ raise(SIGKILL);
+#endif
+ ret = getentropy_fallback(buf, len);
+ if (ret != -1)
+ return (ret);
+
+ errno = EIO;
+ return (ret);
+}
+
+static int
+getentropy_urandom(void *buf, size_t len, const char *path, int devfscheck)
+{
+ struct stat st;
+ size_t i;
+ int fd, flags;
+ int save_errno = errno;
+
+start:
+
+ flags = O_RDONLY;
+#ifdef O_NOFOLLOW
+ flags |= O_NOFOLLOW;
+#endif
+#ifdef O_CLOEXEC
+ flags |= O_CLOEXEC;
+#endif
+ fd = open(path, flags, 0);
+ if (fd == -1) {
+ if (errno == EINTR)
+ goto start;
+ goto nodevrandom;
+ }
+#ifndef O_CLOEXEC
+ fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+#endif
+
+ /* Lightly verify that the device node looks sane */
+ if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode)) {
+ close(fd);
+ goto nodevrandom;
+ }
+ for (i = 0; i < len; ) {
+ size_t wanted = len - i;
+ ssize_t ret = read(fd, (char *)buf + i, wanted);
+
+ if (ret == -1) {
+ if (errno == EAGAIN || errno == EINTR)
+ continue;
+ close(fd);
+ goto nodevrandom;
+ }
+ i += ret;
+ }
+ close(fd);
+ errno = save_errno;
+ return (0); /* satisfied */
+nodevrandom:
+ errno = EIO;
+ return (-1);
+}
+
+static const int cl[] = {
+ CLOCK_REALTIME,
+#ifdef CLOCK_MONOTONIC
+ CLOCK_MONOTONIC,
+#endif
+#ifdef CLOCK_MONOTONIC_RAW
+ CLOCK_MONOTONIC_RAW,
+#endif
+#ifdef CLOCK_TAI
+ CLOCK_TAI,
+#endif
+#ifdef CLOCK_VIRTUAL
+ CLOCK_VIRTUAL,
+#endif
+#ifdef CLOCK_UPTIME
+ CLOCK_UPTIME,
+#endif
+#ifdef CLOCK_PROCESS_CPUTIME_ID
+ CLOCK_PROCESS_CPUTIME_ID,
+#endif
+#ifdef CLOCK_THREAD_CPUTIME_ID
+ CLOCK_THREAD_CPUTIME_ID,
+#endif
+};
+
+static int
+getentropy_fallback(void *buf, size_t len)
+{
+ uint8_t results[SHA512_DIGEST_LENGTH];
+ int save_errno = errno, e, pgs = sysconf(_SC_PAGESIZE), faster = 0, repeat;
+ static int cnt;
+ struct timespec ts;
+ struct timeval tv;
+ struct pst_vminfo pvi;
+ struct pst_vm_status pvs;
+ struct pst_dynamic pdy;
+ struct rusage ru;
+ sigset_t sigset;
+ struct stat st;
+ SHA512_CTX ctx;
+ static pid_t lastpid;
+ pid_t pid;
+ size_t i, ii, m;
+ char *p;
+
+ pid = getpid();
+ if (lastpid == pid) {
+ faster = 1;
+ repeat = 2;
+ } else {
+ faster = 0;
+ lastpid = pid;
+ repeat = REPEAT;
+ }
+ for (i = 0; i < len; ) {
+ int j;
+ SHA512_Init(&ctx);
+ for (j = 0; j < repeat; j++) {
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ HX(pstat_getvminfo(&pvi, sizeof(pvi), 1, 0) != 1, pvi);
+ HX(pstat_getprocvm(&pvs, sizeof(pvs), 0, 0) != 1, pvs);
+
+ for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); ii++)
+ HX(clock_gettime(cl[ii], &ts) == -1, ts);
+
+ HX((pid = getpid()) == -1, pid);
+ HX((pid = getsid(pid)) == -1, pid);
+ HX((pid = getppid()) == -1, pid);
+ HX((pid = getpgid(0)) == -1, pid);
+ HX((e = getpriority(0, 0)) == -1, e);
+
+ if (pstat_getdynamic(&pdy, sizeof(pdy), 1, 0) != 1) {
+ HD(errno);
+ } else {
+ HD(pdy.psd_avg_1_min);
+ HD(pdy.psd_avg_5_min);
+ HD(pdy.psd_avg_15_min);
+ }
+
+ if (!faster) {
+ ts.tv_sec = 0;
+ ts.tv_nsec = 1;
+ (void) nanosleep(&ts, NULL);
+ }
+
+ HX(sigpending(&sigset) == -1, sigset);
+ HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1,
+ sigset);
+
+ HF(getentropy); /* an addr in this library */
+ HF(printf); /* an addr in libc */
+ p = (char *)&p;
+ HD(p); /* an addr on stack */
+ p = (char *)&errno;
+ HD(p); /* the addr of errno */
+
+ if (i == 0) {
+ struct sockaddr_storage ss;
+ struct statvfs stvfs;
+ struct termios tios;
+ socklen_t ssl;
+ off_t off;
+
+ /*
+ * Prime-sized mappings encourage fragmentation;
+ * thus exposing some address entropy.
+ */
+ struct mm {
+ size_t npg;
+ void *p;
+ } mm[] = {
+ { 17, MAP_FAILED }, { 3, MAP_FAILED },
+ { 11, MAP_FAILED }, { 2, MAP_FAILED },
+ { 5, MAP_FAILED }, { 3, MAP_FAILED },
+ { 7, MAP_FAILED }, { 1, MAP_FAILED },
+ { 57, MAP_FAILED }, { 3, MAP_FAILED },
+ { 131, MAP_FAILED }, { 1, MAP_FAILED },
+ };
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ HX(mm[m].p = mmap(NULL,
+ mm[m].npg * pgs,
+ PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON, -1,
+ (off_t)0), mm[m].p);
+ if (mm[m].p != MAP_FAILED) {
+ size_t mo;
+
+ /* Touch some memory... */
+ p = mm[m].p;
+ mo = cnt %
+ (mm[m].npg * pgs - 1);
+ p[mo] = 1;
+ cnt += (int)((long)(mm[m].p)
+ / pgs);
+ }
+
+ /* Check cnts and times... */
+ for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]);
+ ii++) {
+ HX((e = clock_gettime(cl[ii],
+ &ts)) == -1, ts);
+ if (e != -1)
+ cnt += (int)ts.tv_nsec;
+ }
+
+ HX((e = getrusage(RUSAGE_SELF,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ }
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ if (mm[m].p != MAP_FAILED)
+ munmap(mm[m].p, mm[m].npg * pgs);
+ mm[m].p = MAP_FAILED;
+ }
+
+ HX(stat(".", &st) == -1, st);
+ HX(statvfs(".", &stvfs) == -1, stvfs);
+
+ HX(stat("/", &st) == -1, st);
+ HX(statvfs("/", &stvfs) == -1, stvfs);
+
+ HX((e = fstat(0, &st)) == -1, st);
+ if (e == -1) {
+ if (S_ISREG(st.st_mode) ||
+ S_ISFIFO(st.st_mode) ||
+ S_ISSOCK(st.st_mode)) {
+ HX(fstatvfs(0, &stvfs) == -1,
+ stvfs);
+ HX((off = lseek(0, (off_t)0,
+ SEEK_CUR)) < 0, off);
+ }
+ if (S_ISCHR(st.st_mode)) {
+ HX(tcgetattr(0, &tios) == -1,
+ tios);
+ } else if (S_ISSOCK(st.st_mode)) {
+ memset(&ss, 0, sizeof ss);
+ ssl = sizeof(ss);
+ HX(getpeername(0,
+ (void *)&ss, &ssl) == -1,
+ ss);
+ }
+ }
+
+ HX((e = getrusage(RUSAGE_CHILDREN,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ } else {
+ /* Subsequent hashes absorb previous result */
+ HD(results);
+ }
+
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ HD(cnt);
+ }
+ SHA512_Final(results, &ctx);
+ memcpy((char *)buf + i, results, MINIMUM(sizeof(results), len - i));
+ i += MINIMUM(sizeof(results), len - i);
+ }
+ explicit_bzero(&ctx, sizeof ctx);
+ explicit_bzero(results, sizeof results);
+ errno = save_errno;
+ return (0); /* satisfied */
+}
diff --git a/ext/libressl/crypto/compat/getentropy_linux.c b/ext/libressl/crypto/compat/getentropy_linux.c
new file mode 100644
index 0000000..bc7a6be
--- /dev/null
+++ b/ext/libressl/crypto/compat/getentropy_linux.c
@@ -0,0 +1,525 @@
+/* $OpenBSD: getentropy_linux.c,v 1.47 2020/05/17 14:44:20 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org>
+ * Copyright (c) 2014 Bob Beck <beck@obtuse.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://man.openbsd.org/getentropy.2
+ */
+
+#define _POSIX_C_SOURCE 199309L
+#define _GNU_SOURCE 1
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#ifdef SYS__sysctl
+#include <linux/sysctl.h>
+#endif
+#include <sys/statvfs.h>
+#include <sys/socket.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <link.h>
+#include <termios.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <openssl/sha.h>
+
+#include <linux/types.h>
+#include <linux/random.h>
+#ifdef HAVE_GETAUXVAL
+#include <sys/auxv.h>
+#endif
+#include <sys/vfs.h>
+
+#define REPEAT 5
+#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
+
+#define HX(a, b) \
+ do { \
+ if ((a)) \
+ HD(errno); \
+ else \
+ HD(b); \
+ } while (0)
+
+#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l)))
+#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x)))
+#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*)))
+
+int getentropy(void *buf, size_t len);
+
+#if defined(SYS_getrandom) && defined(GRND_NONBLOCK)
+static int getentropy_getrandom(void *buf, size_t len);
+#endif
+static int getentropy_urandom(void *buf, size_t len);
+#ifdef SYS__sysctl
+static int getentropy_sysctl(void *buf, size_t len);
+#endif
+static int getentropy_fallback(void *buf, size_t len);
+static int getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data);
+
+int
+getentropy(void *buf, size_t len)
+{
+ int ret = -1;
+
+ if (len > 256) {
+ errno = EIO;
+ return (-1);
+ }
+
+#if defined(SYS_getrandom) && defined(GRND_NONBLOCK)
+ /*
+ * Try descriptor-less getrandom(), in non-blocking mode.
+ *
+ * The design of Linux getrandom is broken. It has an
+ * uninitialized phase coupled with blocking behaviour, which
+ * is unacceptable from within a library at boot time without
+ * possible recovery. See http://bugs.python.org/issue26839#msg267745
+ */
+ ret = getentropy_getrandom(buf, len);
+ if (ret != -1)
+ return (ret);
+#endif
+
+ /*
+ * Try to get entropy with /dev/urandom
+ *
+ * This can fail if the process is inside a chroot or if file
+ * descriptors are exhausted.
+ */
+ ret = getentropy_urandom(buf, len);
+ if (ret != -1)
+ return (ret);
+
+#ifdef SYS__sysctl
+ /*
+ * Try to use sysctl CTL_KERN, KERN_RANDOM, RANDOM_UUID.
+ * sysctl is a failsafe API, so it guarantees a result. This
+ * should work inside a chroot, or when file descriptors are
+ * exhausted.
+ *
+ * However this can fail if the Linux kernel removes support
+ * for sysctl. Starting in 2007, there have been efforts to
+ * deprecate the sysctl API/ABI, and push callers towards use
+ * of the chroot-unavailable fd-using /proc mechanism --
+ * essentially the same problems as /dev/urandom.
+ *
+ * Numerous setbacks have been encountered in their deprecation
+ * schedule, so as of June 2014 the kernel ABI still exists on
+ * most Linux architectures. The sysctl() stub in libc is missing
+ * on some systems. There are also reports that some kernels
+ * spew messages to the console.
+ */
+ ret = getentropy_sysctl(buf, len);
+ if (ret != -1)
+ return (ret);
+#endif /* SYS__sysctl */
+
+ /*
+ * Entropy collection via /dev/urandom and sysctl have failed.
+ *
+ * No other API exists for collecting entropy. See the large
+ * comment block above.
+ *
+ * We have very few options:
+ * - Even syslog_r is unsafe to call at this low level, so
+ * there is no way to alert the user or program.
+ * - Cannot call abort() because some systems have unsafe
+ * corefiles.
+ * - Could raise(SIGKILL) resulting in silent program termination.
+ * - Return EIO, to hint that arc4random's stir function
+ * should raise(SIGKILL)
+ * - Do the best under the circumstances....
+ *
+ * This code path exists to bring light to the issue that Linux
+ * still does not provide a failsafe API for entropy collection.
+ *
+ * We hope this demonstrates that Linux should either retain their
+ * sysctl ABI, or consider providing a new failsafe API which
+ * works in a chroot or when file descriptors are exhausted.
+ */
+#undef FAIL_INSTEAD_OF_TRYING_FALLBACK
+#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK
+ raise(SIGKILL);
+#endif
+ ret = getentropy_fallback(buf, len);
+ if (ret != -1)
+ return (ret);
+
+ errno = EIO;
+ return (ret);
+}
+
+#if defined(SYS_getrandom) && defined(GRND_NONBLOCK)
+static int
+getentropy_getrandom(void *buf, size_t len)
+{
+ int pre_errno = errno;
+ int ret;
+ if (len > 256)
+ return (-1);
+ do {
+ ret = syscall(SYS_getrandom, buf, len, GRND_NONBLOCK);
+ } while (ret == -1 && errno == EINTR);
+
+ if (ret != len)
+ return (-1);
+ errno = pre_errno;
+ return (0);
+}
+#endif
+
+static int
+getentropy_urandom(void *buf, size_t len)
+{
+ struct stat st;
+ size_t i;
+ int fd, cnt, flags;
+ int save_errno = errno;
+
+start:
+
+ flags = O_RDONLY;
+#ifdef O_NOFOLLOW
+ flags |= O_NOFOLLOW;
+#endif
+#ifdef O_CLOEXEC
+ flags |= O_CLOEXEC;
+#endif
+ fd = open("/dev/urandom", flags, 0);
+ if (fd == -1) {
+ if (errno == EINTR)
+ goto start;
+ goto nodevrandom;
+ }
+#ifndef O_CLOEXEC
+ fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+#endif
+
+ /* Lightly verify that the device node looks sane */
+ if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode)) {
+ close(fd);
+ goto nodevrandom;
+ }
+ if (ioctl(fd, RNDGETENTCNT, &cnt) == -1) {
+ close(fd);
+ goto nodevrandom;
+ }
+ for (i = 0; i < len; ) {
+ size_t wanted = len - i;
+ ssize_t ret = read(fd, (char *)buf + i, wanted);
+
+ if (ret == -1) {
+ if (errno == EAGAIN || errno == EINTR)
+ continue;
+ close(fd);
+ goto nodevrandom;
+ }
+ i += ret;
+ }
+ close(fd);
+ errno = save_errno;
+ return (0); /* satisfied */
+nodevrandom:
+ errno = EIO;
+ return (-1);
+}
+
+#ifdef SYS__sysctl
+static int
+getentropy_sysctl(void *buf, size_t len)
+{
+ static int mib[] = { CTL_KERN, KERN_RANDOM, RANDOM_UUID };
+ size_t i;
+ int save_errno = errno;
+
+ for (i = 0; i < len; ) {
+ size_t chunk = MINIMUM(len - i, 16);
+
+ /* SYS__sysctl because some systems already removed sysctl() */
+ struct __sysctl_args args = {
+ .name = mib,
+ .nlen = 3,
+ .oldval = (char *)buf + i,
+ .oldlenp = &chunk,
+ };
+ if (syscall(SYS__sysctl, &args) != 0)
+ goto sysctlfailed;
+ i += chunk;
+ }
+ errno = save_errno;
+ return (0); /* satisfied */
+sysctlfailed:
+ errno = EIO;
+ return (-1);
+}
+#endif /* SYS__sysctl */
+
+static const int cl[] = {
+ CLOCK_REALTIME,
+#ifdef CLOCK_MONOTONIC
+ CLOCK_MONOTONIC,
+#endif
+#ifdef CLOCK_MONOTONIC_RAW
+ CLOCK_MONOTONIC_RAW,
+#endif
+#ifdef CLOCK_TAI
+ CLOCK_TAI,
+#endif
+#ifdef CLOCK_VIRTUAL
+ CLOCK_VIRTUAL,
+#endif
+#ifdef CLOCK_UPTIME
+ CLOCK_UPTIME,
+#endif
+#ifdef CLOCK_PROCESS_CPUTIME_ID
+ CLOCK_PROCESS_CPUTIME_ID,
+#endif
+#ifdef CLOCK_THREAD_CPUTIME_ID
+ CLOCK_THREAD_CPUTIME_ID,
+#endif
+};
+
+static int
+getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data)
+{
+ SHA512_CTX *ctx = data;
+
+ SHA512_Update(ctx, &info->dlpi_addr, sizeof (info->dlpi_addr));
+ return (0);
+}
+
+static int
+getentropy_fallback(void *buf, size_t len)
+{
+ uint8_t results[SHA512_DIGEST_LENGTH];
+ int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat;
+ static int cnt;
+ struct timespec ts;
+ struct timeval tv;
+ struct rusage ru;
+ sigset_t sigset;
+ struct stat st;
+ SHA512_CTX ctx;
+ static pid_t lastpid;
+ pid_t pid;
+ size_t i, ii, m;
+ char *p;
+
+ pid = getpid();
+ if (lastpid == pid) {
+ faster = 1;
+ repeat = 2;
+ } else {
+ faster = 0;
+ lastpid = pid;
+ repeat = REPEAT;
+ }
+ for (i = 0; i < len; ) {
+ int j;
+ SHA512_Init(&ctx);
+ for (j = 0; j < repeat; j++) {
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ dl_iterate_phdr(getentropy_phdr, &ctx);
+
+ for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); ii++)
+ HX(clock_gettime(cl[ii], &ts) == -1, ts);
+
+ HX((pid = getpid()) == -1, pid);
+ HX((pid = getsid(pid)) == -1, pid);
+ HX((pid = getppid()) == -1, pid);
+ HX((pid = getpgid(0)) == -1, pid);
+ HX((e = getpriority(0, 0)) == -1, e);
+
+ if (!faster) {
+ ts.tv_sec = 0;
+ ts.tv_nsec = 1;
+ (void) nanosleep(&ts, NULL);
+ }
+
+ HX(sigpending(&sigset) == -1, sigset);
+ HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1,
+ sigset);
+
+ HF(getentropy); /* an addr in this library */
+ HF(printf); /* an addr in libc */
+ p = (char *)&p;
+ HD(p); /* an addr on stack */
+ p = (char *)&errno;
+ HD(p); /* the addr of errno */
+
+ if (i == 0) {
+ struct sockaddr_storage ss;
+ struct statvfs stvfs;
+ struct termios tios;
+ struct statfs stfs;
+ socklen_t ssl;
+ off_t off;
+
+ /*
+ * Prime-sized mappings encourage fragmentation;
+ * thus exposing some address entropy.
+ */
+ struct mm {
+ size_t npg;
+ void *p;
+ } mm[] = {
+ { 17, MAP_FAILED }, { 3, MAP_FAILED },
+ { 11, MAP_FAILED }, { 2, MAP_FAILED },
+ { 5, MAP_FAILED }, { 3, MAP_FAILED },
+ { 7, MAP_FAILED }, { 1, MAP_FAILED },
+ { 57, MAP_FAILED }, { 3, MAP_FAILED },
+ { 131, MAP_FAILED }, { 1, MAP_FAILED },
+ };
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ HX(mm[m].p = mmap(NULL,
+ mm[m].npg * pgs,
+ PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON, -1,
+ (off_t)0), mm[m].p);
+ if (mm[m].p != MAP_FAILED) {
+ size_t mo;
+
+ /* Touch some memory... */
+ p = mm[m].p;
+ mo = cnt %
+ (mm[m].npg * pgs - 1);
+ p[mo] = 1;
+ cnt += (int)((long)(mm[m].p)
+ / pgs);
+ }
+
+ /* Check cnts and times... */
+ for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]);
+ ii++) {
+ HX((e = clock_gettime(cl[ii],
+ &ts)) == -1, ts);
+ if (e != -1)
+ cnt += (int)ts.tv_nsec;
+ }
+
+ HX((e = getrusage(RUSAGE_SELF,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ }
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ if (mm[m].p != MAP_FAILED)
+ munmap(mm[m].p, mm[m].npg * pgs);
+ mm[m].p = MAP_FAILED;
+ }
+
+ HX(stat(".", &st) == -1, st);
+ HX(statvfs(".", &stvfs) == -1, stvfs);
+ HX(statfs(".", &stfs) == -1, stfs);
+
+ HX(stat("/", &st) == -1, st);
+ HX(statvfs("/", &stvfs) == -1, stvfs);
+ HX(statfs("/", &stfs) == -1, stfs);
+
+ HX((e = fstat(0, &st)) == -1, st);
+ if (e == -1) {
+ if (S_ISREG(st.st_mode) ||
+ S_ISFIFO(st.st_mode) ||
+ S_ISSOCK(st.st_mode)) {
+ HX(fstatvfs(0, &stvfs) == -1,
+ stvfs);
+ HX(fstatfs(0, &stfs) == -1,
+ stfs);
+ HX((off = lseek(0, (off_t)0,
+ SEEK_CUR)) < 0, off);
+ }
+ if (S_ISCHR(st.st_mode)) {
+ HX(tcgetattr(0, &tios) == -1,
+ tios);
+ } else if (S_ISSOCK(st.st_mode)) {
+ memset(&ss, 0, sizeof ss);
+ ssl = sizeof(ss);
+ HX(getpeername(0,
+ (void *)&ss, &ssl) == -1,
+ ss);
+ }
+ }
+
+ HX((e = getrusage(RUSAGE_CHILDREN,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ } else {
+ /* Subsequent hashes absorb previous result */
+ HD(results);
+ }
+
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ HD(cnt);
+ }
+#ifdef HAVE_GETAUXVAL
+#ifdef AT_RANDOM
+ /* Not as random as you think but we take what we are given */
+ p = (char *) getauxval(AT_RANDOM);
+ if (p)
+ HR(p, 16);
+#endif
+#ifdef AT_SYSINFO_EHDR
+ p = (char *) getauxval(AT_SYSINFO_EHDR);
+ if (p)
+ HR(p, pgs);
+#endif
+#ifdef AT_BASE
+ p = (char *) getauxval(AT_BASE);
+ if (p)
+ HD(p);
+#endif
+#endif
+
+ SHA512_Final(results, &ctx);
+ memcpy((char *)buf + i, results, MINIMUM(sizeof(results), len - i));
+ i += MINIMUM(sizeof(results), len - i);
+ }
+ explicit_bzero(&ctx, sizeof ctx);
+ explicit_bzero(results, sizeof results);
+ errno = save_errno;
+ return (0); /* satisfied */
+}
diff --git a/ext/libressl/crypto/compat/getentropy_netbsd.c b/ext/libressl/crypto/compat/getentropy_netbsd.c
new file mode 100644
index 0000000..5dc8959
--- /dev/null
+++ b/ext/libressl/crypto/compat/getentropy_netbsd.c
@@ -0,0 +1,62 @@
+/* $OpenBSD: getentropy_netbsd.c,v 1.4 2020/10/12 22:08:33 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2014 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * Copyright (c) 2014 Brent Cook <bcook@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://man.openbsd.org/getentropy.2
+ */
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+#include <errno.h>
+#include <stddef.h>
+
+/*
+ * Derived from lib/libc/gen/arc4random.c from FreeBSD.
+ */
+static size_t
+getentropy_sysctl(u_char *buf, size_t size)
+{
+ const int mib[2] = { CTL_KERN, KERN_ARND };
+ size_t len, done;
+
+ done = 0;
+
+ do {
+ len = size;
+ if (sysctl(mib, 2, buf, &len, NULL, 0) == -1)
+ return (done);
+ done += len;
+ buf += len;
+ size -= len;
+ } while (size > 0);
+
+ return (done);
+}
+
+int
+getentropy(void *buf, size_t len)
+{
+ if (len <= 256 &&
+ getentropy_sysctl(buf, len) == len) {
+ return (0);
+ }
+
+ errno = EIO;
+ return (-1);
+}
diff --git a/ext/libressl/crypto/compat/getentropy_osx.c b/ext/libressl/crypto/compat/getentropy_osx.c
new file mode 100644
index 0000000..5d4067b
--- /dev/null
+++ b/ext/libressl/crypto/compat/getentropy_osx.c
@@ -0,0 +1,417 @@
+/* $OpenBSD: getentropy_osx.c,v 1.13 2020/05/17 14:44:20 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org>
+ * Copyright (c) 2014 Bob Beck <beck@obtuse.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://man.openbsd.org/getentropy.2
+ */
+
+#include <TargetConditionals.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/sysctl.h>
+#include <sys/statvfs.h>
+#include <sys/socket.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <termios.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <mach/mach_time.h>
+#include <mach/mach_host.h>
+#include <mach/host_info.h>
+#if TARGET_OS_OSX
+#include <sys/socketvar.h>
+#include <sys/vmmeter.h>
+#endif
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#if TARGET_OS_OSX
+#include <netinet/udp.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp_var.h>
+#include <netinet/udp_var.h>
+#endif
+#include <CommonCrypto/CommonDigest.h>
+#define SHA512_Update(a, b, c) (CC_SHA512_Update((a), (b), (c)))
+#define SHA512_Init(xxx) (CC_SHA512_Init((xxx)))
+#define SHA512_Final(xxx, yyy) (CC_SHA512_Final((xxx), (yyy)))
+#define SHA512_CTX CC_SHA512_CTX
+#define SHA512_DIGEST_LENGTH CC_SHA512_DIGEST_LENGTH
+
+#define REPEAT 5
+#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
+
+#define HX(a, b) \
+ do { \
+ if ((a)) \
+ HD(errno); \
+ else \
+ HD(b); \
+ } while (0)
+
+#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l)))
+#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x)))
+#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*)))
+
+int getentropy(void *buf, size_t len);
+
+static int getentropy_urandom(void *buf, size_t len);
+static int getentropy_fallback(void *buf, size_t len);
+
+int
+getentropy(void *buf, size_t len)
+{
+ int ret = -1;
+
+ if (len > 256) {
+ errno = EIO;
+ return (-1);
+ }
+
+ /*
+ * Try to get entropy with /dev/urandom
+ *
+ * This can fail if the process is inside a chroot or if file
+ * descriptors are exhausted.
+ */
+ ret = getentropy_urandom(buf, len);
+ if (ret != -1)
+ return (ret);
+
+ /*
+ * Entropy collection via /dev/urandom has failed.
+ *
+ * No other API exists for collecting entropy, and we have
+ * no failsafe way to get it on OSX that is not sensitive
+ * to resource exhaustion.
+ *
+ * We have very few options:
+ * - Even syslog_r is unsafe to call at this low level, so
+ * there is no way to alert the user or program.
+ * - Cannot call abort() because some systems have unsafe
+ * corefiles.
+ * - Could raise(SIGKILL) resulting in silent program termination.
+ * - Return EIO, to hint that arc4random's stir function
+ * should raise(SIGKILL)
+ * - Do the best under the circumstances....
+ *
+ * This code path exists to bring light to the issue that OSX
+ * does not provide a failsafe API for entropy collection.
+ *
+ * We hope this demonstrates that OSX should consider
+ * providing a new failsafe API which works in a chroot or
+ * when file descriptors are exhausted.
+ */
+#undef FAIL_INSTEAD_OF_TRYING_FALLBACK
+#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK
+ raise(SIGKILL);
+#endif
+ ret = getentropy_fallback(buf, len);
+ if (ret != -1)
+ return (ret);
+
+ errno = EIO;
+ return (ret);
+}
+
+static int
+getentropy_urandom(void *buf, size_t len)
+{
+ struct stat st;
+ size_t i;
+ int fd, flags;
+ int save_errno = errno;
+
+start:
+
+ flags = O_RDONLY;
+#ifdef O_NOFOLLOW
+ flags |= O_NOFOLLOW;
+#endif
+#ifdef O_CLOEXEC
+ flags |= O_CLOEXEC;
+#endif
+ fd = open("/dev/urandom", flags, 0);
+ if (fd == -1) {
+ if (errno == EINTR)
+ goto start;
+ goto nodevrandom;
+ }
+#ifndef O_CLOEXEC
+ fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+#endif
+
+ /* Lightly verify that the device node looks sane */
+ if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode)) {
+ close(fd);
+ goto nodevrandom;
+ }
+ for (i = 0; i < len; ) {
+ size_t wanted = len - i;
+ ssize_t ret = read(fd, (char *)buf + i, wanted);
+
+ if (ret == -1) {
+ if (errno == EAGAIN || errno == EINTR)
+ continue;
+ close(fd);
+ goto nodevrandom;
+ }
+ i += ret;
+ }
+ close(fd);
+ errno = save_errno;
+ return (0); /* satisfied */
+nodevrandom:
+ errno = EIO;
+ return (-1);
+}
+
+#if TARGET_OS_OSX
+static int tcpmib[] = { CTL_NET, AF_INET, IPPROTO_TCP, TCPCTL_STATS };
+static int udpmib[] = { CTL_NET, AF_INET, IPPROTO_UDP, UDPCTL_STATS };
+static int ipmib[] = { CTL_NET, AF_INET, IPPROTO_IP, IPCTL_STATS };
+#endif
+static int kmib[] = { CTL_KERN, KERN_USRSTACK };
+static int hwmib[] = { CTL_HW, HW_USERMEM };
+
+static int
+getentropy_fallback(void *buf, size_t len)
+{
+ uint8_t results[SHA512_DIGEST_LENGTH];
+ int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat;
+ static int cnt;
+ struct timespec ts;
+ struct timeval tv;
+ struct rusage ru;
+ sigset_t sigset;
+ struct stat st;
+ SHA512_CTX ctx;
+ static pid_t lastpid;
+ pid_t pid;
+ size_t i, ii, m;
+ char *p;
+#if TARGET_OS_OSX
+ struct tcpstat tcpstat;
+ struct udpstat udpstat;
+ struct ipstat ipstat;
+#endif
+ u_int64_t mach_time;
+ unsigned int idata;
+ void *addr;
+
+ pid = getpid();
+ if (lastpid == pid) {
+ faster = 1;
+ repeat = 2;
+ } else {
+ faster = 0;
+ lastpid = pid;
+ repeat = REPEAT;
+ }
+ for (i = 0; i < len; ) {
+ int j;
+ SHA512_Init(&ctx);
+ for (j = 0; j < repeat; j++) {
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ mach_time = mach_absolute_time();
+ HD(mach_time);
+
+ ii = sizeof(addr);
+ HX(sysctl(kmib, sizeof(kmib) / sizeof(kmib[0]),
+ &addr, &ii, NULL, 0) == -1, addr);
+
+ ii = sizeof(idata);
+ HX(sysctl(hwmib, sizeof(hwmib) / sizeof(hwmib[0]),
+ &idata, &ii, NULL, 0) == -1, idata);
+
+#if TARGET_OS_OSX
+ ii = sizeof(tcpstat);
+ HX(sysctl(tcpmib, sizeof(tcpmib) / sizeof(tcpmib[0]),
+ &tcpstat, &ii, NULL, 0) == -1, tcpstat);
+
+ ii = sizeof(udpstat);
+ HX(sysctl(udpmib, sizeof(udpmib) / sizeof(udpmib[0]),
+ &udpstat, &ii, NULL, 0) == -1, udpstat);
+
+ ii = sizeof(ipstat);
+ HX(sysctl(ipmib, sizeof(ipmib) / sizeof(ipmib[0]),
+ &ipstat, &ii, NULL, 0) == -1, ipstat);
+#endif
+
+ HX((pid = getpid()) == -1, pid);
+ HX((pid = getsid(pid)) == -1, pid);
+ HX((pid = getppid()) == -1, pid);
+ HX((pid = getpgid(0)) == -1, pid);
+ HX((e = getpriority(0, 0)) == -1, e);
+
+ if (!faster) {
+ ts.tv_sec = 0;
+ ts.tv_nsec = 1;
+ (void) nanosleep(&ts, NULL);
+ }
+
+ HX(sigpending(&sigset) == -1, sigset);
+ HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1,
+ sigset);
+
+ HF(getentropy); /* an addr in this library */
+ HF(printf); /* an addr in libc */
+ p = (char *)&p;
+ HD(p); /* an addr on stack */
+ p = (char *)&errno;
+ HD(p); /* the addr of errno */
+
+ if (i == 0) {
+ struct sockaddr_storage ss;
+ struct statvfs stvfs;
+ struct termios tios;
+ struct statfs stfs;
+ socklen_t ssl;
+ off_t off;
+
+ /*
+ * Prime-sized mappings encourage fragmentation;
+ * thus exposing some address entropy.
+ */
+ struct mm {
+ size_t npg;
+ void *p;
+ } mm[] = {
+ { 17, MAP_FAILED }, { 3, MAP_FAILED },
+ { 11, MAP_FAILED }, { 2, MAP_FAILED },
+ { 5, MAP_FAILED }, { 3, MAP_FAILED },
+ { 7, MAP_FAILED }, { 1, MAP_FAILED },
+ { 57, MAP_FAILED }, { 3, MAP_FAILED },
+ { 131, MAP_FAILED }, { 1, MAP_FAILED },
+ };
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ HX(mm[m].p = mmap(NULL,
+ mm[m].npg * pgs,
+ PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON, -1,
+ (off_t)0), mm[m].p);
+ if (mm[m].p != MAP_FAILED) {
+ size_t mo;
+
+ /* Touch some memory... */
+ p = mm[m].p;
+ mo = cnt %
+ (mm[m].npg * pgs - 1);
+ p[mo] = 1;
+ cnt += (int)((long)(mm[m].p)
+ / pgs);
+ }
+
+ /* Check cnts and times... */
+ mach_time = mach_absolute_time();
+ HD(mach_time);
+ cnt += (int)mach_time;
+
+ HX((e = getrusage(RUSAGE_SELF,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ }
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ if (mm[m].p != MAP_FAILED)
+ munmap(mm[m].p, mm[m].npg * pgs);
+ mm[m].p = MAP_FAILED;
+ }
+
+ HX(stat(".", &st) == -1, st);
+ HX(statvfs(".", &stvfs) == -1, stvfs);
+ HX(statfs(".", &stfs) == -1, stfs);
+
+ HX(stat("/", &st) == -1, st);
+ HX(statvfs("/", &stvfs) == -1, stvfs);
+ HX(statfs("/", &stfs) == -1, stfs);
+
+ HX((e = fstat(0, &st)) == -1, st);
+ if (e == -1) {
+ if (S_ISREG(st.st_mode) ||
+ S_ISFIFO(st.st_mode) ||
+ S_ISSOCK(st.st_mode)) {
+ HX(fstatvfs(0, &stvfs) == -1,
+ stvfs);
+ HX(fstatfs(0, &stfs) == -1,
+ stfs);
+ HX((off = lseek(0, (off_t)0,
+ SEEK_CUR)) < 0, off);
+ }
+ if (S_ISCHR(st.st_mode)) {
+ HX(tcgetattr(0, &tios) == -1,
+ tios);
+ } else if (S_ISSOCK(st.st_mode)) {
+ memset(&ss, 0, sizeof ss);
+ ssl = sizeof(ss);
+ HX(getpeername(0,
+ (void *)&ss, &ssl) == -1,
+ ss);
+ }
+ }
+
+ HX((e = getrusage(RUSAGE_CHILDREN,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ } else {
+ /* Subsequent hashes absorb previous result */
+ HD(results);
+ }
+
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ HD(cnt);
+ }
+
+ SHA512_Final(results, &ctx);
+ memcpy((char *)buf + i, results, MINIMUM(sizeof(results), len - i));
+ i += MINIMUM(sizeof(results), len - i);
+ }
+ explicit_bzero(&ctx, sizeof ctx);
+ explicit_bzero(results, sizeof results);
+ errno = save_errno;
+ return (0); /* satisfied */
+}
diff --git a/ext/libressl/crypto/compat/getentropy_solaris.c b/ext/libressl/crypto/compat/getentropy_solaris.c
new file mode 100644
index 0000000..cf5b9bf
--- /dev/null
+++ b/ext/libressl/crypto/compat/getentropy_solaris.c
@@ -0,0 +1,422 @@
+/* $OpenBSD: getentropy_solaris.c,v 1.14 2020/05/17 14:44:20 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org>
+ * Copyright (c) 2014 Bob Beck <beck@obtuse.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://man.openbsd.org/getentropy.2
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/statvfs.h>
+#include <sys/socket.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <link.h>
+#include <termios.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/sha2.h>
+#define SHA512_Init SHA512Init
+#define SHA512_Update SHA512Update
+#define SHA512_Final SHA512Final
+
+#include <sys/vfs.h>
+#include <sys/statfs.h>
+#include <sys/loadavg.h>
+
+#define REPEAT 5
+#define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
+
+#define HX(a, b) \
+ do { \
+ if ((a)) \
+ HD(errno); \
+ else \
+ HD(b); \
+ } while (0)
+
+#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l)))
+#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x)))
+#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*)))
+
+int getentropy(void *buf, size_t len);
+
+static int getentropy_urandom(void *buf, size_t len, const char *path,
+ int devfscheck);
+static int getentropy_fallback(void *buf, size_t len);
+static int getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data);
+
+int
+getentropy(void *buf, size_t len)
+{
+ int ret = -1;
+
+ if (len > 256) {
+ errno = EIO;
+ return (-1);
+ }
+
+ /*
+ * Try to get entropy with /dev/urandom
+ *
+ * Solaris provides /dev/urandom as a symbolic link to
+ * /devices/pseudo/random@0:urandom which is provided by
+ * a devfs filesystem. Best practice is to use O_NOFOLLOW,
+ * so we must try the unpublished name directly.
+ *
+ * This can fail if the process is inside a chroot which lacks
+ * the devfs mount, or if file descriptors are exhausted.
+ */
+ ret = getentropy_urandom(buf, len,
+ "/devices/pseudo/random@0:urandom", 1);
+ if (ret != -1)
+ return (ret);
+
+ /*
+ * Unfortunately, chroot spaces on Solaris are sometimes setup
+ * with direct device node of the well-known /dev/urandom name
+ * (perhaps to avoid dragging all of devfs into the space).
+ *
+ * This can fail if the process is inside a chroot or if file
+ * descriptors are exhausted.
+ */
+ ret = getentropy_urandom(buf, len, "/dev/urandom", 0);
+ if (ret != -1)
+ return (ret);
+
+ /*
+ * Entropy collection via /dev/urandom has failed.
+ *
+ * No other API exists for collecting entropy, and we have
+ * no failsafe way to get it on Solaris that is not sensitive
+ * to resource exhaustion.
+ *
+ * We have very few options:
+ * - Even syslog_r is unsafe to call at this low level, so
+ * there is no way to alert the user or program.
+ * - Cannot call abort() because some systems have unsafe
+ * corefiles.
+ * - Could raise(SIGKILL) resulting in silent program termination.
+ * - Return EIO, to hint that arc4random's stir function
+ * should raise(SIGKILL)
+ * - Do the best under the circumstances....
+ *
+ * This code path exists to bring light to the issue that Solaris
+ * does not provide a failsafe API for entropy collection.
+ *
+ * We hope this demonstrates that Solaris should consider
+ * providing a new failsafe API which works in a chroot or
+ * when file descriptors are exhausted.
+ */
+#undef FAIL_INSTEAD_OF_TRYING_FALLBACK
+#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK
+ raise(SIGKILL);
+#endif
+ ret = getentropy_fallback(buf, len);
+ if (ret != -1)
+ return (ret);
+
+ errno = EIO;
+ return (ret);
+}
+
+static int
+getentropy_urandom(void *buf, size_t len, const char *path, int devfscheck)
+{
+ struct stat st;
+ size_t i;
+ int fd, flags;
+ int save_errno = errno;
+
+start:
+
+ flags = O_RDONLY;
+#ifdef O_NOFOLLOW
+ flags |= O_NOFOLLOW;
+#endif
+#ifdef O_CLOEXEC
+ flags |= O_CLOEXEC;
+#endif
+ fd = open(path, flags, 0);
+ if (fd == -1) {
+ if (errno == EINTR)
+ goto start;
+ goto nodevrandom;
+ }
+#ifndef O_CLOEXEC
+ fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+#endif
+
+ /* Lightly verify that the device node looks sane */
+ if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode) ||
+ (devfscheck && (strcmp(st.st_fstype, "devfs") != 0))) {
+ close(fd);
+ goto nodevrandom;
+ }
+ for (i = 0; i < len; ) {
+ size_t wanted = len - i;
+ ssize_t ret = read(fd, (char *)buf + i, wanted);
+
+ if (ret == -1) {
+ if (errno == EAGAIN || errno == EINTR)
+ continue;
+ close(fd);
+ goto nodevrandom;
+ }
+ i += ret;
+ }
+ close(fd);
+ errno = save_errno;
+ return (0); /* satisfied */
+nodevrandom:
+ errno = EIO;
+ return (-1);
+}
+
+static const int cl[] = {
+ CLOCK_REALTIME,
+#ifdef CLOCK_MONOTONIC
+ CLOCK_MONOTONIC,
+#endif
+#ifdef CLOCK_MONOTONIC_RAW
+ CLOCK_MONOTONIC_RAW,
+#endif
+#ifdef CLOCK_TAI
+ CLOCK_TAI,
+#endif
+#ifdef CLOCK_VIRTUAL
+ CLOCK_VIRTUAL,
+#endif
+#ifdef CLOCK_UPTIME
+ CLOCK_UPTIME,
+#endif
+#ifdef CLOCK_PROCESS_CPUTIME_ID
+ CLOCK_PROCESS_CPUTIME_ID,
+#endif
+#ifdef CLOCK_THREAD_CPUTIME_ID
+ CLOCK_THREAD_CPUTIME_ID,
+#endif
+};
+
+static int
+getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data)
+{
+ SHA512_CTX *ctx = data;
+
+ SHA512_Update(ctx, &info->dlpi_addr, sizeof (info->dlpi_addr));
+ return (0);
+}
+
+static int
+getentropy_fallback(void *buf, size_t len)
+{
+ uint8_t results[SHA512_DIGEST_LENGTH];
+ int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat;
+ static int cnt;
+ struct timespec ts;
+ struct timeval tv;
+ double loadavg[3];
+ struct rusage ru;
+ sigset_t sigset;
+ struct stat st;
+ SHA512_CTX ctx;
+ static pid_t lastpid;
+ pid_t pid;
+ size_t i, ii, m;
+ char *p;
+
+ pid = getpid();
+ if (lastpid == pid) {
+ faster = 1;
+ repeat = 2;
+ } else {
+ faster = 0;
+ lastpid = pid;
+ repeat = REPEAT;
+ }
+ for (i = 0; i < len; ) {
+ int j;
+ SHA512_Init(&ctx);
+ for (j = 0; j < repeat; j++) {
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ dl_iterate_phdr(getentropy_phdr, &ctx);
+
+ for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); ii++)
+ HX(clock_gettime(cl[ii], &ts) == -1, ts);
+
+ HX((pid = getpid()) == -1, pid);
+ HX((pid = getsid(pid)) == -1, pid);
+ HX((pid = getppid()) == -1, pid);
+ HX((pid = getpgid(0)) == -1, pid);
+ HX((e = getpriority(0, 0)) == -1, e);
+ HX((getloadavg(loadavg, 3) == -1), loadavg);
+
+ if (!faster) {
+ ts.tv_sec = 0;
+ ts.tv_nsec = 1;
+ (void) nanosleep(&ts, NULL);
+ }
+
+ HX(sigpending(&sigset) == -1, sigset);
+ HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1,
+ sigset);
+
+ HF(getentropy); /* an addr in this library */
+ HF(printf); /* an addr in libc */
+ p = (char *)&p;
+ HD(p); /* an addr on stack */
+ p = (char *)&errno;
+ HD(p); /* the addr of errno */
+
+ if (i == 0) {
+ struct sockaddr_storage ss;
+ struct statvfs stvfs;
+ struct termios tios;
+ socklen_t ssl;
+ off_t off;
+
+ /*
+ * Prime-sized mappings encourage fragmentation;
+ * thus exposing some address entropy.
+ */
+ struct mm {
+ size_t npg;
+ void *p;
+ } mm[] = {
+ { 17, MAP_FAILED }, { 3, MAP_FAILED },
+ { 11, MAP_FAILED }, { 2, MAP_FAILED },
+ { 5, MAP_FAILED }, { 3, MAP_FAILED },
+ { 7, MAP_FAILED }, { 1, MAP_FAILED },
+ { 57, MAP_FAILED }, { 3, MAP_FAILED },
+ { 131, MAP_FAILED }, { 1, MAP_FAILED },
+ };
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ HX(mm[m].p = mmap(NULL,
+ mm[m].npg * pgs,
+ PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON, -1,
+ (off_t)0), mm[m].p);
+ if (mm[m].p != MAP_FAILED) {
+ size_t mo;
+
+ /* Touch some memory... */
+ p = mm[m].p;
+ mo = cnt %
+ (mm[m].npg * pgs - 1);
+ p[mo] = 1;
+ cnt += (int)((long)(mm[m].p)
+ / pgs);
+ }
+
+ /* Check cnts and times... */
+ for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]);
+ ii++) {
+ HX((e = clock_gettime(cl[ii],
+ &ts)) == -1, ts);
+ if (e != -1)
+ cnt += (int)ts.tv_nsec;
+ }
+
+ HX((e = getrusage(RUSAGE_SELF,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ }
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ if (mm[m].p != MAP_FAILED)
+ munmap(mm[m].p, mm[m].npg * pgs);
+ mm[m].p = MAP_FAILED;
+ }
+
+ HX(stat(".", &st) == -1, st);
+ HX(statvfs(".", &stvfs) == -1, stvfs);
+
+ HX(stat("/", &st) == -1, st);
+ HX(statvfs("/", &stvfs) == -1, stvfs);
+
+ HX((e = fstat(0, &st)) == -1, st);
+ if (e == -1) {
+ if (S_ISREG(st.st_mode) ||
+ S_ISFIFO(st.st_mode) ||
+ S_ISSOCK(st.st_mode)) {
+ HX(fstatvfs(0, &stvfs) == -1,
+ stvfs);
+ HX((off = lseek(0, (off_t)0,
+ SEEK_CUR)) < 0, off);
+ }
+ if (S_ISCHR(st.st_mode)) {
+ HX(tcgetattr(0, &tios) == -1,
+ tios);
+ } else if (S_ISSOCK(st.st_mode)) {
+ memset(&ss, 0, sizeof ss);
+ ssl = sizeof(ss);
+ HX(getpeername(0,
+ (void *)&ss, &ssl) == -1,
+ ss);
+ }
+ }
+
+ HX((e = getrusage(RUSAGE_CHILDREN,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ } else {
+ /* Subsequent hashes absorb previous result */
+ HD(results);
+ }
+
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ HD(cnt);
+ }
+ SHA512_Final(results, &ctx);
+ memcpy((char *)buf + i, results, MINIMUM(sizeof(results), len - i));
+ i += MINIMUM(sizeof(results), len - i);
+ }
+ explicit_bzero(&ctx, sizeof ctx);
+ explicit_bzero(results, sizeof results);
+ errno = save_errno;
+ return (0); /* satisfied */
+}
diff --git a/ext/libressl/crypto/compat/getentropy_win.c b/ext/libressl/crypto/compat/getentropy_win.c
new file mode 100644
index 0000000..64514b3
--- /dev/null
+++ b/ext/libressl/crypto/compat/getentropy_win.c
@@ -0,0 +1,50 @@
+/* $OpenBSD: getentropy_win.c,v 1.6 2020/11/11 10:41:24 bcook Exp $ */
+
+/*
+ * Copyright (c) 2014, Theo de Raadt <deraadt@openbsd.org>
+ * Copyright (c) 2014, Bob Beck <beck@obtuse.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://man.openbsd.org/getentropy.2
+ */
+
+#include <windows.h>
+#include <bcrypt.h>
+#include <errno.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+int getentropy(void *buf, size_t len);
+
+/*
+ * On Windows, BCryptGenRandom with BCRYPT_USE_SYSTEM_PREFERRED_RNG is supposed
+ * to be a well-seeded, cryptographically strong random number generator.
+ * https://docs.microsoft.com/en-us/windows/win32/api/bcrypt/nf-bcrypt-bcryptgenrandom
+ */
+int
+getentropy(void *buf, size_t len)
+{
+ if (len > 256) {
+ errno = EIO;
+ return (-1);
+ }
+
+ if (FAILED(BCryptGenRandom(NULL, buf, len, BCRYPT_USE_SYSTEM_PREFERRED_RNG))) {
+ errno = EIO;
+ return (-1);
+ }
+
+ return (0);
+}
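
As in the other getentropy() implementations above, this one caps a single request at 256 bytes and reports EIO for anything larger, so a caller that needs more output must loop. A minimal caller sketch under that assumption (the helper name fill_random is hypothetical and not part of any of these files):

	#include <stddef.h>

	int getentropy(void *buf, size_t len);

	/* Hypothetical helper: fill an arbitrarily large buffer by issuing
	 * getentropy() requests of at most 256 bytes each. */
	static int
	fill_random(unsigned char *buf, size_t len)
	{
		size_t i;

		for (i = 0; i < len;) {
			size_t chunk = len - i;

			if (chunk > 256)
				chunk = 256;
			if (getentropy(buf + i, chunk) == -1)
				return (-1);	/* errno is set (EIO) on failure */
			i += chunk;
		}
		return (0);
	}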
diff --git a/ext/libressl/crypto/compat/timingsafe_bcmp.c b/ext/libressl/crypto/compat/timingsafe_bcmp.c
new file mode 100644
index 0000000..552e844
--- /dev/null
+++ b/ext/libressl/crypto/compat/timingsafe_bcmp.c
@@ -0,0 +1,29 @@
+/* $OpenBSD: timingsafe_bcmp.c,v 1.3 2015/08/31 02:53:57 guenther Exp $ */
+/*
+ * Copyright (c) 2010 Damien Miller. All rights reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <string.h>
+
+int
+timingsafe_bcmp(const void *b1, const void *b2, size_t n)
+{
+ const unsigned char *p1 = b1, *p2 = b2;
+ int ret = 0;
+
+ for (; n > 0; n--)
+ ret |= *p1++ ^ *p2++;
+ return (ret != 0);
+}
diff --git a/ext/libressl/crypto/compat/timingsafe_memcmp.c b/ext/libressl/crypto/compat/timingsafe_memcmp.c
new file mode 100644
index 0000000..bb210a3
--- /dev/null
+++ b/ext/libressl/crypto/compat/timingsafe_memcmp.c
@@ -0,0 +1,46 @@
+/* $OpenBSD: timingsafe_memcmp.c,v 1.2 2015/08/31 02:53:57 guenther Exp $ */
+/*
+ * Copyright (c) 2014 Google Inc.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <limits.h>
+#include <string.h>
+
+int
+timingsafe_memcmp(const void *b1, const void *b2, size_t len)
+{
+ const unsigned char *p1 = b1, *p2 = b2;
+ size_t i;
+ int res = 0, done = 0;
+
+ for (i = 0; i < len; i++) {
+ /* lt is -1 if p1[i] < p2[i]; else 0. */
+ int lt = (p1[i] - p2[i]) >> CHAR_BIT;
+
+ /* gt is -1 if p1[i] > p2[i]; else 0. */
+ int gt = (p2[i] - p1[i]) >> CHAR_BIT;
+
+ /* cmp is 1 if p1[i] > p2[i]; -1 if p1[i] < p2[i]; else 0. */
+ int cmp = lt - gt;
+
+ /* set res = cmp if !done. */
+ res |= cmp & ~done;
+
+ /* set done if p1[i] != p2[i]. */
+ done |= lt | gt;
+ }
+
+ return (res);
+}
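
The lt/gt computations above rely on the usual arithmetic right shift of a negative int: after integer promotion, p1[i] - p2[i] lies in -255..255, and shifting by CHAR_BIT yields -1 exactly when the difference is negative and 0 otherwise. A small worked check of that assumption (illustrative only, not part of the file above):

	#include <assert.h>
	#include <limits.h>

	int
	main(void)
	{
		int a = 0x40, b = 0x7f;		/* a < b */
		int lt = (a - b) >> CHAR_BIT;	/* -63 >> 8 == -1 with arithmetic shift */
		int gt = (b - a) >> CHAR_BIT;	/*  63 >> 8 ==  0 */

		assert(lt == -1 && gt == 0);
		assert(lt - gt == -1);		/* cmp for this byte pair: "less than" */
		return (0);
	}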
diff --git a/ext/libressl/crypto/curve25519/Makefile b/ext/libressl/crypto/curve25519/Makefile
new file mode 100644
index 0000000..459383d
--- /dev/null
+++ b/ext/libressl/crypto/curve25519/Makefile
@@ -0,0 +1,14 @@
+include ../../ssl_common.mk
+CFLAGS += -D__BEGIN_HIDDEN_DECLS= -D__END_HIDDEN_DECLS= -DED25519
+
+obj = curve25519.o curve25519-generic.o
+
+
+all: $(obj)
+dep: all
+
+%.o: %.c
+ $(CC) $(CFLAGS) -c $<
+
+clean:
+ rm -f *.o *.a
diff --git a/ext/libressl/crypto/curve25519/curve25519-generic.c b/ext/libressl/crypto/curve25519/curve25519-generic.c
new file mode 100644
index 0000000..d533731
--- /dev/null
+++ b/ext/libressl/crypto/curve25519/curve25519-generic.c
@@ -0,0 +1,34 @@
+/* $OpenBSD: curve25519-generic.c,v 1.2 2019/05/11 15:55:52 tb Exp $ */
+/*
+ * Copyright (c) 2015, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP
+ * 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as
+ * public domain but this file has the ISC license just to keep licencing
+ * simple.
+ *
+ * The field functions are shared by Ed25519 and X25519 where possible.
+ */
+
+#include "curve25519_internal.h"
+
+void
+x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32],
+ const uint8_t point[32])
+{
+ x25519_scalar_mult_generic(out, scalar, point);
+}
diff --git a/ext/libressl/crypto/curve25519/curve25519.c b/ext/libressl/crypto/curve25519/curve25519.c
new file mode 100644
index 0000000..13b54c3
--- /dev/null
+++ b/ext/libressl/crypto/curve25519/curve25519.c
@@ -0,0 +1,4935 @@
+/* $OpenBSD: curve25519.c,v 1.5 2019/05/11 15:55:52 tb Exp $ */
+/*
+ * Copyright (c) 2015, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP
+ * 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as
+ * public domain but this file has the ISC license just to keep licencing
+ * simple.
+ *
+ * The field functions are shared by Ed25519 and X25519 where possible.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <openssl/curve25519.h>
+
+#ifdef ED25519
+#include <openssl/sha.h>
+#endif
+
+#include "curve25519_internal.h"
+
+static const int64_t kBottom25Bits = 0x1ffffffLL;
+static const int64_t kBottom26Bits = 0x3ffffffLL;
+static const int64_t kTop39Bits = 0xfffffffffe000000LL;
+static const int64_t kTop38Bits = 0xfffffffffc000000LL;
+
+static uint64_t load_3(const uint8_t *in) {
+ uint64_t result;
+ result = (uint64_t)in[0];
+ result |= ((uint64_t)in[1]) << 8;
+ result |= ((uint64_t)in[2]) << 16;
+ return result;
+}
+
+static uint64_t load_4(const uint8_t *in) {
+ uint64_t result;
+ result = (uint64_t)in[0];
+ result |= ((uint64_t)in[1]) << 8;
+ result |= ((uint64_t)in[2]) << 16;
+ result |= ((uint64_t)in[3]) << 24;
+ return result;
+}
+
+static void fe_frombytes(fe h, const uint8_t *s) {
+ /* Ignores top bit of h. */
+ int64_t h0 = load_4(s);
+ int64_t h1 = load_3(s + 4) << 6;
+ int64_t h2 = load_3(s + 7) << 5;
+ int64_t h3 = load_3(s + 10) << 3;
+ int64_t h4 = load_3(s + 13) << 2;
+ int64_t h5 = load_4(s + 16);
+ int64_t h6 = load_3(s + 20) << 7;
+ int64_t h7 = load_3(s + 23) << 5;
+ int64_t h8 = load_3(s + 26) << 4;
+ int64_t h9 = (load_3(s + 29) & 8388607) << 2;
+ int64_t carry0;
+ int64_t carry1;
+ int64_t carry2;
+ int64_t carry3;
+ int64_t carry4;
+ int64_t carry5;
+ int64_t carry6;
+ int64_t carry7;
+ int64_t carry8;
+ int64_t carry9;
+
+ carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits;
+ carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits;
+ carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits;
+ carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits;
+ carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits;
+
+ carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
+ carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits;
+ carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
+ carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits;
+ carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits;
+
+ h[0] = h0;
+ h[1] = h1;
+ h[2] = h2;
+ h[3] = h3;
+ h[4] = h4;
+ h[5] = h5;
+ h[6] = h6;
+ h[7] = h7;
+ h[8] = h8;
+ h[9] = h9;
+}
+
+/* Preconditions:
+ * |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+ *
+ * Write p=2^255-19; q=floor(h/p).
+ * Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
+ *
+ * Proof:
+ * Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
+ * Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
+ *
+ * Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
+ * Then 0<y<1.
+ *
+ * Write r=h-pq.
+ * Have 0<=r<=p-1=2^255-20.
+ * Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
+ *
+ * Write x=r+19(2^-255)r+y.
+ * Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
+ *
+ * Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
+ * so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. */
+static void fe_tobytes(uint8_t *s, const fe h) {
+ int32_t h0 = h[0];
+ int32_t h1 = h[1];
+ int32_t h2 = h[2];
+ int32_t h3 = h[3];
+ int32_t h4 = h[4];
+ int32_t h5 = h[5];
+ int32_t h6 = h[6];
+ int32_t h7 = h[7];
+ int32_t h8 = h[8];
+ int32_t h9 = h[9];
+ int32_t q;
+
+ q = (19 * h9 + (((int32_t) 1) << 24)) >> 25;
+ q = (h0 + q) >> 26;
+ q = (h1 + q) >> 25;
+ q = (h2 + q) >> 26;
+ q = (h3 + q) >> 25;
+ q = (h4 + q) >> 26;
+ q = (h5 + q) >> 25;
+ q = (h6 + q) >> 26;
+ q = (h7 + q) >> 25;
+ q = (h8 + q) >> 26;
+ q = (h9 + q) >> 25;
+
+ /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
+ h0 += 19 * q;
+ /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
+
+ h1 += h0 >> 26; h0 &= kBottom26Bits;
+ h2 += h1 >> 25; h1 &= kBottom25Bits;
+ h3 += h2 >> 26; h2 &= kBottom26Bits;
+ h4 += h3 >> 25; h3 &= kBottom25Bits;
+ h5 += h4 >> 26; h4 &= kBottom26Bits;
+ h6 += h5 >> 25; h5 &= kBottom25Bits;
+ h7 += h6 >> 26; h6 &= kBottom26Bits;
+ h8 += h7 >> 25; h7 &= kBottom25Bits;
+ h9 += h8 >> 26; h8 &= kBottom26Bits;
+ h9 &= kBottom25Bits;
+ /* h10 = carry9 */
+
+ /* Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
+ * Have h0+...+2^230 h9 between 0 and 2^255-1;
+ * evidently 2^255 h10-2^255 q = 0.
+ * Goal: Output h0+...+2^230 h9. */
+
+ s[0] = h0 >> 0;
+ s[1] = h0 >> 8;
+ s[2] = h0 >> 16;
+ s[3] = (h0 >> 24) | ((uint32_t)(h1) << 2);
+ s[4] = h1 >> 6;
+ s[5] = h1 >> 14;
+ s[6] = (h1 >> 22) | ((uint32_t)(h2) << 3);
+ s[7] = h2 >> 5;
+ s[8] = h2 >> 13;
+ s[9] = (h2 >> 21) | ((uint32_t)(h3) << 5);
+ s[10] = h3 >> 3;
+ s[11] = h3 >> 11;
+ s[12] = (h3 >> 19) | ((uint32_t)(h4) << 6);
+ s[13] = h4 >> 2;
+ s[14] = h4 >> 10;
+ s[15] = h4 >> 18;
+ s[16] = h5 >> 0;
+ s[17] = h5 >> 8;
+ s[18] = h5 >> 16;
+ s[19] = (h5 >> 24) | ((uint32_t)(h6) << 1);
+ s[20] = h6 >> 7;
+ s[21] = h6 >> 15;
+ s[22] = (h6 >> 23) | ((uint32_t)(h7) << 3);
+ s[23] = h7 >> 5;
+ s[24] = h7 >> 13;
+ s[25] = (h7 >> 21) | ((uint32_t)(h8) << 4);
+ s[26] = h8 >> 4;
+ s[27] = h8 >> 12;
+ s[28] = (h8 >> 20) | ((uint32_t)(h9) << 6);
+ s[29] = h9 >> 2;
+ s[30] = h9 >> 10;
+ s[31] = h9 >> 18;
+}
+
+/* h = f */
+static void fe_copy(fe h, const fe f) {
+ memmove(h, f, sizeof(int32_t) * 10);
+}
+
+/* h = 0 */
+static void fe_0(fe h) { memset(h, 0, sizeof(int32_t) * 10); }
+
+/* h = 1 */
+static void fe_1(fe h) {
+ memset(h, 0, sizeof(int32_t) * 10);
+ h[0] = 1;
+}
+
+/* h = f + g
+ * Can overlap h with f or g.
+ *
+ * Preconditions:
+ * |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+ * |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+ *
+ * Postconditions:
+ * |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */
+static void fe_add(fe h, const fe f, const fe g) {
+ unsigned i;
+ for (i = 0; i < 10; i++) {
+ h[i] = f[i] + g[i];
+ }
+}
+
+/* h = f - g
+ * Can overlap h with f or g.
+ *
+ * Preconditions:
+ * |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+ * |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+ *
+ * Postconditions:
+ * |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */
+static void fe_sub(fe h, const fe f, const fe g) {
+ unsigned i;
+ for (i = 0; i < 10; i++) {
+ h[i] = f[i] - g[i];
+ }
+}
+
+/* h = f * g
+ * Can overlap h with f or g.
+ *
+ * Preconditions:
+ * |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
+ * |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
+ *
+ * Postconditions:
+ * |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
+ *
+ * Notes on implementation strategy:
+ *
+ * Using schoolbook multiplication.
+ * Karatsuba would save a little in some cost models.
+ *
+ * Most multiplications by 2 and 19 are 32-bit precomputations;
+ * cheaper than 64-bit postcomputations.
+ *
+ * There is one remaining multiplication by 19 in the carry chain;
+ * one *19 precomputation can be merged into this,
+ * but the resulting data flow is considerably less clean.
+ *
+ * There are 12 carries below.
+ * 10 of them are 2-way parallelizable and vectorizable.
+ * Can get away with 11 carries, but then data flow is much deeper.
+ *
+ * With tighter constraints on inputs can squeeze carries into int32. */
+static void fe_mul(fe h, const fe f, const fe g) {
+ int32_t f0 = f[0];
+ int32_t f1 = f[1];
+ int32_t f2 = f[2];
+ int32_t f3 = f[3];
+ int32_t f4 = f[4];
+ int32_t f5 = f[5];
+ int32_t f6 = f[6];
+ int32_t f7 = f[7];
+ int32_t f8 = f[8];
+ int32_t f9 = f[9];
+ int32_t g0 = g[0];
+ int32_t g1 = g[1];
+ int32_t g2 = g[2];
+ int32_t g3 = g[3];
+ int32_t g4 = g[4];
+ int32_t g5 = g[5];
+ int32_t g6 = g[6];
+ int32_t g7 = g[7];
+ int32_t g8 = g[8];
+ int32_t g9 = g[9];
+ int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
+ int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
+ int32_t g3_19 = 19 * g3;
+ int32_t g4_19 = 19 * g4;
+ int32_t g5_19 = 19 * g5;
+ int32_t g6_19 = 19 * g6;
+ int32_t g7_19 = 19 * g7;
+ int32_t g8_19 = 19 * g8;
+ int32_t g9_19 = 19 * g9;
+ int32_t f1_2 = 2 * f1;
+ int32_t f3_2 = 2 * f3;
+ int32_t f5_2 = 2 * f5;
+ int32_t f7_2 = 2 * f7;
+ int32_t f9_2 = 2 * f9;
+ int64_t f0g0 = f0 * (int64_t) g0;
+ int64_t f0g1 = f0 * (int64_t) g1;
+ int64_t f0g2 = f0 * (int64_t) g2;
+ int64_t f0g3 = f0 * (int64_t) g3;
+ int64_t f0g4 = f0 * (int64_t) g4;
+ int64_t f0g5 = f0 * (int64_t) g5;
+ int64_t f0g6 = f0 * (int64_t) g6;
+ int64_t f0g7 = f0 * (int64_t) g7;
+ int64_t f0g8 = f0 * (int64_t) g8;
+ int64_t f0g9 = f0 * (int64_t) g9;
+ int64_t f1g0 = f1 * (int64_t) g0;
+ int64_t f1g1_2 = f1_2 * (int64_t) g1;
+ int64_t f1g2 = f1 * (int64_t) g2;
+ int64_t f1g3_2 = f1_2 * (int64_t) g3;
+ int64_t f1g4 = f1 * (int64_t) g4;
+ int64_t f1g5_2 = f1_2 * (int64_t) g5;
+ int64_t f1g6 = f1 * (int64_t) g6;
+ int64_t f1g7_2 = f1_2 * (int64_t) g7;
+ int64_t f1g8 = f1 * (int64_t) g8;
+ int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
+ int64_t f2g0 = f2 * (int64_t) g0;
+ int64_t f2g1 = f2 * (int64_t) g1;
+ int64_t f2g2 = f2 * (int64_t) g2;
+ int64_t f2g3 = f2 * (int64_t) g3;
+ int64_t f2g4 = f2 * (int64_t) g4;
+ int64_t f2g5 = f2 * (int64_t) g5;
+ int64_t f2g6 = f2 * (int64_t) g6;
+ int64_t f2g7 = f2 * (int64_t) g7;
+ int64_t f2g8_19 = f2 * (int64_t) g8_19;
+ int64_t f2g9_19 = f2 * (int64_t) g9_19;
+ int64_t f3g0 = f3 * (int64_t) g0;
+ int64_t f3g1_2 = f3_2 * (int64_t) g1;
+ int64_t f3g2 = f3 * (int64_t) g2;
+ int64_t f3g3_2 = f3_2 * (int64_t) g3;
+ int64_t f3g4 = f3 * (int64_t) g4;
+ int64_t f3g5_2 = f3_2 * (int64_t) g5;
+ int64_t f3g6 = f3 * (int64_t) g6;
+ int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
+ int64_t f3g8_19 = f3 * (int64_t) g8_19;
+ int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
+ int64_t f4g0 = f4 * (int64_t) g0;
+ int64_t f4g1 = f4 * (int64_t) g1;
+ int64_t f4g2 = f4 * (int64_t) g2;
+ int64_t f4g3 = f4 * (int64_t) g3;
+ int64_t f4g4 = f4 * (int64_t) g4;
+ int64_t f4g5 = f4 * (int64_t) g5;
+ int64_t f4g6_19 = f4 * (int64_t) g6_19;
+ int64_t f4g7_19 = f4 * (int64_t) g7_19;
+ int64_t f4g8_19 = f4 * (int64_t) g8_19;
+ int64_t f4g9_19 = f4 * (int64_t) g9_19;
+ int64_t f5g0 = f5 * (int64_t) g0;
+ int64_t f5g1_2 = f5_2 * (int64_t) g1;
+ int64_t f5g2 = f5 * (int64_t) g2;
+ int64_t f5g3_2 = f5_2 * (int64_t) g3;
+ int64_t f5g4 = f5 * (int64_t) g4;
+ int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
+ int64_t f5g6_19 = f5 * (int64_t) g6_19;
+ int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
+ int64_t f5g8_19 = f5 * (int64_t) g8_19;
+ int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
+ int64_t f6g0 = f6 * (int64_t) g0;
+ int64_t f6g1 = f6 * (int64_t) g1;
+ int64_t f6g2 = f6 * (int64_t) g2;
+ int64_t f6g3 = f6 * (int64_t) g3;
+ int64_t f6g4_19 = f6 * (int64_t) g4_19;
+ int64_t f6g5_19 = f6 * (int64_t) g5_19;
+ int64_t f6g6_19 = f6 * (int64_t) g6_19;
+ int64_t f6g7_19 = f6 * (int64_t) g7_19;
+ int64_t f6g8_19 = f6 * (int64_t) g8_19;
+ int64_t f6g9_19 = f6 * (int64_t) g9_19;
+ int64_t f7g0 = f7 * (int64_t) g0;
+ int64_t f7g1_2 = f7_2 * (int64_t) g1;
+ int64_t f7g2 = f7 * (int64_t) g2;
+ int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
+ int64_t f7g4_19 = f7 * (int64_t) g4_19;
+ int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
+ int64_t f7g6_19 = f7 * (int64_t) g6_19;
+ int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
+ int64_t f7g8_19 = f7 * (int64_t) g8_19;
+ int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
+ int64_t f8g0 = f8 * (int64_t) g0;
+ int64_t f8g1 = f8 * (int64_t) g1;
+ int64_t f8g2_19 = f8 * (int64_t) g2_19;
+ int64_t f8g3_19 = f8 * (int64_t) g3_19;
+ int64_t f8g4_19 = f8 * (int64_t) g4_19;
+ int64_t f8g5_19 = f8 * (int64_t) g5_19;
+ int64_t f8g6_19 = f8 * (int64_t) g6_19;
+ int64_t f8g7_19 = f8 * (int64_t) g7_19;
+ int64_t f8g8_19 = f8 * (int64_t) g8_19;
+ int64_t f8g9_19 = f8 * (int64_t) g9_19;
+ int64_t f9g0 = f9 * (int64_t) g0;
+ int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
+ int64_t f9g2_19 = f9 * (int64_t) g2_19;
+ int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
+ int64_t f9g4_19 = f9 * (int64_t) g4_19;
+ int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
+ int64_t f9g6_19 = f9 * (int64_t) g6_19;
+ int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
+ int64_t f9g8_19 = f9 * (int64_t) g8_19;
+ int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
+ int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
+ int64_t h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
+ int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
+ int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
+ int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
+ int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
+ int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38;
+ int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19;
+ int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38;
+ int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ;
+ int64_t carry0;
+ int64_t carry1;
+ int64_t carry2;
+ int64_t carry3;
+ int64_t carry4;
+ int64_t carry5;
+ int64_t carry6;
+ int64_t carry7;
+ int64_t carry8;
+ int64_t carry9;
+
+ /* |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
+ * i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
+ * |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
+ * i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9 */
+
+ carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
+ carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
+ /* |h0| <= 2^25 */
+ /* |h4| <= 2^25 */
+ /* |h1| <= 1.71*2^59 */
+ /* |h5| <= 1.71*2^59 */
+
+ carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits;
+ carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits;
+ /* |h1| <= 2^24; from now on fits into int32 */
+ /* |h5| <= 2^24; from now on fits into int32 */
+ /* |h2| <= 1.41*2^60 */
+ /* |h6| <= 1.41*2^60 */
+
+ carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits;
+ carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits;
+ /* |h2| <= 2^25; from now on fits into int32 unchanged */
+ /* |h6| <= 2^25; from now on fits into int32 unchanged */
+ /* |h3| <= 1.71*2^59 */
+ /* |h7| <= 1.71*2^59 */
+
+ carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits;
+ carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits;
+ /* |h3| <= 2^24; from now on fits into int32 unchanged */
+ /* |h7| <= 2^24; from now on fits into int32 unchanged */
+ /* |h4| <= 1.72*2^34 */
+ /* |h8| <= 1.41*2^60 */
+
+ carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
+ carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits;
+ /* |h4| <= 2^25; from now on fits into int32 unchanged */
+ /* |h8| <= 2^25; from now on fits into int32 unchanged */
+ /* |h5| <= 1.01*2^24 */
+ /* |h9| <= 1.71*2^59 */
+
+ carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits;
+ /* |h9| <= 2^24; from now on fits into int32 unchanged */
+ /* |h0| <= 1.1*2^39 */
+
+ carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
+ /* |h0| <= 2^25; from now on fits into int32 unchanged */
+ /* |h1| <= 1.01*2^24 */
+
+ h[0] = h0;
+ h[1] = h1;
+ h[2] = h2;
+ h[3] = h3;
+ h[4] = h4;
+ h[5] = h5;
+ h[6] = h6;
+ h[7] = h7;
+ h[8] = h8;
+ h[9] = h9;
+}
+
+/* h = f * f
+ * Can overlap h with f.
+ *
+ * Preconditions:
+ * |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
+ *
+ * Postconditions:
+ * |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
+ *
+ * See fe_mul.c for discussion of implementation strategy. */
+static void fe_sq(fe h, const fe f) {
+ int32_t f0 = f[0];
+ int32_t f1 = f[1];
+ int32_t f2 = f[2];
+ int32_t f3 = f[3];
+ int32_t f4 = f[4];
+ int32_t f5 = f[5];
+ int32_t f6 = f[6];
+ int32_t f7 = f[7];
+ int32_t f8 = f[8];
+ int32_t f9 = f[9];
+ int32_t f0_2 = 2 * f0;
+ int32_t f1_2 = 2 * f1;
+ int32_t f2_2 = 2 * f2;
+ int32_t f3_2 = 2 * f3;
+ int32_t f4_2 = 2 * f4;
+ int32_t f5_2 = 2 * f5;
+ int32_t f6_2 = 2 * f6;
+ int32_t f7_2 = 2 * f7;
+ int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
+ int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
+ int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
+ int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
+ int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
+ int64_t f0f0 = f0 * (int64_t) f0;
+ int64_t f0f1_2 = f0_2 * (int64_t) f1;
+ int64_t f0f2_2 = f0_2 * (int64_t) f2;
+ int64_t f0f3_2 = f0_2 * (int64_t) f3;
+ int64_t f0f4_2 = f0_2 * (int64_t) f4;
+ int64_t f0f5_2 = f0_2 * (int64_t) f5;
+ int64_t f0f6_2 = f0_2 * (int64_t) f6;
+ int64_t f0f7_2 = f0_2 * (int64_t) f7;
+ int64_t f0f8_2 = f0_2 * (int64_t) f8;
+ int64_t f0f9_2 = f0_2 * (int64_t) f9;
+ int64_t f1f1_2 = f1_2 * (int64_t) f1;
+ int64_t f1f2_2 = f1_2 * (int64_t) f2;
+ int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
+ int64_t f1f4_2 = f1_2 * (int64_t) f4;
+ int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
+ int64_t f1f6_2 = f1_2 * (int64_t) f6;
+ int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
+ int64_t f1f8_2 = f1_2 * (int64_t) f8;
+ int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
+ int64_t f2f2 = f2 * (int64_t) f2;
+ int64_t f2f3_2 = f2_2 * (int64_t) f3;
+ int64_t f2f4_2 = f2_2 * (int64_t) f4;
+ int64_t f2f5_2 = f2_2 * (int64_t) f5;
+ int64_t f2f6_2 = f2_2 * (int64_t) f6;
+ int64_t f2f7_2 = f2_2 * (int64_t) f7;
+ int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
+ int64_t f2f9_38 = f2 * (int64_t) f9_38;
+ int64_t f3f3_2 = f3_2 * (int64_t) f3;
+ int64_t f3f4_2 = f3_2 * (int64_t) f4;
+ int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
+ int64_t f3f6_2 = f3_2 * (int64_t) f6;
+ int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
+ int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
+ int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
+ int64_t f4f4 = f4 * (int64_t) f4;
+ int64_t f4f5_2 = f4_2 * (int64_t) f5;
+ int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
+ int64_t f4f7_38 = f4 * (int64_t) f7_38;
+ int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
+ int64_t f4f9_38 = f4 * (int64_t) f9_38;
+ int64_t f5f5_38 = f5 * (int64_t) f5_38;
+ int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
+ int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
+ int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
+ int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
+ int64_t f6f6_19 = f6 * (int64_t) f6_19;
+ int64_t f6f7_38 = f6 * (int64_t) f7_38;
+ int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
+ int64_t f6f9_38 = f6 * (int64_t) f9_38;
+ int64_t f7f7_38 = f7 * (int64_t) f7_38;
+ int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
+ int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
+ int64_t f8f8_19 = f8 * (int64_t) f8_19;
+ int64_t f8f9_38 = f8 * (int64_t) f9_38;
+ int64_t f9f9_38 = f9 * (int64_t) f9_38;
+ int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
+ int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
+ int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
+ int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
+ int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
+ int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
+ int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
+ int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
+ int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
+ int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
+ int64_t carry0;
+ int64_t carry1;
+ int64_t carry2;
+ int64_t carry3;
+ int64_t carry4;
+ int64_t carry5;
+ int64_t carry6;
+ int64_t carry7;
+ int64_t carry8;
+ int64_t carry9;
+
+ carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
+ carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
+
+ carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits;
+ carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits;
+
+ carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits;
+ carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits;
+
+ carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits;
+ carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits;
+
+ carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
+ carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits;
+
+ carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits;
+
+ carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
+
+ h[0] = h0;
+ h[1] = h1;
+ h[2] = h2;
+ h[3] = h3;
+ h[4] = h4;
+ h[5] = h5;
+ h[6] = h6;
+ h[7] = h7;
+ h[8] = h8;
+ h[9] = h9;
+}
+
+static void fe_invert(fe out, const fe z) {
+ fe t0;
+ fe t1;
+ fe t2;
+ fe t3;
+ int i;
+
+ fe_sq(t0, z);
+ for (i = 1; i < 1; ++i) {
+ fe_sq(t0, t0);
+ }
+ fe_sq(t1, t0);
+ for (i = 1; i < 2; ++i) {
+ fe_sq(t1, t1);
+ }
+ fe_mul(t1, z, t1);
+ fe_mul(t0, t0, t1);
+ fe_sq(t2, t0);
+ for (i = 1; i < 1; ++i) {
+ fe_sq(t2, t2);
+ }
+ fe_mul(t1, t1, t2);
+ fe_sq(t2, t1);
+ for (i = 1; i < 5; ++i) {
+ fe_sq(t2, t2);
+ }
+ fe_mul(t1, t2, t1);
+ fe_sq(t2, t1);
+ for (i = 1; i < 10; ++i) {
+ fe_sq(t2, t2);
+ }
+ fe_mul(t2, t2, t1);
+ fe_sq(t3, t2);
+ for (i = 1; i < 20; ++i) {
+ fe_sq(t3, t3);
+ }
+ fe_mul(t2, t3, t2);
+ fe_sq(t2, t2);
+ for (i = 1; i < 10; ++i) {
+ fe_sq(t2, t2);
+ }
+ fe_mul(t1, t2, t1);
+ fe_sq(t2, t1);
+ for (i = 1; i < 50; ++i) {
+ fe_sq(t2, t2);
+ }
+ fe_mul(t2, t2, t1);
+ fe_sq(t3, t2);
+ for (i = 1; i < 100; ++i) {
+ fe_sq(t3, t3);
+ }
+ fe_mul(t2, t3, t2);
+ fe_sq(t2, t2);
+ for (i = 1; i < 50; ++i) {
+ fe_sq(t2, t2);
+ }
+ fe_mul(t1, t2, t1);
+ fe_sq(t1, t1);
+ for (i = 1; i < 5; ++i) {
+ fe_sq(t1, t1);
+ }
+ fe_mul(out, t1, t0);
+}
+
+/* h = -f
+ *
+ * Preconditions:
+ * |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+ *
+ * Postconditions:
+ * |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. */
+static void fe_neg(fe h, const fe f) {
+ unsigned i;
+ for (i = 0; i < 10; i++) {
+ h[i] = -f[i];
+ }
+}
+
+/* Replace (f,g) with (g,g) if b == 1;
+ * replace (f,g) with (f,g) if b == 0.
+ *
+ * Preconditions: b in {0,1}. */
+static void fe_cmov(fe f, const fe g, unsigned b) {
+ b = 0-b;
+ unsigned i;
+ for (i = 0; i < 10; i++) {
+ int32_t x = f[i] ^ g[i];
+ x &= b;
+ f[i] ^= x;
+ }
+}
+
+/* return 0 if f == 0
+ * return 1 if f != 0
+ *
+ * Preconditions:
+ * |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */
+static int fe_isnonzero(const fe f) {
+ uint8_t s[32];
+ fe_tobytes(s, f);
+
+ static const uint8_t zero[32] = {0};
+ return timingsafe_memcmp(s, zero, sizeof(zero)) != 0;
+}
+
+/* return 1 if f is in {1,3,5,...,q-2}
+ * return 0 if f is in {0,2,4,...,q-1}
+ *
+ * Preconditions:
+ * |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */
+static int fe_isnegative(const fe f) {
+ uint8_t s[32];
+ fe_tobytes(s, f);
+ return s[0] & 1;
+}
+
+/* h = 2 * f * f
+ * Can overlap h with f.
+ *
+ * Preconditions:
+ * |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
+ *
+ * Postconditions:
+ * |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
+ *
+ * See fe_mul.c for discussion of implementation strategy. */
+static void fe_sq2(fe h, const fe f) {
+ int32_t f0 = f[0];
+ int32_t f1 = f[1];
+ int32_t f2 = f[2];
+ int32_t f3 = f[3];
+ int32_t f4 = f[4];
+ int32_t f5 = f[5];
+ int32_t f6 = f[6];
+ int32_t f7 = f[7];
+ int32_t f8 = f[8];
+ int32_t f9 = f[9];
+ int32_t f0_2 = 2 * f0;
+ int32_t f1_2 = 2 * f1;
+ int32_t f2_2 = 2 * f2;
+ int32_t f3_2 = 2 * f3;
+ int32_t f4_2 = 2 * f4;
+ int32_t f5_2 = 2 * f5;
+ int32_t f6_2 = 2 * f6;
+ int32_t f7_2 = 2 * f7;
+ int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
+ int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
+ int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
+ int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
+ int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
+ int64_t f0f0 = f0 * (int64_t) f0;
+ int64_t f0f1_2 = f0_2 * (int64_t) f1;
+ int64_t f0f2_2 = f0_2 * (int64_t) f2;
+ int64_t f0f3_2 = f0_2 * (int64_t) f3;
+ int64_t f0f4_2 = f0_2 * (int64_t) f4;
+ int64_t f0f5_2 = f0_2 * (int64_t) f5;
+ int64_t f0f6_2 = f0_2 * (int64_t) f6;
+ int64_t f0f7_2 = f0_2 * (int64_t) f7;
+ int64_t f0f8_2 = f0_2 * (int64_t) f8;
+ int64_t f0f9_2 = f0_2 * (int64_t) f9;
+ int64_t f1f1_2 = f1_2 * (int64_t) f1;
+ int64_t f1f2_2 = f1_2 * (int64_t) f2;
+ int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
+ int64_t f1f4_2 = f1_2 * (int64_t) f4;
+ int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
+ int64_t f1f6_2 = f1_2 * (int64_t) f6;
+ int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
+ int64_t f1f8_2 = f1_2 * (int64_t) f8;
+ int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
+ int64_t f2f2 = f2 * (int64_t) f2;
+ int64_t f2f3_2 = f2_2 * (int64_t) f3;
+ int64_t f2f4_2 = f2_2 * (int64_t) f4;
+ int64_t f2f5_2 = f2_2 * (int64_t) f5;
+ int64_t f2f6_2 = f2_2 * (int64_t) f6;
+ int64_t f2f7_2 = f2_2 * (int64_t) f7;
+ int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
+ int64_t f2f9_38 = f2 * (int64_t) f9_38;
+ int64_t f3f3_2 = f3_2 * (int64_t) f3;
+ int64_t f3f4_2 = f3_2 * (int64_t) f4;
+ int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
+ int64_t f3f6_2 = f3_2 * (int64_t) f6;
+ int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
+ int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
+ int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
+ int64_t f4f4 = f4 * (int64_t) f4;
+ int64_t f4f5_2 = f4_2 * (int64_t) f5;
+ int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
+ int64_t f4f7_38 = f4 * (int64_t) f7_38;
+ int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
+ int64_t f4f9_38 = f4 * (int64_t) f9_38;
+ int64_t f5f5_38 = f5 * (int64_t) f5_38;
+ int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
+ int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
+ int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
+ int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
+ int64_t f6f6_19 = f6 * (int64_t) f6_19;
+ int64_t f6f7_38 = f6 * (int64_t) f7_38;
+ int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
+ int64_t f6f9_38 = f6 * (int64_t) f9_38;
+ int64_t f7f7_38 = f7 * (int64_t) f7_38;
+ int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
+ int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
+ int64_t f8f8_19 = f8 * (int64_t) f8_19;
+ int64_t f8f9_38 = f8 * (int64_t) f9_38;
+ int64_t f9f9_38 = f9 * (int64_t) f9_38;
+ int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
+ int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
+ int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
+ int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
+ int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
+ int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
+ int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
+ int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
+ int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
+ int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
+ int64_t carry0;
+ int64_t carry1;
+ int64_t carry2;
+ int64_t carry3;
+ int64_t carry4;
+ int64_t carry5;
+ int64_t carry6;
+ int64_t carry7;
+ int64_t carry8;
+ int64_t carry9;
+
+ h0 += h0;
+ h1 += h1;
+ h2 += h2;
+ h3 += h3;
+ h4 += h4;
+ h5 += h5;
+ h6 += h6;
+ h7 += h7;
+ h8 += h8;
+ h9 += h9;
+
+ carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
+ carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
+
+ carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits;
+ carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits;
+
+ carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits;
+ carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits;
+
+ carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits;
+ carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits;
+
+ carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
+ carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits;
+
+ carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits;
+
+ carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
+
+ h[0] = h0;
+ h[1] = h1;
+ h[2] = h2;
+ h[3] = h3;
+ h[4] = h4;
+ h[5] = h5;
+ h[6] = h6;
+ h[7] = h7;
+ h[8] = h8;
+ h[9] = h9;
+}
+
+static void fe_pow22523(fe out, const fe z) {
+ fe t0;
+ fe t1;
+ fe t2;
+ int i;
+
+ fe_sq(t0, z);
+ for (i = 1; i < 1; ++i) {
+ fe_sq(t0, t0);
+ }
+ fe_sq(t1, t0);
+ for (i = 1; i < 2; ++i) {
+ fe_sq(t1, t1);
+ }
+ fe_mul(t1, z, t1);
+ fe_mul(t0, t0, t1);
+ fe_sq(t0, t0);
+ for (i = 1; i < 1; ++i) {
+ fe_sq(t0, t0);
+ }
+ fe_mul(t0, t1, t0);
+ fe_sq(t1, t0);
+ for (i = 1; i < 5; ++i) {
+ fe_sq(t1, t1);
+ }
+ fe_mul(t0, t1, t0);
+ fe_sq(t1, t0);
+ for (i = 1; i < 10; ++i) {
+ fe_sq(t1, t1);
+ }
+ fe_mul(t1, t1, t0);
+ fe_sq(t2, t1);
+ for (i = 1; i < 20; ++i) {
+ fe_sq(t2, t2);
+ }
+ fe_mul(t1, t2, t1);
+ fe_sq(t1, t1);
+ for (i = 1; i < 10; ++i) {
+ fe_sq(t1, t1);
+ }
+ fe_mul(t0, t1, t0);
+ fe_sq(t1, t0);
+ for (i = 1; i < 50; ++i) {
+ fe_sq(t1, t1);
+ }
+ fe_mul(t1, t1, t0);
+ fe_sq(t2, t1);
+ for (i = 1; i < 100; ++i) {
+ fe_sq(t2, t2);
+ }
+ fe_mul(t1, t2, t1);
+ fe_sq(t1, t1);
+ for (i = 1; i < 50; ++i) {
+ fe_sq(t1, t1);
+ }
+ fe_mul(t0, t1, t0);
+ fe_sq(t0, t0);
+ for (i = 1; i < 2; ++i) {
+ fe_sq(t0, t0);
+ }
+ fe_mul(out, t0, z);
+}
+
+void x25519_ge_tobytes(uint8_t *s, const ge_p2 *h) {
+ fe recip;
+ fe x;
+ fe y;
+
+ fe_invert(recip, h->Z);
+ fe_mul(x, h->X, recip);
+ fe_mul(y, h->Y, recip);
+ fe_tobytes(s, y);
+ s[31] ^= fe_isnegative(x) << 7;
+}
+
+#ifdef ED25519
+static void ge_p3_tobytes(uint8_t *s, const ge_p3 *h) {
+ fe recip;
+ fe x;
+ fe y;
+
+ fe_invert(recip, h->Z);
+ fe_mul(x, h->X, recip);
+ fe_mul(y, h->Y, recip);
+ fe_tobytes(s, y);
+ s[31] ^= fe_isnegative(x) << 7;
+}
+#endif
+
+static const fe d = {-10913610, 13857413, -15372611, 6949391, 114729,
+ -8787816, -6275908, -3247719, -18696448, -12055116};
+
+static const fe sqrtm1 = {-32595792, -7943725, 9377950, 3500415, 12389472,
+ -272473, -25146209, -2005654, 326686, 11406482};
+
+int x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t *s) {
+ fe u;
+ fe v;
+ fe v3;
+ fe vxx;
+ fe check;
+
+ fe_frombytes(h->Y, s);
+ fe_1(h->Z);
+ fe_sq(u, h->Y);
+ fe_mul(v, u, d);
+ fe_sub(u, u, h->Z); /* u = y^2-1 */
+ fe_add(v, v, h->Z); /* v = dy^2+1 */
+
+ fe_sq(v3, v);
+ fe_mul(v3, v3, v); /* v3 = v^3 */
+ fe_sq(h->X, v3);
+ fe_mul(h->X, h->X, v);
+ fe_mul(h->X, h->X, u); /* x = uv^7 */
+
+ fe_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */
+ fe_mul(h->X, h->X, v3);
+ fe_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */
+
+ fe_sq(vxx, h->X);
+ fe_mul(vxx, vxx, v);
+ fe_sub(check, vxx, u); /* vx^2-u */
+ if (fe_isnonzero(check)) {
+ fe_add(check, vxx, u); /* vx^2+u */
+ if (fe_isnonzero(check)) {
+ return -1;
+ }
+ fe_mul(h->X, h->X, sqrtm1);
+ }
+
+ if (fe_isnegative(h->X) != (s[31] >> 7)) {
+ fe_neg(h->X, h->X);
+ }
+
+ fe_mul(h->T, h->X, h->Y);
+ return 0;
+}
+
+static void ge_p2_0(ge_p2 *h) {
+ fe_0(h->X);
+ fe_1(h->Y);
+ fe_1(h->Z);
+}
+
+static void ge_p3_0(ge_p3 *h) {
+ fe_0(h->X);
+ fe_1(h->Y);
+ fe_1(h->Z);
+ fe_0(h->T);
+}
+
+static void ge_cached_0(ge_cached *h) {
+ fe_1(h->YplusX);
+ fe_1(h->YminusX);
+ fe_1(h->Z);
+ fe_0(h->T2d);
+}
+
+static void ge_precomp_0(ge_precomp *h) {
+ fe_1(h->yplusx);
+ fe_1(h->yminusx);
+ fe_0(h->xy2d);
+}
+
+/* r = p */
+static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) {
+ fe_copy(r->X, p->X);
+ fe_copy(r->Y, p->Y);
+ fe_copy(r->Z, p->Z);
+}
+
+static const fe d2 = {-21827239, -5839606, -30745221, 13898782, 229458,
+ 15978800, -12551817, -6495438, 29715968, 9444199};
+
+/* r = p */
+void x25519_ge_p3_to_cached(ge_cached *r, const ge_p3 *p) {
+ fe_add(r->YplusX, p->Y, p->X);
+ fe_sub(r->YminusX, p->Y, p->X);
+ fe_copy(r->Z, p->Z);
+ fe_mul(r->T2d, p->T, d2);
+}
+
+/* r = p */
+void x25519_ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) {
+ fe_mul(r->X, p->X, p->T);
+ fe_mul(r->Y, p->Y, p->Z);
+ fe_mul(r->Z, p->Z, p->T);
+}
+
+/* r = p */
+void x25519_ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) {
+ fe_mul(r->X, p->X, p->T);
+ fe_mul(r->Y, p->Y, p->Z);
+ fe_mul(r->Z, p->Z, p->T);
+ fe_mul(r->T, p->X, p->Y);
+}
+
+/* r = p */
+static void ge_p1p1_to_cached(ge_cached *r, const ge_p1p1 *p) {
+ ge_p3 t;
+ x25519_ge_p1p1_to_p3(&t, p);
+ x25519_ge_p3_to_cached(r, &t);
+}
+
+/* r = 2 * p */
+static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) {
+ fe t0;
+
+ fe_sq(r->X, p->X);
+ fe_sq(r->Z, p->Y);
+ fe_sq2(r->T, p->Z);
+ fe_add(r->Y, p->X, p->Y);
+ fe_sq(t0, r->Y);
+ fe_add(r->Y, r->Z, r->X);
+ fe_sub(r->Z, r->Z, r->X);
+ fe_sub(r->X, t0, r->Y);
+ fe_sub(r->T, r->T, r->Z);
+}
+
+/* r = 2 * p */
+static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) {
+ ge_p2 q;
+ ge_p3_to_p2(&q, p);
+ ge_p2_dbl(r, &q);
+}
+
+/* r = p + q */
+static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
+ fe t0;
+
+ fe_add(r->X, p->Y, p->X);
+ fe_sub(r->Y, p->Y, p->X);
+ fe_mul(r->Z, r->X, q->yplusx);
+ fe_mul(r->Y, r->Y, q->yminusx);
+ fe_mul(r->T, q->xy2d, p->T);
+ fe_add(t0, p->Z, p->Z);
+ fe_sub(r->X, r->Z, r->Y);
+ fe_add(r->Y, r->Z, r->Y);
+ fe_add(r->Z, t0, r->T);
+ fe_sub(r->T, t0, r->T);
+}
+
+#ifdef ED25519
+/* r = p - q */
+static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
+ fe t0;
+
+ fe_add(r->X, p->Y, p->X);
+ fe_sub(r->Y, p->Y, p->X);
+ fe_mul(r->Z, r->X, q->yminusx);
+ fe_mul(r->Y, r->Y, q->yplusx);
+ fe_mul(r->T, q->xy2d, p->T);
+ fe_add(t0, p->Z, p->Z);
+ fe_sub(r->X, r->Z, r->Y);
+ fe_add(r->Y, r->Z, r->Y);
+ fe_sub(r->Z, t0, r->T);
+ fe_add(r->T, t0, r->T);
+}
+#endif
+
+/* r = p + q */
+void x25519_ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
+ fe t0;
+
+ fe_add(r->X, p->Y, p->X);
+ fe_sub(r->Y, p->Y, p->X);
+ fe_mul(r->Z, r->X, q->YplusX);
+ fe_mul(r->Y, r->Y, q->YminusX);
+ fe_mul(r->T, q->T2d, p->T);
+ fe_mul(r->X, p->Z, q->Z);
+ fe_add(t0, r->X, r->X);
+ fe_sub(r->X, r->Z, r->Y);
+ fe_add(r->Y, r->Z, r->Y);
+ fe_add(r->Z, t0, r->T);
+ fe_sub(r->T, t0, r->T);
+}
+
+/* r = p - q */
+void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
+ fe t0;
+
+ fe_add(r->X, p->Y, p->X);
+ fe_sub(r->Y, p->Y, p->X);
+ fe_mul(r->Z, r->X, q->YminusX);
+ fe_mul(r->Y, r->Y, q->YplusX);
+ fe_mul(r->T, q->T2d, p->T);
+ fe_mul(r->X, p->Z, q->Z);
+ fe_add(t0, r->X, r->X);
+ fe_sub(r->X, r->Z, r->Y);
+ fe_add(r->Y, r->Z, r->Y);
+ fe_sub(r->Z, t0, r->T);
+ fe_add(r->T, t0, r->T);
+}
+
+static uint8_t equal(signed char b, signed char c) {
+ uint8_t ub = b;
+ uint8_t uc = c;
+ uint8_t x = ub ^ uc; /* 0: yes; 1..255: no */
+ uint32_t y = x; /* 0: yes; 1..255: no */
+ y -= 1; /* 4294967295: yes; 0..254: no */
+ y >>= 31; /* 1: yes; 0: no */
+ return y;
+}
+
+static void cmov(ge_precomp *t, const ge_precomp *u, uint8_t b) {
+ fe_cmov(t->yplusx, u->yplusx, b);
+ fe_cmov(t->yminusx, u->yminusx, b);
+ fe_cmov(t->xy2d, u->xy2d, b);
+}
+
+void x25519_ge_scalarmult_small_precomp(
+ ge_p3 *h, const uint8_t a[32], const uint8_t precomp_table[15 * 2 * 32]) {
+ /* precomp_table is first expanded into matching |ge_precomp|
+ * elements. */
+ ge_precomp multiples[15];
+
+ unsigned i;
+ for (i = 0; i < 15; i++) {
+ const uint8_t *bytes = &precomp_table[i*(2 * 32)];
+ fe x, y;
+ fe_frombytes(x, bytes);
+ fe_frombytes(y, bytes + 32);
+
+ ge_precomp *out = &multiples[i];
+ fe_add(out->yplusx, y, x);
+ fe_sub(out->yminusx, y, x);
+ fe_mul(out->xy2d, x, y);
+ fe_mul(out->xy2d, out->xy2d, d2);
+ }
+
+ /* See the comment above |k25519SmallPrecomp| about the structure of the
+ * precomputed elements. This loop does 64 additions and 64 doublings to
+ * calculate the result. */
+ ge_p3_0(h);
+
+ for (i = 63; i < 64; i--) {
+ unsigned j;
+ signed char index = 0;
+
+ for (j = 0; j < 4; j++) {
+ const uint8_t bit = 1 & (a[(8 * j) + (i / 8)] >> (i & 7));
+ index |= (bit << j);
+ }
+
+ ge_precomp e;
+ ge_precomp_0(&e);
+
+ for (j = 1; j < 16; j++) {
+ cmov(&e, &multiples[j-1], equal(index, j));
+ }
+
+ ge_cached cached;
+ ge_p1p1 r;
+ x25519_ge_p3_to_cached(&cached, h);
+ x25519_ge_add(&r, h, &cached);
+ x25519_ge_p1p1_to_p3(h, &r);
+
+ ge_madd(&r, h, &e);
+ x25519_ge_p1p1_to_p3(h, &r);
+ }
+}
+
+#if defined(OPENSSL_SMALL)
+
+/* This block of code replaces the standard base-point table with a much smaller
+ * one. The standard table is 30,720 bytes while this one is just 960.
+ *
+ * This table contains 15 pairs of group elements, (x, y), where each field
+ * element is serialised with |fe_tobytes|. If |i| is the index of the group
+ * element then consider i+1 as a four-bit number: (i₀, i₁, i₂, i₃) (where i₀
+ * is the most significant bit). The value of the group element is then:
+ * (i₀×2^192 + i₁×2^128 + i₂×2^64 + i₃)G, where G is the generator. */
+static const uint8_t k25519SmallPrecomp[15 * 2 * 32] = {
+ 0x1a, 0xd5, 0x25, 0x8f, 0x60, 0x2d, 0x56, 0xc9, 0xb2, 0xa7, 0x25, 0x95,
+ 0x60, 0xc7, 0x2c, 0x69, 0x5c, 0xdc, 0xd6, 0xfd, 0x31, 0xe2, 0xa4, 0xc0,
+ 0xfe, 0x53, 0x6e, 0xcd, 0xd3, 0x36, 0x69, 0x21, 0x58, 0x66, 0x66, 0x66,
+ 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
+ 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
+ 0x66, 0x66, 0x66, 0x66, 0x02, 0xa2, 0xed, 0xf4, 0x8f, 0x6b, 0x0b, 0x3e,
+ 0xeb, 0x35, 0x1a, 0xd5, 0x7e, 0xdb, 0x78, 0x00, 0x96, 0x8a, 0xa0, 0xb4,
+ 0xcf, 0x60, 0x4b, 0xd4, 0xd5, 0xf9, 0x2d, 0xbf, 0x88, 0xbd, 0x22, 0x62,
+ 0x13, 0x53, 0xe4, 0x82, 0x57, 0xfa, 0x1e, 0x8f, 0x06, 0x2b, 0x90, 0xba,
+ 0x08, 0xb6, 0x10, 0x54, 0x4f, 0x7c, 0x1b, 0x26, 0xed, 0xda, 0x6b, 0xdd,
+ 0x25, 0xd0, 0x4e, 0xea, 0x42, 0xbb, 0x25, 0x03, 0xa2, 0xfb, 0xcc, 0x61,
+ 0x67, 0x06, 0x70, 0x1a, 0xc4, 0x78, 0x3a, 0xff, 0x32, 0x62, 0xdd, 0x2c,
+ 0xab, 0x50, 0x19, 0x3b, 0xf2, 0x9b, 0x7d, 0xb8, 0xfd, 0x4f, 0x29, 0x9c,
+ 0xa7, 0x91, 0xba, 0x0e, 0x46, 0x5e, 0x51, 0xfe, 0x1d, 0xbf, 0xe5, 0xe5,
+ 0x9b, 0x95, 0x0d, 0x67, 0xf8, 0xd1, 0xb5, 0x5a, 0xa1, 0x93, 0x2c, 0xc3,
+ 0xde, 0x0e, 0x97, 0x85, 0x2d, 0x7f, 0xea, 0xab, 0x3e, 0x47, 0x30, 0x18,
+ 0x24, 0xe8, 0xb7, 0x60, 0xae, 0x47, 0x80, 0xfc, 0xe5, 0x23, 0xe7, 0xc2,
+ 0xc9, 0x85, 0xe6, 0x98, 0xa0, 0x29, 0x4e, 0xe1, 0x84, 0x39, 0x2d, 0x95,
+ 0x2c, 0xf3, 0x45, 0x3c, 0xff, 0xaf, 0x27, 0x4c, 0x6b, 0xa6, 0xf5, 0x4b,
+ 0x11, 0xbd, 0xba, 0x5b, 0x9e, 0xc4, 0xa4, 0x51, 0x1e, 0xbe, 0xd0, 0x90,
+ 0x3a, 0x9c, 0xc2, 0x26, 0xb6, 0x1e, 0xf1, 0x95, 0x7d, 0xc8, 0x6d, 0x52,
+ 0xe6, 0x99, 0x2c, 0x5f, 0x9a, 0x96, 0x0c, 0x68, 0x29, 0xfd, 0xe2, 0xfb,
+ 0xe6, 0xbc, 0xec, 0x31, 0x08, 0xec, 0xe6, 0xb0, 0x53, 0x60, 0xc3, 0x8c,
+ 0xbe, 0xc1, 0xb3, 0x8a, 0x8f, 0xe4, 0x88, 0x2b, 0x55, 0xe5, 0x64, 0x6e,
+ 0x9b, 0xd0, 0xaf, 0x7b, 0x64, 0x2a, 0x35, 0x25, 0x10, 0x52, 0xc5, 0x9e,
+ 0x58, 0x11, 0x39, 0x36, 0x45, 0x51, 0xb8, 0x39, 0x93, 0xfc, 0x9d, 0x6a,
+ 0xbe, 0x58, 0xcb, 0xa4, 0x0f, 0x51, 0x3c, 0x38, 0x05, 0xca, 0xab, 0x43,
+ 0x63, 0x0e, 0xf3, 0x8b, 0x41, 0xa6, 0xf8, 0x9b, 0x53, 0x70, 0x80, 0x53,
+ 0x86, 0x5e, 0x8f, 0xe3, 0xc3, 0x0d, 0x18, 0xc8, 0x4b, 0x34, 0x1f, 0xd8,
+ 0x1d, 0xbc, 0xf2, 0x6d, 0x34, 0x3a, 0xbe, 0xdf, 0xd9, 0xf6, 0xf3, 0x89,
+ 0xa1, 0xe1, 0x94, 0x9f, 0x5d, 0x4c, 0x5d, 0xe9, 0xa1, 0x49, 0x92, 0xef,
+ 0x0e, 0x53, 0x81, 0x89, 0x58, 0x87, 0xa6, 0x37, 0xf1, 0xdd, 0x62, 0x60,
+ 0x63, 0x5a, 0x9d, 0x1b, 0x8c, 0xc6, 0x7d, 0x52, 0xea, 0x70, 0x09, 0x6a,
+ 0xe1, 0x32, 0xf3, 0x73, 0x21, 0x1f, 0x07, 0x7b, 0x7c, 0x9b, 0x49, 0xd8,
+ 0xc0, 0xf3, 0x25, 0x72, 0x6f, 0x9d, 0xed, 0x31, 0x67, 0x36, 0x36, 0x54,
+ 0x40, 0x92, 0x71, 0xe6, 0x11, 0x28, 0x11, 0xad, 0x93, 0x32, 0x85, 0x7b,
+ 0x3e, 0xb7, 0x3b, 0x49, 0x13, 0x1c, 0x07, 0xb0, 0x2e, 0x93, 0xaa, 0xfd,
+ 0xfd, 0x28, 0x47, 0x3d, 0x8d, 0xd2, 0xda, 0xc7, 0x44, 0xd6, 0x7a, 0xdb,
+ 0x26, 0x7d, 0x1d, 0xb8, 0xe1, 0xde, 0x9d, 0x7a, 0x7d, 0x17, 0x7e, 0x1c,
+ 0x37, 0x04, 0x8d, 0x2d, 0x7c, 0x5e, 0x18, 0x38, 0x1e, 0xaf, 0xc7, 0x1b,
+ 0x33, 0x48, 0x31, 0x00, 0x59, 0xf6, 0xf2, 0xca, 0x0f, 0x27, 0x1b, 0x63,
+ 0x12, 0x7e, 0x02, 0x1d, 0x49, 0xc0, 0x5d, 0x79, 0x87, 0xef, 0x5e, 0x7a,
+ 0x2f, 0x1f, 0x66, 0x55, 0xd8, 0x09, 0xd9, 0x61, 0x38, 0x68, 0xb0, 0x07,
+ 0xa3, 0xfc, 0xcc, 0x85, 0x10, 0x7f, 0x4c, 0x65, 0x65, 0xb3, 0xfa, 0xfa,
+ 0xa5, 0x53, 0x6f, 0xdb, 0x74, 0x4c, 0x56, 0x46, 0x03, 0xe2, 0xd5, 0x7a,
+ 0x29, 0x1c, 0xc6, 0x02, 0xbc, 0x59, 0xf2, 0x04, 0x75, 0x63, 0xc0, 0x84,
+ 0x2f, 0x60, 0x1c, 0x67, 0x76, 0xfd, 0x63, 0x86, 0xf3, 0xfa, 0xbf, 0xdc,
+ 0xd2, 0x2d, 0x90, 0x91, 0xbd, 0x33, 0xa9, 0xe5, 0x66, 0x0c, 0xda, 0x42,
+ 0x27, 0xca, 0xf4, 0x66, 0xc2, 0xec, 0x92, 0x14, 0x57, 0x06, 0x63, 0xd0,
+ 0x4d, 0x15, 0x06, 0xeb, 0x69, 0x58, 0x4f, 0x77, 0xc5, 0x8b, 0xc7, 0xf0,
+ 0x8e, 0xed, 0x64, 0xa0, 0xb3, 0x3c, 0x66, 0x71, 0xc6, 0x2d, 0xda, 0x0a,
+ 0x0d, 0xfe, 0x70, 0x27, 0x64, 0xf8, 0x27, 0xfa, 0xf6, 0x5f, 0x30, 0xa5,
+ 0x0d, 0x6c, 0xda, 0xf2, 0x62, 0x5e, 0x78, 0x47, 0xd3, 0x66, 0x00, 0x1c,
+ 0xfd, 0x56, 0x1f, 0x5d, 0x3f, 0x6f, 0xf4, 0x4c, 0xd8, 0xfd, 0x0e, 0x27,
+ 0xc9, 0x5c, 0x2b, 0xbc, 0xc0, 0xa4, 0xe7, 0x23, 0x29, 0x02, 0x9f, 0x31,
+ 0xd6, 0xe9, 0xd7, 0x96, 0xf4, 0xe0, 0x5e, 0x0b, 0x0e, 0x13, 0xee, 0x3c,
+ 0x09, 0xed, 0xf2, 0x3d, 0x76, 0x91, 0xc3, 0xa4, 0x97, 0xae, 0xd4, 0x87,
+ 0xd0, 0x5d, 0xf6, 0x18, 0x47, 0x1f, 0x1d, 0x67, 0xf2, 0xcf, 0x63, 0xa0,
+ 0x91, 0x27, 0xf8, 0x93, 0x45, 0x75, 0x23, 0x3f, 0xd1, 0xf1, 0xad, 0x23,
+ 0xdd, 0x64, 0x93, 0x96, 0x41, 0x70, 0x7f, 0xf7, 0xf5, 0xa9, 0x89, 0xa2,
+ 0x34, 0xb0, 0x8d, 0x1b, 0xae, 0x19, 0x15, 0x49, 0x58, 0x23, 0x6d, 0x87,
+ 0x15, 0x4f, 0x81, 0x76, 0xfb, 0x23, 0xb5, 0xea, 0xcf, 0xac, 0x54, 0x8d,
+ 0x4e, 0x42, 0x2f, 0xeb, 0x0f, 0x63, 0xdb, 0x68, 0x37, 0xa8, 0xcf, 0x8b,
+ 0xab, 0xf5, 0xa4, 0x6e, 0x96, 0x2a, 0xb2, 0xd6, 0xbe, 0x9e, 0xbd, 0x0d,
+ 0xb4, 0x42, 0xa9, 0xcf, 0x01, 0x83, 0x8a, 0x17, 0x47, 0x76, 0xc4, 0xc6,
+ 0x83, 0x04, 0x95, 0x0b, 0xfc, 0x11, 0xc9, 0x62, 0xb8, 0x0c, 0x76, 0x84,
+ 0xd9, 0xb9, 0x37, 0xfa, 0xfc, 0x7c, 0xc2, 0x6d, 0x58, 0x3e, 0xb3, 0x04,
+ 0xbb, 0x8c, 0x8f, 0x48, 0xbc, 0x91, 0x27, 0xcc, 0xf9, 0xb7, 0x22, 0x19,
+ 0x83, 0x2e, 0x09, 0xb5, 0x72, 0xd9, 0x54, 0x1c, 0x4d, 0xa1, 0xea, 0x0b,
+ 0xf1, 0xc6, 0x08, 0x72, 0x46, 0x87, 0x7a, 0x6e, 0x80, 0x56, 0x0a, 0x8a,
+ 0xc0, 0xdd, 0x11, 0x6b, 0xd6, 0xdd, 0x47, 0xdf, 0x10, 0xd9, 0xd8, 0xea,
+ 0x7c, 0xb0, 0x8f, 0x03, 0x00, 0x2e, 0xc1, 0x8f, 0x44, 0xa8, 0xd3, 0x30,
+ 0x06, 0x89, 0xa2, 0xf9, 0x34, 0xad, 0xdc, 0x03, 0x85, 0xed, 0x51, 0xa7,
+ 0x82, 0x9c, 0xe7, 0x5d, 0x52, 0x93, 0x0c, 0x32, 0x9a, 0x5b, 0xe1, 0xaa,
+ 0xca, 0xb8, 0x02, 0x6d, 0x3a, 0xd4, 0xb1, 0x3a, 0xf0, 0x5f, 0xbe, 0xb5,
+ 0x0d, 0x10, 0x6b, 0x38, 0x32, 0xac, 0x76, 0x80, 0xbd, 0xca, 0x94, 0x71,
+ 0x7a, 0xf2, 0xc9, 0x35, 0x2a, 0xde, 0x9f, 0x42, 0x49, 0x18, 0x01, 0xab,
+ 0xbc, 0xef, 0x7c, 0x64, 0x3f, 0x58, 0x3d, 0x92, 0x59, 0xdb, 0x13, 0xdb,
+ 0x58, 0x6e, 0x0a, 0xe0, 0xb7, 0x91, 0x4a, 0x08, 0x20, 0xd6, 0x2e, 0x3c,
+ 0x45, 0xc9, 0x8b, 0x17, 0x79, 0xe7, 0xc7, 0x90, 0x99, 0x3a, 0x18, 0x25,
+};
+
+void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32]) {
+ x25519_ge_scalarmult_small_precomp(h, a, k25519SmallPrecomp);
+}
+
+#else
+
+/* k25519Precomp[i][j] = (j+1)*256^i*B */
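+/* Each entry is stored as the |ge_precomp| triple (y+x, y-x, 2dxy), the
+ * form consumed directly by |ge_madd|. */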
+static const ge_precomp k25519Precomp[32][8] = {
+ {
+ {
+ {25967493, -14356035, 29566456, 3660896, -12694345, 4014787,
+ 27544626, -11754271, -6079156, 2047605},
+ {-12545711, 934262, -2722910, 3049990, -727428, 9406986, 12720692,
+ 5043384, 19500929, -15469378},
+ {-8738181, 4489570, 9688441, -14785194, 10184609, -12363380,
+ 29287919, 11864899, -24514362, -4438546},
+ },
+ {
+ {-12815894, -12976347, -21581243, 11784320, -25355658, -2750717,
+ -11717903, -3814571, -358445, -10211303},
+ {-21703237, 6903825, 27185491, 6451973, -29577724, -9554005,
+ -15616551, 11189268, -26829678, -5319081},
+ {26966642, 11152617, 32442495, 15396054, 14353839, -12752335,
+ -3128826, -9541118, -15472047, -4166697},
+ },
+ {
+ {15636291, -9688557, 24204773, -7912398, 616977, -16685262,
+ 27787600, -14772189, 28944400, -1550024},
+ {16568933, 4717097, -11556148, -1102322, 15682896, -11807043,
+ 16354577, -11775962, 7689662, 11199574},
+ {30464156, -5976125, -11779434, -15670865, 23220365, 15915852,
+ 7512774, 10017326, -17749093, -9920357},
+ },
+ {
+ {-17036878, 13921892, 10945806, -6033431, 27105052, -16084379,
+ -28926210, 15006023, 3284568, -6276540},
+ {23599295, -8306047, -11193664, -7687416, 13236774, 10506355,
+ 7464579, 9656445, 13059162, 10374397},
+ {7798556, 16710257, 3033922, 2874086, 28997861, 2835604, 32406664,
+ -3839045, -641708, -101325},
+ },
+ {
+ {10861363, 11473154, 27284546, 1981175, -30064349, 12577861,
+ 32867885, 14515107, -15438304, 10819380},
+ {4708026, 6336745, 20377586, 9066809, -11272109, 6594696, -25653668,
+ 12483688, -12668491, 5581306},
+ {19563160, 16186464, -29386857, 4097519, 10237984, -4348115,
+ 28542350, 13850243, -23678021, -15815942},
+ },
+ {
+ {-15371964, -12862754, 32573250, 4720197, -26436522, 5875511,
+ -19188627, -15224819, -9818940, -12085777},
+ {-8549212, 109983, 15149363, 2178705, 22900618, 4543417, 3044240,
+ -15689887, 1762328, 14866737},
+ {-18199695, -15951423, -10473290, 1707278, -17185920, 3916101,
+ -28236412, 3959421, 27914454, 4383652},
+ },
+ {
+ {5153746, 9909285, 1723747, -2777874, 30523605, 5516873, 19480852,
+ 5230134, -23952439, -15175766},
+ {-30269007, -3463509, 7665486, 10083793, 28475525, 1649722,
+ 20654025, 16520125, 30598449, 7715701},
+ {28881845, 14381568, 9657904, 3680757, -20181635, 7843316,
+ -31400660, 1370708, 29794553, -1409300},
+ },
+ {
+ {14499471, -2729599, -33191113, -4254652, 28494862, 14271267,
+ 30290735, 10876454, -33154098, 2381726},
+ {-7195431, -2655363, -14730155, 462251, -27724326, 3941372,
+ -6236617, 3696005, -32300832, 15351955},
+ {27431194, 8222322, 16448760, -3907995, -18707002, 11938355,
+ -32961401, -2970515, 29551813, 10109425},
+ },
+ },
+ {
+ {
+ {-13657040, -13155431, -31283750, 11777098, 21447386, 6519384,
+ -2378284, -1627556, 10092783, -4764171},
+ {27939166, 14210322, 4677035, 16277044, -22964462, -12398139,
+ -32508754, 12005538, -17810127, 12803510},
+ {17228999, -15661624, -1233527, 300140, -1224870, -11714777,
+ 30364213, -9038194, 18016357, 4397660},
+ },
+ {
+ {-10958843, -7690207, 4776341, -14954238, 27850028, -15602212,
+ -26619106, 14544525, -17477504, 982639},
+ {29253598, 15796703, -2863982, -9908884, 10057023, 3163536, 7332899,
+ -4120128, -21047696, 9934963},
+ {5793303, 16271923, -24131614, -10116404, 29188560, 1206517,
+ -14747930, 4559895, -30123922, -10897950},
+ },
+ {
+ {-27643952, -11493006, 16282657, -11036493, 28414021, -15012264,
+ 24191034, 4541697, -13338309, 5500568},
+ {12650548, -1497113, 9052871, 11355358, -17680037, -8400164,
+ -17430592, 12264343, 10874051, 13524335},
+ {25556948, -3045990, 714651, 2510400, 23394682, -10415330, 33119038,
+ 5080568, -22528059, 5376628},
+ },
+ {
+ {-26088264, -4011052, -17013699, -3537628, -6726793, 1920897,
+ -22321305, -9447443, 4535768, 1569007},
+ {-2255422, 14606630, -21692440, -8039818, 28430649, 8775819,
+ -30494562, 3044290, 31848280, 12543772},
+ {-22028579, 2943893, -31857513, 6777306, 13784462, -4292203,
+ -27377195, -2062731, 7718482, 14474653},
+ },
+ {
+ {2385315, 2454213, -22631320, 46603, -4437935, -15680415, 656965,
+ -7236665, 24316168, -5253567},
+ {13741529, 10911568, -33233417, -8603737, -20177830, -1033297,
+ 33040651, -13424532, -20729456, 8321686},
+ {21060490, -2212744, 15712757, -4336099, 1639040, 10656336,
+ 23845965, -11874838, -9984458, 608372},
+ },
+ {
+ {-13672732, -15087586, -10889693, -7557059, -6036909, 11305547,
+ 1123968, -6780577, 27229399, 23887},
+ {-23244140, -294205, -11744728, 14712571, -29465699, -2029617,
+ 12797024, -6440308, -1633405, 16678954},
+ {-29500620, 4770662, -16054387, 14001338, 7830047, 9564805,
+ -1508144, -4795045, -17169265, 4904953},
+ },
+ {
+ {24059557, 14617003, 19037157, -15039908, 19766093, -14906429,
+ 5169211, 16191880, 2128236, -4326833},
+ {-16981152, 4124966, -8540610, -10653797, 30336522, -14105247,
+ -29806336, 916033, -6882542, -2986532},
+ {-22630907, 12419372, -7134229, -7473371, -16478904, 16739175,
+ 285431, 2763829, 15736322, 4143876},
+ },
+ {
+ {2379352, 11839345, -4110402, -5988665, 11274298, 794957, 212801,
+ -14594663, 23527084, -16458268},
+ {33431127, -11130478, -17838966, -15626900, 8909499, 8376530,
+ -32625340, 4087881, -15188911, -14416214},
+ {1767683, 7197987, -13205226, -2022635, -13091350, 448826, 5799055,
+ 4357868, -4774191, -16323038},
+ },
+ },
+ {
+ {
+ {6721966, 13833823, -23523388, -1551314, 26354293, -11863321,
+ 23365147, -3949732, 7390890, 2759800},
+ {4409041, 2052381, 23373853, 10530217, 7676779, -12885954, 21302353,
+ -4264057, 1244380, -12919645},
+ {-4421239, 7169619, 4982368, -2957590, 30256825, -2777540, 14086413,
+ 9208236, 15886429, 16489664},
+ },
+ {
+ {1996075, 10375649, 14346367, 13311202, -6874135, -16438411,
+ -13693198, 398369, -30606455, -712933},
+ {-25307465, 9795880, -2777414, 14878809, -33531835, 14780363,
+ 13348553, 12076947, -30836462, 5113182},
+ {-17770784, 11797796, 31950843, 13929123, -25888302, 12288344,
+ -30341101, -7336386, 13847711, 5387222},
+ },
+ {
+ {-18582163, -3416217, 17824843, -2340966, 22744343, -10442611,
+ 8763061, 3617786, -19600662, 10370991},
+ {20246567, -14369378, 22358229, -543712, 18507283, -10413996,
+ 14554437, -8746092, 32232924, 16763880},
+ {9648505, 10094563, 26416693, 14745928, -30374318, -6472621,
+ 11094161, 15689506, 3140038, -16510092},
+ },
+ {
+ {-16160072, 5472695, 31895588, 4744994, 8823515, 10365685,
+ -27224800, 9448613, -28774454, 366295},
+ {19153450, 11523972, -11096490, -6503142, -24647631, 5420647,
+ 28344573, 8041113, 719605, 11671788},
+ {8678025, 2694440, -6808014, 2517372, 4964326, 11152271, -15432916,
+ -15266516, 27000813, -10195553},
+ },
+ {
+ {-15157904, 7134312, 8639287, -2814877, -7235688, 10421742, 564065,
+ 5336097, 6750977, -14521026},
+ {11836410, -3979488, 26297894, 16080799, 23455045, 15735944,
+ 1695823, -8819122, 8169720, 16220347},
+ {-18115838, 8653647, 17578566, -6092619, -8025777, -16012763,
+ -11144307, -2627664, -5990708, -14166033},
+ },
+ {
+ {-23308498, -10968312, 15213228, -10081214, -30853605, -11050004,
+ 27884329, 2847284, 2655861, 1738395},
+ {-27537433, -14253021, -25336301, -8002780, -9370762, 8129821,
+ 21651608, -3239336, -19087449, -11005278},
+ {1533110, 3437855, 23735889, 459276, 29970501, 11335377, 26030092,
+ 5821408, 10478196, 8544890},
+ },
+ {
+ {32173121, -16129311, 24896207, 3921497, 22579056, -3410854,
+ 19270449, 12217473, 17789017, -3395995},
+ {-30552961, -2228401, -15578829, -10147201, 13243889, 517024,
+ 15479401, -3853233, 30460520, 1052596},
+ {-11614875, 13323618, 32618793, 8175907, -15230173, 12596687,
+ 27491595, -4612359, 3179268, -9478891},
+ },
+ {
+ {31947069, -14366651, -4640583, -15339921, -15125977, -6039709,
+ -14756777, -16411740, 19072640, -9511060},
+ {11685058, 11822410, 3158003, -13952594, 33402194, -4165066,
+ 5977896, -5215017, 473099, 5040608},
+ {-20290863, 8198642, -27410132, 11602123, 1290375, -2799760,
+ 28326862, 1721092, -19558642, -3131606},
+ },
+ },
+ {
+ {
+ {7881532, 10687937, 7578723, 7738378, -18951012, -2553952, 21820786,
+ 8076149, -27868496, 11538389},
+ {-19935666, 3899861, 18283497, -6801568, -15728660, -11249211,
+ 8754525, 7446702, -5676054, 5797016},
+ {-11295600, -3793569, -15782110, -7964573, 12708869, -8456199,
+ 2014099, -9050574, -2369172, -5877341},
+ },
+ {
+ {-22472376, -11568741, -27682020, 1146375, 18956691, 16640559,
+ 1192730, -3714199, 15123619, 10811505},
+ {14352098, -3419715, -18942044, 10822655, 32750596, 4699007, -70363,
+ 15776356, -28886779, -11974553},
+ {-28241164, -8072475, -4978962, -5315317, 29416931, 1847569,
+ -20654173, -16484855, 4714547, -9600655},
+ },
+ {
+ {15200332, 8368572, 19679101, 15970074, -31872674, 1959451,
+ 24611599, -4543832, -11745876, 12340220},
+ {12876937, -10480056, 33134381, 6590940, -6307776, 14872440,
+ 9613953, 8241152, 15370987, 9608631},
+ {-4143277, -12014408, 8446281, -391603, 4407738, 13629032, -7724868,
+ 15866074, -28210621, -8814099},
+ },
+ {
+ {26660628, -15677655, 8393734, 358047, -7401291, 992988, -23904233,
+ 858697, 20571223, 8420556},
+ {14620715, 13067227, -15447274, 8264467, 14106269, 15080814,
+ 33531827, 12516406, -21574435, -12476749},
+ {236881, 10476226, 57258, -14677024, 6472998, 2466984, 17258519,
+ 7256740, 8791136, 15069930},
+ },
+ {
+ {1276410, -9371918, 22949635, -16322807, -23493039, -5702186,
+ 14711875, 4874229, -30663140, -2331391},
+ {5855666, 4990204, -13711848, 7294284, -7804282, 1924647, -1423175,
+ -7912378, -33069337, 9234253},
+ {20590503, -9018988, 31529744, -7352666, -2706834, 10650548,
+ 31559055, -11609587, 18979186, 13396066},
+ },
+ {
+ {24474287, 4968103, 22267082, 4407354, 24063882, -8325180,
+ -18816887, 13594782, 33514650, 7021958},
+ {-11566906, -6565505, -21365085, 15928892, -26158305, 4315421,
+ -25948728, -3916677, -21480480, 12868082},
+ {-28635013, 13504661, 19988037, -2132761, 21078225, 6443208,
+ -21446107, 2244500, -12455797, -8089383},
+ },
+ {
+ {-30595528, 13793479, -5852820, 319136, -25723172, -6263899,
+ 33086546, 8957937, -15233648, 5540521},
+ {-11630176, -11503902, -8119500, -7643073, 2620056, 1022908,
+ -23710744, -1568984, -16128528, -14962807},
+ {23152971, 775386, 27395463, 14006635, -9701118, 4649512, 1689819,
+ 892185, -11513277, -15205948},
+ },
+ {
+ {9770129, 9586738, 26496094, 4324120, 1556511, -3550024, 27453819,
+ 4763127, -19179614, 5867134},
+ {-32765025, 1927590, 31726409, -4753295, 23962434, -16019500,
+ 27846559, 5931263, -29749703, -16108455},
+ {27461885, -2977536, 22380810, 1815854, -23033753, -3031938,
+ 7283490, -15148073, -19526700, 7734629},
+ },
+ },
+ {
+ {
+ {-8010264, -9590817, -11120403, 6196038, 29344158, -13430885,
+ 7585295, -3176626, 18549497, 15302069},
+ {-32658337, -6171222, -7672793, -11051681, 6258878, 13504381,
+ 10458790, -6418461, -8872242, 8424746},
+ {24687205, 8613276, -30667046, -3233545, 1863892, -1830544,
+ 19206234, 7134917, -11284482, -828919},
+ },
+ {
+ {11334899, -9218022, 8025293, 12707519, 17523892, -10476071,
+ 10243738, -14685461, -5066034, 16498837},
+ {8911542, 6887158, -9584260, -6958590, 11145641, -9543680, 17303925,
+ -14124238, 6536641, 10543906},
+ {-28946384, 15479763, -17466835, 568876, -1497683, 11223454,
+ -2669190, -16625574, -27235709, 8876771},
+ },
+ {
+ {-25742899, -12566864, -15649966, -846607, -33026686, -796288,
+ -33481822, 15824474, -604426, -9039817},
+ {10330056, 70051, 7957388, -9002667, 9764902, 15609756, 27698697,
+ -4890037, 1657394, 3084098},
+ {10477963, -7470260, 12119566, -13250805, 29016247, -5365589,
+ 31280319, 14396151, -30233575, 15272409},
+ },
+ {
+ {-12288309, 3169463, 28813183, 16658753, 25116432, -5630466,
+ -25173957, -12636138, -25014757, 1950504},
+ {-26180358, 9489187, 11053416, -14746161, -31053720, 5825630,
+ -8384306, -8767532, 15341279, 8373727},
+ {28685821, 7759505, -14378516, -12002860, -31971820, 4079242,
+ 298136, -10232602, -2878207, 15190420},
+ },
+ {
+ {-32932876, 13806336, -14337485, -15794431, -24004620, 10940928,
+ 8669718, 2742393, -26033313, -6875003},
+ {-1580388, -11729417, -25979658, -11445023, -17411874, -10912854,
+ 9291594, -16247779, -12154742, 6048605},
+ {-30305315, 14843444, 1539301, 11864366, 20201677, 1900163,
+ 13934231, 5128323, 11213262, 9168384},
+ },
+ {
+ {-26280513, 11007847, 19408960, -940758, -18592965, -4328580,
+ -5088060, -11105150, 20470157, -16398701},
+ {-23136053, 9282192, 14855179, -15390078, -7362815, -14408560,
+ -22783952, 14461608, 14042978, 5230683},
+ {29969567, -2741594, -16711867, -8552442, 9175486, -2468974,
+ 21556951, 3506042, -5933891, -12449708},
+ },
+ {
+ {-3144746, 8744661, 19704003, 4581278, -20430686, 6830683,
+ -21284170, 8971513, -28539189, 15326563},
+ {-19464629, 10110288, -17262528, -3503892, -23500387, 1355669,
+ -15523050, 15300988, -20514118, 9168260},
+ {-5353335, 4488613, -23803248, 16314347, 7780487, -15638939,
+ -28948358, 9601605, 33087103, -9011387},
+ },
+ {
+ {-19443170, -15512900, -20797467, -12445323, -29824447, 10229461,
+ -27444329, -15000531, -5996870, 15664672},
+ {23294591, -16632613, -22650781, -8470978, 27844204, 11461195,
+ 13099750, -2460356, 18151676, 13417686},
+ {-24722913, -4176517, -31150679, 5988919, -26858785, 6685065,
+ 1661597, -12551441, 15271676, -15452665},
+ },
+ },
+ {
+ {
+ {11433042, -13228665, 8239631, -5279517, -1985436, -725718,
+ -18698764, 2167544, -6921301, -13440182},
+ {-31436171, 15575146, 30436815, 12192228, -22463353, 9395379,
+ -9917708, -8638997, 12215110, 12028277},
+ {14098400, 6555944, 23007258, 5757252, -15427832, -12950502,
+ 30123440, 4617780, -16900089, -655628},
+ },
+ {
+ {-4026201, -15240835, 11893168, 13718664, -14809462, 1847385,
+ -15819999, 10154009, 23973261, -12684474},
+ {-26531820, -3695990, -1908898, 2534301, -31870557, -16550355,
+ 18341390, -11419951, 32013174, -10103539},
+ {-25479301, 10876443, -11771086, -14625140, -12369567, 1838104,
+ 21911214, 6354752, 4425632, -837822},
+ },
+ {
+ {-10433389, -14612966, 22229858, -3091047, -13191166, 776729,
+ -17415375, -12020462, 4725005, 14044970},
+ {19268650, -7304421, 1555349, 8692754, -21474059, -9910664, 6347390,
+ -1411784, -19522291, -16109756},
+ {-24864089, 12986008, -10898878, -5558584, -11312371, -148526,
+ 19541418, 8180106, 9282262, 10282508},
+ },
+ {
+ {-26205082, 4428547, -8661196, -13194263, 4098402, -14165257,
+ 15522535, 8372215, 5542595, -10702683},
+ {-10562541, 14895633, 26814552, -16673850, -17480754, -2489360,
+ -2781891, 6993761, -18093885, 10114655},
+ {-20107055, -929418, 31422704, 10427861, -7110749, 6150669,
+ -29091755, -11529146, 25953725, -106158},
+ },
+ {
+ {-4234397, -8039292, -9119125, 3046000, 2101609, -12607294,
+ 19390020, 6094296, -3315279, 12831125},
+ {-15998678, 7578152, 5310217, 14408357, -33548620, -224739,
+ 31575954, 6326196, 7381791, -2421839},
+ {-20902779, 3296811, 24736065, -16328389, 18374254, 7318640,
+ 6295303, 8082724, -15362489, 12339664},
+ },
+ {
+ {27724736, 2291157, 6088201, -14184798, 1792727, 5857634, 13848414,
+ 15768922, 25091167, 14856294},
+ {-18866652, 8331043, 24373479, 8541013, -701998, -9269457, 12927300,
+ -12695493, -22182473, -9012899},
+ {-11423429, -5421590, 11632845, 3405020, 30536730, -11674039,
+ -27260765, 13866390, 30146206, 9142070},
+ },
+ {
+ {3924129, -15307516, -13817122, -10054960, 12291820, -668366,
+ -27702774, 9326384, -8237858, 4171294},
+ {-15921940, 16037937, 6713787, 16606682, -21612135, 2790944,
+ 26396185, 3731949, 345228, -5462949},
+ {-21327538, 13448259, 25284571, 1143661, 20614966, -8849387,
+ 2031539, -12391231, -16253183, -13582083},
+ },
+ {
+ {31016211, -16722429, 26371392, -14451233, -5027349, 14854137,
+ 17477601, 3842657, 28012650, -16405420},
+ {-5075835, 9368966, -8562079, -4600902, -15249953, 6970560,
+ -9189873, 16292057, -8867157, 3507940},
+ {29439664, 3537914, 23333589, 6997794, -17555561, -11018068,
+ -15209202, -15051267, -9164929, 6580396},
+ },
+ },
+ {
+ {
+ {-12185861, -7679788, 16438269, 10826160, -8696817, -6235611,
+ 17860444, -9273846, -2095802, 9304567},
+ {20714564, -4336911, 29088195, 7406487, 11426967, -5095705,
+ 14792667, -14608617, 5289421, -477127},
+ {-16665533, -10650790, -6160345, -13305760, 9192020, -1802462,
+ 17271490, 12349094, 26939669, -3752294},
+ },
+ {
+ {-12889898, 9373458, 31595848, 16374215, 21471720, 13221525,
+ -27283495, -12348559, -3698806, 117887},
+ {22263325, -6560050, 3984570, -11174646, -15114008, -566785,
+ 28311253, 5358056, -23319780, 541964},
+ {16259219, 3261970, 2309254, -15534474, -16885711, -4581916,
+ 24134070, -16705829, -13337066, -13552195},
+ },
+ {
+ {9378160, -13140186, -22845982, -12745264, 28198281, -7244098,
+ -2399684, -717351, 690426, 14876244},
+ {24977353, -314384, -8223969, -13465086, 28432343, -1176353,
+ -13068804, -12297348, -22380984, 6618999},
+ {-1538174, 11685646, 12944378, 13682314, -24389511, -14413193,
+ 8044829, -13817328, 32239829, -5652762},
+ },
+ {
+ {-18603066, 4762990, -926250, 8885304, -28412480, -3187315, 9781647,
+ -10350059, 32779359, 5095274},
+ {-33008130, -5214506, -32264887, -3685216, 9460461, -9327423,
+ -24601656, 14506724, 21639561, -2630236},
+ {-16400943, -13112215, 25239338, 15531969, 3987758, -4499318,
+ -1289502, -6863535, 17874574, 558605},
+ },
+ {
+ {-13600129, 10240081, 9171883, 16131053, -20869254, 9599700,
+ 33499487, 5080151, 2085892, 5119761},
+ {-22205145, -2519528, -16381601, 414691, -25019550, 2170430,
+ 30634760, -8363614, -31999993, -5759884},
+ {-6845704, 15791202, 8550074, -1312654, 29928809, -12092256,
+ 27534430, -7192145, -22351378, 12961482},
+ },
+ {
+ {-24492060, -9570771, 10368194, 11582341, -23397293, -2245287,
+ 16533930, 8206996, -30194652, -5159638},
+ {-11121496, -3382234, 2307366, 6362031, -135455, 8868177, -16835630,
+ 7031275, 7589640, 8945490},
+ {-32152748, 8917967, 6661220, -11677616, -1192060, -15793393,
+ 7251489, -11182180, 24099109, -14456170},
+ },
+ {
+ {5019558, -7907470, 4244127, -14714356, -26933272, 6453165,
+ -19118182, -13289025, -6231896, -10280736},
+ {10853594, 10721687, 26480089, 5861829, -22995819, 1972175,
+ -1866647, -10557898, -3363451, -6441124},
+ {-17002408, 5906790, 221599, -6563147, 7828208, -13248918, 24362661,
+ -2008168, -13866408, 7421392},
+ },
+ {
+ {8139927, -6546497, 32257646, -5890546, 30375719, 1886181,
+ -21175108, 15441252, 28826358, -4123029},
+ {6267086, 9695052, 7709135, -16603597, -32869068, -1886135,
+ 14795160, -7840124, 13746021, -1742048},
+ {28584902, 7787108, -6732942, -15050729, 22846041, -7571236,
+ -3181936, -363524, 4771362, -8419958},
+ },
+ },
+ {
+ {
+ {24949256, 6376279, -27466481, -8174608, -18646154, -9930606,
+ 33543569, -12141695, 3569627, 11342593},
+ {26514989, 4740088, 27912651, 3697550, 19331575, -11472339, 6809886,
+ 4608608, 7325975, -14801071},
+ {-11618399, -14554430, -24321212, 7655128, -1369274, 5214312,
+ -27400540, 10258390, -17646694, -8186692},
+ },
+ {
+ {11431204, 15823007, 26570245, 14329124, 18029990, 4796082,
+ -31446179, 15580664, 9280358, -3973687},
+ {-160783, -10326257, -22855316, -4304997, -20861367, -13621002,
+ -32810901, -11181622, -15545091, 4387441},
+ {-20799378, 12194512, 3937617, -5805892, -27154820, 9340370,
+ -24513992, 8548137, 20617071, -7482001},
+ },
+ {
+ {-938825, -3930586, -8714311, 16124718, 24603125, -6225393,
+ -13775352, -11875822, 24345683, 10325460},
+ {-19855277, -1568885, -22202708, 8714034, 14007766, 6928528,
+ 16318175, -1010689, 4766743, 3552007},
+ {-21751364, -16730916, 1351763, -803421, -4009670, 3950935, 3217514,
+ 14481909, 10988822, -3994762},
+ },
+ {
+ {15564307, -14311570, 3101243, 5684148, 30446780, -8051356,
+ 12677127, -6505343, -8295852, 13296005},
+ {-9442290, 6624296, -30298964, -11913677, -4670981, -2057379,
+ 31521204, 9614054, -30000824, 12074674},
+ {4771191, -135239, 14290749, -13089852, 27992298, 14998318,
+ -1413936, -1556716, 29832613, -16391035},
+ },
+ {
+ {7064884, -7541174, -19161962, -5067537, -18891269, -2912736,
+ 25825242, 5293297, -27122660, 13101590},
+ {-2298563, 2439670, -7466610, 1719965, -27267541, -16328445,
+ 32512469, -5317593, -30356070, -4190957},
+ {-30006540, 10162316, -33180176, 3981723, -16482138, -13070044,
+ 14413974, 9515896, 19568978, 9628812},
+ },
+ {
+ {33053803, 199357, 15894591, 1583059, 27380243, -4580435, -17838894,
+ -6106839, -6291786, 3437740},
+ {-18978877, 3884493, 19469877, 12726490, 15913552, 13614290,
+ -22961733, 70104, 7463304, 4176122},
+ {-27124001, 10659917, 11482427, -16070381, 12771467, -6635117,
+ -32719404, -5322751, 24216882, 5944158},
+ },
+ {
+ {8894125, 7450974, -2664149, -9765752, -28080517, -12389115,
+ 19345746, 14680796, 11632993, 5847885},
+ {26942781, -2315317, 9129564, -4906607, 26024105, 11769399,
+ -11518837, 6367194, -9727230, 4782140},
+ {19916461, -4828410, -22910704, -11414391, 25606324, -5972441,
+ 33253853, 8220911, 6358847, -1873857},
+ },
+ {
+ {801428, -2081702, 16569428, 11065167, 29875704, 96627, 7908388,
+ -4480480, -13538503, 1387155},
+ {19646058, 5720633, -11416706, 12814209, 11607948, 12749789,
+ 14147075, 15156355, -21866831, 11835260},
+ {19299512, 1155910, 28703737, 14890794, 2925026, 7269399, 26121523,
+ 15467869, -26560550, 5052483},
+ },
+ },
+ {
+ {
+ {-3017432, 10058206, 1980837, 3964243, 22160966, 12322533, -6431123,
+ -12618185, 12228557, -7003677},
+ {32944382, 14922211, -22844894, 5188528, 21913450, -8719943,
+ 4001465, 13238564, -6114803, 8653815},
+ {22865569, -4652735, 27603668, -12545395, 14348958, 8234005,
+ 24808405, 5719875, 28483275, 2841751},
+ },
+ {
+ {-16420968, -1113305, -327719, -12107856, 21886282, -15552774,
+ -1887966, -315658, 19932058, -12739203},
+ {-11656086, 10087521, -8864888, -5536143, -19278573, -3055912,
+ 3999228, 13239134, -4777469, -13910208},
+ {1382174, -11694719, 17266790, 9194690, -13324356, 9720081,
+ 20403944, 11284705, -14013818, 3093230},
+ },
+ {
+ {16650921, -11037932, -1064178, 1570629, -8329746, 7352753, -302424,
+ 16271225, -24049421, -6691850},
+ {-21911077, -5927941, -4611316, -5560156, -31744103, -10785293,
+ 24123614, 15193618, -21652117, -16739389},
+ {-9935934, -4289447, -25279823, 4372842, 2087473, 10399484,
+ 31870908, 14690798, 17361620, 11864968},
+ },
+ {
+ {-11307610, 6210372, 13206574, 5806320, -29017692, -13967200,
+ -12331205, -7486601, -25578460, -16240689},
+ {14668462, -12270235, 26039039, 15305210, 25515617, 4542480,
+ 10453892, 6577524, 9145645, -6443880},
+ {5974874, 3053895, -9433049, -10385191, -31865124, 3225009,
+ -7972642, 3936128, -5652273, -3050304},
+ },
+ {
+ {30625386, -4729400, -25555961, -12792866, -20484575, 7695099,
+ 17097188, -16303496, -27999779, 1803632},
+ {-3553091, 9865099, -5228566, 4272701, -5673832, -16689700,
+ 14911344, 12196514, -21405489, 7047412},
+ {20093277, 9920966, -11138194, -5343857, 13161587, 12044805,
+ -32856851, 4124601, -32343828, -10257566},
+ },
+ {
+ {-20788824, 14084654, -13531713, 7842147, 19119038, -13822605,
+ 4752377, -8714640, -21679658, 2288038},
+ {-26819236, -3283715, 29965059, 3039786, -14473765, 2540457,
+ 29457502, 14625692, -24819617, 12570232},
+ {-1063558, -11551823, 16920318, 12494842, 1278292, -5869109,
+ -21159943, -3498680, -11974704, 4724943},
+ },
+ {
+ {17960970, -11775534, -4140968, -9702530, -8876562, -1410617,
+ -12907383, -8659932, -29576300, 1903856},
+ {23134274, -14279132, -10681997, -1611936, 20684485, 15770816,
+ -12989750, 3190296, 26955097, 14109738},
+ {15308788, 5320727, -30113809, -14318877, 22902008, 7767164,
+ 29425325, -11277562, 31960942, 11934971},
+ },
+ {
+ {-27395711, 8435796, 4109644, 12222639, -24627868, 14818669,
+ 20638173, 4875028, 10491392, 1379718},
+ {-13159415, 9197841, 3875503, -8936108, -1383712, -5879801,
+ 33518459, 16176658, 21432314, 12180697},
+ {-11787308, 11500838, 13787581, -13832590, -22430679, 10140205,
+ 1465425, 12689540, -10301319, -13872883},
+ },
+ },
+ {
+ {
+ {5414091, -15386041, -21007664, 9643570, 12834970, 1186149,
+ -2622916, -1342231, 26128231, 6032912},
+ {-26337395, -13766162, 32496025, -13653919, 17847801, -12669156,
+ 3604025, 8316894, -25875034, -10437358},
+ {3296484, 6223048, 24680646, -12246460, -23052020, 5903205,
+ -8862297, -4639164, 12376617, 3188849},
+ },
+ {
+ {29190488, -14659046, 27549113, -1183516, 3520066, -10697301,
+ 32049515, -7309113, -16109234, -9852307},
+ {-14744486, -9309156, 735818, -598978, -20407687, -5057904,
+ 25246078, -15795669, 18640741, -960977},
+ {-6928835, -16430795, 10361374, 5642961, 4910474, 12345252,
+ -31638386, -494430, 10530747, 1053335},
+ },
+ {
+ {-29265967, -14186805, -13538216, -12117373, -19457059, -10655384,
+ -31462369, -2948985, 24018831, 15026644},
+ {-22592535, -3145277, -2289276, 5953843, -13440189, 9425631,
+ 25310643, 13003497, -2314791, -15145616},
+ {-27419985, -603321, -8043984, -1669117, -26092265, 13987819,
+ -27297622, 187899, -23166419, -2531735},
+ },
+ {
+ {-21744398, -13810475, 1844840, 5021428, -10434399, -15911473,
+ 9716667, 16266922, -5070217, 726099},
+ {29370922, -6053998, 7334071, -15342259, 9385287, 2247707,
+ -13661962, -4839461, 30007388, -15823341},
+ {-936379, 16086691, 23751945, -543318, -1167538, -5189036, 9137109,
+ 730663, 9835848, 4555336},
+ },
+ {
+ {-23376435, 1410446, -22253753, -12899614, 30867635, 15826977,
+ 17693930, 544696, -11985298, 12422646},
+ {31117226, -12215734, -13502838, 6561947, -9876867, -12757670,
+ -5118685, -4096706, 29120153, 13924425},
+ {-17400879, -14233209, 19675799, -2734756, -11006962, -5858820,
+ -9383939, -11317700, 7240931, -237388},
+ },
+ {
+ {-31361739, -11346780, -15007447, -5856218, -22453340, -12152771,
+ 1222336, 4389483, 3293637, -15551743},
+ {-16684801, -14444245, 11038544, 11054958, -13801175, -3338533,
+ -24319580, 7733547, 12796905, -6335822},
+ {-8759414, -10817836, -25418864, 10783769, -30615557, -9746811,
+ -28253339, 3647836, 3222231, -11160462},
+ },
+ {
+ {18606113, 1693100, -25448386, -15170272, 4112353, 10045021,
+ 23603893, -2048234, -7550776, 2484985},
+ {9255317, -3131197, -12156162, -1004256, 13098013, -9214866,
+ 16377220, -2102812, -19802075, -3034702},
+ {-22729289, 7496160, -5742199, 11329249, 19991973, -3347502,
+ -31718148, 9936966, -30097688, -10618797},
+ },
+ {
+ {21878590, -5001297, 4338336, 13643897, -3036865, 13160960,
+ 19708896, 5415497, -7360503, -4109293},
+ {27736861, 10103576, 12500508, 8502413, -3413016, -9633558,
+ 10436918, -1550276, -23659143, -8132100},
+ {19492550, -12104365, -29681976, -852630, -3208171, 12403437,
+ 30066266, 8367329, 13243957, 8709688},
+ },
+ },
+ {
+ {
+ {12015105, 2801261, 28198131, 10151021, 24818120, -4743133,
+ -11194191, -5645734, 5150968, 7274186},
+ {2831366, -12492146, 1478975, 6122054, 23825128, -12733586,
+ 31097299, 6083058, 31021603, -9793610},
+ {-2529932, -2229646, 445613, 10720828, -13849527, -11505937,
+ -23507731, 16354465, 15067285, -14147707},
+ },
+ {
+ {7840942, 14037873, -33364863, 15934016, -728213, -3642706,
+ 21403988, 1057586, -19379462, -12403220},
+ {915865, -16469274, 15608285, -8789130, -24357026, 6060030,
+ -17371319, 8410997, -7220461, 16527025},
+ {32922597, -556987, 20336074, -16184568, 10903705, -5384487,
+ 16957574, 52992, 23834301, 6588044},
+ },
+ {
+ {32752030, 11232950, 3381995, -8714866, 22652988, -10744103,
+ 17159699, 16689107, -20314580, -1305992},
+ {-4689649, 9166776, -25710296, -10847306, 11576752, 12733943,
+ 7924251, -2752281, 1976123, -7249027},
+ {21251222, 16309901, -2983015, -6783122, 30810597, 12967303, 156041,
+ -3371252, 12331345, -8237197},
+ },
+ {
+ {8651614, -4477032, -16085636, -4996994, 13002507, 2950805,
+ 29054427, -5106970, 10008136, -4667901},
+ {31486080, 15114593, -14261250, 12951354, 14369431, -7387845,
+ 16347321, -13662089, 8684155, -10532952},
+ {19443825, 11385320, 24468943, -9659068, -23919258, 2187569,
+ -26263207, -6086921, 31316348, 14219878},
+ },
+ {
+ {-28594490, 1193785, 32245219, 11392485, 31092169, 15722801,
+ 27146014, 6992409, 29126555, 9207390},
+ {32382935, 1110093, 18477781, 11028262, -27411763, -7548111,
+ -4980517, 10843782, -7957600, -14435730},
+ {2814918, 7836403, 27519878, -7868156, -20894015, -11553689,
+ -21494559, 8550130, 28346258, 1994730},
+ },
+ {
+ {-19578299, 8085545, -14000519, -3948622, 2785838, -16231307,
+ -19516951, 7174894, 22628102, 8115180},
+ {-30405132, 955511, -11133838, -15078069, -32447087, -13278079,
+ -25651578, 3317160, -9943017, 930272},
+ {-15303681, -6833769, 28856490, 1357446, 23421993, 1057177,
+ 24091212, -1388970, -22765376, -10650715},
+ },
+ {
+ {-22751231, -5303997, -12907607, -12768866, -15811511, -7797053,
+ -14839018, -16554220, -1867018, 8398970},
+ {-31969310, 2106403, -4736360, 1362501, 12813763, 16200670,
+ 22981545, -6291273, 18009408, -15772772},
+ {-17220923, -9545221, -27784654, 14166835, 29815394, 7444469,
+ 29551787, -3727419, 19288549, 1325865},
+ },
+ {
+ {15100157, -15835752, -23923978, -1005098, -26450192, 15509408,
+ 12376730, -3479146, 33166107, -8042750},
+ {20909231, 13023121, -9209752, 16251778, -5778415, -8094914,
+ 12412151, 10018715, 2213263, -13878373},
+ {32529814, -11074689, 30361439, -16689753, -9135940, 1513226,
+ 22922121, 6382134, -5766928, 8371348},
+ },
+ },
+ {
+ {
+ {9923462, 11271500, 12616794, 3544722, -29998368, -1721626,
+ 12891687, -8193132, -26442943, 10486144},
+ {-22597207, -7012665, 8587003, -8257861, 4084309, -12970062, 361726,
+ 2610596, -23921530, -11455195},
+ {5408411, -1136691, -4969122, 10561668, 24145918, 14240566,
+ 31319731, -4235541, 19985175, -3436086},
+ },
+ {
+ {-13994457, 16616821, 14549246, 3341099, 32155958, 13648976,
+ -17577068, 8849297, 65030, 8370684},
+ {-8320926, -12049626, 31204563, 5839400, -20627288, -1057277,
+ -19442942, 6922164, 12743482, -9800518},
+ {-2361371, 12678785, 28815050, 4759974, -23893047, 4884717,
+ 23783145, 11038569, 18800704, 255233},
+ },
+ {
+ {-5269658, -1773886, 13957886, 7990715, 23132995, 728773, 13393847,
+ 9066957, 19258688, -14753793},
+ {-2936654, -10827535, -10432089, 14516793, -3640786, 4372541,
+ -31934921, 2209390, -1524053, 2055794},
+ {580882, 16705327, 5468415, -2683018, -30926419, -14696000,
+ -7203346, -8994389, -30021019, 7394435},
+ },
+ {
+ {23838809, 1822728, -15738443, 15242727, 8318092, -3733104,
+ -21672180, -3492205, -4821741, 14799921},
+ {13345610, 9759151, 3371034, -16137791, 16353039, 8577942, 31129804,
+ 13496856, -9056018, 7402518},
+ {2286874, -4435931, -20042458, -2008336, -13696227, 5038122,
+ 11006906, -15760352, 8205061, 1607563},
+ },
+ {
+ {14414086, -8002132, 3331830, -3208217, 22249151, -5594188,
+ 18364661, -2906958, 30019587, -9029278},
+ {-27688051, 1585953, -10775053, 931069, -29120221, -11002319,
+ -14410829, 12029093, 9944378, 8024},
+ {4368715, -3709630, 29874200, -15022983, -20230386, -11410704,
+ -16114594, -999085, -8142388, 5640030},
+ },
+ {
+ {10299610, 13746483, 11661824, 16234854, 7630238, 5998374, 9809887,
+ -16694564, 15219798, -14327783},
+ {27425505, -5719081, 3055006, 10660664, 23458024, 595578, -15398605,
+ -1173195, -18342183, 9742717},
+ {6744077, 2427284, 26042789, 2720740, -847906, 1118974, 32324614,
+ 7406442, 12420155, 1994844},
+ },
+ {
+ {14012521, -5024720, -18384453, -9578469, -26485342, -3936439,
+ -13033478, -10909803, 24319929, -6446333},
+ {16412690, -4507367, 10772641, 15929391, -17068788, -4658621,
+ 10555945, -10484049, -30102368, -4739048},
+ {22397382, -7767684, -9293161, -12792868, 17166287, -9755136,
+ -27333065, 6199366, 21880021, -12250760},
+ },
+ {
+ {-4283307, 5368523, -31117018, 8163389, -30323063, 3209128,
+ 16557151, 8890729, 8840445, 4957760},
+ {-15447727, 709327, -6919446, -10870178, -29777922, 6522332,
+ -21720181, 12130072, -14796503, 5005757},
+ {-2114751, -14308128, 23019042, 15765735, -25269683, 6002752,
+ 10183197, -13239326, -16395286, -2176112},
+ },
+ },
+ {
+ {
+ {-19025756, 1632005, 13466291, -7995100, -23640451, 16573537,
+ -32013908, -3057104, 22208662, 2000468},
+ {3065073, -1412761, -25598674, -361432, -17683065, -5703415,
+ -8164212, 11248527, -3691214, -7414184},
+ {10379208, -6045554, 8877319, 1473647, -29291284, -12507580,
+ 16690915, 2553332, -3132688, 16400289},
+ },
+ {
+ {15716668, 1254266, -18472690, 7446274, -8448918, 6344164,
+ -22097271, -7285580, 26894937, 9132066},
+ {24158887, 12938817, 11085297, -8177598, -28063478, -4457083,
+ -30576463, 64452, -6817084, -2692882},
+ {13488534, 7794716, 22236231, 5989356, 25426474, -12578208, 2350710,
+ -3418511, -4688006, 2364226},
+ },
+ {
+ {16335052, 9132434, 25640582, 6678888, 1725628, 8517937, -11807024,
+ -11697457, 15445875, -7798101},
+ {29004207, -7867081, 28661402, -640412, -12794003, -7943086,
+ 31863255, -4135540, -278050, -15759279},
+ {-6122061, -14866665, -28614905, 14569919, -10857999, -3591829,
+ 10343412, -6976290, -29828287, -10815811},
+ },
+ {
+ {27081650, 3463984, 14099042, -4517604, 1616303, -6205604, 29542636,
+ 15372179, 17293797, 960709},
+ {20263915, 11434237, -5765435, 11236810, 13505955, -10857102,
+ -16111345, 6493122, -19384511, 7639714},
+ {-2830798, -14839232, 25403038, -8215196, -8317012, -16173699,
+ 18006287, -16043750, 29994677, -15808121},
+ },
+ {
+ {9769828, 5202651, -24157398, -13631392, -28051003, -11561624,
+ -24613141, -13860782, -31184575, 709464},
+ {12286395, 13076066, -21775189, -1176622, -25003198, 4057652,
+ -32018128, -8890874, 16102007, 13205847},
+ {13733362, 5599946, 10557076, 3195751, -5557991, 8536970, -25540170,
+ 8525972, 10151379, 10394400},
+ },
+ {
+ {4024660, -16137551, 22436262, 12276534, -9099015, -2686099,
+ 19698229, 11743039, -33302334, 8934414},
+ {-15879800, -4525240, -8580747, -2934061, 14634845, -698278,
+ -9449077, 3137094, -11536886, 11721158},
+ {17555939, -5013938, 8268606, 2331751, -22738815, 9761013, 9319229,
+ 8835153, -9205489, -1280045},
+ },
+ {
+ {-461409, -7830014, 20614118, 16688288, -7514766, -4807119,
+ 22300304, 505429, 6108462, -6183415},
+ {-5070281, 12367917, -30663534, 3234473, 32617080, -8422642,
+ 29880583, -13483331, -26898490, -7867459},
+ {-31975283, 5726539, 26934134, 10237677, -3173717, -605053,
+ 24199304, 3795095, 7592688, -14992079},
+ },
+ {
+ {21594432, -14964228, 17466408, -4077222, 32537084, 2739898,
+ 6407723, 12018833, -28256052, 4298412},
+ {-20650503, -11961496, -27236275, 570498, 3767144, -1717540,
+ 13891942, -1569194, 13717174, 10805743},
+ {-14676630, -15644296, 15287174, 11927123, 24177847, -8175568,
+ -796431, 14860609, -26938930, -5863836},
+ },
+ },
+ {
+ {
+ {12962541, 5311799, -10060768, 11658280, 18855286, -7954201,
+ 13286263, -12808704, -4381056, 9882022},
+ {18512079, 11319350, -20123124, 15090309, 18818594, 5271736,
+ -22727904, 3666879, -23967430, -3299429},
+ {-6789020, -3146043, 16192429, 13241070, 15898607, -14206114,
+ -10084880, -6661110, -2403099, 5276065},
+ },
+ {
+ {30169808, -5317648, 26306206, -11750859, 27814964, 7069267,
+ 7152851, 3684982, 1449224, 13082861},
+ {10342826, 3098505, 2119311, 193222, 25702612, 12233820, 23697382,
+ 15056736, -21016438, -8202000},
+ {-33150110, 3261608, 22745853, 7948688, 19370557, -15177665,
+ -26171976, 6482814, -10300080, -11060101},
+ },
+ {
+ {32869458, -5408545, 25609743, 15678670, -10687769, -15471071,
+ 26112421, 2521008, -22664288, 6904815},
+ {29506923, 4457497, 3377935, -9796444, -30510046, 12935080, 1561737,
+ 3841096, -29003639, -6657642},
+ {10340844, -6630377, -18656632, -2278430, 12621151, -13339055,
+ 30878497, -11824370, -25584551, 5181966},
+ },
+ {
+ {25940115, -12658025, 17324188, -10307374, -8671468, 15029094,
+ 24396252, -16450922, -2322852, -12388574},
+ {-21765684, 9916823, -1300409, 4079498, -1028346, 11909559, 1782390,
+ 12641087, 20603771, -6561742},
+ {-18882287, -11673380, 24849422, 11501709, 13161720, -4768874,
+ 1925523, 11914390, 4662781, 7820689},
+ },
+ {
+ {12241050, -425982, 8132691, 9393934, 32846760, -1599620, 29749456,
+ 12172924, 16136752, 15264020},
+ {-10349955, -14680563, -8211979, 2330220, -17662549, -14545780,
+ 10658213, 6671822, 19012087, 3772772},
+ {3753511, -3421066, 10617074, 2028709, 14841030, -6721664, 28718732,
+ -15762884, 20527771, 12988982},
+ },
+ {
+ {-14822485, -5797269, -3707987, 12689773, -898983, -10914866,
+ -24183046, -10564943, 3299665, -12424953},
+ {-16777703, -15253301, -9642417, 4978983, 3308785, 8755439, 6943197,
+ 6461331, -25583147, 8991218},
+ {-17226263, 1816362, -1673288, -6086439, 31783888, -8175991,
+ -32948145, 7417950, -30242287, 1507265},
+ },
+ {
+ {29692663, 6829891, -10498800, 4334896, 20945975, -11906496,
+ -28887608, 8209391, 14606362, -10647073},
+ {-3481570, 8707081, 32188102, 5672294, 22096700, 1711240, -33020695,
+ 9761487, 4170404, -2085325},
+ {-11587470, 14855945, -4127778, -1531857, -26649089, 15084046,
+ 22186522, 16002000, -14276837, -8400798},
+ },
+ {
+ {-4811456, 13761029, -31703877, -2483919, -3312471, 7869047,
+ -7113572, -9620092, 13240845, 10965870},
+ {-7742563, -8256762, -14768334, -13656260, -23232383, 12387166,
+ 4498947, 14147411, 29514390, 4302863},
+ {-13413405, -12407859, 20757302, -13801832, 14785143, 8976368,
+ -5061276, -2144373, 17846988, -13971927},
+ },
+ },
+ {
+ {
+ {-2244452, -754728, -4597030, -1066309, -6247172, 1455299,
+ -21647728, -9214789, -5222701, 12650267},
+ {-9906797, -16070310, 21134160, 12198166, -27064575, 708126, 387813,
+ 13770293, -19134326, 10958663},
+ {22470984, 12369526, 23446014, -5441109, -21520802, -9698723,
+ -11772496, -11574455, -25083830, 4271862},
+ },
+ {
+ {-25169565, -10053642, -19909332, 15361595, -5984358, 2159192,
+ 75375, -4278529, -32526221, 8469673},
+ {15854970, 4148314, -8893890, 7259002, 11666551, 13824734,
+ -30531198, 2697372, 24154791, -9460943},
+ {15446137, -15806644, 29759747, 14019369, 30811221, -9610191,
+ -31582008, 12840104, 24913809, 9815020},
+ },
+ {
+ {-4709286, -5614269, -31841498, -12288893, -14443537, 10799414,
+ -9103676, 13438769, 18735128, 9466238},
+ {11933045, 9281483, 5081055, -5183824, -2628162, -4905629, -7727821,
+ -10896103, -22728655, 16199064},
+ {14576810, 379472, -26786533, -8317236, -29426508, -10812974,
+ -102766, 1876699, 30801119, 2164795},
+ },
+ {
+ {15995086, 3199873, 13672555, 13712240, -19378835, -4647646,
+ -13081610, -15496269, -13492807, 1268052},
+ {-10290614, -3659039, -3286592, 10948818, 23037027, 3794475,
+ -3470338, -12600221, -17055369, 3565904},
+ {29210088, -9419337, -5919792, -4952785, 10834811, -13327726,
+ -16512102, -10820713, -27162222, -14030531},
+ },
+ {
+ {-13161890, 15508588, 16663704, -8156150, -28349942, 9019123,
+ -29183421, -3769423, 2244111, -14001979},
+ {-5152875, -3800936, -9306475, -6071583, 16243069, 14684434,
+ -25673088, -16180800, 13491506, 4641841},
+ {10813417, 643330, -19188515, -728916, 30292062, -16600078,
+ 27548447, -7721242, 14476989, -12767431},
+ },
+ {
+ {10292079, 9984945, 6481436, 8279905, -7251514, 7032743, 27282937,
+ -1644259, -27912810, 12651324},
+ {-31185513, -813383, 22271204, 11835308, 10201545, 15351028,
+ 17099662, 3988035, 21721536, -3148940},
+ {10202177, -6545839, -31373232, -9574638, -32150642, -8119683,
+ -12906320, 3852694, 13216206, 14842320},
+ },
+ {
+ {-15815640, -10601066, -6538952, -7258995, -6984659, -6581778,
+ -31500847, 13765824, -27434397, 9900184},
+ {14465505, -13833331, -32133984, -14738873, -27443187, 12990492,
+ 33046193, 15796406, -7051866, -8040114},
+ {30924417, -8279620, 6359016, -12816335, 16508377, 9071735,
+ -25488601, 15413635, 9524356, -7018878},
+ },
+ {
+ {12274201, -13175547, 32627641, -1785326, 6736625, 13267305,
+ 5237659, -5109483, 15663516, 4035784},
+ {-2951309, 8903985, 17349946, 601635, -16432815, -4612556,
+ -13732739, -15889334, -22258478, 4659091},
+ {-16916263, -4952973, -30393711, -15158821, 20774812, 15897498,
+ 5736189, 15026997, -2178256, -13455585},
+ },
+ },
+ {
+ {
+ {-8858980, -2219056, 28571666, -10155518, -474467, -10105698,
+ -3801496, 278095, 23440562, -290208},
+ {10226241, -5928702, 15139956, 120818, -14867693, 5218603, 32937275,
+ 11551483, -16571960, -7442864},
+ {17932739, -12437276, -24039557, 10749060, 11316803, 7535897,
+ 22503767, 5561594, -3646624, 3898661},
+ },
+ {
+ {7749907, -969567, -16339731, -16464, -25018111, 15122143, -1573531,
+ 7152530, 21831162, 1245233},
+ {26958459, -14658026, 4314586, 8346991, -5677764, 11960072,
+ -32589295, -620035, -30402091, -16716212},
+ {-12165896, 9166947, 33491384, 13673479, 29787085, 13096535,
+ 6280834, 14587357, -22338025, 13987525},
+ },
+ {
+ {-24349909, 7778775, 21116000, 15572597, -4833266, -5357778,
+ -4300898, -5124639, -7469781, -2858068},
+ {9681908, -6737123, -31951644, 13591838, -6883821, 386950, 31622781,
+ 6439245, -14581012, 4091397},
+ {-8426427, 1470727, -28109679, -1596990, 3978627, -5123623,
+ -19622683, 12092163, 29077877, -14741988},
+ },
+ {
+ {5269168, -6859726, -13230211, -8020715, 25932563, 1763552,
+ -5606110, -5505881, -20017847, 2357889},
+ {32264008, -15407652, -5387735, -1160093, -2091322, -3946900,
+ 23104804, -12869908, 5727338, 189038},
+ {14609123, -8954470, -6000566, -16622781, -14577387, -7743898,
+ -26745169, 10942115, -25888931, -14884697},
+ },
+ {
+ {20513500, 5557931, -15604613, 7829531, 26413943, -2019404,
+ -21378968, 7471781, 13913677, -5137875},
+ {-25574376, 11967826, 29233242, 12948236, -6754465, 4713227,
+ -8940970, 14059180, 12878652, 8511905},
+ {-25656801, 3393631, -2955415, -7075526, -2250709, 9366908,
+ -30223418, 6812974, 5568676, -3127656},
+ },
+ {
+ {11630004, 12144454, 2116339, 13606037, 27378885, 15676917,
+ -17408753, -13504373, -14395196, 8070818},
+ {27117696, -10007378, -31282771, -5570088, 1127282, 12772488,
+ -29845906, 10483306, -11552749, -1028714},
+ {10637467, -5688064, 5674781, 1072708, -26343588, -6982302,
+ -1683975, 9177853, -27493162, 15431203},
+ },
+ {
+ {20525145, 10892566, -12742472, 12779443, -29493034, 16150075,
+ -28240519, 14943142, -15056790, -7935931},
+ {-30024462, 5626926, -551567, -9981087, 753598, 11981191, 25244767,
+ -3239766, -3356550, 9594024},
+ {-23752644, 2636870, -5163910, -10103818, 585134, 7877383, 11345683,
+ -6492290, 13352335, -10977084},
+ },
+ {
+ {-1931799, -5407458, 3304649, -12884869, 17015806, -4877091,
+ -29783850, -7752482, -13215537, -319204},
+ {20239939, 6607058, 6203985, 3483793, -18386976, -779229, -20723742,
+ 15077870, -22750759, 14523817},
+ {27406042, -6041657, 27423596, -4497394, 4996214, 10002360,
+ -28842031, -4545494, -30172742, -4805667},
+ },
+ },
+ {
+ {
+ {11374242, 12660715, 17861383, -12540833, 10935568, 1099227,
+ -13886076, -9091740, -27727044, 11358504},
+ {-12730809, 10311867, 1510375, 10778093, -2119455, -9145702,
+ 32676003, 11149336, -26123651, 4985768},
+ {-19096303, 341147, -6197485, -239033, 15756973, -8796662, -983043,
+ 13794114, -19414307, -15621255},
+ },
+ {
+ {6490081, 11940286, 25495923, -7726360, 8668373, -8751316, 3367603,
+ 6970005, -1691065, -9004790},
+ {1656497, 13457317, 15370807, 6364910, 13605745, 8362338, -19174622,
+ -5475723, -16796596, -5031438},
+ {-22273315, -13524424, -64685, -4334223, -18605636, -10921968,
+ -20571065, -7007978, -99853, -10237333},
+ },
+ {
+ {17747465, 10039260, 19368299, -4050591, -20630635, -16041286,
+ 31992683, -15857976, -29260363, -5511971},
+ {31932027, -4986141, -19612382, 16366580, 22023614, 88450, 11371999,
+ -3744247, 4882242, -10626905},
+ {29796507, 37186, 19818052, 10115756, -11829032, 3352736, 18551198,
+ 3272828, -5190932, -4162409},
+ },
+ {
+ {12501286, 4044383, -8612957, -13392385, -32430052, 5136599,
+ -19230378, -3529697, 330070, -3659409},
+ {6384877, 2899513, 17807477, 7663917, -2358888, 12363165, 25366522,
+ -8573892, -271295, 12071499},
+ {-8365515, -4042521, 25133448, -4517355, -6211027, 2265927,
+ -32769618, 1936675, -5159697, 3829363},
+ },
+ {
+ {28425966, -5835433, -577090, -4697198, -14217555, 6870930, 7921550,
+ -6567787, 26333140, 14267664},
+ {-11067219, 11871231, 27385719, -10559544, -4585914, -11189312,
+ 10004786, -8709488, -21761224, 8930324},
+ {-21197785, -16396035, 25654216, -1725397, 12282012, 11008919,
+ 1541940, 4757911, -26491501, -16408940},
+ },
+ {
+ {13537262, -7759490, -20604840, 10961927, -5922820, -13218065,
+ -13156584, 6217254, -15943699, 13814990},
+ {-17422573, 15157790, 18705543, 29619, 24409717, -260476, 27361681,
+ 9257833, -1956526, -1776914},
+ {-25045300, -10191966, 15366585, 15166509, -13105086, 8423556,
+ -29171540, 12361135, -18685978, 4578290},
+ },
+ {
+ {24579768, 3711570, 1342322, -11180126, -27005135, 14124956,
+ -22544529, 14074919, 21964432, 8235257},
+ {-6528613, -2411497, 9442966, -5925588, 12025640, -1487420,
+ -2981514, -1669206, 13006806, 2355433},
+ {-16304899, -13605259, -6632427, -5142349, 16974359, -10911083,
+ 27202044, 1719366, 1141648, -12796236},
+ },
+ {
+ {-12863944, -13219986, -8318266, -11018091, -6810145, -4843894,
+ 13475066, -3133972, 32674895, 13715045},
+ {11423335, -5468059, 32344216, 8962751, 24989809, 9241752,
+ -13265253, 16086212, -28740881, -15642093},
+ {-1409668, 12530728, -6368726, 10847387, 19531186, -14132160,
+ -11709148, 7791794, -27245943, 4383347},
+ },
+ },
+ {
+ {
+ {-28970898, 5271447, -1266009, -9736989, -12455236, 16732599,
+ -4862407, -4906449, 27193557, 6245191},
+ {-15193956, 5362278, -1783893, 2695834, 4960227, 12840725, 23061898,
+ 3260492, 22510453, 8577507},
+ {-12632451, 11257346, -32692994, 13548177, -721004, 10879011,
+ 31168030, 13952092, -29571492, -3635906},
+ },
+ {
+ {3877321, -9572739, 32416692, 5405324, -11004407, -13656635,
+ 3759769, 11935320, 5611860, 8164018},
+ {-16275802, 14667797, 15906460, 12155291, -22111149, -9039718,
+ 32003002, -8832289, 5773085, -8422109},
+ {-23788118, -8254300, 1950875, 8937633, 18686727, 16459170, -905725,
+ 12376320, 31632953, 190926},
+ },
+ {
+ {-24593607, -16138885, -8423991, 13378746, 14162407, 6901328,
+ -8288749, 4508564, -25341555, -3627528},
+ {8884438, -5884009, 6023974, 10104341, -6881569, -4941533, 18722941,
+ -14786005, -1672488, 827625},
+ {-32720583, -16289296, -32503547, 7101210, 13354605, 2659080,
+ -1800575, -14108036, -24878478, 1541286},
+ },
+ {
+ {2901347, -1117687, 3880376, -10059388, -17620940, -3612781,
+ -21802117, -3567481, 20456845, -1885033},
+ {27019610, 12299467, -13658288, -1603234, -12861660, -4861471,
+ -19540150, -5016058, 29439641, 15138866},
+ {21536104, -6626420, -32447818, -10690208, -22408077, 5175814,
+ -5420040, -16361163, 7779328, 109896},
+ },
+ {
+ {30279744, 14648750, -8044871, 6425558, 13639621, -743509, 28698390,
+ 12180118, 23177719, -554075},
+ {26572847, 3405927, -31701700, 12890905, -19265668, 5335866,
+ -6493768, 2378492, 4439158, -13279347},
+ {-22716706, 3489070, -9225266, -332753, 18875722, -1140095,
+ 14819434, -12731527, -17717757, -5461437},
+ },
+ {
+ {-5056483, 16566551, 15953661, 3767752, -10436499, 15627060,
+ -820954, 2177225, 8550082, -15114165},
+ {-18473302, 16596775, -381660, 15663611, 22860960, 15585581,
+ -27844109, -3582739, -23260460, -8428588},
+ {-32480551, 15707275, -8205912, -5652081, 29464558, 2713815,
+ -22725137, 15860482, -21902570, 1494193},
+ },
+ {
+ {-19562091, -14087393, -25583872, -9299552, 13127842, 759709,
+ 21923482, 16529112, 8742704, 12967017},
+ {-28464899, 1553205, 32536856, -10473729, -24691605, -406174,
+ -8914625, -2933896, -29903758, 15553883},
+ {21877909, 3230008, 9881174, 10539357, -4797115, 2841332, 11543572,
+ 14513274, 19375923, -12647961},
+ },
+ {
+ {8832269, -14495485, 13253511, 5137575, 5037871, 4078777, 24880818,
+ -6222716, 2862653, 9455043},
+ {29306751, 5123106, 20245049, -14149889, 9592566, 8447059, -2077124,
+ -2990080, 15511449, 4789663},
+ {-20679756, 7004547, 8824831, -9434977, -4045704, -3750736,
+ -5754762, 108893, 23513200, 16652362},
+ },
+ },
+ {
+ {
+ {-33256173, 4144782, -4476029, -6579123, 10770039, -7155542,
+ -6650416, -12936300, -18319198, 10212860},
+ {2756081, 8598110, 7383731, -6859892, 22312759, -1105012, 21179801,
+ 2600940, -9988298, -12506466},
+ {-24645692, 13317462, -30449259, -15653928, 21365574, -10869657,
+ 11344424, 864440, -2499677, -16710063},
+ },
+ {
+ {-26432803, 6148329, -17184412, -14474154, 18782929, -275997,
+ -22561534, 211300, 2719757, 4940997},
+ {-1323882, 3911313, -6948744, 14759765, -30027150, 7851207,
+ 21690126, 8518463, 26699843, 5276295},
+ {-13149873, -6429067, 9396249, 365013, 24703301, -10488939, 1321586,
+ 149635, -15452774, 7159369},
+ },
+ {
+ {9987780, -3404759, 17507962, 9505530, 9731535, -2165514, 22356009,
+ 8312176, 22477218, -8403385},
+ {18155857, -16504990, 19744716, 9006923, 15154154, -10538976,
+ 24256460, -4864995, -22548173, 9334109},
+ {2986088, -4911893, 10776628, -3473844, 10620590, -7083203,
+ -21413845, 14253545, -22587149, 536906},
+ },
+ {
+ {4377756, 8115836, 24567078, 15495314, 11625074, 13064599, 7390551,
+ 10589625, 10838060, -15420424},
+ {-19342404, 867880, 9277171, -3218459, -14431572, -1986443,
+ 19295826, -15796950, 6378260, 699185},
+ {7895026, 4057113, -7081772, -13077756, -17886831, -323126, -716039,
+ 15693155, -5045064, -13373962},
+ },
+ {
+ {-7737563, -5869402, -14566319, -7406919, 11385654, 13201616,
+ 31730678, -10962840, -3918636, -9669325},
+ {10188286, -15770834, -7336361, 13427543, 22223443, 14896287,
+ 30743455, 7116568, -21786507, 5427593},
+ {696102, 13206899, 27047647, -10632082, 15285305, -9853179,
+ 10798490, -4578720, 19236243, 12477404},
+ },
+ {
+ {-11229439, 11243796, -17054270, -8040865, -788228, -8167967,
+ -3897669, 11180504, -23169516, 7733644},
+ {17800790, -14036179, -27000429, -11766671, 23887827, 3149671,
+ 23466177, -10538171, 10322027, 15313801},
+ {26246234, 11968874, 32263343, -5468728, 6830755, -13323031,
+ -15794704, -101982, -24449242, 10890804},
+ },
+ {
+ {-31365647, 10271363, -12660625, -6267268, 16690207, -13062544,
+ -14982212, 16484931, 25180797, -5334884},
+ {-586574, 10376444, -32586414, -11286356, 19801893, 10997610,
+ 2276632, 9482883, 316878, 13820577},
+ {-9882808, -4510367, -2115506, 16457136, -11100081, 11674996,
+ 30756178, -7515054, 30696930, -3712849},
+ },
+ {
+ {32988917, -9603412, 12499366, 7910787, -10617257, -11931514,
+ -7342816, -9985397, -32349517, 7392473},
+ {-8855661, 15927861, 9866406, -3649411, -2396914, -16655781,
+ -30409476, -9134995, 25112947, -2926644},
+ {-2504044, -436966, 25621774, -5678772, 15085042, -5479877,
+ -24884878, -13526194, 5537438, -13914319},
+ },
+ },
+ {
+ {
+ {-11225584, 2320285, -9584280, 10149187, -33444663, 5808648,
+ -14876251, -1729667, 31234590, 6090599},
+ {-9633316, 116426, 26083934, 2897444, -6364437, -2688086, 609721,
+ 15878753, -6970405, -9034768},
+ {-27757857, 247744, -15194774, -9002551, 23288161, -10011936,
+ -23869595, 6503646, 20650474, 1804084},
+ },
+ {
+ {-27589786, 15456424, 8972517, 8469608, 15640622, 4439847, 3121995,
+ -10329713, 27842616, -202328},
+ {-15306973, 2839644, 22530074, 10026331, 4602058, 5048462, 28248656,
+ 5031932, -11375082, 12714369},
+ {20807691, -7270825, 29286141, 11421711, -27876523, -13868230,
+ -21227475, 1035546, -19733229, 12796920},
+ },
+ {
+ {12076899, -14301286, -8785001, -11848922, -25012791, 16400684,
+ -17591495, -12899438, 3480665, -15182815},
+ {-32361549, 5457597, 28548107, 7833186, 7303070, -11953545,
+ -24363064, -15921875, -33374054, 2771025},
+ {-21389266, 421932, 26597266, 6860826, 22486084, -6737172,
+ -17137485, -4210226, -24552282, 15673397},
+ },
+ {
+ {-20184622, 2338216, 19788685, -9620956, -4001265, -8740893,
+ -20271184, 4733254, 3727144, -12934448},
+ {6120119, 814863, -11794402, -622716, 6812205, -15747771, 2019594,
+ 7975683, 31123697, -10958981},
+ {30069250, -11435332, 30434654, 2958439, 18399564, -976289,
+ 12296869, 9204260, -16432438, 9648165},
+ },
+ {
+ {32705432, -1550977, 30705658, 7451065, -11805606, 9631813, 3305266,
+ 5248604, -26008332, -11377501},
+ {17219865, 2375039, -31570947, -5575615, -19459679, 9219903, 294711,
+ 15298639, 2662509, -16297073},
+ {-1172927, -7558695, -4366770, -4287744, -21346413, -8434326,
+ 32087529, -1222777, 32247248, -14389861},
+ },
+ {
+ {14312628, 1221556, 17395390, -8700143, -4945741, -8684635,
+ -28197744, -9637817, -16027623, -13378845},
+ {-1428825, -9678990, -9235681, 6549687, -7383069, -468664, 23046502,
+ 9803137, 17597934, 2346211},
+ {18510800, 15337574, 26171504, 981392, -22241552, 7827556,
+ -23491134, -11323352, 3059833, -11782870},
+ },
+ {
+ {10141598, 6082907, 17829293, -1947643, 9830092, 13613136,
+ -25556636, -5544586, -33502212, 3592096},
+ {33114168, -15889352, -26525686, -13343397, 33076705, 8716171,
+ 1151462, 1521897, -982665, -6837803},
+ {-32939165, -4255815, 23947181, -324178, -33072974, -12305637,
+ -16637686, 3891704, 26353178, 693168},
+ },
+ {
+ {30374239, 1595580, -16884039, 13186931, 4600344, 406904, 9585294,
+ -400668, 31375464, 14369965},
+ {-14370654, -7772529, 1510301, 6434173, -18784789, -6262728,
+ 32732230, -13108839, 17901441, 16011505},
+ {18171223, -11934626, -12500402, 15197122, -11038147, -15230035,
+ -19172240, -16046376, 8764035, 12309598},
+ },
+ },
+ {
+ {
+ {5975908, -5243188, -19459362, -9681747, -11541277, 14015782,
+ -23665757, 1228319, 17544096, -10593782},
+ {5811932, -1715293, 3442887, -2269310, -18367348, -8359541,
+ -18044043, -15410127, -5565381, 12348900},
+ {-31399660, 11407555, 25755363, 6891399, -3256938, 14872274,
+ -24849353, 8141295, -10632534, -585479},
+ },
+ {
+ {-12675304, 694026, -5076145, 13300344, 14015258, -14451394,
+ -9698672, -11329050, 30944593, 1130208},
+ {8247766, -6710942, -26562381, -7709309, -14401939, -14648910,
+ 4652152, 2488540, 23550156, -271232},
+ {17294316, -3788438, 7026748, 15626851, 22990044, 113481, 2267737,
+ -5908146, -408818, -137719},
+ },
+ {
+ {16091085, -16253926, 18599252, 7340678, 2137637, -1221657,
+ -3364161, 14550936, 3260525, -7166271},
+ {-4910104, -13332887, 18550887, 10864893, -16459325, -7291596,
+ -23028869, -13204905, -12748722, 2701326},
+ {-8574695, 16099415, 4629974, -16340524, -20786213, -6005432,
+ -10018363, 9276971, 11329923, 1862132},
+ },
+ {
+ {14763076, -15903608, -30918270, 3689867, 3511892, 10313526,
+ -21951088, 12219231, -9037963, -940300},
+ {8894987, -3446094, 6150753, 3013931, 301220, 15693451, -31981216,
+ -2909717, -15438168, 11595570},
+ {15214962, 3537601, -26238722, -14058872, 4418657, -15230761,
+ 13947276, 10730794, -13489462, -4363670},
+ },
+ {
+ {-2538306, 7682793, 32759013, 263109, -29984731, -7955452,
+ -22332124, -10188635, 977108, 699994},
+ {-12466472, 4195084, -9211532, 550904, -15565337, 12917920,
+ 19118110, -439841, -30534533, -14337913},
+ {31788461, -14507657, 4799989, 7372237, 8808585, -14747943, 9408237,
+ -10051775, 12493932, -5409317},
+ },
+ {
+ {-25680606, 5260744, -19235809, -6284470, -3695942, 16566087,
+ 27218280, 2607121, 29375955, 6024730},
+ {842132, -2794693, -4763381, -8722815, 26332018, -12405641,
+ 11831880, 6985184, -9940361, 2854096},
+ {-4847262, -7969331, 2516242, -5847713, 9695691, -7221186, 16512645,
+ 960770, 12121869, 16648078},
+ },
+ {
+ {-15218652, 14667096, -13336229, 2013717, 30598287, -464137,
+ -31504922, -7882064, 20237806, 2838411},
+ {-19288047, 4453152, 15298546, -16178388, 22115043, -15972604,
+ 12544294, -13470457, 1068881, -12499905},
+ {-9558883, -16518835, 33238498, 13506958, 30505848, -1114596,
+ -8486907, -2630053, 12521378, 4845654},
+ },
+ {
+ {-28198521, 10744108, -2958380, 10199664, 7759311, -13088600,
+ 3409348, -873400, -6482306, -12885870},
+ {-23561822, 6230156, -20382013, 10655314, -24040585, -11621172,
+ 10477734, -1240216, -3113227, 13974498},
+ {12966261, 15550616, -32038948, -1615346, 21025980, -629444,
+ 5642325, 7188737, 18895762, 12629579},
+ },
+ },
+ {
+ {
+ {14741879, -14946887, 22177208, -11721237, 1279741, 8058600,
+ 11758140, 789443, 32195181, 3895677},
+ {10758205, 15755439, -4509950, 9243698, -4879422, 6879879, -2204575,
+ -3566119, -8982069, 4429647},
+ {-2453894, 15725973, -20436342, -10410672, -5803908, -11040220,
+ -7135870, -11642895, 18047436, -15281743},
+ },
+ {
+ {-25173001, -11307165, 29759956, 11776784, -22262383, -15820455,
+ 10993114, -12850837, -17620701, -9408468},
+ {21987233, 700364, -24505048, 14972008, -7774265, -5718395,
+ 32155026, 2581431, -29958985, 8773375},
+ {-25568350, 454463, -13211935, 16126715, 25240068, 8594567,
+ 20656846, 12017935, -7874389, -13920155},
+ },
+ {
+ {6028182, 6263078, -31011806, -11301710, -818919, 2461772,
+ -31841174, -5468042, -1721788, -2776725},
+ {-12278994, 16624277, 987579, -5922598, 32908203, 1248608, 7719845,
+ -4166698, 28408820, 6816612},
+ {-10358094, -8237829, 19549651, -12169222, 22082623, 16147817,
+ 20613181, 13982702, -10339570, 5067943},
+ },
+ {
+ {-30505967, -3821767, 12074681, 13582412, -19877972, 2443951,
+ -19719286, 12746132, 5331210, -10105944},
+ {30528811, 3601899, -1957090, 4619785, -27361822, -15436388,
+ 24180793, -12570394, 27679908, -1648928},
+ {9402404, -13957065, 32834043, 10838634, -26580150, -13237195,
+ 26653274, -8685565, 22611444, -12715406},
+ },
+ {
+ {22190590, 1118029, 22736441, 15130463, -30460692, -5991321,
+ 19189625, -4648942, 4854859, 6622139},
+ {-8310738, -2953450, -8262579, -3388049, -10401731, -271929,
+ 13424426, -3567227, 26404409, 13001963},
+ {-31241838, -15415700, -2994250, 8939346, 11562230, -12840670,
+ -26064365, -11621720, -15405155, 11020693},
+ },
+ {
+ {1866042, -7949489, -7898649, -10301010, 12483315, 13477547,
+ 3175636, -12424163, 28761762, 1406734},
+ {-448555, -1777666, 13018551, 3194501, -9580420, -11161737,
+ 24760585, -4347088, 25577411, -13378680},
+ {-24290378, 4759345, -690653, -1852816, 2066747, 10693769,
+ -29595790, 9884936, -9368926, 4745410},
+ },
+ {
+ {-9141284, 6049714, -19531061, -4341411, -31260798, 9944276,
+ -15462008, -11311852, 10931924, -11931931},
+ {-16561513, 14112680, -8012645, 4817318, -8040464, -11414606,
+ -22853429, 10856641, -20470770, 13434654},
+ {22759489, -10073434, -16766264, -1871422, 13637442, -10168091,
+ 1765144, -12654326, 28445307, -5364710},
+ },
+ {
+ {29875063, 12493613, 2795536, -3786330, 1710620, 15181182,
+ -10195717, -8788675, 9074234, 1167180},
+ {-26205683, 11014233, -9842651, -2635485, -26908120, 7532294,
+ -18716888, -9535498, 3843903, 9367684},
+ {-10969595, -6403711, 9591134, 9582310, 11349256, 108879, 16235123,
+ 8601684, -139197, 4242895},
+ },
+ },
+ {
+ {
+ {22092954, -13191123, -2042793, -11968512, 32186753, -11517388,
+ -6574341, 2470660, -27417366, 16625501},
+ {-11057722, 3042016, 13770083, -9257922, 584236, -544855, -7770857,
+ 2602725, -27351616, 14247413},
+ {6314175, -10264892, -32772502, 15957557, -10157730, 168750,
+ -8618807, 14290061, 27108877, -1180880},
+ },
+ {
+ {-8586597, -7170966, 13241782, 10960156, -32991015, -13794596,
+ 33547976, -11058889, -27148451, 981874},
+ {22833440, 9293594, -32649448, -13618667, -9136966, 14756819,
+ -22928859, -13970780, -10479804, -16197962},
+ {-7768587, 3326786, -28111797, 10783824, 19178761, 14905060,
+ 22680049, 13906969, -15933690, 3797899},
+ },
+ {
+ {21721356, -4212746, -12206123, 9310182, -3882239, -13653110,
+ 23740224, -2709232, 20491983, -8042152},
+ {9209270, -15135055, -13256557, -6167798, -731016, 15289673,
+ 25947805, 15286587, 30997318, -6703063},
+ {7392032, 16618386, 23946583, -8039892, -13265164, -1533858,
+ -14197445, -2321576, 17649998, -250080},
+ },
+ {
+ {-9301088, -14193827, 30609526, -3049543, -25175069, -1283752,
+ -15241566, -9525724, -2233253, 7662146},
+ {-17558673, 1763594, -33114336, 15908610, -30040870, -12174295,
+ 7335080, -8472199, -3174674, 3440183},
+ {-19889700, -5977008, -24111293, -9688870, 10799743, -16571957,
+ 40450, -4431835, 4862400, 1133},
+ },
+ {
+ {-32856209, -7873957, -5422389, 14860950, -16319031, 7956142,
+ 7258061, 311861, -30594991, -7379421},
+ {-3773428, -1565936, 28985340, 7499440, 24445838, 9325937, 29727763,
+ 16527196, 18278453, 15405622},
+ {-4381906, 8508652, -19898366, -3674424, -5984453, 15149970,
+ -13313598, 843523, -21875062, 13626197},
+ },
+ {
+ {2281448, -13487055, -10915418, -2609910, 1879358, 16164207,
+ -10783882, 3953792, 13340839, 15928663},
+ {31727126, -7179855, -18437503, -8283652, 2875793, -16390330,
+ -25269894, -7014826, -23452306, 5964753},
+ {4100420, -5959452, -17179337, 6017714, -18705837, 12227141,
+ -26684835, 11344144, 2538215, -7570755},
+ },
+ {
+ {-9433605, 6123113, 11159803, -2156608, 30016280, 14966241,
+ -20474983, 1485421, -629256, -15958862},
+ {-26804558, 4260919, 11851389, 9658551, -32017107, 16367492,
+ -20205425, -13191288, 11659922, -11115118},
+ {26180396, 10015009, -30844224, -8581293, 5418197, 9480663, 2231568,
+ -10170080, 33100372, -1306171},
+ },
+ {
+ {15121113, -5201871, -10389905, 15427821, -27509937, -15992507,
+ 21670947, 4486675, -5931810, -14466380},
+ {16166486, -9483733, -11104130, 6023908, -31926798, -1364923,
+ 2340060, -16254968, -10735770, -10039824},
+ {28042865, -3557089, -12126526, 12259706, -3717498, -6945899,
+ 6766453, -8689599, 18036436, 5803270},
+ },
+ },
+ {
+ {
+ {-817581, 6763912, 11803561, 1585585, 10958447, -2671165, 23855391,
+ 4598332, -6159431, -14117438},
+ {-31031306, -14256194, 17332029, -2383520, 31312682, -5967183,
+ 696309, 50292, -20095739, 11763584},
+ {-594563, -2514283, -32234153, 12643980, 12650761, 14811489, 665117,
+ -12613632, -19773211, -10713562},
+ },
+ {
+ {30464590, -11262872, -4127476, -12734478, 19835327, -7105613,
+ -24396175, 2075773, -17020157, 992471},
+ {18357185, -6994433, 7766382, 16342475, -29324918, 411174, 14578841,
+ 8080033, -11574335, -10601610},
+ {19598397, 10334610, 12555054, 2555664, 18821899, -10339780,
+ 21873263, 16014234, 26224780, 16452269},
+ },
+ {
+ {-30223925, 5145196, 5944548, 16385966, 3976735, 2009897, -11377804,
+ -7618186, -20533829, 3698650},
+ {14187449, 3448569, -10636236, -10810935, -22663880, -3433596,
+ 7268410, -10890444, 27394301, 12015369},
+ {19695761, 16087646, 28032085, 12999827, 6817792, 11427614,
+ 20244189, -1312777, -13259127, -3402461},
+ },
+ {
+ {30860103, 12735208, -1888245, -4699734, -16974906, 2256940,
+ -8166013, 12298312, -8550524, -10393462},
+ {-5719826, -11245325, -1910649, 15569035, 26642876, -7587760,
+ -5789354, -15118654, -4976164, 12651793},
+ {-2848395, 9953421, 11531313, -5282879, 26895123, -12697089,
+ -13118820, -16517902, 9768698, -2533218},
+ },
+ {
+ {-24719459, 1894651, -287698, -4704085, 15348719, -8156530,
+ 32767513, 12765450, 4940095, 10678226},
+ {18860224, 15980149, -18987240, -1562570, -26233012, -11071856,
+ -7843882, 13944024, -24372348, 16582019},
+ {-15504260, 4970268, -29893044, 4175593, -20993212, -2199756,
+ -11704054, 15444560, -11003761, 7989037},
+ },
+ {
+ {31490452, 5568061, -2412803, 2182383, -32336847, 4531686,
+ -32078269, 6200206, -19686113, -14800171},
+ {-17308668, -15879940, -31522777, -2831, -32887382, 16375549,
+ 8680158, -16371713, 28550068, -6857132},
+ {-28126887, -5688091, 16837845, -1820458, -6850681, 12700016,
+ -30039981, 4364038, 1155602, 5988841},
+ },
+ {
+ {21890435, -13272907, -12624011, 12154349, -7831873, 15300496,
+ 23148983, -4470481, 24618407, 8283181},
+ {-33136107, -10512751, 9975416, 6841041, -31559793, 16356536,
+ 3070187, -7025928, 1466169, 10740210},
+ {-1509399, -15488185, -13503385, -10655916, 32799044, 909394,
+ -13938903, -5779719, -32164649, -15327040},
+ },
+ {
+ {3960823, -14267803, -28026090, -15918051, -19404858, 13146868,
+ 15567327, 951507, -3260321, -573935},
+ {24740841, 5052253, -30094131, 8961361, 25877428, 6165135,
+ -24368180, 14397372, -7380369, -6144105},
+ {-28888365, 3510803, -28103278, -1158478, -11238128, -10631454,
+ -15441463, -14453128, -1625486, -6494814},
+ },
+ },
+ {
+ {
+ {793299, -9230478, 8836302, -6235707, -27360908, -2369593, 33152843,
+ -4885251, -9906200, -621852},
+ {5666233, 525582, 20782575, -8038419, -24538499, 14657740, 16099374,
+ 1468826, -6171428, -15186581},
+ {-4859255, -3779343, -2917758, -6748019, 7778750, 11688288,
+ -30404353, -9871238, -1558923, -9863646},
+ },
+ {
+ {10896332, -7719704, 824275, 472601, -19460308, 3009587, 25248958,
+ 14783338, -30581476, -15757844},
+ {10566929, 12612572, -31944212, 11118703, -12633376, 12362879,
+ 21752402, 8822496, 24003793, 14264025},
+ {27713862, -7355973, -11008240, 9227530, 27050101, 2504721,
+ 23886875, -13117525, 13958495, -5732453},
+ },
+ {
+ {-23481610, 4867226, -27247128, 3900521, 29838369, -8212291,
+ -31889399, -10041781, 7340521, -15410068},
+ {4646514, -8011124, -22766023, -11532654, 23184553, 8566613,
+ 31366726, -1381061, -15066784, -10375192},
+ {-17270517, 12723032, -16993061, 14878794, 21619651, -6197576,
+ 27584817, 3093888, -8843694, 3849921},
+ },
+ {
+ {-9064912, 2103172, 25561640, -15125738, -5239824, 9582958,
+ 32477045, -9017955, 5002294, -15550259},
+ {-12057553, -11177906, 21115585, -13365155, 8808712, -12030708,
+ 16489530, 13378448, -25845716, 12741426},
+ {-5946367, 10645103, -30911586, 15390284, -3286982, -7118677,
+ 24306472, 15852464, 28834118, -7646072},
+ },
+ {
+ {-17335748, -9107057, -24531279, 9434953, -8472084, -583362,
+ -13090771, 455841, 20461858, 5491305},
+ {13669248, -16095482, -12481974, -10203039, -14569770, -11893198,
+ -24995986, 11293807, -28588204, -9421832},
+ {28497928, 6272777, -33022994, 14470570, 8906179, -1225630,
+ 18504674, -14165166, 29867745, -8795943},
+ },
+ {
+ {-16207023, 13517196, -27799630, -13697798, 24009064, -6373891,
+ -6367600, -13175392, 22853429, -4012011},
+ {24191378, 16712145, -13931797, 15217831, 14542237, 1646131,
+ 18603514, -11037887, 12876623, -2112447},
+ {17902668, 4518229, -411702, -2829247, 26878217, 5258055, -12860753,
+ 608397, 16031844, 3723494},
+ },
+ {
+ {-28632773, 12763728, -20446446, 7577504, 33001348, -13017745,
+ 17558842, -7872890, 23896954, -4314245},
+ {-20005381, -12011952, 31520464, 605201, 2543521, 5991821, -2945064,
+ 7229064, -9919646, -8826859},
+ {28816045, 298879, -28165016, -15920938, 19000928, -1665890,
+ -12680833, -2949325, -18051778, -2082915},
+ },
+ {
+ {16000882, -344896, 3493092, -11447198, -29504595, -13159789,
+ 12577740, 16041268, -19715240, 7847707},
+ {10151868, 10572098, 27312476, 7922682, 14825339, 4723128,
+ -32855931, -6519018, -10020567, 3852848},
+ {-11430470, 15697596, -21121557, -4420647, 5386314, 15063598,
+ 16514493, -15932110, 29330899, -15076224},
+ },
+ },
+ {
+ {
+ {-25499735, -4378794, -15222908, -6901211, 16615731, 2051784,
+ 3303702, 15490, -27548796, 12314391},
+ {15683520, -6003043, 18109120, -9980648, 15337968, -5997823,
+ -16717435, 15921866, 16103996, -3731215},
+ {-23169824, -10781249, 13588192, -1628807, -3798557, -1074929,
+ -19273607, 5402699, -29815713, -9841101},
+ },
+ {
+ {23190676, 2384583, -32714340, 3462154, -29903655, -1529132,
+ -11266856, 8911517, -25205859, 2739713},
+ {21374101, -3554250, -33524649, 9874411, 15377179, 11831242,
+ -33529904, 6134907, 4931255, 11987849},
+ {-7732, -2978858, -16223486, 7277597, 105524, -322051, -31480539,
+ 13861388, -30076310, 10117930},
+ },
+ {
+ {-29501170, -10744872, -26163768, 13051539, -25625564, 5089643,
+ -6325503, 6704079, 12890019, 15728940},
+ {-21972360, -11771379, -951059, -4418840, 14704840, 2695116, 903376,
+ -10428139, 12885167, 8311031},
+ {-17516482, 5352194, 10384213, -13811658, 7506451, 13453191,
+ 26423267, 4384730, 1888765, -5435404},
+ },
+ {
+ {-25817338, -3107312, -13494599, -3182506, 30896459, -13921729,
+ -32251644, -12707869, -19464434, -3340243},
+ {-23607977, -2665774, -526091, 4651136, 5765089, 4618330, 6092245,
+ 14845197, 17151279, -9854116},
+ {-24830458, -12733720, -15165978, 10367250, -29530908, -265356,
+ 22825805, -7087279, -16866484, 16176525},
+ },
+ {
+ {-23583256, 6564961, 20063689, 3798228, -4740178, 7359225, 2006182,
+ -10363426, -28746253, -10197509},
+ {-10626600, -4486402, -13320562, -5125317, 3432136, -6393229,
+ 23632037, -1940610, 32808310, 1099883},
+ {15030977, 5768825, -27451236, -2887299, -6427378, -15361371,
+ -15277896, -6809350, 2051441, -15225865},
+ },
+ {
+ {-3362323, -7239372, 7517890, 9824992, 23555850, 295369, 5148398,
+ -14154188, -22686354, 16633660},
+ {4577086, -16752288, 13249841, -15304328, 19958763, -14537274,
+ 18559670, -10759549, 8402478, -9864273},
+ {-28406330, -1051581, -26790155, -907698, -17212414, -11030789,
+ 9453451, -14980072, 17983010, 9967138},
+ },
+ {
+ {-25762494, 6524722, 26585488, 9969270, 24709298, 1220360, -1677990,
+ 7806337, 17507396, 3651560},
+ {-10420457, -4118111, 14584639, 15971087, -15768321, 8861010,
+ 26556809, -5574557, -18553322, -11357135},
+ {2839101, 14284142, 4029895, 3472686, 14402957, 12689363, -26642121,
+ 8459447, -5605463, -7621941},
+ },
+ {
+ {-4839289, -3535444, 9744961, 2871048, 25113978, 3187018, -25110813,
+ -849066, 17258084, -7977739},
+ {18164541, -10595176, -17154882, -1542417, 19237078, -9745295,
+ 23357533, -15217008, 26908270, 12150756},
+ {-30264870, -7647865, 5112249, -7036672, -1499807, -6974257, 43168,
+ -5537701, -32302074, 16215819},
+ },
+ },
+ {
+ {
+ {-6898905, 9824394, -12304779, -4401089, -31397141, -6276835,
+ 32574489, 12532905, -7503072, -8675347},
+ {-27343522, -16515468, -27151524, -10722951, 946346, 16291093,
+ 254968, 7168080, 21676107, -1943028},
+ {21260961, -8424752, -16831886, -11920822, -23677961, 3968121,
+ -3651949, -6215466, -3556191, -7913075},
+ },
+ {
+ {16544754, 13250366, -16804428, 15546242, -4583003, 12757258,
+ -2462308, -8680336, -18907032, -9662799},
+ {-2415239, -15577728, 18312303, 4964443, -15272530, -12653564,
+ 26820651, 16690659, 25459437, -4564609},
+ {-25144690, 11425020, 28423002, -11020557, -6144921, -15826224,
+ 9142795, -2391602, -6432418, -1644817},
+ },
+ {
+ {-23104652, 6253476, 16964147, -3768872, -25113972, -12296437,
+ -27457225, -16344658, 6335692, 7249989},
+ {-30333227, 13979675, 7503222, -12368314, -11956721, -4621693,
+ -30272269, 2682242, 25993170, -12478523},
+ {4364628, 5930691, 32304656, -10044554, -8054781, 15091131,
+ 22857016, -10598955, 31820368, 15075278},
+ },
+ {
+ {31879134, -8918693, 17258761, 90626, -8041836, -4917709, 24162788,
+ -9650886, -17970238, 12833045},
+ {19073683, 14851414, -24403169, -11860168, 7625278, 11091125,
+ -19619190, 2074449, -9413939, 14905377},
+ {24483667, -11935567, -2518866, -11547418, -1553130, 15355506,
+ -25282080, 9253129, 27628530, -7555480},
+ },
+ {
+ {17597607, 8340603, 19355617, 552187, 26198470, -3176583, 4593324,
+ -9157582, -14110875, 15297016},
+ {510886, 14337390, -31785257, 16638632, 6328095, 2713355, -20217417,
+ -11864220, 8683221, 2921426},
+ {18606791, 11874196, 27155355, -5281482, -24031742, 6265446,
+ -25178240, -1278924, 4674690, 13890525},
+ },
+ {
+ {13609624, 13069022, -27372361, -13055908, 24360586, 9592974,
+ 14977157, 9835105, 4389687, 288396},
+ {9922506, -519394, 13613107, 5883594, -18758345, -434263, -12304062,
+ 8317628, 23388070, 16052080},
+ {12720016, 11937594, -31970060, -5028689, 26900120, 8561328,
+ -20155687, -11632979, -14754271, -10812892},
+ },
+ {
+ {15961858, 14150409, 26716931, -665832, -22794328, 13603569,
+ 11829573, 7467844, -28822128, 929275},
+ {11038231, -11582396, -27310482, -7316562, -10498527, -16307831,
+ -23479533, -9371869, -21393143, 2465074},
+ {20017163, -4323226, 27915242, 1529148, 12396362, 15675764,
+ 13817261, -9658066, 2463391, -4622140},
+ },
+ {
+ {-16358878, -12663911, -12065183, 4996454, -1256422, 1073572,
+ 9583558, 12851107, 4003896, 12673717},
+ {-1731589, -15155870, -3262930, 16143082, 19294135, 13385325,
+ 14741514, -9103726, 7903886, 2348101},
+ {24536016, -16515207, 12715592, -3862155, 1511293, 10047386,
+ -3842346, -7129159, -28377538, 10048127},
+ },
+ },
+ {
+ {
+ {-12622226, -6204820, 30718825, 2591312, -10617028, 12192840,
+ 18873298, -7297090, -32297756, 15221632},
+ {-26478122, -11103864, 11546244, -1852483, 9180880, 7656409,
+ -21343950, 2095755, 29769758, 6593415},
+ {-31994208, -2907461, 4176912, 3264766, 12538965, -868111, 26312345,
+ -6118678, 30958054, 8292160},
+ },
+ {
+ {31429822, -13959116, 29173532, 15632448, 12174511, -2760094,
+ 32808831, 3977186, 26143136, -3148876},
+ {22648901, 1402143, -22799984, 13746059, 7936347, 365344, -8668633,
+ -1674433, -3758243, -2304625},
+ {-15491917, 8012313, -2514730, -12702462, -23965846, -10254029,
+ -1612713, -1535569, -16664475, 8194478},
+ },
+ {
+ {27338066, -7507420, -7414224, 10140405, -19026427, -6589889,
+ 27277191, 8855376, 28572286, 3005164},
+ {26287124, 4821776, 25476601, -4145903, -3764513, -15788984,
+ -18008582, 1182479, -26094821, -13079595},
+ {-7171154, 3178080, 23970071, 6201893, -17195577, -4489192,
+ -21876275, -13982627, 32208683, -1198248},
+ },
+ {
+ {-16657702, 2817643, -10286362, 14811298, 6024667, 13349505,
+ -27315504, -10497842, -27672585, -11539858},
+ {15941029, -9405932, -21367050, 8062055, 31876073, -238629,
+ -15278393, -1444429, 15397331, -4130193},
+ {8934485, -13485467, -23286397, -13423241, -32446090, 14047986,
+ 31170398, -1441021, -27505566, 15087184},
+ },
+ {
+ {-18357243, -2156491, 24524913, -16677868, 15520427, -6360776,
+ -15502406, 11461896, 16788528, -5868942},
+ {-1947386, 16013773, 21750665, 3714552, -17401782, -16055433,
+ -3770287, -10323320, 31322514, -11615635},
+ {21426655, -5650218, -13648287, -5347537, -28812189, -4920970,
+ -18275391, -14621414, 13040862, -12112948},
+ },
+ {
+ {11293895, 12478086, -27136401, 15083750, -29307421, 14748872,
+ 14555558, -13417103, 1613711, 4896935},
+ {-25894883, 15323294, -8489791, -8057900, 25967126, -13425460,
+ 2825960, -4897045, -23971776, -11267415},
+ {-15924766, -5229880, -17443532, 6410664, 3622847, 10243618,
+ 20615400, 12405433, -23753030, -8436416},
+ },
+ {
+ {-7091295, 12556208, -20191352, 9025187, -17072479, 4333801,
+ 4378436, 2432030, 23097949, -566018},
+ {4565804, -16025654, 20084412, -7842817, 1724999, 189254, 24767264,
+ 10103221, -18512313, 2424778},
+ {366633, -11976806, 8173090, -6890119, 30788634, 5745705, -7168678,
+ 1344109, -3642553, 12412659},
+ },
+ {
+ {-24001791, 7690286, 14929416, -168257, -32210835, -13412986,
+ 24162697, -15326504, -3141501, 11179385},
+ {18289522, -14724954, 8056945, 16430056, -21729724, 7842514,
+ -6001441, -1486897, -18684645, -11443503},
+ {476239, 6601091, -6152790, -9723375, 17503545, -4863900, 27672959,
+ 13403813, 11052904, 5219329},
+ },
+ },
+ {
+ {
+ {20678546, -8375738, -32671898, 8849123, -5009758, 14574752,
+ 31186971, -3973730, 9014762, -8579056},
+ {-13644050, -10350239, -15962508, 5075808, -1514661, -11534600,
+ -33102500, 9160280, 8473550, -3256838},
+ {24900749, 14435722, 17209120, -15292541, -22592275, 9878983,
+ -7689309, -16335821, -24568481, 11788948},
+ },
+ {
+ {-3118155, -11395194, -13802089, 14797441, 9652448, -6845904,
+ -20037437, 10410733, -24568470, -1458691},
+ {-15659161, 16736706, -22467150, 10215878, -9097177, 7563911,
+ 11871841, -12505194, -18513325, 8464118},
+ {-23400612, 8348507, -14585951, -861714, -3950205, -6373419,
+ 14325289, 8628612, 33313881, -8370517},
+ },
+ {
+ {-20186973, -4967935, 22367356, 5271547, -1097117, -4788838,
+ -24805667, -10236854, -8940735, -5818269},
+ {-6948785, -1795212, -32625683, -16021179, 32635414, -7374245,
+ 15989197, -12838188, 28358192, -4253904},
+ {-23561781, -2799059, -32351682, -1661963, -9147719, 10429267,
+ -16637684, 4072016, -5351664, 5596589},
+ },
+ {
+ {-28236598, -3390048, 12312896, 6213178, 3117142, 16078565,
+ 29266239, 2557221, 1768301, 15373193},
+ {-7243358, -3246960, -4593467, -7553353, -127927, -912245, -1090902,
+ -4504991, -24660491, 3442910},
+ {-30210571, 5124043, 14181784, 8197961, 18964734, -11939093,
+ 22597931, 7176455, -18585478, 13365930},
+ },
+ {
+ {-7877390, -1499958, 8324673, 4690079, 6261860, 890446, 24538107,
+ -8570186, -9689599, -3031667},
+ {25008904, -10771599, -4305031, -9638010, 16265036, 15721635,
+ 683793, -11823784, 15723479, -15163481},
+ {-9660625, 12374379, -27006999, -7026148, -7724114, -12314514,
+ 11879682, 5400171, 519526, -1235876},
+ },
+ {
+ {22258397, -16332233, -7869817, 14613016, -22520255, -2950923,
+ -20353881, 7315967, 16648397, 7605640},
+ {-8081308, -8464597, -8223311, 9719710, 19259459, -15348212,
+ 23994942, -5281555, -9468848, 4763278},
+ {-21699244, 9220969, -15730624, 1084137, -25476107, -2852390,
+ 31088447, -7764523, -11356529, 728112},
+ },
+ {
+ {26047220, -11751471, -6900323, -16521798, 24092068, 9158119,
+ -4273545, -12555558, -29365436, -5498272},
+ {17510331, -322857, 5854289, 8403524, 17133918, -3112612, -28111007,
+ 12327945, 10750447, 10014012},
+ {-10312768, 3936952, 9156313, -8897683, 16498692, -994647,
+ -27481051, -666732, 3424691, 7540221},
+ },
+ {
+ {30322361, -6964110, 11361005, -4143317, 7433304, 4989748, -7071422,
+ -16317219, -9244265, 15258046},
+ {13054562, -2779497, 19155474, 469045, -12482797, 4566042, 5631406,
+ 2711395, 1062915, -5136345},
+ {-19240248, -11254599, -29509029, -7499965, -5835763, 13005411,
+ -6066489, 12194497, 32960380, 1459310},
+ },
+ },
+ {
+ {
+ {19852034, 7027924, 23669353, 10020366, 8586503, -6657907, 394197,
+ -6101885, 18638003, -11174937},
+ {31395534, 15098109, 26581030, 8030562, -16527914, -5007134,
+ 9012486, -7584354, -6643087, -5442636},
+ {-9192165, -2347377, -1997099, 4529534, 25766844, 607986, -13222,
+ 9677543, -32294889, -6456008},
+ },
+ {
+ {-2444496, -149937, 29348902, 8186665, 1873760, 12489863, -30934579,
+ -7839692, -7852844, -8138429},
+ {-15236356, -15433509, 7766470, 746860, 26346930, -10221762,
+ -27333451, 10754588, -9431476, 5203576},
+ {31834314, 14135496, -770007, 5159118, 20917671, -16768096,
+ -7467973, -7337524, 31809243, 7347066},
+ },
+ {
+ {-9606723, -11874240, 20414459, 13033986, 13716524, -11691881,
+ 19797970, -12211255, 15192876, -2087490},
+ {-12663563, -2181719, 1168162, -3804809, 26747877, -14138091,
+ 10609330, 12694420, 33473243, -13382104},
+ {33184999, 11180355, 15832085, -11385430, -1633671, 225884,
+ 15089336, -11023903, -6135662, 14480053},
+ },
+ {
+ {31308717, -5619998, 31030840, -1897099, 15674547, -6582883,
+ 5496208, 13685227, 27595050, 8737275},
+ {-20318852, -15150239, 10933843, -16178022, 8335352, -7546022,
+ -31008351, -12610604, 26498114, 66511},
+ {22644454, -8761729, -16671776, 4884562, -3105614, -13559366,
+ 30540766, -4286747, -13327787, -7515095},
+ },
+ {
+ {-28017847, 9834845, 18617207, -2681312, -3401956, -13307506,
+ 8205540, 13585437, -17127465, 15115439},
+ {23711543, -672915, 31206561, -8362711, 6164647, -9709987,
+ -33535882, -1426096, 8236921, 16492939},
+ {-23910559, -13515526, -26299483, -4503841, 25005590, -7687270,
+ 19574902, 10071562, 6708380, -6222424},
+ },
+ {
+ {2101391, -4930054, 19702731, 2367575, -15427167, 1047675, 5301017,
+ 9328700, 29955601, -11678310},
+ {3096359, 9271816, -21620864, -15521844, -14847996, -7592937,
+ -25892142, -12635595, -9917575, 6216608},
+ {-32615849, 338663, -25195611, 2510422, -29213566, -13820213,
+ 24822830, -6146567, -26767480, 7525079},
+ },
+ {
+ {-23066649, -13985623, 16133487, -7896178, -3389565, 778788,
+ -910336, -2782495, -19386633, 11994101},
+ {21691500, -13624626, -641331, -14367021, 3285881, -3483596,
+ -25064666, 9718258, -7477437, 13381418},
+ {18445390, -4202236, 14979846, 11622458, -1727110, -3582980,
+ 23111648, -6375247, 28535282, 15779576},
+ },
+ {
+ {30098053, 3089662, -9234387, 16662135, -21306940, 11308411,
+ -14068454, 12021730, 9955285, -16303356},
+ {9734894, -14576830, -7473633, -9138735, 2060392, 11313496,
+ -18426029, 9924399, 20194861, 13380996},
+ {-26378102, -7965207, -22167821, 15789297, -18055342, -6168792,
+ -1984914, 15707771, 26342023, 10146099},
+ },
+ },
+ {
+ {
+ {-26016874, -219943, 21339191, -41388, 19745256, -2878700,
+ -29637280, 2227040, 21612326, -545728},
+ {-13077387, 1184228, 23562814, -5970442, -20351244, -6348714,
+ 25764461, 12243797, -20856566, 11649658},
+ {-10031494, 11262626, 27384172, 2271902, 26947504, -15997771, 39944,
+ 6114064, 33514190, 2333242},
+ },
+ {
+ {-21433588, -12421821, 8119782, 7219913, -21830522, -9016134,
+ -6679750, -12670638, 24350578, -13450001},
+ {-4116307, -11271533, -23886186, 4843615, -30088339, 690623,
+ -31536088, -10406836, 8317860, 12352766},
+ {18200138, -14475911, -33087759, -2696619, -23702521, -9102511,
+ -23552096, -2287550, 20712163, 6719373},
+ },
+ {
+ {26656208, 6075253, -7858556, 1886072, -28344043, 4262326, 11117530,
+ -3763210, 26224235, -3297458},
+ {-17168938, -14854097, -3395676, -16369877, -19954045, 14050420,
+ 21728352, 9493610, 18620611, -16428628},
+ {-13323321, 13325349, 11432106, 5964811, 18609221, 6062965,
+ -5269471, -9725556, -30701573, -16479657},
+ },
+ {
+ {-23860538, -11233159, 26961357, 1640861, -32413112, -16737940,
+ 12248509, -5240639, 13735342, 1934062},
+ {25089769, 6742589, 17081145, -13406266, 21909293, -16067981,
+ -15136294, -3765346, -21277997, 5473616},
+ {31883677, -7961101, 1083432, -11572403, 22828471, 13290673,
+ -7125085, 12469656, 29111212, -5451014},
+ },
+ {
+ {24244947, -15050407, -26262976, 2791540, -14997599, 16666678,
+ 24367466, 6388839, -10295587, 452383},
+ {-25640782, -3417841, 5217916, 16224624, 19987036, -4082269,
+ -24236251, -5915248, 15766062, 8407814},
+ {-20406999, 13990231, 15495425, 16395525, 5377168, 15166495,
+ -8917023, -4388953, -8067909, 2276718},
+ },
+ {
+ {30157918, 12924066, -17712050, 9245753, 19895028, 3368142,
+ -23827587, 5096219, 22740376, -7303417},
+ {2041139, -14256350, 7783687, 13876377, -25946985, -13352459,
+ 24051124, 13742383, -15637599, 13295222},
+ {33338237, -8505733, 12532113, 7977527, 9106186, -1715251,
+ -17720195, -4612972, -4451357, -14669444},
+ },
+ {
+ {-20045281, 5454097, -14346548, 6447146, 28862071, 1883651,
+ -2469266, -4141880, 7770569, 9620597},
+ {23208068, 7979712, 33071466, 8149229, 1758231, -10834995, 30945528,
+ -1694323, -33502340, -14767970},
+ {1439958, -16270480, -1079989, -793782, 4625402, 10647766, -5043801,
+ 1220118, 30494170, -11440799},
+ },
+ {
+ {-5037580, -13028295, -2970559, -3061767, 15640974, -6701666,
+ -26739026, 926050, -1684339, -13333647},
+ {13908495, -3549272, 30919928, -6273825, -21521863, 7989039,
+ 9021034, 9078865, 3353509, 4033511},
+ {-29663431, -15113610, 32259991, -344482, 24295849, -12912123,
+ 23161163, 8839127, 27485041, 7356032},
+ },
+ },
+ {
+ {
+ {9661027, 705443, 11980065, -5370154, -1628543, 14661173, -6346142,
+ 2625015, 28431036, -16771834},
+ {-23839233, -8311415, -25945511, 7480958, -17681669, -8354183,
+ -22545972, 14150565, 15970762, 4099461},
+ {29262576, 16756590, 26350592, -8793563, 8529671, -11208050,
+ 13617293, -9937143, 11465739, 8317062},
+ },
+ {
+ {-25493081, -6962928, 32500200, -9419051, -23038724, -2302222,
+ 14898637, 3848455, 20969334, -5157516},
+ {-20384450, -14347713, -18336405, 13884722, -33039454, 2842114,
+ -21610826, -3649888, 11177095, 14989547},
+ {-24496721, -11716016, 16959896, 2278463, 12066309, 10137771,
+ 13515641, 2581286, -28487508, 9930240},
+ },
+ {
+ {-17751622, -2097826, 16544300, -13009300, -15914807, -14949081,
+ 18345767, -13403753, 16291481, -5314038},
+ {-33229194, 2553288, 32678213, 9875984, 8534129, 6889387, -9676774,
+ 6957617, 4368891, 9788741},
+ {16660756, 7281060, -10830758, 12911820, 20108584, -8101676,
+ -21722536, -8613148, 16250552, -11111103},
+ },
+ {
+ {-19765507, 2390526, -16551031, 14161980, 1905286, 6414907, 4689584,
+ 10604807, -30190403, 4782747},
+ {-1354539, 14736941, -7367442, -13292886, 7710542, -14155590,
+ -9981571, 4383045, 22546403, 437323},
+ {31665577, -12180464, -16186830, 1491339, -18368625, 3294682,
+ 27343084, 2786261, -30633590, -14097016},
+ },
+ {
+ {-14467279, -683715, -33374107, 7448552, 19294360, 14334329,
+ -19690631, 2355319, -19284671, -6114373},
+ {15121312, -15796162, 6377020, -6031361, -10798111, -12957845,
+ 18952177, 15496498, -29380133, 11754228},
+ {-2637277, -13483075, 8488727, -14303896, 12728761, -1622493,
+ 7141596, 11724556, 22761615, -10134141},
+ },
+ {
+ {16918416, 11729663, -18083579, 3022987, -31015732, -13339659,
+ -28741185, -12227393, 32851222, 11717399},
+ {11166634, 7338049, -6722523, 4531520, -29468672, -7302055,
+ 31474879, 3483633, -1193175, -4030831},
+ {-185635, 9921305, 31456609, -13536438, -12013818, 13348923,
+ 33142652, 6546660, -19985279, -3948376},
+ },
+ {
+ {-32460596, 11266712, -11197107, -7899103, 31703694, 3855903,
+ -8537131, -12833048, -30772034, -15486313},
+ {-18006477, 12709068, 3991746, -6479188, -21491523, -10550425,
+ -31135347, -16049879, 10928917, 3011958},
+ {-6957757, -15594337, 31696059, 334240, 29576716, 14796075,
+ -30831056, -12805180, 18008031, 10258577},
+ },
+ {
+ {-22448644, 15655569, 7018479, -4410003, -30314266, -1201591,
+ -1853465, 1367120, 25127874, 6671743},
+ {29701166, -14373934, -10878120, 9279288, -17568, 13127210,
+ 21382910, 11042292, 25838796, 4642684},
+ {-20430234, 14955537, -24126347, 8124619, -5369288, -5990470,
+ 30468147, -13900640, 18423289, 4177476},
+ },
+ },
+};
+
+static uint8_t negative(signed char b) {
+  uint32_t x = b; /* sign extension: the top bit of x is set iff b < 0 */
+  x >>= 31;       /* 1: yes; 0: no */
+ return x;
+}
+
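+/* Sets |*t| to the |b|-th entry of k25519Precomp[pos] when b > 0, to the
+ * neutral element when b == 0, and to the negated entry when b < 0. All eight
+ * table entries are read and combined with constant-time moves, so neither
+ * the memory access pattern nor the branches depend on the secret digit b. */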
+static void table_select(ge_precomp *t, int pos, signed char b) {
+ ge_precomp minust;
+  uint8_t bnegative = negative(b);
+  /* babs = |b|, computed without a branch: when b is negative, 2*b is
+   * subtracted from b. */
+  uint8_t babs = b - ((uint8_t)((-bnegative) & b) << 1);
+
+ ge_precomp_0(t);
+ cmov(t, &k25519Precomp[pos][0], equal(babs, 1));
+ cmov(t, &k25519Precomp[pos][1], equal(babs, 2));
+ cmov(t, &k25519Precomp[pos][2], equal(babs, 3));
+ cmov(t, &k25519Precomp[pos][3], equal(babs, 4));
+ cmov(t, &k25519Precomp[pos][4], equal(babs, 5));
+ cmov(t, &k25519Precomp[pos][5], equal(babs, 6));
+ cmov(t, &k25519Precomp[pos][6], equal(babs, 7));
+ cmov(t, &k25519Precomp[pos][7], equal(babs, 8));
+ fe_copy(minust.yplusx, t->yminusx);
+ fe_copy(minust.yminusx, t->yplusx);
+ fe_neg(minust.xy2d, t->xy2d);
+ cmov(t, &minust, bnegative);
+}
+
+/* h = a * B
+ * where a = a[0]+256*a[1]+...+256^31 a[31]
+ * B is the Ed25519 base point (x,4/5) with x positive.
+ *
+ * Preconditions:
+ * a[31] <= 127 */
+void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t *a) {
+ signed char e[64];
+ signed char carry;
+ ge_p1p1 r;
+ ge_p2 s;
+ ge_precomp t;
+ int i;
+
+ for (i = 0; i < 32; ++i) {
+ e[2 * i + 0] = (a[i] >> 0) & 15;
+ e[2 * i + 1] = (a[i] >> 4) & 15;
+ }
+ /* each e[i] is between 0 and 15 */
+ /* e[63] is between 0 and 7 */
+
+ carry = 0;
+ for (i = 0; i < 63; ++i) {
+ e[i] += carry;
+ carry = e[i] + 8;
+ carry >>= 4;
+ e[i] -= carry << 4;
+ }
+ e[63] += carry;
+ /* each e[i] is between -8 and 8 */
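+  /* Worked example: a byte a[i] = 0xe3 first yields the digits 3 and 14; the
+   * carry pass rewrites 14 as 14 - 16 = -2 and adds 1 to the next digit, so
+   * the scalar is re-expressed as sum(e[i] * 16^i) with signed digits. */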
+
+ ge_p3_0(h);
+ for (i = 1; i < 64; i += 2) {
+ table_select(&t, i / 2, e[i]);
+ ge_madd(&r, h, &t);
+ x25519_ge_p1p1_to_p3(h, &r);
+ }
+
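+  /* h now holds the odd digits' contributions at one-sixteenth of their final
+   * weight; the four doublings below multiply by 16, after which the even
+   * digits are added, so a single table of multiples of 256^pos * B serves
+   * both passes. */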
+ ge_p3_dbl(&r, h);
+ x25519_ge_p1p1_to_p2(&s, &r);
+ ge_p2_dbl(&r, &s);
+ x25519_ge_p1p1_to_p2(&s, &r);
+ ge_p2_dbl(&r, &s);
+ x25519_ge_p1p1_to_p2(&s, &r);
+ ge_p2_dbl(&r, &s);
+ x25519_ge_p1p1_to_p3(h, &r);
+
+ for (i = 0; i < 64; i += 2) {
+ table_select(&t, i / 2, e[i]);
+ ge_madd(&r, h, &t);
+ x25519_ge_p1p1_to_p3(h, &r);
+ }
+}
+
+#endif
+
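+/* Copies |*u| into |*t| when b == 1 and leaves |*t| unchanged when b == 0,
+ * in constant time. */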
+static void cmov_cached(ge_cached *t, ge_cached *u, uint8_t b) {
+ fe_cmov(t->YplusX, u->YplusX, b);
+ fe_cmov(t->YminusX, u->YminusX, b);
+ fe_cmov(t->Z, u->Z, b);
+ fe_cmov(t->T2d, u->T2d, b);
+}
+
+/* r = scalar * A.
+ * where scalar = scalar[0]+256*scalar[1]+...+256^31 scalar[31]. */
+void x25519_ge_scalarmult(ge_p2 *r, const uint8_t *scalar, const ge_p3 *A) {
+ ge_p2 Ai_p2[8];
+ ge_cached Ai[16];
+ ge_p1p1 t;
+
+ ge_cached_0(&Ai[0]);
+ x25519_ge_p3_to_cached(&Ai[1], A);
+ ge_p3_to_p2(&Ai_p2[1], A);
+
+ unsigned i;
+ for (i = 2; i < 16; i += 2) {
+ ge_p2_dbl(&t, &Ai_p2[i / 2]);
+ ge_p1p1_to_cached(&Ai[i], &t);
+ if (i < 8) {
+ x25519_ge_p1p1_to_p2(&Ai_p2[i], &t);
+ }
+ x25519_ge_add(&t, A, &Ai[i]);
+ ge_p1p1_to_cached(&Ai[i + 1], &t);
+ if (i < 7) {
+ x25519_ge_p1p1_to_p2(&Ai_p2[i + 1], &t);
+ }
+ }
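+
+  /* Ai[j] now holds j*A for j = 0..15, so each 4-bit window of the scalar
+   * maps to exactly one cached table entry. */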
+
+ ge_p2_0(r);
+ ge_p3 u;
+
+ for (i = 0; i < 256; i += 4) {
+ ge_p2_dbl(&t, r);
+ x25519_ge_p1p1_to_p2(r, &t);
+ ge_p2_dbl(&t, r);
+ x25519_ge_p1p1_to_p2(r, &t);
+ ge_p2_dbl(&t, r);
+ x25519_ge_p1p1_to_p2(r, &t);
+ ge_p2_dbl(&t, r);
+ x25519_ge_p1p1_to_p3(&u, &t);
+
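+    /* The next three lines extract the i-th 4-bit window of the scalar, most
+     * significant nibble first; for i == 0 this selects scalar[31] >> 4. */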
+    uint8_t index = scalar[31 - i / 8];
+ index >>= 4 - (i & 4);
+ index &= 0xf;
+
+ unsigned j;
+ ge_cached selected;
+ ge_cached_0(&selected);
+ for (j = 0; j < 16; j++) {
+ cmov_cached(&selected, &Ai[j], equal(j, index));
+ }
+
+ x25519_ge_add(&t, &u, &selected);
+ x25519_ge_p1p1_to_p2(r, &t);
+ }
+}
+
+#ifdef ED25519
+static void slide(signed char *r, const uint8_t *a) {
+ int i;
+ int b;
+ int k;
+
+ for (i = 0; i < 256; ++i) {
+ r[i] = 1 & (a[i >> 3] >> (i & 7));
+ }
+
+ for (i = 0; i < 256; ++i) {
+ if (r[i]) {
+ for (b = 1; b <= 6 && i + b < 256; ++b) {
+ if (r[i + b]) {
+ if (r[i] + (r[i + b] << b) <= 15) {
+ r[i] += r[i + b] << b;
+ r[i + b] = 0;
+ } else if (r[i] - (r[i + b] << b) >= -15) {
+ r[i] -= r[i + b] << b;
+ for (k = i + b; k < 256; ++k) {
+ if (!r[k]) {
+ r[k] = 1;
+ break;
+ }
+ r[k] = 0;
+ }
+ } else {
+ break;
+ }
+ }
+ }
+ }
+ }
+}
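+
+/* After slide(), sum(r[i] * 2^i) still equals the input; every nonzero digit
+ * is odd and lies in [-15, 15], and nonzero digits are sparse, so the
+ * double-scalar loop below needs only the odd multiples held in Ai and Bi. */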
+
+static const ge_precomp Bi[8] = {
+ {
+ {25967493, -14356035, 29566456, 3660896, -12694345, 4014787, 27544626,
+ -11754271, -6079156, 2047605},
+ {-12545711, 934262, -2722910, 3049990, -727428, 9406986, 12720692,
+ 5043384, 19500929, -15469378},
+ {-8738181, 4489570, 9688441, -14785194, 10184609, -12363380, 29287919,
+ 11864899, -24514362, -4438546},
+ },
+ {
+ {15636291, -9688557, 24204773, -7912398, 616977, -16685262, 27787600,
+ -14772189, 28944400, -1550024},
+ {16568933, 4717097, -11556148, -1102322, 15682896, -11807043, 16354577,
+ -11775962, 7689662, 11199574},
+ {30464156, -5976125, -11779434, -15670865, 23220365, 15915852, 7512774,
+ 10017326, -17749093, -9920357},
+ },
+ {
+ {10861363, 11473154, 27284546, 1981175, -30064349, 12577861, 32867885,
+ 14515107, -15438304, 10819380},
+ {4708026, 6336745, 20377586, 9066809, -11272109, 6594696, -25653668,
+ 12483688, -12668491, 5581306},
+ {19563160, 16186464, -29386857, 4097519, 10237984, -4348115, 28542350,
+ 13850243, -23678021, -15815942},
+ },
+ {
+ {5153746, 9909285, 1723747, -2777874, 30523605, 5516873, 19480852,
+ 5230134, -23952439, -15175766},
+ {-30269007, -3463509, 7665486, 10083793, 28475525, 1649722, 20654025,
+ 16520125, 30598449, 7715701},
+ {28881845, 14381568, 9657904, 3680757, -20181635, 7843316, -31400660,
+ 1370708, 29794553, -1409300},
+ },
+ {
+ {-22518993, -6692182, 14201702, -8745502, -23510406, 8844726, 18474211,
+ -1361450, -13062696, 13821877},
+ {-6455177, -7839871, 3374702, -4740862, -27098617, -10571707, 31655028,
+ -7212327, 18853322, -14220951},
+ {4566830, -12963868, -28974889, -12240689, -7602672, -2830569, -8514358,
+ -10431137, 2207753, -3209784},
+ },
+ {
+ {-25154831, -4185821, 29681144, 7868801, -6854661, -9423865, -12437364,
+ -663000, -31111463, -16132436},
+ {25576264, -2703214, 7349804, -11814844, 16472782, 9300885, 3844789,
+ 15725684, 171356, 6466918},
+ {23103977, 13316479, 9739013, -16149481, 817875, -15038942, 8965339,
+ -14088058, -30714912, 16193877},
+ },
+ {
+ {-33521811, 3180713, -2394130, 14003687, -16903474, -16270840, 17238398,
+ 4729455, -18074513, 9256800},
+ {-25182317, -4174131, 32336398, 5036987, -21236817, 11360617, 22616405,
+ 9761698, -19827198, 630305},
+ {-13720693, 2639453, -24237460, -7406481, 9494427, -5774029, -6554551,
+ -15960994, -2449256, -14291300},
+ },
+ {
+ {-3151181, -5046075, 9282714, 6866145, -31907062, -863023, -18940575,
+ 15033784, 25105118, -7894876},
+ {-24326370, 15950226, -31801215, -14592823, -11662737, -5090925,
+ 1573892, -2625887, 2198790, -15804619},
+ {-3099351, 10324967, -2241613, 7453183, -5446979, -2735503, -13812022,
+ -16236442, -32461234, -12290683},
+ },
+};
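+
+/* Bi[j] = (2*j+1)*B in precomputed (y+x, y-x, 2*d*x*y) form: the odd
+ * multiples B, 3B, ..., 15B of the base point, the fixed counterpart of the
+ * Ai table computed at runtime below. */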
+
+/* r = a * A + b * B
+ * where a = a[0]+256*a[1]+...+256^31 a[31].
+ * and b = b[0]+256*b[1]+...+256^31 b[31].
+ * B is the Ed25519 base point (x,4/5) with x positive. */
+static void
+ge_double_scalarmult_vartime(ge_p2 *r, const uint8_t *a,
+ const ge_p3 *A, const uint8_t *b) {
+ signed char aslide[256];
+ signed char bslide[256];
+ ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
+ ge_p1p1 t;
+ ge_p3 u;
+ ge_p3 A2;
+ int i;
+
+ slide(aslide, a);
+ slide(bslide, b);
+
+ x25519_ge_p3_to_cached(&Ai[0], A);
+ ge_p3_dbl(&t, A);
+ x25519_ge_p1p1_to_p3(&A2, &t);
+ x25519_ge_add(&t, &A2, &Ai[0]);
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_p3_to_cached(&Ai[1], &u);
+ x25519_ge_add(&t, &A2, &Ai[1]);
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_p3_to_cached(&Ai[2], &u);
+ x25519_ge_add(&t, &A2, &Ai[2]);
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_p3_to_cached(&Ai[3], &u);
+ x25519_ge_add(&t, &A2, &Ai[3]);
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_p3_to_cached(&Ai[4], &u);
+ x25519_ge_add(&t, &A2, &Ai[4]);
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_p3_to_cached(&Ai[5], &u);
+ x25519_ge_add(&t, &A2, &Ai[5]);
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_p3_to_cached(&Ai[6], &u);
+ x25519_ge_add(&t, &A2, &Ai[6]);
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_p3_to_cached(&Ai[7], &u);
+
+ ge_p2_0(r);
+
+ for (i = 255; i >= 0; --i) {
+ if (aslide[i] || bslide[i]) {
+ break;
+ }
+ }
+
+ for (; i >= 0; --i) {
+ ge_p2_dbl(&t, r);
+
+ if (aslide[i] > 0) {
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_add(&t, &u, &Ai[aslide[i] / 2]);
+ } else if (aslide[i] < 0) {
+ x25519_ge_p1p1_to_p3(&u, &t);
+ x25519_ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
+ }
+
+ if (bslide[i] > 0) {
+ x25519_ge_p1p1_to_p3(&u, &t);
+ ge_madd(&t, &u, &Bi[bslide[i] / 2]);
+ } else if (bslide[i] < 0) {
+ x25519_ge_p1p1_to_p3(&u, &t);
+ ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]);
+ }
+
+ x25519_ge_p1p1_to_p2(r, &t);
+ }
+}
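+
+/* Unlike the routines above, ge_double_scalarmult_vartime branches on the
+ * digits of both scalars, so it is only suitable for public inputs such as
+ * signature verification; the constant-time routines must be used whenever a
+ * scalar is secret. */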
+#endif
+
+/* The set of scalars is \Z/l
+ * where l = 2^252 + 27742317777372353535851937790883648493. */
+
+/* Input:
+ * s[0]+256*s[1]+...+256^63*s[63] = s
+ *
+ * Output:
+ * s[0]+256*s[1]+...+256^31*s[31] = s mod l
+ * where l = 2^252 + 27742317777372353535851937790883648493.
+ * Overwrites s in place. */
+void
+x25519_sc_reduce(uint8_t *s) {
+ int64_t s0 = 2097151 & load_3(s);
+ int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
+ int64_t s2 = 2097151 & (load_3(s + 5) >> 2);
+ int64_t s3 = 2097151 & (load_4(s + 7) >> 7);
+ int64_t s4 = 2097151 & (load_4(s + 10) >> 4);
+ int64_t s5 = 2097151 & (load_3(s + 13) >> 1);
+ int64_t s6 = 2097151 & (load_4(s + 15) >> 6);
+ int64_t s7 = 2097151 & (load_3(s + 18) >> 3);
+ int64_t s8 = 2097151 & load_3(s + 21);
+ int64_t s9 = 2097151 & (load_4(s + 23) >> 5);
+ int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
+ int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
+ int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
+ int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
+ int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
+ int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
+ int64_t s16 = 2097151 & load_3(s + 42);
+ int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
+ int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
+ int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
+ int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
+ int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
+ int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
+ int64_t s23 = (load_4(s + 60) >> 3);
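+
+  /* The 512-bit input is now 24 little-endian limbs of 21 bits each:
+   * s_k = (s >> (21*k)) & 0x1fffff, with 2097151 == 2^21 - 1; s23 is left
+   * unmasked so the topmost bits of the input are preserved. */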
+ int64_t carry0;
+ int64_t carry1;
+ int64_t carry2;
+ int64_t carry3;
+ int64_t carry4;
+ int64_t carry5;
+ int64_t carry6;
+ int64_t carry7;
+ int64_t carry8;
+ int64_t carry9;
+ int64_t carry10;
+ int64_t carry11;
+ int64_t carry12;
+ int64_t carry13;
+ int64_t carry14;
+ int64_t carry15;
+ int64_t carry16;
+
+ s11 += s23 * 666643;
+ s12 += s23 * 470296;
+ s13 += s23 * 654183;
+ s14 -= s23 * 997805;
+ s15 += s23 * 136657;
+ s16 -= s23 * 683901;
+ s23 = 0;
+
+ s10 += s22 * 666643;
+ s11 += s22 * 470296;
+ s12 += s22 * 654183;
+ s13 -= s22 * 997805;
+ s14 += s22 * 136657;
+ s15 -= s22 * 683901;
+ s22 = 0;
+
+ s9 += s21 * 666643;
+ s10 += s21 * 470296;
+ s11 += s21 * 654183;
+ s12 -= s21 * 997805;
+ s13 += s21 * 136657;
+ s14 -= s21 * 683901;
+ s21 = 0;
+
+ s8 += s20 * 666643;
+ s9 += s20 * 470296;
+ s10 += s20 * 654183;
+ s11 -= s20 * 997805;
+ s12 += s20 * 136657;
+ s13 -= s20 * 683901;
+ s20 = 0;
+
+ s7 += s19 * 666643;
+ s8 += s19 * 470296;
+ s9 += s19 * 654183;
+ s10 -= s19 * 997805;
+ s11 += s19 * 136657;
+ s12 -= s19 * 683901;
+ s19 = 0;
+
+ s6 += s18 * 666643;
+ s7 += s18 * 470296;
+ s8 += s18 * 654183;
+ s9 -= s18 * 997805;
+ s10 += s18 * 136657;
+ s11 -= s18 * 683901;
+ s18 = 0;
+
+ carry6 = (s6 + (1 << 20)) >> 21;
+ s7 += carry6;
+ s6 -= carry6 << 21;
+ carry8 = (s8 + (1 << 20)) >> 21;
+ s9 += carry8;
+ s8 -= carry8 << 21;
+ carry10 = (s10 + (1 << 20)) >> 21;
+ s11 += carry10;
+ s10 -= carry10 << 21;
+ carry12 = (s12 + (1 << 20)) >> 21;
+ s13 += carry12;
+ s12 -= carry12 << 21;
+ carry14 = (s14 + (1 << 20)) >> 21;
+ s15 += carry14;
+ s14 -= carry14 << 21;
+ carry16 = (s16 + (1 << 20)) >> 21;
+ s17 += carry16;
+ s16 -= carry16 << 21;
+
+ carry7 = (s7 + (1 << 20)) >> 21;
+ s8 += carry7;
+ s7 -= carry7 << 21;
+ carry9 = (s9 + (1 << 20)) >> 21;
+ s10 += carry9;
+ s9 -= carry9 << 21;
+ carry11 = (s11 + (1 << 20)) >> 21;
+ s12 += carry11;
+ s11 -= carry11 << 21;
+ carry13 = (s13 + (1 << 20)) >> 21;
+ s14 += carry13;
+ s13 -= carry13 << 21;
+ carry15 = (s15 + (1 << 20)) >> 21;
+ s16 += carry15;
+ s15 -= carry15 << 21;
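+
+  /* Each (x + (1 << 20)) >> 21 step above is a rounding carry: afterwards the
+   * touched limbs lie in [-2^20, 2^20), so the products with the 20-bit
+   * folding constants that follow stay well within int64_t. */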
+
+ s5 += s17 * 666643;
+ s6 += s17 * 470296;
+ s7 += s17 * 654183;
+ s8 -= s17 * 997805;
+ s9 += s17 * 136657;
+ s10 -= s17 * 683901;
+ s17 = 0;
+
+ s4 += s16 * 666643;
+ s5 += s16 * 470296;
+ s6 += s16 * 654183;
+ s7 -= s16 * 997805;
+ s8 += s16 * 136657;
+ s9 -= s16 * 683901;
+ s16 = 0;
+
+ s3 += s15 * 666643;
+ s4 += s15 * 470296;
+ s5 += s15 * 654183;
+ s6 -= s15 * 997805;
+ s7 += s15 * 136657;
+ s8 -= s15 * 683901;
+ s15 = 0;
+
+ s2 += s14 * 666643;
+ s3 += s14 * 470296;
+ s4 += s14 * 654183;
+ s5 -= s14 * 997805;
+ s6 += s14 * 136657;
+ s7 -= s14 * 683901;
+ s14 = 0;
+
+ s1 += s13 * 666643;
+ s2 += s13 * 470296;
+ s3 += s13 * 654183;
+ s4 -= s13 * 997805;
+ s5 += s13 * 136657;
+ s6 -= s13 * 683901;
+ s13 = 0;
+
+ s0 += s12 * 666643;
+ s1 += s12 * 470296;
+ s2 += s12 * 654183;
+ s3 -= s12 * 997805;
+ s4 += s12 * 136657;
+ s5 -= s12 * 683901;
+ s12 = 0;
+
+ carry0 = (s0 + (1 << 20)) >> 21;
+ s1 += carry0;
+ s0 -= carry0 << 21;
+ carry2 = (s2 + (1 << 20)) >> 21;
+ s3 += carry2;
+ s2 -= carry2 << 21;
+ carry4 = (s4 + (1 << 20)) >> 21;
+ s5 += carry4;
+ s4 -= carry4 << 21;
+ carry6 = (s6 + (1 << 20)) >> 21;
+ s7 += carry6;
+ s6 -= carry6 << 21;
+ carry8 = (s8 + (1 << 20)) >> 21;
+ s9 += carry8;
+ s8 -= carry8 << 21;
+ carry10 = (s10 + (1 << 20)) >> 21;
+ s11 += carry10;
+ s10 -= carry10 << 21;
+
+ carry1 = (s1 + (1 << 20)) >> 21;
+ s2 += carry1;
+ s1 -= carry1 << 21;
+ carry3 = (s3 + (1 << 20)) >> 21;
+ s4 += carry3;
+ s3 -= carry3 << 21;
+ carry5 = (s5 + (1 << 20)) >> 21;
+ s6 += carry5;
+ s5 -= carry5 << 21;
+ carry7 = (s7 + (1 << 20)) >> 21;
+ s8 += carry7;
+ s7 -= carry7 << 21;
+ carry9 = (s9 + (1 << 20)) >> 21;
+ s10 += carry9;
+ s9 -= carry9 << 21;
+ carry11 = (s11 + (1 << 20)) >> 21;
+ s12 += carry11;
+ s11 -= carry11 << 21;
+
+ s0 += s12 * 666643;
+ s1 += s12 * 470296;
+ s2 += s12 * 654183;
+ s3 -= s12 * 997805;
+ s4 += s12 * 136657;
+ s5 -= s12 * 683901;
+ s12 = 0;
+
+ carry0 = s0 >> 21;
+ s1 += carry0;
+ s0 -= carry0 << 21;
+ carry1 = s1 >> 21;
+ s2 += carry1;
+ s1 -= carry1 << 21;
+ carry2 = s2 >> 21;
+ s3 += carry2;
+ s2 -= carry2 << 21;
+ carry3 = s3 >> 21;
+ s4 += carry3;
+ s3 -= carry3 << 21;
+ carry4 = s4 >> 21;
+ s5 += carry4;
+ s4 -= carry4 << 21;
+ carry5 = s5 >> 21;
+ s6 += carry5;
+ s5 -= carry5 << 21;
+ carry6 = s6 >> 21;
+ s7 += carry6;
+ s6 -= carry6 << 21;
+ carry7 = s7 >> 21;
+ s8 += carry7;
+ s7 -= carry7 << 21;
+ carry8 = s8 >> 21;
+ s9 += carry8;
+ s8 -= carry8 << 21;
+ carry9 = s9 >> 21;
+ s10 += carry9;
+ s9 -= carry9 << 21;
+ carry10 = s10 >> 21;
+ s11 += carry10;
+ s10 -= carry10 << 21;
+ carry11 = s11 >> 21;
+ s12 += carry11;
+ s11 -= carry11 << 21;
+
+ s0 += s12 * 666643;
+ s1 += s12 * 470296;
+ s2 += s12 * 654183;
+ s3 -= s12 * 997805;
+ s4 += s12 * 136657;
+ s5 -= s12 * 683901;
+ s12 = 0;
+
+ carry0 = s0 >> 21;
+ s1 += carry0;
+ s0 -= carry0 << 21;
+ carry1 = s1 >> 21;
+ s2 += carry1;
+ s1 -= carry1 << 21;
+ carry2 = s2 >> 21;
+ s3 += carry2;
+ s2 -= carry2 << 21;
+ carry3 = s3 >> 21;
+ s4 += carry3;
+ s3 -= carry3 << 21;
+ carry4 = s4 >> 21;
+ s5 += carry4;
+ s4 -= carry4 << 21;
+ carry5 = s5 >> 21;
+ s6 += carry5;
+ s5 -= carry5 << 21;
+ carry6 = s6 >> 21;
+ s7 += carry6;
+ s6 -= carry6 << 21;
+ carry7 = s7 >> 21;
+ s8 += carry7;
+ s7 -= carry7 << 21;
+ carry8 = s8 >> 21;
+ s9 += carry8;
+ s8 -= carry8 << 21;
+ carry9 = s9 >> 21;
+ s10 += carry9;
+ s9 -= carry9 << 21;
+ carry10 = s10 >> 21;
+ s11 += carry10;
+ s10 -= carry10 << 21;
+
+ s[0] = s0 >> 0;
+ s[1] = s0 >> 8;
+ s[2] = (s0 >> 16) | (s1 << 5);
+ s[3] = s1 >> 3;
+ s[4] = s1 >> 11;
+ s[5] = (s1 >> 19) | (s2 << 2);
+ s[6] = s2 >> 6;
+ s[7] = (s2 >> 14) | (s3 << 7);
+ s[8] = s3 >> 1;
+ s[9] = s3 >> 9;
+ s[10] = (s3 >> 17) | (s4 << 4);
+ s[11] = s4 >> 4;
+ s[12] = s4 >> 12;
+ s[13] = (s4 >> 20) | (s5 << 1);
+ s[14] = s5 >> 7;
+ s[15] = (s5 >> 15) | (s6 << 6);
+ s[16] = s6 >> 2;
+ s[17] = s6 >> 10;
+ s[18] = (s6 >> 18) | (s7 << 3);
+ s[19] = s7 >> 5;
+ s[20] = s7 >> 13;
+ s[21] = s8 >> 0;
+ s[22] = s8 >> 8;
+ s[23] = (s8 >> 16) | (s9 << 5);
+ s[24] = s9 >> 3;
+ s[25] = s9 >> 11;
+ s[26] = (s9 >> 19) | (s10 << 2);
+ s[27] = s10 >> 6;
+ s[28] = (s10 >> 14) | (s11 << 7);
+ s[29] = s11 >> 1;
+ s[30] = s11 >> 9;
+ s[31] = s11 >> 17;
+}
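+
+/* The folding constants encode 2^252 = -27742317777372353535851937790883648493
+ * (mod l): in signed 21-bit limbs that value is 666643 + 470296*2^21 +
+ * 654183*2^42 - 997805*2^63 + 136657*2^84 - 683901*2^105, so each limb s_k
+ * with k >= 12 can be scattered onto s_(k-12)..s_(k-7) with exactly those
+ * coefficients and then cleared. */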
+
+#ifdef ED25519
+/* Input:
+ * a[0]+256*a[1]+...+256^31*a[31] = a
+ * b[0]+256*b[1]+...+256^31*b[31] = b
+ * c[0]+256*c[1]+...+256^31*c[31] = c
+ *
+ * Output:
+ * s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
+ * where l = 2^252 + 27742317777372353535851937790883648493. */
+static void
+sc_muladd(uint8_t *s, const uint8_t *a, const uint8_t *b,
+ const uint8_t *c)
+{
+ int64_t a0 = 2097151 & load_3(a);
+ int64_t a1 = 2097151 & (load_4(a + 2) >> 5);
+ int64_t a2 = 2097151 & (load_3(a + 5) >> 2);
+ int64_t a3 = 2097151 & (load_4(a + 7) >> 7);
+ int64_t a4 = 2097151 & (load_4(a + 10) >> 4);
+ int64_t a5 = 2097151 & (load_3(a + 13) >> 1);
+ int64_t a6 = 2097151 & (load_4(a + 15) >> 6);
+ int64_t a7 = 2097151 & (load_3(a + 18) >> 3);
+ int64_t a8 = 2097151 & load_3(a + 21);
+ int64_t a9 = 2097151 & (load_4(a + 23) >> 5);
+ int64_t a10 = 2097151 & (load_3(a + 26) >> 2);
+ int64_t a11 = (load_4(a + 28) >> 7);
+ int64_t b0 = 2097151 & load_3(b);
+ int64_t b1 = 2097151 & (load_4(b + 2) >> 5);
+ int64_t b2 = 2097151 & (load_3(b + 5) >> 2);
+ int64_t b3 = 2097151 & (load_4(b + 7) >> 7);
+ int64_t b4 = 2097151 & (load_4(b + 10) >> 4);
+ int64_t b5 = 2097151 & (load_3(b + 13) >> 1);
+ int64_t b6 = 2097151 & (load_4(b + 15) >> 6);
+ int64_t b7 = 2097151 & (load_3(b + 18) >> 3);
+ int64_t b8 = 2097151 & load_3(b + 21);
+ int64_t b9 = 2097151 & (load_4(b + 23) >> 5);
+ int64_t b10 = 2097151 & (load_3(b + 26) >> 2);
+ int64_t b11 = (load_4(b + 28) >> 7);
+ int64_t c0 = 2097151 & load_3(c);
+ int64_t c1 = 2097151 & (load_4(c + 2) >> 5);
+ int64_t c2 = 2097151 & (load_3(c + 5) >> 2);
+ int64_t c3 = 2097151 & (load_4(c + 7) >> 7);
+ int64_t c4 = 2097151 & (load_4(c + 10) >> 4);
+ int64_t c5 = 2097151 & (load_3(c + 13) >> 1);
+ int64_t c6 = 2097151 & (load_4(c + 15) >> 6);
+ int64_t c7 = 2097151 & (load_3(c + 18) >> 3);
+ int64_t c8 = 2097151 & load_3(c + 21);
+ int64_t c9 = 2097151 & (load_4(c + 23) >> 5);
+ int64_t c10 = 2097151 & (load_3(c + 26) >> 2);
+ int64_t c11 = (load_4(c + 28) >> 7);
+ int64_t s0;
+ int64_t s1;
+ int64_t s2;
+ int64_t s3;
+ int64_t s4;
+ int64_t s5;
+ int64_t s6;
+ int64_t s7;
+ int64_t s8;
+ int64_t s9;
+ int64_t s10;
+ int64_t s11;
+ int64_t s12;
+ int64_t s13;
+ int64_t s14;
+ int64_t s15;
+ int64_t s16;
+ int64_t s17;
+ int64_t s18;
+ int64_t s19;
+ int64_t s20;
+ int64_t s21;
+ int64_t s22;
+ int64_t s23;
+ int64_t carry0;
+ int64_t carry1;
+ int64_t carry2;
+ int64_t carry3;
+ int64_t carry4;
+ int64_t carry5;
+ int64_t carry6;
+ int64_t carry7;
+ int64_t carry8;
+ int64_t carry9;
+ int64_t carry10;
+ int64_t carry11;
+ int64_t carry12;
+ int64_t carry13;
+ int64_t carry14;
+ int64_t carry15;
+ int64_t carry16;
+ int64_t carry17;
+ int64_t carry18;
+ int64_t carry19;
+ int64_t carry20;
+ int64_t carry21;
+ int64_t carry22;
+
+ s0 = c0 + a0 * b0;
+ s1 = c1 + a0 * b1 + a1 * b0;
+ s2 = c2 + a0 * b2 + a1 * b1 + a2 * b0;
+ s3 = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
+ s4 = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0;
+ s5 = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0;
+ s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0;
+ s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 +
+ a6 * b1 + a7 * b0;
+ s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 +
+ a6 * b2 + a7 * b1 + a8 * b0;
+ s9 = c9 + a0 * b9 + a1 * b8 + a2 * b7 + a3 * b6 + a4 * b5 + a5 * b4 +
+ a6 * b3 + a7 * b2 + a8 * b1 + a9 * b0;
+ s10 = c10 + a0 * b10 + a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 +
+ a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1 + a10 * b0;
+ s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 +
+ a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2 + a10 * b1 + a11 * b0;
+ s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 + a7 * b5 +
+ a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1;
+ s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 + a8 * b5 +
+ a9 * b4 + a10 * b3 + a11 * b2;
+ s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 + a9 * b5 +
+ a10 * b4 + a11 * b3;
+ s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 + a10 * b5 +
+ a11 * b4;
+ s16 = a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 + a11 * b5;
+ s17 = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6;
+ s18 = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7;
+ s19 = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8;
+ s20 = a9 * b11 + a10 * b10 + a11 * b9;
+ s21 = a10 * b11 + a11 * b10;
+ s22 = a11 * b11;
+ s23 = 0;
+
+ carry0 = (s0 + (1 << 20)) >> 21;
+ s1 += carry0;
+ s0 -= carry0 << 21;
+ carry2 = (s2 + (1 << 20)) >> 21;
+ s3 += carry2;
+ s2 -= carry2 << 21;
+ carry4 = (s4 + (1 << 20)) >> 21;
+ s5 += carry4;
+ s4 -= carry4 << 21;
+ carry6 = (s6 + (1 << 20)) >> 21;
+ s7 += carry6;
+ s6 -= carry6 << 21;
+ carry8 = (s8 + (1 << 20)) >> 21;
+ s9 += carry8;
+ s8 -= carry8 << 21;
+ carry10 = (s10 + (1 << 20)) >> 21;
+ s11 += carry10;
+ s10 -= carry10 << 21;
+ carry12 = (s12 + (1 << 20)) >> 21;
+ s13 += carry12;
+ s12 -= carry12 << 21;
+ carry14 = (s14 + (1 << 20)) >> 21;
+ s15 += carry14;
+ s14 -= carry14 << 21;
+ carry16 = (s16 + (1 << 20)) >> 21;
+ s17 += carry16;
+ s16 -= carry16 << 21;
+ carry18 = (s18 + (1 << 20)) >> 21;
+ s19 += carry18;
+ s18 -= carry18 << 21;
+ carry20 = (s20 + (1 << 20)) >> 21;
+ s21 += carry20;
+ s20 -= carry20 << 21;
+ carry22 = (s22 + (1 << 20)) >> 21;
+ s23 += carry22;
+ s22 -= carry22 << 21;
+
+ carry1 = (s1 + (1 << 20)) >> 21;
+ s2 += carry1;
+ s1 -= carry1 << 21;
+ carry3 = (s3 + (1 << 20)) >> 21;
+ s4 += carry3;
+ s3 -= carry3 << 21;
+ carry5 = (s5 + (1 << 20)) >> 21;
+ s6 += carry5;
+ s5 -= carry5 << 21;
+ carry7 = (s7 + (1 << 20)) >> 21;
+ s8 += carry7;
+ s7 -= carry7 << 21;
+ carry9 = (s9 + (1 << 20)) >> 21;
+ s10 += carry9;
+ s9 -= carry9 << 21;
+ carry11 = (s11 + (1 << 20)) >> 21;
+ s12 += carry11;
+ s11 -= carry11 << 21;
+ carry13 = (s13 + (1 << 20)) >> 21;
+ s14 += carry13;
+ s13 -= carry13 << 21;
+ carry15 = (s15 + (1 << 20)) >> 21;
+ s16 += carry15;
+ s15 -= carry15 << 21;
+ carry17 = (s17 + (1 << 20)) >> 21;
+ s18 += carry17;
+ s17 -= carry17 << 21;
+ carry19 = (s19 + (1 << 20)) >> 21;
+ s20 += carry19;
+ s19 -= carry19 << 21;
+ carry21 = (s21 + (1 << 20)) >> 21;
+ s22 += carry21;
+ s21 -= carry21 << 21;
+
+ s11 += s23 * 666643;
+ s12 += s23 * 470296;
+ s13 += s23 * 654183;
+ s14 -= s23 * 997805;
+ s15 += s23 * 136657;
+ s16 -= s23 * 683901;
+ s23 = 0;
+
+ s10 += s22 * 666643;
+ s11 += s22 * 470296;
+ s12 += s22 * 654183;
+ s13 -= s22 * 997805;
+ s14 += s22 * 136657;
+ s15 -= s22 * 683901;
+ s22 = 0;
+
+ s9 += s21 * 666643;
+ s10 += s21 * 470296;
+ s11 += s21 * 654183;
+ s12 -= s21 * 997805;
+ s13 += s21 * 136657;
+ s14 -= s21 * 683901;
+ s21 = 0;
+
+ s8 += s20 * 666643;
+ s9 += s20 * 470296;
+ s10 += s20 * 654183;
+ s11 -= s20 * 997805;
+ s12 += s20 * 136657;
+ s13 -= s20 * 683901;
+ s20 = 0;
+
+ s7 += s19 * 666643;
+ s8 += s19 * 470296;
+ s9 += s19 * 654183;
+ s10 -= s19 * 997805;
+ s11 += s19 * 136657;
+ s12 -= s19 * 683901;
+ s19 = 0;
+
+ s6 += s18 * 666643;
+ s7 += s18 * 470296;
+ s8 += s18 * 654183;
+ s9 -= s18 * 997805;
+ s10 += s18 * 136657;
+ s11 -= s18 * 683901;
+ s18 = 0;
+
+ carry6 = (s6 + (1 << 20)) >> 21;
+ s7 += carry6;
+ s6 -= carry6 << 21;
+ carry8 = (s8 + (1 << 20)) >> 21;
+ s9 += carry8;
+ s8 -= carry8 << 21;
+ carry10 = (s10 + (1 << 20)) >> 21;
+ s11 += carry10;
+ s10 -= carry10 << 21;
+ carry12 = (s12 + (1 << 20)) >> 21;
+ s13 += carry12;
+ s12 -= carry12 << 21;
+ carry14 = (s14 + (1 << 20)) >> 21;
+ s15 += carry14;
+ s14 -= carry14 << 21;
+ carry16 = (s16 + (1 << 20)) >> 21;
+ s17 += carry16;
+ s16 -= carry16 << 21;
+
+ carry7 = (s7 + (1 << 20)) >> 21;
+ s8 += carry7;
+ s7 -= carry7 << 21;
+ carry9 = (s9 + (1 << 20)) >> 21;
+ s10 += carry9;
+ s9 -= carry9 << 21;
+ carry11 = (s11 + (1 << 20)) >> 21;
+ s12 += carry11;
+ s11 -= carry11 << 21;
+ carry13 = (s13 + (1 << 20)) >> 21;
+ s14 += carry13;
+ s13 -= carry13 << 21;
+ carry15 = (s15 + (1 << 20)) >> 21;
+ s16 += carry15;
+ s15 -= carry15 << 21;
+
+ s5 += s17 * 666643;
+ s6 += s17 * 470296;
+ s7 += s17 * 654183;
+ s8 -= s17 * 997805;
+ s9 += s17 * 136657;
+ s10 -= s17 * 683901;
+ s17 = 0;
+
+ s4 += s16 * 666643;
+ s5 += s16 * 470296;
+ s6 += s16 * 654183;
+ s7 -= s16 * 997805;
+ s8 += s16 * 136657;
+ s9 -= s16 * 683901;
+ s16 = 0;
+
+ s3 += s15 * 666643;
+ s4 += s15 * 470296;
+ s5 += s15 * 654183;
+ s6 -= s15 * 997805;
+ s7 += s15 * 136657;
+ s8 -= s15 * 683901;
+ s15 = 0;
+
+ s2 += s14 * 666643;
+ s3 += s14 * 470296;
+ s4 += s14 * 654183;
+ s5 -= s14 * 997805;
+ s6 += s14 * 136657;
+ s7 -= s14 * 683901;
+ s14 = 0;
+
+ s1 += s13 * 666643;
+ s2 += s13 * 470296;
+ s3 += s13 * 654183;
+ s4 -= s13 * 997805;
+ s5 += s13 * 136657;
+ s6 -= s13 * 683901;
+ s13 = 0;
+
+ s0 += s12 * 666643;
+ s1 += s12 * 470296;
+ s2 += s12 * 654183;
+ s3 -= s12 * 997805;
+ s4 += s12 * 136657;
+ s5 -= s12 * 683901;
+ s12 = 0;
+
+ carry0 = (s0 + (1 << 20)) >> 21;
+ s1 += carry0;
+ s0 -= carry0 << 21;
+ carry2 = (s2 + (1 << 20)) >> 21;
+ s3 += carry2;
+ s2 -= carry2 << 21;
+ carry4 = (s4 + (1 << 20)) >> 21;
+ s5 += carry4;
+ s4 -= carry4 << 21;
+ carry6 = (s6 + (1 << 20)) >> 21;
+ s7 += carry6;
+ s6 -= carry6 << 21;
+ carry8 = (s8 + (1 << 20)) >> 21;
+ s9 += carry8;
+ s8 -= carry8 << 21;
+ carry10 = (s10 + (1 << 20)) >> 21;
+ s11 += carry10;
+ s10 -= carry10 << 21;
+
+ carry1 = (s1 + (1 << 20)) >> 21;
+ s2 += carry1;
+ s1 -= carry1 << 21;
+ carry3 = (s3 + (1 << 20)) >> 21;
+ s4 += carry3;
+ s3 -= carry3 << 21;
+ carry5 = (s5 + (1 << 20)) >> 21;
+ s6 += carry5;
+ s5 -= carry5 << 21;
+ carry7 = (s7 + (1 << 20)) >> 21;
+ s8 += carry7;
+ s7 -= carry7 << 21;
+ carry9 = (s9 + (1 << 20)) >> 21;
+ s10 += carry9;
+ s9 -= carry9 << 21;
+ carry11 = (s11 + (1 << 20)) >> 21;
+ s12 += carry11;
+ s11 -= carry11 << 21;
+
+ s0 += s12 * 666643;
+ s1 += s12 * 470296;
+ s2 += s12 * 654183;
+ s3 -= s12 * 997805;
+ s4 += s12 * 136657;
+ s5 -= s12 * 683901;
+ s12 = 0;
+
+ carry0 = s0 >> 21;
+ s1 += carry0;
+ s0 -= carry0 << 21;
+ carry1 = s1 >> 21;
+ s2 += carry1;
+ s1 -= carry1 << 21;
+ carry2 = s2 >> 21;
+ s3 += carry2;
+ s2 -= carry2 << 21;
+ carry3 = s3 >> 21;
+ s4 += carry3;
+ s3 -= carry3 << 21;
+ carry4 = s4 >> 21;
+ s5 += carry4;
+ s4 -= carry4 << 21;
+ carry5 = s5 >> 21;
+ s6 += carry5;
+ s5 -= carry5 << 21;
+ carry6 = s6 >> 21;
+ s7 += carry6;
+ s6 -= carry6 << 21;
+ carry7 = s7 >> 21;
+ s8 += carry7;
+ s7 -= carry7 << 21;
+ carry8 = s8 >> 21;
+ s9 += carry8;
+ s8 -= carry8 << 21;
+ carry9 = s9 >> 21;
+ s10 += carry9;
+ s9 -= carry9 << 21;
+ carry10 = s10 >> 21;
+ s11 += carry10;
+ s10 -= carry10 << 21;
+ carry11 = s11 >> 21;
+ s12 += carry11;
+ s11 -= carry11 << 21;
+
+ s0 += s12 * 666643;
+ s1 += s12 * 470296;
+ s2 += s12 * 654183;
+ s3 -= s12 * 997805;
+ s4 += s12 * 136657;
+ s5 -= s12 * 683901;
+ s12 = 0;
+
+ carry0 = s0 >> 21;
+ s1 += carry0;
+ s0 -= carry0 << 21;
+ carry1 = s1 >> 21;
+ s2 += carry1;
+ s1 -= carry1 << 21;
+ carry2 = s2 >> 21;
+ s3 += carry2;
+ s2 -= carry2 << 21;
+ carry3 = s3 >> 21;
+ s4 += carry3;
+ s3 -= carry3 << 21;
+ carry4 = s4 >> 21;
+ s5 += carry4;
+ s4 -= carry4 << 21;
+ carry5 = s5 >> 21;
+ s6 += carry5;
+ s5 -= carry5 << 21;
+ carry6 = s6 >> 21;
+ s7 += carry6;
+ s6 -= carry6 << 21;
+ carry7 = s7 >> 21;
+ s8 += carry7;
+ s7 -= carry7 << 21;
+ carry8 = s8 >> 21;
+ s9 += carry8;
+ s8 -= carry8 << 21;
+ carry9 = s9 >> 21;
+ s10 += carry9;
+ s9 -= carry9 << 21;
+ carry10 = s10 >> 21;
+ s11 += carry10;
+ s10 -= carry10 << 21;
+
+ s[0] = s0 >> 0;
+ s[1] = s0 >> 8;
+ s[2] = (s0 >> 16) | (s1 << 5);
+ s[3] = s1 >> 3;
+ s[4] = s1 >> 11;
+ s[5] = (s1 >> 19) | (s2 << 2);
+ s[6] = s2 >> 6;
+ s[7] = (s2 >> 14) | (s3 << 7);
+ s[8] = s3 >> 1;
+ s[9] = s3 >> 9;
+ s[10] = (s3 >> 17) | (s4 << 4);
+ s[11] = s4 >> 4;
+ s[12] = s4 >> 12;
+ s[13] = (s4 >> 20) | (s5 << 1);
+ s[14] = s5 >> 7;
+ s[15] = (s5 >> 15) | (s6 << 6);
+ s[16] = s6 >> 2;
+ s[17] = s6 >> 10;
+ s[18] = (s6 >> 18) | (s7 << 3);
+ s[19] = s7 >> 5;
+ s[20] = s7 >> 13;
+ s[21] = s8 >> 0;
+ s[22] = s8 >> 8;
+ s[23] = (s8 >> 16) | (s9 << 5);
+ s[24] = s9 >> 3;
+ s[25] = s9 >> 11;
+ s[26] = (s9 >> 19) | (s10 << 2);
+ s[27] = s10 >> 6;
+ s[28] = (s10 >> 14) | (s11 << 7);
+ s[29] = s11 >> 1;
+ s[30] = s11 >> 9;
+ s[31] = s11 >> 17;
+}
+#endif
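+
+/* A note on the fold constants above: the group order is
+ * l = 2^252 + 27742317777372353535851937790883648493, so
+ * 2^252 == -27742317777372353535851937790883648493 (mod l). Each limb
+ * s[k] with k >= 12 can therefore be folded into s[k-12]..s[k-7] using
+ * what appear to be the signed radix-2^21 digits of that constant,
+ * namely 666643, 470296, 654183, -997805, 136657 and -683901. */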
+
+#ifdef ED25519
+void ED25519_keypair(uint8_t out_public_key[32], uint8_t out_private_key[64]) {
+ uint8_t seed[32];
+ arc4random_buf(seed, 32);
+
+ uint8_t az[SHA512_DIGEST_LENGTH];
+ SHA512(seed, 32, az);
+
+ az[0] &= 248;
+ az[31] &= 63;
+ az[31] |= 64;
+
+ ge_p3 A;
+ x25519_ge_scalarmult_base(&A, az);
+ ge_p3_tobytes(out_public_key, &A);
+
+ memcpy(out_private_key, seed, 32);
+ memmove(out_private_key + 32, out_public_key, 32);
+}
+
+int ED25519_sign(uint8_t *out_sig, const uint8_t *message, size_t message_len,
+ const uint8_t private_key[64]) {
+ uint8_t az[SHA512_DIGEST_LENGTH];
+ SHA512(private_key, 32, az);
+
+ az[0] &= 248;
+ az[31] &= 63;
+ az[31] |= 64;
+
+ SHA512_CTX hash_ctx;
+ SHA512_Init(&hash_ctx);
+ SHA512_Update(&hash_ctx, az + 32, 32);
+ SHA512_Update(&hash_ctx, message, message_len);
+ uint8_t nonce[SHA512_DIGEST_LENGTH];
+ SHA512_Final(nonce, &hash_ctx);
+
+ x25519_sc_reduce(nonce);
+ ge_p3 R;
+ x25519_ge_scalarmult_base(&R, nonce);
+ ge_p3_tobytes(out_sig, &R);
+
+ SHA512_Init(&hash_ctx);
+ SHA512_Update(&hash_ctx, out_sig, 32);
+ SHA512_Update(&hash_ctx, private_key + 32, 32);
+ SHA512_Update(&hash_ctx, message, message_len);
+ uint8_t hram[SHA512_DIGEST_LENGTH];
+ SHA512_Final(hram, &hash_ctx);
+
+ x25519_sc_reduce(hram);
+ sc_muladd(out_sig + 32, hram, az, nonce);
+
+ return 1;
+}
+
+int ED25519_verify(const uint8_t *message, size_t message_len,
+ const uint8_t signature[64], const uint8_t public_key[32]) {
+ ge_p3 A;
+ if ((signature[63] & 224) != 0 ||
+ x25519_ge_frombytes_vartime(&A, public_key) != 0) {
+ return 0;
+ }
+
+ fe_neg(A.X, A.X);
+ fe_neg(A.T, A.T);
+
+ uint8_t pkcopy[32];
+ memcpy(pkcopy, public_key, 32);
+ uint8_t rcopy[32];
+ memcpy(rcopy, signature, 32);
+ uint8_t scopy[32];
+ memcpy(scopy, signature + 32, 32);
+
+ SHA512_CTX hash_ctx;
+ SHA512_Init(&hash_ctx);
+ SHA512_Update(&hash_ctx, signature, 32);
+ SHA512_Update(&hash_ctx, public_key, 32);
+ SHA512_Update(&hash_ctx, message, message_len);
+ uint8_t h[SHA512_DIGEST_LENGTH];
+ SHA512_Final(h, &hash_ctx);
+
+ x25519_sc_reduce(h);
+
+ ge_p2 R;
+ ge_double_scalarmult_vartime(&R, h, &A, scopy);
+
+ uint8_t rcheck[32];
+ x25519_ge_tobytes(rcheck, &R);
+
+ return timingsafe_memcmp(rcheck, rcopy, sizeof(rcheck)) == 0;
+}
+#endif
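+
+/* A minimal round-trip sketch of the three functions above;
+ * ed25519_roundtrip and the ED25519_EXAMPLE guard are hypothetical names,
+ * and the sketch assumes this file is built with ED25519 defined. */
+#ifdef ED25519_EXAMPLE
+static int
+ed25519_roundtrip(void)
+{
+	uint8_t pk[32], sk[64], sig[64];
+	const uint8_t msg[] = "hello";
+
+	ED25519_keypair(pk, sk);
+	ED25519_sign(sig, msg, sizeof(msg) - 1, sk);
+	/* returns 1 iff the signature verifies under pk */
+	return ED25519_verify(msg, sizeof(msg) - 1, sig, pk);
+}
+#endif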
+
+/* Replace (f,g) with (g,f) if b == 1;
+ * replace (f,g) with (f,g) if b == 0.
+ *
+ * Preconditions: b in {0,1}. */
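+/* (The 0-b below relies on unsigned wraparound: b == 1 becomes an all-ones
+ * mask and b == 0 stays zero, so the XOR-and-mask sequence swaps without a
+ * data-dependent branch.) */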
+static void fe_cswap(fe f, fe g, unsigned int b) {
+ b = 0-b;
+ unsigned i;
+ for (i = 0; i < 10; i++) {
+ int32_t x = f[i] ^ g[i];
+ x &= b;
+ f[i] ^= x;
+ g[i] ^= x;
+ }
+}
+
+/* h = f * 121666
+ * Can overlap h with f.
+ *
+ * Preconditions:
+ * |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+ *
+ * Postconditions:
+ * |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. */
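+/* (121666 is the curve25519 ladder constant (A + 2)/4 with A = 486662.) */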
+static void fe_mul121666(fe h, fe f) {
+ int32_t f0 = f[0];
+ int32_t f1 = f[1];
+ int32_t f2 = f[2];
+ int32_t f3 = f[3];
+ int32_t f4 = f[4];
+ int32_t f5 = f[5];
+ int32_t f6 = f[6];
+ int32_t f7 = f[7];
+ int32_t f8 = f[8];
+ int32_t f9 = f[9];
+ int64_t h0 = f0 * (int64_t) 121666;
+ int64_t h1 = f1 * (int64_t) 121666;
+ int64_t h2 = f2 * (int64_t) 121666;
+ int64_t h3 = f3 * (int64_t) 121666;
+ int64_t h4 = f4 * (int64_t) 121666;
+ int64_t h5 = f5 * (int64_t) 121666;
+ int64_t h6 = f6 * (int64_t) 121666;
+ int64_t h7 = f7 * (int64_t) 121666;
+ int64_t h8 = f8 * (int64_t) 121666;
+ int64_t h9 = f9 * (int64_t) 121666;
+ int64_t carry0;
+ int64_t carry1;
+ int64_t carry2;
+ int64_t carry3;
+ int64_t carry4;
+ int64_t carry5;
+ int64_t carry6;
+ int64_t carry7;
+ int64_t carry8;
+ int64_t carry9;
+
+ carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits;
+ carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits;
+ carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits;
+ carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits;
+ carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits;
+
+ carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
+ carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits;
+ carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
+ carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits;
+ carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits;
+
+ h[0] = h0;
+ h[1] = h1;
+ h[2] = h2;
+ h[3] = h3;
+ h[4] = h4;
+ h[5] = h5;
+ h[6] = h6;
+ h[7] = h7;
+ h[8] = h8;
+ h[9] = h9;
+}
+
+void
+x25519_scalar_mult_generic(uint8_t out[32], const uint8_t scalar[32],
+ const uint8_t point[32]) {
+ fe x1, x2, z2, x3, z3, tmp0, tmp1;
+
+ uint8_t e[32];
+ memcpy(e, scalar, 32);
+ e[0] &= 248;
+ e[31] &= 127;
+ e[31] |= 64;
+ fe_frombytes(x1, point);
+ fe_1(x2);
+ fe_0(z2);
+ fe_copy(x3, x1);
+ fe_1(z3);
+
+ unsigned swap = 0;
+ int pos;
+ for (pos = 254; pos >= 0; --pos) {
+ unsigned b = 1 & (e[pos / 8] >> (pos & 7));
+ swap ^= b;
+ fe_cswap(x2, x3, swap);
+ fe_cswap(z2, z3, swap);
+ swap = b;
+ fe_sub(tmp0, x3, z3);
+ fe_sub(tmp1, x2, z2);
+ fe_add(x2, x2, z2);
+ fe_add(z2, x3, z3);
+ fe_mul(z3, tmp0, x2);
+ fe_mul(z2, z2, tmp1);
+ fe_sq(tmp0, tmp1);
+ fe_sq(tmp1, x2);
+ fe_add(x3, z3, z2);
+ fe_sub(z2, z3, z2);
+ fe_mul(x2, tmp1, tmp0);
+ fe_sub(tmp1, tmp1, tmp0);
+ fe_sq(z2, z2);
+ fe_mul121666(z3, tmp1);
+ fe_sq(x3, x3);
+ fe_add(tmp0, tmp0, z3);
+ fe_mul(z3, x1, z2);
+ fe_mul(z2, tmp1, tmp0);
+ }
+ fe_cswap(x2, x3, swap);
+ fe_cswap(z2, z3, swap);
+
+ fe_invert(z2, z2);
+ fe_mul(x2, x2, z2);
+ fe_tobytes(out, x2);
+}
+
+#ifdef unused
+void
+x25519_public_from_private_generic(uint8_t out_public_value[32],
+ const uint8_t private_key[32])
+{
+ uint8_t e[32];
+
+ memcpy(e, private_key, 32);
+ e[0] &= 248;
+ e[31] &= 127;
+ e[31] |= 64;
+
+ ge_p3 A;
+ x25519_ge_scalarmult_base(&A, e);
+
+ /* We only need the u-coordinate of the curve25519 point. The map is
+ * u=(y+1)/(1-y). Since y=Y/Z, this gives u=(Z+Y)/(Z-Y). */
+ fe zplusy, zminusy, zminusy_inv;
+ fe_add(zplusy, A.Z, A.Y);
+ fe_sub(zminusy, A.Z, A.Y);
+ fe_invert(zminusy_inv, zminusy);
+ fe_mul(zplusy, zplusy, zminusy_inv);
+ fe_tobytes(out_public_value, zplusy);
+}
+#endif
+
+void
+x25519_public_from_private(uint8_t out_public_value[32],
+ const uint8_t private_key[32])
+{
+	static const uint8_t kMontgomeryBasePoint[32] = {9};
+
+	x25519_scalar_mult(out_public_value, private_key, kMontgomeryBasePoint);
+}
+
+void
+X25519_keypair(uint8_t out_public_value[X25519_KEY_LENGTH],
+ uint8_t out_private_key[X25519_KEY_LENGTH])
+{
+ /* All X25519 implementations should decode scalars correctly (see
+ * https://tools.ietf.org/html/rfc7748#section-5). However, if an
+ * implementation doesn't then it might interoperate with random keys a
+ * fraction of the time because they'll, randomly, happen to be correctly
+ * formed.
+ *
+ * Thus we do the opposite of the masking here to make sure that our private
+ * keys are never correctly masked and so, hopefully, any incorrect
+ * implementations are deterministically broken.
+ *
+ * This does not affect security because, although we're throwing away
+ * entropy, a valid implementation of scalarmult should throw away the exact
+ * same bits anyway. */
+ arc4random_buf(out_private_key, 32);
+
+ out_private_key[0] |= 7;
+ out_private_key[31] &= 63;
+ out_private_key[31] |= 128;
+
+ x25519_public_from_private(out_public_value, out_private_key);
+}
+
+int
+X25519(uint8_t out_shared_key[X25519_KEY_LENGTH],
+ const uint8_t private_key[X25519_KEY_LENGTH],
+ const uint8_t peer_public_value[X25519_KEY_LENGTH])
+{
+ static const uint8_t kZeros[32] = {0};
+
+ x25519_scalar_mult(out_shared_key, private_key, peer_public_value);
+
+ /* The all-zero output results when the input is a point of small order. */
+ return timingsafe_memcmp(kZeros, out_shared_key, 32) != 0;
+}
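+
+/* A minimal key-agreement sketch using the public API above; x25519_demo
+ * and the X25519_EXAMPLE guard are hypothetical names for illustration. */
+#ifdef X25519_EXAMPLE
+static int
+x25519_demo(void)
+{
+	uint8_t apub[32], apriv[32], bpub[32], bpriv[32];
+	uint8_t k1[32], k2[32];
+
+	X25519_keypair(apub, apriv);
+	X25519_keypair(bpub, bpriv);
+
+	/* X25519() returns 0 if the peer supplied a small-order point */
+	if (!X25519(k1, apriv, bpub) || !X25519(k2, bpriv, apub))
+		return 0;
+	/* both sides arrive at the same shared secret */
+	return timingsafe_memcmp(k1, k2, 32) == 0;
+}
+#endif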
diff --git a/ext/libressl/crypto/curve25519/curve25519_internal.h b/ext/libressl/crypto/curve25519/curve25519_internal.h
new file mode 100644
index 0000000..09d20a4
--- /dev/null
+++ b/ext/libressl/crypto/curve25519/curve25519_internal.h
@@ -0,0 +1,99 @@
+/* $OpenBSD: curve25519_internal.h,v 1.3 2019/05/11 15:55:52 tb Exp $ */
+/*
+ * Copyright (c) 2015, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef HEADER_CURVE25519_INTERNAL_H
+#define HEADER_CURVE25519_INTERNAL_H
+
+#include <stdint.h>
+
+__BEGIN_HIDDEN_DECLS
+
+/* fe means field element. Here the field is \Z/(2^255-19). An element t,
+ * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
+ * t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
+ * context. */
+typedef int32_t fe[10];
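+
+/* The limb exponents implied above are 0, 26, 51, 77, 102, 128, 153, 179,
+ * 204 and 230, i.e. ceil(25.5 * i): limbs alternate between 26 and 25 bits.
+ * A one-line sketch of that schedule (fe_limb_exponent is a hypothetical
+ * helper, not part of this header):
+ *
+ *	static int fe_limb_exponent(int i) { return (51 * i + 1) / 2; }
+ */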
+
+/* ge means group element.
+ *
+ * Here the group is the set of pairs (x,y) of field elements (see fe.h)
+ * satisfying -x^2 + y^2 = 1 + d x^2y^2
+ * where d = -121665/121666.
+ *
+ * Representations:
+ * ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z
+ * ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
+ * ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
+ * ge_precomp (Duif): (y+x,y-x,2dxy) */
+
+typedef struct {
+ fe X;
+ fe Y;
+ fe Z;
+} ge_p2;
+
+typedef struct {
+ fe X;
+ fe Y;
+ fe Z;
+ fe T;
+} ge_p3;
+
+typedef struct {
+ fe X;
+ fe Y;
+ fe Z;
+ fe T;
+} ge_p1p1;
+
+typedef struct {
+ fe yplusx;
+ fe yminusx;
+ fe xy2d;
+} ge_precomp;
+
+typedef struct {
+ fe YplusX;
+ fe YminusX;
+ fe Z;
+ fe T2d;
+} ge_cached;
+
+void x25519_ge_tobytes(uint8_t *s, const ge_p2 *h);
+int x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t *s);
+void x25519_ge_p3_to_cached(ge_cached *r, const ge_p3 *p);
+void x25519_ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p);
+void x25519_ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p);
+void x25519_ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);
+void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);
+void x25519_ge_scalarmult_small_precomp(ge_p3 *h, const uint8_t a[32],
+ const uint8_t precomp_table[15 * 2 * 32]);
+void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32]);
+void x25519_ge_scalarmult(ge_p2 *r, const uint8_t *scalar, const ge_p3 *A);
+void x25519_sc_reduce(uint8_t *s);
+
+void x25519_public_from_private(uint8_t out_public_value[32],
+ const uint8_t private_key[32]);
+
+void x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32],
+ const uint8_t point[32]);
+void x25519_scalar_mult_generic(uint8_t out[32], const uint8_t scalar[32],
+ const uint8_t point[32]);
+
+__END_HIDDEN_DECLS
+
+#endif /* HEADER_CURVE25519_INTERNAL_H */
diff --git a/ext/libressl/crypto/modes/Makefile b/ext/libressl/crypto/modes/Makefile
new file mode 100644
index 0000000..aeba042
--- /dev/null
+++ b/ext/libressl/crypto/modes/Makefile
@@ -0,0 +1,14 @@
+include ../../ssl_common.mk
+CFLAGS += -D__BEGIN_HIDDEN_DECLS= -D__END_HIDDEN_DECLS=
+
+obj = cbc128.o ccm128.o cfb128.o ctr128.o cts128.o gcm128.o ofb128.o xts128.o
+
+
+all: $(obj)
+dep: all
+
+%.o: %.c
+ $(CC) $(CFLAGS) -c $<
+
+clean:
+ rm -f *.o *.a
diff --git a/ext/libressl/crypto/modes/cbc128.c b/ext/libressl/crypto/modes/cbc128.c
new file mode 100644
index 0000000..7502a48
--- /dev/null
+++ b/ext/libressl/crypto/modes/cbc128.c
@@ -0,0 +1,202 @@
+/* $OpenBSD: cbc128.c,v 1.4 2015/02/10 09:46:30 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
+#include <string.h>
+
+#ifndef MODES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#endif
+
+#undef STRICT_ALIGNMENT
+#ifdef __STRICT_ALIGNMENT
+#define STRICT_ALIGNMENT 1
+#else
+#define STRICT_ALIGNMENT 0
+#endif
+
+void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block)
+{
+ size_t n;
+ const unsigned char *iv = ivec;
+
+#if !defined(OPENSSL_SMALL_FOOTPRINT)
+ if (STRICT_ALIGNMENT &&
+ ((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0) {
+ while (len>=16) {
+ for(n=0; n<16; ++n)
+ out[n] = in[n] ^ iv[n];
+ (*block)(out, out, key);
+ iv = out;
+ len -= 16;
+ in += 16;
+ out += 16;
+ }
+ } else {
+ while (len>=16) {
+ for(n=0; n<16; n+=sizeof(size_t))
+ *(size_t*)(out+n) =
+ *(size_t*)(in+n) ^ *(size_t*)(iv+n);
+ (*block)(out, out, key);
+ iv = out;
+ len -= 16;
+ in += 16;
+ out += 16;
+ }
+ }
+#endif
+ while (len) {
+ for(n=0; n<16 && n<len; ++n)
+ out[n] = in[n] ^ iv[n];
+ for(; n<16; ++n)
+ out[n] = iv[n];
+ (*block)(out, out, key);
+ iv = out;
+ if (len<=16) break;
+ len -= 16;
+ in += 16;
+ out += 16;
+ }
+ memcpy(ivec,iv,16);
+}
+
+void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block)
+{
+ size_t n;
+ union { size_t t[16/sizeof(size_t)]; unsigned char c[16]; } tmp;
+
+#if !defined(OPENSSL_SMALL_FOOTPRINT)
+ if (in != out) {
+ const unsigned char *iv = ivec;
+
+ if (STRICT_ALIGNMENT &&
+ ((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0) {
+ while (len>=16) {
+ (*block)(in, out, key);
+ for(n=0; n<16; ++n)
+ out[n] ^= iv[n];
+ iv = in;
+ len -= 16;
+ in += 16;
+ out += 16;
+ }
+ } else if (16%sizeof(size_t) == 0) { /* always true */
+ while (len>=16) {
+ size_t *out_t=(size_t *)out, *iv_t=(size_t *)iv;
+
+ (*block)(in, out, key);
+ for(n=0; n<16/sizeof(size_t); n++)
+ out_t[n] ^= iv_t[n];
+ iv = in;
+ len -= 16;
+ in += 16;
+ out += 16;
+ }
+ }
+ memcpy(ivec,iv,16);
+ } else {
+ if (STRICT_ALIGNMENT &&
+ ((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0) {
+ unsigned char c;
+ while (len>=16) {
+ (*block)(in, tmp.c, key);
+ for(n=0; n<16; ++n) {
+ c = in[n];
+ out[n] = tmp.c[n] ^ ivec[n];
+ ivec[n] = c;
+ }
+ len -= 16;
+ in += 16;
+ out += 16;
+ }
+ } else if (16%sizeof(size_t) == 0) { /* always true */
+ while (len>=16) {
+ size_t c, *out_t=(size_t *)out, *ivec_t=(size_t *)ivec;
+ const size_t *in_t=(const size_t *)in;
+
+ (*block)(in, tmp.c, key);
+ for(n=0; n<16/sizeof(size_t); n++) {
+ c = in_t[n];
+ out_t[n] = tmp.t[n] ^ ivec_t[n];
+ ivec_t[n] = c;
+ }
+ len -= 16;
+ in += 16;
+ out += 16;
+ }
+ }
+ }
+#endif
+ while (len) {
+ unsigned char c;
+ (*block)(in, tmp.c, key);
+ for(n=0; n<16 && n<len; ++n) {
+ c = in[n];
+ out[n] = tmp.c[n] ^ ivec[n];
+ ivec[n] = c;
+ }
+ if (len<=16) {
+ for (; n<16; ++n)
+ ivec[n] = in[n];
+ break;
+ }
+ len -= 16;
+ in += 16;
+ out += 16;
+ }
+}
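+
+/* A usage sketch: CRYPTO_cbc128_encrypt() is cipher-agnostic and takes any
+ * 16-byte block function as block128_f. aes_cbc_demo and the MODES_EXAMPLE
+ * guard are hypothetical; the sketch assumes the usual
+ * AES_set_encrypt_key()/AES_encrypt() interface from <openssl/aes.h>. */
+#ifdef MODES_EXAMPLE
+#include <openssl/aes.h>
+
+static void
+aes_cbc_demo(const unsigned char *in, unsigned char *out, size_t len,
+    const unsigned char key16[16], unsigned char iv[16])
+{
+	AES_KEY aes;
+
+	AES_set_encrypt_key(key16, 128, &aes);
+	/* AES_encrypt has the (in, out, key) shape that block128_f expects */
+	CRYPTO_cbc128_encrypt(in, out, len, &aes, iv, (block128_f)AES_encrypt);
+}
+#endif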
diff --git a/ext/libressl/crypto/modes/ccm128.c b/ext/libressl/crypto/modes/ccm128.c
new file mode 100644
index 0000000..ffeb4e4
--- /dev/null
+++ b/ext/libressl/crypto/modes/ccm128.c
@@ -0,0 +1,441 @@
+/* $OpenBSD: ccm128.c,v 1.5 2019/05/08 14:18:25 tb Exp $ */
+/* ====================================================================
+ * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ */
+
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
+#include <string.h>
+
+#ifndef MODES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#endif
+
+/* First you set up the M and L parameters and pass the key schedule.
+ * This is called once per session setup... */
+void CRYPTO_ccm128_init(CCM128_CONTEXT *ctx,
+ unsigned int M,unsigned int L,void *key,block128_f block)
+{
+ memset(ctx->nonce.c,0,sizeof(ctx->nonce.c));
+ ctx->nonce.c[0] = ((u8)(L-1)&7) | (u8)(((M-2)/2)&7)<<3;
+ ctx->blocks = 0;
+ ctx->block = block;
+ ctx->key = key;
+}
+
+/* !!! The following interfaces are to be called *once* per packet !!! */
+
+/* Then you set up the per-message nonce and pass the length of the message */
+int CRYPTO_ccm128_setiv(CCM128_CONTEXT *ctx,
+ const unsigned char *nonce,size_t nlen,size_t mlen)
+{
+ unsigned int L = ctx->nonce.c[0]&7; /* the L parameter */
+
+ if (nlen<(14-L)) return -1; /* nonce is too short */
+
+ if (sizeof(mlen)==8 && L>=3) {
+ ctx->nonce.c[8] = (u8)(mlen>>(56%(sizeof(mlen)*8)));
+ ctx->nonce.c[9] = (u8)(mlen>>(48%(sizeof(mlen)*8)));
+ ctx->nonce.c[10] = (u8)(mlen>>(40%(sizeof(mlen)*8)));
+ ctx->nonce.c[11] = (u8)(mlen>>(32%(sizeof(mlen)*8)));
+ }
+ else
+ ctx->nonce.u[1] = 0;
+
+ ctx->nonce.c[12] = (u8)(mlen>>24);
+ ctx->nonce.c[13] = (u8)(mlen>>16);
+ ctx->nonce.c[14] = (u8)(mlen>>8);
+ ctx->nonce.c[15] = (u8)mlen;
+
+ ctx->nonce.c[0] &= ~0x40; /* clear Adata flag */
+ memcpy(&ctx->nonce.c[1],nonce,14-L);
+
+ return 0;
+}
+
+/* Then you pass the additional authentication data; this is optional */
+void CRYPTO_ccm128_aad(CCM128_CONTEXT *ctx,
+ const unsigned char *aad,size_t alen)
+{ unsigned int i;
+ block128_f block = ctx->block;
+
+ if (alen==0) return;
+
+ ctx->nonce.c[0] |= 0x40; /* set Adata flag */
+ (*block)(ctx->nonce.c,ctx->cmac.c,ctx->key),
+ ctx->blocks++;
+
+ if (alen<(0x10000-0x100)) {
+ ctx->cmac.c[0] ^= (u8)(alen>>8);
+ ctx->cmac.c[1] ^= (u8)alen;
+ i=2;
+ }
+ else if (sizeof(alen)==8 && alen>=(size_t)1<<(32%(sizeof(alen)*8))) {
+ ctx->cmac.c[0] ^= 0xFF;
+ ctx->cmac.c[1] ^= 0xFF;
+ ctx->cmac.c[2] ^= (u8)(alen>>(56%(sizeof(alen)*8)));
+ ctx->cmac.c[3] ^= (u8)(alen>>(48%(sizeof(alen)*8)));
+ ctx->cmac.c[4] ^= (u8)(alen>>(40%(sizeof(alen)*8)));
+ ctx->cmac.c[5] ^= (u8)(alen>>(32%(sizeof(alen)*8)));
+ ctx->cmac.c[6] ^= (u8)(alen>>24);
+ ctx->cmac.c[7] ^= (u8)(alen>>16);
+ ctx->cmac.c[8] ^= (u8)(alen>>8);
+ ctx->cmac.c[9] ^= (u8)alen;
+ i=10;
+ }
+ else {
+ ctx->cmac.c[0] ^= 0xFF;
+ ctx->cmac.c[1] ^= 0xFE;
+ ctx->cmac.c[2] ^= (u8)(alen>>24);
+ ctx->cmac.c[3] ^= (u8)(alen>>16);
+ ctx->cmac.c[4] ^= (u8)(alen>>8);
+ ctx->cmac.c[5] ^= (u8)alen;
+ i=6;
+ }
+
+ do {
+ for(;i<16 && alen;++i,++aad,--alen)
+ ctx->cmac.c[i] ^= *aad;
+ (*block)(ctx->cmac.c,ctx->cmac.c,ctx->key),
+ ctx->blocks++;
+ i=0;
+ } while (alen);
+}
+
+/* Finally you encrypt or decrypt the message */
+
+/* The counter part of the nonce may not be larger than L*8 bits;
+ * L is not larger than 8, therefore a 64-bit counter... */
+static void ctr64_inc(unsigned char *counter) {
+ unsigned int n=8;
+ u8 c;
+
+ counter += 8;
+ do {
+ --n;
+ c = counter[n];
+ ++c;
+ counter[n] = c;
+ if (c) return;
+ } while (n);
+}
+
+int CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx,
+ const unsigned char *inp, unsigned char *out,
+ size_t len)
+{
+ size_t n;
+ unsigned int i,L;
+ unsigned char flags0 = ctx->nonce.c[0];
+ block128_f block = ctx->block;
+ void * key = ctx->key;
+ union { u64 u[2]; u8 c[16]; } scratch;
+
+ if (!(flags0&0x40))
+ (*block)(ctx->nonce.c,ctx->cmac.c,key),
+ ctx->blocks++;
+
+ ctx->nonce.c[0] = L = flags0&7;
+ for (n=0,i=15-L;i<15;++i) {
+ n |= ctx->nonce.c[i];
+ ctx->nonce.c[i]=0;
+ n <<= 8;
+ }
+ n |= ctx->nonce.c[15]; /* reconstructed length */
+ ctx->nonce.c[15]=1;
+
+ if (n!=len) return -1; /* length mismatch */
+
+ ctx->blocks += ((len+15)>>3)|1;
+ if (ctx->blocks > (U64(1)<<61)) return -2; /* too much data */
+
+ while (len>=16) {
+#ifdef __STRICT_ALIGNMENT
+ union { u64 u[2]; u8 c[16]; } temp;
+
+ memcpy (temp.c,inp,16);
+ ctx->cmac.u[0] ^= temp.u[0];
+ ctx->cmac.u[1] ^= temp.u[1];
+#else
+ ctx->cmac.u[0] ^= ((u64*)inp)[0];
+ ctx->cmac.u[1] ^= ((u64*)inp)[1];
+#endif
+ (*block)(ctx->cmac.c,ctx->cmac.c,key);
+ (*block)(ctx->nonce.c,scratch.c,key);
+ ctr64_inc(ctx->nonce.c);
+#ifdef __STRICT_ALIGNMENT
+ temp.u[0] ^= scratch.u[0];
+ temp.u[1] ^= scratch.u[1];
+ memcpy(out,temp.c,16);
+#else
+ ((u64*)out)[0] = scratch.u[0]^((u64*)inp)[0];
+ ((u64*)out)[1] = scratch.u[1]^((u64*)inp)[1];
+#endif
+ inp += 16;
+ out += 16;
+ len -= 16;
+ }
+
+ if (len) {
+ for (i=0; i<len; ++i) ctx->cmac.c[i] ^= inp[i];
+ (*block)(ctx->cmac.c,ctx->cmac.c,key);
+ (*block)(ctx->nonce.c,scratch.c,key);
+ for (i=0; i<len; ++i) out[i] = scratch.c[i]^inp[i];
+ }
+
+ for (i=15-L;i<16;++i)
+ ctx->nonce.c[i]=0;
+
+ (*block)(ctx->nonce.c,scratch.c,key);
+ ctx->cmac.u[0] ^= scratch.u[0];
+ ctx->cmac.u[1] ^= scratch.u[1];
+
+ ctx->nonce.c[0] = flags0;
+
+ return 0;
+}
+
+int CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx,
+ const unsigned char *inp, unsigned char *out,
+ size_t len)
+{
+ size_t n;
+ unsigned int i,L;
+ unsigned char flags0 = ctx->nonce.c[0];
+ block128_f block = ctx->block;
+ void * key = ctx->key;
+ union { u64 u[2]; u8 c[16]; } scratch;
+
+ if (!(flags0&0x40))
+ (*block)(ctx->nonce.c,ctx->cmac.c,key);
+
+ ctx->nonce.c[0] = L = flags0&7;
+ for (n=0,i=15-L;i<15;++i) {
+ n |= ctx->nonce.c[i];
+ ctx->nonce.c[i]=0;
+ n <<= 8;
+ }
+ n |= ctx->nonce.c[15]; /* reconstructed length */
+ ctx->nonce.c[15]=1;
+
+ if (n!=len) return -1;
+
+ while (len>=16) {
+#ifdef __STRICT_ALIGNMENT
+ union { u64 u[2]; u8 c[16]; } temp;
+#endif
+ (*block)(ctx->nonce.c,scratch.c,key);
+ ctr64_inc(ctx->nonce.c);
+#ifdef __STRICT_ALIGNMENT
+ memcpy (temp.c,inp,16);
+ ctx->cmac.u[0] ^= (scratch.u[0] ^= temp.u[0]);
+ ctx->cmac.u[1] ^= (scratch.u[1] ^= temp.u[1]);
+ memcpy (out,scratch.c,16);
+#else
+ ctx->cmac.u[0] ^= (((u64*)out)[0] = scratch.u[0]^((u64*)inp)[0]);
+ ctx->cmac.u[1] ^= (((u64*)out)[1] = scratch.u[1]^((u64*)inp)[1]);
+#endif
+ (*block)(ctx->cmac.c,ctx->cmac.c,key);
+
+ inp += 16;
+ out += 16;
+ len -= 16;
+ }
+
+ if (len) {
+ (*block)(ctx->nonce.c,scratch.c,key);
+ for (i=0; i<len; ++i)
+ ctx->cmac.c[i] ^= (out[i] = scratch.c[i]^inp[i]);
+ (*block)(ctx->cmac.c,ctx->cmac.c,key);
+ }
+
+ for (i=15-L;i<16;++i)
+ ctx->nonce.c[i]=0;
+
+ (*block)(ctx->nonce.c,scratch.c,key);
+ ctx->cmac.u[0] ^= scratch.u[0];
+ ctx->cmac.u[1] ^= scratch.u[1];
+
+ ctx->nonce.c[0] = flags0;
+
+ return 0;
+}
+
+static void ctr64_add (unsigned char *counter,size_t inc)
+{ size_t n=8, val=0;
+
+ counter += 8;
+ do {
+ --n;
+ val += counter[n] + (inc&0xff);
+ counter[n] = (unsigned char)val;
+ val >>= 8; /* carry bit */
+ inc >>= 8;
+ } while(n && (inc || val));
+}
+
+int CRYPTO_ccm128_encrypt_ccm64(CCM128_CONTEXT *ctx,
+ const unsigned char *inp, unsigned char *out,
+ size_t len,ccm128_f stream)
+{
+ size_t n;
+ unsigned int i,L;
+ unsigned char flags0 = ctx->nonce.c[0];
+ block128_f block = ctx->block;
+ void * key = ctx->key;
+ union { u64 u[2]; u8 c[16]; } scratch;
+
+ if (!(flags0&0x40))
+ (*block)(ctx->nonce.c,ctx->cmac.c,key),
+ ctx->blocks++;
+
+ ctx->nonce.c[0] = L = flags0&7;
+ for (n=0,i=15-L;i<15;++i) {
+ n |= ctx->nonce.c[i];
+ ctx->nonce.c[i]=0;
+ n <<= 8;
+ }
+ n |= ctx->nonce.c[15]; /* reconstructed length */
+ ctx->nonce.c[15]=1;
+
+ if (n!=len) return -1; /* length mismatch */
+
+ ctx->blocks += ((len+15)>>3)|1;
+ if (ctx->blocks > (U64(1)<<61)) return -2; /* too much data */
+
+ if ((n=len/16)) {
+ (*stream)(inp,out,n,key,ctx->nonce.c,ctx->cmac.c);
+ n *= 16;
+ inp += n;
+ out += n;
+ len -= n;
+ if (len) ctr64_add(ctx->nonce.c,n/16);
+ }
+
+ if (len) {
+ for (i=0; i<len; ++i) ctx->cmac.c[i] ^= inp[i];
+ (*block)(ctx->cmac.c,ctx->cmac.c,key);
+ (*block)(ctx->nonce.c,scratch.c,key);
+ for (i=0; i<len; ++i) out[i] = scratch.c[i]^inp[i];
+ }
+
+ for (i=15-L;i<16;++i)
+ ctx->nonce.c[i]=0;
+
+ (*block)(ctx->nonce.c,scratch.c,key);
+ ctx->cmac.u[0] ^= scratch.u[0];
+ ctx->cmac.u[1] ^= scratch.u[1];
+
+ ctx->nonce.c[0] = flags0;
+
+ return 0;
+}
+
+int CRYPTO_ccm128_decrypt_ccm64(CCM128_CONTEXT *ctx,
+ const unsigned char *inp, unsigned char *out,
+ size_t len,ccm128_f stream)
+{
+ size_t n;
+ unsigned int i,L;
+ unsigned char flags0 = ctx->nonce.c[0];
+ block128_f block = ctx->block;
+ void * key = ctx->key;
+ union { u64 u[2]; u8 c[16]; } scratch;
+
+ if (!(flags0&0x40))
+ (*block)(ctx->nonce.c,ctx->cmac.c,key);
+
+ ctx->nonce.c[0] = L = flags0&7;
+ for (n=0,i=15-L;i<15;++i) {
+ n |= ctx->nonce.c[i];
+ ctx->nonce.c[i]=0;
+ n <<= 8;
+ }
+ n |= ctx->nonce.c[15]; /* reconstructed length */
+ ctx->nonce.c[15]=1;
+
+ if (n!=len) return -1;
+
+ if ((n=len/16)) {
+ (*stream)(inp,out,n,key,ctx->nonce.c,ctx->cmac.c);
+ n *= 16;
+ inp += n;
+ out += n;
+ len -= n;
+ if (len) ctr64_add(ctx->nonce.c,n/16);
+ }
+
+ if (len) {
+ (*block)(ctx->nonce.c,scratch.c,key);
+ for (i=0; i<len; ++i)
+ ctx->cmac.c[i] ^= (out[i] = scratch.c[i]^inp[i]);
+ (*block)(ctx->cmac.c,ctx->cmac.c,key);
+ }
+
+ for (i=15-L;i<16;++i)
+ ctx->nonce.c[i]=0;
+
+ (*block)(ctx->nonce.c,scratch.c,key);
+ ctx->cmac.u[0] ^= scratch.u[0];
+ ctx->cmac.u[1] ^= scratch.u[1];
+
+ ctx->nonce.c[0] = flags0;
+
+ return 0;
+}
+
+size_t CRYPTO_ccm128_tag(CCM128_CONTEXT *ctx,unsigned char *tag,size_t len)
+{ unsigned int M = (ctx->nonce.c[0]>>3)&7; /* the M parameter */
+
+ M *= 2; M += 2;
+ if (len != M) return 0;
+ memcpy(tag,ctx->cmac.c,M);
+ return M;
+}
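+
+/* A sketch of the calling sequence the comments above prescribe, for one
+ * packet with a 16-byte tag (M=16) and a 2-byte length field (L=2);
+ * ccm_demo and the MODES_EXAMPLE guard are hypothetical. With L=2 the
+ * nonce is 13 bytes and len must fit in 16 bits, as handled by
+ * CRYPTO_ccm128_setiv() above. */
+#ifdef MODES_EXAMPLE
+#include <openssl/aes.h>
+
+static int
+ccm_demo(const unsigned char key16[16], const unsigned char nonce[13],
+    const unsigned char *aad, size_t alen,
+    const unsigned char *pt, unsigned char *ct, size_t len,
+    unsigned char tag[16])
+{
+	AES_KEY aes;
+	CCM128_CONTEXT ctx;
+
+	AES_set_encrypt_key(key16, 128, &aes);
+	CRYPTO_ccm128_init(&ctx, 16, 2, &aes, (block128_f)AES_encrypt);
+	if (CRYPTO_ccm128_setiv(&ctx, nonce, 13, len) != 0)
+		return -1;
+	if (alen)
+		CRYPTO_ccm128_aad(&ctx, aad, alen);
+	if (CRYPTO_ccm128_encrypt(&ctx, pt, ct, len) != 0)
+		return -1;
+	return CRYPTO_ccm128_tag(&ctx, tag, 16) == 16 ? 0 : -1;
+}
+#endif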
diff --git a/ext/libressl/crypto/modes/cfb128.c b/ext/libressl/crypto/modes/cfb128.c
new file mode 100644
index 0000000..88bfbc4
--- /dev/null
+++ b/ext/libressl/crypto/modes/cfb128.c
@@ -0,0 +1,234 @@
+/* $OpenBSD: cfb128.c,v 1.4 2015/02/10 09:46:30 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
+#include <string.h>
+
+#ifndef MODES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#endif
+
+/* The input and output are encrypted as though 128-bit CFB mode is being
+ * used. The extra state information recording how much of the
+ * 128-bit block we have used is contained in *num.
+ */
+void CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], int *num,
+ int enc, block128_f block)
+{
+ unsigned int n;
+ size_t l = 0;
+
+ n = *num;
+
+ if (enc) {
+#if !defined(OPENSSL_SMALL_FOOTPRINT)
+ if (16%sizeof(size_t) == 0) do { /* always true actually */
+ while (n && len) {
+ *(out++) = ivec[n] ^= *(in++);
+ --len;
+ n = (n+1) % 16;
+ }
+#ifdef __STRICT_ALIGNMENT
+ if (((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0)
+ break;
+#endif
+ while (len>=16) {
+ (*block)(ivec, ivec, key);
+ for (; n<16; n+=sizeof(size_t)) {
+ *(size_t*)(out+n) =
+ *(size_t*)(ivec+n) ^= *(size_t*)(in+n);
+ }
+ len -= 16;
+ out += 16;
+ in += 16;
+ n = 0;
+ }
+ if (len) {
+ (*block)(ivec, ivec, key);
+ while (len--) {
+ out[n] = ivec[n] ^= in[n];
+ ++n;
+ }
+ }
+ *num = n;
+ return;
+ } while (0);
+	/* the rest would commonly be eliminated by an x86* compiler */
+#endif
+ while (l<len) {
+ if (n == 0) {
+ (*block)(ivec, ivec, key);
+ }
+ out[l] = ivec[n] ^= in[l];
+ ++l;
+ n = (n+1) % 16;
+ }
+ *num = n;
+ } else {
+#if !defined(OPENSSL_SMALL_FOOTPRINT)
+ if (16%sizeof(size_t) == 0) do { /* always true actually */
+ while (n && len) {
+ unsigned char c;
+ *(out++) = ivec[n] ^ (c = *(in++)); ivec[n] = c;
+ --len;
+ n = (n+1) % 16;
+ }
+#ifdef __STRICT_ALIGNMENT
+ if (((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0)
+ break;
+#endif
+ while (len>=16) {
+ (*block)(ivec, ivec, key);
+ for (; n<16; n+=sizeof(size_t)) {
+ size_t t = *(size_t*)(in+n);
+ *(size_t*)(out+n) = *(size_t*)(ivec+n) ^ t;
+ *(size_t*)(ivec+n) = t;
+ }
+ len -= 16;
+ out += 16;
+ in += 16;
+ n = 0;
+ }
+ if (len) {
+ (*block)(ivec, ivec, key);
+ while (len--) {
+ unsigned char c;
+ out[n] = ivec[n] ^ (c = in[n]); ivec[n] = c;
+ ++n;
+ }
+ }
+ *num = n;
+ return;
+ } while (0);
+	/* the rest would commonly be eliminated by an x86* compiler */
+#endif
+ while (l<len) {
+ unsigned char c;
+ if (n == 0) {
+ (*block)(ivec, ivec, key);
+ }
+ out[l] = ivec[n] ^ (c = in[l]); ivec[n] = c;
+ ++l;
+ n = (n+1) % 16;
+ }
+ *num=n;
+ }
+}
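+
+/* A sketch of the *num mechanism above: two chunked calls produce the same
+ * output as one call over all 24 bytes, because *num carries the offset
+ * into the current keystream block. cfb_chunked_demo and the MODES_EXAMPLE
+ * guard are hypothetical; note that CFB uses the *encrypt* direction of
+ * the block cipher for both values of enc. */
+#ifdef MODES_EXAMPLE
+#include <openssl/aes.h>
+
+static void
+cfb_chunked_demo(const unsigned char key16[16], unsigned char iv[16],
+    const unsigned char in[24], unsigned char out[24])
+{
+	AES_KEY aes;
+	int num = 0;	/* must start at 0 */
+
+	AES_set_encrypt_key(key16, 128, &aes);
+	CRYPTO_cfb128_encrypt(in, out, 5, &aes, iv, &num, 1,
+	    (block128_f)AES_encrypt);
+	CRYPTO_cfb128_encrypt(in + 5, out + 5, 19, &aes, iv, &num, 1,
+	    (block128_f)AES_encrypt);
+}
+#endif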
+
+/* This expects a single block of size nbits for both in and out. Note that
+ * it corrupts any extra bits in the last byte of out. */
+static void cfbr_encrypt_block(const unsigned char *in,unsigned char *out,
+ int nbits,const void *key,
+ unsigned char ivec[16],int enc,
+ block128_f block)
+{
+ int n,rem,num;
+	unsigned char ovec[16*2 + 1]; /* +1 because we dereference (but don't use) one byte off the end */
+
+ if (nbits<=0 || nbits>128) return;
+
+ /* fill in the first half of the new IV with the current IV */
+ memcpy(ovec,ivec,16);
+ /* construct the new IV */
+ (*block)(ivec,ivec,key);
+ num = (nbits+7)/8;
+ if (enc) /* encrypt the input */
+ for(n=0 ; n < num ; ++n)
+ out[n] = (ovec[16+n] = in[n] ^ ivec[n]);
+ else /* decrypt the input */
+ for(n=0 ; n < num ; ++n)
+ out[n] = (ovec[16+n] = in[n]) ^ ivec[n];
+ /* shift ovec left... */
+ rem = nbits%8;
+ num = nbits/8;
+ if(rem==0)
+ memcpy(ivec,ovec+num,16);
+ else
+ for(n=0 ; n < 16 ; ++n)
+ ivec[n] = ovec[n+num]<<rem | ovec[n+num+1]>>(8-rem);
+
+ /* it is not necessary to cleanse ovec, since the IV is not secret */
+}
+
+/* N.B. This expects the input to be packed, MS bit first */
+void CRYPTO_cfb128_1_encrypt(const unsigned char *in, unsigned char *out,
+ size_t bits, const void *key,
+ unsigned char ivec[16], int *num,
+ int enc, block128_f block)
+{
+ size_t n;
+ unsigned char c[1],d[1];
+
+ for(n=0 ; n<bits ; ++n)
+ {
+ c[0]=(in[n/8]&(1 << (7-n%8))) ? 0x80 : 0;
+ cfbr_encrypt_block(c,d,1,key,ivec,enc,block);
+ out[n/8]=(out[n/8]&~(1 << (unsigned int)(7-n%8))) |
+ ((d[0]&0x80) >> (unsigned int)(n%8));
+ }
+}
+
+void CRYPTO_cfb128_8_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const void *key,
+ unsigned char ivec[16], int *num,
+ int enc, block128_f block)
+{
+ size_t n;
+
+ for(n=0 ; n<length ; ++n)
+ cfbr_encrypt_block(&in[n],&out[n],8,key,ivec,enc,block);
+}
+
diff --git a/ext/libressl/crypto/modes/ctr128.c b/ext/libressl/crypto/modes/ctr128.c
new file mode 100644
index 0000000..3f14e4e
--- /dev/null
+++ b/ext/libressl/crypto/modes/ctr128.c
@@ -0,0 +1,251 @@
+/* $OpenBSD: ctr128.c,v 1.7 2017/08/13 17:46:24 bcook Exp $ */
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
+#include <string.h>
+
+#ifndef MODES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#endif
+#include <assert.h>
+
+/* NOTE: the IV/counter in CTR mode is big-endian. The code itself
+ * is endian-neutral. */
+
+/* increment counter (128-bit int) by 1 */
+static void ctr128_inc(unsigned char *counter) {
+ u32 n=16;
+ u8 c;
+
+ do {
+ --n;
+ c = counter[n];
+ ++c;
+ counter[n] = c;
+ if (c) return;
+ } while (n);
+}
+
+#if !defined(OPENSSL_SMALL_FOOTPRINT)
+static void
+ctr128_inc_aligned(unsigned char *counter)
+{
+#if BYTE_ORDER == LITTLE_ENDIAN
+ ctr128_inc(counter);
+#else
+ size_t *data, c, n;
+ data = (size_t *)counter;
+ n = 16 / sizeof(size_t);
+ do {
+ --n;
+ c = data[n];
+ ++c;
+ data[n] = c;
+ if (c)
+ return;
+ } while (n);
+#endif
+}
+#endif
+
+/* The input is encrypted as though 128-bit counter mode is being
+ * used. The extra state information recording how much of the
+ * 128-bit block we have used is contained in *num, and the
+ * encrypted counter is kept in ecount_buf. Both *num and
+ * ecount_buf must be initialised with zeros before the first
+ * call to CRYPTO_ctr128_encrypt().
+ *
+ * This algorithm assumes that the counter is in the x lower bits
+ * of the IV (ivec), and that the application has full control over
+ * overflow and the rest of the IV. This implementation takes NO
+ * responsibility for checking that the counter doesn't overflow
+ * into the rest of the IV when incremented.
+ */
+void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], unsigned char ecount_buf[16],
+ unsigned int *num, block128_f block)
+{
+ unsigned int n;
+ size_t l=0;
+
+ assert(*num < 16);
+
+ n = *num;
+
+#if !defined(OPENSSL_SMALL_FOOTPRINT)
+ if (16%sizeof(size_t) == 0) do { /* always true actually */
+ while (n && len) {
+ *(out++) = *(in++) ^ ecount_buf[n];
+ --len;
+ n = (n+1) % 16;
+ }
+
+#ifdef __STRICT_ALIGNMENT
+ if (((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0)
+ break;
+#endif
+ while (len>=16) {
+ (*block)(ivec, ecount_buf, key);
+ ctr128_inc_aligned(ivec);
+ for (; n<16; n+=sizeof(size_t))
+ *(size_t *)(out+n) =
+ *(size_t *)(in+n) ^ *(size_t *)(ecount_buf+n);
+ len -= 16;
+ out += 16;
+ in += 16;
+ n = 0;
+ }
+ if (len) {
+ (*block)(ivec, ecount_buf, key);
+ ctr128_inc_aligned(ivec);
+ while (len--) {
+ out[n] = in[n] ^ ecount_buf[n];
+ ++n;
+ }
+ }
+ *num = n;
+ return;
+ } while(0);
+	/* the rest would commonly be eliminated by an x86* compiler */
+#endif
+ while (l<len) {
+ if (n==0) {
+ (*block)(ivec, ecount_buf, key);
+ ctr128_inc(ivec);
+ }
+ out[l] = in[l] ^ ecount_buf[n];
+ ++l;
+ n = (n+1) % 16;
+ }
+
+ *num=n;
+}
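+
+/* A usage sketch matching the initialisation requirements in the comment
+ * above: ivec carries the counter while ecount_buf and *num start zeroed.
+ * ctr_demo and the MODES_EXAMPLE guard are hypothetical; decryption is the
+ * identical call, since CTR is an XOR stream. */
+#ifdef MODES_EXAMPLE
+#include <openssl/aes.h>
+#include <string.h>
+
+static void
+ctr_demo(const unsigned char key16[16], unsigned char ivec[16],
+    const unsigned char *in, unsigned char *out, size_t len)
+{
+	AES_KEY aes;
+	unsigned char ecount[16];
+	unsigned int num = 0;
+
+	memset(ecount, 0, sizeof(ecount));
+	AES_set_encrypt_key(key16, 128, &aes);
+	CRYPTO_ctr128_encrypt(in, out, len, &aes, ivec, ecount, &num,
+	    (block128_f)AES_encrypt);
+}
+#endif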
+
+/* increment upper 96 bits of 128-bit counter by 1 */
+static void ctr96_inc(unsigned char *counter) {
+ u32 n=12;
+ u8 c;
+
+ do {
+ --n;
+ c = counter[n];
+ ++c;
+ counter[n] = c;
+ if (c) return;
+ } while (n);
+}
+
+void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], unsigned char ecount_buf[16],
+ unsigned int *num, ctr128_f func)
+{
+ unsigned int n,ctr32;
+
+ assert(*num < 16);
+
+ n = *num;
+
+ while (n && len) {
+ *(out++) = *(in++) ^ ecount_buf[n];
+ --len;
+ n = (n+1) % 16;
+ }
+
+ ctr32 = GETU32(ivec+12);
+ while (len>=16) {
+ size_t blocks = len/16;
+ /*
+ * 1<<28 is just a not-so-small yet not-so-large number...
+		 * The condition below is practically never met, but it has to
+ * be checked for code correctness.
+ */
+ if (sizeof(size_t)>sizeof(unsigned int) && blocks>(1U<<28))
+ blocks = (1U<<28);
+ /*
+		 * As (*func) operates on a 32-bit counter, the caller
+		 * has to handle overflow. The 'if' below detects the
+		 * overflow, which is then handled by limiting the
+		 * number of blocks to the exact overflow point...
+ */
+ ctr32 += (u32)blocks;
+ if (ctr32 < blocks) {
+ blocks -= ctr32;
+ ctr32 = 0;
+ }
+ (*func)(in,out,blocks,key,ivec);
+		/* (*func) does not update ivec, the caller does: */
+ PUTU32(ivec+12,ctr32);
+		/* ... overflow was detected, propagate carry. */
+ if (ctr32 == 0) ctr96_inc(ivec);
+ blocks *= 16;
+ len -= blocks;
+ out += blocks;
+ in += blocks;
+ }
+ if (len) {
+ memset(ecount_buf,0,16);
+ (*func)(ecount_buf,ecount_buf,1,key,ivec);
+ ++ctr32;
+ PUTU32(ivec+12,ctr32);
+ if (ctr32 == 0) ctr96_inc(ivec);
+ while (len--) {
+ out[n] = in[n] ^ ecount_buf[n];
+ ++n;
+ }
+ }
+
+ *num=n;
+}
diff --git a/ext/libressl/crypto/modes/cts128.c b/ext/libressl/crypto/modes/cts128.c
new file mode 100644
index 0000000..b2f7174
--- /dev/null
+++ b/ext/libressl/crypto/modes/cts128.c
@@ -0,0 +1,267 @@
+/* $OpenBSD: cts128.c,v 1.5 2015/07/19 18:27:26 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Rights for redistribution and usage in source and binary
+ * forms are granted according to the OpenSSL license.
+ */
+
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
+#include <string.h>
+
+#ifndef MODES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#endif
+
+/*
+ * The trouble with Ciphertext Stealing (CTS) mode is that there is no
+ * common official specification, only a couple of cipher/application-
+ * specific ones: RFC 2040 and RFC 3962. Then there is the 'Proposal to
+ * Extend CBC Mode By "Ciphertext Stealing"' at the NIST site, which
+ * deviates from the mentioned RFCs. Most notably it allows input to be
+ * of block length and it doesn't flip the order of the last two
+ * blocks. CTS is discussed even in an ECB context, but it's not
+ * adopted for any known application. This implementation provides
+ * two interfaces: one compliant with the above-mentioned RFCs and one
+ * compliant with the NIST proposal, both extending CBC mode.
+ */
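+
+/* To make the RFC/NIST difference concrete, a sketch of the RFC-style
+ * one-shot interface (cts_demo and the MODES_EXAMPLE guard are
+ * hypothetical): a 24-byte message yields a 24-byte ciphertext, with the
+ * final partial block stolen from the next-to-last CBC block. */
+#ifdef MODES_EXAMPLE
+#include <openssl/aes.h>
+
+static size_t
+cts_demo(const unsigned char key16[16], unsigned char iv[16],
+    const unsigned char in[24], unsigned char out[24])
+{
+	AES_KEY aes;
+
+	AES_set_encrypt_key(key16, 128, &aes);
+	/* returns 24 (the full input length) on success */
+	return CRYPTO_cts128_encrypt_block(in, out, 24, &aes, iv,
+	    (block128_f)AES_encrypt);
+}
+#endif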
+
+size_t CRYPTO_cts128_encrypt_block(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block)
+{ size_t residue, n;
+
+ if (len <= 16) return 0;
+
+ if ((residue=len%16) == 0) residue = 16;
+
+ len -= residue;
+
+ CRYPTO_cbc128_encrypt(in,out,len,key,ivec,block);
+
+ in += len;
+ out += len;
+
+ for (n=0; n<residue; ++n)
+ ivec[n] ^= in[n];
+ (*block)(ivec,ivec,key);
+ memcpy(out,out-16,residue);
+ memcpy(out-16,ivec,16);
+
+ return len+residue;
+}
+
+size_t CRYPTO_nistcts128_encrypt_block(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block)
+{ size_t residue, n;
+
+ if (len < 16) return 0;
+
+ residue=len%16;
+
+ len -= residue;
+
+ CRYPTO_cbc128_encrypt(in,out,len,key,ivec,block);
+
+ if (residue==0) return len;
+
+ in += len;
+ out += len;
+
+ for (n=0; n<residue; ++n)
+ ivec[n] ^= in[n];
+ (*block)(ivec,ivec,key);
+ memcpy(out-16+residue,ivec,16);
+
+ return len+residue;
+}
+
+size_t CRYPTO_cts128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], cbc128_f cbc)
+{ size_t residue;
+ union { size_t align; unsigned char c[16]; } tmp;
+
+ if (len <= 16) return 0;
+
+ if ((residue=len%16) == 0) residue = 16;
+
+ len -= residue;
+
+ (*cbc)(in,out,len,key,ivec,1);
+
+ in += len;
+ out += len;
+
+ memset(tmp.c,0,sizeof(tmp));
+ memcpy(tmp.c,in,residue);
+ memcpy(out,out-16,residue);
+ (*cbc)(tmp.c,out-16,16,key,ivec,1);
+ return len+residue;
+}
+
+size_t CRYPTO_nistcts128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], cbc128_f cbc)
+{ size_t residue;
+ union { size_t align; unsigned char c[16]; } tmp;
+
+ if (len < 16) return 0;
+
+ residue=len%16;
+
+ len -= residue;
+
+ (*cbc)(in,out,len,key,ivec,1);
+
+ if (residue==0) return len;
+
+ in += len;
+ out += len;
+
+ memset(tmp.c,0,sizeof(tmp));
+ memcpy(tmp.c,in,residue);
+ (*cbc)(tmp.c,out-16+residue,16,key,ivec,1);
+ return len+residue;
+}
+
+size_t CRYPTO_cts128_decrypt_block(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block)
+{ size_t residue, n;
+ union { size_t align; unsigned char c[32]; } tmp;
+
+ if (len<=16) return 0;
+
+ if ((residue=len%16) == 0) residue = 16;
+
+ len -= 16+residue;
+
+ if (len) {
+ CRYPTO_cbc128_decrypt(in,out,len,key,ivec,block);
+ in += len;
+ out += len;
+ }
+
+ (*block)(in,tmp.c+16,key);
+
+ memcpy(tmp.c,tmp.c+16,16);
+ memcpy(tmp.c,in+16,residue);
+ (*block)(tmp.c,tmp.c,key);
+
+ for(n=0; n<16; ++n) {
+ unsigned char c = in[n];
+ out[n] = tmp.c[n] ^ ivec[n];
+ ivec[n] = c;
+ }
+ for(residue+=16; n<residue; ++n)
+ out[n] = tmp.c[n] ^ in[n];
+
+ return 16+len+residue;
+}
+
+size_t CRYPTO_nistcts128_decrypt_block(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block)
+{ size_t residue, n;
+ union { size_t align; unsigned char c[32]; } tmp;
+
+ if (len<16) return 0;
+
+ residue=len%16;
+
+ if (residue==0) {
+ CRYPTO_cbc128_decrypt(in,out,len,key,ivec,block);
+ return len;
+ }
+
+ len -= 16+residue;
+
+ if (len) {
+ CRYPTO_cbc128_decrypt(in,out,len,key,ivec,block);
+ in += len;
+ out += len;
+ }
+
+ (*block)(in+residue,tmp.c+16,key);
+
+ memcpy(tmp.c,tmp.c+16,16);
+ memcpy(tmp.c,in,residue);
+ (*block)(tmp.c,tmp.c,key);
+
+ for(n=0; n<16; ++n) {
+ unsigned char c = in[n];
+ out[n] = tmp.c[n] ^ ivec[n];
+ ivec[n] = in[n+residue];
+ tmp.c[n] = c;
+ }
+ for(residue+=16; n<residue; ++n)
+ out[n] = tmp.c[n] ^ tmp.c[n-16];
+
+ return 16+len+residue;
+}
+
+size_t CRYPTO_cts128_decrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], cbc128_f cbc)
+{ size_t residue;
+ union { size_t align; unsigned char c[32]; } tmp;
+
+ if (len<=16) return 0;
+
+ if ((residue=len%16) == 0) residue = 16;
+
+ len -= 16+residue;
+
+ if (len) {
+ (*cbc)(in,out,len,key,ivec,0);
+ in += len;
+ out += len;
+ }
+
+ memset(tmp.c,0,sizeof(tmp));
+ /* this places in[16] at &tmp.c[16] and decrypted block at &tmp.c[0] */
+ (*cbc)(in,tmp.c,16,key,tmp.c+16,0);
+
+ memcpy(tmp.c,in+16,residue);
+ (*cbc)(tmp.c,tmp.c,32,key,ivec,0);
+ memcpy(out,tmp.c,16+residue);
+ return 16+len+residue;
+}
+
+size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], cbc128_f cbc)
+{ size_t residue;
+ union { size_t align; unsigned char c[32]; } tmp;
+
+ if (len<16) return 0;
+
+ residue=len%16;
+
+ if (residue==0) {
+ (*cbc)(in,out,len,key,ivec,0);
+ return len;
+ }
+
+ len -= 16+residue;
+
+ if (len) {
+ (*cbc)(in,out,len,key,ivec,0);
+ in += len;
+ out += len;
+ }
+
+ memset(tmp.c,0,sizeof(tmp));
+ /* this places in[16] at &tmp.c[16] and decrypted block at &tmp.c[0] */
+ (*cbc)(in+residue,tmp.c,16,key,tmp.c+16,0);
+
+ memcpy(tmp.c,in,residue);
+ (*cbc)(tmp.c,tmp.c,32,key,ivec,0);
+ memcpy(out,tmp.c,16+residue);
+ return 16+len+residue;
+}
diff --git a/ext/libressl/crypto/modes/gcm128.c b/ext/libressl/crypto/modes/gcm128.c
new file mode 100644
index 0000000..d6c1bbe
--- /dev/null
+++ b/ext/libressl/crypto/modes/gcm128.c
@@ -0,0 +1,1566 @@
+/* $OpenBSD: gcm128.c,v 1.22 2018/01/24 23:03:37 kettenis Exp $ */
+/* ====================================================================
+ * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ */
+
+#define OPENSSL_FIPSAPI
+
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
+#include <string.h>
+
+#ifndef MODES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#endif
+
+#if defined(BSWAP4) && defined(__STRICT_ALIGNMENT)
+/* redefine, because alignment is ensured */
+#undef GETU32
+#define GETU32(p) BSWAP4(*(const u32 *)(p))
+#undef PUTU32
+#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
+#endif
+
+#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
+#define REDUCE1BIT(V) \
+ do { \
+ if (sizeof(size_t)==8) { \
+ u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
+ V.lo = (V.hi<<63)|(V.lo>>1); \
+ V.hi = (V.hi>>1 )^T; \
+ } else { \
+ u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
+ V.lo = (V.hi<<63)|(V.lo>>1); \
+ V.hi = (V.hi>>1 )^((u64)T<<32); \
+ } \
+ } while(0)
+
+/*
+ * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
+ * never be set to 8. 8 is effectively reserved for testing purposes.
+ * TABLE_BITS>1 are lookup-table-driven implementations referred to as
+ * "Shoup's" in the GCM specification. In other words OpenSSL does not
+ * cover the whole spectrum of possible table-driven implementations.
+ * Why? In the non-"Shoup's" case the memory access pattern is segmented
+ * in such a manner that it's trivial to see that cache timing
+ * information can reveal a fair portion of the intermediate hash value.
+ * Given that ciphertext is always available to an attacker, it's
+ * possible to attempt to deduce the secret parameter H and, if
+ * successful, tamper with messages [which is nothing but trivial in
+ * CTR mode]. In the "Shoup's" case it's not as trivial, but there is
+ * no reason to believe that it's resistant to cache-timing attacks.
+ * And the thing about the "8-bit" implementation is that it consumes
+ * 16 (sixteen) times more memory, 4KB per individual key + 1KB shared.
+ * Well, on the pros side it should be twice as fast as the "4-bit"
+ * version. And for gcc-generated x86[_64] code, the "8-bit" version
+ * was observed to run ~75% faster, closer to 100% for commercial
+ * compilers... Yet the "4-bit" procedure is preferred, because it's
+ * believed to provide a better security-performance balance and
+ * adequate all-round performance. "All-round" refers to things like:
+ *
+ * - shorter setup time effectively improves overall timing for
+ *   handling short messages;
+ * - larger table allocation can become unbearable because of VM
+ *   subsystem penalties (for example on Windows a large enough free
+ *   results in VM working-set trimming, meaning that a subsequent
+ *   malloc would immediately incur working-set expansion);
+ * - a larger table has a larger cache footprint, which can affect
+ *   the performance of other code paths (not necessarily even from
+ *   the same thread in a Hyper-Threading world);
+ *
+ * A value of 1 is not appropriate for performance reasons.
+ */
+#if TABLE_BITS==8
+
+static void gcm_init_8bit(u128 Htable[256], u64 H[2])
+{
+ int i, j;
+ u128 V;
+
+ Htable[0].hi = 0;
+ Htable[0].lo = 0;
+ V.hi = H[0];
+ V.lo = H[1];
+
+ for (Htable[128]=V, i=64; i>0; i>>=1) {
+ REDUCE1BIT(V);
+ Htable[i] = V;
+ }
+
+ for (i=2; i<256; i<<=1) {
+ u128 *Hi = Htable+i, H0 = *Hi;
+ for (j=1; j<i; ++j) {
+ Hi[j].hi = H0.hi^Htable[j].hi;
+ Hi[j].lo = H0.lo^Htable[j].lo;
+ }
+ }
+}
+
+static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
+{
+ u128 Z = { 0, 0};
+ const u8 *xi = (const u8 *)Xi+15;
+ size_t rem, n = *xi;
+ static const size_t rem_8bit[256] = {
+ PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
+ PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
+ PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
+ PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
+ PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
+ PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
+ PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
+ PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
+ PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
+ PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
+ PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
+ PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
+ PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
+ PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
+ PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
+ PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
+ PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
+ PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
+ PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
+ PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
+ PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
+ PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
+ PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
+ PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
+ PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
+ PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
+ PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
+ PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
+ PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
+ PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
+ PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
+ PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
+ PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
+ PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
+ PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
+ PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
+ PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
+ PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
+ PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
+ PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
+ PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
+ PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
+ PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
+ PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
+ PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
+ PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
+ PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
+ PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
+ PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
+ PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
+ PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
+ PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
+ PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
+ PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
+ PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
+ PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
+ PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
+ PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
+ PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
+ PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
+ PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
+ PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
+ PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
+ PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
+
+ while (1) {
+ Z.hi ^= Htable[n].hi;
+ Z.lo ^= Htable[n].lo;
+
+ if ((u8 *)Xi==xi) break;
+
+ n = *(--xi);
+
+ rem = (size_t)Z.lo&0xff;
+ Z.lo = (Z.hi<<56)|(Z.lo>>8);
+ Z.hi = (Z.hi>>8);
+#if SIZE_MAX == 0xffffffffffffffff
+ Z.hi ^= rem_8bit[rem];
+#else
+ Z.hi ^= (u64)rem_8bit[rem]<<32;
+#endif
+ }
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP8
+ Xi[0] = BSWAP8(Z.hi);
+ Xi[1] = BSWAP8(Z.lo);
+#else
+ u8 *p = (u8 *)Xi;
+ u32 v;
+ v = (u32)(Z.hi>>32); PUTU32(p,v);
+ v = (u32)(Z.hi); PUTU32(p+4,v);
+ v = (u32)(Z.lo>>32); PUTU32(p+8,v);
+ v = (u32)(Z.lo); PUTU32(p+12,v);
+#endif
+#else /* BIG_ENDIAN */
+ Xi[0] = Z.hi;
+ Xi[1] = Z.lo;
+#endif
+}
+#define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
+
+#elif TABLE_BITS==4
+
+static void gcm_init_4bit(u128 Htable[16], u64 H[2])
+{
+ u128 V;
+#if defined(OPENSSL_SMALL_FOOTPRINT)
+ int i;
+#endif
+
+ Htable[0].hi = 0;
+ Htable[0].lo = 0;
+ V.hi = H[0];
+ V.lo = H[1];
+
+#if defined(OPENSSL_SMALL_FOOTPRINT)
+ for (Htable[8]=V, i=4; i>0; i>>=1) {
+ REDUCE1BIT(V);
+ Htable[i] = V;
+ }
+
+ for (i=2; i<16; i<<=1) {
+ u128 *Hi = Htable+i;
+ int j;
+ for (V=*Hi, j=1; j<i; ++j) {
+ Hi[j].hi = V.hi^Htable[j].hi;
+ Hi[j].lo = V.lo^Htable[j].lo;
+ }
+ }
+#else
+ Htable[8] = V;
+ REDUCE1BIT(V);
+ Htable[4] = V;
+ REDUCE1BIT(V);
+ Htable[2] = V;
+ REDUCE1BIT(V);
+ Htable[1] = V;
+ Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
+ V=Htable[4];
+ Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
+ Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
+ Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
+ V=Htable[8];
+ Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
+ Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
+ Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
+ Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
+ Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
+ Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
+ Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
+#endif
+#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
+ /*
+ * ARM assembler expects specific dword order in Htable.
+ */
+ {
+ int j;
+#if BYTE_ORDER == LITTLE_ENDIAN
+ for (j=0;j<16;++j) {
+ V = Htable[j];
+ Htable[j].hi = V.lo;
+ Htable[j].lo = V.hi;
+ }
+#else /* BIG_ENDIAN */
+ for (j=0;j<16;++j) {
+ V = Htable[j];
+ Htable[j].hi = V.lo<<32|V.lo>>32;
+ Htable[j].lo = V.hi<<32|V.hi>>32;
+ }
+#endif
+ }
+#endif
+}
+
+#ifndef GHASH_ASM
+static const size_t rem_4bit[16] = {
+ PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
+ PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
+ PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
+ PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
+
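+/*
+ * gcm_gmult_4bit computes Xi = Xi * H with Shoup's 4-bit method: the
+ * 16-entry Htable holds the products nibble*H, and Xi is consumed one
+ * nibble at a time from the last byte to the first. Between lookups
+ * the accumulator is shifted right by 4 bits, and rem_4bit supplies
+ * the pre-reduced contribution of the 4 bits that were shifted out.
+ */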
+static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
+{
+ u128 Z;
+ int cnt = 15;
+ size_t rem, nlo, nhi;
+
+ nlo = ((const u8 *)Xi)[15];
+ nhi = nlo>>4;
+ nlo &= 0xf;
+
+ Z.hi = Htable[nlo].hi;
+ Z.lo = Htable[nlo].lo;
+
+ while (1) {
+ rem = (size_t)Z.lo&0xf;
+ Z.lo = (Z.hi<<60)|(Z.lo>>4);
+ Z.hi = (Z.hi>>4);
+#if SIZE_MAX == 0xffffffffffffffff
+ Z.hi ^= rem_4bit[rem];
+#else
+ Z.hi ^= (u64)rem_4bit[rem]<<32;
+#endif
+ Z.hi ^= Htable[nhi].hi;
+ Z.lo ^= Htable[nhi].lo;
+
+ if (--cnt<0) break;
+
+ nlo = ((const u8 *)Xi)[cnt];
+ nhi = nlo>>4;
+ nlo &= 0xf;
+
+ rem = (size_t)Z.lo&0xf;
+ Z.lo = (Z.hi<<60)|(Z.lo>>4);
+ Z.hi = (Z.hi>>4);
+#if SIZE_MAX == 0xffffffffffffffff
+ Z.hi ^= rem_4bit[rem];
+#else
+ Z.hi ^= (u64)rem_4bit[rem]<<32;
+#endif
+ Z.hi ^= Htable[nlo].hi;
+ Z.lo ^= Htable[nlo].lo;
+ }
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP8
+ Xi[0] = BSWAP8(Z.hi);
+ Xi[1] = BSWAP8(Z.lo);
+#else
+ u8 *p = (u8 *)Xi;
+ u32 v;
+ v = (u32)(Z.hi>>32); PUTU32(p,v);
+ v = (u32)(Z.hi); PUTU32(p+4,v);
+ v = (u32)(Z.lo>>32); PUTU32(p+8,v);
+ v = (u32)(Z.lo); PUTU32(p+12,v);
+#endif
+#else /* BIG_ENDIAN */
+ Xi[0] = Z.hi;
+ Xi[1] = Z.lo;
+#endif
+}
+
+#if !defined(OPENSSL_SMALL_FOOTPRINT)
+/*
+ * Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt
+ * for details... The compiler-generated code doesn't seem to give any
+ * performance improvement, at least not on x86[_64]. It's here mostly
+ * as a reference and a placeholder for possible future non-trivial
+ * optimization[s]...
+ */
+static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
+ const u8 *inp,size_t len)
+{
+ u128 Z;
+ int cnt;
+ size_t rem, nlo, nhi;
+
+#if 1
+ do {
+ cnt = 15;
+ nlo = ((const u8 *)Xi)[15];
+ nlo ^= inp[15];
+ nhi = nlo>>4;
+ nlo &= 0xf;
+
+ Z.hi = Htable[nlo].hi;
+ Z.lo = Htable[nlo].lo;
+
+ while (1) {
+ rem = (size_t)Z.lo&0xf;
+ Z.lo = (Z.hi<<60)|(Z.lo>>4);
+ Z.hi = (Z.hi>>4);
+#if SIZE_MAX == 0xffffffffffffffff
+ Z.hi ^= rem_4bit[rem];
+#else
+ Z.hi ^= (u64)rem_4bit[rem]<<32;
+#endif
+ Z.hi ^= Htable[nhi].hi;
+ Z.lo ^= Htable[nhi].lo;
+
+ if (--cnt<0) break;
+
+ nlo = ((const u8 *)Xi)[cnt];
+ nlo ^= inp[cnt];
+ nhi = nlo>>4;
+ nlo &= 0xf;
+
+ rem = (size_t)Z.lo&0xf;
+ Z.lo = (Z.hi<<60)|(Z.lo>>4);
+ Z.hi = (Z.hi>>4);
+#if SIZE_MAX == 0xffffffffffffffff
+ Z.hi ^= rem_4bit[rem];
+#else
+ Z.hi ^= (u64)rem_4bit[rem]<<32;
+#endif
+ Z.hi ^= Htable[nlo].hi;
+ Z.lo ^= Htable[nlo].lo;
+ }
+#else
+	/*
+	 * An extra 256+16 bytes per key plus 512 bytes of shared tables
+	 * [should] give a ~50% improvement... One could have PACK()-ed
+	 * rem_8bit even here, but the priority is to minimize the cache
+	 * footprint...
+	 */
+ u128 Hshr4[16]; /* Htable shifted right by 4 bits */
+ u8 Hshl4[16]; /* Htable shifted left by 4 bits */
+ static const unsigned short rem_8bit[256] = {
+ 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
+ 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
+ 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
+ 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
+ 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
+ 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
+ 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
+ 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
+ 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
+ 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
+ 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
+ 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
+ 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
+ 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
+ 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
+ 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
+ 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
+ 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
+ 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
+ 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
+ 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
+ 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
+ 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
+ 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
+ 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
+ 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
+ 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
+ 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
+ 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
+ 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
+ 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
+ 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
+	/*
+	 * This pre-processing phase costs roughly as much time as it
+	 * saves in a single pass of the loop. In other words, single-block
+	 * performance is about the same as the straightforward "4-bit"
+	 * implementation, and from there it only gets faster...
+	 */
+ for (cnt=0; cnt<16; ++cnt) {
+ Z.hi = Htable[cnt].hi;
+ Z.lo = Htable[cnt].lo;
+ Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
+ Hshr4[cnt].hi = (Z.hi>>4);
+ Hshl4[cnt] = (u8)(Z.lo<<4);
+ }
+
+ do {
+ for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
+ nlo = ((const u8 *)Xi)[cnt];
+ nlo ^= inp[cnt];
+ nhi = nlo>>4;
+ nlo &= 0xf;
+
+ Z.hi ^= Htable[nlo].hi;
+ Z.lo ^= Htable[nlo].lo;
+
+ rem = (size_t)Z.lo&0xff;
+
+ Z.lo = (Z.hi<<56)|(Z.lo>>8);
+ Z.hi = (Z.hi>>8);
+
+ Z.hi ^= Hshr4[nhi].hi;
+ Z.lo ^= Hshr4[nhi].lo;
+ Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
+ }
+
+ nlo = ((const u8 *)Xi)[0];
+ nlo ^= inp[0];
+ nhi = nlo>>4;
+ nlo &= 0xf;
+
+ Z.hi ^= Htable[nlo].hi;
+ Z.lo ^= Htable[nlo].lo;
+
+ rem = (size_t)Z.lo&0xf;
+
+ Z.lo = (Z.hi<<60)|(Z.lo>>4);
+ Z.hi = (Z.hi>>4);
+
+ Z.hi ^= Htable[nhi].hi;
+ Z.lo ^= Htable[nhi].lo;
+ Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
+#endif
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP8
+ Xi[0] = BSWAP8(Z.hi);
+ Xi[1] = BSWAP8(Z.lo);
+#else
+ u8 *p = (u8 *)Xi;
+ u32 v;
+ v = (u32)(Z.hi>>32); PUTU32(p,v);
+ v = (u32)(Z.hi); PUTU32(p+4,v);
+ v = (u32)(Z.lo>>32); PUTU32(p+8,v);
+ v = (u32)(Z.lo); PUTU32(p+12,v);
+#endif
+#else /* BIG_ENDIAN */
+ Xi[0] = Z.hi;
+ Xi[1] = Z.lo;
+#endif
+ } while (inp+=16, len-=16);
+}
+#endif
+#else
+void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
+void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+#endif
+
+#define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
+#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
+#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
+/* GHASH_CHUNK is a "stride" parameter intended to mitigate cache-
+ * thrashing effects. In other words, the idea is to hash the data
+ * while it is still in the L1 cache after the encryption pass... */
+#define GHASH_CHUNK (3*1024)
+#endif
+
+#else /* TABLE_BITS */
+
+static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
+{
+ u128 V,Z = { 0,0 };
+ long X;
+ int i,j;
+ const long *xi = (const long *)Xi;
+
+ V.hi = H[0]; /* H is in host byte order, no byte swapping */
+ V.lo = H[1];
+
+ for (j=0; j<16/sizeof(long); ++j) {
+#if BYTE_ORDER == LITTLE_ENDIAN
+#if SIZE_MAX == 0xffffffffffffffff
+#ifdef BSWAP8
+ X = (long)(BSWAP8(xi[j]));
+#else
+ const u8 *p = (const u8 *)(xi+j);
+ X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
+#endif
+#else
+ const u8 *p = (const u8 *)(xi+j);
+ X = (long)GETU32(p);
+#endif
+#else /* BIG_ENDIAN */
+ X = xi[j];
+#endif
+
+ for (i=0; i<8*sizeof(long); ++i, X<<=1) {
+ u64 M = (u64)(X>>(8*sizeof(long)-1));
+ Z.hi ^= V.hi&M;
+ Z.lo ^= V.lo&M;
+
+ REDUCE1BIT(V);
+ }
+ }
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP8
+ Xi[0] = BSWAP8(Z.hi);
+ Xi[1] = BSWAP8(Z.lo);
+#else
+ u8 *p = (u8 *)Xi;
+ u32 v;
+ v = (u32)(Z.hi>>32); PUTU32(p,v);
+ v = (u32)(Z.hi); PUTU32(p+4,v);
+ v = (u32)(Z.lo>>32); PUTU32(p+8,v);
+ v = (u32)(Z.lo); PUTU32(p+12,v);
+#endif
+#else /* BIG_ENDIAN */
+ Xi[0] = Z.hi;
+ Xi[1] = Z.lo;
+#endif
+}
+#define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
+
+#endif
+
+#if defined(GHASH_ASM) && \
+ (defined(__i386) || defined(__i386__) || \
+ defined(__x86_64) || defined(__x86_64__) || \
+ defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
+#include "x86_arch.h"
+#endif
+
+#if TABLE_BITS==4 && defined(GHASH_ASM)
+# if (defined(__i386) || defined(__i386__) || \
+ defined(__x86_64) || defined(__x86_64__) || \
+ defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
+# define GHASH_ASM_X86_OR_64
+# define GCM_FUNCREF_4BIT
+
+void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
+void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
+void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+
+# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
+# define GHASH_ASM_X86
+void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
+void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+
+void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
+void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+# endif
+# elif defined(__arm__) || defined(__arm)
+# include "arm_arch.h"
+# if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+# define GHASH_ASM_ARM
+# define GCM_FUNCREF_4BIT
+void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
+void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+# endif
+# endif
+#endif
+
+#ifdef GCM_FUNCREF_4BIT
+# undef GCM_MUL
+# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
+# ifdef GHASH
+# undef GHASH
+# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
+# endif
+#endif
+
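+/*
+ * CRYPTO_gcm128_init derives the hash subkey H by encrypting the
+ * all-zero block with the caller's block cipher, converts it to host
+ * byte order, and then selects a multiplication backend: the
+ * PCLMULQDQ or NEON assembly when the CPU supports it, otherwise the
+ * portable 4-bit table code initialized by gcm_init_4bit.
+ */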
+void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
+{
+ memset(ctx,0,sizeof(*ctx));
+ ctx->block = block;
+ ctx->key = key;
+
+ (*block)(ctx->H.c,ctx->H.c,key);
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+ /* H is stored in host byte order */
+#ifdef BSWAP8
+ ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
+ ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
+#else
+ u8 *p = ctx->H.c;
+ u64 hi,lo;
+ hi = (u64)GETU32(p) <<32|GETU32(p+4);
+ lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
+ ctx->H.u[0] = hi;
+ ctx->H.u[1] = lo;
+#endif
+#endif
+
+#if TABLE_BITS==8
+ gcm_init_8bit(ctx->Htable,ctx->H.u);
+#elif TABLE_BITS==4
+# if defined(GHASH_ASM_X86_OR_64)
+# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
+ /* check FXSR and PCLMULQDQ bits */
+ if ((OPENSSL_cpu_caps() & (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) ==
+ (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) {
+ gcm_init_clmul(ctx->Htable,ctx->H.u);
+ ctx->gmult = gcm_gmult_clmul;
+ ctx->ghash = gcm_ghash_clmul;
+ return;
+ }
+# endif
+ gcm_init_4bit(ctx->Htable,ctx->H.u);
+# if defined(GHASH_ASM_X86) /* x86 only */
+# if defined(OPENSSL_IA32_SSE2)
+ if (OPENSSL_cpu_caps() & CPUCAP_MASK_SSE) { /* check SSE bit */
+# else
+ if (OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) { /* check MMX bit */
+# endif
+ ctx->gmult = gcm_gmult_4bit_mmx;
+ ctx->ghash = gcm_ghash_4bit_mmx;
+ } else {
+ ctx->gmult = gcm_gmult_4bit_x86;
+ ctx->ghash = gcm_ghash_4bit_x86;
+ }
+# else
+ ctx->gmult = gcm_gmult_4bit;
+ ctx->ghash = gcm_ghash_4bit;
+# endif
+# elif defined(GHASH_ASM_ARM)
+ if (OPENSSL_armcap_P & ARMV7_NEON) {
+ ctx->gmult = gcm_gmult_neon;
+ ctx->ghash = gcm_ghash_neon;
+ } else {
+ gcm_init_4bit(ctx->Htable,ctx->H.u);
+ ctx->gmult = gcm_gmult_4bit;
+ ctx->ghash = gcm_ghash_4bit;
+ }
+# else
+ gcm_init_4bit(ctx->Htable,ctx->H.u);
+# endif
+#endif
+}
+
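+/*
+ * CRYPTO_gcm128_setiv resets the per-message state and computes the
+ * initial counter block Y0: for the recommended 96-bit IV this is
+ * simply IV || 0^31 || 1, otherwise Y0 = GHASH over the padded IV and
+ * its bit length, as the GCM specification requires. EK0 = E_K(Y0) is
+ * saved for the final tag computation.
+ */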
+void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
+{
+ unsigned int ctr;
+#ifdef GCM_FUNCREF_4BIT
+ void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
+#endif
+
+ ctx->Yi.u[0] = 0;
+ ctx->Yi.u[1] = 0;
+ ctx->Xi.u[0] = 0;
+ ctx->Xi.u[1] = 0;
+ ctx->len.u[0] = 0; /* AAD length */
+ ctx->len.u[1] = 0; /* message length */
+ ctx->ares = 0;
+ ctx->mres = 0;
+
+ if (len==12) {
+ memcpy(ctx->Yi.c,iv,12);
+ ctx->Yi.c[15]=1;
+ ctr=1;
+ }
+ else {
+ size_t i;
+ u64 len0 = len;
+
+ while (len>=16) {
+ for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
+ GCM_MUL(ctx,Yi);
+ iv += 16;
+ len -= 16;
+ }
+ if (len) {
+ for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
+ GCM_MUL(ctx,Yi);
+ }
+ len0 <<= 3;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP8
+ ctx->Yi.u[1] ^= BSWAP8(len0);
+#else
+ ctx->Yi.c[8] ^= (u8)(len0>>56);
+ ctx->Yi.c[9] ^= (u8)(len0>>48);
+ ctx->Yi.c[10] ^= (u8)(len0>>40);
+ ctx->Yi.c[11] ^= (u8)(len0>>32);
+ ctx->Yi.c[12] ^= (u8)(len0>>24);
+ ctx->Yi.c[13] ^= (u8)(len0>>16);
+ ctx->Yi.c[14] ^= (u8)(len0>>8);
+ ctx->Yi.c[15] ^= (u8)(len0);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.u[1] ^= len0;
+#endif
+
+ GCM_MUL(ctx,Yi);
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctr = BSWAP4(ctx->Yi.d[3]);
+#else
+ ctr = GETU32(ctx->Yi.c+12);
+#endif
+#else /* BIG_ENDIAN */
+ ctr = ctx->Yi.d[3];
+#endif
+ }
+
+ (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
+ ++ctr;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
+ PUTU32(ctx->Yi.c+12,ctr);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+#endif
+}
+
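+/*
+ * CRYPTO_gcm128_aad absorbs additional authenticated data into Xi.
+ * All AAD must be supplied before the first encrypt/decrypt call
+ * (ctx->len.u[1] != 0 makes this return -2); a partially filled block
+ * is tracked in ctx->ares and finalized by the first data call.
+ */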
+int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
+{
+ size_t i;
+ unsigned int n;
+ u64 alen = ctx->len.u[0];
+#ifdef GCM_FUNCREF_4BIT
+ void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
+# ifdef GHASH
+ void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
+ const u8 *inp,size_t len) = ctx->ghash;
+# endif
+#endif
+
+ if (ctx->len.u[1]) return -2;
+
+ alen += len;
+ if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
+ return -1;
+ ctx->len.u[0] = alen;
+
+ n = ctx->ares;
+ if (n) {
+ while (n && len) {
+ ctx->Xi.c[n] ^= *(aad++);
+ --len;
+ n = (n+1)%16;
+ }
+ if (n==0) GCM_MUL(ctx,Xi);
+ else {
+ ctx->ares = n;
+ return 0;
+ }
+ }
+
+#ifdef GHASH
+ if ((i = (len&(size_t)-16))) {
+ GHASH(ctx,aad,i);
+ aad += i;
+ len -= i;
+ }
+#else
+ while (len>=16) {
+ for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
+ GCM_MUL(ctx,Xi);
+ aad += 16;
+ len -= 16;
+ }
+#endif
+ if (len) {
+ n = (unsigned int)len;
+ for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
+ }
+
+ ctx->ares = n;
+ return 0;
+}
+
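+/*
+ * CRYPTO_gcm128_encrypt runs CTR-mode encryption and folds the
+ * resulting ciphertext into the GHASH state. Partial blocks are
+ * carried in ctx->mres so the function can be called incrementally;
+ * the fast path works in GHASH_CHUNK-sized runs so freshly written
+ * ciphertext is hashed while it is still in the L1 cache.
+ */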
+int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
+ const unsigned char *in, unsigned char *out,
+ size_t len)
+{
+ unsigned int n, ctr;
+ size_t i;
+ u64 mlen = ctx->len.u[1];
+ block128_f block = ctx->block;
+ void *key = ctx->key;
+#ifdef GCM_FUNCREF_4BIT
+ void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
+# ifdef GHASH
+ void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
+ const u8 *inp,size_t len) = ctx->ghash;
+# endif
+#endif
+
+ mlen += len;
+ if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
+ return -1;
+ ctx->len.u[1] = mlen;
+
+ if (ctx->ares) {
+ /* First call to encrypt finalizes GHASH(AAD) */
+ GCM_MUL(ctx,Xi);
+ ctx->ares = 0;
+ }
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctr = BSWAP4(ctx->Yi.d[3]);
+#else
+ ctr = GETU32(ctx->Yi.c+12);
+#endif
+#else /* BIG_ENDIAN */
+ ctr = ctx->Yi.d[3];
+#endif
+
+ n = ctx->mres;
+#if !defined(OPENSSL_SMALL_FOOTPRINT)
+ if (16%sizeof(size_t) == 0) do { /* always true actually */
+ if (n) {
+ while (n && len) {
+ ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
+ --len;
+ n = (n+1)%16;
+ }
+ if (n==0) GCM_MUL(ctx,Xi);
+ else {
+ ctx->mres = n;
+ return 0;
+ }
+ }
+#ifdef __STRICT_ALIGNMENT
+ if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
+ break;
+#endif
+#if defined(GHASH) && defined(GHASH_CHUNK)
+ while (len>=GHASH_CHUNK) {
+ size_t j=GHASH_CHUNK;
+
+ while (j) {
+ size_t *out_t=(size_t *)out;
+ const size_t *in_t=(const size_t *)in;
+
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
+ PUTU32(ctx->Yi.c+12,ctr);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+#endif
+ for (i=0; i<16/sizeof(size_t); ++i)
+ out_t[i] = in_t[i] ^ ctx->EKi.t[i];
+ out += 16;
+ in += 16;
+ j -= 16;
+ }
+ GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
+ len -= GHASH_CHUNK;
+ }
+ if ((i = (len&(size_t)-16))) {
+ size_t j=i;
+
+ while (len>=16) {
+ size_t *out_t=(size_t *)out;
+ const size_t *in_t=(const size_t *)in;
+
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
+ PUTU32(ctx->Yi.c+12,ctr);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+#endif
+ for (i=0; i<16/sizeof(size_t); ++i)
+ out_t[i] = in_t[i] ^ ctx->EKi.t[i];
+ out += 16;
+ in += 16;
+ len -= 16;
+ }
+ GHASH(ctx,out-j,j);
+ }
+#else
+ while (len>=16) {
+ size_t *out_t=(size_t *)out;
+ const size_t *in_t=(const size_t *)in;
+
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
+ PUTU32(ctx->Yi.c+12,ctr);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+#endif
+ for (i=0; i<16/sizeof(size_t); ++i)
+ ctx->Xi.t[i] ^=
+ out_t[i] = in_t[i]^ctx->EKi.t[i];
+ GCM_MUL(ctx,Xi);
+ out += 16;
+ in += 16;
+ len -= 16;
+ }
+#endif
+ if (len) {
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
+ PUTU32(ctx->Yi.c+12,ctr);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+#endif
+ while (len--) {
+ ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
+ ++n;
+ }
+ }
+
+ ctx->mres = n;
+ return 0;
+ } while(0);
+#endif
+ for (i=0;i<len;++i) {
+ if (n==0) {
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
+ PUTU32(ctx->Yi.c+12,ctr);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+#endif
+ }
+ ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
+ n = (n+1)%16;
+ if (n==0)
+ GCM_MUL(ctx,Xi);
+ }
+
+ ctx->mres = n;
+ return 0;
+}
+
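+/*
+ * CRYPTO_gcm128_decrypt mirrors CRYPTO_gcm128_encrypt, except that the
+ * incoming ciphertext is folded into GHASH before it is XORed with the
+ * key stream.
+ */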
+int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
+ const unsigned char *in, unsigned char *out,
+ size_t len)
+{
+ unsigned int n, ctr;
+ size_t i;
+ u64 mlen = ctx->len.u[1];
+ block128_f block = ctx->block;
+ void *key = ctx->key;
+#ifdef GCM_FUNCREF_4BIT
+ void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
+# ifdef GHASH
+ void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
+ const u8 *inp,size_t len) = ctx->ghash;
+# endif
+#endif
+
+ mlen += len;
+ if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
+ return -1;
+ ctx->len.u[1] = mlen;
+
+ if (ctx->ares) {
+ /* First call to decrypt finalizes GHASH(AAD) */
+ GCM_MUL(ctx,Xi);
+ ctx->ares = 0;
+ }
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctr = BSWAP4(ctx->Yi.d[3]);
+#else
+ ctr = GETU32(ctx->Yi.c+12);
+#endif
+#else /* BIG_ENDIAN */
+ ctr = ctx->Yi.d[3];
+#endif
+
+ n = ctx->mres;
+#if !defined(OPENSSL_SMALL_FOOTPRINT)
+ if (16%sizeof(size_t) == 0) do { /* always true actually */
+ if (n) {
+ while (n && len) {
+ u8 c = *(in++);
+ *(out++) = c^ctx->EKi.c[n];
+ ctx->Xi.c[n] ^= c;
+ --len;
+ n = (n+1)%16;
+ }
+ if (n==0) GCM_MUL (ctx,Xi);
+ else {
+ ctx->mres = n;
+ return 0;
+ }
+ }
+#ifdef __STRICT_ALIGNMENT
+ if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
+ break;
+#endif
+#if defined(GHASH) && defined(GHASH_CHUNK)
+ while (len>=GHASH_CHUNK) {
+ size_t j=GHASH_CHUNK;
+
+ GHASH(ctx,in,GHASH_CHUNK);
+ while (j) {
+ size_t *out_t=(size_t *)out;
+ const size_t *in_t=(const size_t *)in;
+
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
+ PUTU32(ctx->Yi.c+12,ctr);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+#endif
+ for (i=0; i<16/sizeof(size_t); ++i)
+ out_t[i] = in_t[i]^ctx->EKi.t[i];
+ out += 16;
+ in += 16;
+ j -= 16;
+ }
+ len -= GHASH_CHUNK;
+ }
+ if ((i = (len&(size_t)-16))) {
+ GHASH(ctx,in,i);
+ while (len>=16) {
+ size_t *out_t=(size_t *)out;
+ const size_t *in_t=(const size_t *)in;
+
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
+ PUTU32(ctx->Yi.c+12,ctr);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+#endif
+ for (i=0; i<16/sizeof(size_t); ++i)
+ out_t[i] = in_t[i]^ctx->EKi.t[i];
+ out += 16;
+ in += 16;
+ len -= 16;
+ }
+ }
+#else
+ while (len>=16) {
+ size_t *out_t=(size_t *)out;
+ const size_t *in_t=(const size_t *)in;
+
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
+ PUTU32(ctx->Yi.c+12,ctr);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+#endif
+ for (i=0; i<16/sizeof(size_t); ++i) {
+ size_t c = in[i];
+ out[i] = c^ctx->EKi.t[i];
+ ctx->Xi.t[i] ^= c;
+ }
+ GCM_MUL(ctx,Xi);
+ out += 16;
+ in += 16;
+ len -= 16;
+ }
+#endif
+ if (len) {
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
+ PUTU32(ctx->Yi.c+12,ctr);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+#endif
+ while (len--) {
+ u8 c = in[n];
+ ctx->Xi.c[n] ^= c;
+ out[n] = c^ctx->EKi.c[n];
+ ++n;
+ }
+ }
+
+ ctx->mres = n;
+ return 0;
+ } while(0);
+#endif
+ for (i=0;i<len;++i) {
+ u8 c;
+ if (n==0) {
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
+ PUTU32(ctx->Yi.c+12,ctr);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+#endif
+ }
+ c = in[i];
+ out[i] = c^ctx->EKi.c[n];
+ ctx->Xi.c[n] ^= c;
+ n = (n+1)%16;
+ if (n==0)
+ GCM_MUL(ctx,Xi);
+ }
+
+ ctx->mres = n;
+ return 0;
+}
+
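+/*
+ * The _ctr32 variants take a ctr128_f callback that processes many
+ * counter blocks per call (for example a hardware-accelerated AES-CTR
+ * routine), instead of invoking the block cipher once per 16 bytes as
+ * the plain encrypt/decrypt paths above do.
+ */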
+int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
+ const unsigned char *in, unsigned char *out,
+ size_t len, ctr128_f stream)
+{
+ unsigned int n, ctr;
+ size_t i;
+ u64 mlen = ctx->len.u[1];
+ void *key = ctx->key;
+#ifdef GCM_FUNCREF_4BIT
+ void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
+# ifdef GHASH
+ void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
+ const u8 *inp,size_t len) = ctx->ghash;
+# endif
+#endif
+
+ mlen += len;
+ if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
+ return -1;
+ ctx->len.u[1] = mlen;
+
+ if (ctx->ares) {
+ /* First call to encrypt finalizes GHASH(AAD) */
+ GCM_MUL(ctx,Xi);
+ ctx->ares = 0;
+ }
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctr = BSWAP4(ctx->Yi.d[3]);
+#else
+ ctr = GETU32(ctx->Yi.c+12);
+#endif
+#else /* BIG_ENDIAN */
+ ctr = ctx->Yi.d[3];
+#endif
+
+ n = ctx->mres;
+ if (n) {
+ while (n && len) {
+ ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
+ --len;
+ n = (n+1)%16;
+ }
+ if (n==0) GCM_MUL(ctx,Xi);
+ else {
+ ctx->mres = n;
+ return 0;
+ }
+ }
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ while (len>=GHASH_CHUNK) {
+ (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
+ ctr += GHASH_CHUNK/16;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
+ PUTU32(ctx->Yi.c+12,ctr);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+#endif
+ GHASH(ctx,out,GHASH_CHUNK);
+ out += GHASH_CHUNK;
+ in += GHASH_CHUNK;
+ len -= GHASH_CHUNK;
+ }
+#endif
+ if ((i = (len&(size_t)-16))) {
+ size_t j=i/16;
+
+ (*stream)(in,out,j,key,ctx->Yi.c);
+ ctr += (unsigned int)j;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
+ PUTU32(ctx->Yi.c+12,ctr);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+#endif
+ in += i;
+ len -= i;
+#if defined(GHASH)
+ GHASH(ctx,out,i);
+ out += i;
+#else
+ while (j--) {
+ for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
+ GCM_MUL(ctx,Xi);
+ out += 16;
+ }
+#endif
+ }
+ if (len) {
+ (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
+ PUTU32(ctx->Yi.c+12,ctr);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+#endif
+ while (len--) {
+ ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
+ ++n;
+ }
+ }
+
+ ctx->mres = n;
+ return 0;
+}
+
+int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
+ const unsigned char *in, unsigned char *out,
+ size_t len,ctr128_f stream)
+{
+ unsigned int n, ctr;
+ size_t i;
+ u64 mlen = ctx->len.u[1];
+ void *key = ctx->key;
+#ifdef GCM_FUNCREF_4BIT
+ void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
+# ifdef GHASH
+ void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
+ const u8 *inp,size_t len) = ctx->ghash;
+# endif
+#endif
+
+ mlen += len;
+ if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
+ return -1;
+ ctx->len.u[1] = mlen;
+
+ if (ctx->ares) {
+ /* First call to decrypt finalizes GHASH(AAD) */
+ GCM_MUL(ctx,Xi);
+ ctx->ares = 0;
+ }
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctr = BSWAP4(ctx->Yi.d[3]);
+#else
+ ctr = GETU32(ctx->Yi.c+12);
+#endif
+#else /* BIG_ENDIAN */
+ ctr = ctx->Yi.d[3];
+#endif
+
+ n = ctx->mres;
+ if (n) {
+ while (n && len) {
+ u8 c = *(in++);
+ *(out++) = c^ctx->EKi.c[n];
+ ctx->Xi.c[n] ^= c;
+ --len;
+ n = (n+1)%16;
+ }
+ if (n==0) GCM_MUL (ctx,Xi);
+ else {
+ ctx->mres = n;
+ return 0;
+ }
+ }
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ while (len>=GHASH_CHUNK) {
+ GHASH(ctx,in,GHASH_CHUNK);
+ (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
+ ctr += GHASH_CHUNK/16;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
+ PUTU32(ctx->Yi.c+12,ctr);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+#endif
+ out += GHASH_CHUNK;
+ in += GHASH_CHUNK;
+ len -= GHASH_CHUNK;
+ }
+#endif
+ if ((i = (len&(size_t)-16))) {
+ size_t j=i/16;
+
+#if defined(GHASH)
+ GHASH(ctx,in,i);
+#else
+ while (j--) {
+ size_t k;
+ for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
+ GCM_MUL(ctx,Xi);
+ in += 16;
+ }
+ j = i/16;
+ in -= i;
+#endif
+ (*stream)(in,out,j,key,ctx->Yi.c);
+ ctr += (unsigned int)j;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
+ PUTU32(ctx->Yi.c+12,ctr);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+#endif
+ out += i;
+ in += i;
+ len -= i;
+ }
+ if (len) {
+ (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+#else
+ PUTU32(ctx->Yi.c+12,ctr);
+#endif
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+#endif
+ while (len--) {
+ u8 c = in[n];
+ ctx->Xi.c[n] ^= c;
+ out[n] = c^ctx->EKi.c[n];
+ ++n;
+ }
+ }
+
+ ctx->mres = n;
+ return 0;
+}
+
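+/*
+ * CRYPTO_gcm128_finish completes the tag: the bit lengths of the AAD
+ * and the ciphertext are folded into Xi, Xi is multiplied by H one
+ * last time, and the result is XORed with EK0. If a tag is supplied,
+ * it is compared against the first len bytes and 0 is returned on a
+ * match.
+ */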
+int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
+ size_t len)
+{
+ u64 alen = ctx->len.u[0]<<3;
+ u64 clen = ctx->len.u[1]<<3;
+#ifdef GCM_FUNCREF_4BIT
+ void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
+#endif
+
+ if (ctx->mres || ctx->ares)
+ GCM_MUL(ctx,Xi);
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP8
+ alen = BSWAP8(alen);
+ clen = BSWAP8(clen);
+#else
+ {
+ u8 *p = ctx->len.c;
+
+ ctx->len.u[0] = alen;
+ ctx->len.u[1] = clen;
+
+ alen = (u64)GETU32(p) <<32|GETU32(p+4);
+ clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
+ }
+#endif
+#endif
+
+ ctx->Xi.u[0] ^= alen;
+ ctx->Xi.u[1] ^= clen;
+ GCM_MUL(ctx,Xi);
+
+ ctx->Xi.u[0] ^= ctx->EK0.u[0];
+ ctx->Xi.u[1] ^= ctx->EK0.u[1];
+
+ if (tag && len<=sizeof(ctx->Xi))
+ return memcmp(ctx->Xi.c,tag,len);
+ else
+ return -1;
+}
+
+void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
+{
+ CRYPTO_gcm128_finish(ctx, NULL, 0);
+ memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
+}
+
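+/*
+ * Typical call sequence (a sketch, assuming an AES key schedule with
+ * AES_encrypt used as the block128_f; error handling omitted):
+ *
+ *	AES_KEY ks;
+ *	GCM128_CONTEXT gcm;
+ *
+ *	AES_set_encrypt_key(key, 128, &ks);
+ *	CRYPTO_gcm128_init(&gcm, &ks, (block128_f)AES_encrypt);
+ *	CRYPTO_gcm128_setiv(&gcm, iv, iv_len);
+ *	CRYPTO_gcm128_aad(&gcm, aad, aad_len);
+ *	CRYPTO_gcm128_encrypt(&gcm, plaintext, ciphertext, len);
+ *	CRYPTO_gcm128_tag(&gcm, tag, 16);
+ */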
+#if 0
+
+GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
+{
+ GCM128_CONTEXT *ret;
+
+ if ((ret = malloc(sizeof(GCM128_CONTEXT))))
+ CRYPTO_gcm128_init(ret,key,block);
+
+ return ret;
+}
+
+void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
+{
+ freezero(ctx, sizeof(*ctx));
+}
+
+#endif
diff --git a/ext/libressl/crypto/modes/ghash-elf-armv4.S b/ext/libressl/crypto/modes/ghash-elf-armv4.S
new file mode 100644
index 0000000..af42593
--- /dev/null
+++ b/ext/libressl/crypto/modes/ghash-elf-armv4.S
@@ -0,0 +1,412 @@
+#include "arm_arch.h"
+
+.text
+.syntax unified
+.code 32
+
+.type rem_4bit,%object
+.align 5
+rem_4bit:
+.short 0x0000,0x1C20,0x3840,0x2460
+.short 0x7080,0x6CA0,0x48C0,0x54E0
+.short 0xE100,0xFD20,0xD940,0xC560
+.short 0x9180,0x8DA0,0xA9C0,0xB5E0
+.size rem_4bit,.-rem_4bit
+
+.type rem_4bit_get,%function
+rem_4bit_get:
+ sub r2,pc,#8
+ sub r2,r2,#32 @ &rem_4bit
+ b .Lrem_4bit_got
+ nop
+.size rem_4bit_get,.-rem_4bit_get
+
+.global gcm_ghash_4bit
+.type gcm_ghash_4bit,%function
+gcm_ghash_4bit:
+ sub r12,pc,#8
+ add r3,r2,r3 @ r3 to point at the end
+ stmdb sp!,{r3-r11,lr} @ save r3/end too
+ sub r12,r12,#48 @ &rem_4bit
+
+ ldmia r12,{r4-r11} @ copy rem_4bit ...
+ stmdb sp!,{r4-r11} @ ... to stack
+
+ ldrb r12,[r2,#15]
+ ldrb r14,[r0,#15]
+.Louter:
+ eor r12,r12,r14
+ and r14,r12,#0xf0
+ and r12,r12,#0x0f
+ mov r3,#14
+
+ add r7,r1,r12,lsl#4
+ ldmia r7,{r4-r7} @ load Htbl[nlo]
+ add r11,r1,r14
+ ldrb r12,[r2,#14]
+
+ and r14,r4,#0xf @ rem
+ ldmia r11,{r8-r11} @ load Htbl[nhi]
+ add r14,r14,r14
+ eor r4,r8,r4,lsr#4
+ ldrh r8,[sp,r14] @ rem_4bit[rem]
+ eor r4,r4,r5,lsl#28
+ ldrb r14,[r0,#14]
+ eor r5,r9,r5,lsr#4
+ eor r5,r5,r6,lsl#28
+ eor r6,r10,r6,lsr#4
+ eor r6,r6,r7,lsl#28
+ eor r7,r11,r7,lsr#4
+ eor r12,r12,r14
+ and r14,r12,#0xf0
+ and r12,r12,#0x0f
+ eor r7,r7,r8,lsl#16
+
+.Linner:
+ add r11,r1,r12,lsl#4
+ and r12,r4,#0xf @ rem
+ subs r3,r3,#1
+ add r12,r12,r12
+ ldmia r11,{r8-r11} @ load Htbl[nlo]
+ eor r4,r8,r4,lsr#4
+ eor r4,r4,r5,lsl#28
+ eor r5,r9,r5,lsr#4
+ eor r5,r5,r6,lsl#28
+ ldrh r8,[sp,r12] @ rem_4bit[rem]
+ eor r6,r10,r6,lsr#4
+ ldrbpl r12,[r2,r3]
+ eor r6,r6,r7,lsl#28
+ eor r7,r11,r7,lsr#4
+
+ add r11,r1,r14
+ and r14,r4,#0xf @ rem
+ eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
+ add r14,r14,r14
+ ldmia r11,{r8-r11} @ load Htbl[nhi]
+ eor r4,r8,r4,lsr#4
+ ldrbpl r8,[r0,r3]
+ eor r4,r4,r5,lsl#28
+ eor r5,r9,r5,lsr#4
+ ldrh r9,[sp,r14]
+ eor r5,r5,r6,lsl#28
+ eor r6,r10,r6,lsr#4
+ eor r6,r6,r7,lsl#28
+ eorpl r12,r12,r8
+ eor r7,r11,r7,lsr#4
+ andpl r14,r12,#0xf0
+ andpl r12,r12,#0x0f
+ eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
+ bpl .Linner
+
+ ldr r3,[sp,#32] @ re-load r3/end
+ add r2,r2,#16
+ mov r14,r4
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r4,r4
+ str r4,[r0,#12]
+#elif defined(__ARMEB__)
+ str r4,[r0,#12]
+#else
+ mov r9,r4,lsr#8
+ strb r4,[r0,#12+3]
+ mov r10,r4,lsr#16
+ strb r9,[r0,#12+2]
+ mov r11,r4,lsr#24
+ strb r10,[r0,#12+1]
+ strb r11,[r0,#12]
+#endif
+ cmp r2,r3
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r5,r5
+ str r5,[r0,#8]
+#elif defined(__ARMEB__)
+ str r5,[r0,#8]
+#else
+ mov r9,r5,lsr#8
+ strb r5,[r0,#8+3]
+ mov r10,r5,lsr#16
+ strb r9,[r0,#8+2]
+ mov r11,r5,lsr#24
+ strb r10,[r0,#8+1]
+ strb r11,[r0,#8]
+#endif
+ ldrbne r12,[r2,#15]
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r6,r6
+ str r6,[r0,#4]
+#elif defined(__ARMEB__)
+ str r6,[r0,#4]
+#else
+ mov r9,r6,lsr#8
+ strb r6,[r0,#4+3]
+ mov r10,r6,lsr#16
+ strb r9,[r0,#4+2]
+ mov r11,r6,lsr#24
+ strb r10,[r0,#4+1]
+ strb r11,[r0,#4]
+#endif
+
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r7,r7
+ str r7,[r0,#0]
+#elif defined(__ARMEB__)
+ str r7,[r0,#0]
+#else
+ mov r9,r7,lsr#8
+ strb r7,[r0,#0+3]
+ mov r10,r7,lsr#16
+ strb r9,[r0,#0+2]
+ mov r11,r7,lsr#24
+ strb r10,[r0,#0+1]
+ strb r11,[r0,#0]
+#endif
+
+ bne .Louter
+
+ add sp,sp,#36
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r11,pc}
+#else
+ ldmia sp!,{r4-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size gcm_ghash_4bit,.-gcm_ghash_4bit
+
+.global gcm_gmult_4bit
+.type gcm_gmult_4bit,%function
+gcm_gmult_4bit:
+ stmdb sp!,{r4-r11,lr}
+ ldrb r12,[r0,#15]
+ b rem_4bit_get
+.Lrem_4bit_got:
+ and r14,r12,#0xf0
+ and r12,r12,#0x0f
+ mov r3,#14
+
+ add r7,r1,r12,lsl#4
+ ldmia r7,{r4-r7} @ load Htbl[nlo]
+ ldrb r12,[r0,#14]
+
+ add r11,r1,r14
+ and r14,r4,#0xf @ rem
+ ldmia r11,{r8-r11} @ load Htbl[nhi]
+ add r14,r14,r14
+ eor r4,r8,r4,lsr#4
+ ldrh r8,[r2,r14] @ rem_4bit[rem]
+ eor r4,r4,r5,lsl#28
+ eor r5,r9,r5,lsr#4
+ eor r5,r5,r6,lsl#28
+ eor r6,r10,r6,lsr#4
+ eor r6,r6,r7,lsl#28
+ eor r7,r11,r7,lsr#4
+ and r14,r12,#0xf0
+ eor r7,r7,r8,lsl#16
+ and r12,r12,#0x0f
+
+.Loop:
+ add r11,r1,r12,lsl#4
+ and r12,r4,#0xf @ rem
+ subs r3,r3,#1
+ add r12,r12,r12
+ ldmia r11,{r8-r11} @ load Htbl[nlo]
+ eor r4,r8,r4,lsr#4
+ eor r4,r4,r5,lsl#28
+ eor r5,r9,r5,lsr#4
+ eor r5,r5,r6,lsl#28
+ ldrh r8,[r2,r12] @ rem_4bit[rem]
+ eor r6,r10,r6,lsr#4
+ ldrbpl r12,[r0,r3]
+ eor r6,r6,r7,lsl#28
+ eor r7,r11,r7,lsr#4
+
+ add r11,r1,r14
+ and r14,r4,#0xf @ rem
+ eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
+ add r14,r14,r14
+ ldmia r11,{r8-r11} @ load Htbl[nhi]
+ eor r4,r8,r4,lsr#4
+ eor r4,r4,r5,lsl#28
+ eor r5,r9,r5,lsr#4
+ ldrh r8,[r2,r14] @ rem_4bit[rem]
+ eor r5,r5,r6,lsl#28
+ eor r6,r10,r6,lsr#4
+ eor r6,r6,r7,lsl#28
+ eor r7,r11,r7,lsr#4
+ andpl r14,r12,#0xf0
+ andpl r12,r12,#0x0f
+ eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
+ bpl .Loop
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r4,r4
+ str r4,[r0,#12]
+#elif defined(__ARMEB__)
+ str r4,[r0,#12]
+#else
+ mov r9,r4,lsr#8
+ strb r4,[r0,#12+3]
+ mov r10,r4,lsr#16
+ strb r9,[r0,#12+2]
+ mov r11,r4,lsr#24
+ strb r10,[r0,#12+1]
+ strb r11,[r0,#12]
+#endif
+
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r5,r5
+ str r5,[r0,#8]
+#elif defined(__ARMEB__)
+ str r5,[r0,#8]
+#else
+ mov r9,r5,lsr#8
+ strb r5,[r0,#8+3]
+ mov r10,r5,lsr#16
+ strb r9,[r0,#8+2]
+ mov r11,r5,lsr#24
+ strb r10,[r0,#8+1]
+ strb r11,[r0,#8]
+#endif
+
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r6,r6
+ str r6,[r0,#4]
+#elif defined(__ARMEB__)
+ str r6,[r0,#4]
+#else
+ mov r9,r6,lsr#8
+ strb r6,[r0,#4+3]
+ mov r10,r6,lsr#16
+ strb r9,[r0,#4+2]
+ mov r11,r6,lsr#24
+ strb r10,[r0,#4+1]
+ strb r11,[r0,#4]
+#endif
+
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r7,r7
+ str r7,[r0,#0]
+#elif defined(__ARMEB__)
+ str r7,[r0,#0]
+#else
+ mov r9,r7,lsr#8
+ strb r7,[r0,#0+3]
+ mov r10,r7,lsr#16
+ strb r9,[r0,#0+2]
+ mov r11,r7,lsr#24
+ strb r10,[r0,#0+1]
+ strb r11,[r0,#0]
+#endif
+
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r11,pc}
+#else
+ ldmia sp!,{r4-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size gcm_gmult_4bit,.-gcm_gmult_4bit
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+.fpu neon
+
+.global gcm_gmult_neon
+.type gcm_gmult_neon,%function
+.align 4
+gcm_gmult_neon:
+ sub r1,#16 @ point at H in GCM128_CTX
+ vld1.64 d29,[r0,:64]!@ load Xi
+ vmov.i32 d5,#0xe1 @ our irreducible polynomial
+ vld1.64 d28,[r0,:64]!
+ vshr.u64 d5,#32
+ vldmia r1,{d0-d1} @ load H
+ veor q12,q12
+#ifdef __ARMEL__
+ vrev64.8 q14,q14
+#endif
+ veor q13,q13
+ veor q11,q11
+ mov r1,#16
+ veor q10,q10
+ mov r3,#16
+ veor d2,d2
+ vdup.8 d4,d28[0] @ broadcast lowest byte
+ b .Linner_neon
+.size gcm_gmult_neon,.-gcm_gmult_neon
+
+.global gcm_ghash_neon
+.type gcm_ghash_neon,%function
+.align 4
+gcm_ghash_neon:
+ vld1.64 d21,[r0,:64]! @ load Xi
+ vmov.i32 d5,#0xe1 @ our irreducible polynomial
+ vld1.64 d20,[r0,:64]!
+ vshr.u64 d5,#32
+ vldmia r0,{d0-d1} @ load H
+ veor q12,q12
+ nop
+#ifdef __ARMEL__
+ vrev64.8 q10,q10
+#endif
+.Louter_neon:
+ vld1.64 d29,[r2]! @ load inp
+ veor q13,q13
+ vld1.64 d28,[r2]!
+ veor q11,q11
+ mov r1,#16
+#ifdef __ARMEL__
+ vrev64.8 q14,q14
+#endif
+ veor d2,d2
+ veor q14,q10 @ inp^=Xi
+ veor q10,q10
+ vdup.8 d4,d28[0] @ broadcast lowest byte
+.Linner_neon:
+ subs r1,r1,#1
+ vmull.p8 q9,d1,d4 @ H.lo·Xi[i]
+ vmull.p8 q8,d0,d4 @ H.hi·Xi[i]
+ vext.8 q14,q12,#1 @ IN>>=8
+
+ veor q10,q13 @ modulo-scheduled part
+ vshl.i64 d22,#48
+ vdup.8 d4,d28[0] @ broadcast lowest byte
+ veor d3,d18,d20
+
+ veor d21,d22
+ vuzp.8 q9,q8
+ vsli.8 d2,d3,#1 @ compose the "carry" byte
+ vext.8 q10,q12,#1 @ Z>>=8
+
+ vmull.p8 q11,d2,d5 @ "carry"·0xe1
+ vshr.u8 d2,d3,#7 @ save Z's bottom bit
+ vext.8 q13,q9,q12,#1 @ Qlo>>=8
+ veor q10,q8
+ bne .Linner_neon
+
+ veor q10,q13 @ modulo-scheduled artefact
+ vshl.i64 d22,#48
+ veor d21,d22
+
+ @ finalization, normalize Z:Zo
+ vand d2,d5 @ suffices to mask the bit
+ vshr.u64 d3,d20,#63
+ vshl.i64 q10,#1
+ subs r3,#16
+ vorr q10,q1 @ Z=Z:Zo<<1
+ bne .Louter_neon
+
+#ifdef __ARMEL__
+ vrev64.8 q10,q10
+#endif
+ sub r0,#16
+ vst1.64 d21,[r0,:64]! @ write out Xi
+ vst1.64 d20,[r0,:64]
+
+ .word 0xe12fff1e
+.size gcm_ghash_neon,.-gcm_ghash_neon
+#endif
+.asciz "GHASH for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
+.align 2
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/modes/ghash-elf-x86_64.S b/ext/libressl/crypto/modes/ghash-elf-x86_64.S
new file mode 100644
index 0000000..5f31626
--- /dev/null
+++ b/ext/libressl/crypto/modes/ghash-elf-x86_64.S
@@ -0,0 +1,1030 @@
+#include "x86_arch.h"
+.text
+
+.globl gcm_gmult_4bit
+.type gcm_gmult_4bit,@function
+.align 16
+gcm_gmult_4bit:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+.Lgmult_prologue:
+
+ movzbq 15(%rdi),%r8
+ leaq .Lrem_4bit(%rip),%r11
+ xorq %rax,%rax
+ xorq %rbx,%rbx
+ movb %r8b,%al
+ movb %r8b,%bl
+ shlb $4,%al
+ movq $14,%rcx
+ movq 8(%rsi,%rax,1),%r8
+ movq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ movq %r8,%rdx
+ jmp .Loop1
+
+.align 16
+.Loop1:
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ movb (%rdi,%rcx,1),%al
+ shrq $4,%r9
+ xorq 8(%rsi,%rbx,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rbx,1),%r9
+ movb %al,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ shlb $4,%al
+ xorq %r10,%r8
+ decq %rcx
+ js .Lbreak1
+
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rax,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ xorq %r10,%r8
+ jmp .Loop1
+
+.align 16
+.Lbreak1:
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rax,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ xorq %r10,%r8
+
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rbx,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rbx,1),%r9
+ xorq %r10,%r8
+ xorq (%r11,%rdx,8),%r9
+
+ bswapq %r8
+ bswapq %r9
+ movq %r8,8(%rdi)
+ movq %r9,(%rdi)
+
+ movq 16(%rsp),%rbx
+ leaq 24(%rsp),%rsp
+.Lgmult_epilogue:
+ retq
+.size gcm_gmult_4bit,.-gcm_gmult_4bit
+.globl gcm_ghash_4bit
+.type gcm_ghash_4bit,@function
+.align 16
+gcm_ghash_4bit:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $280,%rsp
+.Lghash_prologue:
+ movq %rdx,%r14
+ movq %rcx,%r15
+ subq $-128,%rsi
+ leaq 16+128(%rsp),%rbp
+ xorl %edx,%edx
+ movq 0+0-128(%rsi),%r8
+ movq 0+8-128(%rsi),%rax
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq 16+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq 16+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,0(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,0(%rbp)
+ movq 32+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,0-128(%rbp)
+ movq 32+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,1(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,8(%rbp)
+ movq 48+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,8-128(%rbp)
+ movq 48+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,2(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,16(%rbp)
+ movq 64+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,16-128(%rbp)
+ movq 64+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,3(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,24(%rbp)
+ movq 80+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,24-128(%rbp)
+ movq 80+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,4(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,32(%rbp)
+ movq 96+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,32-128(%rbp)
+ movq 96+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,5(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,40(%rbp)
+ movq 112+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,40-128(%rbp)
+ movq 112+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,6(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,48(%rbp)
+ movq 128+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,48-128(%rbp)
+ movq 128+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,7(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,56(%rbp)
+ movq 144+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,56-128(%rbp)
+ movq 144+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,8(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,64(%rbp)
+ movq 160+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,64-128(%rbp)
+ movq 160+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,9(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,72(%rbp)
+ movq 176+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,72-128(%rbp)
+ movq 176+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,10(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,80(%rbp)
+ movq 192+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,80-128(%rbp)
+ movq 192+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,11(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,88(%rbp)
+ movq 208+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,88-128(%rbp)
+ movq 208+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,12(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,96(%rbp)
+ movq 224+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,96-128(%rbp)
+ movq 224+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,13(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,104(%rbp)
+ movq 240+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,104-128(%rbp)
+ movq 240+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,14(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,112(%rbp)
+ shlb $4,%dl
+ movq %rax,112-128(%rbp)
+ shlq $60,%r10
+ movb %dl,15(%rsp)
+ orq %r10,%rbx
+ movq %r9,120(%rbp)
+ movq %rbx,120-128(%rbp)
+ addq $-128,%rsi
+ movq 8(%rdi),%r8
+ movq 0(%rdi),%r9
+ addq %r14,%r15
+ leaq .Lrem_8bit(%rip),%r11
+ jmp .Louter_loop
+.align 16
+.Louter_loop:
+ xorq (%r14),%r9
+ movq 8(%r14),%rdx
+ leaq 16(%r14),%r14
+ xorq %r8,%rdx
+ movq %r9,(%rdi)
+ movq %rdx,8(%rdi)
+ shrq $32,%rdx
+ xorq %rax,%rax
+ roll $8,%edx
+ movb %dl,%al
+ movzbl %dl,%ebx
+ shlb $4,%al
+ shrl $4,%ebx
+ roll $8,%edx
+ movq 8(%rsi,%rax,1),%r8
+ movq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ xorq %r8,%r12
+ movq %r9,%r10
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 8(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 4(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 0(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ andl $240,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl -4(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ movzwq (%r11,%r12,2),%r12
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ shlq $48,%r12
+ xorq %r10,%r8
+ xorq %r12,%r9
+ movzbq %r8b,%r13
+ shrq $4,%r8
+ movq %r9,%r10
+ shlb $4,%r13b
+ shrq $4,%r9
+ xorq 8(%rsi,%rcx,1),%r8
+ movzwq (%r11,%r13,2),%r13
+ shlq $60,%r10
+ xorq (%rsi,%rcx,1),%r9
+ xorq %r10,%r8
+ shlq $48,%r13
+ bswapq %r8
+ xorq %r13,%r9
+ bswapq %r9
+ cmpq %r15,%r14
+ jb .Louter_loop
+ movq %r8,8(%rdi)
+ movq %r9,(%rdi)
+
+ leaq 280(%rsp),%rsi
+ movq 0(%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lghash_epilogue:
+ retq
+.size gcm_ghash_4bit,.-gcm_ghash_4bit
+.globl gcm_init_clmul
+.type gcm_init_clmul,@function
+.align 16
+gcm_init_clmul:
+ movdqu (%rsi),%xmm2
+ pshufd $78,%xmm2,%xmm2
+
+
+ pshufd $255,%xmm2,%xmm4
+ movdqa %xmm2,%xmm3
+ psllq $1,%xmm2
+ pxor %xmm5,%xmm5
+ psrlq $63,%xmm3
+ pcmpgtd %xmm4,%xmm5
+ pslldq $8,%xmm3
+ por %xmm3,%xmm2
+
+
+ pand .L0x1c2_polynomial(%rip),%xmm5
+ pxor %xmm5,%xmm2
+
+
+ movdqa %xmm2,%xmm0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ movdqu %xmm2,(%rdi)
+ movdqu %xmm0,16(%rdi)
+ retq
+.size gcm_init_clmul,.-gcm_init_clmul
+.globl gcm_gmult_clmul
+.type gcm_gmult_clmul,@function
+.align 16
+gcm_gmult_clmul:
+ movdqu (%rdi),%xmm0
+ movdqa .Lbswap_mask(%rip),%xmm5
+ movdqu (%rsi),%xmm2
+.byte 102,15,56,0,197
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,197
+ movdqu %xmm0,(%rdi)
+ retq
+.size gcm_gmult_clmul,.-gcm_gmult_clmul
+.globl gcm_ghash_clmul
+.type gcm_ghash_clmul,@function
+.align 16
+gcm_ghash_clmul:
+ movdqa .Lbswap_mask(%rip),%xmm5
+
+ movdqu (%rdi),%xmm0
+ movdqu (%rsi),%xmm2
+.byte 102,15,56,0,197
+
+ subq $16,%rcx
+ jz .Lodd_tail
+
+ movdqu 16(%rsi),%xmm8
+
+
+
+
+
+ movdqu (%rdx),%xmm3
+ movdqu 16(%rdx),%xmm6
+.byte 102,15,56,0,221
+.byte 102,15,56,0,245
+ pxor %xmm3,%xmm0
+ movdqa %xmm6,%xmm7
+ pshufd $78,%xmm6,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm6,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,242,0
+.byte 102,15,58,68,250,17
+.byte 102,15,58,68,220,0
+ pxor %xmm6,%xmm3
+ pxor %xmm7,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm7
+ pxor %xmm4,%xmm6
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm8,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm8,%xmm4
+
+ leaq 32(%rdx),%rdx
+ subq $32,%rcx
+ jbe .Leven_tail
+
+.Lmod_loop:
+.byte 102,65,15,58,68,192,0
+.byte 102,65,15,58,68,200,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqu (%rdx),%xmm3
+ pxor %xmm6,%xmm0
+ pxor %xmm7,%xmm1
+
+ movdqu 16(%rdx),%xmm6
+.byte 102,15,56,0,221
+.byte 102,15,56,0,245
+
+ movdqa %xmm6,%xmm7
+ pshufd $78,%xmm6,%xmm9
+ pshufd $78,%xmm2,%xmm10
+ pxor %xmm6,%xmm9
+ pxor %xmm2,%xmm10
+ pxor %xmm3,%xmm1
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+.byte 102,15,58,68,242,0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+.byte 102,15,58,68,250,17
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+
+.byte 102,69,15,58,68,202,0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm8,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm8,%xmm4
+
+ pxor %xmm6,%xmm9
+ pxor %xmm7,%xmm9
+ movdqa %xmm9,%xmm10
+ psrldq $8,%xmm9
+ pslldq $8,%xmm10
+ pxor %xmm9,%xmm7
+ pxor %xmm10,%xmm6
+
+ leaq 32(%rdx),%rdx
+ subq $32,%rcx
+ ja .Lmod_loop
+
+.Leven_tail:
+.byte 102,65,15,58,68,192,0
+.byte 102,65,15,58,68,200,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm6,%xmm0
+ pxor %xmm7,%xmm1
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ testq %rcx,%rcx
+ jnz .Ldone
+
+.Lodd_tail:
+ movdqu (%rdx),%xmm3
+.byte 102,15,56,0,221
+ pxor %xmm3,%xmm0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.Ldone:
+.byte 102,15,56,0,197
+ movdqu %xmm0,(%rdi)
+ retq
+.LSEH_end_gcm_ghash_clmul:
+.size gcm_ghash_clmul,.-gcm_ghash_clmul
+.align 64
+.Lbswap_mask:
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.L0x1c2_polynomial:
+.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
+.align 64
+.type .Lrem_4bit,@object
+.Lrem_4bit:
+.long 0,0,0,471859200,0,943718400,0,610271232
+.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
+.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
+.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
+.type .Lrem_8bit,@object
+.Lrem_8bit:
+.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
+.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
+.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
+.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
+.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
+.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
+.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
+.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
+.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
+.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
+.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
+.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
+.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
+.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
+.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
+.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
+.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
+.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
+.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
+.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
+.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
+.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
+.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
+.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
+.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
+.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
+.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
+.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
+.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
+.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
+.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
+.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
+
+.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/modes/ghash-macosx-x86_64.S b/ext/libressl/crypto/modes/ghash-macosx-x86_64.S
new file mode 100644
index 0000000..e6840a7
--- /dev/null
+++ b/ext/libressl/crypto/modes/ghash-macosx-x86_64.S
@@ -0,0 +1,1027 @@
+#include "x86_arch.h"
+.text
+
+.globl _gcm_gmult_4bit
+
+.p2align 4
+_gcm_gmult_4bit:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+L$gmult_prologue:
+
+ movzbq 15(%rdi),%r8
+ leaq L$rem_4bit(%rip),%r11
+ xorq %rax,%rax
+ xorq %rbx,%rbx
+ movb %r8b,%al
+ movb %r8b,%bl
+ shlb $4,%al
+ movq $14,%rcx
+ movq 8(%rsi,%rax,1),%r8
+ movq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ movq %r8,%rdx
+ jmp L$oop1
+
+.p2align 4
+L$oop1:
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ movb (%rdi,%rcx,1),%al
+ shrq $4,%r9
+ xorq 8(%rsi,%rbx,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rbx,1),%r9
+ movb %al,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ shlb $4,%al
+ xorq %r10,%r8
+ decq %rcx
+ js L$break1
+
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rax,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ xorq %r10,%r8
+ jmp L$oop1
+
+.p2align 4
+L$break1:
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rax,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ xorq %r10,%r8
+
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rbx,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rbx,1),%r9
+ xorq %r10,%r8
+ xorq (%r11,%rdx,8),%r9
+
+ bswapq %r8
+ bswapq %r9
+ movq %r8,8(%rdi)
+ movq %r9,(%rdi)
+
+ movq 16(%rsp),%rbx
+ leaq 24(%rsp),%rsp
+L$gmult_epilogue:
+ retq
+
+.globl _gcm_ghash_4bit
+
+.p2align 4
+_gcm_ghash_4bit:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $280,%rsp
+L$ghash_prologue:
+ movq %rdx,%r14
+ movq %rcx,%r15
+ subq $-128,%rsi
+ leaq 16+128(%rsp),%rbp
+ xorl %edx,%edx
+ movq 0+0-128(%rsi),%r8
+ movq 0+8-128(%rsi),%rax
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq 16+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq 16+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,0(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,0(%rbp)
+ movq 32+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,0-128(%rbp)
+ movq 32+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,1(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,8(%rbp)
+ movq 48+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,8-128(%rbp)
+ movq 48+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,2(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,16(%rbp)
+ movq 64+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,16-128(%rbp)
+ movq 64+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,3(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,24(%rbp)
+ movq 80+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,24-128(%rbp)
+ movq 80+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,4(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,32(%rbp)
+ movq 96+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,32-128(%rbp)
+ movq 96+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,5(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,40(%rbp)
+ movq 112+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,40-128(%rbp)
+ movq 112+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,6(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,48(%rbp)
+ movq 128+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,48-128(%rbp)
+ movq 128+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,7(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,56(%rbp)
+ movq 144+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,56-128(%rbp)
+ movq 144+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,8(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,64(%rbp)
+ movq 160+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,64-128(%rbp)
+ movq 160+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,9(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,72(%rbp)
+ movq 176+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,72-128(%rbp)
+ movq 176+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,10(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,80(%rbp)
+ movq 192+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,80-128(%rbp)
+ movq 192+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,11(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,88(%rbp)
+ movq 208+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,88-128(%rbp)
+ movq 208+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,12(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,96(%rbp)
+ movq 224+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,96-128(%rbp)
+ movq 224+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,13(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,104(%rbp)
+ movq 240+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,104-128(%rbp)
+ movq 240+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,14(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,112(%rbp)
+ shlb $4,%dl
+ movq %rax,112-128(%rbp)
+ shlq $60,%r10
+ movb %dl,15(%rsp)
+ orq %r10,%rbx
+ movq %r9,120(%rbp)
+ movq %rbx,120-128(%rbp)
+ addq $-128,%rsi
+ movq 8(%rdi),%r8
+ movq 0(%rdi),%r9
+ addq %r14,%r15
+ leaq L$rem_8bit(%rip),%r11
+ jmp L$outer_loop
+.p2align 4
+L$outer_loop:
+ xorq (%r14),%r9
+ movq 8(%r14),%rdx
+ leaq 16(%r14),%r14
+ xorq %r8,%rdx
+ movq %r9,(%rdi)
+ movq %rdx,8(%rdi)
+ shrq $32,%rdx
+ xorq %rax,%rax
+ roll $8,%edx
+ movb %dl,%al
+ movzbl %dl,%ebx
+ shlb $4,%al
+ shrl $4,%ebx
+ roll $8,%edx
+ movq 8(%rsi,%rax,1),%r8
+ movq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ xorq %r8,%r12
+ movq %r9,%r10
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 8(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 4(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 0(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ andl $240,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl -4(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ movzwq (%r11,%r12,2),%r12
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ shlq $48,%r12
+ xorq %r10,%r8
+ xorq %r12,%r9
+ movzbq %r8b,%r13
+ shrq $4,%r8
+ movq %r9,%r10
+ shlb $4,%r13b
+ shrq $4,%r9
+ xorq 8(%rsi,%rcx,1),%r8
+ movzwq (%r11,%r13,2),%r13
+ shlq $60,%r10
+ xorq (%rsi,%rcx,1),%r9
+ xorq %r10,%r8
+ shlq $48,%r13
+ bswapq %r8
+ xorq %r13,%r9
+ bswapq %r9
+ cmpq %r15,%r14
+ jb L$outer_loop
+ movq %r8,8(%rdi)
+ movq %r9,(%rdi)
+
+ leaq 280(%rsp),%rsi
+ movq 0(%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+L$ghash_epilogue:
+ retq
+
+.globl _gcm_init_clmul
+
+.p2align 4
+_gcm_init_clmul:
+ movdqu (%rsi),%xmm2
+ pshufd $78,%xmm2,%xmm2
+
+
+ pshufd $255,%xmm2,%xmm4
+ movdqa %xmm2,%xmm3
+ psllq $1,%xmm2
+ pxor %xmm5,%xmm5
+ psrlq $63,%xmm3
+ pcmpgtd %xmm4,%xmm5
+ pslldq $8,%xmm3
+ por %xmm3,%xmm2
+
+
+ pand L$0x1c2_polynomial(%rip),%xmm5
+ pxor %xmm5,%xmm2
+
+
+ movdqa %xmm2,%xmm0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ movdqu %xmm2,(%rdi)
+ movdqu %xmm0,16(%rdi)
+ retq
+
+.globl _gcm_gmult_clmul
+
+.p2align 4
+_gcm_gmult_clmul:
+ movdqu (%rdi),%xmm0
+ movdqa L$bswap_mask(%rip),%xmm5
+ movdqu (%rsi),%xmm2
+.byte 102,15,56,0,197
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,197
+ movdqu %xmm0,(%rdi)
+ retq
+
+.globl _gcm_ghash_clmul
+
+.p2align 4
+_gcm_ghash_clmul:
+ movdqa L$bswap_mask(%rip),%xmm5
+
+ movdqu (%rdi),%xmm0
+ movdqu (%rsi),%xmm2
+.byte 102,15,56,0,197
+
+ subq $16,%rcx
+ jz L$odd_tail
+
+ movdqu 16(%rsi),%xmm8
+
+
+
+
+
+ movdqu (%rdx),%xmm3
+ movdqu 16(%rdx),%xmm6
+.byte 102,15,56,0,221
+.byte 102,15,56,0,245
+ pxor %xmm3,%xmm0
+ movdqa %xmm6,%xmm7
+ pshufd $78,%xmm6,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm6,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,242,0
+.byte 102,15,58,68,250,17
+.byte 102,15,58,68,220,0
+ pxor %xmm6,%xmm3
+ pxor %xmm7,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm7
+ pxor %xmm4,%xmm6
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm8,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm8,%xmm4
+
+ leaq 32(%rdx),%rdx
+ subq $32,%rcx
+ jbe L$even_tail
+
+L$mod_loop:
+.byte 102,65,15,58,68,192,0
+.byte 102,65,15,58,68,200,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqu (%rdx),%xmm3
+ pxor %xmm6,%xmm0
+ pxor %xmm7,%xmm1
+
+ movdqu 16(%rdx),%xmm6
+.byte 102,15,56,0,221
+.byte 102,15,56,0,245
+
+ movdqa %xmm6,%xmm7
+ pshufd $78,%xmm6,%xmm9
+ pshufd $78,%xmm2,%xmm10
+ pxor %xmm6,%xmm9
+ pxor %xmm2,%xmm10
+ pxor %xmm3,%xmm1
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+.byte 102,15,58,68,242,0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+.byte 102,15,58,68,250,17
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+
+.byte 102,69,15,58,68,202,0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm8,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm8,%xmm4
+
+ pxor %xmm6,%xmm9
+ pxor %xmm7,%xmm9
+ movdqa %xmm9,%xmm10
+ psrldq $8,%xmm9
+ pslldq $8,%xmm10
+ pxor %xmm9,%xmm7
+ pxor %xmm10,%xmm6
+
+ leaq 32(%rdx),%rdx
+ subq $32,%rcx
+ ja L$mod_loop
+
+L$even_tail:
+.byte 102,65,15,58,68,192,0
+.byte 102,65,15,58,68,200,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm6,%xmm0
+ pxor %xmm7,%xmm1
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ testq %rcx,%rcx
+ jnz L$done
+
+L$odd_tail:
+ movdqu (%rdx),%xmm3
+.byte 102,15,56,0,221
+ pxor %xmm3,%xmm0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+L$done:
+.byte 102,15,56,0,197
+ movdqu %xmm0,(%rdi)
+ retq
+L$SEH_end_gcm_ghash_clmul:
+
+.p2align 6
+L$bswap_mask:
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+L$0x1c2_polynomial:
+.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
+.p2align 6
+
+L$rem_4bit:
+.long 0,0,0,471859200,0,943718400,0,610271232
+.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
+.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
+.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
+
+L$rem_8bit:
+.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
+.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
+.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
+.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
+.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
+.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
+.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
+.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
+.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
+.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
+.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
+.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
+.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
+.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
+.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
+.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
+.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
+.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
+.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
+.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
+.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
+.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
+.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
+.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
+.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
+.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
+.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
+.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
+.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
+.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
+.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
+.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
+
+.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align 6
diff --git a/ext/libressl/crypto/modes/ghash-masm-x86_64.S b/ext/libressl/crypto/modes/ghash-masm-x86_64.S
new file mode 100644
index 0000000..ffdc1b5
--- /dev/null
+++ b/ext/libressl/crypto/modes/ghash-masm-x86_64.S
@@ -0,0 +1,1256 @@
+; 1 "crypto/modes/ghash-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/modes/ghash-masm-x86_64.S.tmp" 2
+OPTION DOTNAME
+
+; 1 "./crypto/x86_arch.h" 1
+
+
+; 16 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+; 40 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 "crypto/modes/ghash-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+
+PUBLIC gcm_gmult_4bit
+
+ALIGN 16
+gcm_gmult_4bit PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_gcm_gmult_4bit::
+ mov rdi,rcx
+ mov rsi,rdx
+
+
+ push rbx
+ push rbp
+ push r12
+$L$gmult_prologue::
+
+ movzx r8,BYTE PTR[15+rdi]
+ lea r11,QWORD PTR[$L$rem_4bit]
+ xor rax,rax
+ xor rbx,rbx
+ mov al,r8b
+ mov bl,r8b
+ shl al,4
+ mov rcx,14
+ mov r8,QWORD PTR[8+rax*1+rsi]
+ mov r9,QWORD PTR[rax*1+rsi]
+ and bl,0f0h
+ mov rdx,r8
+ jmp $L$oop1
+
+ALIGN 16
+$L$oop1::
+ shr r8,4
+ and rdx,0fh
+ mov r10,r9
+ mov al,BYTE PTR[rcx*1+rdi]
+ shr r9,4
+ xor r8,QWORD PTR[8+rbx*1+rsi]
+ shl r10,60
+ xor r9,QWORD PTR[rbx*1+rsi]
+ mov bl,al
+ xor r9,QWORD PTR[rdx*8+r11]
+ mov rdx,r8
+ shl al,4
+ xor r8,r10
+ dec rcx
+ js $L$break1
+
+ shr r8,4
+ and rdx,0fh
+ mov r10,r9
+ shr r9,4
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ shl r10,60
+ xor r9,QWORD PTR[rax*1+rsi]
+ and bl,0f0h
+ xor r9,QWORD PTR[rdx*8+r11]
+ mov rdx,r8
+ xor r8,r10
+ jmp $L$oop1
+
+ALIGN 16
+$L$break1::
+ shr r8,4
+ and rdx,0fh
+ mov r10,r9
+ shr r9,4
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ shl r10,60
+ xor r9,QWORD PTR[rax*1+rsi]
+ and bl,0f0h
+ xor r9,QWORD PTR[rdx*8+r11]
+ mov rdx,r8
+ xor r8,r10
+
+ shr r8,4
+ and rdx,0fh
+ mov r10,r9
+ shr r9,4
+ xor r8,QWORD PTR[8+rbx*1+rsi]
+ shl r10,60
+ xor r9,QWORD PTR[rbx*1+rsi]
+ xor r8,r10
+ xor r9,QWORD PTR[rdx*8+r11]
+
+ bswap r8
+ bswap r9
+ mov QWORD PTR[8+rdi],r8
+ mov QWORD PTR[rdi],r9
+
+ mov rbx,QWORD PTR[16+rsp]
+ lea rsp,QWORD PTR[24+rsp]
+$L$gmult_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_gcm_gmult_4bit::
+gcm_gmult_4bit ENDP
+PUBLIC gcm_ghash_4bit
+
+ALIGN 16
+gcm_ghash_4bit PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_gcm_ghash_4bit::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp,280
+$L$ghash_prologue::
+ mov r14,rdx
+ mov r15,rcx
+ sub rsi,-128
+ lea rbp,QWORD PTR[((16+128))+rsp]
+ xor edx,edx
+ mov r8,QWORD PTR[((0+0-128))+rsi]
+ mov rax,QWORD PTR[((0+8-128))+rsi]
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov r9,QWORD PTR[((16+0-128))+rsi]
+ shl dl,4
+ mov rbx,QWORD PTR[((16+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[rbp],r8
+ mov r8,QWORD PTR[((32+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((0-128))+rbp],rax
+ mov rax,QWORD PTR[((32+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[1+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[8+rbp],r9
+ mov r9,QWORD PTR[((48+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((8-128))+rbp],rbx
+ mov rbx,QWORD PTR[((48+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[2+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[16+rbp],r8
+ mov r8,QWORD PTR[((64+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((16-128))+rbp],rax
+ mov rax,QWORD PTR[((64+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[3+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[24+rbp],r9
+ mov r9,QWORD PTR[((80+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((24-128))+rbp],rbx
+ mov rbx,QWORD PTR[((80+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[4+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[32+rbp],r8
+ mov r8,QWORD PTR[((96+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((32-128))+rbp],rax
+ mov rax,QWORD PTR[((96+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[5+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[40+rbp],r9
+ mov r9,QWORD PTR[((112+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((40-128))+rbp],rbx
+ mov rbx,QWORD PTR[((112+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[6+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[48+rbp],r8
+ mov r8,QWORD PTR[((128+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((48-128))+rbp],rax
+ mov rax,QWORD PTR[((128+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[7+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[56+rbp],r9
+ mov r9,QWORD PTR[((144+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((56-128))+rbp],rbx
+ mov rbx,QWORD PTR[((144+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[8+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[64+rbp],r8
+ mov r8,QWORD PTR[((160+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((64-128))+rbp],rax
+ mov rax,QWORD PTR[((160+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[9+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[72+rbp],r9
+ mov r9,QWORD PTR[((176+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((72-128))+rbp],rbx
+ mov rbx,QWORD PTR[((176+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[10+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[80+rbp],r8
+ mov r8,QWORD PTR[((192+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((80-128))+rbp],rax
+ mov rax,QWORD PTR[((192+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[11+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[88+rbp],r9
+ mov r9,QWORD PTR[((208+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((88-128))+rbp],rbx
+ mov rbx,QWORD PTR[((208+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[12+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[96+rbp],r8
+ mov r8,QWORD PTR[((224+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((96-128))+rbp],rax
+ mov rax,QWORD PTR[((224+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[13+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[104+rbp],r9
+ mov r9,QWORD PTR[((240+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((104-128))+rbp],rbx
+ mov rbx,QWORD PTR[((240+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[14+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[112+rbp],r8
+ shl dl,4
+ mov QWORD PTR[((112-128))+rbp],rax
+ shl r10,60
+ mov BYTE PTR[15+rsp],dl
+ or rbx,r10
+ mov QWORD PTR[120+rbp],r9
+ mov QWORD PTR[((120-128))+rbp],rbx
+ add rsi,-128
+ mov r8,QWORD PTR[8+rdi]
+ mov r9,QWORD PTR[rdi]
+ add r15,r14
+ lea r11,QWORD PTR[$L$rem_8bit]
+ jmp $L$outer_loop
+ALIGN 16
+$L$outer_loop::
+ xor r9,QWORD PTR[r14]
+ mov rdx,QWORD PTR[8+r14]
+ lea r14,QWORD PTR[16+r14]
+ xor rdx,r8
+ mov QWORD PTR[rdi],r9
+ mov QWORD PTR[8+rdi],rdx
+ shr rdx,32
+ xor rax,rax
+ rol edx,8
+ mov al,dl
+ movzx ebx,dl
+ shl al,4
+ shr ebx,4
+ rol edx,8
+ mov r8,QWORD PTR[8+rax*1+rsi]
+ mov r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ xor r12,r8
+ mov r10,r9
+ shr r8,8
+ movzx r12,r12b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ mov edx,DWORD PTR[8+rdi]
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ mov edx,DWORD PTR[4+rdi]
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ mov edx,DWORD PTR[rdi]
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ and ecx,240
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ mov edx,DWORD PTR[((-4))+rdi]
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ movzx r12,WORD PTR[r12*2+r11]
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ shl r12,48
+ xor r8,r10
+ xor r9,r12
+ movzx r13,r8b
+ shr r8,4
+ mov r10,r9
+ shl r13b,4
+ shr r9,4
+ xor r8,QWORD PTR[8+rcx*1+rsi]
+ movzx r13,WORD PTR[r13*2+r11]
+ shl r10,60
+ xor r9,QWORD PTR[rcx*1+rsi]
+ xor r8,r10
+ shl r13,48
+ bswap r8
+ xor r9,r13
+ bswap r9
+ cmp r14,r15
+ jb $L$outer_loop
+ mov QWORD PTR[8+rdi],r8
+ mov QWORD PTR[rdi],r9
+
+ lea rsi,QWORD PTR[280+rsp]
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$ghash_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_gcm_ghash_4bit::
+gcm_ghash_4bit ENDP
+PUBLIC gcm_init_clmul
+
+ALIGN 16
+gcm_init_clmul PROC PUBLIC
+ movdqu xmm2,XMMWORD PTR[rdx]
+ pshufd xmm2,xmm2,78
+
+
+ pshufd xmm4,xmm2,255
+ movdqa xmm3,xmm2
+ psllq xmm2,1
+ pxor xmm5,xmm5
+ psrlq xmm3,63
+ pcmpgtd xmm5,xmm4
+ pslldq xmm3,8
+ por xmm2,xmm3
+
+
+ pand xmm5,XMMWORD PTR[$L$0x1c2_polynomial]
+ pxor xmm2,xmm5
+
+
+ movdqa xmm0,xmm2
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pshufd xmm4,xmm2,78
+ pxor xmm3,xmm0
+ pxor xmm4,xmm2
+DB 102,15,58,68,194,0
+DB 102,15,58,68,202,17
+DB 102,15,58,68,220,0
+ pxor xmm3,xmm0
+ pxor xmm3,xmm1
+
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3
+ pxor xmm0,xmm4
+
+ movdqa xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,5
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm4,xmm0
+ pslldq xmm0,8
+ psrldq xmm4,8
+ pxor xmm0,xmm3
+ pxor xmm1,xmm4
+
+
+ movdqa xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+ pxor xmm4,xmm1
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+ movdqu XMMWORD PTR[rcx],xmm2
+ movdqu XMMWORD PTR[16+rcx],xmm0
+ DB 0F3h,0C3h ;repret
+gcm_init_clmul ENDP
+PUBLIC gcm_gmult_clmul
+
+ALIGN 16
+gcm_gmult_clmul PROC PUBLIC
+ movdqu xmm0,XMMWORD PTR[rcx]
+ movdqa xmm5,XMMWORD PTR[$L$bswap_mask]
+ movdqu xmm2,XMMWORD PTR[rdx]
+DB 102,15,56,0,197
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pshufd xmm4,xmm2,78
+ pxor xmm3,xmm0
+ pxor xmm4,xmm2
+DB 102,15,58,68,194,0
+DB 102,15,58,68,202,17
+DB 102,15,58,68,220,0
+ pxor xmm3,xmm0
+ pxor xmm3,xmm1
+
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3
+ pxor xmm0,xmm4
+
+ movdqa xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,5
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm4,xmm0
+ pslldq xmm0,8
+ psrldq xmm4,8
+ pxor xmm0,xmm3
+ pxor xmm1,xmm4
+
+
+ movdqa xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+ pxor xmm4,xmm1
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+DB 102,15,56,0,197
+ movdqu XMMWORD PTR[rcx],xmm0
+ DB 0F3h,0C3h ;repret
+gcm_gmult_clmul ENDP
+PUBLIC gcm_ghash_clmul
+
+ALIGN 16
+gcm_ghash_clmul PROC PUBLIC
+$L$SEH_begin_gcm_ghash_clmul::
+
+DB 048h,083h,0ech,058h
+DB 00fh,029h,034h,024h
+DB 00fh,029h,07ch,024h,010h
+DB 044h,00fh,029h,044h,024h,020h
+DB 044h,00fh,029h,04ch,024h,030h
+DB 044h,00fh,029h,054h,024h,040h
+ movdqa xmm5,XMMWORD PTR[$L$bswap_mask]
+
+ movdqu xmm0,XMMWORD PTR[rcx]
+ movdqu xmm2,XMMWORD PTR[rdx]
+DB 102,15,56,0,197
+
+ sub r9,010h
+ jz $L$odd_tail
+
+ movdqu xmm8,XMMWORD PTR[16+rdx]
+
+
+
+
+
+ movdqu xmm3,XMMWORD PTR[r8]
+ movdqu xmm6,XMMWORD PTR[16+r8]
+DB 102,15,56,0,221
+DB 102,15,56,0,245
+ pxor xmm0,xmm3
+ movdqa xmm7,xmm6
+ pshufd xmm3,xmm6,78
+ pshufd xmm4,xmm2,78
+ pxor xmm3,xmm6
+ pxor xmm4,xmm2
+DB 102,15,58,68,242,0
+DB 102,15,58,68,250,17
+DB 102,15,58,68,220,0
+ pxor xmm3,xmm6
+ pxor xmm3,xmm7
+
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm7,xmm3
+ pxor xmm6,xmm4
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pshufd xmm4,xmm8,78
+ pxor xmm3,xmm0
+ pxor xmm4,xmm8
+
+ lea r8,QWORD PTR[32+r8]
+ sub r9,020h
+ jbe $L$even_tail
+
+$L$mod_loop::
+DB 102,65,15,58,68,192,0
+DB 102,65,15,58,68,200,17
+DB 102,15,58,68,220,0
+ pxor xmm3,xmm0
+ pxor xmm3,xmm1
+
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3
+ pxor xmm0,xmm4
+ movdqu xmm3,XMMWORD PTR[r8]
+ pxor xmm0,xmm6
+ pxor xmm1,xmm7
+
+ movdqu xmm6,XMMWORD PTR[16+r8]
+DB 102,15,56,0,221
+DB 102,15,56,0,245
+
+ movdqa xmm7,xmm6
+ pshufd xmm9,xmm6,78
+ pshufd xmm10,xmm2,78
+ pxor xmm9,xmm6
+ pxor xmm10,xmm2
+ pxor xmm1,xmm3
+
+ movdqa xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,5
+ pxor xmm0,xmm3
+DB 102,15,58,68,242,0
+ psllq xmm0,57
+ movdqa xmm4,xmm0
+ pslldq xmm0,8
+ psrldq xmm4,8
+ pxor xmm0,xmm3
+ pxor xmm1,xmm4
+
+DB 102,15,58,68,250,17
+ movdqa xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+ pxor xmm4,xmm1
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+
+DB 102,69,15,58,68,202,0
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pshufd xmm4,xmm8,78
+ pxor xmm3,xmm0
+ pxor xmm4,xmm8
+
+ pxor xmm9,xmm6
+ pxor xmm9,xmm7
+ movdqa xmm10,xmm9
+ psrldq xmm9,8
+ pslldq xmm10,8
+ pxor xmm7,xmm9
+ pxor xmm6,xmm10
+
+ lea r8,QWORD PTR[32+r8]
+ sub r9,020h
+ ja $L$mod_loop
+
+$L$even_tail::
+DB 102,65,15,58,68,192,0
+DB 102,65,15,58,68,200,17
+DB 102,15,58,68,220,0
+ pxor xmm3,xmm0
+ pxor xmm3,xmm1
+
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3
+ pxor xmm0,xmm4
+ pxor xmm0,xmm6
+ pxor xmm1,xmm7
+
+ movdqa xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,5
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm4,xmm0
+ pslldq xmm0,8
+ psrldq xmm4,8
+ pxor xmm0,xmm3
+ pxor xmm1,xmm4
+
+
+ movdqa xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+ pxor xmm4,xmm1
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+ test r9,r9
+ jnz $L$done
+
+$L$odd_tail::
+ movdqu xmm3,XMMWORD PTR[r8]
+DB 102,15,56,0,221
+ pxor xmm0,xmm3
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pshufd xmm4,xmm2,78
+ pxor xmm3,xmm0
+ pxor xmm4,xmm2
+DB 102,15,58,68,194,0
+DB 102,15,58,68,202,17
+DB 102,15,58,68,220,0
+ pxor xmm3,xmm0
+ pxor xmm3,xmm1
+
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3
+ pxor xmm0,xmm4
+
+ movdqa xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,5
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm4,xmm0
+ pslldq xmm0,8
+ psrldq xmm4,8
+ pxor xmm0,xmm3
+ pxor xmm1,xmm4
+
+
+ movdqa xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+ pxor xmm4,xmm1
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+$L$done::
+DB 102,15,56,0,197
+ movdqu XMMWORD PTR[rcx],xmm0
+ movaps xmm6,XMMWORD PTR[rsp]
+ movaps xmm7,XMMWORD PTR[16+rsp]
+ movaps xmm8,XMMWORD PTR[32+rsp]
+ movaps xmm9,XMMWORD PTR[48+rsp]
+ movaps xmm10,XMMWORD PTR[64+rsp]
+ add rsp,058h
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_gcm_ghash_clmul::
+gcm_ghash_clmul ENDP
+ALIGN 64
+$L$bswap_mask::
+DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+$L$0x1c2_polynomial::
+DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0c2h
+ALIGN 64
+
+$L$rem_4bit::
+ DD 0,0,0,471859200,0,943718400,0,610271232
+ DD 0,1887436800,0,1822425088,0,1220542464,0,1423966208
+ DD 0,3774873600,0,4246732800,0,3644850176,0,3311403008
+ DD 0,2441084928,0,2376073216,0,2847932416,0,3051356160
+
+$L$rem_8bit::
+ DW 00000h,001C2h,00384h,00246h,00708h,006CAh,0048Ch,0054Eh
+ DW 00E10h,00FD2h,00D94h,00C56h,00918h,008DAh,00A9Ch,00B5Eh
+ DW 01C20h,01DE2h,01FA4h,01E66h,01B28h,01AEAh,018ACh,0196Eh
+ DW 01230h,013F2h,011B4h,01076h,01538h,014FAh,016BCh,0177Eh
+ DW 03840h,03982h,03BC4h,03A06h,03F48h,03E8Ah,03CCCh,03D0Eh
+ DW 03650h,03792h,035D4h,03416h,03158h,0309Ah,032DCh,0331Eh
+ DW 02460h,025A2h,027E4h,02626h,02368h,022AAh,020ECh,0212Eh
+ DW 02A70h,02BB2h,029F4h,02836h,02D78h,02CBAh,02EFCh,02F3Eh
+ DW 07080h,07142h,07304h,072C6h,07788h,0764Ah,0740Ch,075CEh
+ DW 07E90h,07F52h,07D14h,07CD6h,07998h,0785Ah,07A1Ch,07BDEh
+ DW 06CA0h,06D62h,06F24h,06EE6h,06BA8h,06A6Ah,0682Ch,069EEh
+ DW 062B0h,06372h,06134h,060F6h,065B8h,0647Ah,0663Ch,067FEh
+ DW 048C0h,04902h,04B44h,04A86h,04FC8h,04E0Ah,04C4Ch,04D8Eh
+ DW 046D0h,04712h,04554h,04496h,041D8h,0401Ah,0425Ch,0439Eh
+ DW 054E0h,05522h,05764h,056A6h,053E8h,0522Ah,0506Ch,051AEh
+ DW 05AF0h,05B32h,05974h,058B6h,05DF8h,05C3Ah,05E7Ch,05FBEh
+ DW 0E100h,0E0C2h,0E284h,0E346h,0E608h,0E7CAh,0E58Ch,0E44Eh
+ DW 0EF10h,0EED2h,0EC94h,0ED56h,0E818h,0E9DAh,0EB9Ch,0EA5Eh
+ DW 0FD20h,0FCE2h,0FEA4h,0FF66h,0FA28h,0FBEAh,0F9ACh,0F86Eh
+ DW 0F330h,0F2F2h,0F0B4h,0F176h,0F438h,0F5FAh,0F7BCh,0F67Eh
+ DW 0D940h,0D882h,0DAC4h,0DB06h,0DE48h,0DF8Ah,0DDCCh,0DC0Eh
+ DW 0D750h,0D692h,0D4D4h,0D516h,0D058h,0D19Ah,0D3DCh,0D21Eh
+ DW 0C560h,0C4A2h,0C6E4h,0C726h,0C268h,0C3AAh,0C1ECh,0C02Eh
+ DW 0CB70h,0CAB2h,0C8F4h,0C936h,0CC78h,0CDBAh,0CFFCh,0CE3Eh
+ DW 09180h,09042h,09204h,093C6h,09688h,0974Ah,0950Ch,094CEh
+ DW 09F90h,09E52h,09C14h,09DD6h,09898h,0995Ah,09B1Ch,09ADEh
+ DW 08DA0h,08C62h,08E24h,08FE6h,08AA8h,08B6Ah,0892Ch,088EEh
+ DW 083B0h,08272h,08034h,081F6h,084B8h,0857Ah,0873Ch,086FEh
+ DW 0A9C0h,0A802h,0AA44h,0AB86h,0AEC8h,0AF0Ah,0AD4Ch,0AC8Eh
+ DW 0A7D0h,0A612h,0A454h,0A596h,0A0D8h,0A11Ah,0A35Ch,0A29Eh
+ DW 0B5E0h,0B422h,0B664h,0B7A6h,0B2E8h,0B32Ah,0B16Ch,0B0AEh
+ DW 0BBF0h,0BA32h,0B874h,0B9B6h,0BCF8h,0BD3Ah,0BF7Ch,0BEBEh
+
+DB 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
+DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
+DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
+DB 114,103,62,0
+ALIGN 64
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$in_prologue
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_prologue
+
+ lea rax,QWORD PTR[24+rax]
+
+ mov rbx,QWORD PTR[((-8))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+
+$L$in_prologue::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_gcm_gmult_4bit
+ DD imagerel $L$SEH_end_gcm_gmult_4bit
+ DD imagerel $L$SEH_info_gcm_gmult_4bit
+
+ DD imagerel $L$SEH_begin_gcm_ghash_4bit
+ DD imagerel $L$SEH_end_gcm_ghash_4bit
+ DD imagerel $L$SEH_info_gcm_ghash_4bit
+
+ DD imagerel $L$SEH_begin_gcm_ghash_clmul
+ DD imagerel $L$SEH_end_gcm_ghash_clmul
+ DD imagerel $L$SEH_info_gcm_ghash_clmul
+
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_gcm_gmult_4bit::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$gmult_prologue,imagerel $L$gmult_epilogue
+$L$SEH_info_gcm_ghash_4bit::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$ghash_prologue,imagerel $L$ghash_epilogue
+$L$SEH_info_gcm_ghash_clmul::
+DB 001h,01fh,00bh,000h
+DB 01fh,0a8h,004h,000h
+DB 019h,098h,003h,000h
+DB 013h,088h,002h,000h
+DB 00dh,078h,001h,000h
+DB 008h,068h,000h,000h
+DB 004h,0a2h,000h,000h
+
+.xdata ENDS
+END
+
diff --git a/ext/libressl/crypto/modes/ghash-mingw64-x86_64.S b/ext/libressl/crypto/modes/ghash-mingw64-x86_64.S
new file mode 100644
index 0000000..cd0823b
--- /dev/null
+++ b/ext/libressl/crypto/modes/ghash-mingw64-x86_64.S
@@ -0,0 +1,1175 @@
+#include "x86_arch.h"
+.text
+
+.globl gcm_gmult_4bit
+.def gcm_gmult_4bit; .scl 2; .type 32; .endef
+.p2align 4
+gcm_gmult_4bit:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_gcm_gmult_4bit:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+.Lgmult_prologue:
+
+ movzbq 15(%rdi),%r8
+ leaq .Lrem_4bit(%rip),%r11
+ xorq %rax,%rax
+ xorq %rbx,%rbx
+ movb %r8b,%al
+ movb %r8b,%bl
+ shlb $4,%al
+ movq $14,%rcx
+ movq 8(%rsi,%rax,1),%r8
+ movq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ movq %r8,%rdx
+ jmp .Loop1
+
+.p2align 4
+.Loop1:
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ movb (%rdi,%rcx,1),%al
+ shrq $4,%r9
+ xorq 8(%rsi,%rbx,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rbx,1),%r9
+ movb %al,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ shlb $4,%al
+ xorq %r10,%r8
+ decq %rcx
+ js .Lbreak1
+
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rax,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ xorq %r10,%r8
+ jmp .Loop1
+
+.p2align 4
+.Lbreak1:
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rax,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ xorq %r10,%r8
+
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rbx,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rbx,1),%r9
+ xorq %r10,%r8
+ xorq (%r11,%rdx,8),%r9
+
+ bswapq %r8
+ bswapq %r9
+ movq %r8,8(%rdi)
+ movq %r9,(%rdi)
+
+ movq 16(%rsp),%rbx
+ leaq 24(%rsp),%rsp
+.Lgmult_epilogue:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_gcm_gmult_4bit:
+.globl gcm_ghash_4bit
+.def gcm_ghash_4bit; .scl 2; .type 32; .endef
+.p2align 4
+gcm_ghash_4bit:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_gcm_ghash_4bit:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $280,%rsp
+.Lghash_prologue:
+ movq %rdx,%r14
+ movq %rcx,%r15
+ subq $-128,%rsi
+ leaq 16+128(%rsp),%rbp
+ xorl %edx,%edx
+ movq 0+0-128(%rsi),%r8
+ movq 0+8-128(%rsi),%rax
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq 16+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq 16+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,0(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,0(%rbp)
+ movq 32+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,0-128(%rbp)
+ movq 32+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,1(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,8(%rbp)
+ movq 48+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,8-128(%rbp)
+ movq 48+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,2(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,16(%rbp)
+ movq 64+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,16-128(%rbp)
+ movq 64+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,3(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,24(%rbp)
+ movq 80+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,24-128(%rbp)
+ movq 80+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,4(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,32(%rbp)
+ movq 96+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,32-128(%rbp)
+ movq 96+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,5(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,40(%rbp)
+ movq 112+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,40-128(%rbp)
+ movq 112+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,6(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,48(%rbp)
+ movq 128+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,48-128(%rbp)
+ movq 128+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,7(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,56(%rbp)
+ movq 144+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,56-128(%rbp)
+ movq 144+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,8(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,64(%rbp)
+ movq 160+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,64-128(%rbp)
+ movq 160+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,9(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,72(%rbp)
+ movq 176+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,72-128(%rbp)
+ movq 176+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,10(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,80(%rbp)
+ movq 192+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,80-128(%rbp)
+ movq 192+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,11(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,88(%rbp)
+ movq 208+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,88-128(%rbp)
+ movq 208+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,12(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,96(%rbp)
+ movq 224+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,96-128(%rbp)
+ movq 224+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,13(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,104(%rbp)
+ movq 240+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,104-128(%rbp)
+ movq 240+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,14(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,112(%rbp)
+ shlb $4,%dl
+ movq %rax,112-128(%rbp)
+ shlq $60,%r10
+ movb %dl,15(%rsp)
+ orq %r10,%rbx
+ movq %r9,120(%rbp)
+ movq %rbx,120-128(%rbp)
+ addq $-128,%rsi
+ movq 8(%rdi),%r8
+ movq 0(%rdi),%r9
+ addq %r14,%r15
+ leaq .Lrem_8bit(%rip),%r11
+ jmp .Louter_loop
+.p2align 4
+.Louter_loop:
+ xorq (%r14),%r9
+ movq 8(%r14),%rdx
+ leaq 16(%r14),%r14
+ xorq %r8,%rdx
+ movq %r9,(%rdi)
+ movq %rdx,8(%rdi)
+ shrq $32,%rdx
+ xorq %rax,%rax
+ roll $8,%edx
+ movb %dl,%al
+ movzbl %dl,%ebx
+ shlb $4,%al
+ shrl $4,%ebx
+ roll $8,%edx
+ movq 8(%rsi,%rax,1),%r8
+ movq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ xorq %r8,%r12
+ movq %r9,%r10
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 8(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 4(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 0(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ andl $240,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl -4(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ movzwq (%r11,%r12,2),%r12
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ shlq $48,%r12
+ xorq %r10,%r8
+ xorq %r12,%r9
+ movzbq %r8b,%r13
+ shrq $4,%r8
+ movq %r9,%r10
+ shlb $4,%r13b
+ shrq $4,%r9
+ xorq 8(%rsi,%rcx,1),%r8
+ movzwq (%r11,%r13,2),%r13
+ shlq $60,%r10
+ xorq (%rsi,%rcx,1),%r9
+ xorq %r10,%r8
+ shlq $48,%r13
+ bswapq %r8
+ xorq %r13,%r9
+ bswapq %r9
+ cmpq %r15,%r14
+ jb .Louter_loop
+ movq %r8,8(%rdi)
+ movq %r9,(%rdi)
+
+ leaq 280(%rsp),%rsi
+ movq 0(%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lghash_epilogue:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_gcm_ghash_4bit:
+.globl gcm_init_clmul
+.def gcm_init_clmul; .scl 2; .type 32; .endef
+.p2align 4
+gcm_init_clmul:
+ movdqu (%rdx),%xmm2
+ pshufd $78,%xmm2,%xmm2
+
+
+ pshufd $255,%xmm2,%xmm4
+ movdqa %xmm2,%xmm3
+ psllq $1,%xmm2
+ pxor %xmm5,%xmm5
+ psrlq $63,%xmm3
+ pcmpgtd %xmm4,%xmm5
+ pslldq $8,%xmm3
+ por %xmm3,%xmm2
+
+
+ pand .L0x1c2_polynomial(%rip),%xmm5
+ pxor %xmm5,%xmm2
+
+
+ movdqa %xmm2,%xmm0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ movdqu %xmm2,(%rcx)
+ movdqu %xmm0,16(%rcx)
+ retq
+
+.globl gcm_gmult_clmul
+.def gcm_gmult_clmul; .scl 2; .type 32; .endef
+.p2align 4
+gcm_gmult_clmul:
+ movdqu (%rcx),%xmm0
+ movdqa .Lbswap_mask(%rip),%xmm5
+ movdqu (%rdx),%xmm2
+.byte 102,15,56,0,197
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,197
+ movdqu %xmm0,(%rcx)
+ retq
+
+.globl gcm_ghash_clmul
+.def gcm_ghash_clmul; .scl 2; .type 32; .endef
+.p2align 4
+gcm_ghash_clmul:
+.LSEH_begin_gcm_ghash_clmul:
+
+.byte 0x48,0x83,0xec,0x58
+.byte 0x0f,0x29,0x34,0x24
+.byte 0x0f,0x29,0x7c,0x24,0x10
+.byte 0x44,0x0f,0x29,0x44,0x24,0x20
+.byte 0x44,0x0f,0x29,0x4c,0x24,0x30
+.byte 0x44,0x0f,0x29,0x54,0x24,0x40
+ movdqa .Lbswap_mask(%rip),%xmm5
+
+ movdqu (%rcx),%xmm0
+ movdqu (%rdx),%xmm2
+.byte 102,15,56,0,197
+
+ subq $16,%r9
+ jz .Lodd_tail
+
+ movdqu 16(%rdx),%xmm8
+
+
+
+
+
+ movdqu (%r8),%xmm3
+ movdqu 16(%r8),%xmm6
+.byte 102,15,56,0,221
+.byte 102,15,56,0,245
+ pxor %xmm3,%xmm0
+ movdqa %xmm6,%xmm7
+ pshufd $78,%xmm6,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm6,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,242,0
+.byte 102,15,58,68,250,17
+.byte 102,15,58,68,220,0
+ pxor %xmm6,%xmm3
+ pxor %xmm7,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm7
+ pxor %xmm4,%xmm6
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm8,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm8,%xmm4
+
+ leaq 32(%r8),%r8
+ subq $32,%r9
+ jbe .Leven_tail
+
+.Lmod_loop:
+.byte 102,65,15,58,68,192,0
+.byte 102,65,15,58,68,200,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqu (%r8),%xmm3
+ pxor %xmm6,%xmm0
+ pxor %xmm7,%xmm1
+
+ movdqu 16(%r8),%xmm6
+.byte 102,15,56,0,221
+.byte 102,15,56,0,245
+
+ movdqa %xmm6,%xmm7
+ pshufd $78,%xmm6,%xmm9
+ pshufd $78,%xmm2,%xmm10
+ pxor %xmm6,%xmm9
+ pxor %xmm2,%xmm10
+ pxor %xmm3,%xmm1
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+.byte 102,15,58,68,242,0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+.byte 102,15,58,68,250,17
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+
+.byte 102,69,15,58,68,202,0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm8,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm8,%xmm4
+
+ pxor %xmm6,%xmm9
+ pxor %xmm7,%xmm9
+ movdqa %xmm9,%xmm10
+ psrldq $8,%xmm9
+ pslldq $8,%xmm10
+ pxor %xmm9,%xmm7
+ pxor %xmm10,%xmm6
+
+ leaq 32(%r8),%r8
+ subq $32,%r9
+ ja .Lmod_loop
+
+.Leven_tail:
+.byte 102,65,15,58,68,192,0
+.byte 102,65,15,58,68,200,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm6,%xmm0
+ pxor %xmm7,%xmm1
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ testq %r9,%r9
+ jnz .Ldone
+
+.Lodd_tail:
+ movdqu (%r8),%xmm3
+.byte 102,15,56,0,221
+ pxor %xmm3,%xmm0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.Ldone:
+.byte 102,15,56,0,197
+ movdqu %xmm0,(%rcx)
+ movaps (%rsp),%xmm6
+ movaps 16(%rsp),%xmm7
+ movaps 32(%rsp),%xmm8
+ movaps 48(%rsp),%xmm9
+ movaps 64(%rsp),%xmm10
+ addq $88,%rsp
+ retq
+.LSEH_end_gcm_ghash_clmul:
+
+.p2align 6
+.Lbswap_mask:
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.L0x1c2_polynomial:
+.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
+.p2align 6
+
+.Lrem_4bit:
+.long 0,0,0,471859200,0,943718400,0,610271232
+.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
+.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
+.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
+
+.Lrem_8bit:
+.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
+.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
+.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
+.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
+.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
+.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
+.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
+.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
+.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
+.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
+.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
+.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
+.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
+.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
+.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
+.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
+.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
+.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
+.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
+.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
+.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
+.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
+.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
+.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
+.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
+.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
+.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
+.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
+.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
+.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
+.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
+.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
+
+.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align 6
+
+.def se_handler; .scl 3; .type 32; .endef
+.p2align 4
+se_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax
+ movq 248(%r8),%rbx
+
+ movq 8(%r9),%rsi
+ movq 56(%r9),%r11
+
+ movl 0(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jb .Lin_prologue
+
+ movq 152(%r8),%rax
+
+ movl 4(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lin_prologue
+
+ leaq 24(%rax),%rax
+
+ movq -8(%rax),%rbx
+ movq -16(%rax),%rbp
+ movq -24(%rax),%r12
+ movq %rbx,144(%r8)
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+
+.Lin_prologue:
+ movq 8(%rax),%rdi
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8)
+ movq %rsi,168(%r8)
+ movq %rdi,176(%r8)
+
+ movq 40(%r9),%rdi
+ movq %r8,%rsi
+ movl $154,%ecx
+.long 0xa548f3fc
+
+ movq %r9,%rsi
+ xorq %rcx,%rcx
+ movq 8(%rsi),%rdx
+ movq 0(%rsi),%r8
+ movq 16(%rsi),%r9
+ movq 40(%rsi),%r10
+ leaq 56(%rsi),%r11
+ leaq 24(%rsi),%r12
+ movq %r10,32(%rsp)
+ movq %r11,40(%rsp)
+ movq %r12,48(%rsp)
+ movq %rcx,56(%rsp)
+ call *__imp_RtlVirtualUnwind(%rip)
+
+ movl $1,%eax
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+
+
+.section .pdata
+.p2align 2
+.rva .LSEH_begin_gcm_gmult_4bit
+.rva .LSEH_end_gcm_gmult_4bit
+.rva .LSEH_info_gcm_gmult_4bit
+
+.rva .LSEH_begin_gcm_ghash_4bit
+.rva .LSEH_end_gcm_ghash_4bit
+.rva .LSEH_info_gcm_ghash_4bit
+
+.rva .LSEH_begin_gcm_ghash_clmul
+.rva .LSEH_end_gcm_ghash_clmul
+.rva .LSEH_info_gcm_ghash_clmul
+
+.section .xdata
+.p2align 3
+.LSEH_info_gcm_gmult_4bit:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lgmult_prologue,.Lgmult_epilogue
+.LSEH_info_gcm_ghash_4bit:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lghash_prologue,.Lghash_epilogue
+.LSEH_info_gcm_ghash_clmul:
+.byte 0x01,0x1f,0x0b,0x00
+.byte 0x1f,0xa8,0x04,0x00
+.byte 0x19,0x98,0x03,0x00
+.byte 0x13,0x88,0x02,0x00
+.byte 0x0d,0x78,0x01,0x00
+.byte 0x08,0x68,0x00,0x00
+.byte 0x04,0xa2,0x00,0x00
diff --git a/ext/libressl/crypto/modes/modes_lcl.h b/ext/libressl/crypto/modes/modes_lcl.h
new file mode 100644
index 0000000..bfea189
--- /dev/null
+++ b/ext/libressl/crypto/modes/modes_lcl.h
@@ -0,0 +1,113 @@
+/* $OpenBSD: modes_lcl.h,v 1.10 2016/12/21 15:49:29 jsing Exp $ */
+/* ====================================================================
+ * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use is governed by OpenSSL license.
+ * ====================================================================
+ */
+
+#include <machine/endian.h>
+
+#include <openssl/opensslconf.h>
+
+#include <openssl/modes.h>
+
+__BEGIN_HIDDEN_DECLS
+
+#if defined(_LP64)
+typedef long i64;
+typedef unsigned long u64;
+#define U64(C) C##UL
+#else
+typedef long long i64;
+typedef unsigned long long u64;
+#define U64(C) C##ULL
+#endif
+
+typedef unsigned int u32;
+typedef unsigned char u8;
+
+#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
+#if defined(__GNUC__) && __GNUC__>=2
+# if defined(__x86_64) || defined(__x86_64__)
+# define BSWAP8(x) ({ u64 ret=(x); \
+ asm ("bswapq %0" \
+ : "+r"(ret)); ret; })
+# define BSWAP4(x) ({ u32 ret=(x); \
+ asm ("bswapl %0" \
+ : "+r"(ret)); ret; })
+# elif (defined(__i386) || defined(__i386__))
+# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \
+ asm ("bswapl %0; bswapl %1" \
+ : "+r"(hi),"+r"(lo)); \
+ (u64)hi<<32|lo; })
+# define BSWAP4(x) ({ u32 ret=(x); \
+ asm ("bswapl %0" \
+ : "+r"(ret)); ret; })
+# elif (defined(__arm__) || defined(__arm)) && !defined(__STRICT_ALIGNMENT)
+# if (__ARM_ARCH >= 6)
+# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \
+ asm ("rev %0,%0; rev %1,%1" \
+ : "+r"(hi),"+r"(lo)); \
+ (u64)hi<<32|lo; })
+# define BSWAP4(x) ({ u32 ret; \
+ asm ("rev %0,%1" \
+ : "=r"(ret) : "r"((u32)(x))); \
+ ret; })
+# endif
+# endif
+#endif
+#endif
+
+#if defined(BSWAP4) && !defined(__STRICT_ALIGNMENT)
+#define GETU32(p) BSWAP4(*(const u32 *)(p))
+#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
+#else
+#define GETU32(p) ((u32)(p)[0]<<24|(u32)(p)[1]<<16|(u32)(p)[2]<<8|(u32)(p)[3])
+#define PUTU32(p,v) ((p)[0]=(u8)((v)>>24),(p)[1]=(u8)((v)>>16),(p)[2]=(u8)((v)>>8),(p)[3]=(u8)(v))
+#endif
+
+/* GCM definitions */
+
+typedef struct { u64 hi,lo; } u128;
+
+#ifdef TABLE_BITS
+#undef TABLE_BITS
+#endif
+/*
+ * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
+ * never be set to 8 [or 1]. For further information see gcm128.c.
+ */
+#define TABLE_BITS 4
+
+struct gcm128_context {
+ /* Following 6 names follow names in GCM specification */
+ union { u64 u[2]; u32 d[4]; u8 c[16]; size_t t[16/sizeof(size_t)]; }
+ Yi,EKi,EK0,len,Xi,H;
+ /* Relative position of Xi, H and pre-computed Htable is used
+ * in some assembler modules, i.e. don't change the order! */
+#if TABLE_BITS==8
+ u128 Htable[256];
+#else
+ u128 Htable[16];
+ void (*gmult)(u64 Xi[2],const u128 Htable[16]);
+ void (*ghash)(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+#endif
+ unsigned int mres, ares;
+ block128_f block;
+ void *key;
+};
+
+struct xts128_context {
+ void *key1, *key2;
+ block128_f block1,block2;
+};
+
+struct ccm128_context {
+ union { u64 u[2]; u8 c[16]; } nonce, cmac;
+ u64 blocks;
+ block128_f block;
+ void *key;
+};
+
+__END_HIDDEN_DECLS
diff --git a/ext/libressl/crypto/modes/ofb128.c b/ext/libressl/crypto/modes/ofb128.c
new file mode 100644
index 0000000..c6ca67a
--- /dev/null
+++ b/ext/libressl/crypto/modes/ofb128.c
@@ -0,0 +1,119 @@
+/* $OpenBSD: ofb128.c,v 1.4 2015/02/10 09:46:30 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
+#include <string.h>
+
+#ifndef MODES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#endif
+
+/* The input and output are encrypted as though 128-bit OFB mode is being
+ * used.  The extra state information recording how much of the current
+ * 128-bit block has already been used is kept in *num.
+ */
+void CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], int *num,
+ block128_f block)
+{
+ unsigned int n;
+ size_t l=0;
+
+ n = *num;
+
+#if !defined(OPENSSL_SMALL_FOOTPRINT)
+ if (16%sizeof(size_t) == 0) do { /* always true actually */
+ while (n && len) {
+ *(out++) = *(in++) ^ ivec[n];
+ --len;
+ n = (n+1) % 16;
+ }
+#ifdef __STRICT_ALIGNMENT
+ if (((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0)
+ break;
+#endif
+ while (len>=16) {
+ (*block)(ivec, ivec, key);
+ for (; n<16; n+=sizeof(size_t))
+ *(size_t*)(out+n) =
+ *(size_t*)(in+n) ^ *(size_t*)(ivec+n);
+ len -= 16;
+ out += 16;
+ in += 16;
+ n = 0;
+ }
+ if (len) {
+ (*block)(ivec, ivec, key);
+ while (len--) {
+ out[n] = in[n] ^ ivec[n];
+ ++n;
+ }
+ }
+ *num = n;
+ return;
+ } while(0);
+ /* the rest would be commonly eliminated by x86* compiler */
+#endif
+ while (l<len) {
+ if (n==0) {
+ (*block)(ivec, ivec, key);
+ }
+ out[l] = in[l] ^ ivec[n];
+ ++l;
+ n = (n+1) % 16;
+ }
+
+ *num=n;
+}
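For reference, CRYPTO_ofb128_encrypt() above accepts any 128-bit block cipher through the block128_f callback. A minimal, hedged usage sketch with AES-128, assuming LibreSSL's <openssl/aes.h> and <openssl/modes.h> headers (this mirrors how AES_ofb128_encrypt() wraps the generic routine; the helper name is illustrative only):

#include <stddef.h>
#include <openssl/aes.h>
#include <openssl/modes.h>

/* Illustrative sketch: encrypt (or, identically, decrypt) len bytes with
 * AES-128 in OFB mode.  ivec is updated in place with the running
 * keystream block and num records how much of it has been consumed, so
 * consecutive calls continue the same stream. */
static void
ofb128_example(const unsigned char key[16], unsigned char ivec[16],
    const unsigned char *in, unsigned char *out, size_t len)
{
	AES_KEY ks;
	int num = 0;

	AES_set_encrypt_key(key, 128, &ks);
	CRYPTO_ofb128_encrypt(in, out, len, &ks, ivec, &num,
	    (block128_f)AES_encrypt);
}

Because OFB only XORs the keystream into the data, calling the same helper on the ciphertext with the same key, ivec and starting num recovers the plaintext.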
diff --git a/ext/libressl/crypto/modes/xts128.c b/ext/libressl/crypto/modes/xts128.c
new file mode 100644
index 0000000..e40505e
--- /dev/null
+++ b/ext/libressl/crypto/modes/xts128.c
@@ -0,0 +1,185 @@
+/* $OpenBSD: xts128.c,v 1.7 2017/08/13 17:46:24 bcook Exp $ */
+/* ====================================================================
+ * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ */
+
+#include <machine/endian.h>
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
+#include <string.h>
+
+#ifndef MODES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#endif
+
+int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
+ const unsigned char *inp, unsigned char *out,
+ size_t len, int enc)
+{
+ union { u64 u[2]; u32 d[4]; u8 c[16]; } tweak, scratch;
+ unsigned int i;
+
+ if (len<16) return -1;
+
+ memcpy(tweak.c, iv, 16);
+
+ (*ctx->block2)(tweak.c,tweak.c,ctx->key2);
+
+ if (!enc && (len%16)) len-=16;
+
+ while (len>=16) {
+#ifdef __STRICT_ALIGNMENT
+ memcpy(scratch.c,inp,16);
+ scratch.u[0] ^= tweak.u[0];
+ scratch.u[1] ^= tweak.u[1];
+#else
+ scratch.u[0] = ((u64*)inp)[0]^tweak.u[0];
+ scratch.u[1] = ((u64*)inp)[1]^tweak.u[1];
+#endif
+ (*ctx->block1)(scratch.c,scratch.c,ctx->key1);
+#ifdef __STRICT_ALIGNMENT
+ scratch.u[0] ^= tweak.u[0];
+ scratch.u[1] ^= tweak.u[1];
+ memcpy(out,scratch.c,16);
+#else
+ ((u64*)out)[0] = scratch.u[0]^=tweak.u[0];
+ ((u64*)out)[1] = scratch.u[1]^=tweak.u[1];
+#endif
+ inp += 16;
+ out += 16;
+ len -= 16;
+
+ if (len==0) return 0;
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+ unsigned int carry,res;
+
+ res = 0x87&(((int)tweak.d[3])>>31);
+ carry = (unsigned int)(tweak.u[0]>>63);
+ tweak.u[0] = (tweak.u[0]<<1)^res;
+ tweak.u[1] = (tweak.u[1]<<1)|carry;
+#else /* BIG_ENDIAN */
+ size_t c;
+
+ for (c=0,i=0;i<16;++i) {
+ /*+ substitutes for |, because c is 1 bit */
+ c += ((size_t)tweak.c[i])<<1;
+ tweak.c[i] = (u8)c;
+ c = c>>8;
+ }
+ tweak.c[0] ^= (u8)(0x87&(0-c));
+#endif
+ }
+ if (enc) {
+ for (i=0;i<len;++i) {
+ u8 c = inp[i];
+ out[i] = scratch.c[i];
+ scratch.c[i] = c;
+ }
+ scratch.u[0] ^= tweak.u[0];
+ scratch.u[1] ^= tweak.u[1];
+ (*ctx->block1)(scratch.c,scratch.c,ctx->key1);
+ scratch.u[0] ^= tweak.u[0];
+ scratch.u[1] ^= tweak.u[1];
+ memcpy(out-16,scratch.c,16);
+ }
+ else {
+ union { u64 u[2]; u8 c[16]; } tweak1;
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+ unsigned int carry,res;
+
+ res = 0x87&(((int)tweak.d[3])>>31);
+ carry = (unsigned int)(tweak.u[0]>>63);
+ tweak1.u[0] = (tweak.u[0]<<1)^res;
+ tweak1.u[1] = (tweak.u[1]<<1)|carry;
+#else
+ size_t c;
+
+ for (c=0,i=0;i<16;++i) {
+ /*+ substitutes for |, because c is 1 bit */
+ c += ((size_t)tweak.c[i])<<1;
+ tweak1.c[i] = (u8)c;
+ c = c>>8;
+ }
+ tweak1.c[0] ^= (u8)(0x87&(0-c));
+#endif
+#ifdef __STRICT_ALIGNMENT
+ memcpy(scratch.c,inp,16);
+ scratch.u[0] ^= tweak1.u[0];
+ scratch.u[1] ^= tweak1.u[1];
+#else
+ scratch.u[0] = ((u64*)inp)[0]^tweak1.u[0];
+ scratch.u[1] = ((u64*)inp)[1]^tweak1.u[1];
+#endif
+ (*ctx->block1)(scratch.c,scratch.c,ctx->key1);
+ scratch.u[0] ^= tweak1.u[0];
+ scratch.u[1] ^= tweak1.u[1];
+
+ for (i=0;i<len;++i) {
+ u8 c = inp[16+i];
+ out[16+i] = scratch.c[i];
+ scratch.c[i] = c;
+ }
+ scratch.u[0] ^= tweak.u[0];
+ scratch.u[1] ^= tweak.u[1];
+ (*ctx->block1)(scratch.c,scratch.c,ctx->key1);
+#ifdef __STRICT_ALIGNMENT
+ scratch.u[0] ^= tweak.u[0];
+ scratch.u[1] ^= tweak.u[1];
+ memcpy (out,scratch.c,16);
+#else
+ ((u64*)out)[0] = scratch.u[0]^tweak.u[0];
+ ((u64*)out)[1] = scratch.u[1]^tweak.u[1];
+#endif
+ }
+
+ return 0;
+}
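As a usage note for the XTS routine above: the caller supplies two key schedules (data key and tweak key) through an XTS128_CONTEXT, and the same entry point handles both directions. A hedged sketch with AES-128, assuming <openssl/aes.h> and <openssl/modes.h> (the helper name is made up for illustration):

#include <stddef.h>
#include <openssl/aes.h>
#include <openssl/modes.h>

/* Illustrative sketch: AES-128-XTS over one contiguous data unit.  iv holds
 * the initial tweak (e.g. the sector number); len must be at least 16 or
 * CRYPTO_xts128_encrypt() returns -1.  The tweak key always encrypts; the
 * data key direction follows enc. */
static int
xts128_example(const unsigned char data_key[16],
    const unsigned char tweak_key[16], const unsigned char iv[16],
    const unsigned char *in, unsigned char *out, size_t len, int enc)
{
	AES_KEY k1, k2;
	XTS128_CONTEXT ctx;

	if (enc)
		AES_set_encrypt_key(data_key, 128, &k1);
	else
		AES_set_decrypt_key(data_key, 128, &k1);
	AES_set_encrypt_key(tweak_key, 128, &k2);

	ctx.key1 = &k1;
	ctx.key2 = &k2;
	ctx.block1 = (block128_f)(enc ? AES_encrypt : AES_decrypt);
	ctx.block2 = (block128_f)AES_encrypt;

	return CRYPTO_xts128_encrypt(&ctx, iv, in, out, len, enc);
}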
diff --git a/ext/libressl/crypto/poly1305/Makefile b/ext/libressl/crypto/poly1305/Makefile
new file mode 100644
index 0000000..94ceaf6
--- /dev/null
+++ b/ext/libressl/crypto/poly1305/Makefile
@@ -0,0 +1,13 @@
+include ../../ssl_common.mk
+
+obj = poly1305.o
+
+
+all: $(obj)
+dep: all
+
+%.o: %.c
+ $(CC) $(CFLAGS) -c $<
+
+clean:
+ rm -f *.o *.a
diff --git a/ext/libressl/crypto/poly1305/poly1305-donna.c b/ext/libressl/crypto/poly1305/poly1305-donna.c
new file mode 100644
index 0000000..773ea4e
--- /dev/null
+++ b/ext/libressl/crypto/poly1305/poly1305-donna.c
@@ -0,0 +1,321 @@
+/* $OpenBSD: poly1305-donna.c,v 1.3 2014/06/12 15:49:30 deraadt Exp $ */
+/*
+ * Public Domain poly1305 from Andrew Moon
+ * Based on poly1305-donna.c, poly1305-donna-32.h and poly1305-donna.h from:
+ * https://github.com/floodyberry/poly1305-donna
+ */
+
+#include <stddef.h>
+
+static inline void poly1305_init(poly1305_context *ctx,
+ const unsigned char key[32]);
+static inline void poly1305_update(poly1305_context *ctx,
+ const unsigned char *m, size_t bytes);
+static inline void poly1305_finish(poly1305_context *ctx,
+ unsigned char mac[16]);
+
+/*
+ * poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication
+ * and 64 bit addition.
+ */
+
+#define poly1305_block_size 16
+
+/* 17 + sizeof(size_t) + 14*sizeof(unsigned long) */
+typedef struct poly1305_state_internal_t {
+ unsigned long r[5];
+ unsigned long h[5];
+ unsigned long pad[4];
+ size_t leftover;
+ unsigned char buffer[poly1305_block_size];
+ unsigned char final;
+} poly1305_state_internal_t;
+
+/* interpret four 8 bit unsigned integers as a 32 bit unsigned integer in little endian */
+static unsigned long
+U8TO32(const unsigned char *p)
+{
+ return (((unsigned long)(p[0] & 0xff)) |
+ ((unsigned long)(p[1] & 0xff) << 8) |
+ ((unsigned long)(p[2] & 0xff) << 16) |
+ ((unsigned long)(p[3] & 0xff) << 24));
+}
+
+/* store a 32 bit unsigned integer as four 8 bit unsigned integers in little endian */
+static void
+U32TO8(unsigned char *p, unsigned long v)
+{
+ p[0] = (v) & 0xff;
+ p[1] = (v >> 8) & 0xff;
+ p[2] = (v >> 16) & 0xff;
+ p[3] = (v >> 24) & 0xff;
+}
+
+static inline void
+poly1305_init(poly1305_context *ctx, const unsigned char key[32])
+{
+ poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
+
+ /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
+ st->r[0] = (U8TO32(&key[0])) & 0x3ffffff;
+ st->r[1] = (U8TO32(&key[3]) >> 2) & 0x3ffff03;
+ st->r[2] = (U8TO32(&key[6]) >> 4) & 0x3ffc0ff;
+ st->r[3] = (U8TO32(&key[9]) >> 6) & 0x3f03fff;
+ st->r[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff;
+
+ /* h = 0 */
+ st->h[0] = 0;
+ st->h[1] = 0;
+ st->h[2] = 0;
+ st->h[3] = 0;
+ st->h[4] = 0;
+
+ /* save pad for later */
+ st->pad[0] = U8TO32(&key[16]);
+ st->pad[1] = U8TO32(&key[20]);
+ st->pad[2] = U8TO32(&key[24]);
+ st->pad[3] = U8TO32(&key[28]);
+
+ st->leftover = 0;
+ st->final = 0;
+}
+
+static void
+poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, size_t bytes)
+{
+ const unsigned long hibit = (st->final) ? 0 : (1 << 24); /* 1 << 128 */
+ unsigned long r0, r1, r2, r3, r4;
+ unsigned long s1, s2, s3, s4;
+ unsigned long h0, h1, h2, h3, h4;
+ unsigned long long d0, d1, d2, d3, d4;
+ unsigned long c;
+
+ r0 = st->r[0];
+ r1 = st->r[1];
+ r2 = st->r[2];
+ r3 = st->r[3];
+ r4 = st->r[4];
+
+ s1 = r1 * 5;
+ s2 = r2 * 5;
+ s3 = r3 * 5;
+ s4 = r4 * 5;
+
+ h0 = st->h[0];
+ h1 = st->h[1];
+ h2 = st->h[2];
+ h3 = st->h[3];
+ h4 = st->h[4];
+
+ while (bytes >= poly1305_block_size) {
+ /* h += m[i] */
+ h0 += (U8TO32(m + 0)) & 0x3ffffff;
+ h1 += (U8TO32(m + 3) >> 2) & 0x3ffffff;
+ h2 += (U8TO32(m + 6) >> 4) & 0x3ffffff;
+ h3 += (U8TO32(m + 9) >> 6) & 0x3ffffff;
+ h4 += (U8TO32(m + 12) >> 8) | hibit;
+
+ /* h *= r */
+ d0 = ((unsigned long long)h0 * r0) +
+ ((unsigned long long)h1 * s4) +
+ ((unsigned long long)h2 * s3) +
+ ((unsigned long long)h3 * s2) +
+ ((unsigned long long)h4 * s1);
+ d1 = ((unsigned long long)h0 * r1) +
+ ((unsigned long long)h1 * r0) +
+ ((unsigned long long)h2 * s4) +
+ ((unsigned long long)h3 * s3) +
+ ((unsigned long long)h4 * s2);
+ d2 = ((unsigned long long)h0 * r2) +
+ ((unsigned long long)h1 * r1) +
+ ((unsigned long long)h2 * r0) +
+ ((unsigned long long)h3 * s4) +
+ ((unsigned long long)h4 * s3);
+ d3 = ((unsigned long long)h0 * r3) +
+ ((unsigned long long)h1 * r2) +
+ ((unsigned long long)h2 * r1) +
+ ((unsigned long long)h3 * r0) +
+ ((unsigned long long)h4 * s4);
+ d4 = ((unsigned long long)h0 * r4) +
+ ((unsigned long long)h1 * r3) +
+ ((unsigned long long)h2 * r2) +
+ ((unsigned long long)h3 * r1) +
+ ((unsigned long long)h4 * r0);
+
+ /* (partial) h %= p */
+ c = (unsigned long)(d0 >> 26);
+ h0 = (unsigned long)d0 & 0x3ffffff;
+ d1 += c;
+ c = (unsigned long)(d1 >> 26);
+ h1 = (unsigned long)d1 & 0x3ffffff;
+ d2 += c;
+ c = (unsigned long)(d2 >> 26);
+ h2 = (unsigned long)d2 & 0x3ffffff;
+ d3 += c;
+ c = (unsigned long)(d3 >> 26);
+ h3 = (unsigned long)d3 & 0x3ffffff;
+ d4 += c;
+ c = (unsigned long)(d4 >> 26);
+ h4 = (unsigned long)d4 & 0x3ffffff;
+ h0 += c * 5;
+ c = (h0 >> 26);
+ h0 = h0 & 0x3ffffff;
+ h1 += c;
+
+ m += poly1305_block_size;
+ bytes -= poly1305_block_size;
+ }
+
+ st->h[0] = h0;
+ st->h[1] = h1;
+ st->h[2] = h2;
+ st->h[3] = h3;
+ st->h[4] = h4;
+}
+
+static inline void
+poly1305_update(poly1305_context *ctx, const unsigned char *m, size_t bytes)
+{
+ poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
+ size_t i;
+
+ /* handle leftover */
+ if (st->leftover) {
+ size_t want = (poly1305_block_size - st->leftover);
+ if (want > bytes)
+ want = bytes;
+ for (i = 0; i < want; i++)
+ st->buffer[st->leftover + i] = m[i];
+ bytes -= want;
+ m += want;
+ st->leftover += want;
+ if (st->leftover < poly1305_block_size)
+ return;
+ poly1305_blocks(st, st->buffer, poly1305_block_size);
+ st->leftover = 0;
+ }
+
+ /* process full blocks */
+ if (bytes >= poly1305_block_size) {
+ size_t want = (bytes & ~(poly1305_block_size - 1));
+ poly1305_blocks(st, m, want);
+ m += want;
+ bytes -= want;
+ }
+
+ /* store leftover */
+ if (bytes) {
+ for (i = 0; i < bytes; i++)
+ st->buffer[st->leftover + i] = m[i];
+ st->leftover += bytes;
+ }
+}
+
+static inline void
+poly1305_finish(poly1305_context *ctx, unsigned char mac[16])
+{
+ poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
+ unsigned long h0, h1, h2, h3, h4, c;
+ unsigned long g0, g1, g2, g3, g4;
+ unsigned long long f;
+ unsigned long mask;
+
+ /* process the remaining block */
+ if (st->leftover) {
+ size_t i = st->leftover;
+ st->buffer[i++] = 1;
+ for (; i < poly1305_block_size; i++)
+ st->buffer[i] = 0;
+ st->final = 1;
+ poly1305_blocks(st, st->buffer, poly1305_block_size);
+ }
+
+ /* fully carry h */
+ h0 = st->h[0];
+ h1 = st->h[1];
+ h2 = st->h[2];
+ h3 = st->h[3];
+ h4 = st->h[4];
+
+ c = h1 >> 26;
+ h1 = h1 & 0x3ffffff;
+ h2 += c;
+ c = h2 >> 26;
+ h2 = h2 & 0x3ffffff;
+ h3 += c;
+ c = h3 >> 26;
+ h3 = h3 & 0x3ffffff;
+ h4 += c;
+ c = h4 >> 26;
+ h4 = h4 & 0x3ffffff;
+ h0 += c * 5;
+ c = h0 >> 26;
+ h0 = h0 & 0x3ffffff;
+ h1 += c;
+
+ /* compute h + -p */
+ g0 = h0 + 5;
+ c = g0 >> 26;
+ g0 &= 0x3ffffff;
+ g1 = h1 + c;
+ c = g1 >> 26;
+ g1 &= 0x3ffffff;
+ g2 = h2 + c;
+ c = g2 >> 26;
+ g2 &= 0x3ffffff;
+ g3 = h3 + c;
+ c = g3 >> 26;
+ g3 &= 0x3ffffff;
+ g4 = h4 + c - (1 << 26);
+
+ /* select h if h < p, or h + -p if h >= p */
+ mask = (g4 >> ((sizeof(unsigned long) * 8) - 1)) - 1;
+ g0 &= mask;
+ g1 &= mask;
+ g2 &= mask;
+ g3 &= mask;
+ g4 &= mask;
+ mask = ~mask;
+ h0 = (h0 & mask) | g0;
+ h1 = (h1 & mask) | g1;
+ h2 = (h2 & mask) | g2;
+ h3 = (h3 & mask) | g3;
+ h4 = (h4 & mask) | g4;
+
+ /* h = h % (2^128) */
+ h0 = ((h0) | (h1 << 26)) & 0xffffffff;
+ h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
+ h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
+ h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
+
+ /* mac = (h + pad) % (2^128) */
+ f = (unsigned long long)h0 + st->pad[0];
+ h0 = (unsigned long)f;
+ f = (unsigned long long)h1 + st->pad[1] + (f >> 32);
+ h1 = (unsigned long)f;
+ f = (unsigned long long)h2 + st->pad[2] + (f >> 32);
+ h2 = (unsigned long)f;
+ f = (unsigned long long)h3 + st->pad[3] + (f >> 32);
+ h3 = (unsigned long)f;
+
+ U32TO8(mac + 0, h0);
+ U32TO8(mac + 4, h1);
+ U32TO8(mac + 8, h2);
+ U32TO8(mac + 12, h3);
+
+ /* zero out the state */
+ st->h[0] = 0;
+ st->h[1] = 0;
+ st->h[2] = 0;
+ st->h[3] = 0;
+ st->h[4] = 0;
+ st->r[0] = 0;
+ st->r[1] = 0;
+ st->r[2] = 0;
+ st->r[3] = 0;
+ st->r[4] = 0;
+ st->pad[0] = 0;
+ st->pad[1] = 0;
+ st->pad[2] = 0;
+ st->pad[3] = 0;
+}
diff --git a/ext/libressl/crypto/poly1305/poly1305.c b/ext/libressl/crypto/poly1305/poly1305.c
new file mode 100644
index 0000000..75a34cc
--- /dev/null
+++ b/ext/libressl/crypto/poly1305/poly1305.c
@@ -0,0 +1,38 @@
+/* $OpenBSD: poly1305.c,v 1.3 2014/06/12 15:49:30 deraadt Exp $ */
+/*
+ * Copyright (c) 2014 Joel Sing <jsing@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <openssl/poly1305.h>
+#include "poly1305-donna.c"
+
+void
+CRYPTO_poly1305_init(poly1305_context *ctx, const unsigned char key[32])
+{
+ poly1305_init(ctx, key);
+}
+
+void
+CRYPTO_poly1305_update(poly1305_context *ctx, const unsigned char *in,
+ size_t len)
+{
+ poly1305_update(ctx, in, len);
+}
+
+void
+CRYPTO_poly1305_finish(poly1305_context *ctx, unsigned char mac[16])
+{
+ poly1305_finish(ctx, mac);
+}
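The three CRYPTO_poly1305_* wrappers above expose the donna implementation through <openssl/poly1305.h>. A short sketch of computing a tag; note that the 32-byte key is a one-time key and must never be reused for a second message:

#include <stddef.h>
#include <openssl/poly1305.h>

/* Illustrative sketch: authenticate msg with a one-time 32-byte key,
 * producing a 16-byte tag. */
static void
poly1305_tag(const unsigned char key[32], const unsigned char *msg,
    size_t msglen, unsigned char tag[16])
{
	poly1305_context ctx;

	CRYPTO_poly1305_init(&ctx, key);
	CRYPTO_poly1305_update(&ctx, msg, msglen);
	CRYPTO_poly1305_finish(&ctx, tag);
}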
diff --git a/ext/libressl/crypto/sha/Makefile b/ext/libressl/crypto/sha/Makefile
new file mode 100644
index 0000000..6eb0c20
--- /dev/null
+++ b/ext/libressl/crypto/sha/Makefile
@@ -0,0 +1,14 @@
+include ../../ssl_common.mk
+CFLAGS += -D__BEGIN_HIDDEN_DECLS= -D__END_HIDDEN_DECLS=
+
+obj = sha1dgst.o sha1_one.o sha256.o sha512.o
+
+
+all: $(obj)
+dep: all
+
+%.o: %.c
+ $(CC) $(CFLAGS) -c $<
+
+clean:
+ rm -f *.o *.a
diff --git a/ext/libressl/crypto/sha/sha1-elf-armv4.S b/ext/libressl/crypto/sha/sha1-elf-armv4.S
new file mode 100644
index 0000000..5aeaf7c
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha1-elf-armv4.S
@@ -0,0 +1,455 @@
+#include "arm_arch.h"
+
+.text
+
+.global sha1_block_data_order
+.type sha1_block_data_order,%function
+
+.align 2
+sha1_block_data_order:
+ stmdb sp!,{r4-r12,lr}
+ add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
+ ldmia r0,{r3,r4,r5,r6,r7}
+.Lloop:
+ ldr r8,.LK_00_19
+ mov r14,sp
+ sub sp,sp,#15*4
+ mov r5,r5,ror#30
+ mov r6,r6,ror#30
+ mov r7,r7,ror#30 @ [6]
+.L_00_15:
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r10,[r1,#2]
+ ldrb r9,[r1,#3]
+ ldrb r11,[r1,#1]
+ add r7,r8,r7,ror#2 @ E+=K_00_19
+ ldrb r12,[r1],#4
+ orr r9,r9,r10,lsl#8
+ eor r10,r5,r6 @ F_xx_xx
+ orr r9,r9,r11,lsl#16
+ add r7,r7,r3,ror#27 @ E+=ROR(A,27)
+ orr r9,r9,r12,lsl#24
+#else
+ ldr r9,[r1],#4 @ handles unaligned
+ add r7,r8,r7,ror#2 @ E+=K_00_19
+ eor r10,r5,r6 @ F_xx_xx
+ add r7,r7,r3,ror#27 @ E+=ROR(A,27)
+#ifdef __ARMEL__
+ rev r9,r9 @ byte swap
+#endif
+#endif
+ and r10,r4,r10,ror#2
+ add r7,r7,r9 @ E+=X[i]
+ eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
+ str r9,[r14,#-4]!
+ add r7,r7,r10 @ E+=F_00_19(B,C,D)
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r10,[r1,#2]
+ ldrb r9,[r1,#3]
+ ldrb r11,[r1,#1]
+ add r6,r8,r6,ror#2 @ E+=K_00_19
+ ldrb r12,[r1],#4
+ orr r9,r9,r10,lsl#8
+ eor r10,r4,r5 @ F_xx_xx
+ orr r9,r9,r11,lsl#16
+ add r6,r6,r7,ror#27 @ E+=ROR(A,27)
+ orr r9,r9,r12,lsl#24
+#else
+ ldr r9,[r1],#4 @ handles unaligned
+ add r6,r8,r6,ror#2 @ E+=K_00_19
+ eor r10,r4,r5 @ F_xx_xx
+ add r6,r6,r7,ror#27 @ E+=ROR(A,27)
+#ifdef __ARMEL__
+ rev r9,r9 @ byte swap
+#endif
+#endif
+ and r10,r3,r10,ror#2
+ add r6,r6,r9 @ E+=X[i]
+ eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
+ str r9,[r14,#-4]!
+ add r6,r6,r10 @ E+=F_00_19(B,C,D)
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r10,[r1,#2]
+ ldrb r9,[r1,#3]
+ ldrb r11,[r1,#1]
+ add r5,r8,r5,ror#2 @ E+=K_00_19
+ ldrb r12,[r1],#4
+ orr r9,r9,r10,lsl#8
+ eor r10,r3,r4 @ F_xx_xx
+ orr r9,r9,r11,lsl#16
+ add r5,r5,r6,ror#27 @ E+=ROR(A,27)
+ orr r9,r9,r12,lsl#24
+#else
+ ldr r9,[r1],#4 @ handles unaligned
+ add r5,r8,r5,ror#2 @ E+=K_00_19
+ eor r10,r3,r4 @ F_xx_xx
+ add r5,r5,r6,ror#27 @ E+=ROR(A,27)
+#ifdef __ARMEL__
+ rev r9,r9 @ byte swap
+#endif
+#endif
+ and r10,r7,r10,ror#2
+ add r5,r5,r9 @ E+=X[i]
+ eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
+ str r9,[r14,#-4]!
+ add r5,r5,r10 @ E+=F_00_19(B,C,D)
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r10,[r1,#2]
+ ldrb r9,[r1,#3]
+ ldrb r11,[r1,#1]
+ add r4,r8,r4,ror#2 @ E+=K_00_19
+ ldrb r12,[r1],#4
+ orr r9,r9,r10,lsl#8
+ eor r10,r7,r3 @ F_xx_xx
+ orr r9,r9,r11,lsl#16
+ add r4,r4,r5,ror#27 @ E+=ROR(A,27)
+ orr r9,r9,r12,lsl#24
+#else
+ ldr r9,[r1],#4 @ handles unaligned
+ add r4,r8,r4,ror#2 @ E+=K_00_19
+ eor r10,r7,r3 @ F_xx_xx
+ add r4,r4,r5,ror#27 @ E+=ROR(A,27)
+#ifdef __ARMEL__
+ rev r9,r9 @ byte swap
+#endif
+#endif
+ and r10,r6,r10,ror#2
+ add r4,r4,r9 @ E+=X[i]
+ eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
+ str r9,[r14,#-4]!
+ add r4,r4,r10 @ E+=F_00_19(B,C,D)
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r10,[r1,#2]
+ ldrb r9,[r1,#3]
+ ldrb r11,[r1,#1]
+ add r3,r8,r3,ror#2 @ E+=K_00_19
+ ldrb r12,[r1],#4
+ orr r9,r9,r10,lsl#8
+ eor r10,r6,r7 @ F_xx_xx
+ orr r9,r9,r11,lsl#16
+ add r3,r3,r4,ror#27 @ E+=ROR(A,27)
+ orr r9,r9,r12,lsl#24
+#else
+ ldr r9,[r1],#4 @ handles unaligned
+ add r3,r8,r3,ror#2 @ E+=K_00_19
+ eor r10,r6,r7 @ F_xx_xx
+ add r3,r3,r4,ror#27 @ E+=ROR(A,27)
+#ifdef __ARMEL__
+ rev r9,r9 @ byte swap
+#endif
+#endif
+ and r10,r5,r10,ror#2
+ add r3,r3,r9 @ E+=X[i]
+ eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
+ str r9,[r14,#-4]!
+ add r3,r3,r10 @ E+=F_00_19(B,C,D)
+ teq r14,sp
+ bne .L_00_15 @ [((11+4)*5+2)*3]
+ sub sp,sp,#25*4
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r10,[r1,#2]
+ ldrb r9,[r1,#3]
+ ldrb r11,[r1,#1]
+ add r7,r8,r7,ror#2 @ E+=K_00_19
+ ldrb r12,[r1],#4
+ orr r9,r9,r10,lsl#8
+ eor r10,r5,r6 @ F_xx_xx
+ orr r9,r9,r11,lsl#16
+ add r7,r7,r3,ror#27 @ E+=ROR(A,27)
+ orr r9,r9,r12,lsl#24
+#else
+ ldr r9,[r1],#4 @ handles unaligned
+ add r7,r8,r7,ror#2 @ E+=K_00_19
+ eor r10,r5,r6 @ F_xx_xx
+ add r7,r7,r3,ror#27 @ E+=ROR(A,27)
+#ifdef __ARMEL__
+ rev r9,r9 @ byte swap
+#endif
+#endif
+ and r10,r4,r10,ror#2
+ add r7,r7,r9 @ E+=X[i]
+ eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
+ str r9,[r14,#-4]!
+ add r7,r7,r10 @ E+=F_00_19(B,C,D)
+ ldr r9,[r14,#15*4]
+ ldr r10,[r14,#13*4]
+ ldr r11,[r14,#7*4]
+ add r6,r8,r6,ror#2 @ E+=K_xx_xx
+ ldr r12,[r14,#2*4]
+ eor r9,r9,r10
+ eor r11,r11,r12 @ 1 cycle stall
+ eor r10,r4,r5 @ F_xx_xx
+ mov r9,r9,ror#31
+ add r6,r6,r7,ror#27 @ E+=ROR(A,27)
+ eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
+ and r10,r3,r10,ror#2 @ F_xx_xx
+ @ F_xx_xx
+ add r6,r6,r9 @ E+=X[i]
+ eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
+ add r6,r6,r10 @ E+=F_00_19(B,C,D)
+ ldr r9,[r14,#15*4]
+ ldr r10,[r14,#13*4]
+ ldr r11,[r14,#7*4]
+ add r5,r8,r5,ror#2 @ E+=K_xx_xx
+ ldr r12,[r14,#2*4]
+ eor r9,r9,r10
+ eor r11,r11,r12 @ 1 cycle stall
+ eor r10,r3,r4 @ F_xx_xx
+ mov r9,r9,ror#31
+ add r5,r5,r6,ror#27 @ E+=ROR(A,27)
+ eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
+ and r10,r7,r10,ror#2 @ F_xx_xx
+ @ F_xx_xx
+ add r5,r5,r9 @ E+=X[i]
+ eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
+ add r5,r5,r10 @ E+=F_00_19(B,C,D)
+ ldr r9,[r14,#15*4]
+ ldr r10,[r14,#13*4]
+ ldr r11,[r14,#7*4]
+ add r4,r8,r4,ror#2 @ E+=K_xx_xx
+ ldr r12,[r14,#2*4]
+ eor r9,r9,r10
+ eor r11,r11,r12 @ 1 cycle stall
+ eor r10,r7,r3 @ F_xx_xx
+ mov r9,r9,ror#31
+ add r4,r4,r5,ror#27 @ E+=ROR(A,27)
+ eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
+ and r10,r6,r10,ror#2 @ F_xx_xx
+ @ F_xx_xx
+ add r4,r4,r9 @ E+=X[i]
+ eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
+ add r4,r4,r10 @ E+=F_00_19(B,C,D)
+ ldr r9,[r14,#15*4]
+ ldr r10,[r14,#13*4]
+ ldr r11,[r14,#7*4]
+ add r3,r8,r3,ror#2 @ E+=K_xx_xx
+ ldr r12,[r14,#2*4]
+ eor r9,r9,r10
+ eor r11,r11,r12 @ 1 cycle stall
+ eor r10,r6,r7 @ F_xx_xx
+ mov r9,r9,ror#31
+ add r3,r3,r4,ror#27 @ E+=ROR(A,27)
+ eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
+ and r10,r5,r10,ror#2 @ F_xx_xx
+ @ F_xx_xx
+ add r3,r3,r9 @ E+=X[i]
+ eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
+ add r3,r3,r10 @ E+=F_00_19(B,C,D)
+
+ ldr r8,.LK_20_39 @ [+15+16*4]
+ cmn sp,#0 @ [+3], clear carry to denote 20_39
+.L_20_39_or_60_79:
+ ldr r9,[r14,#15*4]
+ ldr r10,[r14,#13*4]
+ ldr r11,[r14,#7*4]
+ add r7,r8,r7,ror#2 @ E+=K_xx_xx
+ ldr r12,[r14,#2*4]
+ eor r9,r9,r10
+ eor r11,r11,r12 @ 1 cycle stall
+ eor r10,r5,r6 @ F_xx_xx
+ mov r9,r9,ror#31
+ add r7,r7,r3,ror#27 @ E+=ROR(A,27)
+ eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
+ eor r10,r4,r10,ror#2 @ F_xx_xx
+ @ F_xx_xx
+ add r7,r7,r9 @ E+=X[i]
+ add r7,r7,r10 @ E+=F_20_39(B,C,D)
+ ldr r9,[r14,#15*4]
+ ldr r10,[r14,#13*4]
+ ldr r11,[r14,#7*4]
+ add r6,r8,r6,ror#2 @ E+=K_xx_xx
+ ldr r12,[r14,#2*4]
+ eor r9,r9,r10
+ eor r11,r11,r12 @ 1 cycle stall
+ eor r10,r4,r5 @ F_xx_xx
+ mov r9,r9,ror#31
+ add r6,r6,r7,ror#27 @ E+=ROR(A,27)
+ eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
+ eor r10,r3,r10,ror#2 @ F_xx_xx
+ @ F_xx_xx
+ add r6,r6,r9 @ E+=X[i]
+ add r6,r6,r10 @ E+=F_20_39(B,C,D)
+ ldr r9,[r14,#15*4]
+ ldr r10,[r14,#13*4]
+ ldr r11,[r14,#7*4]
+ add r5,r8,r5,ror#2 @ E+=K_xx_xx
+ ldr r12,[r14,#2*4]
+ eor r9,r9,r10
+ eor r11,r11,r12 @ 1 cycle stall
+ eor r10,r3,r4 @ F_xx_xx
+ mov r9,r9,ror#31
+ add r5,r5,r6,ror#27 @ E+=ROR(A,27)
+ eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
+ eor r10,r7,r10,ror#2 @ F_xx_xx
+ @ F_xx_xx
+ add r5,r5,r9 @ E+=X[i]
+ add r5,r5,r10 @ E+=F_20_39(B,C,D)
+ ldr r9,[r14,#15*4]
+ ldr r10,[r14,#13*4]
+ ldr r11,[r14,#7*4]
+ add r4,r8,r4,ror#2 @ E+=K_xx_xx
+ ldr r12,[r14,#2*4]
+ eor r9,r9,r10
+ eor r11,r11,r12 @ 1 cycle stall
+ eor r10,r7,r3 @ F_xx_xx
+ mov r9,r9,ror#31
+ add r4,r4,r5,ror#27 @ E+=ROR(A,27)
+ eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
+ eor r10,r6,r10,ror#2 @ F_xx_xx
+ @ F_xx_xx
+ add r4,r4,r9 @ E+=X[i]
+ add r4,r4,r10 @ E+=F_20_39(B,C,D)
+ ldr r9,[r14,#15*4]
+ ldr r10,[r14,#13*4]
+ ldr r11,[r14,#7*4]
+ add r3,r8,r3,ror#2 @ E+=K_xx_xx
+ ldr r12,[r14,#2*4]
+ eor r9,r9,r10
+ eor r11,r11,r12 @ 1 cycle stall
+ eor r10,r6,r7 @ F_xx_xx
+ mov r9,r9,ror#31
+ add r3,r3,r4,ror#27 @ E+=ROR(A,27)
+ eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
+ eor r10,r5,r10,ror#2 @ F_xx_xx
+ @ F_xx_xx
+ add r3,r3,r9 @ E+=X[i]
+ add r3,r3,r10 @ E+=F_20_39(B,C,D)
+ teq r14,sp @ preserve carry
+ bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
+ bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
+
+ ldr r8,.LK_40_59
+ sub sp,sp,#20*4 @ [+2]
+.L_40_59:
+ ldr r9,[r14,#15*4]
+ ldr r10,[r14,#13*4]
+ ldr r11,[r14,#7*4]
+ add r7,r8,r7,ror#2 @ E+=K_xx_xx
+ ldr r12,[r14,#2*4]
+ eor r9,r9,r10
+ eor r11,r11,r12 @ 1 cycle stall
+ eor r10,r5,r6 @ F_xx_xx
+ mov r9,r9,ror#31
+ add r7,r7,r3,ror#27 @ E+=ROR(A,27)
+ eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
+ and r10,r4,r10,ror#2 @ F_xx_xx
+ and r11,r5,r6 @ F_xx_xx
+ add r7,r7,r9 @ E+=X[i]
+ add r7,r7,r10 @ E+=F_40_59(B,C,D)
+ add r7,r7,r11,ror#2
+ ldr r9,[r14,#15*4]
+ ldr r10,[r14,#13*4]
+ ldr r11,[r14,#7*4]
+ add r6,r8,r6,ror#2 @ E+=K_xx_xx
+ ldr r12,[r14,#2*4]
+ eor r9,r9,r10
+ eor r11,r11,r12 @ 1 cycle stall
+ eor r10,r4,r5 @ F_xx_xx
+ mov r9,r9,ror#31
+ add r6,r6,r7,ror#27 @ E+=ROR(A,27)
+ eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
+ and r10,r3,r10,ror#2 @ F_xx_xx
+ and r11,r4,r5 @ F_xx_xx
+ add r6,r6,r9 @ E+=X[i]
+ add r6,r6,r10 @ E+=F_40_59(B,C,D)
+ add r6,r6,r11,ror#2
+ ldr r9,[r14,#15*4]
+ ldr r10,[r14,#13*4]
+ ldr r11,[r14,#7*4]
+ add r5,r8,r5,ror#2 @ E+=K_xx_xx
+ ldr r12,[r14,#2*4]
+ eor r9,r9,r10
+ eor r11,r11,r12 @ 1 cycle stall
+ eor r10,r3,r4 @ F_xx_xx
+ mov r9,r9,ror#31
+ add r5,r5,r6,ror#27 @ E+=ROR(A,27)
+ eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
+ and r10,r7,r10,ror#2 @ F_xx_xx
+ and r11,r3,r4 @ F_xx_xx
+ add r5,r5,r9 @ E+=X[i]
+ add r5,r5,r10 @ E+=F_40_59(B,C,D)
+ add r5,r5,r11,ror#2
+ ldr r9,[r14,#15*4]
+ ldr r10,[r14,#13*4]
+ ldr r11,[r14,#7*4]
+ add r4,r8,r4,ror#2 @ E+=K_xx_xx
+ ldr r12,[r14,#2*4]
+ eor r9,r9,r10
+ eor r11,r11,r12 @ 1 cycle stall
+ eor r10,r7,r3 @ F_xx_xx
+ mov r9,r9,ror#31
+ add r4,r4,r5,ror#27 @ E+=ROR(A,27)
+ eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
+ and r10,r6,r10,ror#2 @ F_xx_xx
+ and r11,r7,r3 @ F_xx_xx
+ add r4,r4,r9 @ E+=X[i]
+ add r4,r4,r10 @ E+=F_40_59(B,C,D)
+ add r4,r4,r11,ror#2
+ ldr r9,[r14,#15*4]
+ ldr r10,[r14,#13*4]
+ ldr r11,[r14,#7*4]
+ add r3,r8,r3,ror#2 @ E+=K_xx_xx
+ ldr r12,[r14,#2*4]
+ eor r9,r9,r10
+ eor r11,r11,r12 @ 1 cycle stall
+ eor r10,r6,r7 @ F_xx_xx
+ mov r9,r9,ror#31
+ add r3,r3,r4,ror#27 @ E+=ROR(A,27)
+ eor r9,r9,r11,ror#31
+ str r9,[r14,#-4]!
+ and r10,r5,r10,ror#2 @ F_xx_xx
+ and r11,r6,r7 @ F_xx_xx
+ add r3,r3,r9 @ E+=X[i]
+ add r3,r3,r10 @ E+=F_40_59(B,C,D)
+ add r3,r3,r11,ror#2
+ teq r14,sp
+ bne .L_40_59 @ [+((12+5)*5+2)*4]
+
+ ldr r8,.LK_60_79
+ sub sp,sp,#20*4
+ cmp sp,#0 @ set carry to denote 60_79
+ b .L_20_39_or_60_79 @ [+4], spare 300 bytes
+.L_done:
+ add sp,sp,#80*4 @ "deallocate" stack frame
+ ldmia r0,{r8,r9,r10,r11,r12}
+ add r3,r8,r3
+ add r4,r9,r4
+ add r5,r10,r5,ror#2
+ add r6,r11,r6,ror#2
+ add r7,r12,r7,ror#2
+ stmia r0,{r3,r4,r5,r6,r7}
+ teq r1,r2
+ bne .Lloop @ [+18], total 1307
+
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r12,pc}
+#else
+ ldmia sp!,{r4-r12,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.align 2
+.LK_00_19: .word 0x5a827999
+.LK_20_39: .word 0x6ed9eba1
+.LK_40_59: .word 0x8f1bbcdc
+.LK_60_79: .word 0xca62c1d6
+.size sha1_block_data_order,.-sha1_block_data_order
+.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
+.align 2
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
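The ARMv4 routine above implements only the block transform, sha1_block_data_order; the sha1dgst.o and sha1_one.o objects listed in the Makefile wrap it behind the usual <openssl/sha.h> API. A small sanity-check sketch (the digest of "abc" is the well-known SHA-1 test vector):

#include <stdio.h>
#include <openssl/sha.h>

int
main(void)
{
	unsigned char md[SHA_DIGEST_LENGTH];
	unsigned int i;

	/* One-shot API; when the assembly is enabled this ends up calling
	 * sha1_block_data_order() per 64-byte block. */
	SHA1((const unsigned char *)"abc", 3, md);
	for (i = 0; i < SHA_DIGEST_LENGTH; i++)
		printf("%02x", md[i]);
	printf("\n");	/* expect a9993e364706816aba3e25717850c26c9cd0d89d */
	return 0;
}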
diff --git a/ext/libressl/crypto/sha/sha1-elf-x86_64.S b/ext/libressl/crypto/sha/sha1-elf-x86_64.S
new file mode 100644
index 0000000..5a37019
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha1-elf-x86_64.S
@@ -0,0 +1,2491 @@
+#include "x86_arch.h"
+.text
+
+.hidden OPENSSL_ia32cap_P
+
+.globl sha1_block_data_order
+.type sha1_block_data_order,@function
+.align 16
+sha1_block_data_order:
+ movl OPENSSL_ia32cap_P+0(%rip),%r9d
+ movl OPENSSL_ia32cap_P+4(%rip),%r8d
+ testl $IA32CAP_MASK1_SSSE3,%r8d
+ jz .Lialu
+ jmp _ssse3_shortcut
+
+.align 16
+.Lialu:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ movq %rsp,%r11
+ movq %rdi,%r8
+ subq $72,%rsp
+ movq %rsi,%r9
+ andq $-64,%rsp
+ movq %rdx,%r10
+ movq %r11,64(%rsp)
+.Lprologue:
+
+ movl 0(%r8),%esi
+ movl 4(%r8),%edi
+ movl 8(%r8),%r11d
+ movl 12(%r8),%r12d
+ movl 16(%r8),%r13d
+ jmp .Lloop
+
+.align 16
+.Lloop:
+ movl 0(%r9),%edx
+ bswapl %edx
+ movl %edx,0(%rsp)
+ movl %r11d,%eax
+ movl 4(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,4(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 8(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,8(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 12(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,12(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 16(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,16(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 20(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,20(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 24(%r9),%edx
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r13,1),%r13d
+ andl %edi,%eax
+ movl %edx,24(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 28(%r9),%ebp
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r12,1),%r12d
+ andl %esi,%eax
+ movl %ebp,28(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 32(%r9),%edx
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %edx,32(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 36(%r9),%ebp
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %ebp,36(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 40(%r9),%edx
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %edx,40(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 44(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,44(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 48(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,48(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 52(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,52(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 56(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,56(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 60(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,60(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl 0(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ xorl 32(%rsp),%edx
+ andl %edi,%eax
+ leal 1518500249(%rbp,%r13,1),%r13d
+ xorl 52(%rsp),%edx
+ xorl %r12d,%eax
+ roll $1,%edx
+ addl %ecx,%r13d
+ roll $30,%edi
+ movl %edx,0(%rsp)
+ addl %eax,%r13d
+ movl 4(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ xorl 36(%rsp),%ebp
+ andl %esi,%eax
+ leal 1518500249(%rdx,%r12,1),%r12d
+ xorl 56(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $1,%ebp
+ addl %ecx,%r12d
+ roll $30,%esi
+ movl %ebp,4(%rsp)
+ addl %eax,%r12d
+ movl 8(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ xorl 40(%rsp),%edx
+ andl %r13d,%eax
+ leal 1518500249(%rbp,%r11,1),%r11d
+ xorl 60(%rsp),%edx
+ xorl %edi,%eax
+ roll $1,%edx
+ addl %ecx,%r11d
+ roll $30,%r13d
+ movl %edx,8(%rsp)
+ addl %eax,%r11d
+ movl 12(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ leal 1518500249(%rdx,%rdi,1),%edi
+ xorl 0(%rsp),%ebp
+ xorl %esi,%eax
+ roll $1,%ebp
+ addl %ecx,%edi
+ roll $30,%r12d
+ movl %ebp,12(%rsp)
+ addl %eax,%edi
+ movl 16(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ leal 1518500249(%rbp,%rsi,1),%esi
+ xorl 4(%rsp),%edx
+ xorl %r13d,%eax
+ roll $1,%edx
+ addl %ecx,%esi
+ roll $30,%r11d
+ movl %edx,16(%rsp)
+ addl %eax,%esi
+ movl 20(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 52(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 8(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 56(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 12(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 16(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 20(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 24(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 28(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 32(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 36(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 40(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 44(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 48(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 52(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 56(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 60(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 0(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 4(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 8(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 12(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 16(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 20(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r13d
+ movl 40(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,40(%rsp)
+ addl %ecx,%r12d
+ movl 44(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 52(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,44(%rsp)
+ addl %ecx,%r11d
+ movl 48(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 56(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,48(%rsp)
+ addl %ecx,%edi
+ movl 52(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 60(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 40(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,52(%rsp)
+ addl %ecx,%esi
+ movl 56(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 0(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 44(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,56(%rsp)
+ addl %ecx,%r13d
+ movl 60(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 4(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 48(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,60(%rsp)
+ addl %ecx,%r12d
+ movl 0(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 8(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 52(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,0(%rsp)
+ addl %ecx,%r11d
+ movl 4(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 12(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 56(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,4(%rsp)
+ addl %ecx,%edi
+ movl 8(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 16(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 60(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,8(%rsp)
+ addl %ecx,%esi
+ movl 12(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 20(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 0(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,12(%rsp)
+ addl %ecx,%r13d
+ movl 16(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 24(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 4(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,16(%rsp)
+ addl %ecx,%r12d
+ movl 20(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 28(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 8(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,20(%rsp)
+ addl %ecx,%r11d
+ movl 24(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 32(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 12(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,24(%rsp)
+ addl %ecx,%edi
+ movl 28(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 36(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 16(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,28(%rsp)
+ addl %ecx,%esi
+ movl 32(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 40(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 20(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,32(%rsp)
+ addl %ecx,%r13d
+ movl 36(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r12d
+ movl 40(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 48(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,40(%rsp)
+ addl %ecx,%r11d
+ movl 44(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 52(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,44(%rsp)
+ addl %ecx,%edi
+ movl 48(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 56(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,48(%rsp)
+ addl %ecx,%esi
+ movl 52(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 20(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 40(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 24(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 44(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 48(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 52(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 56(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 60(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 0(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 4(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 8(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rsi,1),%esi
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 12(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 16(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 20(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 24(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 28(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 32(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 36(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 40(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl 56(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 44(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl 60(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 48(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl %r11d,%eax
+ leal -899497514(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ roll $30,%r11d
+ addl %eax,%esi
+ addl 0(%r8),%esi
+ addl 4(%r8),%edi
+ addl 8(%r8),%r11d
+ addl 12(%r8),%r12d
+ addl 16(%r8),%r13d
+ movl %esi,0(%r8)
+ movl %edi,4(%r8)
+ movl %r11d,8(%r8)
+ movl %r12d,12(%r8)
+ movl %r13d,16(%r8)
+
+ subq $1,%r10
+ leaq 64(%r9),%r9
+ jnz .Lloop
+
+ movq 64(%rsp),%rsi
+ movq (%rsi),%r13
+ movq 8(%rsi),%r12
+ movq 16(%rsi),%rbp
+ movq 24(%rsi),%rbx
+ leaq 32(%rsi),%rsp
+.Lepilogue:
+ retq
+.size sha1_block_data_order,.-sha1_block_data_order
+.type sha1_block_data_order_ssse3,@function
+.align 16
+sha1_block_data_order_ssse3:
+_ssse3_shortcut:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ leaq -64(%rsp),%rsp
+ movq %rdi,%r8
+ movq %rsi,%r9
+ movq %rdx,%r10
+
+ shlq $6,%r10
+ addq %r9,%r10
+ leaq K_XX_XX(%rip),%r11
+
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movl %ebx,%esi
+ movl 16(%r8),%ebp
+
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r9
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ jmp .Loop_ssse3
+.align 16
+.Loop_ssse3:
+ movdqa %xmm1,%xmm4
+ addl 0(%rsp),%ebp
+ xorl %edx,%ecx
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ roll $5,%eax
+ paddd %xmm3,%xmm9
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%ebp
+ pxor %xmm2,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pxor %xmm8,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm9,48(%rsp)
+ xorl %ecx,%edi
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm10,%xmm9
+ rorl $7,%ebp
+ addl %esi,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa 0(%r11),%xmm10
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm4
+ rorl $7,%edx
+ addl %edi,%ebx
+ movdqa %xmm2,%xmm5
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ roll $5,%ebx
+ paddd %xmm4,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrldq $4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm9
+ addl 20(%rsp),%ebp
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm9,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm10,0(%rsp)
+ xorl %edx,%edi
+ addl %eax,%ebp
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ roll $5,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm9
+ xorl %ecx,%esi
+ addl %ebp,%edx
+ movdqa %xmm8,%xmm10
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ movdqa 16(%r11),%xmm8
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ pxor %xmm10,%xmm5
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movdqa %xmm3,%xmm6
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm5,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ psrldq $4,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm10,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm8,16(%rsp)
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+ xorl %edx,%ecx
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm10
+ xorl %edx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa 16(%r11),%xmm9
+ xorl %ecx,%edi
+ addl %ebp,%edx
+ pxor %xmm8,%xmm6
+ rorl $7,%eax
+ addl %edi,%edx
+ movdqa %xmm4,%xmm7
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm6,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm5,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ pxor %xmm8,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm9,32(%rsp)
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrld $31,%xmm8
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ movdqa %xmm10,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ addl 60(%rsp),%ebp
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa 16(%r11),%xmm10
+ xorl %edx,%edi
+ addl %eax,%ebp
+ pxor %xmm9,%xmm7
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movdqa %xmm7,%xmm9
+ addl 0(%rsp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm1,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ xorl %ecx,%esi
+ addl %ebp,%edx
+ pxor %xmm9,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ psrld $30,%xmm9
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ roll $5,%ecx
+ por %xmm9,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ movdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 16(%rsp),%ebp
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 32(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ xorl %edx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ xorl %eax,%esi
+ addl %edx,%ecx
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ xorl %edx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ xorl %ebp,%edi
+ movdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ecx,%edi
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp
+ andl %edx,%edi
+ pxor %xmm7,%xmm6
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ pxor %xmm9,%xmm6
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%edi
+ rorl $7,%eax
+ psrld $30,%xmm9
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ por %xmm9,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%edi
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ pxor %xmm0,%xmm7
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 48(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ pxor %xmm10,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%edi
+ rorl $7,%ebx
+ psrld $30,%xmm10
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ por %xmm10,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%edi
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx
+ andl %eax,%edi
+ pxor %xmm1,%xmm0
+ andl %ebp,%esi
+ rorl $7,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ pxor %xmm8,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ pslld $2,%xmm0
+ andl %edx,%edi
+ rorl $7,%ecx
+ psrld $30,%xmm8
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ por %xmm8,%xmm0
+ movl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%edi
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ pxor %xmm2,%xmm1
+ andl %eax,%esi
+ rorl $7,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ pxor %xmm9,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %ebp,%edi
+ rorl $7,%edx
+ psrld $30,%xmm9
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ por %xmm9,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax
+ andl %ebp,%edi
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp
+ andl %edx,%esi
+ andl %ecx,%edi
+ rorl $7,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%edi
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ pxor %xmm3,%xmm2
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ pxor %xmm10,%xmm2
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%edi
+ rorl $7,%ebp
+ psrld $30,%xmm10
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm10,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+ rorl $7,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax
+ andl %ebp,%esi
+ andl %edx,%edi
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ cmpq %r10,%r9
+ je .Ldone_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r9
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,0(%rsp)
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ psubd %xmm9,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm9,%xmm1
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movdqa %xmm1,16(%rsp)
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ psubd %xmm9,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ paddd %xmm9,%xmm2
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%rsp)
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ psubd %xmm9,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ addl 12(%r8),%edx
+ movl %eax,0(%r8)
+ addl 16(%r8),%ebp
+ movl %esi,4(%r8)
+ movl %esi,%ebx
+ movl %ecx,8(%r8)
+ movl %edx,12(%r8)
+ movl %ebp,16(%r8)
+ jmp .Loop_ssse3
+
+.align 16
+.Ldone_ssse3:
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ movl %eax,0(%r8)
+ addl 12(%r8),%edx
+ movl %esi,4(%r8)
+ addl 16(%r8),%ebp
+ movl %ecx,8(%r8)
+ movl %edx,12(%r8)
+ movl %ebp,16(%r8)
+ leaq 64(%rsp),%rsi
+ movq 0(%rsi),%r12
+ movq 8(%rsi),%rbp
+ movq 16(%rsi),%rbx
+ leaq 24(%rsi),%rsp
+.Lepilogue_ssse3:
+ retq
+.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
+.align 64
+K_XX_XX:
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/sha/sha1-macosx-x86_64.S b/ext/libressl/crypto/sha/sha1-macosx-x86_64.S
new file mode 100644
index 0000000..04a8aff
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha1-macosx-x86_64.S
@@ -0,0 +1,2488 @@
+#include "x86_arch.h"
+.text
+
+.private_extern _OPENSSL_ia32cap_P
+
+.globl _sha1_block_data_order
+
+.p2align 4
+_sha1_block_data_order:
+ movl _OPENSSL_ia32cap_P+0(%rip),%r9d
+ movl _OPENSSL_ia32cap_P+4(%rip),%r8d
+ testl $IA32CAP_MASK1_SSSE3,%r8d
+ jz L$ialu
+ jmp _ssse3_shortcut
+
+.p2align 4
+L$ialu:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ movq %rsp,%r11
+ movq %rdi,%r8
+ subq $72,%rsp
+ movq %rsi,%r9
+ andq $-64,%rsp
+ movq %rdx,%r10
+ movq %r11,64(%rsp)
+L$prologue:
+
+ movl 0(%r8),%esi
+ movl 4(%r8),%edi
+ movl 8(%r8),%r11d
+ movl 12(%r8),%r12d
+ movl 16(%r8),%r13d
+ jmp L$loop
+
+.p2align 4
+L$loop:
+ movl 0(%r9),%edx
+ bswapl %edx
+ movl %edx,0(%rsp)
+ movl %r11d,%eax
+ movl 4(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,4(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 8(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,8(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 12(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,12(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 16(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,16(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 20(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,20(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 24(%r9),%edx
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r13,1),%r13d
+ andl %edi,%eax
+ movl %edx,24(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 28(%r9),%ebp
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r12,1),%r12d
+ andl %esi,%eax
+ movl %ebp,28(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 32(%r9),%edx
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %edx,32(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 36(%r9),%ebp
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %ebp,36(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 40(%r9),%edx
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %edx,40(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 44(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,44(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 48(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,48(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 52(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,52(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 56(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,56(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 60(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,60(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl 0(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ xorl 32(%rsp),%edx
+ andl %edi,%eax
+ leal 1518500249(%rbp,%r13,1),%r13d
+ xorl 52(%rsp),%edx
+ xorl %r12d,%eax
+ roll $1,%edx
+ addl %ecx,%r13d
+ roll $30,%edi
+ movl %edx,0(%rsp)
+ addl %eax,%r13d
+ movl 4(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ xorl 36(%rsp),%ebp
+ andl %esi,%eax
+ leal 1518500249(%rdx,%r12,1),%r12d
+ xorl 56(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $1,%ebp
+ addl %ecx,%r12d
+ roll $30,%esi
+ movl %ebp,4(%rsp)
+ addl %eax,%r12d
+ movl 8(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ xorl 40(%rsp),%edx
+ andl %r13d,%eax
+ leal 1518500249(%rbp,%r11,1),%r11d
+ xorl 60(%rsp),%edx
+ xorl %edi,%eax
+ roll $1,%edx
+ addl %ecx,%r11d
+ roll $30,%r13d
+ movl %edx,8(%rsp)
+ addl %eax,%r11d
+ movl 12(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ leal 1518500249(%rdx,%rdi,1),%edi
+ xorl 0(%rsp),%ebp
+ xorl %esi,%eax
+ roll $1,%ebp
+ addl %ecx,%edi
+ roll $30,%r12d
+ movl %ebp,12(%rsp)
+ addl %eax,%edi
+ movl 16(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ leal 1518500249(%rbp,%rsi,1),%esi
+ xorl 4(%rsp),%edx
+ xorl %r13d,%eax
+ roll $1,%edx
+ addl %ecx,%esi
+ roll $30,%r11d
+ movl %edx,16(%rsp)
+ addl %eax,%esi
+ movl 20(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 52(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 8(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 56(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 12(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 16(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 20(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 24(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 28(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 32(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 36(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 40(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 44(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 48(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 52(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 56(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 60(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 0(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 4(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 8(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 12(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 16(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 20(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r13d
+ movl 40(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,40(%rsp)
+ addl %ecx,%r12d
+ movl 44(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 52(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,44(%rsp)
+ addl %ecx,%r11d
+ movl 48(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 56(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,48(%rsp)
+ addl %ecx,%edi
+ movl 52(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 60(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 40(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,52(%rsp)
+ addl %ecx,%esi
+ movl 56(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 0(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 44(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,56(%rsp)
+ addl %ecx,%r13d
+ movl 60(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 4(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 48(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,60(%rsp)
+ addl %ecx,%r12d
+ movl 0(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 8(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 52(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,0(%rsp)
+ addl %ecx,%r11d
+ movl 4(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 12(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 56(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,4(%rsp)
+ addl %ecx,%edi
+ movl 8(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 16(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 60(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,8(%rsp)
+ addl %ecx,%esi
+ movl 12(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 20(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 0(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,12(%rsp)
+ addl %ecx,%r13d
+ movl 16(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 24(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 4(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,16(%rsp)
+ addl %ecx,%r12d
+ movl 20(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 28(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 8(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,20(%rsp)
+ addl %ecx,%r11d
+ movl 24(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 32(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 12(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,24(%rsp)
+ addl %ecx,%edi
+ movl 28(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 36(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 16(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,28(%rsp)
+ addl %ecx,%esi
+ movl 32(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 40(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 20(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,32(%rsp)
+ addl %ecx,%r13d
+ movl 36(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r12d
+ movl 40(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 48(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,40(%rsp)
+ addl %ecx,%r11d
+ movl 44(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 52(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,44(%rsp)
+ addl %ecx,%edi
+ movl 48(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 56(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,48(%rsp)
+ addl %ecx,%esi
+ movl 52(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 20(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 40(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 24(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 44(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 48(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 52(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 56(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 60(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 0(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 4(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 8(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rsi,1),%esi
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 12(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 16(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 20(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 24(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 28(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 32(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 36(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 40(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl 56(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 44(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl 60(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 48(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl %r11d,%eax
+ leal -899497514(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ roll $30,%r11d
+ addl %eax,%esi
+ addl 0(%r8),%esi
+ addl 4(%r8),%edi
+ addl 8(%r8),%r11d
+ addl 12(%r8),%r12d
+ addl 16(%r8),%r13d
+ movl %esi,0(%r8)
+ movl %edi,4(%r8)
+ movl %r11d,8(%r8)
+ movl %r12d,12(%r8)
+ movl %r13d,16(%r8)
+
+ subq $1,%r10
+ leaq 64(%r9),%r9
+ jnz L$loop
+
+ movq 64(%rsp),%rsi
+ movq (%rsi),%r13
+ movq 8(%rsi),%r12
+ movq 16(%rsi),%rbp
+ movq 24(%rsi),%rbx
+ leaq 32(%rsi),%rsp
+L$epilogue:
+ retq
+
+
+.p2align 4
+sha1_block_data_order_ssse3:
+_ssse3_shortcut:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ leaq -64(%rsp),%rsp
+ movq %rdi,%r8
+ movq %rsi,%r9
+ movq %rdx,%r10
+
+ shlq $6,%r10
+ addq %r9,%r10
+ leaq K_XX_XX(%rip),%r11
+
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movl %ebx,%esi
+ movl 16(%r8),%ebp
+
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r9
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ jmp L$oop_ssse3
+.p2align 4
+L$oop_ssse3:
+ movdqa %xmm1,%xmm4
+ addl 0(%rsp),%ebp
+ xorl %edx,%ecx
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ roll $5,%eax
+ paddd %xmm3,%xmm9
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%ebp
+ pxor %xmm2,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pxor %xmm8,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm9,48(%rsp)
+ xorl %ecx,%edi
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm10,%xmm9
+ rorl $7,%ebp
+ addl %esi,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa 0(%r11),%xmm10
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm4
+ rorl $7,%edx
+ addl %edi,%ebx
+ movdqa %xmm2,%xmm5
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ roll $5,%ebx
+ paddd %xmm4,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrldq $4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm9
+ addl 20(%rsp),%ebp
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm9,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm10,0(%rsp)
+ xorl %edx,%edi
+ addl %eax,%ebp
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ roll $5,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm9
+ xorl %ecx,%esi
+ addl %ebp,%edx
+ movdqa %xmm8,%xmm10
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ movdqa 16(%r11),%xmm8
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ pxor %xmm10,%xmm5
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movdqa %xmm3,%xmm6
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm5,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ psrldq $4,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm10,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm8,16(%rsp)
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+ xorl %edx,%ecx
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm10
+ xorl %edx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa 16(%r11),%xmm9
+ xorl %ecx,%edi
+ addl %ebp,%edx
+ pxor %xmm8,%xmm6
+ rorl $7,%eax
+ addl %edi,%edx
+ movdqa %xmm4,%xmm7
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm6,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm5,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ pxor %xmm8,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm9,32(%rsp)
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrld $31,%xmm8
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ movdqa %xmm10,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ addl 60(%rsp),%ebp
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa 16(%r11),%xmm10
+ xorl %edx,%edi
+ addl %eax,%ebp
+ pxor %xmm9,%xmm7
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movdqa %xmm7,%xmm9
+ addl 0(%rsp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm1,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ xorl %ecx,%esi
+ addl %ebp,%edx
+ pxor %xmm9,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ psrld $30,%xmm9
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ roll $5,%ecx
+ por %xmm9,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ movdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 16(%rsp),%ebp
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 32(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ xorl %edx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ xorl %eax,%esi
+ addl %edx,%ecx
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ xorl %edx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ xorl %ebp,%edi
+ movdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ecx,%edi
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp
+ andl %edx,%edi
+ pxor %xmm7,%xmm6
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ pxor %xmm9,%xmm6
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%edi
+ rorl $7,%eax
+ psrld $30,%xmm9
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ por %xmm9,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%edi
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ pxor %xmm0,%xmm7
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 48(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ pxor %xmm10,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%edi
+ rorl $7,%ebx
+ psrld $30,%xmm10
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ por %xmm10,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%edi
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx
+ andl %eax,%edi
+ pxor %xmm1,%xmm0
+ andl %ebp,%esi
+ rorl $7,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ pxor %xmm8,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ pslld $2,%xmm0
+ andl %edx,%edi
+ rorl $7,%ecx
+ psrld $30,%xmm8
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ por %xmm8,%xmm0
+ movl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%edi
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ pxor %xmm2,%xmm1
+ andl %eax,%esi
+ rorl $7,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ pxor %xmm9,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %ebp,%edi
+ rorl $7,%edx
+ psrld $30,%xmm9
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ por %xmm9,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax
+ andl %ebp,%edi
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp
+ andl %edx,%esi
+ andl %ecx,%edi
+ rorl $7,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%edi
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ pxor %xmm3,%xmm2
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ pxor %xmm10,%xmm2
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%edi
+ rorl $7,%ebp
+ psrld $30,%xmm10
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm10,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+ rorl $7,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax
+ andl %ebp,%esi
+ andl %edx,%edi
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ cmpq %r10,%r9
+ je L$done_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r9
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,0(%rsp)
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ psubd %xmm9,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm9,%xmm1
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movdqa %xmm1,16(%rsp)
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ psubd %xmm9,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ paddd %xmm9,%xmm2
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%rsp)
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ psubd %xmm9,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ addl 12(%r8),%edx
+ movl %eax,0(%r8)
+ addl 16(%r8),%ebp
+ movl %esi,4(%r8)
+ movl %esi,%ebx
+ movl %ecx,8(%r8)
+ movl %edx,12(%r8)
+ movl %ebp,16(%r8)
+ jmp L$oop_ssse3
+
+.p2align 4
+L$done_ssse3:
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ movl %eax,0(%r8)
+ addl 12(%r8),%edx
+ movl %esi,4(%r8)
+ addl 16(%r8),%ebp
+ movl %ecx,8(%r8)
+ movl %edx,12(%r8)
+ movl %ebp,16(%r8)
+ leaq 64(%rsp),%rsi
+ movq 0(%rsi),%r12
+ movq 8(%rsi),%rbp
+ movq 16(%rsi),%rbx
+ leaq 24(%rsi),%rsp
+L$epilogue_ssse3:
+ retq
+
+.p2align 6
+K_XX_XX:
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align 6
diff --git a/ext/libressl/crypto/sha/sha1-masm-x86_64.S b/ext/libressl/crypto/sha/sha1-masm-x86_64.S
new file mode 100644
index 0000000..36d8732
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha1-masm-x86_64.S
@@ -0,0 +1,2746 @@
+; 1 "crypto/sha/sha1-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/sha/sha1-masm-x86_64.S.tmp" 2
+OPTION DOTNAME
+
+; 1 "./crypto/x86_arch.h" 1
+
+
+; 16 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+; 40 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 "crypto/sha/sha1-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+EXTERN OPENSSL_ia32cap_P:NEAR
+
+
+PUBLIC sha1_block_data_order
+
+ALIGN 16
+sha1_block_data_order PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha1_block_data_order::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ mov r9d,DWORD PTR[((OPENSSL_ia32cap_P+0))]
+ mov r8d,DWORD PTR[((OPENSSL_ia32cap_P+4))]
+ test r8d,(1 SHL 9)
+ jz $L$ialu
+ jmp _ssse3_shortcut
+
+ALIGN 16
+$L$ialu::
+ push rbx
+ push rbp
+ push r12
+ push r13
+ mov r11,rsp
+ mov r8,rdi
+ sub rsp,72
+ mov r9,rsi
+ and rsp,-64
+ mov r10,rdx
+ mov QWORD PTR[64+rsp],r11
+$L$prologue::
+
+ mov esi,DWORD PTR[r8]
+ mov edi,DWORD PTR[4+r8]
+ mov r11d,DWORD PTR[8+r8]
+ mov r12d,DWORD PTR[12+r8]
+ mov r13d,DWORD PTR[16+r8]
+ jmp $L$loop
+
+ALIGN 16
+$L$loop::
+ mov edx,DWORD PTR[r9]
+ bswap edx
+ mov DWORD PTR[rsp],edx
+ mov eax,r11d
+ mov ebp,DWORD PTR[4+r9]
+ mov ecx,esi
+ xor eax,r12d
+ bswap ebp
+ rol ecx,5
+ lea r13d,DWORD PTR[1518500249+r13*1+rdx]
+ and eax,edi
+ mov DWORD PTR[4+rsp],ebp
+ add r13d,ecx
+ xor eax,r12d
+ rol edi,30
+ add r13d,eax
+ mov eax,edi
+ mov edx,DWORD PTR[8+r9]
+ mov ecx,r13d
+ xor eax,r11d
+ bswap edx
+ rol ecx,5
+ lea r12d,DWORD PTR[1518500249+r12*1+rbp]
+ and eax,esi
+ mov DWORD PTR[8+rsp],edx
+ add r12d,ecx
+ xor eax,r11d
+ rol esi,30
+ add r12d,eax
+ mov eax,esi
+ mov ebp,DWORD PTR[12+r9]
+ mov ecx,r12d
+ xor eax,edi
+ bswap ebp
+ rol ecx,5
+ lea r11d,DWORD PTR[1518500249+r11*1+rdx]
+ and eax,r13d
+ mov DWORD PTR[12+rsp],ebp
+ add r11d,ecx
+ xor eax,edi
+ rol r13d,30
+ add r11d,eax
+ mov eax,r13d
+ mov edx,DWORD PTR[16+r9]
+ mov ecx,r11d
+ xor eax,esi
+ bswap edx
+ rol ecx,5
+ lea edi,DWORD PTR[1518500249+rdi*1+rbp]
+ and eax,r12d
+ mov DWORD PTR[16+rsp],edx
+ add edi,ecx
+ xor eax,esi
+ rol r12d,30
+ add edi,eax
+ mov eax,r12d
+ mov ebp,DWORD PTR[20+r9]
+ mov ecx,edi
+ xor eax,r13d
+ bswap ebp
+ rol ecx,5
+ lea esi,DWORD PTR[1518500249+rsi*1+rdx]
+ and eax,r11d
+ mov DWORD PTR[20+rsp],ebp
+ add esi,ecx
+ xor eax,r13d
+ rol r11d,30
+ add esi,eax
+ mov eax,r11d
+ mov edx,DWORD PTR[24+r9]
+ mov ecx,esi
+ xor eax,r12d
+ bswap edx
+ rol ecx,5
+ lea r13d,DWORD PTR[1518500249+r13*1+rbp]
+ and eax,edi
+ mov DWORD PTR[24+rsp],edx
+ add r13d,ecx
+ xor eax,r12d
+ rol edi,30
+ add r13d,eax
+ mov eax,edi
+ mov ebp,DWORD PTR[28+r9]
+ mov ecx,r13d
+ xor eax,r11d
+ bswap ebp
+ rol ecx,5
+ lea r12d,DWORD PTR[1518500249+r12*1+rdx]
+ and eax,esi
+ mov DWORD PTR[28+rsp],ebp
+ add r12d,ecx
+ xor eax,r11d
+ rol esi,30
+ add r12d,eax
+ mov eax,esi
+ mov edx,DWORD PTR[32+r9]
+ mov ecx,r12d
+ xor eax,edi
+ bswap edx
+ rol ecx,5
+ lea r11d,DWORD PTR[1518500249+r11*1+rbp]
+ and eax,r13d
+ mov DWORD PTR[32+rsp],edx
+ add r11d,ecx
+ xor eax,edi
+ rol r13d,30
+ add r11d,eax
+ mov eax,r13d
+ mov ebp,DWORD PTR[36+r9]
+ mov ecx,r11d
+ xor eax,esi
+ bswap ebp
+ rol ecx,5
+ lea edi,DWORD PTR[1518500249+rdi*1+rdx]
+ and eax,r12d
+ mov DWORD PTR[36+rsp],ebp
+ add edi,ecx
+ xor eax,esi
+ rol r12d,30
+ add edi,eax
+ mov eax,r12d
+ mov edx,DWORD PTR[40+r9]
+ mov ecx,edi
+ xor eax,r13d
+ bswap edx
+ rol ecx,5
+ lea esi,DWORD PTR[1518500249+rsi*1+rbp]
+ and eax,r11d
+ mov DWORD PTR[40+rsp],edx
+ add esi,ecx
+ xor eax,r13d
+ rol r11d,30
+ add esi,eax
+ mov eax,r11d
+ mov ebp,DWORD PTR[44+r9]
+ mov ecx,esi
+ xor eax,r12d
+ bswap ebp
+ rol ecx,5
+ lea r13d,DWORD PTR[1518500249+r13*1+rdx]
+ and eax,edi
+ mov DWORD PTR[44+rsp],ebp
+ add r13d,ecx
+ xor eax,r12d
+ rol edi,30
+ add r13d,eax
+ mov eax,edi
+ mov edx,DWORD PTR[48+r9]
+ mov ecx,r13d
+ xor eax,r11d
+ bswap edx
+ rol ecx,5
+ lea r12d,DWORD PTR[1518500249+r12*1+rbp]
+ and eax,esi
+ mov DWORD PTR[48+rsp],edx
+ add r12d,ecx
+ xor eax,r11d
+ rol esi,30
+ add r12d,eax
+ mov eax,esi
+ mov ebp,DWORD PTR[52+r9]
+ mov ecx,r12d
+ xor eax,edi
+ bswap ebp
+ rol ecx,5
+ lea r11d,DWORD PTR[1518500249+r11*1+rdx]
+ and eax,r13d
+ mov DWORD PTR[52+rsp],ebp
+ add r11d,ecx
+ xor eax,edi
+ rol r13d,30
+ add r11d,eax
+ mov eax,r13d
+ mov edx,DWORD PTR[56+r9]
+ mov ecx,r11d
+ xor eax,esi
+ bswap edx
+ rol ecx,5
+ lea edi,DWORD PTR[1518500249+rdi*1+rbp]
+ and eax,r12d
+ mov DWORD PTR[56+rsp],edx
+ add edi,ecx
+ xor eax,esi
+ rol r12d,30
+ add edi,eax
+ mov eax,r12d
+ mov ebp,DWORD PTR[60+r9]
+ mov ecx,edi
+ xor eax,r13d
+ bswap ebp
+ rol ecx,5
+ lea esi,DWORD PTR[1518500249+rsi*1+rdx]
+ and eax,r11d
+ mov DWORD PTR[60+rsp],ebp
+ add esi,ecx
+ xor eax,r13d
+ rol r11d,30
+ add esi,eax
+ mov edx,DWORD PTR[rsp]
+ mov eax,r11d
+ mov ecx,esi
+ xor edx,DWORD PTR[8+rsp]
+ xor eax,r12d
+ rol ecx,5
+ xor edx,DWORD PTR[32+rsp]
+ and eax,edi
+ lea r13d,DWORD PTR[1518500249+r13*1+rbp]
+ xor edx,DWORD PTR[52+rsp]
+ xor eax,r12d
+ rol edx,1
+ add r13d,ecx
+ rol edi,30
+ mov DWORD PTR[rsp],edx
+ add r13d,eax
+ mov ebp,DWORD PTR[4+rsp]
+ mov eax,edi
+ mov ecx,r13d
+ xor ebp,DWORD PTR[12+rsp]
+ xor eax,r11d
+ rol ecx,5
+ xor ebp,DWORD PTR[36+rsp]
+ and eax,esi
+ lea r12d,DWORD PTR[1518500249+r12*1+rdx]
+ xor ebp,DWORD PTR[56+rsp]
+ xor eax,r11d
+ rol ebp,1
+ add r12d,ecx
+ rol esi,30
+ mov DWORD PTR[4+rsp],ebp
+ add r12d,eax
+ mov edx,DWORD PTR[8+rsp]
+ mov eax,esi
+ mov ecx,r12d
+ xor edx,DWORD PTR[16+rsp]
+ xor eax,edi
+ rol ecx,5
+ xor edx,DWORD PTR[40+rsp]
+ and eax,r13d
+ lea r11d,DWORD PTR[1518500249+r11*1+rbp]
+ xor edx,DWORD PTR[60+rsp]
+ xor eax,edi
+ rol edx,1
+ add r11d,ecx
+ rol r13d,30
+ mov DWORD PTR[8+rsp],edx
+ add r11d,eax
+ mov ebp,DWORD PTR[12+rsp]
+ mov eax,r13d
+ mov ecx,r11d
+ xor ebp,DWORD PTR[20+rsp]
+ xor eax,esi
+ rol ecx,5
+ xor ebp,DWORD PTR[44+rsp]
+ and eax,r12d
+ lea edi,DWORD PTR[1518500249+rdi*1+rdx]
+ xor ebp,DWORD PTR[rsp]
+ xor eax,esi
+ rol ebp,1
+ add edi,ecx
+ rol r12d,30
+ mov DWORD PTR[12+rsp],ebp
+ add edi,eax
+ mov edx,DWORD PTR[16+rsp]
+ mov eax,r12d
+ mov ecx,edi
+ xor edx,DWORD PTR[24+rsp]
+ xor eax,r13d
+ rol ecx,5
+ xor edx,DWORD PTR[48+rsp]
+ and eax,r11d
+ lea esi,DWORD PTR[1518500249+rsi*1+rbp]
+ xor edx,DWORD PTR[4+rsp]
+ xor eax,r13d
+ rol edx,1
+ add esi,ecx
+ rol r11d,30
+ mov DWORD PTR[16+rsp],edx
+ add esi,eax
+ mov ebp,DWORD PTR[20+rsp]
+ mov eax,r11d
+ mov ecx,esi
+ xor ebp,DWORD PTR[28+rsp]
+ xor eax,edi
+ rol ecx,5
+ lea r13d,DWORD PTR[1859775393+r13*1+rdx]
+ xor ebp,DWORD PTR[52+rsp]
+ xor eax,r12d
+ add r13d,ecx
+ xor ebp,DWORD PTR[8+rsp]
+ rol edi,30
+ add r13d,eax
+ rol ebp,1
+ mov DWORD PTR[20+rsp],ebp
+ mov edx,DWORD PTR[24+rsp]
+ mov eax,edi
+ mov ecx,r13d
+ xor edx,DWORD PTR[32+rsp]
+ xor eax,esi
+ rol ecx,5
+ lea r12d,DWORD PTR[1859775393+r12*1+rbp]
+ xor edx,DWORD PTR[56+rsp]
+ xor eax,r11d
+ add r12d,ecx
+ xor edx,DWORD PTR[12+rsp]
+ rol esi,30
+ add r12d,eax
+ rol edx,1
+ mov DWORD PTR[24+rsp],edx
+ mov ebp,DWORD PTR[28+rsp]
+ mov eax,esi
+ mov ecx,r12d
+ xor ebp,DWORD PTR[36+rsp]
+ xor eax,r13d
+ rol ecx,5
+ lea r11d,DWORD PTR[1859775393+r11*1+rdx]
+ xor ebp,DWORD PTR[60+rsp]
+ xor eax,edi
+ add r11d,ecx
+ xor ebp,DWORD PTR[16+rsp]
+ rol r13d,30
+ add r11d,eax
+ rol ebp,1
+ mov DWORD PTR[28+rsp],ebp
+ mov edx,DWORD PTR[32+rsp]
+ mov eax,r13d
+ mov ecx,r11d
+ xor edx,DWORD PTR[40+rsp]
+ xor eax,r12d
+ rol ecx,5
+ lea edi,DWORD PTR[1859775393+rdi*1+rbp]
+ xor edx,DWORD PTR[rsp]
+ xor eax,esi
+ add edi,ecx
+ xor edx,DWORD PTR[20+rsp]
+ rol r12d,30
+ add edi,eax
+ rol edx,1
+ mov DWORD PTR[32+rsp],edx
+ mov ebp,DWORD PTR[36+rsp]
+ mov eax,r12d
+ mov ecx,edi
+ xor ebp,DWORD PTR[44+rsp]
+ xor eax,r11d
+ rol ecx,5
+ lea esi,DWORD PTR[1859775393+rsi*1+rdx]
+ xor ebp,DWORD PTR[4+rsp]
+ xor eax,r13d
+ add esi,ecx
+ xor ebp,DWORD PTR[24+rsp]
+ rol r11d,30
+ add esi,eax
+ rol ebp,1
+ mov DWORD PTR[36+rsp],ebp
+ mov edx,DWORD PTR[40+rsp]
+ mov eax,r11d
+ mov ecx,esi
+ xor edx,DWORD PTR[48+rsp]
+ xor eax,edi
+ rol ecx,5
+ lea r13d,DWORD PTR[1859775393+r13*1+rbp]
+ xor edx,DWORD PTR[8+rsp]
+ xor eax,r12d
+ add r13d,ecx
+ xor edx,DWORD PTR[28+rsp]
+ rol edi,30
+ add r13d,eax
+ rol edx,1
+ mov DWORD PTR[40+rsp],edx
+ mov ebp,DWORD PTR[44+rsp]
+ mov eax,edi
+ mov ecx,r13d
+ xor ebp,DWORD PTR[52+rsp]
+ xor eax,esi
+ rol ecx,5
+ lea r12d,DWORD PTR[1859775393+r12*1+rdx]
+ xor ebp,DWORD PTR[12+rsp]
+ xor eax,r11d
+ add r12d,ecx
+ xor ebp,DWORD PTR[32+rsp]
+ rol esi,30
+ add r12d,eax
+ rol ebp,1
+ mov DWORD PTR[44+rsp],ebp
+ mov edx,DWORD PTR[48+rsp]
+ mov eax,esi
+ mov ecx,r12d
+ xor edx,DWORD PTR[56+rsp]
+ xor eax,r13d
+ rol ecx,5
+ lea r11d,DWORD PTR[1859775393+r11*1+rbp]
+ xor edx,DWORD PTR[16+rsp]
+ xor eax,edi
+ add r11d,ecx
+ xor edx,DWORD PTR[36+rsp]
+ rol r13d,30
+ add r11d,eax
+ rol edx,1
+ mov DWORD PTR[48+rsp],edx
+ mov ebp,DWORD PTR[52+rsp]
+ mov eax,r13d
+ mov ecx,r11d
+ xor ebp,DWORD PTR[60+rsp]
+ xor eax,r12d
+ rol ecx,5
+ lea edi,DWORD PTR[1859775393+rdi*1+rdx]
+ xor ebp,DWORD PTR[20+rsp]
+ xor eax,esi
+ add edi,ecx
+ xor ebp,DWORD PTR[40+rsp]
+ rol r12d,30
+ add edi,eax
+ rol ebp,1
+ mov DWORD PTR[52+rsp],ebp
+ mov edx,DWORD PTR[56+rsp]
+ mov eax,r12d
+ mov ecx,edi
+ xor edx,DWORD PTR[rsp]
+ xor eax,r11d
+ rol ecx,5
+ lea esi,DWORD PTR[1859775393+rsi*1+rbp]
+ xor edx,DWORD PTR[24+rsp]
+ xor eax,r13d
+ add esi,ecx
+ xor edx,DWORD PTR[44+rsp]
+ rol r11d,30
+ add esi,eax
+ rol edx,1
+ mov DWORD PTR[56+rsp],edx
+ mov ebp,DWORD PTR[60+rsp]
+ mov eax,r11d
+ mov ecx,esi
+ xor ebp,DWORD PTR[4+rsp]
+ xor eax,edi
+ rol ecx,5
+ lea r13d,DWORD PTR[1859775393+r13*1+rdx]
+ xor ebp,DWORD PTR[28+rsp]
+ xor eax,r12d
+ add r13d,ecx
+ xor ebp,DWORD PTR[48+rsp]
+ rol edi,30
+ add r13d,eax
+ rol ebp,1
+ mov DWORD PTR[60+rsp],ebp
+ mov edx,DWORD PTR[rsp]
+ mov eax,edi
+ mov ecx,r13d
+ xor edx,DWORD PTR[8+rsp]
+ xor eax,esi
+ rol ecx,5
+ lea r12d,DWORD PTR[1859775393+r12*1+rbp]
+ xor edx,DWORD PTR[32+rsp]
+ xor eax,r11d
+ add r12d,ecx
+ xor edx,DWORD PTR[52+rsp]
+ rol esi,30
+ add r12d,eax
+ rol edx,1
+ mov DWORD PTR[rsp],edx
+ mov ebp,DWORD PTR[4+rsp]
+ mov eax,esi
+ mov ecx,r12d
+ xor ebp,DWORD PTR[12+rsp]
+ xor eax,r13d
+ rol ecx,5
+ lea r11d,DWORD PTR[1859775393+r11*1+rdx]
+ xor ebp,DWORD PTR[36+rsp]
+ xor eax,edi
+ add r11d,ecx
+ xor ebp,DWORD PTR[56+rsp]
+ rol r13d,30
+ add r11d,eax
+ rol ebp,1
+ mov DWORD PTR[4+rsp],ebp
+ mov edx,DWORD PTR[8+rsp]
+ mov eax,r13d
+ mov ecx,r11d
+ xor edx,DWORD PTR[16+rsp]
+ xor eax,r12d
+ rol ecx,5
+ lea edi,DWORD PTR[1859775393+rdi*1+rbp]
+ xor edx,DWORD PTR[40+rsp]
+ xor eax,esi
+ add edi,ecx
+ xor edx,DWORD PTR[60+rsp]
+ rol r12d,30
+ add edi,eax
+ rol edx,1
+ mov DWORD PTR[8+rsp],edx
+ mov ebp,DWORD PTR[12+rsp]
+ mov eax,r12d
+ mov ecx,edi
+ xor ebp,DWORD PTR[20+rsp]
+ xor eax,r11d
+ rol ecx,5
+ lea esi,DWORD PTR[1859775393+rsi*1+rdx]
+ xor ebp,DWORD PTR[44+rsp]
+ xor eax,r13d
+ add esi,ecx
+ xor ebp,DWORD PTR[rsp]
+ rol r11d,30
+ add esi,eax
+ rol ebp,1
+ mov DWORD PTR[12+rsp],ebp
+ mov edx,DWORD PTR[16+rsp]
+ mov eax,r11d
+ mov ecx,esi
+ xor edx,DWORD PTR[24+rsp]
+ xor eax,edi
+ rol ecx,5
+ lea r13d,DWORD PTR[1859775393+r13*1+rbp]
+ xor edx,DWORD PTR[48+rsp]
+ xor eax,r12d
+ add r13d,ecx
+ xor edx,DWORD PTR[4+rsp]
+ rol edi,30
+ add r13d,eax
+ rol edx,1
+ mov DWORD PTR[16+rsp],edx
+ mov ebp,DWORD PTR[20+rsp]
+ mov eax,edi
+ mov ecx,r13d
+ xor ebp,DWORD PTR[28+rsp]
+ xor eax,esi
+ rol ecx,5
+ lea r12d,DWORD PTR[1859775393+r12*1+rdx]
+ xor ebp,DWORD PTR[52+rsp]
+ xor eax,r11d
+ add r12d,ecx
+ xor ebp,DWORD PTR[8+rsp]
+ rol esi,30
+ add r12d,eax
+ rol ebp,1
+ mov DWORD PTR[20+rsp],ebp
+ mov edx,DWORD PTR[24+rsp]
+ mov eax,esi
+ mov ecx,r12d
+ xor edx,DWORD PTR[32+rsp]
+ xor eax,r13d
+ rol ecx,5
+ lea r11d,DWORD PTR[1859775393+r11*1+rbp]
+ xor edx,DWORD PTR[56+rsp]
+ xor eax,edi
+ add r11d,ecx
+ xor edx,DWORD PTR[12+rsp]
+ rol r13d,30
+ add r11d,eax
+ rol edx,1
+ mov DWORD PTR[24+rsp],edx
+ mov ebp,DWORD PTR[28+rsp]
+ mov eax,r13d
+ mov ecx,r11d
+ xor ebp,DWORD PTR[36+rsp]
+ xor eax,r12d
+ rol ecx,5
+ lea edi,DWORD PTR[1859775393+rdi*1+rdx]
+ xor ebp,DWORD PTR[60+rsp]
+ xor eax,esi
+ add edi,ecx
+ xor ebp,DWORD PTR[16+rsp]
+ rol r12d,30
+ add edi,eax
+ rol ebp,1
+ mov DWORD PTR[28+rsp],ebp
+ mov edx,DWORD PTR[32+rsp]
+ mov eax,r12d
+ mov ecx,edi
+ xor edx,DWORD PTR[40+rsp]
+ xor eax,r11d
+ rol ecx,5
+ lea esi,DWORD PTR[1859775393+rsi*1+rbp]
+ xor edx,DWORD PTR[rsp]
+ xor eax,r13d
+ add esi,ecx
+ xor edx,DWORD PTR[20+rsp]
+ rol r11d,30
+ add esi,eax
+ rol edx,1
+ mov DWORD PTR[32+rsp],edx
+ mov ebp,DWORD PTR[36+rsp]
+ mov eax,r11d
+ mov ebx,r11d
+ xor ebp,DWORD PTR[44+rsp]
+ and eax,r12d
+ mov ecx,esi
+ xor ebp,DWORD PTR[4+rsp]
+ xor ebx,r12d
+ lea r13d,DWORD PTR[((-1894007588))+r13*1+rdx]
+ rol ecx,5
+ xor ebp,DWORD PTR[24+rsp]
+ add r13d,eax
+ and ebx,edi
+ rol ebp,1
+ add r13d,ebx
+ rol edi,30
+ mov DWORD PTR[36+rsp],ebp
+ add r13d,ecx
+ mov edx,DWORD PTR[40+rsp]
+ mov eax,edi
+ mov ebx,edi
+ xor edx,DWORD PTR[48+rsp]
+ and eax,r11d
+ mov ecx,r13d
+ xor edx,DWORD PTR[8+rsp]
+ xor ebx,r11d
+ lea r12d,DWORD PTR[((-1894007588))+r12*1+rbp]
+ rol ecx,5
+ xor edx,DWORD PTR[28+rsp]
+ add r12d,eax
+ and ebx,esi
+ rol edx,1
+ add r12d,ebx
+ rol esi,30
+ mov DWORD PTR[40+rsp],edx
+ add r12d,ecx
+ mov ebp,DWORD PTR[44+rsp]
+ mov eax,esi
+ mov ebx,esi
+ xor ebp,DWORD PTR[52+rsp]
+ and eax,edi
+ mov ecx,r12d
+ xor ebp,DWORD PTR[12+rsp]
+ xor ebx,edi
+ lea r11d,DWORD PTR[((-1894007588))+r11*1+rdx]
+ rol ecx,5
+ xor ebp,DWORD PTR[32+rsp]
+ add r11d,eax
+ and ebx,r13d
+ rol ebp,1
+ add r11d,ebx
+ rol r13d,30
+ mov DWORD PTR[44+rsp],ebp
+ add r11d,ecx
+ mov edx,DWORD PTR[48+rsp]
+ mov eax,r13d
+ mov ebx,r13d
+ xor edx,DWORD PTR[56+rsp]
+ and eax,esi
+ mov ecx,r11d
+ xor edx,DWORD PTR[16+rsp]
+ xor ebx,esi
+ lea edi,DWORD PTR[((-1894007588))+rdi*1+rbp]
+ rol ecx,5
+ xor edx,DWORD PTR[36+rsp]
+ add edi,eax
+ and ebx,r12d
+ rol edx,1
+ add edi,ebx
+ rol r12d,30
+ mov DWORD PTR[48+rsp],edx
+ add edi,ecx
+ mov ebp,DWORD PTR[52+rsp]
+ mov eax,r12d
+ mov ebx,r12d
+ xor ebp,DWORD PTR[60+rsp]
+ and eax,r13d
+ mov ecx,edi
+ xor ebp,DWORD PTR[20+rsp]
+ xor ebx,r13d
+ lea esi,DWORD PTR[((-1894007588))+rsi*1+rdx]
+ rol ecx,5
+ xor ebp,DWORD PTR[40+rsp]
+ add esi,eax
+ and ebx,r11d
+ rol ebp,1
+ add esi,ebx
+ rol r11d,30
+ mov DWORD PTR[52+rsp],ebp
+ add esi,ecx
+ mov edx,DWORD PTR[56+rsp]
+ mov eax,r11d
+ mov ebx,r11d
+ xor edx,DWORD PTR[rsp]
+ and eax,r12d
+ mov ecx,esi
+ xor edx,DWORD PTR[24+rsp]
+ xor ebx,r12d
+ lea r13d,DWORD PTR[((-1894007588))+r13*1+rbp]
+ rol ecx,5
+ xor edx,DWORD PTR[44+rsp]
+ add r13d,eax
+ and ebx,edi
+ rol edx,1
+ add r13d,ebx
+ rol edi,30
+ mov DWORD PTR[56+rsp],edx
+ add r13d,ecx
+ mov ebp,DWORD PTR[60+rsp]
+ mov eax,edi
+ mov ebx,edi
+ xor ebp,DWORD PTR[4+rsp]
+ and eax,r11d
+ mov ecx,r13d
+ xor ebp,DWORD PTR[28+rsp]
+ xor ebx,r11d
+ lea r12d,DWORD PTR[((-1894007588))+r12*1+rdx]
+ rol ecx,5
+ xor ebp,DWORD PTR[48+rsp]
+ add r12d,eax
+ and ebx,esi
+ rol ebp,1
+ add r12d,ebx
+ rol esi,30
+ mov DWORD PTR[60+rsp],ebp
+ add r12d,ecx
+ mov edx,DWORD PTR[rsp]
+ mov eax,esi
+ mov ebx,esi
+ xor edx,DWORD PTR[8+rsp]
+ and eax,edi
+ mov ecx,r12d
+ xor edx,DWORD PTR[32+rsp]
+ xor ebx,edi
+ lea r11d,DWORD PTR[((-1894007588))+r11*1+rbp]
+ rol ecx,5
+ xor edx,DWORD PTR[52+rsp]
+ add r11d,eax
+ and ebx,r13d
+ rol edx,1
+ add r11d,ebx
+ rol r13d,30
+ mov DWORD PTR[rsp],edx
+ add r11d,ecx
+ mov ebp,DWORD PTR[4+rsp]
+ mov eax,r13d
+ mov ebx,r13d
+ xor ebp,DWORD PTR[12+rsp]
+ and eax,esi
+ mov ecx,r11d
+ xor ebp,DWORD PTR[36+rsp]
+ xor ebx,esi
+ lea edi,DWORD PTR[((-1894007588))+rdi*1+rdx]
+ rol ecx,5
+ xor ebp,DWORD PTR[56+rsp]
+ add edi,eax
+ and ebx,r12d
+ rol ebp,1
+ add edi,ebx
+ rol r12d,30
+ mov DWORD PTR[4+rsp],ebp
+ add edi,ecx
+ mov edx,DWORD PTR[8+rsp]
+ mov eax,r12d
+ mov ebx,r12d
+ xor edx,DWORD PTR[16+rsp]
+ and eax,r13d
+ mov ecx,edi
+ xor edx,DWORD PTR[40+rsp]
+ xor ebx,r13d
+ lea esi,DWORD PTR[((-1894007588))+rsi*1+rbp]
+ rol ecx,5
+ xor edx,DWORD PTR[60+rsp]
+ add esi,eax
+ and ebx,r11d
+ rol edx,1
+ add esi,ebx
+ rol r11d,30
+ mov DWORD PTR[8+rsp],edx
+ add esi,ecx
+ mov ebp,DWORD PTR[12+rsp]
+ mov eax,r11d
+ mov ebx,r11d
+ xor ebp,DWORD PTR[20+rsp]
+ and eax,r12d
+ mov ecx,esi
+ xor ebp,DWORD PTR[44+rsp]
+ xor ebx,r12d
+ lea r13d,DWORD PTR[((-1894007588))+r13*1+rdx]
+ rol ecx,5
+ xor ebp,DWORD PTR[rsp]
+ add r13d,eax
+ and ebx,edi
+ rol ebp,1
+ add r13d,ebx
+ rol edi,30
+ mov DWORD PTR[12+rsp],ebp
+ add r13d,ecx
+ mov edx,DWORD PTR[16+rsp]
+ mov eax,edi
+ mov ebx,edi
+ xor edx,DWORD PTR[24+rsp]
+ and eax,r11d
+ mov ecx,r13d
+ xor edx,DWORD PTR[48+rsp]
+ xor ebx,r11d
+ lea r12d,DWORD PTR[((-1894007588))+r12*1+rbp]
+ rol ecx,5
+ xor edx,DWORD PTR[4+rsp]
+ add r12d,eax
+ and ebx,esi
+ rol edx,1
+ add r12d,ebx
+ rol esi,30
+ mov DWORD PTR[16+rsp],edx
+ add r12d,ecx
+ mov ebp,DWORD PTR[20+rsp]
+ mov eax,esi
+ mov ebx,esi
+ xor ebp,DWORD PTR[28+rsp]
+ and eax,edi
+ mov ecx,r12d
+ xor ebp,DWORD PTR[52+rsp]
+ xor ebx,edi
+ lea r11d,DWORD PTR[((-1894007588))+r11*1+rdx]
+ rol ecx,5
+ xor ebp,DWORD PTR[8+rsp]
+ add r11d,eax
+ and ebx,r13d
+ rol ebp,1
+ add r11d,ebx
+ rol r13d,30
+ mov DWORD PTR[20+rsp],ebp
+ add r11d,ecx
+ mov edx,DWORD PTR[24+rsp]
+ mov eax,r13d
+ mov ebx,r13d
+ xor edx,DWORD PTR[32+rsp]
+ and eax,esi
+ mov ecx,r11d
+ xor edx,DWORD PTR[56+rsp]
+ xor ebx,esi
+ lea edi,DWORD PTR[((-1894007588))+rdi*1+rbp]
+ rol ecx,5
+ xor edx,DWORD PTR[12+rsp]
+ add edi,eax
+ and ebx,r12d
+ rol edx,1
+ add edi,ebx
+ rol r12d,30
+ mov DWORD PTR[24+rsp],edx
+ add edi,ecx
+ mov ebp,DWORD PTR[28+rsp]
+ mov eax,r12d
+ mov ebx,r12d
+ xor ebp,DWORD PTR[36+rsp]
+ and eax,r13d
+ mov ecx,edi
+ xor ebp,DWORD PTR[60+rsp]
+ xor ebx,r13d
+ lea esi,DWORD PTR[((-1894007588))+rsi*1+rdx]
+ rol ecx,5
+ xor ebp,DWORD PTR[16+rsp]
+ add esi,eax
+ and ebx,r11d
+ rol ebp,1
+ add esi,ebx
+ rol r11d,30
+ mov DWORD PTR[28+rsp],ebp
+ add esi,ecx
+ mov edx,DWORD PTR[32+rsp]
+ mov eax,r11d
+ mov ebx,r11d
+ xor edx,DWORD PTR[40+rsp]
+ and eax,r12d
+ mov ecx,esi
+ xor edx,DWORD PTR[rsp]
+ xor ebx,r12d
+ lea r13d,DWORD PTR[((-1894007588))+r13*1+rbp]
+ rol ecx,5
+ xor edx,DWORD PTR[20+rsp]
+ add r13d,eax
+ and ebx,edi
+ rol edx,1
+ add r13d,ebx
+ rol edi,30
+ mov DWORD PTR[32+rsp],edx
+ add r13d,ecx
+ mov ebp,DWORD PTR[36+rsp]
+ mov eax,edi
+ mov ebx,edi
+ xor ebp,DWORD PTR[44+rsp]
+ and eax,r11d
+ mov ecx,r13d
+ xor ebp,DWORD PTR[4+rsp]
+ xor ebx,r11d
+ lea r12d,DWORD PTR[((-1894007588))+r12*1+rdx]
+ rol ecx,5
+ xor ebp,DWORD PTR[24+rsp]
+ add r12d,eax
+ and ebx,esi
+ rol ebp,1
+ add r12d,ebx
+ rol esi,30
+ mov DWORD PTR[36+rsp],ebp
+ add r12d,ecx
+ mov edx,DWORD PTR[40+rsp]
+ mov eax,esi
+ mov ebx,esi
+ xor edx,DWORD PTR[48+rsp]
+ and eax,edi
+ mov ecx,r12d
+ xor edx,DWORD PTR[8+rsp]
+ xor ebx,edi
+ lea r11d,DWORD PTR[((-1894007588))+r11*1+rbp]
+ rol ecx,5
+ xor edx,DWORD PTR[28+rsp]
+ add r11d,eax
+ and ebx,r13d
+ rol edx,1
+ add r11d,ebx
+ rol r13d,30
+ mov DWORD PTR[40+rsp],edx
+ add r11d,ecx
+ mov ebp,DWORD PTR[44+rsp]
+ mov eax,r13d
+ mov ebx,r13d
+ xor ebp,DWORD PTR[52+rsp]
+ and eax,esi
+ mov ecx,r11d
+ xor ebp,DWORD PTR[12+rsp]
+ xor ebx,esi
+ lea edi,DWORD PTR[((-1894007588))+rdi*1+rdx]
+ rol ecx,5
+ xor ebp,DWORD PTR[32+rsp]
+ add edi,eax
+ and ebx,r12d
+ rol ebp,1
+ add edi,ebx
+ rol r12d,30
+ mov DWORD PTR[44+rsp],ebp
+ add edi,ecx
+ mov edx,DWORD PTR[48+rsp]
+ mov eax,r12d
+ mov ebx,r12d
+ xor edx,DWORD PTR[56+rsp]
+ and eax,r13d
+ mov ecx,edi
+ xor edx,DWORD PTR[16+rsp]
+ xor ebx,r13d
+ lea esi,DWORD PTR[((-1894007588))+rsi*1+rbp]
+ rol ecx,5
+ xor edx,DWORD PTR[36+rsp]
+ add esi,eax
+ and ebx,r11d
+ rol edx,1
+ add esi,ebx
+ rol r11d,30
+ mov DWORD PTR[48+rsp],edx
+ add esi,ecx
+ mov ebp,DWORD PTR[52+rsp]
+ mov eax,r11d
+ mov ecx,esi
+ xor ebp,DWORD PTR[60+rsp]
+ xor eax,edi
+ rol ecx,5
+ lea r13d,DWORD PTR[((-899497514))+r13*1+rdx]
+ xor ebp,DWORD PTR[20+rsp]
+ xor eax,r12d
+ add r13d,ecx
+ xor ebp,DWORD PTR[40+rsp]
+ rol edi,30
+ add r13d,eax
+ rol ebp,1
+ mov DWORD PTR[52+rsp],ebp
+ mov edx,DWORD PTR[56+rsp]
+ mov eax,edi
+ mov ecx,r13d
+ xor edx,DWORD PTR[rsp]
+ xor eax,esi
+ rol ecx,5
+ lea r12d,DWORD PTR[((-899497514))+r12*1+rbp]
+ xor edx,DWORD PTR[24+rsp]
+ xor eax,r11d
+ add r12d,ecx
+ xor edx,DWORD PTR[44+rsp]
+ rol esi,30
+ add r12d,eax
+ rol edx,1
+ mov DWORD PTR[56+rsp],edx
+ mov ebp,DWORD PTR[60+rsp]
+ mov eax,esi
+ mov ecx,r12d
+ xor ebp,DWORD PTR[4+rsp]
+ xor eax,r13d
+ rol ecx,5
+ lea r11d,DWORD PTR[((-899497514))+r11*1+rdx]
+ xor ebp,DWORD PTR[28+rsp]
+ xor eax,edi
+ add r11d,ecx
+ xor ebp,DWORD PTR[48+rsp]
+ rol r13d,30
+ add r11d,eax
+ rol ebp,1
+ mov DWORD PTR[60+rsp],ebp
+ mov edx,DWORD PTR[rsp]
+ mov eax,r13d
+ mov ecx,r11d
+ xor edx,DWORD PTR[8+rsp]
+ xor eax,r12d
+ rol ecx,5
+ lea edi,DWORD PTR[((-899497514))+rdi*1+rbp]
+ xor edx,DWORD PTR[32+rsp]
+ xor eax,esi
+ add edi,ecx
+ xor edx,DWORD PTR[52+rsp]
+ rol r12d,30
+ add edi,eax
+ rol edx,1
+ mov DWORD PTR[rsp],edx
+ mov ebp,DWORD PTR[4+rsp]
+ mov eax,r12d
+ mov ecx,edi
+ xor ebp,DWORD PTR[12+rsp]
+ xor eax,r11d
+ rol ecx,5
+ lea esi,DWORD PTR[((-899497514))+rsi*1+rdx]
+ xor ebp,DWORD PTR[36+rsp]
+ xor eax,r13d
+ add esi,ecx
+ xor ebp,DWORD PTR[56+rsp]
+ rol r11d,30
+ add esi,eax
+ rol ebp,1
+ mov DWORD PTR[4+rsp],ebp
+ mov edx,DWORD PTR[8+rsp]
+ mov eax,r11d
+ mov ecx,esi
+ xor edx,DWORD PTR[16+rsp]
+ xor eax,edi
+ rol ecx,5
+ lea r13d,DWORD PTR[((-899497514))+r13*1+rbp]
+ xor edx,DWORD PTR[40+rsp]
+ xor eax,r12d
+ add r13d,ecx
+ xor edx,DWORD PTR[60+rsp]
+ rol edi,30
+ add r13d,eax
+ rol edx,1
+ mov DWORD PTR[8+rsp],edx
+ mov ebp,DWORD PTR[12+rsp]
+ mov eax,edi
+ mov ecx,r13d
+ xor ebp,DWORD PTR[20+rsp]
+ xor eax,esi
+ rol ecx,5
+ lea r12d,DWORD PTR[((-899497514))+r12*1+rdx]
+ xor ebp,DWORD PTR[44+rsp]
+ xor eax,r11d
+ add r12d,ecx
+ xor ebp,DWORD PTR[rsp]
+ rol esi,30
+ add r12d,eax
+ rol ebp,1
+ mov DWORD PTR[12+rsp],ebp
+ mov edx,DWORD PTR[16+rsp]
+ mov eax,esi
+ mov ecx,r12d
+ xor edx,DWORD PTR[24+rsp]
+ xor eax,r13d
+ rol ecx,5
+ lea r11d,DWORD PTR[((-899497514))+r11*1+rbp]
+ xor edx,DWORD PTR[48+rsp]
+ xor eax,edi
+ add r11d,ecx
+ xor edx,DWORD PTR[4+rsp]
+ rol r13d,30
+ add r11d,eax
+ rol edx,1
+ mov DWORD PTR[16+rsp],edx
+ mov ebp,DWORD PTR[20+rsp]
+ mov eax,r13d
+ mov ecx,r11d
+ xor ebp,DWORD PTR[28+rsp]
+ xor eax,r12d
+ rol ecx,5
+ lea edi,DWORD PTR[((-899497514))+rdi*1+rdx]
+ xor ebp,DWORD PTR[52+rsp]
+ xor eax,esi
+ add edi,ecx
+ xor ebp,DWORD PTR[8+rsp]
+ rol r12d,30
+ add edi,eax
+ rol ebp,1
+ mov DWORD PTR[20+rsp],ebp
+ mov edx,DWORD PTR[24+rsp]
+ mov eax,r12d
+ mov ecx,edi
+ xor edx,DWORD PTR[32+rsp]
+ xor eax,r11d
+ rol ecx,5
+ lea esi,DWORD PTR[((-899497514))+rsi*1+rbp]
+ xor edx,DWORD PTR[56+rsp]
+ xor eax,r13d
+ add esi,ecx
+ xor edx,DWORD PTR[12+rsp]
+ rol r11d,30
+ add esi,eax
+ rol edx,1
+ mov DWORD PTR[24+rsp],edx
+ mov ebp,DWORD PTR[28+rsp]
+ mov eax,r11d
+ mov ecx,esi
+ xor ebp,DWORD PTR[36+rsp]
+ xor eax,edi
+ rol ecx,5
+ lea r13d,DWORD PTR[((-899497514))+r13*1+rdx]
+ xor ebp,DWORD PTR[60+rsp]
+ xor eax,r12d
+ add r13d,ecx
+ xor ebp,DWORD PTR[16+rsp]
+ rol edi,30
+ add r13d,eax
+ rol ebp,1
+ mov DWORD PTR[28+rsp],ebp
+ mov edx,DWORD PTR[32+rsp]
+ mov eax,edi
+ mov ecx,r13d
+ xor edx,DWORD PTR[40+rsp]
+ xor eax,esi
+ rol ecx,5
+ lea r12d,DWORD PTR[((-899497514))+r12*1+rbp]
+ xor edx,DWORD PTR[rsp]
+ xor eax,r11d
+ add r12d,ecx
+ xor edx,DWORD PTR[20+rsp]
+ rol esi,30
+ add r12d,eax
+ rol edx,1
+ mov DWORD PTR[32+rsp],edx
+ mov ebp,DWORD PTR[36+rsp]
+ mov eax,esi
+ mov ecx,r12d
+ xor ebp,DWORD PTR[44+rsp]
+ xor eax,r13d
+ rol ecx,5
+ lea r11d,DWORD PTR[((-899497514))+r11*1+rdx]
+ xor ebp,DWORD PTR[4+rsp]
+ xor eax,edi
+ add r11d,ecx
+ xor ebp,DWORD PTR[24+rsp]
+ rol r13d,30
+ add r11d,eax
+ rol ebp,1
+ mov DWORD PTR[36+rsp],ebp
+ mov edx,DWORD PTR[40+rsp]
+ mov eax,r13d
+ mov ecx,r11d
+ xor edx,DWORD PTR[48+rsp]
+ xor eax,r12d
+ rol ecx,5
+ lea edi,DWORD PTR[((-899497514))+rdi*1+rbp]
+ xor edx,DWORD PTR[8+rsp]
+ xor eax,esi
+ add edi,ecx
+ xor edx,DWORD PTR[28+rsp]
+ rol r12d,30
+ add edi,eax
+ rol edx,1
+ mov DWORD PTR[40+rsp],edx
+ mov ebp,DWORD PTR[44+rsp]
+ mov eax,r12d
+ mov ecx,edi
+ xor ebp,DWORD PTR[52+rsp]
+ xor eax,r11d
+ rol ecx,5
+ lea esi,DWORD PTR[((-899497514))+rsi*1+rdx]
+ xor ebp,DWORD PTR[12+rsp]
+ xor eax,r13d
+ add esi,ecx
+ xor ebp,DWORD PTR[32+rsp]
+ rol r11d,30
+ add esi,eax
+ rol ebp,1
+ mov DWORD PTR[44+rsp],ebp
+ mov edx,DWORD PTR[48+rsp]
+ mov eax,r11d
+ mov ecx,esi
+ xor edx,DWORD PTR[56+rsp]
+ xor eax,edi
+ rol ecx,5
+ lea r13d,DWORD PTR[((-899497514))+r13*1+rbp]
+ xor edx,DWORD PTR[16+rsp]
+ xor eax,r12d
+ add r13d,ecx
+ xor edx,DWORD PTR[36+rsp]
+ rol edi,30
+ add r13d,eax
+ rol edx,1
+ mov DWORD PTR[48+rsp],edx
+ mov ebp,DWORD PTR[52+rsp]
+ mov eax,edi
+ mov ecx,r13d
+ xor ebp,DWORD PTR[60+rsp]
+ xor eax,esi
+ rol ecx,5
+ lea r12d,DWORD PTR[((-899497514))+r12*1+rdx]
+ xor ebp,DWORD PTR[20+rsp]
+ xor eax,r11d
+ add r12d,ecx
+ xor ebp,DWORD PTR[40+rsp]
+ rol esi,30
+ add r12d,eax
+ rol ebp,1
+ mov edx,DWORD PTR[56+rsp]
+ mov eax,esi
+ mov ecx,r12d
+ xor edx,DWORD PTR[rsp]
+ xor eax,r13d
+ rol ecx,5
+ lea r11d,DWORD PTR[((-899497514))+r11*1+rbp]
+ xor edx,DWORD PTR[24+rsp]
+ xor eax,edi
+ add r11d,ecx
+ xor edx,DWORD PTR[44+rsp]
+ rol r13d,30
+ add r11d,eax
+ rol edx,1
+ mov ebp,DWORD PTR[60+rsp]
+ mov eax,r13d
+ mov ecx,r11d
+ xor ebp,DWORD PTR[4+rsp]
+ xor eax,r12d
+ rol ecx,5
+ lea edi,DWORD PTR[((-899497514))+rdi*1+rdx]
+ xor ebp,DWORD PTR[28+rsp]
+ xor eax,esi
+ add edi,ecx
+ xor ebp,DWORD PTR[48+rsp]
+ rol r12d,30
+ add edi,eax
+ rol ebp,1
+ mov eax,r12d
+ mov ecx,edi
+ xor eax,r11d
+ lea esi,DWORD PTR[((-899497514))+rsi*1+rbp]
+ rol ecx,5
+ xor eax,r13d
+ add esi,ecx
+ rol r11d,30
+ add esi,eax
+ add esi,DWORD PTR[r8]
+ add edi,DWORD PTR[4+r8]
+ add r11d,DWORD PTR[8+r8]
+ add r12d,DWORD PTR[12+r8]
+ add r13d,DWORD PTR[16+r8]
+ mov DWORD PTR[r8],esi
+ mov DWORD PTR[4+r8],edi
+ mov DWORD PTR[8+r8],r11d
+ mov DWORD PTR[12+r8],r12d
+ mov DWORD PTR[16+r8],r13d
+
+ sub r10,1
+ lea r9,QWORD PTR[64+r9]
+ jnz $L$loop
+
+ mov rsi,QWORD PTR[64+rsp]
+ mov r13,QWORD PTR[rsi]
+ mov r12,QWORD PTR[8+rsi]
+ mov rbp,QWORD PTR[16+rsi]
+ mov rbx,QWORD PTR[24+rsi]
+ lea rsp,QWORD PTR[32+rsi]
+$L$epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_sha1_block_data_order::
+sha1_block_data_order ENDP
+
+ALIGN 16
+sha1_block_data_order_ssse3 PROC PRIVATE
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha1_block_data_order_ssse3::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+_ssse3_shortcut::
+ push rbx
+ push rbp
+ push r12
+ lea rsp,QWORD PTR[((-144))+rsp]
+ movaps XMMWORD PTR[(64+0)+rsp],xmm6
+ movaps XMMWORD PTR[(64+16)+rsp],xmm7
+ movaps XMMWORD PTR[(64+32)+rsp],xmm8
+ movaps XMMWORD PTR[(64+48)+rsp],xmm9
+ movaps XMMWORD PTR[(64+64)+rsp],xmm10
+$L$prologue_ssse3::
+ mov r8,rdi
+ mov r9,rsi
+ mov r10,rdx
+
+ shl r10,6
+ add r10,r9
+ lea r11,QWORD PTR[K_XX_XX]
+
+ mov eax,DWORD PTR[r8]
+ mov ebx,DWORD PTR[4+r8]
+ mov ecx,DWORD PTR[8+r8]
+ mov edx,DWORD PTR[12+r8]
+ mov esi,ebx
+ mov ebp,DWORD PTR[16+r8]
+
+ movdqa xmm6,XMMWORD PTR[64+r11]
+ movdqa xmm9,XMMWORD PTR[r11]
+ movdqu xmm0,XMMWORD PTR[r9]
+ movdqu xmm1,XMMWORD PTR[16+r9]
+ movdqu xmm2,XMMWORD PTR[32+r9]
+ movdqu xmm3,XMMWORD PTR[48+r9]
+DB 102,15,56,0,198
+ add r9,64
+DB 102,15,56,0,206
+DB 102,15,56,0,214
+DB 102,15,56,0,222
+ paddd xmm0,xmm9
+ paddd xmm1,xmm9
+ paddd xmm2,xmm9
+ movdqa XMMWORD PTR[rsp],xmm0
+ psubd xmm0,xmm9
+ movdqa XMMWORD PTR[16+rsp],xmm1
+ psubd xmm1,xmm9
+ movdqa XMMWORD PTR[32+rsp],xmm2
+ psubd xmm2,xmm9
+ jmp $L$oop_ssse3
+ALIGN 16
+$L$oop_ssse3::
+ movdqa xmm4,xmm1
+ add ebp,DWORD PTR[rsp]
+ xor ecx,edx
+ movdqa xmm8,xmm3
+DB 102,15,58,15,224,8
+ mov edi,eax
+ rol eax,5
+ paddd xmm9,xmm3
+ and esi,ecx
+ xor ecx,edx
+ psrldq xmm8,4
+ xor esi,edx
+ add ebp,eax
+ pxor xmm4,xmm0
+ ror ebx,2
+ add ebp,esi
+ pxor xmm8,xmm2
+ add edx,DWORD PTR[4+rsp]
+ xor ebx,ecx
+ mov esi,ebp
+ rol ebp,5
+ pxor xmm4,xmm8
+ and edi,ebx
+ xor ebx,ecx
+ movdqa XMMWORD PTR[48+rsp],xmm9
+ xor edi,ecx
+ add edx,ebp
+ movdqa xmm10,xmm4
+ movdqa xmm8,xmm4
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD PTR[8+rsp]
+ xor eax,ebx
+ pslldq xmm10,12
+ paddd xmm4,xmm4
+ mov edi,edx
+ rol edx,5
+ and esi,eax
+ xor eax,ebx
+ psrld xmm8,31
+ xor esi,ebx
+ add ecx,edx
+ movdqa xmm9,xmm10
+ ror ebp,7
+ add ecx,esi
+ psrld xmm10,30
+ por xmm4,xmm8
+ add ebx,DWORD PTR[12+rsp]
+ xor ebp,eax
+ mov esi,ecx
+ rol ecx,5
+ pslld xmm9,2
+ pxor xmm4,xmm10
+ and edi,ebp
+ xor ebp,eax
+ movdqa xmm10,XMMWORD PTR[r11]
+ xor edi,eax
+ add ebx,ecx
+ pxor xmm4,xmm9
+ ror edx,7
+ add ebx,edi
+ movdqa xmm5,xmm2
+ add eax,DWORD PTR[16+rsp]
+ xor edx,ebp
+ movdqa xmm9,xmm4
+DB 102,15,58,15,233,8
+ mov edi,ebx
+ rol ebx,5
+ paddd xmm10,xmm4
+ and esi,edx
+ xor edx,ebp
+ psrldq xmm9,4
+ xor esi,ebp
+ add eax,ebx
+ pxor xmm5,xmm1
+ ror ecx,7
+ add eax,esi
+ pxor xmm9,xmm3
+ add ebp,DWORD PTR[20+rsp]
+ xor ecx,edx
+ mov esi,eax
+ rol eax,5
+ pxor xmm5,xmm9
+ and edi,ecx
+ xor ecx,edx
+ movdqa XMMWORD PTR[rsp],xmm10
+ xor edi,edx
+ add ebp,eax
+ movdqa xmm8,xmm5
+ movdqa xmm9,xmm5
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[24+rsp]
+ xor ebx,ecx
+ pslldq xmm8,12
+ paddd xmm5,xmm5
+ mov edi,ebp
+ rol ebp,5
+ and esi,ebx
+ xor ebx,ecx
+ psrld xmm9,31
+ xor esi,ecx
+ add edx,ebp
+ movdqa xmm10,xmm8
+ ror eax,7
+ add edx,esi
+ psrld xmm8,30
+ por xmm5,xmm9
+ add ecx,DWORD PTR[28+rsp]
+ xor eax,ebx
+ mov esi,edx
+ rol edx,5
+ pslld xmm10,2
+ pxor xmm5,xmm8
+ and edi,eax
+ xor eax,ebx
+ movdqa xmm8,XMMWORD PTR[16+r11]
+ xor edi,ebx
+ add ecx,edx
+ pxor xmm5,xmm10
+ ror ebp,7
+ add ecx,edi
+ movdqa xmm6,xmm3
+ add ebx,DWORD PTR[32+rsp]
+ xor ebp,eax
+ movdqa xmm10,xmm5
+DB 102,15,58,15,242,8
+ mov edi,ecx
+ rol ecx,5
+ paddd xmm8,xmm5
+ and esi,ebp
+ xor ebp,eax
+ psrldq xmm10,4
+ xor esi,eax
+ add ebx,ecx
+ pxor xmm6,xmm2
+ ror edx,7
+ add ebx,esi
+ pxor xmm10,xmm4
+ add eax,DWORD PTR[36+rsp]
+ xor edx,ebp
+ mov esi,ebx
+ rol ebx,5
+ pxor xmm6,xmm10
+ and edi,edx
+ xor edx,ebp
+ movdqa XMMWORD PTR[16+rsp],xmm8
+ xor edi,ebp
+ add eax,ebx
+ movdqa xmm9,xmm6
+ movdqa xmm10,xmm6
+ ror ecx,7
+ add eax,edi
+ add ebp,DWORD PTR[40+rsp]
+ xor ecx,edx
+ pslldq xmm9,12
+ paddd xmm6,xmm6
+ mov edi,eax
+ rol eax,5
+ and esi,ecx
+ xor ecx,edx
+ psrld xmm10,31
+ xor esi,edx
+ add ebp,eax
+ movdqa xmm8,xmm9
+ ror ebx,7
+ add ebp,esi
+ psrld xmm9,30
+ por xmm6,xmm10
+ add edx,DWORD PTR[44+rsp]
+ xor ebx,ecx
+ mov esi,ebp
+ rol ebp,5
+ pslld xmm8,2
+ pxor xmm6,xmm9
+ and edi,ebx
+ xor ebx,ecx
+ movdqa xmm9,XMMWORD PTR[16+r11]
+ xor edi,ecx
+ add edx,ebp
+ pxor xmm6,xmm8
+ ror eax,7
+ add edx,edi
+ movdqa xmm7,xmm4
+ add ecx,DWORD PTR[48+rsp]
+ xor eax,ebx
+ movdqa xmm8,xmm6
+DB 102,15,58,15,251,8
+ mov edi,edx
+ rol edx,5
+ paddd xmm9,xmm6
+ and esi,eax
+ xor eax,ebx
+ psrldq xmm8,4
+ xor esi,ebx
+ add ecx,edx
+ pxor xmm7,xmm3
+ ror ebp,7
+ add ecx,esi
+ pxor xmm8,xmm5
+ add ebx,DWORD PTR[52+rsp]
+ xor ebp,eax
+ mov esi,ecx
+ rol ecx,5
+ pxor xmm7,xmm8
+ and edi,ebp
+ xor ebp,eax
+ movdqa XMMWORD PTR[32+rsp],xmm9
+ xor edi,eax
+ add ebx,ecx
+ movdqa xmm10,xmm7
+ movdqa xmm8,xmm7
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[56+rsp]
+ xor edx,ebp
+ pslldq xmm10,12
+ paddd xmm7,xmm7
+ mov edi,ebx
+ rol ebx,5
+ and esi,edx
+ xor edx,ebp
+ psrld xmm8,31
+ xor esi,ebp
+ add eax,ebx
+ movdqa xmm9,xmm10
+ ror ecx,7
+ add eax,esi
+ psrld xmm10,30
+ por xmm7,xmm8
+ add ebp,DWORD PTR[60+rsp]
+ xor ecx,edx
+ mov esi,eax
+ rol eax,5
+ pslld xmm9,2
+ pxor xmm7,xmm10
+ and edi,ecx
+ xor ecx,edx
+ movdqa xmm10,XMMWORD PTR[16+r11]
+ xor edi,edx
+ add ebp,eax
+ pxor xmm7,xmm9
+ ror ebx,7
+ add ebp,edi
+ movdqa xmm9,xmm7
+ add edx,DWORD PTR[rsp]
+ pxor xmm0,xmm4
+DB 102,68,15,58,15,206,8
+ xor ebx,ecx
+ mov edi,ebp
+ rol ebp,5
+ pxor xmm0,xmm1
+ and esi,ebx
+ xor ebx,ecx
+ movdqa xmm8,xmm10
+ paddd xmm10,xmm7
+ xor esi,ecx
+ add edx,ebp
+ pxor xmm0,xmm9
+ ror eax,7
+ add edx,esi
+ add ecx,DWORD PTR[4+rsp]
+ xor eax,ebx
+ movdqa xmm9,xmm0
+ movdqa XMMWORD PTR[48+rsp],xmm10
+ mov esi,edx
+ rol edx,5
+ and edi,eax
+ xor eax,ebx
+ pslld xmm0,2
+ xor edi,ebx
+ add ecx,edx
+ psrld xmm9,30
+ ror ebp,7
+ add ecx,edi
+ add ebx,DWORD PTR[8+rsp]
+ xor ebp,eax
+ mov edi,ecx
+ rol ecx,5
+ por xmm0,xmm9
+ and esi,ebp
+ xor ebp,eax
+ movdqa xmm10,xmm0
+ xor esi,eax
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ add eax,DWORD PTR[12+rsp]
+ xor edx,ebp
+ mov esi,ebx
+ rol ebx,5
+ and edi,edx
+ xor edx,ebp
+ xor edi,ebp
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ add ebp,DWORD PTR[16+rsp]
+ pxor xmm1,xmm5
+DB 102,68,15,58,15,215,8
+ xor esi,edx
+ mov edi,eax
+ rol eax,5
+ pxor xmm1,xmm2
+ xor esi,ecx
+ add ebp,eax
+ movdqa xmm9,xmm8
+ paddd xmm8,xmm0
+ ror ebx,7
+ add ebp,esi
+ pxor xmm1,xmm10
+ add edx,DWORD PTR[20+rsp]
+ xor edi,ecx
+ mov esi,ebp
+ rol ebp,5
+ movdqa xmm10,xmm1
+ movdqa XMMWORD PTR[rsp],xmm8
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ pslld xmm1,2
+ add ecx,DWORD PTR[24+rsp]
+ xor esi,ebx
+ psrld xmm10,30
+ mov edi,edx
+ rol edx,5
+ xor esi,eax
+ add ecx,edx
+ ror ebp,7
+ add ecx,esi
+ por xmm1,xmm10
+ add ebx,DWORD PTR[28+rsp]
+ xor edi,eax
+ movdqa xmm8,xmm1
+ mov esi,ecx
+ rol ecx,5
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[32+rsp]
+ pxor xmm2,xmm6
+DB 102,68,15,58,15,192,8
+ xor esi,ebp
+ mov edi,ebx
+ rol ebx,5
+ pxor xmm2,xmm3
+ xor esi,edx
+ add eax,ebx
+ movdqa xmm10,XMMWORD PTR[32+r11]
+ paddd xmm9,xmm1
+ ror ecx,7
+ add eax,esi
+ pxor xmm2,xmm8
+ add ebp,DWORD PTR[36+rsp]
+ xor edi,edx
+ mov esi,eax
+ rol eax,5
+ movdqa xmm8,xmm2
+ movdqa XMMWORD PTR[16+rsp],xmm9
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ pslld xmm2,2
+ add edx,DWORD PTR[40+rsp]
+ xor esi,ecx
+ psrld xmm8,30
+ mov edi,ebp
+ rol ebp,5
+ xor esi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,esi
+ por xmm2,xmm8
+ add ecx,DWORD PTR[44+rsp]
+ xor edi,ebx
+ movdqa xmm9,xmm2
+ mov esi,edx
+ rol edx,5
+ xor edi,eax
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ add ebx,DWORD PTR[48+rsp]
+ pxor xmm3,xmm7
+DB 102,68,15,58,15,201,8
+ xor esi,eax
+ mov edi,ecx
+ rol ecx,5
+ pxor xmm3,xmm4
+ xor esi,ebp
+ add ebx,ecx
+ movdqa xmm8,xmm10
+ paddd xmm10,xmm2
+ ror edx,7
+ add ebx,esi
+ pxor xmm3,xmm9
+ add eax,DWORD PTR[52+rsp]
+ xor edi,ebp
+ mov esi,ebx
+ rol ebx,5
+ movdqa xmm9,xmm3
+ movdqa XMMWORD PTR[32+rsp],xmm10
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ pslld xmm3,2
+ add ebp,DWORD PTR[56+rsp]
+ xor esi,edx
+ psrld xmm9,30
+ mov edi,eax
+ rol eax,5
+ xor esi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,esi
+ por xmm3,xmm9
+ add edx,DWORD PTR[60+rsp]
+ xor edi,ecx
+ movdqa xmm10,xmm3
+ mov esi,ebp
+ rol ebp,5
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD PTR[rsp]
+ pxor xmm4,xmm0
+DB 102,68,15,58,15,210,8
+ xor esi,ebx
+ mov edi,edx
+ rol edx,5
+ pxor xmm4,xmm5
+ xor esi,eax
+ add ecx,edx
+ movdqa xmm9,xmm8
+ paddd xmm8,xmm3
+ ror ebp,7
+ add ecx,esi
+ pxor xmm4,xmm10
+ add ebx,DWORD PTR[4+rsp]
+ xor edi,eax
+ mov esi,ecx
+ rol ecx,5
+ movdqa xmm10,xmm4
+ movdqa XMMWORD PTR[48+rsp],xmm8
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ pslld xmm4,2
+ add eax,DWORD PTR[8+rsp]
+ xor esi,ebp
+ psrld xmm10,30
+ mov edi,ebx
+ rol ebx,5
+ xor esi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,esi
+ por xmm4,xmm10
+ add ebp,DWORD PTR[12+rsp]
+ xor edi,edx
+ movdqa xmm8,xmm4
+ mov esi,eax
+ rol eax,5
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[16+rsp]
+ pxor xmm5,xmm1
+DB 102,68,15,58,15,195,8
+ xor esi,ecx
+ mov edi,ebp
+ rol ebp,5
+ pxor xmm5,xmm6
+ xor esi,ebx
+ add edx,ebp
+ movdqa xmm10,xmm9
+ paddd xmm9,xmm4
+ ror eax,7
+ add edx,esi
+ pxor xmm5,xmm8
+ add ecx,DWORD PTR[20+rsp]
+ xor edi,ebx
+ mov esi,edx
+ rol edx,5
+ movdqa xmm8,xmm5
+ movdqa XMMWORD PTR[rsp],xmm9
+ xor edi,eax
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ pslld xmm5,2
+ add ebx,DWORD PTR[24+rsp]
+ xor esi,eax
+ psrld xmm8,30
+ mov edi,ecx
+ rol ecx,5
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ por xmm5,xmm8
+ add eax,DWORD PTR[28+rsp]
+ xor edi,ebp
+ movdqa xmm9,xmm5
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ mov edi,ecx
+ pxor xmm6,xmm2
+DB 102,68,15,58,15,204,8
+ xor ecx,edx
+ add ebp,DWORD PTR[32+rsp]
+ and edi,edx
+ pxor xmm6,xmm7
+ and esi,ecx
+ ror ebx,7
+ movdqa xmm8,xmm10
+ paddd xmm10,xmm5
+ add ebp,edi
+ mov edi,eax
+ pxor xmm6,xmm9
+ rol eax,5
+ add ebp,esi
+ xor ecx,edx
+ add ebp,eax
+ movdqa xmm9,xmm6
+ movdqa XMMWORD PTR[16+rsp],xmm10
+ mov esi,ebx
+ xor ebx,ecx
+ add edx,DWORD PTR[36+rsp]
+ and esi,ecx
+ pslld xmm6,2
+ and edi,ebx
+ ror eax,7
+ psrld xmm9,30
+ add edx,esi
+ mov esi,ebp
+ rol ebp,5
+ add edx,edi
+ xor ebx,ecx
+ add edx,ebp
+ por xmm6,xmm9
+ mov edi,eax
+ xor eax,ebx
+ movdqa xmm10,xmm6
+ add ecx,DWORD PTR[40+rsp]
+ and edi,ebx
+ and esi,eax
+ ror ebp,7
+ add ecx,edi
+ mov edi,edx
+ rol edx,5
+ add ecx,esi
+ xor eax,ebx
+ add ecx,edx
+ mov esi,ebp
+ xor ebp,eax
+ add ebx,DWORD PTR[44+rsp]
+ and esi,eax
+ and edi,ebp
+ ror edx,7
+ add ebx,esi
+ mov esi,ecx
+ rol ecx,5
+ add ebx,edi
+ xor ebp,eax
+ add ebx,ecx
+ mov edi,edx
+ pxor xmm7,xmm3
+DB 102,68,15,58,15,213,8
+ xor edx,ebp
+ add eax,DWORD PTR[48+rsp]
+ and edi,ebp
+ pxor xmm7,xmm0
+ and esi,edx
+ ror ecx,7
+ movdqa xmm9,XMMWORD PTR[48+r11]
+ paddd xmm8,xmm6
+ add eax,edi
+ mov edi,ebx
+ pxor xmm7,xmm10
+ rol ebx,5
+ add eax,esi
+ xor edx,ebp
+ add eax,ebx
+ movdqa xmm10,xmm7
+ movdqa XMMWORD PTR[32+rsp],xmm8
+ mov esi,ecx
+ xor ecx,edx
+ add ebp,DWORD PTR[52+rsp]
+ and esi,edx
+ pslld xmm7,2
+ and edi,ecx
+ ror ebx,7
+ psrld xmm10,30
+ add ebp,esi
+ mov esi,eax
+ rol eax,5
+ add ebp,edi
+ xor ecx,edx
+ add ebp,eax
+ por xmm7,xmm10
+ mov edi,ebx
+ xor ebx,ecx
+ movdqa xmm8,xmm7
+ add edx,DWORD PTR[56+rsp]
+ and edi,ecx
+ and esi,ebx
+ ror eax,7
+ add edx,edi
+ mov edi,ebp
+ rol ebp,5
+ add edx,esi
+ xor ebx,ecx
+ add edx,ebp
+ mov esi,eax
+ xor eax,ebx
+ add ecx,DWORD PTR[60+rsp]
+ and esi,ebx
+ and edi,eax
+ ror ebp,7
+ add ecx,esi
+ mov esi,edx
+ rol edx,5
+ add ecx,edi
+ xor eax,ebx
+ add ecx,edx
+ mov edi,ebp
+ pxor xmm0,xmm4
+DB 102,68,15,58,15,198,8
+ xor ebp,eax
+ add ebx,DWORD PTR[rsp]
+ and edi,eax
+ pxor xmm0,xmm1
+ and esi,ebp
+ ror edx,7
+ movdqa xmm10,xmm9
+ paddd xmm9,xmm7
+ add ebx,edi
+ mov edi,ecx
+ pxor xmm0,xmm8
+ rol ecx,5
+ add ebx,esi
+ xor ebp,eax
+ add ebx,ecx
+ movdqa xmm8,xmm0
+ movdqa XMMWORD PTR[48+rsp],xmm9
+ mov esi,edx
+ xor edx,ebp
+ add eax,DWORD PTR[4+rsp]
+ and esi,ebp
+ pslld xmm0,2
+ and edi,edx
+ ror ecx,7
+ psrld xmm8,30
+ add eax,esi
+ mov esi,ebx
+ rol ebx,5
+ add eax,edi
+ xor edx,ebp
+ add eax,ebx
+ por xmm0,xmm8
+ mov edi,ecx
+ xor ecx,edx
+ movdqa xmm9,xmm0
+ add ebp,DWORD PTR[8+rsp]
+ and edi,edx
+ and esi,ecx
+ ror ebx,7
+ add ebp,edi
+ mov edi,eax
+ rol eax,5
+ add ebp,esi
+ xor ecx,edx
+ add ebp,eax
+ mov esi,ebx
+ xor ebx,ecx
+ add edx,DWORD PTR[12+rsp]
+ and esi,ecx
+ and edi,ebx
+ ror eax,7
+ add edx,esi
+ mov esi,ebp
+ rol ebp,5
+ add edx,edi
+ xor ebx,ecx
+ add edx,ebp
+ mov edi,eax
+ pxor xmm1,xmm5
+DB 102,68,15,58,15,207,8
+ xor eax,ebx
+ add ecx,DWORD PTR[16+rsp]
+ and edi,ebx
+ pxor xmm1,xmm2
+ and esi,eax
+ ror ebp,7
+ movdqa xmm8,xmm10
+ paddd xmm10,xmm0
+ add ecx,edi
+ mov edi,edx
+ pxor xmm1,xmm9
+ rol edx,5
+ add ecx,esi
+ xor eax,ebx
+ add ecx,edx
+ movdqa xmm9,xmm1
+ movdqa XMMWORD PTR[rsp],xmm10
+ mov esi,ebp
+ xor ebp,eax
+ add ebx,DWORD PTR[20+rsp]
+ and esi,eax
+ pslld xmm1,2
+ and edi,ebp
+ ror edx,7
+ psrld xmm9,30
+ add ebx,esi
+ mov esi,ecx
+ rol ecx,5
+ add ebx,edi
+ xor ebp,eax
+ add ebx,ecx
+ por xmm1,xmm9
+ mov edi,edx
+ xor edx,ebp
+ movdqa xmm10,xmm1
+ add eax,DWORD PTR[24+rsp]
+ and edi,ebp
+ and esi,edx
+ ror ecx,7
+ add eax,edi
+ mov edi,ebx
+ rol ebx,5
+ add eax,esi
+ xor edx,ebp
+ add eax,ebx
+ mov esi,ecx
+ xor ecx,edx
+ add ebp,DWORD PTR[28+rsp]
+ and esi,edx
+ and edi,ecx
+ ror ebx,7
+ add ebp,esi
+ mov esi,eax
+ rol eax,5
+ add ebp,edi
+ xor ecx,edx
+ add ebp,eax
+ mov edi,ebx
+ pxor xmm2,xmm6
+DB 102,68,15,58,15,208,8
+ xor ebx,ecx
+ add edx,DWORD PTR[32+rsp]
+ and edi,ecx
+ pxor xmm2,xmm3
+ and esi,ebx
+ ror eax,7
+ movdqa xmm9,xmm8
+ paddd xmm8,xmm1
+ add edx,edi
+ mov edi,ebp
+ pxor xmm2,xmm10
+ rol ebp,5
+ add edx,esi
+ xor ebx,ecx
+ add edx,ebp
+ movdqa xmm10,xmm2
+ movdqa XMMWORD PTR[16+rsp],xmm8
+ mov esi,eax
+ xor eax,ebx
+ add ecx,DWORD PTR[36+rsp]
+ and esi,ebx
+ pslld xmm2,2
+ and edi,eax
+ ror ebp,7
+ psrld xmm10,30
+ add ecx,esi
+ mov esi,edx
+ rol edx,5
+ add ecx,edi
+ xor eax,ebx
+ add ecx,edx
+ por xmm2,xmm10
+ mov edi,ebp
+ xor ebp,eax
+ movdqa xmm8,xmm2
+ add ebx,DWORD PTR[40+rsp]
+ and edi,eax
+ and esi,ebp
+ ror edx,7
+ add ebx,edi
+ mov edi,ecx
+ rol ecx,5
+ add ebx,esi
+ xor ebp,eax
+ add ebx,ecx
+ mov esi,edx
+ xor edx,ebp
+ add eax,DWORD PTR[44+rsp]
+ and esi,ebp
+ and edi,edx
+ ror ecx,7
+ add eax,esi
+ mov esi,ebx
+ rol ebx,5
+ add eax,edi
+ xor edx,ebp
+ add eax,ebx
+ add ebp,DWORD PTR[48+rsp]
+ pxor xmm3,xmm7
+DB 102,68,15,58,15,193,8
+ xor esi,edx
+ mov edi,eax
+ rol eax,5
+ pxor xmm3,xmm4
+ xor esi,ecx
+ add ebp,eax
+ movdqa xmm10,xmm9
+ paddd xmm9,xmm2
+ ror ebx,7
+ add ebp,esi
+ pxor xmm3,xmm8
+ add edx,DWORD PTR[52+rsp]
+ xor edi,ecx
+ mov esi,ebp
+ rol ebp,5
+ movdqa xmm8,xmm3
+ movdqa XMMWORD PTR[32+rsp],xmm9
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ pslld xmm3,2
+ add ecx,DWORD PTR[56+rsp]
+ xor esi,ebx
+ psrld xmm8,30
+ mov edi,edx
+ rol edx,5
+ xor esi,eax
+ add ecx,edx
+ ror ebp,7
+ add ecx,esi
+ por xmm3,xmm8
+ add ebx,DWORD PTR[60+rsp]
+ xor edi,eax
+ mov esi,ecx
+ rol ecx,5
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[rsp]
+ paddd xmm10,xmm3
+ xor esi,ebp
+ mov edi,ebx
+ rol ebx,5
+ xor esi,edx
+ movdqa XMMWORD PTR[48+rsp],xmm10
+ add eax,ebx
+ ror ecx,7
+ add eax,esi
+ add ebp,DWORD PTR[4+rsp]
+ xor edi,edx
+ mov esi,eax
+ rol eax,5
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[8+rsp]
+ xor esi,ecx
+ mov edi,ebp
+ rol ebp,5
+ xor esi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,esi
+ add ecx,DWORD PTR[12+rsp]
+ xor edi,ebx
+ mov esi,edx
+ rol edx,5
+ xor edi,eax
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ cmp r9,r10
+ je $L$done_ssse3
+ movdqa xmm6,XMMWORD PTR[64+r11]
+ movdqa xmm9,XMMWORD PTR[r11]
+ movdqu xmm0,XMMWORD PTR[r9]
+ movdqu xmm1,XMMWORD PTR[16+r9]
+ movdqu xmm2,XMMWORD PTR[32+r9]
+ movdqu xmm3,XMMWORD PTR[48+r9]
+DB 102,15,56,0,198
+ add r9,64
+ add ebx,DWORD PTR[16+rsp]
+ xor esi,eax
+DB 102,15,56,0,206
+ mov edi,ecx
+ rol ecx,5
+ paddd xmm0,xmm9
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ movdqa XMMWORD PTR[rsp],xmm0
+ add eax,DWORD PTR[20+rsp]
+ xor edi,ebp
+ psubd xmm0,xmm9
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ add ebp,DWORD PTR[24+rsp]
+ xor esi,edx
+ mov edi,eax
+ rol eax,5
+ xor esi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,esi
+ add edx,DWORD PTR[28+rsp]
+ xor edi,ecx
+ mov esi,ebp
+ rol ebp,5
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD PTR[32+rsp]
+ xor esi,ebx
+DB 102,15,56,0,214
+ mov edi,edx
+ rol edx,5
+ paddd xmm1,xmm9
+ xor esi,eax
+ add ecx,edx
+ ror ebp,7
+ add ecx,esi
+ movdqa XMMWORD PTR[16+rsp],xmm1
+ add ebx,DWORD PTR[36+rsp]
+ xor edi,eax
+ psubd xmm1,xmm9
+ mov esi,ecx
+ rol ecx,5
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[40+rsp]
+ xor esi,ebp
+ mov edi,ebx
+ rol ebx,5
+ xor esi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,esi
+ add ebp,DWORD PTR[44+rsp]
+ xor edi,edx
+ mov esi,eax
+ rol eax,5
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[48+rsp]
+ xor esi,ecx
+DB 102,15,56,0,222
+ mov edi,ebp
+ rol ebp,5
+ paddd xmm2,xmm9
+ xor esi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,esi
+ movdqa XMMWORD PTR[32+rsp],xmm2
+ add ecx,DWORD PTR[52+rsp]
+ xor edi,ebx
+ psubd xmm2,xmm9
+ mov esi,edx
+ rol edx,5
+ xor edi,eax
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ add ebx,DWORD PTR[56+rsp]
+ xor esi,eax
+ mov edi,ecx
+ rol ecx,5
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ add eax,DWORD PTR[60+rsp]
+ xor edi,ebp
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ add eax,DWORD PTR[r8]
+ add esi,DWORD PTR[4+r8]
+ add ecx,DWORD PTR[8+r8]
+ add edx,DWORD PTR[12+r8]
+ mov DWORD PTR[r8],eax
+ add ebp,DWORD PTR[16+r8]
+ mov DWORD PTR[4+r8],esi
+ mov ebx,esi
+ mov DWORD PTR[8+r8],ecx
+ mov DWORD PTR[12+r8],edx
+ mov DWORD PTR[16+r8],ebp
+ jmp $L$oop_ssse3
+
+ALIGN 16
+$L$done_ssse3::
+ add ebx,DWORD PTR[16+rsp]
+ xor esi,eax
+ mov edi,ecx
+ rol ecx,5
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ add eax,DWORD PTR[20+rsp]
+ xor edi,ebp
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ add ebp,DWORD PTR[24+rsp]
+ xor esi,edx
+ mov edi,eax
+ rol eax,5
+ xor esi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,esi
+ add edx,DWORD PTR[28+rsp]
+ xor edi,ecx
+ mov esi,ebp
+ rol ebp,5
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD PTR[32+rsp]
+ xor esi,ebx
+ mov edi,edx
+ rol edx,5
+ xor esi,eax
+ add ecx,edx
+ ror ebp,7
+ add ecx,esi
+ add ebx,DWORD PTR[36+rsp]
+ xor edi,eax
+ mov esi,ecx
+ rol ecx,5
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[40+rsp]
+ xor esi,ebp
+ mov edi,ebx
+ rol ebx,5
+ xor esi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,esi
+ add ebp,DWORD PTR[44+rsp]
+ xor edi,edx
+ mov esi,eax
+ rol eax,5
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[48+rsp]
+ xor esi,ecx
+ mov edi,ebp
+ rol ebp,5
+ xor esi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,esi
+ add ecx,DWORD PTR[52+rsp]
+ xor edi,ebx
+ mov esi,edx
+ rol edx,5
+ xor edi,eax
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ add ebx,DWORD PTR[56+rsp]
+ xor esi,eax
+ mov edi,ecx
+ rol ecx,5
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ add eax,DWORD PTR[60+rsp]
+ xor edi,ebp
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ add eax,DWORD PTR[r8]
+ add esi,DWORD PTR[4+r8]
+ add ecx,DWORD PTR[8+r8]
+ mov DWORD PTR[r8],eax
+ add edx,DWORD PTR[12+r8]
+ mov DWORD PTR[4+r8],esi
+ add ebp,DWORD PTR[16+r8]
+ mov DWORD PTR[8+r8],ecx
+ mov DWORD PTR[12+r8],edx
+ mov DWORD PTR[16+r8],ebp
+ movaps xmm6,XMMWORD PTR[((64+0))+rsp]
+ movaps xmm7,XMMWORD PTR[((64+16))+rsp]
+ movaps xmm8,XMMWORD PTR[((64+32))+rsp]
+ movaps xmm9,XMMWORD PTR[((64+48))+rsp]
+ movaps xmm10,XMMWORD PTR[((64+64))+rsp]
+ lea rsi,QWORD PTR[144+rsp]
+ mov r12,QWORD PTR[rsi]
+ mov rbp,QWORD PTR[8+rsi]
+ mov rbx,QWORD PTR[16+rsi]
+ lea rsp,QWORD PTR[24+rsi]
+$L$epilogue_ssse3::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_sha1_block_data_order_ssse3::
+sha1_block_data_order_ssse3 ENDP
+ALIGN 64
+K_XX_XX::
+ DD 05a827999h,05a827999h,05a827999h,05a827999h
+ DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h
+ DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch
+ DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h
+ DD 000010203h,004050607h,008090a0bh,00c0d0e0fh
+DB 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
+DB 102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44
+DB 32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60
+DB 97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114
+DB 103,62,0
+ALIGN 64
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ lea r10,QWORD PTR[$L$prologue]
+ cmp rbx,r10
+ jb $L$common_seh_tail
+
+ mov rax,QWORD PTR[152+r8]
+
+ lea r10,QWORD PTR[$L$epilogue]
+ cmp rbx,r10
+ jae $L$common_seh_tail
+
+ mov rax,QWORD PTR[64+rax]
+ lea rax,QWORD PTR[32+rax]
+
+ mov rbx,QWORD PTR[((-8))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+
+ jmp $L$common_seh_tail
+se_handler ENDP
+
+
+ALIGN 16
+ssse3_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8]
+ mov rbx,QWORD PTR[248+r8]
+
+ mov rsi,QWORD PTR[8+r9]
+ mov r11,QWORD PTR[56+r9]
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_seh_tail
+
+ mov rax,QWORD PTR[152+r8]
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$common_seh_tail
+
+ lea rsi,QWORD PTR[64+rax]
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,10
+ DD 0a548f3fch
+ lea rax,QWORD PTR[168+rax]
+
+ mov rbx,QWORD PTR[((-8))+rax]
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov QWORD PTR[144+r8],rbx
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+
+$L$common_seh_tail::
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+
+ mov rdi,QWORD PTR[40+r9]
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch
+
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+ssse3_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_sha1_block_data_order
+ DD imagerel $L$SEH_end_sha1_block_data_order
+ DD imagerel $L$SEH_info_sha1_block_data_order
+ DD imagerel $L$SEH_begin_sha1_block_data_order_ssse3
+ DD imagerel $L$SEH_end_sha1_block_data_order_ssse3
+ DD imagerel $L$SEH_info_sha1_block_data_order_ssse3
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_sha1_block_data_order::
+DB 9,0,0,0
+ DD imagerel se_handler
+$L$SEH_info_sha1_block_data_order_ssse3::
+DB 9,0,0,0
+ DD imagerel ssse3_handler
+ DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3
+
+.xdata ENDS
+END
+
diff --git a/ext/libressl/crypto/sha/sha1-mingw64-x86_64.S b/ext/libressl/crypto/sha/sha1-mingw64-x86_64.S
new file mode 100644
index 0000000..3ce9fc9
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha1-mingw64-x86_64.S
@@ -0,0 +1,2664 @@
+#include "x86_arch.h"
+.text
+
+
+
+.globl sha1_block_data_order
+.def sha1_block_data_order; .scl 2; .type 32; .endef
+.p2align 4
+sha1_block_data_order:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_sha1_block_data_order:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+ movl OPENSSL_ia32cap_P+0(%rip),%r9d
+ movl OPENSSL_ia32cap_P+4(%rip),%r8d
+ testl $IA32CAP_MASK1_SSSE3,%r8d
+ jz .Lialu
+ jmp _ssse3_shortcut
+
+.p2align 4
+.Lialu:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ movq %rsp,%r11
+ movq %rdi,%r8
+ subq $72,%rsp
+ movq %rsi,%r9
+ andq $-64,%rsp
+ movq %rdx,%r10
+ movq %r11,64(%rsp)
+.Lprologue:
+
+ movl 0(%r8),%esi
+ movl 4(%r8),%edi
+ movl 8(%r8),%r11d
+ movl 12(%r8),%r12d
+ movl 16(%r8),%r13d
+ jmp .Lloop
+
+.p2align 4
+.Lloop:
+ movl 0(%r9),%edx
+ bswapl %edx
+ movl %edx,0(%rsp)
+ movl %r11d,%eax
+ movl 4(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,4(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 8(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,8(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 12(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,12(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 16(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,16(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 20(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,20(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 24(%r9),%edx
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r13,1),%r13d
+ andl %edi,%eax
+ movl %edx,24(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 28(%r9),%ebp
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r12,1),%r12d
+ andl %esi,%eax
+ movl %ebp,28(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 32(%r9),%edx
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %edx,32(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 36(%r9),%ebp
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %ebp,36(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 40(%r9),%edx
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %edx,40(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 44(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,44(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 48(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,48(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 52(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,52(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 56(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,56(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 60(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,60(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl 0(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ xorl 32(%rsp),%edx
+ andl %edi,%eax
+ leal 1518500249(%rbp,%r13,1),%r13d
+ xorl 52(%rsp),%edx
+ xorl %r12d,%eax
+ roll $1,%edx
+ addl %ecx,%r13d
+ roll $30,%edi
+ movl %edx,0(%rsp)
+ addl %eax,%r13d
+ movl 4(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ xorl 36(%rsp),%ebp
+ andl %esi,%eax
+ leal 1518500249(%rdx,%r12,1),%r12d
+ xorl 56(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $1,%ebp
+ addl %ecx,%r12d
+ roll $30,%esi
+ movl %ebp,4(%rsp)
+ addl %eax,%r12d
+ movl 8(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ xorl 40(%rsp),%edx
+ andl %r13d,%eax
+ leal 1518500249(%rbp,%r11,1),%r11d
+ xorl 60(%rsp),%edx
+ xorl %edi,%eax
+ roll $1,%edx
+ addl %ecx,%r11d
+ roll $30,%r13d
+ movl %edx,8(%rsp)
+ addl %eax,%r11d
+ movl 12(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ leal 1518500249(%rdx,%rdi,1),%edi
+ xorl 0(%rsp),%ebp
+ xorl %esi,%eax
+ roll $1,%ebp
+ addl %ecx,%edi
+ roll $30,%r12d
+ movl %ebp,12(%rsp)
+ addl %eax,%edi
+ movl 16(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ leal 1518500249(%rbp,%rsi,1),%esi
+ xorl 4(%rsp),%edx
+ xorl %r13d,%eax
+ roll $1,%edx
+ addl %ecx,%esi
+ roll $30,%r11d
+ movl %edx,16(%rsp)
+ addl %eax,%esi
+ movl 20(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 52(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 8(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 56(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 12(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 16(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 20(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 24(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 28(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 32(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 36(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 40(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 44(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 48(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 52(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 56(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 60(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 0(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 4(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 8(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 12(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 16(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 20(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r13d
+ movl 40(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,40(%rsp)
+ addl %ecx,%r12d
+ movl 44(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 52(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,44(%rsp)
+ addl %ecx,%r11d
+ movl 48(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 56(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,48(%rsp)
+ addl %ecx,%edi
+ movl 52(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 60(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 40(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,52(%rsp)
+ addl %ecx,%esi
+ movl 56(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 0(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 44(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,56(%rsp)
+ addl %ecx,%r13d
+ movl 60(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 4(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 48(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,60(%rsp)
+ addl %ecx,%r12d
+ movl 0(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 8(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 52(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,0(%rsp)
+ addl %ecx,%r11d
+ movl 4(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 12(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 56(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,4(%rsp)
+ addl %ecx,%edi
+ movl 8(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 16(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 60(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,8(%rsp)
+ addl %ecx,%esi
+ movl 12(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 20(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 0(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,12(%rsp)
+ addl %ecx,%r13d
+ movl 16(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 24(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 4(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,16(%rsp)
+ addl %ecx,%r12d
+ movl 20(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 28(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 8(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,20(%rsp)
+ addl %ecx,%r11d
+ movl 24(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 32(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 12(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,24(%rsp)
+ addl %ecx,%edi
+ movl 28(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 36(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 16(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,28(%rsp)
+ addl %ecx,%esi
+ movl 32(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 40(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 20(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,32(%rsp)
+ addl %ecx,%r13d
+ movl 36(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r12d
+ movl 40(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 48(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,40(%rsp)
+ addl %ecx,%r11d
+ movl 44(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 52(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,44(%rsp)
+ addl %ecx,%edi
+ movl 48(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 56(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,48(%rsp)
+ addl %ecx,%esi
+ movl 52(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 20(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 40(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 24(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 44(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 48(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 52(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 56(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 60(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 0(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 4(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 8(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rsi,1),%esi
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 12(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 16(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 20(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 24(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 28(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 32(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 36(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 40(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl 56(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 44(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl 60(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 48(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl %r11d,%eax
+ leal -899497514(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ roll $30,%r11d
+ addl %eax,%esi
+ addl 0(%r8),%esi
+ addl 4(%r8),%edi
+ addl 8(%r8),%r11d
+ addl 12(%r8),%r12d
+ addl 16(%r8),%r13d
+ movl %esi,0(%r8)
+ movl %edi,4(%r8)
+ movl %r11d,8(%r8)
+ movl %r12d,12(%r8)
+ movl %r13d,16(%r8)
+
+ subq $1,%r10
+ leaq 64(%r9),%r9
+ jnz .Lloop
+
+ movq 64(%rsp),%rsi
+ movq (%rsi),%r13
+ movq 8(%rsi),%r12
+ movq 16(%rsi),%rbp
+ movq 24(%rsi),%rbx
+ leaq 32(%rsi),%rsp
+.Lepilogue:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_sha1_block_data_order:
+.def sha1_block_data_order_ssse3; .scl 3; .type 32; .endef
+.p2align 4
+sha1_block_data_order_ssse3:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_sha1_block_data_order_ssse3:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+_ssse3_shortcut:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ leaq -144(%rsp),%rsp
+ movaps %xmm6,64+0(%rsp)
+ movaps %xmm7,64+16(%rsp)
+ movaps %xmm8,64+32(%rsp)
+ movaps %xmm9,64+48(%rsp)
+ movaps %xmm10,64+64(%rsp)
+.Lprologue_ssse3:
+ movq %rdi,%r8
+ movq %rsi,%r9
+ movq %rdx,%r10
+
+ shlq $6,%r10
+ addq %r9,%r10
+ leaq K_XX_XX(%rip),%r11
+
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movl %ebx,%esi
+ movl 16(%r8),%ebp
+
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r9
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ jmp .Loop_ssse3
+.p2align 4
+.Loop_ssse3:
+ movdqa %xmm1,%xmm4
+ addl 0(%rsp),%ebp
+ xorl %edx,%ecx
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ roll $5,%eax
+ paddd %xmm3,%xmm9
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%ebp
+ pxor %xmm2,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pxor %xmm8,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm9,48(%rsp)
+ xorl %ecx,%edi
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm10,%xmm9
+ rorl $7,%ebp
+ addl %esi,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa 0(%r11),%xmm10
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm4
+ rorl $7,%edx
+ addl %edi,%ebx
+ movdqa %xmm2,%xmm5
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ roll $5,%ebx
+ paddd %xmm4,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrldq $4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm9
+ addl 20(%rsp),%ebp
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm9,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm10,0(%rsp)
+ xorl %edx,%edi
+ addl %eax,%ebp
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ roll $5,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm9
+ xorl %ecx,%esi
+ addl %ebp,%edx
+ movdqa %xmm8,%xmm10
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ movdqa 16(%r11),%xmm8
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ pxor %xmm10,%xmm5
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movdqa %xmm3,%xmm6
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm5,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ psrldq $4,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm10,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm8,16(%rsp)
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+ xorl %edx,%ecx
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm10
+ xorl %edx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa 16(%r11),%xmm9
+ xorl %ecx,%edi
+ addl %ebp,%edx
+ pxor %xmm8,%xmm6
+ rorl $7,%eax
+ addl %edi,%edx
+ movdqa %xmm4,%xmm7
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm6,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm5,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ pxor %xmm8,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm9,32(%rsp)
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrld $31,%xmm8
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ movdqa %xmm10,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ addl 60(%rsp),%ebp
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa 16(%r11),%xmm10
+ xorl %edx,%edi
+ addl %eax,%ebp
+ pxor %xmm9,%xmm7
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movdqa %xmm7,%xmm9
+ addl 0(%rsp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm1,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ xorl %ecx,%esi
+ addl %ebp,%edx
+ pxor %xmm9,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ psrld $30,%xmm9
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ roll $5,%ecx
+ por %xmm9,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ movdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 16(%rsp),%ebp
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 32(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ xorl %edx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ xorl %eax,%esi
+ addl %edx,%ecx
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ xorl %edx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ xorl %ebp,%edi
+ movdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ecx,%edi
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp
+ andl %edx,%edi
+ pxor %xmm7,%xmm6
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ pxor %xmm9,%xmm6
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%edi
+ rorl $7,%eax
+ psrld $30,%xmm9
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ por %xmm9,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%edi
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ pxor %xmm0,%xmm7
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 48(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ pxor %xmm10,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%edi
+ rorl $7,%ebx
+ psrld $30,%xmm10
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ por %xmm10,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%edi
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx
+ andl %eax,%edi
+ pxor %xmm1,%xmm0
+ andl %ebp,%esi
+ rorl $7,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ pxor %xmm8,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ pslld $2,%xmm0
+ andl %edx,%edi
+ rorl $7,%ecx
+ psrld $30,%xmm8
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ por %xmm8,%xmm0
+ movl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%edi
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ pxor %xmm2,%xmm1
+ andl %eax,%esi
+ rorl $7,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ pxor %xmm9,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %ebp,%edi
+ rorl $7,%edx
+ psrld $30,%xmm9
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ por %xmm9,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax
+ andl %ebp,%edi
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp
+ andl %edx,%esi
+ andl %ecx,%edi
+ rorl $7,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%edi
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ pxor %xmm3,%xmm2
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ pxor %xmm10,%xmm2
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%edi
+ rorl $7,%ebp
+ psrld $30,%xmm10
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm10,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+ rorl $7,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax
+ andl %ebp,%esi
+ andl %edx,%edi
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ cmpq %r10,%r9
+ je .Ldone_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r9
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,0(%rsp)
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ psubd %xmm9,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm9,%xmm1
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movdqa %xmm1,16(%rsp)
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ psubd %xmm9,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ paddd %xmm9,%xmm2
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%rsp)
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ psubd %xmm9,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ addl 12(%r8),%edx
+ movl %eax,0(%r8)
+ addl 16(%r8),%ebp
+ movl %esi,4(%r8)
+ movl %esi,%ebx
+ movl %ecx,8(%r8)
+ movl %edx,12(%r8)
+ movl %ebp,16(%r8)
+ jmp .Loop_ssse3
+
+.p2align 4
+.Ldone_ssse3:
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ movl %eax,0(%r8)
+ addl 12(%r8),%edx
+ movl %esi,4(%r8)
+ addl 16(%r8),%ebp
+ movl %ecx,8(%r8)
+ movl %edx,12(%r8)
+ movl %ebp,16(%r8)
+ movaps 64+0(%rsp),%xmm6
+ movaps 64+16(%rsp),%xmm7
+ movaps 64+32(%rsp),%xmm8
+ movaps 64+48(%rsp),%xmm9
+ movaps 64+64(%rsp),%xmm10
+ leaq 144(%rsp),%rsi
+ movq 0(%rsi),%r12
+ movq 8(%rsi),%rbp
+ movq 16(%rsi),%rbx
+ leaq 24(%rsi),%rsp
+.Lepilogue_ssse3:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_sha1_block_data_order_ssse3:
+.p2align 6
+K_XX_XX:
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align 6
+
+.def se_handler; .scl 3; .type 32; .endef
+.p2align 4
+se_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax
+ movq 248(%r8),%rbx
+
+ leaq .Lprologue(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lcommon_seh_tail
+
+ movq 152(%r8),%rax
+
+ leaq .Lepilogue(%rip),%r10
+ cmpq %r10,%rbx
+ jae .Lcommon_seh_tail
+
+ movq 64(%rax),%rax
+ leaq 32(%rax),%rax
+
+ movq -8(%rax),%rbx
+ movq -16(%rax),%rbp
+ movq -24(%rax),%r12
+ movq -32(%rax),%r13
+ movq %rbx,144(%r8)
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+ movq %r13,224(%r8)
+
+ jmp .Lcommon_seh_tail
+
+
+.def ssse3_handler; .scl 3; .type 32; .endef
+.p2align 4
+ssse3_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax
+ movq 248(%r8),%rbx
+
+ movq 8(%r9),%rsi
+ movq 56(%r9),%r11
+
+ movl 0(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jb .Lcommon_seh_tail
+
+ movq 152(%r8),%rax
+
+ movl 4(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lcommon_seh_tail
+
+ leaq 64(%rax),%rsi
+ leaq 512(%r8),%rdi
+ movl $10,%ecx
+.long 0xa548f3fc
+ leaq 168(%rax),%rax
+
+ movq -8(%rax),%rbx
+ movq -16(%rax),%rbp
+ movq -24(%rax),%r12
+ movq %rbx,144(%r8)
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+
+.Lcommon_seh_tail:
+ movq 8(%rax),%rdi
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8)
+ movq %rsi,168(%r8)
+ movq %rdi,176(%r8)
+
+ movq 40(%r9),%rdi
+ movq %r8,%rsi
+ movl $154,%ecx
+.long 0xa548f3fc
+
+ movq %r9,%rsi
+ xorq %rcx,%rcx
+ movq 8(%rsi),%rdx
+ movq 0(%rsi),%r8
+ movq 16(%rsi),%r9
+ movq 40(%rsi),%r10
+ leaq 56(%rsi),%r11
+ leaq 24(%rsi),%r12
+ movq %r10,32(%rsp)
+ movq %r11,40(%rsp)
+ movq %r12,48(%rsp)
+ movq %rcx,56(%rsp)
+ call *__imp_RtlVirtualUnwind(%rip)
+
+ movl $1,%eax
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+
+
+.section .pdata
+.p2align 2
+.rva .LSEH_begin_sha1_block_data_order
+.rva .LSEH_end_sha1_block_data_order
+.rva .LSEH_info_sha1_block_data_order
+.rva .LSEH_begin_sha1_block_data_order_ssse3
+.rva .LSEH_end_sha1_block_data_order_ssse3
+.rva .LSEH_info_sha1_block_data_order_ssse3
+.section .xdata
+.p2align 3
+.LSEH_info_sha1_block_data_order:
+.byte 9,0,0,0
+.rva se_handler
+.LSEH_info_sha1_block_data_order_ssse3:
+.byte 9,0,0,0
+.rva ssse3_handler
+.rva .Lprologue_ssse3,.Lepilogue_ssse3
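
For orientation: the fully unrolled rounds above are the standard SHA-1 compression function. The K_XX_XX table holds the four 20-round constants (the leal -899497514 rounds at the top of this block use 0xca62c1d6, the last of them), the roll $5/roll $30 pairs are the per-round rotations, and the roll $1 over four XORed stack words is the message-schedule expansion. A minimal portable C sketch of the same computation, shown only as an illustration and independent of the sha1_block_data_order entry points above:

    #include <stdint.h>

    static uint32_t rol32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

    /* Compress one 64-byte block into the five-word chaining state h[]. */
    static void sha1_compress(uint32_t h[5], const unsigned char p[64])
    {
        uint32_t w[80], a, b, c, d, e, f, k, t;
        int i;

        for (i = 0; i < 16; i++)                     /* big-endian message load */
            w[i] = (uint32_t)p[4*i] << 24 | (uint32_t)p[4*i+1] << 16 |
                   (uint32_t)p[4*i+2] << 8 | p[4*i+3];
        for (i = 16; i < 80; i++)                    /* the roll $1 schedule expansion */
            w[i] = rol32(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1);

        a = h[0]; b = h[1]; c = h[2]; d = h[3]; e = h[4];
        for (i = 0; i < 80; i++) {
            if (i < 20)      { f = (b & c) | (~b & d);          k = 0x5a827999; }
            else if (i < 40) { f = b ^ c ^ d;                   k = 0x6ed9eba1; }
            else if (i < 60) { f = (b & c) | (b & d) | (c & d); k = 0x8f1bbcdc; }
            else             { f = b ^ c ^ d;                   k = 0xca62c1d6; }
            t = rol32(a, 5) + f + e + k + w[i];
            e = d; d = c; c = rol32(b, 30); b = a; a = t;
        }
        h[0] += a; h[1] += b; h[2] += c; h[3] += d; h[4] += e;
    }
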
diff --git a/ext/libressl/crypto/sha/sha1_one.c b/ext/libressl/crypto/sha/sha1_one.c
new file mode 100644
index 0000000..57e5220
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha1_one.c
@@ -0,0 +1,81 @@
+/* $OpenBSD: sha1_one.c,v 1.12 2015/09/10 15:56:26 jsing Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <openssl/opensslconf.h>
+
+#include <openssl/crypto.h>
+#include <openssl/sha.h>
+
+#ifndef OPENSSL_NO_SHA1
+unsigned char *SHA1(const unsigned char *d, size_t n, unsigned char *md)
+ {
+ SHA_CTX c;
+ static unsigned char m[SHA_DIGEST_LENGTH];
+
+ if (md == NULL) md=m;
+ if (!SHA1_Init(&c))
+ return NULL;
+ SHA1_Update(&c,d,n);
+ SHA1_Final(md,&c);
+ explicit_bzero(&c,sizeof(c));
+ return(md);
+ }
+#endif
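
SHA1() above is the one-shot wrapper: it runs SHA1_Init/SHA1_Update/SHA1_Final over a single buffer, falls back to a static (and therefore non-thread-safe) buffer when md is NULL, and wipes the context with explicit_bzero() before returning. A minimal caller, assuming the usual <openssl/sha.h> declarations:

    #include <stdio.h>
    #include <openssl/sha.h>

    int main(void)
    {
        const unsigned char msg[] = "abc";
        unsigned char digest[SHA_DIGEST_LENGTH];   /* 20 bytes */
        int i;

        /* Pass our own buffer so the static fallback inside SHA1() is not used. */
        if (SHA1(msg, sizeof(msg) - 1, digest) == NULL)
            return 1;
        for (i = 0; i < SHA_DIGEST_LENGTH; i++)
            printf("%02x", digest[i]);
        putchar('\n');
        return 0;
    }
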
diff --git a/ext/libressl/crypto/sha/sha1dgst.c b/ext/libressl/crypto/sha/sha1dgst.c
new file mode 100644
index 0000000..0c3df49
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha1dgst.c
@@ -0,0 +1,72 @@
+/* $OpenBSD: sha1dgst.c,v 1.14 2015/09/13 21:09:56 doug Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <openssl/opensslconf.h>
+
+#include <openssl/crypto.h>
+
+#if !defined(OPENSSL_NO_SHA1) && !defined(OPENSSL_NO_SHA)
+
+#include <openssl/opensslv.h>
+
+/* The implementation is in ../md32_common.h */
+
+#include "sha_locl.h"
+
+#endif
+
diff --git a/ext/libressl/crypto/sha/sha256-elf-armv4.S b/ext/libressl/crypto/sha/sha256-elf-armv4.S
new file mode 100644
index 0000000..9b155c7
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha256-elf-armv4.S
@@ -0,0 +1,1520 @@
+#include "arm_arch.h"
+
+.text
+.code 32
+
+.type K256,%object
+.align 5
+K256:
+.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.size K256,.-K256
+
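The unrolled rounds below use the standard SHA-256 bit functions; the inline comments name them Sigma1(e), Ch(e,f,g), Sigma0(a), Maj(a,b,c), plus sigma0/sigma1 for the message schedule. A portable C rendering with the same rotation amounts (ror#6/11/25, ror#2/13/22, ror#7/18 with lsr#3, ror#17/19 with lsr#10), shown only for orientation:

    #include <stdint.h>

    static uint32_t ror32(uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }

    static uint32_t Sigma0(uint32_t a) { return ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22); }
    static uint32_t Sigma1(uint32_t e) { return ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25); }
    static uint32_t sigma0(uint32_t x) { return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); }
    static uint32_t sigma1(uint32_t x) { return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); }
    static uint32_t Ch(uint32_t e, uint32_t f, uint32_t g)  { return (e & f) ^ (~e & g); }
    static uint32_t Maj(uint32_t a, uint32_t b, uint32_t c) { return (a & b) ^ (a & c) ^ (b & c); }

Each round then computes T1 = h + Sigma1(e) + Ch(e,f,g) + K256[i] + W[i] and T2 = Sigma0(a) + Maj(a,b,c), and for i >= 16 the schedule is W[i] = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16], which is the update performed in .Lrounds_16_xx.
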
+.global sha256_block_data_order
+.type sha256_block_data_order,%function
+sha256_block_data_order:
+ sub r3,pc,#8 @ sha256_block_data_order
+ add r2,r1,r2,lsl#6 @ len to point at the end of inp
+ stmdb sp!,{r0,r1,r2,r4-r11,lr}
+ ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
+ sub r14,r3,#256 @ K256
+ sub sp,sp,#16*4 @ alloca(X[16])
+.Loop:
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+ ldr r3,[r1],#4
+#else
+ ldrb r3,[r1,#3] @ 0
+ ldrb r12,[r1,#2]
+ ldrb r2,[r1,#1]
+ ldrb r0,[r1],#4
+ orr r3,r3,r12,lsl#8
+ orr r3,r3,r2,lsl#16
+ orr r3,r3,r0,lsl#24
+#endif
+ mov r0,r8,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r8,ror#11
+ eor r2,r9,r10
+#if 0>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 0==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r8,ror#25 @ Sigma1(e)
+ and r2,r2,r8
+ str r3,[sp,#0*4]
+ add r3,r3,r0
+ eor r2,r2,r10 @ Ch(e,f,g)
+ add r3,r3,r11
+ mov r11,r4,ror#2
+ add r3,r3,r2
+ eor r11,r11,r4,ror#13
+ add r3,r3,r12
+ eor r11,r11,r4,ror#22 @ Sigma0(a)
+#if 0>=15
+ ldr r1,[sp,#2*4] @ from BODY_16_xx
+#endif
+ orr r0,r4,r5
+ and r2,r4,r5
+ and r0,r0,r6
+ add r11,r11,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r7,r7,r3
+ add r11,r11,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+ ldr r3,[r1],#4
+#else
+ ldrb r3,[r1,#3] @ 1
+ ldrb r12,[r1,#2]
+ ldrb r2,[r1,#1]
+ ldrb r0,[r1],#4
+ orr r3,r3,r12,lsl#8
+ orr r3,r3,r2,lsl#16
+ orr r3,r3,r0,lsl#24
+#endif
+ mov r0,r7,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r7,ror#11
+ eor r2,r8,r9
+#if 1>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 1==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r7,ror#25 @ Sigma1(e)
+ and r2,r2,r7
+ str r3,[sp,#1*4]
+ add r3,r3,r0
+ eor r2,r2,r9 @ Ch(e,f,g)
+ add r3,r3,r10
+ mov r10,r11,ror#2
+ add r3,r3,r2
+ eor r10,r10,r11,ror#13
+ add r3,r3,r12
+ eor r10,r10,r11,ror#22 @ Sigma0(a)
+#if 1>=15
+ ldr r1,[sp,#3*4] @ from BODY_16_xx
+#endif
+ orr r0,r11,r4
+ and r2,r11,r4
+ and r0,r0,r5
+ add r10,r10,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r6,r6,r3
+ add r10,r10,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+ ldr r3,[r1],#4
+#else
+ ldrb r3,[r1,#3] @ 2
+ ldrb r12,[r1,#2]
+ ldrb r2,[r1,#1]
+ ldrb r0,[r1],#4
+ orr r3,r3,r12,lsl#8
+ orr r3,r3,r2,lsl#16
+ orr r3,r3,r0,lsl#24
+#endif
+ mov r0,r6,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r6,ror#11
+ eor r2,r7,r8
+#if 2>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 2==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r6,ror#25 @ Sigma1(e)
+ and r2,r2,r6
+ str r3,[sp,#2*4]
+ add r3,r3,r0
+ eor r2,r2,r8 @ Ch(e,f,g)
+ add r3,r3,r9
+ mov r9,r10,ror#2
+ add r3,r3,r2
+ eor r9,r9,r10,ror#13
+ add r3,r3,r12
+ eor r9,r9,r10,ror#22 @ Sigma0(a)
+#if 2>=15
+ ldr r1,[sp,#4*4] @ from BODY_16_xx
+#endif
+ orr r0,r10,r11
+ and r2,r10,r11
+ and r0,r0,r4
+ add r9,r9,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r5,r5,r3
+ add r9,r9,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+ ldr r3,[r1],#4
+#else
+ ldrb r3,[r1,#3] @ 3
+ ldrb r12,[r1,#2]
+ ldrb r2,[r1,#1]
+ ldrb r0,[r1],#4
+ orr r3,r3,r12,lsl#8
+ orr r3,r3,r2,lsl#16
+ orr r3,r3,r0,lsl#24
+#endif
+ mov r0,r5,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r5,ror#11
+ eor r2,r6,r7
+#if 3>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 3==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r5,ror#25 @ Sigma1(e)
+ and r2,r2,r5
+ str r3,[sp,#3*4]
+ add r3,r3,r0
+ eor r2,r2,r7 @ Ch(e,f,g)
+ add r3,r3,r8
+ mov r8,r9,ror#2
+ add r3,r3,r2
+ eor r8,r8,r9,ror#13
+ add r3,r3,r12
+ eor r8,r8,r9,ror#22 @ Sigma0(a)
+#if 3>=15
+ ldr r1,[sp,#5*4] @ from BODY_16_xx
+#endif
+ orr r0,r9,r10
+ and r2,r9,r10
+ and r0,r0,r11
+ add r8,r8,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r4,r4,r3
+ add r8,r8,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+ ldr r3,[r1],#4
+#else
+ ldrb r3,[r1,#3] @ 4
+ ldrb r12,[r1,#2]
+ ldrb r2,[r1,#1]
+ ldrb r0,[r1],#4
+ orr r3,r3,r12,lsl#8
+ orr r3,r3,r2,lsl#16
+ orr r3,r3,r0,lsl#24
+#endif
+ mov r0,r4,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r4,ror#11
+ eor r2,r5,r6
+#if 4>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 4==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r4,ror#25 @ Sigma1(e)
+ and r2,r2,r4
+ str r3,[sp,#4*4]
+ add r3,r3,r0
+ eor r2,r2,r6 @ Ch(e,f,g)
+ add r3,r3,r7
+ mov r7,r8,ror#2
+ add r3,r3,r2
+ eor r7,r7,r8,ror#13
+ add r3,r3,r12
+ eor r7,r7,r8,ror#22 @ Sigma0(a)
+#if 4>=15
+ ldr r1,[sp,#6*4] @ from BODY_16_xx
+#endif
+ orr r0,r8,r9
+ and r2,r8,r9
+ and r0,r0,r10
+ add r7,r7,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r11,r11,r3
+ add r7,r7,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+ ldr r3,[r1],#4
+#else
+ ldrb r3,[r1,#3] @ 5
+ ldrb r12,[r1,#2]
+ ldrb r2,[r1,#1]
+ ldrb r0,[r1],#4
+ orr r3,r3,r12,lsl#8
+ orr r3,r3,r2,lsl#16
+ orr r3,r3,r0,lsl#24
+#endif
+ mov r0,r11,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r11,ror#11
+ eor r2,r4,r5
+#if 5>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 5==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r11,ror#25 @ Sigma1(e)
+ and r2,r2,r11
+ str r3,[sp,#5*4]
+ add r3,r3,r0
+ eor r2,r2,r5 @ Ch(e,f,g)
+ add r3,r3,r6
+ mov r6,r7,ror#2
+ add r3,r3,r2
+ eor r6,r6,r7,ror#13
+ add r3,r3,r12
+ eor r6,r6,r7,ror#22 @ Sigma0(a)
+#if 5>=15
+ ldr r1,[sp,#7*4] @ from BODY_16_xx
+#endif
+ orr r0,r7,r8
+ and r2,r7,r8
+ and r0,r0,r9
+ add r6,r6,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r10,r10,r3
+ add r6,r6,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+ ldr r3,[r1],#4
+#else
+ ldrb r3,[r1,#3] @ 6
+ ldrb r12,[r1,#2]
+ ldrb r2,[r1,#1]
+ ldrb r0,[r1],#4
+ orr r3,r3,r12,lsl#8
+ orr r3,r3,r2,lsl#16
+ orr r3,r3,r0,lsl#24
+#endif
+ mov r0,r10,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r10,ror#11
+ eor r2,r11,r4
+#if 6>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 6==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r10,ror#25 @ Sigma1(e)
+ and r2,r2,r10
+ str r3,[sp,#6*4]
+ add r3,r3,r0
+ eor r2,r2,r4 @ Ch(e,f,g)
+ add r3,r3,r5
+ mov r5,r6,ror#2
+ add r3,r3,r2
+ eor r5,r5,r6,ror#13
+ add r3,r3,r12
+ eor r5,r5,r6,ror#22 @ Sigma0(a)
+#if 6>=15
+ ldr r1,[sp,#8*4] @ from BODY_16_xx
+#endif
+ orr r0,r6,r7
+ and r2,r6,r7
+ and r0,r0,r8
+ add r5,r5,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r9,r9,r3
+ add r5,r5,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+ ldr r3,[r1],#4
+#else
+ ldrb r3,[r1,#3] @ 7
+ ldrb r12,[r1,#2]
+ ldrb r2,[r1,#1]
+ ldrb r0,[r1],#4
+ orr r3,r3,r12,lsl#8
+ orr r3,r3,r2,lsl#16
+ orr r3,r3,r0,lsl#24
+#endif
+ mov r0,r9,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r9,ror#11
+ eor r2,r10,r11
+#if 7>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 7==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r9,ror#25 @ Sigma1(e)
+ and r2,r2,r9
+ str r3,[sp,#7*4]
+ add r3,r3,r0
+ eor r2,r2,r11 @ Ch(e,f,g)
+ add r3,r3,r4
+ mov r4,r5,ror#2
+ add r3,r3,r2
+ eor r4,r4,r5,ror#13
+ add r3,r3,r12
+ eor r4,r4,r5,ror#22 @ Sigma0(a)
+#if 7>=15
+ ldr r1,[sp,#9*4] @ from BODY_16_xx
+#endif
+ orr r0,r5,r6
+ and r2,r5,r6
+ and r0,r0,r7
+ add r4,r4,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r8,r8,r3
+ add r4,r4,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+ ldr r3,[r1],#4
+#else
+ ldrb r3,[r1,#3] @ 8
+ ldrb r12,[r1,#2]
+ ldrb r2,[r1,#1]
+ ldrb r0,[r1],#4
+ orr r3,r3,r12,lsl#8
+ orr r3,r3,r2,lsl#16
+ orr r3,r3,r0,lsl#24
+#endif
+ mov r0,r8,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r8,ror#11
+ eor r2,r9,r10
+#if 8>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 8==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r8,ror#25 @ Sigma1(e)
+ and r2,r2,r8
+ str r3,[sp,#8*4]
+ add r3,r3,r0
+ eor r2,r2,r10 @ Ch(e,f,g)
+ add r3,r3,r11
+ mov r11,r4,ror#2
+ add r3,r3,r2
+ eor r11,r11,r4,ror#13
+ add r3,r3,r12
+ eor r11,r11,r4,ror#22 @ Sigma0(a)
+#if 8>=15
+ ldr r1,[sp,#10*4] @ from BODY_16_xx
+#endif
+ orr r0,r4,r5
+ and r2,r4,r5
+ and r0,r0,r6
+ add r11,r11,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r7,r7,r3
+ add r11,r11,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+ ldr r3,[r1],#4
+#else
+ ldrb r3,[r1,#3] @ 9
+ ldrb r12,[r1,#2]
+ ldrb r2,[r1,#1]
+ ldrb r0,[r1],#4
+ orr r3,r3,r12,lsl#8
+ orr r3,r3,r2,lsl#16
+ orr r3,r3,r0,lsl#24
+#endif
+ mov r0,r7,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r7,ror#11
+ eor r2,r8,r9
+#if 9>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 9==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r7,ror#25 @ Sigma1(e)
+ and r2,r2,r7
+ str r3,[sp,#9*4]
+ add r3,r3,r0
+ eor r2,r2,r9 @ Ch(e,f,g)
+ add r3,r3,r10
+ mov r10,r11,ror#2
+ add r3,r3,r2
+ eor r10,r10,r11,ror#13
+ add r3,r3,r12
+ eor r10,r10,r11,ror#22 @ Sigma0(a)
+#if 9>=15
+ ldr r1,[sp,#11*4] @ from BODY_16_xx
+#endif
+ orr r0,r11,r4
+ and r2,r11,r4
+ and r0,r0,r5
+ add r10,r10,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r6,r6,r3
+ add r10,r10,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+ ldr r3,[r1],#4
+#else
+ ldrb r3,[r1,#3] @ 10
+ ldrb r12,[r1,#2]
+ ldrb r2,[r1,#1]
+ ldrb r0,[r1],#4
+ orr r3,r3,r12,lsl#8
+ orr r3,r3,r2,lsl#16
+ orr r3,r3,r0,lsl#24
+#endif
+ mov r0,r6,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r6,ror#11
+ eor r2,r7,r8
+#if 10>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 10==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r6,ror#25 @ Sigma1(e)
+ and r2,r2,r6
+ str r3,[sp,#10*4]
+ add r3,r3,r0
+ eor r2,r2,r8 @ Ch(e,f,g)
+ add r3,r3,r9
+ mov r9,r10,ror#2
+ add r3,r3,r2
+ eor r9,r9,r10,ror#13
+ add r3,r3,r12
+ eor r9,r9,r10,ror#22 @ Sigma0(a)
+#if 10>=15
+ ldr r1,[sp,#12*4] @ from BODY_16_xx
+#endif
+ orr r0,r10,r11
+ and r2,r10,r11
+ and r0,r0,r4
+ add r9,r9,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r5,r5,r3
+ add r9,r9,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+ ldr r3,[r1],#4
+#else
+ ldrb r3,[r1,#3] @ 11
+ ldrb r12,[r1,#2]
+ ldrb r2,[r1,#1]
+ ldrb r0,[r1],#4
+ orr r3,r3,r12,lsl#8
+ orr r3,r3,r2,lsl#16
+ orr r3,r3,r0,lsl#24
+#endif
+ mov r0,r5,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r5,ror#11
+ eor r2,r6,r7
+#if 11>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 11==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r5,ror#25 @ Sigma1(e)
+ and r2,r2,r5
+ str r3,[sp,#11*4]
+ add r3,r3,r0
+ eor r2,r2,r7 @ Ch(e,f,g)
+ add r3,r3,r8
+ mov r8,r9,ror#2
+ add r3,r3,r2
+ eor r8,r8,r9,ror#13
+ add r3,r3,r12
+ eor r8,r8,r9,ror#22 @ Sigma0(a)
+#if 11>=15
+ ldr r1,[sp,#13*4] @ from BODY_16_xx
+#endif
+ orr r0,r9,r10
+ and r2,r9,r10
+ and r0,r0,r11
+ add r8,r8,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r4,r4,r3
+ add r8,r8,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+ ldr r3,[r1],#4
+#else
+ ldrb r3,[r1,#3] @ 12
+ ldrb r12,[r1,#2]
+ ldrb r2,[r1,#1]
+ ldrb r0,[r1],#4
+ orr r3,r3,r12,lsl#8
+ orr r3,r3,r2,lsl#16
+ orr r3,r3,r0,lsl#24
+#endif
+ mov r0,r4,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r4,ror#11
+ eor r2,r5,r6
+#if 12>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 12==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r4,ror#25 @ Sigma1(e)
+ and r2,r2,r4
+ str r3,[sp,#12*4]
+ add r3,r3,r0
+ eor r2,r2,r6 @ Ch(e,f,g)
+ add r3,r3,r7
+ mov r7,r8,ror#2
+ add r3,r3,r2
+ eor r7,r7,r8,ror#13
+ add r3,r3,r12
+ eor r7,r7,r8,ror#22 @ Sigma0(a)
+#if 12>=15
+ ldr r1,[sp,#14*4] @ from BODY_16_xx
+#endif
+ orr r0,r8,r9
+ and r2,r8,r9
+ and r0,r0,r10
+ add r7,r7,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r11,r11,r3
+ add r7,r7,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+ ldr r3,[r1],#4
+#else
+ ldrb r3,[r1,#3] @ 13
+ ldrb r12,[r1,#2]
+ ldrb r2,[r1,#1]
+ ldrb r0,[r1],#4
+ orr r3,r3,r12,lsl#8
+ orr r3,r3,r2,lsl#16
+ orr r3,r3,r0,lsl#24
+#endif
+ mov r0,r11,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r11,ror#11
+ eor r2,r4,r5
+#if 13>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 13==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r11,ror#25 @ Sigma1(e)
+ and r2,r2,r11
+ str r3,[sp,#13*4]
+ add r3,r3,r0
+ eor r2,r2,r5 @ Ch(e,f,g)
+ add r3,r3,r6
+ mov r6,r7,ror#2
+ add r3,r3,r2
+ eor r6,r6,r7,ror#13
+ add r3,r3,r12
+ eor r6,r6,r7,ror#22 @ Sigma0(a)
+#if 13>=15
+ ldr r1,[sp,#15*4] @ from BODY_16_xx
+#endif
+ orr r0,r7,r8
+ and r2,r7,r8
+ and r0,r0,r9
+ add r6,r6,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r10,r10,r3
+ add r6,r6,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+ ldr r3,[r1],#4
+#else
+ ldrb r3,[r1,#3] @ 14
+ ldrb r12,[r1,#2]
+ ldrb r2,[r1,#1]
+ ldrb r0,[r1],#4
+ orr r3,r3,r12,lsl#8
+ orr r3,r3,r2,lsl#16
+ orr r3,r3,r0,lsl#24
+#endif
+ mov r0,r10,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r10,ror#11
+ eor r2,r11,r4
+#if 14>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 14==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r10,ror#25 @ Sigma1(e)
+ and r2,r2,r10
+ str r3,[sp,#14*4]
+ add r3,r3,r0
+ eor r2,r2,r4 @ Ch(e,f,g)
+ add r3,r3,r5
+ mov r5,r6,ror#2
+ add r3,r3,r2
+ eor r5,r5,r6,ror#13
+ add r3,r3,r12
+ eor r5,r5,r6,ror#22 @ Sigma0(a)
+#if 14>=15
+ ldr r1,[sp,#0*4] @ from BODY_16_xx
+#endif
+ orr r0,r6,r7
+ and r2,r6,r7
+ and r0,r0,r8
+ add r5,r5,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r9,r9,r3
+ add r5,r5,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+ ldr r3,[r1],#4
+#else
+ ldrb r3,[r1,#3] @ 15
+ ldrb r12,[r1,#2]
+ ldrb r2,[r1,#1]
+ ldrb r0,[r1],#4
+ orr r3,r3,r12,lsl#8
+ orr r3,r3,r2,lsl#16
+ orr r3,r3,r0,lsl#24
+#endif
+ mov r0,r9,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r9,ror#11
+ eor r2,r10,r11
+#if 15>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 15==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r9,ror#25 @ Sigma1(e)
+ and r2,r2,r9
+ str r3,[sp,#15*4]
+ add r3,r3,r0
+ eor r2,r2,r11 @ Ch(e,f,g)
+ add r3,r3,r4
+ mov r4,r5,ror#2
+ add r3,r3,r2
+ eor r4,r4,r5,ror#13
+ add r3,r3,r12
+ eor r4,r4,r5,ror#22 @ Sigma0(a)
+#if 15>=15
+ ldr r1,[sp,#1*4] @ from BODY_16_xx
+#endif
+ orr r0,r5,r6
+ and r2,r5,r6
+ and r0,r0,r7
+ add r4,r4,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r8,r8,r3
+ add r4,r4,r0
+.Lrounds_16_xx:
+ @ ldr r1,[sp,#1*4] @ 16
+ ldr r12,[sp,#14*4]
+ mov r0,r1,ror#7
+ ldr r3,[sp,#0*4]
+ eor r0,r0,r1,ror#18
+ ldr r2,[sp,#9*4]
+ eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
+ mov r1,r12,ror#17
+ add r3,r3,r0
+ eor r1,r1,r12,ror#19
+ add r3,r3,r2
+ eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
+ @ add r3,r3,r1
+ mov r0,r8,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r8,ror#11
+ eor r2,r9,r10
+#if 16>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 16==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r8,ror#25 @ Sigma1(e)
+ and r2,r2,r8
+ str r3,[sp,#0*4]
+ add r3,r3,r0
+ eor r2,r2,r10 @ Ch(e,f,g)
+ add r3,r3,r11
+ mov r11,r4,ror#2
+ add r3,r3,r2
+ eor r11,r11,r4,ror#13
+ add r3,r3,r12
+ eor r11,r11,r4,ror#22 @ Sigma0(a)
+#if 16>=15
+ ldr r1,[sp,#2*4] @ from BODY_16_xx
+#endif
+ orr r0,r4,r5
+ and r2,r4,r5
+ and r0,r0,r6
+ add r11,r11,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r7,r7,r3
+ add r11,r11,r0
+ @ ldr r1,[sp,#2*4] @ 17
+ ldr r12,[sp,#15*4]
+ mov r0,r1,ror#7
+ ldr r3,[sp,#1*4]
+ eor r0,r0,r1,ror#18
+ ldr r2,[sp,#10*4]
+ eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
+ mov r1,r12,ror#17
+ add r3,r3,r0
+ eor r1,r1,r12,ror#19
+ add r3,r3,r2
+ eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
+ @ add r3,r3,r1
+ mov r0,r7,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r7,ror#11
+ eor r2,r8,r9
+#if 17>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 17==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r7,ror#25 @ Sigma1(e)
+ and r2,r2,r7
+ str r3,[sp,#1*4]
+ add r3,r3,r0
+ eor r2,r2,r9 @ Ch(e,f,g)
+ add r3,r3,r10
+ mov r10,r11,ror#2
+ add r3,r3,r2
+ eor r10,r10,r11,ror#13
+ add r3,r3,r12
+ eor r10,r10,r11,ror#22 @ Sigma0(a)
+#if 17>=15
+ ldr r1,[sp,#3*4] @ from BODY_16_xx
+#endif
+ orr r0,r11,r4
+ and r2,r11,r4
+ and r0,r0,r5
+ add r10,r10,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r6,r6,r3
+ add r10,r10,r0
+ @ ldr r1,[sp,#3*4] @ 18
+ ldr r12,[sp,#0*4]
+ mov r0,r1,ror#7
+ ldr r3,[sp,#2*4]
+ eor r0,r0,r1,ror#18
+ ldr r2,[sp,#11*4]
+ eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
+ mov r1,r12,ror#17
+ add r3,r3,r0
+ eor r1,r1,r12,ror#19
+ add r3,r3,r2
+ eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
+ @ add r3,r3,r1
+ mov r0,r6,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r6,ror#11
+ eor r2,r7,r8
+#if 18>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 18==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r6,ror#25 @ Sigma1(e)
+ and r2,r2,r6
+ str r3,[sp,#2*4]
+ add r3,r3,r0
+ eor r2,r2,r8 @ Ch(e,f,g)
+ add r3,r3,r9
+ mov r9,r10,ror#2
+ add r3,r3,r2
+ eor r9,r9,r10,ror#13
+ add r3,r3,r12
+ eor r9,r9,r10,ror#22 @ Sigma0(a)
+#if 18>=15
+ ldr r1,[sp,#4*4] @ from BODY_16_xx
+#endif
+ orr r0,r10,r11
+ and r2,r10,r11
+ and r0,r0,r4
+ add r9,r9,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r5,r5,r3
+ add r9,r9,r0
+ @ ldr r1,[sp,#4*4] @ 19
+ ldr r12,[sp,#1*4]
+ mov r0,r1,ror#7
+ ldr r3,[sp,#3*4]
+ eor r0,r0,r1,ror#18
+ ldr r2,[sp,#12*4]
+ eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
+ mov r1,r12,ror#17
+ add r3,r3,r0
+ eor r1,r1,r12,ror#19
+ add r3,r3,r2
+ eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
+ @ add r3,r3,r1
+ mov r0,r5,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r5,ror#11
+ eor r2,r6,r7
+#if 19>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 19==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r5,ror#25 @ Sigma1(e)
+ and r2,r2,r5
+ str r3,[sp,#3*4]
+ add r3,r3,r0
+ eor r2,r2,r7 @ Ch(e,f,g)
+ add r3,r3,r8
+ mov r8,r9,ror#2
+ add r3,r3,r2
+ eor r8,r8,r9,ror#13
+ add r3,r3,r12
+ eor r8,r8,r9,ror#22 @ Sigma0(a)
+#if 19>=15
+ ldr r1,[sp,#5*4] @ from BODY_16_xx
+#endif
+ orr r0,r9,r10
+ and r2,r9,r10
+ and r0,r0,r11
+ add r8,r8,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r4,r4,r3
+ add r8,r8,r0
+ @ ldr r1,[sp,#5*4] @ 20
+ ldr r12,[sp,#2*4]
+ mov r0,r1,ror#7
+ ldr r3,[sp,#4*4]
+ eor r0,r0,r1,ror#18
+ ldr r2,[sp,#13*4]
+ eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
+ mov r1,r12,ror#17
+ add r3,r3,r0
+ eor r1,r1,r12,ror#19
+ add r3,r3,r2
+ eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
+ @ add r3,r3,r1
+ mov r0,r4,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r4,ror#11
+ eor r2,r5,r6
+#if 20>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 20==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r4,ror#25 @ Sigma1(e)
+ and r2,r2,r4
+ str r3,[sp,#4*4]
+ add r3,r3,r0
+ eor r2,r2,r6 @ Ch(e,f,g)
+ add r3,r3,r7
+ mov r7,r8,ror#2
+ add r3,r3,r2
+ eor r7,r7,r8,ror#13
+ add r3,r3,r12
+ eor r7,r7,r8,ror#22 @ Sigma0(a)
+#if 20>=15
+ ldr r1,[sp,#6*4] @ from BODY_16_xx
+#endif
+ orr r0,r8,r9
+ and r2,r8,r9
+ and r0,r0,r10
+ add r7,r7,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r11,r11,r3
+ add r7,r7,r0
+ @ ldr r1,[sp,#6*4] @ 21
+ ldr r12,[sp,#3*4]
+ mov r0,r1,ror#7
+ ldr r3,[sp,#5*4]
+ eor r0,r0,r1,ror#18
+ ldr r2,[sp,#14*4]
+ eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
+ mov r1,r12,ror#17
+ add r3,r3,r0
+ eor r1,r1,r12,ror#19
+ add r3,r3,r2
+ eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
+ @ add r3,r3,r1
+ mov r0,r11,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r11,ror#11
+ eor r2,r4,r5
+#if 21>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 21==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r11,ror#25 @ Sigma1(e)
+ and r2,r2,r11
+ str r3,[sp,#5*4]
+ add r3,r3,r0
+ eor r2,r2,r5 @ Ch(e,f,g)
+ add r3,r3,r6
+ mov r6,r7,ror#2
+ add r3,r3,r2
+ eor r6,r6,r7,ror#13
+ add r3,r3,r12
+ eor r6,r6,r7,ror#22 @ Sigma0(a)
+#if 21>=15
+ ldr r1,[sp,#7*4] @ from BODY_16_xx
+#endif
+ orr r0,r7,r8
+ and r2,r7,r8
+ and r0,r0,r9
+ add r6,r6,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r10,r10,r3
+ add r6,r6,r0
+ @ ldr r1,[sp,#7*4] @ 22
+ ldr r12,[sp,#4*4]
+ mov r0,r1,ror#7
+ ldr r3,[sp,#6*4]
+ eor r0,r0,r1,ror#18
+ ldr r2,[sp,#15*4]
+ eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
+ mov r1,r12,ror#17
+ add r3,r3,r0
+ eor r1,r1,r12,ror#19
+ add r3,r3,r2
+ eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
+ @ add r3,r3,r1
+ mov r0,r10,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r10,ror#11
+ eor r2,r11,r4
+#if 22>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 22==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r10,ror#25 @ Sigma1(e)
+ and r2,r2,r10
+ str r3,[sp,#6*4]
+ add r3,r3,r0
+ eor r2,r2,r4 @ Ch(e,f,g)
+ add r3,r3,r5
+ mov r5,r6,ror#2
+ add r3,r3,r2
+ eor r5,r5,r6,ror#13
+ add r3,r3,r12
+ eor r5,r5,r6,ror#22 @ Sigma0(a)
+#if 22>=15
+ ldr r1,[sp,#8*4] @ from BODY_16_xx
+#endif
+ orr r0,r6,r7
+ and r2,r6,r7
+ and r0,r0,r8
+ add r5,r5,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r9,r9,r3
+ add r5,r5,r0
+ @ ldr r1,[sp,#8*4] @ 23
+ ldr r12,[sp,#5*4]
+ mov r0,r1,ror#7
+ ldr r3,[sp,#7*4]
+ eor r0,r0,r1,ror#18
+ ldr r2,[sp,#0*4]
+ eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
+ mov r1,r12,ror#17
+ add r3,r3,r0
+ eor r1,r1,r12,ror#19
+ add r3,r3,r2
+ eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
+ @ add r3,r3,r1
+ mov r0,r9,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r9,ror#11
+ eor r2,r10,r11
+#if 23>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 23==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r9,ror#25 @ Sigma1(e)
+ and r2,r2,r9
+ str r3,[sp,#7*4]
+ add r3,r3,r0
+ eor r2,r2,r11 @ Ch(e,f,g)
+ add r3,r3,r4
+ mov r4,r5,ror#2
+ add r3,r3,r2
+ eor r4,r4,r5,ror#13
+ add r3,r3,r12
+ eor r4,r4,r5,ror#22 @ Sigma0(a)
+#if 23>=15
+ ldr r1,[sp,#9*4] @ from BODY_16_xx
+#endif
+ orr r0,r5,r6
+ and r2,r5,r6
+ and r0,r0,r7
+ add r4,r4,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r8,r8,r3
+ add r4,r4,r0
+ @ ldr r1,[sp,#9*4] @ 24
+ ldr r12,[sp,#6*4]
+ mov r0,r1,ror#7
+ ldr r3,[sp,#8*4]
+ eor r0,r0,r1,ror#18
+ ldr r2,[sp,#1*4]
+ eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
+ mov r1,r12,ror#17
+ add r3,r3,r0
+ eor r1,r1,r12,ror#19
+ add r3,r3,r2
+ eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
+ @ add r3,r3,r1
+ mov r0,r8,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r8,ror#11
+ eor r2,r9,r10
+#if 24>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 24==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r8,ror#25 @ Sigma1(e)
+ and r2,r2,r8
+ str r3,[sp,#8*4]
+ add r3,r3,r0
+ eor r2,r2,r10 @ Ch(e,f,g)
+ add r3,r3,r11
+ mov r11,r4,ror#2
+ add r3,r3,r2
+ eor r11,r11,r4,ror#13
+ add r3,r3,r12
+ eor r11,r11,r4,ror#22 @ Sigma0(a)
+#if 24>=15
+ ldr r1,[sp,#10*4] @ from BODY_16_xx
+#endif
+ orr r0,r4,r5
+ and r2,r4,r5
+ and r0,r0,r6
+ add r11,r11,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r7,r7,r3
+ add r11,r11,r0
+ @ ldr r1,[sp,#10*4] @ 25
+ ldr r12,[sp,#7*4]
+ mov r0,r1,ror#7
+ ldr r3,[sp,#9*4]
+ eor r0,r0,r1,ror#18
+ ldr r2,[sp,#2*4]
+ eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
+ mov r1,r12,ror#17
+ add r3,r3,r0
+ eor r1,r1,r12,ror#19
+ add r3,r3,r2
+ eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
+ @ add r3,r3,r1
+ mov r0,r7,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r7,ror#11
+ eor r2,r8,r9
+#if 25>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 25==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r7,ror#25 @ Sigma1(e)
+ and r2,r2,r7
+ str r3,[sp,#9*4]
+ add r3,r3,r0
+ eor r2,r2,r9 @ Ch(e,f,g)
+ add r3,r3,r10
+ mov r10,r11,ror#2
+ add r3,r3,r2
+ eor r10,r10,r11,ror#13
+ add r3,r3,r12
+ eor r10,r10,r11,ror#22 @ Sigma0(a)
+#if 25>=15
+ ldr r1,[sp,#11*4] @ from BODY_16_xx
+#endif
+ orr r0,r11,r4
+ and r2,r11,r4
+ and r0,r0,r5
+ add r10,r10,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r6,r6,r3
+ add r10,r10,r0
+ @ ldr r1,[sp,#11*4] @ 26
+ ldr r12,[sp,#8*4]
+ mov r0,r1,ror#7
+ ldr r3,[sp,#10*4]
+ eor r0,r0,r1,ror#18
+ ldr r2,[sp,#3*4]
+ eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
+ mov r1,r12,ror#17
+ add r3,r3,r0
+ eor r1,r1,r12,ror#19
+ add r3,r3,r2
+ eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
+ @ add r3,r3,r1
+ mov r0,r6,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r6,ror#11
+ eor r2,r7,r8
+#if 26>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 26==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r6,ror#25 @ Sigma1(e)
+ and r2,r2,r6
+ str r3,[sp,#10*4]
+ add r3,r3,r0
+ eor r2,r2,r8 @ Ch(e,f,g)
+ add r3,r3,r9
+ mov r9,r10,ror#2
+ add r3,r3,r2
+ eor r9,r9,r10,ror#13
+ add r3,r3,r12
+ eor r9,r9,r10,ror#22 @ Sigma0(a)
+#if 26>=15
+ ldr r1,[sp,#12*4] @ from BODY_16_xx
+#endif
+ orr r0,r10,r11
+ and r2,r10,r11
+ and r0,r0,r4
+ add r9,r9,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r5,r5,r3
+ add r9,r9,r0
+ @ ldr r1,[sp,#12*4] @ 27
+ ldr r12,[sp,#9*4]
+ mov r0,r1,ror#7
+ ldr r3,[sp,#11*4]
+ eor r0,r0,r1,ror#18
+ ldr r2,[sp,#4*4]
+ eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
+ mov r1,r12,ror#17
+ add r3,r3,r0
+ eor r1,r1,r12,ror#19
+ add r3,r3,r2
+ eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
+ @ add r3,r3,r1
+ mov r0,r5,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r5,ror#11
+ eor r2,r6,r7
+#if 27>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 27==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r5,ror#25 @ Sigma1(e)
+ and r2,r2,r5
+ str r3,[sp,#11*4]
+ add r3,r3,r0
+ eor r2,r2,r7 @ Ch(e,f,g)
+ add r3,r3,r8
+ mov r8,r9,ror#2
+ add r3,r3,r2
+ eor r8,r8,r9,ror#13
+ add r3,r3,r12
+ eor r8,r8,r9,ror#22 @ Sigma0(a)
+#if 27>=15
+ ldr r1,[sp,#13*4] @ from BODY_16_xx
+#endif
+ orr r0,r9,r10
+ and r2,r9,r10
+ and r0,r0,r11
+ add r8,r8,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r4,r4,r3
+ add r8,r8,r0
+ @ ldr r1,[sp,#13*4] @ 28
+ ldr r12,[sp,#10*4]
+ mov r0,r1,ror#7
+ ldr r3,[sp,#12*4]
+ eor r0,r0,r1,ror#18
+ ldr r2,[sp,#5*4]
+ eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
+ mov r1,r12,ror#17
+ add r3,r3,r0
+ eor r1,r1,r12,ror#19
+ add r3,r3,r2
+ eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
+ @ add r3,r3,r1
+ mov r0,r4,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r4,ror#11
+ eor r2,r5,r6
+#if 28>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 28==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r4,ror#25 @ Sigma1(e)
+ and r2,r2,r4
+ str r3,[sp,#12*4]
+ add r3,r3,r0
+ eor r2,r2,r6 @ Ch(e,f,g)
+ add r3,r3,r7
+ mov r7,r8,ror#2
+ add r3,r3,r2
+ eor r7,r7,r8,ror#13
+ add r3,r3,r12
+ eor r7,r7,r8,ror#22 @ Sigma0(a)
+#if 28>=15
+ ldr r1,[sp,#14*4] @ from BODY_16_xx
+#endif
+ orr r0,r8,r9
+ and r2,r8,r9
+ and r0,r0,r10
+ add r7,r7,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r11,r11,r3
+ add r7,r7,r0
+ @ ldr r1,[sp,#14*4] @ 29
+ ldr r12,[sp,#11*4]
+ mov r0,r1,ror#7
+ ldr r3,[sp,#13*4]
+ eor r0,r0,r1,ror#18
+ ldr r2,[sp,#6*4]
+ eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
+ mov r1,r12,ror#17
+ add r3,r3,r0
+ eor r1,r1,r12,ror#19
+ add r3,r3,r2
+ eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
+ @ add r3,r3,r1
+ mov r0,r11,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r11,ror#11
+ eor r2,r4,r5
+#if 29>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 29==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r11,ror#25 @ Sigma1(e)
+ and r2,r2,r11
+ str r3,[sp,#13*4]
+ add r3,r3,r0
+ eor r2,r2,r5 @ Ch(e,f,g)
+ add r3,r3,r6
+ mov r6,r7,ror#2
+ add r3,r3,r2
+ eor r6,r6,r7,ror#13
+ add r3,r3,r12
+ eor r6,r6,r7,ror#22 @ Sigma0(a)
+#if 29>=15
+ ldr r1,[sp,#15*4] @ from BODY_16_xx
+#endif
+ orr r0,r7,r8
+ and r2,r7,r8
+ and r0,r0,r9
+ add r6,r6,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r10,r10,r3
+ add r6,r6,r0
+ @ ldr r1,[sp,#15*4] @ 30
+ ldr r12,[sp,#12*4]
+ mov r0,r1,ror#7
+ ldr r3,[sp,#14*4]
+ eor r0,r0,r1,ror#18
+ ldr r2,[sp,#7*4]
+ eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
+ mov r1,r12,ror#17
+ add r3,r3,r0
+ eor r1,r1,r12,ror#19
+ add r3,r3,r2
+ eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
+ @ add r3,r3,r1
+ mov r0,r10,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r10,ror#11
+ eor r2,r11,r4
+#if 30>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 30==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r10,ror#25 @ Sigma1(e)
+ and r2,r2,r10
+ str r3,[sp,#14*4]
+ add r3,r3,r0
+ eor r2,r2,r4 @ Ch(e,f,g)
+ add r3,r3,r5
+ mov r5,r6,ror#2
+ add r3,r3,r2
+ eor r5,r5,r6,ror#13
+ add r3,r3,r12
+ eor r5,r5,r6,ror#22 @ Sigma0(a)
+#if 30>=15
+ ldr r1,[sp,#0*4] @ from BODY_16_xx
+#endif
+ orr r0,r6,r7
+ and r2,r6,r7
+ and r0,r0,r8
+ add r5,r5,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r9,r9,r3
+ add r5,r5,r0
+ @ ldr r1,[sp,#0*4] @ 31
+ ldr r12,[sp,#13*4]
+ mov r0,r1,ror#7
+ ldr r3,[sp,#15*4]
+ eor r0,r0,r1,ror#18
+ ldr r2,[sp,#8*4]
+ eor r0,r0,r1,lsr#3 @ sigma0(X[i+1])
+ mov r1,r12,ror#17
+ add r3,r3,r0
+ eor r1,r1,r12,ror#19
+ add r3,r3,r2
+ eor r1,r1,r12,lsr#10 @ sigma1(X[i+14])
+ @ add r3,r3,r1
+ mov r0,r9,ror#6
+ ldr r12,[r14],#4 @ *K256++
+ eor r0,r0,r9,ror#11
+ eor r2,r10,r11
+#if 31>=16
+ add r3,r3,r1 @ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+ rev r3,r3
+#endif
+#if 31==15
+ str r1,[sp,#17*4] @ leave room for r1
+#endif
+ eor r0,r0,r9,ror#25 @ Sigma1(e)
+ and r2,r2,r9
+ str r3,[sp,#15*4]
+ add r3,r3,r0
+ eor r2,r2,r11 @ Ch(e,f,g)
+ add r3,r3,r4
+ mov r4,r5,ror#2
+ add r3,r3,r2
+ eor r4,r4,r5,ror#13
+ add r3,r3,r12
+ eor r4,r4,r5,ror#22 @ Sigma0(a)
+#if 31>=15
+ ldr r1,[sp,#1*4] @ from BODY_16_xx
+#endif
+ orr r0,r5,r6
+ and r2,r5,r6
+ and r0,r0,r7
+ add r4,r4,r3
+ orr r0,r0,r2 @ Maj(a,b,c)
+ add r8,r8,r3
+ add r4,r4,r0
+ and r12,r12,#0xff
+ cmp r12,#0xf2
+ bne .Lrounds_16_xx
+
+ ldr r3,[sp,#16*4] @ pull ctx
+ ldr r0,[r3,#0]
+ ldr r2,[r3,#4]
+ ldr r12,[r3,#8]
+ add r4,r4,r0
+ ldr r0,[r3,#12]
+ add r5,r5,r2
+ ldr r2,[r3,#16]
+ add r6,r6,r12
+ ldr r12,[r3,#20]
+ add r7,r7,r0
+ ldr r0,[r3,#24]
+ add r8,r8,r2
+ ldr r2,[r3,#28]
+ add r9,r9,r12
+ ldr r1,[sp,#17*4] @ pull inp
+ ldr r12,[sp,#18*4] @ pull inp+len
+ add r10,r10,r0
+ add r11,r11,r2
+ stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
+ cmp r1,r12
+ sub r14,r14,#256 @ rewind Ktbl
+ bne .Loop
+
+ add sp,sp,#19*4 @ destroy frame
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r11,pc}
+#else
+ ldmia sp!,{r4-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size sha256_block_data_order,.-sha256_block_data_order
+.asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
+.align 2
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/sha/sha256-elf-x86_64.S b/ext/libressl/crypto/sha/sha256-elf-x86_64.S
new file mode 100644
index 0000000..9eea6a7
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha256-elf-x86_64.S
@@ -0,0 +1,1782 @@
+#include "x86_arch.h"
+.text
+
+.globl sha256_block_data_order
+.type sha256_block_data_order,@function
+.align 16
+sha256_block_data_order:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $64+32,%rsp
+ leaq (%rsi,%rdx,4),%rdx
+ andq $-64,%rsp
+ movq %rdi,64+0(%rsp)
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+ movq %r11,64+24(%rsp)
+.Lprologue:
+
+ leaq K256(%rip),%rbp
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ movl 16(%rdi),%r8d
+ movl 20(%rdi),%r9d
+ movl 24(%rdi),%r10d
+ movl 28(%rdi),%r11d
+ jmp .Lloop
+
+.align 16
+.Lloop:
+ xorq %rdi,%rdi
+ movl 0(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,0(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 4(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,4(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 8(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,8(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 12(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,12(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 16(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,16(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 20(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,20(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 24(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,24(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 28(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,28(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ movl 32(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,32(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 36(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,36(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 40(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,40(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 44(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,44(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 48(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,48(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 52(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,52(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 56(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,56(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 60(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,60(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ jmp .Lrounds_16_xx
+.align 16
+.Lrounds_16_xx:
+ movl 4(%rsp),%r13d
+ movl 56(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 36(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 0(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r14d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,0(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 8(%rsp),%r13d
+ movl 60(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 40(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 4(%rsp),%r12d
+ movl %edx,%r13d
+ addl %r14d,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,4(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 12(%rsp),%r13d
+ movl 0(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 44(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 8(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r14d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,8(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 16(%rsp),%r13d
+ movl 4(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 48(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 12(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %r14d,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,12(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 20(%rsp),%r13d
+ movl 8(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 52(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 16(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r14d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,16(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 24(%rsp),%r13d
+ movl 12(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 56(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 20(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %r14d,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,20(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 28(%rsp),%r13d
+ movl 16(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 60(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 24(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r14d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,24(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 32(%rsp),%r13d
+ movl 20(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 0(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 28(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %r14d,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,28(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ movl 36(%rsp),%r13d
+ movl 24(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 4(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 32(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r14d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,32(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 40(%rsp),%r13d
+ movl 28(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 8(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 36(%rsp),%r12d
+ movl %edx,%r13d
+ addl %r14d,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,36(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 44(%rsp),%r13d
+ movl 32(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 12(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 40(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r14d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,40(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 48(%rsp),%r13d
+ movl 36(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 16(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 44(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %r14d,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,44(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 52(%rsp),%r13d
+ movl 40(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 20(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 48(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r14d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,48(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 56(%rsp),%r13d
+ movl 44(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 24(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 52(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %r14d,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,52(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 60(%rsp),%r13d
+ movl 48(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 28(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 56(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r14d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,56(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 0(%rsp),%r13d
+ movl 52(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 32(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 60(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %r14d,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,60(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ cmpq $64,%rdi
+ jb .Lrounds_16_xx
+
+ movq 64+0(%rsp),%rdi
+ leaq 64(%rsi),%rsi
+
+ addl 0(%rdi),%eax
+ addl 4(%rdi),%ebx
+ addl 8(%rdi),%ecx
+ addl 12(%rdi),%edx
+ addl 16(%rdi),%r8d
+ addl 20(%rdi),%r9d
+ addl 24(%rdi),%r10d
+ addl 28(%rdi),%r11d
+
+ cmpq 64+16(%rsp),%rsi
+
+ movl %eax,0(%rdi)
+ movl %ebx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %edx,12(%rdi)
+ movl %r8d,16(%rdi)
+ movl %r9d,20(%rdi)
+ movl %r10d,24(%rdi)
+ movl %r11d,28(%rdi)
+ jb .Lloop
+
+ movq 64+24(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue:
+ retq
+.size sha256_block_data_order,.-sha256_block_data_order
+.align 64
+.type K256,@object
+K256:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/sha/sha256-macosx-x86_64.S b/ext/libressl/crypto/sha/sha256-macosx-x86_64.S
new file mode 100644
index 0000000..4b468b7
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha256-macosx-x86_64.S
@@ -0,0 +1,1779 @@
+#include "x86_arch.h"
+.text
+
+.globl _sha256_block_data_order
+
+.p2align 4
+_sha256_block_data_order:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $64+32,%rsp
+ leaq (%rsi,%rdx,4),%rdx
+ andq $-64,%rsp
+ movq %rdi,64+0(%rsp)
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+ movq %r11,64+24(%rsp)
+L$prologue:
+
+ leaq K256(%rip),%rbp
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ movl 16(%rdi),%r8d
+ movl 20(%rdi),%r9d
+ movl 24(%rdi),%r10d
+ movl 28(%rdi),%r11d
+ jmp L$loop
+
+.p2align 4
+L$loop:
+ xorq %rdi,%rdi
+ movl 0(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,0(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 4(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,4(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 8(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,8(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 12(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,12(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 16(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,16(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 20(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,20(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 24(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,24(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 28(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,28(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ movl 32(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,32(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 36(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,36(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 40(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,40(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 44(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,44(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 48(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,48(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 52(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,52(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 56(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,56(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 60(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,60(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ jmp L$rounds_16_xx
+.p2align 4
+L$rounds_16_xx:
+ movl 4(%rsp),%r13d
+ movl 56(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 36(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 0(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r14d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,0(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 8(%rsp),%r13d
+ movl 60(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 40(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 4(%rsp),%r12d
+ movl %edx,%r13d
+ addl %r14d,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,4(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 12(%rsp),%r13d
+ movl 0(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 44(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 8(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r14d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,8(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 16(%rsp),%r13d
+ movl 4(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 48(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 12(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %r14d,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,12(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 20(%rsp),%r13d
+ movl 8(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 52(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 16(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r14d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,16(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 24(%rsp),%r13d
+ movl 12(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 56(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 20(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %r14d,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,20(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 28(%rsp),%r13d
+ movl 16(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 60(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 24(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r14d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,24(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 32(%rsp),%r13d
+ movl 20(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 0(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 28(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %r14d,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,28(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ movl 36(%rsp),%r13d
+ movl 24(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 4(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 32(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r14d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,32(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 40(%rsp),%r13d
+ movl 28(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 8(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 36(%rsp),%r12d
+ movl %edx,%r13d
+ addl %r14d,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,36(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 44(%rsp),%r13d
+ movl 32(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 12(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 40(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r14d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,40(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 48(%rsp),%r13d
+ movl 36(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 16(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 44(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %r14d,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,44(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 52(%rsp),%r13d
+ movl 40(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 20(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 48(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r14d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,48(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 56(%rsp),%r13d
+ movl 44(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 24(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 52(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %r14d,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,52(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 60(%rsp),%r13d
+ movl 48(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 28(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 56(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r14d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,56(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 0(%rsp),%r13d
+ movl 52(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 32(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 60(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %r14d,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,60(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ cmpq $64,%rdi
+ jb L$rounds_16_xx
+
+ movq 64+0(%rsp),%rdi
+ leaq 64(%rsi),%rsi
+
+ addl 0(%rdi),%eax
+ addl 4(%rdi),%ebx
+ addl 8(%rdi),%ecx
+ addl 12(%rdi),%edx
+ addl 16(%rdi),%r8d
+ addl 20(%rdi),%r9d
+ addl 24(%rdi),%r10d
+ addl 28(%rdi),%r11d
+
+ cmpq 64+16(%rsp),%rsi
+
+ movl %eax,0(%rdi)
+ movl %ebx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %edx,12(%rdi)
+ movl %r8d,16(%rdi)
+ movl %r9d,20(%rdi)
+ movl %r10d,24(%rdi)
+ movl %r11d,28(%rdi)
+ jb L$loop
+
+ movq 64+24(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+L$epilogue:
+ retq
+
+.p2align 6
+
+K256:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
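The L$rounds_16_xx loop in the file above keeps only the 16 most recent schedule words in the 64-byte scratch area at (%rsp) and expands each new W[t] in place before running the normal round on it. A minimal C sketch of that expansion step, assuming nothing beyond FIPS 180-4 (rotr32 and expand_w are illustrative names, not identifiers from this patch):

#include <stdint.h>

static inline uint32_t
rotr32(uint32_t x, unsigned n)
{
	return (x >> n) | (x << (32 - n));
}

/* sigma0/sigma1 from FIPS 180-4; the asm builds each one out of two
 * rotates, one shift and two xors, exactly as these expressions expand. */
#define sigma0(x) (rotr32((x), 7) ^ rotr32((x), 18) ^ ((x) >> 3))
#define sigma1(x) (rotr32((x), 17) ^ rotr32((x), 19) ^ ((x) >> 10))

/* W[] is the 16-entry circular buffer the asm keeps on the stack
 * (W[t & 15] sits at 4*(t & 15)(%rsp)); on entry W[t & 15] still holds
 * W[t-16], so the update is a single in-place accumulation. */
static uint32_t
expand_w(uint32_t W[16], unsigned t)
{
	W[t & 15] += sigma1(W[(t - 2) & 15]) + W[(t - 7) & 15] +
	    sigma0(W[(t - 15) & 15]);
	return W[t & 15];
}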
diff --git a/ext/libressl/crypto/sha/sha256-masm-x86_64.S b/ext/libressl/crypto/sha/sha256-masm-x86_64.S
new file mode 100644
index 0000000..33c705d
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha256-masm-x86_64.S
@@ -0,0 +1,1864 @@
+; 1 "crypto/sha/sha256-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/sha/sha256-masm-x86_64.S.tmp" 2
+OPTION DOTNAME
+
+; 1 "./crypto/x86_arch.h" 1
+
+
+; 16 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+; 40 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 "crypto/sha/sha256-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+
+PUBLIC sha256_block_data_order
+
+ALIGN 16
+sha256_block_data_order PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha256_block_data_order::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ mov r11,rsp
+ shl rdx,4
+ sub rsp,16*4+4*8
+ lea rdx,QWORD PTR[rdx*4+rsi]
+ and rsp,-64
+ mov QWORD PTR[((64+0))+rsp],rdi
+ mov QWORD PTR[((64+8))+rsp],rsi
+ mov QWORD PTR[((64+16))+rsp],rdx
+ mov QWORD PTR[((64+24))+rsp],r11
+$L$prologue::
+
+ lea rbp,QWORD PTR[K256]
+
+ mov eax,DWORD PTR[rdi]
+ mov ebx,DWORD PTR[4+rdi]
+ mov ecx,DWORD PTR[8+rdi]
+ mov edx,DWORD PTR[12+rdi]
+ mov r8d,DWORD PTR[16+rdi]
+ mov r9d,DWORD PTR[20+rdi]
+ mov r10d,DWORD PTR[24+rdi]
+ mov r11d,DWORD PTR[28+rdi]
+ jmp $L$loop
+
+ALIGN 16
+$L$loop::
+ xor rdi,rdi
+ mov r12d,DWORD PTR[rsi]
+ mov r13d,r8d
+ mov r14d,eax
+ bswap r12d
+ ror r13d,14
+ mov r15d,r9d
+ mov DWORD PTR[rsp],r12d
+
+ ror r14d,9
+ xor r13d,r8d
+ xor r15d,r10d
+
+ ror r13d,5
+ add r12d,r11d
+ xor r14d,eax
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,r8d
+ mov r11d,ebx
+
+ ror r14d,11
+ xor r13d,r8d
+ xor r15d,r10d
+
+ xor r11d,ecx
+ xor r14d,eax
+ add r12d,r15d
+ mov r15d,ebx
+
+ ror r13d,6
+ and r11d,eax
+ and r15d,ecx
+
+ ror r14d,2
+ add r12d,r13d
+ add r11d,r15d
+
+ add edx,r12d
+ add r11d,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add r11d,r14d
+
+ mov r12d,DWORD PTR[4+rsi]
+ mov r13d,edx
+ mov r14d,r11d
+ bswap r12d
+ ror r13d,14
+ mov r15d,r8d
+ mov DWORD PTR[4+rsp],r12d
+
+ ror r14d,9
+ xor r13d,edx
+ xor r15d,r9d
+
+ ror r13d,5
+ add r12d,r10d
+ xor r14d,r11d
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,edx
+ mov r10d,eax
+
+ ror r14d,11
+ xor r13d,edx
+ xor r15d,r9d
+
+ xor r10d,ebx
+ xor r14d,r11d
+ add r12d,r15d
+ mov r15d,eax
+
+ ror r13d,6
+ and r10d,r11d
+ and r15d,ebx
+
+ ror r14d,2
+ add r12d,r13d
+ add r10d,r15d
+
+ add ecx,r12d
+ add r10d,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add r10d,r14d
+
+ mov r12d,DWORD PTR[8+rsi]
+ mov r13d,ecx
+ mov r14d,r10d
+ bswap r12d
+ ror r13d,14
+ mov r15d,edx
+ mov DWORD PTR[8+rsp],r12d
+
+ ror r14d,9
+ xor r13d,ecx
+ xor r15d,r8d
+
+ ror r13d,5
+ add r12d,r9d
+ xor r14d,r10d
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,ecx
+ mov r9d,r11d
+
+ ror r14d,11
+ xor r13d,ecx
+ xor r15d,r8d
+
+ xor r9d,eax
+ xor r14d,r10d
+ add r12d,r15d
+ mov r15d,r11d
+
+ ror r13d,6
+ and r9d,r10d
+ and r15d,eax
+
+ ror r14d,2
+ add r12d,r13d
+ add r9d,r15d
+
+ add ebx,r12d
+ add r9d,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add r9d,r14d
+
+ mov r12d,DWORD PTR[12+rsi]
+ mov r13d,ebx
+ mov r14d,r9d
+ bswap r12d
+ ror r13d,14
+ mov r15d,ecx
+ mov DWORD PTR[12+rsp],r12d
+
+ ror r14d,9
+ xor r13d,ebx
+ xor r15d,edx
+
+ ror r13d,5
+ add r12d,r8d
+ xor r14d,r9d
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,ebx
+ mov r8d,r10d
+
+ ror r14d,11
+ xor r13d,ebx
+ xor r15d,edx
+
+ xor r8d,r11d
+ xor r14d,r9d
+ add r12d,r15d
+ mov r15d,r10d
+
+ ror r13d,6
+ and r8d,r9d
+ and r15d,r11d
+
+ ror r14d,2
+ add r12d,r13d
+ add r8d,r15d
+
+ add eax,r12d
+ add r8d,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add r8d,r14d
+
+ mov r12d,DWORD PTR[16+rsi]
+ mov r13d,eax
+ mov r14d,r8d
+ bswap r12d
+ ror r13d,14
+ mov r15d,ebx
+ mov DWORD PTR[16+rsp],r12d
+
+ ror r14d,9
+ xor r13d,eax
+ xor r15d,ecx
+
+ ror r13d,5
+ add r12d,edx
+ xor r14d,r8d
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,eax
+ mov edx,r9d
+
+ ror r14d,11
+ xor r13d,eax
+ xor r15d,ecx
+
+ xor edx,r10d
+ xor r14d,r8d
+ add r12d,r15d
+ mov r15d,r9d
+
+ ror r13d,6
+ and edx,r8d
+ and r15d,r10d
+
+ ror r14d,2
+ add r12d,r13d
+ add edx,r15d
+
+ add r11d,r12d
+ add edx,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add edx,r14d
+
+ mov r12d,DWORD PTR[20+rsi]
+ mov r13d,r11d
+ mov r14d,edx
+ bswap r12d
+ ror r13d,14
+ mov r15d,eax
+ mov DWORD PTR[20+rsp],r12d
+
+ ror r14d,9
+ xor r13d,r11d
+ xor r15d,ebx
+
+ ror r13d,5
+ add r12d,ecx
+ xor r14d,edx
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,r11d
+ mov ecx,r8d
+
+ ror r14d,11
+ xor r13d,r11d
+ xor r15d,ebx
+
+ xor ecx,r9d
+ xor r14d,edx
+ add r12d,r15d
+ mov r15d,r8d
+
+ ror r13d,6
+ and ecx,edx
+ and r15d,r9d
+
+ ror r14d,2
+ add r12d,r13d
+ add ecx,r15d
+
+ add r10d,r12d
+ add ecx,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add ecx,r14d
+
+ mov r12d,DWORD PTR[24+rsi]
+ mov r13d,r10d
+ mov r14d,ecx
+ bswap r12d
+ ror r13d,14
+ mov r15d,r11d
+ mov DWORD PTR[24+rsp],r12d
+
+ ror r14d,9
+ xor r13d,r10d
+ xor r15d,eax
+
+ ror r13d,5
+ add r12d,ebx
+ xor r14d,ecx
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,r10d
+ mov ebx,edx
+
+ ror r14d,11
+ xor r13d,r10d
+ xor r15d,eax
+
+ xor ebx,r8d
+ xor r14d,ecx
+ add r12d,r15d
+ mov r15d,edx
+
+ ror r13d,6
+ and ebx,ecx
+ and r15d,r8d
+
+ ror r14d,2
+ add r12d,r13d
+ add ebx,r15d
+
+ add r9d,r12d
+ add ebx,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add ebx,r14d
+
+ mov r12d,DWORD PTR[28+rsi]
+ mov r13d,r9d
+ mov r14d,ebx
+ bswap r12d
+ ror r13d,14
+ mov r15d,r10d
+ mov DWORD PTR[28+rsp],r12d
+
+ ror r14d,9
+ xor r13d,r9d
+ xor r15d,r11d
+
+ ror r13d,5
+ add r12d,eax
+ xor r14d,ebx
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,r9d
+ mov eax,ecx
+
+ ror r14d,11
+ xor r13d,r9d
+ xor r15d,r11d
+
+ xor eax,edx
+ xor r14d,ebx
+ add r12d,r15d
+ mov r15d,ecx
+
+ ror r13d,6
+ and eax,ebx
+ and r15d,edx
+
+ ror r14d,2
+ add r12d,r13d
+ add eax,r15d
+
+ add r8d,r12d
+ add eax,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add eax,r14d
+
+ mov r12d,DWORD PTR[32+rsi]
+ mov r13d,r8d
+ mov r14d,eax
+ bswap r12d
+ ror r13d,14
+ mov r15d,r9d
+ mov DWORD PTR[32+rsp],r12d
+
+ ror r14d,9
+ xor r13d,r8d
+ xor r15d,r10d
+
+ ror r13d,5
+ add r12d,r11d
+ xor r14d,eax
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,r8d
+ mov r11d,ebx
+
+ ror r14d,11
+ xor r13d,r8d
+ xor r15d,r10d
+
+ xor r11d,ecx
+ xor r14d,eax
+ add r12d,r15d
+ mov r15d,ebx
+
+ ror r13d,6
+ and r11d,eax
+ and r15d,ecx
+
+ ror r14d,2
+ add r12d,r13d
+ add r11d,r15d
+
+ add edx,r12d
+ add r11d,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add r11d,r14d
+
+ mov r12d,DWORD PTR[36+rsi]
+ mov r13d,edx
+ mov r14d,r11d
+ bswap r12d
+ ror r13d,14
+ mov r15d,r8d
+ mov DWORD PTR[36+rsp],r12d
+
+ ror r14d,9
+ xor r13d,edx
+ xor r15d,r9d
+
+ ror r13d,5
+ add r12d,r10d
+ xor r14d,r11d
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,edx
+ mov r10d,eax
+
+ ror r14d,11
+ xor r13d,edx
+ xor r15d,r9d
+
+ xor r10d,ebx
+ xor r14d,r11d
+ add r12d,r15d
+ mov r15d,eax
+
+ ror r13d,6
+ and r10d,r11d
+ and r15d,ebx
+
+ ror r14d,2
+ add r12d,r13d
+ add r10d,r15d
+
+ add ecx,r12d
+ add r10d,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add r10d,r14d
+
+ mov r12d,DWORD PTR[40+rsi]
+ mov r13d,ecx
+ mov r14d,r10d
+ bswap r12d
+ ror r13d,14
+ mov r15d,edx
+ mov DWORD PTR[40+rsp],r12d
+
+ ror r14d,9
+ xor r13d,ecx
+ xor r15d,r8d
+
+ ror r13d,5
+ add r12d,r9d
+ xor r14d,r10d
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,ecx
+ mov r9d,r11d
+
+ ror r14d,11
+ xor r13d,ecx
+ xor r15d,r8d
+
+ xor r9d,eax
+ xor r14d,r10d
+ add r12d,r15d
+ mov r15d,r11d
+
+ ror r13d,6
+ and r9d,r10d
+ and r15d,eax
+
+ ror r14d,2
+ add r12d,r13d
+ add r9d,r15d
+
+ add ebx,r12d
+ add r9d,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add r9d,r14d
+
+ mov r12d,DWORD PTR[44+rsi]
+ mov r13d,ebx
+ mov r14d,r9d
+ bswap r12d
+ ror r13d,14
+ mov r15d,ecx
+ mov DWORD PTR[44+rsp],r12d
+
+ ror r14d,9
+ xor r13d,ebx
+ xor r15d,edx
+
+ ror r13d,5
+ add r12d,r8d
+ xor r14d,r9d
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,ebx
+ mov r8d,r10d
+
+ ror r14d,11
+ xor r13d,ebx
+ xor r15d,edx
+
+ xor r8d,r11d
+ xor r14d,r9d
+ add r12d,r15d
+ mov r15d,r10d
+
+ ror r13d,6
+ and r8d,r9d
+ and r15d,r11d
+
+ ror r14d,2
+ add r12d,r13d
+ add r8d,r15d
+
+ add eax,r12d
+ add r8d,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add r8d,r14d
+
+ mov r12d,DWORD PTR[48+rsi]
+ mov r13d,eax
+ mov r14d,r8d
+ bswap r12d
+ ror r13d,14
+ mov r15d,ebx
+ mov DWORD PTR[48+rsp],r12d
+
+ ror r14d,9
+ xor r13d,eax
+ xor r15d,ecx
+
+ ror r13d,5
+ add r12d,edx
+ xor r14d,r8d
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,eax
+ mov edx,r9d
+
+ ror r14d,11
+ xor r13d,eax
+ xor r15d,ecx
+
+ xor edx,r10d
+ xor r14d,r8d
+ add r12d,r15d
+ mov r15d,r9d
+
+ ror r13d,6
+ and edx,r8d
+ and r15d,r10d
+
+ ror r14d,2
+ add r12d,r13d
+ add edx,r15d
+
+ add r11d,r12d
+ add edx,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add edx,r14d
+
+ mov r12d,DWORD PTR[52+rsi]
+ mov r13d,r11d
+ mov r14d,edx
+ bswap r12d
+ ror r13d,14
+ mov r15d,eax
+ mov DWORD PTR[52+rsp],r12d
+
+ ror r14d,9
+ xor r13d,r11d
+ xor r15d,ebx
+
+ ror r13d,5
+ add r12d,ecx
+ xor r14d,edx
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,r11d
+ mov ecx,r8d
+
+ ror r14d,11
+ xor r13d,r11d
+ xor r15d,ebx
+
+ xor ecx,r9d
+ xor r14d,edx
+ add r12d,r15d
+ mov r15d,r8d
+
+ ror r13d,6
+ and ecx,edx
+ and r15d,r9d
+
+ ror r14d,2
+ add r12d,r13d
+ add ecx,r15d
+
+ add r10d,r12d
+ add ecx,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add ecx,r14d
+
+ mov r12d,DWORD PTR[56+rsi]
+ mov r13d,r10d
+ mov r14d,ecx
+ bswap r12d
+ ror r13d,14
+ mov r15d,r11d
+ mov DWORD PTR[56+rsp],r12d
+
+ ror r14d,9
+ xor r13d,r10d
+ xor r15d,eax
+
+ ror r13d,5
+ add r12d,ebx
+ xor r14d,ecx
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,r10d
+ mov ebx,edx
+
+ ror r14d,11
+ xor r13d,r10d
+ xor r15d,eax
+
+ xor ebx,r8d
+ xor r14d,ecx
+ add r12d,r15d
+ mov r15d,edx
+
+ ror r13d,6
+ and ebx,ecx
+ and r15d,r8d
+
+ ror r14d,2
+ add r12d,r13d
+ add ebx,r15d
+
+ add r9d,r12d
+ add ebx,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add ebx,r14d
+
+ mov r12d,DWORD PTR[60+rsi]
+ mov r13d,r9d
+ mov r14d,ebx
+ bswap r12d
+ ror r13d,14
+ mov r15d,r10d
+ mov DWORD PTR[60+rsp],r12d
+
+ ror r14d,9
+ xor r13d,r9d
+ xor r15d,r11d
+
+ ror r13d,5
+ add r12d,eax
+ xor r14d,ebx
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,r9d
+ mov eax,ecx
+
+ ror r14d,11
+ xor r13d,r9d
+ xor r15d,r11d
+
+ xor eax,edx
+ xor r14d,ebx
+ add r12d,r15d
+ mov r15d,ecx
+
+ ror r13d,6
+ and eax,ebx
+ and r15d,edx
+
+ ror r14d,2
+ add r12d,r13d
+ add eax,r15d
+
+ add r8d,r12d
+ add eax,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add eax,r14d
+
+ jmp $L$rounds_16_xx
+ALIGN 16
+$L$rounds_16_xx::
+ mov r13d,DWORD PTR[4+rsp]
+ mov r14d,DWORD PTR[56+rsp]
+ mov r12d,r13d
+ mov r15d,r14d
+
+ ror r12d,11
+ xor r12d,r13d
+ shr r13d,3
+
+ ror r12d,7
+ xor r13d,r12d
+ mov r12d,DWORD PTR[36+rsp]
+
+ ror r15d,2
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ add r12d,r13d
+ xor r14d,r15d
+
+ add r12d,DWORD PTR[rsp]
+ mov r13d,r8d
+ add r12d,r14d
+ mov r14d,eax
+ ror r13d,14
+ mov r15d,r9d
+ mov DWORD PTR[rsp],r12d
+
+ ror r14d,9
+ xor r13d,r8d
+ xor r15d,r10d
+
+ ror r13d,5
+ add r12d,r11d
+ xor r14d,eax
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,r8d
+ mov r11d,ebx
+
+ ror r14d,11
+ xor r13d,r8d
+ xor r15d,r10d
+
+ xor r11d,ecx
+ xor r14d,eax
+ add r12d,r15d
+ mov r15d,ebx
+
+ ror r13d,6
+ and r11d,eax
+ and r15d,ecx
+
+ ror r14d,2
+ add r12d,r13d
+ add r11d,r15d
+
+ add edx,r12d
+ add r11d,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add r11d,r14d
+
+ mov r13d,DWORD PTR[8+rsp]
+ mov r14d,DWORD PTR[60+rsp]
+ mov r12d,r13d
+ mov r15d,r14d
+
+ ror r12d,11
+ xor r12d,r13d
+ shr r13d,3
+
+ ror r12d,7
+ xor r13d,r12d
+ mov r12d,DWORD PTR[40+rsp]
+
+ ror r15d,2
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ add r12d,r13d
+ xor r14d,r15d
+
+ add r12d,DWORD PTR[4+rsp]
+ mov r13d,edx
+ add r12d,r14d
+ mov r14d,r11d
+ ror r13d,14
+ mov r15d,r8d
+ mov DWORD PTR[4+rsp],r12d
+
+ ror r14d,9
+ xor r13d,edx
+ xor r15d,r9d
+
+ ror r13d,5
+ add r12d,r10d
+ xor r14d,r11d
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,edx
+ mov r10d,eax
+
+ ror r14d,11
+ xor r13d,edx
+ xor r15d,r9d
+
+ xor r10d,ebx
+ xor r14d,r11d
+ add r12d,r15d
+ mov r15d,eax
+
+ ror r13d,6
+ and r10d,r11d
+ and r15d,ebx
+
+ ror r14d,2
+ add r12d,r13d
+ add r10d,r15d
+
+ add ecx,r12d
+ add r10d,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add r10d,r14d
+
+ mov r13d,DWORD PTR[12+rsp]
+ mov r14d,DWORD PTR[rsp]
+ mov r12d,r13d
+ mov r15d,r14d
+
+ ror r12d,11
+ xor r12d,r13d
+ shr r13d,3
+
+ ror r12d,7
+ xor r13d,r12d
+ mov r12d,DWORD PTR[44+rsp]
+
+ ror r15d,2
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ add r12d,r13d
+ xor r14d,r15d
+
+ add r12d,DWORD PTR[8+rsp]
+ mov r13d,ecx
+ add r12d,r14d
+ mov r14d,r10d
+ ror r13d,14
+ mov r15d,edx
+ mov DWORD PTR[8+rsp],r12d
+
+ ror r14d,9
+ xor r13d,ecx
+ xor r15d,r8d
+
+ ror r13d,5
+ add r12d,r9d
+ xor r14d,r10d
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,ecx
+ mov r9d,r11d
+
+ ror r14d,11
+ xor r13d,ecx
+ xor r15d,r8d
+
+ xor r9d,eax
+ xor r14d,r10d
+ add r12d,r15d
+ mov r15d,r11d
+
+ ror r13d,6
+ and r9d,r10d
+ and r15d,eax
+
+ ror r14d,2
+ add r12d,r13d
+ add r9d,r15d
+
+ add ebx,r12d
+ add r9d,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add r9d,r14d
+
+ mov r13d,DWORD PTR[16+rsp]
+ mov r14d,DWORD PTR[4+rsp]
+ mov r12d,r13d
+ mov r15d,r14d
+
+ ror r12d,11
+ xor r12d,r13d
+ shr r13d,3
+
+ ror r12d,7
+ xor r13d,r12d
+ mov r12d,DWORD PTR[48+rsp]
+
+ ror r15d,2
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ add r12d,r13d
+ xor r14d,r15d
+
+ add r12d,DWORD PTR[12+rsp]
+ mov r13d,ebx
+ add r12d,r14d
+ mov r14d,r9d
+ ror r13d,14
+ mov r15d,ecx
+ mov DWORD PTR[12+rsp],r12d
+
+ ror r14d,9
+ xor r13d,ebx
+ xor r15d,edx
+
+ ror r13d,5
+ add r12d,r8d
+ xor r14d,r9d
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,ebx
+ mov r8d,r10d
+
+ ror r14d,11
+ xor r13d,ebx
+ xor r15d,edx
+
+ xor r8d,r11d
+ xor r14d,r9d
+ add r12d,r15d
+ mov r15d,r10d
+
+ ror r13d,6
+ and r8d,r9d
+ and r15d,r11d
+
+ ror r14d,2
+ add r12d,r13d
+ add r8d,r15d
+
+ add eax,r12d
+ add r8d,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add r8d,r14d
+
+ mov r13d,DWORD PTR[20+rsp]
+ mov r14d,DWORD PTR[8+rsp]
+ mov r12d,r13d
+ mov r15d,r14d
+
+ ror r12d,11
+ xor r12d,r13d
+ shr r13d,3
+
+ ror r12d,7
+ xor r13d,r12d
+ mov r12d,DWORD PTR[52+rsp]
+
+ ror r15d,2
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ add r12d,r13d
+ xor r14d,r15d
+
+ add r12d,DWORD PTR[16+rsp]
+ mov r13d,eax
+ add r12d,r14d
+ mov r14d,r8d
+ ror r13d,14
+ mov r15d,ebx
+ mov DWORD PTR[16+rsp],r12d
+
+ ror r14d,9
+ xor r13d,eax
+ xor r15d,ecx
+
+ ror r13d,5
+ add r12d,edx
+ xor r14d,r8d
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,eax
+ mov edx,r9d
+
+ ror r14d,11
+ xor r13d,eax
+ xor r15d,ecx
+
+ xor edx,r10d
+ xor r14d,r8d
+ add r12d,r15d
+ mov r15d,r9d
+
+ ror r13d,6
+ and edx,r8d
+ and r15d,r10d
+
+ ror r14d,2
+ add r12d,r13d
+ add edx,r15d
+
+ add r11d,r12d
+ add edx,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add edx,r14d
+
+ mov r13d,DWORD PTR[24+rsp]
+ mov r14d,DWORD PTR[12+rsp]
+ mov r12d,r13d
+ mov r15d,r14d
+
+ ror r12d,11
+ xor r12d,r13d
+ shr r13d,3
+
+ ror r12d,7
+ xor r13d,r12d
+ mov r12d,DWORD PTR[56+rsp]
+
+ ror r15d,2
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ add r12d,r13d
+ xor r14d,r15d
+
+ add r12d,DWORD PTR[20+rsp]
+ mov r13d,r11d
+ add r12d,r14d
+ mov r14d,edx
+ ror r13d,14
+ mov r15d,eax
+ mov DWORD PTR[20+rsp],r12d
+
+ ror r14d,9
+ xor r13d,r11d
+ xor r15d,ebx
+
+ ror r13d,5
+ add r12d,ecx
+ xor r14d,edx
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,r11d
+ mov ecx,r8d
+
+ ror r14d,11
+ xor r13d,r11d
+ xor r15d,ebx
+
+ xor ecx,r9d
+ xor r14d,edx
+ add r12d,r15d
+ mov r15d,r8d
+
+ ror r13d,6
+ and ecx,edx
+ and r15d,r9d
+
+ ror r14d,2
+ add r12d,r13d
+ add ecx,r15d
+
+ add r10d,r12d
+ add ecx,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add ecx,r14d
+
+ mov r13d,DWORD PTR[28+rsp]
+ mov r14d,DWORD PTR[16+rsp]
+ mov r12d,r13d
+ mov r15d,r14d
+
+ ror r12d,11
+ xor r12d,r13d
+ shr r13d,3
+
+ ror r12d,7
+ xor r13d,r12d
+ mov r12d,DWORD PTR[60+rsp]
+
+ ror r15d,2
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ add r12d,r13d
+ xor r14d,r15d
+
+ add r12d,DWORD PTR[24+rsp]
+ mov r13d,r10d
+ add r12d,r14d
+ mov r14d,ecx
+ ror r13d,14
+ mov r15d,r11d
+ mov DWORD PTR[24+rsp],r12d
+
+ ror r14d,9
+ xor r13d,r10d
+ xor r15d,eax
+
+ ror r13d,5
+ add r12d,ebx
+ xor r14d,ecx
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,r10d
+ mov ebx,edx
+
+ ror r14d,11
+ xor r13d,r10d
+ xor r15d,eax
+
+ xor ebx,r8d
+ xor r14d,ecx
+ add r12d,r15d
+ mov r15d,edx
+
+ ror r13d,6
+ and ebx,ecx
+ and r15d,r8d
+
+ ror r14d,2
+ add r12d,r13d
+ add ebx,r15d
+
+ add r9d,r12d
+ add ebx,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add ebx,r14d
+
+ mov r13d,DWORD PTR[32+rsp]
+ mov r14d,DWORD PTR[20+rsp]
+ mov r12d,r13d
+ mov r15d,r14d
+
+ ror r12d,11
+ xor r12d,r13d
+ shr r13d,3
+
+ ror r12d,7
+ xor r13d,r12d
+ mov r12d,DWORD PTR[rsp]
+
+ ror r15d,2
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ add r12d,r13d
+ xor r14d,r15d
+
+ add r12d,DWORD PTR[28+rsp]
+ mov r13d,r9d
+ add r12d,r14d
+ mov r14d,ebx
+ ror r13d,14
+ mov r15d,r10d
+ mov DWORD PTR[28+rsp],r12d
+
+ ror r14d,9
+ xor r13d,r9d
+ xor r15d,r11d
+
+ ror r13d,5
+ add r12d,eax
+ xor r14d,ebx
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,r9d
+ mov eax,ecx
+
+ ror r14d,11
+ xor r13d,r9d
+ xor r15d,r11d
+
+ xor eax,edx
+ xor r14d,ebx
+ add r12d,r15d
+ mov r15d,ecx
+
+ ror r13d,6
+ and eax,ebx
+ and r15d,edx
+
+ ror r14d,2
+ add r12d,r13d
+ add eax,r15d
+
+ add r8d,r12d
+ add eax,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add eax,r14d
+
+ mov r13d,DWORD PTR[36+rsp]
+ mov r14d,DWORD PTR[24+rsp]
+ mov r12d,r13d
+ mov r15d,r14d
+
+ ror r12d,11
+ xor r12d,r13d
+ shr r13d,3
+
+ ror r12d,7
+ xor r13d,r12d
+ mov r12d,DWORD PTR[4+rsp]
+
+ ror r15d,2
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ add r12d,r13d
+ xor r14d,r15d
+
+ add r12d,DWORD PTR[32+rsp]
+ mov r13d,r8d
+ add r12d,r14d
+ mov r14d,eax
+ ror r13d,14
+ mov r15d,r9d
+ mov DWORD PTR[32+rsp],r12d
+
+ ror r14d,9
+ xor r13d,r8d
+ xor r15d,r10d
+
+ ror r13d,5
+ add r12d,r11d
+ xor r14d,eax
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,r8d
+ mov r11d,ebx
+
+ ror r14d,11
+ xor r13d,r8d
+ xor r15d,r10d
+
+ xor r11d,ecx
+ xor r14d,eax
+ add r12d,r15d
+ mov r15d,ebx
+
+ ror r13d,6
+ and r11d,eax
+ and r15d,ecx
+
+ ror r14d,2
+ add r12d,r13d
+ add r11d,r15d
+
+ add edx,r12d
+ add r11d,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add r11d,r14d
+
+ mov r13d,DWORD PTR[40+rsp]
+ mov r14d,DWORD PTR[28+rsp]
+ mov r12d,r13d
+ mov r15d,r14d
+
+ ror r12d,11
+ xor r12d,r13d
+ shr r13d,3
+
+ ror r12d,7
+ xor r13d,r12d
+ mov r12d,DWORD PTR[8+rsp]
+
+ ror r15d,2
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ add r12d,r13d
+ xor r14d,r15d
+
+ add r12d,DWORD PTR[36+rsp]
+ mov r13d,edx
+ add r12d,r14d
+ mov r14d,r11d
+ ror r13d,14
+ mov r15d,r8d
+ mov DWORD PTR[36+rsp],r12d
+
+ ror r14d,9
+ xor r13d,edx
+ xor r15d,r9d
+
+ ror r13d,5
+ add r12d,r10d
+ xor r14d,r11d
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,edx
+ mov r10d,eax
+
+ ror r14d,11
+ xor r13d,edx
+ xor r15d,r9d
+
+ xor r10d,ebx
+ xor r14d,r11d
+ add r12d,r15d
+ mov r15d,eax
+
+ ror r13d,6
+ and r10d,r11d
+ and r15d,ebx
+
+ ror r14d,2
+ add r12d,r13d
+ add r10d,r15d
+
+ add ecx,r12d
+ add r10d,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add r10d,r14d
+
+ mov r13d,DWORD PTR[44+rsp]
+ mov r14d,DWORD PTR[32+rsp]
+ mov r12d,r13d
+ mov r15d,r14d
+
+ ror r12d,11
+ xor r12d,r13d
+ shr r13d,3
+
+ ror r12d,7
+ xor r13d,r12d
+ mov r12d,DWORD PTR[12+rsp]
+
+ ror r15d,2
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ add r12d,r13d
+ xor r14d,r15d
+
+ add r12d,DWORD PTR[40+rsp]
+ mov r13d,ecx
+ add r12d,r14d
+ mov r14d,r10d
+ ror r13d,14
+ mov r15d,edx
+ mov DWORD PTR[40+rsp],r12d
+
+ ror r14d,9
+ xor r13d,ecx
+ xor r15d,r8d
+
+ ror r13d,5
+ add r12d,r9d
+ xor r14d,r10d
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,ecx
+ mov r9d,r11d
+
+ ror r14d,11
+ xor r13d,ecx
+ xor r15d,r8d
+
+ xor r9d,eax
+ xor r14d,r10d
+ add r12d,r15d
+ mov r15d,r11d
+
+ ror r13d,6
+ and r9d,r10d
+ and r15d,eax
+
+ ror r14d,2
+ add r12d,r13d
+ add r9d,r15d
+
+ add ebx,r12d
+ add r9d,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add r9d,r14d
+
+ mov r13d,DWORD PTR[48+rsp]
+ mov r14d,DWORD PTR[36+rsp]
+ mov r12d,r13d
+ mov r15d,r14d
+
+ ror r12d,11
+ xor r12d,r13d
+ shr r13d,3
+
+ ror r12d,7
+ xor r13d,r12d
+ mov r12d,DWORD PTR[16+rsp]
+
+ ror r15d,2
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ add r12d,r13d
+ xor r14d,r15d
+
+ add r12d,DWORD PTR[44+rsp]
+ mov r13d,ebx
+ add r12d,r14d
+ mov r14d,r9d
+ ror r13d,14
+ mov r15d,ecx
+ mov DWORD PTR[44+rsp],r12d
+
+ ror r14d,9
+ xor r13d,ebx
+ xor r15d,edx
+
+ ror r13d,5
+ add r12d,r8d
+ xor r14d,r9d
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,ebx
+ mov r8d,r10d
+
+ ror r14d,11
+ xor r13d,ebx
+ xor r15d,edx
+
+ xor r8d,r11d
+ xor r14d,r9d
+ add r12d,r15d
+ mov r15d,r10d
+
+ ror r13d,6
+ and r8d,r9d
+ and r15d,r11d
+
+ ror r14d,2
+ add r12d,r13d
+ add r8d,r15d
+
+ add eax,r12d
+ add r8d,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add r8d,r14d
+
+ mov r13d,DWORD PTR[52+rsp]
+ mov r14d,DWORD PTR[40+rsp]
+ mov r12d,r13d
+ mov r15d,r14d
+
+ ror r12d,11
+ xor r12d,r13d
+ shr r13d,3
+
+ ror r12d,7
+ xor r13d,r12d
+ mov r12d,DWORD PTR[20+rsp]
+
+ ror r15d,2
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ add r12d,r13d
+ xor r14d,r15d
+
+ add r12d,DWORD PTR[48+rsp]
+ mov r13d,eax
+ add r12d,r14d
+ mov r14d,r8d
+ ror r13d,14
+ mov r15d,ebx
+ mov DWORD PTR[48+rsp],r12d
+
+ ror r14d,9
+ xor r13d,eax
+ xor r15d,ecx
+
+ ror r13d,5
+ add r12d,edx
+ xor r14d,r8d
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,eax
+ mov edx,r9d
+
+ ror r14d,11
+ xor r13d,eax
+ xor r15d,ecx
+
+ xor edx,r10d
+ xor r14d,r8d
+ add r12d,r15d
+ mov r15d,r9d
+
+ ror r13d,6
+ and edx,r8d
+ and r15d,r10d
+
+ ror r14d,2
+ add r12d,r13d
+ add edx,r15d
+
+ add r11d,r12d
+ add edx,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add edx,r14d
+
+ mov r13d,DWORD PTR[56+rsp]
+ mov r14d,DWORD PTR[44+rsp]
+ mov r12d,r13d
+ mov r15d,r14d
+
+ ror r12d,11
+ xor r12d,r13d
+ shr r13d,3
+
+ ror r12d,7
+ xor r13d,r12d
+ mov r12d,DWORD PTR[24+rsp]
+
+ ror r15d,2
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ add r12d,r13d
+ xor r14d,r15d
+
+ add r12d,DWORD PTR[52+rsp]
+ mov r13d,r11d
+ add r12d,r14d
+ mov r14d,edx
+ ror r13d,14
+ mov r15d,eax
+ mov DWORD PTR[52+rsp],r12d
+
+ ror r14d,9
+ xor r13d,r11d
+ xor r15d,ebx
+
+ ror r13d,5
+ add r12d,ecx
+ xor r14d,edx
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,r11d
+ mov ecx,r8d
+
+ ror r14d,11
+ xor r13d,r11d
+ xor r15d,ebx
+
+ xor ecx,r9d
+ xor r14d,edx
+ add r12d,r15d
+ mov r15d,r8d
+
+ ror r13d,6
+ and ecx,edx
+ and r15d,r9d
+
+ ror r14d,2
+ add r12d,r13d
+ add ecx,r15d
+
+ add r10d,r12d
+ add ecx,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add ecx,r14d
+
+ mov r13d,DWORD PTR[60+rsp]
+ mov r14d,DWORD PTR[48+rsp]
+ mov r12d,r13d
+ mov r15d,r14d
+
+ ror r12d,11
+ xor r12d,r13d
+ shr r13d,3
+
+ ror r12d,7
+ xor r13d,r12d
+ mov r12d,DWORD PTR[28+rsp]
+
+ ror r15d,2
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ add r12d,r13d
+ xor r14d,r15d
+
+ add r12d,DWORD PTR[56+rsp]
+ mov r13d,r10d
+ add r12d,r14d
+ mov r14d,ecx
+ ror r13d,14
+ mov r15d,r11d
+ mov DWORD PTR[56+rsp],r12d
+
+ ror r14d,9
+ xor r13d,r10d
+ xor r15d,eax
+
+ ror r13d,5
+ add r12d,ebx
+ xor r14d,ecx
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,r10d
+ mov ebx,edx
+
+ ror r14d,11
+ xor r13d,r10d
+ xor r15d,eax
+
+ xor ebx,r8d
+ xor r14d,ecx
+ add r12d,r15d
+ mov r15d,edx
+
+ ror r13d,6
+ and ebx,ecx
+ and r15d,r8d
+
+ ror r14d,2
+ add r12d,r13d
+ add ebx,r15d
+
+ add r9d,r12d
+ add ebx,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add ebx,r14d
+
+ mov r13d,DWORD PTR[rsp]
+ mov r14d,DWORD PTR[52+rsp]
+ mov r12d,r13d
+ mov r15d,r14d
+
+ ror r12d,11
+ xor r12d,r13d
+ shr r13d,3
+
+ ror r12d,7
+ xor r13d,r12d
+ mov r12d,DWORD PTR[32+rsp]
+
+ ror r15d,2
+ xor r15d,r14d
+ shr r14d,10
+
+ ror r15d,17
+ add r12d,r13d
+ xor r14d,r15d
+
+ add r12d,DWORD PTR[60+rsp]
+ mov r13d,r9d
+ add r12d,r14d
+ mov r14d,ebx
+ ror r13d,14
+ mov r15d,r10d
+ mov DWORD PTR[60+rsp],r12d
+
+ ror r14d,9
+ xor r13d,r9d
+ xor r15d,r11d
+
+ ror r13d,5
+ add r12d,eax
+ xor r14d,ebx
+
+ add r12d,DWORD PTR[rdi*4+rbp]
+ and r15d,r9d
+ mov eax,ecx
+
+ ror r14d,11
+ xor r13d,r9d
+ xor r15d,r11d
+
+ xor eax,edx
+ xor r14d,ebx
+ add r12d,r15d
+ mov r15d,ecx
+
+ ror r13d,6
+ and eax,ebx
+ and r15d,edx
+
+ ror r14d,2
+ add r12d,r13d
+ add eax,r15d
+
+ add r8d,r12d
+ add eax,r12d
+ lea rdi,QWORD PTR[1+rdi]
+ add eax,r14d
+
+ cmp rdi,64
+ jb $L$rounds_16_xx
+
+ mov rdi,QWORD PTR[((64+0))+rsp]
+ lea rsi,QWORD PTR[64+rsi]
+
+ add eax,DWORD PTR[rdi]
+ add ebx,DWORD PTR[4+rdi]
+ add ecx,DWORD PTR[8+rdi]
+ add edx,DWORD PTR[12+rdi]
+ add r8d,DWORD PTR[16+rdi]
+ add r9d,DWORD PTR[20+rdi]
+ add r10d,DWORD PTR[24+rdi]
+ add r11d,DWORD PTR[28+rdi]
+
+ cmp rsi,QWORD PTR[((64+16))+rsp]
+
+ mov DWORD PTR[rdi],eax
+ mov DWORD PTR[4+rdi],ebx
+ mov DWORD PTR[8+rdi],ecx
+ mov DWORD PTR[12+rdi],edx
+ mov DWORD PTR[16+rdi],r8d
+ mov DWORD PTR[20+rdi],r9d
+ mov DWORD PTR[24+rdi],r10d
+ mov DWORD PTR[28+rdi],r11d
+ jb $L$loop
+
+ mov rsi,QWORD PTR[((64+24))+rsp]
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_sha256_block_data_order::
+sha256_block_data_order ENDP
+ALIGN 64
+
+K256::
+ DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h
+ DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h
+ DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h
+ DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h
+ DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch
+ DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah
+ DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h
+ DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h
+ DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h
+ DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h
+ DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h
+ DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h
+ DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h
+ DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h
+ DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h
+ DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h
+
+.text$ ENDS
+END
+
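The MASM translation above is the same perlasm output retargeted at ml64: apart from the WIN64 prologue/epilogue that spills rdi/rsi and remaps the rcx/rdx/r8/r9 argument registers onto the System V ones, every unrolled round performs the identical T1/T2 update. A hedged C sketch of one such round (sha256_round is an illustrative name; the asm has no separate round routine, it is fully unrolled):

#include <stdint.h>

static inline uint32_t
rotr32(uint32_t x, unsigned n)
{
	return (x >> n) | (x << (32 - n));
}

#define Sigma0(x) (rotr32((x), 2) ^ rotr32((x), 13) ^ rotr32((x), 22))
#define Sigma1(x) (rotr32((x), 6) ^ rotr32((x), 11) ^ rotr32((x), 25))
#define Ch(x, y, z)  (((x) & (y)) ^ (~(x) & (z)))
#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))

/* One compression round on the working state s[] = {a,b,c,d,e,f,g,h}.
 * The asm unrolls 64 of these and renames a..h through the registers
 * instead of shifting an array; it also evaluates Maj(a,b,c) as
 * ((b ^ c) & a) + (b & c), which is bit-identical because the two
 * terms are never set in the same bit position. */
static void
sha256_round(uint32_t s[8], uint32_t k, uint32_t w)
{
	uint32_t t1 = s[7] + Sigma1(s[4]) + Ch(s[4], s[5], s[6]) + k + w;
	uint32_t t2 = Sigma0(s[0]) + Maj(s[0], s[1], s[2]);

	s[7] = s[6]; s[6] = s[5]; s[5] = s[4]; s[4] = s[3] + t1;
	s[3] = s[2]; s[2] = s[1]; s[1] = s[0]; s[0] = t1 + t2;
}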
diff --git a/ext/libressl/crypto/sha/sha256-mingw64-x86_64.S b/ext/libressl/crypto/sha/sha256-mingw64-x86_64.S
new file mode 100644
index 0000000..3de981b
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha256-mingw64-x86_64.S
@@ -0,0 +1,1790 @@
+#include "x86_arch.h"
+.text
+
+.globl sha256_block_data_order
+.def sha256_block_data_order; .scl 2; .type 32; .endef
+.p2align 4
+sha256_block_data_order:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_sha256_block_data_order:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $64+32,%rsp
+ leaq (%rsi,%rdx,4),%rdx
+ andq $-64,%rsp
+ movq %rdi,64+0(%rsp)
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+ movq %r11,64+24(%rsp)
+.Lprologue:
+
+ leaq K256(%rip),%rbp
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ movl 16(%rdi),%r8d
+ movl 20(%rdi),%r9d
+ movl 24(%rdi),%r10d
+ movl 28(%rdi),%r11d
+ jmp .Lloop
+
+.p2align 4
+.Lloop:
+ xorq %rdi,%rdi
+ movl 0(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,0(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 4(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,4(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 8(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,8(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 12(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,12(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 16(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,16(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 20(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,20(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 24(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,24(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 28(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,28(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ movl 32(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,32(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 36(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,36(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 40(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,40(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 44(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,44(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 48(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,48(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 52(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,52(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 56(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,56(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 60(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,60(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ jmp .Lrounds_16_xx
+.p2align 4
+.Lrounds_16_xx:
+ movl 4(%rsp),%r13d
+ movl 56(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 36(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 0(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r14d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,0(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 8(%rsp),%r13d
+ movl 60(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 40(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 4(%rsp),%r12d
+ movl %edx,%r13d
+ addl %r14d,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,4(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 12(%rsp),%r13d
+ movl 0(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 44(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 8(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r14d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,8(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 16(%rsp),%r13d
+ movl 4(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 48(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 12(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %r14d,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,12(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 20(%rsp),%r13d
+ movl 8(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 52(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 16(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r14d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,16(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 24(%rsp),%r13d
+ movl 12(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 56(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 20(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %r14d,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,20(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 28(%rsp),%r13d
+ movl 16(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 60(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 24(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r14d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,24(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 32(%rsp),%r13d
+ movl 20(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 0(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 28(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %r14d,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,28(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ movl 36(%rsp),%r13d
+ movl 24(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 4(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 32(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r14d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,32(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 40(%rsp),%r13d
+ movl 28(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 8(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 36(%rsp),%r12d
+ movl %edx,%r13d
+ addl %r14d,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,36(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 44(%rsp),%r13d
+ movl 32(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 12(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 40(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r14d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,40(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 48(%rsp),%r13d
+ movl 36(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 16(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 44(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %r14d,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,44(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 52(%rsp),%r13d
+ movl 40(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 20(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 48(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r14d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,48(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 56(%rsp),%r13d
+ movl 44(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 24(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 52(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %r14d,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,52(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 60(%rsp),%r13d
+ movl 48(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 28(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 56(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r14d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,56(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 0(%rsp),%r13d
+ movl 52(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 32(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 60(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %r14d,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,60(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ cmpq $64,%rdi
+ jb .Lrounds_16_xx
+
+ movq 64+0(%rsp),%rdi
+ leaq 64(%rsi),%rsi
+
+ addl 0(%rdi),%eax
+ addl 4(%rdi),%ebx
+ addl 8(%rdi),%ecx
+ addl 12(%rdi),%edx
+ addl 16(%rdi),%r8d
+ addl 20(%rdi),%r9d
+ addl 24(%rdi),%r10d
+ addl 28(%rdi),%r11d
+
+ cmpq 64+16(%rsp),%rsi
+
+ movl %eax,0(%rdi)
+ movl %ebx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %edx,12(%rdi)
+ movl %r8d,16(%rdi)
+ movl %r9d,20(%rdi)
+ movl %r10d,24(%rdi)
+ movl %r11d,28(%rdi)
+ jb .Lloop
+
+ movq 64+24(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_sha256_block_data_order:
+.p2align 6
+
+K256:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
diff --git a/ext/libressl/crypto/sha/sha256.c b/ext/libressl/crypto/sha/sha256.c
new file mode 100644
index 0000000..9c05d3b
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha256.c
@@ -0,0 +1,284 @@
+/* $OpenBSD: sha256.c,v 1.10 2019/01/21 23:20:31 jsg Exp $ */
+/* ====================================================================
+ * Copyright (c) 2004 The OpenSSL Project. All rights reserved
+ * according to the OpenSSL license [found in ../../LICENSE].
+ * ====================================================================
+ */
+
+#include <openssl/opensslconf.h>
+
+#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA256)
+
+#include <machine/endian.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <openssl/crypto.h>
+#include <openssl/sha.h>
+#include <openssl/opensslv.h>
+
+int SHA224_Init(SHA256_CTX *c)
+ {
+ memset (c,0,sizeof(*c));
+ c->h[0]=0xc1059ed8UL; c->h[1]=0x367cd507UL;
+ c->h[2]=0x3070dd17UL; c->h[3]=0xf70e5939UL;
+ c->h[4]=0xffc00b31UL; c->h[5]=0x68581511UL;
+ c->h[6]=0x64f98fa7UL; c->h[7]=0xbefa4fa4UL;
+ c->md_len=SHA224_DIGEST_LENGTH;
+ return 1;
+ }
+
+int SHA256_Init(SHA256_CTX *c)
+ {
+ memset (c,0,sizeof(*c));
+ c->h[0]=0x6a09e667UL; c->h[1]=0xbb67ae85UL;
+ c->h[2]=0x3c6ef372UL; c->h[3]=0xa54ff53aUL;
+ c->h[4]=0x510e527fUL; c->h[5]=0x9b05688cUL;
+ c->h[6]=0x1f83d9abUL; c->h[7]=0x5be0cd19UL;
+ c->md_len=SHA256_DIGEST_LENGTH;
+ return 1;
+ }
+
+unsigned char *SHA224(const unsigned char *d, size_t n, unsigned char *md)
+ {
+ SHA256_CTX c;
+ static unsigned char m[SHA224_DIGEST_LENGTH];
+
+ if (md == NULL) md=m;
+ SHA224_Init(&c);
+ SHA256_Update(&c,d,n);
+ SHA256_Final(md,&c);
+ explicit_bzero(&c,sizeof(c));
+ return(md);
+ }
+
+unsigned char *SHA256(const unsigned char *d, size_t n, unsigned char *md)
+ {
+ SHA256_CTX c;
+ static unsigned char m[SHA256_DIGEST_LENGTH];
+
+ if (md == NULL) md=m;
+ SHA256_Init(&c);
+ SHA256_Update(&c,d,n);
+ SHA256_Final(md,&c);
+ explicit_bzero(&c,sizeof(c));
+ return(md);
+ }
+
+int SHA224_Update(SHA256_CTX *c, const void *data, size_t len)
+{ return SHA256_Update (c,data,len); }
+int SHA224_Final (unsigned char *md, SHA256_CTX *c)
+{ return SHA256_Final (md,c); }
+
+#define DATA_ORDER_IS_BIG_ENDIAN
+
+#define HASH_LONG SHA_LONG
+#define HASH_CTX SHA256_CTX
+#define HASH_CBLOCK SHA_CBLOCK
+/*
+ * Note that FIPS180-2 discusses "Truncation of the Hash Function Output."
+ * default: case below covers for it. It's not clear however if it's
+ * permitted to truncate to amount of bytes not divisible by 4. I bet not,
+ * but if it is, then default: case shall be extended. For reference.
+ * Idea behind separate cases for pre-defined lengths is to let the
+ * compiler decide if it's appropriate to unroll small loops.
+ */
+#define HASH_MAKE_STRING(c,s) do { \
+ unsigned long ll; \
+ unsigned int nn; \
+ switch ((c)->md_len) \
+ { case SHA224_DIGEST_LENGTH: \
+ for (nn=0;nn<SHA224_DIGEST_LENGTH/4;nn++) \
+ { ll=(c)->h[nn]; HOST_l2c(ll,(s)); } \
+ break; \
+ case SHA256_DIGEST_LENGTH: \
+ for (nn=0;nn<SHA256_DIGEST_LENGTH/4;nn++) \
+ { ll=(c)->h[nn]; HOST_l2c(ll,(s)); } \
+ break; \
+ default: \
+ if ((c)->md_len > SHA256_DIGEST_LENGTH) \
+ return 0; \
+ for (nn=0;nn<(c)->md_len/4;nn++) \
+ { ll=(c)->h[nn]; HOST_l2c(ll,(s)); } \
+ break; \
+ } \
+ } while (0)
+
+#define HASH_UPDATE SHA256_Update
+#define HASH_TRANSFORM SHA256_Transform
+#define HASH_FINAL SHA256_Final
+#define HASH_BLOCK_DATA_ORDER sha256_block_data_order
+#ifndef SHA256_ASM
+static
+#endif
+void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num);
+
+#include "md32_common.h"
+
+#ifndef SHA256_ASM
+static const SHA_LONG K256[64] = {
+ 0x428a2f98UL,0x71374491UL,0xb5c0fbcfUL,0xe9b5dba5UL,
+ 0x3956c25bUL,0x59f111f1UL,0x923f82a4UL,0xab1c5ed5UL,
+ 0xd807aa98UL,0x12835b01UL,0x243185beUL,0x550c7dc3UL,
+ 0x72be5d74UL,0x80deb1feUL,0x9bdc06a7UL,0xc19bf174UL,
+ 0xe49b69c1UL,0xefbe4786UL,0x0fc19dc6UL,0x240ca1ccUL,
+ 0x2de92c6fUL,0x4a7484aaUL,0x5cb0a9dcUL,0x76f988daUL,
+ 0x983e5152UL,0xa831c66dUL,0xb00327c8UL,0xbf597fc7UL,
+ 0xc6e00bf3UL,0xd5a79147UL,0x06ca6351UL,0x14292967UL,
+ 0x27b70a85UL,0x2e1b2138UL,0x4d2c6dfcUL,0x53380d13UL,
+ 0x650a7354UL,0x766a0abbUL,0x81c2c92eUL,0x92722c85UL,
+ 0xa2bfe8a1UL,0xa81a664bUL,0xc24b8b70UL,0xc76c51a3UL,
+ 0xd192e819UL,0xd6990624UL,0xf40e3585UL,0x106aa070UL,
+ 0x19a4c116UL,0x1e376c08UL,0x2748774cUL,0x34b0bcb5UL,
+ 0x391c0cb3UL,0x4ed8aa4aUL,0x5b9cca4fUL,0x682e6ff3UL,
+ 0x748f82eeUL,0x78a5636fUL,0x84c87814UL,0x8cc70208UL,
+ 0x90befffaUL,0xa4506cebUL,0xbef9a3f7UL,0xc67178f2UL };
+
+/*
+ * FIPS specification refers to right rotations, while our ROTATE macro
+ * is left one. This is why you might notice that rotation coefficients
+ * differ from those observed in FIPS document by 32-N...
+ */
+#define Sigma0(x) (ROTATE((x),30) ^ ROTATE((x),19) ^ ROTATE((x),10))
+#define Sigma1(x) (ROTATE((x),26) ^ ROTATE((x),21) ^ ROTATE((x),7))
+#define sigma0(x) (ROTATE((x),25) ^ ROTATE((x),14) ^ ((x)>>3))
+#define sigma1(x) (ROTATE((x),15) ^ ROTATE((x),13) ^ ((x)>>10))
+
+#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
+#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+
+#ifdef OPENSSL_SMALL_FOOTPRINT
+
+static void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num)
+ {
+ unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1,T2;
+ SHA_LONG X[16],l;
+ int i;
+ const unsigned char *data=in;
+
+ while (num--) {
+
+ a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
+ e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
+
+ for (i=0;i<16;i++)
+ {
+ HOST_c2l(data,l); T1 = X[i] = l;
+ T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
+ T2 = Sigma0(a) + Maj(a,b,c);
+ h = g; g = f; f = e; e = d + T1;
+ d = c; c = b; b = a; a = T1 + T2;
+ }
+
+ for (;i<64;i++)
+ {
+ s0 = X[(i+1)&0x0f]; s0 = sigma0(s0);
+ s1 = X[(i+14)&0x0f]; s1 = sigma1(s1);
+
+ T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
+ T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
+ T2 = Sigma0(a) + Maj(a,b,c);
+ h = g; g = f; f = e; e = d + T1;
+ d = c; c = b; b = a; a = T1 + T2;
+ }
+
+ ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
+ ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
+
+ }
+}
+
+#else
+
+#define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
+ T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i]; \
+ h = Sigma0(a) + Maj(a,b,c); \
+ d += T1; h += T1; } while (0)
+
+#define ROUND_16_63(i,a,b,c,d,e,f,g,h,X) do { \
+ s0 = X[(i+1)&0x0f]; s0 = sigma0(s0); \
+ s1 = X[(i+14)&0x0f]; s1 = sigma1(s1); \
+ T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f]; \
+ ROUND_00_15(i,a,b,c,d,e,f,g,h); } while (0)
+
+static void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num)
+ {
+ unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1;
+ SHA_LONG X[16];
+ int i;
+ const unsigned char *data=in;
+
+ while (num--) {
+
+ a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
+ e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
+
+ if (BYTE_ORDER != LITTLE_ENDIAN &&
+ sizeof(SHA_LONG)==4 && ((size_t)in%4)==0)
+ {
+ const SHA_LONG *W=(const SHA_LONG *)data;
+
+ T1 = X[0] = W[0]; ROUND_00_15(0,a,b,c,d,e,f,g,h);
+ T1 = X[1] = W[1]; ROUND_00_15(1,h,a,b,c,d,e,f,g);
+ T1 = X[2] = W[2]; ROUND_00_15(2,g,h,a,b,c,d,e,f);
+ T1 = X[3] = W[3]; ROUND_00_15(3,f,g,h,a,b,c,d,e);
+ T1 = X[4] = W[4]; ROUND_00_15(4,e,f,g,h,a,b,c,d);
+ T1 = X[5] = W[5]; ROUND_00_15(5,d,e,f,g,h,a,b,c);
+ T1 = X[6] = W[6]; ROUND_00_15(6,c,d,e,f,g,h,a,b);
+ T1 = X[7] = W[7]; ROUND_00_15(7,b,c,d,e,f,g,h,a);
+ T1 = X[8] = W[8]; ROUND_00_15(8,a,b,c,d,e,f,g,h);
+ T1 = X[9] = W[9]; ROUND_00_15(9,h,a,b,c,d,e,f,g);
+ T1 = X[10] = W[10]; ROUND_00_15(10,g,h,a,b,c,d,e,f);
+ T1 = X[11] = W[11]; ROUND_00_15(11,f,g,h,a,b,c,d,e);
+ T1 = X[12] = W[12]; ROUND_00_15(12,e,f,g,h,a,b,c,d);
+ T1 = X[13] = W[13]; ROUND_00_15(13,d,e,f,g,h,a,b,c);
+ T1 = X[14] = W[14]; ROUND_00_15(14,c,d,e,f,g,h,a,b);
+ T1 = X[15] = W[15]; ROUND_00_15(15,b,c,d,e,f,g,h,a);
+
+ data += SHA256_CBLOCK;
+ }
+ else
+ {
+ SHA_LONG l;
+
+ HOST_c2l(data,l); T1 = X[0] = l; ROUND_00_15(0,a,b,c,d,e,f,g,h);
+ HOST_c2l(data,l); T1 = X[1] = l; ROUND_00_15(1,h,a,b,c,d,e,f,g);
+ HOST_c2l(data,l); T1 = X[2] = l; ROUND_00_15(2,g,h,a,b,c,d,e,f);
+ HOST_c2l(data,l); T1 = X[3] = l; ROUND_00_15(3,f,g,h,a,b,c,d,e);
+ HOST_c2l(data,l); T1 = X[4] = l; ROUND_00_15(4,e,f,g,h,a,b,c,d);
+ HOST_c2l(data,l); T1 = X[5] = l; ROUND_00_15(5,d,e,f,g,h,a,b,c);
+ HOST_c2l(data,l); T1 = X[6] = l; ROUND_00_15(6,c,d,e,f,g,h,a,b);
+ HOST_c2l(data,l); T1 = X[7] = l; ROUND_00_15(7,b,c,d,e,f,g,h,a);
+ HOST_c2l(data,l); T1 = X[8] = l; ROUND_00_15(8,a,b,c,d,e,f,g,h);
+ HOST_c2l(data,l); T1 = X[9] = l; ROUND_00_15(9,h,a,b,c,d,e,f,g);
+ HOST_c2l(data,l); T1 = X[10] = l; ROUND_00_15(10,g,h,a,b,c,d,e,f);
+ HOST_c2l(data,l); T1 = X[11] = l; ROUND_00_15(11,f,g,h,a,b,c,d,e);
+ HOST_c2l(data,l); T1 = X[12] = l; ROUND_00_15(12,e,f,g,h,a,b,c,d);
+ HOST_c2l(data,l); T1 = X[13] = l; ROUND_00_15(13,d,e,f,g,h,a,b,c);
+ HOST_c2l(data,l); T1 = X[14] = l; ROUND_00_15(14,c,d,e,f,g,h,a,b);
+ HOST_c2l(data,l); T1 = X[15] = l; ROUND_00_15(15,b,c,d,e,f,g,h,a);
+ }
+
+ for (i=16;i<64;i+=8)
+ {
+ ROUND_16_63(i+0,a,b,c,d,e,f,g,h,X);
+ ROUND_16_63(i+1,h,a,b,c,d,e,f,g,X);
+ ROUND_16_63(i+2,g,h,a,b,c,d,e,f,X);
+ ROUND_16_63(i+3,f,g,h,a,b,c,d,e,X);
+ ROUND_16_63(i+4,e,f,g,h,a,b,c,d,X);
+ ROUND_16_63(i+5,d,e,f,g,h,a,b,c,X);
+ ROUND_16_63(i+6,c,d,e,f,g,h,a,b,X);
+ ROUND_16_63(i+7,b,c,d,e,f,g,h,a,X);
+ }
+
+ ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
+ ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
+
+ }
+ }
+
+#endif
+#endif /* SHA256_ASM */
+
+#endif /* OPENSSL_NO_SHA256 */
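
Aside on the rotation comment in sha256.c above: the Sigma/sigma macros use left rotations whose counts differ from the FIPS 180 right-rotation counts by 32-N, because ROTL32(x, 32-n) == ROTR32(x, n) for 32-bit words. A minimal standalone C sketch of that equivalence (illustration only, not part of the imported sources; names like Sigma0_left/Sigma0_right are ad hoc):

    /* Check that the left-rotate form of Sigma0 used in sha256.c equals the
     * FIPS 180-4 right-rotate form, since ROTL32(x, 32-n) == ROTR32(x, n). */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t rotl32(uint32_t x, unsigned n) { return (x << n) | (x >> (32 - n)); }
    static uint32_t rotr32(uint32_t x, unsigned n) { return (x >> n) | (x << (32 - n)); }

    /* sha256.c form: left rotations by 32-2, 32-13, 32-22 */
    static uint32_t Sigma0_left(uint32_t x)  { return rotl32(x, 30) ^ rotl32(x, 19) ^ rotl32(x, 10); }
    /* FIPS 180-4 form: right rotations by 2, 13, 22 */
    static uint32_t Sigma0_right(uint32_t x) { return rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22); }

    int main(void)
    {
            uint32_t x = 0x6a09e667UL;      /* SHA-256 IV word h[0] */

            assert(Sigma0_left(x) == Sigma0_right(x));
            printf("Sigma0(%08x) = %08x\n", x, Sigma0_left(x));
            return 0;
    }
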
diff --git a/ext/libressl/crypto/sha/sha512-elf-armv4.S b/ext/libressl/crypto/sha/sha512-elf-armv4.S
new file mode 100644
index 0000000..8abf8d5
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha512-elf-armv4.S
@@ -0,0 +1,1786 @@
+#include "arm_arch.h"
+#ifdef __ARMEL__
+# define LO 0
+# define HI 4
+# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1
+#else
+# define HI 0
+# define LO 4
+# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
+#endif
+
+.text
+.code 32
+.type K512,%object
+.align 5
+K512:
+WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
+WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
+WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
+WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
+WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
+WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
+WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
+WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
+WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
+WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
+WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
+WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
+WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
+WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
+WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
+WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
+WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
+WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
+WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
+WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
+WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
+WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
+WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
+WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
+WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
+WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
+WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
+WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
+WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
+WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
+WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
+WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
+WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
+WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
+WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
+WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
+WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
+WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
+WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
+WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
+.size K512,.-K512
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-sha512_block_data_order
+.skip 32-4
+
+.global sha512_block_data_order
+.type sha512_block_data_order,%function
+sha512_block_data_order:
+ sub r3,pc,#8 @ sha512_block_data_order
+ add r2,r1,r2,lsl#7 @ len to point at the end of inp
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+ ldr r12,.LOPENSSL_armcap
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
+ tst r12,#1
+ bne .LNEON
+#endif
+ stmdb sp!,{r4-r12,lr}
+ sub r14,r3,#672 @ K512
+ sub sp,sp,#9*8
+
+ ldr r7,[r0,#32+LO]
+ ldr r8,[r0,#32+HI]
+ ldr r9, [r0,#48+LO]
+ ldr r10, [r0,#48+HI]
+ ldr r11, [r0,#56+LO]
+ ldr r12, [r0,#56+HI]
+.Loop:
+ str r9, [sp,#48+0]
+ str r10, [sp,#48+4]
+ str r11, [sp,#56+0]
+ str r12, [sp,#56+4]
+ ldr r5,[r0,#0+LO]
+ ldr r6,[r0,#0+HI]
+ ldr r3,[r0,#8+LO]
+ ldr r4,[r0,#8+HI]
+ ldr r9, [r0,#16+LO]
+ ldr r10, [r0,#16+HI]
+ ldr r11, [r0,#24+LO]
+ ldr r12, [r0,#24+HI]
+ str r3,[sp,#8+0]
+ str r4,[sp,#8+4]
+ str r9, [sp,#16+0]
+ str r10, [sp,#16+4]
+ str r11, [sp,#24+0]
+ str r12, [sp,#24+4]
+ ldr r3,[r0,#40+LO]
+ ldr r4,[r0,#40+HI]
+ str r3,[sp,#40+0]
+ str r4,[sp,#40+4]
+
+.L00_15:
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r3,[r1,#7]
+ ldrb r9, [r1,#6]
+ ldrb r10, [r1,#5]
+ ldrb r11, [r1,#4]
+ ldrb r4,[r1,#3]
+ ldrb r12, [r1,#2]
+ orr r3,r3,r9,lsl#8
+ ldrb r9, [r1,#1]
+ orr r3,r3,r10,lsl#16
+ ldrb r10, [r1],#8
+ orr r3,r3,r11,lsl#24
+ orr r4,r4,r12,lsl#8
+ orr r4,r4,r9,lsl#16
+ orr r4,r4,r10,lsl#24
+#else
+ ldr r3,[r1,#4]
+ ldr r4,[r1],#8
+#ifdef __ARMEL__
+ rev r3,r3
+ rev r4,r4
+#endif
+#endif
+ @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
+ @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
+ @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
+ mov r9,r7,lsr#14
+ str r3,[sp,#64+0]
+ mov r10,r8,lsr#14
+ str r4,[sp,#64+4]
+ eor r9,r9,r8,lsl#18
+ ldr r11,[sp,#56+0] @ h.lo
+ eor r10,r10,r7,lsl#18
+ ldr r12,[sp,#56+4] @ h.hi
+ eor r9,r9,r7,lsr#18
+ eor r10,r10,r8,lsr#18
+ eor r9,r9,r8,lsl#14
+ eor r10,r10,r7,lsl#14
+ eor r9,r9,r8,lsr#9
+ eor r10,r10,r7,lsr#9
+ eor r9,r9,r7,lsl#23
+ eor r10,r10,r8,lsl#23 @ Sigma1(e)
+ adds r3,r3,r9
+ ldr r9,[sp,#40+0] @ f.lo
+ adc r4,r4,r10 @ T += Sigma1(e)
+ ldr r10,[sp,#40+4] @ f.hi
+ adds r3,r3,r11
+ ldr r11,[sp,#48+0] @ g.lo
+ adc r4,r4,r12 @ T += h
+ ldr r12,[sp,#48+4] @ g.hi
+
+ eor r9,r9,r11
+ str r7,[sp,#32+0]
+ eor r10,r10,r12
+ str r8,[sp,#32+4]
+ and r9,r9,r7
+ str r5,[sp,#0+0]
+ and r10,r10,r8
+ str r6,[sp,#0+4]
+ eor r9,r9,r11
+ ldr r11,[r14,#LO] @ K[i].lo
+ eor r10,r10,r12 @ Ch(e,f,g)
+ ldr r12,[r14,#HI] @ K[i].hi
+
+ adds r3,r3,r9
+ ldr r7,[sp,#24+0] @ d.lo
+ adc r4,r4,r10 @ T += Ch(e,f,g)
+ ldr r8,[sp,#24+4] @ d.hi
+ adds r3,r3,r11
+ and r9,r11,#0xff
+ adc r4,r4,r12 @ T += K[i]
+ adds r7,r7,r3
+ ldr r11,[sp,#8+0] @ b.lo
+ adc r8,r8,r4 @ d += T
+ teq r9,#148
+
+ ldr r12,[sp,#16+0] @ c.lo
+ orreq r14,r14,#1
+ @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+ @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
+ @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
+ mov r9,r5,lsr#28
+ mov r10,r6,lsr#28
+ eor r9,r9,r6,lsl#4
+ eor r10,r10,r5,lsl#4
+ eor r9,r9,r6,lsr#2
+ eor r10,r10,r5,lsr#2
+ eor r9,r9,r5,lsl#30
+ eor r10,r10,r6,lsl#30
+ eor r9,r9,r6,lsr#7
+ eor r10,r10,r5,lsr#7
+ eor r9,r9,r5,lsl#25
+ eor r10,r10,r6,lsl#25 @ Sigma0(a)
+ adds r3,r3,r9
+ and r9,r5,r11
+ adc r4,r4,r10 @ T += Sigma0(a)
+
+ ldr r10,[sp,#8+4] @ b.hi
+ orr r5,r5,r11
+ ldr r11,[sp,#16+4] @ c.hi
+ and r5,r5,r12
+ and r12,r6,r10
+ orr r6,r6,r10
+ orr r5,r5,r9 @ Maj(a,b,c).lo
+ and r6,r6,r11
+ adds r5,r5,r3
+ orr r6,r6,r12 @ Maj(a,b,c).hi
+ sub sp,sp,#8
+ adc r6,r6,r4 @ h += T
+ tst r14,#1
+ add r14,r14,#8
+ tst r14,#1
+ beq .L00_15
+ ldr r9,[sp,#184+0]
+ ldr r10,[sp,#184+4]
+ bic r14,r14,#1
+.L16_79:
+ @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
+ @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
+ @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
+ mov r3,r9,lsr#1
+ ldr r11,[sp,#80+0]
+ mov r4,r10,lsr#1
+ ldr r12,[sp,#80+4]
+ eor r3,r3,r10,lsl#31
+ eor r4,r4,r9,lsl#31
+ eor r3,r3,r9,lsr#8
+ eor r4,r4,r10,lsr#8
+ eor r3,r3,r10,lsl#24
+ eor r4,r4,r9,lsl#24
+ eor r3,r3,r9,lsr#7
+ eor r4,r4,r10,lsr#7
+ eor r3,r3,r10,lsl#25
+
+ @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+ @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
+ @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
+ mov r9,r11,lsr#19
+ mov r10,r12,lsr#19
+ eor r9,r9,r12,lsl#13
+ eor r10,r10,r11,lsl#13
+ eor r9,r9,r12,lsr#29
+ eor r10,r10,r11,lsr#29
+ eor r9,r9,r11,lsl#3
+ eor r10,r10,r12,lsl#3
+ eor r9,r9,r11,lsr#6
+ eor r10,r10,r12,lsr#6
+ ldr r11,[sp,#120+0]
+ eor r9,r9,r12,lsl#26
+
+ ldr r12,[sp,#120+4]
+ adds r3,r3,r9
+ ldr r9,[sp,#192+0]
+ adc r4,r4,r10
+
+ ldr r10,[sp,#192+4]
+ adds r3,r3,r11
+ adc r4,r4,r12
+ adds r3,r3,r9
+ adc r4,r4,r10
+ @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
+ @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
+ @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
+ mov r9,r7,lsr#14
+ str r3,[sp,#64+0]
+ mov r10,r8,lsr#14
+ str r4,[sp,#64+4]
+ eor r9,r9,r8,lsl#18
+ ldr r11,[sp,#56+0] @ h.lo
+ eor r10,r10,r7,lsl#18
+ ldr r12,[sp,#56+4] @ h.hi
+ eor r9,r9,r7,lsr#18
+ eor r10,r10,r8,lsr#18
+ eor r9,r9,r8,lsl#14
+ eor r10,r10,r7,lsl#14
+ eor r9,r9,r8,lsr#9
+ eor r10,r10,r7,lsr#9
+ eor r9,r9,r7,lsl#23
+ eor r10,r10,r8,lsl#23 @ Sigma1(e)
+ adds r3,r3,r9
+ ldr r9,[sp,#40+0] @ f.lo
+ adc r4,r4,r10 @ T += Sigma1(e)
+ ldr r10,[sp,#40+4] @ f.hi
+ adds r3,r3,r11
+ ldr r11,[sp,#48+0] @ g.lo
+ adc r4,r4,r12 @ T += h
+ ldr r12,[sp,#48+4] @ g.hi
+
+ eor r9,r9,r11
+ str r7,[sp,#32+0]
+ eor r10,r10,r12
+ str r8,[sp,#32+4]
+ and r9,r9,r7
+ str r5,[sp,#0+0]
+ and r10,r10,r8
+ str r6,[sp,#0+4]
+ eor r9,r9,r11
+ ldr r11,[r14,#LO] @ K[i].lo
+ eor r10,r10,r12 @ Ch(e,f,g)
+ ldr r12,[r14,#HI] @ K[i].hi
+
+ adds r3,r3,r9
+ ldr r7,[sp,#24+0] @ d.lo
+ adc r4,r4,r10 @ T += Ch(e,f,g)
+ ldr r8,[sp,#24+4] @ d.hi
+ adds r3,r3,r11
+ and r9,r11,#0xff
+ adc r4,r4,r12 @ T += K[i]
+ adds r7,r7,r3
+ ldr r11,[sp,#8+0] @ b.lo
+ adc r8,r8,r4 @ d += T
+ teq r9,#23
+
+ ldr r12,[sp,#16+0] @ c.lo
+ orreq r14,r14,#1
+ @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+ @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
+ @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
+ mov r9,r5,lsr#28
+ mov r10,r6,lsr#28
+ eor r9,r9,r6,lsl#4
+ eor r10,r10,r5,lsl#4
+ eor r9,r9,r6,lsr#2
+ eor r10,r10,r5,lsr#2
+ eor r9,r9,r5,lsl#30
+ eor r10,r10,r6,lsl#30
+ eor r9,r9,r6,lsr#7
+ eor r10,r10,r5,lsr#7
+ eor r9,r9,r5,lsl#25
+ eor r10,r10,r6,lsl#25 @ Sigma0(a)
+ adds r3,r3,r9
+ and r9,r5,r11
+ adc r4,r4,r10 @ T += Sigma0(a)
+
+ ldr r10,[sp,#8+4] @ b.hi
+ orr r5,r5,r11
+ ldr r11,[sp,#16+4] @ c.hi
+ and r5,r5,r12
+ and r12,r6,r10
+ orr r6,r6,r10
+ orr r5,r5,r9 @ Maj(a,b,c).lo
+ and r6,r6,r11
+ adds r5,r5,r3
+ orr r6,r6,r12 @ Maj(a,b,c).hi
+ sub sp,sp,#8
+ adc r6,r6,r4 @ h += T
+ tst r14,#1
+ add r14,r14,#8
+ ldreq r9,[sp,#184+0]
+ ldreq r10,[sp,#184+4]
+ beq .L16_79
+ bic r14,r14,#1
+
+ ldr r3,[sp,#8+0]
+ ldr r4,[sp,#8+4]
+ ldr r9, [r0,#0+LO]
+ ldr r10, [r0,#0+HI]
+ ldr r11, [r0,#8+LO]
+ ldr r12, [r0,#8+HI]
+ adds r9,r5,r9
+ str r9, [r0,#0+LO]
+ adc r10,r6,r10
+ str r10, [r0,#0+HI]
+ adds r11,r3,r11
+ str r11, [r0,#8+LO]
+ adc r12,r4,r12
+ str r12, [r0,#8+HI]
+
+ ldr r5,[sp,#16+0]
+ ldr r6,[sp,#16+4]
+ ldr r3,[sp,#24+0]
+ ldr r4,[sp,#24+4]
+ ldr r9, [r0,#16+LO]
+ ldr r10, [r0,#16+HI]
+ ldr r11, [r0,#24+LO]
+ ldr r12, [r0,#24+HI]
+ adds r9,r5,r9
+ str r9, [r0,#16+LO]
+ adc r10,r6,r10
+ str r10, [r0,#16+HI]
+ adds r11,r3,r11
+ str r11, [r0,#24+LO]
+ adc r12,r4,r12
+ str r12, [r0,#24+HI]
+
+ ldr r3,[sp,#40+0]
+ ldr r4,[sp,#40+4]
+ ldr r9, [r0,#32+LO]
+ ldr r10, [r0,#32+HI]
+ ldr r11, [r0,#40+LO]
+ ldr r12, [r0,#40+HI]
+ adds r7,r7,r9
+ str r7,[r0,#32+LO]
+ adc r8,r8,r10
+ str r8,[r0,#32+HI]
+ adds r11,r3,r11
+ str r11, [r0,#40+LO]
+ adc r12,r4,r12
+ str r12, [r0,#40+HI]
+
+ ldr r5,[sp,#48+0]
+ ldr r6,[sp,#48+4]
+ ldr r3,[sp,#56+0]
+ ldr r4,[sp,#56+4]
+ ldr r9, [r0,#48+LO]
+ ldr r10, [r0,#48+HI]
+ ldr r11, [r0,#56+LO]
+ ldr r12, [r0,#56+HI]
+ adds r9,r5,r9
+ str r9, [r0,#48+LO]
+ adc r10,r6,r10
+ str r10, [r0,#48+HI]
+ adds r11,r3,r11
+ str r11, [r0,#56+LO]
+ adc r12,r4,r12
+ str r12, [r0,#56+HI]
+
+ add sp,sp,#640
+ sub r14,r14,#640
+
+ teq r1,r2
+ bne .Loop
+
+ add sp,sp,#8*9 @ destroy frame
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r12,pc}
+#else
+ ldmia sp!,{r4-r12,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+.fpu neon
+
+.align 4
+.LNEON:
+ dmb @ errata #451034 on early Cortex A8
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+ sub r3,r3,#672 @ K512
+ vldmia r0,{d16-d23} @ load context
+.Loop_neon:
+ vshr.u64 d24,d20,#14 @ 0
+#if 0<16
+ vld1.64 {d0},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d20,#18
+ vshr.u64 d26,d20,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d20,#50
+ vsli.64 d25,d20,#46
+ vsli.64 d26,d20,#23
+#if 0<16 && defined(__ARMEL__)
+ vrev64.8 d0,d0
+#endif
+ vadd.i64 d27,d28,d23
+ veor d29,d21,d22
+ veor d24,d25
+ vand d29,d20
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d22 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d16,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d16,#34
+ vshr.u64 d26,d16,#39
+ vsli.64 d24,d16,#36
+ vsli.64 d25,d16,#30
+ vsli.64 d26,d16,#25
+ vadd.i64 d27,d0
+ vorr d30,d16,d18
+ vand d29,d16,d18
+ veor d23,d24,d25
+ vand d30,d17
+ veor d23,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d23,d27
+ vadd.i64 d19,d27
+ vadd.i64 d23,d30
+ vshr.u64 d24,d19,#14 @ 1
+#if 1<16
+ vld1.64 {d1},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d19,#18
+ vshr.u64 d26,d19,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d19,#50
+ vsli.64 d25,d19,#46
+ vsli.64 d26,d19,#23
+#if 1<16 && defined(__ARMEL__)
+ vrev64.8 d1,d1
+#endif
+ vadd.i64 d27,d28,d22
+ veor d29,d20,d21
+ veor d24,d25
+ vand d29,d19
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d21 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d23,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d23,#34
+ vshr.u64 d26,d23,#39
+ vsli.64 d24,d23,#36
+ vsli.64 d25,d23,#30
+ vsli.64 d26,d23,#25
+ vadd.i64 d27,d1
+ vorr d30,d23,d17
+ vand d29,d23,d17
+ veor d22,d24,d25
+ vand d30,d16
+ veor d22,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d22,d27
+ vadd.i64 d18,d27
+ vadd.i64 d22,d30
+ vshr.u64 d24,d18,#14 @ 2
+#if 2<16
+ vld1.64 {d2},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d18,#18
+ vshr.u64 d26,d18,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d18,#50
+ vsli.64 d25,d18,#46
+ vsli.64 d26,d18,#23
+#if 2<16 && defined(__ARMEL__)
+ vrev64.8 d2,d2
+#endif
+ vadd.i64 d27,d28,d21
+ veor d29,d19,d20
+ veor d24,d25
+ vand d29,d18
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d20 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d22,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d22,#34
+ vshr.u64 d26,d22,#39
+ vsli.64 d24,d22,#36
+ vsli.64 d25,d22,#30
+ vsli.64 d26,d22,#25
+ vadd.i64 d27,d2
+ vorr d30,d22,d16
+ vand d29,d22,d16
+ veor d21,d24,d25
+ vand d30,d23
+ veor d21,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d21,d27
+ vadd.i64 d17,d27
+ vadd.i64 d21,d30
+ vshr.u64 d24,d17,#14 @ 3
+#if 3<16
+ vld1.64 {d3},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d17,#18
+ vshr.u64 d26,d17,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d17,#50
+ vsli.64 d25,d17,#46
+ vsli.64 d26,d17,#23
+#if 3<16 && defined(__ARMEL__)
+ vrev64.8 d3,d3
+#endif
+ vadd.i64 d27,d28,d20
+ veor d29,d18,d19
+ veor d24,d25
+ vand d29,d17
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d19 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d21,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d21,#34
+ vshr.u64 d26,d21,#39
+ vsli.64 d24,d21,#36
+ vsli.64 d25,d21,#30
+ vsli.64 d26,d21,#25
+ vadd.i64 d27,d3
+ vorr d30,d21,d23
+ vand d29,d21,d23
+ veor d20,d24,d25
+ vand d30,d22
+ veor d20,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d20,d27
+ vadd.i64 d16,d27
+ vadd.i64 d20,d30
+ vshr.u64 d24,d16,#14 @ 4
+#if 4<16
+ vld1.64 {d4},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d16,#18
+ vshr.u64 d26,d16,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d16,#50
+ vsli.64 d25,d16,#46
+ vsli.64 d26,d16,#23
+#if 4<16 && defined(__ARMEL__)
+ vrev64.8 d4,d4
+#endif
+ vadd.i64 d27,d28,d19
+ veor d29,d17,d18
+ veor d24,d25
+ vand d29,d16
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d18 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d20,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d20,#34
+ vshr.u64 d26,d20,#39
+ vsli.64 d24,d20,#36
+ vsli.64 d25,d20,#30
+ vsli.64 d26,d20,#25
+ vadd.i64 d27,d4
+ vorr d30,d20,d22
+ vand d29,d20,d22
+ veor d19,d24,d25
+ vand d30,d21
+ veor d19,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d19,d27
+ vadd.i64 d23,d27
+ vadd.i64 d19,d30
+ vshr.u64 d24,d23,#14 @ 5
+#if 5<16
+ vld1.64 {d5},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d23,#18
+ vshr.u64 d26,d23,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d23,#50
+ vsli.64 d25,d23,#46
+ vsli.64 d26,d23,#23
+#if 5<16 && defined(__ARMEL__)
+ vrev64.8 d5,d5
+#endif
+ vadd.i64 d27,d28,d18
+ veor d29,d16,d17
+ veor d24,d25
+ vand d29,d23
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d17 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d19,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d19,#34
+ vshr.u64 d26,d19,#39
+ vsli.64 d24,d19,#36
+ vsli.64 d25,d19,#30
+ vsli.64 d26,d19,#25
+ vadd.i64 d27,d5
+ vorr d30,d19,d21
+ vand d29,d19,d21
+ veor d18,d24,d25
+ vand d30,d20
+ veor d18,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d18,d27
+ vadd.i64 d22,d27
+ vadd.i64 d18,d30
+ vshr.u64 d24,d22,#14 @ 6
+#if 6<16
+ vld1.64 {d6},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d22,#18
+ vshr.u64 d26,d22,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d22,#50
+ vsli.64 d25,d22,#46
+ vsli.64 d26,d22,#23
+#if 6<16 && defined(__ARMEL__)
+ vrev64.8 d6,d6
+#endif
+ vadd.i64 d27,d28,d17
+ veor d29,d23,d16
+ veor d24,d25
+ vand d29,d22
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d16 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d18,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d18,#34
+ vshr.u64 d26,d18,#39
+ vsli.64 d24,d18,#36
+ vsli.64 d25,d18,#30
+ vsli.64 d26,d18,#25
+ vadd.i64 d27,d6
+ vorr d30,d18,d20
+ vand d29,d18,d20
+ veor d17,d24,d25
+ vand d30,d19
+ veor d17,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d17,d27
+ vadd.i64 d21,d27
+ vadd.i64 d17,d30
+ vshr.u64 d24,d21,#14 @ 7
+#if 7<16
+ vld1.64 {d7},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d21,#18
+ vshr.u64 d26,d21,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d21,#50
+ vsli.64 d25,d21,#46
+ vsli.64 d26,d21,#23
+#if 7<16 && defined(__ARMEL__)
+ vrev64.8 d7,d7
+#endif
+ vadd.i64 d27,d28,d16
+ veor d29,d22,d23
+ veor d24,d25
+ vand d29,d21
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d23 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d17,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d17,#34
+ vshr.u64 d26,d17,#39
+ vsli.64 d24,d17,#36
+ vsli.64 d25,d17,#30
+ vsli.64 d26,d17,#25
+ vadd.i64 d27,d7
+ vorr d30,d17,d19
+ vand d29,d17,d19
+ veor d16,d24,d25
+ vand d30,d18
+ veor d16,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d16,d27
+ vadd.i64 d20,d27
+ vadd.i64 d16,d30
+ vshr.u64 d24,d20,#14 @ 8
+#if 8<16
+ vld1.64 {d8},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d20,#18
+ vshr.u64 d26,d20,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d20,#50
+ vsli.64 d25,d20,#46
+ vsli.64 d26,d20,#23
+#if 8<16 && defined(__ARMEL__)
+ vrev64.8 d8,d8
+#endif
+ vadd.i64 d27,d28,d23
+ veor d29,d21,d22
+ veor d24,d25
+ vand d29,d20
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d22 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d16,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d16,#34
+ vshr.u64 d26,d16,#39
+ vsli.64 d24,d16,#36
+ vsli.64 d25,d16,#30
+ vsli.64 d26,d16,#25
+ vadd.i64 d27,d8
+ vorr d30,d16,d18
+ vand d29,d16,d18
+ veor d23,d24,d25
+ vand d30,d17
+ veor d23,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d23,d27
+ vadd.i64 d19,d27
+ vadd.i64 d23,d30
+ vshr.u64 d24,d19,#14 @ 9
+#if 9<16
+ vld1.64 {d9},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d19,#18
+ vshr.u64 d26,d19,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d19,#50
+ vsli.64 d25,d19,#46
+ vsli.64 d26,d19,#23
+#if 9<16 && defined(__ARMEL__)
+ vrev64.8 d9,d9
+#endif
+ vadd.i64 d27,d28,d22
+ veor d29,d20,d21
+ veor d24,d25
+ vand d29,d19
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d21 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d23,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d23,#34
+ vshr.u64 d26,d23,#39
+ vsli.64 d24,d23,#36
+ vsli.64 d25,d23,#30
+ vsli.64 d26,d23,#25
+ vadd.i64 d27,d9
+ vorr d30,d23,d17
+ vand d29,d23,d17
+ veor d22,d24,d25
+ vand d30,d16
+ veor d22,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d22,d27
+ vadd.i64 d18,d27
+ vadd.i64 d22,d30
+ vshr.u64 d24,d18,#14 @ 10
+#if 10<16
+ vld1.64 {d10},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d18,#18
+ vshr.u64 d26,d18,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d18,#50
+ vsli.64 d25,d18,#46
+ vsli.64 d26,d18,#23
+#if 10<16 && defined(__ARMEL__)
+ vrev64.8 d10,d10
+#endif
+ vadd.i64 d27,d28,d21
+ veor d29,d19,d20
+ veor d24,d25
+ vand d29,d18
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d20 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d22,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d22,#34
+ vshr.u64 d26,d22,#39
+ vsli.64 d24,d22,#36
+ vsli.64 d25,d22,#30
+ vsli.64 d26,d22,#25
+ vadd.i64 d27,d10
+ vorr d30,d22,d16
+ vand d29,d22,d16
+ veor d21,d24,d25
+ vand d30,d23
+ veor d21,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d21,d27
+ vadd.i64 d17,d27
+ vadd.i64 d21,d30
+ vshr.u64 d24,d17,#14 @ 11
+#if 11<16
+ vld1.64 {d11},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d17,#18
+ vshr.u64 d26,d17,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d17,#50
+ vsli.64 d25,d17,#46
+ vsli.64 d26,d17,#23
+#if 11<16 && defined(__ARMEL__)
+ vrev64.8 d11,d11
+#endif
+ vadd.i64 d27,d28,d20
+ veor d29,d18,d19
+ veor d24,d25
+ vand d29,d17
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d19 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d21,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d21,#34
+ vshr.u64 d26,d21,#39
+ vsli.64 d24,d21,#36
+ vsli.64 d25,d21,#30
+ vsli.64 d26,d21,#25
+ vadd.i64 d27,d11
+ vorr d30,d21,d23
+ vand d29,d21,d23
+ veor d20,d24,d25
+ vand d30,d22
+ veor d20,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d20,d27
+ vadd.i64 d16,d27
+ vadd.i64 d20,d30
+ vshr.u64 d24,d16,#14 @ 12
+#if 12<16
+ vld1.64 {d12},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d16,#18
+ vshr.u64 d26,d16,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d16,#50
+ vsli.64 d25,d16,#46
+ vsli.64 d26,d16,#23
+#if 12<16 && defined(__ARMEL__)
+ vrev64.8 d12,d12
+#endif
+ vadd.i64 d27,d28,d19
+ veor d29,d17,d18
+ veor d24,d25
+ vand d29,d16
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d18 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d20,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d20,#34
+ vshr.u64 d26,d20,#39
+ vsli.64 d24,d20,#36
+ vsli.64 d25,d20,#30
+ vsli.64 d26,d20,#25
+ vadd.i64 d27,d12
+ vorr d30,d20,d22
+ vand d29,d20,d22
+ veor d19,d24,d25
+ vand d30,d21
+ veor d19,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d19,d27
+ vadd.i64 d23,d27
+ vadd.i64 d19,d30
+ vshr.u64 d24,d23,#14 @ 13
+#if 13<16
+ vld1.64 {d13},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d23,#18
+ vshr.u64 d26,d23,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d23,#50
+ vsli.64 d25,d23,#46
+ vsli.64 d26,d23,#23
+#if 13<16 && defined(__ARMEL__)
+ vrev64.8 d13,d13
+#endif
+ vadd.i64 d27,d28,d18
+ veor d29,d16,d17
+ veor d24,d25
+ vand d29,d23
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d17 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d19,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d19,#34
+ vshr.u64 d26,d19,#39
+ vsli.64 d24,d19,#36
+ vsli.64 d25,d19,#30
+ vsli.64 d26,d19,#25
+ vadd.i64 d27,d13
+ vorr d30,d19,d21
+ vand d29,d19,d21
+ veor d18,d24,d25
+ vand d30,d20
+ veor d18,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d18,d27
+ vadd.i64 d22,d27
+ vadd.i64 d18,d30
+ vshr.u64 d24,d22,#14 @ 14
+#if 14<16
+ vld1.64 {d14},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d22,#18
+ vshr.u64 d26,d22,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d22,#50
+ vsli.64 d25,d22,#46
+ vsli.64 d26,d22,#23
+#if 14<16 && defined(__ARMEL__)
+ vrev64.8 d14,d14
+#endif
+ vadd.i64 d27,d28,d17
+ veor d29,d23,d16
+ veor d24,d25
+ vand d29,d22
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d16 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d18,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d18,#34
+ vshr.u64 d26,d18,#39
+ vsli.64 d24,d18,#36
+ vsli.64 d25,d18,#30
+ vsli.64 d26,d18,#25
+ vadd.i64 d27,d14
+ vorr d30,d18,d20
+ vand d29,d18,d20
+ veor d17,d24,d25
+ vand d30,d19
+ veor d17,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d17,d27
+ vadd.i64 d21,d27
+ vadd.i64 d17,d30
+ vshr.u64 d24,d21,#14 @ 15
+#if 15<16
+ vld1.64 {d15},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d21,#18
+ vshr.u64 d26,d21,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d21,#50
+ vsli.64 d25,d21,#46
+ vsli.64 d26,d21,#23
+#if 15<16 && defined(__ARMEL__)
+ vrev64.8 d15,d15
+#endif
+ vadd.i64 d27,d28,d16
+ veor d29,d22,d23
+ veor d24,d25
+ vand d29,d21
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d23 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d17,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d17,#34
+ vshr.u64 d26,d17,#39
+ vsli.64 d24,d17,#36
+ vsli.64 d25,d17,#30
+ vsli.64 d26,d17,#25
+ vadd.i64 d27,d15
+ vorr d30,d17,d19
+ vand d29,d17,d19
+ veor d16,d24,d25
+ vand d30,d18
+ veor d16,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d16,d27
+ vadd.i64 d20,d27
+ vadd.i64 d16,d30
+ mov r12,#4
+.L16_79_neon:
+ subs r12,#1
+ vshr.u64 q12,q7,#19
+ vshr.u64 q13,q7,#61
+ vshr.u64 q15,q7,#6
+ vsli.64 q12,q7,#45
+ vext.8 q14,q0,q1,#8 @ X[i+1]
+ vsli.64 q13,q7,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q0,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q4,q5,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d20,#14 @ from NEON_00_15
+ vadd.i64 q0,q14
+ vshr.u64 d25,d20,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d20,#41 @ from NEON_00_15
+ vadd.i64 q0,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d20,#50
+ vsli.64 d25,d20,#46
+ vsli.64 d26,d20,#23
+#if 16<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ vadd.i64 d27,d28,d23
+ veor d29,d21,d22
+ veor d24,d25
+ vand d29,d20
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d22 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d16,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d16,#34
+ vshr.u64 d26,d16,#39
+ vsli.64 d24,d16,#36
+ vsli.64 d25,d16,#30
+ vsli.64 d26,d16,#25
+ vadd.i64 d27,d0
+ vorr d30,d16,d18
+ vand d29,d16,d18
+ veor d23,d24,d25
+ vand d30,d17
+ veor d23,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d23,d27
+ vadd.i64 d19,d27
+ vadd.i64 d23,d30
+ vshr.u64 d24,d19,#14 @ 17
+#if 17<16
+ vld1.64 {d1},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d19,#18
+ vshr.u64 d26,d19,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d19,#50
+ vsli.64 d25,d19,#46
+ vsli.64 d26,d19,#23
+#if 17<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ vadd.i64 d27,d28,d22
+ veor d29,d20,d21
+ veor d24,d25
+ vand d29,d19
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d21 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d23,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d23,#34
+ vshr.u64 d26,d23,#39
+ vsli.64 d24,d23,#36
+ vsli.64 d25,d23,#30
+ vsli.64 d26,d23,#25
+ vadd.i64 d27,d1
+ vorr d30,d23,d17
+ vand d29,d23,d17
+ veor d22,d24,d25
+ vand d30,d16
+ veor d22,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d22,d27
+ vadd.i64 d18,d27
+ vadd.i64 d22,d30
+ vshr.u64 q12,q0,#19
+ vshr.u64 q13,q0,#61
+ vshr.u64 q15,q0,#6
+ vsli.64 q12,q0,#45
+ vext.8 q14,q1,q2,#8 @ X[i+1]
+ vsli.64 q13,q0,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q1,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q5,q6,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d18,#14 @ from NEON_00_15
+ vadd.i64 q1,q14
+ vshr.u64 d25,d18,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d18,#41 @ from NEON_00_15
+ vadd.i64 q1,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d18,#50
+ vsli.64 d25,d18,#46
+ vsli.64 d26,d18,#23
+#if 18<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ vadd.i64 d27,d28,d21
+ veor d29,d19,d20
+ veor d24,d25
+ vand d29,d18
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d20 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d22,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d22,#34
+ vshr.u64 d26,d22,#39
+ vsli.64 d24,d22,#36
+ vsli.64 d25,d22,#30
+ vsli.64 d26,d22,#25
+ vadd.i64 d27,d2
+ vorr d30,d22,d16
+ vand d29,d22,d16
+ veor d21,d24,d25
+ vand d30,d23
+ veor d21,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d21,d27
+ vadd.i64 d17,d27
+ vadd.i64 d21,d30
+ vshr.u64 d24,d17,#14 @ 19
+#if 19<16
+ vld1.64 {d3},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d17,#18
+ vshr.u64 d26,d17,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d17,#50
+ vsli.64 d25,d17,#46
+ vsli.64 d26,d17,#23
+#if 19<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ vadd.i64 d27,d28,d20
+ veor d29,d18,d19
+ veor d24,d25
+ vand d29,d17
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d19 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d21,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d21,#34
+ vshr.u64 d26,d21,#39
+ vsli.64 d24,d21,#36
+ vsli.64 d25,d21,#30
+ vsli.64 d26,d21,#25
+ vadd.i64 d27,d3
+ vorr d30,d21,d23
+ vand d29,d21,d23
+ veor d20,d24,d25
+ vand d30,d22
+ veor d20,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d20,d27
+ vadd.i64 d16,d27
+ vadd.i64 d20,d30
+ vshr.u64 q12,q1,#19
+ vshr.u64 q13,q1,#61
+ vshr.u64 q15,q1,#6
+ vsli.64 q12,q1,#45
+ vext.8 q14,q2,q3,#8 @ X[i+1]
+ vsli.64 q13,q1,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q2,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q6,q7,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d16,#14 @ from NEON_00_15
+ vadd.i64 q2,q14
+ vshr.u64 d25,d16,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d16,#41 @ from NEON_00_15
+ vadd.i64 q2,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d16,#50
+ vsli.64 d25,d16,#46
+ vsli.64 d26,d16,#23
+#if 20<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ vadd.i64 d27,d28,d19
+ veor d29,d17,d18
+ veor d24,d25
+ vand d29,d16
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d18 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d20,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d20,#34
+ vshr.u64 d26,d20,#39
+ vsli.64 d24,d20,#36
+ vsli.64 d25,d20,#30
+ vsli.64 d26,d20,#25
+ vadd.i64 d27,d4
+ vorr d30,d20,d22
+ vand d29,d20,d22
+ veor d19,d24,d25
+ vand d30,d21
+ veor d19,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d19,d27
+ vadd.i64 d23,d27
+ vadd.i64 d19,d30
+ vshr.u64 d24,d23,#14 @ 21
+#if 21<16
+ vld1.64 {d5},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d23,#18
+ vshr.u64 d26,d23,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d23,#50
+ vsli.64 d25,d23,#46
+ vsli.64 d26,d23,#23
+#if 21<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ vadd.i64 d27,d28,d18
+ veor d29,d16,d17
+ veor d24,d25
+ vand d29,d23
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d17 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d19,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d19,#34
+ vshr.u64 d26,d19,#39
+ vsli.64 d24,d19,#36
+ vsli.64 d25,d19,#30
+ vsli.64 d26,d19,#25
+ vadd.i64 d27,d5
+ vorr d30,d19,d21
+ vand d29,d19,d21
+ veor d18,d24,d25
+ vand d30,d20
+ veor d18,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d18,d27
+ vadd.i64 d22,d27
+ vadd.i64 d18,d30
+ vshr.u64 q12,q2,#19
+ vshr.u64 q13,q2,#61
+ vshr.u64 q15,q2,#6
+ vsli.64 q12,q2,#45
+ vext.8 q14,q3,q4,#8 @ X[i+1]
+ vsli.64 q13,q2,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q3,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q7,q0,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d22,#14 @ from NEON_00_15
+ vadd.i64 q3,q14
+ vshr.u64 d25,d22,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d22,#41 @ from NEON_00_15
+ vadd.i64 q3,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d22,#50
+ vsli.64 d25,d22,#46
+ vsli.64 d26,d22,#23
+#if 22<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ vadd.i64 d27,d28,d17
+ veor d29,d23,d16
+ veor d24,d25
+ vand d29,d22
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d16 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d18,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d18,#34
+ vshr.u64 d26,d18,#39
+ vsli.64 d24,d18,#36
+ vsli.64 d25,d18,#30
+ vsli.64 d26,d18,#25
+ vadd.i64 d27,d6
+ vorr d30,d18,d20
+ vand d29,d18,d20
+ veor d17,d24,d25
+ vand d30,d19
+ veor d17,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d17,d27
+ vadd.i64 d21,d27
+ vadd.i64 d17,d30
+ vshr.u64 d24,d21,#14 @ 23
+#if 23<16
+ vld1.64 {d7},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d21,#18
+ vshr.u64 d26,d21,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d21,#50
+ vsli.64 d25,d21,#46
+ vsli.64 d26,d21,#23
+#if 23<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ vadd.i64 d27,d28,d16
+ veor d29,d22,d23
+ veor d24,d25
+ vand d29,d21
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d23 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d17,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d17,#34
+ vshr.u64 d26,d17,#39
+ vsli.64 d24,d17,#36
+ vsli.64 d25,d17,#30
+ vsli.64 d26,d17,#25
+ vadd.i64 d27,d7
+ vorr d30,d17,d19
+ vand d29,d17,d19
+ veor d16,d24,d25
+ vand d30,d18
+ veor d16,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d16,d27
+ vadd.i64 d20,d27
+ vadd.i64 d16,d30
+ vshr.u64 q12,q3,#19
+ vshr.u64 q13,q3,#61
+ vshr.u64 q15,q3,#6
+ vsli.64 q12,q3,#45
+ vext.8 q14,q4,q5,#8 @ X[i+1]
+ vsli.64 q13,q3,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q4,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q0,q1,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d20,#14 @ from NEON_00_15
+ vadd.i64 q4,q14
+ vshr.u64 d25,d20,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d20,#41 @ from NEON_00_15
+ vadd.i64 q4,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d20,#50
+ vsli.64 d25,d20,#46
+ vsli.64 d26,d20,#23
+#if 24<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ vadd.i64 d27,d28,d23
+ veor d29,d21,d22
+ veor d24,d25
+ vand d29,d20
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d22 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d16,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d16,#34
+ vshr.u64 d26,d16,#39
+ vsli.64 d24,d16,#36
+ vsli.64 d25,d16,#30
+ vsli.64 d26,d16,#25
+ vadd.i64 d27,d8
+ vorr d30,d16,d18
+ vand d29,d16,d18
+ veor d23,d24,d25
+ vand d30,d17
+ veor d23,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d23,d27
+ vadd.i64 d19,d27
+ vadd.i64 d23,d30
+ vshr.u64 d24,d19,#14 @ 25
+#if 25<16
+ vld1.64 {d9},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d19,#18
+ vshr.u64 d26,d19,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d19,#50
+ vsli.64 d25,d19,#46
+ vsli.64 d26,d19,#23
+#if 25<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ vadd.i64 d27,d28,d22
+ veor d29,d20,d21
+ veor d24,d25
+ vand d29,d19
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d21 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d23,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d23,#34
+ vshr.u64 d26,d23,#39
+ vsli.64 d24,d23,#36
+ vsli.64 d25,d23,#30
+ vsli.64 d26,d23,#25
+ vadd.i64 d27,d9
+ vorr d30,d23,d17
+ vand d29,d23,d17
+ veor d22,d24,d25
+ vand d30,d16
+ veor d22,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d22,d27
+ vadd.i64 d18,d27
+ vadd.i64 d22,d30
+ vshr.u64 q12,q4,#19
+ vshr.u64 q13,q4,#61
+ vshr.u64 q15,q4,#6
+ vsli.64 q12,q4,#45
+ vext.8 q14,q5,q6,#8 @ X[i+1]
+ vsli.64 q13,q4,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q5,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q1,q2,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d18,#14 @ from NEON_00_15
+ vadd.i64 q5,q14
+ vshr.u64 d25,d18,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d18,#41 @ from NEON_00_15
+ vadd.i64 q5,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d18,#50
+ vsli.64 d25,d18,#46
+ vsli.64 d26,d18,#23
+#if 26<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ vadd.i64 d27,d28,d21
+ veor d29,d19,d20
+ veor d24,d25
+ vand d29,d18
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d20 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d22,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d22,#34
+ vshr.u64 d26,d22,#39
+ vsli.64 d24,d22,#36
+ vsli.64 d25,d22,#30
+ vsli.64 d26,d22,#25
+ vadd.i64 d27,d10
+ vorr d30,d22,d16
+ vand d29,d22,d16
+ veor d21,d24,d25
+ vand d30,d23
+ veor d21,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d21,d27
+ vadd.i64 d17,d27
+ vadd.i64 d21,d30
+ vshr.u64 d24,d17,#14 @ 27
+#if 27<16
+ vld1.64 {d11},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d17,#18
+ vshr.u64 d26,d17,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d17,#50
+ vsli.64 d25,d17,#46
+ vsli.64 d26,d17,#23
+#if 27<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ vadd.i64 d27,d28,d20
+ veor d29,d18,d19
+ veor d24,d25
+ vand d29,d17
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d19 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d21,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d21,#34
+ vshr.u64 d26,d21,#39
+ vsli.64 d24,d21,#36
+ vsli.64 d25,d21,#30
+ vsli.64 d26,d21,#25
+ vadd.i64 d27,d11
+ vorr d30,d21,d23
+ vand d29,d21,d23
+ veor d20,d24,d25
+ vand d30,d22
+ veor d20,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d20,d27
+ vadd.i64 d16,d27
+ vadd.i64 d20,d30
+ vshr.u64 q12,q5,#19
+ vshr.u64 q13,q5,#61
+ vshr.u64 q15,q5,#6
+ vsli.64 q12,q5,#45
+ vext.8 q14,q6,q7,#8 @ X[i+1]
+ vsli.64 q13,q5,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q6,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q2,q3,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d16,#14 @ from NEON_00_15
+ vadd.i64 q6,q14
+ vshr.u64 d25,d16,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d16,#41 @ from NEON_00_15
+ vadd.i64 q6,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d16,#50
+ vsli.64 d25,d16,#46
+ vsli.64 d26,d16,#23
+#if 28<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ vadd.i64 d27,d28,d19
+ veor d29,d17,d18
+ veor d24,d25
+ vand d29,d16
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d18 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d20,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d20,#34
+ vshr.u64 d26,d20,#39
+ vsli.64 d24,d20,#36
+ vsli.64 d25,d20,#30
+ vsli.64 d26,d20,#25
+ vadd.i64 d27,d12
+ vorr d30,d20,d22
+ vand d29,d20,d22
+ veor d19,d24,d25
+ vand d30,d21
+ veor d19,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d19,d27
+ vadd.i64 d23,d27
+ vadd.i64 d19,d30
+ vshr.u64 d24,d23,#14 @ 29
+#if 29<16
+ vld1.64 {d13},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d23,#18
+ vshr.u64 d26,d23,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d23,#50
+ vsli.64 d25,d23,#46
+ vsli.64 d26,d23,#23
+#if 29<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ vadd.i64 d27,d28,d18
+ veor d29,d16,d17
+ veor d24,d25
+ vand d29,d23
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d17 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d19,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d19,#34
+ vshr.u64 d26,d19,#39
+ vsli.64 d24,d19,#36
+ vsli.64 d25,d19,#30
+ vsli.64 d26,d19,#25
+ vadd.i64 d27,d13
+ vorr d30,d19,d21
+ vand d29,d19,d21
+ veor d18,d24,d25
+ vand d30,d20
+ veor d18,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d18,d27
+ vadd.i64 d22,d27
+ vadd.i64 d18,d30
+ vshr.u64 q12,q6,#19
+ vshr.u64 q13,q6,#61
+ vshr.u64 q15,q6,#6
+ vsli.64 q12,q6,#45
+ vext.8 q14,q7,q0,#8 @ X[i+1]
+ vsli.64 q13,q6,#3
+ veor q15,q12
+ vshr.u64 q12,q14,#1
+ veor q15,q13 @ sigma1(X[i+14])
+ vshr.u64 q13,q14,#8
+ vadd.i64 q7,q15
+ vshr.u64 q15,q14,#7
+ vsli.64 q12,q14,#63
+ vsli.64 q13,q14,#56
+ vext.8 q14,q3,q4,#8 @ X[i+9]
+ veor q15,q12
+ vshr.u64 d24,d22,#14 @ from NEON_00_15
+ vadd.i64 q7,q14
+ vshr.u64 d25,d22,#18 @ from NEON_00_15
+ veor q15,q13 @ sigma0(X[i+1])
+ vshr.u64 d26,d22,#41 @ from NEON_00_15
+ vadd.i64 q7,q15
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d22,#50
+ vsli.64 d25,d22,#46
+ vsli.64 d26,d22,#23
+#if 30<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ vadd.i64 d27,d28,d17
+ veor d29,d23,d16
+ veor d24,d25
+ vand d29,d22
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d16 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d18,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d18,#34
+ vshr.u64 d26,d18,#39
+ vsli.64 d24,d18,#36
+ vsli.64 d25,d18,#30
+ vsli.64 d26,d18,#25
+ vadd.i64 d27,d14
+ vorr d30,d18,d20
+ vand d29,d18,d20
+ veor d17,d24,d25
+ vand d30,d19
+ veor d17,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d17,d27
+ vadd.i64 d21,d27
+ vadd.i64 d17,d30
+ vshr.u64 d24,d21,#14 @ 31
+#if 31<16
+ vld1.64 {d15},[r1]! @ handles unaligned
+#endif
+ vshr.u64 d25,d21,#18
+ vshr.u64 d26,d21,#41
+ vld1.64 {d28},[r3,:64]! @ K[i++]
+ vsli.64 d24,d21,#50
+ vsli.64 d25,d21,#46
+ vsli.64 d26,d21,#23
+#if 31<16 && defined(__ARMEL__)
+ vrev64.8 ,
+#endif
+ vadd.i64 d27,d28,d16
+ veor d29,d22,d23
+ veor d24,d25
+ vand d29,d21
+ veor d24,d26 @ Sigma1(e)
+ veor d29,d23 @ Ch(e,f,g)
+ vadd.i64 d27,d24
+ vshr.u64 d24,d17,#28
+ vadd.i64 d27,d29
+ vshr.u64 d25,d17,#34
+ vshr.u64 d26,d17,#39
+ vsli.64 d24,d17,#36
+ vsli.64 d25,d17,#30
+ vsli.64 d26,d17,#25
+ vadd.i64 d27,d15
+ vorr d30,d17,d19
+ vand d29,d17,d19
+ veor d16,d24,d25
+ vand d30,d18
+ veor d16,d26 @ Sigma0(a)
+ vorr d30,d29 @ Maj(a,b,c)
+ vadd.i64 d16,d27
+ vadd.i64 d20,d27
+ vadd.i64 d16,d30
+ bne .L16_79_neon
+
+ vldmia r0,{d24-d31} @ load context to temp
+ vadd.i64 q8,q12 @ vectorized accumulate
+ vadd.i64 q9,q13
+ vadd.i64 q10,q14
+ vadd.i64 q11,q15
+ vstmia r0,{d16-d23} @ save context
+ teq r1,r2
+ sub r3,#640 @ rewind K512
+ bne .Loop_neon
+
+ vldmia sp!,{d8-d15} @ epilogue
+ .word 0xe12fff1e
+#endif
+.size sha512_block_data_order,.-sha512_block_data_order
+.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
+.align 2
+.comm OPENSSL_armcap_P,4,4
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
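
Aside on the "@ LO ... / @ HI ..." comments in sha512-elf-armv4.S above: the ARMv4 path has no 64-bit rotate, so each ROTR64 in Sigma1(x) = ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41) is rebuilt from shifts of the 32-bit halves. A standalone C sketch of that decomposition (illustration only, not part of the generated assembly; rotr64_halves is an ad hoc name):

    /* Rebuild ROTR64(x,n) from 32-bit halves the way the ARMv4 code does,
     * and check it against a plain 64-bit rotate for the Sigma1 counts. */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t rotr64(uint64_t x, unsigned n) { return (x >> n) | (x << (64 - n)); }

    /* Assumes 0 < n < 32 or 32 < n < 64, which covers the SHA-512 rotations. */
    static uint64_t rotr64_halves(uint32_t hi, uint32_t lo, unsigned n)
    {
            uint32_t rlo, rhi;

            if (n < 32) {
                    rlo = (lo >> n) | (hi << (32 - n));
                    rhi = (hi >> n) | (lo << (32 - n));
            } else {        /* e.g. the 41-bit rotation: hi>>9 | lo<<23 */
                    rlo = (hi >> (n - 32)) | (lo << (64 - n));
                    rhi = (lo >> (n - 32)) | (hi << (64 - n));
            }
            return ((uint64_t)rhi << 32) | rlo;
    }

    int main(void)
    {
            uint64_t x = 0x510e527fade682d1ULL;     /* SHA-512 IV word e */
            uint32_t hi = (uint32_t)(x >> 32), lo = (uint32_t)x;
            unsigned n[] = { 14, 18, 41 };

            for (int i = 0; i < 3; i++)
                    assert(rotr64_halves(hi, lo, n[i]) == rotr64(x, n[i]));
            printf("halved ROTR matches 64-bit ROTR for the Sigma1 rotations\n");
            return 0;
    }
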
diff --git a/ext/libressl/crypto/sha/sha512-elf-x86_64.S b/ext/libressl/crypto/sha/sha512-elf-x86_64.S
new file mode 100644
index 0000000..1173407
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha512-elf-x86_64.S
@@ -0,0 +1,1806 @@
+#include "x86_arch.h"
+.text
+
+.globl sha512_block_data_order
+.type sha512_block_data_order,@function
+.align 16
+sha512_block_data_order:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $128+32,%rsp
+ leaq (%rsi,%rdx,8),%rdx
+ andq $-64,%rsp
+ movq %rdi,128+0(%rsp)
+ movq %rsi,128+8(%rsp)
+ movq %rdx,128+16(%rsp)
+ movq %r11,128+24(%rsp)
+.Lprologue:
+
+ leaq K512(%rip),%rbp
+
+ movq 0(%rdi),%rax
+ movq 8(%rdi),%rbx
+ movq 16(%rdi),%rcx
+ movq 24(%rdi),%rdx
+ movq 32(%rdi),%r8
+ movq 40(%rdi),%r9
+ movq 48(%rdi),%r10
+ movq 56(%rdi),%r11
+ jmp .Lloop
+
+.align 16
+.Lloop:
+ xorq %rdi,%rdi
+ movq 0(%rsi),%r12
+ movq %r8,%r13
+ movq %rax,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,0(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 8(%rsi),%r12
+ movq %rdx,%r13
+ movq %r11,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,8(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 16(%rsi),%r12
+ movq %rcx,%r13
+ movq %r10,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,16(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 24(%rsi),%r12
+ movq %rbx,%r13
+ movq %r9,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,24(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 32(%rsi),%r12
+ movq %rax,%r13
+ movq %r8,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,32(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 40(%rsi),%r12
+ movq %r11,%r13
+ movq %rdx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,40(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 48(%rsi),%r12
+ movq %r10,%r13
+ movq %rcx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,48(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 56(%rsi),%r12
+ movq %r9,%r13
+ movq %rbx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,56(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ movq 64(%rsi),%r12
+ movq %r8,%r13
+ movq %rax,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,64(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 72(%rsi),%r12
+ movq %rdx,%r13
+ movq %r11,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,72(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 80(%rsi),%r12
+ movq %rcx,%r13
+ movq %r10,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,80(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 88(%rsi),%r12
+ movq %rbx,%r13
+ movq %r9,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,88(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 96(%rsi),%r12
+ movq %rax,%r13
+ movq %r8,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,96(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 104(%rsi),%r12
+ movq %r11,%r13
+ movq %rdx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,104(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 112(%rsi),%r12
+ movq %r10,%r13
+ movq %rcx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,112(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 120(%rsi),%r12
+ movq %r9,%r13
+ movq %rbx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,120(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ jmp .Lrounds_16_xx
+.align 16
+.Lrounds_16_xx:
+ movq 8(%rsp),%r13
+ movq 112(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 72(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 0(%rsp),%r12
+ movq %r8,%r13
+ addq %r14,%r12
+ movq %rax,%r14
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,0(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 16(%rsp),%r13
+ movq 120(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 80(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 8(%rsp),%r12
+ movq %rdx,%r13
+ addq %r14,%r12
+ movq %r11,%r14
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,8(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 24(%rsp),%r13
+ movq 0(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 88(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 16(%rsp),%r12
+ movq %rcx,%r13
+ addq %r14,%r12
+ movq %r10,%r14
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,16(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 32(%rsp),%r13
+ movq 8(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 96(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 24(%rsp),%r12
+ movq %rbx,%r13
+ addq %r14,%r12
+ movq %r9,%r14
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,24(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 40(%rsp),%r13
+ movq 16(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 104(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 32(%rsp),%r12
+ movq %rax,%r13
+ addq %r14,%r12
+ movq %r8,%r14
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,32(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 48(%rsp),%r13
+ movq 24(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 112(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 40(%rsp),%r12
+ movq %r11,%r13
+ addq %r14,%r12
+ movq %rdx,%r14
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,40(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 56(%rsp),%r13
+ movq 32(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 120(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 48(%rsp),%r12
+ movq %r10,%r13
+ addq %r14,%r12
+ movq %rcx,%r14
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,48(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 64(%rsp),%r13
+ movq 40(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 0(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 56(%rsp),%r12
+ movq %r9,%r13
+ addq %r14,%r12
+ movq %rbx,%r14
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,56(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ movq 72(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 8(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 64(%rsp),%r12
+ movq %r8,%r13
+ addq %r14,%r12
+ movq %rax,%r14
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,64(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 80(%rsp),%r13
+ movq 56(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 16(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 72(%rsp),%r12
+ movq %rdx,%r13
+ addq %r14,%r12
+ movq %r11,%r14
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,72(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 88(%rsp),%r13
+ movq 64(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 24(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 80(%rsp),%r12
+ movq %rcx,%r13
+ addq %r14,%r12
+ movq %r10,%r14
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,80(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 96(%rsp),%r13
+ movq 72(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 32(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 88(%rsp),%r12
+ movq %rbx,%r13
+ addq %r14,%r12
+ movq %r9,%r14
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,88(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 104(%rsp),%r13
+ movq 80(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 40(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 96(%rsp),%r12
+ movq %rax,%r13
+ addq %r14,%r12
+ movq %r8,%r14
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,96(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 112(%rsp),%r13
+ movq 88(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 48(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 104(%rsp),%r12
+ movq %r11,%r13
+ addq %r14,%r12
+ movq %rdx,%r14
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,104(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 120(%rsp),%r13
+ movq 96(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 56(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 112(%rsp),%r12
+ movq %r10,%r13
+ addq %r14,%r12
+ movq %rcx,%r14
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,112(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 0(%rsp),%r13
+ movq 104(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 64(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 120(%rsp),%r12
+ movq %r9,%r13
+ addq %r14,%r12
+ movq %rbx,%r14
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,120(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ cmpq $80,%rdi
+ jb .Lrounds_16_xx
+
+ movq 128+0(%rsp),%rdi
+ leaq 128(%rsi),%rsi
+
+ addq 0(%rdi),%rax
+ addq 8(%rdi),%rbx
+ addq 16(%rdi),%rcx
+ addq 24(%rdi),%rdx
+ addq 32(%rdi),%r8
+ addq 40(%rdi),%r9
+ addq 48(%rdi),%r10
+ addq 56(%rdi),%r11
+
+ cmpq 128+16(%rsp),%rsi
+
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+ movq %r8,32(%rdi)
+ movq %r9,40(%rdi)
+ movq %r10,48(%rdi)
+ movq %r11,56(%rdi)
+ jb .Lloop
+
+ movq 128+24(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue:
+ retq
+.size sha512_block_data_order,.-sha512_block_data_order
+.align 64
+.type K512,@object
+K512:
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0xd807aa98a3030242,0x12835b0145706fbe
+.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad 0x9bdc06a725c71235,0xc19bf174cf692694
+.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad 0x983e5152ee66dfab,0xa831c66d2db43210
+.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad 0x06ca6351e003826f,0x142929670a0e6e70
+.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad 0x81c2c92e47edaee6,0x92722c851482353b
+.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad 0xd192e819d6ef5218,0xd69906245565a910
+.quad 0xf40e35855771202a,0x106aa07032bbd1b8
+.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad 0x90befffa23631e28,0xa4506cebde82bde9
+.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad 0xca273eceea26619c,0xd186b8c721c0c207
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+.quad 0x113f9804bef90dae,0x1b710b35131c471b
+.quad 0x28db77f523047d84,0x32caab7b40c72493
+.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/sha/sha512-macosx-x86_64.S b/ext/libressl/crypto/sha/sha512-macosx-x86_64.S
new file mode 100644
index 0000000..7581da4
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha512-macosx-x86_64.S
@@ -0,0 +1,1803 @@
+#include "x86_arch.h"
+.text
+
+.globl _sha512_block_data_order
+
+.p2align 4
+_sha512_block_data_order:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $128+32,%rsp
+ leaq (%rsi,%rdx,8),%rdx
+ andq $-64,%rsp
+ movq %rdi,128+0(%rsp)
+ movq %rsi,128+8(%rsp)
+ movq %rdx,128+16(%rsp)
+ movq %r11,128+24(%rsp)
+L$prologue:
+
+ leaq K512(%rip),%rbp
+
+ movq 0(%rdi),%rax
+ movq 8(%rdi),%rbx
+ movq 16(%rdi),%rcx
+ movq 24(%rdi),%rdx
+ movq 32(%rdi),%r8
+ movq 40(%rdi),%r9
+ movq 48(%rdi),%r10
+ movq 56(%rdi),%r11
+ jmp L$loop
+
+.p2align 4
+L$loop:
+ xorq %rdi,%rdi
+ movq 0(%rsi),%r12
+ movq %r8,%r13
+ movq %rax,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,0(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 8(%rsi),%r12
+ movq %rdx,%r13
+ movq %r11,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,8(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 16(%rsi),%r12
+ movq %rcx,%r13
+ movq %r10,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,16(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 24(%rsi),%r12
+ movq %rbx,%r13
+ movq %r9,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,24(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 32(%rsi),%r12
+ movq %rax,%r13
+ movq %r8,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,32(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 40(%rsi),%r12
+ movq %r11,%r13
+ movq %rdx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,40(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 48(%rsi),%r12
+ movq %r10,%r13
+ movq %rcx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,48(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 56(%rsi),%r12
+ movq %r9,%r13
+ movq %rbx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,56(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ movq 64(%rsi),%r12
+ movq %r8,%r13
+ movq %rax,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,64(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 72(%rsi),%r12
+ movq %rdx,%r13
+ movq %r11,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,72(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 80(%rsi),%r12
+ movq %rcx,%r13
+ movq %r10,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,80(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 88(%rsi),%r12
+ movq %rbx,%r13
+ movq %r9,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,88(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 96(%rsi),%r12
+ movq %rax,%r13
+ movq %r8,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,96(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 104(%rsi),%r12
+ movq %r11,%r13
+ movq %rdx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,104(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 112(%rsi),%r12
+ movq %r10,%r13
+ movq %rcx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,112(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 120(%rsi),%r12
+ movq %r9,%r13
+ movq %rbx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,120(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ jmp L$rounds_16_xx
+.p2align 4
+L$rounds_16_xx:
+ movq 8(%rsp),%r13
+ movq 112(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 72(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 0(%rsp),%r12
+ movq %r8,%r13
+ addq %r14,%r12
+ movq %rax,%r14
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,0(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 16(%rsp),%r13
+ movq 120(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 80(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 8(%rsp),%r12
+ movq %rdx,%r13
+ addq %r14,%r12
+ movq %r11,%r14
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,8(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 24(%rsp),%r13
+ movq 0(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 88(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 16(%rsp),%r12
+ movq %rcx,%r13
+ addq %r14,%r12
+ movq %r10,%r14
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,16(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 32(%rsp),%r13
+ movq 8(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 96(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 24(%rsp),%r12
+ movq %rbx,%r13
+ addq %r14,%r12
+ movq %r9,%r14
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,24(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 40(%rsp),%r13
+ movq 16(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 104(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 32(%rsp),%r12
+ movq %rax,%r13
+ addq %r14,%r12
+ movq %r8,%r14
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,32(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 48(%rsp),%r13
+ movq 24(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 112(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 40(%rsp),%r12
+ movq %r11,%r13
+ addq %r14,%r12
+ movq %rdx,%r14
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,40(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 56(%rsp),%r13
+ movq 32(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 120(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 48(%rsp),%r12
+ movq %r10,%r13
+ addq %r14,%r12
+ movq %rcx,%r14
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,48(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 64(%rsp),%r13
+ movq 40(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 0(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 56(%rsp),%r12
+ movq %r9,%r13
+ addq %r14,%r12
+ movq %rbx,%r14
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,56(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ movq 72(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 8(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 64(%rsp),%r12
+ movq %r8,%r13
+ addq %r14,%r12
+ movq %rax,%r14
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,64(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 80(%rsp),%r13
+ movq 56(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 16(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 72(%rsp),%r12
+ movq %rdx,%r13
+ addq %r14,%r12
+ movq %r11,%r14
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,72(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 88(%rsp),%r13
+ movq 64(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 24(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 80(%rsp),%r12
+ movq %rcx,%r13
+ addq %r14,%r12
+ movq %r10,%r14
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,80(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 96(%rsp),%r13
+ movq 72(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 32(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 88(%rsp),%r12
+ movq %rbx,%r13
+ addq %r14,%r12
+ movq %r9,%r14
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,88(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 104(%rsp),%r13
+ movq 80(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 40(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 96(%rsp),%r12
+ movq %rax,%r13
+ addq %r14,%r12
+ movq %r8,%r14
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,96(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 112(%rsp),%r13
+ movq 88(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 48(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 104(%rsp),%r12
+ movq %r11,%r13
+ addq %r14,%r12
+ movq %rdx,%r14
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,104(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 120(%rsp),%r13
+ movq 96(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 56(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 112(%rsp),%r12
+ movq %r10,%r13
+ addq %r14,%r12
+ movq %rcx,%r14
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,112(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 0(%rsp),%r13
+ movq 104(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 64(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 120(%rsp),%r12
+ movq %r9,%r13
+ addq %r14,%r12
+ movq %rbx,%r14
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,120(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ cmpq $80,%rdi
+ jb L$rounds_16_xx
+
+ movq 128+0(%rsp),%rdi
+ leaq 128(%rsi),%rsi
+
+ addq 0(%rdi),%rax
+ addq 8(%rdi),%rbx
+ addq 16(%rdi),%rcx
+ addq 24(%rdi),%rdx
+ addq 32(%rdi),%r8
+ addq 40(%rdi),%r9
+ addq 48(%rdi),%r10
+ addq 56(%rdi),%r11
+
+ cmpq 128+16(%rsp),%rsi
+
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+ movq %r8,32(%rdi)
+ movq %r9,40(%rdi)
+ movq %r10,48(%rdi)
+ movq %r11,56(%rdi)
+ jb L$loop
+
+ movq 128+24(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+L$epilogue:
+ retq
+
+.p2align 6
+
+K512:
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0xd807aa98a3030242,0x12835b0145706fbe
+.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad 0x9bdc06a725c71235,0xc19bf174cf692694
+.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad 0x983e5152ee66dfab,0xa831c66d2db43210
+.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad 0x06ca6351e003826f,0x142929670a0e6e70
+.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad 0x81c2c92e47edaee6,0x92722c851482353b
+.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad 0xd192e819d6ef5218,0xd69906245565a910
+.quad 0xf40e35855771202a,0x106aa07032bbd1b8
+.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad 0x90befffa23631e28,0xa4506cebde82bde9
+.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad 0xca273eceea26619c,0xd186b8c721c0c207
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+.quad 0x113f9804bef90dae,0x1b710b35131c471b
+.quad 0x28db77f523047d84,0x32caab7b40c72493
+.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
diff --git a/ext/libressl/crypto/sha/sha512-masm-x86_64.S b/ext/libressl/crypto/sha/sha512-masm-x86_64.S
new file mode 100644
index 0000000..4a2b9af
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha512-masm-x86_64.S
@@ -0,0 +1,1888 @@
+; 1 "crypto/sha/sha512-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/sha/sha512-masm-x86_64.S.tmp" 2
+OPTION DOTNAME
+
+; 1 "./crypto/x86_arch.h" 1
+
+
+; 16 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+; 40 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 "crypto/sha/sha512-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+
+PUBLIC sha512_block_data_order
+
+ALIGN 16
+sha512_block_data_order PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_sha512_block_data_order::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ mov r11,rsp
+ shl rdx,4
+ sub rsp,16*8+4*8
+ lea rdx,QWORD PTR[rdx*8+rsi]
+ and rsp,-64
+ mov QWORD PTR[((128+0))+rsp],rdi
+ mov QWORD PTR[((128+8))+rsp],rsi
+ mov QWORD PTR[((128+16))+rsp],rdx
+ mov QWORD PTR[((128+24))+rsp],r11
+$L$prologue::
+
+ lea rbp,QWORD PTR[K512]
+
+ mov rax,QWORD PTR[rdi]
+ mov rbx,QWORD PTR[8+rdi]
+ mov rcx,QWORD PTR[16+rdi]
+ mov rdx,QWORD PTR[24+rdi]
+ mov r8,QWORD PTR[32+rdi]
+ mov r9,QWORD PTR[40+rdi]
+ mov r10,QWORD PTR[48+rdi]
+ mov r11,QWORD PTR[56+rdi]
+ jmp $L$loop
+
+ALIGN 16
+$L$loop::
+ xor rdi,rdi
+ mov r12,QWORD PTR[rsi]
+ mov r13,r8
+ mov r14,rax
+ bswap r12
+ ror r13,23
+ mov r15,r9
+ mov QWORD PTR[rsp],r12
+
+ ror r14,5
+ xor r13,r8
+ xor r15,r10
+
+ ror r13,4
+ add r12,r11
+ xor r14,rax
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,r8
+ mov r11,rbx
+
+ ror r14,6
+ xor r13,r8
+ xor r15,r10
+
+ xor r11,rcx
+ xor r14,rax
+ add r12,r15
+ mov r15,rbx
+
+ ror r13,14
+ and r11,rax
+ and r15,rcx
+
+ ror r14,28
+ add r12,r13
+ add r11,r15
+
+ add rdx,r12
+ add r11,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add r11,r14
+
+ mov r12,QWORD PTR[8+rsi]
+ mov r13,rdx
+ mov r14,r11
+ bswap r12
+ ror r13,23
+ mov r15,r8
+ mov QWORD PTR[8+rsp],r12
+
+ ror r14,5
+ xor r13,rdx
+ xor r15,r9
+
+ ror r13,4
+ add r12,r10
+ xor r14,r11
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,rdx
+ mov r10,rax
+
+ ror r14,6
+ xor r13,rdx
+ xor r15,r9
+
+ xor r10,rbx
+ xor r14,r11
+ add r12,r15
+ mov r15,rax
+
+ ror r13,14
+ and r10,r11
+ and r15,rbx
+
+ ror r14,28
+ add r12,r13
+ add r10,r15
+
+ add rcx,r12
+ add r10,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add r10,r14
+
+ mov r12,QWORD PTR[16+rsi]
+ mov r13,rcx
+ mov r14,r10
+ bswap r12
+ ror r13,23
+ mov r15,rdx
+ mov QWORD PTR[16+rsp],r12
+
+ ror r14,5
+ xor r13,rcx
+ xor r15,r8
+
+ ror r13,4
+ add r12,r9
+ xor r14,r10
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,rcx
+ mov r9,r11
+
+ ror r14,6
+ xor r13,rcx
+ xor r15,r8
+
+ xor r9,rax
+ xor r14,r10
+ add r12,r15
+ mov r15,r11
+
+ ror r13,14
+ and r9,r10
+ and r15,rax
+
+ ror r14,28
+ add r12,r13
+ add r9,r15
+
+ add rbx,r12
+ add r9,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add r9,r14
+
+ mov r12,QWORD PTR[24+rsi]
+ mov r13,rbx
+ mov r14,r9
+ bswap r12
+ ror r13,23
+ mov r15,rcx
+ mov QWORD PTR[24+rsp],r12
+
+ ror r14,5
+ xor r13,rbx
+ xor r15,rdx
+
+ ror r13,4
+ add r12,r8
+ xor r14,r9
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,rbx
+ mov r8,r10
+
+ ror r14,6
+ xor r13,rbx
+ xor r15,rdx
+
+ xor r8,r11
+ xor r14,r9
+ add r12,r15
+ mov r15,r10
+
+ ror r13,14
+ and r8,r9
+ and r15,r11
+
+ ror r14,28
+ add r12,r13
+ add r8,r15
+
+ add rax,r12
+ add r8,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add r8,r14
+
+ mov r12,QWORD PTR[32+rsi]
+ mov r13,rax
+ mov r14,r8
+ bswap r12
+ ror r13,23
+ mov r15,rbx
+ mov QWORD PTR[32+rsp],r12
+
+ ror r14,5
+ xor r13,rax
+ xor r15,rcx
+
+ ror r13,4
+ add r12,rdx
+ xor r14,r8
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,rax
+ mov rdx,r9
+
+ ror r14,6
+ xor r13,rax
+ xor r15,rcx
+
+ xor rdx,r10
+ xor r14,r8
+ add r12,r15
+ mov r15,r9
+
+ ror r13,14
+ and rdx,r8
+ and r15,r10
+
+ ror r14,28
+ add r12,r13
+ add rdx,r15
+
+ add r11,r12
+ add rdx,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add rdx,r14
+
+ mov r12,QWORD PTR[40+rsi]
+ mov r13,r11
+ mov r14,rdx
+ bswap r12
+ ror r13,23
+ mov r15,rax
+ mov QWORD PTR[40+rsp],r12
+
+ ror r14,5
+ xor r13,r11
+ xor r15,rbx
+
+ ror r13,4
+ add r12,rcx
+ xor r14,rdx
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,r11
+ mov rcx,r8
+
+ ror r14,6
+ xor r13,r11
+ xor r15,rbx
+
+ xor rcx,r9
+ xor r14,rdx
+ add r12,r15
+ mov r15,r8
+
+ ror r13,14
+ and rcx,rdx
+ and r15,r9
+
+ ror r14,28
+ add r12,r13
+ add rcx,r15
+
+ add r10,r12
+ add rcx,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add rcx,r14
+
+ mov r12,QWORD PTR[48+rsi]
+ mov r13,r10
+ mov r14,rcx
+ bswap r12
+ ror r13,23
+ mov r15,r11
+ mov QWORD PTR[48+rsp],r12
+
+ ror r14,5
+ xor r13,r10
+ xor r15,rax
+
+ ror r13,4
+ add r12,rbx
+ xor r14,rcx
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,r10
+ mov rbx,rdx
+
+ ror r14,6
+ xor r13,r10
+ xor r15,rax
+
+ xor rbx,r8
+ xor r14,rcx
+ add r12,r15
+ mov r15,rdx
+
+ ror r13,14
+ and rbx,rcx
+ and r15,r8
+
+ ror r14,28
+ add r12,r13
+ add rbx,r15
+
+ add r9,r12
+ add rbx,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add rbx,r14
+
+ mov r12,QWORD PTR[56+rsi]
+ mov r13,r9
+ mov r14,rbx
+ bswap r12
+ ror r13,23
+ mov r15,r10
+ mov QWORD PTR[56+rsp],r12
+
+ ror r14,5
+ xor r13,r9
+ xor r15,r11
+
+ ror r13,4
+ add r12,rax
+ xor r14,rbx
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,r9
+ mov rax,rcx
+
+ ror r14,6
+ xor r13,r9
+ xor r15,r11
+
+ xor rax,rdx
+ xor r14,rbx
+ add r12,r15
+ mov r15,rcx
+
+ ror r13,14
+ and rax,rbx
+ and r15,rdx
+
+ ror r14,28
+ add r12,r13
+ add rax,r15
+
+ add r8,r12
+ add rax,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add rax,r14
+
+ mov r12,QWORD PTR[64+rsi]
+ mov r13,r8
+ mov r14,rax
+ bswap r12
+ ror r13,23
+ mov r15,r9
+ mov QWORD PTR[64+rsp],r12
+
+ ror r14,5
+ xor r13,r8
+ xor r15,r10
+
+ ror r13,4
+ add r12,r11
+ xor r14,rax
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,r8
+ mov r11,rbx
+
+ ror r14,6
+ xor r13,r8
+ xor r15,r10
+
+ xor r11,rcx
+ xor r14,rax
+ add r12,r15
+ mov r15,rbx
+
+ ror r13,14
+ and r11,rax
+ and r15,rcx
+
+ ror r14,28
+ add r12,r13
+ add r11,r15
+
+ add rdx,r12
+ add r11,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add r11,r14
+
+ mov r12,QWORD PTR[72+rsi]
+ mov r13,rdx
+ mov r14,r11
+ bswap r12
+ ror r13,23
+ mov r15,r8
+ mov QWORD PTR[72+rsp],r12
+
+ ror r14,5
+ xor r13,rdx
+ xor r15,r9
+
+ ror r13,4
+ add r12,r10
+ xor r14,r11
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,rdx
+ mov r10,rax
+
+ ror r14,6
+ xor r13,rdx
+ xor r15,r9
+
+ xor r10,rbx
+ xor r14,r11
+ add r12,r15
+ mov r15,rax
+
+ ror r13,14
+ and r10,r11
+ and r15,rbx
+
+ ror r14,28
+ add r12,r13
+ add r10,r15
+
+ add rcx,r12
+ add r10,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add r10,r14
+
+ mov r12,QWORD PTR[80+rsi]
+ mov r13,rcx
+ mov r14,r10
+ bswap r12
+ ror r13,23
+ mov r15,rdx
+ mov QWORD PTR[80+rsp],r12
+
+ ror r14,5
+ xor r13,rcx
+ xor r15,r8
+
+ ror r13,4
+ add r12,r9
+ xor r14,r10
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,rcx
+ mov r9,r11
+
+ ror r14,6
+ xor r13,rcx
+ xor r15,r8
+
+ xor r9,rax
+ xor r14,r10
+ add r12,r15
+ mov r15,r11
+
+ ror r13,14
+ and r9,r10
+ and r15,rax
+
+ ror r14,28
+ add r12,r13
+ add r9,r15
+
+ add rbx,r12
+ add r9,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add r9,r14
+
+ mov r12,QWORD PTR[88+rsi]
+ mov r13,rbx
+ mov r14,r9
+ bswap r12
+ ror r13,23
+ mov r15,rcx
+ mov QWORD PTR[88+rsp],r12
+
+ ror r14,5
+ xor r13,rbx
+ xor r15,rdx
+
+ ror r13,4
+ add r12,r8
+ xor r14,r9
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,rbx
+ mov r8,r10
+
+ ror r14,6
+ xor r13,rbx
+ xor r15,rdx
+
+ xor r8,r11
+ xor r14,r9
+ add r12,r15
+ mov r15,r10
+
+ ror r13,14
+ and r8,r9
+ and r15,r11
+
+ ror r14,28
+ add r12,r13
+ add r8,r15
+
+ add rax,r12
+ add r8,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add r8,r14
+
+ mov r12,QWORD PTR[96+rsi]
+ mov r13,rax
+ mov r14,r8
+ bswap r12
+ ror r13,23
+ mov r15,rbx
+ mov QWORD PTR[96+rsp],r12
+
+ ror r14,5
+ xor r13,rax
+ xor r15,rcx
+
+ ror r13,4
+ add r12,rdx
+ xor r14,r8
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,rax
+ mov rdx,r9
+
+ ror r14,6
+ xor r13,rax
+ xor r15,rcx
+
+ xor rdx,r10
+ xor r14,r8
+ add r12,r15
+ mov r15,r9
+
+ ror r13,14
+ and rdx,r8
+ and r15,r10
+
+ ror r14,28
+ add r12,r13
+ add rdx,r15
+
+ add r11,r12
+ add rdx,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add rdx,r14
+
+ mov r12,QWORD PTR[104+rsi]
+ mov r13,r11
+ mov r14,rdx
+ bswap r12
+ ror r13,23
+ mov r15,rax
+ mov QWORD PTR[104+rsp],r12
+
+ ror r14,5
+ xor r13,r11
+ xor r15,rbx
+
+ ror r13,4
+ add r12,rcx
+ xor r14,rdx
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,r11
+ mov rcx,r8
+
+ ror r14,6
+ xor r13,r11
+ xor r15,rbx
+
+ xor rcx,r9
+ xor r14,rdx
+ add r12,r15
+ mov r15,r8
+
+ ror r13,14
+ and rcx,rdx
+ and r15,r9
+
+ ror r14,28
+ add r12,r13
+ add rcx,r15
+
+ add r10,r12
+ add rcx,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add rcx,r14
+
+ mov r12,QWORD PTR[112+rsi]
+ mov r13,r10
+ mov r14,rcx
+ bswap r12
+ ror r13,23
+ mov r15,r11
+ mov QWORD PTR[112+rsp],r12
+
+ ror r14,5
+ xor r13,r10
+ xor r15,rax
+
+ ror r13,4
+ add r12,rbx
+ xor r14,rcx
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,r10
+ mov rbx,rdx
+
+ ror r14,6
+ xor r13,r10
+ xor r15,rax
+
+ xor rbx,r8
+ xor r14,rcx
+ add r12,r15
+ mov r15,rdx
+
+ ror r13,14
+ and rbx,rcx
+ and r15,r8
+
+ ror r14,28
+ add r12,r13
+ add rbx,r15
+
+ add r9,r12
+ add rbx,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add rbx,r14
+
+ mov r12,QWORD PTR[120+rsi]
+ mov r13,r9
+ mov r14,rbx
+ bswap r12
+ ror r13,23
+ mov r15,r10
+ mov QWORD PTR[120+rsp],r12
+
+ ror r14,5
+ xor r13,r9
+ xor r15,r11
+
+ ror r13,4
+ add r12,rax
+ xor r14,rbx
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,r9
+ mov rax,rcx
+
+ ror r14,6
+ xor r13,r9
+ xor r15,r11
+
+ xor rax,rdx
+ xor r14,rbx
+ add r12,r15
+ mov r15,rcx
+
+ ror r13,14
+ and rax,rbx
+ and r15,rdx
+
+ ror r14,28
+ add r12,r13
+ add rax,r15
+
+ add r8,r12
+ add rax,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add rax,r14
+
+ jmp $L$rounds_16_xx
+ALIGN 16
+$L$rounds_16_xx::
+ mov r13,QWORD PTR[8+rsp]
+ mov r14,QWORD PTR[112+rsp]
+ mov r12,r13
+ mov r15,r14
+
+ ror r12,7
+ xor r12,r13
+ shr r13,7
+
+ ror r12,1
+ xor r13,r12
+ mov r12,QWORD PTR[72+rsp]
+
+ ror r15,42
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ add r12,r13
+ xor r14,r15
+
+ add r12,QWORD PTR[rsp]
+ mov r13,r8
+ add r12,r14
+ mov r14,rax
+ ror r13,23
+ mov r15,r9
+ mov QWORD PTR[rsp],r12
+
+ ror r14,5
+ xor r13,r8
+ xor r15,r10
+
+ ror r13,4
+ add r12,r11
+ xor r14,rax
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,r8
+ mov r11,rbx
+
+ ror r14,6
+ xor r13,r8
+ xor r15,r10
+
+ xor r11,rcx
+ xor r14,rax
+ add r12,r15
+ mov r15,rbx
+
+ ror r13,14
+ and r11,rax
+ and r15,rcx
+
+ ror r14,28
+ add r12,r13
+ add r11,r15
+
+ add rdx,r12
+ add r11,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add r11,r14
+
+ mov r13,QWORD PTR[16+rsp]
+ mov r14,QWORD PTR[120+rsp]
+ mov r12,r13
+ mov r15,r14
+
+ ror r12,7
+ xor r12,r13
+ shr r13,7
+
+ ror r12,1
+ xor r13,r12
+ mov r12,QWORD PTR[80+rsp]
+
+ ror r15,42
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ add r12,r13
+ xor r14,r15
+
+ add r12,QWORD PTR[8+rsp]
+ mov r13,rdx
+ add r12,r14
+ mov r14,r11
+ ror r13,23
+ mov r15,r8
+ mov QWORD PTR[8+rsp],r12
+
+ ror r14,5
+ xor r13,rdx
+ xor r15,r9
+
+ ror r13,4
+ add r12,r10
+ xor r14,r11
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,rdx
+ mov r10,rax
+
+ ror r14,6
+ xor r13,rdx
+ xor r15,r9
+
+ xor r10,rbx
+ xor r14,r11
+ add r12,r15
+ mov r15,rax
+
+ ror r13,14
+ and r10,r11
+ and r15,rbx
+
+ ror r14,28
+ add r12,r13
+ add r10,r15
+
+ add rcx,r12
+ add r10,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add r10,r14
+
+ mov r13,QWORD PTR[24+rsp]
+ mov r14,QWORD PTR[rsp]
+ mov r12,r13
+ mov r15,r14
+
+ ror r12,7
+ xor r12,r13
+ shr r13,7
+
+ ror r12,1
+ xor r13,r12
+ mov r12,QWORD PTR[88+rsp]
+
+ ror r15,42
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ add r12,r13
+ xor r14,r15
+
+ add r12,QWORD PTR[16+rsp]
+ mov r13,rcx
+ add r12,r14
+ mov r14,r10
+ ror r13,23
+ mov r15,rdx
+ mov QWORD PTR[16+rsp],r12
+
+ ror r14,5
+ xor r13,rcx
+ xor r15,r8
+
+ ror r13,4
+ add r12,r9
+ xor r14,r10
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,rcx
+ mov r9,r11
+
+ ror r14,6
+ xor r13,rcx
+ xor r15,r8
+
+ xor r9,rax
+ xor r14,r10
+ add r12,r15
+ mov r15,r11
+
+ ror r13,14
+ and r9,r10
+ and r15,rax
+
+ ror r14,28
+ add r12,r13
+ add r9,r15
+
+ add rbx,r12
+ add r9,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add r9,r14
+
+ mov r13,QWORD PTR[32+rsp]
+ mov r14,QWORD PTR[8+rsp]
+ mov r12,r13
+ mov r15,r14
+
+ ror r12,7
+ xor r12,r13
+ shr r13,7
+
+ ror r12,1
+ xor r13,r12
+ mov r12,QWORD PTR[96+rsp]
+
+ ror r15,42
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ add r12,r13
+ xor r14,r15
+
+ add r12,QWORD PTR[24+rsp]
+ mov r13,rbx
+ add r12,r14
+ mov r14,r9
+ ror r13,23
+ mov r15,rcx
+ mov QWORD PTR[24+rsp],r12
+
+ ror r14,5
+ xor r13,rbx
+ xor r15,rdx
+
+ ror r13,4
+ add r12,r8
+ xor r14,r9
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,rbx
+ mov r8,r10
+
+ ror r14,6
+ xor r13,rbx
+ xor r15,rdx
+
+ xor r8,r11
+ xor r14,r9
+ add r12,r15
+ mov r15,r10
+
+ ror r13,14
+ and r8,r9
+ and r15,r11
+
+ ror r14,28
+ add r12,r13
+ add r8,r15
+
+ add rax,r12
+ add r8,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add r8,r14
+
+ mov r13,QWORD PTR[40+rsp]
+ mov r14,QWORD PTR[16+rsp]
+ mov r12,r13
+ mov r15,r14
+
+ ror r12,7
+ xor r12,r13
+ shr r13,7
+
+ ror r12,1
+ xor r13,r12
+ mov r12,QWORD PTR[104+rsp]
+
+ ror r15,42
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ add r12,r13
+ xor r14,r15
+
+ add r12,QWORD PTR[32+rsp]
+ mov r13,rax
+ add r12,r14
+ mov r14,r8
+ ror r13,23
+ mov r15,rbx
+ mov QWORD PTR[32+rsp],r12
+
+ ror r14,5
+ xor r13,rax
+ xor r15,rcx
+
+ ror r13,4
+ add r12,rdx
+ xor r14,r8
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,rax
+ mov rdx,r9
+
+ ror r14,6
+ xor r13,rax
+ xor r15,rcx
+
+ xor rdx,r10
+ xor r14,r8
+ add r12,r15
+ mov r15,r9
+
+ ror r13,14
+ and rdx,r8
+ and r15,r10
+
+ ror r14,28
+ add r12,r13
+ add rdx,r15
+
+ add r11,r12
+ add rdx,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add rdx,r14
+
+ mov r13,QWORD PTR[48+rsp]
+ mov r14,QWORD PTR[24+rsp]
+ mov r12,r13
+ mov r15,r14
+
+ ror r12,7
+ xor r12,r13
+ shr r13,7
+
+ ror r12,1
+ xor r13,r12
+ mov r12,QWORD PTR[112+rsp]
+
+ ror r15,42
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ add r12,r13
+ xor r14,r15
+
+ add r12,QWORD PTR[40+rsp]
+ mov r13,r11
+ add r12,r14
+ mov r14,rdx
+ ror r13,23
+ mov r15,rax
+ mov QWORD PTR[40+rsp],r12
+
+ ror r14,5
+ xor r13,r11
+ xor r15,rbx
+
+ ror r13,4
+ add r12,rcx
+ xor r14,rdx
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,r11
+ mov rcx,r8
+
+ ror r14,6
+ xor r13,r11
+ xor r15,rbx
+
+ xor rcx,r9
+ xor r14,rdx
+ add r12,r15
+ mov r15,r8
+
+ ror r13,14
+ and rcx,rdx
+ and r15,r9
+
+ ror r14,28
+ add r12,r13
+ add rcx,r15
+
+ add r10,r12
+ add rcx,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add rcx,r14
+
+ mov r13,QWORD PTR[56+rsp]
+ mov r14,QWORD PTR[32+rsp]
+ mov r12,r13
+ mov r15,r14
+
+ ror r12,7
+ xor r12,r13
+ shr r13,7
+
+ ror r12,1
+ xor r13,r12
+ mov r12,QWORD PTR[120+rsp]
+
+ ror r15,42
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ add r12,r13
+ xor r14,r15
+
+ add r12,QWORD PTR[48+rsp]
+ mov r13,r10
+ add r12,r14
+ mov r14,rcx
+ ror r13,23
+ mov r15,r11
+ mov QWORD PTR[48+rsp],r12
+
+ ror r14,5
+ xor r13,r10
+ xor r15,rax
+
+ ror r13,4
+ add r12,rbx
+ xor r14,rcx
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,r10
+ mov rbx,rdx
+
+ ror r14,6
+ xor r13,r10
+ xor r15,rax
+
+ xor rbx,r8
+ xor r14,rcx
+ add r12,r15
+ mov r15,rdx
+
+ ror r13,14
+ and rbx,rcx
+ and r15,r8
+
+ ror r14,28
+ add r12,r13
+ add rbx,r15
+
+ add r9,r12
+ add rbx,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add rbx,r14
+
+ mov r13,QWORD PTR[64+rsp]
+ mov r14,QWORD PTR[40+rsp]
+ mov r12,r13
+ mov r15,r14
+
+ ror r12,7
+ xor r12,r13
+ shr r13,7
+
+ ror r12,1
+ xor r13,r12
+ mov r12,QWORD PTR[rsp]
+
+ ror r15,42
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ add r12,r13
+ xor r14,r15
+
+ add r12,QWORD PTR[56+rsp]
+ mov r13,r9
+ add r12,r14
+ mov r14,rbx
+ ror r13,23
+ mov r15,r10
+ mov QWORD PTR[56+rsp],r12
+
+ ror r14,5
+ xor r13,r9
+ xor r15,r11
+
+ ror r13,4
+ add r12,rax
+ xor r14,rbx
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,r9
+ mov rax,rcx
+
+ ror r14,6
+ xor r13,r9
+ xor r15,r11
+
+ xor rax,rdx
+ xor r14,rbx
+ add r12,r15
+ mov r15,rcx
+
+ ror r13,14
+ and rax,rbx
+ and r15,rdx
+
+ ror r14,28
+ add r12,r13
+ add rax,r15
+
+ add r8,r12
+ add rax,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add rax,r14
+
+ mov r13,QWORD PTR[72+rsp]
+ mov r14,QWORD PTR[48+rsp]
+ mov r12,r13
+ mov r15,r14
+
+ ror r12,7
+ xor r12,r13
+ shr r13,7
+
+ ror r12,1
+ xor r13,r12
+ mov r12,QWORD PTR[8+rsp]
+
+ ror r15,42
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ add r12,r13
+ xor r14,r15
+
+ add r12,QWORD PTR[64+rsp]
+ mov r13,r8
+ add r12,r14
+ mov r14,rax
+ ror r13,23
+ mov r15,r9
+ mov QWORD PTR[64+rsp],r12
+
+ ror r14,5
+ xor r13,r8
+ xor r15,r10
+
+ ror r13,4
+ add r12,r11
+ xor r14,rax
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,r8
+ mov r11,rbx
+
+ ror r14,6
+ xor r13,r8
+ xor r15,r10
+
+ xor r11,rcx
+ xor r14,rax
+ add r12,r15
+ mov r15,rbx
+
+ ror r13,14
+ and r11,rax
+ and r15,rcx
+
+ ror r14,28
+ add r12,r13
+ add r11,r15
+
+ add rdx,r12
+ add r11,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add r11,r14
+
+ mov r13,QWORD PTR[80+rsp]
+ mov r14,QWORD PTR[56+rsp]
+ mov r12,r13
+ mov r15,r14
+
+ ror r12,7
+ xor r12,r13
+ shr r13,7
+
+ ror r12,1
+ xor r13,r12
+ mov r12,QWORD PTR[16+rsp]
+
+ ror r15,42
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ add r12,r13
+ xor r14,r15
+
+ add r12,QWORD PTR[72+rsp]
+ mov r13,rdx
+ add r12,r14
+ mov r14,r11
+ ror r13,23
+ mov r15,r8
+ mov QWORD PTR[72+rsp],r12
+
+ ror r14,5
+ xor r13,rdx
+ xor r15,r9
+
+ ror r13,4
+ add r12,r10
+ xor r14,r11
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,rdx
+ mov r10,rax
+
+ ror r14,6
+ xor r13,rdx
+ xor r15,r9
+
+ xor r10,rbx
+ xor r14,r11
+ add r12,r15
+ mov r15,rax
+
+ ror r13,14
+ and r10,r11
+ and r15,rbx
+
+ ror r14,28
+ add r12,r13
+ add r10,r15
+
+ add rcx,r12
+ add r10,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add r10,r14
+
+ mov r13,QWORD PTR[88+rsp]
+ mov r14,QWORD PTR[64+rsp]
+ mov r12,r13
+ mov r15,r14
+
+ ror r12,7
+ xor r12,r13
+ shr r13,7
+
+ ror r12,1
+ xor r13,r12
+ mov r12,QWORD PTR[24+rsp]
+
+ ror r15,42
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ add r12,r13
+ xor r14,r15
+
+ add r12,QWORD PTR[80+rsp]
+ mov r13,rcx
+ add r12,r14
+ mov r14,r10
+ ror r13,23
+ mov r15,rdx
+ mov QWORD PTR[80+rsp],r12
+
+ ror r14,5
+ xor r13,rcx
+ xor r15,r8
+
+ ror r13,4
+ add r12,r9
+ xor r14,r10
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,rcx
+ mov r9,r11
+
+ ror r14,6
+ xor r13,rcx
+ xor r15,r8
+
+ xor r9,rax
+ xor r14,r10
+ add r12,r15
+ mov r15,r11
+
+ ror r13,14
+ and r9,r10
+ and r15,rax
+
+ ror r14,28
+ add r12,r13
+ add r9,r15
+
+ add rbx,r12
+ add r9,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add r9,r14
+
+ mov r13,QWORD PTR[96+rsp]
+ mov r14,QWORD PTR[72+rsp]
+ mov r12,r13
+ mov r15,r14
+
+ ror r12,7
+ xor r12,r13
+ shr r13,7
+
+ ror r12,1
+ xor r13,r12
+ mov r12,QWORD PTR[32+rsp]
+
+ ror r15,42
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ add r12,r13
+ xor r14,r15
+
+ add r12,QWORD PTR[88+rsp]
+ mov r13,rbx
+ add r12,r14
+ mov r14,r9
+ ror r13,23
+ mov r15,rcx
+ mov QWORD PTR[88+rsp],r12
+
+ ror r14,5
+ xor r13,rbx
+ xor r15,rdx
+
+ ror r13,4
+ add r12,r8
+ xor r14,r9
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,rbx
+ mov r8,r10
+
+ ror r14,6
+ xor r13,rbx
+ xor r15,rdx
+
+ xor r8,r11
+ xor r14,r9
+ add r12,r15
+ mov r15,r10
+
+ ror r13,14
+ and r8,r9
+ and r15,r11
+
+ ror r14,28
+ add r12,r13
+ add r8,r15
+
+ add rax,r12
+ add r8,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add r8,r14
+
+ mov r13,QWORD PTR[104+rsp]
+ mov r14,QWORD PTR[80+rsp]
+ mov r12,r13
+ mov r15,r14
+
+ ror r12,7
+ xor r12,r13
+ shr r13,7
+
+ ror r12,1
+ xor r13,r12
+ mov r12,QWORD PTR[40+rsp]
+
+ ror r15,42
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ add r12,r13
+ xor r14,r15
+
+ add r12,QWORD PTR[96+rsp]
+ mov r13,rax
+ add r12,r14
+ mov r14,r8
+ ror r13,23
+ mov r15,rbx
+ mov QWORD PTR[96+rsp],r12
+
+ ror r14,5
+ xor r13,rax
+ xor r15,rcx
+
+ ror r13,4
+ add r12,rdx
+ xor r14,r8
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,rax
+ mov rdx,r9
+
+ ror r14,6
+ xor r13,rax
+ xor r15,rcx
+
+ xor rdx,r10
+ xor r14,r8
+ add r12,r15
+ mov r15,r9
+
+ ror r13,14
+ and rdx,r8
+ and r15,r10
+
+ ror r14,28
+ add r12,r13
+ add rdx,r15
+
+ add r11,r12
+ add rdx,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add rdx,r14
+
+ mov r13,QWORD PTR[112+rsp]
+ mov r14,QWORD PTR[88+rsp]
+ mov r12,r13
+ mov r15,r14
+
+ ror r12,7
+ xor r12,r13
+ shr r13,7
+
+ ror r12,1
+ xor r13,r12
+ mov r12,QWORD PTR[48+rsp]
+
+ ror r15,42
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ add r12,r13
+ xor r14,r15
+
+ add r12,QWORD PTR[104+rsp]
+ mov r13,r11
+ add r12,r14
+ mov r14,rdx
+ ror r13,23
+ mov r15,rax
+ mov QWORD PTR[104+rsp],r12
+
+ ror r14,5
+ xor r13,r11
+ xor r15,rbx
+
+ ror r13,4
+ add r12,rcx
+ xor r14,rdx
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,r11
+ mov rcx,r8
+
+ ror r14,6
+ xor r13,r11
+ xor r15,rbx
+
+ xor rcx,r9
+ xor r14,rdx
+ add r12,r15
+ mov r15,r8
+
+ ror r13,14
+ and rcx,rdx
+ and r15,r9
+
+ ror r14,28
+ add r12,r13
+ add rcx,r15
+
+ add r10,r12
+ add rcx,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add rcx,r14
+
+ mov r13,QWORD PTR[120+rsp]
+ mov r14,QWORD PTR[96+rsp]
+ mov r12,r13
+ mov r15,r14
+
+ ror r12,7
+ xor r12,r13
+ shr r13,7
+
+ ror r12,1
+ xor r13,r12
+ mov r12,QWORD PTR[56+rsp]
+
+ ror r15,42
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ add r12,r13
+ xor r14,r15
+
+ add r12,QWORD PTR[112+rsp]
+ mov r13,r10
+ add r12,r14
+ mov r14,rcx
+ ror r13,23
+ mov r15,r11
+ mov QWORD PTR[112+rsp],r12
+
+ ror r14,5
+ xor r13,r10
+ xor r15,rax
+
+ ror r13,4
+ add r12,rbx
+ xor r14,rcx
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,r10
+ mov rbx,rdx
+
+ ror r14,6
+ xor r13,r10
+ xor r15,rax
+
+ xor rbx,r8
+ xor r14,rcx
+ add r12,r15
+ mov r15,rdx
+
+ ror r13,14
+ and rbx,rcx
+ and r15,r8
+
+ ror r14,28
+ add r12,r13
+ add rbx,r15
+
+ add r9,r12
+ add rbx,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add rbx,r14
+
+ mov r13,QWORD PTR[rsp]
+ mov r14,QWORD PTR[104+rsp]
+ mov r12,r13
+ mov r15,r14
+
+ ror r12,7
+ xor r12,r13
+ shr r13,7
+
+ ror r12,1
+ xor r13,r12
+ mov r12,QWORD PTR[64+rsp]
+
+ ror r15,42
+ xor r15,r14
+ shr r14,6
+
+ ror r15,19
+ add r12,r13
+ xor r14,r15
+
+ add r12,QWORD PTR[120+rsp]
+ mov r13,r9
+ add r12,r14
+ mov r14,rbx
+ ror r13,23
+ mov r15,r10
+ mov QWORD PTR[120+rsp],r12
+
+ ror r14,5
+ xor r13,r9
+ xor r15,r11
+
+ ror r13,4
+ add r12,rax
+ xor r14,rbx
+
+ add r12,QWORD PTR[rdi*8+rbp]
+ and r15,r9
+ mov rax,rcx
+
+ ror r14,6
+ xor r13,r9
+ xor r15,r11
+
+ xor rax,rdx
+ xor r14,rbx
+ add r12,r15
+ mov r15,rcx
+
+ ror r13,14
+ and rax,rbx
+ and r15,rdx
+
+ ror r14,28
+ add r12,r13
+ add rax,r15
+
+ add r8,r12
+ add rax,r12
+ lea rdi,QWORD PTR[1+rdi]
+ add rax,r14
+
+ cmp rdi,80
+ jb $L$rounds_16_xx
+
+ mov rdi,QWORD PTR[((128+0))+rsp]
+ lea rsi,QWORD PTR[128+rsi]
+
+ add rax,QWORD PTR[rdi]
+ add rbx,QWORD PTR[8+rdi]
+ add rcx,QWORD PTR[16+rdi]
+ add rdx,QWORD PTR[24+rdi]
+ add r8,QWORD PTR[32+rdi]
+ add r9,QWORD PTR[40+rdi]
+ add r10,QWORD PTR[48+rdi]
+ add r11,QWORD PTR[56+rdi]
+
+ cmp rsi,QWORD PTR[((128+16))+rsp]
+
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rbx
+ mov QWORD PTR[16+rdi],rcx
+ mov QWORD PTR[24+rdi],rdx
+ mov QWORD PTR[32+rdi],r8
+ mov QWORD PTR[40+rdi],r9
+ mov QWORD PTR[48+rdi],r10
+ mov QWORD PTR[56+rdi],r11
+ jb $L$loop
+
+ mov rsi,QWORD PTR[((128+24))+rsp]
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_sha512_block_data_order::
+sha512_block_data_order ENDP
+ALIGN 64
+
+K512::
+ DQ 0428a2f98d728ae22h,07137449123ef65cdh
+ DQ 0b5c0fbcfec4d3b2fh,0e9b5dba58189dbbch
+ DQ 03956c25bf348b538h,059f111f1b605d019h
+ DQ 0923f82a4af194f9bh,0ab1c5ed5da6d8118h
+ DQ 0d807aa98a3030242h,012835b0145706fbeh
+ DQ 0243185be4ee4b28ch,0550c7dc3d5ffb4e2h
+ DQ 072be5d74f27b896fh,080deb1fe3b1696b1h
+ DQ 09bdc06a725c71235h,0c19bf174cf692694h
+ DQ 0e49b69c19ef14ad2h,0efbe4786384f25e3h
+ DQ 00fc19dc68b8cd5b5h,0240ca1cc77ac9c65h
+ DQ 02de92c6f592b0275h,04a7484aa6ea6e483h
+ DQ 05cb0a9dcbd41fbd4h,076f988da831153b5h
+ DQ 0983e5152ee66dfabh,0a831c66d2db43210h
+ DQ 0b00327c898fb213fh,0bf597fc7beef0ee4h
+ DQ 0c6e00bf33da88fc2h,0d5a79147930aa725h
+ DQ 006ca6351e003826fh,0142929670a0e6e70h
+ DQ 027b70a8546d22ffch,02e1b21385c26c926h
+ DQ 04d2c6dfc5ac42aedh,053380d139d95b3dfh
+ DQ 0650a73548baf63deh,0766a0abb3c77b2a8h
+ DQ 081c2c92e47edaee6h,092722c851482353bh
+ DQ 0a2bfe8a14cf10364h,0a81a664bbc423001h
+ DQ 0c24b8b70d0f89791h,0c76c51a30654be30h
+ DQ 0d192e819d6ef5218h,0d69906245565a910h
+ DQ 0f40e35855771202ah,0106aa07032bbd1b8h
+ DQ 019a4c116b8d2d0c8h,01e376c085141ab53h
+ DQ 02748774cdf8eeb99h,034b0bcb5e19b48a8h
+ DQ 0391c0cb3c5c95a63h,04ed8aa4ae3418acbh
+ DQ 05b9cca4f7763e373h,0682e6ff3d6b2b8a3h
+ DQ 0748f82ee5defb2fch,078a5636f43172f60h
+ DQ 084c87814a1f0ab72h,08cc702081a6439ech
+ DQ 090befffa23631e28h,0a4506cebde82bde9h
+ DQ 0bef9a3f7b2c67915h,0c67178f2e372532bh
+ DQ 0ca273eceea26619ch,0d186b8c721c0c207h
+ DQ 0eada7dd6cde0eb1eh,0f57d4f7fee6ed178h
+ DQ 006f067aa72176fbah,00a637dc5a2c898a6h
+ DQ 0113f9804bef90daeh,01b710b35131c471bh
+ DQ 028db77f523047d84h,032caab7b40c72493h
+ DQ 03c9ebe0a15c9bebch,0431d67c49c100d4ch
+ DQ 04cc5d4becb3e42b6h,0597f299cfc657e2ah
+ DQ 05fcb6fab3ad6faech,06c44198c4a475817h
+
+.text$ ENDS
+END
+
diff --git a/ext/libressl/crypto/sha/sha512-mingw64-x86_64.S b/ext/libressl/crypto/sha/sha512-mingw64-x86_64.S
new file mode 100644
index 0000000..5153952
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha512-mingw64-x86_64.S
@@ -0,0 +1,1814 @@
+#include "x86_arch.h"
+.text
+
+.globl sha512_block_data_order
+.def sha512_block_data_order; .scl 2; .type 32; .endef
+.p2align 4
+sha512_block_data_order:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_sha512_block_data_order:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $128+32,%rsp
+ leaq (%rsi,%rdx,8),%rdx
+ andq $-64,%rsp
+ movq %rdi,128+0(%rsp)
+ movq %rsi,128+8(%rsp)
+ movq %rdx,128+16(%rsp)
+ movq %r11,128+24(%rsp)
+.Lprologue:
+
+ leaq K512(%rip),%rbp
+
+ movq 0(%rdi),%rax
+ movq 8(%rdi),%rbx
+ movq 16(%rdi),%rcx
+ movq 24(%rdi),%rdx
+ movq 32(%rdi),%r8
+ movq 40(%rdi),%r9
+ movq 48(%rdi),%r10
+ movq 56(%rdi),%r11
+ jmp .Lloop
+
+.p2align 4
+.Lloop:
+ xorq %rdi,%rdi
+ movq 0(%rsi),%r12
+ movq %r8,%r13
+ movq %rax,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,0(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 8(%rsi),%r12
+ movq %rdx,%r13
+ movq %r11,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,8(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 16(%rsi),%r12
+ movq %rcx,%r13
+ movq %r10,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,16(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 24(%rsi),%r12
+ movq %rbx,%r13
+ movq %r9,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,24(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 32(%rsi),%r12
+ movq %rax,%r13
+ movq %r8,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,32(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 40(%rsi),%r12
+ movq %r11,%r13
+ movq %rdx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,40(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 48(%rsi),%r12
+ movq %r10,%r13
+ movq %rcx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,48(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 56(%rsi),%r12
+ movq %r9,%r13
+ movq %rbx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,56(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ movq 64(%rsi),%r12
+ movq %r8,%r13
+ movq %rax,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,64(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 72(%rsi),%r12
+ movq %rdx,%r13
+ movq %r11,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,72(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 80(%rsi),%r12
+ movq %rcx,%r13
+ movq %r10,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,80(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 88(%rsi),%r12
+ movq %rbx,%r13
+ movq %r9,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,88(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 96(%rsi),%r12
+ movq %rax,%r13
+ movq %r8,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,96(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 104(%rsi),%r12
+ movq %r11,%r13
+ movq %rdx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,104(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 112(%rsi),%r12
+ movq %r10,%r13
+ movq %rcx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,112(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 120(%rsi),%r12
+ movq %r9,%r13
+ movq %rbx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,120(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ jmp .Lrounds_16_xx
+.p2align 4
+.Lrounds_16_xx:
+ movq 8(%rsp),%r13
+ movq 112(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 72(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 0(%rsp),%r12
+ movq %r8,%r13
+ addq %r14,%r12
+ movq %rax,%r14
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,0(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 16(%rsp),%r13
+ movq 120(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 80(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 8(%rsp),%r12
+ movq %rdx,%r13
+ addq %r14,%r12
+ movq %r11,%r14
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,8(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 24(%rsp),%r13
+ movq 0(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 88(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 16(%rsp),%r12
+ movq %rcx,%r13
+ addq %r14,%r12
+ movq %r10,%r14
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,16(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 32(%rsp),%r13
+ movq 8(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 96(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 24(%rsp),%r12
+ movq %rbx,%r13
+ addq %r14,%r12
+ movq %r9,%r14
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,24(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 40(%rsp),%r13
+ movq 16(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 104(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 32(%rsp),%r12
+ movq %rax,%r13
+ addq %r14,%r12
+ movq %r8,%r14
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,32(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 48(%rsp),%r13
+ movq 24(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 112(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 40(%rsp),%r12
+ movq %r11,%r13
+ addq %r14,%r12
+ movq %rdx,%r14
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,40(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 56(%rsp),%r13
+ movq 32(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 120(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 48(%rsp),%r12
+ movq %r10,%r13
+ addq %r14,%r12
+ movq %rcx,%r14
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,48(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 64(%rsp),%r13
+ movq 40(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 0(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 56(%rsp),%r12
+ movq %r9,%r13
+ addq %r14,%r12
+ movq %rbx,%r14
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,56(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ movq 72(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 8(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 64(%rsp),%r12
+ movq %r8,%r13
+ addq %r14,%r12
+ movq %rax,%r14
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,64(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 80(%rsp),%r13
+ movq 56(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 16(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 72(%rsp),%r12
+ movq %rdx,%r13
+ addq %r14,%r12
+ movq %r11,%r14
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,72(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 88(%rsp),%r13
+ movq 64(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 24(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 80(%rsp),%r12
+ movq %rcx,%r13
+ addq %r14,%r12
+ movq %r10,%r14
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,80(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 96(%rsp),%r13
+ movq 72(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 32(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 88(%rsp),%r12
+ movq %rbx,%r13
+ addq %r14,%r12
+ movq %r9,%r14
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,88(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 104(%rsp),%r13
+ movq 80(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 40(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 96(%rsp),%r12
+ movq %rax,%r13
+ addq %r14,%r12
+ movq %r8,%r14
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,96(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 112(%rsp),%r13
+ movq 88(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 48(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 104(%rsp),%r12
+ movq %r11,%r13
+ addq %r14,%r12
+ movq %rdx,%r14
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,104(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 120(%rsp),%r13
+ movq 96(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 56(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 112(%rsp),%r12
+ movq %r10,%r13
+ addq %r14,%r12
+ movq %rcx,%r14
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,112(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 0(%rsp),%r13
+ movq 104(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 64(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 120(%rsp),%r12
+ movq %r9,%r13
+ addq %r14,%r12
+ movq %rbx,%r14
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,120(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ cmpq $80,%rdi
+ jb .Lrounds_16_xx
+
+ movq 128+0(%rsp),%rdi
+ leaq 128(%rsi),%rsi
+
+ addq 0(%rdi),%rax
+ addq 8(%rdi),%rbx
+ addq 16(%rdi),%rcx
+ addq 24(%rdi),%rdx
+ addq 32(%rdi),%r8
+ addq 40(%rdi),%r9
+ addq 48(%rdi),%r10
+ addq 56(%rdi),%r11
+
+ cmpq 128+16(%rsp),%rsi
+
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+ movq %r8,32(%rdi)
+ movq %r9,40(%rdi)
+ movq %r10,48(%rdi)
+ movq %r11,56(%rdi)
+ jb .Lloop
+
+ movq 128+24(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_sha512_block_data_order:
+.p2align 6
+
+K512:
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0xd807aa98a3030242,0x12835b0145706fbe
+.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad 0x9bdc06a725c71235,0xc19bf174cf692694
+.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad 0x983e5152ee66dfab,0xa831c66d2db43210
+.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad 0x06ca6351e003826f,0x142929670a0e6e70
+.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad 0x81c2c92e47edaee6,0x92722c851482353b
+.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad 0xd192e819d6ef5218,0xd69906245565a910
+.quad 0xf40e35855771202a,0x106aa07032bbd1b8
+.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad 0x90befffa23631e28,0xa4506cebde82bde9
+.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad 0xca273eceea26619c,0xd186b8c721c0c207
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+.quad 0x113f9804bef90dae,0x1b710b35131c471b
+.quad 0x28db77f523047d84,0x32caab7b40c72493
+.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
diff --git a/ext/libressl/crypto/sha/sha512.c b/ext/libressl/crypto/sha/sha512.c
new file mode 100644
index 0000000..6b95cfa
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha512.c
@@ -0,0 +1,547 @@
+/* $OpenBSD: sha512.c,v 1.15 2016/11/04 13:56:05 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 2004 The OpenSSL Project. All rights reserved
+ * according to the OpenSSL license [found in ../../LICENSE].
+ * ====================================================================
+ */
+
+#include <machine/endian.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <openssl/opensslconf.h>
+
+#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
+/*
+ * IMPLEMENTATION NOTES.
+ *
+ * As you might have noticed, the 32-bit hash algorithms:
+ *
+ * - permit SHA_LONG to be wider than 32 bits (as is the case on CRAY);
+ * - have optimized versions that implement two transform functions: one
+ *   operating on [aligned] data in host byte order and one operating on
+ *   data in input stream byte order;
+ * - share common byte-order neutral collector and padding function
+ * implementations, ../md32_common.h;
+ *
+ * None of the above applies to this SHA-512 implementation. The reasons
+ * [in reverse order] are:
+ *
+ * - it is the only 64-bit hash algorithm at the time of this writing,
+ *   so there is no need for a common collector/padding implementation [yet];
+ * - by supporting only one transform function [which operates on
+ *   *aligned* data in input stream byte order, big-endian in this case]
+ *   we minimize the maintenance burden in two ways: a) the collector/padding
+ *   function is simpler; b) there is only one transform function to stare at;
+ * - SHA_LONG64 is required to be exactly 64 bits wide in order to be able to
+ *   apply a number of optimizations that mitigate the potential performance
+ *   penalties caused by the previous design decision;
+ *
+ * Caveat lector.
+ *
+ * The implementation relies on the fact that "long long" is 64 bits wide on
+ * both 32- and 64-bit platforms. If some compiler vendor comes up
+ * with a 128-bit long long, an adjustment to sha.h would be required.
+ * As this implementation relies on a 64-bit integer type, it is entirely
+ * unsuitable for platforms which don't provide one, most notably
+ * 16-bit platforms.
+ * <appro@fy.chalmers.se>
+ */
+
+#include <openssl/crypto.h>
+#include <openssl/opensslv.h>
+#include <openssl/sha.h>
+
+#if !defined(__STRICT_ALIGNMENT) || defined(SHA512_ASM)
+#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
+#endif
+
+int SHA384_Init(SHA512_CTX *c)
+ {
+ c->h[0]=U64(0xcbbb9d5dc1059ed8);
+ c->h[1]=U64(0x629a292a367cd507);
+ c->h[2]=U64(0x9159015a3070dd17);
+ c->h[3]=U64(0x152fecd8f70e5939);
+ c->h[4]=U64(0x67332667ffc00b31);
+ c->h[5]=U64(0x8eb44a8768581511);
+ c->h[6]=U64(0xdb0c2e0d64f98fa7);
+ c->h[7]=U64(0x47b5481dbefa4fa4);
+
+ c->Nl=0; c->Nh=0;
+ c->num=0; c->md_len=SHA384_DIGEST_LENGTH;
+ return 1;
+ }
+
+int SHA512_Init(SHA512_CTX *c)
+ {
+ c->h[0]=U64(0x6a09e667f3bcc908);
+ c->h[1]=U64(0xbb67ae8584caa73b);
+ c->h[2]=U64(0x3c6ef372fe94f82b);
+ c->h[3]=U64(0xa54ff53a5f1d36f1);
+ c->h[4]=U64(0x510e527fade682d1);
+ c->h[5]=U64(0x9b05688c2b3e6c1f);
+ c->h[6]=U64(0x1f83d9abfb41bd6b);
+ c->h[7]=U64(0x5be0cd19137e2179);
+
+ c->Nl=0; c->Nh=0;
+ c->num=0; c->md_len=SHA512_DIGEST_LENGTH;
+ return 1;
+ }
+
+#ifndef SHA512_ASM
+static
+#endif
+void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
+
+int SHA512_Final (unsigned char *md, SHA512_CTX *c)
+ {
+ unsigned char *p=(unsigned char *)c->u.p;
+ size_t n=c->num;
+
+ p[n]=0x80; /* There is always room for one */
+ n++;
+ if (n > (sizeof(c->u)-16))
+ memset (p+n,0,sizeof(c->u)-n), n=0,
+ sha512_block_data_order (c,p,1);
+
+ memset (p+n,0,sizeof(c->u)-16-n);
+#if BYTE_ORDER == BIG_ENDIAN
+ c->u.d[SHA_LBLOCK-2] = c->Nh;
+ c->u.d[SHA_LBLOCK-1] = c->Nl;
+#else
+ p[sizeof(c->u)-1] = (unsigned char)(c->Nl);
+ p[sizeof(c->u)-2] = (unsigned char)(c->Nl>>8);
+ p[sizeof(c->u)-3] = (unsigned char)(c->Nl>>16);
+ p[sizeof(c->u)-4] = (unsigned char)(c->Nl>>24);
+ p[sizeof(c->u)-5] = (unsigned char)(c->Nl>>32);
+ p[sizeof(c->u)-6] = (unsigned char)(c->Nl>>40);
+ p[sizeof(c->u)-7] = (unsigned char)(c->Nl>>48);
+ p[sizeof(c->u)-8] = (unsigned char)(c->Nl>>56);
+ p[sizeof(c->u)-9] = (unsigned char)(c->Nh);
+ p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
+ p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
+ p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
+ p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
+ p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
+ p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
+ p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
+#endif
+
+ sha512_block_data_order (c,p,1);
+
+ if (md==0) return 0;
+
+ switch (c->md_len)
+ {
+ /* Let compiler decide if it's appropriate to unroll... */
+ case SHA384_DIGEST_LENGTH:
+ for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
+ {
+ SHA_LONG64 t = c->h[n];
+
+ *(md++) = (unsigned char)(t>>56);
+ *(md++) = (unsigned char)(t>>48);
+ *(md++) = (unsigned char)(t>>40);
+ *(md++) = (unsigned char)(t>>32);
+ *(md++) = (unsigned char)(t>>24);
+ *(md++) = (unsigned char)(t>>16);
+ *(md++) = (unsigned char)(t>>8);
+ *(md++) = (unsigned char)(t);
+ }
+ break;
+ case SHA512_DIGEST_LENGTH:
+ for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
+ {
+ SHA_LONG64 t = c->h[n];
+
+ *(md++) = (unsigned char)(t>>56);
+ *(md++) = (unsigned char)(t>>48);
+ *(md++) = (unsigned char)(t>>40);
+ *(md++) = (unsigned char)(t>>32);
+ *(md++) = (unsigned char)(t>>24);
+ *(md++) = (unsigned char)(t>>16);
+ *(md++) = (unsigned char)(t>>8);
+ *(md++) = (unsigned char)(t);
+ }
+ break;
+ /* ... as well as make sure md_len is not abused. */
+ default: return 0;
+ }
+
+ return 1;
+ }
+
+int SHA384_Final (unsigned char *md,SHA512_CTX *c)
+{ return SHA512_Final (md,c); }
+
+int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
+ {
+ SHA_LONG64 l;
+ unsigned char *p=c->u.p;
+ const unsigned char *data=(const unsigned char *)_data;
+
+ if (len==0) return 1;
+
+ l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
+ if (l < c->Nl) c->Nh++;
+ if (sizeof(len)>=8) c->Nh+=(((SHA_LONG64)len)>>61);
+ c->Nl=l;
+
+ if (c->num != 0)
+ {
+ size_t n = sizeof(c->u) - c->num;
+
+ if (len < n)
+ {
+ memcpy (p+c->num,data,len), c->num += (unsigned int)len;
+ return 1;
+ }
+ else {
+ memcpy (p+c->num,data,n), c->num = 0;
+ len-=n, data+=n;
+ sha512_block_data_order (c,p,1);
+ }
+ }
+
+ if (len >= sizeof(c->u))
+ {
+#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
+ if ((size_t)data%sizeof(c->u.d[0]) != 0)
+ while (len >= sizeof(c->u))
+ memcpy (p,data,sizeof(c->u)),
+ sha512_block_data_order (c,p,1),
+ len -= sizeof(c->u),
+ data += sizeof(c->u);
+ else
+#endif
+ sha512_block_data_order (c,data,len/sizeof(c->u)),
+ data += len,
+ len %= sizeof(c->u),
+ data -= len;
+ }
+
+ if (len != 0) memcpy (p,data,len), c->num = (int)len;
+
+ return 1;
+ }
+
+int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
+{ return SHA512_Update (c,data,len); }
+
+void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
+ {
+#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
+ if ((size_t)data%sizeof(c->u.d[0]) != 0)
+ memcpy(c->u.p,data,sizeof(c->u.p)),
+ data = c->u.p;
+#endif
+ sha512_block_data_order (c,data,1);
+ }
+
+unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
+ {
+ SHA512_CTX c;
+ static unsigned char m[SHA384_DIGEST_LENGTH];
+
+ if (md == NULL) md=m;
+ SHA384_Init(&c);
+ SHA512_Update(&c,d,n);
+ SHA512_Final(md,&c);
+ explicit_bzero(&c,sizeof(c));
+ return(md);
+ }
+
+unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
+ {
+ SHA512_CTX c;
+ static unsigned char m[SHA512_DIGEST_LENGTH];
+
+ if (md == NULL) md=m;
+ SHA512_Init(&c);
+ SHA512_Update(&c,d,n);
+ SHA512_Final(md,&c);
+ explicit_bzero(&c,sizeof(c));
+ return(md);
+ }
+
+#ifndef SHA512_ASM
+static const SHA_LONG64 K512[80] = {
+ U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
+ U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
+ U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
+ U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
+ U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
+ U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
+ U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
+ U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
+ U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
+ U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
+ U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
+ U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
+ U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
+ U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
+ U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
+ U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
+ U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
+ U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
+ U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
+ U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
+ U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
+ U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
+ U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
+ U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
+ U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
+ U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
+ U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
+ U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
+ U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
+ U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
+ U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
+ U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
+ U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
+ U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
+ U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
+ U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
+ U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
+ U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
+ U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
+ U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
+
+#if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
+# if defined(__x86_64) || defined(__x86_64__)
+# define ROTR(a,n) ({ SHA_LONG64 ret; \
+ asm ("rorq %1,%0" \
+ : "=r"(ret) \
+ : "J"(n),"0"(a) \
+ : "cc"); ret; })
+# define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
+ asm ("bswapq %0" \
+ : "=r"(ret) \
+ : "0"(ret)); ret; })
+# elif (defined(__i386) || defined(__i386__))
+# define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
+ unsigned int hi=p[0],lo=p[1]; \
+ asm ("bswapl %0; bswapl %1;" \
+ : "=r"(lo),"=r"(hi) \
+ : "0"(lo),"1"(hi)); \
+ ((SHA_LONG64)hi)<<32|lo; })
+# elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
+# define ROTR(a,n) ({ SHA_LONG64 ret; \
+ asm ("rotrdi %0,%1,%2" \
+ : "=r"(ret) \
+ : "r"(a),"K"(n)); ret; })
+# endif
+#endif
+
+#ifndef PULL64
+#define B(x,j) (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
+#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
+#endif
+
+#ifndef ROTR
+#define ROTR(x,s) (((x)>>s) | (x)<<(64-s))
+#endif
+
+#define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+#define Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
+#define sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
+#define sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+
+#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
+#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+
+
+#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
+/*
+ * This code should give better results on a 32-bit CPU with fewer than
+ * ~24 registers, both size- and performance-wise...
+ */
+static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
+ {
+ const SHA_LONG64 *W=in;
+ SHA_LONG64 A,E,T;
+ SHA_LONG64 X[9+80],*F;
+ int i;
+
+ while (num--) {
+
+ F = X+80;
+ A = ctx->h[0]; F[1] = ctx->h[1];
+ F[2] = ctx->h[2]; F[3] = ctx->h[3];
+ E = ctx->h[4]; F[5] = ctx->h[5];
+ F[6] = ctx->h[6]; F[7] = ctx->h[7];
+
+ for (i=0;i<16;i++,F--)
+ {
+ T = PULL64(W[i]);
+ F[0] = A;
+ F[4] = E;
+ F[8] = T;
+ T += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
+ E = F[3] + T;
+ A = T + Sigma0(A) + Maj(A,F[1],F[2]);
+ }
+
+ for (;i<80;i++,F--)
+ {
+ T = sigma0(F[8+16-1]);
+ T += sigma1(F[8+16-14]);
+ T += F[8+16] + F[8+16-9];
+
+ F[0] = A;
+ F[4] = E;
+ F[8] = T;
+ T += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
+ E = F[3] + T;
+ A = T + Sigma0(A) + Maj(A,F[1],F[2]);
+ }
+
+ ctx->h[0] += A; ctx->h[1] += F[1];
+ ctx->h[2] += F[2]; ctx->h[3] += F[3];
+ ctx->h[4] += E; ctx->h[5] += F[5];
+ ctx->h[6] += F[6]; ctx->h[7] += F[7];
+
+ W+=SHA_LBLOCK;
+ }
+ }
+
+#elif defined(OPENSSL_SMALL_FOOTPRINT)
+
+static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
+ {
+ const SHA_LONG64 *W=in;
+ SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1,T2;
+ SHA_LONG64 X[16];
+ int i;
+
+ while (num--) {
+
+ a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
+ e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
+
+ for (i=0;i<16;i++)
+ {
+#if BYTE_ORDER == BIG_ENDIAN
+ T1 = X[i] = W[i];
+#else
+ T1 = X[i] = PULL64(W[i]);
+#endif
+ T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
+ T2 = Sigma0(a) + Maj(a,b,c);
+ h = g; g = f; f = e; e = d + T1;
+ d = c; c = b; b = a; a = T1 + T2;
+ }
+
+ for (;i<80;i++)
+ {
+ s0 = X[(i+1)&0x0f]; s0 = sigma0(s0);
+ s1 = X[(i+14)&0x0f]; s1 = sigma1(s1);
+
+ T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
+ T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
+ T2 = Sigma0(a) + Maj(a,b,c);
+ h = g; g = f; f = e; e = d + T1;
+ d = c; c = b; b = a; a = T1 + T2;
+ }
+
+ ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
+ ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
+
+ W+=SHA_LBLOCK;
+ }
+ }
+
+#else
+
+#define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
+ T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
+ h = Sigma0(a) + Maj(a,b,c); \
+ d += T1; h += T1; } while (0)
+
+#define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) do { \
+ s0 = X[(j+1)&0x0f]; s0 = sigma0(s0); \
+ s1 = X[(j+14)&0x0f]; s1 = sigma1(s1); \
+ T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f]; \
+ ROUND_00_15(i+j,a,b,c,d,e,f,g,h); } while (0)
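+
+/*
+ * Instead of shifting the eight working variables after each round,
+ * the call sites below rotate the argument order (a..h), so the
+ * "moves" happen at compile time for free.
+ */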
+
+static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
+ {
+ const SHA_LONG64 *W=in;
+ SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1;
+ SHA_LONG64 X[16];
+ int i;
+
+ while (num--) {
+
+ a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
+ e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
+
+#if BYTE_ORDER == BIG_ENDIAN
+ T1 = X[0] = W[0]; ROUND_00_15(0,a,b,c,d,e,f,g,h);
+ T1 = X[1] = W[1]; ROUND_00_15(1,h,a,b,c,d,e,f,g);
+ T1 = X[2] = W[2]; ROUND_00_15(2,g,h,a,b,c,d,e,f);
+ T1 = X[3] = W[3]; ROUND_00_15(3,f,g,h,a,b,c,d,e);
+ T1 = X[4] = W[4]; ROUND_00_15(4,e,f,g,h,a,b,c,d);
+ T1 = X[5] = W[5]; ROUND_00_15(5,d,e,f,g,h,a,b,c);
+ T1 = X[6] = W[6]; ROUND_00_15(6,c,d,e,f,g,h,a,b);
+ T1 = X[7] = W[7]; ROUND_00_15(7,b,c,d,e,f,g,h,a);
+ T1 = X[8] = W[8]; ROUND_00_15(8,a,b,c,d,e,f,g,h);
+ T1 = X[9] = W[9]; ROUND_00_15(9,h,a,b,c,d,e,f,g);
+ T1 = X[10] = W[10]; ROUND_00_15(10,g,h,a,b,c,d,e,f);
+ T1 = X[11] = W[11]; ROUND_00_15(11,f,g,h,a,b,c,d,e);
+ T1 = X[12] = W[12]; ROUND_00_15(12,e,f,g,h,a,b,c,d);
+ T1 = X[13] = W[13]; ROUND_00_15(13,d,e,f,g,h,a,b,c);
+ T1 = X[14] = W[14]; ROUND_00_15(14,c,d,e,f,g,h,a,b);
+ T1 = X[15] = W[15]; ROUND_00_15(15,b,c,d,e,f,g,h,a);
+#else
+ T1 = X[0] = PULL64(W[0]); ROUND_00_15(0,a,b,c,d,e,f,g,h);
+ T1 = X[1] = PULL64(W[1]); ROUND_00_15(1,h,a,b,c,d,e,f,g);
+ T1 = X[2] = PULL64(W[2]); ROUND_00_15(2,g,h,a,b,c,d,e,f);
+ T1 = X[3] = PULL64(W[3]); ROUND_00_15(3,f,g,h,a,b,c,d,e);
+ T1 = X[4] = PULL64(W[4]); ROUND_00_15(4,e,f,g,h,a,b,c,d);
+ T1 = X[5] = PULL64(W[5]); ROUND_00_15(5,d,e,f,g,h,a,b,c);
+ T1 = X[6] = PULL64(W[6]); ROUND_00_15(6,c,d,e,f,g,h,a,b);
+ T1 = X[7] = PULL64(W[7]); ROUND_00_15(7,b,c,d,e,f,g,h,a);
+ T1 = X[8] = PULL64(W[8]); ROUND_00_15(8,a,b,c,d,e,f,g,h);
+ T1 = X[9] = PULL64(W[9]); ROUND_00_15(9,h,a,b,c,d,e,f,g);
+ T1 = X[10] = PULL64(W[10]); ROUND_00_15(10,g,h,a,b,c,d,e,f);
+ T1 = X[11] = PULL64(W[11]); ROUND_00_15(11,f,g,h,a,b,c,d,e);
+ T1 = X[12] = PULL64(W[12]); ROUND_00_15(12,e,f,g,h,a,b,c,d);
+ T1 = X[13] = PULL64(W[13]); ROUND_00_15(13,d,e,f,g,h,a,b,c);
+ T1 = X[14] = PULL64(W[14]); ROUND_00_15(14,c,d,e,f,g,h,a,b);
+ T1 = X[15] = PULL64(W[15]); ROUND_00_15(15,b,c,d,e,f,g,h,a);
+#endif
+
+ for (i=16;i<80;i+=16)
+ {
+ ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X);
+ ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X);
+ ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X);
+ ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X);
+ ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X);
+ ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X);
+ ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X);
+ ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X);
+ ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X);
+ ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X);
+ ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X);
+ ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X);
+ ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X);
+ ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X);
+ ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X);
+ ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X);
+ }
+
+ ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
+ ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
+
+ W+=SHA_LBLOCK;
+ }
+ }
+
+#endif
+
+#endif /* SHA512_ASM */
+
+#endif /* !OPENSSL_NO_SHA512 */
diff --git a/ext/libressl/crypto/sha/sha_locl.h b/ext/libressl/crypto/sha/sha_locl.h
new file mode 100644
index 0000000..46c9a39
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha_locl.h
@@ -0,0 +1,431 @@
+/* $OpenBSD: sha_locl.h,v 1.23 2016/12/23 23:22:25 patrick Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscape's SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are adhered to.  The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the routines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publicly available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <openssl/opensslconf.h>
+#include <openssl/sha.h>
+
+#define DATA_ORDER_IS_BIG_ENDIAN
+
+#define HASH_LONG SHA_LONG
+#define HASH_CTX SHA_CTX
+#define HASH_CBLOCK SHA_CBLOCK
+#define HASH_MAKE_STRING(c,s) do { \
+ unsigned long ll; \
+ ll=(c)->h0; HOST_l2c(ll,(s)); \
+ ll=(c)->h1; HOST_l2c(ll,(s)); \
+ ll=(c)->h2; HOST_l2c(ll,(s)); \
+ ll=(c)->h3; HOST_l2c(ll,(s)); \
+ ll=(c)->h4; HOST_l2c(ll,(s)); \
+ } while (0)
+
+# define HASH_UPDATE SHA1_Update
+# define HASH_TRANSFORM SHA1_Transform
+# define HASH_FINAL SHA1_Final
+# define HASH_INIT SHA1_Init
+# define HASH_BLOCK_DATA_ORDER sha1_block_data_order
+# define Xupdate(a,ix,ia,ib,ic,id) ( (a)=(ia^ib^ic^id), \
+ ix=(a)=ROTATE((a),1) \
+ )
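+/*
+ * Xupdate computes the SHA-1 message schedule in place:
+ * W[t] = ROTL1(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]), kept in a
+ * 16-word circular buffer; (ia,ib,ic,id) are the four taps and
+ * ix receives the freshly scheduled word.
+ */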
+
+__BEGIN_HIDDEN_DECLS
+
+#ifndef SHA1_ASM
+static
+#endif
+
+void sha1_block_data_order (SHA_CTX *c, const void *p,size_t num);
+
+__END_HIDDEN_DECLS
+
+#include "md32_common.h"
+
+#define INIT_DATA_h0 0x67452301UL
+#define INIT_DATA_h1 0xefcdab89UL
+#define INIT_DATA_h2 0x98badcfeUL
+#define INIT_DATA_h3 0x10325476UL
+#define INIT_DATA_h4 0xc3d2e1f0UL
+
+int SHA1_Init(SHA_CTX *c)
+ {
+ memset (c,0,sizeof(*c));
+ c->h0=INIT_DATA_h0;
+ c->h1=INIT_DATA_h1;
+ c->h2=INIT_DATA_h2;
+ c->h3=INIT_DATA_h3;
+ c->h4=INIT_DATA_h4;
+ return 1;
+ }
+
+#define K_00_19 0x5a827999UL
+#define K_20_39 0x6ed9eba1UL
+#define K_40_59 0x8f1bbcdcUL
+#define K_60_79 0xca62c1d6UL
+
+/* As pointed out by Wei Dai <weidai@eskimo.com>, F() below can be
+ * simplified to the code in F_00_19. Wei attributes these optimisations
+ * to Peter Gutmann's SHS code, and he attributes them to Rich Schroeppel.
+ * #define F(x,y,z) (((x) & (y)) | ((~(x)) & (z)))
+ * I've just become aware of another tweak to be made, again from Wei Dai,
+ * in F_40_59, (x&a)|(y&a) -> (x|y)&a
+ */
+#define F_00_19(b,c,d) ((((c) ^ (d)) & (b)) ^ (d))
+#define F_20_39(b,c,d) ((b) ^ (c) ^ (d))
+#define F_40_59(b,c,d) (((b) & (c)) | (((b)|(c)) & (d)))
+#define F_60_79(b,c,d) F_20_39(b,c,d)
+
+#ifndef OPENSSL_SMALL_FOOTPRINT
+
+#define BODY_00_15(i,a,b,c,d,e,f,xi) \
+ (f)=xi+(e)+K_00_19+ROTATE((a),5)+F_00_19((b),(c),(d)); \
+ (b)=ROTATE((b),30);
+
+#define BODY_16_19(i,a,b,c,d,e,f,xi,xa,xb,xc,xd) \
+ Xupdate(f,xi,xa,xb,xc,xd); \
+ (f)+=(e)+K_00_19+ROTATE((a),5)+F_00_19((b),(c),(d)); \
+ (b)=ROTATE((b),30);
+
+#define BODY_20_31(i,a,b,c,d,e,f,xi,xa,xb,xc,xd) \
+ Xupdate(f,xi,xa,xb,xc,xd); \
+ (f)+=(e)+K_20_39+ROTATE((a),5)+F_20_39((b),(c),(d)); \
+ (b)=ROTATE((b),30);
+
+#define BODY_32_39(i,a,b,c,d,e,f,xa,xb,xc,xd) \
+ Xupdate(f,xa,xa,xb,xc,xd); \
+ (f)+=(e)+K_20_39+ROTATE((a),5)+F_20_39((b),(c),(d)); \
+ (b)=ROTATE((b),30);
+
+#define BODY_40_59(i,a,b,c,d,e,f,xa,xb,xc,xd) \
+ Xupdate(f,xa,xa,xb,xc,xd); \
+ (f)+=(e)+K_40_59+ROTATE((a),5)+F_40_59((b),(c),(d)); \
+ (b)=ROTATE((b),30);
+
+#define BODY_60_79(i,a,b,c,d,e,f,xa,xb,xc,xd) \
+ Xupdate(f,xa,xa,xb,xc,xd); \
+ (f)=xa+(e)+K_60_79+ROTATE((a),5)+F_60_79((b),(c),(d)); \
+ (b)=ROTATE((b),30);
+
+#ifdef X
+#undef X
+#endif
+#ifndef MD32_XARRAY
+ /*
+ * Originally X was an array. As it's automatic it's natural
+	 * to expect a RISC compiler to accommodate at least part of it in
+	 * the register bank, isn't it? Unfortunately not all compilers
+	 * "find" this expectation reasonable:-( In order to make such
+ * compilers generate better code I replace X[] with a bunch of
+ * X0, X1, etc. See the function body below...
+ * <appro@fy.chalmers.se>
+ */
+# define X(i) XX##i
+#else
+ /*
+ * However! Some compilers (most notably HP C) get overwhelmed by
+ * that many local variables so that we have to have the way to
+ * fall down to the original behavior.
+ */
+# define X(i) XX[i]
+#endif
+
+#if !defined(SHA1_ASM)
+#include <machine/endian.h>
+static void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
+ {
+ const unsigned char *data=p;
+ unsigned MD32_REG_T A,B,C,D,E,T,l;
+#ifndef MD32_XARRAY
+ unsigned MD32_REG_T XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
+ XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
+#else
+ SHA_LONG XX[16];
+#endif
+
+ A=c->h0;
+ B=c->h1;
+ C=c->h2;
+ D=c->h3;
+ E=c->h4;
+
+ for (;;)
+ {
+
+ if (BYTE_ORDER != LITTLE_ENDIAN &&
+ sizeof(SHA_LONG)==4 && ((size_t)p%4)==0)
+ {
+ const SHA_LONG *W=(const SHA_LONG *)data;
+
+ X( 0) = W[0]; X( 1) = W[ 1];
+ BODY_00_15( 0,A,B,C,D,E,T,X( 0)); X( 2) = W[ 2];
+ BODY_00_15( 1,T,A,B,C,D,E,X( 1)); X( 3) = W[ 3];
+ BODY_00_15( 2,E,T,A,B,C,D,X( 2)); X( 4) = W[ 4];
+ BODY_00_15( 3,D,E,T,A,B,C,X( 3)); X( 5) = W[ 5];
+ BODY_00_15( 4,C,D,E,T,A,B,X( 4)); X( 6) = W[ 6];
+ BODY_00_15( 5,B,C,D,E,T,A,X( 5)); X( 7) = W[ 7];
+ BODY_00_15( 6,A,B,C,D,E,T,X( 6)); X( 8) = W[ 8];
+ BODY_00_15( 7,T,A,B,C,D,E,X( 7)); X( 9) = W[ 9];
+ BODY_00_15( 8,E,T,A,B,C,D,X( 8)); X(10) = W[10];
+ BODY_00_15( 9,D,E,T,A,B,C,X( 9)); X(11) = W[11];
+ BODY_00_15(10,C,D,E,T,A,B,X(10)); X(12) = W[12];
+ BODY_00_15(11,B,C,D,E,T,A,X(11)); X(13) = W[13];
+ BODY_00_15(12,A,B,C,D,E,T,X(12)); X(14) = W[14];
+ BODY_00_15(13,T,A,B,C,D,E,X(13)); X(15) = W[15];
+ BODY_00_15(14,E,T,A,B,C,D,X(14));
+ BODY_00_15(15,D,E,T,A,B,C,X(15));
+
+ data += SHA_CBLOCK;
+ }
+ else
+ {
+ HOST_c2l(data,l); X( 0)=l; HOST_c2l(data,l); X( 1)=l;
+ BODY_00_15( 0,A,B,C,D,E,T,X( 0)); HOST_c2l(data,l); X( 2)=l;
+ BODY_00_15( 1,T,A,B,C,D,E,X( 1)); HOST_c2l(data,l); X( 3)=l;
+ BODY_00_15( 2,E,T,A,B,C,D,X( 2)); HOST_c2l(data,l); X( 4)=l;
+ BODY_00_15( 3,D,E,T,A,B,C,X( 3)); HOST_c2l(data,l); X( 5)=l;
+ BODY_00_15( 4,C,D,E,T,A,B,X( 4)); HOST_c2l(data,l); X( 6)=l;
+ BODY_00_15( 5,B,C,D,E,T,A,X( 5)); HOST_c2l(data,l); X( 7)=l;
+ BODY_00_15( 6,A,B,C,D,E,T,X( 6)); HOST_c2l(data,l); X( 8)=l;
+ BODY_00_15( 7,T,A,B,C,D,E,X( 7)); HOST_c2l(data,l); X( 9)=l;
+ BODY_00_15( 8,E,T,A,B,C,D,X( 8)); HOST_c2l(data,l); X(10)=l;
+ BODY_00_15( 9,D,E,T,A,B,C,X( 9)); HOST_c2l(data,l); X(11)=l;
+ BODY_00_15(10,C,D,E,T,A,B,X(10)); HOST_c2l(data,l); X(12)=l;
+ BODY_00_15(11,B,C,D,E,T,A,X(11)); HOST_c2l(data,l); X(13)=l;
+ BODY_00_15(12,A,B,C,D,E,T,X(12)); HOST_c2l(data,l); X(14)=l;
+ BODY_00_15(13,T,A,B,C,D,E,X(13)); HOST_c2l(data,l); X(15)=l;
+ BODY_00_15(14,E,T,A,B,C,D,X(14));
+ BODY_00_15(15,D,E,T,A,B,C,X(15));
+ }
+
+ BODY_16_19(16,C,D,E,T,A,B,X( 0),X( 0),X( 2),X( 8),X(13));
+ BODY_16_19(17,B,C,D,E,T,A,X( 1),X( 1),X( 3),X( 9),X(14));
+ BODY_16_19(18,A,B,C,D,E,T,X( 2),X( 2),X( 4),X(10),X(15));
+ BODY_16_19(19,T,A,B,C,D,E,X( 3),X( 3),X( 5),X(11),X( 0));
+
+ BODY_20_31(20,E,T,A,B,C,D,X( 4),X( 4),X( 6),X(12),X( 1));
+ BODY_20_31(21,D,E,T,A,B,C,X( 5),X( 5),X( 7),X(13),X( 2));
+ BODY_20_31(22,C,D,E,T,A,B,X( 6),X( 6),X( 8),X(14),X( 3));
+ BODY_20_31(23,B,C,D,E,T,A,X( 7),X( 7),X( 9),X(15),X( 4));
+ BODY_20_31(24,A,B,C,D,E,T,X( 8),X( 8),X(10),X( 0),X( 5));
+ BODY_20_31(25,T,A,B,C,D,E,X( 9),X( 9),X(11),X( 1),X( 6));
+ BODY_20_31(26,E,T,A,B,C,D,X(10),X(10),X(12),X( 2),X( 7));
+ BODY_20_31(27,D,E,T,A,B,C,X(11),X(11),X(13),X( 3),X( 8));
+ BODY_20_31(28,C,D,E,T,A,B,X(12),X(12),X(14),X( 4),X( 9));
+ BODY_20_31(29,B,C,D,E,T,A,X(13),X(13),X(15),X( 5),X(10));
+ BODY_20_31(30,A,B,C,D,E,T,X(14),X(14),X( 0),X( 6),X(11));
+ BODY_20_31(31,T,A,B,C,D,E,X(15),X(15),X( 1),X( 7),X(12));
+
+ BODY_32_39(32,E,T,A,B,C,D,X( 0),X( 2),X( 8),X(13));
+ BODY_32_39(33,D,E,T,A,B,C,X( 1),X( 3),X( 9),X(14));
+ BODY_32_39(34,C,D,E,T,A,B,X( 2),X( 4),X(10),X(15));
+ BODY_32_39(35,B,C,D,E,T,A,X( 3),X( 5),X(11),X( 0));
+ BODY_32_39(36,A,B,C,D,E,T,X( 4),X( 6),X(12),X( 1));
+ BODY_32_39(37,T,A,B,C,D,E,X( 5),X( 7),X(13),X( 2));
+ BODY_32_39(38,E,T,A,B,C,D,X( 6),X( 8),X(14),X( 3));
+ BODY_32_39(39,D,E,T,A,B,C,X( 7),X( 9),X(15),X( 4));
+
+ BODY_40_59(40,C,D,E,T,A,B,X( 8),X(10),X( 0),X( 5));
+ BODY_40_59(41,B,C,D,E,T,A,X( 9),X(11),X( 1),X( 6));
+ BODY_40_59(42,A,B,C,D,E,T,X(10),X(12),X( 2),X( 7));
+ BODY_40_59(43,T,A,B,C,D,E,X(11),X(13),X( 3),X( 8));
+ BODY_40_59(44,E,T,A,B,C,D,X(12),X(14),X( 4),X( 9));
+ BODY_40_59(45,D,E,T,A,B,C,X(13),X(15),X( 5),X(10));
+ BODY_40_59(46,C,D,E,T,A,B,X(14),X( 0),X( 6),X(11));
+ BODY_40_59(47,B,C,D,E,T,A,X(15),X( 1),X( 7),X(12));
+ BODY_40_59(48,A,B,C,D,E,T,X( 0),X( 2),X( 8),X(13));
+ BODY_40_59(49,T,A,B,C,D,E,X( 1),X( 3),X( 9),X(14));
+ BODY_40_59(50,E,T,A,B,C,D,X( 2),X( 4),X(10),X(15));
+ BODY_40_59(51,D,E,T,A,B,C,X( 3),X( 5),X(11),X( 0));
+ BODY_40_59(52,C,D,E,T,A,B,X( 4),X( 6),X(12),X( 1));
+ BODY_40_59(53,B,C,D,E,T,A,X( 5),X( 7),X(13),X( 2));
+ BODY_40_59(54,A,B,C,D,E,T,X( 6),X( 8),X(14),X( 3));
+ BODY_40_59(55,T,A,B,C,D,E,X( 7),X( 9),X(15),X( 4));
+ BODY_40_59(56,E,T,A,B,C,D,X( 8),X(10),X( 0),X( 5));
+ BODY_40_59(57,D,E,T,A,B,C,X( 9),X(11),X( 1),X( 6));
+ BODY_40_59(58,C,D,E,T,A,B,X(10),X(12),X( 2),X( 7));
+ BODY_40_59(59,B,C,D,E,T,A,X(11),X(13),X( 3),X( 8));
+
+ BODY_60_79(60,A,B,C,D,E,T,X(12),X(14),X( 4),X( 9));
+ BODY_60_79(61,T,A,B,C,D,E,X(13),X(15),X( 5),X(10));
+ BODY_60_79(62,E,T,A,B,C,D,X(14),X( 0),X( 6),X(11));
+ BODY_60_79(63,D,E,T,A,B,C,X(15),X( 1),X( 7),X(12));
+ BODY_60_79(64,C,D,E,T,A,B,X( 0),X( 2),X( 8),X(13));
+ BODY_60_79(65,B,C,D,E,T,A,X( 1),X( 3),X( 9),X(14));
+ BODY_60_79(66,A,B,C,D,E,T,X( 2),X( 4),X(10),X(15));
+ BODY_60_79(67,T,A,B,C,D,E,X( 3),X( 5),X(11),X( 0));
+ BODY_60_79(68,E,T,A,B,C,D,X( 4),X( 6),X(12),X( 1));
+ BODY_60_79(69,D,E,T,A,B,C,X( 5),X( 7),X(13),X( 2));
+ BODY_60_79(70,C,D,E,T,A,B,X( 6),X( 8),X(14),X( 3));
+ BODY_60_79(71,B,C,D,E,T,A,X( 7),X( 9),X(15),X( 4));
+ BODY_60_79(72,A,B,C,D,E,T,X( 8),X(10),X( 0),X( 5));
+ BODY_60_79(73,T,A,B,C,D,E,X( 9),X(11),X( 1),X( 6));
+ BODY_60_79(74,E,T,A,B,C,D,X(10),X(12),X( 2),X( 7));
+ BODY_60_79(75,D,E,T,A,B,C,X(11),X(13),X( 3),X( 8));
+ BODY_60_79(76,C,D,E,T,A,B,X(12),X(14),X( 4),X( 9));
+ BODY_60_79(77,B,C,D,E,T,A,X(13),X(15),X( 5),X(10));
+ BODY_60_79(78,A,B,C,D,E,T,X(14),X( 0),X( 6),X(11));
+ BODY_60_79(79,T,A,B,C,D,E,X(15),X( 1),X( 7),X(12));
+
+ c->h0=(c->h0+E)&0xffffffffL;
+ c->h1=(c->h1+T)&0xffffffffL;
+ c->h2=(c->h2+A)&0xffffffffL;
+ c->h3=(c->h3+B)&0xffffffffL;
+ c->h4=(c->h4+C)&0xffffffffL;
+
+ if (--num == 0) break;
+
+ A=c->h0;
+ B=c->h1;
+ C=c->h2;
+ D=c->h3;
+ E=c->h4;
+
+ }
+ }
+#endif
+
+#else /* OPENSSL_SMALL_FOOTPRINT */
+
+#define BODY_00_15(xi) do { \
+ T=E+K_00_19+F_00_19(B,C,D); \
+ E=D, D=C, C=ROTATE(B,30), B=A; \
+ A=ROTATE(A,5)+T+xi; } while(0)
+
+#define BODY_16_19(xa,xb,xc,xd) do { \
+ Xupdate(T,xa,xa,xb,xc,xd); \
+ T+=E+K_00_19+F_00_19(B,C,D); \
+ E=D, D=C, C=ROTATE(B,30), B=A; \
+ A=ROTATE(A,5)+T; } while(0)
+
+#define BODY_20_39(xa,xb,xc,xd) do { \
+ Xupdate(T,xa,xa,xb,xc,xd); \
+ T+=E+K_20_39+F_20_39(B,C,D); \
+ E=D, D=C, C=ROTATE(B,30), B=A; \
+ A=ROTATE(A,5)+T; } while(0)
+
+#define BODY_40_59(xa,xb,xc,xd) do { \
+ Xupdate(T,xa,xa,xb,xc,xd); \
+ T+=E+K_40_59+F_40_59(B,C,D); \
+ E=D, D=C, C=ROTATE(B,30), B=A; \
+ A=ROTATE(A,5)+T; } while(0)
+
+#define BODY_60_79(xa,xb,xc,xd) do { \
+ Xupdate(T,xa,xa,xb,xc,xd); \
+ T=E+K_60_79+F_60_79(B,C,D); \
+ E=D, D=C, C=ROTATE(B,30), B=A; \
+ A=ROTATE(A,5)+T+xa; } while(0)
+
+#if !defined(SHA1_ASM)
+static void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
+ {
+ const unsigned char *data=p;
+ unsigned MD32_REG_T A,B,C,D,E,T,l;
+ int i;
+ SHA_LONG X[16];
+
+ A=c->h0;
+ B=c->h1;
+ C=c->h2;
+ D=c->h3;
+ E=c->h4;
+
+ for (;;)
+ {
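+		/*
+		 * The loop bounds look odd but cover exactly 80 rounds
+		 * (16+4+20+20+20); each start index is offset so that the
+		 * X[] subscripts, taken mod 16, line up with the schedule
+		 * taps W[t-3], W[t-8], W[t-14] and W[t-16] of round t.
+		 */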
+ for (i=0;i<16;i++)
+ { HOST_c2l(data,l); X[i]=l; BODY_00_15(X[i]); }
+ for (i=0;i<4;i++)
+ { BODY_16_19(X[i], X[i+2], X[i+8], X[(i+13)&15]); }
+ for (;i<24;i++)
+ { BODY_20_39(X[i&15], X[(i+2)&15], X[(i+8)&15],X[(i+13)&15]); }
+ for (i=0;i<20;i++)
+ { BODY_40_59(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); }
+ for (i=4;i<24;i++)
+ { BODY_60_79(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); }
+
+ c->h0=(c->h0+A)&0xffffffffL;
+ c->h1=(c->h1+B)&0xffffffffL;
+ c->h2=(c->h2+C)&0xffffffffL;
+ c->h3=(c->h3+D)&0xffffffffL;
+ c->h4=(c->h4+E)&0xffffffffL;
+
+ if (--num == 0) break;
+
+ A=c->h0;
+ B=c->h1;
+ C=c->h2;
+ D=c->h3;
+ E=c->h4;
+
+ }
+ }
+#endif
+
+#endif
diff --git a/ext/libressl/include/compat/string.h b/ext/libressl/include/compat/string.h
new file mode 100644
index 0000000..4bf7519
--- /dev/null
+++ b/ext/libressl/include/compat/string.h
@@ -0,0 +1,87 @@
+/*
+ * Public domain
+ * string.h compatibility shim
+ */
+
+#ifndef LIBCRYPTOCOMPAT_STRING_H
+#define LIBCRYPTOCOMPAT_STRING_H
+
+#ifdef _MSC_VER
+#if _MSC_VER >= 1900
+#include <../ucrt/string.h>
+#else
+#include <../include/string.h>
+#endif
+#else
+#include_next <string.h>
+#endif
+
+#include <sys/types.h>
+
+#if defined(__sun) || defined(_AIX) || defined(__hpux)
+/* Some functions historically defined in string.h were placed in strings.h by
+ * SUS. Use the same hack that OS X and FreeBSD use to work around this on
+ * AIX, Solaris, and HP-UX.
+ */
+#include <strings.h>
+#endif
+
+#ifndef HAVE_STRCASECMP
+int strcasecmp(const char *s1, const char *s2);
+int strncasecmp(const char *s1, const char *s2, size_t len);
+#endif
+
+#ifndef HAVE_STRLCPY
+size_t strlcpy(char *dst, const char *src, size_t siz);
+#endif
+
+#ifndef HAVE_STRLCAT
+size_t strlcat(char *dst, const char *src, size_t siz);
+#endif
+
+#ifndef HAVE_STRNDUP
+char * strndup(const char *str, size_t maxlen);
+/* the only user of strnlen is strndup, so only build it if needed */
+#ifndef HAVE_STRNLEN
+size_t strnlen(const char *str, size_t maxlen);
+#endif
+#endif
+
+#ifndef HAVE_STRSEP
+char *strsep(char **stringp, const char *delim);
+#endif
+
+#ifndef HAVE_EXPLICIT_BZERO
+void explicit_bzero(void *, size_t);
+#endif
+
+#ifndef HAVE_TIMINGSAFE_BCMP
+int timingsafe_bcmp(const void *b1, const void *b2, size_t n);
+#endif
+
+#ifndef HAVE_TIMINGSAFE_MEMCMP
+int timingsafe_memcmp(const void *b1, const void *b2, size_t len);
+#endif
+
+#ifndef HAVE_MEMMEM
+void * memmem(const void *big, size_t big_len, const void *little,
+ size_t little_len);
+#endif
+
+#ifdef _WIN32
+#include <errno.h>
+
+static inline char *
+posix_strerror(int errnum)
+{
+ if (errnum == ECONNREFUSED) {
+ return "Connection refused";
+ }
+ return strerror(errnum);
+}
+
+#define strerror(errnum) posix_strerror(errnum)
+
+#endif
+
+#endif
diff --git a/ext/libressl/include/compat/unistd.h b/ext/libressl/include/compat/unistd.h
new file mode 100644
index 0000000..5e6ab1d
--- /dev/null
+++ b/ext/libressl/include/compat/unistd.h
@@ -0,0 +1,78 @@
+/*
+ * Public domain
+ * unistd.h compatibility shim
+ */
+
+#ifndef LIBCRYPTOCOMPAT_UNISTD_H
+#define LIBCRYPTOCOMPAT_UNISTD_H
+
+#ifndef _MSC_VER
+
+#include_next <unistd.h>
+
+#ifdef __MINGW32__
+int ftruncate(int fd, off_t length);
+uid_t getuid(void);
+ssize_t pread(int d, void *buf, size_t nbytes, off_t offset);
+ssize_t pwrite(int d, const void *buf, size_t nbytes, off_t offset);
+#endif
+
+#else
+
+#include <stdlib.h>
+#include <io.h>
+#include <process.h>
+
+#define STDOUT_FILENO 1
+#define STDERR_FILENO 2
+
+#define R_OK 4
+#define W_OK 2
+#define X_OK 0
+#define F_OK 0
+
+#define SEEK_SET 0
+#define SEEK_CUR 1
+#define SEEK_END 2
+
+#define access _access
+
+#ifdef _MSC_VER
+#include <windows.h>
+static inline unsigned int sleep(unsigned int seconds)
+{
+ Sleep(seconds * 1000);
+ return seconds;
+}
+#endif
+
+int ftruncate(int fd, off_t length);
+uid_t getuid(void);
+ssize_t pread(int d, void *buf, size_t nbytes, off_t offset);
+ssize_t pwrite(int d, const void *buf, size_t nbytes, off_t offset);
+
+#endif
+
+#ifndef HAVE_GETENTROPY
+int getentropy(void *buf, size_t buflen);
+#else
+/*
+ * Solaris 11.3 adds getentropy(2), but defines the function in sys/random.h
+ */
+#if defined(__sun)
+#include <sys/random.h>
+#endif
+#endif
+
+#ifndef HAVE_GETPAGESIZE
+int getpagesize(void);
+#endif
+
+#define pledge(request, paths) 0
+#define unveil(path, permissions) 0
+
+#ifndef HAVE_PIPE2
+int pipe2(int fildes[2], int flags);
+#endif
+
+#endif
diff --git a/ext/libressl/include/md32_common.h b/ext/libressl/include/md32_common.h
new file mode 100644
index 0000000..0dca617
--- /dev/null
+++ b/ext/libressl/include/md32_common.h
@@ -0,0 +1,361 @@
+/* $OpenBSD: md32_common.h,v 1.22 2016/11/04 13:56:04 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+/*
+ * This is a generic 32-bit "collector" for message digest algorithms.
+ * Whenever needed it collects the input character stream into chunks of
+ * 32-bit values and invokes a block function that performs the actual
+ * hash calculation.
+ *
+ * Porting guide.
+ *
+ * Obligatory macros:
+ *
+ * DATA_ORDER_IS_BIG_ENDIAN or DATA_ORDER_IS_LITTLE_ENDIAN
+ * this macro defines byte order of input stream.
+ * HASH_CBLOCK
+ * size of a unit chunk HASH_BLOCK operates on.
+ * HASH_LONG
+ *	has to be at least 32 bits wide.
+ * HASH_CTX
+ * context structure that at least contains following
+ * members:
+ * typedef struct {
+ * ...
+ * HASH_LONG Nl,Nh;
+ * either {
+ * HASH_LONG data[HASH_LBLOCK];
+ * unsigned char data[HASH_CBLOCK];
+ * };
+ * unsigned int num;
+ * ...
+ * } HASH_CTX;
+ * data[] vector is expected to be zeroed upon first call to
+ * HASH_UPDATE.
+ * HASH_UPDATE
+ * name of "Update" function, implemented here.
+ * HASH_TRANSFORM
+ * name of "Transform" function, implemented here.
+ * HASH_FINAL
+ * name of "Final" function, implemented here.
+ * HASH_BLOCK_DATA_ORDER
+ * name of "block" function capable of treating *unaligned* input
+ * message in original (data) byte order, implemented externally.
+ * HASH_MAKE_STRING
+ *	macro converting context variables to the output hash string.
+ *
+ * MD5 example:
+ *
+ * #define DATA_ORDER_IS_LITTLE_ENDIAN
+ *
+ * #define HASH_LONG MD5_LONG
+ * #define HASH_CTX MD5_CTX
+ * #define HASH_CBLOCK MD5_CBLOCK
+ * #define HASH_UPDATE MD5_Update
+ * #define HASH_TRANSFORM MD5_Transform
+ * #define HASH_FINAL MD5_Final
+ * #define HASH_BLOCK_DATA_ORDER md5_block_data_order
+ *
+ * <appro@fy.chalmers.se>
+ */
+
+#include <stdint.h>
+
+#include <openssl/opensslconf.h>
+
+#if !defined(DATA_ORDER_IS_BIG_ENDIAN) && !defined(DATA_ORDER_IS_LITTLE_ENDIAN)
+#error "DATA_ORDER must be defined!"
+#endif
+
+#ifndef HASH_CBLOCK
+#error "HASH_CBLOCK must be defined!"
+#endif
+#ifndef HASH_LONG
+#error "HASH_LONG must be defined!"
+#endif
+#ifndef HASH_CTX
+#error "HASH_CTX must be defined!"
+#endif
+
+#ifndef HASH_UPDATE
+#error "HASH_UPDATE must be defined!"
+#endif
+#ifndef HASH_TRANSFORM
+#error "HASH_TRANSFORM must be defined!"
+#endif
+#if !defined(HASH_FINAL) && !defined(HASH_NO_FINAL)
+#error "HASH_FINAL or HASH_NO_FINAL must be defined!"
+#endif
+
+#ifndef HASH_BLOCK_DATA_ORDER
+#error "HASH_BLOCK_DATA_ORDER must be defined!"
+#endif
+
+/*
+ * This common idiom is recognized by the compiler and turned into a
+ * CPU-specific intrinsic as appropriate.
+ * e.g. GCC optimizes to roll on amd64 at -O0
+ */
+static inline uint32_t ROTATE(uint32_t a, uint32_t n)
+{
+ return (a<<n)|(a>>(32-n));
+}
+
+#if defined(DATA_ORDER_IS_BIG_ENDIAN)
+
+#if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
+# if (defined(__i386) || defined(__i386__) || \
+ defined(__x86_64) || defined(__x86_64__))
+ /*
+ * This gives ~30-40% performance improvement in SHA-256 compiled
+	 * with gcc [on P4]. Well, the first macro, to be frank. We can pull
+ * this trick on x86* platforms only, because these CPUs can fetch
+ * unaligned data without raising an exception.
+ */
+# define HOST_c2l(c,l) ({ unsigned int r=*((const unsigned int *)(c)); \
+ asm ("bswapl %0":"=r"(r):"0"(r)); \
+ (c)+=4; (l)=r; })
+# define HOST_l2c(l,c) ({ unsigned int r=(l); \
+ asm ("bswapl %0":"=r"(r):"0"(r)); \
+ *((unsigned int *)(c))=r; (c)+=4; })
+# endif
+#endif
+
+#ifndef HOST_c2l
+#define HOST_c2l(c,l) do {l =(((unsigned long)(*((c)++)))<<24); \
+ l|=(((unsigned long)(*((c)++)))<<16); \
+ l|=(((unsigned long)(*((c)++)))<< 8); \
+ l|=(((unsigned long)(*((c)++))) ); \
+ } while (0)
+#endif
+#ifndef HOST_l2c
+#define HOST_l2c(l,c) do {*((c)++)=(unsigned char)(((l)>>24)&0xff); \
+ *((c)++)=(unsigned char)(((l)>>16)&0xff); \
+ *((c)++)=(unsigned char)(((l)>> 8)&0xff); \
+ *((c)++)=(unsigned char)(((l) )&0xff); \
+ } while (0)
+#endif
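+/*
+ * e.g. with big-endian data order, HOST_c2l reads the bytes
+ * {0x01,0x02,0x03,0x04} as l == 0x01020304 and advances c by four;
+ * HOST_l2c is the inverse store.
+ */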
+
+#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
+
+#if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
+# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4)
+# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4)
+#endif
+
+#ifndef HOST_c2l
+#define HOST_c2l(c,l) do {l =(((unsigned long)(*((c)++))) ); \
+ l|=(((unsigned long)(*((c)++)))<< 8); \
+ l|=(((unsigned long)(*((c)++)))<<16); \
+ l|=(((unsigned long)(*((c)++)))<<24); \
+ } while (0)
+#endif
+#ifndef HOST_l2c
+#define HOST_l2c(l,c) do {*((c)++)=(unsigned char)(((l) )&0xff); \
+ *((c)++)=(unsigned char)(((l)>> 8)&0xff); \
+ *((c)++)=(unsigned char)(((l)>>16)&0xff); \
+ *((c)++)=(unsigned char)(((l)>>24)&0xff); \
+ } while (0)
+#endif
+
+#endif
+
+/*
+ * Time for some action:-)
+ */
+
+int
+HASH_UPDATE(HASH_CTX *c, const void *data_, size_t len)
+{
+ const unsigned char *data = data_;
+ unsigned char *p;
+ HASH_LONG l;
+ size_t n;
+
+ if (len == 0)
+ return 1;
+
+ l = (c->Nl + (((HASH_LONG)len) << 3))&0xffffffffUL;
+ /* 95-05-24 eay Fixed a bug with the overflow handling, thanks to
+ * Wei Dai <weidai@eskimo.com> for pointing it out. */
+ if (l < c->Nl) /* overflow */
+ c->Nh++;
+ c->Nh+=(HASH_LONG)(len>>29); /* might cause compiler warning on 16-bit */
+ c->Nl = l;
+
+ n = c->num;
+ if (n != 0) {
+ p = (unsigned char *)c->data;
+
+ if (len >= HASH_CBLOCK || len + n >= HASH_CBLOCK) {
+ memcpy (p + n, data, HASH_CBLOCK - n);
+ HASH_BLOCK_DATA_ORDER (c, p, 1);
+ n = HASH_CBLOCK - n;
+ data += n;
+ len -= n;
+ c->num = 0;
+ memset (p,0,HASH_CBLOCK); /* keep it zeroed */
+ } else {
+ memcpy (p + n, data, len);
+ c->num += (unsigned int)len;
+ return 1;
+ }
+ }
+
+ n = len/HASH_CBLOCK;
+ if (n > 0) {
+ HASH_BLOCK_DATA_ORDER (c, data, n);
+ n *= HASH_CBLOCK;
+ data += n;
+ len -= n;
+ }
+
+ if (len != 0) {
+ p = (unsigned char *)c->data;
+ c->num = (unsigned int)len;
+ memcpy (p, data, len);
+ }
+ return 1;
+}
+
+
+void HASH_TRANSFORM (HASH_CTX *c, const unsigned char *data)
+{
+ HASH_BLOCK_DATA_ORDER (c, data, 1);
+}
+
+
+#ifndef HASH_NO_FINAL
+int HASH_FINAL (unsigned char *md, HASH_CTX *c)
+{
+ unsigned char *p = (unsigned char *)c->data;
+ size_t n = c->num;
+
+ p[n] = 0x80; /* there is always room for one */
+ n++;
+
+ if (n > (HASH_CBLOCK - 8)) {
+ memset (p + n, 0, HASH_CBLOCK - n);
+ n = 0;
+ HASH_BLOCK_DATA_ORDER (c, p, 1);
+ }
+ memset (p + n, 0, HASH_CBLOCK - 8 - n);
+
+ p += HASH_CBLOCK - 8;
+#if defined(DATA_ORDER_IS_BIG_ENDIAN)
+ HOST_l2c(c->Nh, p);
+ HOST_l2c(c->Nl, p);
+#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
+ HOST_l2c(c->Nl, p);
+ HOST_l2c(c->Nh, p);
+#endif
+ p -= HASH_CBLOCK;
+ HASH_BLOCK_DATA_ORDER (c, p, 1);
+ c->num = 0;
+ memset (p, 0, HASH_CBLOCK);
+
+#ifndef HASH_MAKE_STRING
+#error "HASH_MAKE_STRING must be defined!"
+#else
+ HASH_MAKE_STRING(c, md);
+#endif
+
+ return 1;
+}
+#endif
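+/*
+ * Usage sketch (illustrative; buf/buflen stand for the caller's data):
+ * with the SHA-1 instantiation of these macros the flow is
+ *
+ *	SHA_CTX ctx;
+ *	unsigned char md[SHA_DIGEST_LENGTH];
+ *
+ *	SHA1_Init(&ctx);
+ *	SHA1_Update(&ctx, buf, buflen);
+ *	SHA1_Final(md, &ctx);
+ */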
+
+#ifndef MD32_REG_T
+#if defined(__alpha) || defined(__sparcv9) || defined(__mips)
+#define MD32_REG_T long
+/*
+ * This comment was originally written for MD5, which is why it
+ * discusses A-D. But it basically applies to all 32-bit digests,
+ * which is why it was moved to a common header file.
+ *
+ * In case you wonder why A-D are declared as long and not
+ * as MD5_LONG: doing so results in a slight performance
+ * boost on LP64 architectures. The catch is we don't
+ * really care if 32 MSBs of a 64-bit register get polluted
+ * with eventual overflows as we *save* only 32 LSBs in
+ * *either* case. Now declaring 'em long excuses the compiler
+ * from keeping 32 MSBs zeroed resulting in 13% performance
+ * improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
+ * Well, to be honest it should say that this *prevents*
+ * performance degradation.
+ * <appro@fy.chalmers.se>
+ */
+#else
+/*
+ * The above is not absolute and there are LP64 compilers that
+ * generate better code if MD32_REG_T is defined as int. The above
+ * pre-processor condition reflects the circumstances under which
+ * the conclusion was made and is subject to further extension.
+ * <appro@fy.chalmers.se>
+ */
+#define MD32_REG_T int
+#endif
+#endif
diff --git a/ext/libressl/include/openssl/aes.h b/ext/libressl/include/openssl/aes.h
new file mode 100644
index 0000000..c904485
--- /dev/null
+++ b/ext/libressl/include/openssl/aes.h
@@ -0,0 +1,138 @@
+/* $OpenBSD: aes.h,v 1.14 2014/07/09 09:10:07 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#ifndef HEADER_AES_H
+#define HEADER_AES_H
+
+#include <openssl/opensslconf.h>
+
+#ifdef OPENSSL_NO_AES
+#error AES is disabled.
+#endif
+
+#include <stddef.h>
+
+#define AES_ENCRYPT 1
+#define AES_DECRYPT 0
+
+/* Because an array size can't be a const in C, the following two are macros.
+ Both sizes are in bytes. */
+#define AES_MAXNR 14
+#define AES_BLOCK_SIZE 16
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* This should be a hidden type, but EVP requires that the size be known */
+struct aes_key_st {
+ unsigned int rd_key[4 *(AES_MAXNR + 1)];
+ int rounds;
+};
+typedef struct aes_key_st AES_KEY;
+
+const char *AES_options(void);
+
+int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key);
+int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key);
+
+void AES_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+void AES_decrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+
+void AES_ecb_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key, const int enc);
+void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const AES_KEY *key, unsigned char *ivec, const int enc);
+void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const AES_KEY *key, unsigned char *ivec, int *num,
+ const int enc);
+void AES_cfb1_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const AES_KEY *key, unsigned char *ivec, int *num,
+ const int enc);
+void AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const AES_KEY *key, unsigned char *ivec, int *num,
+ const int enc);
+void AES_ofb128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const AES_KEY *key, unsigned char *ivec, int *num);
+void AES_ctr128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const AES_KEY *key, unsigned char ivec[AES_BLOCK_SIZE],
+ unsigned char ecount_buf[AES_BLOCK_SIZE], unsigned int *num);
+/* NB: the IV is _two_ blocks long */
+void AES_ige_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const AES_KEY *key, unsigned char *ivec, const int enc);
+
+int AES_wrap_key(AES_KEY *key, const unsigned char *iv, unsigned char *out,
+ const unsigned char *in, unsigned int inlen);
+int AES_unwrap_key(AES_KEY *key, const unsigned char *iv, unsigned char *out,
+ const unsigned char *in, unsigned int inlen);
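+
+/*
+ * A minimal CBC sketch (key/pt/ct/len come from the caller, len is
+ * assumed to be a multiple of AES_BLOCK_SIZE, error checks omitted).
+ * Note that ivec is updated in place, so keep a copy of the IV if it
+ * is needed again for decryption:
+ *
+ *	AES_KEY ks;
+ *	unsigned char iv[AES_BLOCK_SIZE] = { 0 };
+ *
+ *	AES_set_encrypt_key(key, 128, &ks);
+ *	AES_cbc_encrypt(pt, ct, len, &ks, iv, AES_ENCRYPT);
+ */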
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !HEADER_AES_H */
diff --git a/ext/libressl/include/openssl/blowfish.h b/ext/libressl/include/openssl/blowfish.h
new file mode 100644
index 0000000..4d2db80
--- /dev/null
+++ b/ext/libressl/include/openssl/blowfish.h
@@ -0,0 +1,122 @@
+/* $OpenBSD: blowfish.h,v 1.14 2014/07/10 09:01:04 miod Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscape's SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are adhered to.  The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the routines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publicly available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#ifndef HEADER_BLOWFISH_H
+#define HEADER_BLOWFISH_H
+
+#include <openssl/opensslconf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef OPENSSL_NO_BF
+#error BF is disabled.
+#endif
+
+#define BF_ENCRYPT 1
+#define BF_DECRYPT 0
+
+/*
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ * ! BF_LONG has to be at least 32 bits wide. If it's wider, then !
+ * ! BF_LONG_LOG2 has to be defined as well.                    !
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ */
+
+#define BF_LONG unsigned int
+
+#define BF_ROUNDS 16
+#define BF_BLOCK 8
+
+typedef struct bf_key_st
+ {
+ BF_LONG P[BF_ROUNDS+2];
+ BF_LONG S[4*256];
+ } BF_KEY;
+
+void BF_set_key(BF_KEY *key, int len, const unsigned char *data);
+
+void BF_encrypt(BF_LONG *data,const BF_KEY *key);
+void BF_decrypt(BF_LONG *data,const BF_KEY *key);
+
+void BF_ecb_encrypt(const unsigned char *in, unsigned char *out,
+ const BF_KEY *key, int enc);
+void BF_cbc_encrypt(const unsigned char *in, unsigned char *out, long length,
+ const BF_KEY *schedule, unsigned char *ivec, int enc);
+void BF_cfb64_encrypt(const unsigned char *in, unsigned char *out, long length,
+ const BF_KEY *schedule, unsigned char *ivec, int *num, int enc);
+void BF_ofb64_encrypt(const unsigned char *in, unsigned char *out, long length,
+ const BF_KEY *schedule, unsigned char *ivec, int *num);
+const char *BF_options(void);
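+
+/*
+ * A minimal usage sketch (key/keylen and the 8-byte block come from
+ * the caller):
+ *
+ *	BF_KEY bf;
+ *	unsigned char blk[BF_BLOCK];
+ *
+ *	BF_set_key(&bf, keylen, key);
+ *	BF_ecb_encrypt(blk, blk, &bf, BF_ENCRYPT);
+ */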
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/ext/libressl/include/openssl/chacha.h b/ext/libressl/include/openssl/chacha.h
new file mode 100644
index 0000000..e2345b2
--- /dev/null
+++ b/ext/libressl/include/openssl/chacha.h
@@ -0,0 +1,68 @@
+/* $OpenBSD: chacha.h,v 1.8 2019/01/22 00:59:21 dlg Exp $ */
+/*
+ * Copyright (c) 2014 Joel Sing <jsing@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef HEADER_CHACHA_H
+#define HEADER_CHACHA_H
+
+#include <openssl/opensslconf.h>
+
+#if defined(OPENSSL_NO_CHACHA)
+#error ChaCha is disabled.
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+ unsigned int input[16];
+ unsigned char ks[64];
+ unsigned char unused;
+} ChaCha_ctx;
+
+void ChaCha_set_key(ChaCha_ctx *ctx, const unsigned char *key,
+ unsigned int keybits);
+void ChaCha_set_iv(ChaCha_ctx *ctx, const unsigned char *iv,
+ const unsigned char *counter);
+void ChaCha(ChaCha_ctx *ctx, unsigned char *out, const unsigned char *in,
+ size_t len);
+
+void CRYPTO_chacha_20(unsigned char *out, const unsigned char *in, size_t len,
+ const unsigned char key[32], const unsigned char iv[8], uint64_t counter);
+void CRYPTO_xchacha_20(unsigned char *out, const unsigned char *in, size_t len,
+ const unsigned char key[32], const unsigned char iv[24]);
+void CRYPTO_hchacha_20(unsigned char out[32],
+ const unsigned char key[32], const unsigned char iv[16]);
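+
+/*
+ * A minimal sketch of the stateful interface (key/iv/in/out/len come
+ * from the caller; a NULL counter is assumed to start at block zero):
+ *
+ *	ChaCha_ctx ctx;
+ *
+ *	ChaCha_set_key(&ctx, key, 256);
+ *	ChaCha_set_iv(&ctx, iv, NULL);
+ *	ChaCha(&ctx, out, in, len);
+ */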
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HEADER_CHACHA_H */
diff --git a/ext/libressl/include/openssl/crypto.h b/ext/libressl/include/openssl/crypto.h
new file mode 100644
index 0000000..46cc836
--- /dev/null
+++ b/ext/libressl/include/openssl/crypto.h
@@ -0,0 +1,15 @@
+#include <unistd.h>
+
+#include <openssl/opensslconf.h>
+
+/* die if we have to */
+static inline void
+OpenSSLDie(const char *file, int line, const char *assertion)
+{
+	_exit(1);
+}
+#define OPENSSL_assert(e) (void)((e) ? 0 : (OpenSSLDie(__FILE__, __LINE__, #e),1))
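+/*
+ * e.g. OPENSSL_assert(len <= sizeof(buf)) terminates the process via
+ * OpenSSLDie() and _exit(1) when the condition is false.
+ */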
diff --git a/ext/libressl/include/openssl/curve25519.h b/ext/libressl/include/openssl/curve25519.h
new file mode 100644
index 0000000..0470e8e
--- /dev/null
+++ b/ext/libressl/include/openssl/curve25519.h
@@ -0,0 +1,90 @@
+/* $OpenBSD: curve25519.h,v 1.3 2019/05/11 15:55:52 tb Exp $ */
+/*
+ * Copyright (c) 2015, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef HEADER_CURVE25519_H
+#define HEADER_CURVE25519_H
+
+#include <stdint.h>
+
+#include <openssl/opensslconf.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Curve25519.
+ *
+ * Curve25519 is an elliptic curve. See https://tools.ietf.org/html/rfc7748.
+ */
+
+/*
+ * X25519.
+ *
+ * X25519 is the Diffie-Hellman primitive built from curve25519. It is
+ * sometimes referred to as curve25519, but X25519 is a more precise name.
+ * See http://cr.yp.to/ecdh.html and https://tools.ietf.org/html/rfc7748.
+ */
+
+#define X25519_KEY_LENGTH 32
+
+/*
+ * X25519_keypair sets |out_public_value| and |out_private_key| to a freshly
+ * generated public/private key pair.
+ */
+void X25519_keypair(uint8_t out_public_value[X25519_KEY_LENGTH],
+ uint8_t out_private_key[X25519_KEY_LENGTH]);
+
+/*
+ * X25519 writes a shared key to |out_shared_key| that is calculated from the
+ * given private key and the peer's public value. It returns one on success and
+ * zero on error.
+ *
+ * Don't use the shared key directly, rather use a KDF and also include the two
+ * public values as inputs.
+ */
+int X25519(uint8_t out_shared_key[X25519_KEY_LENGTH],
+ const uint8_t private_key[X25519_KEY_LENGTH],
+ const uint8_t peers_public_value[X25519_KEY_LENGTH]);
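+
+/*
+ * A minimal key-agreement sketch (error handling and the recommended
+ * KDF step are left to the caller):
+ *
+ *	uint8_t pub_a[X25519_KEY_LENGTH], priv_a[X25519_KEY_LENGTH];
+ *	uint8_t pub_b[X25519_KEY_LENGTH], priv_b[X25519_KEY_LENGTH];
+ *	uint8_t shared[X25519_KEY_LENGTH];
+ *
+ *	X25519_keypair(pub_a, priv_a);
+ *	X25519_keypair(pub_b, priv_b);
+ *	if (!X25519(shared, priv_a, pub_b))
+ *		goto err;
+ */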
+
+/*
+ * ED25519.
+ */
+void ED25519_keypair(uint8_t out_public_key[32], uint8_t out_private_key[64]);
+int ED25519_sign(uint8_t *out_sig, const uint8_t *message, size_t message_len,
+ const uint8_t private_key[64]);
+int ED25519_verify(const uint8_t *message, size_t message_len,
+ const uint8_t signature[64], const uint8_t public_key[32]);
+
+#if defined(__cplusplus)
+} /* extern C */
+#endif
+
+#endif /* HEADER_CURVE25519_H */
diff --git a/ext/libressl/include/openssl/modes.h b/ext/libressl/include/openssl/modes.h
new file mode 100644
index 0000000..67ec751
--- /dev/null
+++ b/ext/libressl/include/openssl/modes.h
@@ -0,0 +1,157 @@
+/* $OpenBSD: modes.h,v 1.3 2018/07/24 10:47:19 bcook Exp $ */
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Rights for redistribution and usage in source and binary
+ * forms are granted according to the OpenSSL license.
+ */
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void (*block128_f)(const unsigned char in[16],
+ unsigned char out[16],
+ const void *key);
+
+typedef void (*cbc128_f)(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], int enc);
+
+typedef void (*ctr128_f)(const unsigned char *in, unsigned char *out,
+ size_t blocks, const void *key,
+ const unsigned char ivec[16]);
+
+typedef void (*ccm128_f)(const unsigned char *in, unsigned char *out,
+ size_t blocks, const void *key,
+ const unsigned char ivec[16],unsigned char cmac[16]);
+
+void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block);
+void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block);
+
+void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], unsigned char ecount_buf[16],
+ unsigned int *num, block128_f block);
+
+void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], unsigned char ecount_buf[16],
+ unsigned int *num, ctr128_f ctr);
+
+void CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], int *num,
+ block128_f block);
+
+void CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], int *num,
+ int enc, block128_f block);
+void CRYPTO_cfb128_8_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const void *key,
+ unsigned char ivec[16], int *num,
+ int enc, block128_f block);
+void CRYPTO_cfb128_1_encrypt(const unsigned char *in, unsigned char *out,
+ size_t bits, const void *key,
+ unsigned char ivec[16], int *num,
+ int enc, block128_f block);
+
+size_t CRYPTO_cts128_encrypt_block(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block);
+size_t CRYPTO_cts128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], cbc128_f cbc);
+size_t CRYPTO_cts128_decrypt_block(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block);
+size_t CRYPTO_cts128_decrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], cbc128_f cbc);
+
+size_t CRYPTO_nistcts128_encrypt_block(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block);
+size_t CRYPTO_nistcts128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], cbc128_f cbc);
+size_t CRYPTO_nistcts128_decrypt_block(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block);
+size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], cbc128_f cbc);
+
+typedef struct gcm128_context GCM128_CONTEXT;
+
+GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block);
+void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block);
+void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
+ size_t len);
+int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
+ size_t len);
+int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
+ const unsigned char *in, unsigned char *out,
+ size_t len);
+int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
+ const unsigned char *in, unsigned char *out,
+ size_t len);
+int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
+ const unsigned char *in, unsigned char *out,
+ size_t len, ctr128_f stream);
+int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
+ const unsigned char *in, unsigned char *out,
+ size_t len, ctr128_f stream);
+int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
+ size_t len);
+void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len);
+void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx);
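+
+/*
+ * A minimal AES-GCM encryption sketch (ks is an expanded AES_KEY; iv,
+ * aad, pt/ct and their lengths come from the caller; error checks
+ * omitted; the function-pointer cast is the usual block128_f idiom):
+ *
+ *	GCM128_CONTEXT *gcm = CRYPTO_gcm128_new(&ks, (block128_f)AES_encrypt);
+ *
+ *	CRYPTO_gcm128_setiv(gcm, iv, 12);
+ *	CRYPTO_gcm128_aad(gcm, aad, aadlen);
+ *	CRYPTO_gcm128_encrypt(gcm, pt, ct, ptlen);
+ *	CRYPTO_gcm128_tag(gcm, tag, 16);
+ *	CRYPTO_gcm128_release(gcm);
+ */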
+
+typedef struct ccm128_context CCM128_CONTEXT;
+
+void CRYPTO_ccm128_init(CCM128_CONTEXT *ctx,
+ unsigned int M, unsigned int L, void *key,block128_f block);
+int CRYPTO_ccm128_setiv(CCM128_CONTEXT *ctx,
+ const unsigned char *nonce, size_t nlen, size_t mlen);
+void CRYPTO_ccm128_aad(CCM128_CONTEXT *ctx,
+ const unsigned char *aad, size_t alen);
+int CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx,
+ const unsigned char *inp, unsigned char *out, size_t len);
+int CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx,
+ const unsigned char *inp, unsigned char *out, size_t len);
+int CRYPTO_ccm128_encrypt_ccm64(CCM128_CONTEXT *ctx,
+ const unsigned char *inp, unsigned char *out, size_t len,
+ ccm128_f stream);
+int CRYPTO_ccm128_decrypt_ccm64(CCM128_CONTEXT *ctx,
+ const unsigned char *inp, unsigned char *out, size_t len,
+ ccm128_f stream);
+size_t CRYPTO_ccm128_tag(CCM128_CONTEXT *ctx, unsigned char *tag, size_t len);
+
+typedef struct xts128_context XTS128_CONTEXT;
+
+int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
+ const unsigned char *inp, unsigned char *out, size_t len, int enc);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/ext/libressl/include/openssl/opensslconf.h b/ext/libressl/include/openssl/opensslconf.h
new file mode 100644
index 0000000..bb71768
--- /dev/null
+++ b/ext/libressl/include/openssl/opensslconf.h
@@ -0,0 +1,153 @@
+#include <openssl/opensslfeatures.h>
+/* crypto/opensslconf.h.in */
+
+#if defined(_MSC_VER) && !defined(__attribute__)
+#define __attribute__(a)
+#endif
+
+#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR)
+#define OPENSSLDIR "/etc/ssl"
+#endif
+
+#undef OPENSSL_UNISTD
+#define OPENSSL_UNISTD <unistd.h>
+
+#undef OPENSSL_EXPORT_VAR_AS_FUNCTION
+
+#if defined(HEADER_IDEA_H) && !defined(IDEA_INT)
+#define IDEA_INT unsigned int
+#endif
+
+#if defined(HEADER_MD2_H) && !defined(MD2_INT)
+#define MD2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC2_H) && !defined(RC2_INT)
+/* I need to put in a mod for the alpha - eay */
+#define RC2_INT unsigned int
+#endif
+
+#if defined(HEADER_RC4_H)
+#if !defined(RC4_INT)
+/* using int types makes the structure larger but makes the code faster
+ * on most boxes I have tested - up to 20% faster. */
+/*
+ * I don't know what "most" means, but declaring "int" is a must on:
+ * - Intel P6 because partial register stalls are very expensive;
+ * - older Alpha because it lacks byte load/store instructions;
+ */
+#define RC4_INT unsigned int
+#endif
+#if !defined(RC4_CHUNK)
+/*
+ * This enables code handling data aligned at natural CPU word
+ * boundary. See crypto/rc4/rc4_enc.c for further details.
+ */
+#define RC4_CHUNK unsigned long
+#endif
+#endif
+
+#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG)
+/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a
+ * 20% speed up (longs are 8 bytes, ints are 4). */
+#ifndef DES_LONG
+#define DES_LONG unsigned int
+#endif
+#endif
+
+#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H)
+#define CONFIG_HEADER_BN_H
+#undef BN_LLONG
+
+/* Should we define BN_DIV2W here? */
+
+/* Only one of the following should be defined */
+#define SIXTY_FOUR_BIT_LONG
+#undef SIXTY_FOUR_BIT
+#undef THIRTY_TWO_BIT
+#endif
+
+#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H)
+#define CONFIG_HEADER_RC4_LOCL_H
+/* If this is defined, data[i] is used instead of *data; this is a 20%
+ * speedup on x86 */
+#undef RC4_INDEX
+#endif
+
+#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H)
+#define CONFIG_HEADER_BF_LOCL_H
+#undef BF_PTR
+#endif /* HEADER_BF_LOCL_H */
+
+#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H)
+#define CONFIG_HEADER_DES_LOCL_H
+#ifndef DES_DEFAULT_OPTIONS
+/* The following is tweaked from a config script, which is why it is a
+ * protected undef/define */
+#ifndef DES_PTR
+#undef DES_PTR
+#endif
+
+/* This helps the C compiler generate the correct code for multiple functional
+ * units. It reduces register dependencies at the expense of 2 more
+ * registers */
+#ifndef DES_RISC1
+#undef DES_RISC1
+#endif
+
+#ifndef DES_RISC2
+#undef DES_RISC2
+#endif
+
+#if defined(DES_RISC1) && defined(DES_RISC2)
+YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!!
+#endif
+
+/* Unroll the inner loop; this sometimes helps, sometimes hinders.
+ * Very much CPU dependent */
+#ifndef DES_UNROLL
+#define DES_UNROLL
+#endif
+
+/* These default values were supplied by
+ * Peter Gutman <pgut001@cs.auckland.ac.nz>
+ * They are only used if nothing else has been defined */
+#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL)
+/* Special defines which change the way the code is built depending on the
+ CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find
+ even newer MIPS CPU's, but at the moment one size fits all for
+ optimization options. Older Sparc's work better with only UNROLL, but
+ there's no way to tell at compile time what it is you're running on */
+
+#if defined( sun ) /* Newer Sparc's */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#elif defined( __ultrix ) /* Older MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined( __osf1__ ) /* Alpha */
+# define DES_PTR
+# define DES_RISC2
+#elif defined ( _AIX ) /* RS6000 */
+ /* Unknown */
+#elif defined( __hpux ) /* HP-PA */
+ /* Unknown */
+#elif defined( __aux ) /* 68K */
+ /* Unknown */
+#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */
+# define DES_UNROLL
+#elif defined( __sgi ) /* Newer MIPS */
+# define DES_PTR
+# define DES_RISC2
+# define DES_UNROLL
+#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */
+# define DES_PTR
+# define DES_RISC1
+# define DES_UNROLL
+#endif /* Systems-specific speed defines */
+#endif
+
+#endif /* DES_DEFAULT_OPTIONS */
+#endif /* HEADER_DES_LOCL_H */
diff --git a/ext/libressl/include/openssl/opensslfeatures.h b/ext/libressl/include/openssl/opensslfeatures.h
new file mode 100644
index 0000000..ba80520
--- /dev/null
+++ b/ext/libressl/include/openssl/opensslfeatures.h
@@ -0,0 +1,120 @@
+/*
+ * Feature flags for LibreSSL... so you can actually tell when things
+ * are enabled, rather than not being able to tell when things are
+ * enabled (or possibly not yet implemented, or removed!).
+ */
+#define LIBRESSL_HAS_TLS1_3
+#define LIBRESSL_HAS_DTLS1_2
+
+#define OPENSSL_THREADS
+
+#define OPENSSL_NO_BUF_FREELISTS
+#define OPENSSL_NO_GMP
+#define OPENSSL_NO_JPAKE
+#define OPENSSL_NO_KRB5
+#define OPENSSL_NO_RSAX
+#define OPENSSL_NO_SHA0
+#define OPENSSL_NO_SSL2
+#define OPENSSL_NO_STORE
+
+/*
+ * OPENSSL_NO_* flags that currently appear in OpenSSL.
+ */
+
+/* #define OPENSSL_NO_AFALGENG */
+/* #define OPENSSL_NO_ALGORITHMS */
+/* #define OPENSSL_NO_ARIA */
+/* #define OPENSSL_NO_ASM */
+#define OPENSSL_NO_ASYNC
+/* #define OPENSSL_NO_AUTOALGINIT */
+/* #define OPENSSL_NO_AUTOERRINIT */
+/* #define OPENSSL_NO_AUTOLOAD_CONFIG */
+/* #define OPENSSL_NO_BF */
+/* #define OPENSSL_NO_BLAKE2 */
+/* #define OPENSSL_NO_CAMELLIA */
+/* #define OPENSSL_NO_CAPIENG */
+/* #define OPENSSL_NO_CAST */
+/* #define OPENSSL_NO_CHACHA */
+/* #define OPENSSL_NO_CMAC */
+/* #define OPENSSL_NO_CMS */
+#define OPENSSL_NO_COMP /* XXX */
+/* #define OPENSSL_NO_CRYPTO_MDEBUG */
+/* #define OPENSSL_NO_CRYPTO_MDEBUG_BACKTRACE */
+/* #define OPENSSL_NO_CT */
+/* #define OPENSSL_NO_DECC_INIT */
+/* #define OPENSSL_NO_DES */
+/* #define OPENSSL_NO_DEVCRYPTOENG */
+/* #define OPENSSL_NO_DGRAM */
+/* #define OPENSSL_NO_DH */
+/* #define OPENSSL_NO_DSA */
+/* #define OPENSSL_NO_DSO */
+/* #define OPENSSL_NO_DTLS */
+/* #define OPENSSL_NO_DTLS1 */
+/* #define OPENSSL_NO_DTLS1_2 */
+/* #define OPENSSL_NO_DTLS1_2_METHOD */
+/* #define OPENSSL_NO_DTLS1_METHOD */
+#define OPENSSL_NO_DYNAMIC_ENGINE
+/* #define OPENSSL_NO_EC */
+/* #define OPENSSL_NO_EC2M */
+#define OPENSSL_NO_EC_NISTP_64_GCC_128
+#define OPENSSL_NO_EGD
+/* #define OPENSSL_NO_ENGINE */
+/* #define OPENSSL_NO_ERR */
+/* #define OPENSSL_NO_FUZZ_AFL */
+/* #define OPENSSL_NO_FUZZ_LIBFUZZER */
+/* #define OPENSSL_NO_GOST */
+#define OPENSSL_NO_HEARTBEATS
+/* #define OPENSSL_NO_HW */
+/* #define OPENSSL_NO_HW_PADLOCK */
+/* #define OPENSSL_NO_IDEA */
+/* #define OPENSSL_NO_INLINE_ASM */
+#define OPENSSL_NO_MD2
+/* #define OPENSSL_NO_MD4 */
+/* #define OPENSSL_NO_MD5 */
+#define OPENSSL_NO_MDC2
+/* #define OPENSSL_NO_MULTIBLOCK */
+/* #define OPENSSL_NO_NEXTPROTONEG */
+/* #define OPENSSL_NO_OCB */
+/* #define OPENSSL_NO_OCSP */
+/* #define OPENSSL_NO_PINSHARED */
+/* #define OPENSSL_NO_POLY1305 */
+/* #define OPENSSL_NO_POSIX_IO */
+#define OPENSSL_NO_PSK
+/* #define OPENSSL_NO_RC2 */
+/* #define OPENSSL_NO_RC4 */
+#define OPENSSL_NO_RC5
+/* #define OPENSSL_NO_RDRAND */
+#define OPENSSL_NO_RFC3779
+/* #define OPENSSL_NO_RMD160 */
+/* #define OPENSSL_NO_RSA */
+/* #define OPENSSL_NO_SCRYPT */
+#define OPENSSL_NO_SCTP
+/* #define OPENSSL_NO_SECURE_MEMORY */
+#define OPENSSL_NO_SEED
+/* #define OPENSSL_NO_SIPHASH */
+/* #define OPENSSL_NO_SM2 */
+/* #define OPENSSL_NO_SM3 */
+/* #define OPENSSL_NO_SM4 */
+/* #define OPENSSL_NO_SOCK */
+#define OPENSSL_NO_SRP
+/* #define OPENSSL_NO_SRTP */
+#define OPENSSL_NO_SSL3
+#define OPENSSL_NO_SSL3_METHOD
+#define OPENSSL_NO_SSL_TRACE
+/* #define OPENSSL_NO_STATIC_ENGINE */
+/* #define OPENSSL_NO_STDIO */
+/* #define OPENSSL_NO_TLS */
+/* #define OPENSSL_NO_TLS1 */
+/* #define OPENSSL_NO_TLS1_1 */
+/* #define OPENSSL_NO_TLS1_1_METHOD */
+/* #define OPENSSL_NO_TLS1_2 */
+/* #define OPENSSL_NO_TLS1_2_METHOD */
+#ifndef LIBRESSL_HAS_TLS1_3
+#define OPENSSL_NO_TLS1_3
+#endif
+/* #define OPENSSL_NO_TLS1_METHOD */
+/* #define OPENSSL_NO_TS */
+/* #define OPENSSL_NO_UI_CONSOLE */
+/* #define OPENSSL_NO_UNIT_TEST */
+/* #define OPENSSL_NO_WEAK_SSL_CIPHERS */
+/* #define OPENSSL_NO_WHIRLPOOL */
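
These flags are what the other headers in this import test at compile time (poly1305.h and sha.h below both #error out when the corresponding OPENSSL_NO_* macro is defined). A minimal, hypothetical consumer-side check, assuming nothing beyond the headers added in this patch:

/* Editor's sketch, not part of the diff: feature-test before including
 * an algorithm header. */
#include <openssl/opensslconf.h>	/* includes opensslfeatures.h first */

#ifndef OPENSSL_NO_POLY1305
#include <openssl/poly1305.h>		/* header added below */
#endif
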
diff --git a/ext/libressl/include/openssl/opensslv.h b/ext/libressl/include/openssl/opensslv.h
new file mode 100644
index 0000000..e06b97e
--- /dev/null
+++ b/ext/libressl/include/openssl/opensslv.h
@@ -0,0 +1,18 @@
+/* $OpenBSD: opensslv.h,v 1.66 2021/09/15 17:14:26 tb Exp $ */
+#ifndef HEADER_OPENSSLV_H
+#define HEADER_OPENSSLV_H
+
+/* These will change with each release of LibreSSL-portable */
+#define LIBRESSL_VERSION_NUMBER 0x3040200fL
+/* ^ Patch starts here */
+#define LIBRESSL_VERSION_TEXT "LibreSSL 3.4.2"
+
+/* These will never change */
+#define OPENSSL_VERSION_NUMBER 0x20000000L
+#define OPENSSL_VERSION_TEXT LIBRESSL_VERSION_TEXT
+#define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT
+
+#define SHLIB_VERSION_HISTORY ""
+#define SHLIB_VERSION_NUMBER "1.0.0"
+
+#endif /* HEADER_OPENSSLV_H */
diff --git a/ext/libressl/include/openssl/poly1305.h b/ext/libressl/include/openssl/poly1305.h
new file mode 100644
index 0000000..00ab0bf
--- /dev/null
+++ b/ext/libressl/include/openssl/poly1305.h
@@ -0,0 +1,49 @@
+/* $OpenBSD: poly1305.h,v 1.3 2014/07/25 14:04:51 jsing Exp $ */
+/*
+ * Copyright (c) 2014 Joel Sing <jsing@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef HEADER_POLY1305_H
+#define HEADER_POLY1305_H
+
+#include <openssl/opensslconf.h>
+
+#if defined(OPENSSL_NO_POLY1305)
+#error Poly1305 is disabled.
+#endif
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct poly1305_context {
+ size_t aligner;
+ unsigned char opaque[136];
+} poly1305_context;
+
+typedef struct poly1305_context poly1305_state;
+
+void CRYPTO_poly1305_init(poly1305_context *ctx, const unsigned char key[32]);
+void CRYPTO_poly1305_update(poly1305_context *ctx, const unsigned char *in,
+ size_t len);
+void CRYPTO_poly1305_finish(poly1305_context *ctx, unsigned char mac[16]);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HEADER_POLY1305_H */
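
The poly1305_context above is deliberately opaque (an alignment member plus 136 opaque bytes), so callers only ever go through the three functions it declares. A hedged usage sketch, computing a 16-byte tag with a one-time 32-byte key; poly1305_tag is a hypothetical wrapper name:

/* Editor's sketch, not part of the diff: one-shot Poly1305 tag. */
#include <stddef.h>
#include <openssl/poly1305.h>

static void
poly1305_tag(unsigned char mac[16], const unsigned char *msg, size_t len,
    const unsigned char key[32])
{
	poly1305_context ctx;

	CRYPTO_poly1305_init(&ctx, key);	/* the key must be used only once */
	CRYPTO_poly1305_update(&ctx, msg, len);
	CRYPTO_poly1305_finish(&ctx, mac);
}
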
diff --git a/ext/libressl/include/openssl/sha.h b/ext/libressl/include/openssl/sha.h
new file mode 100644
index 0000000..87fdf8d
--- /dev/null
+++ b/ext/libressl/include/openssl/sha.h
@@ -0,0 +1,192 @@
+/* $OpenBSD: sha.h,v 1.21 2015/09/13 21:09:56 doug Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <stddef.h>
+
+#ifndef HEADER_SHA_H
+#define HEADER_SHA_H
+#if !defined(HAVE_ATTRIBUTE__BOUNDED__) && !defined(__OpenBSD__)
+#define __bounded__(x, y, z)
+#endif
+
+#include <openssl/opensslconf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(OPENSSL_NO_SHA) || defined(OPENSSL_NO_SHA1)
+#error SHA is disabled.
+#endif
+
+/*
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ * ! SHA_LONG has to be at least 32 bits wide. !
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ */
+
+#define SHA_LONG unsigned int
+
+#define SHA_LBLOCK 16
+#define SHA_CBLOCK (SHA_LBLOCK*4) /* SHA treats input data as a
+ * contiguous array of 32 bit
+ * wide big-endian values. */
+#define SHA_LAST_BLOCK (SHA_CBLOCK-8)
+#define SHA_DIGEST_LENGTH 20
+
+typedef struct SHAstate_st
+ {
+ SHA_LONG h0,h1,h2,h3,h4;
+ SHA_LONG Nl,Nh;
+ SHA_LONG data[SHA_LBLOCK];
+ unsigned int num;
+ } SHA_CTX;
+
+#ifndef OPENSSL_NO_SHA1
+int SHA1_Init(SHA_CTX *c);
+int SHA1_Update(SHA_CTX *c, const void *data, size_t len)
+ __attribute__ ((__bounded__(__buffer__,2,3)));
+int SHA1_Final(unsigned char *md, SHA_CTX *c);
+unsigned char *SHA1(const unsigned char *d, size_t n, unsigned char *md)
+ __attribute__ ((__bounded__(__buffer__,1,2)));
+void SHA1_Transform(SHA_CTX *c, const unsigned char *data);
+#endif
+
+#define SHA256_CBLOCK (SHA_LBLOCK*4) /* SHA-256 treats input data as a
+ * contiguous array of 32 bit
+ * wide big-endian values. */
+#define SHA224_DIGEST_LENGTH 28
+#define SHA256_DIGEST_LENGTH 32
+
+typedef struct SHA256state_st
+ {
+ SHA_LONG h[8];
+ SHA_LONG Nl,Nh;
+ SHA_LONG data[SHA_LBLOCK];
+ unsigned int num,md_len;
+ } SHA256_CTX;
+
+#ifndef OPENSSL_NO_SHA256
+int SHA224_Init(SHA256_CTX *c);
+int SHA224_Update(SHA256_CTX *c, const void *data, size_t len)
+ __attribute__ ((__bounded__(__buffer__,2,3)));
+int SHA224_Final(unsigned char *md, SHA256_CTX *c);
+unsigned char *SHA224(const unsigned char *d, size_t n,unsigned char *md)
+ __attribute__ ((__bounded__(__buffer__,1,2)));
+int SHA256_Init(SHA256_CTX *c);
+int SHA256_Update(SHA256_CTX *c, const void *data, size_t len)
+ __attribute__ ((__bounded__(__buffer__,2,3)));
+int SHA256_Final(unsigned char *md, SHA256_CTX *c);
+unsigned char *SHA256(const unsigned char *d, size_t n,unsigned char *md)
+ __attribute__ ((__bounded__(__buffer__,1,2)));
+void SHA256_Transform(SHA256_CTX *c, const unsigned char *data);
+#endif
+
+#define SHA384_DIGEST_LENGTH 48
+#define SHA512_DIGEST_LENGTH 64
+
+#ifndef OPENSSL_NO_SHA512
+/*
+ * Unlike 32-bit digest algorithms, SHA-512 *relies* on SHA_LONG64
+ * being exactly 64-bit wide. See Implementation Notes in sha512.c
+ * for further details.
+ */
+#define SHA512_CBLOCK (SHA_LBLOCK*8) /* SHA-512 treats input data as a
+ * contiguous array of 64 bit
+ * wide big-endian values. */
+#if defined(_LP64)
+#define SHA_LONG64 unsigned long
+#define U64(C) C##UL
+#else
+#define SHA_LONG64 unsigned long long
+#define U64(C) C##ULL
+#endif
+
+typedef struct SHA512state_st
+ {
+ SHA_LONG64 h[8];
+ SHA_LONG64 Nl,Nh;
+ union {
+ SHA_LONG64 d[SHA_LBLOCK];
+ unsigned char p[SHA512_CBLOCK];
+ } u;
+ unsigned int num,md_len;
+ } SHA512_CTX;
+#endif
+
+#ifndef OPENSSL_NO_SHA512
+int SHA384_Init(SHA512_CTX *c);
+int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
+ __attribute__ ((__bounded__(__buffer__,2,3)));
+int SHA384_Final(unsigned char *md, SHA512_CTX *c);
+unsigned char *SHA384(const unsigned char *d, size_t n,unsigned char *md)
+ __attribute__ ((__bounded__(__buffer__,1,2)));
+int SHA512_Init(SHA512_CTX *c);
+int SHA512_Update(SHA512_CTX *c, const void *data, size_t len)
+ __attribute__ ((__bounded__(__buffer__,2,3)));
+int SHA512_Final(unsigned char *md, SHA512_CTX *c);
+unsigned char *SHA512(const unsigned char *d, size_t n,unsigned char *md)
+ __attribute__ ((__bounded__(__buffer__,1,2)));
+void SHA512_Transform(SHA512_CTX *c, const unsigned char *data);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
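
Every digest family in this header follows the same Init/Update/Final pattern, with the one-shot SHA1()/SHA224()/SHA256()/SHA384()/SHA512() helpers wrapping it. A hedged streaming example against the SHA-256 declarations above; sha256_two_parts is a hypothetical helper name:

/* Editor's sketch, not part of the diff: incremental SHA-256. */
#include <stddef.h>
#include <openssl/sha.h>

static void
sha256_two_parts(unsigned char md[SHA256_DIGEST_LENGTH],
    const void *part1, size_t len1, const void *part2, size_t len2)
{
	SHA256_CTX ctx;

	SHA256_Init(&ctx);
	SHA256_Update(&ctx, part1, len1);
	SHA256_Update(&ctx, part2, len2);	/* any number of updates */
	SHA256_Final(md, &ctx);
}
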
diff --git a/ext/libressl/ssl_common.mk b/ext/libressl/ssl_common.mk
new file mode 100644
index 0000000..fab0710
--- /dev/null
+++ b/ext/libressl/ssl_common.mk
@@ -0,0 +1,7 @@
+-include common.mk
+
+CFLAGS += -I../../include/compat -I../../include
+# exclude from include/compat/string.h
+CFLAGS += -DHAVE_STRCASECMP -DHAVE_STRLCPY -DHAVE_STRLCAT -DHAVE_STRNDUP -DHAVE_STRNLEN -DHAVE_STRSEP -DHAVE_MEMMEM
+# exclude from include/compat/unistd.h
+CFLAGS += -DHAVE_GETPAGESIZE -DHAVE_PIPE2
diff --git a/ext/libressl/ssl_obj.mk b/ext/libressl/ssl_obj.mk
new file mode 100644
index 0000000..1eef355
--- /dev/null
+++ b/ext/libressl/ssl_obj.mk
@@ -0,0 +1,8 @@
+obj_dep_ = curve25519/curve25519.o curve25519/curve25519-generic.o \
+ chacha/chacha.o poly1305/poly1305.o aead/e_chacha20poly1305.o \
+ sha/sha256.o sha/sha512.o \
+ compat/arc4random.o compat/explicit_bzero.o compat/timingsafe_memcmp.o compat/timingsafe_bcmp.o
+obj_dep = $(addprefix crypto/,$(obj_dep_))
+
+subdirs_ = curve25519 chacha poly1305 aead sha aes bf modes compat
+subdirs = $(addprefix crypto/,$(subdirs_))
diff --git a/ext/sha/Makefile b/ext/sha/Makefile
new file mode 100644
index 0000000..ee67f3e
--- /dev/null
+++ b/ext/sha/Makefile
@@ -0,0 +1,15 @@
+include common.mk
+
+obj = sha1.o
+
+
+%.o: %.c %.h
+ $(CC) $(CFLAGS) -c $<
+
+%.o: %.S
+ $(CC) $(CFLAGS) -c $<
+
+all: $(obj)
+
+clean:
+ rm -f *.o
diff --git a/ext/sha/sha1.c b/ext/sha/sha1.c
new file mode 100644
index 0000000..9e5ff68
--- /dev/null
+++ b/ext/sha/sha1.c
@@ -0,0 +1,294 @@
+/*
+SHA-1 in C
+By Steve Reid <steve@edmweb.com>
+100% Public Domain
+
+Test Vectors (from FIPS PUB 180-1)
+"abc"
+ A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D
+"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+ 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1
+A million repetitions of "a"
+ 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F
+*/
+
+/* #define LITTLE_ENDIAN * This should be #define'd already, if true. */
+/* #define SHA1HANDSOFF * Copies data before messing with it. */
+
+#define SHA1HANDSOFF
+
+#include <stdio.h>
+#include <string.h>
+
+/* for uint32_t */
+#include <stdint.h>
+
+#include "sha1.h"
+
+
+#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
+
+/* blk0() and blk() perform the initial expand. */
+/* I got the idea of expanding during the round function from SSLeay */
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \
+ |(rol(block->l[i],8)&0x00FF00FF))
+#elif BYTE_ORDER == BIG_ENDIAN
+#define blk0(i) block->l[i]
+#else
+#error "Endianness not defined!"
+#endif
+#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \
+ ^block->l[(i+2)&15]^block->l[i&15],1))
+
+/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
+#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);
+#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);
+#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
+#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
+
+
+/* Hash a single 512-bit block. This is the core of the algorithm. */
+
+void SHA1Transform(
+ uint32_t state[5],
+ const unsigned char buffer[64]
+)
+{
+ uint32_t a, b, c, d, e;
+
+ typedef union
+ {
+ unsigned char c[64];
+ uint32_t l[16];
+ } CHAR64LONG16;
+
+#ifdef SHA1HANDSOFF
+ CHAR64LONG16 block[1]; /* use array to appear as a pointer */
+
+ memcpy(block, buffer, 64);
+#else
+ /* The following had better never be used because it causes the
+ * pointer-to-const buffer to be cast into a pointer to non-const.
+ * And the result is written through. I threw a "const" in, hoping
+ * this will cause a diagnostic.
+ */
+ CHAR64LONG16 *block = (const CHAR64LONG16 *) buffer;
+#endif
+ /* Copy context->state[] to working vars */
+ a = state[0];
+ b = state[1];
+ c = state[2];
+ d = state[3];
+ e = state[4];
+ /* 4 rounds of 20 operations each. Loop unrolled. */
+ R0(a, b, c, d, e, 0);
+ R0(e, a, b, c, d, 1);
+ R0(d, e, a, b, c, 2);
+ R0(c, d, e, a, b, 3);
+ R0(b, c, d, e, a, 4);
+ R0(a, b, c, d, e, 5);
+ R0(e, a, b, c, d, 6);
+ R0(d, e, a, b, c, 7);
+ R0(c, d, e, a, b, 8);
+ R0(b, c, d, e, a, 9);
+ R0(a, b, c, d, e, 10);
+ R0(e, a, b, c, d, 11);
+ R0(d, e, a, b, c, 12);
+ R0(c, d, e, a, b, 13);
+ R0(b, c, d, e, a, 14);
+ R0(a, b, c, d, e, 15);
+ R1(e, a, b, c, d, 16);
+ R1(d, e, a, b, c, 17);
+ R1(c, d, e, a, b, 18);
+ R1(b, c, d, e, a, 19);
+ R2(a, b, c, d, e, 20);
+ R2(e, a, b, c, d, 21);
+ R2(d, e, a, b, c, 22);
+ R2(c, d, e, a, b, 23);
+ R2(b, c, d, e, a, 24);
+ R2(a, b, c, d, e, 25);
+ R2(e, a, b, c, d, 26);
+ R2(d, e, a, b, c, 27);
+ R2(c, d, e, a, b, 28);
+ R2(b, c, d, e, a, 29);
+ R2(a, b, c, d, e, 30);
+ R2(e, a, b, c, d, 31);
+ R2(d, e, a, b, c, 32);
+ R2(c, d, e, a, b, 33);
+ R2(b, c, d, e, a, 34);
+ R2(a, b, c, d, e, 35);
+ R2(e, a, b, c, d, 36);
+ R2(d, e, a, b, c, 37);
+ R2(c, d, e, a, b, 38);
+ R2(b, c, d, e, a, 39);
+ R3(a, b, c, d, e, 40);
+ R3(e, a, b, c, d, 41);
+ R3(d, e, a, b, c, 42);
+ R3(c, d, e, a, b, 43);
+ R3(b, c, d, e, a, 44);
+ R3(a, b, c, d, e, 45);
+ R3(e, a, b, c, d, 46);
+ R3(d, e, a, b, c, 47);
+ R3(c, d, e, a, b, 48);
+ R3(b, c, d, e, a, 49);
+ R3(a, b, c, d, e, 50);
+ R3(e, a, b, c, d, 51);
+ R3(d, e, a, b, c, 52);
+ R3(c, d, e, a, b, 53);
+ R3(b, c, d, e, a, 54);
+ R3(a, b, c, d, e, 55);
+ R3(e, a, b, c, d, 56);
+ R3(d, e, a, b, c, 57);
+ R3(c, d, e, a, b, 58);
+ R3(b, c, d, e, a, 59);
+ R4(a, b, c, d, e, 60);
+ R4(e, a, b, c, d, 61);
+ R4(d, e, a, b, c, 62);
+ R4(c, d, e, a, b, 63);
+ R4(b, c, d, e, a, 64);
+ R4(a, b, c, d, e, 65);
+ R4(e, a, b, c, d, 66);
+ R4(d, e, a, b, c, 67);
+ R4(c, d, e, a, b, 68);
+ R4(b, c, d, e, a, 69);
+ R4(a, b, c, d, e, 70);
+ R4(e, a, b, c, d, 71);
+ R4(d, e, a, b, c, 72);
+ R4(c, d, e, a, b, 73);
+ R4(b, c, d, e, a, 74);
+ R4(a, b, c, d, e, 75);
+ R4(e, a, b, c, d, 76);
+ R4(d, e, a, b, c, 77);
+ R4(c, d, e, a, b, 78);
+ R4(b, c, d, e, a, 79);
+ /* Add the working vars back into context.state[] */
+ state[0] += a;
+ state[1] += b;
+ state[2] += c;
+ state[3] += d;
+ state[4] += e;
+ /* Wipe variables */
+ a = b = c = d = e = 0;
+#ifdef SHA1HANDSOFF
+ memset(block, '\0', sizeof(block));
+#endif
+}
+
+
+/* SHA1Init - Initialize new context */
+
+void SHA1Init(
+ SHA1_CTX * context
+)
+{
+ /* SHA1 initialization constants */
+ context->state[0] = 0x67452301;
+ context->state[1] = 0xEFCDAB89;
+ context->state[2] = 0x98BADCFE;
+ context->state[3] = 0x10325476;
+ context->state[4] = 0xC3D2E1F0;
+ context->count[0] = context->count[1] = 0;
+}
+
+
+/* Run your data through this. */
+
+void SHA1Update(
+ SHA1_CTX * context,
+ const unsigned char *data,
+ uint32_t len
+)
+{
+ uint32_t i;
+
+ uint32_t j;
+
+ j = context->count[0];
+ if ((context->count[0] += len << 3) < j)
+ context->count[1]++;
+ context->count[1] += (len >> 29);
+ j = (j >> 3) & 63;
+ if ((j + len) > 63)
+ {
+ memcpy(&context->buffer[j], data, (i = 64 - j));
+ SHA1Transform(context->state, context->buffer);
+ for (; i + 63 < len; i += 64)
+ {
+ SHA1Transform(context->state, &data[i]);
+ }
+ j = 0;
+ }
+ else
+ i = 0;
+ memcpy(&context->buffer[j], &data[i], len - i);
+}
+
+
+/* Add padding and return the message digest. */
+
+void SHA1Final(
+ unsigned char digest[20],
+ SHA1_CTX * context
+)
+{
+ unsigned i;
+
+ unsigned char finalcount[8];
+
+ unsigned char c;
+
+#if 0 /* untested "improvement" by DHR */
+ /* Convert context->count to a sequence of bytes
+ * in finalcount. Second element first, but
+ * big-endian order within element.
+ * But we do it all backwards.
+ */
+ unsigned char *fcp = &finalcount[8];
+
+ for (i = 0; i < 2; i++)
+ {
+ uint32_t t = context->count[i];
+
+ int j;
+
+ for (j = 0; j < 4; t >>= 8, j++)
+ *--fcp = (unsigned char) t;
+ }
+#else
+ for (i = 0; i < 8; i++)
+ {
+ finalcount[i] = (unsigned char) ((context->count[(i >= 4 ? 0 : 1)] >> ((3 - (i & 3)) * 8)) & 255); /* Endian independent */
+ }
+#endif
+ c = 0200;
+ SHA1Update(context, &c, 1);
+ while ((context->count[0] & 504) != 448)
+ {
+ c = 0000;
+ SHA1Update(context, &c, 1);
+ }
+ SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */
+ for (i = 0; i < 20; i++)
+ {
+ digest[i] = (unsigned char)
+ ((context->state[i >> 2] >> ((3 - (i & 3)) * 8)) & 255);
+ }
+ /* Wipe variables */
+ memset(context, '\0', sizeof(*context));
+ memset(&finalcount, '\0', sizeof(finalcount));
+}
+
+void SHA1(
+ char *hash_out,
+ const char *str,
+ int len)
+{
+ SHA1_CTX ctx;
+ unsigned int ii;
+
+ SHA1Init(&ctx);
+ for (ii=0; ii<len; ii+=1)
+ SHA1Update(&ctx, (const unsigned char*)str + ii, 1);
+ SHA1Final((unsigned char *)hash_out, &ctx);
+}
diff --git a/ext/sha/sha1.h b/ext/sha/sha1.h
new file mode 100644
index 0000000..96bb008
--- /dev/null
+++ b/ext/sha/sha1.h
@@ -0,0 +1,44 @@
+#ifndef SHA1_H
+#define SHA1_H
+
+/*
+ SHA-1 in C
+ By Steve Reid <steve@edmweb.com>
+ 100% Public Domain
+ */
+
+#include "stdint.h"
+
+typedef struct
+{
+ uint32_t state[5];
+ uint32_t count[2];
+ unsigned char buffer[64];
+} SHA1_CTX;
+
+void SHA1Transform(
+ uint32_t state[5],
+ const unsigned char buffer[64]
+ );
+
+void SHA1Init(
+ SHA1_CTX * context
+ );
+
+void SHA1Update(
+ SHA1_CTX * context,
+ const unsigned char *data,
+ uint32_t len
+ );
+
+void SHA1Final(
+ unsigned char digest[20],
+ SHA1_CTX * context
+ );
+
+void SHA1(
+ char *hash_out,
+ const char *str,
+ int len);
+
+#endif /* SHA1_H */
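
Note that this standalone copy uses its own SHA1Init/SHA1Update/SHA1Final naming and writes a raw 20-byte digest, unlike the OpenSSL-style SHA1_* API in ext/libressl. A hedged usage sketch against the declarations above; digest_buffer is a hypothetical helper name:

/* Editor's sketch, not part of the diff: digest a buffer with ext/sha. */
#include <stdint.h>
#include "sha1.h"

static void
digest_buffer(unsigned char digest[20], const unsigned char *buf, uint32_t len)
{
	SHA1_CTX ctx;

	SHA1Init(&ctx);
	SHA1Update(&ctx, buf, len);	/* may be called repeatedly */
	SHA1Final(digest, &ctx);	/* also wipes the context */
}
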