summary | refs | log | tree | commit | diff
path: root/ext/libressl/crypto/aes
diff options
context:
space:
mode:
Diffstat (limited to 'ext/libressl/crypto/aes')
-rw-r--r-- ext/libressl/crypto/aes/Makefile 14
-rw-r--r-- ext/libressl/crypto/aes/aes-elf-armv4.S 1074
-rw-r--r-- ext/libressl/crypto/aes/aes-elf-x86_64.S 2547
-rw-r--r-- ext/libressl/crypto/aes/aes-macosx-x86_64.S 2544
-rw-r--r-- ext/libressl/crypto/aes/aes-masm-x86_64.S 2948
-rw-r--r-- ext/libressl/crypto/aes/aes-mingw64-x86_64.S 2861
-rw-r--r-- ext/libressl/crypto/aes/aes_cbc.c 65
-rw-r--r-- ext/libressl/crypto/aes/aes_cfb.c 84
-rw-r--r-- ext/libressl/crypto/aes/aes_core.c 1374
-rw-r--r-- ext/libressl/crypto/aes/aes_ctr.c 62
-rw-r--r-- ext/libressl/crypto/aes/aes_ecb.c 69
-rw-r--r-- ext/libressl/crypto/aes/aes_ige.c 194
-rw-r--r-- ext/libressl/crypto/aes/aes_locl.h 83
-rw-r--r-- ext/libressl/crypto/aes/aes_misc.c 65
-rw-r--r-- ext/libressl/crypto/aes/aes_ofb.c 61
-rw-r--r-- ext/libressl/crypto/aes/aes_wrap.c 133
-rw-r--r-- ext/libressl/crypto/aes/aesni-elf-x86_64.S 2539
-rw-r--r-- ext/libressl/crypto/aes/aesni-macosx-x86_64.S 2536
-rw-r--r-- ext/libressl/crypto/aes/aesni-masm-x86_64.S 3099
-rw-r--r-- ext/libressl/crypto/aes/aesni-mingw64-x86_64.S 3008
-rw-r--r-- ext/libressl/crypto/aes/aesni-sha1-elf-x86_64.S 1401
-rw-r--r-- ext/libressl/crypto/aes/aesni-sha1-macosx-x86_64.S 1398
-rw-r--r-- ext/libressl/crypto/aes/aesni-sha1-masm-x86_64.S 1616
-rw-r--r-- ext/libressl/crypto/aes/aesni-sha1-mingw64-x86_64.S 1536
-rw-r--r-- ext/libressl/crypto/aes/bsaes-elf-x86_64.S 2502
-rw-r--r-- ext/libressl/crypto/aes/bsaes-macosx-x86_64.S 2499
-rw-r--r-- ext/libressl/crypto/aes/bsaes-masm-x86_64.S 2803
-rw-r--r-- ext/libressl/crypto/aes/bsaes-mingw64-x86_64.S 2725
-rw-r--r-- ext/libressl/crypto/aes/vpaes-elf-x86_64.S 832
-rw-r--r-- ext/libressl/crypto/aes/vpaes-macosx-x86_64.S 829
-rw-r--r-- ext/libressl/crypto/aes/vpaes-masm-x86_64.S 1213
-rw-r--r-- ext/libressl/crypto/aes/vpaes-mingw64-x86_64.S 1125
32 files changed, 45839 insertions, 0 deletions
diff --git a/ext/libressl/crypto/aes/Makefile b/ext/libressl/crypto/aes/Makefile
new file mode 100644
index 0000000..2b3c04c
--- /dev/null
+++ b/ext/libressl/crypto/aes/Makefile
@@ -0,0 +1,14 @@
+include ../../ssl_common.mk
+CFLAGS+= -D__BEGIN_HIDDEN_DECLS= -D__END_HIDDEN_DECLS=
+# The two -D flags above define both HIDDEN_DECLS markers as empty.
+obj = aes_core.o aes_ecb.o aes_cbc.o aes_cfb.o aes_ctr.o aes_ige.o aes_ofb.o
+# all, dep and clean name actions, not files, so they are declared phony.
+.PHONY: all dep clean
+all: $(obj)
+dep: all
+# Compile each C source into an object file in this directory.
+%.o: %.c
+ $(CC) $(CFLAGS) -c $<
+# Remove build products.
+clean:
+ rm -f *.o *.a
diff --git a/ext/libressl/crypto/aes/aes-elf-armv4.S b/ext/libressl/crypto/aes/aes-elf-armv4.S
new file mode 100644
index 0000000..8164b53
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes-elf-armv4.S
@@ -0,0 +1,1074 @@
+#include "arm_arch.h"
+.text
+.code 32
+
+.type AES_Te,%object @ AES encryption lookup data, kept in .text so the code reaches it PC-relative
+.align 5 @ 2^5 = 32-byte alignment
+AES_Te: @ 256 words, the round code derives Te1..Te3 from these with ror 8, 16, 24
+.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
+.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
+.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
+.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
+.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
+.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
+.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
+.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
+.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
+.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
+.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
+.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
+.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
+.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
+.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
+.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
+.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
+.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
+.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
+.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
+.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
+.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
+.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
+.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
+.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
+.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
+.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
+.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
+.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
+.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
+.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
+.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
+.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
+.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
+.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
+.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
+.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
+.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
+.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
+.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
+.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
+.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
+.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
+.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
+.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
+.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
+.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
+.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
+.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
+.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
+.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
+.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
+.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
+.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
+.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
+.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
+.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
+.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
+.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
+.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
+.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
+.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
+.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
+.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
+@ Te4[256] (S-box bytes at AES_Te+1024, this is the table the key schedule indexes)
+.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
+.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
+.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
+.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
+.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
+.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
+.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
+.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
+.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
+.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
+.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
+.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
+.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
+.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
+.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
+.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
+.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
+.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
+.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
+.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
+.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
+.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
+.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
+.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
+.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
+.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
+.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
+.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
+.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
+.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
+.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
+.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+@ rcon[] (key-schedule round constants at Te4+256, ten entries then six zero words)
+.word 0x01000000, 0x02000000, 0x04000000, 0x08000000
+.word 0x10000000, 0x20000000, 0x40000000, 0x80000000
+.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
+.size AES_Te,.-AES_Te
+
+@ void AES_encrypt(const unsigned char *in, unsigned char *out,
+@ const AES_KEY *key) {
+.global AES_encrypt @ encrypts one 16-byte block, r0 = in, r1 = out, r2 = key
+.type AES_encrypt,%function
+.align 5
+AES_encrypt:
+ sub r3,pc,#8 @ AES_encrypt (pc reads 8 bytes ahead, so r3 = address of this label)
+ stmdb sp!,{r1,r4-r12,lr} @ r1 (out) is saved as well and popped after the core call
+ mov r12,r0 @ inp
+ mov r11,r2 @ key schedule pointer for the core routine
+ sub r10,r3,#AES_encrypt-AES_Te @ Te
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r0,[r12,#3] @ load input data in endian-neutral
+ ldrb r4,[r12,#2] @ manner...
+ ldrb r5,[r12,#1]
+ ldrb r6,[r12,#0]
+ orr r0,r0,r4,lsl#8
+ ldrb r1,[r12,#7]
+ orr r0,r0,r5,lsl#16
+ ldrb r4,[r12,#6]
+ orr r0,r0,r6,lsl#24 @ r0 = in[0..3] read as a big-endian word
+ ldrb r5,[r12,#5]
+ ldrb r6,[r12,#4]
+ orr r1,r1,r4,lsl#8
+ ldrb r2,[r12,#11]
+ orr r1,r1,r5,lsl#16
+ ldrb r4,[r12,#10]
+ orr r1,r1,r6,lsl#24 @ r1 = in[4..7]
+ ldrb r5,[r12,#9]
+ ldrb r6,[r12,#8]
+ orr r2,r2,r4,lsl#8
+ ldrb r3,[r12,#15]
+ orr r2,r2,r5,lsl#16
+ ldrb r4,[r12,#14]
+ orr r2,r2,r6,lsl#24 @ r2 = in[8..11]
+ ldrb r5,[r12,#13]
+ ldrb r6,[r12,#12]
+ orr r3,r3,r4,lsl#8
+ orr r3,r3,r5,lsl#16
+ orr r3,r3,r6,lsl#24 @ r3 = in[12..15]
+#else
+ ldr r0,[r12,#0] @ word loads, this path is built only for ARMv7 and up without strict alignment
+ ldr r1,[r12,#4]
+ ldr r2,[r12,#8]
+ ldr r3,[r12,#12]
+#ifdef __ARMEL__
+ rev r0,r0 @ byte-swap on little-endian so the words match the byte-wise path
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+#endif
+#endif
+ bl _armv4_AES_encrypt @ in: r0-r3 state, r10 Te, r11 key. out: r0-r3
+
+ ldr r12,[sp],#4 @ pop out
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+#ifdef __ARMEL__
+ rev r0,r0 @ back to memory byte order before the word stores
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+#endif
+ str r0,[r12,#0]
+ str r1,[r12,#4]
+ str r2,[r12,#8]
+ str r3,[r12,#12]
+#else
+ mov r4,r0,lsr#24 @ write output in endian-neutral
+ mov r5,r0,lsr#16 @ manner...
+ mov r6,r0,lsr#8
+ strb r4,[r12,#0] @ most significant byte goes to the lowest address
+ strb r5,[r12,#1]
+ mov r4,r1,lsr#24
+ strb r6,[r12,#2]
+ mov r5,r1,lsr#16
+ strb r0,[r12,#3]
+ mov r6,r1,lsr#8
+ strb r4,[r12,#4]
+ strb r5,[r12,#5]
+ mov r4,r2,lsr#24
+ strb r6,[r12,#6]
+ mov r5,r2,lsr#16
+ strb r1,[r12,#7]
+ mov r6,r2,lsr#8
+ strb r4,[r12,#8]
+ strb r5,[r12,#9]
+ mov r4,r3,lsr#24
+ strb r6,[r12,#10]
+ mov r5,r3,lsr#16
+ strb r2,[r12,#11]
+ mov r6,r3,lsr#8
+ strb r4,[r12,#12]
+ strb r5,[r12,#13]
+ strb r6,[r12,#14]
+ strb r3,[r12,#15]
+#endif
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r12,pc} @ restore registers and return in one instruction
+#else
+ ldmia sp!,{r4-r12,lr}
+ tst lr,#1 @ bit 0 of the return address set means a Thumb caller
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-) (0xe12fff1e encodes bx lr)
+#endif
+.size AES_encrypt,.-AES_encrypt
+
+.type _armv4_AES_encrypt,%function @ internal block-encrypt core, no .global so not a public entry
+.align 2 @ in: r0-r3 = state words, r10 = AES_Te, r11 = key schedule
+_armv4_AES_encrypt: @ out: r0-r3. Overwrites r4-r9, r11, r12. r10 is restored before return
+ str lr,[sp,#-4]! @ push lr (lr is then reused as the 0xff byte mask)
+ ldmia r11!,{r4-r7} @ first round key, r11 now points at the second
+ eor r0,r0,r4
+ ldr r12,[r11,#240-16] @ round count, stored at byte offset 240 from the key start
+ eor r1,r1,r5
+ eor r2,r2,r6
+ eor r3,r3,r7
+ sub r12,r12,#1 @ loop runs rounds-1 times, the last round follows the loop
+ mov lr,#255
+
+ and r7,lr,r0 @ split s0 into byte indices for the first pass
+ and r8,lr,r0,lsr#8
+ and r9,lr,r0,lsr#16
+ mov r0,r0,lsr#24
+.Lenc_loop:
+ ldr r4,[r10,r7,lsl#2] @ Te3[s0>>0]
+ and r7,lr,r1,lsr#16 @ i0
+ ldr r5,[r10,r8,lsl#2] @ Te2[s0>>8]
+ and r8,lr,r1
+ ldr r6,[r10,r9,lsl#2] @ Te1[s0>>16]
+ and r9,lr,r1,lsr#8
+ ldr r0,[r10,r0,lsl#2] @ Te0[s0>>24]
+ mov r1,r1,lsr#24
+
+ ldr r7,[r10,r7,lsl#2] @ Te1[s1>>16]
+ ldr r8,[r10,r8,lsl#2] @ Te3[s1>>0]
+ ldr r9,[r10,r9,lsl#2] @ Te2[s1>>8]
+ eor r0,r0,r7,ror#8 @ every load reads the same table, ror 8, 16, 24 gives Te1, Te2, Te3
+ ldr r1,[r10,r1,lsl#2] @ Te0[s1>>24]
+ and r7,lr,r2,lsr#8 @ i0
+ eor r5,r5,r8,ror#8
+ and r8,lr,r2,lsr#16 @ i1
+ eor r6,r6,r9,ror#8
+ and r9,lr,r2
+ ldr r7,[r10,r7,lsl#2] @ Te2[s2>>8]
+ eor r1,r1,r4,ror#24
+ ldr r8,[r10,r8,lsl#2] @ Te1[s2>>16]
+ mov r2,r2,lsr#24
+
+ ldr r9,[r10,r9,lsl#2] @ Te3[s2>>0]
+ eor r0,r0,r7,ror#16
+ ldr r2,[r10,r2,lsl#2] @ Te0[s2>>24]
+ and r7,lr,r3 @ i0
+ eor r1,r1,r8,ror#8
+ and r8,lr,r3,lsr#8 @ i1
+ eor r6,r6,r9,ror#16
+ and r9,lr,r3,lsr#16 @ i2
+ ldr r7,[r10,r7,lsl#2] @ Te3[s3>>0]
+ eor r2,r2,r5,ror#16
+ ldr r8,[r10,r8,lsl#2] @ Te2[s3>>8]
+ mov r3,r3,lsr#24
+
+ ldr r9,[r10,r9,lsl#2] @ Te1[s3>>16]
+ eor r0,r0,r7,ror#24
+ ldr r7,[r11],#16 @ round key word 0, r11 advances by one round key
+ eor r1,r1,r8,ror#16
+ ldr r3,[r10,r3,lsl#2] @ Te0[s3>>24]
+ eor r2,r2,r9,ror#8
+ ldr r4,[r11,#-12] @ round key word 1
+ eor r3,r3,r6,ror#8
+
+ ldr r5,[r11,#-8] @ round key word 2
+ eor r0,r0,r7
+ ldr r6,[r11,#-4] @ round key word 3
+ and r7,lr,r0 @ start splitting the new s0 for the next pass
+ eor r1,r1,r4
+ and r8,lr,r0,lsr#8
+ eor r2,r2,r5
+ and r9,lr,r0,lsr#16
+ eor r3,r3,r6
+ mov r0,r0,lsr#24
+
+ subs r12,r12,#1
+ bne .Lenc_loop
+
+ add r10,r10,#2 @ last round: byte at offset 2 of each Te word is read as Te4
+
+ ldrb r4,[r10,r7,lsl#2] @ Te4[s0>>0]
+ and r7,lr,r1,lsr#16 @ i0
+ ldrb r5,[r10,r8,lsl#2] @ Te4[s0>>8]
+ and r8,lr,r1
+ ldrb r6,[r10,r9,lsl#2] @ Te4[s0>>16]
+ and r9,lr,r1,lsr#8
+ ldrb r0,[r10,r0,lsl#2] @ Te4[s0>>24]
+ mov r1,r1,lsr#24
+
+ ldrb r7,[r10,r7,lsl#2] @ Te4[s1>>16]
+ ldrb r8,[r10,r8,lsl#2] @ Te4[s1>>0]
+ ldrb r9,[r10,r9,lsl#2] @ Te4[s1>>8]
+ eor r0,r7,r0,lsl#8 @ output words are assembled byte by byte with shifts
+ ldrb r1,[r10,r1,lsl#2] @ Te4[s1>>24]
+ and r7,lr,r2,lsr#8 @ i0
+ eor r5,r8,r5,lsl#8
+ and r8,lr,r2,lsr#16 @ i1
+ eor r6,r9,r6,lsl#8
+ and r9,lr,r2
+ ldrb r7,[r10,r7,lsl#2] @ Te4[s2>>8]
+ eor r1,r4,r1,lsl#24
+ ldrb r8,[r10,r8,lsl#2] @ Te4[s2>>16]
+ mov r2,r2,lsr#24
+
+ ldrb r9,[r10,r9,lsl#2] @ Te4[s2>>0]
+ eor r0,r7,r0,lsl#8
+ ldrb r2,[r10,r2,lsl#2] @ Te4[s2>>24]
+ and r7,lr,r3 @ i0
+ eor r1,r1,r8,lsl#16
+ and r8,lr,r3,lsr#8 @ i1
+ eor r6,r9,r6,lsl#8
+ and r9,lr,r3,lsr#16 @ i2
+ ldrb r7,[r10,r7,lsl#2] @ Te4[s3>>0]
+ eor r2,r5,r2,lsl#24
+ ldrb r8,[r10,r8,lsl#2] @ Te4[s3>>8]
+ mov r3,r3,lsr#24
+
+ ldrb r9,[r10,r9,lsl#2] @ Te4[s3>>16]
+ eor r0,r7,r0,lsl#8
+ ldr r7,[r11,#0] @ last round key, words 0..3 into r7, r4, r5, r6
+ ldrb r3,[r10,r3,lsl#2] @ Te4[s3>>24]
+ eor r1,r1,r8,lsl#8
+ ldr r4,[r11,#4]
+ eor r2,r2,r9,lsl#16
+ ldr r5,[r11,#8]
+ eor r3,r6,r3,lsl#24
+ ldr r6,[r11,#12]
+
+ eor r0,r0,r7
+ eor r1,r1,r4
+ eor r2,r2,r5
+ eor r3,r3,r6
+
+ sub r10,r10,#2 @ undo the +2 so r10 is the Te base again
+ ldr pc,[sp],#4 @ pop and return
+.size _armv4_AES_encrypt,.-_armv4_AES_encrypt
+
+.global AES_set_encrypt_key @ expands a user key, r0 = key bytes, r1 = bits, r2 = key schedule
+.type AES_set_encrypt_key,%function @ returns r0 = 0, or -1 for a null pointer or bits not 128, 192, 256
+.align 5
+AES_set_encrypt_key:
+_armv4_AES_set_encrypt_key: @ second name for the same entry, called by AES_set_decrypt_key
+ sub r3,pc,#8 @ AES_set_encrypt_key
+ teq r0,#0 @ reject a null user key
+ moveq r0,#-1
+ beq .Labrt
+ teq r2,#0 @ reject a null key schedule pointer
+ moveq r0,#-1
+ beq .Labrt
+
+ teq r1,#128
+ beq .Lok
+ teq r1,#192
+ beq .Lok
+ teq r1,#256
+ movne r0,#-1 @ any other key size is an error
+ bne .Labrt
+
+.Lok: stmdb sp!,{r4-r12,lr}
+ sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4
+
+ mov r12,r0 @ inp
+ mov lr,r1 @ bits
+ mov r11,r2 @ key
+
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r0,[r12,#3] @ load input data in endian-neutral
+ ldrb r4,[r12,#2] @ manner...
+ ldrb r5,[r12,#1]
+ ldrb r6,[r12,#0]
+ orr r0,r0,r4,lsl#8
+ ldrb r1,[r12,#7]
+ orr r0,r0,r5,lsl#16
+ ldrb r4,[r12,#6]
+ orr r0,r0,r6,lsl#24
+ ldrb r5,[r12,#5]
+ ldrb r6,[r12,#4]
+ orr r1,r1,r4,lsl#8
+ ldrb r2,[r12,#11]
+ orr r1,r1,r5,lsl#16
+ ldrb r4,[r12,#10]
+ orr r1,r1,r6,lsl#24
+ ldrb r5,[r12,#9]
+ ldrb r6,[r12,#8]
+ orr r2,r2,r4,lsl#8
+ ldrb r3,[r12,#15]
+ orr r2,r2,r5,lsl#16
+ ldrb r4,[r12,#14]
+ orr r2,r2,r6,lsl#24
+ ldrb r5,[r12,#13]
+ ldrb r6,[r12,#12]
+ orr r3,r3,r4,lsl#8
+ str r0,[r11],#16 @ rk[0..3] = first 16 key bytes as big-endian words
+ orr r3,r3,r5,lsl#16
+ str r1,[r11,#-12]
+ orr r3,r3,r6,lsl#24
+ str r2,[r11,#-8]
+ str r3,[r11,#-4]
+#else
+ ldr r0,[r12,#0]
+ ldr r1,[r12,#4]
+ ldr r2,[r12,#8]
+ ldr r3,[r12,#12]
+#ifdef __ARMEL__
+ rev r0,r0 @ byte-swap on little-endian to match the byte-wise path
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+#endif
+ str r0,[r11],#16 @ rk[0..3]
+ str r1,[r11,#-12]
+ str r2,[r11,#-8]
+ str r3,[r11,#-4]
+#endif
+
+ teq lr,#128
+ bne .Lnot128
+ mov r12,#10 @ 10 rounds, also used as the loop count
+ str r12,[r11,#240-16] @ round count goes to byte offset 240 from the key start
+ add r6,r10,#256 @ rcon
+ mov lr,#255 @ byte mask (bits value is no longer needed)
+
+.L128_loop:
+ and r5,lr,r3,lsr#24
+ and r7,lr,r3,lsr#16
+ ldrb r5,[r10,r5]
+ and r8,lr,r3,lsr#8
+ ldrb r7,[r10,r7]
+ and r9,lr,r3
+ ldrb r8,[r10,r8]
+ orr r5,r5,r7,lsl#24
+ ldrb r9,[r10,r9]
+ orr r5,r5,r8,lsl#16
+ ldr r4,[r6],#4 @ rcon[i++]
+ orr r5,r5,r9,lsl#8 @ r5 = Te4 of each byte of r3, bytes rotated left by one position
+ eor r5,r5,r4
+ eor r0,r0,r5 @ rk[4]=rk[0]^...
+ eor r1,r1,r0 @ rk[5]=rk[1]^rk[4]
+ str r0,[r11],#16
+ eor r2,r2,r1 @ rk[6]=rk[2]^rk[5]
+ str r1,[r11,#-12]
+ eor r3,r3,r2 @ rk[7]=rk[3]^rk[6]
+ str r2,[r11,#-8]
+ subs r12,r12,#1
+ str r3,[r11,#-4]
+ bne .L128_loop
+ sub r2,r11,#176 @ r2 = key again, r11 advanced 16 + 10*16 bytes
+ b .Ldone
+
+.Lnot128:
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r8,[r12,#19]
+ ldrb r4,[r12,#18]
+ ldrb r5,[r12,#17]
+ ldrb r6,[r12,#16]
+ orr r8,r8,r4,lsl#8
+ ldrb r9,[r12,#23]
+ orr r8,r8,r5,lsl#16
+ ldrb r4,[r12,#22]
+ orr r8,r8,r6,lsl#24
+ ldrb r5,[r12,#21]
+ ldrb r6,[r12,#20]
+ orr r9,r9,r4,lsl#8
+ orr r9,r9,r5,lsl#16
+ str r8,[r11],#8 @ rk[4..5] = key bytes 16..23
+ orr r9,r9,r6,lsl#24
+ str r9,[r11,#-4]
+#else
+ ldr r8,[r12,#16]
+ ldr r9,[r12,#20]
+#ifdef __ARMEL__
+ rev r8,r8
+ rev r9,r9
+#endif
+ str r8,[r11],#8 @ rk[4..5] = key bytes 16..23
+ str r9,[r11,#-4]
+#endif
+
+ teq lr,#192
+ bne .Lnot192
+ mov r12,#12 @ 12 rounds
+ str r12,[r11,#240-24] @ r11 is 24 bytes past the key start here
+ add r6,r10,#256 @ rcon
+ mov lr,#255
+ mov r12,#8 @ loop count, each pass emits up to six words
+
+.L192_loop:
+ and r5,lr,r9,lsr#24
+ and r7,lr,r9,lsr#16
+ ldrb r5,[r10,r5]
+ and r8,lr,r9,lsr#8
+ ldrb r7,[r10,r7]
+ and r9,lr,r9
+ ldrb r8,[r10,r8]
+ orr r5,r5,r7,lsl#24
+ ldrb r9,[r10,r9]
+ orr r5,r5,r8,lsl#16
+ ldr r4,[r6],#4 @ rcon[i++]
+ orr r5,r5,r9,lsl#8 @ same byte-rotated Te4 substitution as the 128-bit loop, on r9
+ eor r9,r5,r4
+ eor r0,r0,r9 @ rk[6]=rk[0]^...
+ eor r1,r1,r0 @ rk[7]=rk[1]^rk[6]
+ str r0,[r11],#24
+ eor r2,r2,r1 @ rk[8]=rk[2]^rk[7]
+ str r1,[r11,#-20]
+ eor r3,r3,r2 @ rk[9]=rk[3]^rk[8]
+ str r2,[r11,#-16]
+ subs r12,r12,#1
+ str r3,[r11,#-12]
+ subeq r2,r11,#216 @ r2 = key again, r11 advanced 24 + 8*24 bytes
+ beq .Ldone
+
+ ldr r7,[r11,#-32]
+ ldr r8,[r11,#-28]
+ eor r7,r7,r3 @ rk[10]=rk[4]^rk[9]
+ eor r9,r8,r7 @ rk[11]=rk[5]^rk[10]
+ str r7,[r11,#-8]
+ str r9,[r11,#-4]
+ b .L192_loop
+
+.Lnot192:
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r8,[r12,#27]
+ ldrb r4,[r12,#26]
+ ldrb r5,[r12,#25]
+ ldrb r6,[r12,#24]
+ orr r8,r8,r4,lsl#8
+ ldrb r9,[r12,#31]
+ orr r8,r8,r5,lsl#16
+ ldrb r4,[r12,#30]
+ orr r8,r8,r6,lsl#24
+ ldrb r5,[r12,#29]
+ ldrb r6,[r12,#28]
+ orr r9,r9,r4,lsl#8
+ orr r9,r9,r5,lsl#16
+ str r8,[r11],#8 @ rk[6..7] = key bytes 24..31
+ orr r9,r9,r6,lsl#24
+ str r9,[r11,#-4]
+#else
+ ldr r8,[r12,#24]
+ ldr r9,[r12,#28]
+#ifdef __ARMEL__
+ rev r8,r8
+ rev r9,r9
+#endif
+ str r8,[r11],#8 @ rk[6..7] = key bytes 24..31
+ str r9,[r11,#-4]
+#endif
+
+ mov r12,#14 @ 14 rounds
+ str r12,[r11,#240-32] @ r11 is 32 bytes past the key start here
+ add r6,r10,#256 @ rcon
+ mov lr,#255
+ mov r12,#7 @ loop count, each pass emits up to eight words
+
+.L256_loop:
+ and r5,lr,r9,lsr#24
+ and r7,lr,r9,lsr#16
+ ldrb r5,[r10,r5]
+ and r8,lr,r9,lsr#8
+ ldrb r7,[r10,r7]
+ and r9,lr,r9
+ ldrb r8,[r10,r8]
+ orr r5,r5,r7,lsl#24
+ ldrb r9,[r10,r9]
+ orr r5,r5,r8,lsl#16
+ ldr r4,[r6],#4 @ rcon[i++]
+ orr r5,r5,r9,lsl#8
+ eor r9,r5,r4
+ eor r0,r0,r9 @ rk[8]=rk[0]^...
+ eor r1,r1,r0 @ rk[9]=rk[1]^rk[8]
+ str r0,[r11],#32
+ eor r2,r2,r1 @ rk[10]=rk[2]^rk[9]
+ str r1,[r11,#-28]
+ eor r3,r3,r2 @ rk[11]=rk[3]^rk[10]
+ str r2,[r11,#-24]
+ subs r12,r12,#1
+ str r3,[r11,#-20]
+ subeq r2,r11,#256 @ r2 = key again, r11 advanced 32 + 7*32 bytes
+ beq .Ldone
+
+ and r5,lr,r3
+ and r7,lr,r3,lsr#8
+ ldrb r5,[r10,r5]
+ and r8,lr,r3,lsr#16
+ ldrb r7,[r10,r7]
+ and r9,lr,r3,lsr#24
+ ldrb r8,[r10,r8]
+ orr r5,r5,r7,lsl#8
+ ldrb r9,[r10,r9]
+ orr r5,r5,r8,lsl#16
+ ldr r4,[r11,#-48]
+ orr r5,r5,r9,lsl#24 @ r5 = Te4 of each byte of r3, bytes kept in place, no rcon
+
+ ldr r7,[r11,#-44]
+ ldr r8,[r11,#-40]
+ eor r4,r4,r5 @ rk[12]=rk[4]^...
+ ldr r9,[r11,#-36]
+ eor r7,r7,r4 @ rk[13]=rk[5]^rk[12]
+ str r4,[r11,#-16]
+ eor r8,r8,r7 @ rk[14]=rk[6]^rk[13]
+ str r7,[r11,#-12]
+ eor r9,r9,r8 @ rk[15]=rk[7]^rk[14]
+ str r8,[r11,#-8]
+ str r9,[r11,#-4]
+ b .L256_loop
+
+.Ldone: mov r0,#0 @ success
+ ldmia sp!,{r4-r12,lr}
+.Labrt: tst lr,#1 @ shared exit, error paths arrive here with r0 = -1 and nothing pushed
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-) (0xe12fff1e encodes bx lr)
+.size AES_set_encrypt_key,.-AES_set_encrypt_key
+
+.global AES_set_decrypt_key @ same arguments and return value as AES_set_encrypt_key
+.type AES_set_decrypt_key,%function @ builds the encryption schedule, reverses the round-key order (.Linv),
+.align 5 @ then applies inverse MixColumns to the inner round keys in place (.Lmix)
+AES_set_decrypt_key:
+ str lr,[sp,#-4]! @ push lr
+ bl _armv4_AES_set_encrypt_key
+ teq r0,#0
+ ldrne lr,[sp],#4 @ pop lr
+ bne .Labrt @ leave through the shared exit in AES_set_encrypt_key, r0 keeps the error
+
+ stmdb sp!,{r4-r12} @ lr was pushed at entry, so the final ldmia can pop it into pc
+
+ ldr r12,[r2,#240] @ AES_set_encrypt_key preserves r2,
+ mov r11,r2 @ which is AES_KEY *key
+ mov r7,r2 @ r7 walks up from the first round key
+ add r8,r2,r12,lsl#4 @ r8 walks down from the last round key at key + 16*rounds
+
+.Linv: ldr r0,[r7] @ swap the 16-byte round keys at r7 and r8
+ ldr r1,[r7,#4]
+ ldr r2,[r7,#8]
+ ldr r3,[r7,#12]
+ ldr r4,[r8]
+ ldr r5,[r8,#4]
+ ldr r6,[r8,#8]
+ ldr r9,[r8,#12]
+ str r0,[r8],#-16
+ str r1,[r8,#16+4]
+ str r2,[r8,#16+8]
+ str r3,[r8,#16+12]
+ str r4,[r7],#16
+ str r5,[r7,#-12]
+ str r6,[r7,#-8]
+ str r9,[r7,#-4]
+ teq r7,r8 @ stop when both pointers reach the middle round key
+ bne .Linv
+ ldr r0,[r11,#16]! @ prefetch tp1
+ mov r7,#0x80
+ mov r8,#0x1b
+ orr r7,r7,#0x8000
+ orr r8,r8,#0x1b00
+ orr r7,r7,r7,lsl#16 @ r7 = 0x80808080, top bit of every byte
+ orr r8,r8,r8,lsl#16 @ r8 = 0x1b1b1b1b
+ sub r12,r12,#1
+ mvn r9,r7 @ r9 = 0x7f7f7f7f
+ mov r12,r12,lsl#2 @ (rounds-1)*4
+
+.Lmix: and r4,r0,r7 @ doubling in GF(2^8) on four packed bytes: pick bytes with the top bit set
+ and r1,r0,r9
+ sub r4,r4,r4,lsr#7 @ 0x80 becomes 0x7f in those bytes
+ and r4,r4,r8 @ which leaves 0x1b in exactly those bytes
+ eor r1,r4,r1,lsl#1 @ tp2
+
+ and r4,r1,r7
+ and r2,r1,r9
+ sub r4,r4,r4,lsr#7
+ and r4,r4,r8
+ eor r2,r4,r2,lsl#1 @ tp4
+
+ and r4,r2,r7
+ and r3,r2,r9
+ sub r4,r4,r4,lsr#7
+ and r4,r4,r8
+ eor r3,r4,r3,lsl#1 @ tp8
+
+ eor r4,r1,r2
+ eor r5,r0,r3 @ tp9
+ eor r4,r4,r3 @ tpe
+ eor r4,r4,r1,ror#24
+ eor r4,r4,r5,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8)
+ eor r4,r4,r2,ror#16
+ eor r4,r4,r5,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16)
+ eor r4,r4,r5,ror#8 @ ^= ROTATE(tp9,24)
+
+ ldr r0,[r11,#4] @ prefetch tp1
+ str r4,[r11],#4 @ write the transformed word back and advance
+ subs r12,r12,#1
+ bne .Lmix
+
+ mov r0,#0 @ success
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r12,pc}
+#else
+ ldmia sp!,{r4-r12,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-) (0xe12fff1e encodes bx lr)
+#endif
+.size AES_set_decrypt_key,.-AES_set_decrypt_key
+
+.type AES_Td,%object @ AES decryption lookup data, kept in .text so the code reaches it PC-relative
+.align 5 @ 2^5 = 32-byte alignment
+AES_Td: @ 256 words, the round code derives Td1..Td3 from these with ror 8, 16, 24
+.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
+.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
+.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
+.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
+.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
+.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
+.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
+.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
+.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
+.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
+.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
+.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
+.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
+.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
+.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
+.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
+.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
+.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
+.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
+.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
+.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
+.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
+.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
+.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
+.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
+.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
+.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
+.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
+.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
+.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
+.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
+.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
+.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
+.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
+.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
+.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
+.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
+.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
+.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
+.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
+.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
+.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
+.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
+.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
+.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
+.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
+.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
+.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
+.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
+.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
+.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
+.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
+.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
+.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
+.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
+.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
+.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
+.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
+.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
+.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
+.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
+.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
+.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
+.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
+@ Td4[256] (inverse S-box bytes at AES_Td+1024, indexed by the last decryption round)
+.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+.size AES_Td,.-AES_Td
+
+@ void AES_decrypt(const unsigned char *in, unsigned char *out,
+@ const AES_KEY *key) {
+.global AES_decrypt @ decrypts one 16-byte block, r0 = in, r1 = out, r2 = key
+.type AES_decrypt,%function
+.align 5
+AES_decrypt:
+ sub r3,pc,#8 @ AES_decrypt (pc reads 8 bytes ahead, so r3 = address of this label)
+ stmdb sp!,{r1,r4-r12,lr} @ r1 (out) is saved as well and popped after the core call
+ mov r12,r0 @ inp
+ mov r11,r2 @ key schedule pointer for the core routine
+ sub r10,r3,#AES_decrypt-AES_Td @ Td
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r0,[r12,#3] @ load input data in endian-neutral
+ ldrb r4,[r12,#2] @ manner...
+ ldrb r5,[r12,#1]
+ ldrb r6,[r12,#0]
+ orr r0,r0,r4,lsl#8
+ ldrb r1,[r12,#7]
+ orr r0,r0,r5,lsl#16
+ ldrb r4,[r12,#6]
+ orr r0,r0,r6,lsl#24 @ r0 = in[0..3] read as a big-endian word
+ ldrb r5,[r12,#5]
+ ldrb r6,[r12,#4]
+ orr r1,r1,r4,lsl#8
+ ldrb r2,[r12,#11]
+ orr r1,r1,r5,lsl#16
+ ldrb r4,[r12,#10]
+ orr r1,r1,r6,lsl#24 @ r1 = in[4..7]
+ ldrb r5,[r12,#9]
+ ldrb r6,[r12,#8]
+ orr r2,r2,r4,lsl#8
+ ldrb r3,[r12,#15]
+ orr r2,r2,r5,lsl#16
+ ldrb r4,[r12,#14]
+ orr r2,r2,r6,lsl#24 @ r2 = in[8..11]
+ ldrb r5,[r12,#13]
+ ldrb r6,[r12,#12]
+ orr r3,r3,r4,lsl#8
+ orr r3,r3,r5,lsl#16
+ orr r3,r3,r6,lsl#24 @ r3 = in[12..15]
+#else
+ ldr r0,[r12,#0] @ word loads, this path is built only for ARMv7 and up without strict alignment
+ ldr r1,[r12,#4]
+ ldr r2,[r12,#8]
+ ldr r3,[r12,#12]
+#ifdef __ARMEL__
+ rev r0,r0 @ byte-swap on little-endian so the words match the byte-wise path
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+#endif
+#endif
+ bl _armv4_AES_decrypt @ in: r0-r3 state, r10 Td, r11 key. out: r0-r3
+
+ ldr r12,[sp],#4 @ pop out
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+#ifdef __ARMEL__
+ rev r0,r0 @ back to memory byte order before the word stores
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+#endif
+ str r0,[r12,#0]
+ str r1,[r12,#4]
+ str r2,[r12,#8]
+ str r3,[r12,#12]
+#else
+ mov r4,r0,lsr#24 @ write output in endian-neutral
+ mov r5,r0,lsr#16 @ manner...
+ mov r6,r0,lsr#8
+ strb r4,[r12,#0] @ most significant byte goes to the lowest address
+ strb r5,[r12,#1]
+ mov r4,r1,lsr#24
+ strb r6,[r12,#2]
+ mov r5,r1,lsr#16
+ strb r0,[r12,#3]
+ mov r6,r1,lsr#8
+ strb r4,[r12,#4]
+ strb r5,[r12,#5]
+ mov r4,r2,lsr#24
+ strb r6,[r12,#6]
+ mov r5,r2,lsr#16
+ strb r1,[r12,#7]
+ mov r6,r2,lsr#8
+ strb r4,[r12,#8]
+ strb r5,[r12,#9]
+ mov r4,r3,lsr#24
+ strb r6,[r12,#10]
+ mov r5,r3,lsr#16
+ strb r2,[r12,#11]
+ mov r6,r3,lsr#8
+ strb r4,[r12,#12]
+ strb r5,[r12,#13]
+ strb r6,[r12,#14]
+ strb r3,[r12,#15]
+#endif
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r12,pc} @ restore registers and return in one instruction
+#else
+ ldmia sp!,{r4-r12,lr}
+ tst lr,#1 @ bit 0 of the return address set means a Thumb caller
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-) (0xe12fff1e encodes bx lr)
+#endif
+.size AES_decrypt,.-AES_decrypt
+
+.type _armv4_AES_decrypt,%function @ internal block-decrypt core, no .global so not a public entry
+.align 2 @ in: r0-r3 = state words, r10 = AES_Td, r11 = key schedule
+_armv4_AES_decrypt: @ out: r0-r3. Overwrites r4-r9, r11, r12. r10 is restored before return
+ str lr,[sp,#-4]! @ push lr (lr is then reused as the 0xff byte mask)
+ ldmia r11!,{r4-r7} @ first round key, r11 now points at the second
+ eor r0,r0,r4
+ ldr r12,[r11,#240-16] @ round count, stored at byte offset 240 from the key start
+ eor r1,r1,r5
+ eor r2,r2,r6
+ eor r3,r3,r7
+ sub r12,r12,#1 @ loop runs rounds-1 times, the last round follows the loop
+ mov lr,#255
+
+ and r7,lr,r0,lsr#16 @ split s0 into byte indices for the first pass
+ and r8,lr,r0,lsr#8
+ and r9,lr,r0
+ mov r0,r0,lsr#24
+.Ldec_loop:
+ ldr r4,[r10,r7,lsl#2] @ Td1[s0>>16]
+ and r7,lr,r1 @ i0
+ ldr r5,[r10,r8,lsl#2] @ Td2[s0>>8]
+ and r8,lr,r1,lsr#16
+ ldr r6,[r10,r9,lsl#2] @ Td3[s0>>0]
+ and r9,lr,r1,lsr#8
+ ldr r0,[r10,r0,lsl#2] @ Td0[s0>>24]
+ mov r1,r1,lsr#24
+
+ ldr r7,[r10,r7,lsl#2] @ Td3[s1>>0]
+ ldr r8,[r10,r8,lsl#2] @ Td1[s1>>16]
+ ldr r9,[r10,r9,lsl#2] @ Td2[s1>>8]
+ eor r0,r0,r7,ror#24 @ every load reads the same table, ror 8, 16, 24 gives Td1, Td2, Td3
+ ldr r1,[r10,r1,lsl#2] @ Td0[s1>>24]
+ and r7,lr,r2,lsr#8 @ i0
+ eor r5,r8,r5,ror#8
+ and r8,lr,r2 @ i1
+ eor r6,r9,r6,ror#8
+ and r9,lr,r2,lsr#16
+ ldr r7,[r10,r7,lsl#2] @ Td2[s2>>8]
+ eor r1,r1,r4,ror#8
+ ldr r8,[r10,r8,lsl#2] @ Td3[s2>>0]
+ mov r2,r2,lsr#24
+
+ ldr r9,[r10,r9,lsl#2] @ Td1[s2>>16]
+ eor r0,r0,r7,ror#16
+ ldr r2,[r10,r2,lsl#2] @ Td0[s2>>24]
+ and r7,lr,r3,lsr#16 @ i0
+ eor r1,r1,r8,ror#24
+ and r8,lr,r3,lsr#8 @ i1
+ eor r6,r9,r6,ror#8
+ and r9,lr,r3 @ i2
+ ldr r7,[r10,r7,lsl#2] @ Td1[s3>>16]
+ eor r2,r2,r5,ror#8
+ ldr r8,[r10,r8,lsl#2] @ Td2[s3>>8]
+ mov r3,r3,lsr#24
+
+ ldr r9,[r10,r9,lsl#2] @ Td3[s3>>0]
+ eor r0,r0,r7,ror#8
+ ldr r7,[r11],#16 @ round key word 0, r11 advances by one round key
+ eor r1,r1,r8,ror#16
+ ldr r3,[r10,r3,lsl#2] @ Td0[s3>>24]
+ eor r2,r2,r9,ror#24
+
+ ldr r4,[r11,#-12] @ round key word 1
+ eor r0,r0,r7
+ ldr r5,[r11,#-8] @ round key word 2
+ eor r3,r3,r6,ror#8
+ ldr r6,[r11,#-4] @ round key word 3
+ and r7,lr,r0,lsr#16 @ start splitting the new s0 for the next pass
+ eor r1,r1,r4
+ and r8,lr,r0,lsr#8
+ eor r2,r2,r5
+ and r9,lr,r0
+ eor r3,r3,r6
+ mov r0,r0,lsr#24
+
+ subs r12,r12,#1
+ bne .Ldec_loop
+
+ add r10,r10,#1024 @ last round: r10 = Td4, the byte table after the 256 words
+
+ ldr r5,[r10,#0] @ prefetch Td4
+ ldr r6,[r10,#32] @ one load per 32 bytes across the table, loaded values are never used
+ ldr r4,[r10,#64]
+ ldr r5,[r10,#96]
+ ldr r6,[r10,#128]
+ ldr r4,[r10,#160]
+ ldr r5,[r10,#192]
+ ldr r6,[r10,#224]
+
+ ldrb r0,[r10,r0] @ Td4[s0>>24]
+ ldrb r4,[r10,r7] @ Td4[s0>>16]
+ and r7,lr,r1 @ i0
+ ldrb r5,[r10,r8] @ Td4[s0>>8]
+ and r8,lr,r1,lsr#16
+ ldrb r6,[r10,r9] @ Td4[s0>>0]
+ and r9,lr,r1,lsr#8
+
+ ldrb r7,[r10,r7] @ Td4[s1>>0]
+ ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24]
+ ldrb r8,[r10,r8] @ Td4[s1>>16]
+ eor r0,r7,r0,lsl#24 @ output words are assembled byte by byte with shifts
+ ldrb r9,[r10,r9] @ Td4[s1>>8]
+ eor r1,r4,r1,lsl#8
+ and r7,lr,r2,lsr#8 @ i0
+ eor r5,r5,r8,lsl#8
+ and r8,lr,r2 @ i1
+ ldrb r7,[r10,r7] @ Td4[s2>>8]
+ eor r6,r6,r9,lsl#8
+ ldrb r8,[r10,r8] @ Td4[s2>>0]
+ and r9,lr,r2,lsr#16
+
+ ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24]
+ eor r0,r0,r7,lsl#8
+ ldrb r9,[r10,r9] @ Td4[s2>>16]
+ eor r1,r8,r1,lsl#16
+ and r7,lr,r3,lsr#16 @ i0
+ eor r2,r5,r2,lsl#16
+ and r8,lr,r3,lsr#8 @ i1
+ ldrb r7,[r10,r7] @ Td4[s3>>16]
+ eor r6,r6,r9,lsl#16
+ ldrb r8,[r10,r8] @ Td4[s3>>8]
+ and r9,lr,r3 @ i2
+
+ ldrb r9,[r10,r9] @ Td4[s3>>0]
+ ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24]
+ eor r0,r0,r7,lsl#16
+ ldr r7,[r11,#0] @ last round key, words 0..3 into r7, r4, r5, r6
+ eor r1,r1,r8,lsl#8
+ ldr r4,[r11,#4]
+ eor r2,r9,r2,lsl#8
+ ldr r5,[r11,#8]
+ eor r3,r6,r3,lsl#24
+ ldr r6,[r11,#12]
+
+ eor r0,r0,r7
+ eor r1,r1,r4
+ eor r2,r2,r5
+ eor r3,r3,r6
+
+ sub r10,r10,#1024 @ undo the +1024 so r10 is the Td base again
+ ldr pc,[sp],#4 @ pop and return
+.size _armv4_AES_decrypt,.-_armv4_AES_decrypt
+.asciz "AES for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
+.align 2
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/aes/aes-elf-x86_64.S b/ext/libressl/crypto/aes/aes-elf-x86_64.S
new file mode 100644
index 0000000..83c0053
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes-elf-x86_64.S
@@ -0,0 +1,2547 @@
+#include "x86_arch.h"
+
+.text
+.type _x86_64_AES_encrypt,@function /* Internal: table-driven AES encryption of one block held in eax/ebx/ecx/edx. */
+.align 16 /* In: r15 = round-key array (loop count read from offset 240), r14 = lookup-table base. */
+_x86_64_AES_encrypt: /* Out: eax/ebx/ecx/edx. Also writes esi, edi, ebp, r8, r10-r13 and advances r15. */
+ xorl 0(%r15),%eax /* state ^= first round key */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+
+ movl 240(%r15),%r13d /* r13d = count stored at key+240 */
+ subl $1,%r13d /* the last round is done separately after the loop */
+ jmp .Lenc_loop
+.align 16
+.Lenc_loop:
+
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d /* table entries are 8 bytes apart; byte offsets 0..3 pick differently aligned 32-bit views */
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+
+ movzbl %dh,%esi
+ shrl $16,%ecx /* expose the upper two bytes of each state word */
+ movzbl %ah,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+
+ shrl $16,%ebx
+ leaq 16(%r15),%r15 /* step to the next round key */
+ shrl $16,%eax
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+
+ movl 12(%r15),%edx /* old edx is fully consumed; start loading the round key */
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rdi,8),%r12d
+ xorl 1(%r14,%rbp,8),%r8d
+
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r10d,%eax /* new state = round key ^ accumulated table words */
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ subl $1,%r13d
+ jnz .Lenc_loop
+ movzbl %al,%esi /* final round: only one byte lane of each table lookup is kept */
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl 2(%r14,%rsi,8),%r10d /* single byte at offset 2 of the entry */
+ movzbl 2(%r14,%rdi,8),%r11d
+ movzbl 2(%r14,%rbp,8),%r12d
+
+ movzbl %dl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl 2(%r14,%rsi,8),%r8d
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+
+ andl $65280,%edi /* 0x0000ff00 */
+ andl $65280,%ebp
+
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%ecx
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ shrl $16,%edx
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+
+ andl $65280,%esi
+ andl $65280,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%eax
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+
+ andl $16711680,%esi /* 0x00ff0000 */
+ andl $16711680,%edi
+ andl $16711680,%ebp
+
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+
+ movzbl %bl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 2(%r14,%rbp,8),%ebp
+
+ andl $16711680,%esi
+ andl $4278190080,%edi /* 0xff000000 */
+ andl $4278190080,%ebp
+
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movl 16+12(%r15),%edx /* last round key sits 16 bytes past the current r15 */
+ movl 2(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 16+0(%r15),%eax
+
+ andl $4278190080,%esi
+ andl $4278190080,%edi
+
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ xorl %r10d,%eax /* result = last round key ^ assembled bytes */
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq
+.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt
+.type _x86_64_AES_encrypt_compact,@function /* Internal: AES block encryption using only a 256-byte byte table plus packed arithmetic. */
+.align 16 /* In: eax/ebx/ecx/edx = state, r15 = round keys, r14 = byte table, 16(%rsp) = address of the last round key (stored by the caller). */
+_x86_64_AES_encrypt_compact: /* Out: eax/ebx/ecx/edx. Also writes esi, edi, ebp, r8-r13 and advances r15. */
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi /* eight loads at 32-byte strides whose results are never used; presumably a cache prefetch of the table */
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp .Lenc_loop_compact
+.align 16
+.Lenc_loop_compact:
+ xorl 0(%r15),%eax /* state ^= current round key */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15
+ movzbl %al,%r10d
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d /* byte-wide table lookup */
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+
+ movzbl %dl,%r8d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+
+ movzbl %dh,%ebp
+ movzbl %ah,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+
+ movzbl %cl,%edi
+ shll $8,%r9d /* move the looked-up byte into its lane */
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+
+ movzbl %bl,%ebp
+ movzbl %dh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+
+ movzbl %ah,%edi
+ shrl $8,%ecx /* ecx and ebx are each reduced to their top byte, used directly as indices */
+ shrl $8,%ebx
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rcx,1),%edx
+ movzbl (%r14,%rbx,1),%ecx
+ shll $16,%r9d
+ shll $16,%r13d
+ shll $16,%ebp
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+ xorl %ebp,%r8d
+
+ shll $24,%esi
+ shll $24,%edi
+ shll $24,%edx
+ xorl %esi,%r10d
+ shll $24,%ecx
+ xorl %edi,%r11d
+ movl %r10d,%eax
+ movl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ cmpq 16(%rsp),%r15 /* reached the last round key? then skip the column mixing below */
+ je .Lenc_compact_done
+ movl %eax,%esi /* packed per-byte doubling: ((x&0x80..)-((x&0x80..)>>7))&0x1b.. ^ ((x+x)&0xfe..) */
+ movl %ebx,%edi
+ andl $2155905152,%esi /* 0x80808080 */
+ andl $2155905152,%edi
+ movl %esi,%r10d
+ movl %edi,%r11d
+ shrl $7,%r10d
+ leal (%rax,%rax,1),%r8d
+ shrl $7,%r11d
+ leal (%rbx,%rbx,1),%r9d
+ subl %r10d,%esi
+ subl %r11d,%edi
+ andl $4278124286,%r8d /* 0xfefefefe */
+ andl $4278124286,%r9d
+ andl $454761243,%esi /* 0x1b1b1b1b */
+ andl $454761243,%edi
+ movl %eax,%r10d
+ movl %ebx,%r11d
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+
+ xorl %r8d,%eax /* combine doubled and rotated copies; presumably AES MixColumns */
+ xorl %r9d,%ebx
+ movl %ecx,%esi
+ movl %edx,%edi
+ roll $24,%eax
+ roll $24,%ebx
+ andl $2155905152,%esi
+ andl $2155905152,%edi
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movl %esi,%r12d
+ movl %edi,%ebp
+ rorl $16,%r10d
+ rorl $16,%r11d
+ shrl $7,%r12d
+ leal (%rcx,%rcx,1),%r8d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ shrl $7,%ebp
+ leal (%rdx,%rdx,1),%r9d
+ rorl $8,%r10d
+ rorl $8,%r11d
+ subl %r12d,%esi
+ subl %ebp,%edi
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+
+ andl $4278124286,%r8d
+ andl $4278124286,%r9d
+ andl $454761243,%esi
+ andl $454761243,%edi
+ movl %ecx,%r12d
+ movl %edx,%ebp
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ roll $24,%ecx
+ roll $24,%edx
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ movl 0(%r14),%esi /* esi/edi/r8d/r9d loaded here are overwritten before use next iteration; presumably keeps the table cached */
+ rorl $16,%r12d
+ rorl $16,%ebp
+ movl 64(%r14),%edi
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ movl 128(%r14),%r8d
+ rorl $8,%r12d
+ rorl $8,%ebp
+ movl 192(%r14),%r9d
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ jmp .Lenc_loop_compact
+.align 16
+.Lenc_compact_done:
+ xorl 0(%r15),%eax /* result = state ^ last round key */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq
+.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
+.globl AES_encrypt /* Public entry: encrypt one 16-byte block with the compact routine. */
+.type AES_encrypt,@function
+.align 16
+.globl asm_AES_encrypt
+.hidden asm_AES_encrypt
+asm_AES_encrypt: /* alias of AES_encrypt */
+AES_encrypt: /* In: rdi = input block, rsi = output block, rdx = key schedule (count read from offset 240). */
+ pushq %rbx /* preserve the callee-saved registers used below */
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+ movq %rsp,%r10 /* r10 = stack pointer after the pushes, needed for the epilogue */
+ leaq -63(%rdx),%rcx
+ andq $-64,%rsp /* 64-byte align the frame */
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx /* displacement in 0..960 (multiple of 64) derived from the key and stack addresses */
+ subq %rcx,%rsp
+ subq $32,%rsp /* four 8-byte slots: key, key end, out pointer, saved rsp */
+
+ movq %rsi,16(%rsp) /* out pointer */
+ movq %r10,24(%rsp) /* original stack pointer */
+.Lenc_prologue:
+
+ movq %rdx,%r15
+ movl 240(%r15),%r13d
+
+ movl 0(%rdi),%eax /* load the input block into the state registers */
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+
+ shll $4,%r13d /* count * 16 = byte offset of the last round key */
+ leaq (%r15,%r13,1),%rbp
+ movq %r15,(%rsp)
+ movq %rbp,8(%rsp) /* the callee sees this slot at 16(%rsp) once the return address is pushed */
+
+
+ leaq .LAES_Te+2048(%rip),%r14 /* region 2048 bytes into the Te table */
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp /* pick offset 0/256/512/768 from the stack address */
+ leaq (%r14,%rbp,1),%r14
+
+ call _x86_64_AES_encrypt_compact
+
+ movq 16(%rsp),%r9 /* out pointer */
+ movq 24(%rsp),%rsi /* original stack pointer */
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ movq (%rsi),%r15 /* reload the saved registers from the original stack area */
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp /* drop the six saved slots */
+.Lenc_epilogue:
+ retq
+.size AES_encrypt,.-AES_encrypt
+.type _x86_64_AES_decrypt,@function /* Internal: table-driven AES decryption of one block held in eax/ebx/ecx/edx. */
+.align 16 /* In: r15 = round-key array (loop count read from offset 240), r14 = lookup-table base. */
+_x86_64_AES_decrypt: /* Out: eax/ebx/ecx/edx. Also writes esi, edi, ebp, r8, r10-r13 and advances r15; r14 is restored. */
+ xorl 0(%r15),%eax /* state ^= first round key */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+
+ movl 240(%r15),%r13d /* r13d = count stored at key+240 */
+ subl $1,%r13d /* the last round is done separately after the loop */
+ jmp .Ldec_loop
+.align 16
+.Ldec_loop:
+
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d /* table entries are 8 bytes apart; byte offsets 0..3 pick differently aligned 32-bit views */
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+
+ movzbl %bh,%esi
+ shrl $16,%eax /* expose the upper two bytes of each state word */
+ movzbl %ch,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+
+ shrl $16,%ebx
+ leaq 16(%r15),%r15 /* step to the next round key */
+ shrl $16,%ecx
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+
+ movzbl %dh,%esi
+ movl 12(%r15),%edx /* old edx is fully consumed; start loading the round key */
+ movzbl %ah,%ebp
+ xorl 1(%r14,%rsi,8),%r12d
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rbp,8),%r8d
+
+ xorl %r10d,%eax /* new state = round key ^ accumulated table words */
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r12d,%ecx
+ xorl %r11d,%ebx
+ xorl %r8d,%edx
+ subl $1,%r13d
+ jnz .Ldec_loop
+ leaq 2048(%r14),%r14 /* final round uses the byte table 2048 bytes above the base */
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl (%r14,%rsi,1),%r10d
+ movzbl (%r14,%rdi,1),%r11d
+ movzbl (%r14,%rbp,1),%r12d
+
+ movzbl %dl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movzbl (%r14,%rsi,1),%r8d
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $8,%edi /* move the looked-up byte into its lane */
+ shll $8,%ebp
+
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%edx
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ shrl $16,%eax
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+
+ shll $8,%esi
+ shll $8,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%ecx
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $16,%esi
+ shll $16,%edi
+ shll $16,%ebp
+
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+
+ movzbl %bl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $16,%esi
+ shll $24,%edi
+ shll $24,%ebp
+
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movl 16+12(%r15),%edx /* last round key sits 16 bytes past the current r15 */
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movl 16+0(%r15),%eax
+
+ shll $24,%esi
+ shll $24,%edi
+
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ leaq -2048(%r14),%r14 /* restore the table base for the caller */
+ xorl %r10d,%eax /* result = last round key ^ assembled bytes */
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq
+.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt
+.type _x86_64_AES_decrypt_compact,@function /* Internal: AES block decryption using a 256-byte byte table plus packed 64-bit arithmetic. */
+.align 16 /* In: eax/ebx/ecx/edx = state, r15 = round keys, r14 = byte table (three 64-bit masks read at r14+256), 16(%rsp) = address of the last round key. */
+_x86_64_AES_decrypt_compact: /* Out: eax/ebx/ecx/edx. Also writes rsi, rdi, rbp, r8-r13 and advances r15. */
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi /* eight loads at 32-byte strides whose results are never used; presumably a cache prefetch of the table */
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp .Ldec_loop_compact
+
+.align 16
+.Ldec_loop_compact:
+ xorl 0(%r15),%eax /* state ^= current round key */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15
+ movzbl %al,%r10d
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d /* byte-wide table lookup */
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+
+ movzbl %dl,%r8d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+
+ movzbl %bh,%ebp
+ movzbl %ch,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+
+ movzbl %cl,%edi
+ shll $8,%r9d /* move the looked-up byte into its lane */
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+
+ movzbl %bl,%ebp
+ movzbl %bh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+
+ movzbl %ch,%edi
+ shll $16,%r9d
+ shll $16,%r13d
+ movzbl (%r14,%rdi,1),%ebx
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+
+ movzbl %dh,%edi
+ shrl $8,%eax /* eax is reduced to its top byte, used directly as an index */
+ shll $16,%ebp
+ movzbl (%r14,%rdi,1),%ecx
+ movzbl (%r14,%rax,1),%edx
+ xorl %ebp,%r8d
+
+ shll $24,%esi
+ shll $24,%ebx
+ shll $24,%ecx
+ xorl %esi,%r10d
+ shll $24,%edx
+ xorl %r11d,%ebx
+ movl %r10d,%eax
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ cmpq 16(%rsp),%r15 /* reached the last round key? then skip the column transform below */
+ je .Ldec_compact_done
+
+ movq 256+0(%r14),%rsi /* rsi/rdi/rbp = three 64-bit masks stored after the byte table; values not visible here */
+ shlq $32,%rbx
+ shlq $32,%rdx
+ movq 256+8(%r14),%rdi
+ orq %rbx,%rax /* pack the state into two 64-bit registers: rax = ebx:eax, rcx = edx:ecx */
+ orq %rdx,%rcx
+ movq 256+16(%r14),%rbp
+ movq %rax,%rbx /* first of three successive packed doublings, each ((x&rsi)-((x&rsi)>>7))&rbp ^ ((x+x)&rdi) */
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8
+ movq %rdx,%r11
+
+ andq %rsi,%rbx /* second doubling */
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+
+ andq %rsi,%rbx /* third doubling */
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10
+ xorq %rdx,%r13
+
+ xorq %r10,%rax /* combine the multiples with rotations; presumably AES InvMixColumns */
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx /* unpack the high halves back into ebx/edx */
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq 0(%r14),%rsi /* rsi/rdi/rbp/r10/r13 loaded here are overwritten before use next iteration; presumably keeps the table cached */
+ shrq $32,%r8
+ shrq $32,%r11
+ movq 64(%r14),%rdi
+ roll $16,%r9d
+ roll $16,%r12d
+ movq 128(%r14),%rbp
+ roll $16,%r8d
+ roll $16,%r11d
+ movq 192(%r14),%r10
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+ movq 256(%r14),%r13
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ jmp .Ldec_loop_compact
+.align 16
+.Ldec_compact_done:
+ xorl 0(%r15),%eax /* result = state ^ last round key */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq
+.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
+.globl AES_decrypt /* Public entry: decrypt one 16-byte block with the compact routine. */
+.type AES_decrypt,@function
+.align 16
+.globl asm_AES_decrypt
+.hidden asm_AES_decrypt
+asm_AES_decrypt: /* alias of AES_decrypt */
+AES_decrypt: /* In: rdi = input block, rsi = output block, rdx = key schedule (count read from offset 240). */
+ pushq %rbx /* preserve the callee-saved registers used below */
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+ movq %rsp,%r10 /* r10 = stack pointer after the pushes, needed for the epilogue */
+ leaq -63(%rdx),%rcx
+ andq $-64,%rsp /* 64-byte align the frame */
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx /* displacement in 0..960 (multiple of 64) derived from the key and stack addresses */
+ subq %rcx,%rsp
+ subq $32,%rsp /* four 8-byte slots: key, key end, out pointer, saved rsp */
+
+ movq %rsi,16(%rsp) /* out pointer */
+ movq %r10,24(%rsp) /* original stack pointer */
+.Ldec_prologue:
+
+ movq %rdx,%r15
+ movl 240(%r15),%r13d
+
+ movl 0(%rdi),%eax /* load the input block into the state registers */
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+
+ shll $4,%r13d /* count * 16 = byte offset of the last round key */
+ leaq (%r15,%r13,1),%rbp
+ movq %r15,(%rsp)
+ movq %rbp,8(%rsp) /* the callee sees this slot at 16(%rsp) once the return address is pushed */
+
+
+ leaq .LAES_Td+2048(%rip),%r14 /* region 2048 bytes into the Td table */
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp /* pick offset 0/256/512/768 from the stack address */
+ leaq (%r14,%rbp,1),%r14
+ shrq $3,%rbp /* plus a further rbp/8 = 0/32/64/96 bytes */
+ addq %rbp,%r14
+
+ call _x86_64_AES_decrypt_compact
+
+ movq 16(%rsp),%r9 /* out pointer */
+ movq 24(%rsp),%rsi /* original stack pointer */
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ movq (%rsi),%r15 /* reload the saved registers from the original stack area */
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp /* drop the six saved slots */
+.Ldec_epilogue:
+ retq
+.size AES_decrypt,.-AES_decrypt
+.globl AES_set_encrypt_key /* Public wrapper: saves registers, then defers to _x86_64_AES_set_encrypt_key. */
+.type AES_set_encrypt_key,@function
+.align 16
+AES_set_encrypt_key: /* rdi, rsi, rdx are passed through untouched; the helper's rax is returned as-is. */
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $8,%rsp /* pad: six pushes plus the return address leave rsp 8 short of 16-byte alignment */
+.Lenc_key_prologue:
+
+ call _x86_64_AES_set_encrypt_key
+
+ movq 8(%rsp),%r15 /* reload saved registers by offset (slot 0 is the pad) */
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp /* release the pad and the six saved slots */
+.Lenc_key_epilogue:
+ retq
+.size AES_set_encrypt_key,.-AES_set_encrypt_key
+
+.type _x86_64_AES_set_encrypt_key,@function /* Internal: expand a 128/192/256-bit user key into the round-key schedule. */
+.align 16 /* In: rdi = user key, esi = key length in bits, rdx = schedule to fill. Writes rax, rbx, rcx, rdx, rsi, rdi, rbp, r8. */
+_x86_64_AES_set_encrypt_key: /* Out: rax = 0 on success, -1 if either pointer is null, -2 for an unsupported bit length. */
+ movl %esi,%ecx /* ecx = bits */
+ movq %rdi,%rsi /* rsi = user key */
+ movq %rdx,%rdi /* rdi = schedule */
+
+ testq $-1,%rsi /* null-pointer checks */
+ jz .Lbadpointer
+ testq $-1,%rdi
+ jz .Lbadpointer
+
+ leaq .LAES_Te(%rip),%rbp
+ leaq 2048+128(%rbp),%rbp /* rbp = Te+2048+128; byte lookups below use -128(%rbp,idx) */
+
+
+ movl 0-128(%rbp),%eax /* eight loads at 32-byte strides whose results are never used; presumably a cache prefetch */
+ movl 32-128(%rbp),%ebx
+ movl 64-128(%rbp),%r8d
+ movl 96-128(%rbp),%edx
+ movl 128-128(%rbp),%eax
+ movl 160-128(%rbp),%ebx
+ movl 192-128(%rbp),%r8d
+ movl 224-128(%rbp),%edx
+
+ cmpl $128,%ecx
+ je .L10rounds
+ cmpl $192,%ecx
+ je .L12rounds
+ cmpl $256,%ecx
+ je .L14rounds
+ movq $-2,%rax /* unsupported key length */
+ jmp .Lexit
+
+.L10rounds:
+ movq 0(%rsi),%rax /* copy the 16-byte user key as the first round key */
+ movq 8(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rdx,8(%rdi)
+
+ shrq $32,%rdx /* edx = last key word; eax still holds the first */
+ xorl %ecx,%ecx /* ecx = iteration index, also indexes the constants at Te+2048+1024 */
+ jmp .L10shortcut
+.align 4
+.L10loop:
+ movl 0(%rdi),%eax
+ movl 12(%rdi),%edx
+.L10shortcut:
+ movzbl %dl,%esi /* substitute each byte of edx via the byte table, placing results in rotated lanes */
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax /* per-iteration 32-bit constant */
+ movl %eax,16(%rdi) /* each next word = previous new word ^ word 16 bytes back */
+ xorl 4(%rdi),%eax
+ movl %eax,20(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,24(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,28(%rdi)
+ addl $1,%ecx
+ leaq 16(%rdi),%rdi
+ cmpl $10,%ecx
+ jl .L10loop
+
+ movl $10,80(%rdi) /* rdi advanced 160 bytes, so this lands at schedule+240 */
+ xorq %rax,%rax
+ jmp .Lexit
+
+.L12rounds:
+ movq 0(%rsi),%rax /* copy the 24-byte user key */
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rdx,16(%rdi)
+
+ shrq $32,%rdx /* edx = last key word */
+ xorl %ecx,%ecx
+ jmp .L12shortcut
+.align 4
+.L12loop:
+ movl 0(%rdi),%eax
+ movl 20(%rdi),%edx
+.L12shortcut:
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,24(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,28(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,32(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,36(%rdi)
+
+ cmpl $7,%ecx /* the final pass stops after four words */
+ je .L12break
+ addl $1,%ecx
+
+ xorl 16(%rdi),%eax
+ movl %eax,40(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,44(%rdi)
+
+ leaq 24(%rdi),%rdi
+ jmp .L12loop
+.L12break:
+ movl $12,72(%rdi) /* rdi advanced 7*24 = 168 bytes, so this lands at schedule+240 */
+ xorq %rax,%rax
+ jmp .Lexit
+
+.L14rounds:
+ movq 0(%rsi),%rax /* copy the 32-byte user key */
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rcx
+ movq 24(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+
+ shrq $32,%rdx /* edx = last key word */
+ xorl %ecx,%ecx
+ jmp .L14shortcut
+.align 4
+.L14loop:
+ movl 0(%rdi),%eax
+ movl 28(%rdi),%edx
+.L14shortcut:
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,32(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,36(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,40(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,44(%rdi)
+
+ cmpl $6,%ecx /* the final pass stops after four words */
+ je .L14break
+ addl $1,%ecx
+
+ movl %eax,%edx /* second half: byte substitution with no lane rotation and no constant */
+ movl 16(%rdi),%eax
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ shll $8,%ebx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movl %eax,48(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,52(%rdi)
+ xorl 24(%rdi),%eax
+ movl %eax,56(%rdi)
+ xorl 28(%rdi),%eax
+ movl %eax,60(%rdi)
+
+ leaq 32(%rdi),%rdi
+ jmp .L14loop
+.L14break:
+ movl $14,48(%rdi) /* rdi advanced 6*32 = 192 bytes, so this lands at schedule+240 */
+ xorq %rax,%rax
+ jmp .Lexit
+
+.Lbadpointer:
+ movq $-1,%rax /* null user key or schedule pointer */
+.Lexit:
+ retq
+.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
+.globl AES_set_decrypt_key /* Public entry: build the encryption schedule, reverse its round-key order, then transform the inner round keys. */
+.type AES_set_decrypt_key,@function
+.align 16
+AES_set_decrypt_key: /* rdi, rsi, rdx are passed through to _x86_64_AES_set_encrypt_key; returns 0 or that helper's non-zero result. */
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rdx /* keep the schedule pointer across the call; also the seventh 8-byte slot */
+.Ldec_key_prologue:
+
+ call _x86_64_AES_set_encrypt_key
+ movq (%rsp),%r8 /* r8 = schedule pointer saved above */
+ cmpl $0,%eax
+ jne .Labort /* propagate the helper's error code */
+
+ movl 240(%r8),%r14d /* r14d = count written by the helper at schedule+240 */
+ xorq %rdi,%rdi
+ leaq (%rdi,%r14,4),%rcx /* rcx = count * 4 */
+ movq %r8,%rsi /* rsi = first round key */
+ leaq (%r8,%rcx,4),%rdi /* rdi = schedule + count*16 = last round key */
+.align 4
+.Linvert:
+ movq 0(%rsi),%rax /* swap the 16-byte round keys at rsi and rdi, moving inward until they meet */
+ movq 8(%rsi),%rbx
+ movq 0(%rdi),%rcx
+ movq 8(%rdi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,0(%rsi)
+ movq %rdx,8(%rsi)
+ leaq 16(%rsi),%rsi
+ leaq -16(%rdi),%rdi
+ cmpq %rsi,%rdi
+ jne .Linvert
+
+ leaq .LAES_Te+2048+1024(%rip),%rax
+
+ movq 40(%rax),%rsi /* three 64-bit masks from the table area; values not visible here */
+ movq 48(%rax),%rdi
+ movq 56(%rax),%rbp
+
+ movq %r8,%r15
+ subl $1,%r14d /* count-1 iterations: the first and last round keys are left untouched */
+.align 4
+.Lpermute:
+ leaq 16(%r15),%r15
+ movq 0(%r15),%rax /* rax, rcx = the round key as two 64-bit halves */
+ movq 8(%r15),%rcx
+ movq %rax,%rbx /* first of three successive packed doublings, each ((x&rsi)-((x&rsi)>>7))&rbp ^ ((x+x)&rdi) */
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8
+ movq %rdx,%r11
+
+ andq %rsi,%rbx /* second doubling */
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+
+ andq %rsi,%rbx /* third doubling */
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10
+ xorq %rdx,%r13
+
+ xorq %r10,%rax /* combine the multiples with rotations; presumably AES InvMixColumns */
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx /* split the high halves out into ebx/edx */
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+
+ shrq $32,%r8
+ shrq $32,%r11
+
+ roll $16,%r9d
+ roll $16,%r12d
+
+ roll $16,%r8d
+ roll $16,%r11d
+
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ movl %eax,0(%r15) /* store the transformed round key back in place */
+ movl %ebx,4(%r15)
+ movl %ecx,8(%r15)
+ movl %edx,12(%r15)
+ subl $1,%r14d
+ jnz .Lpermute
+
+ xorq %rax,%rax /* success */
+.Labort:
+ movq 8(%rsp),%r15 /* reload saved registers by offset (slot 0 holds the pushed rdx) */
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp /* release all seven slots */
+.Ldec_key_epilogue:
+ retq
+.size AES_set_decrypt_key,.-AES_set_decrypt_key
+.globl AES_cbc_encrypt /* Public entry: AES in CBC mode, with a fast (large-table) path and a slow (compact) path. */
+.type AES_cbc_encrypt,@function
+.align 16
+
+.hidden OPENSSL_ia32cap_P
+.globl asm_AES_cbc_encrypt
+.hidden asm_AES_cbc_encrypt
+asm_AES_cbc_encrypt: /* alias of AES_cbc_encrypt */
+AES_cbc_encrypt: /* In: rdi = in, rsi = out, rdx = length in bytes, rcx = key schedule, r8 = 16-byte IV (updated on return), r9d = non-zero to encrypt, zero to decrypt. */
+ cmpq $0,%rdx
+ je .Lcbc_epilogue /* zero length: return before anything is pushed */
+ pushfq /* flags are saved because cld below changes the direction flag */
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+.Lcbc_prologue:
+
+ cld
+ movl %r9d,%r9d /* zero-extend the 32-bit enc flag */
+
+ leaq .LAES_Te(%rip),%r14
+ cmpq $0,%r9
+ jne .Lcbc_picked_te
+ leaq .LAES_Td(%rip),%r14 /* decrypting: use the Td table instead */
+.Lcbc_picked_te:
+
+ movl OPENSSL_ia32cap_P(%rip),%r10d
+ cmpq $512,%rdx /* fast path needs length >= 512 ... */
+ jb .Lcbc_slow_prologue
+ testq $15,%rdx /* ... a multiple of 16 ... */
+ jnz .Lcbc_slow_prologue
+ btl $IA32CAP_BIT0_HT,%r10d /* ... and this capability bit clear (macro not defined here; presumably from x86_arch.h) */
+ jc .Lcbc_slow_prologue
+
+
+ leaq -88-248(%rsp),%r15 /* candidate frame: 88 bytes of slots plus 248 bytes for a key copy */
+ andq $-64,%r15
+
+
+ movq %r14,%r10 /* compare page offsets (low 12 bits) of the table start, table end and frame */
+ leaq 2304(%r14),%r11
+ movq %r15,%r12
+ andq $4095,%r10
+ andq $4095,%r11
+ andq $4095,%r12
+
+ cmpq %r11,%r12
+ jb .Lcbc_te_break_out
+ subq %r11,%r12
+ subq %r12,%r15
+ jmp .Lcbc_te_ok
+.Lcbc_te_break_out:
+ subq %r10,%r12
+ andq $4095,%r12
+ addq $320,%r12
+ subq %r12,%r15
+.align 4
+.Lcbc_te_ok:
+
+ xchgq %rsp,%r15 /* switch to the new frame; r15 = stack pointer after the pushes */
+
+ movq %r15,16(%rsp) /* saved for .Lcbc_exit */
+.Lcbc_fast_body:
+ movq %rdi,24(%rsp) /* spill the arguments */
+ movq %rsi,32(%rsp)
+ movq %rdx,40(%rsp) /* remaining length */
+ movq %rcx,48(%rsp)
+ movq %r8,56(%rsp) /* IV pointer */
+ movl $0,80+240(%rsp) /* marker: stays zero unless the key is copied to the stack below */
+ movq %r8,%rbp
+ movq %r9,%rbx /* rbx = enc flag */
+ movq %rsi,%r9 /* r9 = out */
+ movq %rdi,%r8 /* r8 = in */
+ movq %rcx,%r15 /* r15 = key */
+
+ movl 240(%r15),%eax
+
+ movq %r15,%r10
+ subq %r14,%r10
+ andq $4095,%r10 /* (key - table) mod 4096 */
+ cmpq $2304,%r10
+ jb .Lcbc_do_ecopy
+ cmpq $4096-248,%r10
+ jb .Lcbc_skip_ecopy /* copy only when that offset is below 2304 or at least 4096-248 */
+.align 4
+.Lcbc_do_ecopy:
+ movq %r15,%rsi
+ leaq 80(%rsp),%rdi
+ leaq 80(%rsp),%r15 /* use the stack copy of the key from here on */
+ movl $30,%ecx /* 30 quadwords = 240 bytes */
+.long 0x90A548F3 /* bytes F3 48 A5 90: rep movsq; nop */
+ movl %eax,(%rdi) /* rdi now points at 80+240(%rsp): store the count, which also sets the marker */
+.Lcbc_skip_ecopy:
+ movq %r15,0(%rsp) /* key pointer reloaded before every block */
+
+ movl $18,%ecx /* 18 * 128 = 2304 bytes of table touched */
+.align 4
+.Lcbc_prefetch_te:
+ movq 0(%r14),%r10
+ movq 32(%r14),%r11
+ movq 64(%r14),%r12
+ movq 96(%r14),%r13
+ leaq 128(%r14),%r14
+ subl $1,%ecx
+ jnz .Lcbc_prefetch_te
+ leaq -2304(%r14),%r14 /* rewind r14 to the table base */
+
+ cmpq $0,%rbx
+ je .LFAST_DECRYPT
+
+
+ movl 0(%rbp),%eax /* state = IV */
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+
+.align 4
+.Lcbc_fast_enc_loop:
+ xorl 0(%r8),%eax /* state (IV or previous ciphertext) ^= plaintext block */
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp) /* r8 is overwritten by the block routine */
+
+ call _x86_64_AES_encrypt
+
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10 /* at least one full block left? */
+ movq %r10,40(%rsp)
+ jnz .Lcbc_fast_enc_loop
+ movq 56(%rsp),%rbp
+ movl %eax,0(%rbp) /* write the last ciphertext block back as the IV */
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+
+ jmp .Lcbc_fast_cleanup
+
+
+.align 16
+.LFAST_DECRYPT:
+ cmpq %r8,%r9
+ je .Lcbc_fast_dec_in_place /* in == out needs the ciphertext saved before it is overwritten */
+
+ movq %rbp,64(%rsp) /* 64(%rsp) = pointer to the block to XOR with (initially the IV) */
+.align 4
+.Lcbc_fast_dec_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_decrypt
+
+ movq 64(%rsp),%rbp
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0(%rbp),%eax /* plaintext = decrypted block ^ previous ciphertext (or IV) */
+ xorl 4(%rbp),%ebx
+ xorl 8(%rbp),%ecx
+ xorl 12(%rbp),%edx
+ movq %r8,%rbp /* the current input block becomes the next XOR source */
+
+ subq $16,%r10 /* sets ZF for the jnz below */
+ movq %r10,40(%rsp)
+ movq %rbp,64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jnz .Lcbc_fast_dec_loop /* ZF still comes from the subq: mov and lea leave flags alone */
+ movq 56(%rsp),%r12
+ movq 0(%rbp),%r10 /* copy the last ciphertext block into the IV */
+ movq 8(%rbp),%r11
+ movq %r10,0(%r12)
+ movq %r11,8(%r12)
+ jmp .Lcbc_fast_cleanup
+
+.align 16
+.Lcbc_fast_dec_in_place:
+ movq 0(%rbp),%r10 /* keep a copy of the IV at 64(%rsp) */
+ movq 8(%rbp),%r11
+ movq %r10,0+64(%rsp)
+ movq %r11,8+64(%rsp)
+.align 4
+.Lcbc_fast_dec_in_place_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_decrypt
+
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+
+ movq 0(%r8),%r11 /* grab the ciphertext block before the output store overwrites it */
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jz .Lcbc_fast_dec_in_place_done
+
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ movq %r10,40(%rsp)
+ jmp .Lcbc_fast_dec_in_place_loop
+.Lcbc_fast_dec_in_place_done:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi) /* last ciphertext block becomes the IV */
+ movq %r12,8(%rdi)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+.align 4
+.Lcbc_fast_cleanup:
+ cmpl $0,80+240(%rsp) /* was the key copied to the stack? */
+ leaq 80(%rsp),%rdi
+ je .Lcbc_exit
+ movl $30,%ecx
+ xorq %rax,%rax
+.long 0x90AB48F3 /* bytes F3 48 AB 90: rep stosq; nop - zero the 240-byte key copy */
+
+ jmp .Lcbc_exit
+
+
+.align 16
+.Lcbc_slow_prologue:
+
+ leaq -88(%rsp),%rbp /* frame with 88 bytes of slots */
+ andq $-64,%rbp
+
+ leaq -88-63(%rcx),%r10
+ subq %rbp,%r10
+ negq %r10
+ andq $960,%r10 /* displacement in 0..960 (multiple of 64) derived from the key and frame addresses */
+ subq %r10,%rbp
+
+ xchgq %rsp,%rbp /* switch to the new frame; rbp = stack pointer after the pushes */
+
+ movq %rbp,16(%rsp) /* saved for .Lcbc_exit */
+.Lcbc_slow_body:
+
+
+
+
+ movq %r8,56(%rsp) /* IV pointer */
+ movq %r8,%rbp
+ movq %r9,%rbx /* rbx = enc flag */
+ movq %rsi,%r9 /* r9 = out */
+ movq %rdi,%r8 /* r8 = in */
+ movq %rcx,%r15 /* r15 = key */
+ movq %rdx,%r10 /* r10 = remaining length */
+
+ movl 240(%r15),%eax
+ movq %r15,0(%rsp)
+ shll $4,%eax
+ leaq (%r15,%rax,1),%rax
+ movq %rax,8(%rsp) /* last round key address; the compact routines read it at 16(%rsp) */
+
+
+ leaq 2048(%r14),%r14
+ leaq 768-8(%rsp),%rax
+ subq %r14,%rax
+ andq $768,%rax /* pick offset 0/256/512/768 from the stack address */
+ leaq (%r14,%rax,1),%r14
+
+ cmpq $0,%rbx
+ je .LSLOW_DECRYPT
+
+
+ testq $-16,%r10 /* flags consumed by the jz below; the movl loads do not change them */
+ movl 0(%rbp),%eax /* state = IV */
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+ jz .Lcbc_slow_enc_tail /* fewer than 16 bytes in total */
+
+.align 4
+.Lcbc_slow_enc_loop:
+ xorl 0(%r8),%eax
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp) /* r8-r10 are overwritten by the compact routine */
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+
+ call _x86_64_AES_encrypt_compact
+
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10
+ jnz .Lcbc_slow_enc_loop
+ testq $15,%r10
+ jnz .Lcbc_slow_enc_tail /* 1..15 bytes left over */
+ movq 56(%rsp),%rbp
+ movl %eax,0(%rbp) /* write the last ciphertext block back as the IV */
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+
+ jmp .Lcbc_exit
+
+.align 4
+.Lcbc_slow_enc_tail:
+ movq %rax,%r11 /* park eax/ecx: rax and rcx are needed by the string instructions */
+ movq %rcx,%r12
+ movq %r10,%rcx
+ movq %r8,%rsi
+ movq %r9,%rdi
+.long 0x9066A4F3 /* bytes F3 A4 66 90: rep movsb; 2-byte nop - copy the partial block to out */
+ movq $16,%rcx
+ subq %r10,%rcx
+ xorq %rax,%rax
+.long 0x9066AAF3 /* bytes F3 AA 66 90: rep stosb; 2-byte nop - zero-fill up to 16 bytes */
+ movq %r9,%r8 /* encrypt the padded block in place in the output buffer */
+ movq $16,%r10
+ movq %r11,%rax
+ movq %r12,%rcx
+ jmp .Lcbc_slow_enc_loop
+
+.align 16
+.LSLOW_DECRYPT:
+ shrq $3,%rax /* rax still holds the 0/256/512/768 offset; add a further rax/8 */
+ addq %rax,%r14
+
+ movq 0(%rbp),%r11 /* keep a copy of the IV at 64(%rsp) */
+ movq 8(%rbp),%r12
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+.align 4
+.Lcbc_slow_dec_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+
+ call _x86_64_AES_decrypt_compact
+
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax /* plaintext = decrypted block ^ previous ciphertext (or IV) */
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+
+ movq 0(%r8),%r11 /* current ciphertext block, read before a possible in-place overwrite */
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jc .Lcbc_slow_dec_partial /* fewer than 16 bytes were left */
+ jz .Lcbc_slow_dec_done
+
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jmp .Lcbc_slow_dec_loop
+.Lcbc_slow_dec_done:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi) /* last ciphertext block becomes the IV */
+ movq %r12,8(%rdi)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ jmp .Lcbc_exit
+
+.align 4
+.Lcbc_slow_dec_partial:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+
+ movl %eax,0+64(%rsp) /* stage the plaintext block on the stack */
+ movl %ebx,4+64(%rsp)
+ movl %ecx,8+64(%rsp)
+ movl %edx,12+64(%rsp)
+
+ movq %r9,%rdi
+ leaq 64(%rsp),%rsi
+ leaq 16(%r10),%rcx /* r10 went negative above; r10+16 = bytes actually remaining */
+.long 0x9066A4F3 /* bytes F3 A4 66 90: rep movsb; 2-byte nop */
+ jmp .Lcbc_exit
+
+.align 16
+.Lcbc_exit:
+ movq 16(%rsp),%rsi /* rsi = stack pointer as it was right after the pushes */
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp /* leaves the saved flags on top of the stack */
+.Lcbc_popfq:
+ popfq
+.Lcbc_epilogue:
+ retq
+.size AES_cbc_encrypt,.-AES_cbc_encrypt
+.align 64
+.LAES_Te:
+.long 0xa56363c6,0xa56363c6
+.long 0x847c7cf8,0x847c7cf8
+.long 0x997777ee,0x997777ee
+.long 0x8d7b7bf6,0x8d7b7bf6
+.long 0x0df2f2ff,0x0df2f2ff
+.long 0xbd6b6bd6,0xbd6b6bd6
+.long 0xb16f6fde,0xb16f6fde
+.long 0x54c5c591,0x54c5c591
+.long 0x50303060,0x50303060
+.long 0x03010102,0x03010102
+.long 0xa96767ce,0xa96767ce
+.long 0x7d2b2b56,0x7d2b2b56
+.long 0x19fefee7,0x19fefee7
+.long 0x62d7d7b5,0x62d7d7b5
+.long 0xe6abab4d,0xe6abab4d
+.long 0x9a7676ec,0x9a7676ec
+.long 0x45caca8f,0x45caca8f
+.long 0x9d82821f,0x9d82821f
+.long 0x40c9c989,0x40c9c989
+.long 0x877d7dfa,0x877d7dfa
+.long 0x15fafaef,0x15fafaef
+.long 0xeb5959b2,0xeb5959b2
+.long 0xc947478e,0xc947478e
+.long 0x0bf0f0fb,0x0bf0f0fb
+.long 0xecadad41,0xecadad41
+.long 0x67d4d4b3,0x67d4d4b3
+.long 0xfda2a25f,0xfda2a25f
+.long 0xeaafaf45,0xeaafaf45
+.long 0xbf9c9c23,0xbf9c9c23
+.long 0xf7a4a453,0xf7a4a453
+.long 0x967272e4,0x967272e4
+.long 0x5bc0c09b,0x5bc0c09b
+.long 0xc2b7b775,0xc2b7b775
+.long 0x1cfdfde1,0x1cfdfde1
+.long 0xae93933d,0xae93933d
+.long 0x6a26264c,0x6a26264c
+.long 0x5a36366c,0x5a36366c
+.long 0x413f3f7e,0x413f3f7e
+.long 0x02f7f7f5,0x02f7f7f5
+.long 0x4fcccc83,0x4fcccc83
+.long 0x5c343468,0x5c343468
+.long 0xf4a5a551,0xf4a5a551
+.long 0x34e5e5d1,0x34e5e5d1
+.long 0x08f1f1f9,0x08f1f1f9
+.long 0x937171e2,0x937171e2
+.long 0x73d8d8ab,0x73d8d8ab
+.long 0x53313162,0x53313162
+.long 0x3f15152a,0x3f15152a
+.long 0x0c040408,0x0c040408
+.long 0x52c7c795,0x52c7c795
+.long 0x65232346,0x65232346
+.long 0x5ec3c39d,0x5ec3c39d
+.long 0x28181830,0x28181830
+.long 0xa1969637,0xa1969637
+.long 0x0f05050a,0x0f05050a
+.long 0xb59a9a2f,0xb59a9a2f
+.long 0x0907070e,0x0907070e
+.long 0x36121224,0x36121224
+.long 0x9b80801b,0x9b80801b
+.long 0x3de2e2df,0x3de2e2df
+.long 0x26ebebcd,0x26ebebcd
+.long 0x6927274e,0x6927274e
+.long 0xcdb2b27f,0xcdb2b27f
+.long 0x9f7575ea,0x9f7575ea
+.long 0x1b090912,0x1b090912
+.long 0x9e83831d,0x9e83831d
+.long 0x742c2c58,0x742c2c58
+.long 0x2e1a1a34,0x2e1a1a34
+.long 0x2d1b1b36,0x2d1b1b36
+.long 0xb26e6edc,0xb26e6edc
+.long 0xee5a5ab4,0xee5a5ab4
+.long 0xfba0a05b,0xfba0a05b
+.long 0xf65252a4,0xf65252a4
+.long 0x4d3b3b76,0x4d3b3b76
+.long 0x61d6d6b7,0x61d6d6b7
+.long 0xceb3b37d,0xceb3b37d
+.long 0x7b292952,0x7b292952
+.long 0x3ee3e3dd,0x3ee3e3dd
+.long 0x712f2f5e,0x712f2f5e
+.long 0x97848413,0x97848413
+.long 0xf55353a6,0xf55353a6
+.long 0x68d1d1b9,0x68d1d1b9
+.long 0x00000000,0x00000000
+.long 0x2cededc1,0x2cededc1
+.long 0x60202040,0x60202040
+.long 0x1ffcfce3,0x1ffcfce3
+.long 0xc8b1b179,0xc8b1b179
+.long 0xed5b5bb6,0xed5b5bb6
+.long 0xbe6a6ad4,0xbe6a6ad4
+.long 0x46cbcb8d,0x46cbcb8d
+.long 0xd9bebe67,0xd9bebe67
+.long 0x4b393972,0x4b393972
+.long 0xde4a4a94,0xde4a4a94
+.long 0xd44c4c98,0xd44c4c98
+.long 0xe85858b0,0xe85858b0
+.long 0x4acfcf85,0x4acfcf85
+.long 0x6bd0d0bb,0x6bd0d0bb
+.long 0x2aefefc5,0x2aefefc5
+.long 0xe5aaaa4f,0xe5aaaa4f
+.long 0x16fbfbed,0x16fbfbed
+.long 0xc5434386,0xc5434386
+.long 0xd74d4d9a,0xd74d4d9a
+.long 0x55333366,0x55333366
+.long 0x94858511,0x94858511
+.long 0xcf45458a,0xcf45458a
+.long 0x10f9f9e9,0x10f9f9e9
+.long 0x06020204,0x06020204
+.long 0x817f7ffe,0x817f7ffe
+.long 0xf05050a0,0xf05050a0
+.long 0x443c3c78,0x443c3c78
+.long 0xba9f9f25,0xba9f9f25
+.long 0xe3a8a84b,0xe3a8a84b
+.long 0xf35151a2,0xf35151a2
+.long 0xfea3a35d,0xfea3a35d
+.long 0xc0404080,0xc0404080
+.long 0x8a8f8f05,0x8a8f8f05
+.long 0xad92923f,0xad92923f
+.long 0xbc9d9d21,0xbc9d9d21
+.long 0x48383870,0x48383870
+.long 0x04f5f5f1,0x04f5f5f1
+.long 0xdfbcbc63,0xdfbcbc63
+.long 0xc1b6b677,0xc1b6b677
+.long 0x75dadaaf,0x75dadaaf
+.long 0x63212142,0x63212142
+.long 0x30101020,0x30101020
+.long 0x1affffe5,0x1affffe5
+.long 0x0ef3f3fd,0x0ef3f3fd
+.long 0x6dd2d2bf,0x6dd2d2bf
+.long 0x4ccdcd81,0x4ccdcd81
+.long 0x140c0c18,0x140c0c18
+.long 0x35131326,0x35131326
+.long 0x2fececc3,0x2fececc3
+.long 0xe15f5fbe,0xe15f5fbe
+.long 0xa2979735,0xa2979735
+.long 0xcc444488,0xcc444488
+.long 0x3917172e,0x3917172e
+.long 0x57c4c493,0x57c4c493
+.long 0xf2a7a755,0xf2a7a755
+.long 0x827e7efc,0x827e7efc
+.long 0x473d3d7a,0x473d3d7a
+.long 0xac6464c8,0xac6464c8
+.long 0xe75d5dba,0xe75d5dba
+.long 0x2b191932,0x2b191932
+.long 0x957373e6,0x957373e6
+.long 0xa06060c0,0xa06060c0
+.long 0x98818119,0x98818119
+.long 0xd14f4f9e,0xd14f4f9e
+.long 0x7fdcdca3,0x7fdcdca3
+.long 0x66222244,0x66222244
+.long 0x7e2a2a54,0x7e2a2a54
+.long 0xab90903b,0xab90903b
+.long 0x8388880b,0x8388880b
+.long 0xca46468c,0xca46468c
+.long 0x29eeeec7,0x29eeeec7
+.long 0xd3b8b86b,0xd3b8b86b
+.long 0x3c141428,0x3c141428
+.long 0x79dedea7,0x79dedea7
+.long 0xe25e5ebc,0xe25e5ebc
+.long 0x1d0b0b16,0x1d0b0b16
+.long 0x76dbdbad,0x76dbdbad
+.long 0x3be0e0db,0x3be0e0db
+.long 0x56323264,0x56323264
+.long 0x4e3a3a74,0x4e3a3a74
+.long 0x1e0a0a14,0x1e0a0a14
+.long 0xdb494992,0xdb494992
+.long 0x0a06060c,0x0a06060c
+.long 0x6c242448,0x6c242448
+.long 0xe45c5cb8,0xe45c5cb8
+.long 0x5dc2c29f,0x5dc2c29f
+.long 0x6ed3d3bd,0x6ed3d3bd
+.long 0xefacac43,0xefacac43
+.long 0xa66262c4,0xa66262c4
+.long 0xa8919139,0xa8919139
+.long 0xa4959531,0xa4959531
+.long 0x37e4e4d3,0x37e4e4d3
+.long 0x8b7979f2,0x8b7979f2
+.long 0x32e7e7d5,0x32e7e7d5
+.long 0x43c8c88b,0x43c8c88b
+.long 0x5937376e,0x5937376e
+.long 0xb76d6dda,0xb76d6dda
+.long 0x8c8d8d01,0x8c8d8d01
+.long 0x64d5d5b1,0x64d5d5b1
+.long 0xd24e4e9c,0xd24e4e9c
+.long 0xe0a9a949,0xe0a9a949
+.long 0xb46c6cd8,0xb46c6cd8
+.long 0xfa5656ac,0xfa5656ac
+.long 0x07f4f4f3,0x07f4f4f3
+.long 0x25eaeacf,0x25eaeacf
+.long 0xaf6565ca,0xaf6565ca
+.long 0x8e7a7af4,0x8e7a7af4
+.long 0xe9aeae47,0xe9aeae47
+.long 0x18080810,0x18080810
+.long 0xd5baba6f,0xd5baba6f
+.long 0x887878f0,0x887878f0
+.long 0x6f25254a,0x6f25254a
+.long 0x722e2e5c,0x722e2e5c
+.long 0x241c1c38,0x241c1c38
+.long 0xf1a6a657,0xf1a6a657
+.long 0xc7b4b473,0xc7b4b473
+.long 0x51c6c697,0x51c6c697
+.long 0x23e8e8cb,0x23e8e8cb
+.long 0x7cdddda1,0x7cdddda1
+.long 0x9c7474e8,0x9c7474e8
+.long 0x211f1f3e,0x211f1f3e
+.long 0xdd4b4b96,0xdd4b4b96
+.long 0xdcbdbd61,0xdcbdbd61
+.long 0x868b8b0d,0x868b8b0d
+.long 0x858a8a0f,0x858a8a0f
+.long 0x907070e0,0x907070e0
+.long 0x423e3e7c,0x423e3e7c
+.long 0xc4b5b571,0xc4b5b571
+.long 0xaa6666cc,0xaa6666cc
+.long 0xd8484890,0xd8484890
+.long 0x05030306,0x05030306
+.long 0x01f6f6f7,0x01f6f6f7
+.long 0x120e0e1c,0x120e0e1c
+.long 0xa36161c2,0xa36161c2
+.long 0x5f35356a,0x5f35356a
+.long 0xf95757ae,0xf95757ae
+.long 0xd0b9b969,0xd0b9b969
+.long 0x91868617,0x91868617
+.long 0x58c1c199,0x58c1c199
+.long 0x271d1d3a,0x271d1d3a
+.long 0xb99e9e27,0xb99e9e27
+.long 0x38e1e1d9,0x38e1e1d9
+.long 0x13f8f8eb,0x13f8f8eb
+.long 0xb398982b,0xb398982b
+.long 0x33111122,0x33111122
+.long 0xbb6969d2,0xbb6969d2
+.long 0x70d9d9a9,0x70d9d9a9
+.long 0x898e8e07,0x898e8e07
+.long 0xa7949433,0xa7949433
+.long 0xb69b9b2d,0xb69b9b2d
+.long 0x221e1e3c,0x221e1e3c
+.long 0x92878715,0x92878715
+.long 0x20e9e9c9,0x20e9e9c9
+.long 0x49cece87,0x49cece87
+.long 0xff5555aa,0xff5555aa
+.long 0x78282850,0x78282850
+.long 0x7adfdfa5,0x7adfdfa5
+.long 0x8f8c8c03,0x8f8c8c03
+.long 0xf8a1a159,0xf8a1a159
+.long 0x80898909,0x80898909
+.long 0x170d0d1a,0x170d0d1a
+.long 0xdabfbf65,0xdabfbf65
+.long 0x31e6e6d7,0x31e6e6d7
+.long 0xc6424284,0xc6424284
+.long 0xb86868d0,0xb86868d0
+.long 0xc3414182,0xc3414182
+.long 0xb0999929,0xb0999929
+.long 0x772d2d5a,0x772d2d5a
+.long 0x110f0f1e,0x110f0f1e
+.long 0xcbb0b07b,0xcbb0b07b
+.long 0xfc5454a8,0xfc5454a8
+.long 0xd6bbbb6d,0xd6bbbb6d
+.long 0x3a16162c,0x3a16162c
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.long 0x00000001, 0x00000002, 0x00000004, 0x00000008
+.long 0x00000010, 0x00000020, 0x00000040, 0x00000080
+.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080
+.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
+.align 64
+.LAES_Td: /* Decryption tables. Layout: 256 entries with each 32-bit word emitted twice (8 bytes per entry, 2048 bytes), then four repetitions of: a 256-byte table (the AES inverse S-box, starting 0x52,0x09,...) followed by 32 bytes holding the 0x80808080, 0xfefefefe and 0x1b1b1b1b mask pairs plus two zero words. */
+.long 0x50a7f451,0x50a7f451
+.long 0x5365417e,0x5365417e
+.long 0xc3a4171a,0xc3a4171a
+.long 0x965e273a,0x965e273a
+.long 0xcb6bab3b,0xcb6bab3b
+.long 0xf1459d1f,0xf1459d1f
+.long 0xab58faac,0xab58faac
+.long 0x9303e34b,0x9303e34b
+.long 0x55fa3020,0x55fa3020
+.long 0xf66d76ad,0xf66d76ad
+.long 0x9176cc88,0x9176cc88
+.long 0x254c02f5,0x254c02f5
+.long 0xfcd7e54f,0xfcd7e54f
+.long 0xd7cb2ac5,0xd7cb2ac5
+.long 0x80443526,0x80443526
+.long 0x8fa362b5,0x8fa362b5
+.long 0x495ab1de,0x495ab1de
+.long 0x671bba25,0x671bba25
+.long 0x980eea45,0x980eea45
+.long 0xe1c0fe5d,0xe1c0fe5d
+.long 0x02752fc3,0x02752fc3
+.long 0x12f04c81,0x12f04c81
+.long 0xa397468d,0xa397468d
+.long 0xc6f9d36b,0xc6f9d36b
+.long 0xe75f8f03,0xe75f8f03
+.long 0x959c9215,0x959c9215
+.long 0xeb7a6dbf,0xeb7a6dbf
+.long 0xda595295,0xda595295
+.long 0x2d83bed4,0x2d83bed4
+.long 0xd3217458,0xd3217458
+.long 0x2969e049,0x2969e049
+.long 0x44c8c98e,0x44c8c98e
+.long 0x6a89c275,0x6a89c275
+.long 0x78798ef4,0x78798ef4
+.long 0x6b3e5899,0x6b3e5899
+.long 0xdd71b927,0xdd71b927
+.long 0xb64fe1be,0xb64fe1be
+.long 0x17ad88f0,0x17ad88f0
+.long 0x66ac20c9,0x66ac20c9
+.long 0xb43ace7d,0xb43ace7d
+.long 0x184adf63,0x184adf63
+.long 0x82311ae5,0x82311ae5
+.long 0x60335197,0x60335197
+.long 0x457f5362,0x457f5362
+.long 0xe07764b1,0xe07764b1
+.long 0x84ae6bbb,0x84ae6bbb
+.long 0x1ca081fe,0x1ca081fe
+.long 0x942b08f9,0x942b08f9
+.long 0x58684870,0x58684870
+.long 0x19fd458f,0x19fd458f
+.long 0x876cde94,0x876cde94
+.long 0xb7f87b52,0xb7f87b52
+.long 0x23d373ab,0x23d373ab
+.long 0xe2024b72,0xe2024b72
+.long 0x578f1fe3,0x578f1fe3
+.long 0x2aab5566,0x2aab5566
+.long 0x0728ebb2,0x0728ebb2
+.long 0x03c2b52f,0x03c2b52f
+.long 0x9a7bc586,0x9a7bc586
+.long 0xa50837d3,0xa50837d3
+.long 0xf2872830,0xf2872830
+.long 0xb2a5bf23,0xb2a5bf23
+.long 0xba6a0302,0xba6a0302
+.long 0x5c8216ed,0x5c8216ed
+.long 0x2b1ccf8a,0x2b1ccf8a
+.long 0x92b479a7,0x92b479a7
+.long 0xf0f207f3,0xf0f207f3
+.long 0xa1e2694e,0xa1e2694e
+.long 0xcdf4da65,0xcdf4da65
+.long 0xd5be0506,0xd5be0506
+.long 0x1f6234d1,0x1f6234d1
+.long 0x8afea6c4,0x8afea6c4
+.long 0x9d532e34,0x9d532e34
+.long 0xa055f3a2,0xa055f3a2
+.long 0x32e18a05,0x32e18a05
+.long 0x75ebf6a4,0x75ebf6a4
+.long 0x39ec830b,0x39ec830b
+.long 0xaaef6040,0xaaef6040
+.long 0x069f715e,0x069f715e
+.long 0x51106ebd,0x51106ebd
+.long 0xf98a213e,0xf98a213e
+.long 0x3d06dd96,0x3d06dd96
+.long 0xae053edd,0xae053edd
+.long 0x46bde64d,0x46bde64d
+.long 0xb58d5491,0xb58d5491
+.long 0x055dc471,0x055dc471
+.long 0x6fd40604,0x6fd40604
+.long 0xff155060,0xff155060
+.long 0x24fb9819,0x24fb9819
+.long 0x97e9bdd6,0x97e9bdd6
+.long 0xcc434089,0xcc434089
+.long 0x779ed967,0x779ed967
+.long 0xbd42e8b0,0xbd42e8b0
+.long 0x888b8907,0x888b8907
+.long 0x385b19e7,0x385b19e7
+.long 0xdbeec879,0xdbeec879
+.long 0x470a7ca1,0x470a7ca1
+.long 0xe90f427c,0xe90f427c
+.long 0xc91e84f8,0xc91e84f8
+.long 0x00000000,0x00000000
+.long 0x83868009,0x83868009
+.long 0x48ed2b32,0x48ed2b32
+.long 0xac70111e,0xac70111e
+.long 0x4e725a6c,0x4e725a6c
+.long 0xfbff0efd,0xfbff0efd
+.long 0x5638850f,0x5638850f
+.long 0x1ed5ae3d,0x1ed5ae3d
+.long 0x27392d36,0x27392d36
+.long 0x64d90f0a,0x64d90f0a
+.long 0x21a65c68,0x21a65c68
+.long 0xd1545b9b,0xd1545b9b
+.long 0x3a2e3624,0x3a2e3624
+.long 0xb1670a0c,0xb1670a0c
+.long 0x0fe75793,0x0fe75793
+.long 0xd296eeb4,0xd296eeb4
+.long 0x9e919b1b,0x9e919b1b
+.long 0x4fc5c080,0x4fc5c080
+.long 0xa220dc61,0xa220dc61
+.long 0x694b775a,0x694b775a
+.long 0x161a121c,0x161a121c
+.long 0x0aba93e2,0x0aba93e2
+.long 0xe52aa0c0,0xe52aa0c0
+.long 0x43e0223c,0x43e0223c
+.long 0x1d171b12,0x1d171b12
+.long 0x0b0d090e,0x0b0d090e
+.long 0xadc78bf2,0xadc78bf2
+.long 0xb9a8b62d,0xb9a8b62d
+.long 0xc8a91e14,0xc8a91e14
+.long 0x8519f157,0x8519f157
+.long 0x4c0775af,0x4c0775af
+.long 0xbbdd99ee,0xbbdd99ee
+.long 0xfd607fa3,0xfd607fa3
+.long 0x9f2601f7,0x9f2601f7
+.long 0xbcf5725c,0xbcf5725c
+.long 0xc53b6644,0xc53b6644
+.long 0x347efb5b,0x347efb5b
+.long 0x7629438b,0x7629438b
+.long 0xdcc623cb,0xdcc623cb
+.long 0x68fcedb6,0x68fcedb6
+.long 0x63f1e4b8,0x63f1e4b8
+.long 0xcadc31d7,0xcadc31d7
+.long 0x10856342,0x10856342
+.long 0x40229713,0x40229713
+.long 0x2011c684,0x2011c684
+.long 0x7d244a85,0x7d244a85
+.long 0xf83dbbd2,0xf83dbbd2
+.long 0x1132f9ae,0x1132f9ae
+.long 0x6da129c7,0x6da129c7
+.long 0x4b2f9e1d,0x4b2f9e1d
+.long 0xf330b2dc,0xf330b2dc
+.long 0xec52860d,0xec52860d
+.long 0xd0e3c177,0xd0e3c177
+.long 0x6c16b32b,0x6c16b32b
+.long 0x99b970a9,0x99b970a9
+.long 0xfa489411,0xfa489411
+.long 0x2264e947,0x2264e947
+.long 0xc48cfca8,0xc48cfca8
+.long 0x1a3ff0a0,0x1a3ff0a0
+.long 0xd82c7d56,0xd82c7d56
+.long 0xef903322,0xef903322
+.long 0xc74e4987,0xc74e4987
+.long 0xc1d138d9,0xc1d138d9
+.long 0xfea2ca8c,0xfea2ca8c
+.long 0x360bd498,0x360bd498
+.long 0xcf81f5a6,0xcf81f5a6
+.long 0x28de7aa5,0x28de7aa5
+.long 0x268eb7da,0x268eb7da
+.long 0xa4bfad3f,0xa4bfad3f
+.long 0xe49d3a2c,0xe49d3a2c
+.long 0x0d927850,0x0d927850
+.long 0x9bcc5f6a,0x9bcc5f6a
+.long 0x62467e54,0x62467e54
+.long 0xc2138df6,0xc2138df6
+.long 0xe8b8d890,0xe8b8d890
+.long 0x5ef7392e,0x5ef7392e
+.long 0xf5afc382,0xf5afc382
+.long 0xbe805d9f,0xbe805d9f
+.long 0x7c93d069,0x7c93d069
+.long 0xa92dd56f,0xa92dd56f
+.long 0xb31225cf,0xb31225cf
+.long 0x3b99acc8,0x3b99acc8
+.long 0xa77d1810,0xa77d1810
+.long 0x6e639ce8,0x6e639ce8
+.long 0x7bbb3bdb,0x7bbb3bdb
+.long 0x097826cd,0x097826cd
+.long 0xf418596e,0xf418596e
+.long 0x01b79aec,0x01b79aec
+.long 0xa89a4f83,0xa89a4f83
+.long 0x656e95e6,0x656e95e6
+.long 0x7ee6ffaa,0x7ee6ffaa
+.long 0x08cfbc21,0x08cfbc21
+.long 0xe6e815ef,0xe6e815ef
+.long 0xd99be7ba,0xd99be7ba
+.long 0xce366f4a,0xce366f4a
+.long 0xd4099fea,0xd4099fea
+.long 0xd67cb029,0xd67cb029
+.long 0xafb2a431,0xafb2a431
+.long 0x31233f2a,0x31233f2a
+.long 0x3094a5c6,0x3094a5c6
+.long 0xc066a235,0xc066a235
+.long 0x37bc4e74,0x37bc4e74
+.long 0xa6ca82fc,0xa6ca82fc
+.long 0xb0d090e0,0xb0d090e0
+.long 0x15d8a733,0x15d8a733
+.long 0x4a9804f1,0x4a9804f1
+.long 0xf7daec41,0xf7daec41
+.long 0x0e50cd7f,0x0e50cd7f
+.long 0x2ff69117,0x2ff69117
+.long 0x8dd64d76,0x8dd64d76
+.long 0x4db0ef43,0x4db0ef43
+.long 0x544daacc,0x544daacc
+.long 0xdf0496e4,0xdf0496e4
+.long 0xe3b5d19e,0xe3b5d19e
+.long 0x1b886a4c,0x1b886a4c
+.long 0xb81f2cc1,0xb81f2cc1
+.long 0x7f516546,0x7f516546
+.long 0x04ea5e9d,0x04ea5e9d
+.long 0x5d358c01,0x5d358c01
+.long 0x737487fa,0x737487fa
+.long 0x2e410bfb,0x2e410bfb
+.long 0x5a1d67b3,0x5a1d67b3
+.long 0x52d2db92,0x52d2db92
+.long 0x335610e9,0x335610e9
+.long 0x1347d66d,0x1347d66d
+.long 0x8c61d79a,0x8c61d79a
+.long 0x7a0ca137,0x7a0ca137
+.long 0x8e14f859,0x8e14f859
+.long 0x893c13eb,0x893c13eb
+.long 0xee27a9ce,0xee27a9ce
+.long 0x35c961b7,0x35c961b7
+.long 0xede51ce1,0xede51ce1
+.long 0x3cb1477a,0x3cb1477a
+.long 0x59dfd29c,0x59dfd29c
+.long 0x3f73f255,0x3f73f255
+.long 0x79ce1418,0x79ce1418
+.long 0xbf37c773,0xbf37c773
+.long 0xeacdf753,0xeacdf753
+.long 0x5baafd5f,0x5baafd5f
+.long 0x146f3ddf,0x146f3ddf
+.long 0x86db4478,0x86db4478
+.long 0x81f3afca,0x81f3afca
+.long 0x3ec468b9,0x3ec468b9
+.long 0x2c342438,0x2c342438
+.long 0x5f40a3c2,0x5f40a3c2
+.long 0x72c31d16,0x72c31d16
+.long 0x0c25e2bc,0x0c25e2bc
+.long 0x8b493c28,0x8b493c28
+.long 0x41950dff,0x41950dff
+.long 0x7101a839,0x7101a839
+.long 0xdeb30c08,0xdeb30c08
+.long 0x9ce4b4d8,0x9ce4b4d8
+.long 0x90c15664,0x90c15664
+.long 0x6184cb7b,0x6184cb7b
+.long 0x70b632d5,0x70b632d5
+.long 0x745c6c48,0x745c6c48
+.long 0x4257b8d0,0x4257b8d0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/aes/aes-macosx-x86_64.S b/ext/libressl/crypto/aes/aes-macosx-x86_64.S
new file mode 100644
index 0000000..8a9c36e
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes-macosx-x86_64.S
@@ -0,0 +1,2544 @@
+#include "x86_arch.h"
+
+.text
+
+.p2align 4
+_x86_64_AES_encrypt: /* Table-driven encrypt core. In: eax,ebx,ecx,edx = four state words; r15 = round-key pointer; r14 = base of a table with 8-byte slots. Out: eax,ebx,ecx,edx. */
+ xorl 0(%r15),%eax /* state ^= first 16 bytes at r15 */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+
+ movl 240(%r15),%r13d /* r13d = dword stored at key+240 (presumably the round count - confirm against AES_KEY) */
+ subl $1,%r13d /* the loop runs count-1 times; the last round is done after the loop */
+ jmp L$enc_loop
+.p2align 4
+L$enc_loop: /* one round per pass; new words accumulate in r10d, r11d, r12d, r8d */
+
+ movzbl %al,%esi /* indices = lowest byte of eax, ebx, ecx */
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d /* offset 0 of the 8-byte slot = base table word */
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+
+ movzbl %bh,%esi /* indices = second byte of ebx, ecx; lowest byte of edx */
+ movzbl %ch,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d /* 4-byte read at slot offset 3; presumably a byte-rotated copy because each slot holds its word twice - confirm against L$AES_Te */
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+
+ movzbl %dh,%esi
+ shrl $16,%ecx /* expose the upper two bytes of ecx in cl/ch */
+ movzbl %ah,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx /* likewise for edx */
+ xorl 3(%r14,%rbp,8),%r8d
+
+ shrl $16,%ebx /* likewise for ebx */
+ leaq 16(%r15),%r15 /* r15 -> next 16-byte round key */
+ shrl $16,%eax /* likewise for eax */
+
+ movzbl %cl,%esi /* indices = third byte of the original ecx, edx, eax */
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d /* read at slot offset 2 */
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+
+ movzbl %dh,%esi /* indices = top byte of the original edx, eax; third byte of ebx */
+ movzbl %ah,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d /* read at slot offset 1 */
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+
+ movl 12(%r15),%edx /* edx is fully consumed, so start loading the next round key into the state registers */
+ movzbl %bh,%edi /* indices = top byte of the original ebx, ecx */
+ movzbl %ch,%ebp
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rdi,8),%r12d
+ xorl 1(%r14,%rbp,8),%r8d
+
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r10d,%eax /* new state = round key ^ accumulated table words */
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ subl $1,%r13d /* count down; the zero flag set here drives the jnz below */
+ jnz L$enc_loop
+ movzbl %al,%esi /* last round: single bytes are extracted from table entries and merged */
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl 2(%r14,%rsi,8),%r10d /* one byte from slot offset 2 -> bits 0-7 of the result word */
+ movzbl 2(%r14,%rdi,8),%r11d
+ movzbl 2(%r14,%rbp,8),%r12d
+
+ movzbl %dl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl 2(%r14,%rsi,8),%r8d
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+
+ andl $65280,%edi /* mask 0x0000ff00: keep bits 8-15 of the table word */
+ andl $65280,%ebp
+
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%ecx
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ shrl $16,%edx
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+
+ andl $65280,%esi /* mask 0x0000ff00 */
+ andl $65280,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%eax
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+
+ andl $16711680,%esi /* mask 0x00ff0000: keep bits 16-23 */
+ andl $16711680,%edi
+ andl $16711680,%ebp
+
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+
+ movzbl %bl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi /* read at slot offset 2, masked to the top byte below */
+ movl 2(%r14,%rbp,8),%ebp
+
+ andl $16711680,%esi /* mask 0x00ff0000 */
+ andl $4278190080,%edi /* mask 0xff000000: keep bits 24-31 */
+ andl $4278190080,%ebp
+
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movl 16+12(%r15),%edx /* r15 is not advanced after the loop, so the last round key sits at +16 */
+ movl 2(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 16+0(%r15),%eax
+
+ andl $4278190080,%esi /* mask 0xff000000 */
+ andl $4278190080,%edi
+
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ xorl %r10d,%eax /* result = last round key ^ assembled bytes */
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq /* besides eax..edx this routine overwrites esi, edi, ebp, r8d, r10d-r13d and advances r15 */
+
+
+.p2align 4
+_x86_64_AES_encrypt_compact: /* Encrypt core using only a 256-byte table at r14. In/out: eax,ebx,ecx,edx = state words; r15 = round-key pointer; 16(%rsp) at entry = key address at which the loop stops. */
+ leaq 128(%r14),%r8 /* r8 = table+128, so the displacements below cover table+0 .. table+224 */
+ movl 0-128(%r8),%edi /* eight loads, one per 32 bytes of the table; the values are overwritten unused (presumably a cache prefetch - confirm) */
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp L$enc_loop_compact
+.p2align 4
+L$enc_loop_compact: /* one round per pass */
+ xorl 0(%r15),%eax /* state ^= current round key */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15 /* r15 -> next round key */
+ movzbl %al,%r10d /* indices = lowest byte of eax, ebx, ecx */
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d /* byte-wide table lookup; these become bits 0-7 of the new words */
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+
+ movzbl %dl,%r8d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+
+ movzbl %dh,%ebp
+ movzbl %ah,%esi
+ shrl $16,%ecx /* expose the upper two bytes of ecx */
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx /* likewise for edx */
+
+ movzbl %cl,%edi
+ shll $8,%r9d /* position looked-up bytes at bits 8-15 */
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+
+ movzbl %dl,%r9d
+ shrl $16,%eax /* expose the upper two bytes of eax and ebx */
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+
+ movzbl %bl,%ebp
+ movzbl %dh,%esi
+ shll $16,%edi /* position at bits 16-23 */
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+
+ movzbl %ah,%edi
+ shrl $8,%ecx /* ecx and ebx were already shifted by 16, so each now holds only its original top byte */
+ shrl $8,%ebx
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rcx,1),%edx /* edx and ecx are reused as output accumulators from here on */
+ movzbl (%r14,%rbx,1),%ecx
+ shll $16,%r9d
+ shll $16,%r13d
+ shll $16,%ebp
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+ xorl %ebp,%r8d
+
+ shll $24,%esi /* position at bits 24-31 */
+ shll $24,%edi
+ shll $24,%edx
+ xorl %esi,%r10d
+ shll $24,%ecx
+ xorl %edi,%r11d
+ movl %r10d,%eax /* state words after the byte substitution and byte shuffling */
+ movl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ cmpq 16(%rsp),%r15 /* has r15 reached the stop address the caller stored on the stack? */
+ je L$enc_compact_done /* yes: the last round skips the column mixing below */
+ movl %eax,%esi /* column mixing, two words at a time (eax and ebx first) */
+ movl %ebx,%edi
+ andl $2155905152,%esi /* mask 0x80808080: top bit of every byte */
+ andl $2155905152,%edi
+ movl %esi,%r10d
+ movl %edi,%r11d
+ shrl $7,%r10d
+ leal (%rax,%rax,1),%r8d /* r8d = eax * 2 */
+ shrl $7,%r11d
+ leal (%rbx,%rbx,1),%r9d /* r9d = ebx * 2 */
+ subl %r10d,%esi /* m - (m >> 7): 0x7f in every byte whose top bit was set */
+ subl %r11d,%edi
+ andl $4278124286,%r8d /* mask 0xfefefefe: drop bits that crossed a byte boundary in the doubling */
+ andl $4278124286,%r9d
+ andl $454761243,%esi /* mask 0x1b1b1b1b: 0x1b in every byte whose top bit was set */
+ andl $454761243,%edi
+ movl %eax,%r10d /* keep the undoubled words for the rotations below */
+ movl %ebx,%r11d
+ xorl %esi,%r8d /* r8d/r9d = every byte doubled in GF(2^8) with the 0x1b reduction */
+ xorl %edi,%r9d
+
+ xorl %r8d,%eax /* eax = x ^ 2x */
+ xorl %r9d,%ebx
+ movl %ecx,%esi /* interleaved: begin the same doubling for ecx and edx */
+ movl %edx,%edi
+ roll $24,%eax
+ roll $24,%ebx
+ andl $2155905152,%esi /* mask 0x80808080 */
+ andl $2155905152,%edi
+ xorl %r8d,%eax /* eax = rol(x ^ 2x, 24) ^ 2x */
+ xorl %r9d,%ebx
+ movl %esi,%r12d
+ movl %edi,%ebp
+ rorl $16,%r10d
+ rorl $16,%r11d
+ shrl $7,%r12d
+ leal (%rcx,%rcx,1),%r8d /* r8d = ecx * 2 */
+ xorl %r10d,%eax /* ^= ror(x, 16) */
+ xorl %r11d,%ebx
+ shrl $7,%ebp
+ leal (%rdx,%rdx,1),%r9d /* r9d = edx * 2 */
+ rorl $8,%r10d /* total rotation of x is now 24 */
+ rorl $8,%r11d
+ subl %r12d,%esi
+ subl %ebp,%edi
+ xorl %r10d,%eax /* ^= ror(x, 24): eax and ebx are finished */
+ xorl %r11d,%ebx
+
+ andl $4278124286,%r8d /* mask 0xfefefefe */
+ andl $4278124286,%r9d
+ andl $454761243,%esi /* mask 0x1b1b1b1b */
+ andl $454761243,%edi
+ movl %ecx,%r12d
+ movl %edx,%ebp
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+
+ xorl %r8d,%ecx /* same combination as above, now for ecx and edx */
+ xorl %r9d,%edx
+ roll $24,%ecx
+ roll $24,%edx
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ movl 0(%r14),%esi /* table read at +0/+64/+128/+192; esi, edi, r8d, r9d are overwritten on the next pass before use (presumably keeps the table cached - confirm) */
+ rorl $16,%r12d
+ rorl $16,%ebp
+ movl 64(%r14),%edi
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ movl 128(%r14),%r8d
+ rorl $8,%r12d
+ rorl $8,%ebp
+ movl 192(%r14),%r9d
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ jmp L$enc_loop_compact
+.p2align 4
+L$enc_compact_done: /* r15 now points at the final round key */
+ xorl 0(%r15),%eax /* result = state ^ final round key */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq /* besides eax..edx this routine overwrites esi, edi, ebp, r8d-r13d and advances r15 */
+
+.globl _AES_encrypt /* single-block encrypt entry point; as used below: %rdi = 16-byte input, %rsi = 16-byte output, %rdx = key schedule */
+
+.p2align 4
+.globl _asm_AES_encrypt
+.private_extern _asm_AES_encrypt /* second label on the same code */
+_asm_AES_encrypt:
+_AES_encrypt:
+ pushq %rbx /* save the six registers restored in the epilogue */
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+ movq %rsp,%r10 /* r10 = stack pointer after the pushes; used to unwind at the end */
+ leaq -63(%rdx),%rcx
+ andq $-64,%rsp /* align the working frame to 64 bytes */
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx /* keep bits 6..9 of (rsp - (key - 63)) */
+ subq %rcx,%rsp /* move the frame down by that multiple of 64; NOTE(review): presumably keeps the frame from aliasing the key schedule in cache - confirm */
+ subq $32,%rsp /* four qword slots: 0 = key, 8 = key end, 16 = out, 24 = saved rsp */
+
+ movq %rsi,16(%rsp) /* remember the output pointer */
+ movq %r10,24(%rsp) /* remember the pre-alignment stack pointer */
+L$enc_prologue:
+
+ movq %rdx,%r15 /* r15 = key schedule pointer for the worker routine */
+ movl 240(%r15),%r13d /* dword at key+240 (the key-setup code in this file stores the round count there) */
+
+ movl 0(%rdi),%eax /* load the 16-byte input block into eax, ebx, ecx, edx */
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+
+ shll $4,%r13d /* rounds * 16 */
+ leaq (%r15,%r13,1),%rbp /* rbp = key + rounds*16 */
+ movq %r15,(%rsp)
+ movq %rbp,8(%rsp) /* seen by the callee as 16(%rsp) once the return address is pushed */
+
+
+ leaq L$AES_Te+2048(%rip),%r14 /* byte table that follows the 2048-byte word table */
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp /* 0, 256, 512 or 768, derived from the frame address */
+ leaq (%r14,%rbp,1),%r14 /* pick a 256-byte table copy at that offset; NOTE(review): assumes four copies exist - only the start of the table is visible here */
+
+ call _x86_64_AES_encrypt_compact
+
+ movq 16(%rsp),%r9 /* r9 = output pointer */
+ movq 24(%rsp),%rsi /* rsi = stack pointer saved before alignment */
+ movl %eax,0(%r9) /* store the 16-byte result */
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ movq (%rsi),%r15 /* reload the pushed registers relative to the saved stack pointer */
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp /* drop the six saved slots; rsp now points at the return address */
+L$enc_epilogue:
+ retq
+
+
+.p2align 4 /* internal table-driven decrypt of one block; in/out state: eax, ebx, ecx, edx; r15 = key schedule; r14 = table base */
+_x86_64_AES_decrypt:
+ xorl 0(%r15),%eax /* mix the first 16 bytes of the key schedule into the state */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+
+ movl 240(%r15),%r13d /* dword at key+240 (round count) */
+ subl $1,%r13d /* the loop below runs rounds-1 times; the last round is handled after it */
+ jmp L$dec_loop
+.p2align 4
+L$dec_loop:
+
+ movzbl %al,%esi /* lowest byte of three state words indexes the table (8-byte entries) */
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d /* r10, r11, r12, r8 accumulate the four new state words */
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d /* dword read at byte offset 3 of an entry; NOTE(review): yields a byte-rotated word if entries are duplicated dwords as in L$AES_Te - confirm for the Td table */
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+
+ movzbl %bh,%esi
+ shrl $16,%eax /* expose the upper two bytes of each state word */
+ movzbl %ch,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+
+ shrl $16,%ebx
+ leaq 16(%r15),%r15 /* advance to the next 16-byte round key */
+ shrl $16,%ecx
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d /* third byte of each word uses byte offset 2 */
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d /* top byte of each word uses byte offset 1 */
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+
+ movzbl %dh,%esi
+ movl 12(%r15),%edx /* state registers are free now: start reloading them with the round key */
+ movzbl %ah,%ebp
+ xorl 1(%r14,%rsi,8),%r12d
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rbp,8),%r8d
+
+ xorl %r10d,%eax /* new state = round key xor accumulated table words */
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r12d,%ecx
+ xorl %r11d,%ebx
+ xorl %r8d,%edx
+ subl $1,%r13d
+ jnz L$dec_loop
+ leaq 2048(%r14),%r14 /* last round: switch to the byte table 2048 bytes past the word table */
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl (%r14,%rsi,1),%r10d /* single-byte lookups; results are shifted into place below */
+ movzbl (%r14,%rdi,1),%r11d
+ movzbl (%r14,%rbp,1),%r12d
+
+ movzbl %dl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movzbl (%r14,%rsi,1),%r8d
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $8,%edi /* place substituted bytes in bits 8..15 */
+ shll $8,%ebp
+
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%edx
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ shrl $16,%eax
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+
+ shll $8,%esi
+ shll $8,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%ecx
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $16,%esi /* place substituted bytes in bits 16..23 */
+ shll $16,%edi
+ shll $16,%ebp
+
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+
+ movzbl %bl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $16,%esi
+ shll $24,%edi /* place substituted bytes in bits 24..31 */
+ shll $24,%ebp
+
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movl 16+12(%r15),%edx /* final round key sits 16 bytes past the last one used in the loop */
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movl 16+0(%r15),%eax
+
+ shll $24,%esi
+ shll $24,%edi
+
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ leaq -2048(%r14),%r14 /* restore r14 to the word-table base for the caller */
+ xorl %r10d,%eax /* result = final round key xor substituted bytes */
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq /* returns the block in eax, ebx, ecx, edx; esi, edi, ebp, r8, r10-r13 and r15 were modified */
+
+
+.p2align 4 /* internal decrypt of one block using only a 256-byte table at r14 plus three qword masks at 256(%r14); state in eax..edx, r15 = key schedule, caller stores the key-end pointer where this routine sees 16(%rsp) */
+_x86_64_AES_decrypt_compact:
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi /* eight loads, 32 bytes apart, covering the 256-byte table; the values are never used, only the memory is touched */
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp L$dec_loop_compact
+
+.p2align 4
+L$dec_loop_compact:
+ xorl 0(%r15),%eax /* add the current round key */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15 /* r15 = next round key */
+ movzbl %al,%r10d /* every state byte is replaced through the byte table at r14 */
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d /* r10, r11, r12, r8 collect the low bytes of the four output words */
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+
+ movzbl %dl,%r8d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+
+ movzbl %bh,%ebp
+ movzbl %ch,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+
+ movzbl %cl,%edi
+ shll $8,%r9d /* second-byte results go to bits 8..15 */
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+
+ movzbl %bl,%ebp
+ movzbl %bh,%esi
+ shll $16,%edi /* third-byte results go to bits 16..23 */
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+
+ movzbl %ch,%edi
+ shll $16,%r9d
+ shll $16,%r13d
+ movzbl (%r14,%rdi,1),%ebx /* from here the state registers themselves receive the top-byte lookups */
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+
+ movzbl %dh,%edi
+ shrl $8,%eax /* eax now holds only its original top byte */
+ shll $16,%ebp
+ movzbl (%r14,%rdi,1),%ecx
+ movzbl (%r14,%rax,1),%edx
+ xorl %ebp,%r8d
+
+ shll $24,%esi /* top-byte results go to bits 24..31 */
+ shll $24,%ebx
+ shll $24,%ecx
+ xorl %esi,%r10d
+ shll $24,%edx
+ xorl %r11d,%ebx
+ movl %r10d,%eax
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ cmpq 16(%rsp),%r15 /* reached the key-end pointer saved by the caller? */
+ je L$dec_compact_done
+
+ movq 256+0(%r14),%rsi /* three qword masks stored right after the byte table; NOTE(review): values are outside this view - presumably the 0x80/0xfe/0x1b byte masks used as immediates in the encrypt path, confirm */
+ shlq $32,%rbx
+ shlq $32,%rdx
+ movq 256+8(%r14),%rdi
+ orq %rbx,%rax /* pack the state as two qwords: rax = ebx:eax, rcx = edx:ecx */
+ orq %rdx,%rcx
+ movq 256+16(%r14),%rbp
+ movq %rax,%rbx /* first of three identical mask/shift/subtract/xor steps, each applied to all eight packed bytes of both qwords */
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8 /* value + value, i.e. a one-bit left shift without touching flags */
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8 /* r8 / r11 = result of step one */
+ movq %rdx,%r11
+
+ andq %rsi,%rbx /* step two, applied to the result of step one */
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9 /* r9 / r12 = result of step two */
+ movq %rdx,%r12
+
+ andq %rsi,%rbx /* step three, interleaved with xors of the original state into the earlier results */
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10 /* r10 / r13 = result of step three */
+ xorq %rdx,%r13
+
+ xorq %r10,%rax /* combine the three results with the state using 32-bit rotations by 8, 24 and 16; NOTE(review): this looks like the AES inverse column-mixing step on packed bytes - confirm against the reference */
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx /* split the packed qwords back into four 32-bit words */
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq 0(%r14),%rsi /* the five loads in this group are overwritten at the top of the loop; they only touch the table at 64-byte steps */
+ shrq $32,%r8
+ shrq $32,%r11
+ movq 64(%r14),%rdi
+ roll $16,%r9d
+ roll $16,%r12d
+ movq 128(%r14),%rbp
+ roll $16,%r8d
+ roll $16,%r11d
+ movq 192(%r14),%r10
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+ movq 256(%r14),%r13
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ jmp L$dec_loop_compact
+.p2align 4
+L$dec_compact_done:
+ xorl 0(%r15),%eax /* final round key; result is returned in eax, ebx, ecx, edx */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq
+
+.globl _AES_decrypt /* single-block decrypt entry point; as used below: %rdi = 16-byte input, %rsi = 16-byte output, %rdx = key schedule */
+
+.p2align 4
+.globl _asm_AES_decrypt
+.private_extern _asm_AES_decrypt /* second label on the same code */
+_asm_AES_decrypt:
+_AES_decrypt:
+ pushq %rbx /* save the six registers restored in the epilogue */
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+ movq %rsp,%r10 /* r10 = stack pointer after the pushes; used to unwind at the end */
+ leaq -63(%rdx),%rcx
+ andq $-64,%rsp /* align the working frame to 64 bytes */
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx /* keep bits 6..9 of (rsp - (key - 63)) */
+ subq %rcx,%rsp /* move the frame down by that multiple of 64; NOTE(review): presumably keeps the frame from aliasing the key schedule in cache - confirm */
+ subq $32,%rsp /* four qword slots: 0 = key, 8 = key end, 16 = out, 24 = saved rsp */
+
+ movq %rsi,16(%rsp) /* remember the output pointer */
+ movq %r10,24(%rsp) /* remember the pre-alignment stack pointer */
+L$dec_prologue:
+
+ movq %rdx,%r15 /* r15 = key schedule pointer for the worker routine */
+ movl 240(%r15),%r13d /* dword at key+240 (round count) */
+
+ movl 0(%rdi),%eax /* load the 16-byte input block into eax, ebx, ecx, edx */
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+
+ shll $4,%r13d /* rounds * 16 */
+ leaq (%r15,%r13,1),%rbp /* rbp = key + rounds*16 */
+ movq %r15,(%rsp)
+ movq %rbp,8(%rsp) /* seen by the callee as 16(%rsp) once the return address is pushed */
+
+
+ leaq L$AES_Td+2048(%rip),%r14 /* byte-table area 2048 bytes into L$AES_Td */
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp /* 0, 256, 512 or 768, derived from the frame address */
+ leaq (%r14,%rbp,1),%r14
+ shrq $3,%rbp /* plus one eighth of it: the selected offset becomes 0, 288, 576 or 864 */
+ addq %rbp,%r14 /* i.e. table copies are 288 bytes apart (the callee reads 256 table bytes and masks at +256) */
+
+ call _x86_64_AES_decrypt_compact
+
+ movq 16(%rsp),%r9 /* r9 = output pointer */
+ movq 24(%rsp),%rsi /* rsi = stack pointer saved before alignment */
+ movl %eax,0(%r9) /* store the 16-byte result */
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ movq (%rsi),%r15 /* reload the pushed registers relative to the saved stack pointer */
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp /* drop the six saved slots; rsp now points at the return address */
+L$dec_epilogue:
+ retq
+
+.globl _AES_set_encrypt_key
+
+.p2align 4
+/*
+ * Exported wrapper around _x86_64_AES_set_encrypt_key.
+ * The argument registers (%rdi, %esi, %rdx) are passed through untouched
+ * and the worker's %rax is returned unchanged.  The wrapper only preserves
+ * %rbx, %rbp and %r12-%r15 around the call.
+ */
+_AES_set_encrypt_key:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -8(%rsp),%rsp /* one padding slot: seven slots plus the return address = 64 bytes */
+L$enc_key_prologue:
+
+ call _x86_64_AES_set_encrypt_key
+
+ leaq 8(%rsp),%rsp /* discard the padding slot */
+ popq %r15 /* restore in reverse push order */
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+L$enc_key_epilogue:
+ retq
+
+
+
+.p2align 4 /* internal key expansion; on entry %rdi = user key bytes, %esi = key length in bits, %rdx = output key schedule; returns rax = 0 ok, -1 null pointer, -2 unsupported length */
+_x86_64_AES_set_encrypt_key:
+ movl %esi,%ecx /* ecx = bits */
+ movq %rdi,%rsi /* rsi = user key */
+ movq %rdx,%rdi /* rdi = key schedule being written */
+
+ testq $-1,%rsi /* reject a null user-key pointer */
+ jz L$badpointer
+ testq $-1,%rdi /* reject a null schedule pointer */
+ jz L$badpointer
+
+ leaq L$AES_Te(%rip),%rbp
+ leaq 2048+128(%rbp),%rbp /* rbp = byte table + 128, so table[i] is addressed as -128(%rbp,i) */
+
+
+ movl 0-128(%rbp),%eax /* eight loads, 32 bytes apart, over the 256-byte table; values are discarded (all four registers are reloaded or unused below) */
+ movl 32-128(%rbp),%ebx
+ movl 64-128(%rbp),%r8d
+ movl 96-128(%rbp),%edx
+ movl 128-128(%rbp),%eax
+ movl 160-128(%rbp),%ebx
+ movl 192-128(%rbp),%r8d
+ movl 224-128(%rbp),%edx
+
+ cmpl $128,%ecx /* dispatch on key size */
+ je L$10rounds
+ cmpl $192,%ecx
+ je L$12rounds
+ cmpl $256,%ecx
+ je L$14rounds
+ movq $-2,%rax /* any other bit count is an error */
+ jmp L$exit
+
+L$10rounds:
+ movq 0(%rsi),%rax /* copy the 16 key bytes to the start of the schedule */
+ movq 8(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rdx,8(%rdi)
+
+ shrq $32,%rdx /* edx = last key word, eax = first key word */
+ xorl %ecx,%ecx /* ecx = iteration index, also indexes the constant table below */
+ jmp L$10shortcut
+.p2align 2
+L$10loop:
+ movl 0(%rdi),%eax /* first word of the current 16-byte group */
+ movl 12(%rdi),%edx /* last word of the current group */
+L$10shortcut:
+ movzbl %dl,%esi /* substitute each byte of edx through the table and xor into eax: byte0 -> bits 24..31 */
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx /* byte1 -> bits 0..7 */
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx /* byte2 -> bits 8..15 */
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx /* byte3 -> bits 16..23 */
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax /* dword ecx of the table at L$AES_Te+3072; NOTE(review): presumably the round constants - contents are outside this view */
+ movl %eax,16(%rdi) /* next four words: each is the previous new word xor the word 16 bytes earlier */
+ xorl 4(%rdi),%eax
+ movl %eax,20(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,24(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,28(%rdi)
+ addl $1,%ecx
+ leaq 16(%rdi),%rdi /* slide the window by one 16-byte group */
+ cmpl $10,%ecx
+ jl L$10loop
+
+ movl $10,80(%rdi) /* rdi advanced 10*16 bytes, so this writes the round count at schedule+240 */
+ xorq %rax,%rax /* return 0 */
+ jmp L$exit
+
+L$12rounds:
+ movq 0(%rsi),%rax /* copy the 24 key bytes to the start of the schedule */
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rdx,16(%rdi)
+
+ shrq $32,%rdx /* edx = last key word, eax = first key word */
+ xorl %ecx,%ecx
+ jmp L$12shortcut
+.p2align 2
+L$12loop:
+ movl 0(%rdi),%eax /* first word of the current 24-byte group */
+ movl 20(%rdi),%edx /* last word of the current group */
+L$12shortcut:
+ movzbl %dl,%esi /* same byte substitution and placement as in the 128-bit path */
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax /* per-iteration constant, as above */
+ movl %eax,24(%rdi) /* first four words of the next group */
+ xorl 4(%rdi),%eax
+ movl %eax,28(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,32(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,36(%rdi)
+
+ cmpl $7,%ecx /* the eighth pass (index 7) only needs those four words */
+ je L$12break
+ addl $1,%ecx
+
+ xorl 16(%rdi),%eax /* remaining two words of the 24-byte group */
+ movl %eax,40(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,44(%rdi)
+
+ leaq 24(%rdi),%rdi
+ jmp L$12loop
+L$12break:
+ movl $12,72(%rdi) /* rdi advanced 7*24 bytes, so this writes the round count at schedule+240 */
+ xorq %rax,%rax /* return 0 */
+ jmp L$exit
+
+L$14rounds:
+ movq 0(%rsi),%rax /* copy the 32 key bytes to the start of the schedule */
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rcx
+ movq 24(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+
+ shrq $32,%rdx /* edx = last key word, eax = first key word */
+ xorl %ecx,%ecx
+ jmp L$14shortcut
+.p2align 2
+L$14loop:
+ movl 0(%rdi),%eax /* first word of the current 32-byte group */
+ movl 28(%rdi),%edx /* last word of the current group */
+L$14shortcut:
+ movzbl %dl,%esi /* same byte substitution and placement as in the 128-bit path */
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax /* per-iteration constant, as above */
+ movl %eax,32(%rdi) /* first four words of the next group */
+ xorl 4(%rdi),%eax
+ movl %eax,36(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,40(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,44(%rdi)
+
+ cmpl $6,%ecx /* the seventh pass (index 6) only needs those four words */
+ je L$14break
+ addl $1,%ecx
+
+ movl %eax,%edx /* second half: substitute the word just written, this time without moving bytes to other positions and without a constant */
+ movl 16(%rdi),%eax
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ shll $8,%ebx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movl %eax,48(%rdi) /* last four words of the next group */
+ xorl 20(%rdi),%eax
+ movl %eax,52(%rdi)
+ xorl 24(%rdi),%eax
+ movl %eax,56(%rdi)
+ xorl 28(%rdi),%eax
+ movl %eax,60(%rdi)
+
+ leaq 32(%rdi),%rdi
+ jmp L$14loop
+L$14break:
+ movl $14,48(%rdi) /* rdi advanced 6*32 bytes, so this writes the round count at schedule+240 */
+ xorq %rax,%rax /* return 0 */
+ jmp L$exit
+
+L$badpointer:
+ movq $-1,%rax /* null argument */
+L$exit:
+ retq /* rax = status; rbx, rcx, rdx, rsi, rdi, rbp and r8 were modified */
+
+.globl _AES_set_decrypt_key /* builds a decryption key schedule; arguments are passed straight to the encrypt-key worker (%rdi = user key, %esi = bits, %rdx = schedule); returns the worker status, 0 on success */
+
+.p2align 4
+_AES_set_decrypt_key:
+ pushq %rbx /* save the six registers restored in the epilogue */
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rdx /* keep the schedule pointer across the call; also the seventh slot of the 56-byte frame */
+L$dec_key_prologue:
+
+ call _x86_64_AES_set_encrypt_key /* first expand the key exactly as for encryption */
+ movq (%rsp),%r8 /* r8 = schedule pointer */
+ cmpl $0,%eax
+ jne L$abort /* non-zero status: return it unchanged */
+
+ movl 240(%r8),%r14d /* r14d = round count written by the worker */
+ xorq %rdi,%rdi
+ leaq (%rdi,%r14,4),%rcx /* rcx = rounds * 4 */
+ movq %r8,%rsi /* rsi = first round key */
+ leaq (%r8,%rcx,4),%rdi /* rdi = schedule + rounds*16 = last round key */
+.p2align 2
+L$invert:
+ movq 0(%rsi),%rax /* swap the 16-byte round keys at rsi and rdi */
+ movq 8(%rsi),%rbx
+ movq 0(%rdi),%rcx
+ movq 8(%rdi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,0(%rsi)
+ movq %rdx,8(%rsi)
+ leaq 16(%rsi),%rsi /* walk inwards from both ends */
+ leaq -16(%rdi),%rdi
+ cmpq %rsi,%rdi
+ jne L$invert /* stop when the two pointers meet */
+
+ leaq L$AES_Te+2048+1024(%rip),%rax /* same address the key expansion indexes for its per-iteration constants */
+
+ movq 40(%rax),%rsi /* three qword masks stored 40 bytes into that area; NOTE(review): values are outside this view - presumably the 0x80/0xfe/0x1b byte masks, confirm */
+ movq 48(%rax),%rdi
+ movq 56(%rax),%rbp
+
+ movq %r8,%r15 /* r15 walks the schedule */
+ subl $1,%r14d /* transform rounds-1 round keys */
+.p2align 2
+L$permute:
+ leaq 16(%r15),%r15 /* pre-increment: the first round key is left untouched, and so is the last */
+ movq 0(%r15),%rax /* rax, rcx = the 16-byte round key as two qwords */
+ movq 8(%r15),%rcx
+ movq %rax,%rbx /* first of three identical mask/shift/subtract/xor steps on all packed bytes (same sequence as in _x86_64_AES_decrypt_compact) */
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8 /* r8 / r11 = result of step one */
+ movq %rdx,%r11
+
+ andq %rsi,%rbx /* step two */
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9 /* r9 / r12 = result of step two */
+ movq %rdx,%r12
+
+ andq %rsi,%rbx /* step three, interleaved with xors of the original words */
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10 /* r10 / r13 = result of step three */
+ xorq %rdx,%r13
+
+ xorq %r10,%rax /* combine with 32-bit rotations by 8, 24 and 16; NOTE(review): this looks like the AES inverse column-mixing step applied to the round key - confirm */
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx /* split the qwords into four 32-bit words: eax, ebx, ecx, edx */
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+
+ shrq $32,%r8
+ shrq $32,%r11
+
+ roll $16,%r9d
+ roll $16,%r12d
+
+ roll $16,%r8d
+ roll $16,%r11d
+
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ movl %eax,0(%r15) /* write the transformed round key back in place */
+ movl %ebx,4(%r15)
+ movl %ecx,8(%r15)
+ movl %edx,12(%r15)
+ subl $1,%r14d
+ jnz L$permute
+
+ xorq %rax,%rax /* success: return 0 */
+L$abort:
+ movq 8(%rsp),%r15 /* slot 0 holds the pushed rdx; the saved registers start at 8(%rsp) */
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp /* release all seven slots */
+L$dec_key_epilogue:
+ retq
+
+.globl _AES_cbc_encrypt /* CBC encrypt/decrypt; as used below: %rdi = in, %rsi = out, %rdx = length in bytes, %rcx = key schedule, %r8 = 16-byte IV (updated on return), %r9d = non-zero to encrypt, zero to decrypt */
+
+.p2align 4
+
+.private_extern _OPENSSL_ia32cap_P
+.globl _asm_AES_cbc_encrypt
+.private_extern _asm_AES_cbc_encrypt /* second label on the same code */
+_asm_AES_cbc_encrypt:
+_AES_cbc_encrypt:
+ cmpq $0,%rdx /* nothing to do for a zero length */
+ je L$cbc_epilogue
+ pushfq /* flags are saved because the direction flag is changed below */
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+L$cbc_prologue:
+
+ cld /* string instructions below must run forwards */
+ movl %r9d,%r9d /* zero-extend the 32-bit enc flag */
+
+ leaq L$AES_Te(%rip),%r14 /* r14 = encrypt tables ... */
+ cmpq $0,%r9
+ jne L$cbc_picked_te
+ leaq L$AES_Td(%rip),%r14 /* ... or decrypt tables when enc == 0 */
+L$cbc_picked_te:
+
+ movl _OPENSSL_ia32cap_P(%rip),%r10d /* first dword of the CPU capability vector */
+ cmpq $512,%rdx /* slow (compact) path for short inputs, */
+ jb L$cbc_slow_prologue
+ testq $15,%rdx /* for lengths that are not a multiple of 16, */
+ jnz L$cbc_slow_prologue
+ btl $IA32CAP_BIT0_HT,%r10d /* and when the HT capability bit is set */
+ jc L$cbc_slow_prologue
+
+
+ leaq -88-248(%rsp),%r15 /* fast path frame: 88 bytes of slots plus 248 bytes for a key copy */
+ andq $-64,%r15
+
+
+ movq %r14,%r10 /* compare page offsets (low 12 bits) of the frame and of the 2304-byte table area */
+ leaq 2304(%r14),%r11
+ movq %r15,%r12
+ andq $4095,%r10
+ andq $4095,%r11
+ andq $4095,%r12
+
+ cmpq %r11,%r12
+ jb L$cbc_te_break_out
+ subq %r11,%r12 /* frame offset >= table-end offset: lower the frame so its page offset equals the table-end offset */
+ subq %r12,%r15
+ jmp L$cbc_te_ok
+L$cbc_te_break_out:
+ subq %r10,%r12 /* otherwise lower the frame to 320 bytes below the table-start page offset */
+ andq $4095,%r12
+ addq $320,%r12
+ subq %r12,%r15
+.p2align 2
+L$cbc_te_ok:
+
+ xchgq %rsp,%r15 /* switch to the new frame; r15 = previous stack pointer */
+
+ movq %r15,16(%rsp) /* slot 16: previous stack pointer, used by L$cbc_exit */
+L$cbc_fast_body:
+ movq %rdi,24(%rsp) /* slot 24: in */
+ movq %rsi,32(%rsp) /* slot 32: out */
+ movq %rdx,40(%rsp) /* slot 40: remaining length */
+ movq %rcx,48(%rsp) /* slot 48: key */
+ movq %r8,56(%rsp) /* slot 56: IV pointer */
+ movl $0,80+240(%rsp) /* dword after the 240-byte key-copy area; stays 0 unless the key is copied */
+ movq %r8,%rbp /* rbp = IV, rbx = enc flag, r9 = out, r8 = in, r15 = key */
+ movq %r9,%rbx
+ movq %rsi,%r9
+ movq %rdi,%r8
+ movq %rcx,%r15
+
+ movl 240(%r15),%eax /* eax = round count from the key schedule */
+
+ movq %r15,%r10 /* decide from (key - table) mod 4096 whether to work on a stack copy of the key */
+ subq %r14,%r10
+ andq $4095,%r10
+ cmpq $2304,%r10
+ jb L$cbc_do_ecopy /* key page offset falls inside the table range: copy */
+ cmpq $4096-248,%r10
+ jb L$cbc_skip_ecopy /* safely past it: use the caller key in place */
+.p2align 2
+L$cbc_do_ecopy:
+ movq %r15,%rsi
+ leaq 80(%rsp),%rdi
+ leaq 80(%rsp),%r15 /* from now on the key pointer is the stack copy */
+ movl $30,%ecx
+.long 0x90A548F3 /* bytes F3 48 A5 90 = rep movsq; nop - copies 30 qwords (240 bytes) */
+ movl %eax,(%rdi) /* rdi is now copy+240: store the round count, which also marks the copy as present */
+L$cbc_skip_ecopy:
+ movq %r15,0(%rsp) /* slot 0: key pointer actually used */
+
+ movl $18,%ecx /* touch the whole 18*128 = 2304-byte table area before starting */
+.p2align 2
+L$cbc_prefetch_te:
+ movq 0(%r14),%r10
+ movq 32(%r14),%r11
+ movq 64(%r14),%r12
+ movq 96(%r14),%r13
+ leaq 128(%r14),%r14
+ subl $1,%ecx
+ jnz L$cbc_prefetch_te
+ leaq -2304(%r14),%r14 /* r14 back to the table base */
+
+ cmpq $0,%rbx /* enc flag */
+ je L$FAST_DECRYPT
+
+
+ movl 0(%rbp),%eax /* chaining value starts as the IV */
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+
+.p2align 2
+L$cbc_fast_enc_loop:
+ xorl 0(%r8),%eax /* plaintext block xor previous ciphertext (or IV) */
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15 /* reload the key pointer for every block */
+ movq %r8,24(%rsp) /* the input pointer is kept in its slot across the call */
+
+ call _x86_64_AES_encrypt
+
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9) /* store the ciphertext block; it stays in eax..edx as the next chaining value */
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10 /* at least one more full block left? */
+ movq %r10,40(%rsp) /* mov leaves the flags from testq intact */
+ jnz L$cbc_fast_enc_loop
+ movq 56(%rsp),%rbp
+ movl %eax,0(%rbp) /* write the last ciphertext block back as the new IV */
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+
+ jmp L$cbc_fast_cleanup
+
+
+.p2align 4
+L$FAST_DECRYPT:
+ cmpq %r8,%r9 /* in == out needs the in-place variant, which buffers the ciphertext */
+ je L$cbc_fast_dec_in_place
+
+ movq %rbp,64(%rsp) /* slot 64: pointer to the previous ciphertext block, initially the IV */
+.p2align 2
+L$cbc_fast_dec_loop:
+ movl 0(%r8),%eax /* load one ciphertext block */
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_decrypt
+
+ movq 64(%rsp),%rbp
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0(%rbp),%eax /* xor with the previous ciphertext block (or IV) */
+ xorl 4(%rbp),%ebx
+ xorl 8(%rbp),%ecx
+ xorl 12(%rbp),%edx
+ movq %r8,%rbp /* the block just consumed becomes the previous block */
+
+ subq $16,%r10 /* sets ZF for the jnz below; only mov and lea follow, which keep flags */
+ movq %r10,40(%rsp)
+ movq %rbp,64(%rsp)
+
+ movl %eax,0(%r9) /* store the plaintext block */
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jnz L$cbc_fast_dec_loop
+ movq 56(%rsp),%r12
+ movq 0(%rbp),%r10 /* copy the last ciphertext block into the caller IV */
+ movq 8(%rbp),%r11
+ movq %r10,0(%r12)
+ movq %r11,8(%r12)
+ jmp L$cbc_fast_cleanup
+
+.p2align 4
+L$cbc_fast_dec_in_place:
+ movq 0(%rbp),%r10 /* keep a 16-byte copy of the chaining value at 64(%rsp) */
+ movq 8(%rbp),%r11
+ movq %r10,0+64(%rsp)
+ movq %r11,8+64(%rsp)
+.p2align 2
+L$cbc_fast_dec_in_place_loop:
+ movl 0(%r8),%eax /* load one ciphertext block */
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_decrypt
+
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax /* xor with the buffered chaining value */
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+
+ movq 0(%r8),%r11 /* grab the ciphertext before the plaintext overwrites it */
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jz L$cbc_fast_dec_in_place_done
+
+ movq %r11,0+64(%rsp) /* it becomes the chaining value for the next block */
+ movq %r12,8+64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ movq %r10,40(%rsp)
+ jmp L$cbc_fast_dec_in_place_loop
+L$cbc_fast_dec_in_place_done:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi) /* last ciphertext block becomes the caller IV */
+ movq %r12,8(%rdi)
+
+ movl %eax,0(%r9) /* then store the last plaintext block */
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+.p2align 2
+L$cbc_fast_cleanup:
+ cmpl $0,80+240(%rsp) /* was the key copied to the stack? */
+ leaq 80(%rsp),%rdi /* lea does not disturb the flags from cmpl */
+ je L$cbc_exit
+ movl $30,%ecx
+ xorq %rax,%rax
+.long 0x90AB48F3 /* bytes F3 48 AB 90 = rep stosq; nop - zeroes the 240-byte key copy */
+
+ jmp L$cbc_exit
+
+
+.p2align 4
+L$cbc_slow_prologue:
+
+ leaq -88(%rsp),%rbp /* slow path frame: 88 bytes of slots, 64-byte aligned */
+ andq $-64,%rbp
+
+ leaq -88-63(%rcx),%r10
+ subq %rbp,%r10
+ negq %r10
+ andq $960,%r10 /* bits 6..9 of (frame - (key - 88 - 63)) */
+ subq %r10,%rbp /* lower the frame by that multiple of 64; NOTE(review): presumably the same anti-aliasing placement as in the single-block entry points - confirm */
+
+ xchgq %rsp,%rbp /* switch to the new frame; rbp = previous stack pointer */
+
+ movq %rbp,16(%rsp) /* slot 16: previous stack pointer, used by L$cbc_exit */
+L$cbc_slow_body:
+
+
+
+
+ movq %r8,56(%rsp) /* slot 56: IV pointer */
+ movq %r8,%rbp /* rbp = IV, rbx = enc flag, r9 = out, r8 = in, r15 = key, r10 = length */
+ movq %r9,%rbx
+ movq %rsi,%r9
+ movq %rdi,%r8
+ movq %rcx,%r15
+ movq %rdx,%r10
+
+ movl 240(%r15),%eax /* round count */
+ movq %r15,0(%rsp) /* slot 0: key pointer */
+ shll $4,%eax
+ leaq (%r15,%rax,1),%rax
+ movq %rax,8(%rsp) /* slot 8: key + rounds*16, read by the compact routines as 16(%rsp) */
+
+
+ leaq 2048(%r14),%r14 /* byte-table area of the selected table */
+ leaq 768-8(%rsp),%rax
+ subq %r14,%rax
+ andq $768,%rax /* 0, 256, 512 or 768, derived from the frame address */
+ leaq (%r14,%rax,1),%r14 /* pick the table copy at that offset */
+
+ cmpq $0,%rbx /* enc flag */
+ je L$SLOW_DECRYPT
+
+
+ testq $-16,%r10 /* fewer than 16 bytes in total? (the loads below keep the flags) */
+ movl 0(%rbp),%eax /* chaining value starts as the IV */
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+ jz L$cbc_slow_enc_tail
+
+.p2align 2
+L$cbc_slow_enc_loop:
+ xorl 0(%r8),%eax /* plaintext block xor chaining value */
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp) /* in, out and length are kept in their slots across the call */
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+
+ call _x86_64_AES_encrypt_compact
+
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9) /* store the ciphertext block */
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10 /* another full block? */
+ jnz L$cbc_slow_enc_loop
+ testq $15,%r10 /* 1..15 trailing bytes? */
+ jnz L$cbc_slow_enc_tail
+ movq 56(%rsp),%rbp
+ movl %eax,0(%rbp) /* write the last ciphertext block back as the new IV */
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+
+ jmp L$cbc_exit
+
+.p2align 2
+L$cbc_slow_enc_tail:
+ movq %rax,%r11 /* park eax and ecx: the string instructions need rax and rcx */
+ movq %rcx,%r12
+ movq %r10,%rcx /* rcx = number of trailing bytes */
+ movq %r8,%rsi
+ movq %r9,%rdi
+.long 0x9066A4F3 /* bytes F3 A4 66 90 = rep movsb; 2-byte nop - copy the trailing bytes to the output buffer */
+ movq $16,%rcx
+ subq %r10,%rcx /* rcx = 16 - trailing bytes */
+ xorq %rax,%rax
+.long 0x9066AAF3 /* bytes F3 AA 66 90 = rep stosb; 2-byte nop - zero-fill the rest of the 16-byte block */
+ movq %r9,%r8 /* encrypt the padded block in place in the output buffer (a full 16 bytes are written there) */
+ movq $16,%r10
+ movq %r11,%rax
+ movq %r12,%rcx
+ jmp L$cbc_slow_enc_loop
+
+.p2align 4
+L$SLOW_DECRYPT:
+ shrq $3,%rax /* decrypt table copies are 288 bytes apart: add offset/8 on top of the offset already applied */
+ addq %rax,%r14
+
+ movq 0(%rbp),%r11 /* keep a 16-byte copy of the chaining value at 64(%rsp) */
+ movq 8(%rbp),%r12
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+.p2align 2
+L$cbc_slow_dec_loop:
+ movl 0(%r8),%eax /* load 16 bytes of ciphertext */
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+
+ call _x86_64_AES_decrypt_compact
+
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax /* xor with the buffered chaining value */
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+
+ movq 0(%r8),%r11 /* r11:r12 = this ciphertext block, the next chaining value */
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jc L$cbc_slow_dec_partial /* fewer than 16 bytes were left: emit only part of the block */
+ jz L$cbc_slow_dec_done /* exactly 16 bytes were left */
+
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+ movl %eax,0(%r9) /* store the plaintext block */
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jmp L$cbc_slow_dec_loop
+L$cbc_slow_dec_done:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi) /* last ciphertext block becomes the caller IV */
+ movq %r12,8(%rdi)
+
+ movl %eax,0(%r9) /* then store the last plaintext block */
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ jmp L$cbc_exit
+
+.p2align 2
+L$cbc_slow_dec_partial:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi) /* update the caller IV with the last ciphertext bytes read */
+ movq %r12,8(%rdi)
+
+ movl %eax,0+64(%rsp) /* stage the decrypted block on the stack */
+ movl %ebx,4+64(%rsp)
+ movl %ecx,8+64(%rsp)
+ movl %edx,12+64(%rsp)
+
+ movq %r9,%rdi
+ leaq 64(%rsp),%rsi
+ leaq 16(%r10),%rcx /* r10 went negative above; r10 + 16 = number of bytes that were left */
+.long 0x9066A4F3 /* bytes F3 A4 66 90 = rep movsb; 2-byte nop - copy just those bytes to the output */
+ jmp L$cbc_exit
+
+.p2align 4
+L$cbc_exit:
+ movq 16(%rsp),%rsi /* rsi = stack pointer saved before the frame switch */
+ movq (%rsi),%r15 /* reload the six pushed registers */
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp /* rsp now points at the saved flags */
+L$cbc_popfq:
+ popfq /* restore the caller flags, including the direction flag */
+L$cbc_epilogue:
+ retq
+
+.p2align 6
+L$AES_Te:
+.long 0xa56363c6,0xa56363c6
+.long 0x847c7cf8,0x847c7cf8
+.long 0x997777ee,0x997777ee
+.long 0x8d7b7bf6,0x8d7b7bf6
+.long 0x0df2f2ff,0x0df2f2ff
+.long 0xbd6b6bd6,0xbd6b6bd6
+.long 0xb16f6fde,0xb16f6fde
+.long 0x54c5c591,0x54c5c591
+.long 0x50303060,0x50303060
+.long 0x03010102,0x03010102
+.long 0xa96767ce,0xa96767ce
+.long 0x7d2b2b56,0x7d2b2b56
+.long 0x19fefee7,0x19fefee7
+.long 0x62d7d7b5,0x62d7d7b5
+.long 0xe6abab4d,0xe6abab4d
+.long 0x9a7676ec,0x9a7676ec
+.long 0x45caca8f,0x45caca8f
+.long 0x9d82821f,0x9d82821f
+.long 0x40c9c989,0x40c9c989
+.long 0x877d7dfa,0x877d7dfa
+.long 0x15fafaef,0x15fafaef
+.long 0xeb5959b2,0xeb5959b2
+.long 0xc947478e,0xc947478e
+.long 0x0bf0f0fb,0x0bf0f0fb
+.long 0xecadad41,0xecadad41
+.long 0x67d4d4b3,0x67d4d4b3
+.long 0xfda2a25f,0xfda2a25f
+.long 0xeaafaf45,0xeaafaf45
+.long 0xbf9c9c23,0xbf9c9c23
+.long 0xf7a4a453,0xf7a4a453
+.long 0x967272e4,0x967272e4
+.long 0x5bc0c09b,0x5bc0c09b
+.long 0xc2b7b775,0xc2b7b775
+.long 0x1cfdfde1,0x1cfdfde1
+.long 0xae93933d,0xae93933d
+.long 0x6a26264c,0x6a26264c
+.long 0x5a36366c,0x5a36366c
+.long 0x413f3f7e,0x413f3f7e
+.long 0x02f7f7f5,0x02f7f7f5
+.long 0x4fcccc83,0x4fcccc83
+.long 0x5c343468,0x5c343468
+.long 0xf4a5a551,0xf4a5a551
+.long 0x34e5e5d1,0x34e5e5d1
+.long 0x08f1f1f9,0x08f1f1f9
+.long 0x937171e2,0x937171e2
+.long 0x73d8d8ab,0x73d8d8ab
+.long 0x53313162,0x53313162
+.long 0x3f15152a,0x3f15152a
+.long 0x0c040408,0x0c040408
+.long 0x52c7c795,0x52c7c795
+.long 0x65232346,0x65232346
+.long 0x5ec3c39d,0x5ec3c39d
+.long 0x28181830,0x28181830
+.long 0xa1969637,0xa1969637
+.long 0x0f05050a,0x0f05050a
+.long 0xb59a9a2f,0xb59a9a2f
+.long 0x0907070e,0x0907070e
+.long 0x36121224,0x36121224
+.long 0x9b80801b,0x9b80801b
+.long 0x3de2e2df,0x3de2e2df
+.long 0x26ebebcd,0x26ebebcd
+.long 0x6927274e,0x6927274e
+.long 0xcdb2b27f,0xcdb2b27f
+.long 0x9f7575ea,0x9f7575ea
+.long 0x1b090912,0x1b090912
+.long 0x9e83831d,0x9e83831d
+.long 0x742c2c58,0x742c2c58
+.long 0x2e1a1a34,0x2e1a1a34
+.long 0x2d1b1b36,0x2d1b1b36
+.long 0xb26e6edc,0xb26e6edc
+.long 0xee5a5ab4,0xee5a5ab4
+.long 0xfba0a05b,0xfba0a05b
+.long 0xf65252a4,0xf65252a4
+.long 0x4d3b3b76,0x4d3b3b76
+.long 0x61d6d6b7,0x61d6d6b7
+.long 0xceb3b37d,0xceb3b37d
+.long 0x7b292952,0x7b292952
+.long 0x3ee3e3dd,0x3ee3e3dd
+.long 0x712f2f5e,0x712f2f5e
+.long 0x97848413,0x97848413
+.long 0xf55353a6,0xf55353a6
+.long 0x68d1d1b9,0x68d1d1b9
+.long 0x00000000,0x00000000
+.long 0x2cededc1,0x2cededc1
+.long 0x60202040,0x60202040
+.long 0x1ffcfce3,0x1ffcfce3
+.long 0xc8b1b179,0xc8b1b179
+.long 0xed5b5bb6,0xed5b5bb6
+.long 0xbe6a6ad4,0xbe6a6ad4
+.long 0x46cbcb8d,0x46cbcb8d
+.long 0xd9bebe67,0xd9bebe67
+.long 0x4b393972,0x4b393972
+.long 0xde4a4a94,0xde4a4a94
+.long 0xd44c4c98,0xd44c4c98
+.long 0xe85858b0,0xe85858b0
+.long 0x4acfcf85,0x4acfcf85
+.long 0x6bd0d0bb,0x6bd0d0bb
+.long 0x2aefefc5,0x2aefefc5
+.long 0xe5aaaa4f,0xe5aaaa4f
+.long 0x16fbfbed,0x16fbfbed
+.long 0xc5434386,0xc5434386
+.long 0xd74d4d9a,0xd74d4d9a
+.long 0x55333366,0x55333366
+.long 0x94858511,0x94858511
+.long 0xcf45458a,0xcf45458a
+.long 0x10f9f9e9,0x10f9f9e9
+.long 0x06020204,0x06020204
+.long 0x817f7ffe,0x817f7ffe
+.long 0xf05050a0,0xf05050a0
+.long 0x443c3c78,0x443c3c78
+.long 0xba9f9f25,0xba9f9f25
+.long 0xe3a8a84b,0xe3a8a84b
+.long 0xf35151a2,0xf35151a2
+.long 0xfea3a35d,0xfea3a35d
+.long 0xc0404080,0xc0404080
+.long 0x8a8f8f05,0x8a8f8f05
+.long 0xad92923f,0xad92923f
+.long 0xbc9d9d21,0xbc9d9d21
+.long 0x48383870,0x48383870
+.long 0x04f5f5f1,0x04f5f5f1
+.long 0xdfbcbc63,0xdfbcbc63
+.long 0xc1b6b677,0xc1b6b677
+.long 0x75dadaaf,0x75dadaaf
+.long 0x63212142,0x63212142
+.long 0x30101020,0x30101020
+.long 0x1affffe5,0x1affffe5
+.long 0x0ef3f3fd,0x0ef3f3fd
+.long 0x6dd2d2bf,0x6dd2d2bf
+.long 0x4ccdcd81,0x4ccdcd81
+.long 0x140c0c18,0x140c0c18
+.long 0x35131326,0x35131326
+.long 0x2fececc3,0x2fececc3
+.long 0xe15f5fbe,0xe15f5fbe
+.long 0xa2979735,0xa2979735
+.long 0xcc444488,0xcc444488
+.long 0x3917172e,0x3917172e
+.long 0x57c4c493,0x57c4c493
+.long 0xf2a7a755,0xf2a7a755
+.long 0x827e7efc,0x827e7efc
+.long 0x473d3d7a,0x473d3d7a
+.long 0xac6464c8,0xac6464c8
+.long 0xe75d5dba,0xe75d5dba
+.long 0x2b191932,0x2b191932
+.long 0x957373e6,0x957373e6
+.long 0xa06060c0,0xa06060c0
+.long 0x98818119,0x98818119
+.long 0xd14f4f9e,0xd14f4f9e
+.long 0x7fdcdca3,0x7fdcdca3
+.long 0x66222244,0x66222244
+.long 0x7e2a2a54,0x7e2a2a54
+.long 0xab90903b,0xab90903b
+.long 0x8388880b,0x8388880b
+.long 0xca46468c,0xca46468c
+.long 0x29eeeec7,0x29eeeec7
+.long 0xd3b8b86b,0xd3b8b86b
+.long 0x3c141428,0x3c141428
+.long 0x79dedea7,0x79dedea7
+.long 0xe25e5ebc,0xe25e5ebc
+.long 0x1d0b0b16,0x1d0b0b16
+.long 0x76dbdbad,0x76dbdbad
+.long 0x3be0e0db,0x3be0e0db
+.long 0x56323264,0x56323264
+.long 0x4e3a3a74,0x4e3a3a74
+.long 0x1e0a0a14,0x1e0a0a14
+.long 0xdb494992,0xdb494992
+.long 0x0a06060c,0x0a06060c
+.long 0x6c242448,0x6c242448
+.long 0xe45c5cb8,0xe45c5cb8
+.long 0x5dc2c29f,0x5dc2c29f
+.long 0x6ed3d3bd,0x6ed3d3bd
+.long 0xefacac43,0xefacac43
+.long 0xa66262c4,0xa66262c4
+.long 0xa8919139,0xa8919139
+.long 0xa4959531,0xa4959531
+.long 0x37e4e4d3,0x37e4e4d3
+.long 0x8b7979f2,0x8b7979f2
+.long 0x32e7e7d5,0x32e7e7d5
+.long 0x43c8c88b,0x43c8c88b
+.long 0x5937376e,0x5937376e
+.long 0xb76d6dda,0xb76d6dda
+.long 0x8c8d8d01,0x8c8d8d01
+.long 0x64d5d5b1,0x64d5d5b1
+.long 0xd24e4e9c,0xd24e4e9c
+.long 0xe0a9a949,0xe0a9a949
+.long 0xb46c6cd8,0xb46c6cd8
+.long 0xfa5656ac,0xfa5656ac
+.long 0x07f4f4f3,0x07f4f4f3
+.long 0x25eaeacf,0x25eaeacf
+.long 0xaf6565ca,0xaf6565ca
+.long 0x8e7a7af4,0x8e7a7af4
+.long 0xe9aeae47,0xe9aeae47
+.long 0x18080810,0x18080810
+.long 0xd5baba6f,0xd5baba6f
+.long 0x887878f0,0x887878f0
+.long 0x6f25254a,0x6f25254a
+.long 0x722e2e5c,0x722e2e5c
+.long 0x241c1c38,0x241c1c38
+.long 0xf1a6a657,0xf1a6a657
+.long 0xc7b4b473,0xc7b4b473
+.long 0x51c6c697,0x51c6c697
+.long 0x23e8e8cb,0x23e8e8cb
+.long 0x7cdddda1,0x7cdddda1
+.long 0x9c7474e8,0x9c7474e8
+.long 0x211f1f3e,0x211f1f3e
+.long 0xdd4b4b96,0xdd4b4b96
+.long 0xdcbdbd61,0xdcbdbd61
+.long 0x868b8b0d,0x868b8b0d
+.long 0x858a8a0f,0x858a8a0f
+.long 0x907070e0,0x907070e0
+.long 0x423e3e7c,0x423e3e7c
+.long 0xc4b5b571,0xc4b5b571
+.long 0xaa6666cc,0xaa6666cc
+.long 0xd8484890,0xd8484890
+.long 0x05030306,0x05030306
+.long 0x01f6f6f7,0x01f6f6f7
+.long 0x120e0e1c,0x120e0e1c
+.long 0xa36161c2,0xa36161c2
+.long 0x5f35356a,0x5f35356a
+.long 0xf95757ae,0xf95757ae
+.long 0xd0b9b969,0xd0b9b969
+.long 0x91868617,0x91868617
+.long 0x58c1c199,0x58c1c199
+.long 0x271d1d3a,0x271d1d3a
+.long 0xb99e9e27,0xb99e9e27
+.long 0x38e1e1d9,0x38e1e1d9
+.long 0x13f8f8eb,0x13f8f8eb
+.long 0xb398982b,0xb398982b
+.long 0x33111122,0x33111122
+.long 0xbb6969d2,0xbb6969d2
+.long 0x70d9d9a9,0x70d9d9a9
+.long 0x898e8e07,0x898e8e07
+.long 0xa7949433,0xa7949433
+.long 0xb69b9b2d,0xb69b9b2d
+.long 0x221e1e3c,0x221e1e3c
+.long 0x92878715,0x92878715
+.long 0x20e9e9c9,0x20e9e9c9
+.long 0x49cece87,0x49cece87
+.long 0xff5555aa,0xff5555aa
+.long 0x78282850,0x78282850
+.long 0x7adfdfa5,0x7adfdfa5
+.long 0x8f8c8c03,0x8f8c8c03
+.long 0xf8a1a159,0xf8a1a159
+.long 0x80898909,0x80898909
+.long 0x170d0d1a,0x170d0d1a
+.long 0xdabfbf65,0xdabfbf65
+.long 0x31e6e6d7,0x31e6e6d7
+.long 0xc6424284,0xc6424284
+.long 0xb86868d0,0xb86868d0
+.long 0xc3414182,0xc3414182
+.long 0xb0999929,0xb0999929
+.long 0x772d2d5a,0x772d2d5a
+.long 0x110f0f1e,0x110f0f1e
+.long 0xcbb0b07b,0xcbb0b07b
+.long 0xfc5454a8,0xfc5454a8
+.long 0xd6bbbb6d,0xd6bbbb6d
+.long 0x3a16162c,0x3a16162c
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.long 0x00000001, 0x00000002, 0x00000004, 0x00000008
+.long 0x00000010, 0x00000020, 0x00000040, 0x00000080
+.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080
+.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
+/*
+ * L$AES_Td -- lookup data; going by the label name this is presumably the
+ * AES decryption table set (confirm against the code that references it).
+ * Layout as emitted below, in order:
+ *   - 256 entries of 8 bytes each, every entry being one 32-bit word
+ *     stored twice (2048 bytes in total);
+ *   - four identical copies of a 256-byte table, each copy followed by
+ *     32 bytes of constants: 0x80808080, 0xfefefefe and 0x1b1b1b1b, each
+ *     stored twice, then two zero words;
+ *   - a NUL-terminated ASCII string
+ *     ("AES for x86_64, CRYPTOGAMS by <appro@openssl.org>").
+ * The start of the table is aligned to 64 bytes.
+ */
+.p2align 6
+L$AES_Td:
+.long 0x50a7f451,0x50a7f451
+.long 0x5365417e,0x5365417e
+.long 0xc3a4171a,0xc3a4171a
+.long 0x965e273a,0x965e273a
+.long 0xcb6bab3b,0xcb6bab3b
+.long 0xf1459d1f,0xf1459d1f
+.long 0xab58faac,0xab58faac
+.long 0x9303e34b,0x9303e34b
+.long 0x55fa3020,0x55fa3020
+.long 0xf66d76ad,0xf66d76ad
+.long 0x9176cc88,0x9176cc88
+.long 0x254c02f5,0x254c02f5
+.long 0xfcd7e54f,0xfcd7e54f
+.long 0xd7cb2ac5,0xd7cb2ac5
+.long 0x80443526,0x80443526
+.long 0x8fa362b5,0x8fa362b5
+.long 0x495ab1de,0x495ab1de
+.long 0x671bba25,0x671bba25
+.long 0x980eea45,0x980eea45
+.long 0xe1c0fe5d,0xe1c0fe5d
+.long 0x02752fc3,0x02752fc3
+.long 0x12f04c81,0x12f04c81
+.long 0xa397468d,0xa397468d
+.long 0xc6f9d36b,0xc6f9d36b
+.long 0xe75f8f03,0xe75f8f03
+.long 0x959c9215,0x959c9215
+.long 0xeb7a6dbf,0xeb7a6dbf
+.long 0xda595295,0xda595295
+.long 0x2d83bed4,0x2d83bed4
+.long 0xd3217458,0xd3217458
+.long 0x2969e049,0x2969e049
+.long 0x44c8c98e,0x44c8c98e
+.long 0x6a89c275,0x6a89c275
+.long 0x78798ef4,0x78798ef4
+.long 0x6b3e5899,0x6b3e5899
+.long 0xdd71b927,0xdd71b927
+.long 0xb64fe1be,0xb64fe1be
+.long 0x17ad88f0,0x17ad88f0
+.long 0x66ac20c9,0x66ac20c9
+.long 0xb43ace7d,0xb43ace7d
+.long 0x184adf63,0x184adf63
+.long 0x82311ae5,0x82311ae5
+.long 0x60335197,0x60335197
+.long 0x457f5362,0x457f5362
+.long 0xe07764b1,0xe07764b1
+.long 0x84ae6bbb,0x84ae6bbb
+.long 0x1ca081fe,0x1ca081fe
+.long 0x942b08f9,0x942b08f9
+.long 0x58684870,0x58684870
+.long 0x19fd458f,0x19fd458f
+.long 0x876cde94,0x876cde94
+.long 0xb7f87b52,0xb7f87b52
+.long 0x23d373ab,0x23d373ab
+.long 0xe2024b72,0xe2024b72
+.long 0x578f1fe3,0x578f1fe3
+.long 0x2aab5566,0x2aab5566
+.long 0x0728ebb2,0x0728ebb2
+.long 0x03c2b52f,0x03c2b52f
+.long 0x9a7bc586,0x9a7bc586
+.long 0xa50837d3,0xa50837d3
+.long 0xf2872830,0xf2872830
+.long 0xb2a5bf23,0xb2a5bf23
+.long 0xba6a0302,0xba6a0302
+.long 0x5c8216ed,0x5c8216ed
+.long 0x2b1ccf8a,0x2b1ccf8a
+.long 0x92b479a7,0x92b479a7
+.long 0xf0f207f3,0xf0f207f3
+.long 0xa1e2694e,0xa1e2694e
+.long 0xcdf4da65,0xcdf4da65
+.long 0xd5be0506,0xd5be0506
+.long 0x1f6234d1,0x1f6234d1
+.long 0x8afea6c4,0x8afea6c4
+.long 0x9d532e34,0x9d532e34
+.long 0xa055f3a2,0xa055f3a2
+.long 0x32e18a05,0x32e18a05
+.long 0x75ebf6a4,0x75ebf6a4
+.long 0x39ec830b,0x39ec830b
+.long 0xaaef6040,0xaaef6040
+.long 0x069f715e,0x069f715e
+.long 0x51106ebd,0x51106ebd
+.long 0xf98a213e,0xf98a213e
+.long 0x3d06dd96,0x3d06dd96
+.long 0xae053edd,0xae053edd
+.long 0x46bde64d,0x46bde64d
+.long 0xb58d5491,0xb58d5491
+.long 0x055dc471,0x055dc471
+.long 0x6fd40604,0x6fd40604
+.long 0xff155060,0xff155060
+.long 0x24fb9819,0x24fb9819
+.long 0x97e9bdd6,0x97e9bdd6
+.long 0xcc434089,0xcc434089
+.long 0x779ed967,0x779ed967
+.long 0xbd42e8b0,0xbd42e8b0
+.long 0x888b8907,0x888b8907
+.long 0x385b19e7,0x385b19e7
+.long 0xdbeec879,0xdbeec879
+.long 0x470a7ca1,0x470a7ca1
+.long 0xe90f427c,0xe90f427c
+.long 0xc91e84f8,0xc91e84f8
+.long 0x00000000,0x00000000
+.long 0x83868009,0x83868009
+.long 0x48ed2b32,0x48ed2b32
+.long 0xac70111e,0xac70111e
+.long 0x4e725a6c,0x4e725a6c
+.long 0xfbff0efd,0xfbff0efd
+.long 0x5638850f,0x5638850f
+.long 0x1ed5ae3d,0x1ed5ae3d
+.long 0x27392d36,0x27392d36
+.long 0x64d90f0a,0x64d90f0a
+.long 0x21a65c68,0x21a65c68
+.long 0xd1545b9b,0xd1545b9b
+.long 0x3a2e3624,0x3a2e3624
+.long 0xb1670a0c,0xb1670a0c
+.long 0x0fe75793,0x0fe75793
+.long 0xd296eeb4,0xd296eeb4
+.long 0x9e919b1b,0x9e919b1b
+.long 0x4fc5c080,0x4fc5c080
+.long 0xa220dc61,0xa220dc61
+.long 0x694b775a,0x694b775a
+.long 0x161a121c,0x161a121c
+.long 0x0aba93e2,0x0aba93e2
+.long 0xe52aa0c0,0xe52aa0c0
+.long 0x43e0223c,0x43e0223c
+.long 0x1d171b12,0x1d171b12
+.long 0x0b0d090e,0x0b0d090e
+.long 0xadc78bf2,0xadc78bf2
+.long 0xb9a8b62d,0xb9a8b62d
+.long 0xc8a91e14,0xc8a91e14
+.long 0x8519f157,0x8519f157
+.long 0x4c0775af,0x4c0775af
+.long 0xbbdd99ee,0xbbdd99ee
+.long 0xfd607fa3,0xfd607fa3
+.long 0x9f2601f7,0x9f2601f7
+.long 0xbcf5725c,0xbcf5725c
+.long 0xc53b6644,0xc53b6644
+.long 0x347efb5b,0x347efb5b
+.long 0x7629438b,0x7629438b
+.long 0xdcc623cb,0xdcc623cb
+.long 0x68fcedb6,0x68fcedb6
+.long 0x63f1e4b8,0x63f1e4b8
+.long 0xcadc31d7,0xcadc31d7
+.long 0x10856342,0x10856342
+.long 0x40229713,0x40229713
+.long 0x2011c684,0x2011c684
+.long 0x7d244a85,0x7d244a85
+.long 0xf83dbbd2,0xf83dbbd2
+.long 0x1132f9ae,0x1132f9ae
+.long 0x6da129c7,0x6da129c7
+.long 0x4b2f9e1d,0x4b2f9e1d
+.long 0xf330b2dc,0xf330b2dc
+.long 0xec52860d,0xec52860d
+.long 0xd0e3c177,0xd0e3c177
+.long 0x6c16b32b,0x6c16b32b
+.long 0x99b970a9,0x99b970a9
+.long 0xfa489411,0xfa489411
+.long 0x2264e947,0x2264e947
+.long 0xc48cfca8,0xc48cfca8
+.long 0x1a3ff0a0,0x1a3ff0a0
+.long 0xd82c7d56,0xd82c7d56
+.long 0xef903322,0xef903322
+.long 0xc74e4987,0xc74e4987
+.long 0xc1d138d9,0xc1d138d9
+.long 0xfea2ca8c,0xfea2ca8c
+.long 0x360bd498,0x360bd498
+.long 0xcf81f5a6,0xcf81f5a6
+.long 0x28de7aa5,0x28de7aa5
+.long 0x268eb7da,0x268eb7da
+.long 0xa4bfad3f,0xa4bfad3f
+.long 0xe49d3a2c,0xe49d3a2c
+.long 0x0d927850,0x0d927850
+.long 0x9bcc5f6a,0x9bcc5f6a
+.long 0x62467e54,0x62467e54
+.long 0xc2138df6,0xc2138df6
+.long 0xe8b8d890,0xe8b8d890
+.long 0x5ef7392e,0x5ef7392e
+.long 0xf5afc382,0xf5afc382
+.long 0xbe805d9f,0xbe805d9f
+.long 0x7c93d069,0x7c93d069
+.long 0xa92dd56f,0xa92dd56f
+.long 0xb31225cf,0xb31225cf
+.long 0x3b99acc8,0x3b99acc8
+.long 0xa77d1810,0xa77d1810
+.long 0x6e639ce8,0x6e639ce8
+.long 0x7bbb3bdb,0x7bbb3bdb
+.long 0x097826cd,0x097826cd
+.long 0xf418596e,0xf418596e
+.long 0x01b79aec,0x01b79aec
+.long 0xa89a4f83,0xa89a4f83
+.long 0x656e95e6,0x656e95e6
+.long 0x7ee6ffaa,0x7ee6ffaa
+.long 0x08cfbc21,0x08cfbc21
+.long 0xe6e815ef,0xe6e815ef
+.long 0xd99be7ba,0xd99be7ba
+.long 0xce366f4a,0xce366f4a
+.long 0xd4099fea,0xd4099fea
+.long 0xd67cb029,0xd67cb029
+.long 0xafb2a431,0xafb2a431
+.long 0x31233f2a,0x31233f2a
+.long 0x3094a5c6,0x3094a5c6
+.long 0xc066a235,0xc066a235
+.long 0x37bc4e74,0x37bc4e74
+.long 0xa6ca82fc,0xa6ca82fc
+.long 0xb0d090e0,0xb0d090e0
+.long 0x15d8a733,0x15d8a733
+.long 0x4a9804f1,0x4a9804f1
+.long 0xf7daec41,0xf7daec41
+.long 0x0e50cd7f,0x0e50cd7f
+.long 0x2ff69117,0x2ff69117
+.long 0x8dd64d76,0x8dd64d76
+.long 0x4db0ef43,0x4db0ef43
+.long 0x544daacc,0x544daacc
+.long 0xdf0496e4,0xdf0496e4
+.long 0xe3b5d19e,0xe3b5d19e
+.long 0x1b886a4c,0x1b886a4c
+.long 0xb81f2cc1,0xb81f2cc1
+.long 0x7f516546,0x7f516546
+.long 0x04ea5e9d,0x04ea5e9d
+.long 0x5d358c01,0x5d358c01
+.long 0x737487fa,0x737487fa
+.long 0x2e410bfb,0x2e410bfb
+.long 0x5a1d67b3,0x5a1d67b3
+.long 0x52d2db92,0x52d2db92
+.long 0x335610e9,0x335610e9
+.long 0x1347d66d,0x1347d66d
+.long 0x8c61d79a,0x8c61d79a
+.long 0x7a0ca137,0x7a0ca137
+.long 0x8e14f859,0x8e14f859
+.long 0x893c13eb,0x893c13eb
+.long 0xee27a9ce,0xee27a9ce
+.long 0x35c961b7,0x35c961b7
+.long 0xede51ce1,0xede51ce1
+.long 0x3cb1477a,0x3cb1477a
+.long 0x59dfd29c,0x59dfd29c
+.long 0x3f73f255,0x3f73f255
+.long 0x79ce1418,0x79ce1418
+.long 0xbf37c773,0xbf37c773
+.long 0xeacdf753,0xeacdf753
+.long 0x5baafd5f,0x5baafd5f
+.long 0x146f3ddf,0x146f3ddf
+.long 0x86db4478,0x86db4478
+.long 0x81f3afca,0x81f3afca
+.long 0x3ec468b9,0x3ec468b9
+.long 0x2c342438,0x2c342438
+.long 0x5f40a3c2,0x5f40a3c2
+.long 0x72c31d16,0x72c31d16
+.long 0x0c25e2bc,0x0c25e2bc
+.long 0x8b493c28,0x8b493c28
+.long 0x41950dff,0x41950dff
+.long 0x7101a839,0x7101a839
+.long 0xdeb30c08,0xdeb30c08
+.long 0x9ce4b4d8,0x9ce4b4d8
+.long 0x90c15664,0x90c15664
+.long 0x6184cb7b,0x6184cb7b
+.long 0x70b632d5,0x70b632d5
+.long 0x745c6c48,0x745c6c48
+.long 0x4257b8d0,0x4257b8d0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align 6
diff --git a/ext/libressl/crypto/aes/aes-masm-x86_64.S b/ext/libressl/crypto/aes/aes-masm-x86_64.S
new file mode 100644
index 0000000..9094c72
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes-masm-x86_64.S
@@ -0,0 +1,2948 @@
+; 1 "crypto/aes/aes-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/aes/aes-masm-x86_64.S.tmp" 2
+OPTION DOTNAME
+
+; 1 "./crypto/x86_arch.h" 1
+
+
+; 16 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+; 40 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 "crypto/aes/aes-masm-x86_64.S.tmp" 2
+
+.text$ SEGMENT ALIGN(64) 'CODE'
+
+ALIGN 16
+;-----------------------------------------------------------------------
+; _x86_64_AES_encrypt -- private helper that transforms one 16-byte
+; state held in registers, using lookups into a table of 8-byte entries.
+;
+; In:    eax, ebx, ecx, edx = the four 32-bit state words
+;        r15 = key schedule: 16 bytes are consumed per round key and the
+;              DWORD at offset 240 is read as the round count
+;        r14 = table base; entries are addressed as byte*8 and read at
+;              byte offsets 0..3 (presumably every entry is one 32-bit
+;              word stored twice, so the offsets yield byte rotations of
+;              that word -- confirm against the table definition)
+; Out:   eax, ebx, ecx, edx = resulting state words
+; Clobb: rsi, rdi, rbp, r8, r10, r11, r12, r13, flags.
+;        r15 is advanced by 16 per loop iteration and is NOT restored.
+; Stack: none used; this is a leaf routine.
+;-----------------------------------------------------------------------
+_x86_64_AES_encrypt PROC PRIVATE
+; Initial key addition: state ^= the 16 bytes at r15.
+ xor eax,DWORD PTR[r15]
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+
+; r13d = (round count at key+240) - 1 = number of loop iterations below.
+ mov r13d,DWORD PTR[240+r15]
+ sub r13d,1
+; Skip the alignment padding and enter the 16-byte aligned loop head.
+ jmp $L$enc_loop
+ALIGN 16
+$L$enc_loop::
+
+; Each iteration builds four new words in r10d, r11d, r12d and r8d.
+; Output word j is the XOR of four table reads, one per byte position k
+; of input word (j+k) mod 4. Byte 0 is read at entry offset 0, byte 1 at
+; offset 3, byte 2 at offset 2 and byte 3 at offset 1.
+;
+; Byte 0 of words 0..2 (offset 0).
+ movzx esi,al
+ movzx edi,bl
+ movzx ebp,cl
+ mov r10d,DWORD PTR[rsi*8+r14]
+ mov r11d,DWORD PTR[rdi*8+r14]
+ mov r12d,DWORD PTR[rbp*8+r14]
+
+; Byte 1 of words 1 and 2 (offset 3); byte 0 of word 3 starts r8d.
+ movzx esi,bh
+ movzx edi,ch
+ movzx ebp,dl
+ xor r10d,DWORD PTR[3+rsi*8+r14]
+ xor r11d,DWORD PTR[3+rdi*8+r14]
+ mov r8d,DWORD PTR[rbp*8+r14]
+
+; Byte 1 of words 3 and 0; the state words are shifted right by 16 as
+; soon as their low halves have been consumed.
+ movzx esi,dh
+ shr ecx,16
+ movzx ebp,ah
+ xor r12d,DWORD PTR[3+rsi*8+r14]
+ shr edx,16
+ xor r8d,DWORD PTR[3+rbp*8+r14]
+
+; Advance r15 to the next round key while the remaining shifts complete.
+ shr ebx,16
+ lea r15,QWORD PTR[16+r15]
+ shr eax,16
+
+; Byte 2 of words 2, 3 and 0 (offset 2).
+ movzx esi,cl
+ movzx edi,dl
+ movzx ebp,al
+ xor r10d,DWORD PTR[2+rsi*8+r14]
+ xor r11d,DWORD PTR[2+rdi*8+r14]
+ xor r12d,DWORD PTR[2+rbp*8+r14]
+
+; Byte 3 of words 3 and 0 (offset 1); byte 2 of word 1 (offset 2).
+ movzx esi,dh
+ movzx edi,ah
+ movzx ebp,bl
+ xor r10d,DWORD PTR[1+rsi*8+r14]
+ xor r11d,DWORD PTR[1+rdi*8+r14]
+ xor r8d,DWORD PTR[2+rbp*8+r14]
+
+; edx and eax are no longer needed as inputs, so the next round key is
+; loaded into them while byte 3 of words 1 and 2 is still being looked up.
+ mov edx,DWORD PTR[12+r15]
+ movzx edi,bh
+ movzx ebp,ch
+ mov eax,DWORD PTR[r15]
+ xor r12d,DWORD PTR[1+rdi*8+r14]
+ xor r8d,DWORD PTR[1+rbp*8+r14]
+
+; New state = round key ^ accumulated table words.
+ mov ebx,DWORD PTR[4+r15]
+ mov ecx,DWORD PTR[8+r15]
+ xor eax,r10d
+ xor ebx,r11d
+ xor ecx,r12d
+ xor edx,r8d
+ sub r13d,1
+ jnz $L$enc_loop
+; Final step: only one byte lane is kept from every lookup. Byte 0 is a
+; byte load at entry offset 2; the other lanes are isolated by masking a
+; DWORD read from the entry.
+ movzx esi,al
+ movzx edi,bl
+ movzx ebp,cl
+ movzx r10d,BYTE PTR[2+rsi*8+r14]
+ movzx r11d,BYTE PTR[2+rdi*8+r14]
+ movzx r12d,BYTE PTR[2+rbp*8+r14]
+
+ movzx esi,dl
+ movzx edi,bh
+ movzx ebp,ch
+ movzx r8d,BYTE PTR[2+rsi*8+r14]
+ mov edi,DWORD PTR[rdi*8+r14]
+ mov ebp,DWORD PTR[rbp*8+r14]
+
+; Keep bits 8..15 only (byte lane 1).
+ and edi,00000ff00h
+ and ebp,00000ff00h
+
+ xor r10d,edi
+ xor r11d,ebp
+ shr ecx,16
+
+ movzx esi,dh
+ movzx edi,ah
+ shr edx,16
+ mov esi,DWORD PTR[rsi*8+r14]
+ mov edi,DWORD PTR[rdi*8+r14]
+
+ and esi,00000ff00h
+ and edi,00000ff00h
+ shr ebx,16
+ xor r12d,esi
+ xor r8d,edi
+ shr eax,16
+
+; Upper halves of the state: byte lane 2 (bits 16..23).
+ movzx esi,cl
+ movzx edi,dl
+ movzx ebp,al
+ mov esi,DWORD PTR[rsi*8+r14]
+ mov edi,DWORD PTR[rdi*8+r14]
+ mov ebp,DWORD PTR[rbp*8+r14]
+
+ and esi,000ff0000h
+ and edi,000ff0000h
+ and ebp,000ff0000h
+
+ xor r10d,esi
+ xor r11d,edi
+ xor r12d,ebp
+
+; Last lane-2 lookup (for r8d) and the first lane-3 lookups (bits 24..31,
+; read at entry offset 2).
+ movzx esi,bl
+ movzx edi,dh
+ movzx ebp,ah
+ mov esi,DWORD PTR[rsi*8+r14]
+ mov edi,DWORD PTR[2+rdi*8+r14]
+ mov ebp,DWORD PTR[2+rbp*8+r14]
+
+ and esi,000ff0000h
+ and edi,0ff000000h
+ and ebp,0ff000000h
+
+ xor r8d,esi
+ xor r10d,edi
+ xor r11d,ebp
+
+; r15 was not advanced for this step, so the last round key is read at
+; r15+16. Its words are loaded as soon as edx/eax are free.
+ movzx esi,bh
+ movzx edi,ch
+ mov edx,DWORD PTR[((16+12))+r15]
+ mov esi,DWORD PTR[2+rsi*8+r14]
+ mov edi,DWORD PTR[2+rdi*8+r14]
+ mov eax,DWORD PTR[((16+0))+r15]
+
+ and esi,0ff000000h
+ and edi,0ff000000h
+
+ xor r12d,esi
+ xor r8d,edi
+
+; Result = last round key ^ assembled words.
+ mov ebx,DWORD PTR[((16+4))+r15]
+ mov ecx,DWORD PTR[((16+8))+r15]
+ xor eax,r10d
+ xor ebx,r11d
+ xor ecx,r12d
+ xor edx,r8d
+; F3 C3 is the two-byte "rep ret" encoding, emitted as raw bytes.
+ DB 0F3h,0C3h ;repret
+_x86_64_AES_encrypt ENDP
+
+ALIGN 16
+;-----------------------------------------------------------------------
+; _x86_64_AES_encrypt_compact -- private helper that transforms one
+; 16-byte state held in registers using only a 256-byte byte table; the
+; per-word mixing is computed arithmetically instead of being looked up.
+;
+; In:    eax, ebx, ecx, edx = the four 32-bit state words
+;        r15 = current position in the key schedule (16 bytes per round)
+;        r14 = base of a 256-byte table indexed by byte value
+;        QWORD at [16+rsp] on entry = key pointer at which the loop stops.
+;              When reached from AES_encrypt in this file, that slot is
+;              the caller's [8+rsp] (shifted by the return address) and
+;              holds key + 16*rounds.
+; Out:   eax, ebx, ecx, edx = resulting state words
+; Clobb: rsi, rdi, rbp, r8, r9, r10, r11, r12, r13, flags.
+;        r15 is advanced and NOT restored.
+;-----------------------------------------------------------------------
+_x86_64_AES_encrypt_compact PROC PRIVATE
+; Read one DWORD at every 32-byte step across the 256-byte table. The
+; loaded values are overwritten before any use, so these loads only touch
+; the table (apparently to pull it into cache before the data-dependent
+; lookups -- NOTE(review): intent inferred, confirm).
+ lea r8,QWORD PTR[128+r14]
+ mov edi,DWORD PTR[((0-128))+r8]
+ mov ebp,DWORD PTR[((32-128))+r8]
+ mov r10d,DWORD PTR[((64-128))+r8]
+ mov r11d,DWORD PTR[((96-128))+r8]
+ mov edi,DWORD PTR[((128-128))+r8]
+ mov ebp,DWORD PTR[((160-128))+r8]
+ mov r10d,DWORD PTR[((192-128))+r8]
+ mov r11d,DWORD PTR[((224-128))+r8]
+; Skip the alignment padding and enter the 16-byte aligned loop head.
+ jmp $L$enc_loop_compact
+ALIGN 16
+$L$enc_loop_compact::
+; Add the current round key and advance r15 to the next one.
+ xor eax,DWORD PTR[r15]
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+ lea r15,QWORD PTR[16+r15]
+; Byte substitution combined with the byte permutation: output word j is
+; assembled from table[byte k of input word (j+k) mod 4], placed in byte
+; lane k. Results accumulate in r10d, r11d, r12d (later ecx) and r8d
+; (later edx).
+ movzx r10d,al
+ movzx r11d,bl
+ movzx r12d,cl
+ movzx r10d,BYTE PTR[r10*1+r14]
+ movzx r11d,BYTE PTR[r11*1+r14]
+ movzx r12d,BYTE PTR[r12*1+r14]
+
+ movzx r8d,dl
+ movzx esi,bh
+ movzx edi,ch
+ movzx r8d,BYTE PTR[r8*1+r14]
+ movzx r9d,BYTE PTR[rsi*1+r14]
+ movzx r13d,BYTE PTR[rdi*1+r14]
+
+ movzx ebp,dh
+ movzx esi,ah
+ shr ecx,16
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ movzx esi,BYTE PTR[rsi*1+r14]
+ shr edx,16
+
+ movzx edi,cl
+ shl r9d,8
+ shl r13d,8
+ movzx edi,BYTE PTR[rdi*1+r14]
+ xor r10d,r9d
+ xor r11d,r13d
+
+ movzx r9d,dl
+ shr eax,16
+ shr ebx,16
+ movzx r13d,al
+ shl ebp,8
+ shl esi,8
+ movzx r9d,BYTE PTR[r9*1+r14]
+ movzx r13d,BYTE PTR[r13*1+r14]
+ xor r12d,ebp
+ xor r8d,esi
+
+ movzx ebp,bl
+ movzx esi,dh
+ shl edi,16
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ movzx esi,BYTE PTR[rsi*1+r14]
+ xor r10d,edi
+
+; After the extra 8-bit shifts ecx and ebx hold only their original top
+; byte, so rcx/rbx can be used directly as table indices. The looked-up
+; bytes land in edx and ecx, which become the new words 3 and 2.
+ movzx edi,ah
+ shr ecx,8
+ shr ebx,8
+ movzx edi,BYTE PTR[rdi*1+r14]
+ movzx edx,BYTE PTR[rcx*1+r14]
+ movzx ecx,BYTE PTR[rbx*1+r14]
+ shl r9d,16
+ shl r13d,16
+ shl ebp,16
+ xor r11d,r9d
+ xor r12d,r13d
+ xor r8d,ebp
+
+ shl esi,24
+ shl edi,24
+ shl edx,24
+ xor r10d,esi
+ shl ecx,24
+ xor r11d,edi
+ mov eax,r10d
+ mov ebx,r11d
+ xor ecx,r12d
+ xor edx,r8d
+; Stop once r15 has reached the end pointer kept in the caller's frame;
+; the mixing step below is skipped for that last pass.
+ cmp r15,QWORD PTR[16+rsp]
+ je $L$enc_compact_done
+; Per-word mixing, done on eax/ebx first and then on ecx/edx. For a word
+; x, x2 is its per-byte doubling: (x+x) & 0xfefefefe, XORed with 0x1b in
+; every byte whose top bit was set. The mask ((x & 0x80808080) minus
+; itself shifted right by 7) & 0x1b1b1b1b selects those bytes.
+ mov esi,eax
+ mov edi,ebx
+ and esi,080808080h
+ and edi,080808080h
+ mov r10d,esi
+ mov r11d,edi
+ shr r10d,7
+ lea r8d,DWORD PTR[rax*1+rax]
+ shr r11d,7
+ lea r9d,DWORD PTR[rbx*1+rbx]
+ sub esi,r10d
+ sub edi,r11d
+ and r8d,0fefefefeh
+ and r9d,0fefefefeh
+ and esi,01b1b1b1bh
+ and edi,01b1b1b1bh
+ mov r10d,eax
+ mov r11d,ebx
+ xor r8d,esi
+ xor r9d,edi
+
+; New word = rol(x ^ x2, 24) ^ x2 ^ ror(x, 16) ^ ror(x, 24), with x kept
+; in r10d/r11d and x2 in r8d/r9d. The same computation for ecx/edx is
+; interleaved from here on.
+ xor eax,r8d
+ xor ebx,r9d
+ mov esi,ecx
+ mov edi,edx
+ rol eax,24
+ rol ebx,24
+ and esi,080808080h
+ and edi,080808080h
+ xor eax,r8d
+ xor ebx,r9d
+ mov r12d,esi
+ mov ebp,edi
+ ror r10d,16
+ ror r11d,16
+ shr r12d,7
+ lea r8d,DWORD PTR[rcx*1+rcx]
+ xor eax,r10d
+ xor ebx,r11d
+ shr ebp,7
+ lea r9d,DWORD PTR[rdx*1+rdx]
+ ror r10d,8
+ ror r11d,8
+ sub esi,r12d
+ sub edi,ebp
+ xor eax,r10d
+ xor ebx,r11d
+
+ and r8d,0fefefefeh
+ and r9d,0fefefefeh
+ and esi,01b1b1b1bh
+ and edi,01b1b1b1bh
+ mov r12d,ecx
+ mov ebp,edx
+ xor r8d,esi
+ xor r9d,edi
+
+; The loads from r14, r14+64, r14+128 and r14+192 interleaved below go
+; into registers that are overwritten at the loop head before being read,
+; so they only touch the table again (see the note at function entry).
+ xor ecx,r8d
+ xor edx,r9d
+ rol ecx,24
+ rol edx,24
+ xor ecx,r8d
+ xor edx,r9d
+ mov esi,DWORD PTR[r14]
+ ror r12d,16
+ ror ebp,16
+ mov edi,DWORD PTR[64+r14]
+ xor ecx,r12d
+ xor edx,ebp
+ mov r8d,DWORD PTR[128+r14]
+ ror r12d,8
+ ror ebp,8
+ mov r9d,DWORD PTR[192+r14]
+ xor ecx,r12d
+ xor edx,ebp
+ jmp $L$enc_loop_compact
+ALIGN 16
+$L$enc_compact_done::
+; Final key addition with the round key r15 now points at.
+ xor eax,DWORD PTR[r15]
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+; F3 C3 is the two-byte "rep ret" encoding, emitted as raw bytes.
+ DB 0F3h,0C3h ;repret
+_x86_64_AES_encrypt_compact ENDP
+;-----------------------------------------------------------------------
+; AES_encrypt / asm_AES_encrypt -- public entry point (both names label
+; the same address). Processes one 16-byte block.
+;
+; ABI:   Microsoft x64 on entry. The prologue moves the arguments into
+;        rdi, rsi, rdx, which the body then uses.
+; In:    rcx = input block (16 bytes are read)
+;        rdx = output block (16 bytes are written)
+;        r8  = key schedule (round keys from offset 0, round count as a
+;              DWORD at offset 240)
+; Out:   nothing in registers; the result is stored through the output
+;        pointer.
+; Saved: rdi and rsi go to the caller-provided slots at [8+rsp] and
+;        [16+rsp]; rbx, rbp, r12-r15 are pushed. All are restored before
+;        returning.
+; Frame: after realignment, [rsp] = key, [8+rsp] = key + 16*rounds,
+;        [16+rsp] = output pointer, [24+rsp] = rsp value after the pushes.
+; The $L$SEH_begin/$L$enc_prologue/$L$enc_epilogue/$L$SEH_end labels are
+; global position markers (presumably consumed by unwind data outside
+; this view); instructions must not be moved across them.
+;-----------------------------------------------------------------------
+PUBLIC AES_encrypt
+
+ALIGN 16
+PUBLIC asm_AES_encrypt
+
+asm_AES_encrypt::
+AES_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_AES_encrypt::
+; Remap the Win64 argument registers to rdi/rsi/rdx.
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+
+
+; Remember rsp, then place the working frame: align rsp down to 64 bytes
+; and lower it further by ((rsp - (key - 63)) & 0x3c0), a multiple of 64.
+; This fixes the frame's position relative to the key address modulo
+; 1024 (presumably to control how frame and key schedule alias in the
+; cache -- NOTE(review): intent inferred). Finally reserve 32 bytes.
+ mov r10,rsp
+ lea rcx,QWORD PTR[((-63))+rdx]
+ and rsp,-64
+ sub rcx,rsp
+ neg rcx
+ and rcx,03c0h
+ sub rsp,rcx
+ sub rsp,32
+
+; Save the output pointer and the pre-alignment rsp in the frame.
+ mov QWORD PTR[16+rsp],rsi
+ mov QWORD PTR[24+rsp],r10
+$L$enc_prologue::
+
+; r15 = key schedule, r13d = round count.
+ mov r15,rdx
+ mov r13d,DWORD PTR[240+r15]
+
+; Load the 16-byte input block into the state registers.
+ mov eax,DWORD PTR[rdi]
+ mov ebx,DWORD PTR[4+rdi]
+ mov ecx,DWORD PTR[8+rdi]
+ mov edx,DWORD PTR[12+rdi]
+
+; rbp = key + 16*rounds. It is stored at [8+rsp], which the callee reads
+; as [16+rsp] (one return address higher) to detect its last pass.
+ shl r13d,4
+ lea rbp,QWORD PTR[r13*1+r15]
+ mov QWORD PTR[rsp],r15
+ mov QWORD PTR[8+rsp],rbp
+
+
+; r14 = $L$AES_Te + 2048, then advanced by ((rsp + 768 - r14) & 0x300),
+; i.e. by 0, 256, 512 or 768 bytes depending on the stack address. This
+; presumably selects one of several 256-byte table copies that follow
+; the first 2048 bytes of $L$AES_Te (table not visible here -- confirm).
+ lea r14,QWORD PTR[(($L$AES_Te+2048))]
+ lea rbp,QWORD PTR[768+rsp]
+ sub rbp,r14
+ and rbp,0300h
+ lea r14,QWORD PTR[rbp*1+r14]
+
+ call _x86_64_AES_encrypt_compact
+
+; Store the resulting state through the saved output pointer.
+ mov r9,QWORD PTR[16+rsp]
+ mov rsi,QWORD PTR[24+rsp]
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+; rsi = rsp as it was right after the six pushes. Reload the pushed
+; registers from there (r15 was pushed last, so it sits lowest) and drop
+; the whole frame.
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$enc_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+; F3 C3 is the two-byte "rep ret" encoding, emitted as raw bytes.
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_AES_encrypt::
+AES_encrypt ENDP
+
+ALIGN 16
+;-----------------------------------------------------------------------
+; _x86_64_AES_decrypt -- private helper that transforms one 16-byte
+; state held in registers, using lookups into a table of 8-byte entries
+; plus a byte table located 2048 bytes after it.
+;
+; In:    eax, ebx, ecx, edx = the four 32-bit state words
+;        r15 = key schedule: 16 bytes are consumed per round key and the
+;              DWORD at offset 240 is read as the round count
+;        r14 = table base; entries are addressed as byte*8 and read at
+;              byte offsets 0..3 (presumably every entry is one 32-bit
+;              word stored twice, so the offsets yield byte rotations of
+;              that word -- confirm against the table definition).
+;              A 256-byte table indexed by byte value is read at r14+2048.
+; Out:   eax, ebx, ecx, edx = resulting state words
+; Clobb: rsi, rdi, rbp, r8, r10, r11, r12, r13, flags.
+;        r15 is advanced by 16 per loop iteration and is NOT restored.
+;        r14 is temporarily moved but holds its entry value on return.
+; Stack: none used; this is a leaf routine.
+;-----------------------------------------------------------------------
+_x86_64_AES_decrypt PROC PRIVATE
+; Initial key addition: state ^= the 16 bytes at r15.
+ xor eax,DWORD PTR[r15]
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+
+; r13d = (round count at key+240) - 1 = number of loop iterations below.
+ mov r13d,DWORD PTR[240+r15]
+ sub r13d,1
+; Skip the alignment padding and enter the 16-byte aligned loop head.
+ jmp $L$dec_loop
+ALIGN 16
+$L$dec_loop::
+
+; Each iteration builds four new words in r10d, r11d, r12d and r8d, each
+; the XOR of four table reads. Byte 0 of a word is read at entry offset
+; 0, byte 1 at offset 3, byte 2 at offset 2 and byte 3 at offset 1. The
+; source word for each byte position differs from the one used by
+; _x86_64_AES_encrypt.
+;
+; Byte 0 of words 0..2 (offset 0).
+ movzx esi,al
+ movzx edi,bl
+ movzx ebp,cl
+ mov r10d,DWORD PTR[rsi*8+r14]
+ mov r11d,DWORD PTR[rdi*8+r14]
+ mov r12d,DWORD PTR[rbp*8+r14]
+
+; Byte 1 of words 3 and 0 (offset 3); byte 0 of word 3 starts r8d.
+ movzx esi,dh
+ movzx edi,ah
+ movzx ebp,dl
+ xor r10d,DWORD PTR[3+rsi*8+r14]
+ xor r11d,DWORD PTR[3+rdi*8+r14]
+ mov r8d,DWORD PTR[rbp*8+r14]
+
+; Byte 1 of words 1 and 2; the state words are shifted right by 16 as
+; soon as their low halves have been consumed.
+ movzx esi,bh
+ shr eax,16
+ movzx ebp,ch
+ xor r12d,DWORD PTR[3+rsi*8+r14]
+ shr edx,16
+ xor r8d,DWORD PTR[3+rbp*8+r14]
+
+; Advance r15 to the next round key while the remaining shifts complete.
+ shr ebx,16
+ lea r15,QWORD PTR[16+r15]
+ shr ecx,16
+
+; Byte 2 of words 2, 3 and 0 (offset 2).
+ movzx esi,cl
+ movzx edi,dl
+ movzx ebp,al
+ xor r10d,DWORD PTR[2+rsi*8+r14]
+ xor r11d,DWORD PTR[2+rdi*8+r14]
+ xor r12d,DWORD PTR[2+rbp*8+r14]
+
+; Byte 3 of words 1 and 2 (offset 1); byte 2 of word 1 (offset 2).
+ movzx esi,bh
+ movzx edi,ch
+ movzx ebp,bl
+ xor r10d,DWORD PTR[1+rsi*8+r14]
+ xor r11d,DWORD PTR[1+rdi*8+r14]
+ xor r8d,DWORD PTR[2+rbp*8+r14]
+
+; Byte 3 of words 3 and 0; edx and eax are reloaded with next-round-key
+; words as soon as their last byte has been extracted.
+ movzx esi,dh
+ mov edx,DWORD PTR[12+r15]
+ movzx ebp,ah
+ xor r12d,DWORD PTR[1+rsi*8+r14]
+ mov eax,DWORD PTR[r15]
+ xor r8d,DWORD PTR[1+rbp*8+r14]
+
+; New state = round key ^ accumulated table words.
+ xor eax,r10d
+ mov ebx,DWORD PTR[4+r15]
+ mov ecx,DWORD PTR[8+r15]
+ xor ecx,r12d
+ xor ebx,r11d
+ xor edx,r8d
+ sub r13d,1
+ jnz $L$dec_loop
+; Final step: point r14 at the byte table 2048 bytes further on and
+; assemble each output word from four single-byte lookups shifted into
+; lanes 0..3.
+ lea r14,QWORD PTR[2048+r14]
+ movzx esi,al
+ movzx edi,bl
+ movzx ebp,cl
+ movzx r10d,BYTE PTR[rsi*1+r14]
+ movzx r11d,BYTE PTR[rdi*1+r14]
+ movzx r12d,BYTE PTR[rbp*1+r14]
+
+ movzx esi,dl
+ movzx edi,dh
+ movzx ebp,ah
+ movzx r8d,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ movzx ebp,BYTE PTR[rbp*1+r14]
+
+; Lane 1 (bits 8..15).
+ shl edi,8
+ shl ebp,8
+
+ xor r10d,edi
+ xor r11d,ebp
+ shr edx,16
+
+ movzx esi,bh
+ movzx edi,ch
+ shr eax,16
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+
+ shl esi,8
+ shl edi,8
+ shr ebx,16
+ xor r12d,esi
+ xor r8d,edi
+ shr ecx,16
+
+; Lane 2 (bits 16..23), taken from the upper halves of the state words.
+ movzx esi,cl
+ movzx edi,dl
+ movzx ebp,al
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ movzx ebp,BYTE PTR[rbp*1+r14]
+
+ shl esi,16
+ shl edi,16
+ shl ebp,16
+
+ xor r10d,esi
+ xor r11d,edi
+ xor r12d,ebp
+
+; Last lane-2 byte (for r8d) and the first lane-3 bytes (bits 24..31).
+ movzx esi,bl
+ movzx edi,bh
+ movzx ebp,ch
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ movzx ebp,BYTE PTR[rbp*1+r14]
+
+ shl esi,16
+ shl edi,24
+ shl ebp,24
+
+ xor r8d,esi
+ xor r10d,edi
+ xor r11d,ebp
+
+; r15 was not advanced for this step, so the last round key is read at
+; r15+16. Its words are loaded as soon as edx/eax are free.
+ movzx esi,dh
+ movzx edi,ah
+ mov edx,DWORD PTR[((16+12))+r15]
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ mov eax,DWORD PTR[((16+0))+r15]
+
+ shl esi,24
+ shl edi,24
+
+ xor r12d,esi
+ xor r8d,edi
+
+; Restore r14 to the table base and produce the result:
+; last round key ^ assembled words.
+ mov ebx,DWORD PTR[((16+4))+r15]
+ mov ecx,DWORD PTR[((16+8))+r15]
+ lea r14,QWORD PTR[((-2048))+r14]
+ xor eax,r10d
+ xor ebx,r11d
+ xor ecx,r12d
+ xor edx,r8d
+; F3 C3 is the two-byte "rep ret" encoding, emitted as raw bytes.
+ DB 0F3h,0C3h ;repret
+_x86_64_AES_decrypt ENDP
+
+ALIGN 16
+_x86_64_AES_decrypt_compact PROC PRIVATE ;in: eax,ebx,ecx,edx = state words, r15 = round-key pointer, r14 = byte-indexed table; out: eax,ebx,ecx,edx; scratch: rsi,rdi,rbp,r8-r13
+ lea r8,QWORD PTR[128+r14] ;bias the base by 128; the eight loads below then use displacements -128..96
+ mov edi,DWORD PTR[((0-128))+r8] ;read the table every 32 bytes across 256 bytes; the loaded values are overwritten without being used
+ mov ebp,DWORD PTR[((32-128))+r8]
+ mov r10d,DWORD PTR[((64-128))+r8]
+ mov r11d,DWORD PTR[((96-128))+r8]
+ mov edi,DWORD PTR[((128-128))+r8]
+ mov ebp,DWORD PTR[((160-128))+r8]
+ mov r10d,DWORD PTR[((192-128))+r8]
+ mov r11d,DWORD PTR[((224-128))+r8]
+ jmp $L$dec_loop_compact
+
+ALIGN 16
+$L$dec_loop_compact:: ;one iteration per round key
+ xor eax,DWORD PTR[r15] ;combine the state with the 16-byte round key at r15
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+ lea r15,QWORD PTR[16+r15] ;advance to the next round key
+ movzx r10d,al ;byte 0 of words 0..2 becomes byte 0 of outputs r10d,r11d,r12d
+ movzx r11d,bl
+ movzx r12d,cl
+ movzx r10d,BYTE PTR[r10*1+r14] ;substitute each byte through the table at r14
+ movzx r11d,BYTE PTR[r11*1+r14]
+ movzx r12d,BYTE PTR[r12*1+r14]
+
+ movzx r8d,dl ;byte 0 of word 3 becomes byte 0 of output r8d
+ movzx esi,dh ;byte 1 of word 3 feeds output word 0 (r10d)
+ movzx edi,ah ;byte 1 of word 0 feeds output word 1 (r11d)
+ movzx r8d,BYTE PTR[r8*1+r14]
+ movzx r9d,BYTE PTR[rsi*1+r14]
+ movzx r13d,BYTE PTR[rdi*1+r14]
+
+ movzx ebp,bh ;byte 1 of word 1 feeds output word 2 (r12d)
+ movzx esi,ch ;byte 1 of word 2 feeds output word 3 (r8d)
+ shr ecx,16 ;expose bytes 2 and 3 of word 2 as cl/ch
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ movzx esi,BYTE PTR[rsi*1+r14]
+ shr edx,16 ;expose bytes 2 and 3 of word 3 as dl/dh
+
+ movzx edi,cl ;byte 2 of word 2 feeds output word 0
+ shl r9d,8 ;move the substituted byte-1 values into bit positions 8..15
+ shl r13d,8
+ movzx edi,BYTE PTR[rdi*1+r14]
+ xor r10d,r9d
+ xor r11d,r13d
+
+ movzx r9d,dl ;byte 2 of word 3 feeds output word 1
+ shr eax,16 ;expose bytes 2 and 3 of word 0 as al/ah
+ shr ebx,16 ;expose bytes 2 and 3 of word 1 as bl/bh
+ movzx r13d,al ;byte 2 of word 0 feeds output word 2
+ shl ebp,8
+ shl esi,8
+ movzx r9d,BYTE PTR[r9*1+r14]
+ movzx r13d,BYTE PTR[r13*1+r14]
+ xor r12d,ebp
+ xor r8d,esi
+
+ movzx ebp,bl ;byte 2 of word 1 feeds output word 3
+ movzx esi,bh ;byte 3 of word 1 feeds output word 0
+ shl edi,16 ;move the substituted byte-2 value into bit positions 16..23
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ movzx esi,BYTE PTR[rsi*1+r14]
+ xor r10d,edi
+
+ movzx edi,ch ;byte 3 of word 2 feeds output word 1
+ shl r9d,16
+ shl r13d,16
+ movzx ebx,BYTE PTR[rdi*1+r14] ;ebx is free from here on and starts collecting output word 1
+ xor r11d,r9d
+ xor r12d,r13d
+
+ movzx edi,dh ;byte 3 of word 3 feeds output word 2
+ shr eax,8 ;eax now holds only byte 3 of word 0, which feeds output word 3
+ shl ebp,16
+ movzx ecx,BYTE PTR[rdi*1+r14] ;ecx starts collecting output word 2
+ movzx edx,BYTE PTR[rax*1+r14] ;edx starts collecting output word 3
+ xor r8d,ebp
+
+ shl esi,24 ;move the substituted byte-3 values into bit positions 24..31
+ shl ebx,24
+ shl ecx,24
+ xor r10d,esi
+ shl edx,24
+ xor ebx,r11d ;ebx = output word 1
+ mov eax,r10d ;eax = output word 0
+ xor ecx,r12d ;ecx = output word 2
+ xor edx,r8d ;edx = output word 3
+ cmp r15,QWORD PTR[16+rsp] ;compare with the end-of-key pointer; this slot is the caller's [8+rsp] because the return address sits at [rsp]
+ je $L$dec_compact_done ;last round: skip the column-mixing stage below
+
+ mov rsi,QWORD PTR[((256+0))+r14] ;three qword constants stored right after the 256-byte table (their values are outside this chunk)
+ shl rbx,32
+ shl rdx,32
+ mov rdi,QWORD PTR[((256+8))+r14]
+ or rax,rbx ;rax = word 1 : word 0, so two state words are processed per 64-bit register
+ or rcx,rdx ;rcx = word 3 : word 2
+ mov rbp,QWORD PTR[((256+16))+r14]
+ mov rbx,rax
+ mov rdx,rcx
+ and rbx,rsi ;NOTE(review): the and / shr 7 / sub / and / xor sequence looks like a packed GF(2^8) doubling of every byte, with rsi, rdi, rbp as its masks - confirm against the table constants
+ and rdx,rsi
+ mov r9,rbx
+ mov r12,rdx
+ shr r9,7
+ lea r8,QWORD PTR[rax*1+rax] ;lea is used for the add so the flags are left alone
+ shr r12,7
+ lea r11,QWORD PTR[rcx*1+rcx]
+ sub rbx,r9
+ sub rdx,r12
+ and r8,rdi
+ and r11,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor rbx,r8
+ xor rdx,r11
+ mov r8,rbx ;r8,r11 = result of the first application of the sequence to rax,rcx
+ mov r11,rdx
+ ;second application of the same sequence, now on r8,r11
+ and rbx,rsi
+ and rdx,rsi
+ mov r10,rbx
+ mov r13,rdx
+ shr r10,7
+ lea r9,QWORD PTR[r8*1+r8]
+ shr r13,7
+ lea r12,QWORD PTR[r11*1+r11]
+ sub rbx,r10
+ sub rdx,r13
+ and r9,rdi
+ and r12,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor rbx,r9
+ xor rdx,r12
+ mov r9,rbx ;r9,r12 = result of the second application
+ mov r12,rdx
+ ;third application, on r9,r12; interleaved with xors that fold the original rax,rcx into the earlier results
+ and rbx,rsi
+ and rdx,rsi
+ mov r10,rbx
+ mov r13,rdx
+ shr r10,7
+ xor r8,rax
+ shr r13,7
+ xor r11,rcx
+ sub rbx,r10
+ sub rdx,r13
+ lea r10,QWORD PTR[r9*1+r9]
+ lea r13,QWORD PTR[r12*1+r12]
+ xor r9,rax
+ xor r12,rcx
+ and r10,rdi
+ and r13,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor r10,rbx ;r10,r13 = result of the third application
+ xor r13,rdx
+ ;combine the three results with the original words, using 8/16/24-bit rotations of each 32-bit half
+ xor rax,r10
+ xor rcx,r13
+ xor r8,r10
+ xor r11,r13
+ mov rbx,rax
+ mov rdx,rcx
+ xor r9,r10
+ xor r12,r13
+ shr rbx,32 ;split the packed pairs back into separate 32-bit words
+ shr rdx,32
+ xor r10,r8
+ xor r13,r11
+ rol eax,8
+ rol ecx,8
+ xor r10,r9
+ xor r13,r12
+
+ rol ebx,8
+ rol edx,8
+ xor eax,r10d
+ xor ecx,r13d
+ shr r10,32
+ shr r13,32
+ xor ebx,r10d
+ xor edx,r13d
+
+ mov r10,r8
+ mov r13,r11
+ shr r10,32
+ shr r13,32
+ rol r8d,24
+ rol r11d,24
+ rol r10d,24
+ rol r13d,24
+ xor eax,r8d
+ xor ecx,r11d
+ mov r8,r9
+ mov r11,r12
+ xor ebx,r10d
+ xor edx,r13d
+
+ mov rsi,QWORD PTR[r14] ;loads from the table at 64-byte stride; the values are overwritten at the top of the loop without being used
+ shr r8,32
+ shr r11,32
+ mov rdi,QWORD PTR[64+r14]
+ rol r9d,16
+ rol r12d,16
+ mov rbp,QWORD PTR[128+r14]
+ rol r8d,16
+ rol r11d,16
+ mov r10,QWORD PTR[192+r14]
+ xor eax,r9d
+ xor ecx,r12d
+ mov r13,QWORD PTR[256+r14]
+ xor ebx,r8d
+ xor edx,r11d
+ jmp $L$dec_loop_compact
+ALIGN 16
+$L$dec_compact_done::
+ xor eax,DWORD PTR[r15] ;combine with the final round key; r15 equals the end-of-key pointer here
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+ DB 0F3h,0C3h ;repret
+_x86_64_AES_decrypt_compact ENDP
+PUBLIC AES_decrypt
+
+ALIGN 16
+PUBLIC asm_AES_decrypt
+
+asm_AES_decrypt:: ;second public name for the same entry point
+AES_decrypt PROC PUBLIC ;Win64 entry: rcx = 16-byte input block, rdx = 16-byte output block, r8 = key schedule (round count read from offset 240)
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi ;rdi/rsi are callee-saved on Win64; park them in the caller-provided home slots
+ mov rax,rsp
+$L$SEH_begin_AES_decrypt::
+ mov rdi,rcx ;remap the arguments: rdi = input, rsi = output, rdx = key
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+
+ ;choose the frame address from the key address
+ mov r10,rsp ;r10 = rsp after the six pushes, used by the epilogue to find the saved registers
+ lea rcx,QWORD PTR[((-63))+rdx]
+ and rsp,-64 ;align the frame to 64 bytes
+ sub rcx,rsp
+ neg rcx
+ and rcx,03c0h ;rcx = (rsp - (key - 63)) & 0x3c0, a multiple of 64 below 1024
+ sub rsp,rcx ;NOTE(review): presumably keeps the frame from aliasing the key schedule in cache - confirm
+ sub rsp,32
+
+ mov QWORD PTR[16+rsp],rsi ;frame: [0] = key, [8] = end of key, [16] = output pointer, [24] = saved rsp
+ mov QWORD PTR[24+rsp],r10
+$L$dec_prologue::
+
+ mov r15,rdx ;r15 = key schedule
+ mov r13d,DWORD PTR[240+r15] ;r13d = round count
+
+ mov eax,DWORD PTR[rdi] ;load the 16-byte input block as four dwords
+ mov ebx,DWORD PTR[4+rdi]
+ mov ecx,DWORD PTR[8+rdi]
+ mov edx,DWORD PTR[12+rdi]
+
+ shl r13d,4
+ lea rbp,QWORD PTR[r13*1+r15] ;rbp = key + 16*rounds, the address of the last round key
+ mov QWORD PTR[rsp],r15
+ mov QWORD PTR[8+rsp],rbp ;read by the callee as [16+rsp] to detect the final round
+
+ ;pick one of four table copies (k = 0..3) spaced 288 bytes apart, based on the frame address
+ lea r14,QWORD PTR[(($L$AES_Td+2048))]
+ lea rbp,QWORD PTR[768+rsp]
+ sub rbp,r14
+ and rbp,0300h ;rbp = 256*k
+ lea r14,QWORD PTR[rbp*1+r14]
+ shr rbp,3 ;rbp = 32*k
+ add r14,rbp ;r14 = $L$AES_Td + 2048 + 288*k
+
+ call _x86_64_AES_decrypt_compact ;state in eax,ebx,ecx,edx; key in r15; table in r14
+
+ mov r9,QWORD PTR[16+rsp] ;r9 = output pointer
+ mov rsi,QWORD PTR[24+rsp] ;rsi = rsp as it was right after the pushes
+ mov DWORD PTR[r9],eax ;store the 16-byte result
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ mov r15,QWORD PTR[rsi] ;reload the callee-saved registers in reverse push order
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi] ;rsp back to its value at entry
+$L$dec_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_AES_decrypt::
+AES_decrypt ENDP
+PUBLIC AES_set_encrypt_key
+
+ALIGN 16
+AES_set_encrypt_key PROC PUBLIC ;Win64 entry: rcx = user key bytes, edx = key size in bits, r8 = key schedule to fill; the status from the internal routine is returned in rax
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi ;rdi/rsi are callee-saved on Win64; park them in the caller-provided home slots
+ mov rax,rsp
+$L$SEH_begin_AES_set_encrypt_key::
+ mov rdi,rcx ;remap the arguments: rdi = user key, rsi = bits, rdx = key schedule
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp,8 ;one extra 8-byte slot on top of the six pushes
+$L$enc_key_prologue::
+
+ call _x86_64_AES_set_encrypt_key ;leaves the status in rax
+
+ mov r15,QWORD PTR[8+rsp] ;reload the pushed registers; r15 sits just above the 8-byte slot
+ mov r14,QWORD PTR[16+rsp]
+ mov r13,QWORD PTR[24+rsp]
+ mov r12,QWORD PTR[32+rsp]
+ mov rbp,QWORD PTR[40+rsp]
+ mov rbx,QWORD PTR[48+rsp]
+ add rsp,56 ;drop the slot and the six saved registers (8 + 6*8)
+$L$enc_key_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_AES_set_encrypt_key::
+AES_set_encrypt_key ENDP
+
+
+ALIGN 16
+_x86_64_AES_set_encrypt_key PROC PRIVATE ;in: rdi = user key, esi = bits, rdx = key schedule; out: rax = 0 on success, -1 if either pointer is null, -2 if bits is not 128/192/256; also overwrites rbx,rcx,rdx,rsi,rdi,rbp,r8
+ mov ecx,esi ;ecx = key size in bits
+ mov rsi,rdi ;rsi = user key
+ mov rdi,rdx ;rdi = key schedule being written
+
+ test rsi,-1 ;null-pointer checks
+ jz $L$badpointer
+ test rdi,-1
+ jz $L$badpointer
+
+ lea rbp,QWORD PTR[$L$AES_Te]
+ lea rbp,QWORD PTR[((2048+128))+rbp] ;rbp = $L$AES_Te + 2048 + 128; byte lookups below use [-128+rbp+index]
+
+ ;read the 256-byte table at 32-byte stride; the loaded values are overwritten without being used
+ mov eax,DWORD PTR[((0-128))+rbp]
+ mov ebx,DWORD PTR[((32-128))+rbp]
+ mov r8d,DWORD PTR[((64-128))+rbp]
+ mov edx,DWORD PTR[((96-128))+rbp]
+ mov eax,DWORD PTR[((128-128))+rbp]
+ mov ebx,DWORD PTR[((160-128))+rbp]
+ mov r8d,DWORD PTR[((192-128))+rbp]
+ mov edx,DWORD PTR[((224-128))+rbp]
+
+ cmp ecx,128 ;dispatch on the key size
+ je $L$10rounds
+ cmp ecx,192
+ je $L$12rounds
+ cmp ecx,256
+ je $L$14rounds
+ mov rax,-2 ;unsupported key size
+ jmp $L$exit
+
+$L$10rounds:: ;128-bit key: 10 iterations, each producing one 16-byte round key
+ mov rax,QWORD PTR[rsi] ;copy the 16 key bytes into the start of the schedule
+ mov rdx,QWORD PTR[8+rsi]
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rdx
+
+ shr rdx,32 ;edx = last key word; eax already holds the first key word
+ xor ecx,ecx ;ecx = iteration counter, also the index into the per-round dword table
+ jmp $L$10shortcut
+ALIGN 4
+$L$10loop::
+ mov eax,DWORD PTR[rdi] ;eax = first word of the previous round key
+ mov edx,DWORD PTR[12+rdi] ;edx = last word of the previous round key
+$L$10shortcut::
+ movzx esi,dl ;substitute the four bytes of edx; each result lands one byte position lower (byte 0 wraps to bits 24..31)
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,24
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shr edx,16
+ movzx esi,dl
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,8
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shl ebx,16
+ xor eax,ebx
+
+ xor eax,DWORD PTR[((1024-128))+rcx*4+rbp] ;xor in dword number ecx of the table at $L$AES_Te+2048+1024 (presumably the round constants - confirm)
+ mov DWORD PTR[16+rdi],eax ;each new word is the previous new word xor the word 16 bytes earlier
+ xor eax,DWORD PTR[4+rdi]
+ mov DWORD PTR[20+rdi],eax
+ xor eax,DWORD PTR[8+rdi]
+ mov DWORD PTR[24+rdi],eax
+ xor eax,DWORD PTR[12+rdi]
+ mov DWORD PTR[28+rdi],eax
+ add ecx,1
+ lea rdi,QWORD PTR[16+rdi]
+ cmp ecx,10
+ jl $L$10loop
+
+ mov DWORD PTR[80+rdi],10 ;rdi has advanced 160 bytes, so this stores the round count at schedule offset 240
+ xor rax,rax ;return 0
+ jmp $L$exit
+
+$L$12rounds:: ;192-bit key: works in 24-byte strides
+ mov rax,QWORD PTR[rsi] ;copy the 24 key bytes into the start of the schedule
+ mov rbx,QWORD PTR[8+rsi]
+ mov rdx,QWORD PTR[16+rsi]
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rbx
+ mov QWORD PTR[16+rdi],rdx
+
+ shr rdx,32 ;edx = last key word
+ xor ecx,ecx
+ jmp $L$12shortcut
+ALIGN 4
+$L$12loop::
+ mov eax,DWORD PTR[rdi]
+ mov edx,DWORD PTR[20+rdi] ;last word of the previous 24-byte group
+$L$12shortcut::
+ movzx esi,dl ;same substitute-and-rotate step as in the 128-bit loop
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,24
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shr edx,16
+ movzx esi,dl
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,8
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shl ebx,16
+ xor eax,ebx
+
+ xor eax,DWORD PTR[((1024-128))+rcx*4+rbp]
+ mov DWORD PTR[24+rdi],eax
+ xor eax,DWORD PTR[4+rdi]
+ mov DWORD PTR[28+rdi],eax
+ xor eax,DWORD PTR[8+rdi]
+ mov DWORD PTR[32+rdi],eax
+ xor eax,DWORD PTR[12+rdi]
+ mov DWORD PTR[36+rdi],eax
+
+ cmp ecx,7 ;the eighth pass stops after four words
+ je $L$12break
+ add ecx,1
+
+ xor eax,DWORD PTR[16+rdi] ;remaining two words of the 24-byte group
+ mov DWORD PTR[40+rdi],eax
+ xor eax,DWORD PTR[20+rdi]
+ mov DWORD PTR[44+rdi],eax
+
+ lea rdi,QWORD PTR[24+rdi]
+ jmp $L$12loop
+$L$12break::
+ mov DWORD PTR[72+rdi],12 ;rdi has advanced 7*24 = 168 bytes, so this is schedule offset 240
+ xor rax,rax ;return 0
+ jmp $L$exit
+
+$L$14rounds:: ;256-bit key: works in 32-byte strides, two substitution steps per stride
+ mov rax,QWORD PTR[rsi] ;copy the 32 key bytes into the start of the schedule
+ mov rbx,QWORD PTR[8+rsi]
+ mov rcx,QWORD PTR[16+rsi]
+ mov rdx,QWORD PTR[24+rsi]
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rbx
+ mov QWORD PTR[16+rdi],rcx
+ mov QWORD PTR[24+rdi],rdx
+
+ shr rdx,32 ;edx = last key word
+ xor ecx,ecx
+ jmp $L$14shortcut
+ALIGN 4
+$L$14loop::
+ mov eax,DWORD PTR[rdi]
+ mov edx,DWORD PTR[28+rdi] ;last word of the previous 32-byte group
+$L$14shortcut::
+ movzx esi,dl ;same substitute-and-rotate step as in the 128-bit loop
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,24
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shr edx,16
+ movzx esi,dl
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,8
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shl ebx,16
+ xor eax,ebx
+
+ xor eax,DWORD PTR[((1024-128))+rcx*4+rbp]
+ mov DWORD PTR[32+rdi],eax
+ xor eax,DWORD PTR[4+rdi]
+ mov DWORD PTR[36+rdi],eax
+ xor eax,DWORD PTR[8+rdi]
+ mov DWORD PTR[40+rdi],eax
+ xor eax,DWORD PTR[12+rdi]
+ mov DWORD PTR[44+rdi],eax
+
+ cmp ecx,6 ;the seventh pass stops after the first four words
+ je $L$14break
+ add ecx,1
+
+ mov edx,eax ;second half: substitute the word just written; each byte stays in its own position and no table dword is xored in
+ mov eax,DWORD PTR[16+rdi]
+ movzx esi,dl
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shr edx,16
+ shl ebx,8
+ movzx esi,dl
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,16
+ xor eax,ebx
+
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shl ebx,24
+ xor eax,ebx
+
+ mov DWORD PTR[48+rdi],eax
+ xor eax,DWORD PTR[20+rdi]
+ mov DWORD PTR[52+rdi],eax
+ xor eax,DWORD PTR[24+rdi]
+ mov DWORD PTR[56+rdi],eax
+ xor eax,DWORD PTR[28+rdi]
+ mov DWORD PTR[60+rdi],eax
+
+ lea rdi,QWORD PTR[32+rdi]
+ jmp $L$14loop
+$L$14break::
+ mov DWORD PTR[48+rdi],14 ;rdi has advanced 6*32 = 192 bytes, so this is schedule offset 240
+ xor rax,rax ;return 0
+ jmp $L$exit
+
+$L$badpointer::
+ mov rax,-1 ;a null pointer was passed
+$L$exit::
+ DB 0F3h,0C3h ;repret
+_x86_64_AES_set_encrypt_key ENDP
+PUBLIC AES_set_decrypt_key
+
+ALIGN 16
+AES_set_decrypt_key PROC PUBLIC ;Win64 entry: rcx = user key bytes, edx = key size in bits, r8 = key schedule; builds the encryption schedule, reverses its round-key order, then transforms the inner round keys; rax = 0 or the key-setup error code
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi ;rdi/rsi are callee-saved on Win64; park them in the caller-provided home slots
+ mov rax,rsp
+$L$SEH_begin_AES_set_decrypt_key::
+ mov rdi,rcx ;remap the arguments: rdi = user key, rsi = bits, rdx = key schedule
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ push rdx ;keep the key-schedule pointer across the call; it also fills the extra 8-byte slot
+$L$dec_key_prologue::
+
+ call _x86_64_AES_set_encrypt_key
+ mov r8,QWORD PTR[rsp] ;r8 = key schedule
+ cmp eax,0
+ jne $L$abort ;pass the key-setup error code through unchanged
+
+ mov r14d,DWORD PTR[240+r8] ;r14d = round count
+ xor rdi,rdi
+ lea rcx,QWORD PTR[r14*4+rdi] ;rcx = 4*rounds
+ mov rsi,r8 ;rsi walks up from the first round key
+ lea rdi,QWORD PTR[rcx*4+r8] ;rdi walks down from key + 16*rounds, the last round key
+ALIGN 4
+$L$invert:: ;swap 16-byte round keys from both ends until the two pointers are equal
+ mov rax,QWORD PTR[rsi]
+ mov rbx,QWORD PTR[8+rsi]
+ mov rcx,QWORD PTR[rdi]
+ mov rdx,QWORD PTR[8+rdi]
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rbx
+ mov QWORD PTR[rsi],rcx
+ mov QWORD PTR[8+rsi],rdx
+ lea rsi,QWORD PTR[16+rsi]
+ lea rdi,QWORD PTR[((-16))+rdi]
+ cmp rdi,rsi ;exit relies on the pointers becoming equal, which needs an even round count (key setup stores 10, 12 or 14)
+ jne $L$invert
+
+ lea rax,QWORD PTR[(($L$AES_Te+2048+1024))] ;base of the per-round dword table used by key setup
+ ;three qword constants at offsets 40/48/56 past that base (their values are outside this chunk)
+ mov rsi,QWORD PTR[40+rax]
+ mov rdi,QWORD PTR[48+rax]
+ mov rbp,QWORD PTR[56+rax]
+
+ mov r15,r8
+ sub r14d,1 ;transform rounds-1 keys: the first and last round keys are left as they are
+ALIGN 4
+$L$permute::
+ lea r15,QWORD PTR[16+r15] ;step to the next round key (pre-increment skips key 0)
+ mov rax,QWORD PTR[r15] ;rax = words 0,1 and rcx = words 2,3 of this round key
+ mov rcx,QWORD PTR[8+r15]
+ mov rbx,rax
+ mov rdx,rcx
+ and rbx,rsi ;NOTE(review): the and / shr 7 / sub / and / xor sequence looks like a packed GF(2^8) doubling of every byte, with rsi, rdi, rbp as its masks - confirm against the table constants
+ and rdx,rsi
+ mov r9,rbx
+ mov r12,rdx
+ shr r9,7
+ lea r8,QWORD PTR[rax*1+rax]
+ shr r12,7
+ lea r11,QWORD PTR[rcx*1+rcx]
+ sub rbx,r9
+ sub rdx,r12
+ and r8,rdi
+ and r11,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor rbx,r8
+ xor rdx,r11
+ mov r8,rbx ;r8,r11 = result of the first application of the sequence to rax,rcx
+ mov r11,rdx
+ ;second application of the same sequence, now on r8,r11
+ and rbx,rsi
+ and rdx,rsi
+ mov r10,rbx
+ mov r13,rdx
+ shr r10,7
+ lea r9,QWORD PTR[r8*1+r8]
+ shr r13,7
+ lea r12,QWORD PTR[r11*1+r11]
+ sub rbx,r10
+ sub rdx,r13
+ and r9,rdi
+ and r12,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor rbx,r9
+ xor rdx,r12
+ mov r9,rbx ;r9,r12 = result of the second application
+ mov r12,rdx
+ ;third application, on r9,r12; interleaved with xors that fold the original rax,rcx into the earlier results
+ and rbx,rsi
+ and rdx,rsi
+ mov r10,rbx
+ mov r13,rdx
+ shr r10,7
+ xor r8,rax
+ shr r13,7
+ xor r11,rcx
+ sub rbx,r10
+ sub rdx,r13
+ lea r10,QWORD PTR[r9*1+r9]
+ lea r13,QWORD PTR[r12*1+r12]
+ xor r9,rax
+ xor r12,rcx
+ and r10,rdi
+ and r13,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor r10,rbx ;r10,r13 = result of the third application
+ xor r13,rdx
+ ;combine the three results with the original words, using 8/16/24-bit rotations of each 32-bit half
+ xor rax,r10
+ xor rcx,r13
+ xor r8,r10
+ xor r11,r13
+ mov rbx,rax
+ mov rdx,rcx
+ xor r9,r10
+ xor r12,r13
+ shr rbx,32 ;split the packed pairs back into separate 32-bit words
+ shr rdx,32
+ xor r10,r8
+ xor r13,r11
+ rol eax,8
+ rol ecx,8
+ xor r10,r9
+ xor r13,r12
+
+ rol ebx,8
+ rol edx,8
+ xor eax,r10d
+ xor ecx,r13d
+ shr r10,32
+ shr r13,32
+ xor ebx,r10d
+ xor edx,r13d
+
+ mov r10,r8
+ mov r13,r11
+ shr r10,32
+ shr r13,32
+ rol r8d,24
+ rol r11d,24
+ rol r10d,24
+ rol r13d,24
+ xor eax,r8d
+ xor ecx,r11d
+ mov r8,r9
+ mov r11,r12
+ xor ebx,r10d
+ xor edx,r13d
+
+
+ shr r8,32
+ shr r11,32
+
+ rol r9d,16
+ rol r12d,16
+
+ rol r8d,16
+ rol r11d,16
+
+ xor eax,r9d
+ xor ecx,r12d
+
+ xor ebx,r8d
+ xor edx,r11d
+ mov DWORD PTR[r15],eax ;write the transformed round key back in place
+ mov DWORD PTR[4+r15],ebx
+ mov DWORD PTR[8+r15],ecx
+ mov DWORD PTR[12+r15],edx
+ sub r14d,1
+ jnz $L$permute
+
+ xor rax,rax ;return 0
+$L$abort::
+ mov r15,QWORD PTR[8+rsp] ;reload the pushed registers; [rsp] still holds the pushed key-schedule pointer
+ mov r14,QWORD PTR[16+rsp]
+ mov r13,QWORD PTR[24+rsp]
+ mov r12,QWORD PTR[32+rsp]
+ mov rbp,QWORD PTR[40+rsp]
+ mov rbx,QWORD PTR[48+rsp]
+ add rsp,56 ;drop the pointer slot and the six saved registers
+$L$dec_key_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_AES_set_decrypt_key::
+AES_set_decrypt_key ENDP
+PUBLIC AES_cbc_encrypt
+
+ALIGN 16
+EXTERN OPENSSL_ia32cap_P:NEAR
+
+PUBLIC asm_AES_cbc_encrypt
+
+asm_AES_cbc_encrypt:: ;second public name for the same entry point
+AES_cbc_encrypt PROC PUBLIC ;Win64 entry: rcx = input, rdx = output, r8 = length in bytes, r9 = key schedule, [40+rsp] = 16-byte IV (updated on return), [48+rsp] = mode flag (non-zero = encrypt, zero = decrypt)
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi ;rdi/rsi are callee-saved on Win64; park them in the caller-provided home slots
+ mov rax,rsp
+$L$SEH_begin_AES_cbc_encrypt::
+ mov rdi,rcx ;remap the arguments: rdi = input, rsi = output, rdx = length, rcx = key
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp] ;r8 = IV pointer (fifth argument, on the stack)
+ mov r9,QWORD PTR[48+rsp] ;r9 = mode flag (sixth argument, on the stack)
+
+
+ cmp rdx,0
+ je $L$cbc_epilogue ;zero length: nothing to do and nothing was pushed
+ pushfq ;save the flags because cld below changes the direction flag
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+$L$cbc_prologue::
+
+ cld ;the string instructions below must move forward
+ mov r9d,r9d ;keep only the low 32 bits of the mode flag
+
+ lea r14,QWORD PTR[$L$AES_Te] ;r14 = encryption tables, replaced by the decryption tables when the flag is zero
+ cmp r9,0
+ jne $L$cbc_picked_te
+ lea r14,QWORD PTR[$L$AES_Td]
+$L$cbc_picked_te::
+
+ mov r10d,DWORD PTR[OPENSSL_ia32cap_P]
+ cmp rdx,512
+ jb $L$cbc_slow_prologue ;fewer than 512 bytes: take the slow path
+ test rdx,15
+ jnz $L$cbc_slow_prologue ;length not a multiple of 16: take the slow path
+ bt r10d,28
+ jc $L$cbc_slow_prologue ;bit 28 of the first capability dword set: take the slow path (NOTE(review): presumably the hyper-threading flag - confirm)
+
+ ;fast path: reserve a 64-byte-aligned frame of at least 88+248 bytes
+ lea r15,QWORD PTR[((-88-248))+rsp]
+ and r15,-64
+
+ ;compare the frame and the 2304-byte table by their offsets inside a 4096-byte page
+ mov r10,r14
+ lea r11,QWORD PTR[2304+r14]
+ mov r12,r15
+ and r10,0FFFh ;r10 = page offset of the table start
+ and r11,0FFFh ;r11 = page offset of the table end
+ and r12,0FFFh ;r12 = page offset of the frame
+
+ cmp r12,r11
+ jb $L$cbc_te_break_out
+ sub r12,r11 ;frame offset is at or above the table end: lower the frame until its offset equals the table end
+ sub r15,r12
+ jmp $L$cbc_te_ok
+$L$cbc_te_break_out:: ;otherwise lower the frame so its offset is 320 bytes below the table start (mod 4096)
+ sub r12,r10
+ and r12,0FFFh
+ add r12,320
+ sub r15,r12
+ALIGN 4
+$L$cbc_te_ok::
+
+ xchg r15,rsp ;switch to the new frame; r15 = rsp as it was after the pushes
+
+ mov QWORD PTR[16+rsp],r15 ;frame: [0] key in use, [16] saved rsp, [24] in, [32] out, [40] length, [48] key, [56] IV ptr, [64] IV scratch, [80] key copy, [320] copy marker
+$L$cbc_fast_body::
+ mov QWORD PTR[24+rsp],rdi
+ mov QWORD PTR[32+rsp],rsi
+ mov QWORD PTR[40+rsp],rdx
+ mov QWORD PTR[48+rsp],rcx
+ mov QWORD PTR[56+rsp],r8
+ mov DWORD PTR[((80+240))+rsp],0 ;marker = 0: no key copy on the stack yet
+ mov rbp,r8 ;rbp = IV pointer
+ mov rbx,r9 ;rbx = mode flag
+ mov r9,rsi ;r9 = output cursor
+ mov r8,rdi ;r8 = input cursor
+ mov r15,rcx ;r15 = key schedule
+
+ mov eax,DWORD PTR[240+r15] ;eax = round count
+
+ mov r10,r15
+ sub r10,r14
+ and r10,0fffh ;r10 = (key - table) mod 4096
+ cmp r10,2304
+ jb $L$cbc_do_ecopy ;key start falls inside the table's page-offset range: copy the key to the stack
+ cmp r10,4096-248
+ jb $L$cbc_skip_ecopy ;key sits at least 248 bytes clear of the wrap-around: use it in place
+ALIGN 4
+$L$cbc_do_ecopy::
+ mov rsi,r15
+ lea rdi,QWORD PTR[80+rsp]
+ lea r15,QWORD PTR[80+rsp] ;from now on the key in use is the stack copy
+ mov ecx,240/8
+ DD 090A548F3h ;bytes F3 48 A5 90 = rep movsq + nop: copy 30 qwords (240 bytes)
+ mov DWORD PTR[rdi],eax ;rdi is now frame+320: store the round count, which also makes the copy marker non-zero
+$L$cbc_skip_ecopy::
+ mov QWORD PTR[rsp],r15
+
+ mov ecx,18 ;18 passes of 128 bytes = 2304 bytes
+ALIGN 4
+$L$cbc_prefetch_te:: ;read the whole table at 32-byte stride; the loaded values are not used
+ mov r10,QWORD PTR[r14]
+ mov r11,QWORD PTR[32+r14]
+ mov r12,QWORD PTR[64+r14]
+ mov r13,QWORD PTR[96+r14]
+ lea r14,QWORD PTR[128+r14]
+ sub ecx,1
+ jnz $L$cbc_prefetch_te
+ lea r14,QWORD PTR[((-2304))+r14] ;back to the table start
+
+ cmp rbx,0
+ je $L$FAST_DECRYPT
+
+ ;fast encrypt: eax,ebx,ecx,edx carry the chaining value, starting with the IV
+ mov eax,DWORD PTR[rbp]
+ mov ebx,DWORD PTR[4+rbp]
+ mov ecx,DWORD PTR[8+rbp]
+ mov edx,DWORD PTR[12+rbp]
+
+ALIGN 4
+$L$cbc_fast_enc_loop::
+ xor eax,DWORD PTR[r8] ;chaining value xor plaintext block
+ xor ebx,DWORD PTR[4+r8]
+ xor ecx,DWORD PTR[8+r8]
+ xor edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp] ;reload the key pointer for the call
+ mov QWORD PTR[24+rsp],r8 ;the input cursor is kept in the frame across the call
+
+ call _x86_64_AES_encrypt
+
+ mov r8,QWORD PTR[24+rsp]
+ mov r10,QWORD PTR[40+rsp] ;r10 = bytes remaining
+ mov DWORD PTR[r9],eax ;store the ciphertext block; it stays in registers as the next chaining value
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ sub r10,16
+ test r10,-16 ;continue while at least 16 bytes remain
+ mov QWORD PTR[40+rsp],r10
+ jnz $L$cbc_fast_enc_loop
+ mov rbp,QWORD PTR[56+rsp]
+ mov DWORD PTR[rbp],eax ;write the last ciphertext block back as the IV
+ mov DWORD PTR[4+rbp],ebx
+ mov DWORD PTR[8+rbp],ecx
+ mov DWORD PTR[12+rbp],edx
+
+ jmp $L$cbc_fast_cleanup
+
+
+ALIGN 16
+$L$FAST_DECRYPT::
+ cmp r9,r8
+ je $L$cbc_fast_dec_in_place ;output == input needs the variant that saves each ciphertext block first
+
+ mov QWORD PTR[64+rsp],rbp ;[64] = pointer to the previous ciphertext block (initially the IV)
+ALIGN 4
+$L$cbc_fast_dec_loop::
+ mov eax,DWORD PTR[r8] ;load a ciphertext block
+ mov ebx,DWORD PTR[4+r8]
+ mov ecx,DWORD PTR[8+r8]
+ mov edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8
+
+ call _x86_64_AES_decrypt
+
+ mov rbp,QWORD PTR[64+rsp]
+ mov r8,QWORD PTR[24+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ xor eax,DWORD PTR[rbp] ;xor with the previous ciphertext block (or the IV) to get the plaintext
+ xor ebx,DWORD PTR[4+rbp]
+ xor ecx,DWORD PTR[8+rbp]
+ xor edx,DWORD PTR[12+rbp]
+ mov rbp,r8 ;the block just consumed becomes the previous block
+
+ sub r10,16 ;sets ZF for the jnz below; only mov and lea follow, and they leave the flags alone
+ mov QWORD PTR[40+rsp],r10
+ mov QWORD PTR[64+rsp],rbp
+
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ jnz $L$cbc_fast_dec_loop
+ mov r12,QWORD PTR[56+rsp]
+ mov r10,QWORD PTR[rbp] ;copy the last ciphertext block into the IV
+ mov r11,QWORD PTR[8+rbp]
+ mov QWORD PTR[r12],r10
+ mov QWORD PTR[8+r12],r11
+ jmp $L$cbc_fast_cleanup
+
+ALIGN 16
+$L$cbc_fast_dec_in_place::
+ mov r10,QWORD PTR[rbp] ;keep a copy of the IV in the frame at [64..79]
+ mov r11,QWORD PTR[8+rbp]
+ mov QWORD PTR[((0+64))+rsp],r10
+ mov QWORD PTR[((8+64))+rsp],r11
+ALIGN 4
+$L$cbc_fast_dec_in_place_loop::
+ mov eax,DWORD PTR[r8]
+ mov ebx,DWORD PTR[4+r8]
+ mov ecx,DWORD PTR[8+r8]
+ mov edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8
+
+ call _x86_64_AES_decrypt
+
+ mov r8,QWORD PTR[24+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ xor eax,DWORD PTR[((0+64))+rsp] ;xor with the saved previous ciphertext block (or the IV)
+ xor ebx,DWORD PTR[((4+64))+rsp]
+ xor ecx,DWORD PTR[((8+64))+rsp]
+ xor edx,DWORD PTR[((12+64))+rsp]
+
+ mov r11,QWORD PTR[r8] ;read the current ciphertext block before the store below overwrites it
+ mov r12,QWORD PTR[8+r8]
+ sub r10,16
+ jz $L$cbc_fast_dec_in_place_done
+
+ mov QWORD PTR[((0+64))+rsp],r11
+ mov QWORD PTR[((8+64))+rsp],r12
+
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ mov QWORD PTR[40+rsp],r10
+ jmp $L$cbc_fast_dec_in_place_loop
+$L$cbc_fast_dec_in_place_done::
+ mov rdi,QWORD PTR[56+rsp]
+ mov QWORD PTR[rdi],r11 ;the last ciphertext block becomes the IV
+ mov QWORD PTR[8+rdi],r12
+
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ALIGN 4
+$L$cbc_fast_cleanup::
+ cmp DWORD PTR[((80+240))+rsp],0 ;was the key copied to the stack?
+ lea rdi,QWORD PTR[80+rsp] ;lea leaves the flags from the cmp intact
+ je $L$cbc_exit
+ mov ecx,240/8
+ xor rax,rax
+ DD 090AB48F3h ;bytes F3 48 AB 90 = rep stosq + nop: zero the 240-byte key copy
+
+ jmp $L$cbc_exit
+
+
+ALIGN 16
+$L$cbc_slow_prologue::
+ ;slow path: 64-byte-aligned frame of at least 88 bytes, then lowered by a multiple of 64 derived from the key address
+ lea rbp,QWORD PTR[((-88))+rsp]
+ and rbp,-64
+
+ lea r10,QWORD PTR[((-88-63))+rcx]
+ sub r10,rbp
+ neg r10
+ and r10,03c0h ;r10 = (frame - (key - 151)) & 0x3c0
+ sub rbp,r10
+
+ xchg rbp,rsp ;switch to the new frame; rbp = rsp as it was after the pushes
+
+ mov QWORD PTR[16+rsp],rbp ;[16] = saved rsp, same slot as on the fast path
+$L$cbc_slow_body::
+
+
+
+
+ mov QWORD PTR[56+rsp],r8 ;[56] = IV pointer
+ mov rbp,r8 ;rbp = IV pointer
+ mov rbx,r9 ;rbx = mode flag
+ mov r9,rsi ;r9 = output cursor
+ mov r8,rdi ;r8 = input cursor
+ mov r15,rcx ;r15 = key schedule
+ mov r10,rdx ;r10 = bytes remaining
+
+ mov eax,DWORD PTR[240+r15] ;eax = round count
+ mov QWORD PTR[rsp],r15
+ shl eax,4
+ lea rax,QWORD PTR[rax*1+r15] ;rax = key + 16*rounds
+ mov QWORD PTR[8+rsp],rax ;end-of-key pointer, read by the compact routines as [16+rsp]
+
+ ;pick one of four table copies (k = 0..3) past offset 2048, based on the frame address
+ lea r14,QWORD PTR[2048+r14]
+ lea rax,QWORD PTR[((768-8))+rsp]
+ sub rax,r14
+ and rax,0300h ;rax = 256*k
+ lea r14,QWORD PTR[rax*1+r14]
+
+ cmp rbx,0
+ je $L$SLOW_DECRYPT
+
+ ;slow encrypt
+ test r10,-16 ;the loads below leave the flags alone, so the jz tests this result
+ mov eax,DWORD PTR[rbp] ;chaining value starts as the IV
+ mov ebx,DWORD PTR[4+rbp]
+ mov ecx,DWORD PTR[8+rbp]
+ mov edx,DWORD PTR[12+rbp]
+ jz $L$cbc_slow_enc_tail ;fewer than 16 bytes in total: go straight to the padding code
+
+ALIGN 4
+$L$cbc_slow_enc_loop::
+ xor eax,DWORD PTR[r8] ;chaining value xor plaintext block
+ xor ebx,DWORD PTR[4+r8]
+ xor ecx,DWORD PTR[8+r8]
+ xor edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8 ;cursors and count are kept in the frame across the call
+ mov QWORD PTR[32+rsp],r9
+ mov QWORD PTR[40+rsp],r10
+
+ call _x86_64_AES_encrypt_compact
+
+ mov r8,QWORD PTR[24+rsp]
+ mov r9,QWORD PTR[32+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ sub r10,16
+ test r10,-16
+ jnz $L$cbc_slow_enc_loop ;at least one more full block
+ test r10,15
+ jnz $L$cbc_slow_enc_tail ;1..15 bytes are left over
+ mov rbp,QWORD PTR[56+rsp]
+ mov DWORD PTR[rbp],eax ;write the last ciphertext block back as the IV
+ mov DWORD PTR[4+rbp],ebx
+ mov DWORD PTR[8+rbp],ecx
+ mov DWORD PTR[12+rbp],edx
+
+ jmp $L$cbc_exit
+
+ALIGN 4
+$L$cbc_slow_enc_tail:: ;build a zero-padded final block in the output buffer and encrypt it in place
+ mov r11,rax ;save the two chaining words that the string instructions clobber
+ mov r12,rcx
+ mov rcx,r10
+ mov rsi,r8
+ mov rdi,r9
+ DD 09066A4F3h ;bytes F3 A4 66 90 = rep movsb + 2-byte nop: copy the r10 leftover bytes to the output
+ mov rcx,16
+ sub rcx,r10
+ xor rax,rax
+ DD 09066AAF3h ;bytes F3 AA 66 90 = rep stosb + 2-byte nop: zero-fill the rest of the 16-byte block
+ mov r8,r9 ;the padded output block is now the input
+ mov r10,16 ;exactly one block left
+ mov rax,r11
+ mov rcx,r12
+ jmp $L$cbc_slow_enc_loop
+
+ALIGN 16
+$L$SLOW_DECRYPT::
+ shr rax,3 ;rax = 32*k
+ add r14,rax ;decrypt table copies are 288 bytes apart, so add 32*k to the 256*k already applied
+
+ mov r11,QWORD PTR[rbp] ;keep a copy of the IV in the frame at [64..79]
+ mov r12,QWORD PTR[8+rbp]
+ mov QWORD PTR[((0+64))+rsp],r11
+ mov QWORD PTR[((8+64))+rsp],r12
+
+ALIGN 4
+$L$cbc_slow_dec_loop::
+ mov eax,DWORD PTR[r8] ;load a ciphertext block
+ mov ebx,DWORD PTR[4+r8]
+ mov ecx,DWORD PTR[8+r8]
+ mov edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8
+ mov QWORD PTR[32+rsp],r9
+ mov QWORD PTR[40+rsp],r10
+
+ call _x86_64_AES_decrypt_compact
+
+ mov r8,QWORD PTR[24+rsp]
+ mov r9,QWORD PTR[32+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ xor eax,DWORD PTR[((0+64))+rsp] ;xor with the saved previous ciphertext block (or the IV)
+ xor ebx,DWORD PTR[((4+64))+rsp]
+ xor ecx,DWORD PTR[((8+64))+rsp]
+ xor edx,DWORD PTR[((12+64))+rsp]
+
+ mov r11,QWORD PTR[r8] ;current ciphertext block, the next chaining value
+ mov r12,QWORD PTR[8+r8]
+ sub r10,16
+ jc $L$cbc_slow_dec_partial ;fewer than 16 bytes were left: emit only part of this block
+ jz $L$cbc_slow_dec_done ;this was exactly the last block
+
+ mov QWORD PTR[((0+64))+rsp],r11
+ mov QWORD PTR[((8+64))+rsp],r12
+
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ jmp $L$cbc_slow_dec_loop
+$L$cbc_slow_dec_done::
+ mov rdi,QWORD PTR[56+rsp]
+ mov QWORD PTR[rdi],r11 ;the last ciphertext block becomes the IV
+ mov QWORD PTR[8+rdi],r12
+
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+
+ jmp $L$cbc_exit
+
+ALIGN 4
+$L$cbc_slow_dec_partial::
+ mov rdi,QWORD PTR[56+rsp]
+ mov QWORD PTR[rdi],r11 ;the last ciphertext block becomes the IV
+ mov QWORD PTR[8+rdi],r12
+
+ mov DWORD PTR[((0+64))+rsp],eax ;stage the decrypted block in the frame
+ mov DWORD PTR[((4+64))+rsp],ebx
+ mov DWORD PTR[((8+64))+rsp],ecx
+ mov DWORD PTR[((12+64))+rsp],edx
+
+ mov rdi,r9
+ lea rsi,QWORD PTR[64+rsp]
+ lea rcx,QWORD PTR[16+r10] ;r10 went negative at the sub, so 16+r10 is the number of bytes that were left
+ DD 09066A4F3h ;bytes F3 A4 66 90 = rep movsb + 2-byte nop: copy only those bytes to the output
+ jmp $L$cbc_exit
+
+ALIGN 16
+$L$cbc_exit::
+ mov rsi,QWORD PTR[16+rsp] ;rsi = rsp as it was right after the pushes
+ mov r15,QWORD PTR[rsi] ;reload the callee-saved registers in reverse push order
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi] ;rsp now points at the flags saved by pushfq
+$L$cbc_popfq::
+ popfq ;restore the caller's flags, including the direction flag
+$L$cbc_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_AES_cbc_encrypt::
+AES_cbc_encrypt ENDP
+ALIGN 64
+$L$AES_Te::
+ DD 0a56363c6h,0a56363c6h
+ DD 0847c7cf8h,0847c7cf8h
+ DD 0997777eeh,0997777eeh
+ DD 08d7b7bf6h,08d7b7bf6h
+ DD 00df2f2ffh,00df2f2ffh
+ DD 0bd6b6bd6h,0bd6b6bd6h
+ DD 0b16f6fdeh,0b16f6fdeh
+ DD 054c5c591h,054c5c591h
+ DD 050303060h,050303060h
+ DD 003010102h,003010102h
+ DD 0a96767ceh,0a96767ceh
+ DD 07d2b2b56h,07d2b2b56h
+ DD 019fefee7h,019fefee7h
+ DD 062d7d7b5h,062d7d7b5h
+ DD 0e6abab4dh,0e6abab4dh
+ DD 09a7676ech,09a7676ech
+ DD 045caca8fh,045caca8fh
+ DD 09d82821fh,09d82821fh
+ DD 040c9c989h,040c9c989h
+ DD 0877d7dfah,0877d7dfah
+ DD 015fafaefh,015fafaefh
+ DD 0eb5959b2h,0eb5959b2h
+ DD 0c947478eh,0c947478eh
+ DD 00bf0f0fbh,00bf0f0fbh
+ DD 0ecadad41h,0ecadad41h
+ DD 067d4d4b3h,067d4d4b3h
+ DD 0fda2a25fh,0fda2a25fh
+ DD 0eaafaf45h,0eaafaf45h
+ DD 0bf9c9c23h,0bf9c9c23h
+ DD 0f7a4a453h,0f7a4a453h
+ DD 0967272e4h,0967272e4h
+ DD 05bc0c09bh,05bc0c09bh
+ DD 0c2b7b775h,0c2b7b775h
+ DD 01cfdfde1h,01cfdfde1h
+ DD 0ae93933dh,0ae93933dh
+ DD 06a26264ch,06a26264ch
+ DD 05a36366ch,05a36366ch
+ DD 0413f3f7eh,0413f3f7eh
+ DD 002f7f7f5h,002f7f7f5h
+ DD 04fcccc83h,04fcccc83h
+ DD 05c343468h,05c343468h
+ DD 0f4a5a551h,0f4a5a551h
+ DD 034e5e5d1h,034e5e5d1h
+ DD 008f1f1f9h,008f1f1f9h
+ DD 0937171e2h,0937171e2h
+ DD 073d8d8abh,073d8d8abh
+ DD 053313162h,053313162h
+ DD 03f15152ah,03f15152ah
+ DD 00c040408h,00c040408h
+ DD 052c7c795h,052c7c795h
+ DD 065232346h,065232346h
+ DD 05ec3c39dh,05ec3c39dh
+ DD 028181830h,028181830h
+ DD 0a1969637h,0a1969637h
+ DD 00f05050ah,00f05050ah
+ DD 0b59a9a2fh,0b59a9a2fh
+ DD 00907070eh,00907070eh
+ DD 036121224h,036121224h
+ DD 09b80801bh,09b80801bh
+ DD 03de2e2dfh,03de2e2dfh
+ DD 026ebebcdh,026ebebcdh
+ DD 06927274eh,06927274eh
+ DD 0cdb2b27fh,0cdb2b27fh
+ DD 09f7575eah,09f7575eah
+ DD 01b090912h,01b090912h
+ DD 09e83831dh,09e83831dh
+ DD 0742c2c58h,0742c2c58h
+ DD 02e1a1a34h,02e1a1a34h
+ DD 02d1b1b36h,02d1b1b36h
+ DD 0b26e6edch,0b26e6edch
+ DD 0ee5a5ab4h,0ee5a5ab4h
+ DD 0fba0a05bh,0fba0a05bh
+ DD 0f65252a4h,0f65252a4h
+ DD 04d3b3b76h,04d3b3b76h
+ DD 061d6d6b7h,061d6d6b7h
+ DD 0ceb3b37dh,0ceb3b37dh
+ DD 07b292952h,07b292952h
+ DD 03ee3e3ddh,03ee3e3ddh
+ DD 0712f2f5eh,0712f2f5eh
+ DD 097848413h,097848413h
+ DD 0f55353a6h,0f55353a6h
+ DD 068d1d1b9h,068d1d1b9h
+ DD 000000000h,000000000h
+ DD 02cededc1h,02cededc1h
+ DD 060202040h,060202040h
+ DD 01ffcfce3h,01ffcfce3h
+ DD 0c8b1b179h,0c8b1b179h
+ DD 0ed5b5bb6h,0ed5b5bb6h
+ DD 0be6a6ad4h,0be6a6ad4h
+ DD 046cbcb8dh,046cbcb8dh
+ DD 0d9bebe67h,0d9bebe67h
+ DD 04b393972h,04b393972h
+ DD 0de4a4a94h,0de4a4a94h
+ DD 0d44c4c98h,0d44c4c98h
+ DD 0e85858b0h,0e85858b0h
+ DD 04acfcf85h,04acfcf85h
+ DD 06bd0d0bbh,06bd0d0bbh
+ DD 02aefefc5h,02aefefc5h
+ DD 0e5aaaa4fh,0e5aaaa4fh
+ DD 016fbfbedh,016fbfbedh
+ DD 0c5434386h,0c5434386h
+ DD 0d74d4d9ah,0d74d4d9ah
+ DD 055333366h,055333366h
+ DD 094858511h,094858511h
+ DD 0cf45458ah,0cf45458ah
+ DD 010f9f9e9h,010f9f9e9h
+ DD 006020204h,006020204h
+ DD 0817f7ffeh,0817f7ffeh
+ DD 0f05050a0h,0f05050a0h
+ DD 0443c3c78h,0443c3c78h
+ DD 0ba9f9f25h,0ba9f9f25h
+ DD 0e3a8a84bh,0e3a8a84bh
+ DD 0f35151a2h,0f35151a2h
+ DD 0fea3a35dh,0fea3a35dh
+ DD 0c0404080h,0c0404080h
+ DD 08a8f8f05h,08a8f8f05h
+ DD 0ad92923fh,0ad92923fh
+ DD 0bc9d9d21h,0bc9d9d21h
+ DD 048383870h,048383870h
+ DD 004f5f5f1h,004f5f5f1h
+ DD 0dfbcbc63h,0dfbcbc63h
+ DD 0c1b6b677h,0c1b6b677h
+ DD 075dadaafh,075dadaafh
+ DD 063212142h,063212142h
+ DD 030101020h,030101020h
+ DD 01affffe5h,01affffe5h
+ DD 00ef3f3fdh,00ef3f3fdh
+ DD 06dd2d2bfh,06dd2d2bfh
+ DD 04ccdcd81h,04ccdcd81h
+ DD 0140c0c18h,0140c0c18h
+ DD 035131326h,035131326h
+ DD 02fececc3h,02fececc3h
+ DD 0e15f5fbeh,0e15f5fbeh
+ DD 0a2979735h,0a2979735h
+ DD 0cc444488h,0cc444488h
+ DD 03917172eh,03917172eh
+ DD 057c4c493h,057c4c493h
+ DD 0f2a7a755h,0f2a7a755h
+ DD 0827e7efch,0827e7efch
+ DD 0473d3d7ah,0473d3d7ah
+ DD 0ac6464c8h,0ac6464c8h
+ DD 0e75d5dbah,0e75d5dbah
+ DD 02b191932h,02b191932h
+ DD 0957373e6h,0957373e6h
+ DD 0a06060c0h,0a06060c0h
+ DD 098818119h,098818119h
+ DD 0d14f4f9eh,0d14f4f9eh
+ DD 07fdcdca3h,07fdcdca3h
+ DD 066222244h,066222244h
+ DD 07e2a2a54h,07e2a2a54h
+ DD 0ab90903bh,0ab90903bh
+ DD 08388880bh,08388880bh
+ DD 0ca46468ch,0ca46468ch
+ DD 029eeeec7h,029eeeec7h
+ DD 0d3b8b86bh,0d3b8b86bh
+ DD 03c141428h,03c141428h
+ DD 079dedea7h,079dedea7h
+ DD 0e25e5ebch,0e25e5ebch
+ DD 01d0b0b16h,01d0b0b16h
+ DD 076dbdbadh,076dbdbadh
+ DD 03be0e0dbh,03be0e0dbh
+ DD 056323264h,056323264h
+ DD 04e3a3a74h,04e3a3a74h
+ DD 01e0a0a14h,01e0a0a14h
+ DD 0db494992h,0db494992h
+ DD 00a06060ch,00a06060ch
+ DD 06c242448h,06c242448h
+ DD 0e45c5cb8h,0e45c5cb8h
+ DD 05dc2c29fh,05dc2c29fh
+ DD 06ed3d3bdh,06ed3d3bdh
+ DD 0efacac43h,0efacac43h
+ DD 0a66262c4h,0a66262c4h
+ DD 0a8919139h,0a8919139h
+ DD 0a4959531h,0a4959531h
+ DD 037e4e4d3h,037e4e4d3h
+ DD 08b7979f2h,08b7979f2h
+ DD 032e7e7d5h,032e7e7d5h
+ DD 043c8c88bh,043c8c88bh
+ DD 05937376eh,05937376eh
+ DD 0b76d6ddah,0b76d6ddah
+ DD 08c8d8d01h,08c8d8d01h
+ DD 064d5d5b1h,064d5d5b1h
+ DD 0d24e4e9ch,0d24e4e9ch
+ DD 0e0a9a949h,0e0a9a949h
+ DD 0b46c6cd8h,0b46c6cd8h
+ DD 0fa5656ach,0fa5656ach
+ DD 007f4f4f3h,007f4f4f3h
+ DD 025eaeacfh,025eaeacfh
+ DD 0af6565cah,0af6565cah
+ DD 08e7a7af4h,08e7a7af4h
+ DD 0e9aeae47h,0e9aeae47h
+ DD 018080810h,018080810h
+ DD 0d5baba6fh,0d5baba6fh
+ DD 0887878f0h,0887878f0h
+ DD 06f25254ah,06f25254ah
+ DD 0722e2e5ch,0722e2e5ch
+ DD 0241c1c38h,0241c1c38h
+ DD 0f1a6a657h,0f1a6a657h
+ DD 0c7b4b473h,0c7b4b473h
+ DD 051c6c697h,051c6c697h
+ DD 023e8e8cbh,023e8e8cbh
+ DD 07cdddda1h,07cdddda1h
+ DD 09c7474e8h,09c7474e8h
+ DD 0211f1f3eh,0211f1f3eh
+ DD 0dd4b4b96h,0dd4b4b96h
+ DD 0dcbdbd61h,0dcbdbd61h
+ DD 0868b8b0dh,0868b8b0dh
+ DD 0858a8a0fh,0858a8a0fh
+ DD 0907070e0h,0907070e0h
+ DD 0423e3e7ch,0423e3e7ch
+ DD 0c4b5b571h,0c4b5b571h
+ DD 0aa6666cch,0aa6666cch
+ DD 0d8484890h,0d8484890h
+ DD 005030306h,005030306h
+ DD 001f6f6f7h,001f6f6f7h
+ DD 0120e0e1ch,0120e0e1ch
+ DD 0a36161c2h,0a36161c2h
+ DD 05f35356ah,05f35356ah
+ DD 0f95757aeh,0f95757aeh
+ DD 0d0b9b969h,0d0b9b969h
+ DD 091868617h,091868617h
+ DD 058c1c199h,058c1c199h
+ DD 0271d1d3ah,0271d1d3ah
+ DD 0b99e9e27h,0b99e9e27h
+ DD 038e1e1d9h,038e1e1d9h
+ DD 013f8f8ebh,013f8f8ebh
+ DD 0b398982bh,0b398982bh
+ DD 033111122h,033111122h
+ DD 0bb6969d2h,0bb6969d2h
+ DD 070d9d9a9h,070d9d9a9h
+ DD 0898e8e07h,0898e8e07h
+ DD 0a7949433h,0a7949433h
+ DD 0b69b9b2dh,0b69b9b2dh
+ DD 0221e1e3ch,0221e1e3ch
+ DD 092878715h,092878715h
+ DD 020e9e9c9h,020e9e9c9h
+ DD 049cece87h,049cece87h
+ DD 0ff5555aah,0ff5555aah
+ DD 078282850h,078282850h
+ DD 07adfdfa5h,07adfdfa5h
+ DD 08f8c8c03h,08f8c8c03h
+ DD 0f8a1a159h,0f8a1a159h
+ DD 080898909h,080898909h
+ DD 0170d0d1ah,0170d0d1ah
+ DD 0dabfbf65h,0dabfbf65h
+ DD 031e6e6d7h,031e6e6d7h
+ DD 0c6424284h,0c6424284h
+ DD 0b86868d0h,0b86868d0h
+ DD 0c3414182h,0c3414182h
+ DD 0b0999929h,0b0999929h
+ DD 0772d2d5ah,0772d2d5ah
+ DD 0110f0f1eh,0110f0f1eh
+ DD 0cbb0b07bh,0cbb0b07bh
+ DD 0fc5454a8h,0fc5454a8h
+ DD 0d6bbbb6dh,0d6bbbb6dh
+ DD 03a16162ch,03a16162ch
+DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h
+DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h
+DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h
+DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h
+DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch
+DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h
+DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah
+DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h
+DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h
+DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h
+DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh
+DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh
+DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h
+DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h
+DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h
+DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h
+DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h
+DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h
+DB 060h,081h,04fh,0dch,022h,02ah,090h,088h
+DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh
+DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch
+DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h
+DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h
+DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h
+DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h
+DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah
+DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh
+DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh
+DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h
+DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh
+DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h
+DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h
+DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h
+DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h
+DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h
+DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h
+DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch
+DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h
+DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah
+DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h
+DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h
+DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h
+DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh
+DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh
+DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h
+DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h
+DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h
+DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h
+DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h
+DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h
+DB 060h,081h,04fh,0dch,022h,02ah,090h,088h
+DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh
+DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch
+DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h
+DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h
+DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h
+DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h
+DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah
+DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh
+DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh
+DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h
+DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh
+DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h
+DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h
+DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h
+DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h
+DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h
+DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h
+DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch
+DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h
+DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah
+DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h
+DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h
+DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h
+DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh
+DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh
+DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h
+DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h
+DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h
+DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h
+DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h
+DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h
+DB 060h,081h,04fh,0dch,022h,02ah,090h,088h
+DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh
+DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch
+DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h
+DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h
+DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h
+DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h
+DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah
+DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh
+DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh
+DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h
+DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh
+DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h
+DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h
+DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h
+DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h
+DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h
+DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h
+DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch
+DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h
+DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah
+DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h
+DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h
+DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h
+DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh
+DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh
+DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h
+DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h
+DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h
+DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h
+DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h
+DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h
+DB 060h,081h,04fh,0dch,022h,02ah,090h,088h
+DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh
+DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch
+DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h
+DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h
+DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h
+DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h
+DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah
+DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh
+DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh
+DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h
+DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh
+DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h
+DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h
+ DD 000000001h,000000002h,000000004h,000000008h
+ DD 000000010h,000000020h,000000040h,000000080h
+ DD 00000001bh,000000036h,080808080h,080808080h
+ DD 0fefefefeh,0fefefefeh,01b1b1b1bh,01b1b1b1bh
+ALIGN 64
+$L$AES_Td:: ; decryption table: 256 entries of 8 bytes (each 32-bit word stored twice = 2048 bytes), then four copies of a 256-byte table (starts 52 09 6a d5, the AES inverse S-box), each copy followed by the packed masks 80808080/fefefefe/1b1b1b1b
+ DD 050a7f451h,050a7f451h ; entry 0; NOTE(review): the duplicated word presumably lets round code read byte-rotated variants via loads at byte offsets 1..3 - confirm against the round functions
+ DD 05365417eh,05365417eh
+ DD 0c3a4171ah,0c3a4171ah
+ DD 0965e273ah,0965e273ah
+ DD 0cb6bab3bh,0cb6bab3bh
+ DD 0f1459d1fh,0f1459d1fh
+ DD 0ab58faach,0ab58faach
+ DD 09303e34bh,09303e34bh
+ DD 055fa3020h,055fa3020h
+ DD 0f66d76adh,0f66d76adh
+ DD 09176cc88h,09176cc88h
+ DD 0254c02f5h,0254c02f5h
+ DD 0fcd7e54fh,0fcd7e54fh
+ DD 0d7cb2ac5h,0d7cb2ac5h
+ DD 080443526h,080443526h
+ DD 08fa362b5h,08fa362b5h
+ DD 0495ab1deh,0495ab1deh
+ DD 0671bba25h,0671bba25h
+ DD 0980eea45h,0980eea45h
+ DD 0e1c0fe5dh,0e1c0fe5dh
+ DD 002752fc3h,002752fc3h
+ DD 012f04c81h,012f04c81h
+ DD 0a397468dh,0a397468dh
+ DD 0c6f9d36bh,0c6f9d36bh
+ DD 0e75f8f03h,0e75f8f03h
+ DD 0959c9215h,0959c9215h
+ DD 0eb7a6dbfh,0eb7a6dbfh
+ DD 0da595295h,0da595295h
+ DD 02d83bed4h,02d83bed4h
+ DD 0d3217458h,0d3217458h
+ DD 02969e049h,02969e049h
+ DD 044c8c98eh,044c8c98eh
+ DD 06a89c275h,06a89c275h
+ DD 078798ef4h,078798ef4h
+ DD 06b3e5899h,06b3e5899h
+ DD 0dd71b927h,0dd71b927h
+ DD 0b64fe1beh,0b64fe1beh
+ DD 017ad88f0h,017ad88f0h
+ DD 066ac20c9h,066ac20c9h
+ DD 0b43ace7dh,0b43ace7dh
+ DD 0184adf63h,0184adf63h
+ DD 082311ae5h,082311ae5h
+ DD 060335197h,060335197h
+ DD 0457f5362h,0457f5362h
+ DD 0e07764b1h,0e07764b1h
+ DD 084ae6bbbh,084ae6bbbh
+ DD 01ca081feh,01ca081feh
+ DD 0942b08f9h,0942b08f9h
+ DD 058684870h,058684870h
+ DD 019fd458fh,019fd458fh
+ DD 0876cde94h,0876cde94h
+ DD 0b7f87b52h,0b7f87b52h
+ DD 023d373abh,023d373abh
+ DD 0e2024b72h,0e2024b72h
+ DD 0578f1fe3h,0578f1fe3h
+ DD 02aab5566h,02aab5566h
+ DD 00728ebb2h,00728ebb2h
+ DD 003c2b52fh,003c2b52fh
+ DD 09a7bc586h,09a7bc586h
+ DD 0a50837d3h,0a50837d3h
+ DD 0f2872830h,0f2872830h
+ DD 0b2a5bf23h,0b2a5bf23h
+ DD 0ba6a0302h,0ba6a0302h
+ DD 05c8216edh,05c8216edh
+ DD 02b1ccf8ah,02b1ccf8ah
+ DD 092b479a7h,092b479a7h
+ DD 0f0f207f3h,0f0f207f3h
+ DD 0a1e2694eh,0a1e2694eh
+ DD 0cdf4da65h,0cdf4da65h
+ DD 0d5be0506h,0d5be0506h
+ DD 01f6234d1h,01f6234d1h
+ DD 08afea6c4h,08afea6c4h
+ DD 09d532e34h,09d532e34h
+ DD 0a055f3a2h,0a055f3a2h
+ DD 032e18a05h,032e18a05h
+ DD 075ebf6a4h,075ebf6a4h
+ DD 039ec830bh,039ec830bh
+ DD 0aaef6040h,0aaef6040h
+ DD 0069f715eh,0069f715eh
+ DD 051106ebdh,051106ebdh
+ DD 0f98a213eh,0f98a213eh
+ DD 03d06dd96h,03d06dd96h
+ DD 0ae053eddh,0ae053eddh
+ DD 046bde64dh,046bde64dh
+ DD 0b58d5491h,0b58d5491h
+ DD 0055dc471h,0055dc471h
+ DD 06fd40604h,06fd40604h
+ DD 0ff155060h,0ff155060h
+ DD 024fb9819h,024fb9819h
+ DD 097e9bdd6h,097e9bdd6h
+ DD 0cc434089h,0cc434089h
+ DD 0779ed967h,0779ed967h
+ DD 0bd42e8b0h,0bd42e8b0h
+ DD 0888b8907h,0888b8907h
+ DD 0385b19e7h,0385b19e7h
+ DD 0dbeec879h,0dbeec879h
+ DD 0470a7ca1h,0470a7ca1h
+ DD 0e90f427ch,0e90f427ch
+ DD 0c91e84f8h,0c91e84f8h
+ DD 000000000h,000000000h
+ DD 083868009h,083868009h
+ DD 048ed2b32h,048ed2b32h
+ DD 0ac70111eh,0ac70111eh
+ DD 04e725a6ch,04e725a6ch
+ DD 0fbff0efdh,0fbff0efdh
+ DD 05638850fh,05638850fh
+ DD 01ed5ae3dh,01ed5ae3dh
+ DD 027392d36h,027392d36h
+ DD 064d90f0ah,064d90f0ah
+ DD 021a65c68h,021a65c68h
+ DD 0d1545b9bh,0d1545b9bh
+ DD 03a2e3624h,03a2e3624h
+ DD 0b1670a0ch,0b1670a0ch
+ DD 00fe75793h,00fe75793h
+ DD 0d296eeb4h,0d296eeb4h
+ DD 09e919b1bh,09e919b1bh
+ DD 04fc5c080h,04fc5c080h
+ DD 0a220dc61h,0a220dc61h
+ DD 0694b775ah,0694b775ah
+ DD 0161a121ch,0161a121ch
+ DD 00aba93e2h,00aba93e2h
+ DD 0e52aa0c0h,0e52aa0c0h
+ DD 043e0223ch,043e0223ch
+ DD 01d171b12h,01d171b12h
+ DD 00b0d090eh,00b0d090eh
+ DD 0adc78bf2h,0adc78bf2h
+ DD 0b9a8b62dh,0b9a8b62dh
+ DD 0c8a91e14h,0c8a91e14h
+ DD 08519f157h,08519f157h
+ DD 04c0775afh,04c0775afh
+ DD 0bbdd99eeh,0bbdd99eeh
+ DD 0fd607fa3h,0fd607fa3h
+ DD 09f2601f7h,09f2601f7h
+ DD 0bcf5725ch,0bcf5725ch
+ DD 0c53b6644h,0c53b6644h
+ DD 0347efb5bh,0347efb5bh
+ DD 07629438bh,07629438bh
+ DD 0dcc623cbh,0dcc623cbh
+ DD 068fcedb6h,068fcedb6h
+ DD 063f1e4b8h,063f1e4b8h
+ DD 0cadc31d7h,0cadc31d7h
+ DD 010856342h,010856342h
+ DD 040229713h,040229713h
+ DD 02011c684h,02011c684h
+ DD 07d244a85h,07d244a85h
+ DD 0f83dbbd2h,0f83dbbd2h
+ DD 01132f9aeh,01132f9aeh
+ DD 06da129c7h,06da129c7h
+ DD 04b2f9e1dh,04b2f9e1dh
+ DD 0f330b2dch,0f330b2dch
+ DD 0ec52860dh,0ec52860dh
+ DD 0d0e3c177h,0d0e3c177h
+ DD 06c16b32bh,06c16b32bh
+ DD 099b970a9h,099b970a9h
+ DD 0fa489411h,0fa489411h
+ DD 02264e947h,02264e947h
+ DD 0c48cfca8h,0c48cfca8h
+ DD 01a3ff0a0h,01a3ff0a0h
+ DD 0d82c7d56h,0d82c7d56h
+ DD 0ef903322h,0ef903322h
+ DD 0c74e4987h,0c74e4987h
+ DD 0c1d138d9h,0c1d138d9h
+ DD 0fea2ca8ch,0fea2ca8ch
+ DD 0360bd498h,0360bd498h
+ DD 0cf81f5a6h,0cf81f5a6h
+ DD 028de7aa5h,028de7aa5h
+ DD 0268eb7dah,0268eb7dah
+ DD 0a4bfad3fh,0a4bfad3fh
+ DD 0e49d3a2ch,0e49d3a2ch
+ DD 00d927850h,00d927850h
+ DD 09bcc5f6ah,09bcc5f6ah
+ DD 062467e54h,062467e54h
+ DD 0c2138df6h,0c2138df6h
+ DD 0e8b8d890h,0e8b8d890h
+ DD 05ef7392eh,05ef7392eh
+ DD 0f5afc382h,0f5afc382h
+ DD 0be805d9fh,0be805d9fh
+ DD 07c93d069h,07c93d069h
+ DD 0a92dd56fh,0a92dd56fh
+ DD 0b31225cfh,0b31225cfh
+ DD 03b99acc8h,03b99acc8h
+ DD 0a77d1810h,0a77d1810h
+ DD 06e639ce8h,06e639ce8h
+ DD 07bbb3bdbh,07bbb3bdbh
+ DD 0097826cdh,0097826cdh
+ DD 0f418596eh,0f418596eh
+ DD 001b79aech,001b79aech
+ DD 0a89a4f83h,0a89a4f83h
+ DD 0656e95e6h,0656e95e6h
+ DD 07ee6ffaah,07ee6ffaah
+ DD 008cfbc21h,008cfbc21h
+ DD 0e6e815efh,0e6e815efh
+ DD 0d99be7bah,0d99be7bah
+ DD 0ce366f4ah,0ce366f4ah
+ DD 0d4099feah,0d4099feah
+ DD 0d67cb029h,0d67cb029h
+ DD 0afb2a431h,0afb2a431h
+ DD 031233f2ah,031233f2ah
+ DD 03094a5c6h,03094a5c6h
+ DD 0c066a235h,0c066a235h
+ DD 037bc4e74h,037bc4e74h
+ DD 0a6ca82fch,0a6ca82fch
+ DD 0b0d090e0h,0b0d090e0h
+ DD 015d8a733h,015d8a733h
+ DD 04a9804f1h,04a9804f1h
+ DD 0f7daec41h,0f7daec41h
+ DD 00e50cd7fh,00e50cd7fh
+ DD 02ff69117h,02ff69117h
+ DD 08dd64d76h,08dd64d76h
+ DD 04db0ef43h,04db0ef43h
+ DD 0544daacch,0544daacch
+ DD 0df0496e4h,0df0496e4h
+ DD 0e3b5d19eh,0e3b5d19eh
+ DD 01b886a4ch,01b886a4ch
+ DD 0b81f2cc1h,0b81f2cc1h
+ DD 07f516546h,07f516546h
+ DD 004ea5e9dh,004ea5e9dh
+ DD 05d358c01h,05d358c01h
+ DD 0737487fah,0737487fah
+ DD 02e410bfbh,02e410bfbh
+ DD 05a1d67b3h,05a1d67b3h
+ DD 052d2db92h,052d2db92h
+ DD 0335610e9h,0335610e9h
+ DD 01347d66dh,01347d66dh
+ DD 08c61d79ah,08c61d79ah
+ DD 07a0ca137h,07a0ca137h
+ DD 08e14f859h,08e14f859h
+ DD 0893c13ebh,0893c13ebh
+ DD 0ee27a9ceh,0ee27a9ceh
+ DD 035c961b7h,035c961b7h
+ DD 0ede51ce1h,0ede51ce1h
+ DD 03cb1477ah,03cb1477ah
+ DD 059dfd29ch,059dfd29ch
+ DD 03f73f255h,03f73f255h
+ DD 079ce1418h,079ce1418h
+ DD 0bf37c773h,0bf37c773h
+ DD 0eacdf753h,0eacdf753h
+ DD 05baafd5fh,05baafd5fh
+ DD 0146f3ddfh,0146f3ddfh
+ DD 086db4478h,086db4478h
+ DD 081f3afcah,081f3afcah
+ DD 03ec468b9h,03ec468b9h
+ DD 02c342438h,02c342438h
+ DD 05f40a3c2h,05f40a3c2h
+ DD 072c31d16h,072c31d16h
+ DD 00c25e2bch,00c25e2bch
+ DD 08b493c28h,08b493c28h
+ DD 041950dffh,041950dffh
+ DD 07101a839h,07101a839h
+ DD 0deb30c08h,0deb30c08h
+ DD 09ce4b4d8h,09ce4b4d8h
+ DD 090c15664h,090c15664h
+ DD 06184cb7bh,06184cb7bh
+ DD 070b632d5h,070b632d5h
+ DD 0745c6c48h,0745c6c48h
+ DD 04257b8d0h,04257b8d0h
+DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h
+DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh
+DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h
+DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh
+DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh
+DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh
+DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h
+DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h
+DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h
+DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h
+DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah
+DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h
+DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah
+DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h
+DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h
+DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh
+DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah
+DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h
+DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h
+DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh
+DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h
+DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh
+DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h
+DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h
+DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h
+DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh
+DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh
+DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh
+DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h
+DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h
+DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h
+DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh
+ DD 080808080h,080808080h,0fefefefeh,0fefefefeh
+ DD 01b1b1b1bh,01b1b1b1bh,0,0
+DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h
+DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh
+DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h
+DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh
+DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh
+DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh
+DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h
+DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h
+DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h
+DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h
+DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah
+DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h
+DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah
+DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h
+DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h
+DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh
+DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah
+DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h
+DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h
+DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh
+DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h
+DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh
+DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h
+DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h
+DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h
+DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh
+DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh
+DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh
+DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h
+DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h
+DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h
+DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh
+ DD 080808080h,080808080h,0fefefefeh,0fefefefeh
+ DD 01b1b1b1bh,01b1b1b1bh,0,0
+DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h
+DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh
+DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h
+DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh
+DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh
+DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh
+DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h
+DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h
+DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h
+DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h
+DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah
+DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h
+DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah
+DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h
+DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h
+DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh
+DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah
+DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h
+DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h
+DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh
+DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h
+DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh
+DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h
+DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h
+DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h
+DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh
+DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh
+DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh
+DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h
+DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h
+DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h
+DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh
+ DD 080808080h,080808080h,0fefefefeh,0fefefefeh
+ DD 01b1b1b1bh,01b1b1b1bh,0,0
+DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h
+DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh
+DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h
+DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh
+DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh
+DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh
+DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h
+DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h
+DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h
+DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h
+DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah
+DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h
+DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah
+DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h
+DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h
+DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh
+DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah
+DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h
+DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h
+DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh
+DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h
+DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh
+DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h
+DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h
+DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h
+DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh
+DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh
+DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh
+DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h
+DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h
+DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h
+DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh
+ DD 080808080h,080808080h,0fefefefeh,0fefefefeh
+ DD 01b1b1b1bh,01b1b1b1bh,0,0
+DB 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32 ; NUL-terminated ASCII tag: "AES for x86_64, CRYPTOGAMS by " followed by the author's e-mail address in angle brackets
+DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+DB 62,0
+ALIGN 64
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+block_se_handler PROC PRIVATE ; Win64 language-specific exception handler registered in .xdata for AES_encrypt/AES_decrypt; uses r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*
+ push rsi ; preserve every non-volatile register (and flags) the handler touches
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64 ; outgoing-argument area for the 8-argument RtlVirtualUnwind call made at $L$common_seh_exit
+
+ mov rax,QWORD PTR[120+r8] ; rax = context->Rax (NOTE(review): the guarded function is expected to keep its entry rsp in rax during the prologue - its code is outside this chunk)
+ mov rbx,QWORD PTR[248+r8] ; rbx = context->Rip (faulting address)
+
+ mov rsi,QWORD PTR[8+r9] ; rsi = disp->ImageBase
+ mov r11,QWORD PTR[56+r9] ; r11 = disp->HandlerData (the two RVAs following the handler RVA in .xdata)
+
+ mov r10d,DWORD PTR[r11] ; HandlerData[0] = RVA of the prologue-end label
+ lea r10,QWORD PTR[r10*1+rsi] ; convert RVA to absolute address
+ cmp rbx,r10
+ jb $L$in_block_prologue ; fault before the frame was set up: rax already is the frame to report
+
+ mov rax,QWORD PTR[152+r8] ; rax = context->Rsp
+
+ mov r10d,DWORD PTR[4+r11] ; HandlerData[1] = RVA of the epilogue label
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_block_prologue ; fault at/after the epilogue label: registers and rsp already restored
+
+ mov rax,QWORD PTR[24+rax] ; fault in the body: fetch the stack pointer the function saved at 24(rsp)
+ lea rax,QWORD PTR[48+rax] ; step over the six pushed registers -> rsp value at function entry
+
+ mov rbx,QWORD PTR[((-8))+rax] ; reload the six saved non-volatile registers from the function's frame
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov QWORD PTR[144+r8],rbx ; context->Rbx
+ mov QWORD PTR[160+r8],rbp ; context->Rbp
+ mov QWORD PTR[216+r8],r12 ; context->R12
+ mov QWORD PTR[224+r8],r13 ; context->R13
+ mov QWORD PTR[232+r8],r14 ; context->R14
+ mov QWORD PTR[240+r8],r15 ; context->R15
+
+$L$in_block_prologue::
+ mov rdi,QWORD PTR[8+rax] ; rdi/rsi as stored at 8 and 16 above the entry rsp (caller's home slots)
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax ; context->Rsp = entry rsp
+ mov QWORD PTR[168+r8],rsi ; context->Rsi
+ mov QWORD PTR[176+r8],rdi ; context->Rdi
+
+ jmp $L$common_seh_exit ; shared tail lives inside cbc_se_handler (label declared with :: so it is visible across PROCs)
+block_se_handler ENDP
+
+
+ALIGN 16
+key_se_handler PROC PRIVATE ; Win64 exception handler registered in .xdata for AES_set_encrypt_key/AES_set_decrypt_key; uses r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*
+ push rsi ; preserve every non-volatile register (and flags) the handler touches
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64 ; outgoing-argument area for the RtlVirtualUnwind call at $L$common_seh_exit
+
+ mov rax,QWORD PTR[120+r8] ; rax = context->Rax
+ mov rbx,QWORD PTR[248+r8] ; rbx = context->Rip
+
+ mov rsi,QWORD PTR[8+r9] ; rsi = disp->ImageBase
+ mov r11,QWORD PTR[56+r9] ; r11 = disp->HandlerData
+
+ mov r10d,DWORD PTR[r11] ; HandlerData[0] = RVA of the prologue-end label
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$in_key_prologue ; fault before the frame was set up
+
+ mov rax,QWORD PTR[152+r8] ; rax = context->Rsp
+
+ mov r10d,DWORD PTR[4+r11] ; HandlerData[1] = RVA of the epilogue label
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_key_prologue ; fault at/after the epilogue label
+
+ lea rax,QWORD PTR[56+rax] ; fault in the body: frame is a fixed 56 bytes below the entry rsp (NOTE(review): presumably six pushes plus 8 bytes of locals - the key-setup prologue is outside this chunk)
+
+ mov rbx,QWORD PTR[((-8))+rax] ; reload the six saved non-volatile registers
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov QWORD PTR[144+r8],rbx ; context->Rbx
+ mov QWORD PTR[160+r8],rbp ; context->Rbp
+ mov QWORD PTR[216+r8],r12 ; context->R12
+ mov QWORD PTR[224+r8],r13 ; context->R13
+ mov QWORD PTR[232+r8],r14 ; context->R14
+ mov QWORD PTR[240+r8],r15 ; context->R15
+
+$L$in_key_prologue::
+ mov rdi,QWORD PTR[8+rax] ; rdi/rsi as stored at 8 and 16 above the entry rsp
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax ; context->Rsp = entry rsp
+ mov QWORD PTR[168+r8],rsi ; context->Rsi
+ mov QWORD PTR[176+r8],rdi ; context->Rdi
+
+ jmp $L$common_seh_exit ; shared tail lives inside cbc_se_handler
+key_se_handler ENDP
+
+
+ALIGN 16
+cbc_se_handler PROC PRIVATE ; Win64 exception handler registered in .xdata for AES_cbc_encrypt; classifies Rip against hard-coded labels instead of HandlerData. Also hosts the tail shared by all three handlers
+ push rsi ; preserve every non-volatile register (and flags) the handler touches
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64 ; outgoing-argument area for the RtlVirtualUnwind call below
+
+ mov rax,QWORD PTR[120+r8] ; rax = context->Rax
+ mov rbx,QWORD PTR[248+r8] ; rbx = context->Rip
+
+ lea r10,QWORD PTR[$L$cbc_prologue]
+ cmp rbx,r10
+ jb $L$in_cbc_prologue ; Rip < cbc_prologue: nothing pushed yet
+
+ lea r10,QWORD PTR[$L$cbc_fast_body]
+ cmp rbx,r10
+ jb $L$in_cbc_frame_setup ; Rip < cbc_fast_body: registers pushed, frame located via rax
+
+ lea r10,QWORD PTR[$L$cbc_slow_prologue]
+ cmp rbx,r10
+ jb $L$in_cbc_body ; Rip inside the fast-path body
+
+ lea r10,QWORD PTR[$L$cbc_slow_body]
+ cmp rbx,r10
+ jb $L$in_cbc_frame_setup ; Rip inside the slow-path frame setup
+
+$L$in_cbc_body::
+ mov rax,QWORD PTR[152+r8] ; rax = context->Rsp
+
+ lea r10,QWORD PTR[$L$cbc_epilogue]
+ cmp rbx,r10
+ jae $L$in_cbc_prologue ; Rip >= cbc_epilogue: everything already restored
+
+ lea rax,QWORD PTR[8+rax] ; NOTE(review): presumably skips the flags word still on the stack between cbc_popfq and cbc_epilogue - confirm against AES_cbc_encrypt
+
+ lea r10,QWORD PTR[$L$cbc_popfq]
+ cmp rbx,r10
+ jae $L$in_cbc_prologue ; Rip >= cbc_popfq: registers already popped
+
+ mov rax,QWORD PTR[8+rax] ; load the saved stack pointer from the frame (offset 16 from context->Rsp, given the +8 above)
+ lea rax,QWORD PTR[56+rax] ; step over the 56-byte save area
+
+$L$in_cbc_frame_setup::
+ mov rbx,QWORD PTR[((-16))+rax] ; saved registers start at -16 here (NOTE(review): the -8 slot presumably holds pushfq'd flags)
+ mov rbp,QWORD PTR[((-24))+rax]
+ mov r12,QWORD PTR[((-32))+rax]
+ mov r13,QWORD PTR[((-40))+rax]
+ mov r14,QWORD PTR[((-48))+rax]
+ mov r15,QWORD PTR[((-56))+rax]
+ mov QWORD PTR[144+r8],rbx ; context->Rbx
+ mov QWORD PTR[160+r8],rbp ; context->Rbp
+ mov QWORD PTR[216+r8],r12 ; context->R12
+ mov QWORD PTR[224+r8],r13 ; context->R13
+ mov QWORD PTR[232+r8],r14 ; context->R14
+ mov QWORD PTR[240+r8],r15 ; context->R15
+
+$L$in_cbc_prologue::
+ mov rdi,QWORD PTR[8+rax] ; rdi/rsi as stored at 8 and 16 above the entry rsp
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax ; context->Rsp = entry rsp
+ mov QWORD PTR[168+r8],rsi ; context->Rsi
+ mov QWORD PTR[176+r8],rdi ; context->Rdi
+
+$L$common_seh_exit:: ; shared tail: block_se_handler and key_se_handler jump here with r8/r9 still intact
+
+ mov rdi,QWORD PTR[40+r9] ; destination = disp->ContextRecord
+ mov rsi,r8 ; source = the CONTEXT patched above
+ mov ecx,154 ; 154 qwords = 1232 bytes (sizeof(CONTEXT) on x64)
+ DD 0a548f3fch ; encoded bytes FC F3 48 A5 = cld; rep movsq
+
+ mov rsi,r9 ; rsi = DISPATCHER_CONTEXT*
+ xor rcx,rcx ; arg1 HandlerType = 0 (UNW_FLAG_NHANDLER)
+ mov rdx,QWORD PTR[8+rsi] ; arg2 = disp->ImageBase
+ mov r8,QWORD PTR[rsi] ; arg3 = disp->ControlPc
+ mov r9,QWORD PTR[16+rsi] ; arg4 = disp->FunctionEntry
+ mov r10,QWORD PTR[40+rsi] ; disp->ContextRecord
+ lea r11,QWORD PTR[56+rsi] ; &disp->HandlerData
+ lea r12,QWORD PTR[24+rsi] ; &disp->EstablisherFrame
+ mov QWORD PTR[32+rsp],r10 ; arg5 = ContextRecord
+ mov QWORD PTR[40+rsp],r11 ; arg6 = &HandlerData
+ mov QWORD PTR[48+rsp],r12 ; arg7 = &EstablisherFrame
+ mov QWORD PTR[56+rsp],rcx ; arg8 = NULL
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1 ; return ExceptionContinueSearch
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+cbc_se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4) ; function table: one RUNTIME_FUNCTION triple (begin RVA, end RVA, unwind-info RVA) per exported routine
+ALIGN 4
+ DD imagerel $L$SEH_begin_AES_encrypt ; AES_encrypt
+ DD imagerel $L$SEH_end_AES_encrypt
+ DD imagerel $L$SEH_info_AES_encrypt
+
+ DD imagerel $L$SEH_begin_AES_decrypt ; AES_decrypt
+ DD imagerel $L$SEH_end_AES_decrypt
+ DD imagerel $L$SEH_info_AES_decrypt
+
+ DD imagerel $L$SEH_begin_AES_set_encrypt_key ; AES_set_encrypt_key
+ DD imagerel $L$SEH_end_AES_set_encrypt_key
+ DD imagerel $L$SEH_info_AES_set_encrypt_key
+
+ DD imagerel $L$SEH_begin_AES_set_decrypt_key ; AES_set_decrypt_key
+ DD imagerel $L$SEH_end_AES_set_decrypt_key
+ DD imagerel $L$SEH_info_AES_set_decrypt_key
+
+ DD imagerel $L$SEH_begin_AES_cbc_encrypt ; AES_cbc_encrypt
+ DD imagerel $L$SEH_end_AES_cbc_encrypt
+ DD imagerel $L$SEH_info_AES_cbc_encrypt
+
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8) ; unwind info records referenced from .pdata
+ALIGN 8
+$L$SEH_info_AES_encrypt::
+DB 9,0,0,0 ; UNWIND_INFO header: version 1 with UNW_FLAG_EHANDLER (1 | 1<<3), prologue size 0, no unwind codes, no frame register
+ DD imagerel block_se_handler ; handler RVA
+ DD imagerel $L$enc_prologue,imagerel $L$enc_epilogue ; HandlerData[0..1] read by block_se_handler
+$L$SEH_info_AES_decrypt::
+DB 9,0,0,0 ; same header as above
+ DD imagerel block_se_handler
+ DD imagerel $L$dec_prologue,imagerel $L$dec_epilogue ; HandlerData[0..1]
+$L$SEH_info_AES_set_encrypt_key::
+DB 9,0,0,0 ; same header as above
+ DD imagerel key_se_handler
+ DD imagerel $L$enc_key_prologue,imagerel $L$enc_key_epilogue ; HandlerData[0..1] read by key_se_handler
+$L$SEH_info_AES_set_decrypt_key::
+DB 9,0,0,0 ; same header as above
+ DD imagerel key_se_handler
+ DD imagerel $L$dec_key_prologue,imagerel $L$dec_key_epilogue ; HandlerData[0..1]
+$L$SEH_info_AES_cbc_encrypt::
+DB 9,0,0,0 ; same header as above
+ DD imagerel cbc_se_handler ; no handler data: cbc_se_handler compares Rip against its own label set
+
+.xdata ENDS
+END
+
diff --git a/ext/libressl/crypto/aes/aes-mingw64-x86_64.S b/ext/libressl/crypto/aes/aes-mingw64-x86_64.S
new file mode 100644
index 0000000..ca2d60f
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes-mingw64-x86_64.S
@@ -0,0 +1,2861 @@
+#include "x86_arch.h"
+
+.text
+.def _x86_64_AES_encrypt; .scl 3; .type 32; .endef /* COFF symbol record: storage class 3 (static), type 32 (function) */
+.p2align 4
+_x86_64_AES_encrypt: /* Table-driven block encryption core. In: eax,ebx,ecx,edx = four state words, r15 = key schedule (round count at offset 240), r14 = lookup table base. Out: eax..edx. Scratch: esi,edi,ebp,r8d,r10d-r13d; r15 is advanced by 16 per loop pass */
+ xorl 0(%r15),%eax /* state ^= first round key */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+
+ movl 240(%r15),%r13d /* r13d = round count stored after the key schedule */
+ subl $1,%r13d /* loop handles rounds-1 rounds; the last round is unrolled after the loop */
+ jmp .Lenc_loop
+.p2align 4
+.Lenc_loop:
+
+ movzbl %al,%esi /* low byte of each state word indexes the table (8-byte stride) */
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d /* r10d,r11d,r12d,r8d accumulate the four output words */
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d /* 4-byte loads at byte offsets 3,2,1 inside an 8-byte entry; NOTE(review): presumably entries hold a duplicated word so these read rotated variants - table layout is outside this chunk */
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+
+ movzbl %dh,%esi
+ shrl $16,%ecx /* expose the upper two bytes of each state word */
+ movzbl %ah,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+
+ shrl $16,%ebx
+ leaq 16(%r15),%r15 /* advance to the next round key */
+ shrl $16,%eax
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+
+ movl 12(%r15),%edx /* start reloading state registers with the round key; old bytes were consumed above */
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rdi,8),%r12d
+ xorl 1(%r14,%rbp,8),%r8d
+
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r10d,%eax /* new state = round key ^ accumulated table words */
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ subl $1,%r13d
+ jnz .Lenc_loop
+ movzbl %al,%esi /* final round: single bytes are picked out of table entries (byte loads or masks) instead of xoring whole words */
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl 2(%r14,%rsi,8),%r10d /* byte at offset 2 of the entry supplies bits 0..7 */
+ movzbl 2(%r14,%rdi,8),%r11d
+ movzbl 2(%r14,%rbp,8),%r12d
+
+ movzbl %dl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl 2(%r14,%rsi,8),%r8d
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+
+ andl $65280,%edi /* 0x0000ff00: keep bits 8..15 */
+ andl $65280,%ebp
+
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%ecx
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ shrl $16,%edx
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+
+ andl $65280,%esi
+ andl $65280,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%eax
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+
+ andl $16711680,%esi /* 0x00ff0000: keep bits 16..23 */
+ andl $16711680,%edi
+ andl $16711680,%ebp
+
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+
+ movzbl %bl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 2(%r14,%rbp,8),%ebp
+
+ andl $16711680,%esi
+ andl $4278190080,%edi /* 0xff000000: keep bits 24..31 */
+ andl $4278190080,%ebp
+
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movl 16+12(%r15),%edx /* last round key sits 16 bytes past the current r15 */
+ movl 2(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 16+0(%r15),%eax
+
+ andl $4278190080,%esi
+ andl $4278190080,%edi
+
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ xorl %r10d,%eax /* output = last round key ^ assembled words */
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq
+
+.def _x86_64_AES_encrypt_compact; .scl 3; .type 32; .endef /* COFF symbol record: storage class 3 (static), type 32 (function) */
+.p2align 4
+_x86_64_AES_encrypt_compact: /* Block encryption using only a 256-byte table at r14. In: eax,ebx,ecx,edx = state, r15 = key schedule, 16(%rsp) = address of the last round key (stored by the caller at its 8(%rsp)). Out: eax..edx. Scratch: esi,edi,ebp,r8d-r13d */
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi /* touch the 256-byte table every 32 bytes; the loaded values are overwritten before use, so this only warms the cache */
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp .Lenc_loop_compact
+.p2align 4
+.Lenc_loop_compact:
+ xorl 0(%r15),%eax /* state ^= current round key */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15 /* advance to the next round key */
+ movzbl %al,%r10d /* byte-wise substitution through the table; results are shifted into place to build the new words in r10d,r11d,r12d,r8d */
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+
+ movzbl %dl,%r8d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+
+ movzbl %dh,%ebp
+ movzbl %ah,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+
+ movzbl %cl,%edi
+ shll $8,%r9d
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+
+ movzbl %bl,%ebp
+ movzbl %dh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+
+ movzbl %ah,%edi
+ shrl $8,%ecx /* ecx/ebx now hold only their former top byte, usable directly as an index */
+ shrl $8,%ebx
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rcx,1),%edx
+ movzbl (%r14,%rbx,1),%ecx
+ shll $16,%r9d
+ shll $16,%r13d
+ shll $16,%ebp
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+ xorl %ebp,%r8d
+
+ shll $24,%esi
+ shll $24,%edi
+ shll $24,%edx
+ xorl %esi,%r10d
+ shll $24,%ecx
+ xorl %edi,%r11d
+ movl %r10d,%eax /* move the substituted words back into the state registers */
+ movl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ cmpq 16(%rsp),%r15 /* reached the last round key? then skip the column mixing below */
+ je .Lenc_compact_done
+ movl %eax,%esi /* column mixing done arithmetically on packed bytes, two words at a time (eax/ebx first) */
+ movl %ebx,%edi
+ andl $2155905152,%esi /* 0x80808080: top bit of every byte */
+ andl $2155905152,%edi
+ movl %esi,%r10d
+ movl %edi,%r11d
+ shrl $7,%r10d
+ leal (%rax,%rax,1),%r8d /* x << 1 for all four bytes at once */
+ shrl $7,%r11d
+ leal (%rbx,%rbx,1),%r9d
+ subl %r10d,%esi /* 0x80 - 0x01 = 0x7f in each byte whose top bit was set */
+ subl %r11d,%edi
+ andl $4278124286,%r8d /* 0xfefefefe: drop bits carried across byte boundaries */
+ andl $4278124286,%r9d
+ andl $454761243,%esi /* 0x1b1b1b1b: reduction constant for bytes that overflowed */
+ andl $454761243,%edi
+ movl %eax,%r10d
+ movl %ebx,%r11d
+ xorl %esi,%r8d /* r8d/r9d = per-byte GF(2^8) doubling of eax/ebx */
+ xorl %edi,%r9d
+
+ xorl %r8d,%eax /* combine doubled and original words through byte rotations */
+ xorl %r9d,%ebx
+ movl %ecx,%esi /* interleaved: begin the same sequence for ecx/edx */
+ movl %edx,%edi
+ roll $24,%eax
+ roll $24,%ebx
+ andl $2155905152,%esi
+ andl $2155905152,%edi
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movl %esi,%r12d
+ movl %edi,%ebp
+ rorl $16,%r10d
+ rorl $16,%r11d
+ shrl $7,%r12d
+ leal (%rcx,%rcx,1),%r8d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ shrl $7,%ebp
+ leal (%rdx,%rdx,1),%r9d
+ rorl $8,%r10d
+ rorl $8,%r11d
+ subl %r12d,%esi
+ subl %ebp,%edi
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+
+ andl $4278124286,%r8d
+ andl $4278124286,%r9d
+ andl $454761243,%esi
+ andl $454761243,%edi
+ movl %ecx,%r12d
+ movl %edx,%ebp
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ roll $24,%ecx
+ roll $24,%edx
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ movl 0(%r14),%esi /* loads at 0/64/128/192 re-touch the table; the values are overwritten at the top of the loop */
+ rorl $16,%r12d
+ rorl $16,%ebp
+ movl 64(%r14),%edi
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ movl 128(%r14),%r8d
+ rorl $8,%r12d
+ rorl $8,%ebp
+ movl 192(%r14),%r9d
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ jmp .Lenc_loop_compact
+.p2align 4
+.Lenc_compact_done:
+ xorl 0(%r15),%eax /* output = state ^ last round key */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq
+
+.globl AES_encrypt
+.def AES_encrypt; .scl 2; .type 32; .endef /* COFF symbol record: storage class 2 (external), type 32 (function) */
+.p2align 4
+.globl asm_AES_encrypt
+
+asm_AES_encrypt: /* alias of AES_encrypt */
+AES_encrypt: /* Win64 entry point. rcx = 16-byte input block, rdx = 16-byte output block, r8 = key schedule with round count at offset 240 (roles as used by the code below) */
+ movq %rdi,8(%rsp) /* rdi/rsi are callee-saved on Win64: park them in the caller-provided home slots */
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax /* rax = entry rsp (the MASM variant's SEH handler reads Rax for faults inside the prologue) */
+.LSEH_begin_AES_encrypt:
+ movq %rcx,%rdi /* remap arguments: rdi = in, rsi = out, rdx = key */
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+ movq %rsp,%r10 /* r10 = rsp after the six pushes, saved in the frame below */
+ leaq -63(%rdx),%rcx
+ andq $-64,%rsp /* 64-byte align the frame */
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx /* rcx = (rsp - (key - 63)) & 0x3c0 */
+ subq %rcx,%rsp /* shift the frame by that amount; NOTE(review): presumably chosen so the frame does not alias the key schedule in cache - confirm against the upstream generator */
+ subq $32,%rsp /* four 8-byte frame slots */
+
+ movq %rsi,16(%rsp) /* frame[16] = output pointer */
+ movq %r10,24(%rsp) /* frame[24] = saved stack pointer */
+.Lenc_prologue:
+
+ movq %rdx,%r15 /* r15 = key schedule */
+ movl 240(%r15),%r13d /* r13d = round count */
+
+ movl 0(%rdi),%eax /* load the input block into eax,ebx,ecx,edx */
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+
+ shll $4,%r13d /* rounds * 16 = byte offset of the last round key */
+ leaq (%r15,%r13,1),%rbp
+ movq %r15,(%rsp) /* frame[0] = key schedule start */
+ movq %rbp,8(%rsp) /* frame[8] = last round key address; the callee sees it at 16(%rsp) */
+
+
+ leaq .LAES_Te+2048(%rip),%r14 /* r14 = data 2048 bytes into .LAES_Te (table definition is outside this chunk) */
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp /* pick offset 0, 256, 512 or 768 from the stack/table address difference */
+ leaq (%r14,%rbp,1),%r14 /* r14 = one of four consecutive 256-byte regions */
+
+ call _x86_64_AES_encrypt_compact
+
+ movq 16(%rsp),%r9 /* r9 = output pointer */
+ movq 24(%rsp),%rsi /* rsi = saved stack pointer */
+ movl %eax,0(%r9) /* store the result block */
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ movq (%rsi),%r15 /* restore callee-saved registers in reverse push order */
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp /* back to the entry rsp */
+.Lenc_epilogue:
+ movq 8(%rsp),%rdi /* restore rdi/rsi from the home slots */
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_AES_encrypt:
+.def _x86_64_AES_decrypt; .scl 3; .type 32; .endef /* COFF symbol record: storage class 3 (static), type 32 (function) */
+.p2align 4
+_x86_64_AES_decrypt: /* Table-driven block decryption core. In: eax,ebx,ecx,edx = four state words, r15 = key schedule (round count at offset 240), r14 = lookup table base. Out: eax..edx. Scratch: esi,edi,ebp,r8d,r10d-r13d; r15 is advanced by 16 per loop pass, r14 is restored before return */
+ xorl 0(%r15),%eax /* state ^= first round key */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+
+ movl 240(%r15),%r13d /* r13d = round count */
+ subl $1,%r13d /* loop handles rounds-1 rounds; the last round is unrolled after the loop */
+ jmp .Ldec_loop
+.p2align 4
+.Ldec_loop:
+
+ movzbl %al,%esi /* low byte of each state word indexes the table (8-byte stride) */
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d /* r10d,r11d,r12d,r8d accumulate the four output words */
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+
+ movzbl %dh,%esi /* byte selection differs from the encrypt core: second bytes come from edx,eax,ebx,ecx here */
+ movzbl %ah,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d /* 4-byte loads at byte offsets 3,2,1 inside an 8-byte entry; NOTE(review): relies on each entry holding a duplicated word, as $L$AES_Td does in the MASM variant - this file's table is outside this chunk */
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+
+ movzbl %bh,%esi
+ shrl $16,%eax /* expose the upper two bytes of each state word */
+ movzbl %ch,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+
+ shrl $16,%ebx
+ leaq 16(%r15),%r15 /* advance to the next round key */
+ shrl $16,%ecx
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+
+ movzbl %dh,%esi
+ movl 12(%r15),%edx /* start reloading state registers with the round key; old bytes were consumed above */
+ movzbl %ah,%ebp
+ xorl 1(%r14,%rsi,8),%r12d
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rbp,8),%r8d
+
+ xorl %r10d,%eax /* new state = round key ^ accumulated table words */
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r12d,%ecx
+ xorl %r11d,%ebx
+ xorl %r8d,%edx
+ subl $1,%r13d
+ jnz .Ldec_loop
+ leaq 2048(%r14),%r14 /* final round: switch to the byte table that follows the 256 * 8-byte word table */
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl (%r14,%rsi,1),%r10d /* single-byte lookups, shifted into position below */
+ movzbl (%r14,%rdi,1),%r11d
+ movzbl (%r14,%rbp,1),%r12d
+
+ movzbl %dl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movzbl (%r14,%rsi,1),%r8d
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $8,%edi /* place in bits 8..15 */
+ shll $8,%ebp
+
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%edx
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ shrl $16,%eax
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+
+ shll $8,%esi
+ shll $8,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%ecx
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $16,%esi /* place in bits 16..23 */
+ shll $16,%edi
+ shll $16,%ebp
+
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+
+ movzbl %bl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $16,%esi
+ shll $24,%edi /* place in bits 24..31 */
+ shll $24,%ebp
+
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movl 16+12(%r15),%edx /* last round key sits 16 bytes past the current r15 */
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movl 16+0(%r15),%eax
+
+ shll $24,%esi
+ shll $24,%edi
+
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ leaq -2048(%r14),%r14 /* restore r14 to the word-table base for the caller */
+ xorl %r10d,%eax /* output = last round key ^ assembled words */
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq
+
+.def _x86_64_AES_decrypt_compact; .scl 3; .type 32; .endef /*
+ * _x86_64_AES_decrypt_compact -- internal (no .globl) block-decrypt core
+ * that uses only a 256-byte lookup table plus three 64-bit masks.
+ *
+ * In:  eax, ebx, ecx, edx  the four 32-bit state words
+ *      r15                 address of the first 16-byte round key
+ *      r14                 base of the 256-entry byte table; the masks are
+ *                          read from 256(%r14), 256+8(%r14), 256+16(%r14)
+ *      16(%rsp)            round-key address at which the loop stops; the
+ *                          callers in this file store it at their 8(%rsp)
+ *                          before the call pushes the return address
+ * Out: eax, ebx, ecx, edx  the transformed state words
+ * Clobbers: rsi, rdi, rbp, r8-r13, flags; r15 is left at the stop address.
+ */
+.p2align 4
+_x86_64_AES_decrypt_compact:
+ leaq 128(%r14),%r8 /* r8 = table+128, so the offsets below reach table+0 .. table+224 */
+ movl 0-128(%r8),%edi /* eight loads, one per 32 bytes of the 256-byte table; */
+ movl 32-128(%r8),%ebp /* every destination is overwritten before it is read, */
+ movl 64-128(%r8),%r10d /* so these presumably only pull the table into cache */
+ movl 96-128(%r8),%r11d /* ahead of the data-dependent lookups -- TODO confirm */
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp .Ldec_loop_compact
+
+.p2align 4
+.Ldec_loop_compact: /* one iteration per round key */
+ xorl 0(%r15),%eax /* state ^= round key at r15 */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15 /* r15 -> next round key */
+ movzbl %al,%r10d /* byte 0 of words 0,1,2 -> table index */
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d /* r10d/r11d/r12d/r8d accumulate new words 0/1/2/3 */
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+/* new word 0 ends up as T[w0.b0] | T[w3.b1]<<8 | T[w2.b2]<<16 | T[w1.b3]<<24; the other words follow the same rotated pattern */
+ movzbl %dl,%r8d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d /* T[w3.b1], goes to new word 0 */
+ movzbl (%r14,%rdi,1),%r13d /* T[w0.b1], goes to new word 1 */
+
+ movzbl %bh,%ebp
+ movzbl %ch,%esi
+ shrl $16,%ecx /* expose bytes 2 and 3 of word 2 */
+ movzbl (%r14,%rbp,1),%ebp /* T[w1.b1], goes to new word 2 */
+ movzbl (%r14,%rsi,1),%esi /* T[w2.b1], goes to new word 3 */
+ shrl $16,%edx /* expose bytes 2 and 3 of word 3 */
+
+ movzbl %cl,%edi
+ shll $8,%r9d
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi /* T[w2.b2], goes to new word 0 */
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+
+ movzbl %dl,%r9d
+ shrl $16,%eax /* expose bytes 2 and 3 of words 0 and 1 */
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+
+ movzbl %bl,%ebp
+ movzbl %bh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi /* T[w1.b3], top byte of new word 0 */
+ xorl %edi,%r10d
+
+ movzbl %ch,%edi
+ shll $16,%r9d
+ shll $16,%r13d
+ movzbl (%r14,%rdi,1),%ebx /* ebx is dead as input now; reuse it for the top byte of new word 1 */
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+
+ movzbl %dh,%edi
+ shrl $8,%eax /* eax = byte 3 of old word 0 */
+ shll $16,%ebp
+ movzbl (%r14,%rdi,1),%ecx /* top byte of new word 2 */
+ movzbl (%r14,%rax,1),%edx /* top byte of new word 3 */
+ xorl %ebp,%r8d
+
+ shll $24,%esi
+ shll $24,%ebx
+ shll $24,%ecx
+ xorl %esi,%r10d
+ shll $24,%edx
+ xorl %r11d,%ebx /* ebx = new word 1 */
+ movl %r10d,%eax /* eax = new word 0 */
+ xorl %r12d,%ecx /* ecx = new word 2 */
+ xorl %r8d,%edx /* edx = new word 3 */
+ cmpq 16(%rsp),%r15 /* reached the stop address? then skip the mixing step */
+ je .Ldec_compact_done
+/*
+ * Not the last round: mix the substituted words. rax = eax | ebx<<32 and
+ * rcx = ecx | edx<<32, so each 64-bit operation handles two state words.
+ * rsi, rdi, rbp hold the three masks loaded from 256(%r14).
+ * Write D(x) for the step  ((h - (h >> 7)) & rbp) ^ ((x + x) & rdi)  with
+ * h = x & rsi.
+ * NOTE(review): this has the shape of a byte-parallel GF(2^8) doubling with
+ * masks 0x80.., 0xfe.., 0x1b..; the mask values are not visible here --
+ * confirm against the table definition.
+ */
+ movq 256+0(%r14),%rsi
+ shlq $32,%rbx
+ shlq $32,%rdx
+ movq 256+8(%r14),%rdi
+ orq %rbx,%rax
+ orq %rdx,%rcx
+ movq 256+16(%r14),%rbp
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8 /* r8 = D1 = D(rax) */
+ movq %rdx,%r11 /* r11 = D1 for rcx */
+/* second application: r9/r12 = D2 = D(D1) */
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+/* third application: r10/r13 = D3 = D(D2); interleaved with r8 ^= x and r9 ^= x (x = rax or rcx) */
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10
+ xorq %rdx,%r13
+/* combine: rax = x^D3, r8 = x^D1^D3, r9 = x^D2^D3, r10 = D1^D2^D3 (same for rcx/r11/r12/r13); rbx/rdx take the high words */
+ xorq %r10,%rax
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+/* per 32-bit word: w = rol(x^D3, 8) ^ (D1^D2^D3) */
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+/* w ^= rol(x^D1^D3, 24) */
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+/* w ^= rol(x^D2^D3, 16); the interleaved loads from 0..256(%r14) land in registers the loop top overwrites before reading (presumably cache touches) */
+ movq 0(%r14),%rsi
+ shrq $32,%r8
+ shrq $32,%r11
+ movq 64(%r14),%rdi
+ roll $16,%r9d
+ roll $16,%r12d
+ movq 128(%r14),%rbp
+ roll $16,%r8d
+ roll $16,%r11d
+ movq 192(%r14),%r10
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+ movq 256(%r14),%r13
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ jmp .Ldec_loop_compact
+.p2align 4
+.Ldec_compact_done: /* final step: xor with the round key at the stop address */
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq
+
+.globl AES_decrypt /*
+ * AES_decrypt / asm_AES_decrypt -- decrypt one 16-byte block.
+ *
+ * The three arguments arrive in rcx, rdx, r8. The incoming rdi and rsi are
+ * first stored at 8(%rsp) and 16(%rsp) (and reloaded before returning), then
+ * the arguments are moved so that:
+ *   rdi  input block, read as four 32-bit words
+ *   rsi  output block, written as four 32-bit words
+ *   rdx  key schedule; the 32-bit round count is read from offset 240
+ * rbx, rbp, r12-r15 are pushed on entry and restored on exit.
+ */
+.def AES_decrypt; .scl 2; .type 32; .endef
+.p2align 4
+.globl asm_AES_decrypt
+
+asm_AES_decrypt:
+AES_decrypt:
+ movq %rdi,8(%rsp) /* keep the caller's rdi/rsi in the slots above the return address */
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_AES_decrypt:
+ movq %rcx,%rdi /* arg1 -> rdi (in) */
+ movq %rdx,%rsi /* arg2 -> rsi (out) */
+ movq %r8,%rdx /* arg3 -> rdx (key) */
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+/*
+ * Build a 32-byte frame on a 64-byte aligned stack, moved down by
+ * ((aligned rsp - key + 63) & 960) bytes, i.e. placed relative to the key
+ * address in 64-byte steps modulo 1024.
+ * NOTE(review): presumably this keeps the frame and the key schedule from
+ * sharing cache sets with the lookup table -- confirm.
+ * Frame: 0 = key, 8 = key + 16*rounds, 16 = out, 24 = rsp after the pushes.
+ */
+ movq %rsp,%r10
+ leaq -63(%rdx),%rcx
+ andq $-64,%rsp
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx
+ subq %rcx,%rsp
+ subq $32,%rsp
+
+ movq %rsi,16(%rsp)
+ movq %r10,24(%rsp)
+.Ldec_prologue:
+
+ movq %rdx,%r15 /* r15 = key schedule */
+ movl 240(%r15),%r13d /* r13d = rounds */
+
+ movl 0(%rdi),%eax /* load the input block into eax..edx */
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+
+ shll $4,%r13d
+ leaq (%r15,%r13,1),%rbp /* rbp = key + 16*rounds: address of the last round key */
+ movq %r15,(%rsp)
+ movq %rbp,8(%rsp) /* becomes 16(%rsp) inside the callee, its loop stop address */
+
+/* r14 = .LAES_Td+2048 + k*(256+32) with k = ((rsp + 768 - table) & 768) / 256: picks one of four table positions depending on where the frame lies */
+ leaq .LAES_Td+2048(%rip),%r14
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp
+ leaq (%r14,%rbp,1),%r14
+ shrq $3,%rbp
+ addq %rbp,%r14
+
+ call _x86_64_AES_decrypt_compact
+
+ movq 16(%rsp),%r9 /* r9 = out */
+ movq 24(%rsp),%rsi /* rsi = stack pointer as it was after the six pushes */
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ movq (%rsi),%r15 /* reload the pushed registers in reverse push order */
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp /* rsp back at the return address */
+.Ldec_epilogue:
+ movq 8(%rsp),%rdi /* restore the caller's rdi/rsi saved on entry */
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_AES_decrypt:
+.globl AES_set_encrypt_key /*
+ * AES_set_encrypt_key -- exported wrapper around
+ * _x86_64_AES_set_encrypt_key.
+ *
+ * The three arguments arrive in rcx, rdx, r8 and are moved to rdi, rsi, rdx
+ * (user key, key length in bits, key schedule -- the roles the inner routine
+ * gives those registers). The incoming rdi/rsi are stored at 8(%rsp) and
+ * 16(%rsp) on entry and reloaded before returning.
+ * Returns whatever the inner routine leaves in rax.
+ */
+.def AES_set_encrypt_key; .scl 2; .type 32; .endef
+.p2align 4
+AES_set_encrypt_key:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_AES_set_encrypt_key:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $8,%rsp /* 48 bytes of pushes + 8 = 56, matching the addq below; the unused slot sits at 0(%rsp) */
+.Lenc_key_prologue:
+
+ call _x86_64_AES_set_encrypt_key
+
+ movq 8(%rsp),%r15 /* saved registers start at 8(%rsp), above the pad slot */
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp
+.Lenc_key_epilogue:
+ movq 8(%rsp),%rdi /* restore the caller's rdi/rsi saved on entry */
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_AES_set_encrypt_key:
+
+.def _x86_64_AES_set_encrypt_key; .scl 3; .type 32; .endef /*
+ * _x86_64_AES_set_encrypt_key -- expand a 128-, 192- or 256-bit user key
+ * into a key schedule (internal, no .globl).
+ *
+ * In:  rdi  user key bytes
+ *      esi  key length in bits
+ *      rdx  key schedule to fill; the round count (10/12/14) is stored as a
+ *           32-bit value at offset 240
+ * Out: rax  0 on success, -1 if either pointer is NULL, -2 if the bit
+ *           length is not 128, 192 or 256
+ * Uses rax, rbx, rcx, rdx, rsi, rdi, rbp, r8 and flags without saving them.
+ *
+ * Tables: -128(%rbp,i) addresses .LAES_Te+2048+i, the first 256-byte table
+ * that follows the 2048-byte word table; 1024-128(%rbp,n,4) addresses
+ * 32-bit entry n at .LAES_Te+2048+1024 (presumably the round constants;
+ * that data is defined outside this routine).
+ */
+.p2align 4
+_x86_64_AES_set_encrypt_key:
+ movl %esi,%ecx /* ecx = bits */
+ movq %rdi,%rsi /* rsi = user key */
+ movq %rdx,%rdi /* rdi = key schedule */
+
+ testq $-1,%rsi /* NULL user key? */
+ jz .Lbadpointer
+ testq $-1,%rdi /* NULL schedule? */
+ jz .Lbadpointer
+
+ leaq .LAES_Te(%rip),%rbp
+ leaq 2048+128(%rbp),%rbp /* rbp = byte table + 128 */
+
+/* eight loads, one per 32 bytes of the byte table; the results are not used (presumably a cache warm-up -- TODO confirm) */
+ movl 0-128(%rbp),%eax
+ movl 32-128(%rbp),%ebx
+ movl 64-128(%rbp),%r8d
+ movl 96-128(%rbp),%edx
+ movl 128-128(%rbp),%eax
+ movl 160-128(%rbp),%ebx
+ movl 192-128(%rbp),%r8d
+ movl 224-128(%rbp),%edx
+
+ cmpl $128,%ecx
+ je .L10rounds
+ cmpl $192,%ecx
+ je .L12rounds
+ cmpl $256,%ecx
+ je .L14rounds
+ movq $-2,%rax /* unsupported key length */
+ jmp .Lexit
+
+.L10rounds: /* 128-bit key: 4 key words, 10 iterations of 4 new words each */
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rdx
+ movq %rax,0(%rdi) /* the first round key is the user key itself */
+ movq %rdx,8(%rdi)
+
+ shrq $32,%rdx /* edx = word 3; eax already holds word 0 */
+ xorl %ecx,%ecx /* ecx = iteration / constant index */
+ jmp .L10shortcut
+.p2align 2
+.L10loop:
+ movl 0(%rdi),%eax /* eax = first word of the previous round key */
+ movl 12(%rdi),%edx /* edx = last word of the previous round key */
+.L10shortcut:
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax /* eax ^= T[byte0(edx)] << 24 */
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax /* eax ^= T[byte1(edx)] */
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax /* eax ^= T[byte2(edx)] << 8 */
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax /* eax ^= T[byte3(edx)] << 16 */
+
+ xorl 1024-128(%rbp,%rcx,4),%eax /* eax ^= constant[ecx] */
+ movl %eax,16(%rdi) /* new words: each is the previous new word xor the old word one key back */
+ xorl 4(%rdi),%eax
+ movl %eax,20(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,24(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,28(%rdi)
+ addl $1,%ecx
+ leaq 16(%rdi),%rdi
+ cmpl $10,%ecx
+ jl .L10loop
+
+ movl $10,80(%rdi) /* rdi advanced 160 bytes, so this is offset 240 of the schedule */
+ xorq %rax,%rax /* return 0 */
+ jmp .Lexit
+
+.L12rounds: /* 192-bit key: 6 key words, up to 6 new words per iteration */
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rdx,16(%rdi)
+
+ shrq $32,%rdx /* edx = word 5 */
+ xorl %ecx,%ecx
+ jmp .L12shortcut
+.p2align 2
+.L12loop:
+ movl 0(%rdi),%eax
+ movl 20(%rdi),%edx
+.L12shortcut:
+ movzbl %dl,%esi /* same byte substitution/rotation of edx as in the 128-bit loop */
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,24(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,28(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,32(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,36(%rdi)
+
+ cmpl $7,%ecx /* the eighth pass (ecx == 7) stops after four words */
+ je .L12break
+ addl $1,%ecx
+
+ xorl 16(%rdi),%eax /* otherwise also produce words 4 and 5 of this group */
+ movl %eax,40(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,44(%rdi)
+
+ leaq 24(%rdi),%rdi
+ jmp .L12loop
+.L12break:
+ movl $12,72(%rdi) /* rdi advanced 7*24 = 168 bytes, so this is offset 240 */
+ xorq %rax,%rax
+ jmp .Lexit
+
+.L14rounds: /* 256-bit key: 8 key words, up to 8 new words per iteration */
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rcx
+ movq 24(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+
+ shrq $32,%rdx /* edx = word 7 */
+ xorl %ecx,%ecx
+ jmp .L14shortcut
+.p2align 2
+.L14loop:
+ movl 0(%rdi),%eax
+ movl 28(%rdi),%edx
+.L14shortcut:
+ movzbl %dl,%esi /* first half: substitution with byte rotation plus constant, as above */
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,32(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,36(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,40(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,44(%rdi)
+
+ cmpl $6,%ecx /* the seventh pass (ecx == 6) stops after four words */
+ je .L14break
+ addl $1,%ecx
+
+ movl %eax,%edx /* second half: edx = word just written at 44(%rdi) */
+ movl 16(%rdi),%eax /* eax = word 4 of the previous group */
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ xorl %ebx,%eax /* bytes are substituted in place here: no rotation, no constant */
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ shll $8,%ebx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movl %eax,48(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,52(%rdi)
+ xorl 24(%rdi),%eax
+ movl %eax,56(%rdi)
+ xorl 28(%rdi),%eax
+ movl %eax,60(%rdi)
+
+ leaq 32(%rdi),%rdi
+ jmp .L14loop
+.L14break:
+ movl $14,48(%rdi) /* rdi advanced 6*32 = 192 bytes, so this is offset 240 */
+ xorq %rax,%rax
+ jmp .Lexit
+
+.Lbadpointer:
+ movq $-1,%rax /* NULL argument */
+.Lexit:
+ retq
+
+.globl AES_set_decrypt_key /*
+ * AES_set_decrypt_key -- build a decryption key schedule.
+ *
+ * The three arguments arrive in rcx, rdx, r8 and are moved to rdi, rsi, rdx
+ * (user key, key length in bits, key schedule). The incoming rdi/rsi are
+ * stored at 8(%rsp) and 16(%rsp) on entry and reloaded before returning.
+ *
+ * Steps: (1) run _x86_64_AES_set_encrypt_key; (2) reverse the order of the
+ * 16-byte round keys in place; (3) rewrite every round key except the first
+ * and the last with the packed transform in .Lpermute.
+ * Returns 0 in rax, or the inner routine's rax unchanged when its eax is
+ * non-zero.
+ */
+.def AES_set_decrypt_key; .scl 2; .type 32; .endef
+.p2align 4
+AES_set_decrypt_key:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_AES_set_decrypt_key:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rdx /* keep the key schedule pointer at 0(%rsp) across the call */
+.Ldec_key_prologue:
+
+ call _x86_64_AES_set_encrypt_key
+ movq (%rsp),%r8 /* r8 = key schedule */
+ cmpl $0,%eax
+ jne .Labort /* expansion failed: return its code */
+
+ movl 240(%r8),%r14d /* r14d = rounds */
+ xorq %rdi,%rdi
+ leaq (%rdi,%r14,4),%rcx /* rcx = 4*rounds */
+ movq %r8,%rsi /* rsi -> first round key */
+ leaq (%r8,%rcx,4),%rdi /* rdi -> last round key (key + 16*rounds) */
+.p2align 2
+.Linvert: /* swap round keys from both ends until the two pointers meet */
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 0(%rdi),%rcx
+ movq 8(%rdi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,0(%rsi)
+ movq %rdx,8(%rsi)
+ leaq 16(%rsi),%rsi
+ leaq -16(%rdi),%rdi
+ cmpq %rsi,%rdi
+ jne .Linvert
+
+ leaq .LAES_Te+2048+1024(%rip),%rax
+/* three 64-bit masks at offsets 40, 48, 56 past .LAES_Te+2048+1024 (values defined outside this routine) */
+ movq 40(%rax),%rsi
+ movq 48(%rax),%rdi
+ movq 56(%rax),%rbp
+
+ movq %r8,%r15
+ subl $1,%r14d /* rounds-1 keys to transform */
+.p2align 2
+.Lpermute: /* r15 is advanced first, so round key 0 is left untouched */
+ leaq 16(%r15),%r15
+ movq 0(%r15),%rax /* rax = words 0,1; rcx = words 2,3 of this round key */
+ movq 8(%r15),%rcx
+ movq %rax,%rbx /* D1 = D(x), with D(x) = ((h - (h >> 7)) & rbp) ^ ((x + x) & rdi), h = x & rsi */
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8 /* r8/r11 = D1 */
+ movq %rdx,%r11
+/* D2 = D(D1) -> r9/r12 */
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+/* D3 = D(D2) -> r10/r13; interleaved with r8 ^= x and r9 ^= x */
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10
+ xorq %rdx,%r13
+/* combine: rax = x^D3, r8 = x^D1^D3, r9 = x^D2^D3, r10 = D1^D2^D3; rbx/rdx take the high words */
+ xorq %r10,%rax
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+/* per 32-bit word: w = rol(x^D3, 8) ^ (D1^D2^D3) */
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+/* w ^= rol(x^D1^D3, 24) */
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+/* w ^= rol(x^D2^D3, 16) */
+
+ shrq $32,%r8
+ shrq $32,%r11
+
+ roll $16,%r9d
+ roll $16,%r12d
+
+ roll $16,%r8d
+ roll $16,%r11d
+
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ movl %eax,0(%r15) /* write the transformed round key back in place */
+ movl %ebx,4(%r15)
+ movl %ecx,8(%r15)
+ movl %edx,12(%r15)
+ subl $1,%r14d
+ jnz .Lpermute
+
+ xorq %rax,%rax /* return 0 */
+.Labort:
+ movq 8(%rsp),%r15 /* 0(%rsp) is the pushed rdx; saved registers start at 8(%rsp) */
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp
+.Ldec_key_epilogue:
+ movq 8(%rsp),%rdi /* restore the caller's rdi/rsi saved on entry */
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_AES_set_decrypt_key:
+.globl AES_cbc_encrypt
+.def AES_cbc_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+/*
+ * AES_cbc_encrypt / asm_AES_cbc_encrypt -- CBC-mode encrypt or decrypt.
+ *
+ * Four arguments arrive in rcx, rdx, r8, r9 and two more are read from
+ * 40(%rsp) and 48(%rsp). The incoming rdi/rsi are stored at 8(%rsp) and
+ * 16(%rsp) on entry and reloaded before returning. After the moves:
+ *   rdi  input buffer          rsi  output buffer
+ *   rdx  length in bytes       rcx  key schedule (round count at offset 240)
+ *   r8   16-byte IV, updated   r9d  non-zero = encrypt, zero = decrypt
+ * A zero length returns immediately. rflags, rbx, rbp and r12-r15 are saved
+ * and restored.
+ *
+ * Two paths: a fast path using _x86_64_AES_encrypt/_x86_64_AES_decrypt when
+ * length >= 512, length is a multiple of 16 and bit IA32CAP_BIT0_HT of
+ * OPENSSL_ia32cap_P is clear; otherwise a slow path using the *_compact
+ * routines, which also handles a trailing partial block.
+ *
+ * Frame slots used after the stack switch: 0 = key pointer handed to the
+ * block routines, 8 = key + 16*rounds (slow path), 16 = rsp after the
+ * pushes, 24 = in, 32 = out, 40 = bytes left, 48 = key, 56 = IV pointer,
+ * 64..79 = IV scratch or pointer to previous ciphertext block,
+ * 80..319 = key copy (fast path), 80+240 = non-zero when a copy was made.
+ */
+
+.globl asm_AES_cbc_encrypt
+
+asm_AES_cbc_encrypt:
+AES_cbc_encrypt:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_AES_cbc_encrypt:
+ movq %rcx,%rdi /* in */
+ movq %rdx,%rsi /* out */
+ movq %r8,%rdx /* length */
+ movq %r9,%rcx /* key */
+ movq 40(%rsp),%r8 /* IV pointer (fifth argument, on the stack) */
+ movq 48(%rsp),%r9 /* enc flag (sixth argument, on the stack) */
+
+ cmpq $0,%rdx
+ je .Lcbc_epilogue /* nothing to do for length 0 */
+ pushfq
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+.Lcbc_prologue:
+
+ cld /* the rep string instructions below must run forward */
+ movl %r9d,%r9d /* keep only the low 32 bits of the enc flag */
+
+ leaq .LAES_Te(%rip),%r14 /* r14 = encryption table, or ... */
+ cmpq $0,%r9
+ jne .Lcbc_picked_te
+ leaq .LAES_Td(%rip),%r14 /* ... decryption table when enc == 0 */
+.Lcbc_picked_te:
+
+ movl OPENSSL_ia32cap_P(%rip),%r10d
+ cmpq $512,%rdx
+ jb .Lcbc_slow_prologue /* short input */
+ testq $15,%rdx
+ jnz .Lcbc_slow_prologue /* length not a multiple of 16 */
+ btl $IA32CAP_BIT0_HT,%r10d
+ jc .Lcbc_slow_prologue /* capability bit set */
+
+/* fast path: reserve 88 + 248 bytes below rsp, 64-byte aligned */
+ leaq -88-248(%rsp),%r15
+ andq $-64,%r15
+
+/*
+ * Compare offsets within a 4096-byte page: r10 = table start, r11 = table
+ * start + 2304, r12 = frame. The frame is then moved down so that its page
+ * offset is either exactly r11 or 320 bytes below r10.
+ * NOTE(review): presumably avoids the frame aliasing the 2304-byte table
+ * modulo 4096 -- confirm.
+ */
+ movq %r14,%r10
+ leaq 2304(%r14),%r11
+ movq %r15,%r12
+ andq $4095,%r10
+ andq $4095,%r11
+ andq $4095,%r12
+
+ cmpq %r11,%r12
+ jb .Lcbc_te_break_out
+ subq %r11,%r12
+ subq %r12,%r15
+ jmp .Lcbc_te_ok
+.Lcbc_te_break_out:
+ subq %r10,%r12
+ andq $4095,%r12
+ addq $320,%r12
+ subq %r12,%r15
+.p2align 2
+.Lcbc_te_ok:
+
+ xchgq %rsp,%r15 /* switch to the new frame; r15 = rsp after the pushes */
+
+ movq %r15,16(%rsp)
+.Lcbc_fast_body:
+ movq %rdi,24(%rsp)
+ movq %rsi,32(%rsp)
+ movq %rdx,40(%rsp)
+ movq %rcx,48(%rsp)
+ movq %r8,56(%rsp)
+ movl $0,80+240(%rsp) /* 0 = no key copy made (checked in the cleanup) */
+ movq %r8,%rbp /* rbp = IV pointer */
+ movq %r9,%rbx /* rbx = enc flag */
+ movq %rsi,%r9 /* r9 = out cursor */
+ movq %rdi,%r8 /* r8 = in cursor */
+ movq %rcx,%r15 /* r15 = key */
+
+ movl 240(%r15),%eax /* eax = rounds */
+
+ movq %r15,%r10 /* copy the key into the frame when (key - table) mod 4096 is below 2304 or at least 4096-248 */
+ subq %r14,%r10
+ andq $4095,%r10
+ cmpq $2304,%r10
+ jb .Lcbc_do_ecopy
+ cmpq $4096-248,%r10
+ jb .Lcbc_skip_ecopy
+.p2align 2
+.Lcbc_do_ecopy:
+ movq %r15,%rsi
+ leaq 80(%rsp),%rdi
+ leaq 80(%rsp),%r15 /* use the copy from now on */
+ movl $30,%ecx /* 30 quadwords = 240 bytes */
+.long 0x90A548F3 /* bytes f3 48 a5 90: rep movsq; nop */
+ movl %eax,(%rdi) /* rdi is now frame+80+240: store rounds, which also marks the copy as present */
+.Lcbc_skip_ecopy:
+ movq %r15,0(%rsp) /* key pointer the block routines will use */
+
+ movl $18,%ecx /* 18 * 128 = 2304 bytes */
+.p2align 2
+.Lcbc_prefetch_te: /* read one quadword per 32 bytes of the table; the values are discarded */
+ movq 0(%r14),%r10
+ movq 32(%r14),%r11
+ movq 64(%r14),%r12
+ movq 96(%r14),%r13
+ leaq 128(%r14),%r14
+ subl $1,%ecx
+ jnz .Lcbc_prefetch_te
+ leaq -2304(%r14),%r14 /* r14 back to the table start */
+
+ cmpq $0,%rbx
+ je .LFAST_DECRYPT
+
+/* fast encrypt: eax..edx carry the chaining value, starting with the IV */
+ movl 0(%rbp),%eax
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+
+.p2align 2
+.Lcbc_fast_enc_loop:
+ xorl 0(%r8),%eax /* chaining value ^= plaintext block */
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp) /* in cursor is reloaded after the call */
+
+ call _x86_64_AES_encrypt
+
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9) /* ciphertext out; it is also the next chaining value */
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10 /* at least 16 bytes left? (movq below does not touch flags) */
+ movq %r10,40(%rsp)
+ jnz .Lcbc_fast_enc_loop
+ movq 56(%rsp),%rbp
+ movl %eax,0(%rbp) /* write the last ciphertext block back as the new IV */
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+
+ jmp .Lcbc_fast_cleanup
+
+
+.p2align 4
+.LFAST_DECRYPT:
+ cmpq %r8,%r9
+ je .Lcbc_fast_dec_in_place /* in == out needs the ciphertext saved before it is overwritten */
+
+ movq %rbp,64(%rsp) /* 64(%rsp) = pointer to the previous ciphertext block, initially the IV */
+.p2align 2
+.Lcbc_fast_dec_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_decrypt
+
+ movq 64(%rsp),%rbp
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0(%rbp),%eax /* plaintext = decrypted block ^ previous ciphertext */
+ xorl 4(%rbp),%ebx
+ xorl 8(%rbp),%ecx
+ xorl 12(%rbp),%edx
+ movq %r8,%rbp /* the current input block becomes the previous one */
+
+ subq $16,%r10 /* sets ZF for the jnz below; only mov/lea follow */
+ movq %r10,40(%rsp)
+ movq %rbp,64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jnz .Lcbc_fast_dec_loop
+ movq 56(%rsp),%r12
+ movq 0(%rbp),%r10 /* new IV = last ciphertext block */
+ movq 8(%rbp),%r11
+ movq %r10,0(%r12)
+ movq %r11,8(%r12)
+ jmp .Lcbc_fast_cleanup
+
+.p2align 4
+.Lcbc_fast_dec_in_place:
+ movq 0(%rbp),%r10 /* copy the IV into the 16-byte scratch at 64(%rsp) */
+ movq 8(%rbp),%r11
+ movq %r10,0+64(%rsp)
+ movq %r11,8+64(%rsp)
+.p2align 2
+.Lcbc_fast_dec_in_place_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_decrypt
+
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax /* plaintext = decrypted block ^ saved previous ciphertext */
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+
+ movq 0(%r8),%r11 /* grab this ciphertext block before the store overwrites it */
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jz .Lcbc_fast_dec_in_place_done
+
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ movq %r10,40(%rsp)
+ jmp .Lcbc_fast_dec_in_place_loop
+.Lcbc_fast_dec_in_place_done:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi) /* new IV = last ciphertext block */
+ movq %r12,8(%rdi)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+.p2align 2
+.Lcbc_fast_cleanup:
+ cmpl $0,80+240(%rsp) /* was a key copy made? (leaq below keeps the flags) */
+ leaq 80(%rsp),%rdi
+ je .Lcbc_exit
+ movl $30,%ecx
+ xorq %rax,%rax
+.long 0x90AB48F3 /* bytes f3 48 ab 90: rep stosq; nop -- zero the 240-byte key copy */
+
+ jmp .Lcbc_exit
+
+
+.p2align 4
+.Lcbc_slow_prologue:
+/* slow path: 88-byte frame, 64-byte aligned, moved down by ((frame - key + 151) & 960) bytes relative to the key address */
+ leaq -88(%rsp),%rbp
+ andq $-64,%rbp
+
+ leaq -88-63(%rcx),%r10
+ subq %rbp,%r10
+ negq %r10
+ andq $960,%r10
+ subq %r10,%rbp
+
+ xchgq %rsp,%rbp /* switch to the new frame; rbp = rsp after the pushes */
+
+ movq %rbp,16(%rsp)
+.Lcbc_slow_body:
+
+
+
+
+ movq %r8,56(%rsp)
+ movq %r8,%rbp /* rbp = IV pointer */
+ movq %r9,%rbx /* rbx = enc flag */
+ movq %rsi,%r9 /* r9 = out cursor */
+ movq %rdi,%r8 /* r8 = in cursor */
+ movq %rcx,%r15 /* r15 = key */
+ movq %rdx,%r10 /* r10 = bytes left */
+
+ movl 240(%r15),%eax
+ movq %r15,0(%rsp)
+ shll $4,%eax
+ leaq (%r15,%rax,1),%rax
+ movq %rax,8(%rsp) /* key + 16*rounds: stop address used by the compact decrypt routine */
+
+/* r14 = table + 2048 + ((rsp + 760 - (table + 2048)) & 768): one of four 256-byte positions chosen from the frame address */
+ leaq 2048(%r14),%r14
+ leaq 768-8(%rsp),%rax
+ subq %r14,%rax
+ andq $768,%rax
+ leaq (%r14,%rax,1),%r14
+
+ cmpq $0,%rbx
+ je .LSLOW_DECRYPT
+
+/* slow encrypt */
+ testq $-16,%r10 /* fewer than 16 bytes in total? (the movl loads keep the flags) */
+ movl 0(%rbp),%eax
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+ jz .Lcbc_slow_enc_tail
+
+.p2align 2
+.Lcbc_slow_enc_loop:
+ xorl 0(%r8),%eax
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+
+ call _x86_64_AES_encrypt_compact
+
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10
+ jnz .Lcbc_slow_enc_loop /* another full block available */
+ testq $15,%r10
+ jnz .Lcbc_slow_enc_tail /* 1..15 bytes left */
+ movq 56(%rsp),%rbp
+ movl %eax,0(%rbp) /* new IV = last ciphertext block */
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+
+ jmp .Lcbc_exit
+
+.p2align 2
+.Lcbc_slow_enc_tail: /* copy the r10 remaining bytes to out, zero-fill to 16, then encrypt that block in place */
+ movq %rax,%r11 /* park eax and ecx; the string ops need rax and rcx */
+ movq %rcx,%r12
+ movq %r10,%rcx
+ movq %r8,%rsi
+ movq %r9,%rdi
+.long 0x9066A4F3 /* bytes f3 a4 66 90: rep movsb; 2-byte nop */
+ movq $16,%rcx
+ subq %r10,%rcx
+ xorq %rax,%rax
+.long 0x9066AAF3 /* bytes f3 aa 66 90: rep stosb; 2-byte nop */
+ movq %r9,%r8 /* the padded block in the output buffer is the next input */
+ movq $16,%r10
+ movq %r11,%rax
+ movq %r12,%rcx
+ jmp .Lcbc_slow_enc_loop
+
+.p2align 4
+.LSLOW_DECRYPT:
+ shrq $3,%rax /* rax still holds the 0/256/512/768 offset: add a further offset/8 to r14 */
+ addq %rax,%r14
+
+ movq 0(%rbp),%r11 /* copy the IV into the 16-byte scratch at 64(%rsp) */
+ movq 8(%rbp),%r12
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+.p2align 2
+.Lcbc_slow_dec_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+
+ call _x86_64_AES_decrypt_compact
+
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax /* plaintext = decrypted block ^ saved previous ciphertext */
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+
+ movq 0(%r8),%r11 /* this ciphertext block, needed as the next chaining value */
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jc .Lcbc_slow_dec_partial /* fewer than 16 bytes were left */
+ jz .Lcbc_slow_dec_done /* exactly 16 bytes were left */
+
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jmp .Lcbc_slow_dec_loop
+.Lcbc_slow_dec_done:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi) /* new IV = last ciphertext block */
+ movq %r12,8(%rdi)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ jmp .Lcbc_exit
+
+.p2align 2
+.Lcbc_slow_dec_partial:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+
+ movl %eax,0+64(%rsp) /* stage the plaintext in the scratch area ... */
+ movl %ebx,4+64(%rsp)
+ movl %ecx,8+64(%rsp)
+ movl %edx,12+64(%rsp)
+
+ movq %r9,%rdi
+ leaq 64(%rsp),%rsi
+ leaq 16(%r10),%rcx /* ... and copy only the r10+16 bytes that were actually left */
+.long 0x9066A4F3 /* bytes f3 a4 66 90: rep movsb; 2-byte nop */
+ jmp .Lcbc_exit
+
+.p2align 4
+.Lcbc_exit:
+ movq 16(%rsp),%rsi /* rsi = rsp as it was after the pushes */
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp /* rsp -> saved rflags */
+.Lcbc_popfq:
+ popfq
+.Lcbc_epilogue:
+ movq 8(%rsp),%rdi /* restore the caller's rdi/rsi saved on entry */
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_AES_cbc_encrypt:
+.p2align 6
+.LAES_Te:
+.long 0xa56363c6,0xa56363c6
+.long 0x847c7cf8,0x847c7cf8
+.long 0x997777ee,0x997777ee
+.long 0x8d7b7bf6,0x8d7b7bf6
+.long 0x0df2f2ff,0x0df2f2ff
+.long 0xbd6b6bd6,0xbd6b6bd6
+.long 0xb16f6fde,0xb16f6fde
+.long 0x54c5c591,0x54c5c591
+.long 0x50303060,0x50303060
+.long 0x03010102,0x03010102
+.long 0xa96767ce,0xa96767ce
+.long 0x7d2b2b56,0x7d2b2b56
+.long 0x19fefee7,0x19fefee7
+.long 0x62d7d7b5,0x62d7d7b5
+.long 0xe6abab4d,0xe6abab4d
+.long 0x9a7676ec,0x9a7676ec
+.long 0x45caca8f,0x45caca8f
+.long 0x9d82821f,0x9d82821f
+.long 0x40c9c989,0x40c9c989
+.long 0x877d7dfa,0x877d7dfa
+.long 0x15fafaef,0x15fafaef
+.long 0xeb5959b2,0xeb5959b2
+.long 0xc947478e,0xc947478e
+.long 0x0bf0f0fb,0x0bf0f0fb
+.long 0xecadad41,0xecadad41
+.long 0x67d4d4b3,0x67d4d4b3
+.long 0xfda2a25f,0xfda2a25f
+.long 0xeaafaf45,0xeaafaf45
+.long 0xbf9c9c23,0xbf9c9c23
+.long 0xf7a4a453,0xf7a4a453
+.long 0x967272e4,0x967272e4
+.long 0x5bc0c09b,0x5bc0c09b
+.long 0xc2b7b775,0xc2b7b775
+.long 0x1cfdfde1,0x1cfdfde1
+.long 0xae93933d,0xae93933d
+.long 0x6a26264c,0x6a26264c
+.long 0x5a36366c,0x5a36366c
+.long 0x413f3f7e,0x413f3f7e
+.long 0x02f7f7f5,0x02f7f7f5
+.long 0x4fcccc83,0x4fcccc83
+.long 0x5c343468,0x5c343468
+.long 0xf4a5a551,0xf4a5a551
+.long 0x34e5e5d1,0x34e5e5d1
+.long 0x08f1f1f9,0x08f1f1f9
+.long 0x937171e2,0x937171e2
+.long 0x73d8d8ab,0x73d8d8ab
+.long 0x53313162,0x53313162
+.long 0x3f15152a,0x3f15152a
+.long 0x0c040408,0x0c040408
+.long 0x52c7c795,0x52c7c795
+.long 0x65232346,0x65232346
+.long 0x5ec3c39d,0x5ec3c39d
+.long 0x28181830,0x28181830
+.long 0xa1969637,0xa1969637
+.long 0x0f05050a,0x0f05050a
+.long 0xb59a9a2f,0xb59a9a2f
+.long 0x0907070e,0x0907070e
+.long 0x36121224,0x36121224
+.long 0x9b80801b,0x9b80801b
+.long 0x3de2e2df,0x3de2e2df
+.long 0x26ebebcd,0x26ebebcd
+.long 0x6927274e,0x6927274e
+.long 0xcdb2b27f,0xcdb2b27f
+.long 0x9f7575ea,0x9f7575ea
+.long 0x1b090912,0x1b090912
+.long 0x9e83831d,0x9e83831d
+.long 0x742c2c58,0x742c2c58
+.long 0x2e1a1a34,0x2e1a1a34
+.long 0x2d1b1b36,0x2d1b1b36
+.long 0xb26e6edc,0xb26e6edc
+.long 0xee5a5ab4,0xee5a5ab4
+.long 0xfba0a05b,0xfba0a05b
+.long 0xf65252a4,0xf65252a4
+.long 0x4d3b3b76,0x4d3b3b76
+.long 0x61d6d6b7,0x61d6d6b7
+.long 0xceb3b37d,0xceb3b37d
+.long 0x7b292952,0x7b292952
+.long 0x3ee3e3dd,0x3ee3e3dd
+.long 0x712f2f5e,0x712f2f5e
+.long 0x97848413,0x97848413
+.long 0xf55353a6,0xf55353a6
+.long 0x68d1d1b9,0x68d1d1b9
+.long 0x00000000,0x00000000
+.long 0x2cededc1,0x2cededc1
+.long 0x60202040,0x60202040
+.long 0x1ffcfce3,0x1ffcfce3
+.long 0xc8b1b179,0xc8b1b179
+.long 0xed5b5bb6,0xed5b5bb6
+.long 0xbe6a6ad4,0xbe6a6ad4
+.long 0x46cbcb8d,0x46cbcb8d
+.long 0xd9bebe67,0xd9bebe67
+.long 0x4b393972,0x4b393972
+.long 0xde4a4a94,0xde4a4a94
+.long 0xd44c4c98,0xd44c4c98
+.long 0xe85858b0,0xe85858b0
+.long 0x4acfcf85,0x4acfcf85
+.long 0x6bd0d0bb,0x6bd0d0bb
+.long 0x2aefefc5,0x2aefefc5
+.long 0xe5aaaa4f,0xe5aaaa4f
+.long 0x16fbfbed,0x16fbfbed
+.long 0xc5434386,0xc5434386
+.long 0xd74d4d9a,0xd74d4d9a
+.long 0x55333366,0x55333366
+.long 0x94858511,0x94858511
+.long 0xcf45458a,0xcf45458a
+.long 0x10f9f9e9,0x10f9f9e9
+.long 0x06020204,0x06020204
+.long 0x817f7ffe,0x817f7ffe
+.long 0xf05050a0,0xf05050a0
+.long 0x443c3c78,0x443c3c78
+.long 0xba9f9f25,0xba9f9f25
+.long 0xe3a8a84b,0xe3a8a84b
+.long 0xf35151a2,0xf35151a2
+.long 0xfea3a35d,0xfea3a35d
+.long 0xc0404080,0xc0404080
+.long 0x8a8f8f05,0x8a8f8f05
+.long 0xad92923f,0xad92923f
+.long 0xbc9d9d21,0xbc9d9d21
+.long 0x48383870,0x48383870
+.long 0x04f5f5f1,0x04f5f5f1
+.long 0xdfbcbc63,0xdfbcbc63
+.long 0xc1b6b677,0xc1b6b677
+.long 0x75dadaaf,0x75dadaaf
+.long 0x63212142,0x63212142
+.long 0x30101020,0x30101020
+.long 0x1affffe5,0x1affffe5
+.long 0x0ef3f3fd,0x0ef3f3fd
+.long 0x6dd2d2bf,0x6dd2d2bf
+.long 0x4ccdcd81,0x4ccdcd81
+.long 0x140c0c18,0x140c0c18
+.long 0x35131326,0x35131326
+.long 0x2fececc3,0x2fececc3
+.long 0xe15f5fbe,0xe15f5fbe
+.long 0xa2979735,0xa2979735
+.long 0xcc444488,0xcc444488
+.long 0x3917172e,0x3917172e
+.long 0x57c4c493,0x57c4c493
+.long 0xf2a7a755,0xf2a7a755
+.long 0x827e7efc,0x827e7efc
+.long 0x473d3d7a,0x473d3d7a
+.long 0xac6464c8,0xac6464c8
+.long 0xe75d5dba,0xe75d5dba
+.long 0x2b191932,0x2b191932
+.long 0x957373e6,0x957373e6
+.long 0xa06060c0,0xa06060c0
+.long 0x98818119,0x98818119
+.long 0xd14f4f9e,0xd14f4f9e
+.long 0x7fdcdca3,0x7fdcdca3
+.long 0x66222244,0x66222244
+.long 0x7e2a2a54,0x7e2a2a54
+.long 0xab90903b,0xab90903b
+.long 0x8388880b,0x8388880b
+.long 0xca46468c,0xca46468c
+.long 0x29eeeec7,0x29eeeec7
+.long 0xd3b8b86b,0xd3b8b86b
+.long 0x3c141428,0x3c141428
+.long 0x79dedea7,0x79dedea7
+.long 0xe25e5ebc,0xe25e5ebc
+.long 0x1d0b0b16,0x1d0b0b16
+.long 0x76dbdbad,0x76dbdbad
+.long 0x3be0e0db,0x3be0e0db
+.long 0x56323264,0x56323264
+.long 0x4e3a3a74,0x4e3a3a74
+.long 0x1e0a0a14,0x1e0a0a14
+.long 0xdb494992,0xdb494992
+.long 0x0a06060c,0x0a06060c
+.long 0x6c242448,0x6c242448
+.long 0xe45c5cb8,0xe45c5cb8
+.long 0x5dc2c29f,0x5dc2c29f
+.long 0x6ed3d3bd,0x6ed3d3bd
+.long 0xefacac43,0xefacac43
+.long 0xa66262c4,0xa66262c4
+.long 0xa8919139,0xa8919139
+.long 0xa4959531,0xa4959531
+.long 0x37e4e4d3,0x37e4e4d3
+.long 0x8b7979f2,0x8b7979f2
+.long 0x32e7e7d5,0x32e7e7d5
+.long 0x43c8c88b,0x43c8c88b
+.long 0x5937376e,0x5937376e
+.long 0xb76d6dda,0xb76d6dda
+.long 0x8c8d8d01,0x8c8d8d01
+.long 0x64d5d5b1,0x64d5d5b1
+.long 0xd24e4e9c,0xd24e4e9c
+.long 0xe0a9a949,0xe0a9a949
+.long 0xb46c6cd8,0xb46c6cd8
+.long 0xfa5656ac,0xfa5656ac
+.long 0x07f4f4f3,0x07f4f4f3
+.long 0x25eaeacf,0x25eaeacf
+.long 0xaf6565ca,0xaf6565ca
+.long 0x8e7a7af4,0x8e7a7af4
+.long 0xe9aeae47,0xe9aeae47
+.long 0x18080810,0x18080810
+.long 0xd5baba6f,0xd5baba6f
+.long 0x887878f0,0x887878f0
+.long 0x6f25254a,0x6f25254a
+.long 0x722e2e5c,0x722e2e5c
+.long 0x241c1c38,0x241c1c38
+.long 0xf1a6a657,0xf1a6a657
+.long 0xc7b4b473,0xc7b4b473
+.long 0x51c6c697,0x51c6c697
+.long 0x23e8e8cb,0x23e8e8cb
+.long 0x7cdddda1,0x7cdddda1
+.long 0x9c7474e8,0x9c7474e8
+.long 0x211f1f3e,0x211f1f3e
+.long 0xdd4b4b96,0xdd4b4b96
+.long 0xdcbdbd61,0xdcbdbd61
+.long 0x868b8b0d,0x868b8b0d
+.long 0x858a8a0f,0x858a8a0f
+.long 0x907070e0,0x907070e0
+.long 0x423e3e7c,0x423e3e7c
+.long 0xc4b5b571,0xc4b5b571
+.long 0xaa6666cc,0xaa6666cc
+.long 0xd8484890,0xd8484890
+.long 0x05030306,0x05030306
+.long 0x01f6f6f7,0x01f6f6f7
+.long 0x120e0e1c,0x120e0e1c
+.long 0xa36161c2,0xa36161c2
+.long 0x5f35356a,0x5f35356a
+.long 0xf95757ae,0xf95757ae
+.long 0xd0b9b969,0xd0b9b969
+.long 0x91868617,0x91868617
+.long 0x58c1c199,0x58c1c199
+.long 0x271d1d3a,0x271d1d3a
+.long 0xb99e9e27,0xb99e9e27
+.long 0x38e1e1d9,0x38e1e1d9
+.long 0x13f8f8eb,0x13f8f8eb
+.long 0xb398982b,0xb398982b
+.long 0x33111122,0x33111122
+.long 0xbb6969d2,0xbb6969d2
+.long 0x70d9d9a9,0x70d9d9a9
+.long 0x898e8e07,0x898e8e07
+.long 0xa7949433,0xa7949433
+.long 0xb69b9b2d,0xb69b9b2d
+.long 0x221e1e3c,0x221e1e3c
+.long 0x92878715,0x92878715
+.long 0x20e9e9c9,0x20e9e9c9
+.long 0x49cece87,0x49cece87
+.long 0xff5555aa,0xff5555aa
+.long 0x78282850,0x78282850
+.long 0x7adfdfa5,0x7adfdfa5
+.long 0x8f8c8c03,0x8f8c8c03
+.long 0xf8a1a159,0xf8a1a159
+.long 0x80898909,0x80898909
+.long 0x170d0d1a,0x170d0d1a
+.long 0xdabfbf65,0xdabfbf65
+.long 0x31e6e6d7,0x31e6e6d7
+.long 0xc6424284,0xc6424284
+.long 0xb86868d0,0xb86868d0
+.long 0xc3414182,0xc3414182
+.long 0xb0999929,0xb0999929
+.long 0x772d2d5a,0x772d2d5a
+.long 0x110f0f1e,0x110f0f1e
+.long 0xcbb0b07b,0xcbb0b07b
+.long 0xfc5454a8,0xfc5454a8
+.long 0xd6bbbb6d,0xd6bbbb6d
+.long 0x3a16162c,0x3a16162c
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.long 0x00000001, 0x00000002, 0x00000004, 0x00000008
+.long 0x00000010, 0x00000020, 0x00000040, 0x00000080
+.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080
+.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
+.p2align 6	/* start the table on a 64-byte (2^6) boundary */
+.LAES_Td:	/* 256 rows, each one 32-bit word emitted twice; then four copies of a 256-byte table, each copy followed by eight .long constants. NOTE(review): the byte table starts 0x52,0x09,0x6a,0xd5, which looks like the AES inverse S-box - confirm */
+.long 0x50a7f451,0x50a7f451
+.long 0x5365417e,0x5365417e
+.long 0xc3a4171a,0xc3a4171a
+.long 0x965e273a,0x965e273a
+.long 0xcb6bab3b,0xcb6bab3b
+.long 0xf1459d1f,0xf1459d1f
+.long 0xab58faac,0xab58faac
+.long 0x9303e34b,0x9303e34b
+.long 0x55fa3020,0x55fa3020
+.long 0xf66d76ad,0xf66d76ad
+.long 0x9176cc88,0x9176cc88
+.long 0x254c02f5,0x254c02f5
+.long 0xfcd7e54f,0xfcd7e54f
+.long 0xd7cb2ac5,0xd7cb2ac5
+.long 0x80443526,0x80443526
+.long 0x8fa362b5,0x8fa362b5
+.long 0x495ab1de,0x495ab1de
+.long 0x671bba25,0x671bba25
+.long 0x980eea45,0x980eea45
+.long 0xe1c0fe5d,0xe1c0fe5d
+.long 0x02752fc3,0x02752fc3
+.long 0x12f04c81,0x12f04c81
+.long 0xa397468d,0xa397468d
+.long 0xc6f9d36b,0xc6f9d36b
+.long 0xe75f8f03,0xe75f8f03
+.long 0x959c9215,0x959c9215
+.long 0xeb7a6dbf,0xeb7a6dbf
+.long 0xda595295,0xda595295
+.long 0x2d83bed4,0x2d83bed4
+.long 0xd3217458,0xd3217458
+.long 0x2969e049,0x2969e049
+.long 0x44c8c98e,0x44c8c98e
+.long 0x6a89c275,0x6a89c275
+.long 0x78798ef4,0x78798ef4
+.long 0x6b3e5899,0x6b3e5899
+.long 0xdd71b927,0xdd71b927
+.long 0xb64fe1be,0xb64fe1be
+.long 0x17ad88f0,0x17ad88f0
+.long 0x66ac20c9,0x66ac20c9
+.long 0xb43ace7d,0xb43ace7d
+.long 0x184adf63,0x184adf63
+.long 0x82311ae5,0x82311ae5
+.long 0x60335197,0x60335197
+.long 0x457f5362,0x457f5362
+.long 0xe07764b1,0xe07764b1
+.long 0x84ae6bbb,0x84ae6bbb
+.long 0x1ca081fe,0x1ca081fe
+.long 0x942b08f9,0x942b08f9
+.long 0x58684870,0x58684870
+.long 0x19fd458f,0x19fd458f
+.long 0x876cde94,0x876cde94
+.long 0xb7f87b52,0xb7f87b52
+.long 0x23d373ab,0x23d373ab
+.long 0xe2024b72,0xe2024b72
+.long 0x578f1fe3,0x578f1fe3
+.long 0x2aab5566,0x2aab5566
+.long 0x0728ebb2,0x0728ebb2
+.long 0x03c2b52f,0x03c2b52f
+.long 0x9a7bc586,0x9a7bc586
+.long 0xa50837d3,0xa50837d3
+.long 0xf2872830,0xf2872830
+.long 0xb2a5bf23,0xb2a5bf23
+.long 0xba6a0302,0xba6a0302
+.long 0x5c8216ed,0x5c8216ed
+.long 0x2b1ccf8a,0x2b1ccf8a
+.long 0x92b479a7,0x92b479a7
+.long 0xf0f207f3,0xf0f207f3
+.long 0xa1e2694e,0xa1e2694e
+.long 0xcdf4da65,0xcdf4da65
+.long 0xd5be0506,0xd5be0506
+.long 0x1f6234d1,0x1f6234d1
+.long 0x8afea6c4,0x8afea6c4
+.long 0x9d532e34,0x9d532e34
+.long 0xa055f3a2,0xa055f3a2
+.long 0x32e18a05,0x32e18a05
+.long 0x75ebf6a4,0x75ebf6a4
+.long 0x39ec830b,0x39ec830b
+.long 0xaaef6040,0xaaef6040
+.long 0x069f715e,0x069f715e
+.long 0x51106ebd,0x51106ebd
+.long 0xf98a213e,0xf98a213e
+.long 0x3d06dd96,0x3d06dd96
+.long 0xae053edd,0xae053edd
+.long 0x46bde64d,0x46bde64d
+.long 0xb58d5491,0xb58d5491
+.long 0x055dc471,0x055dc471
+.long 0x6fd40604,0x6fd40604
+.long 0xff155060,0xff155060
+.long 0x24fb9819,0x24fb9819
+.long 0x97e9bdd6,0x97e9bdd6
+.long 0xcc434089,0xcc434089
+.long 0x779ed967,0x779ed967
+.long 0xbd42e8b0,0xbd42e8b0
+.long 0x888b8907,0x888b8907
+.long 0x385b19e7,0x385b19e7
+.long 0xdbeec879,0xdbeec879
+.long 0x470a7ca1,0x470a7ca1
+.long 0xe90f427c,0xe90f427c
+.long 0xc91e84f8,0xc91e84f8
+.long 0x00000000,0x00000000
+.long 0x83868009,0x83868009
+.long 0x48ed2b32,0x48ed2b32
+.long 0xac70111e,0xac70111e
+.long 0x4e725a6c,0x4e725a6c
+.long 0xfbff0efd,0xfbff0efd
+.long 0x5638850f,0x5638850f
+.long 0x1ed5ae3d,0x1ed5ae3d
+.long 0x27392d36,0x27392d36
+.long 0x64d90f0a,0x64d90f0a
+.long 0x21a65c68,0x21a65c68
+.long 0xd1545b9b,0xd1545b9b
+.long 0x3a2e3624,0x3a2e3624
+.long 0xb1670a0c,0xb1670a0c
+.long 0x0fe75793,0x0fe75793
+.long 0xd296eeb4,0xd296eeb4
+.long 0x9e919b1b,0x9e919b1b
+.long 0x4fc5c080,0x4fc5c080
+.long 0xa220dc61,0xa220dc61
+.long 0x694b775a,0x694b775a
+.long 0x161a121c,0x161a121c
+.long 0x0aba93e2,0x0aba93e2
+.long 0xe52aa0c0,0xe52aa0c0
+.long 0x43e0223c,0x43e0223c
+.long 0x1d171b12,0x1d171b12
+.long 0x0b0d090e,0x0b0d090e
+.long 0xadc78bf2,0xadc78bf2
+.long 0xb9a8b62d,0xb9a8b62d
+.long 0xc8a91e14,0xc8a91e14
+.long 0x8519f157,0x8519f157
+.long 0x4c0775af,0x4c0775af
+.long 0xbbdd99ee,0xbbdd99ee
+.long 0xfd607fa3,0xfd607fa3
+.long 0x9f2601f7,0x9f2601f7
+.long 0xbcf5725c,0xbcf5725c
+.long 0xc53b6644,0xc53b6644
+.long 0x347efb5b,0x347efb5b
+.long 0x7629438b,0x7629438b
+.long 0xdcc623cb,0xdcc623cb
+.long 0x68fcedb6,0x68fcedb6
+.long 0x63f1e4b8,0x63f1e4b8
+.long 0xcadc31d7,0xcadc31d7
+.long 0x10856342,0x10856342
+.long 0x40229713,0x40229713
+.long 0x2011c684,0x2011c684
+.long 0x7d244a85,0x7d244a85
+.long 0xf83dbbd2,0xf83dbbd2
+.long 0x1132f9ae,0x1132f9ae
+.long 0x6da129c7,0x6da129c7
+.long 0x4b2f9e1d,0x4b2f9e1d
+.long 0xf330b2dc,0xf330b2dc
+.long 0xec52860d,0xec52860d
+.long 0xd0e3c177,0xd0e3c177
+.long 0x6c16b32b,0x6c16b32b
+.long 0x99b970a9,0x99b970a9
+.long 0xfa489411,0xfa489411
+.long 0x2264e947,0x2264e947
+.long 0xc48cfca8,0xc48cfca8
+.long 0x1a3ff0a0,0x1a3ff0a0
+.long 0xd82c7d56,0xd82c7d56
+.long 0xef903322,0xef903322
+.long 0xc74e4987,0xc74e4987
+.long 0xc1d138d9,0xc1d138d9
+.long 0xfea2ca8c,0xfea2ca8c
+.long 0x360bd498,0x360bd498
+.long 0xcf81f5a6,0xcf81f5a6
+.long 0x28de7aa5,0x28de7aa5
+.long 0x268eb7da,0x268eb7da
+.long 0xa4bfad3f,0xa4bfad3f
+.long 0xe49d3a2c,0xe49d3a2c
+.long 0x0d927850,0x0d927850
+.long 0x9bcc5f6a,0x9bcc5f6a
+.long 0x62467e54,0x62467e54
+.long 0xc2138df6,0xc2138df6
+.long 0xe8b8d890,0xe8b8d890
+.long 0x5ef7392e,0x5ef7392e
+.long 0xf5afc382,0xf5afc382
+.long 0xbe805d9f,0xbe805d9f
+.long 0x7c93d069,0x7c93d069
+.long 0xa92dd56f,0xa92dd56f
+.long 0xb31225cf,0xb31225cf
+.long 0x3b99acc8,0x3b99acc8
+.long 0xa77d1810,0xa77d1810
+.long 0x6e639ce8,0x6e639ce8
+.long 0x7bbb3bdb,0x7bbb3bdb
+.long 0x097826cd,0x097826cd
+.long 0xf418596e,0xf418596e
+.long 0x01b79aec,0x01b79aec
+.long 0xa89a4f83,0xa89a4f83
+.long 0x656e95e6,0x656e95e6
+.long 0x7ee6ffaa,0x7ee6ffaa
+.long 0x08cfbc21,0x08cfbc21
+.long 0xe6e815ef,0xe6e815ef
+.long 0xd99be7ba,0xd99be7ba
+.long 0xce366f4a,0xce366f4a
+.long 0xd4099fea,0xd4099fea
+.long 0xd67cb029,0xd67cb029
+.long 0xafb2a431,0xafb2a431
+.long 0x31233f2a,0x31233f2a
+.long 0x3094a5c6,0x3094a5c6
+.long 0xc066a235,0xc066a235
+.long 0x37bc4e74,0x37bc4e74
+.long 0xa6ca82fc,0xa6ca82fc
+.long 0xb0d090e0,0xb0d090e0
+.long 0x15d8a733,0x15d8a733
+.long 0x4a9804f1,0x4a9804f1
+.long 0xf7daec41,0xf7daec41
+.long 0x0e50cd7f,0x0e50cd7f
+.long 0x2ff69117,0x2ff69117
+.long 0x8dd64d76,0x8dd64d76
+.long 0x4db0ef43,0x4db0ef43
+.long 0x544daacc,0x544daacc
+.long 0xdf0496e4,0xdf0496e4
+.long 0xe3b5d19e,0xe3b5d19e
+.long 0x1b886a4c,0x1b886a4c
+.long 0xb81f2cc1,0xb81f2cc1
+.long 0x7f516546,0x7f516546
+.long 0x04ea5e9d,0x04ea5e9d
+.long 0x5d358c01,0x5d358c01
+.long 0x737487fa,0x737487fa
+.long 0x2e410bfb,0x2e410bfb
+.long 0x5a1d67b3,0x5a1d67b3
+.long 0x52d2db92,0x52d2db92
+.long 0x335610e9,0x335610e9
+.long 0x1347d66d,0x1347d66d
+.long 0x8c61d79a,0x8c61d79a
+.long 0x7a0ca137,0x7a0ca137
+.long 0x8e14f859,0x8e14f859
+.long 0x893c13eb,0x893c13eb
+.long 0xee27a9ce,0xee27a9ce
+.long 0x35c961b7,0x35c961b7
+.long 0xede51ce1,0xede51ce1
+.long 0x3cb1477a,0x3cb1477a
+.long 0x59dfd29c,0x59dfd29c
+.long 0x3f73f255,0x3f73f255
+.long 0x79ce1418,0x79ce1418
+.long 0xbf37c773,0xbf37c773
+.long 0xeacdf753,0xeacdf753
+.long 0x5baafd5f,0x5baafd5f
+.long 0x146f3ddf,0x146f3ddf
+.long 0x86db4478,0x86db4478
+.long 0x81f3afca,0x81f3afca
+.long 0x3ec468b9,0x3ec468b9
+.long 0x2c342438,0x2c342438
+.long 0x5f40a3c2,0x5f40a3c2
+.long 0x72c31d16,0x72c31d16
+.long 0x0c25e2bc,0x0c25e2bc
+.long 0x8b493c28,0x8b493c28
+.long 0x41950dff,0x41950dff
+.long 0x7101a839,0x7101a839
+.long 0xdeb30c08,0xdeb30c08
+.long 0x9ce4b4d8,0x9ce4b4d8
+.long 0x90c15664,0x90c15664
+.long 0x6184cb7b,0x6184cb7b
+.long 0x70b632d5,0x70b632d5
+.long 0x745c6c48,0x745c6c48
+.long 0x4257b8d0,0x4257b8d0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	/* NUL-terminated ASCII: "AES for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
+.p2align 6	/* pad to the next 64-byte boundary after the string */
+
+.def block_se_handler; .scl 3; .type 32; .endef	/* handler named by the .xdata records of AES_encrypt and AES_decrypt */
+.p2align 4	/* NOTE(review): field names in the comments below assume r8 = CONTEXT* and r9 = DISPATCHER_CONTEXT* with the winnt.h offsets - confirm */
+block_se_handler:	/* patches the register values held in the r8 record, then jumps to .Lcommon_seh_exit */
+ pushq %rsi	/* preserve rsi, rdi, rbx, rbp, r12-r15 and the flags; .Lcommon_seh_exit pops them in reverse order */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp	/* 64-byte frame; .Lcommon_seh_exit stores call arguments at 32..56(%rsp) */
+
+ movq 120(%r8),%rax	/* presumably context->Rax */
+ movq 248(%r8),%rbx	/* presumably context->Rip */
+
+ movq 8(%r9),%rsi	/* presumably disp->ImageBase */
+ movq 56(%r9),%r11	/* presumably disp->HandlerData: the two .rva words that follow the handler in .xdata */
+
+ movl 0(%r11),%r10d	/* first word: .rva of the prologue label */
+ leaq (%rsi,%r10,1),%r10	/* rsi + rva = address to compare against */
+ cmpq %r10,%rbx
+ jb .Lin_block_prologue	/* rbx below the prologue label (unsigned): keep rax as loaded from offset 120 */
+
+ movq 152(%r8),%rax	/* presumably context->Rsp */
+
+ movl 4(%r11),%r10d	/* second word: .rva of the epilogue label */
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lin_block_prologue	/* rbx at or past the epilogue label: use the offset-152 value unchanged */
+
+ movq 24(%rax),%rax	/* follow the pointer stored 24 bytes above that stack address; presumably the stack pointer saved by the function body (not visible in this chunk) */
+ leaq 48(%rax),%rax	/* step over six 8-byte slots; the loads below read them at -8..-48 */
+
+ movq -8(%rax),%rbx
+ movq -16(%rax),%rbp
+ movq -24(%rax),%r12
+ movq -32(%rax),%r13
+ movq -40(%rax),%r14
+ movq -48(%rax),%r15
+ movq %rbx,144(%r8)	/* write the six recovered values into the r8 record (presumably Rbx, Rbp, R12-R15) */
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+ movq %r13,224(%r8)
+ movq %r14,232(%r8)
+ movq %r15,240(%r8)
+
+.Lin_block_prologue:	/* all three paths meet here with rax = the stack pointer value to record */
+ movq 8(%rax),%rdi	/* NOTE(review): presumably the slots where the function entry stashed rdi/rsi - the entry code is outside this chunk */
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8)	/* presumably context->Rsp */
+ movq %rsi,168(%r8)	/* presumably context->Rsi */
+ movq %rdi,176(%r8)	/* presumably context->Rdi */
+
+ jmp .Lcommon_seh_exit	/* shared tail located at the end of cbc_se_handler */
+
+
+.def key_se_handler; .scl 3; .type 32; .endef	/* handler named by the .xdata records of AES_set_encrypt_key and AES_set_decrypt_key */
+.p2align 4	/* NOTE(review): field names in the comments below assume r8 = CONTEXT* and r9 = DISPATCHER_CONTEXT* with the winnt.h offsets - confirm */
+key_se_handler:	/* patches the register values held in the r8 record, then jumps to .Lcommon_seh_exit */
+ pushq %rsi	/* preserve rsi, rdi, rbx, rbp, r12-r15 and the flags; .Lcommon_seh_exit pops them in reverse order */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp	/* 64-byte frame; .Lcommon_seh_exit stores call arguments at 32..56(%rsp) */
+
+ movq 120(%r8),%rax	/* presumably context->Rax */
+ movq 248(%r8),%rbx	/* presumably context->Rip */
+
+ movq 8(%r9),%rsi	/* presumably disp->ImageBase */
+ movq 56(%r9),%r11	/* presumably disp->HandlerData: the two .rva words that follow the handler in .xdata */
+
+ movl 0(%r11),%r10d	/* first word: .rva of the key-setup prologue label */
+ leaq (%rsi,%r10,1),%r10	/* rsi + rva = address to compare against */
+ cmpq %r10,%rbx
+ jb .Lin_key_prologue	/* rbx below the prologue label (unsigned): keep rax as loaded from offset 120 */
+
+ movq 152(%r8),%rax	/* presumably context->Rsp */
+
+ movl 4(%r11),%r10d	/* second word: .rva of the key-setup epilogue label */
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lin_key_prologue	/* rbx at or past the epilogue label: use the offset-152 value unchanged */
+
+ leaq 56(%rax),%rax	/* advance 56 bytes; the loads below read six 8-byte slots at -8..-48, so the lowest 8 bytes are not read */
+
+ movq -8(%rax),%rbx
+ movq -16(%rax),%rbp
+ movq -24(%rax),%r12
+ movq -32(%rax),%r13
+ movq -40(%rax),%r14
+ movq -48(%rax),%r15
+ movq %rbx,144(%r8)	/* write the six recovered values into the r8 record (presumably Rbx, Rbp, R12-R15) */
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+ movq %r13,224(%r8)
+ movq %r14,232(%r8)
+ movq %r15,240(%r8)
+
+.Lin_key_prologue:	/* all three paths meet here with rax = the stack pointer value to record */
+ movq 8(%rax),%rdi	/* NOTE(review): presumably the slots where the function entry stashed rdi/rsi - the entry code is outside this chunk */
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8)	/* presumably context->Rsp */
+ movq %rsi,168(%r8)	/* presumably context->Rsi */
+ movq %rdi,176(%r8)	/* presumably context->Rdi */
+
+ jmp .Lcommon_seh_exit	/* shared tail located at the end of cbc_se_handler */
+
+
+.def cbc_se_handler; .scl 3; .type 32; .endef	/* handler named by the .xdata record of AES_cbc_encrypt; that record carries no label words, so labels are addressed directly */
+.p2align 4	/* NOTE(review): field names in the comments below assume r8 = CONTEXT* and r9 = DISPATCHER_CONTEXT* with the winnt.h offsets - confirm */
+cbc_se_handler:	/* classifies rbx against the .Lcbc_* labels, patches the r8 record, then falls into .Lcommon_seh_exit */
+ pushq %rsi	/* preserve rsi, rdi, rbx, rbp, r12-r15 and the flags; popped in reverse order before retq */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp	/* 64-byte frame; call arguments are stored at 32..56(%rsp) below */
+
+ movq 120(%r8),%rax	/* presumably context->Rax */
+ movq 248(%r8),%rbx	/* presumably context->Rip */
+
+ leaq .Lcbc_prologue(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lin_cbc_prologue	/* rbx below .Lcbc_prologue: only rsp/rsi/rdi are recorded, from rax */
+
+ leaq .Lcbc_fast_body(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lin_cbc_frame_setup	/* between .Lcbc_prologue and .Lcbc_fast_body: recover registers relative to the offset-120 value */
+
+ leaq .Lcbc_slow_prologue(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lin_cbc_body	/* between .Lcbc_fast_body and .Lcbc_slow_prologue */
+
+ leaq .Lcbc_slow_body(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lin_cbc_frame_setup	/* between .Lcbc_slow_prologue and .Lcbc_slow_body: same recovery as the first frame-setup range */
+
+.Lin_cbc_body:	/* also reached by fall-through when rbx is at or past .Lcbc_slow_body */
+ movq 152(%r8),%rax	/* presumably context->Rsp */
+
+ leaq .Lcbc_epilogue(%rip),%r10
+ cmpq %r10,%rbx
+ jae .Lin_cbc_prologue	/* rbx at or past .Lcbc_epilogue: use the offset-152 value unchanged */
+
+ leaq 8(%rax),%rax	/* skip one 8-byte slot. NOTE(review): presumably the flags word that stays on the stack until .Lcbc_popfq - confirm */
+
+ leaq .Lcbc_popfq(%rip),%r10
+ cmpq %r10,%rbx
+ jae .Lin_cbc_prologue	/* rbx in [.Lcbc_popfq, .Lcbc_epilogue): use the offset-152 value plus 8 */
+
+ movq 8(%rax),%rax	/* follow the pointer 16 bytes above the offset-152 value; presumably a stack pointer saved by the function body (not visible here) */
+ leaq 56(%rax),%rax	/* advance 56 bytes so the loads below find the saved registers at -16..-56 */
+
+.Lin_cbc_frame_setup:	/* rax points just above the saved-register area; the slot at -8 is not read */
+ movq -16(%rax),%rbx
+ movq -24(%rax),%rbp
+ movq -32(%rax),%r12
+ movq -40(%rax),%r13
+ movq -48(%rax),%r14
+ movq -56(%rax),%r15
+ movq %rbx,144(%r8)	/* write the six recovered values into the r8 record (presumably Rbx, Rbp, R12-R15) */
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+ movq %r13,224(%r8)
+ movq %r14,232(%r8)
+ movq %r15,240(%r8)
+
+.Lin_cbc_prologue:	/* every path meets here with rax = the stack pointer value to record */
+ movq 8(%rax),%rdi	/* NOTE(review): presumably the slots where the function entry stashed rdi/rsi - the entry code is outside this chunk */
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8)	/* presumably context->Rsp */
+ movq %rsi,168(%r8)	/* presumably context->Rsi */
+ movq %rdi,176(%r8)	/* presumably context->Rdi */
+
+.Lcommon_seh_exit:	/* shared tail: block_se_handler and key_se_handler jump here, cbc_se_handler falls in */
+
+ movq 40(%r9),%rdi	/* destination: presumably disp->ContextRecord */
+ movq %r8,%rsi	/* source: the record patched above */
+ movl $154,%ecx	/* 154 quadwords = 1232 bytes; presumably sizeof(CONTEXT) */
+.long 0xa548f3fc	/* bytes fc f3 48 a5 = cld; rep movsq (copy rcx quadwords from rsi to rdi) */
+
+ movq %r9,%rsi	/* keep the r9 pointer in rsi while r8/r9 are reloaded as call arguments */
+ xorq %rcx,%rcx	/* first argument = 0 (presumably the handler-type argument) */
+ movq 8(%rsi),%rdx	/* second argument: presumably disp->ImageBase */
+ movq 0(%rsi),%r8	/* third argument: presumably disp->ControlPc */
+ movq 16(%rsi),%r9	/* fourth argument: presumably disp->FunctionEntry */
+ movq 40(%rsi),%r10	/* presumably disp->ContextRecord */
+ leaq 56(%rsi),%r11	/* address of the offset-56 field (presumably &disp->HandlerData) */
+ leaq 24(%rsi),%r12	/* address of the offset-24 field (presumably &disp->EstablisherFrame) */
+ movq %r10,32(%rsp)	/* fifth to eighth arguments go on the stack above the 32-byte shadow area */
+ movq %r11,40(%rsp)
+ movq %r12,48(%rsp)
+ movq %rcx,56(%rsp)	/* eighth argument = 0 */
+ call *__imp_RtlVirtualUnwind(%rip)	/* indirect call through the import-table slot */
+
+ movl $1,%eax	/* return value 1; presumably ExceptionContinueSearch */
+ addq $64,%rsp	/* release the frame, then restore flags and registers in reverse push order */
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+
+
+.section .pdata	/* one three-.rva record per exported function. NOTE(review): presumably RUNTIME_FUNCTION entries (begin, end, unwind info) - confirm */
+.p2align 2	/* 4-byte alignment, matching the 32-bit .rva fields */
+.rva .LSEH_begin_AES_encrypt	/* start of the covered code range */
+.rva .LSEH_end_AES_encrypt	/* end of the covered code range */
+.rva .LSEH_info_AES_encrypt	/* matching record in .xdata */
+
+.rva .LSEH_begin_AES_decrypt
+.rva .LSEH_end_AES_decrypt
+.rva .LSEH_info_AES_decrypt
+
+.rva .LSEH_begin_AES_set_encrypt_key
+.rva .LSEH_end_AES_set_encrypt_key
+.rva .LSEH_info_AES_set_encrypt_key
+
+.rva .LSEH_begin_AES_set_decrypt_key
+.rva .LSEH_end_AES_set_decrypt_key
+.rva .LSEH_info_AES_set_decrypt_key
+
+.rva .LSEH_begin_AES_cbc_encrypt
+.rva .LSEH_end_AES_cbc_encrypt
+.rva .LSEH_info_AES_cbc_encrypt
+
+.section .xdata	/* per-function records referenced from .pdata: 4 header bytes, handler .rva, optional label .rva words */
+.p2align 3	/* 8-byte alignment for the first record */
+.LSEH_info_AES_encrypt:
+.byte 9,0,0,0	/* NOTE(review): presumably an UNWIND_INFO header - version 1 with the exception-handler flag (0x09), no prologue size, no unwind codes, no frame register - confirm */
+.rva block_se_handler
+.rva .Lenc_prologue,.Lenc_epilogue	/* the two words block_se_handler reads at offsets 0 and 4 of its handler data */
+.LSEH_info_AES_decrypt:
+.byte 9,0,0,0
+.rva block_se_handler
+.rva .Ldec_prologue,.Ldec_epilogue	/* same layout, decrypt labels */
+.LSEH_info_AES_set_encrypt_key:
+.byte 9,0,0,0
+.rva key_se_handler
+.rva .Lenc_key_prologue,.Lenc_key_epilogue	/* the two words key_se_handler reads at offsets 0 and 4 of its handler data */
+.LSEH_info_AES_set_decrypt_key:
+.byte 9,0,0,0
+.rva key_se_handler
+.rva .Ldec_key_prologue,.Ldec_key_epilogue	/* same layout, decrypt-key labels */
+.LSEH_info_AES_cbc_encrypt:
+.byte 9,0,0,0
+.rva cbc_se_handler	/* no label words follow: cbc_se_handler takes its label addresses RIP-relative */
diff --git a/ext/libressl/crypto/aes/aes_cbc.c b/ext/libressl/crypto/aes/aes_cbc.c
new file mode 100644
index 0000000..5e76f6e
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_cbc.c
@@ -0,0 +1,65 @@
+/* $OpenBSD: aes_cbc.c,v 1.12 2014/06/12 15:49:27 deraadt Exp $ */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include <openssl/aes.h>
+#include <openssl/modes.h>
+
+void
+AES_cbc_encrypt(const unsigned char *in, unsigned char *out, size_t len,
+    const AES_KEY *key, unsigned char *ivec, const int enc)
+{
+	if (enc == 0) {		/* zero enc: CBC decrypt; any other value encrypts */
+		CRYPTO_cbc128_decrypt(in, out, len, key, ivec,
+		    (block128_f)AES_decrypt);
+		return;
+	}
+	CRYPTO_cbc128_encrypt(in, out, len, key, ivec, (block128_f)AES_encrypt);
+}
diff --git a/ext/libressl/crypto/aes/aes_cfb.c b/ext/libressl/crypto/aes/aes_cfb.c
new file mode 100644
index 0000000..a6384f9
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_cfb.c
@@ -0,0 +1,84 @@
+/* $OpenBSD: aes_cfb.c,v 1.8 2014/06/12 15:49:27 deraadt Exp $ */
+/* ====================================================================
+ * Copyright (c) 2002-2006 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include <openssl/aes.h>
+#include <openssl/modes.h>
+
+/*
+ * 128-bit CFB: *num records how much of the current 128-bit block has
+ * been used; AES_encrypt is the block function for both values of enc.
+ */
+
+void
+AES_cfb128_encrypt(const unsigned char *in, unsigned char *out, size_t length,
+    const AES_KEY *key, unsigned char *ivec, int *num, const int enc)
+{
+	block128_f block = (block128_f)AES_encrypt;
+	CRYPTO_cfb128_encrypt(in, out, length, key, ivec, num, enc, block);
+}
+
+/* N.B. the input is expected to be packed, most significant bit first. */
+void
+AES_cfb1_encrypt(const unsigned char *in, unsigned char *out, size_t length,
+    const AES_KEY *key, unsigned char *ivec, int *num, const int enc)
+{
+	block128_f block = (block128_f)AES_encrypt;
+	CRYPTO_cfb128_1_encrypt(in, out, length, key, ivec, num, enc, block);
+}
+
+void
+AES_cfb8_encrypt(const unsigned char *in, unsigned char *out, size_t length,
+    const AES_KEY *key, unsigned char *ivec, int *num, const int enc)
+{
+	block128_f block = (block128_f)AES_encrypt;	/* for either enc value */
+	CRYPTO_cfb128_8_encrypt(in, out, length, key, ivec, num, enc, block);
+}
+
diff --git a/ext/libressl/crypto/aes/aes_core.c b/ext/libressl/crypto/aes/aes_core.c
new file mode 100644
index 0000000..1b8a24c
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_core.c
@@ -0,0 +1,1374 @@
+/* $OpenBSD: aes_core.c,v 1.13 2015/11/05 21:59:13 miod Exp $ */
+/**
+ * rijndael-alg-fst.c
+ *
+ * @version 3.0 (December 2000)
+ *
+ * Optimised ANSI C code for the Rijndael cipher (now AES)
+ *
+ * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
+ * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
+ * @author Paulo Barreto <paulo.barreto@terra.com.br>
+ *
+ * This code is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Note: rewritten a little bit to provide error control and an OpenSSL-
+ compatible API */
+
+#ifndef AES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#endif
+
+#include <stdlib.h>
+#include <openssl/aes.h>
+#include "aes_locl.h"
+
+#ifndef AES_ASM
+/*
+Te0[x] = S [x].[02, 01, 01, 03];
+Te1[x] = S [x].[03, 02, 01, 01];
+Te2[x] = S [x].[01, 03, 02, 01];
+Te3[x] = S [x].[01, 01, 03, 02];
+
+Td0[x] = Si[x].[0e, 09, 0d, 0b];
+Td1[x] = Si[x].[0b, 0e, 09, 0d];
+Td2[x] = Si[x].[0d, 0b, 0e, 09];
+Td3[x] = Si[x].[09, 0d, 0b, 0e];
+Td4[x] = Si[x].[01];
+*/
+
+static const u32 Te0[256] = {
+ 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
+ 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
+ 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
+ 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
+ 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
+ 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
+ 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
+ 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
+ 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
+ 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
+ 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
+ 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
+ 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
+ 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
+ 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
+ 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
+ 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
+ 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
+ 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
+ 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
+ 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
+ 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
+ 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
+ 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
+ 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
+ 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
+ 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
+ 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
+ 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
+ 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
+ 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
+ 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
+ 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
+ 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
+ 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
+ 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
+ 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
+ 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
+ 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
+ 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
+ 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
+ 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
+ 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
+ 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
+ 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
+ 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
+ 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
+ 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
+ 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
+ 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
+ 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
+ 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
+ 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
+ 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
+ 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
+ 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
+ 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
+ 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
+ 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
+ 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
+ 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
+ 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
+ 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
+ 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
+};
+static const u32 Te1[256] = {
+ 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
+ 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
+ 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
+ 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
+ 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
+ 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
+ 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
+ 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
+ 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
+ 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
+ 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
+ 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
+ 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
+ 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
+ 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
+ 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
+ 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
+ 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
+ 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
+ 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
+ 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
+ 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
+ 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
+ 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
+ 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
+ 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
+ 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
+ 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
+ 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
+ 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
+ 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
+ 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
+ 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
+ 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
+ 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
+ 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
+ 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
+ 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
+ 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
+ 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
+ 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
+ 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
+ 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
+ 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
+ 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
+ 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
+ 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
+ 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
+ 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
+ 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
+ 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
+ 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
+ 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
+ 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
+ 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
+ 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
+ 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
+ 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
+ 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
+ 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
+ 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
+ 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
+ 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
+ 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
+};
+static const u32 Te2[256] = {
+ 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
+ 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
+ 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
+ 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
+ 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
+ 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
+ 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
+ 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
+ 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
+ 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
+ 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
+ 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
+ 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
+ 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
+ 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
+ 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
+ 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
+ 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
+ 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
+ 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
+ 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
+ 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
+ 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
+ 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
+ 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
+ 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
+ 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
+ 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
+ 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
+ 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
+ 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
+ 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
+ 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
+ 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
+ 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
+ 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
+ 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
+ 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
+ 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
+ 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
+ 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
+ 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
+ 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
+ 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
+ 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
+ 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
+ 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
+ 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
+ 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
+ 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
+ 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
+ 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
+ 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
+ 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
+ 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
+ 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
+ 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
+ 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
+ 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
+ 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
+ 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
+ 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
+ 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
+ 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
+};
+static const u32 Te3[256] = {
+ 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
+ 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
+ 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
+ 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
+ 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
+ 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
+ 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
+ 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
+ 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
+ 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
+ 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
+ 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
+ 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
+ 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
+ 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
+ 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
+ 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
+ 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
+ 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
+ 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
+ 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
+ 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
+ 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
+ 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
+ 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
+ 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
+ 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
+ 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
+ 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
+ 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
+ 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
+ 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
+ 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
+ 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
+ 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
+ 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
+ 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
+ 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
+ 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
+ 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
+ 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
+ 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
+ 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
+ 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
+ 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
+ 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
+ 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
+ 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
+ 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
+ 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
+ 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
+ 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
+ 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
+ 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
+ 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
+ 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
+ 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
+ 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
+ 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
+ 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
+ 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
+ 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
+ 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
+ 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
+};
+
+static const u32 Td0[256] = {
+ 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
+ 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
+ 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
+ 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
+ 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
+ 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
+ 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
+ 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
+ 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
+ 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
+ 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
+ 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
+ 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
+ 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
+ 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
+ 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
+ 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
+ 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
+ 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
+ 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
+ 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
+ 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
+ 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
+ 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
+ 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
+ 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
+ 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
+ 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
+ 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
+ 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
+ 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
+ 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
+ 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
+ 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
+ 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
+ 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
+ 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
+ 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
+ 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
+ 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
+ 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
+ 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
+ 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
+ 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
+ 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
+ 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
+ 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
+ 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
+ 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
+ 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
+ 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
+ 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
+ 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
+ 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
+ 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
+ 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
+ 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
+ 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
+ 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
+ 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
+ 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
+ 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
+ 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
+ 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
+};
+static const u32 Td1[256] = {
+ 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
+ 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
+ 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
+ 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
+ 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
+ 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
+ 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
+ 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
+ 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
+ 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
+ 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
+ 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
+ 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
+ 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
+ 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
+ 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
+ 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
+ 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
+ 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
+ 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
+ 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
+ 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
+ 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
+ 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
+ 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
+ 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
+ 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
+ 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
+ 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
+ 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
+ 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
+ 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
+ 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
+ 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
+ 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
+ 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
+ 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
+ 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
+ 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
+ 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
+ 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
+ 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
+ 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
+ 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
+ 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
+ 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
+ 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
+ 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
+ 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
+ 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
+ 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
+ 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
+ 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
+ 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
+ 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
+ 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
+ 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
+ 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
+ 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
+ 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
+ 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
+ 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
+ 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
+ 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
+};
+static const u32 Td2[256] = {
+ 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
+ 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
+ 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
+ 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
+ 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
+ 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
+ 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
+ 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
+ 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
+ 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
+ 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
+ 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
+ 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
+ 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
+ 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
+ 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
+ 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
+ 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
+ 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
+ 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
+ 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
+ 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
+ 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
+ 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
+ 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
+ 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
+ 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
+ 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
+ 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
+ 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
+ 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
+ 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
+ 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
+ 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
+ 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
+ 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
+ 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
+ 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
+ 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
+ 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
+ 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
+ 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
+ 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
+ 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
+ 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
+ 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
+ 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
+ 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
+ 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
+ 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
+ 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
+ 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
+ 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
+ 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
+ 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
+ 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
+ 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
+ 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
+ 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
+ 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
+ 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
+ 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
+ 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
+ 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
+};
+static const u32 Td3[256] = {
+ 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
+ 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
+ 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
+ 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
+ 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
+ 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
+ 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
+ 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
+ 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
+ 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
+ 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
+ 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
+ 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
+ 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
+ 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
+ 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
+ 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
+ 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
+ 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
+ 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
+ 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
+ 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
+ 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
+ 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
+ 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
+ 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
+ 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
+ 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
+ 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
+ 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
+ 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
+ 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
+ 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
+ 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
+ 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
+ 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
+ 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
+ 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
+ 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
+ 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
+ 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
+ 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
+ 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
+ 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
+ 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
+ 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
+ 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
+ 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
+ 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
+ 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
+ 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
+ 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
+ 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
+ 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
+ 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
+ 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
+ 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
+ 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
+ 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
+ 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
+ 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
+ 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
+ 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
+ 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
+};
+static const u8 Td4[256] = {
+ 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
+ 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
+ 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
+ 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
+ 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
+ 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
+ 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
+ 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
+ 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
+ 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
+ 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
+ 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
+ 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
+ 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
+ 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
+ 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
+ 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
+ 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
+ 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
+ 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
+ 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
+ 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
+ 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
+ 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
+ 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
+ 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
+ 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
+ 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
+ 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
+ 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
+ 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
+ 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
+};
+static const u32 rcon[] = { /* Round constants: AES_set_encrypt_key XORs
+ * rcon[i] into the first word produced by each expansion step. Only the
+ * most-significant byte of each entry is non-zero. */
+ 0x01000000, 0x02000000, 0x04000000, 0x08000000,
+ 0x10000000, 0x20000000, 0x40000000, 0x80000000,
+ 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+};
+
+/**
+ * Expand the cipher key into the encryption key schedule.
+ *
+ * userKey: raw key material; bits/8 bytes are consumed, four at a time,
+ *          through the GETU32 macro (defined outside this file).
+ * bits:    key length, which must be 128, 192 or 256.
+ * key:     receives the schedule in key->rd_key and the round count in
+ *          key->rounds (10, 12 or 14 for the three key lengths).
+ *
+ * Returns 0 on success, -1 if userKey or key is NULL, and -2 if bits is
+ * not one of the three supported lengths.
+ *
+ * Every expansion step derives its first new word from the last word of
+ * the preceding group (temp). Each masked Te0..Te3 lookup keeps a single
+ * byte of a table entry; the bytes of temp selected by (temp >> 16),
+ * (temp >> 8), temp and (temp >> 24) land in successively lower byte
+ * positions, so temp is rotated left by one byte while being substituted.
+ * rcon[i] is XORed into that word, and the remaining words of the group
+ * are running XORs with the words one key length earlier. rk is advanced
+ * after each step so the same small indices address the next group.
+ *
+ * Words written to key->rd_key: 4 + 10*4 = 44 for 128-bit keys,
+ * 6 + 7*6 + 4 = 52 for 192-bit keys, 8 + 6*8 + 4 = 60 for 256-bit keys.
+ */
+int
+AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)
+{
+ u32 *rk;
+ int i = 0; /* index of the next rcon entry to use */
+ u32 temp;
+
+ if (!userKey || !key)
+ return -1;
+ if (bits != 128 && bits != 192 && bits != 256)
+ return -2;
+
+ rk = key->rd_key;
+
+ if (bits == 128)
+ key->rounds = 10;
+ else if (bits == 192)
+ key->rounds = 12;
+ else
+ key->rounds = 14;
+
+ rk[0] = GETU32(userKey);
+ rk[1] = GETU32(userKey + 4);
+ rk[2] = GETU32(userKey + 8);
+ rk[3] = GETU32(userKey + 12);
+ if (bits == 128) {
+ while (1) {
+ temp = rk[3];
+ rk[4] = rk[0] ^
+ (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te0[(temp) & 0xff] & 0x0000ff00) ^
+ (Te1[(temp >> 24)] & 0x000000ff) ^
+ rcon[i];
+ rk[5] = rk[1] ^ rk[4];
+ rk[6] = rk[2] ^ rk[5];
+ rk[7] = rk[3] ^ rk[6];
+ if (++i == 10) { /* tenth 4-word group written: schedule complete */
+ return 0;
+ }
+ rk += 4; /* slide the window onto the words just produced */
+ }
+ }
+ rk[4] = GETU32(userKey + 16);
+ rk[5] = GETU32(userKey + 20);
+ if (bits == 192) {
+ while (1) {
+ temp = rk[5];
+ rk[6] = rk[ 0] ^
+ (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te0[(temp) & 0xff] & 0x0000ff00) ^
+ (Te1[(temp >> 24)] & 0x000000ff) ^
+ rcon[i];
+ rk[7] = rk[1] ^ rk[6];
+ rk[8] = rk[2] ^ rk[7];
+ rk[9] = rk[3] ^ rk[8];
+ if (++i == 8) { /* eighth pass stops after 4 words instead of 6 */
+ return 0;
+ }
+ rk[10] = rk[4] ^ rk[9];
+ rk[11] = rk[5] ^ rk[10];
+ rk += 6; /* slide the window onto the words just produced */
+ }
+ }
+ rk[6] = GETU32(userKey + 24);
+ rk[7] = GETU32(userKey + 28);
+ if (bits == 256) {
+ while (1) {
+ temp = rk[7];
+ rk[8] = rk[0] ^
+ (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te0[(temp) & 0xff] & 0x0000ff00) ^
+ (Te1[(temp >> 24)] & 0x000000ff) ^
+ rcon[i];
+ rk[9] = rk[1] ^ rk[8];
+ rk[10] = rk[2] ^ rk[9];
+ rk[11] = rk[3] ^ rk[10];
+ if (++i == 7) { /* seventh pass stops after the first 4 words */
+ return 0;
+ }
+ temp = rk[11]; /* second half: bytes substituted in place, no rotation, no rcon */
+ rk[12] = rk[4] ^
+ (Te2[(temp >> 24)] & 0xff000000) ^
+ (Te3[(temp >> 16) & 0xff] & 0x00ff0000) ^
+ (Te0[(temp >> 8) & 0xff] & 0x0000ff00) ^
+ (Te1[(temp) & 0xff] & 0x000000ff);
+ rk[13] = rk[5] ^ rk[12];
+ rk[14] = rk[6] ^ rk[13];
+ rk[15] = rk[7] ^ rk[14];
+
+ rk += 8; /* slide the window onto the words just produced */
+ }
+ }
+ return 0; /* not reached: each accepted key length returns from its own loop */
+}
+
+/**
+ * Expand the cipher key into the decryption key schedule.
+ *
+ * Builds the encryption schedule with AES_set_encrypt_key(), reverses the
+ * order of the round keys, then runs every round key except the first and
+ * the last through the Te1/Td0..Td3 table composition.
+ *
+ * Returns 0 on success, or the negative status reported by
+ * AES_set_encrypt_key().
+ */
+int
+AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)
+{
+	u32 *w;
+	u32 word;
+	int head, tail, col, round, rc;
+
+	/* the decryption schedule is derived from the encryption one */
+	rc = AES_set_encrypt_key(userKey, bits, key);
+	if (rc < 0)
+		return rc;
+
+	w = key->rd_key;
+
+	/* swap round keys pairwise from both ends towards the middle */
+	head = 0;
+	tail = 4 * (key->rounds);
+	while (head < tail) {
+		for (col = 0; col < 4; col++) {
+			word = w[head + col];
+			w[head + col] = w[tail + col];
+			w[tail + col] = word;
+		}
+		head += 4;
+		tail -= 4;
+	}
+
+	/*
+	 * apply the inverse MixColumn transform to all round keys
+	 * but the first and the last
+	 */
+	for (round = 1; round < (key->rounds); round++) {
+		w += 4;
+		for (col = 0; col < 4; col++) {
+			word = w[col];
+			w[col] =
+			    Td0[Te1[(word >> 24)] & 0xff] ^
+			    Td1[Te1[(word >> 16) & 0xff] & 0xff] ^
+			    Td2[Te1[(word >> 8) & 0xff] & 0xff] ^
+			    Td3[Te1[(word) & 0xff] & 0xff];
+		}
+	}
+	return 0;
+}
+
+/*
+ * Encrypt a single block
+ * in and out can overlap
+ *
+ * Loads four words from in (offsets 0, 4, 8 and 12) with GETU32, runs them
+ * through the rounds selected by key->rounds using the round keys in
+ * key->rd_key, and stores four words to out with PUTU32. Every input word
+ * is held in a local before the first store to out.
+ *
+ * No argument is checked here; key is expected to have been filled in by
+ * AES_set_encrypt_key().
+ */
+void
+AES_encrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key)
+{
+ const u32 *rk;
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+
+ rk = key->rd_key;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(in ) ^ rk[0];
+ s1 = GETU32(in + 4) ^ rk[1];
+ s2 = GETU32(in + 8) ^ rk[2];
+ s3 = GETU32(in + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+ /*
+ * Straight-line variant: the state alternates between s0..s3 and
+ * t0..t3, and round n reads its key from rk[4n..4n+3].
+ */
+ /* round 1: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
+ /* schedules with more than 10 rounds run two or four extra rounds */
+ if (key->rounds > 10) {
+ /* round 10: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
+ if (key->rounds > 12) {
+ /* round 12: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
+ }
+ }
+ /* step rk to the final round key, which the last round reads as rk[0..3] */
+ rk += key->rounds << 2;
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ /* each pass runs two rounds, except the last pass which stops after one */
+ r = key->rounds >> 1;
+ for (;;) {
+ /* first round of the pass: s -> t, keyed by rk[4..7] */
+ t0 =
+ Te0[(s0 >> 24)] ^
+ Te1[(s1 >> 16) & 0xff] ^
+ Te2[(s2 >> 8) & 0xff] ^
+ Te3[(s3) & 0xff] ^
+ rk[4];
+ t1 =
+ Te0[(s1 >> 24)] ^
+ Te1[(s2 >> 16) & 0xff] ^
+ Te2[(s3 >> 8) & 0xff] ^
+ Te3[(s0) & 0xff] ^
+ rk[5];
+ t2 =
+ Te0[(s2 >> 24)] ^
+ Te1[(s3 >> 16) & 0xff] ^
+ Te2[(s0 >> 8) & 0xff] ^
+ Te3[(s1) & 0xff] ^
+ rk[6];
+ t3 =
+ Te0[(s3 >> 24)] ^
+ Te1[(s0 >> 16) & 0xff] ^
+ Te2[(s1 >> 8) & 0xff] ^
+ Te3[(s2) & 0xff] ^
+ rk[7];
+
+ /* move two round keys ahead; on exit rk points at the final round key */
+ rk += 8;
+ if (--r == 0) {
+ break;
+ }
+
+ /* second round of the pass: t -> s, keyed by rk[0..3] of the advanced pointer */
+ s0 =
+ Te0[(t0 >> 24)] ^
+ Te1[(t1 >> 16) & 0xff] ^
+ Te2[(t2 >> 8) & 0xff] ^
+ Te3[(t3) & 0xff] ^
+ rk[0];
+ s1 =
+ Te0[(t1 >> 24)] ^
+ Te1[(t2 >> 16) & 0xff] ^
+ Te2[(t3 >> 8) & 0xff] ^
+ Te3[(t0) & 0xff] ^
+ rk[1];
+ s2 =
+ Te0[(t2 >> 24)] ^
+ Te1[(t3 >> 16) & 0xff] ^
+ Te2[(t0 >> 8) & 0xff] ^
+ Te3[(t1) & 0xff] ^
+ rk[2];
+ s3 =
+ Te0[(t3 >> 24)] ^
+ Te1[(t0 >> 16) & 0xff] ^
+ Te2[(t1 >> 8) & 0xff] ^
+ Te3[(t2) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ /* each mask keeps a single byte lane of the table entry it is applied to */
+ s0 =
+ (Te2[(t0 >> 24)] & 0xff000000) ^
+ (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te0[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te1[(t3) & 0xff] & 0x000000ff) ^
+ rk[0];
+ PUTU32(out, s0);
+ s1 =
+ (Te2[(t1 >> 24)] & 0xff000000) ^
+ (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te0[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te1[(t0) & 0xff] & 0x000000ff) ^
+ rk[1];
+ PUTU32(out + 4, s1);
+ s2 =
+ (Te2[(t2 >> 24)] & 0xff000000) ^
+ (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te0[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te1[(t1) & 0xff] & 0x000000ff) ^
+ rk[2];
+ PUTU32(out + 8, s2);
+ s3 =
+ (Te2[(t3 >> 24)] & 0xff000000) ^
+ (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te0[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te1[(t2) & 0xff] & 0x000000ff) ^
+ rk[3];
+ PUTU32(out + 12, s3);
+}
+
+/*
+ * Decrypt a single block
+ * in and out can overlap
+ *
+ * Loads four words from in (offsets 0, 4, 8 and 12) with GETU32, runs them
+ * through the rounds selected by key->rounds using the round keys in
+ * key->rd_key, and stores four words to out with PUTU32. Every input word
+ * is held in a local before the first store to out.
+ *
+ * No argument is checked here; key is expected to have been filled in by
+ * AES_set_decrypt_key().
+ */
+void
+AES_decrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key)
+{
+ const u32 *rk;
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+
+ rk = key->rd_key;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(in) ^ rk[0];
+ s1 = GETU32(in + 4) ^ rk[1];
+ s2 = GETU32(in + 8) ^ rk[2];
+ s3 = GETU32(in + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+ /*
+ * Straight-line variant: the state alternates between s0..s3 and
+ * t0..t3, and round n reads its key from rk[4n..4n+3].
+ */
+ /* round 1: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
+ /* schedules with more than 10 rounds run two or four extra rounds */
+ if (key->rounds > 10) {
+ /* round 10: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
+ if (key->rounds > 12) {
+ /* round 12: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
+ }
+ }
+ /* step rk to the final round key, which the last round reads as rk[0..3] */
+ rk += key->rounds << 2;
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ /* each pass runs two rounds, except the last pass which stops after one */
+ r = key->rounds >> 1;
+ for (;;) {
+ /* first round of the pass: s -> t, keyed by rk[4..7] */
+ t0 =
+ Td0[(s0 >> 24)] ^
+ Td1[(s3 >> 16) & 0xff] ^
+ Td2[(s2 >> 8) & 0xff] ^
+ Td3[(s1) & 0xff] ^
+ rk[4];
+ t1 =
+ Td0[(s1 >> 24)] ^
+ Td1[(s0 >> 16) & 0xff] ^
+ Td2[(s3 >> 8) & 0xff] ^
+ Td3[(s2) & 0xff] ^
+ rk[5];
+ t2 =
+ Td0[(s2 >> 24)] ^
+ Td1[(s1 >> 16) & 0xff] ^
+ Td2[(s0 >> 8) & 0xff] ^
+ Td3[(s3) & 0xff] ^
+ rk[6];
+ t3 =
+ Td0[(s3 >> 24)] ^
+ Td1[(s2 >> 16) & 0xff] ^
+ Td2[(s1 >> 8) & 0xff] ^
+ Td3[(s0) & 0xff] ^
+ rk[7];
+
+ /* move two round keys ahead; on exit rk points at the final round key */
+ rk += 8;
+ if (--r == 0) {
+ break;
+ }
+
+ /* second round of the pass: t -> s, keyed by rk[0..3] of the advanced pointer */
+ s0 =
+ Td0[(t0 >> 24)] ^
+ Td1[(t3 >> 16) & 0xff] ^
+ Td2[(t2 >> 8) & 0xff] ^
+ Td3[(t1) & 0xff] ^
+ rk[0];
+ s1 =
+ Td0[(t1 >> 24)] ^
+ Td1[(t0 >> 16) & 0xff] ^
+ Td2[(t3 >> 8) & 0xff] ^
+ Td3[(t2) & 0xff] ^
+ rk[1];
+ s2 =
+ Td0[(t2 >> 24)] ^
+ Td1[(t1 >> 16) & 0xff] ^
+ Td2[(t0 >> 8) & 0xff] ^
+ Td3[(t3) & 0xff] ^
+ rk[2];
+ s3 =
+ Td0[(t3 >> 24)] ^
+ Td1[(t2 >> 16) & 0xff] ^
+ Td2[(t1 >> 8) & 0xff] ^
+ Td3[(t0) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ /*
+ * Td4 is indexed per byte here; the top-lane entry is widened to
+ * uint32_t before being shifted left by 24.
+ */
+ s0 =
+ (((uint32_t)Td4[(t0 >> 24)]) << 24) ^
+ (Td4[(t3 >> 16) & 0xff] << 16) ^
+ (Td4[(t2 >> 8) & 0xff] << 8) ^
+ (Td4[(t1) & 0xff]) ^
+ rk[0];
+ PUTU32(out, s0);
+ s1 =
+ (((uint32_t)Td4[(t1 >> 24)]) << 24) ^
+ (Td4[(t0 >> 16) & 0xff] << 16) ^
+ (Td4[(t3 >> 8) & 0xff] << 8) ^
+ (Td4[(t2) & 0xff]) ^
+ rk[1];
+ PUTU32(out + 4, s1);
+ s2 =
+ (((uint32_t)Td4[(t2 >> 24)]) << 24) ^
+ (Td4[(t1 >> 16) & 0xff] << 16) ^
+ (Td4[(t0 >> 8) & 0xff] << 8) ^
+ (Td4[(t3) & 0xff]) ^
+ rk[2];
+ PUTU32(out + 8, s2);
+ s3 =
+ (((uint32_t)Td4[(t3 >> 24)]) << 24) ^
+ (Td4[(t2 >> 16) & 0xff] << 16) ^
+ (Td4[(t1 >> 8) & 0xff] << 8) ^
+ (Td4[(t0) & 0xff]) ^
+ rk[3];
+ PUTU32(out + 12, s3);
+}
+
+#else /* AES_ASM */
+
+/*
+ * 256-entry byte substitution table (the AES forward S-box: entry 0x00 is
+ * 0x63). The key expansion below indexes it with one byte of a schedule
+ * word at a time and shifts the result into the wanted byte lane.
+ */
+static const u8 Te4[256] = {
+ 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
+ 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
+ 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
+ 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
+ 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
+ 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
+ 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
+ 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
+ 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
+ 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
+ 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
+ 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
+ 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
+ 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
+ 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
+ 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
+ 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
+ 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
+ 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
+ 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
+ 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
+ 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
+ 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
+ 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
+ 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
+ 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
+ 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
+ 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
+ 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
+ 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
+ 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
+ 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
+};
+/*
+ * Key schedule round constants. AES_set_encrypt_key() XORs rcon[i] into the
+ * first word of each newly derived group of schedule words; every entry is
+ * non-zero only in its most significant byte.
+ */
+static const u32 rcon[] = {
+ 0x01000000, 0x02000000, 0x04000000, 0x08000000,
+ 0x10000000, 0x20000000, 0x40000000, 0x80000000,
+ 0x1B000000, 0x36000000,
+ /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+};
+
+/**
+ * Expand the cipher key into the encryption key schedule.
+ *
+ * This is the variant in the "#else" (AES_ASM) branch: it needs only the
+ * Te4 byte table and rcon.
+ *
+ * userKey supplies bits/8 key bytes and bits must be 128, 192 or 256.
+ * On success key->rounds is set to 10, 12 or 14 respectively and the
+ * expanded words are stored in key->rd_key.
+ *
+ * Returns 0 on success, -1 when userKey or key is a null pointer and
+ * -2 when bits is not a supported key size.
+ *
+ * Te4 holds u8 values, which are promoted to (signed) int in arithmetic.
+ * Shifting an entry >= 0x80 left by 24 would overflow int, which is
+ * undefined behaviour, so every lookup is widened to u32 before it is
+ * shifted into its byte lane.
+ */
+int
+AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)
+{
+	u32 *rk;
+	int i = 0;
+	u32 temp;
+
+	if (!userKey || !key)
+		return -1;
+	if (bits != 128 && bits != 192 && bits != 256)
+		return -2;
+
+	rk = key->rd_key;
+
+	if (bits == 128)
+		key->rounds = 10;
+	else if (bits == 192)
+		key->rounds = 12;
+	else
+		key->rounds = 14;
+
+	/* the first four schedule words are the first 16 key bytes */
+	rk[0] = GETU32(userKey);
+	rk[1] = GETU32(userKey + 4);
+	rk[2] = GETU32(userKey + 8);
+	rk[3] = GETU32(userKey + 12);
+	if (bits == 128) {
+		/* ten groups of four words, one rcon entry per group */
+		while (1) {
+			temp = rk[3];
+			rk[4] = rk[0] ^
+			    ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
+			    ((u32)Te4[(temp >> 8) & 0xff] << 16) ^
+			    ((u32)Te4[(temp) & 0xff] << 8) ^
+			    ((u32)Te4[(temp >> 24)]) ^
+			    rcon[i];
+			rk[5] = rk[1] ^ rk[4];
+			rk[6] = rk[2] ^ rk[5];
+			rk[7] = rk[3] ^ rk[6];
+			if (++i == 10) {
+				return 0;
+			}
+			rk += 4;
+		}
+	}
+	/* longer keys contribute two more words up front */
+	rk[4] = GETU32(userKey + 16);
+	rk[5] = GETU32(userKey + 20);
+	if (bits == 192) {
+		/* groups of six words; the eighth group stops after four */
+		while (1) {
+			temp = rk[5];
+			rk[6] = rk[0] ^
+			    ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
+			    ((u32)Te4[(temp >> 8) & 0xff] << 16) ^
+			    ((u32)Te4[(temp) & 0xff] << 8) ^
+			    ((u32)Te4[(temp >> 24)]) ^
+			    rcon[i];
+			rk[7] = rk[1] ^ rk[6];
+			rk[8] = rk[2] ^ rk[7];
+			rk[9] = rk[3] ^ rk[8];
+			if (++i == 8) {
+				return 0;
+			}
+			rk[10] = rk[4] ^ rk[9];
+			rk[11] = rk[5] ^ rk[10];
+			rk += 6;
+		}
+	}
+	rk[6] = GETU32(userKey + 24);
+	rk[7] = GETU32(userKey + 28);
+	if (bits == 256) {
+		/* groups of eight words; the seventh group stops after four */
+		while (1) {
+			temp = rk[7];
+			rk[8] = rk[0] ^
+			    ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
+			    ((u32)Te4[(temp >> 8) & 0xff] << 16) ^
+			    ((u32)Te4[(temp) & 0xff] << 8) ^
+			    ((u32)Te4[(temp >> 24)]) ^
+			    rcon[i];
+			rk[9] = rk[1] ^ rk[8];
+			rk[10] = rk[2] ^ rk[9];
+			rk[11] = rk[3] ^ rk[10];
+			if (++i == 7) {
+				return 0;
+			}
+			/* second half: substitution without byte rotation or rcon */
+			temp = rk[11];
+			rk[12] = rk[4] ^
+			    ((u32)Te4[(temp >> 24)] << 24) ^
+			    ((u32)Te4[(temp >> 16) & 0xff] << 16) ^
+			    ((u32)Te4[(temp >> 8) & 0xff] << 8) ^
+			    ((u32)Te4[(temp) & 0xff]);
+			rk[13] = rk[5] ^ rk[12];
+			rk[14] = rk[6] ^ rk[13];
+			rk[15] = rk[7] ^ rk[14];
+
+			rk += 8;
+		}
+	}
+	return 0;
+}
+
+/**
+ * Expand the cipher key into the decryption key schedule.
+ *
+ * Builds the encryption schedule with AES_set_encrypt_key(), reverses the
+ * order of the round keys, then transforms every round key except the
+ * first and the last, one word at a time, using shifts and masks only
+ * (no lookup tables).
+ *
+ * Returns 0 on success, or the negative status reported by
+ * AES_set_encrypt_key().
+ */
+int
+AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+    AES_KEY *key)
+{
+	u32 *w;
+	u32 word;
+	int head, tail, col, round, rc;
+
+	/* the decryption schedule is derived from the encryption one */
+	rc = AES_set_encrypt_key(userKey, bits, key);
+	if (rc < 0)
+		return rc;
+
+	w = key->rd_key;
+
+	/* swap round keys pairwise from both ends towards the middle */
+	head = 0;
+	tail = 4 * (key->rounds);
+	while (head < tail) {
+		for (col = 0; col < 4; col++) {
+			word = w[head + col];
+			w[head + col] = w[tail + col];
+			w[tail + col] = word;
+		}
+		head += 4;
+		tail -= 4;
+	}
+
+	/*
+	 * apply the inverse MixColumn transform to all round keys
+	 * but the first and the last
+	 */
+	for (round = 1; round < (key->rounds); round++) {
+		w += 4;
+		for (col = 0; col < 4; col++) {
+			u32 x1, x2, x4, x8, x9, xb, xd, xe, top;
+
+			/*
+			 * Double all four bytes of the word at once, three
+			 * times over: bytes whose top bit was set are folded
+			 * back with 0x1b after the shift.
+			 */
+			x1 = w[col];
+			top = x1 & 0x80808080;
+			x2 = ((x1 & 0x7f7f7f7f) << 1) ^
+			    ((top - (top >> 7)) & 0x1b1b1b1b);
+			top = x2 & 0x80808080;
+			x4 = ((x2 & 0x7f7f7f7f) << 1) ^
+			    ((top - (top >> 7)) & 0x1b1b1b1b);
+			top = x4 & 0x80808080;
+			x8 = ((x4 & 0x7f7f7f7f) << 1) ^
+			    ((top - (top >> 7)) & 0x1b1b1b1b);
+
+			/* combine the multiples into the x9, xb, xd and xe terms */
+			x9 = x8 ^ x1;
+			xb = x9 ^ x2;
+			xd = x9 ^ x4;
+			xe = x8 ^ x4 ^ x2;
+#if defined(ROTATE)
+			w[col] = xe ^ ROTATE(xd, 16) ^
+			    ROTATE(x9, 24) ^ ROTATE(xb, 8);
+#else
+			w[col] = xe ^ (xd >> 16) ^ (xd << 16) ^
+			    (x9 >> 8) ^ (x9 << 24) ^
+			    (xb >> 24) ^ (xb << 8);
+#endif
+		}
+	}
+	return 0;
+}
+
+#endif /* AES_ASM */
diff --git a/ext/libressl/crypto/aes/aes_ctr.c b/ext/libressl/crypto/aes/aes_ctr.c
new file mode 100644
index 0000000..6079145
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_ctr.c
@@ -0,0 +1,62 @@
+/* $OpenBSD: aes_ctr.c,v 1.9 2014/06/12 15:49:27 deraadt Exp $ */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include <openssl/aes.h>
+#include <openssl/modes.h>
+
+/*
+ * Thin wrapper: hands all arguments to CRYPTO_ctr128_encrypt() and
+ * supplies AES_encrypt, cast to block128_f, as the block function.
+ */
+void
+AES_ctr128_encrypt(const unsigned char *in, unsigned char *out,
+    size_t length, const AES_KEY *key, unsigned char ivec[AES_BLOCK_SIZE],
+    unsigned char ecount_buf[AES_BLOCK_SIZE], unsigned int *num)
+{
+	block128_f cipher = (block128_f)AES_encrypt;
+
+	CRYPTO_ctr128_encrypt(in, out, length, key, ivec, ecount_buf, num,
+	    cipher);
+}
diff --git a/ext/libressl/crypto/aes/aes_ecb.c b/ext/libressl/crypto/aes/aes_ecb.c
new file mode 100644
index 0000000..b05e539
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_ecb.c
@@ -0,0 +1,69 @@
+/* $OpenBSD: aes_ecb.c,v 1.6 2015/02/10 09:46:30 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#ifndef AES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#endif
+
+#include <openssl/aes.h>
+#include "aes_locl.h"
+
+/*
+ * Process one block with the given key: calls AES_encrypt() when enc
+ * equals AES_ENCRYPT and AES_decrypt() for any other value.
+ */
+void
+AES_ecb_encrypt(const unsigned char *in, unsigned char *out,
+    const AES_KEY *key, const int enc)
+{
+	if (enc != AES_ENCRYPT) {
+		AES_decrypt(in, out, key);
+		return;
+	}
+	AES_encrypt(in, out, key);
+}
diff --git a/ext/libressl/crypto/aes/aes_ige.c b/ext/libressl/crypto/aes/aes_ige.c
new file mode 100644
index 0000000..85b7f69
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_ige.c
@@ -0,0 +1,194 @@
+/* $OpenBSD: aes_ige.c,v 1.7 2015/02/10 09:46:30 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 2006 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include <openssl/aes.h>
+#include <openssl/crypto.h>
+
+#include "aes_locl.h"
+
+/* Number of unsigned long words needed to hold one AES block. */
+#define N_WORDS (AES_BLOCK_SIZE / sizeof(unsigned long))
+/* One AES block viewed as an array of machine words, so it can be XORed
+ * word-by-word and copied by struct assignment. */
+typedef struct {
+ unsigned long data[N_WORDS];
+} aes_block_t;
+
+/* XXX: probably some better way to do this */
+/* Set to 1 on i386/x86_64, 0 everywhere else. */
+#if defined(__i386__) || defined(__x86_64__)
+#define UNALIGNED_MEMOPS_ARE_FAST 1
+#else
+#define UNALIGNED_MEMOPS_ARE_FAST 0
+#endif
+
+/*
+ * load_block(d, s):  copy one block from byte pointer s into aes_block_t d.
+ * store_block(d, s): copy aes_block_t s out to byte pointer d.
+ * The fast variants dereference the byte pointer as an aes_block_t;
+ * the portable variants go through memcpy().
+ * NOTE(review): the cast-based variants assume the compiler tolerates this
+ * type punning and possibly unaligned access - confirm for new targets.
+ */
+#if UNALIGNED_MEMOPS_ARE_FAST
+#define load_block(d, s) (d) = *(const aes_block_t *)(s)
+#define store_block(d, s) *(aes_block_t *)(d) = (s)
+#else
+#define load_block(d, s) memcpy((d).data, (s), AES_BLOCK_SIZE)
+#define store_block(d, s) memcpy((d), (s).data, AES_BLOCK_SIZE)
+#endif
+
+/* N.B. The IV for this mode is _twice_ the block size */
+
+/*
+ * AES in IGE (Infinite Garble Extension) mode.
+ *
+ * in, out: input and output buffers, `length` bytes each; `length` must be
+ *          a multiple of AES_BLOCK_SIZE (enforced with OPENSSL_assert).
+ * key:     expanded key for the requested direction.
+ * ivec:    2 * AES_BLOCK_SIZE bytes.  The first half is chained into the
+ *          cipher input, the second half into the cipher output.  On
+ *          return it holds the chaining values for a follow-up call.
+ * enc:     AES_ENCRYPT to encrypt, anything else to decrypt.
+ *
+ * Encrypt: out[i] = E(in[i] ^ iv) ^ iv2;  then iv = out[i], iv2 = in[i].
+ * Decrypt: out[i] = D(in[i] ^ iv2) ^ iv;  then iv = in[i],  iv2 = out[i].
+ *
+ * Two code paths exist per direction: a pointer-based one that works
+ * directly on the caller's buffers (only when in != out and the buffers are
+ * suitably aligned or unaligned access is cheap), and a copy-based one that
+ * stages every block in local temporaries and therefore also supports
+ * in == out.
+ */
+void
+AES_ige_encrypt(const unsigned char *in, unsigned char *out, size_t length,
+    const AES_KEY *key, unsigned char *ivec, const int enc)
+{
+	size_t n;
+	size_t len;
+
+	OPENSSL_assert((length % AES_BLOCK_SIZE) == 0);
+
+	/*
+	 * With no blocks to process the IV is unchanged.  Returning here
+	 * avoids the pointer-based paths below calling memcpy() with ivec as
+	 * both source and destination, which is undefined for memcpy().
+	 */
+	if (length == 0)
+		return;
+
+	len = length / AES_BLOCK_SIZE;
+
+	if (AES_ENCRYPT == enc) {
+		if (in != out && (UNALIGNED_MEMOPS_ARE_FAST ||
+		    ((size_t)in|(size_t)out|(size_t)ivec) %
+		    sizeof(long) == 0)) {
+			/* Pointer-based path: chain directly through in/out. */
+			aes_block_t *ivp = (aes_block_t *)ivec;
+			aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE);
+
+			while (len) {
+				aes_block_t *inp = (aes_block_t *)in;
+				aes_block_t *outp = (aes_block_t *)out;
+
+				for (n = 0; n < N_WORDS; ++n)
+					outp->data[n] = inp->data[n] ^ ivp->data[n];
+				AES_encrypt((unsigned char *)outp->data,
+				    (unsigned char *)outp->data, key);
+				for (n = 0; n < N_WORDS; ++n)
+					outp->data[n] ^= iv2p->data[n];
+				/* Next block chains on this ciphertext/plaintext. */
+				ivp = outp;
+				iv2p = inp;
+				--len;
+				in += AES_BLOCK_SIZE;
+				out += AES_BLOCK_SIZE;
+			}
+			/* Hand the final chaining values back to the caller. */
+			memcpy(ivec, ivp->data, AES_BLOCK_SIZE);
+			memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE);
+		} else {
+			/* Copy-based path: safe for in == out and unaligned data. */
+			aes_block_t tmp, tmp2;
+			aes_block_t iv;
+			aes_block_t iv2;
+
+			load_block(iv, ivec);
+			load_block(iv2, ivec + AES_BLOCK_SIZE);
+
+			while (len) {
+				/* tmp keeps the plaintext; tmp2 becomes the ciphertext. */
+				load_block(tmp, in);
+				for (n = 0; n < N_WORDS; ++n)
+					tmp2.data[n] = tmp.data[n] ^ iv.data[n];
+				AES_encrypt((unsigned char *)tmp2.data,
+				    (unsigned char *)tmp2.data, key);
+				for (n = 0; n < N_WORDS; ++n)
+					tmp2.data[n] ^= iv2.data[n];
+				store_block(out, tmp2);
+				iv = tmp2;
+				iv2 = tmp;
+				--len;
+				in += AES_BLOCK_SIZE;
+				out += AES_BLOCK_SIZE;
+			}
+			memcpy(ivec, iv.data, AES_BLOCK_SIZE);
+			memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
+		}
+	} else {
+		if (in != out && (UNALIGNED_MEMOPS_ARE_FAST ||
+		    ((size_t)in|(size_t)out|(size_t)ivec) %
+		    sizeof(long) == 0)) {
+			/* Pointer-based path: chain directly through in/out. */
+			aes_block_t *ivp = (aes_block_t *)ivec;
+			aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE);
+
+			while (len) {
+				aes_block_t tmp;
+				aes_block_t *inp = (aes_block_t *)in;
+				aes_block_t *outp = (aes_block_t *)out;
+
+				for (n = 0; n < N_WORDS; ++n)
+					tmp.data[n] = inp->data[n] ^ iv2p->data[n];
+				AES_decrypt((unsigned char *)tmp.data,
+				    (unsigned char *)outp->data, key);
+				for (n = 0; n < N_WORDS; ++n)
+					outp->data[n] ^= ivp->data[n];
+				/* Next block chains on this ciphertext/plaintext. */
+				ivp = inp;
+				iv2p = outp;
+				--len;
+				in += AES_BLOCK_SIZE;
+				out += AES_BLOCK_SIZE;
+			}
+			/* Hand the final chaining values back to the caller. */
+			memcpy(ivec, ivp->data, AES_BLOCK_SIZE);
+			memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE);
+		} else {
+			/* Copy-based path: safe for in == out and unaligned data. */
+			aes_block_t tmp, tmp2;
+			aes_block_t iv;
+			aes_block_t iv2;
+
+			load_block(iv, ivec);
+			load_block(iv2, ivec + AES_BLOCK_SIZE);
+
+			while (len) {
+				/* tmp2 keeps the ciphertext; tmp becomes the plaintext. */
+				load_block(tmp, in);
+				tmp2 = tmp;
+				for (n = 0; n < N_WORDS; ++n)
+					tmp.data[n] ^= iv2.data[n];
+				AES_decrypt((unsigned char *)tmp.data,
+				    (unsigned char *)tmp.data, key);
+				for (n = 0; n < N_WORDS; ++n)
+					tmp.data[n] ^= iv.data[n];
+				store_block(out, tmp);
+				iv = tmp2;
+				iv2 = tmp;
+				--len;
+				in += AES_BLOCK_SIZE;
+				out += AES_BLOCK_SIZE;
+			}
+			memcpy(ivec, iv.data, AES_BLOCK_SIZE);
+			memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
+		}
+	}
+}
diff --git a/ext/libressl/crypto/aes/aes_locl.h b/ext/libressl/crypto/aes/aes_locl.h
new file mode 100644
index 0000000..c47f65d
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_locl.h
@@ -0,0 +1,83 @@
+/* $OpenBSD: aes_locl.h,v 1.11 2016/12/21 15:49:29 jsing Exp $ */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#ifndef HEADER_AES_LOCL_H
+#define HEADER_AES_LOCL_H
+
+#include <openssl/opensslconf.h>
+
+#ifdef OPENSSL_NO_AES
+#error AES is disabled.
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+__BEGIN_HIDDEN_DECLS
+
+/* GETU32: assemble a u32 from four consecutive bytes at pt, most significant
+ * byte first.  PUTU32: store the u32 st into four bytes at ct, most
+ * significant byte first.
+ * NOTE(review): PUTU32 expands to a bare { } block rather than
+ * do { } while (0), so "if (x) PUTU32(a, b); else ..." will not compile. */
+#define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3]))
+#define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); }
+
+/* Short fixed-width aliases; assumes int is 32 bits and short is 16 bits
+ * on every supported target - TODO confirm. */
+typedef unsigned int u32;
+typedef unsigned short u16;
+typedef unsigned char u8;
+
+/* Limits for a 256-bit key: presumably MAXKC = key length in 32-bit words,
+ * MAXKB = key length in bytes, MAXNR = maximum number of rounds. */
+#define MAXKC (256/32)
+#define MAXKB (256/8)
+#define MAXNR 14
+
+/* This controls loop-unrolling in aes_core.c */
+#undef FULL_UNROLL
+
+__END_HIDDEN_DECLS
+
+#endif /* !HEADER_AES_LOCL_H */
diff --git a/ext/libressl/crypto/aes/aes_misc.c b/ext/libressl/crypto/aes/aes_misc.c
new file mode 100644
index 0000000..6c1506d
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_misc.c
@@ -0,0 +1,65 @@
+/* $OpenBSD: aes_misc.c,v 1.10 2014/07/09 11:10:50 bcook Exp $ */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include <openssl/opensslv.h>
+#include <openssl/crypto.h>
+#include <openssl/aes.h>
+#include "aes_locl.h"
+
+/*
+ * Return a static string describing the build-time configuration:
+ * "aes(full)" when FULL_UNROLL is defined, "aes(partial)" otherwise.
+ */
+const char *
+AES_options(void)
+{
+	const char *description;
+
+#ifndef FULL_UNROLL
+	description = "aes(partial)";
+#else
+	description = "aes(full)";
+#endif
+	return description;
+}
diff --git a/ext/libressl/crypto/aes/aes_ofb.c b/ext/libressl/crypto/aes/aes_ofb.c
new file mode 100644
index 0000000..f8dc03a
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_ofb.c
@@ -0,0 +1,61 @@
+/* $OpenBSD: aes_ofb.c,v 1.6 2014/06/12 15:49:27 deraadt Exp $ */
+/* ====================================================================
+ * Copyright (c) 2002-2006 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include <openssl/aes.h>
+#include <openssl/modes.h>
+
+/*
+ * AES front end for the generic 128-bit OFB implementation: forwards every
+ * argument unchanged to CRYPTO_ofb128_encrypt(), with AES_encrypt as the
+ * block function.
+ */
+void
+AES_ofb128_encrypt(const unsigned char *in, unsigned char *out, size_t length,
+    const AES_KEY *key, unsigned char *ivec, int *num)
+{
+	block128_f block_fn = (block128_f)AES_encrypt;
+
+	CRYPTO_ofb128_encrypt(in, out, length, key, ivec, num, block_fn);
+}
diff --git a/ext/libressl/crypto/aes/aes_wrap.c b/ext/libressl/crypto/aes/aes_wrap.c
new file mode 100644
index 0000000..b30630f
--- /dev/null
+++ b/ext/libressl/crypto/aes/aes_wrap.c
@@ -0,0 +1,133 @@
+/* $OpenBSD: aes_wrap.c,v 1.12 2018/11/07 18:31:16 tb Exp $ */
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
+ * project.
+ */
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ */
+
+#include <string.h>
+
+#include <openssl/aes.h>
+#include <openssl/bio.h>
+
+/*
+ * 8-byte initial value used by AES_wrap_key()/AES_unwrap_key() when the
+ * caller passes a NULL iv.  The value matches the default IV of RFC 3394.
+ */
+static const unsigned char default_iv[] = {
+ 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6,
+};
+
+/*
+ * Wrap `inlen` bytes of key material from `in` into `out` using the
+ * already-expanded encryption key `key`.
+ *
+ * iv:    8-byte initial value, or NULL to use default_iv.
+ * out:   receives inlen + 8 bytes (8-byte integrity block, then the data).
+ * inlen: must be a multiple of 8 and at least 16.
+ *
+ * Returns inlen + 8 on success, or -1 if inlen is rejected.
+ */
+int
+AES_wrap_key(AES_KEY *key, const unsigned char *iv, unsigned char *out,
+    const unsigned char *in, unsigned int inlen)
+{
+	/* block[0..7] is the running integrity value, block[8..15] the
+	 * 64-bit chunk currently being processed. */
+	unsigned char block[16];
+	unsigned char *chunk;
+	unsigned int offset, pass, step;
+
+	if (inlen < 16 || (inlen & 0x7) != 0)
+		return -1;
+
+	/* memmove: the caller may pass overlapping in/out buffers. */
+	memmove(out + 8, in, inlen);
+
+	if (iv == NULL)
+		iv = default_iv;
+	memcpy(block, iv, 8);
+
+	/* step counts every processed chunk across all six passes, from 1. */
+	step = 1;
+	for (pass = 0; pass < 6; pass++) {
+		for (offset = 0; offset < inlen; offset += 8) {
+			chunk = out + 8 + offset;
+
+			memcpy(block + 8, chunk, 8);
+			AES_encrypt(block, block, key);
+
+			/* Fold the step counter into the integrity value,
+			 * least significant byte last. */
+			block[7] ^= (unsigned char)(step & 0xff);
+			if (step > 0xff) {
+				block[6] ^= (unsigned char)((step >> 8) & 0xff);
+				block[5] ^= (unsigned char)((step >> 16) & 0xff);
+				block[4] ^= (unsigned char)((step >> 24) & 0xff);
+			}
+
+			memcpy(chunk, block + 8, 8);
+			step++;
+		}
+	}
+
+	memcpy(out, block, 8);
+	return inlen + 8;
+}
+
+/*
+ * Unwrap `inlen` bytes of wrapped key material from `in` into `out` using
+ * the already-expanded decryption key `key`.
+ *
+ * iv:    expected 8-byte initial value, or NULL to expect default_iv.
+ * out:   receives inlen - 8 bytes of recovered data.
+ * inlen: must be a multiple of 8 and at least 24.
+ *
+ * Returns the recovered length (inlen - 8) on success, -1 if inlen is
+ * rejected, and 0 if the recovered integrity value does not match `iv`
+ * (in which case `out` is wiped with explicit_bzero()).
+ */
+int
+AES_unwrap_key(AES_KEY *key, const unsigned char *iv, unsigned char *out,
+    const unsigned char *in, unsigned int inlen)
+{
+	/* block[0..7] is the running integrity value, block[8..15] the
+	 * 64-bit chunk currently being processed. */
+	unsigned char block[16];
+	unsigned char *chunk;
+	unsigned int done, pass, step;
+
+	if (inlen < 24 || (inlen & 0x7) != 0)
+		return -1;
+	inlen -= 8;
+
+	memcpy(block, in, 8);
+	/* memmove: the caller may pass overlapping in/out buffers. */
+	memmove(out, in + 8, inlen);
+
+	/* Replay the wrap counter backwards, starting from its final value. */
+	step = 6 * (inlen >> 3);
+	for (pass = 0; pass < 6; pass++) {
+		/* Walk the chunks from the last one down to the first. */
+		for (done = 0; done < inlen; done += 8) {
+			chunk = out + (inlen - 8 - done);
+
+			block[7] ^= (unsigned char)(step & 0xff);
+			if (step > 0xff) {
+				block[6] ^= (unsigned char)((step >> 8) & 0xff);
+				block[5] ^= (unsigned char)((step >> 16) & 0xff);
+				block[4] ^= (unsigned char)((step >> 24) & 0xff);
+			}
+
+			memcpy(block + 8, chunk, 8);
+			AES_decrypt(block, block, key);
+			memcpy(chunk, block + 8, 8);
+
+			step--;
+		}
+	}
+
+	if (iv == NULL)
+		iv = default_iv;
+	if (memcmp(block, iv, 8) != 0) {
+		/* Integrity check failed: do not leak the bogus plaintext. */
+		explicit_bzero(out, inlen);
+		return 0;
+	}
+	return inlen;
+}
diff --git a/ext/libressl/crypto/aes/aesni-elf-x86_64.S b/ext/libressl/crypto/aes/aesni-elf-x86_64.S
new file mode 100644
index 0000000..3b3dabf
--- /dev/null
+++ b/ext/libressl/crypto/aes/aesni-elf-x86_64.S
@@ -0,0 +1,2539 @@
+#include "x86_arch.h"
+.text
+/*
+ * aesni_encrypt: encrypt a single 16-byte block with AES-NI.
+ * In:    %rdi = input block, %rsi = output block, %rdx = key structure
+ *        (round keys from offset 0; loop count read from offset 240 -
+ *        presumably the rounds field of AES_KEY, confirm against header).
+ * Out:   16 bytes stored at (%rsi).
+ * Clobb: %eax, %rdx, %xmm0-%xmm2, flags.
+ */
+.globl aesni_encrypt
+.type aesni_encrypt,@function
+.align 16
+aesni_encrypt:
+ movups (%rdi),%xmm2
+ movl 240(%rdx),%eax
+ movups (%rdx),%xmm0
+ movups 16(%rdx),%xmm1
+ leaq 32(%rdx),%rdx
+/* xor in the first round key, then one aesenc per loop iteration */
+ xorps %xmm0,%xmm2
+.Loop_enc1_1:
+ aesenc %xmm1,%xmm2
+ decl %eax
+/* movups/leaq leave flags alone, so jnz still tests the decl result */
+ movups (%rdx),%xmm1
+ leaq 16(%rdx),%rdx
+ jnz .Loop_enc1_1
+ aesenclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ retq
+.size aesni_encrypt,.-aesni_encrypt
+
+/*
+ * aesni_decrypt: decrypt a single 16-byte block with AES-NI.
+ * In:    %rdi = input block, %rsi = output block, %rdx = key structure
+ *        (round keys from offset 0; loop count read from offset 240 -
+ *        presumably the rounds field of AES_KEY, confirm against header).
+ * Out:   16 bytes stored at (%rsi).
+ * Clobb: %eax, %rdx, %xmm0-%xmm2, flags.
+ */
+.globl aesni_decrypt
+.type aesni_decrypt,@function
+.align 16
+aesni_decrypt:
+ movups (%rdi),%xmm2
+ movl 240(%rdx),%eax
+ movups (%rdx),%xmm0
+ movups 16(%rdx),%xmm1
+ leaq 32(%rdx),%rdx
+/* xor in the first round key, then one aesdec per loop iteration */
+ xorps %xmm0,%xmm2
+.Loop_dec1_2:
+ aesdec %xmm1,%xmm2
+ decl %eax
+/* movups/leaq leave flags alone, so jnz still tests the decl result */
+ movups (%rdx),%xmm1
+ leaq 16(%rdx),%rdx
+ jnz .Loop_dec1_2
+ aesdeclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ retq
+.size aesni_decrypt, .-aesni_decrypt
+/*
+ * _aesni_encrypt3: encrypt three blocks in parallel (no .globl here, so
+ * the symbol is not exported by this block).
+ * In:    %xmm2-%xmm4 = blocks, %rcx = round keys, %eax = loop count.
+ * Out:   %xmm2-%xmm4 = encrypted blocks.
+ * Clobb: %eax, %rcx, %xmm0, %xmm1, flags.
+ */
+.type _aesni_encrypt3,@function
+.align 16
+_aesni_encrypt3:
+ movups (%rcx),%xmm0
+/* halve the count: every loop iteration applies two round keys */
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+
+.Lenc_loop3:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+/* flags from this decl are consumed by the jnz at the loop bottom */
+ decl %eax
+ aesenc %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop3
+
+/* tail: one more regular round, then the final round */
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ retq
+.size _aesni_encrypt3,.-_aesni_encrypt3
+/*
+ * _aesni_decrypt3: decrypt three blocks in parallel (no .globl here, so
+ * the symbol is not exported by this block).
+ * In:    %xmm2-%xmm4 = blocks, %rcx = round keys, %eax = loop count.
+ * Out:   %xmm2-%xmm4 = decrypted blocks.
+ * Clobb: %eax, %rcx, %xmm0, %xmm1, flags.
+ */
+.type _aesni_decrypt3,@function
+.align 16
+_aesni_decrypt3:
+ movups (%rcx),%xmm0
+/* halve the count: every loop iteration applies two round keys */
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+
+.Ldec_loop3:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+/* flags from this decl are consumed by the jnz at the loop bottom */
+ decl %eax
+ aesdec %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop3
+
+/* tail: one more regular round, then the final round */
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ retq
+.size _aesni_decrypt3,.-_aesni_decrypt3
+/*
+ * _aesni_encrypt4: encrypt four blocks in parallel (no .globl here, so
+ * the symbol is not exported by this block).
+ * In:    %xmm2-%xmm5 = blocks, %rcx = round keys, %eax = loop count.
+ * Out:   %xmm2-%xmm5 = encrypted blocks.
+ * Clobb: %eax, %rcx, %xmm0, %xmm1, flags.
+ */
+.type _aesni_encrypt4,@function
+.align 16
+_aesni_encrypt4:
+ movups (%rcx),%xmm0
+/* halve the count: every loop iteration applies two round keys */
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+
+.Lenc_loop4:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+/* flags from this decl are consumed by the jnz at the loop bottom */
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop4
+
+/* tail: one more regular round, then the final round */
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ retq
+.size _aesni_encrypt4,.-_aesni_encrypt4
+/*
+ * _aesni_decrypt4: decrypt four blocks in parallel (no .globl here, so
+ * the symbol is not exported by this block).
+ * In:    %xmm2-%xmm5 = blocks, %rcx = round keys, %eax = loop count.
+ * Out:   %xmm2-%xmm5 = decrypted blocks.
+ * Clobb: %eax, %rcx, %xmm0, %xmm1, flags.
+ */
+.type _aesni_decrypt4,@function
+.align 16
+_aesni_decrypt4:
+ movups (%rcx),%xmm0
+/* halve the count: every loop iteration applies two round keys */
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+
+.Ldec_loop4:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+/* flags from this decl are consumed by the jnz at the loop bottom */
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop4
+
+/* tail: one more regular round, then the final round */
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ retq
+.size _aesni_decrypt4,.-_aesni_decrypt4
+/*
+ * _aesni_encrypt6: encrypt six blocks in parallel (no .globl here, so
+ * the symbol is not exported by this block).
+ * In:    %xmm2-%xmm7 = blocks, %rcx = round keys, %eax = loop count.
+ * Out:   %xmm2-%xmm7 = encrypted blocks.
+ * Clobb: %eax, %rcx, %xmm0, %xmm1, flags.
+ */
+.type _aesni_encrypt6,@function
+.align 16
+_aesni_encrypt6:
+ movups (%rcx),%xmm0
+/* halve the count: every loop iteration applies two round keys */
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+/* the initial key xor is interleaved with the first aesenc round */
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+/* first round already issued above: count it, then enter mid-loop */
+ decl %eax
+ aesenc %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm7
+ jmp .Lenc_loop6_enter
+.align 16
+.Lenc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+.Lenc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+/* tests the most recent decl; nothing in between modifies flags */
+ jnz .Lenc_loop6
+
+/* tail: one more regular round, then the final round */
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ retq
+.size _aesni_encrypt6,.-_aesni_encrypt6
+/*
+ * _aesni_decrypt6: decrypt six blocks in parallel (no .globl here, so
+ * the symbol is not exported by this block).
+ * In:    %xmm2-%xmm7 = blocks, %rcx = round keys, %eax = loop count.
+ * Out:   %xmm2-%xmm7 = decrypted blocks.
+ * Clobb: %eax, %rcx, %xmm0, %xmm1, flags.
+ */
+.type _aesni_decrypt6,@function
+.align 16
+_aesni_decrypt6:
+ movups (%rcx),%xmm0
+/* halve the count: every loop iteration applies two round keys */
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+/* the initial key xor is interleaved with the first aesdec round */
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+/* first round already issued above: count it, then enter mid-loop */
+ decl %eax
+ aesdec %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm7
+ jmp .Ldec_loop6_enter
+.align 16
+.Ldec_loop6:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+.Ldec_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+/* tests the most recent decl; nothing in between modifies flags */
+ jnz .Ldec_loop6
+
+/* tail: one more regular round, then the final round */
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ retq
+.size _aesni_decrypt6,.-_aesni_decrypt6
+/*
+ * _aesni_encrypt8: encrypt eight blocks in parallel (no .globl here, so
+ * the symbol is not exported by this block).
+ * In:    %xmm2-%xmm9 = blocks, %rcx = round keys, %eax = loop count.
+ * Out:   %xmm2-%xmm9 = encrypted blocks.
+ * Clobb: %eax, %rcx, %xmm0, %xmm1, flags.
+ */
+.type _aesni_encrypt8,@function
+.align 16
+_aesni_encrypt8:
+ movups (%rcx),%xmm0
+/* halve the count: every loop iteration applies two round keys */
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+/* the initial key xor is interleaved with the first aesenc round */
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+/* first round already issued above: count it, then enter mid-loop */
+ decl %eax
+ aesenc %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesenc %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp .Lenc_loop8_enter
+.align 16
+.Lenc_loop8:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+.Lenc_loop8_enter:
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ aesenc %xmm0,%xmm8
+ aesenc %xmm0,%xmm9
+ movups (%rcx),%xmm0
+/* tests the most recent decl; nothing in between modifies flags */
+ jnz .Lenc_loop8
+
+/* tail: one more regular round, then the final round */
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ aesenclast %xmm0,%xmm8
+ aesenclast %xmm0,%xmm9
+ retq
+.size _aesni_encrypt8,.-_aesni_encrypt8
+/*
+ * _aesni_decrypt8: decrypt eight blocks in parallel (no .globl here, so
+ * the symbol is not exported by this block).
+ * In:    %xmm2-%xmm9 = blocks, %rcx = round keys, %eax = loop count.
+ * Out:   %xmm2-%xmm9 = decrypted blocks.
+ * Clobb: %eax, %rcx, %xmm0, %xmm1, flags.
+ */
+.type _aesni_decrypt8,@function
+.align 16
+_aesni_decrypt8:
+ movups (%rcx),%xmm0
+/* halve the count: every loop iteration applies two round keys */
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+/* the initial key xor is interleaved with the first aesdec round */
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+/* first round already issued above: count it, then enter mid-loop */
+ decl %eax
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp .Ldec_loop8_enter
+.align 16
+.Ldec_loop8:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+.Ldec_loop8_enter:
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ aesdec %xmm0,%xmm8
+ aesdec %xmm0,%xmm9
+ movups (%rcx),%xmm0
+/* tests the most recent decl; nothing in between modifies flags */
+ jnz .Ldec_loop8
+
+/* tail: one more regular round, then the final round */
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ aesdeclast %xmm0,%xmm8
+ aesdeclast %xmm0,%xmm9
+ retq
+.size _aesni_decrypt8,.-_aesni_decrypt8
+.globl aesni_ecb_encrypt
+.type aesni_ecb_encrypt,@function
+.align 16
+aesni_ecb_encrypt:
+ andq $-16,%rdx
+ jz .Lecb_ret
+
+ movl 240(%rcx),%eax
+ movups (%rcx),%xmm0
+ movq %rcx,%r11
+ movl %eax,%r10d
+ testl %r8d,%r8d
+ jz .Lecb_decrypt
+
+ cmpq $128,%rdx
+ jb .Lecb_enc_tail
+
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp .Lecb_enc_loop8_enter
+.align 16
+.Lecb_enc_loop8:
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+.Lecb_enc_loop8_enter:
+
+ call _aesni_encrypt8
+
+ subq $128,%rdx
+ jnc .Lecb_enc_loop8
+
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx
+ jz .Lecb_ret
+
+.Lecb_enc_tail:
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb .Lecb_enc_one
+ movups 16(%rdi),%xmm3
+ je .Lecb_enc_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb .Lecb_enc_three
+ movups 48(%rdi),%xmm5
+ je .Lecb_enc_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb .Lecb_enc_five
+ movups 80(%rdi),%xmm7
+ je .Lecb_enc_six
+ movdqu 96(%rdi),%xmm8
+ call _aesni_encrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_3:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_3
+ aesenclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_two:
+ xorps %xmm4,%xmm4
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_three:
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_four:
+ call _aesni_encrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_five:
+ xorps %xmm7,%xmm7
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_six:
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ jmp .Lecb_ret
+
+.align 16
+.Lecb_decrypt:
+ cmpq $128,%rdx
+ jb .Lecb_dec_tail
+
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp .Lecb_dec_loop8_enter
+.align 16
+.Lecb_dec_loop8:
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+.Lecb_dec_loop8_enter:
+
+ call _aesni_decrypt8
+
+ movups (%r11),%xmm0
+ subq $128,%rdx
+ jnc .Lecb_dec_loop8
+
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx
+ jz .Lecb_ret
+
+.Lecb_dec_tail:
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb .Lecb_dec_one
+ movups 16(%rdi),%xmm3
+ je .Lecb_dec_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb .Lecb_dec_three
+ movups 48(%rdi),%xmm5
+ je .Lecb_dec_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb .Lecb_dec_five
+ movups 80(%rdi),%xmm7
+ je .Lecb_dec_six
+ movups 96(%rdi),%xmm8
+ movups (%rcx),%xmm0
+ call _aesni_decrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_4:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_4
+ aesdeclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_two:
+ xorps %xmm4,%xmm4
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_three:
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_four:
+ call _aesni_decrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_five:
+ xorps %xmm7,%xmm7
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_six:
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+
+.Lecb_ret:
+ retq
+.size aesni_ecb_encrypt,.-aesni_ecb_encrypt
+.globl aesni_ccm64_encrypt_blocks # Per 16-byte block: out = in XOR E(counter) and MAC = E(MAC XOR in); both AES streams are run interleaved.
+.type aesni_ccm64_encrypt_blocks,@function # In: rdi=input, rsi=output, rdx=number of blocks, rcx=key schedule, r8=16-byte counter block, r9=16-byte MAC (rewritten on exit).
+.align 16
+aesni_ccm64_encrypt_blocks:
+ movl 240(%rcx),%eax # eax = round-loop count stored at key+240
+ movdqu (%r8),%xmm9 # xmm9 = counter block
+ movdqa .Lincrement64(%rip),%xmm6 # xmm6 = counter step constant (defined elsewhere in the file)
+ movdqa .Lbswap_mask(%rip),%xmm7 # xmm7 = pshufb mask (defined elsewhere; by its name a byte swap - confirm)
+
+ shrl $1,%eax # the round loop below performs two rounds per pass
+ leaq 0(%rcx),%r11 # r11 = key schedule base, preserved across blocks
+ movdqu (%r9),%xmm3 # xmm3 = running MAC
+ movdqa %xmm9,%xmm2 # xmm2 = counter block to encrypt for the first block
+ movl %eax,%r10d # r10d = halved round count, reloaded for each block
+.byte 102,68,15,56,0,207 # hand-encoded pshufb %xmm7,%xmm9: counter is kept shuffled so paddq can step it
+ jmp .Lccm64_enc_outer
+.align 16
+.Lccm64_enc_outer: # one pass per block; the exit test is at the bottom, so rdx is assumed >= 1 on entry
+ movups (%r11),%xmm0 # xmm0 = round key 0
+ movl %r10d,%eax
+ movups (%rdi),%xmm8 # xmm8 = input block
+
+ xorps %xmm0,%xmm2 # counter ^= rk0
+ movups 16(%r11),%xmm1 # xmm1 = round key 1
+ xorps %xmm8,%xmm0 # xmm0 = input ^ rk0
+ leaq 32(%r11),%rcx # rcx walks the remaining round keys
+ xorps %xmm0,%xmm3 # MAC ^= input ^ rk0
+ movups (%rcx),%xmm0 # xmm0 = round key 2
+
+.Lccm64_enc2_loop:
+ aesenc %xmm1,%xmm2
+ decl %eax # sets ZF for the jnz below; the instructions in between leave flags alone
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz .Lccm64_enc2_loop
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ paddq %xmm6,%xmm9 # step the (shuffled) counter for the next block
+ aesenclast %xmm0,%xmm2 # xmm2 = E(counter)
+ aesenclast %xmm0,%xmm3 # xmm3 = updated MAC
+
+ decq %rdx # one block consumed; ZF is tested by the jnz at the end of the pass
+ leaq 16(%rdi),%rdi
+ xorps %xmm2,%xmm8 # output = input ^ E(counter)
+ movdqa %xmm9,%xmm2 # next counter, still shuffled
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215 # hand-encoded pshufb %xmm7,%xmm2: shuffle next counter back before encrypting it
+ jnz .Lccm64_enc_outer
+
+ movups %xmm3,(%r9) # write the final MAC back
+ retq
+.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
+.globl aesni_ccm64_decrypt_blocks # Per 16-byte block: out = in XOR E(counter) and MAC = E(MAC XOR out); the MAC of block i is pipelined with the counter of block i+1.
+.type aesni_ccm64_decrypt_blocks,@function # In: rdi=input, rsi=output, rdx=number of blocks, rcx=key schedule, r8=16-byte counter block, r9=16-byte MAC (rewritten on exit).
+.align 16
+aesni_ccm64_decrypt_blocks:
+ movl 240(%rcx),%eax # eax = round-loop count stored at key+240
+ movups (%r8),%xmm9 # xmm9 = counter block
+ movdqu (%r9),%xmm3 # xmm3 = running MAC
+ movdqa .Lincrement64(%rip),%xmm6 # xmm6 = counter step constant (defined elsewhere in the file)
+ movdqa .Lbswap_mask(%rip),%xmm7 # xmm7 = pshufb mask (defined elsewhere; by its name a byte swap - confirm)
+
+ movaps %xmm9,%xmm2 # xmm2 = first counter block to encrypt
+ movl %eax,%r10d # r10d = full round count, reloaded later
+ movq %rcx,%r11 # r11 = key schedule base
+.byte 102,68,15,56,0,207 # hand-encoded pshufb %xmm7,%xmm9: counter is kept shuffled so paddq can step it
+ movups (%rcx),%xmm0 # single-block encryption of the first counter starts here
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_5:
+ aesenc %xmm1,%xmm2
+ decl %eax # ZF consumed by jnz; movups and leaq in between keep flags
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_5
+ aesenclast %xmm1,%xmm2 # xmm2 = E(first counter)
+ movups (%rdi),%xmm8 # xmm8 = first input block
+ paddq %xmm6,%xmm9 # step the counter
+ leaq 16(%rdi),%rdi
+ jmp .Lccm64_dec_outer
+.align 16
+.Lccm64_dec_outer:
+ xorps %xmm2,%xmm8 # output block = input ^ E(counter)
+ movdqa %xmm9,%xmm2 # next counter, still shuffled
+ movl %r10d,%eax # restore full round count
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215 # hand-encoded pshufb %xmm7,%xmm2: shuffle next counter back before encrypting it
+
+ subq $1,%rdx
+ jz .Lccm64_dec_break # last block: only the MAC update remains
+
+ movups (%r11),%xmm0 # xmm0 = round key 0
+ shrl $1,%eax # paired loop does two rounds per pass
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8 # xmm8 = output block ^ rk0
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm2 # next counter ^= rk0
+ xorps %xmm8,%xmm3 # MAC ^= output block ^ rk0
+ movups (%rcx),%xmm0
+
+.Lccm64_dec2_loop:
+ aesenc %xmm1,%xmm2
+ decl %eax # ZF consumed by the jnz at the end of the pass
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz .Lccm64_dec2_loop
+ movups (%rdi),%xmm8 # fetch the next input block while the last rounds finish
+ paddq %xmm6,%xmm9 # step the counter
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ leaq 16(%rdi),%rdi
+ aesenclast %xmm0,%xmm2 # xmm2 = E(counter) for the block just fetched
+ aesenclast %xmm0,%xmm3 # xmm3 = updated MAC
+ jmp .Lccm64_dec_outer
+
+.align 16
+.Lccm64_dec_break: # fold the final output block into the MAC with a single-block encryption
+
+ movups (%r11),%xmm0
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%r11 # r11 itself walks the round keys here; it is not needed afterwards
+ xorps %xmm8,%xmm3 # MAC ^= output block ^ rk0
+.Loop_enc1_6:
+ aesenc %xmm1,%xmm3
+ decl %eax # eax still holds the full round count loaded from r10d in the outer loop
+ movups (%r11),%xmm1
+ leaq 16(%r11),%r11
+ jnz .Loop_enc1_6
+ aesenclast %xmm1,%xmm3
+ movups %xmm3,(%r9) # write the final MAC back
+ retq
+.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
+.globl aesni_ctr32_encrypt_blocks # Counter-mode encryption with a 32-bit counter held in dword 3 of the counter block; six blocks per main-loop pass.
+.type aesni_ctr32_encrypt_blocks,@function # In: rdi=input, rsi=output, rdx=number of 16-byte blocks, rcx=key schedule, r8=16-byte counter block (only read here).
+.align 16
+aesni_ctr32_encrypt_blocks:
+ cmpq $1,%rdx
+ je .Lctr32_one_shortcut # exactly one block: skip all counter-vector setup
+
+ movdqu (%r8),%xmm14 # xmm14 = counter block
+ movdqa .Lbswap_mask(%rip),%xmm15 # xmm15 = pshufb mask (defined elsewhere; by its name a byte swap - confirm)
+ xorl %eax,%eax
+.byte 102,69,15,58,22,242,3 # hand-encoded pextrd $3,%xmm14,%r10d: pull out the counter dword
+.byte 102,68,15,58,34,240,3 # hand-encoded pinsrd $3,%eax,%xmm14: zero the counter dword in the template
+
+ movl 240(%rcx),%eax # eax = round-loop count stored at key+240
+ bswapl %r10d # counter to host byte order for arithmetic
+ pxor %xmm12,%xmm12
+ pxor %xmm13,%xmm13
+.byte 102,69,15,58,34,226,0 # pinsrd $0,%r10d,%xmm12 (counter+0)
+ leaq 3(%r10),%r11 # r11 = counter+3
+.byte 102,69,15,58,34,235,0 # pinsrd $0,%r11d,%xmm13 (counter+3)
+ incl %r10d
+.byte 102,69,15,58,34,226,1 # pinsrd $1,%r10d,%xmm12 (counter+1)
+ incq %r11
+.byte 102,69,15,58,34,235,1 # pinsrd $1,%r11d,%xmm13 (counter+4)
+ incl %r10d
+.byte 102,69,15,58,34,226,2 # pinsrd $2,%r10d,%xmm12 (counter+2)
+ incq %r11
+.byte 102,69,15,58,34,235,2 # pinsrd $2,%r11d,%xmm13 (counter+5)
+ movdqa %xmm12,-40(%rsp) # keep host-order counters in slots below rsp (no frame is allocated)
+.byte 102,69,15,56,0,231 # pshufb %xmm15,%xmm12
+ movdqa %xmm13,-24(%rsp)
+.byte 102,69,15,56,0,239 # pshufb %xmm15,%xmm13
+
+ pshufd $192,%xmm12,%xmm2 # place one shuffled counter dword per block register; merged with xmm14 by por later
+ pshufd $128,%xmm12,%xmm3
+ pshufd $64,%xmm12,%xmm4
+ cmpq $6,%rdx
+ jb .Lctr32_tail # fewer than six blocks: go straight to the tail dispatcher
+ shrl $1,%eax # six-way round loop does two rounds per pass
+ movq %rcx,%r11 # r11 = key schedule base
+ movl %eax,%r10d # r10d = halved round count
+ subq $6,%rdx
+ jmp .Lctr32_loop6
+
+.align 16
+.Lctr32_loop6: # build six counter blocks, encrypt them together, XOR with six input blocks
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm2 # merge counter dword into the counter-block template
+ movups (%r11),%xmm0 # xmm0 = round key 0
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm3
+ movups 16(%r11),%xmm1 # xmm1 = round key 1
+ pshufd $64,%xmm13,%xmm7
+ por %xmm14,%xmm4
+ por %xmm14,%xmm5
+ xorps %xmm0,%xmm2
+ por %xmm14,%xmm6
+ por %xmm14,%xmm7
+
+
+
+
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx # rcx walks the remaining round keys
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ movdqa .Lincrement32(%rip),%xmm13 # xmm13 = counter step constant (defined elsewhere in the file)
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ movdqa -40(%rsp),%xmm12 # reload host-order counters 0..2
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax # first round pair is partly done above; ZF is tested at the bottom of the round loop
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ jmp .Lctr32_enc_loop6_enter
+.align 16
+.Lctr32_enc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+.Lctr32_enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lctr32_enc_loop6 # flags still come from the decl above
+
+ aesenc %xmm1,%xmm2
+ paddd %xmm13,%xmm12 # advance counters 0..2 by the step constant
+ aesenc %xmm1,%xmm3
+ paddd -24(%rsp),%xmm13 # xmm13 = saved counters 3..5 plus the step constant
+ aesenc %xmm1,%xmm4
+ movdqa %xmm12,-40(%rsp) # save advanced host-order counters for the next pass
+ aesenc %xmm1,%xmm5
+ movdqa %xmm13,-24(%rsp)
+ aesenc %xmm1,%xmm6
+.byte 102,69,15,56,0,231 # pshufb %xmm15,%xmm12
+ aesenc %xmm1,%xmm7
+.byte 102,69,15,56,0,239 # pshufb %xmm15,%xmm13
+
+ aesenclast %xmm0,%xmm2
+ movups (%rdi),%xmm8 # load the six input blocks while the last round retires
+ aesenclast %xmm0,%xmm3
+ movups 16(%rdi),%xmm9
+ aesenclast %xmm0,%xmm4
+ movups 32(%rdi),%xmm10
+ aesenclast %xmm0,%xmm5
+ movups 48(%rdi),%xmm11
+ aesenclast %xmm0,%xmm6
+ movups 64(%rdi),%xmm1
+ aesenclast %xmm0,%xmm7
+ movups 80(%rdi),%xmm0
+ leaq 96(%rdi),%rdi
+
+ xorps %xmm2,%xmm8 # output = input ^ keystream
+ pshufd $192,%xmm12,%xmm2 # start the next three counter blocks right away
+ xorps %xmm3,%xmm9
+ pshufd $128,%xmm12,%xmm3
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ pshufd $64,%xmm12,%xmm4
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ xorps %xmm7,%xmm0
+ movups %xmm1,64(%rsi)
+ movups %xmm0,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movl %r10d,%eax # restore halved round count
+ subq $6,%rdx
+ jnc .Lctr32_loop6 # no borrow: at least six more blocks
+
+ addq $6,%rdx # rdx = leftover blocks (0..5)
+ jz .Lctr32_done
+ movq %r11,%rcx # rcx = key schedule base again for the helpers
+ leal 1(%rax,%rax,1),%eax # eax = 2*half+1, the count the tail paths use
+
+.Lctr32_tail: # 1..5 blocks left; xmm2..xmm4 already hold counter dwords, rcx=key, eax=round count
+ por %xmm14,%xmm2
+ movups (%rdi),%xmm8
+ cmpq $2,%rdx
+ jb .Lctr32_one
+
+ por %xmm14,%xmm3
+ movups 16(%rdi),%xmm9
+ je .Lctr32_two # flags still from cmpq $2 (por and movups keep them)
+
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm4
+ movups 32(%rdi),%xmm10
+ cmpq $4,%rdx
+ jb .Lctr32_three
+
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm5
+ movups 48(%rdi),%xmm11
+ je .Lctr32_four # flags still from cmpq $4
+
+ por %xmm14,%xmm6
+ xorps %xmm7,%xmm7 # five blocks: sixth lane of the 6-way helper gets a zero block
+
+ call _aesni_encrypt6 # helper defined elsewhere in the file
+
+ movups 64(%rdi),%xmm1
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ movups %xmm1,64(%rsi)
+ jmp .Lctr32_done
+
+.align 16
+.Lctr32_one_shortcut: # single-block call: use the counter block from r8 as is
+ movups (%r8),%xmm2
+ movups (%rdi),%xmm8
+ movl 240(%rcx),%eax
+.Lctr32_one: # encrypt xmm2 with a one-block round loop, then XOR with the input in xmm8
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_7:
+ aesenc %xmm1,%xmm2
+ decl %eax # ZF consumed by jnz; movups and leaq in between keep flags
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_7
+ aesenclast %xmm1,%xmm2
+ xorps %xmm2,%xmm8
+ movups %xmm8,(%rsi)
+ jmp .Lctr32_done
+
+.align 16
+.Lctr32_two:
+ xorps %xmm4,%xmm4 # third lane of the 3-way helper gets a zero block
+ call _aesni_encrypt3 # helper defined elsewhere in the file
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ movups %xmm9,16(%rsi)
+ jmp .Lctr32_done
+
+.align 16
+.Lctr32_three:
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ movups %xmm10,32(%rsi)
+ jmp .Lctr32_done
+
+.align 16
+.Lctr32_four:
+ call _aesni_encrypt4 # helper defined elsewhere in the file
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ movups %xmm11,48(%rsi)
+
+.Lctr32_done:
+ retq
+.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
+.globl aesni_xts_encrypt # Tweaked block encryption (XTS style): six blocks per main-loop pass, 1..5 block tail, then byte stealing for a partial last block.
+.type aesni_xts_encrypt,@function # In: rdi=input, rsi=output, rdx=length in bytes, rcx=data key schedule, r8=tweak key schedule, r9=16-byte initial tweak input.
+.align 16
+aesni_xts_encrypt:
+ leaq -104(%rsp),%rsp # reserve 104 bytes; the first 96 hold six saved tweaks
+ movups (%r9),%xmm15 # xmm15 = initial tweak input
+ movl 240(%r8),%eax # eax = round count of the tweak key
+ movl 240(%rcx),%r10d # r10d = round count of the data key
+ movups (%r8),%xmm0 # encrypt the tweak input with the tweak key (single-block loop)
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+.Loop_enc1_8:
+ aesenc %xmm1,%xmm15
+ decl %eax # ZF consumed by jnz; movups and leaq in between keep flags
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_8
+ aesenclast %xmm1,%xmm15 # xmm15 = first tweak
+ movq %rcx,%r11 # r11 = data key schedule base
+ movl %r10d,%eax
+ movq %rdx,%r9 # r9 = original byte length (low 4 bits give the partial tail)
+ andq $-16,%rdx # rdx = bytes in whole blocks
+
+ movdqa .Lxts_magic(%rip),%xmm8 # xmm8 = mask constant used when doubling the tweak (defined elsewhere in the file)
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14 # xmm14 = per-dword mask of tweak dwords with the top bit set
+ pshufd $19,%xmm14,%xmm9 # tweak-doubling step: rearrange masks, paddq doubles each 64-bit half, masked constant is XORed in
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10 # xmm10 = tweak for block 0
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11 # xmm11 = tweak for block 1
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12 # xmm12 = tweak for block 2
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13 # xmm13 = tweak for block 3
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14 # xmm14 keeps the mask for the next doubling
+ pxor %xmm9,%xmm15 # xmm15 = tweak for block 4
+ subq $96,%rdx
+ jc .Lxts_enc_short # fewer than six whole blocks
+
+ shrl $1,%eax # round loop does two rounds per pass ...
+ subl $1,%eax # ... and the last four rounds are unrolled after it
+ movl %eax,%r10d
+ jmp .Lxts_enc_grandloop
+
+.align 16
+.Lxts_enc_grandloop: # six blocks per pass; tweaks for the next pass are computed between the final AES rounds
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14 # xmm14 = tweak for block 4
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15 # xmm15 = tweak for block 5
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2 # input ^= tweak
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0 # xmm0 = round key 0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+
+
+
+ movups 16(%r11),%xmm1 # xmm1 = round key 1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp) # save the six tweaks for the post-encryption XOR
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx # rcx walks the remaining round keys
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax # ZF is tested at the bottom of the round loop
+ movdqa %xmm14,64(%rsp)
+ aesenc %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesenc %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14 # mask for the first doubling done after the round loop
+ jmp .Lxts_enc_loop6_enter
+
+.align 16
+.Lxts_enc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+.Lxts_enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lxts_enc_loop6 # flags still come from the decl above
+
+ pshufd $19,%xmm14,%xmm9 # unrolled final rounds, interleaved with tweak doubling for the next pass
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10 # xmm10 = tweak for block 0 of the next pass
+ paddq %xmm15,%xmm15
+ aesenc %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups 32(%rcx),%xmm0 # xmm0 = last round key
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11 # next-pass tweak 1
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12 # next-pass tweak 2
+ paddq %xmm15,%xmm15
+ aesenclast %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesenclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13 # next-pass tweak 3
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2 # output = E(input ^ tweak) ^ tweak, using the saved tweaks
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15 # xmm15 = next-pass tweak 4
+
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax # restore the reduced round-loop count
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc .Lxts_enc_grandloop # no borrow: at least six more blocks
+
+ leal 3(%rax,%rax,1),%eax # eax = 2*count+3, undoing the shrl/subl above for the tail paths
+ movq %r11,%rcx # rcx = data key schedule base
+ movl %eax,%r10d
+
+.Lxts_enc_short: # 0..5 whole blocks left; xmm10..xmm13 and xmm15 hold the next five tweaks
+ addq $96,%rdx
+ jz .Lxts_enc_done
+
+ cmpq $32,%rdx
+ jb .Lxts_enc_one
+ je .Lxts_enc_two
+
+ cmpq $64,%rdx
+ jb .Lxts_enc_three
+ je .Lxts_enc_four
+
+ pshufd $19,%xmm14,%xmm9 # five blocks: derive one more tweak
+ movdqa %xmm15,%xmm14 # xmm14 = tweak for block 4
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15 # xmm15 = tweak after block 4
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+
+ call _aesni_encrypt6 # helper defined elsewhere in the file; xmm7 lane is not stored
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10 # xmm10 = tweak for the stealing step, if any
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ movdqu %xmm5,48(%rsi)
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_one:
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2 # input ^= tweak
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_9:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_9
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2 # output = E(..) ^ tweak
+ movdqa %xmm11,%xmm10 # xmm10 = next tweak for the stealing step
+ movups %xmm2,(%rsi)
+ leaq 16(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_two:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+
+ call _aesni_encrypt3 # helper defined elsewhere in the file; third lane result is ignored
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10 # next tweak for the stealing step
+ xorps %xmm11,%xmm3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_three:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+
+ call _aesni_encrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10 # next tweak for the stealing step
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_four:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+
+ call _aesni_encrypt4 # helper defined elsewhere in the file
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10 # next tweak for the stealing step
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_done:
+ andq $15,%r9 # r9 = number of trailing bytes beyond the last whole block
+ jz .Lxts_enc_ret
+ movq %r9,%rdx
+
+.Lxts_enc_steal: # byte by byte: tail input goes into the last output block, displaced output bytes become the short final block
+ movzbl (%rdi),%eax
+ movzbl -16(%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,-16(%rsi)
+ movb %cl,0(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz .Lxts_enc_steal
+
+ subq %r9,%rsi # rewind rsi to just past the last whole output block
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups -16(%rsi),%xmm2 # re-encrypt the patched block in place with the tweak in xmm10
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_10:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_10
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,-16(%rsi)
+
+.Lxts_enc_ret:
+ leaq 104(%rsp),%rsp # release the tweak save area
+.Lxts_enc_epilogue:
+ retq
+.size aesni_xts_encrypt,.-aesni_xts_encrypt
+.globl aesni_xts_decrypt # Tweaked block decryption (XTS style): six blocks per main-loop pass, 1..5 block tail, then byte stealing when the length is not a multiple of 16.
+.type aesni_xts_decrypt,@function # In: rdi=input, rsi=output, rdx=length in bytes, rcx=data key schedule (used with aesdec), r8=tweak key schedule (used with aesenc), r9=16-byte initial tweak input.
+.align 16
+aesni_xts_decrypt:
+ leaq -104(%rsp),%rsp # reserve 104 bytes; the first 96 hold six saved tweaks
+ movups (%r9),%xmm15 # xmm15 = initial tweak input
+ movl 240(%r8),%eax # eax = round count of the tweak key
+ movl 240(%rcx),%r10d # r10d = round count of the data key
+ movups (%r8),%xmm0 # encrypt (not decrypt) the tweak input with the tweak key
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+.Loop_enc1_11:
+ aesenc %xmm1,%xmm15
+ decl %eax # ZF consumed by jnz; movups and leaq in between keep flags
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_11
+ aesenclast %xmm1,%xmm15 # xmm15 = first tweak
+ xorl %eax,%eax
+ testq $15,%rdx
+ setnz %al # al = 1 when the length has a partial tail
+ shlq $4,%rax
+ subq %rax,%rdx # with a partial tail, hold one whole block back for the stealing step
+
+ movq %rcx,%r11 # r11 = data key schedule base
+ movl %r10d,%eax
+ movq %rdx,%r9 # r9 = adjusted byte length (low 4 bits give the partial tail)
+ andq $-16,%rdx # rdx = bytes handled as ordinary whole blocks
+
+ movdqa .Lxts_magic(%rip),%xmm8 # xmm8 = mask constant used when doubling the tweak (defined elsewhere in the file)
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14 # xmm14 = per-dword mask of tweak dwords with the top bit set
+ pshufd $19,%xmm14,%xmm9 # tweak-doubling step: rearrange masks, paddq doubles each 64-bit half, masked constant is XORed in
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10 # xmm10 = tweak for block 0
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11 # xmm11 = tweak for block 1
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12 # xmm12 = tweak for block 2
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13 # xmm13 = tweak for block 3
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14 # xmm14 keeps the mask for the next doubling
+ pxor %xmm9,%xmm15 # xmm15 = tweak for block 4
+ subq $96,%rdx
+ jc .Lxts_dec_short # fewer than six whole blocks
+
+ shrl $1,%eax # round loop does two rounds per pass ...
+ subl $1,%eax # ... and the last four rounds are unrolled after it
+ movl %eax,%r10d
+ jmp .Lxts_dec_grandloop
+
+.align 16
+.Lxts_dec_grandloop: # six blocks per pass; tweaks for the next pass are computed between the final AES rounds
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14 # xmm14 = tweak for block 4
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15 # xmm15 = tweak for block 5
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2 # input ^= tweak
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0 # xmm0 = round key 0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+
+
+
+ movups 16(%r11),%xmm1 # xmm1 = round key 1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp) # save the six tweaks for the post-decryption XOR
+ aesdec %xmm1,%xmm2
+ leaq 32(%r11),%rcx # rcx walks the remaining round keys
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax # ZF is tested at the bottom of the round loop
+ movdqa %xmm14,64(%rsp)
+ aesdec %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesdec %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14 # mask for the first doubling done after the round loop
+ jmp .Lxts_dec_loop6_enter
+
+.align 16
+.Lxts_dec_loop6:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+.Lxts_dec_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lxts_dec_loop6 # flags still come from the decl above
+
+ pshufd $19,%xmm14,%xmm9 # unrolled final rounds, interleaved with tweak doubling for the next pass
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10 # xmm10 = tweak for block 0 of the next pass
+ paddq %xmm15,%xmm15
+ aesdec %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups 32(%rcx),%xmm0 # xmm0 = last round key
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11 # next-pass tweak 1
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12 # next-pass tweak 2
+ paddq %xmm15,%xmm15
+ aesdeclast %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesdeclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdeclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13 # next-pass tweak 3
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2 # output = D(input ^ tweak) ^ tweak, using the saved tweaks
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15 # xmm15 = next-pass tweak 4
+
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax # restore the reduced round-loop count
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc .Lxts_dec_grandloop # no borrow: at least six more blocks
+
+ leal 3(%rax,%rax,1),%eax # eax = 2*count+3, undoing the shrl/subl above for the tail paths
+ movq %r11,%rcx # rcx = data key schedule base
+ movl %eax,%r10d
+
+.Lxts_dec_short: # 0..5 whole blocks left; xmm10..xmm13 and xmm15 hold the next five tweaks
+ addq $96,%rdx
+ jz .Lxts_dec_done
+
+ cmpq $32,%rdx
+ jb .Lxts_dec_one
+ je .Lxts_dec_two
+
+ cmpq $64,%rdx
+ jb .Lxts_dec_three
+ je .Lxts_dec_four
+
+ pshufd $19,%xmm14,%xmm9 # five blocks: derive one more tweak
+ movdqa %xmm15,%xmm14 # xmm14 = tweak for block 4
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15 # xmm15 = tweak after block 4
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+
+ call _aesni_decrypt6 # helper defined elsewhere in the file; xmm7 lane is not stored
+
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ pxor %xmm14,%xmm14
+ movdqu %xmm5,48(%rsi)
+ pcmpgtd %xmm15,%xmm14 # begin doubling xmm15 in case stealing needs two more tweaks
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ pshufd $19,%xmm14,%xmm11
+ andq $15,%r9 # r9 = number of trailing bytes
+ jz .Lxts_dec_ret
+
+ movdqa %xmm15,%xmm10 # xmm10 = earlier of the two stealing tweaks
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm11
+ pxor %xmm15,%xmm11 # xmm11 = the tweak that follows xmm10
+ jmp .Lxts_dec_done2
+
+.align 16
+.Lxts_dec_one:
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2 # input ^= tweak
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_12:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_12
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2 # output = D(..) ^ tweak
+ movdqa %xmm11,%xmm10 # shift the next two tweaks into xmm10/xmm11 for the stealing step
+ movups %xmm2,(%rsi)
+ movdqa %xmm12,%xmm11
+ leaq 16(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.align 16
+.Lxts_dec_two:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+
+ call _aesni_decrypt3 # helper defined elsewhere in the file; third lane result is ignored
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10 # next two tweaks for the stealing step
+ xorps %xmm11,%xmm3
+ movdqa %xmm13,%xmm11
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.align 16
+.Lxts_dec_three:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+
+ call _aesni_decrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10 # next two tweaks for the stealing step
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.align 16
+.Lxts_dec_four:
+ pshufd $19,%xmm14,%xmm9 # derive one more tweak so two are available after the four blocks
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movups (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movups 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+
+ call _aesni_decrypt4 # helper defined elsewhere in the file
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm14,%xmm10 # next two tweaks for the stealing step
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.align 16
+.Lxts_dec_done:
+ andq $15,%r9 # r9 = number of trailing bytes beyond the whole blocks
+ jz .Lxts_dec_ret
+.Lxts_dec_done2: # stealing: the held-back whole block is decrypted first, with the later tweak (xmm11)
+ movq %r9,%rdx
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups (%rdi),%xmm2
+ xorps %xmm11,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_13:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_13
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm11,%xmm2
+ movups %xmm2,(%rsi)
+
+.Lxts_dec_steal: # byte by byte: tail input goes into the block just written, displaced bytes become the short final block
+ movzbl 16(%rdi),%eax
+ movzbl (%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,(%rsi)
+ movb %cl,16(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz .Lxts_dec_steal
+
+ subq %r9,%rsi # rewind rsi to the start of the patched block
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups (%rsi),%xmm2 # decrypt the patched block in place with the earlier tweak (xmm10)
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_14:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_14
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,(%rsi)
+
+.Lxts_dec_ret:
+ leaq 104(%rsp),%rsp # release the tweak save area
+.Lxts_dec_epilogue:
+ retq
+.size aesni_xts_decrypt,.-aesni_xts_decrypt
+.globl aesni_cbc_encrypt # CBC encryption (one block at a time) or decryption (eight blocks per main-loop pass), selected by r9d; writes the final chaining value back to (%r8).
+.type aesni_cbc_encrypt,@function # In: rdi=input, rsi=output, rdx=length in bytes, rcx=key schedule, r8=16-byte chaining value (IV), r9d=nonzero to encrypt, zero to decrypt.
+.align 16
+aesni_cbc_encrypt:
+ testq %rdx,%rdx
+ jz .Lcbc_ret # zero length: nothing to do, IV untouched
+
+ movl 240(%rcx),%r10d # r10d = round-loop count stored at key+240
+ movq %rcx,%r11 # r11 = key schedule base
+ testl %r9d,%r9d
+ jz .Lcbc_decrypt
+
+ movups (%r8),%xmm2 # xmm2 = chaining value
+ movl %r10d,%eax
+ cmpq $16,%rdx
+ jb .Lcbc_enc_tail # less than one block: pad it first
+ subq $16,%rdx
+ jmp .Lcbc_enc_loop
+.align 16
+.Lcbc_enc_loop:
+ movups (%rdi),%xmm3 # xmm3 = plaintext block
+ leaq 16(%rdi),%rdi
+
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ xorps %xmm0,%xmm3 # plaintext ^ rk0
+ leaq 32(%rcx),%rcx
+ xorps %xmm3,%xmm2 # chain ^ plaintext ^ rk0
+.Loop_enc1_15:
+ aesenc %xmm1,%xmm2
+ decl %eax # ZF consumed by jnz; movups and leaq in between keep flags
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_15
+ aesenclast %xmm1,%xmm2 # xmm2 = ciphertext block = next chaining value
+ movl %r10d,%eax
+ movq %r11,%rcx
+ movups %xmm2,0(%rsi)
+ leaq 16(%rsi),%rsi
+ subq $16,%rdx
+ jnc .Lcbc_enc_loop # no borrow: another whole block is available
+ addq $16,%rdx # rdx = leftover bytes (0..15)
+ jnz .Lcbc_enc_tail
+ movups %xmm2,(%r8) # store the final chaining value
+ jmp .Lcbc_ret
+
+.Lcbc_enc_tail: # copy the partial block to the output, zero-fill it to 16 bytes, then encrypt it in place
+ movq %rdx,%rcx
+ xchgq %rdi,%rsi # rdi = output (copy destination), rsi = input
+.long 0x9066A4F3 # bytes F3 A4 66 90: rep movsb followed by a 2-byte nop
+ movl $16,%ecx
+ subq %rdx,%rcx # rcx = number of padding bytes
+ xorl %eax,%eax
+.long 0x9066AAF3 # bytes F3 AA 66 90: rep stosb (writes al = 0) followed by a 2-byte nop
+ leaq -16(%rdi),%rdi # back to the start of the padded block
+ movl %r10d,%eax
+ movq %rdi,%rsi # the loop now reads and writes the same 16 bytes
+ movq %r11,%rcx
+ xorq %rdx,%rdx # nothing left after this block
+ jmp .Lcbc_enc_loop
+
+.align 16
+.Lcbc_decrypt:
+ movups (%r8),%xmm9 # xmm9 = chaining value
+ movl %r10d,%eax
+ cmpq $112,%rdx
+ jbe .Lcbc_dec_tail # at most seven blocks: tail dispatcher only
+ shrl $1,%r10d # eight-way helper does two rounds per pass
+ subq $112,%rdx
+ movl %r10d,%eax
+ movaps %xmm9,-24(%rsp) # chaining value kept in a slot below rsp (no frame is allocated)
+ jmp .Lcbc_dec_loop8_enter
+.align 16
+.Lcbc_dec_loop8:
+ movaps %xmm0,-24(%rsp) # last ciphertext block of the previous pass is the new chaining value
+ movups %xmm9,(%rsi) # store the eighth plaintext block of the previous pass
+ leaq 16(%rsi),%rsi
+.Lcbc_dec_loop8_enter:
+ movups (%rcx),%xmm0 # xmm0 = round key 0
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 16(%rcx),%xmm1 # xmm1 = round key 1
+
+ leaq 32(%rcx),%rcx
+ movdqu 32(%rdi),%xmm4
+ xorps %xmm0,%xmm2
+ movdqu 48(%rdi),%xmm5
+ xorps %xmm0,%xmm3
+ movdqu 64(%rdi),%xmm6
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ movdqu 80(%rdi),%xmm7
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqu 96(%rdi),%xmm8
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqu 112(%rdi),%xmm9
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+
+ call .Ldec_loop8_enter # label defined elsewhere in the file; presumably finishes the remaining rounds inside the 8-way decrypt helper - confirm
+
+ movups (%rdi),%xmm1 # reload ciphertext blocks to undo the chaining
+ movups 16(%rdi),%xmm0
+ xorps -24(%rsp),%xmm2 # first block uses the saved chaining value
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm1
+ xorps %xmm0,%xmm8
+ movups 112(%rdi),%xmm0 # xmm0 = eighth ciphertext block = next chaining value
+ xorps %xmm1,%xmm9
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movq %r11,%rcx
+ movups %xmm7,80(%rsi)
+ leaq 128(%rdi),%rdi
+ movups %xmm8,96(%rsi)
+ leaq 112(%rsi),%rsi # eighth block (xmm9) is stored later
+ subq $128,%rdx
+ ja .Lcbc_dec_loop8
+
+ movaps %xmm9,%xmm2 # xmm2 = pending plaintext block
+ movaps %xmm0,%xmm9 # xmm9 = current chaining value
+ addq $112,%rdx
+ jle .Lcbc_dec_tail_collected # signed test: nothing beyond the pending block
+ movups %xmm2,(%rsi)
+ leal 1(%r10,%r10,1),%eax # eax = 2*half+1, the count the tail paths use
+ leaq 16(%rsi),%rsi
+.Lcbc_dec_tail: # up to seven blocks; copies of ciphertext blocks are kept in xmm8/xmm7/xmm6 for chaining
+ movups (%rdi),%xmm2
+ movaps %xmm2,%xmm8
+ cmpq $16,%rdx
+ jbe .Lcbc_dec_one
+
+ movups 16(%rdi),%xmm3
+ movaps %xmm3,%xmm7
+ cmpq $32,%rdx
+ jbe .Lcbc_dec_two
+
+ movups 32(%rdi),%xmm4
+ movaps %xmm4,%xmm6
+ cmpq $48,%rdx
+ jbe .Lcbc_dec_three
+
+ movups 48(%rdi),%xmm5
+ cmpq $64,%rdx
+ jbe .Lcbc_dec_four
+
+ movups 64(%rdi),%xmm6
+ cmpq $80,%rdx
+ jbe .Lcbc_dec_five
+
+ movups 80(%rdi),%xmm7
+ cmpq $96,%rdx
+ jbe .Lcbc_dec_six
+
+ movups 96(%rdi),%xmm8 # seven blocks: use the 8-way helper, eighth lane result is unused
+ movaps %xmm9,-24(%rsp)
+ call _aesni_decrypt8 # helper defined elsewhere in the file
+ movups (%rdi),%xmm1
+ movups 16(%rdi),%xmm0
+ xorps -24(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm9 # xmm9 = last ciphertext block = new chaining value
+ xorps %xmm0,%xmm8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movaps %xmm8,%xmm2 # last plaintext block is left pending in xmm2
+ subq $112,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_16:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_16
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm9,%xmm2 # undo chaining
+ movaps %xmm8,%xmm9 # this ciphertext block becomes the chaining value
+ subq $16,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_two:
+ xorps %xmm4,%xmm4 # third lane of the 3-way helper gets a zero block
+ call _aesni_decrypt3 # helper defined elsewhere in the file
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ movaps %xmm7,%xmm9
+ movaps %xmm3,%xmm2
+ leaq 16(%rsi),%rsi
+ subq $32,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_three:
+ call _aesni_decrypt3
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ movaps %xmm6,%xmm9
+ movaps %xmm4,%xmm2
+ leaq 32(%rsi),%rsi
+ subq $48,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_four:
+ call _aesni_decrypt4 # helper defined elsewhere in the file
+ xorps %xmm9,%xmm2
+ movups 48(%rdi),%xmm9 # reload the fourth ciphertext block as the new chaining value
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ xorps %xmm6,%xmm5
+ movups %xmm4,32(%rsi)
+ movaps %xmm5,%xmm2
+ leaq 48(%rsi),%rsi
+ subq $64,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_five:
+ xorps %xmm7,%xmm7 # sixth lane of the 6-way helper gets a zero block
+ call _aesni_decrypt6 # helper defined elsewhere in the file
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm9
+ xorps %xmm1,%xmm6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ movaps %xmm6,%xmm2
+ subq $80,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_six:
+ call _aesni_decrypt6
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm0
+ xorps %xmm1,%xmm6
+ movups 80(%rdi),%xmm9
+ xorps %xmm0,%xmm7
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ movaps %xmm7,%xmm2
+ subq $96,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_tail_collected: # xmm2 = last plaintext block not yet stored, xmm9 = chaining value to hand back
+ andq $15,%rdx # nonzero only when the length was not a multiple of 16
+ movups %xmm9,(%r8) # movups keeps the flags from andq for the jnz below
+ jnz .Lcbc_dec_tail_partial
+ movups %xmm2,(%rsi)
+ jmp .Lcbc_dec_ret
+.align 16
+.Lcbc_dec_tail_partial: # store only part of the last block, staged through the slot below rsp
+ movaps %xmm2,-24(%rsp)
+ movq $16,%rcx
+ movq %rsi,%rdi
+ subq %rdx,%rcx # NOTE(review): byte count is 16 - (rdx & 15) - confirm this matches the intended partial length
+ leaq -24(%rsp),%rsi
+.long 0x9066A4F3 # bytes F3 A4 66 90: rep movsb followed by a 2-byte nop
+
+.Lcbc_dec_ret:
+.Lcbc_ret:
+ retq
+.size aesni_cbc_encrypt,.-aesni_cbc_encrypt
+.globl aesni_set_decrypt_key /* builds a decryption key schedule; %eax is 0 on success, otherwise the status from __aesni_set_encrypt_key */
+.type aesni_set_decrypt_key,@function
+.align 16
+aesni_set_decrypt_key: /* arguments go unchanged to __aesni_set_encrypt_key: %rdi = user key, %esi = key bits, %rdx = schedule */
+ subq $8,%rsp
+ call __aesni_set_encrypt_key /* expand the encryption schedule into (%rdx) first */
+ shll $4,%esi /* on success the callee leaves 9/11/13 in %esi; scale by 16 bytes per round key */
+ testl %eax,%eax
+ jnz .Ldec_key_ret /* nonzero status is returned as-is */
+ leaq 16(%rdx,%rsi,1),%rdi /* %rdi = address of the last round key */
+
+ movups (%rdx),%xmm0 /* exchange first and last round keys without aesimc */
+ movups (%rdi),%xmm1
+ movups %xmm0,(%rdi)
+ movups %xmm1,(%rdx)
+ leaq 16(%rdx),%rdx
+ leaq -16(%rdi),%rdi
+
+.Ldec_key_inverse: /* walk inward from both ends, applying aesimc and storing each key at the mirrored slot */
+ movups (%rdx),%xmm0
+ movups (%rdi),%xmm1
+ aesimc %xmm0,%xmm0
+ aesimc %xmm1,%xmm1
+ leaq 16(%rdx),%rdx
+ leaq -16(%rdi),%rdi
+ movups %xmm0,16(%rdi) /* key from the low end lands at the old high position */
+ movups %xmm1,-16(%rdx) /* key from the high end lands at the old low position */
+ cmpq %rdx,%rdi
+ ja .Ldec_key_inverse /* unsigned compare: continue while the high pointer is still above the low one */
+
+ movups (%rdx),%xmm0 /* remaining middle round key: aesimc and store */
+ aesimc %xmm0,%xmm0
+ movups %xmm0,(%rdi)
+.Ldec_key_ret:
+ addq $8,%rsp /* undo the stack adjustment made on entry */
+ retq
+.LSEH_end_set_decrypt_key:
+.size aesni_set_decrypt_key,.-aesni_set_decrypt_key
+.globl aesni_set_encrypt_key /* expands a 128/192/256-bit user key into a round-key schedule */
+.type aesni_set_encrypt_key,@function
+.align 16
+aesni_set_encrypt_key: /* in: %rdi = user key, %esi = key length in bits, %rdx = schedule; out: %rax = 0 ok, -1 null pointer, -2 bad bit count */
+__aesni_set_encrypt_key: /* same entry point; this is the name aesni_set_decrypt_key calls */
+ subq $8,%rsp
+ movq $-1,%rax /* preset the null-pointer status */
+ testq %rdi,%rdi
+ jz .Lenc_key_ret
+ testq %rdx,%rdx
+ jz .Lenc_key_ret
+
+ movups (%rdi),%xmm0 /* first 16 key bytes */
+ xorps %xmm4,%xmm4 /* %xmm4 is zeroed scratch for the shufps steps in the helpers */
+ leaq 16(%rdx),%rax /* %rax = write cursor, starting at the second round-key slot */
+ cmpl $256,%esi
+ je .L14rounds
+ cmpl $192,%esi
+ je .L12rounds
+ cmpl $128,%esi
+ jne .Lbad_keybits
+
+.L10rounds:
+ movl $9,%esi /* value later stored at schedule offset 240 */
+ movups %xmm0,(%rdx) /* round key 0 is the user key itself */
+ aeskeygenassist $1,%xmm0,%xmm1
+ call .Lkey_expansion_128_cold /* cold entry: derive the next key without storing the current one */
+ aeskeygenassist $2,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $4,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $8,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $16,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $32,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $64,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $128,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $27,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $54,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ movups %xmm0,(%rax) /* final round key; %rax is now schedule+160 */
+ movl %esi,80(%rax) /* 160+80 = offset 240 of the schedule */
+ xorl %eax,%eax /* status 0 */
+ jmp .Lenc_key_ret
+
+.align 16
+.L12rounds:
+ movq 16(%rdi),%xmm2 /* key bytes 16..23 into the low half of %xmm2 */
+ movl $11,%esi /* value later stored at schedule offset 240 */
+ movups %xmm0,(%rdx)
+ aeskeygenassist $1,%xmm2,%xmm1
+ call .Lkey_expansion_192a_cold
+ aeskeygenassist $2,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $8,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $32,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $128,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ movups %xmm0,(%rax) /* final round key; %rax is now schedule+192 */
+ movl %esi,48(%rax) /* 192+48 = offset 240 of the schedule */
+ xorq %rax,%rax /* status 0 */
+ jmp .Lenc_key_ret
+
+.align 16
+.L14rounds:
+ movups 16(%rdi),%xmm2 /* second 16 key bytes */
+ movl $13,%esi /* value later stored at schedule offset 240 */
+ leaq 16(%rax),%rax /* two round keys are stored directly, so the cursor starts at schedule+32 */
+ movups %xmm0,(%rdx)
+ movups %xmm2,16(%rdx)
+ aeskeygenassist $1,%xmm2,%xmm1
+ call .Lkey_expansion_256a_cold
+ aeskeygenassist $1,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $2,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $2,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $4,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $8,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $8,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $16,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $32,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $32,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ movups %xmm0,(%rax) /* final round key; %rax is now schedule+224 */
+ movl %esi,16(%rax) /* 224+16 = offset 240 of the schedule */
+ xorq %rax,%rax /* status 0 */
+ jmp .Lenc_key_ret
+
+.align 16
+.Lbad_keybits:
+ movq $-2,%rax /* %esi was none of 128, 192, 256 */
+.Lenc_key_ret:
+ addq $8,%rsp
+ retq
+.LSEH_end_set_encrypt_key:
+
+.align 16
+.Lkey_expansion_128: /* store the current key from %xmm0, advance %rax, then derive the next key into %xmm0 */
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_128_cold: /* entry that skips the store; expects the aeskeygenassist result in %xmm1 */
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1 /* broadcast dword 3 of the aeskeygenassist result */
+ xorps %xmm1,%xmm0
+ retq
+
+.align 16
+.Lkey_expansion_192a: /* store 16 bytes from %xmm0, advance %rax by 16, then expand */
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_192a_cold: /* entry that skips the store */
+ movaps %xmm2,%xmm5 /* keep a copy of %xmm2; .Lkey_expansion_192b reads %xmm5 */
+.Lkey_expansion_192b_warm: /* shared expansion tail, also reached from .Lkey_expansion_192b */
+ shufps $16,%xmm0,%xmm4
+ movdqa %xmm2,%xmm3
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ pslldq $4,%xmm3
+ xorps %xmm4,%xmm0
+ pshufd $85,%xmm1,%xmm1 /* broadcast dword 1 of the aeskeygenassist result */
+ pxor %xmm3,%xmm2
+ pxor %xmm1,%xmm0
+ pshufd $255,%xmm0,%xmm3 /* broadcast dword 3 of the updated %xmm0 */
+ pxor %xmm3,%xmm2
+ retq
+
+.align 16
+.Lkey_expansion_192b: /* store 32 bytes assembled from %xmm5, %xmm0 and %xmm2, advance %rax by 32, then expand */
+ movaps %xmm0,%xmm3
+ shufps $68,%xmm0,%xmm5
+ movups %xmm5,(%rax)
+ shufps $78,%xmm2,%xmm3
+ movups %xmm3,16(%rax)
+ leaq 32(%rax),%rax
+ jmp .Lkey_expansion_192b_warm /* the shared tail executes the retq */
+
+.align 16
+.Lkey_expansion_256a: /* store %xmm2, advance %rax, then update %xmm0 */
+ movups %xmm2,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_256a_cold: /* entry that skips the store */
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1 /* broadcast dword 3 of the aeskeygenassist result */
+ xorps %xmm1,%xmm0
+ retq
+
+.align 16
+.Lkey_expansion_256b: /* store %xmm0, advance %rax, then update %xmm2 */
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+
+ shufps $16,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $140,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $170,%xmm1,%xmm1 /* broadcast dword 2 of the aeskeygenassist result */
+ xorps %xmm1,%xmm2
+ retq
+.size aesni_set_encrypt_key,.-aesni_set_encrypt_key
+.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
+.align 64 /* constant pool; the labels below are consumed by code outside this part of the file */
+.Lbswap_mask: /* byte indices 15..0; presumably a pshufb control that reverses all 16 bytes (confirm at the use sites) */
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.Lincrement32: /* adds 6 to dwords 0..2, dword 3 unchanged */
+.long 6,6,6,0
+.Lincrement64: /* adds 1 to the low dword */
+.long 1,0,0,0
+.Lxts_magic: /* 0x87 in dword 0 and 1 in dword 2; name suggests the XTS tweak-update constant (confirm at the use sites) */
+.long 0x87,0,1,0
+
+.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 /* NUL-terminated ASCII banner naming the module and its author */
+.align 64
+#if defined(HAVE_GNU_STACK) /* emit the GNU-stack note only when the build defines HAVE_GNU_STACK */
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/aes/aesni-macosx-x86_64.S b/ext/libressl/crypto/aes/aesni-macosx-x86_64.S
new file mode 100644
index 0000000..6b3216b
--- /dev/null
+++ b/ext/libressl/crypto/aes/aesni-macosx-x86_64.S
@@ -0,0 +1,2536 @@
+#include "x86_arch.h"
+.text
+.globl _aesni_encrypt /* encrypts one 16-byte block */
+
+.p2align 4
+_aesni_encrypt: /* in: %rdi = input block, %rsi = output block, %rdx = key schedule; clobbers %eax, %rdx, %xmm0-%xmm2 */
+ movups (%rdi),%xmm2
+ movl 240(%rdx),%eax /* loop count stored at offset 240 of the schedule */
+ movups (%rdx),%xmm0
+ movups 16(%rdx),%xmm1
+ leaq 32(%rdx),%rdx
+ xorps %xmm0,%xmm2 /* initial whitening with round key 0 */
+L$oop_enc1_1: /* one aesenc per pass, fetching the next round key as it goes */
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rdx),%xmm1
+ leaq 16(%rdx),%rdx
+ jnz L$oop_enc1_1 /* tests ZF from the decl; movups and leaq do not write flags */
+ aesenclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ retq
+
+
+.globl _aesni_decrypt /* decrypts one 16-byte block */
+
+.p2align 4
+_aesni_decrypt: /* in: %rdi = input block, %rsi = output block, %rdx = key schedule; clobbers %eax, %rdx, %xmm0-%xmm2 */
+ movups (%rdi),%xmm2
+ movl 240(%rdx),%eax /* loop count stored at offset 240 of the schedule */
+ movups (%rdx),%xmm0
+ movups 16(%rdx),%xmm1
+ leaq 32(%rdx),%rdx
+ xorps %xmm0,%xmm2 /* initial whitening with the first key in the schedule */
+L$oop_dec1_2: /* one aesdec per pass, fetching the next round key as it goes */
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rdx),%xmm1
+ leaq 16(%rdx),%rdx
+ jnz L$oop_dec1_2 /* tests ZF from the decl; movups and leaq do not write flags */
+ aesdeclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ retq
+
+
+.p2align 4 /* internal helper with a register-based convention, not a C-callable entry */
+_aesni_encrypt3: /* in: %xmm2-%xmm4 = blocks, %rcx = key schedule, %eax = loop count; out: %xmm2-%xmm4; clobbers %eax, %rcx, %xmm0, %xmm1 */
+ movups (%rcx),%xmm0
+ shrl $1,%eax /* the loop below performs two rounds per pass */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2 /* whiten all three blocks with round key 0 */
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+
+L$enc_loop3: /* alternates between the keys held in %xmm1 and %xmm0 */
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz L$enc_loop3 /* ZF still comes from the decl; nothing in between writes flags */
+
+ aesenc %xmm1,%xmm2 /* last regular round, then the final round */
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ retq
+
+
+.p2align 4 /* internal helper with a register-based convention, not a C-callable entry */
+_aesni_decrypt3: /* in: %xmm2-%xmm4 = blocks, %rcx = key schedule, %eax = loop count; out: %xmm2-%xmm4; clobbers %eax, %rcx, %xmm0, %xmm1 */
+ movups (%rcx),%xmm0
+ shrl $1,%eax /* the loop below performs two rounds per pass */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2 /* whiten all three blocks with the first key */
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+
+L$dec_loop3: /* alternates between the keys held in %xmm1 and %xmm0 */
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz L$dec_loop3 /* ZF still comes from the decl; nothing in between writes flags */
+
+ aesdec %xmm1,%xmm2 /* last regular round, then the final round */
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ retq
+
+
+.p2align 4 /* internal helper with a register-based convention, not a C-callable entry */
+_aesni_encrypt4: /* in: %xmm2-%xmm5 = blocks, %rcx = key schedule, %eax = loop count; out: %xmm2-%xmm5; clobbers %eax, %rcx, %xmm0, %xmm1 */
+ movups (%rcx),%xmm0
+ shrl $1,%eax /* the loop below performs two rounds per pass */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2 /* whiten all four blocks with round key 0 */
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+
+L$enc_loop4: /* alternates between the keys held in %xmm1 and %xmm0 */
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz L$enc_loop4 /* ZF still comes from the decl; nothing in between writes flags */
+
+ aesenc %xmm1,%xmm2 /* last regular round, then the final round */
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ retq
+
+
+.p2align 4 /* internal helper with a register-based convention, not a C-callable entry */
+_aesni_decrypt4: /* in: %xmm2-%xmm5 = blocks, %rcx = key schedule, %eax = loop count; out: %xmm2-%xmm5; clobbers %eax, %rcx, %xmm0, %xmm1 */
+ movups (%rcx),%xmm0
+ shrl $1,%eax /* the loop below performs two rounds per pass */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2 /* whiten all four blocks with the first key */
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+
+L$dec_loop4: /* alternates between the keys held in %xmm1 and %xmm0 */
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz L$dec_loop4 /* ZF still comes from the decl; nothing in between writes flags */
+
+ aesdec %xmm1,%xmm2 /* last regular round, then the final round */
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ retq
+
+
+.p2align 4 /* internal helper with a register-based convention, not a C-callable entry */
+_aesni_encrypt6: /* in: %xmm2-%xmm7 = blocks, %rcx = key schedule, %eax = loop count; out: %xmm2-%xmm7; clobbers %eax, %rcx, %xmm0, %xmm1 */
+ movups (%rcx),%xmm0
+ shrl $1,%eax /* the loop below performs two rounds per pass */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2 /* whitening of each block is interleaved with the first aesenc of the previous one */
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax /* the first round was done inline above; this sets ZF for the jnz reached via the jmp */
+ aesenc %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm7
+ jmp L$enc_loop6_enter /* enter the loop at its second half */
+.p2align 4
+L$enc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+L$enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz L$enc_loop6 /* ZF from the most recent decl; nothing in between writes flags */
+
+ aesenc %xmm1,%xmm2 /* last regular round, then the final round */
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ retq
+
+
+.p2align 4 /* internal helper with a register-based convention, not a C-callable entry */
+_aesni_decrypt6: /* in: %xmm2-%xmm7 = blocks, %rcx = key schedule, %eax = loop count; out: %xmm2-%xmm7; clobbers %eax, %rcx, %xmm0, %xmm1 */
+ movups (%rcx),%xmm0
+ shrl $1,%eax /* the loop below performs two rounds per pass */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2 /* whitening of each block is interleaved with the first aesdec of the previous one */
+ pxor %xmm0,%xmm3
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax /* the first round was done inline above; this sets ZF for the jnz reached via the jmp */
+ aesdec %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm7
+ jmp L$dec_loop6_enter /* enter the loop at its second half */
+.p2align 4
+L$dec_loop6:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+L$dec_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz L$dec_loop6 /* ZF from the most recent decl; nothing in between writes flags */
+
+ aesdec %xmm1,%xmm2 /* last regular round, then the final round */
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ retq
+
+
+.p2align 4 /* internal helper with a register-based convention, not a C-callable entry */
+_aesni_encrypt8: /* in: %xmm2-%xmm9 = blocks, %rcx = key schedule, %eax = loop count; out: %xmm2-%xmm9; clobbers %eax, %rcx, %xmm0, %xmm1 */
+ movups (%rcx),%xmm0
+ shrl $1,%eax /* the loop below performs two rounds per pass */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2 /* whitening is interleaved with the first aesenc of earlier blocks */
+ xorps %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax /* the first round was done inline; this sets ZF for the jnz reached via the jmp */
+ aesenc %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesenc %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp L$enc_loop8_enter /* enter the loop at its second half */
+.p2align 4
+L$enc_loop8:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+L$enc_loop8_enter:
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ aesenc %xmm0,%xmm8
+ aesenc %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ jnz L$enc_loop8 /* ZF from the most recent decl; nothing in between writes flags */
+
+ aesenc %xmm1,%xmm2 /* last regular round, then the final round */
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ aesenclast %xmm0,%xmm8
+ aesenclast %xmm0,%xmm9
+ retq
+
+
+.p2align 4 /* internal helper with a register-based convention, not a C-callable entry */
+_aesni_decrypt8: /* in: %xmm2-%xmm9 = blocks, %rcx = key schedule, %eax = loop count; out: %xmm2-%xmm9; clobbers %eax, %rcx, %xmm0, %xmm1 */
+ movups (%rcx),%xmm0
+ shrl $1,%eax /* the loop below performs two rounds per pass */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2 /* whitening is interleaved with the first aesdec of earlier blocks */
+ xorps %xmm0,%xmm3
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax /* the first round was done inline; this sets ZF for the jnz reached via the jmp */
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp L$dec_loop8_enter /* enter the loop at its second half */
+.p2align 4
+L$dec_loop8:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+L$dec_loop8_enter:
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ aesdec %xmm0,%xmm8
+ aesdec %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ jnz L$dec_loop8 /* ZF from the most recent decl; nothing in between writes flags */
+
+ aesdec %xmm1,%xmm2 /* last regular round, then the final round */
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ aesdeclast %xmm0,%xmm8
+ aesdeclast %xmm0,%xmm9
+ retq
+
+.globl _aesni_ecb_encrypt /* processes independent 16-byte blocks, eight at a time where possible */
+
+.p2align 4
+_aesni_ecb_encrypt: /* in: %rdi = input, %rsi = output, %rdx = length in bytes, %rcx = key schedule, %r8d = nonzero to encrypt, zero to decrypt */
+ andq $-16,%rdx /* round the length down to whole blocks */
+ jz L$ecb_ret /* less than one block: nothing to do */
+
+ movl 240(%rcx),%eax
+ movups (%rcx),%xmm0
+ movq %rcx,%r11 /* %r11 and %r10d keep the key pointer and loop count; the helpers clobber %rcx and %eax */
+ movl %eax,%r10d
+ testl %r8d,%r8d
+ jz L$ecb_decrypt
+
+ cmpq $128,%rdx
+ jb L$ecb_enc_tail /* fewer than eight blocks */
+
+ movdqu (%rdi),%xmm2 /* preload the first eight blocks */
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp L$ecb_enc_loop8_enter
+.p2align 4
+L$ecb_enc_loop8: /* store the previous eight results while loading the next eight inputs */
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx /* restore key pointer and loop count for the next helper call */
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+L$ecb_enc_loop8_enter:
+
+ call _aesni_encrypt8
+
+ subq $128,%rdx
+ jnc L$ecb_enc_loop8 /* no borrow: at least eight more blocks remained */
+
+ movups %xmm2,(%rsi) /* flush the last full batch */
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx /* undo the over-subtraction; %rdx = leftover bytes */
+ jz L$ecb_ret
+
+L$ecb_enc_tail: /* 1..7 blocks left; each cmpq feeds both the jb and the following je, since movups leaves flags intact */
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb L$ecb_enc_one
+ movups 16(%rdi),%xmm3
+ je L$ecb_enc_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb L$ecb_enc_three
+ movups 48(%rdi),%xmm5
+ je L$ecb_enc_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb L$ecb_enc_five
+ movups 80(%rdi),%xmm7
+ je L$ecb_enc_six
+ movdqu 96(%rdi),%xmm8
+ call _aesni_encrypt8 /* seven blocks: %xmm9 is not loaded here and its result is not stored */
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_one: /* single block: inline one-round-per-pass loop */
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_enc1_3:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_3
+ aesenclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_two:
+ xorps %xmm4,%xmm4 /* zero the unused third lane before the 3-block helper */
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_three:
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_four:
+ call _aesni_encrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_five:
+ xorps %xmm7,%xmm7 /* zero the unused sixth lane before the 6-block helper */
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_six:
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ jmp L$ecb_ret
+
+.p2align 4
+L$ecb_decrypt: /* same structure as the encrypt path, using the decrypt helpers */
+ cmpq $128,%rdx
+ jb L$ecb_dec_tail /* fewer than eight blocks */
+
+ movdqu (%rdi),%xmm2 /* preload the first eight blocks */
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp L$ecb_dec_loop8_enter
+.p2align 4
+L$ecb_dec_loop8: /* store the previous eight results while loading the next eight inputs */
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx /* restore key pointer and loop count for the next helper call */
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+L$ecb_dec_loop8_enter:
+
+ call _aesni_decrypt8
+
+ movups (%r11),%xmm0 /* reload the first key into %xmm0 after the helper clobbered it */
+ subq $128,%rdx
+ jnc L$ecb_dec_loop8 /* no borrow: at least eight more blocks remained */
+
+ movups %xmm2,(%rsi) /* flush the last full batch */
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx /* undo the over-subtraction; %rdx = leftover bytes */
+ jz L$ecb_ret
+
+L$ecb_dec_tail: /* 1..7 blocks left; each cmpq feeds both the jb and the following je */
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb L$ecb_dec_one
+ movups 16(%rdi),%xmm3
+ je L$ecb_dec_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb L$ecb_dec_three
+ movups 48(%rdi),%xmm5
+ je L$ecb_dec_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb L$ecb_dec_five
+ movups 80(%rdi),%xmm7
+ je L$ecb_dec_six
+ movups 96(%rdi),%xmm8
+ movups (%rcx),%xmm0 /* the helper loads %xmm0 from (%rcx) again on entry */
+ call _aesni_decrypt8 /* seven blocks: %xmm9 is not loaded here and its result is not stored */
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_one: /* single block: inline one-round-per-pass loop */
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_dec1_4:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_dec1_4
+ aesdeclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_two:
+ xorps %xmm4,%xmm4 /* zero the unused third lane before the 3-block helper */
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_three:
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_four:
+ call _aesni_decrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_five:
+ xorps %xmm7,%xmm7 /* zero the unused sixth lane before the 6-block helper */
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_six: /* falls through to the common return */
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+
+L$ecb_ret:
+ retq
+
+.globl _aesni_ccm64_encrypt_blocks /* per block: XOR input with an encrypted counter block and fold the input into a running encrypted value */
+
+.p2align 4
+_aesni_ccm64_encrypt_blocks: /* in: %rdi = input, %rsi = output, %rdx = block count, %rcx = key schedule, %r8 = 16-byte counter block, %r9 = 16-byte running value (read and written back) */
+ movl 240(%rcx),%eax
+ movdqu (%r8),%xmm9
+ movdqa L$increment64(%rip),%xmm6
+ movdqa L$bswap_mask(%rip),%xmm7
+
+ shrl $1,%eax /* inner loop performs two rounds per pass */
+ leaq 0(%rcx),%r11 /* %r11 keeps the key schedule base */
+ movdqu (%r9),%xmm3
+ movdqa %xmm9,%xmm2 /* %xmm2 = counter block as supplied */
+ movl %eax,%r10d /* %r10d keeps the halved loop count */
+.byte 102,68,15,56,0,207 /* encodes pshufb %xmm7,%xmm9: byte-reverse the counter so paddq can step it */
+ jmp L$ccm64_enc_outer
+.p2align 4
+L$ccm64_enc_outer: /* body runs before the count is tested, so %rdx must be nonzero on entry */
+ movups (%r11),%xmm0
+ movl %r10d,%eax
+ movups (%rdi),%xmm8 /* %xmm8 = input block */
+
+ xorps %xmm0,%xmm2 /* whiten the counter block */
+ movups 16(%r11),%xmm1
+ xorps %xmm8,%xmm0 /* %xmm0 = first key XOR input */
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm3 /* running value ^= input, whitened in the same step */
+ movups (%rcx),%xmm0
+
+L$ccm64_enc2_loop: /* encrypt %xmm2 and %xmm3 in parallel */
+ aesenc %xmm1,%xmm2
+ decl %eax
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz L$ccm64_enc2_loop
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ paddq %xmm6,%xmm9 /* step the byte-reversed counter */
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+
+ decq %rdx
+ leaq 16(%rdi),%rdi
+ xorps %xmm2,%xmm8 /* output = input XOR encrypted counter */
+ movdqa %xmm9,%xmm2
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215 /* encodes pshufb %xmm7,%xmm2: next counter block back in memory byte order */
+ jnz L$ccm64_enc_outer /* ZF from the decq; the instructions in between do not write flags */
+
+ movups %xmm3,(%r9) /* write the running value back */
+ retq
+
+.globl _aesni_ccm64_decrypt_blocks /* per block: XOR input with an encrypted counter block and fold the recovered output into a running encrypted value */
+
+.p2align 4
+_aesni_ccm64_decrypt_blocks: /* in: %rdi = input, %rsi = output, %rdx = block count, %rcx = key schedule, %r8 = 16-byte counter block, %r9 = 16-byte running value (read and written back) */
+ movl 240(%rcx),%eax
+ movups (%r8),%xmm9
+ movdqu (%r9),%xmm3
+ movdqa L$increment64(%rip),%xmm6
+ movdqa L$bswap_mask(%rip),%xmm7
+
+ movaps %xmm9,%xmm2 /* %xmm2 = counter block as supplied */
+ movl %eax,%r10d /* %r10d keeps the full loop count */
+ movq %rcx,%r11 /* %r11 keeps the key schedule base */
+.byte 102,68,15,56,0,207 /* encodes pshufb %xmm7,%xmm9: byte-reverse the counter so paddq can step it */
+ movups (%rcx),%xmm0 /* encrypt the first counter block on its own */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_enc1_5:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_5
+ aesenclast %xmm1,%xmm2
+ movups (%rdi),%xmm8 /* first input block */
+ paddq %xmm6,%xmm9
+ leaq 16(%rdi),%rdi
+ jmp L$ccm64_dec_outer
+.p2align 4
+L$ccm64_dec_outer:
+ xorps %xmm2,%xmm8 /* output = input XOR encrypted counter */
+ movdqa %xmm9,%xmm2
+ movl %r10d,%eax
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215 /* encodes pshufb %xmm7,%xmm2: next counter block back in memory byte order */
+
+ subq $1,%rdx
+ jz L$ccm64_dec_break /* last block: only the running value is left to update */
+
+ movups (%r11),%xmm0
+ shrl $1,%eax /* paired loop performs two rounds per pass */
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm8,%xmm3 /* running value ^= output just produced, whitened in the same step */
+ movups (%rcx),%xmm0
+
+L$ccm64_dec2_loop: /* encrypt the next counter block and the running value in parallel */
+ aesenc %xmm1,%xmm2
+ decl %eax
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz L$ccm64_dec2_loop
+ movups (%rdi),%xmm8 /* fetch the next input block */
+ paddq %xmm6,%xmm9
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ leaq 16(%rdi),%rdi
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ jmp L$ccm64_dec_outer
+
+.p2align 4
+L$ccm64_dec_break: /* %eax still holds the full loop count restored from %r10d */
+
+ movups (%r11),%xmm0
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%r11
+ xorps %xmm8,%xmm3 /* fold the final output block into the running value */
+L$oop_enc1_6:
+ aesenc %xmm1,%xmm3
+ decl %eax
+ movups (%r11),%xmm1
+ leaq 16(%r11),%r11
+ jnz L$oop_enc1_6
+ aesenclast %xmm1,%xmm3
+ movups %xmm3,(%r9) /* write the running value back */
+ retq
+
+.globl _aesni_ctr32_encrypt_blocks /* XORs input with encrypted counter blocks; only the last dword of the counter block is stepped */
+
+.p2align 4
+_aesni_ctr32_encrypt_blocks: /* in: %rdi = input, %rsi = output, %rdx = block count, %rcx = key schedule, %r8 = 16-byte counter block; uses -40(%rsp)..-9(%rsp) as scratch */
+ cmpq $1,%rdx
+ je L$ctr32_one_shortcut /* single block: skip the vector counter setup */
+
+ movdqu (%r8),%xmm14
+ movdqa L$bswap_mask(%rip),%xmm15
+ xorl %eax,%eax
+.byte 102,69,15,58,22,242,3 /* encodes pextrd $3,%xmm14,%r10d: pull out the counter dword */
+.byte 102,68,15,58,34,240,3 /* encodes pinsrd $3,%eax,%xmm14: clear the counter dword in the template */
+
+ movl 240(%rcx),%eax
+ bswapl %r10d /* counter dword to host byte order */
+ pxor %xmm12,%xmm12
+ pxor %xmm13,%xmm13
+.byte 102,69,15,58,34,226,0 /* encodes pinsrd $0,%r10d,%xmm12: counter n */
+ leaq 3(%r10),%r11
+.byte 102,69,15,58,34,235,0 /* encodes pinsrd $0,%r11d,%xmm13: counter n+3 */
+ incl %r10d
+.byte 102,69,15,58,34,226,1 /* encodes pinsrd $1,%r10d,%xmm12: counter n+1 */
+ incq %r11
+.byte 102,69,15,58,34,235,1 /* encodes pinsrd $1,%r11d,%xmm13: counter n+4 */
+ incl %r10d
+.byte 102,69,15,58,34,226,2 /* encodes pinsrd $2,%r10d,%xmm12: counter n+2 */
+ incq %r11
+.byte 102,69,15,58,34,235,2 /* encodes pinsrd $2,%r11d,%xmm13: counter n+5 */
+ movdqa %xmm12,-40(%rsp) /* keep host-order counters below %rsp for the next batch */
+.byte 102,69,15,56,0,231 /* encodes pshufb %xmm15,%xmm12 */
+ movdqa %xmm13,-24(%rsp)
+.byte 102,69,15,56,0,239 /* encodes pshufb %xmm15,%xmm13 */
+
+ pshufd $192,%xmm12,%xmm2 /* each pshufd puts one shuffled counter in dword 3 and copies dword 0 into the rest */
+ pshufd $128,%xmm12,%xmm3
+ pshufd $64,%xmm12,%xmm4
+ cmpq $6,%rdx
+ jb L$ctr32_tail /* fewer than six blocks */
+ shrl $1,%eax /* six-block loop performs two rounds per pass */
+ movq %rcx,%r11 /* %r11 keeps the key schedule base */
+ movl %eax,%r10d /* %r10d keeps the halved loop count */
+ subq $6,%rdx
+ jmp L$ctr32_loop6
+
+.p2align 4
+L$ctr32_loop6: /* build six counter blocks by OR-ing the template in %xmm14 into each lane */
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm2
+ movups (%r11),%xmm0
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm3
+ movups 16(%r11),%xmm1
+ pshufd $64,%xmm13,%xmm7
+ por %xmm14,%xmm4
+ por %xmm14,%xmm5
+ xorps %xmm0,%xmm2
+ por %xmm14,%xmm6
+ por %xmm14,%xmm7
+
+
+
+
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ movdqa L$increment32(%rip),%xmm13 /* %xmm13 is free now; reuse it for the counter step */
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ movdqa -40(%rsp),%xmm12 /* reload the saved host-order counters */
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ jmp L$ctr32_enc_loop6_enter
+.p2align 4
+L$ctr32_enc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+L$ctr32_enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz L$ctr32_enc_loop6 /* ZF from the most recent decl */
+
+ aesenc %xmm1,%xmm2 /* last regular round, interleaved with stepping both counter vectors */
+ paddd %xmm13,%xmm12
+ aesenc %xmm1,%xmm3
+ paddd -24(%rsp),%xmm13
+ aesenc %xmm1,%xmm4
+ movdqa %xmm12,-40(%rsp)
+ aesenc %xmm1,%xmm5
+ movdqa %xmm13,-24(%rsp)
+ aesenc %xmm1,%xmm6
+.byte 102,69,15,56,0,231 /* encodes pshufb %xmm15,%xmm12 */
+ aesenc %xmm1,%xmm7
+.byte 102,69,15,56,0,239 /* encodes pshufb %xmm15,%xmm13 */
+
+ aesenclast %xmm0,%xmm2 /* final round, interleaved with loading six input blocks */
+ movups (%rdi),%xmm8
+ aesenclast %xmm0,%xmm3
+ movups 16(%rdi),%xmm9
+ aesenclast %xmm0,%xmm4
+ movups 32(%rdi),%xmm10
+ aesenclast %xmm0,%xmm5
+ movups 48(%rdi),%xmm11
+ aesenclast %xmm0,%xmm6
+ movups 64(%rdi),%xmm1
+ aesenclast %xmm0,%xmm7
+ movups 80(%rdi),%xmm0
+ leaq 96(%rdi),%rdi
+
+ xorps %xmm2,%xmm8 /* output = input XOR keystream; the pshufd lines already start the next batch */
+ pshufd $192,%xmm12,%xmm2
+ xorps %xmm3,%xmm9
+ pshufd $128,%xmm12,%xmm3
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ pshufd $64,%xmm12,%xmm4
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ xorps %xmm7,%xmm0
+ movups %xmm1,64(%rsi)
+ movups %xmm0,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movl %r10d,%eax
+ subq $6,%rdx
+ jnc L$ctr32_loop6 /* no borrow: at least six more blocks remained */
+
+ addq $6,%rdx /* undo the over-subtraction; %rdx = leftover blocks */
+ jz L$ctr32_done
+ movq %r11,%rcx
+ leal 1(%rax,%rax,1),%eax /* %eax = 2*%eax+1: the tail helpers halve the count themselves */
+
+L$ctr32_tail: /* 1..5 blocks left; each cmpq feeds both the jb and the following je */
+ por %xmm14,%xmm2
+ movups (%rdi),%xmm8
+ cmpq $2,%rdx
+ jb L$ctr32_one
+
+ por %xmm14,%xmm3
+ movups 16(%rdi),%xmm9
+ je L$ctr32_two
+
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm4
+ movups 32(%rdi),%xmm10
+ cmpq $4,%rdx
+ jb L$ctr32_three
+
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm5
+ movups 48(%rdi),%xmm11
+ je L$ctr32_four
+
+ por %xmm14,%xmm6
+ xorps %xmm7,%xmm7 /* five blocks: zero the unused sixth lane */
+
+ call _aesni_encrypt6
+
+ movups 64(%rdi),%xmm1
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ movups %xmm1,64(%rsi)
+ jmp L$ctr32_done
+
+.p2align 4
+L$ctr32_one_shortcut: /* block count was exactly 1: use the counter block as supplied */
+ movups (%r8),%xmm2
+ movups (%rdi),%xmm8
+ movl 240(%rcx),%eax
+L$ctr32_one: /* encrypt the counter block in %xmm2 and XOR it into the input block in %xmm8 */
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_enc1_7:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_7
+ aesenclast %xmm1,%xmm2
+ xorps %xmm2,%xmm8
+ movups %xmm8,(%rsi)
+ jmp L$ctr32_done
+
+.p2align 4
+L$ctr32_two:
+ xorps %xmm4,%xmm4 /* zero the unused third lane before the 3-block helper */
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ movups %xmm9,16(%rsi)
+ jmp L$ctr32_done
+
+.p2align 4
+L$ctr32_three:
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ movups %xmm10,32(%rsi)
+ jmp L$ctr32_done
+
+.p2align 4
+L$ctr32_four: /* falls through to the common return */
+ call _aesni_encrypt4
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ movups %xmm11,48(%rsi)
+
+L$ctr32_done:
+ retq
+
+.globl _aesni_xts_encrypt
+
+.p2align 4
+_aesni_xts_encrypt:
+ leaq -104(%rsp),%rsp
+ movups (%r9),%xmm15
+ movl 240(%r8),%eax
+ movl 240(%rcx),%r10d
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+L$oop_enc1_8:
+ aesenc %xmm1,%xmm15
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz L$oop_enc1_8
+ aesenclast %xmm1,%xmm15
+ movq %rcx,%r11
+ movl %r10d,%eax
+ movq %rdx,%r9
+ andq $-16,%rdx
+
+ movdqa L$xts_magic(%rip),%xmm8
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ subq $96,%rdx
+ jc L$xts_enc_short
+
+ shrl $1,%eax
+ subl $1,%eax
+ movl %eax,%r10d
+ jmp L$xts_enc_grandloop
+
+.p2align 4
+L$xts_enc_grandloop:
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+
+
+
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp)
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+ aesenc %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesenc %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp L$xts_enc_loop6_enter
+
+.p2align 4
+L$xts_enc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+L$xts_enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz L$xts_enc_loop6
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ aesenc %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups 32(%rcx),%xmm0
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ aesenclast %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesenclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc L$xts_enc_grandloop
+
+ leal 3(%rax,%rax,1),%eax
+ movq %r11,%rcx
+ movl %eax,%r10d
+
+L$xts_enc_short:
+ addq $96,%rdx
+ jz L$xts_enc_done
+
+ cmpq $32,%rdx
+ jb L$xts_enc_one
+ je L$xts_enc_two
+
+ cmpq $64,%rdx
+ jb L$xts_enc_three
+ je L$xts_enc_four
+
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+
+ call _aesni_encrypt6
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ movdqu %xmm5,48(%rsi)
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ jmp L$xts_enc_done
+
+.p2align 4
+L$xts_enc_one:
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_enc1_9:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_9
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10
+ movups %xmm2,(%rsi)
+ leaq 16(%rsi),%rsi
+ jmp L$xts_enc_done
+
+.p2align 4
+L$xts_enc_two:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+
+ call _aesni_encrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10
+ xorps %xmm11,%xmm3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp L$xts_enc_done
+
+.p2align 4
+L$xts_enc_three:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+
+ call _aesni_encrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp L$xts_enc_done
+
+.p2align 4
+L$xts_enc_four:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+
+ call _aesni_encrypt4
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp L$xts_enc_done
+
+.p2align 4
+L$xts_enc_done:
+ andq $15,%r9
+ jz L$xts_enc_ret
+ movq %r9,%rdx
+
+L$xts_enc_steal:
+ movzbl (%rdi),%eax
+ movzbl -16(%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,-16(%rsi)
+ movb %cl,0(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz L$xts_enc_steal
+
+ subq %r9,%rsi
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups -16(%rsi),%xmm2
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_enc1_10:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_10
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,-16(%rsi)
+
+L$xts_enc_ret:
+ leaq 104(%rsp),%rsp
+L$xts_enc_epilogue:
+ retq
+
+.globl _aesni_xts_decrypt  # AES-XTS decryption of rdx bytes from (rdi) to (rsi), with ciphertext stealing for a ragged tail
+ # Registers as used below: rdi=in, rsi=out, rdx=length, rcx=data key schedule, r8=tweak key schedule, r9=16-byte IV
+.p2align 4
+_aesni_xts_decrypt:
+ leaq -104(%rsp),%rsp  # reserve stack; 0..95(%rsp) receives six spilled tweaks
+ movups (%r9),%xmm15  # xmm15 = IV block
+ movl 240(%r8),%eax  # eax = round count stored at offset 240 of the tweak key schedule
+ movl 240(%rcx),%r10d  # r10d = round count of the data key schedule
+ movups (%r8),%xmm0  # tweak key: round key 0
+ movups 16(%r8),%xmm1  # tweak key: round key 1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15  # initial whitening of the IV
+L$oop_enc1_11:  # encrypt the IV with the tweak key (always aesenc, even though the data path decrypts)
+ aesenc %xmm1,%xmm15
+ decl %eax  # sets ZF for the jnz below; movups and leaq leave the flags alone
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz L$oop_enc1_11
+ aesenclast %xmm1,%xmm15  # xmm15 = tweak for block 0
+ xorl %eax,%eax
+ testq $15,%rdx
+ setnz %al  # al = 1 when the length is not a multiple of 16
+ shlq $4,%rax  # rax = 16 or 0
+ subq %rax,%rdx  # ragged length: hold back one full block for the stealing step
+
+ movq %rcx,%r11  # r11 = data key schedule base (rcx is advanced while walking round keys)
+ movl %r10d,%eax  # eax = data key round count
+ movq %rdx,%r9  # r9 = adjusted length; its low 4 bits are the tail byte count
+ andq $-16,%rdx  # rdx = byte count of whole blocks handled by the bulk paths
+
+ movdqa L$xts_magic(%rip),%xmm8  # xmm8 = carry fold mask {0x87,0,1,0}
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14  # xmm14 = per-dword sign mask of the tweak (all ones where the dword is negative)
+ pshufd $19,%xmm14,%xmm9  # route sign of dword 3 to dword 0 and sign of dword 1 to dword 2
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10  # xmm10 = tweak 0
+ paddq %xmm15,%xmm15  # shift both 64-bit halves left by one
+ pand %xmm8,%xmm9  # keep 0x87 for the bit shifted out of the top, 1 for the carry into the high half
+ pcmpgtd %xmm15,%xmm14  # sign mask for the next doubling (the fold below does not touch sign bits)
+ pxor %xmm9,%xmm15  # xmm15 = previous tweak shifted left one bit as a 128-bit value, 0x87 folded in on overflow
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11  # xmm11 = tweak 1
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12  # xmm12 = tweak 2
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13  # xmm13 = tweak 3
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14  # xmm14 = sign mask of the value that becomes tweak 4
+ pxor %xmm9,%xmm15  # xmm15 = tweak 4
+ subq $96,%rdx
+ jc L$xts_dec_short  # fewer than six whole blocks: skip the bulk loop
+
+ shrl $1,%eax
+ subl $1,%eax
+ movl %eax,%r10d  # r10d = rounds/2 - 1, pass count for the two-rounds-per-pass loop
+ jmp L$xts_dec_grandloop
+
+.p2align 4
+L$xts_dec_grandloop:  # six blocks per pass; on entry xmm10-13 = tweaks 0-3, xmm15 = tweak 4, xmm14 = sign mask of xmm15
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14  # xmm14 = tweak 4
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15  # xmm15 = tweak 5
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2  # pre-whiten every ciphertext block with its tweak
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0  # data key: round key 0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+
+
+
+ movups 16(%r11),%xmm1  # data key: round key 1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp)  # spill the six tweaks; xmm10-15 are recycled for the next batch below
+ aesdec %xmm1,%xmm2
+ leaq 32(%r11),%rcx  # rcx walks the remaining round keys
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0  # round key 2
+ decl %eax  # ZF is consumed by the jnz at the end of the round loop; only SSE, lea and jmp execute in between
+ movdqa %xmm14,64(%rsp)
+ aesdec %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesdec %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14  # sign mask of tweak 5, seed for the next batch of tweaks
+ jmp L$xts_dec_loop6_enter
+
+.p2align 4
+L$xts_dec_loop6:  # two rounds per pass over all six lanes
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+L$xts_dec_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz L$xts_dec_loop6
+ # Last four rounds follow, interleaved with the generation of the next batch of tweaks
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15  # xmm15 = first tweak of the next batch
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10  # xmm10 = next tweak 0
+ paddq %xmm15,%xmm15
+ aesdec %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups 32(%rcx),%xmm0  # final round key
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11  # xmm11 = next tweak 1
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12  # xmm12 = next tweak 2
+ paddq %xmm15,%xmm15
+ aesdeclast %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesdeclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdeclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13  # xmm13 = next tweak 3
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2  # post-whiten with the tweaks spilled at the top of this pass
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15  # xmm15 = next tweak 4, xmm14 = its pending sign mask
+
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax  # reload the pass count for the next batch
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc L$xts_dec_grandloop  # another six whole blocks available
+
+ leal 3(%rax,%rax,1),%eax  # eax = 2*eax + 3, undoing the rounds/2 - 1 scaling for the odd round counts stored by set_encrypt_key
+ movq %r11,%rcx  # rcx = key schedule base again
+ movl %eax,%r10d  # r10d = full round count for the tail code
+
+L$xts_dec_short:  # 0 to 5 whole blocks remain; xmm10-13 = tweaks 0-3, xmm15 = tweak 4
+ addq $96,%rdx  # rdx = remaining whole-block bytes
+ jz L$xts_dec_done
+
+ cmpq $32,%rdx
+ jb L$xts_dec_one
+ je L$xts_dec_two
+
+ cmpq $64,%rdx
+ jb L$xts_dec_three
+ je L$xts_dec_four
+ # Five blocks: fall through
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14  # xmm14 = tweak 4
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15  # xmm15 = tweak 5
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+
+ call _aesni_decrypt6  # sixth lane (xmm7) is processed too, its result is not stored
+
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ pxor %xmm14,%xmm14
+ movdqu %xmm5,48(%rsi)
+ pcmpgtd %xmm15,%xmm14  # sign mask of tweak 5 in case one more doubling is needed
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ pshufd $19,%xmm14,%xmm11
+ andq $15,%r9  # r9 = tail byte count
+ jz L$xts_dec_ret  # block-aligned length: finished
+
+ movdqa %xmm15,%xmm10  # xmm10 = tweak 5, used second (on the reassembled block)
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm11
+ pxor %xmm15,%xmm11  # xmm11 = tweak 6, used first (on the last full ciphertext block)
+ jmp L$xts_dec_done2
+
+.p2align 4
+L$xts_dec_one:  # one whole block, tweak in xmm10
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_dec1_12:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_dec1_12
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10  # xmm10, xmm11 = the next two tweaks, consumed by the stealing tail
+ movups %xmm2,(%rsi)
+ movdqa %xmm12,%xmm11
+ leaq 16(%rsi),%rsi
+ jmp L$xts_dec_done
+
+.p2align 4
+L$xts_dec_two:  # two whole blocks
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+
+ call _aesni_decrypt3  # third lane (xmm4) is processed too, its result is not stored
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10  # next two tweaks for the stealing tail
+ xorps %xmm11,%xmm3
+ movdqa %xmm13,%xmm11
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp L$xts_dec_done
+
+.p2align 4
+L$xts_dec_three:  # three whole blocks
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+
+ call _aesni_decrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10  # next two tweaks for the stealing tail (tweak 3, tweak 4)
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp L$xts_dec_done
+
+.p2align 4
+L$xts_dec_four:  # four whole blocks; one extra tweak is derived for the stealing tail
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14  # xmm14 = tweak 4
+ paddq %xmm15,%xmm15
+ movups (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movups 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15  # xmm15 = tweak 5
+
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+
+ call _aesni_decrypt4
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm14,%xmm10  # next two tweaks for the stealing tail (tweak 4, tweak 5)
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp L$xts_dec_done
+
+.p2align 4
+L$xts_dec_done:  # whole blocks finished; xmm10 and xmm11 hold the next two tweaks in order
+ andq $15,%r9  # r9 = tail byte count
+ jz L$xts_dec_ret
+L$xts_dec_done2:  # ciphertext stealing: the held-back full block uses the later tweak (xmm11) first
+ movq %r9,%rdx  # rdx = tail byte count, loop counter for the byte swap
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups (%rdi),%xmm2  # last full ciphertext block
+ xorps %xmm11,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_dec1_13:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_dec1_13
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm11,%xmm2
+ movups %xmm2,(%rsi)  # parked in the output buffer; its head bytes are swapped out next
+
+L$xts_dec_steal:  # per tail byte: out[16+i] = out[i], then out[i] = in[16+i]
+ movzbl 16(%rdi),%eax  # tail ciphertext byte
+ movzbl (%rsi),%ecx  # byte of the block decrypted just above
+ leaq 1(%rdi),%rdi
+ movb %al,(%rsi)
+ movb %cl,16(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz L$xts_dec_steal
+
+ subq %r9,%rsi  # rewind rsi to the start of the reassembled block
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups (%rsi),%xmm2  # reassembled block, decrypted in place with the earlier tweak (xmm10)
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_dec1_14:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_dec1_14
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,(%rsi)
+
+L$xts_dec_ret:
+ leaq 104(%rsp),%rsp  # release the frame reserved at entry
+L$xts_dec_epilogue:
+ retq
+
+.globl _aesni_cbc_encrypt  # AES-CBC encryption or decryption of rdx bytes from (rdi) to (rsi)
+ # Registers as used below: rdi=in, rsi=out, rdx=length, rcx=key schedule, r8=IV (read, then updated), r9d=nonzero to encrypt, zero to decrypt
+.p2align 4
+_aesni_cbc_encrypt:
+ testq %rdx,%rdx
+ jz L$cbc_ret  # zero length: nothing to do, IV untouched
+
+ movl 240(%rcx),%r10d  # r10d = round count stored at offset 240 of the key schedule
+ movq %rcx,%r11  # r11 = key schedule base (rcx is advanced while walking round keys)
+ testl %r9d,%r9d
+ jz L$cbc_decrypt
+ # Encrypt path: inherently serial, one block at a time with the chain value in xmm2
+ movups (%r8),%xmm2  # xmm2 = IV
+ movl %r10d,%eax
+ cmpq $16,%rdx
+ jb L$cbc_enc_tail  # less than one block: pad first
+ subq $16,%rdx
+ jmp L$cbc_enc_loop
+.p2align 4
+L$cbc_enc_loop:
+ movups (%rdi),%xmm3  # plaintext block
+ leaq 16(%rdi),%rdi
+
+ movups (%rcx),%xmm0  # round key 0
+ movups 16(%rcx),%xmm1  # round key 1
+ xorps %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ xorps %xmm3,%xmm2  # xmm2 = previous ciphertext ^ plaintext ^ round key 0
+L$oop_enc1_15:
+ aesenc %xmm1,%xmm2
+ decl %eax  # sets ZF for the jnz below; movups and leaq leave the flags alone
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_15
+ aesenclast %xmm1,%xmm2  # xmm2 = ciphertext block, also the next chain value
+ movl %r10d,%eax
+ movq %r11,%rcx
+ movups %xmm2,0(%rsi)
+ leaq 16(%rsi),%rsi
+ subq $16,%rdx
+ jnc L$cbc_enc_loop  # at least one more whole block
+ addq $16,%rdx  # rdx = leftover bytes, 0..15
+ jnz L$cbc_enc_tail
+ movups %xmm2,(%r8)  # write the last ciphertext block back as the new IV
+ jmp L$cbc_ret
+
+L$cbc_enc_tail:  # copy the leftover bytes into the output buffer, zero-pad to 16, then encrypt that block in place
+ movq %rdx,%rcx  # rcx = leftover byte count
+ xchgq %rdi,%rsi  # string ops need rsi = source, rdi = destination
+.long 0x9066A4F3  # bytes F3 A4 66 90: rep movsb followed by a two-byte nop
+ movl $16,%ecx
+ subq %rdx,%rcx  # rcx = number of padding bytes
+ xorl %eax,%eax  # al = 0, the pad value
+.long 0x9066AAF3  # bytes F3 AA 66 90: rep stosb followed by a two-byte nop
+ leaq -16(%rdi),%rdi  # back to the start of the padded block
+ movl %r10d,%eax
+ movq %rdi,%rsi  # input and output now both point at the padded block
+ movq %r11,%rcx
+ xorq %rdx,%rdx  # rdx = 0 so the loop runs exactly once more and then stores the IV
+ jmp L$cbc_enc_loop
+
+.p2align 4
+L$cbc_decrypt:  # decrypt path: blocks are independent, so up to eight are processed in parallel
+ movups (%r8),%xmm9  # xmm9 = IV
+ movl %r10d,%eax
+ cmpq $112,%rdx
+ jbe L$cbc_dec_tail  # seven blocks or fewer: tail code only
+ shrl $1,%r10d  # r10d = rounds/2, pass count base for the eight-lane round loop
+ subq $112,%rdx
+ movl %r10d,%eax
+ movaps %xmm9,-24(%rsp)  # chain value kept in a scratch slot below rsp (no frame is allocated here)
+ jmp L$cbc_dec_loop8_enter
+.p2align 4
+L$cbc_dec_loop8:
+ movaps %xmm0,-24(%rsp)  # xmm0 = last ciphertext block of the previous batch = chain value for this one
+ movups %xmm9,(%rsi)  # deferred store of the eighth plaintext block of the previous batch
+ leaq 16(%rsi),%rsi
+L$cbc_dec_loop8_enter:
+ movups (%rcx),%xmm0  # round key 0
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 16(%rcx),%xmm1  # round key 1
+
+ leaq 32(%rcx),%rcx
+ movdqu 32(%rdi),%xmm4
+ xorps %xmm0,%xmm2
+ movdqu 48(%rdi),%xmm5
+ xorps %xmm0,%xmm3
+ movdqu 64(%rdi),%xmm6
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ movdqu 80(%rdi),%xmm7
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqu 96(%rdi),%xmm8
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqu 112(%rdi),%xmm9
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0  # round key 2
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1  # round key 3
+
+ call L$dec_loop8_enter  # label is defined outside this function; presumably the round loop inside _aesni_decrypt8 - TODO confirm
+
+ movups (%rdi),%xmm1  # reload ciphertext blocks to undo the chaining
+ movups 16(%rdi),%xmm0
+ xorps -24(%rsp),%xmm2  # first block chains from the saved IV / previous batch
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm1
+ xorps %xmm0,%xmm8
+ movups 112(%rdi),%xmm0  # xmm0 = eighth ciphertext block, the next chain value
+ xorps %xmm1,%xmm9
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movq %r11,%rcx
+ movups %xmm7,80(%rsi)
+ leaq 128(%rdi),%rdi
+ movups %xmm8,96(%rsi)
+ leaq 112(%rsi),%rsi  # only seven blocks stored so far; xmm9 is stored later
+ subq $128,%rdx
+ ja L$cbc_dec_loop8
+
+ movaps %xmm9,%xmm2  # xmm2 = pending plaintext block
+ movaps %xmm0,%xmm9  # xmm9 = chain value
+ addq $112,%rdx  # rdx = bytes still unread; can be negative here, only its low 4 bits are used after the jle
+ jle L$cbc_dec_tail_collected
+ movups %xmm2,(%rsi)
+ leal 1(%r10,%r10,1),%eax  # eax = 2*r10 + 1, restoring the full (odd) round count halved above
+ leaq 16(%rsi),%rsi
+L$cbc_dec_tail:  # one to seven blocks left; copies of early ciphertext blocks are kept in xmm8, xmm7, xmm6 for chaining
+ movups (%rdi),%xmm2
+ movaps %xmm2,%xmm8  # xmm8 = copy of ciphertext block 0
+ cmpq $16,%rdx
+ jbe L$cbc_dec_one
+
+ movups 16(%rdi),%xmm3
+ movaps %xmm3,%xmm7  # xmm7 = copy of ciphertext block 1
+ cmpq $32,%rdx
+ jbe L$cbc_dec_two
+
+ movups 32(%rdi),%xmm4
+ movaps %xmm4,%xmm6  # xmm6 = copy of ciphertext block 2
+ cmpq $48,%rdx
+ jbe L$cbc_dec_three
+
+ movups 48(%rdi),%xmm5
+ cmpq $64,%rdx
+ jbe L$cbc_dec_four
+
+ movups 64(%rdi),%xmm6  # from here on the saved copies are overwritten and ciphertext is reloaded from memory
+ cmpq $80,%rdx
+ jbe L$cbc_dec_five
+
+ movups 80(%rdi),%xmm7
+ cmpq $96,%rdx
+ jbe L$cbc_dec_six
+ # Seven blocks
+ movups 96(%rdi),%xmm8
+ movaps %xmm9,-24(%rsp)  # save the chain value; xmm9 is the eighth helper lane and gets clobbered
+ call _aesni_decrypt8
+ movups (%rdi),%xmm1
+ movups 16(%rdi),%xmm0
+ xorps -24(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm9  # xmm9 = last ciphertext block, the new IV
+ xorps %xmm0,%xmm8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movaps %xmm8,%xmm2  # last plaintext block is left pending in xmm2
+ subq $112,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+L$cbc_dec_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_dec1_16:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_dec1_16
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm9,%xmm2  # undo chaining with the IV
+ movaps %xmm8,%xmm9  # new IV = this ciphertext block
+ subq $16,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+L$cbc_dec_two:
+ xorps %xmm4,%xmm4  # third helper lane is unused; give it a defined value
+ call _aesni_decrypt3
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ movaps %xmm7,%xmm9  # new IV = ciphertext block 1
+ movaps %xmm3,%xmm2  # last plaintext block pending in xmm2
+ leaq 16(%rsi),%rsi
+ subq $32,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+L$cbc_dec_three:
+ call _aesni_decrypt3
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ movaps %xmm6,%xmm9  # new IV = ciphertext block 2
+ movaps %xmm4,%xmm2
+ leaq 32(%rsi),%rsi
+ subq $48,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+L$cbc_dec_four:
+ call _aesni_decrypt4
+ xorps %xmm9,%xmm2
+ movups 48(%rdi),%xmm9  # new IV = ciphertext block 3, reloaded from the input
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ xorps %xmm6,%xmm5
+ movups %xmm4,32(%rsi)
+ movaps %xmm5,%xmm2
+ leaq 48(%rsi),%rsi
+ subq $64,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+L$cbc_dec_five:
+ xorps %xmm7,%xmm7  # sixth helper lane is unused; give it a defined value
+ call _aesni_decrypt6
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm9  # new IV = ciphertext block 4
+ xorps %xmm1,%xmm6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ movaps %xmm6,%xmm2
+ subq $80,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+L$cbc_dec_six:
+ call _aesni_decrypt6
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm0
+ xorps %xmm1,%xmm6
+ movups 80(%rdi),%xmm9  # new IV = ciphertext block 5
+ xorps %xmm0,%xmm7
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ movaps %xmm7,%xmm2
+ subq $96,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+L$cbc_dec_tail_collected:  # xmm2 = last plaintext block not yet stored, xmm9 = new IV
+ andq $15,%rdx  # rdx = original length mod 16 (rdx is zero or negative on every path into this label)
+ movups %xmm9,(%r8)  # publish the new IV; movups leaves the flags from andq intact
+ jnz L$cbc_dec_tail_partial
+ movups %xmm2,(%rsi)  # block-aligned length: store the whole final block
+ jmp L$cbc_dec_ret
+.p2align 4
+L$cbc_dec_tail_partial:  # length not a multiple of 16: copy part of the final block via the scratch slot
+ movaps %xmm2,-24(%rsp)
+ movq $16,%rcx
+ movq %rsi,%rdi
+ subq %rdx,%rcx  # rcx = 16 - (length mod 16). NOTE(review): this copies 16-n bytes for an n-byte tail, not n - confirm intended
+ leaq -24(%rsp),%rsi
+.long 0x9066A4F3  # bytes F3 A4 66 90: rep movsb followed by a two-byte nop
+
+L$cbc_dec_ret:
+L$cbc_ret:
+ retq
+
+.globl _aesni_set_decrypt_key  # builds a decryption key schedule: expand as for encryption, then reverse the round key order and apply aesimc to the inner keys
+ # Registers as used below: rdi=user key and esi=key bits (both passed through to the encrypt setup), rdx=key schedule; result code in eax
+.p2align 4
+_aesni_set_decrypt_key:
+ subq $8,%rsp  # 8 bytes of padding around the nested call
+ call __aesni_set_encrypt_key  # on success leaves esi = stored round count and rdx unchanged
+ shll $4,%esi  # esi = round count * 16
+ testl %eax,%eax
+ jnz L$dec_key_ret  # propagate the error code from the encrypt setup
+ leaq 16(%rdx,%rsi,1),%rdi  # rdi = address of the last round key
+
+ movups (%rdx),%xmm0  # swap first and last round keys; these two are not transformed
+ movups (%rdi),%xmm1
+ movups %xmm0,(%rdi)
+ movups %xmm1,(%rdx)
+ leaq 16(%rdx),%rdx
+ leaq -16(%rdi),%rdi
+
+L$dec_key_inverse:  # walk inward from both ends: transform each pair with aesimc and store it swapped
+ movups (%rdx),%xmm0
+ movups (%rdi),%xmm1
+ aesimc %xmm0,%xmm0
+ aesimc %xmm1,%xmm1
+ leaq 16(%rdx),%rdx
+ leaq -16(%rdi),%rdi
+ movups %xmm0,16(%rdi)  # 16(%rdi) is the slot rdi pointed at before the advance
+ movups %xmm1,-16(%rdx)  # -16(%rdx) is the slot rdx pointed at before the advance
+ cmpq %rdx,%rdi
+ ja L$dec_key_inverse  # continue while the upper pointer is still above the lower one
+
+ movups (%rdx),%xmm0  # remaining middle round key (rdx and rdi are expected to coincide here)
+ aesimc %xmm0,%xmm0
+ movups %xmm0,(%rdi)
+L$dec_key_ret:
+ addq $8,%rsp
+ retq
+L$SEH_end_set_decrypt_key:
+
+.globl _aesni_set_encrypt_key  # expands a 128, 192 or 256 bit AES key into a round key schedule
+ # Registers as used below: rdi=user key, esi=key size in bits, rdx=key schedule out. Returns rax = 0 on success, -1 for a null pointer, -2 for an unsupported size
+.p2align 4
+_aesni_set_encrypt_key:
+__aesni_set_encrypt_key:  # second name for the same entry point, called by _aesni_set_decrypt_key
+ subq $8,%rsp  # 8 bytes of padding around the helper calls
+ movq $-1,%rax  # default result: null pointer error
+ testq %rdi,%rdi
+ jz L$enc_key_ret
+ testq %rdx,%rdx
+ jz L$enc_key_ret
+
+ movups (%rdi),%xmm0  # xmm0 = first 16 key bytes
+ xorps %xmm4,%xmm4  # xmm4 = 0, scratch whose low dword the expansion helpers rely on being zero
+ leaq 16(%rdx),%rax  # rax = write cursor for the second round key onward
+ cmpl $256,%esi
+ je L$14rounds
+ cmpl $192,%esi
+ je L$12rounds
+ cmpl $128,%esi
+ jne L$bad_keybits
+
+L$10rounds:  # 128-bit key: each helper call stores the current round key and derives the next one in xmm0
+ movl $9,%esi  # round count as stored in the schedule (loop count used by the cipher routines)
+ movups %xmm0,(%rdx)  # round key 0 = the user key
+ aeskeygenassist $1,%xmm0,%xmm1  # immediates 1,2,4,...,128,27,54 are the per-round constants
+ call L$key_expansion_128_cold  # cold entry: derive only, round key 0 is already stored
+ aeskeygenassist $2,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $4,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $8,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $16,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $32,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $64,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $128,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $27,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $54,%xmm0,%xmm1
+ call L$key_expansion_128
+ movups %xmm0,(%rax)  # store the final round key
+ movl %esi,80(%rax)  # rax is at schedule+160 here, so this writes the round count at schedule+240
+ xorl %eax,%eax  # success
+ jmp L$enc_key_ret
+
+.p2align 4
+L$12rounds:  # 192-bit key: xmm0 holds key bytes 0-15, xmm2 holds bytes 16-23
+ movq 16(%rdi),%xmm2
+ movl $11,%esi
+ movups %xmm0,(%rdx)
+ aeskeygenassist $1,%xmm2,%xmm1
+ call L$key_expansion_192a_cold
+ aeskeygenassist $2,%xmm2,%xmm1
+ call L$key_expansion_192b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call L$key_expansion_192a
+ aeskeygenassist $8,%xmm2,%xmm1
+ call L$key_expansion_192b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call L$key_expansion_192a
+ aeskeygenassist $32,%xmm2,%xmm1
+ call L$key_expansion_192b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call L$key_expansion_192a
+ aeskeygenassist $128,%xmm2,%xmm1
+ call L$key_expansion_192b
+ movups %xmm0,(%rax)  # store the final round key
+ movl %esi,48(%rax)  # round count; presumably lands at schedule+240 like the other key sizes - TODO confirm
+ xorq %rax,%rax  # success
+ jmp L$enc_key_ret
+
+.p2align 4
+L$14rounds:  # 256-bit key: xmm0 and xmm2 hold the two key halves and are updated alternately
+ movups 16(%rdi),%xmm2
+ movl $13,%esi
+ leaq 16(%rax),%rax  # first two round keys are stored directly, so the cursor starts one slot later
+ movups %xmm0,(%rdx)
+ movups %xmm2,16(%rdx)
+ aeskeygenassist $1,%xmm2,%xmm1
+ call L$key_expansion_256a_cold
+ aeskeygenassist $1,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $2,%xmm2,%xmm1
+ call L$key_expansion_256a
+ aeskeygenassist $2,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call L$key_expansion_256a
+ aeskeygenassist $4,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $8,%xmm2,%xmm1
+ call L$key_expansion_256a
+ aeskeygenassist $8,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call L$key_expansion_256a
+ aeskeygenassist $16,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $32,%xmm2,%xmm1
+ call L$key_expansion_256a
+ aeskeygenassist $32,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call L$key_expansion_256a
+ movups %xmm0,(%rax)  # store the final round key
+ movl %esi,16(%rax)  # round count; presumably lands at schedule+240 like the other key sizes - TODO confirm
+ xorq %rax,%rax  # success
+ jmp L$enc_key_ret
+
+.p2align 4
+L$bad_keybits:
+ movq $-2,%rax  # key size is not 128, 192 or 256
+L$enc_key_ret:
+ addq $8,%rsp
+ retq
+L$SEH_end_set_encrypt_key:
+
+.p2align 4  # helper for 128-bit key setup. In: xmm0 = current round key, xmm1 = aeskeygenassist result, xmm4 = scratch with dword 0 equal to zero, rax = write cursor
+L$key_expansion_128:  # warm entry: store the current round key and advance the cursor first
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+L$key_expansion_128_cold:  # cold entry: derive the next round key without storing
+ shufps $16,%xmm0,%xmm4  # the two shufps/xorps pairs turn xmm0 into the running XOR of its four dwords
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4  # dword 0 of xmm4 stays zero, keeping it valid for the next call
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1  # broadcast dword 3 of the keygenassist result
+ xorps %xmm1,%xmm0  # xmm0 = next round key
+ retq
+
+.p2align 4  # helper for 192-bit key setup. In: xmm0 and xmm2 = key state, xmm1 = aeskeygenassist result, xmm4 = scratch with dword 0 equal to zero, rax = write cursor
+L$key_expansion_192a:  # warm entry: store xmm0 as a complete round key and advance the cursor
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+L$key_expansion_192a_cold:  # cold entry: skip the store
+ movaps %xmm2,%xmm5  # keep a copy of xmm2; the 192b helper merges it into its first store
+L$key_expansion_192b_warm:  # shared derivation step, also the jump target at the end of the 192b helper
+ shufps $16,%xmm0,%xmm4  # same running-XOR construction on xmm0 as in the 128-bit helper
+ movdqa %xmm2,%xmm3
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ pslldq $4,%xmm3  # xmm3 = xmm2 shifted up by one dword
+ xorps %xmm4,%xmm0
+ pshufd $85,%xmm1,%xmm1  # broadcast dword 1 of the keygenassist result
+ pxor %xmm3,%xmm2
+ pxor %xmm1,%xmm0  # xmm0 = next four key words
+ pshufd $255,%xmm0,%xmm3  # broadcast the top word of the new xmm0
+ pxor %xmm3,%xmm2  # xmm2 = following key words, chained from xmm0
+ retq
+
+.p2align 4  # helper for 192-bit key setup: writes two 16-byte round keys assembled from xmm5, xmm0 and xmm2, then runs the shared derivation step
+L$key_expansion_192b:
+ movaps %xmm0,%xmm3
+ shufps $68,%xmm0,%xmm5  # xmm5 = low half of xmm5 followed by low half of xmm0
+ movups %xmm5,(%rax)
+ shufps $78,%xmm2,%xmm3  # xmm3 = high half of xmm0 followed by low half of xmm2
+ movups %xmm3,16(%rax)
+ leaq 32(%rax),%rax
+ jmp L$key_expansion_192b_warm  # tail-jump into the 192a helper; its retq returns to the original caller
+
+.p2align 4  # helper for 256-bit key setup, updates the xmm0 half. In: xmm1 = aeskeygenassist result, xmm4 = scratch with dword 0 equal to zero, rax = write cursor
+L$key_expansion_256a:  # warm entry: store xmm2 (the other half) and advance the cursor first
+ movups %xmm2,(%rax)
+ leaq 16(%rax),%rax
+L$key_expansion_256a_cold:  # cold entry: derive only
+ shufps $16,%xmm0,%xmm4  # the two shufps/xorps pairs turn xmm0 into the running XOR of its four dwords
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1  # broadcast dword 3 of the keygenassist result
+ xorps %xmm1,%xmm0  # xmm0 = next round key
+ retq
+
+.p2align 4  # helper for 256-bit key setup, updates the xmm2 half. In: xmm1 = aeskeygenassist result, xmm4 = scratch with dword 0 equal to zero, rax = write cursor
+L$key_expansion_256b:
+ movups %xmm0,(%rax)  # store xmm0 (the other half) and advance the cursor
+ leaq 16(%rax),%rax
+
+ shufps $16,%xmm2,%xmm4  # same running-XOR construction, applied to xmm2
+ xorps %xmm4,%xmm2
+ shufps $140,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $170,%xmm1,%xmm1  # broadcast dword 2 of the keygenassist result (the 256a helper uses dword 3)
+ xorps %xmm1,%xmm2  # xmm2 = next round key
+ retq
+
+
+.p2align 6  # constant tables, aligned to 64 bytes
+L$bswap_mask:  # byte indices 15 down to 0; presumably a byte-reversal shuffle mask - its users are outside this part of the file
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+L$increment32:  # dwords {6,6,6,0}; users are outside this part of the file
+.long 6,6,6,0
+L$increment64:  # dwords {1,0,0,0}, i.e. the 128-bit value 1; users are outside this part of the file
+.long 1,0,0,0
+L$xts_magic:  # XTS tweak doubling mask: 0x87 folds the bit shifted out of the top, 1 carries from the low into the high 64-bit half
+.long 0x87,0,1,0
+
+.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0  # NUL-terminated ASCII banner: AES for Intel AES-NI, CRYPTOGAMS by appro at openssl.org (address in angle brackets)
+.p2align 6
diff --git a/ext/libressl/crypto/aes/aesni-masm-x86_64.S b/ext/libressl/crypto/aes/aesni-masm-x86_64.S
new file mode 100644
index 0000000..f2a2490
--- /dev/null
+++ b/ext/libressl/crypto/aes/aesni-masm-x86_64.S
@@ -0,0 +1,3099 @@
+; 1 "crypto/aes/aesni-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/aes/aesni-masm-x86_64.S.tmp" 2
+OPTION DOTNAME
+
+; 1 "./crypto/x86_arch.h" 1
+
+
+; 16 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+; 40 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 "crypto/aes/aesni-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+PUBLIC aesni_encrypt  ; encrypts one 16-byte block
+ ; Registers as used below: rcx = input block, rdx = output block, r8 = key schedule with the round count at offset 240
+ALIGN 16
+aesni_encrypt PROC PUBLIC
+ movups xmm2,XMMWORD PTR[rcx]  ; xmm2 = input block
+ mov eax,DWORD PTR[240+r8]  ; eax = number of aesenc rounds before the final one
+ movups xmm0,XMMWORD PTR[r8]  ; round key 0
+ movups xmm1,XMMWORD PTR[16+r8]  ; round key 1
+ lea r8,QWORD PTR[32+r8]
+ xorps xmm2,xmm0  ; initial whitening
+$L$oop_enc1_1::
+ aesenc xmm2,xmm1
+ dec eax  ; sets ZF for the jnz below; movups and lea leave the flags alone
+ movups xmm1,XMMWORD PTR[r8]  ; fetch the next round key
+ lea r8,QWORD PTR[16+r8]
+ jnz $L$oop_enc1_1
+ aesenclast xmm2,xmm1  ; final round
+ movups XMMWORD PTR[rdx],xmm2
+ DB 0F3h,0C3h ;repret
+aesni_encrypt ENDP
+
+PUBLIC aesni_decrypt  ; decrypts one 16-byte block
+ ; Registers as used below: rcx = input block, rdx = output block, r8 = decryption key schedule with the round count at offset 240
+ALIGN 16
+aesni_decrypt PROC PUBLIC
+ movups xmm2,XMMWORD PTR[rcx]  ; xmm2 = input block
+ mov eax,DWORD PTR[240+r8]  ; eax = number of aesdec rounds before the final one
+ movups xmm0,XMMWORD PTR[r8]  ; round key 0
+ movups xmm1,XMMWORD PTR[16+r8]  ; round key 1
+ lea r8,QWORD PTR[32+r8]
+ xorps xmm2,xmm0  ; initial whitening
+$L$oop_dec1_2::
+ aesdec xmm2,xmm1
+ dec eax  ; sets ZF for the jnz below; movups and lea leave the flags alone
+ movups xmm1,XMMWORD PTR[r8]  ; fetch the next round key
+ lea r8,QWORD PTR[16+r8]
+ jnz $L$oop_dec1_2
+ aesdeclast xmm2,xmm1  ; final round
+ movups XMMWORD PTR[rdx],xmm2
+ DB 0F3h,0C3h ;repret
+aesni_decrypt ENDP
+
+ALIGN 16  ; internal helper: encrypts the three blocks in xmm2, xmm3, xmm4 in parallel, results in place
+_aesni_encrypt3 PROC PRIVATE  ; in: rcx = key schedule, eax = round count. Modifies rcx, eax, xmm0, xmm1
+ movups xmm0,XMMWORD PTR[rcx]  ; round key 0
+ shr eax,1  ; the loop applies two round keys per pass
+ movups xmm1,XMMWORD PTR[16+rcx]  ; round key 1
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0  ; initial whitening of all lanes
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ movups xmm0,XMMWORD PTR[rcx]  ; round key 2
+
+$L$enc_loop3::
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ dec eax  ; sets ZF for the jnz at the bottom; the SSE, AES and lea instructions in between leave the flags alone
+ aesenc xmm4,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$enc_loop3
+
+ aesenc xmm2,xmm1  ; last regular round
+ aesenc xmm3,xmm1
+ aesenc xmm4,xmm1
+ aesenclast xmm2,xmm0  ; final round
+ aesenclast xmm3,xmm0
+ aesenclast xmm4,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_encrypt3 ENDP
+
+ALIGN 16  ; internal helper: decrypts the three blocks in xmm2, xmm3, xmm4 in parallel, results in place
+_aesni_decrypt3 PROC PRIVATE  ; in: rcx = key schedule, eax = round count. Modifies rcx, eax, xmm0, xmm1
+ movups xmm0,XMMWORD PTR[rcx]  ; round key 0
+ shr eax,1  ; the loop applies two round keys per pass
+ movups xmm1,XMMWORD PTR[16+rcx]  ; round key 1
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0  ; initial whitening of all lanes
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ movups xmm0,XMMWORD PTR[rcx]  ; round key 2
+
+$L$dec_loop3::
+ aesdec xmm2,xmm1
+ aesdec xmm3,xmm1
+ dec eax  ; sets ZF for the jnz at the bottom; the SSE, AES and lea instructions in between leave the flags alone
+ aesdec xmm4,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesdec xmm2,xmm0
+ aesdec xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesdec xmm4,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$dec_loop3
+
+ aesdec xmm2,xmm1  ; last regular round
+ aesdec xmm3,xmm1
+ aesdec xmm4,xmm1
+ aesdeclast xmm2,xmm0  ; final round
+ aesdeclast xmm3,xmm0
+ aesdeclast xmm4,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_decrypt3 ENDP
+
+ALIGN 16  ; internal helper: encrypts the four blocks in xmm2-xmm5 in parallel, results in place
+_aesni_encrypt4 PROC PRIVATE  ; in: rcx = key schedule, eax = round count. Modifies rcx, eax, xmm0, xmm1
+ movups xmm0,XMMWORD PTR[rcx]  ; round key 0
+ shr eax,1  ; the loop applies two round keys per pass
+ movups xmm1,XMMWORD PTR[16+rcx]  ; round key 1
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0  ; initial whitening of all lanes
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ xorps xmm5,xmm0
+ movups xmm0,XMMWORD PTR[rcx]  ; round key 2
+
+$L$enc_loop4::
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ dec eax  ; sets ZF for the jnz at the bottom; the SSE, AES and lea instructions in between leave the flags alone
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ aesenc xmm5,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$enc_loop4
+
+ aesenc xmm2,xmm1  ; last regular round
+ aesenc xmm3,xmm1
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenclast xmm2,xmm0  ; final round
+ aesenclast xmm3,xmm0
+ aesenclast xmm4,xmm0
+ aesenclast xmm5,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_encrypt4 ENDP
+
+ALIGN 16  ; internal helper: decrypts the four blocks in xmm2-xmm5 in parallel, results in place
+_aesni_decrypt4 PROC PRIVATE  ; in: rcx = key schedule, eax = round count. Modifies rcx, eax, xmm0, xmm1
+ movups xmm0,XMMWORD PTR[rcx]  ; round key 0
+ shr eax,1  ; the loop applies two round keys per pass
+ movups xmm1,XMMWORD PTR[16+rcx]  ; round key 1
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0  ; initial whitening of all lanes
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ xorps xmm5,xmm0
+ movups xmm0,XMMWORD PTR[rcx]  ; round key 2
+
+$L$dec_loop4::
+ aesdec xmm2,xmm1
+ aesdec xmm3,xmm1
+ dec eax  ; sets ZF for the jnz at the bottom; the SSE, AES and lea instructions in between leave the flags alone
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesdec xmm2,xmm0
+ aesdec xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesdec xmm4,xmm0
+ aesdec xmm5,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$dec_loop4
+
+ aesdec xmm2,xmm1  ; last regular round
+ aesdec xmm3,xmm1
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdeclast xmm2,xmm0  ; final round
+ aesdeclast xmm3,xmm0
+ aesdeclast xmm4,xmm0
+ aesdeclast xmm5,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_decrypt4 ENDP
+
+ALIGN 16  ; internal helper: encrypts the six blocks in xmm2-xmm7 in parallel, results in place
+_aesni_encrypt6 PROC PRIVATE  ; in: rcx = key schedule, eax = round count. Modifies rcx, eax, xmm0, xmm1
+ movups xmm0,XMMWORD PTR[rcx]  ; round key 0
+ shr eax,1  ; the loop applies two round keys per pass
+ movups xmm1,XMMWORD PTR[16+rcx]  ; round key 1
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0  ; whitening of each lane is interleaved with the first round of the lanes before it
+ pxor xmm3,xmm0
+ aesenc xmm2,xmm1
+ pxor xmm4,xmm0
+ aesenc xmm3,xmm1
+ pxor xmm5,xmm0
+ aesenc xmm4,xmm1
+ pxor xmm6,xmm0
+ aesenc xmm5,xmm1
+ pxor xmm7,xmm0
+ dec eax  ; first round is done above, so one pass fewer; ZF is consumed by the jnz at the bottom of the loop
+ aesenc xmm6,xmm1
+ movups xmm0,XMMWORD PTR[rcx]  ; round key 2
+ aesenc xmm7,xmm1
+ jmp $L$enc_loop6_enter  ; join the loop at its second half
+ALIGN 16
+$L$enc_loop6::
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ dec eax
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+$L$enc_loop6_enter::
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ aesenc xmm5,xmm0
+ aesenc xmm6,xmm0
+ aesenc xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$enc_loop6
+
+ aesenc xmm2,xmm1  ; last regular round
+ aesenc xmm3,xmm1
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ aesenclast xmm2,xmm0  ; final round
+ aesenclast xmm3,xmm0
+ aesenclast xmm4,xmm0
+ aesenclast xmm5,xmm0
+ aesenclast xmm6,xmm0
+ aesenclast xmm7,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_encrypt6 ENDP
+
+ALIGN 16  ; internal helper: decrypts the six blocks in xmm2-xmm7 in parallel, results in place
+_aesni_decrypt6 PROC PRIVATE  ; in: rcx = key schedule, eax = round count. Modifies rcx, eax, xmm0, xmm1
+ movups xmm0,XMMWORD PTR[rcx]  ; round key 0
+ shr eax,1  ; the loop applies two round keys per pass
+ movups xmm1,XMMWORD PTR[16+rcx]  ; round key 1
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0  ; whitening of each lane is interleaved with the first round of the lanes before it
+ pxor xmm3,xmm0
+ aesdec xmm2,xmm1
+ pxor xmm4,xmm0
+ aesdec xmm3,xmm1
+ pxor xmm5,xmm0
+ aesdec xmm4,xmm1
+ pxor xmm6,xmm0
+ aesdec xmm5,xmm1
+ pxor xmm7,xmm0
+ dec eax  ; first round is done above, so one pass fewer; ZF is consumed by the jnz at the bottom of the loop
+ aesdec xmm6,xmm1
+ movups xmm0,XMMWORD PTR[rcx]  ; round key 2
+ aesdec xmm7,xmm1
+ jmp $L$dec_loop6_enter  ; join the loop at its second half
+ALIGN 16
+$L$dec_loop6::
+ aesdec xmm2,xmm1
+ aesdec xmm3,xmm1
+ dec eax
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+$L$dec_loop6_enter::
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesdec xmm2,xmm0
+ aesdec xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesdec xmm4,xmm0
+ aesdec xmm5,xmm0
+ aesdec xmm6,xmm0
+ aesdec xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$dec_loop6
+
+ aesdec xmm2,xmm1  ; last regular round
+ aesdec xmm3,xmm1
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+ aesdeclast xmm2,xmm0  ; final round
+ aesdeclast xmm3,xmm0
+ aesdeclast xmm4,xmm0
+ aesdeclast xmm5,xmm0
+ aesdeclast xmm6,xmm0
+ aesdeclast xmm7,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_decrypt6 ENDP
+
+ALIGN 16  ; internal helper: encrypts the eight blocks in xmm2-xmm9 in parallel, results in place
+_aesni_encrypt8 PROC PRIVATE  ; in: rcx = key schedule, eax = round count. Modifies rcx, eax, xmm0, xmm1
+ movups xmm0,XMMWORD PTR[rcx]  ; round key 0
+ shr eax,1  ; the loop applies two round keys per pass
+ movups xmm1,XMMWORD PTR[16+rcx]  ; round key 1
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0  ; whitening of each lane is interleaved with the first round of the lanes before it
+ xorps xmm3,xmm0
+ aesenc xmm2,xmm1
+ pxor xmm4,xmm0
+ aesenc xmm3,xmm1
+ pxor xmm5,xmm0
+ aesenc xmm4,xmm1
+ pxor xmm6,xmm0
+ aesenc xmm5,xmm1
+ pxor xmm7,xmm0
+ dec eax  ; first round is done above, so one pass fewer; ZF is consumed by the jnz at the bottom of the loop
+ aesenc xmm6,xmm1
+ pxor xmm8,xmm0
+ aesenc xmm7,xmm1
+ pxor xmm9,xmm0
+ movups xmm0,XMMWORD PTR[rcx]  ; round key 2
+ aesenc xmm8,xmm1
+ aesenc xmm9,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]  ; round key 3
+ jmp $L$enc_loop8_enter  ; join the loop at its second half
+ALIGN 16
+$L$enc_loop8::
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ dec eax
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ aesenc xmm8,xmm1
+ aesenc xmm9,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+$L$enc_loop8_enter::  ; expects xmm0 and xmm1 to hold the next two round keys and ZF to reflect the pass counter
+ aesenc xmm2,xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ aesenc xmm5,xmm0
+ aesenc xmm6,xmm0
+ aesenc xmm7,xmm0
+ aesenc xmm8,xmm0
+ aesenc xmm9,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$enc_loop8
+
+ aesenc xmm2,xmm1  ; last regular round
+ aesenc xmm3,xmm1
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ aesenc xmm8,xmm1
+ aesenc xmm9,xmm1
+ aesenclast xmm2,xmm0  ; final round
+ aesenclast xmm3,xmm0
+ aesenclast xmm4,xmm0
+ aesenclast xmm5,xmm0
+ aesenclast xmm6,xmm0
+ aesenclast xmm7,xmm0
+ aesenclast xmm8,xmm0
+ aesenclast xmm9,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_encrypt8 ENDP
+
+ALIGN 16
+_aesni_decrypt8 PROC PRIVATE ; Internal helper: AES-decrypts the eight blocks held in xmm2..xmm9 in place. In: rcx = round-key array, eax = round counter (halved below). Clobbers rcx, eax, xmm0, xmm1 and flags; touches no stack.
+ movups xmm0,XMMWORD PTR[rcx] ; xmm0 = round key 0
+ shr eax,1 ; the loop below applies two round keys per iteration
+ movups xmm1,XMMWORD PTR[16+rcx] ; xmm1 = round key 1
+ lea rcx,QWORD PTR[32+rcx] ; rcx -> round key 2
+ xorps xmm2,xmm0 ; XOR each block with key 0, interleaved with the first aesdec round (key 1)
+ xorps xmm3,xmm0
+ aesdec xmm2,xmm1
+ pxor xmm4,xmm0
+ aesdec xmm3,xmm1
+ pxor xmm5,xmm0
+ aesdec xmm4,xmm1
+ pxor xmm6,xmm0
+ aesdec xmm5,xmm1
+ pxor xmm7,xmm0
+ dec eax ; ZF set here is consumed by the jnz at the loop bottom; nothing in between writes flags
+ aesdec xmm6,xmm1
+ pxor xmm8,xmm0
+ aesdec xmm7,xmm1
+ pxor xmm9,xmm0
+ movups xmm0,XMMWORD PTR[rcx] ; xmm0 = round key 2
+ aesdec xmm8,xmm1
+ aesdec xmm9,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx] ; xmm1 = round key 3
+ jmp $L$dec_loop8_enter ; the xmm1 round is already done, so enter mid-loop
+ALIGN 16
+$L$dec_loop8::
+ aesdec xmm2,xmm1 ; round with the key in xmm1 on all eight blocks
+ aesdec xmm3,xmm1
+ dec eax ; loop counter; flags survive until the jnz below
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+ aesdec xmm8,xmm1
+ aesdec xmm9,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx] ; key for the next xmm1 round
+$L$dec_loop8_enter::
+ aesdec xmm2,xmm0 ; round with the key in xmm0 on all eight blocks
+ aesdec xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx] ; advance two round keys (lea leaves flags intact)
+ aesdec xmm4,xmm0
+ aesdec xmm5,xmm0
+ aesdec xmm6,xmm0
+ aesdec xmm7,xmm0
+ aesdec xmm8,xmm0
+ aesdec xmm9,xmm0
+ movups xmm0,XMMWORD PTR[rcx] ; key for the following xmm0 round (the last one after loop exit)
+ jnz $L$dec_loop8
+
+ aesdec xmm2,xmm1 ; final regular round
+ aesdec xmm3,xmm1
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+ aesdec xmm8,xmm1
+ aesdec xmm9,xmm1
+ aesdeclast xmm2,xmm0 ; last round with the final key; results stay in xmm2..xmm9
+ aesdeclast xmm3,xmm0
+ aesdeclast xmm4,xmm0
+ aesdeclast xmm5,xmm0
+ aesdeclast xmm6,xmm0
+ aesdeclast xmm7,xmm0
+ aesdeclast xmm8,xmm0
+ aesdeclast xmm9,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_decrypt8 ENDP
+PUBLIC aesni_ecb_encrypt
+
+ALIGN 16
+aesni_ecb_encrypt PROC PUBLIC ; ECB-processes whole 16-byte blocks. Win64 args: rcx = input, rdx = output, r8 = length in bytes (rounded down to a multiple of 16), r9 = key schedule (round count read from offset 240), [40+rsp] = nonzero to encrypt, zero to decrypt.
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ecb_encrypt::
+ mov rdi,rcx ; rdi = input pointer
+ mov rsi,rdx ; rsi = output pointer
+ mov rdx,r8 ; rdx = byte length
+ mov rcx,r9 ; rcx = key schedule
+ mov r8,QWORD PTR[40+rsp] ; r8 = encrypt/decrypt selector (5th argument)
+
+
+ ; xmm6..xmm9 are callee-saved in the Win64 ABI but are used below as block
+ ; registers, so preserve them in an 88-byte frame (keeps rsp 16-byte aligned
+ ; for movaps; same layout as the ccm64 routines in this file).
+ ; NOTE(review): the SEH unwind data for this routine lives outside this block
+ ; and must describe this frame - confirm it is updated together with this change.
+ lea rsp,QWORD PTR[((-88))+rsp]
+ movaps XMMWORD PTR[rsp],xmm6
+ movaps XMMWORD PTR[16+rsp],xmm7
+ movaps XMMWORD PTR[32+rsp],xmm8
+ movaps XMMWORD PTR[48+rsp],xmm9
+
+ and rdx,-16 ; drop any trailing partial block
+ jz $L$ecb_ret ; nothing to do
+
+ mov eax,DWORD PTR[240+rcx] ; eax = round counter from the key schedule
+ movups xmm0,XMMWORD PTR[rcx]
+ mov r11,rcx ; r11 / r10d keep pristine key pointer / round counter (helpers clobber rcx, eax)
+ mov r10d,eax
+ test r8d,r8d
+ jz $L$ecb_decrypt ; selector == 0 -> decrypt path
+
+ cmp rdx,080h ; fewer than 8 blocks -> tail handling only
+ jb $L$ecb_enc_tail
+
+ movdqu xmm2,XMMWORD PTR[rdi] ; load the first 8 blocks
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ lea rdi,QWORD PTR[128+rdi]
+ sub rdx,080h
+ jmp $L$ecb_enc_loop8_enter
+ALIGN 16
+$L$ecb_enc_loop8::
+ movups XMMWORD PTR[rsi],xmm2 ; store the previous 8 results while loading the next 8 inputs
+ mov rcx,r11 ; restore key pointer for the helper
+ movdqu xmm2,XMMWORD PTR[rdi]
+ mov eax,r10d ; restore round counter for the helper
+ movups XMMWORD PTR[16+rsi],xmm3
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movups XMMWORD PTR[32+rsi],xmm4
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movups XMMWORD PTR[48+rsi],xmm5
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movups XMMWORD PTR[64+rsi],xmm6
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ movups XMMWORD PTR[80+rsi],xmm7
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movups XMMWORD PTR[96+rsi],xmm8
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ movups XMMWORD PTR[112+rsi],xmm9
+ lea rsi,QWORD PTR[128+rsi]
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ lea rdi,QWORD PTR[128+rdi]
+$L$ecb_enc_loop8_enter::
+
+ call _aesni_encrypt8
+
+ sub rdx,080h
+ jnc $L$ecb_enc_loop8 ; another full group of 8 blocks is available
+
+ movups XMMWORD PTR[rsi],xmm2 ; flush the last full group
+ mov rcx,r11
+ movups XMMWORD PTR[16+rsi],xmm3
+ mov eax,r10d
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ movups XMMWORD PTR[96+rsi],xmm8
+ movups XMMWORD PTR[112+rsi],xmm9
+ lea rsi,QWORD PTR[128+rsi]
+ add rdx,080h ; rdx = remaining bytes (0..112)
+ jz $L$ecb_ret
+
+$L$ecb_enc_tail::
+ movups xmm2,XMMWORD PTR[rdi] ; 1..7 blocks remain: load as many as needed, dispatch on the count
+ cmp rdx,020h
+ jb $L$ecb_enc_one
+ movups xmm3,XMMWORD PTR[16+rdi] ; movups leaves the cmp flags intact for the je below
+ je $L$ecb_enc_two
+ movups xmm4,XMMWORD PTR[32+rdi]
+ cmp rdx,040h
+ jb $L$ecb_enc_three
+ movups xmm5,XMMWORD PTR[48+rdi]
+ je $L$ecb_enc_four
+ movups xmm6,XMMWORD PTR[64+rdi]
+ cmp rdx,060h
+ jb $L$ecb_enc_five
+ movups xmm7,XMMWORD PTR[80+rdi]
+ je $L$ecb_enc_six
+ movdqu xmm8,XMMWORD PTR[96+rdi] ; seven blocks: run the 8-way helper, xmm9 result is discarded
+ call _aesni_encrypt8
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ movups XMMWORD PTR[96+rsi],xmm8
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_one::
+ movups xmm0,XMMWORD PTR[rcx] ; single block: inline one-key-per-iteration loop
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_3::
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_3
+ aesenclast xmm2,xmm1
+ movups XMMWORD PTR[rsi],xmm2
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_two::
+ xorps xmm4,xmm4 ; zero the unused third lane of the 3-way helper
+ call _aesni_encrypt3
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_three::
+ call _aesni_encrypt3
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_four::
+ call _aesni_encrypt4
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_five::
+ xorps xmm7,xmm7 ; zero the unused sixth lane of the 6-way helper
+ call _aesni_encrypt6
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_enc_six::
+ call _aesni_encrypt6
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ jmp $L$ecb_ret
+
+ALIGN 16
+$L$ecb_decrypt::
+ cmp rdx,080h ; decrypt path mirrors the encrypt path with the aesdec helpers
+ jb $L$ecb_dec_tail
+
+ movdqu xmm2,XMMWORD PTR[rdi]
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ lea rdi,QWORD PTR[128+rdi]
+ sub rdx,080h
+ jmp $L$ecb_dec_loop8_enter
+ALIGN 16
+$L$ecb_dec_loop8::
+ movups XMMWORD PTR[rsi],xmm2 ; store the previous 8 results while loading the next 8 inputs
+ mov rcx,r11
+ movdqu xmm2,XMMWORD PTR[rdi]
+ mov eax,r10d
+ movups XMMWORD PTR[16+rsi],xmm3
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movups XMMWORD PTR[32+rsi],xmm4
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movups XMMWORD PTR[48+rsi],xmm5
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movups XMMWORD PTR[64+rsi],xmm6
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ movups XMMWORD PTR[80+rsi],xmm7
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movups XMMWORD PTR[96+rsi],xmm8
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ movups XMMWORD PTR[112+rsi],xmm9
+ lea rsi,QWORD PTR[128+rsi]
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ lea rdi,QWORD PTR[128+rdi]
+$L$ecb_dec_loop8_enter::
+
+ call _aesni_decrypt8
+
+ movups xmm0,XMMWORD PTR[r11]
+ sub rdx,080h
+ jnc $L$ecb_dec_loop8
+
+ movups XMMWORD PTR[rsi],xmm2 ; flush the last full group
+ mov rcx,r11
+ movups XMMWORD PTR[16+rsi],xmm3
+ mov eax,r10d
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ movups XMMWORD PTR[96+rsi],xmm8
+ movups XMMWORD PTR[112+rsi],xmm9
+ lea rsi,QWORD PTR[128+rsi]
+ add rdx,080h ; rdx = remaining bytes (0..112)
+ jz $L$ecb_ret
+
+$L$ecb_dec_tail::
+ movups xmm2,XMMWORD PTR[rdi] ; 1..7 blocks remain: load as many as needed, dispatch on the count
+ cmp rdx,020h
+ jb $L$ecb_dec_one
+ movups xmm3,XMMWORD PTR[16+rdi]
+ je $L$ecb_dec_two
+ movups xmm4,XMMWORD PTR[32+rdi]
+ cmp rdx,040h
+ jb $L$ecb_dec_three
+ movups xmm5,XMMWORD PTR[48+rdi]
+ je $L$ecb_dec_four
+ movups xmm6,XMMWORD PTR[64+rdi]
+ cmp rdx,060h
+ jb $L$ecb_dec_five
+ movups xmm7,XMMWORD PTR[80+rdi]
+ je $L$ecb_dec_six
+ movups xmm8,XMMWORD PTR[96+rdi] ; seven blocks: run the 8-way helper, xmm9 result is discarded
+ movups xmm0,XMMWORD PTR[rcx]
+ call _aesni_decrypt8
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ movups XMMWORD PTR[96+rsi],xmm8
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_one::
+ movups xmm0,XMMWORD PTR[rcx] ; single block: inline one-key-per-iteration loop
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_4::
+ aesdec xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_4
+ aesdeclast xmm2,xmm1
+ movups XMMWORD PTR[rsi],xmm2
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_two::
+ xorps xmm4,xmm4 ; zero the unused third lane of the 3-way helper
+ call _aesni_decrypt3
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_three::
+ call _aesni_decrypt3
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_four::
+ call _aesni_decrypt4
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_five::
+ xorps xmm7,xmm7 ; zero the unused sixth lane of the 6-way helper
+ call _aesni_decrypt6
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ jmp $L$ecb_ret
+ALIGN 16
+$L$ecb_dec_six::
+ call _aesni_decrypt6
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+
+$L$ecb_ret::
+ movaps xmm6,XMMWORD PTR[rsp] ; restore the callee-saved xmm6..xmm9 and release the frame
+ movaps xmm7,XMMWORD PTR[16+rsp]
+ movaps xmm8,XMMWORD PTR[32+rsp]
+ movaps xmm9,XMMWORD PTR[48+rsp]
+ lea rsp,QWORD PTR[88+rsp]
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ecb_encrypt::
+aesni_ecb_encrypt ENDP
+PUBLIC aesni_ccm64_encrypt_blocks
+
+ALIGN 16
+aesni_ccm64_encrypt_blocks PROC PUBLIC ; Win64 args: rcx = input, rdx = output, r8 = number of 16-byte blocks, r9 = key schedule, [40+rsp] = pointer to the 16-byte counter block, [48+rsp] = pointer to a 16-byte running value that is updated in place (presumably the CCM CBC-MAC - confirm against the caller).
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ccm64_encrypt_blocks::
+ mov rdi,rcx ; rdi = input pointer
+ mov rsi,rdx ; rsi = output pointer
+ mov rdx,r8 ; rdx = block count
+ mov rcx,r9 ; rcx = key schedule
+ mov r8,QWORD PTR[40+rsp] ; r8 = counter block pointer
+ mov r9,QWORD PTR[48+rsp] ; r9 = running-value pointer
+
+
+ lea rsp,QWORD PTR[((-88))+rsp] ; frame for the callee-saved xmm6..xmm9 (rsp becomes 16-byte aligned)
+ movaps XMMWORD PTR[rsp],xmm6
+ movaps XMMWORD PTR[16+rsp],xmm7
+ movaps XMMWORD PTR[32+rsp],xmm8
+ movaps XMMWORD PTR[48+rsp],xmm9
+$L$ccm64_enc_body::
+ mov eax,DWORD PTR[240+rcx] ; eax = round counter from the key schedule
+ movdqu xmm9,XMMWORD PTR[r8] ; xmm9 = counter block
+ movdqa xmm6,XMMWORD PTR[$L$increment64] ; constants defined outside this block
+ movdqa xmm7,XMMWORD PTR[$L$bswap_mask]
+
+ shr eax,1 ; inner loop applies two round keys per iteration
+ lea r11,QWORD PTR[rcx] ; r11 = pristine key pointer
+ movdqu xmm3,XMMWORD PTR[r9] ; xmm3 = running value
+ movdqa xmm2,xmm9 ; xmm2 = counter block to encrypt first
+ mov r10d,eax ; r10d = pristine (halved) round counter
+DB 102,68,15,56,0,207 ; pshufb xmm9,xmm7 (hand-encoded): byte-shuffle the counter so paddq can increment it
+ jmp $L$ccm64_enc_outer
+ALIGN 16
+$L$ccm64_enc_outer::
+ movups xmm0,XMMWORD PTR[r11] ; xmm0 = round key 0
+ mov eax,r10d
+ movups xmm8,XMMWORD PTR[rdi] ; xmm8 = input block
+
+ xorps xmm2,xmm0 ; counter ^= key 0
+ movups xmm1,XMMWORD PTR[16+r11]
+ xorps xmm0,xmm8 ; xmm0 = key 0 ^ input
+ lea rcx,QWORD PTR[32+r11]
+ xorps xmm3,xmm0 ; running value ^= input ^ key 0
+ movups xmm0,XMMWORD PTR[rcx]
+
+$L$ccm64_enc2_loop::
+ aesenc xmm2,xmm1 ; encrypt counter (xmm2) and running value (xmm3) side by side
+ dec eax ; flags survive to the jnz below
+ aesenc xmm3,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm3,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$ccm64_enc2_loop
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ paddq xmm9,xmm6 ; advance the shuffled counter by $L$increment64
+ aesenclast xmm2,xmm0
+ aesenclast xmm3,xmm0
+
+ dec rdx ; one block consumed; ZF is tested by the jnz below (no flag writers in between)
+ lea rdi,QWORD PTR[16+rdi]
+ xorps xmm8,xmm2 ; output = input ^ encrypted counter
+ movdqa xmm2,xmm9 ; next counter block ...
+ movups XMMWORD PTR[rsi],xmm8
+ lea rsi,QWORD PTR[16+rsi]
+DB 102,15,56,0,215 ; pshufb xmm2,xmm7 (hand-encoded): ... shuffled back into block byte order
+ jnz $L$ccm64_enc_outer
+
+ movups XMMWORD PTR[r9],xmm3 ; write the updated running value back
+ movaps xmm6,XMMWORD PTR[rsp] ; restore callee-saved registers
+ movaps xmm7,XMMWORD PTR[16+rsp]
+ movaps xmm8,XMMWORD PTR[32+rsp]
+ movaps xmm9,XMMWORD PTR[48+rsp]
+ lea rsp,QWORD PTR[88+rsp]
+$L$ccm64_enc_ret::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ccm64_encrypt_blocks::
+aesni_ccm64_encrypt_blocks ENDP
+PUBLIC aesni_ccm64_decrypt_blocks
+
+ALIGN 16
+aesni_ccm64_decrypt_blocks PROC PUBLIC ; Win64 args: rcx = input, rdx = output, r8 = number of 16-byte blocks, r9 = key schedule, [40+rsp] = pointer to the 16-byte counter block, [48+rsp] = pointer to a 16-byte running value that is updated in place (presumably the CCM CBC-MAC - confirm against the caller).
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ccm64_decrypt_blocks::
+ mov rdi,rcx ; rdi = input pointer
+ mov rsi,rdx ; rsi = output pointer
+ mov rdx,r8 ; rdx = block count
+ mov rcx,r9 ; rcx = key schedule
+ mov r8,QWORD PTR[40+rsp] ; r8 = counter block pointer
+ mov r9,QWORD PTR[48+rsp] ; r9 = running-value pointer
+
+
+ lea rsp,QWORD PTR[((-88))+rsp] ; frame for the callee-saved xmm6..xmm9 (rsp becomes 16-byte aligned)
+ movaps XMMWORD PTR[rsp],xmm6
+ movaps XMMWORD PTR[16+rsp],xmm7
+ movaps XMMWORD PTR[32+rsp],xmm8
+ movaps XMMWORD PTR[48+rsp],xmm9
+$L$ccm64_dec_body::
+ mov eax,DWORD PTR[240+rcx] ; eax = round counter from the key schedule
+ movups xmm9,XMMWORD PTR[r8] ; xmm9 = counter block
+ movdqu xmm3,XMMWORD PTR[r9] ; xmm3 = running value
+ movdqa xmm6,XMMWORD PTR[$L$increment64] ; constants defined outside this block
+ movdqa xmm7,XMMWORD PTR[$L$bswap_mask]
+
+ movaps xmm2,xmm9 ; xmm2 = first counter block to encrypt
+ mov r10d,eax ; r10d = pristine (un-halved) round counter
+ mov r11,rcx ; r11 = pristine key pointer
+DB 102,68,15,56,0,207 ; pshufb xmm9,xmm7 (hand-encoded): byte-shuffle the counter so paddq can increment it
+ movups xmm0,XMMWORD PTR[rcx] ; encrypt the first counter block alone, one round key per iteration
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_5::
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_5
+ aesenclast xmm2,xmm1
+ movups xmm8,XMMWORD PTR[rdi] ; xmm8 = first input block
+ paddq xmm9,xmm6 ; advance the shuffled counter
+ lea rdi,QWORD PTR[16+rdi]
+ jmp $L$ccm64_dec_outer
+ALIGN 16
+$L$ccm64_dec_outer::
+ xorps xmm8,xmm2 ; output = input ^ encrypted counter
+ movdqa xmm2,xmm9 ; next counter block ...
+ mov eax,r10d
+ movups XMMWORD PTR[rsi],xmm8
+ lea rsi,QWORD PTR[16+rsi]
+DB 102,15,56,0,215 ; pshufb xmm2,xmm7 (hand-encoded): ... shuffled back into block byte order
+
+ sub rdx,1
+ jz $L$ccm64_dec_break ; last block: only the running value still needs an encryption pass
+
+ movups xmm0,XMMWORD PTR[r11] ; xmm0 = round key 0
+ shr eax,1 ; two round keys per iteration in the paired loop
+ movups xmm1,XMMWORD PTR[16+r11]
+ xorps xmm8,xmm0 ; xmm8 = output ^ key 0
+ lea rcx,QWORD PTR[32+r11]
+ xorps xmm2,xmm0 ; counter ^= key 0
+ xorps xmm3,xmm8 ; running value ^= output ^ key 0
+ movups xmm0,XMMWORD PTR[rcx]
+
+$L$ccm64_dec2_loop::
+ aesenc xmm2,xmm1 ; encrypt next counter (xmm2) and running value (xmm3) side by side
+ dec eax ; flags survive to the jnz below
+ aesenc xmm3,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm3,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$ccm64_dec2_loop
+ movups xmm8,XMMWORD PTR[rdi] ; fetch the next input block
+ paddq xmm9,xmm6 ; advance the shuffled counter
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ lea rdi,QWORD PTR[16+rdi]
+ aesenclast xmm2,xmm0
+ aesenclast xmm3,xmm0
+ jmp $L$ccm64_dec_outer
+
+ALIGN 16
+$L$ccm64_dec_break::
+
+ movups xmm0,XMMWORD PTR[r11] ; final pass: fold the last output block into the running value and encrypt it alone (eax still holds the full round counter)
+ movups xmm1,XMMWORD PTR[16+r11]
+ xorps xmm8,xmm0
+ lea r11,QWORD PTR[32+r11]
+ xorps xmm3,xmm8
+$L$oop_enc1_6::
+ aesenc xmm3,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[r11]
+ lea r11,QWORD PTR[16+r11]
+ jnz $L$oop_enc1_6
+ aesenclast xmm3,xmm1
+ movups XMMWORD PTR[r9],xmm3 ; write the updated running value back
+ movaps xmm6,XMMWORD PTR[rsp] ; restore callee-saved registers
+ movaps xmm7,XMMWORD PTR[16+rsp]
+ movaps xmm8,XMMWORD PTR[32+rsp]
+ movaps xmm9,XMMWORD PTR[48+rsp]
+ lea rsp,QWORD PTR[88+rsp]
+$L$ccm64_dec_ret::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ccm64_decrypt_blocks::
+aesni_ccm64_decrypt_blocks ENDP
+PUBLIC aesni_ctr32_encrypt_blocks
+
+ALIGN 16
+aesni_ctr32_encrypt_blocks PROC PUBLIC ; Counter-mode keystream XOR. Win64 args: rcx = input, rdx = output, r8 = number of 16-byte blocks, r9 = key schedule, [40+rsp] = pointer to the 16-byte counter block (only its last 32-bit word is incremented here).
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_ctr32_encrypt_blocks::
+ mov rdi,rcx ; rdi = input pointer
+ mov rsi,rdx ; rsi = output pointer
+ mov rdx,r8 ; rdx = block count
+ mov rcx,r9 ; rcx = key schedule
+ mov r8,QWORD PTR[40+rsp] ; r8 = counter block pointer
+
+
+ lea rsp,QWORD PTR[((-200))+rsp] ; frame: [rsp], [16+rsp] = counter scratch, [32+rsp].. = callee-saved xmm6..xmm15
+ movaps XMMWORD PTR[32+rsp],xmm6
+ movaps XMMWORD PTR[48+rsp],xmm7
+ movaps XMMWORD PTR[64+rsp],xmm8
+ movaps XMMWORD PTR[80+rsp],xmm9
+ movaps XMMWORD PTR[96+rsp],xmm10
+ movaps XMMWORD PTR[112+rsp],xmm11
+ movaps XMMWORD PTR[128+rsp],xmm12
+ movaps XMMWORD PTR[144+rsp],xmm13
+ movaps XMMWORD PTR[160+rsp],xmm14
+ movaps XMMWORD PTR[176+rsp],xmm15
+$L$ctr32_body::
+ cmp rdx,1
+ je $L$ctr32_one_shortcut ; single block: skip all counter-vector setup
+
+ movdqu xmm14,XMMWORD PTR[r8] ; xmm14 = counter block
+ movdqa xmm15,XMMWORD PTR[$L$bswap_mask] ; shuffle mask defined outside this block
+ xor eax,eax
+DB 102,69,15,58,22,242,3 ; pextrd r10d,xmm14,3 (hand-encoded): pull out the counter word
+DB 102,68,15,58,34,240,3 ; pinsrd xmm14,eax,3 (hand-encoded): zero that word in xmm14
+
+ mov eax,DWORD PTR[240+rcx] ; eax = round counter from the key schedule
+ bswap r10d ; counter word to native byte order for arithmetic
+ pxor xmm12,xmm12
+ pxor xmm13,xmm13
+DB 102,69,15,58,34,226,0 ; pinsrd xmm12,r10d,0 : xmm12 collects counters n, n+1, n+2
+ lea r11,QWORD PTR[3+r10]
+DB 102,69,15,58,34,235,0 ; pinsrd xmm13,r11d,0 : xmm13 collects counters n+3, n+4, n+5
+ inc r10d
+DB 102,69,15,58,34,226,1 ; pinsrd xmm12,r10d,1
+ inc r11
+DB 102,69,15,58,34,235,1 ; pinsrd xmm13,r11d,1
+ inc r10d
+DB 102,69,15,58,34,226,2 ; pinsrd xmm12,r10d,2
+ inc r11
+DB 102,69,15,58,34,235,2 ; pinsrd xmm13,r11d,2
+ movdqa XMMWORD PTR[rsp],xmm12 ; keep the native-order counters on the stack for later increments
+DB 102,69,15,56,0,231 ; pshufb xmm12,xmm15 : byte-shuffle the counters with $L$bswap_mask
+ movdqa XMMWORD PTR[16+rsp],xmm13
+DB 102,69,15,56,0,239 ; pshufb xmm13,xmm15
+
+ pshufd xmm2,xmm12,192 ; spread one shuffled counter word per block register (merged with xmm14 by por later)
+ pshufd xmm3,xmm12,128
+ pshufd xmm4,xmm12,64
+ cmp rdx,6
+ jb $L$ctr32_tail ; fewer than 6 blocks
+ shr eax,1 ; 6-way loop applies two round keys per iteration
+ mov r11,rcx ; r11 = pristine key pointer
+ mov r10d,eax ; r10d = pristine (halved) round counter
+ sub rdx,6
+ jmp $L$ctr32_loop6
+
+ALIGN 16
+$L$ctr32_loop6::
+ pshufd xmm5,xmm13,192 ; finish building six counter blocks in xmm2..xmm7
+ por xmm2,xmm14
+ movups xmm0,XMMWORD PTR[r11] ; xmm0 = round key 0
+ pshufd xmm6,xmm13,128
+ por xmm3,xmm14
+ movups xmm1,XMMWORD PTR[16+r11] ; xmm1 = round key 1
+ pshufd xmm7,xmm13,64
+ por xmm4,xmm14
+ por xmm5,xmm14
+ xorps xmm2,xmm0
+ por xmm6,xmm14
+ por xmm7,xmm14
+
+
+
+
+ pxor xmm3,xmm0 ; key-0 XOR interleaved with the first aesenc round
+ aesenc xmm2,xmm1
+ lea rcx,QWORD PTR[32+r11]
+ pxor xmm4,xmm0
+ aesenc xmm3,xmm1
+ movdqa xmm13,XMMWORD PTR[$L$increment32] ; xmm13 is free now; reuse it for the increment constant
+ pxor xmm5,xmm0
+ aesenc xmm4,xmm1
+ movdqa xmm12,XMMWORD PTR[rsp] ; reload native-order counters
+ pxor xmm6,xmm0
+ aesenc xmm5,xmm1
+ pxor xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ dec eax ; flags survive to the jnz at the bottom of the round loop
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ jmp $L$ctr32_enc_loop6_enter
+ALIGN 16
+$L$ctr32_enc_loop6::
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ dec eax
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+$L$ctr32_enc_loop6_enter::
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ aesenc xmm5,xmm0
+ aesenc xmm6,xmm0
+ aesenc xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$ctr32_enc_loop6
+
+ aesenc xmm2,xmm1 ; last regular round, interleaved with counter bookkeeping
+ paddd xmm12,xmm13 ; first counter triple += increment
+ aesenc xmm3,xmm1
+ paddd xmm13,XMMWORD PTR[16+rsp] ; second counter triple += increment
+ aesenc xmm4,xmm1
+ movdqa XMMWORD PTR[rsp],xmm12
+ aesenc xmm5,xmm1
+ movdqa XMMWORD PTR[16+rsp],xmm13
+ aesenc xmm6,xmm1
+DB 102,69,15,56,0,231 ; pshufb xmm12,xmm15
+ aesenc xmm7,xmm1
+DB 102,69,15,56,0,239 ; pshufb xmm13,xmm15
+
+ aesenclast xmm2,xmm0 ; final round interleaved with loading six input blocks
+ movups xmm8,XMMWORD PTR[rdi]
+ aesenclast xmm3,xmm0
+ movups xmm9,XMMWORD PTR[16+rdi]
+ aesenclast xmm4,xmm0
+ movups xmm10,XMMWORD PTR[32+rdi]
+ aesenclast xmm5,xmm0
+ movups xmm11,XMMWORD PTR[48+rdi]
+ aesenclast xmm6,xmm0
+ movups xmm1,XMMWORD PTR[64+rdi]
+ aesenclast xmm7,xmm0
+ movups xmm0,XMMWORD PTR[80+rdi]
+ lea rdi,QWORD PTR[96+rdi]
+
+ xorps xmm8,xmm2 ; output = input ^ keystream; xmm2..xmm4 are re-seeded for the next pass
+ pshufd xmm2,xmm12,192
+ xorps xmm9,xmm3
+ pshufd xmm3,xmm12,128
+ movups XMMWORD PTR[rsi],xmm8
+ xorps xmm10,xmm4
+ pshufd xmm4,xmm12,64
+ movups XMMWORD PTR[16+rsi],xmm9
+ xorps xmm11,xmm5
+ movups XMMWORD PTR[32+rsi],xmm10
+ xorps xmm1,xmm6
+ movups XMMWORD PTR[48+rsi],xmm11
+ xorps xmm0,xmm7
+ movups XMMWORD PTR[64+rsi],xmm1
+ movups XMMWORD PTR[80+rsi],xmm0
+ lea rsi,QWORD PTR[96+rsi]
+ mov eax,r10d
+ sub rdx,6
+ jnc $L$ctr32_loop6 ; another six blocks available
+
+ add rdx,6 ; rdx = remaining blocks (0..5)
+ jz $L$ctr32_done
+ mov rcx,r11 ; restore key pointer for the tail paths
+ lea eax,DWORD PTR[1+rax*1+rax] ; eax = 2*eax+1: undo the earlier shr for the tail paths
+
+$L$ctr32_tail::
+ por xmm2,xmm14 ; build only as many counter blocks / load only as many inputs as remain
+ movups xmm8,XMMWORD PTR[rdi]
+ cmp rdx,2
+ jb $L$ctr32_one
+
+ por xmm3,xmm14 ; por/movups/pshufd leave the cmp flags intact for the je below
+ movups xmm9,XMMWORD PTR[16+rdi]
+ je $L$ctr32_two
+
+ pshufd xmm5,xmm13,192
+ por xmm4,xmm14
+ movups xmm10,XMMWORD PTR[32+rdi]
+ cmp rdx,4
+ jb $L$ctr32_three
+
+ pshufd xmm6,xmm13,128
+ por xmm5,xmm14
+ movups xmm11,XMMWORD PTR[48+rdi]
+ je $L$ctr32_four
+
+ por xmm6,xmm14 ; five blocks: use the 6-way helper with a zeroed sixth lane
+ xorps xmm7,xmm7
+
+ call _aesni_encrypt6
+
+ movups xmm1,XMMWORD PTR[64+rdi]
+ xorps xmm8,xmm2
+ xorps xmm9,xmm3
+ movups XMMWORD PTR[rsi],xmm8
+ xorps xmm10,xmm4
+ movups XMMWORD PTR[16+rsi],xmm9
+ xorps xmm11,xmm5
+ movups XMMWORD PTR[32+rsi],xmm10
+ xorps xmm1,xmm6
+ movups XMMWORD PTR[48+rsi],xmm11
+ movups XMMWORD PTR[64+rsi],xmm1
+ jmp $L$ctr32_done
+
+ALIGN 16
+$L$ctr32_one_shortcut::
+ movups xmm2,XMMWORD PTR[r8] ; single-block call: encrypt the caller's counter block as-is
+ movups xmm8,XMMWORD PTR[rdi]
+ mov eax,DWORD PTR[240+rcx]
+$L$ctr32_one::
+ movups xmm0,XMMWORD PTR[rcx] ; one-key-per-iteration loop over xmm2
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_7::
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_7
+ aesenclast xmm2,xmm1
+ xorps xmm8,xmm2
+ movups XMMWORD PTR[rsi],xmm8
+ jmp $L$ctr32_done
+
+ALIGN 16
+$L$ctr32_two::
+ xorps xmm4,xmm4 ; zero the unused third lane of the 3-way helper
+ call _aesni_encrypt3
+ xorps xmm8,xmm2
+ xorps xmm9,xmm3
+ movups XMMWORD PTR[rsi],xmm8
+ movups XMMWORD PTR[16+rsi],xmm9
+ jmp $L$ctr32_done
+
+ALIGN 16
+$L$ctr32_three::
+ call _aesni_encrypt3
+ xorps xmm8,xmm2
+ xorps xmm9,xmm3
+ movups XMMWORD PTR[rsi],xmm8
+ xorps xmm10,xmm4
+ movups XMMWORD PTR[16+rsi],xmm9
+ movups XMMWORD PTR[32+rsi],xmm10
+ jmp $L$ctr32_done
+
+ALIGN 16
+$L$ctr32_four::
+ call _aesni_encrypt4
+ xorps xmm8,xmm2
+ xorps xmm9,xmm3
+ movups XMMWORD PTR[rsi],xmm8
+ xorps xmm10,xmm4
+ movups XMMWORD PTR[16+rsi],xmm9
+ xorps xmm11,xmm5
+ movups XMMWORD PTR[32+rsi],xmm10
+ movups XMMWORD PTR[48+rsi],xmm11
+
+$L$ctr32_done::
+ movaps xmm6,XMMWORD PTR[32+rsp] ; restore callee-saved xmm6..xmm15 and release the frame
+ movaps xmm7,XMMWORD PTR[48+rsp]
+ movaps xmm8,XMMWORD PTR[64+rsp]
+ movaps xmm9,XMMWORD PTR[80+rsp]
+ movaps xmm10,XMMWORD PTR[96+rsp]
+ movaps xmm11,XMMWORD PTR[112+rsp]
+ movaps xmm12,XMMWORD PTR[128+rsp]
+ movaps xmm13,XMMWORD PTR[144+rsp]
+ movaps xmm14,XMMWORD PTR[160+rsp]
+ movaps xmm15,XMMWORD PTR[176+rsp]
+ lea rsp,QWORD PTR[200+rsp]
+$L$ctr32_ret::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_ctr32_encrypt_blocks::
+aesni_ctr32_encrypt_blocks ENDP
+PUBLIC aesni_xts_encrypt
+
+ALIGN 16
+aesni_xts_encrypt PROC PUBLIC ; XTS-style encryption. Win64 args: rcx = input, rdx = output, r8 = length in bytes, r9 = key schedule used on the data, [40+rsp] = second key schedule (used only to encrypt the initial tweak), [48+rsp] = pointer to the 16-byte initial tweak input.
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_xts_encrypt::
+ mov rdi,rcx ; rdi = input pointer
+ mov rsi,rdx ; rsi = output pointer
+ mov rdx,r8 ; rdx = byte length
+ mov rcx,r9 ; rcx = data key schedule
+ mov r8,QWORD PTR[40+rsp] ; r8 = tweak key schedule
+ mov r9,QWORD PTR[48+rsp] ; r9 = tweak input pointer
+
+
+ lea rsp,QWORD PTR[((-264))+rsp] ; frame: [rsp]..[80+rsp] = six saved tweaks, [96+rsp].. = callee-saved xmm6..xmm15
+ movaps XMMWORD PTR[96+rsp],xmm6
+ movaps XMMWORD PTR[112+rsp],xmm7
+ movaps XMMWORD PTR[128+rsp],xmm8
+ movaps XMMWORD PTR[144+rsp],xmm9
+ movaps XMMWORD PTR[160+rsp],xmm10
+ movaps XMMWORD PTR[176+rsp],xmm11
+ movaps XMMWORD PTR[192+rsp],xmm12
+ movaps XMMWORD PTR[208+rsp],xmm13
+ movaps XMMWORD PTR[224+rsp],xmm14
+ movaps XMMWORD PTR[240+rsp],xmm15
+$L$xts_enc_body::
+ movups xmm15,XMMWORD PTR[r9] ; xmm15 = tweak input
+ mov eax,DWORD PTR[240+r8] ; eax = round counter of the tweak key
+ mov r10d,DWORD PTR[240+rcx] ; r10d = round counter of the data key
+ movups xmm0,XMMWORD PTR[r8] ; encrypt the tweak with the second key, one round key per iteration
+ movups xmm1,XMMWORD PTR[16+r8]
+ lea r8,QWORD PTR[32+r8]
+ xorps xmm15,xmm0
+$L$oop_enc1_8::
+ aesenc xmm15,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[r8]
+ lea r8,QWORD PTR[16+r8]
+ jnz $L$oop_enc1_8
+ aesenclast xmm15,xmm1 ; xmm15 = first tweak
+ mov r11,rcx ; r11 = pristine data key pointer
+ mov eax,r10d
+ mov r9,rdx ; r9 = original length (low 4 bits give the partial-block tail)
+ and rdx,-16 ; rdx = bytes in whole blocks
+
+ movdqa xmm8,XMMWORD PTR[$L$xts_magic] ; constant defined outside this block; masks the carry fed back into the tweak
+ pxor xmm14,xmm14
+ pcmpgtd xmm14,xmm15 ; xmm14 = per-dword sign masks of the tweak
+ pshufd xmm9,xmm14,013h ; tweak update step: shuffle the masks, double each 64-bit half with paddq, mask with xmm8 and XOR back in (presumably XTS multiply-by-x in GF(2^128) - confirm against $L$xts_magic)
+ pxor xmm14,xmm14
+ movdqa xmm10,xmm15 ; xmm10 = tweak for block 0
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm11,xmm15 ; xmm11 = tweak for block 1
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm12,xmm15 ; xmm12 = tweak for block 2
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm13,xmm15 ; xmm13 = tweak for block 3
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9 ; xmm15 = tweak for block 4
+ sub rdx,16*6
+ jc $L$xts_enc_short ; fewer than six whole blocks
+
+ shr eax,1 ; grand loop: two round keys per iteration ...
+ sub eax,1 ; ... minus one iteration, because the last rounds are unrolled after the loop
+ mov r10d,eax
+ jmp $L$xts_enc_grandloop
+
+ALIGN 16
+$L$xts_enc_grandloop::
+ pshufd xmm9,xmm14,013h ; finish the sixth tweak while loading six input blocks
+ movdqa xmm14,xmm15 ; xmm14 = tweak for block 4
+ paddq xmm15,xmm15
+ movdqu xmm2,XMMWORD PTR[rdi]
+ pand xmm9,xmm8
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ pxor xmm15,xmm9 ; xmm15 = tweak for block 5
+
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ pxor xmm2,xmm10 ; input ^= tweak
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ pxor xmm3,xmm11
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ pxor xmm4,xmm12
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ lea rdi,QWORD PTR[96+rdi]
+ pxor xmm5,xmm13
+ movups xmm0,XMMWORD PTR[r11] ; xmm0 = round key 0
+ pxor xmm6,xmm14
+ pxor xmm7,xmm15
+
+
+
+ movups xmm1,XMMWORD PTR[16+r11] ; xmm1 = round key 1
+ pxor xmm2,xmm0
+ pxor xmm3,xmm0
+ movdqa XMMWORD PTR[rsp],xmm10 ; spill the six tweaks; they are XORed into the results after the last round
+ aesenc xmm2,xmm1
+ lea rcx,QWORD PTR[32+r11]
+ pxor xmm4,xmm0
+ movdqa XMMWORD PTR[16+rsp],xmm11
+ aesenc xmm3,xmm1
+ pxor xmm5,xmm0
+ movdqa XMMWORD PTR[32+rsp],xmm12
+ aesenc xmm4,xmm1
+ pxor xmm6,xmm0
+ movdqa XMMWORD PTR[48+rsp],xmm13
+ aesenc xmm5,xmm1
+ pxor xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ dec eax ; flags survive to the jnz at the bottom of the round loop (pxor/pcmpgtd/movdqa do not write flags)
+ movdqa XMMWORD PTR[64+rsp],xmm14
+ aesenc xmm6,xmm1
+ movdqa XMMWORD PTR[80+rsp],xmm15
+ aesenc xmm7,xmm1
+ pxor xmm14,xmm14
+ pcmpgtd xmm14,xmm15 ; start computing the next batch of tweaks
+ jmp $L$xts_enc_loop6_enter
+
+ALIGN 16
+$L$xts_enc_loop6::
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ dec eax
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+$L$xts_enc_loop6_enter::
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ aesenc xmm5,xmm0
+ aesenc xmm6,xmm0
+ aesenc xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$xts_enc_loop6
+
+ pshufd xmm9,xmm14,013h ; unrolled final rounds, each interleaved with one tweak-update step
+ pxor xmm14,xmm14
+ paddq xmm15,xmm15
+ aesenc xmm2,xmm1
+ pand xmm9,xmm8
+ aesenc xmm3,xmm1
+ pcmpgtd xmm14,xmm15
+ aesenc xmm4,xmm1
+ pxor xmm15,xmm9
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm10,xmm15 ; next batch: tweak for block 0
+ paddq xmm15,xmm15
+ aesenc xmm2,xmm0
+ pand xmm9,xmm8
+ aesenc xmm3,xmm0
+ pcmpgtd xmm14,xmm15
+ aesenc xmm4,xmm0
+ pxor xmm15,xmm9
+ aesenc xmm5,xmm0
+ aesenc xmm6,xmm0
+ aesenc xmm7,xmm0
+ movups xmm0,XMMWORD PTR[32+rcx] ; last round key
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm11,xmm15 ; next batch: tweak for block 1
+ paddq xmm15,xmm15
+ aesenc xmm2,xmm1
+ pand xmm9,xmm8
+ aesenc xmm3,xmm1
+ pcmpgtd xmm14,xmm15
+ aesenc xmm4,xmm1
+ pxor xmm15,xmm9
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm12,xmm15 ; next batch: tweak for block 2
+ paddq xmm15,xmm15
+ aesenclast xmm2,xmm0
+ pand xmm9,xmm8
+ aesenclast xmm3,xmm0
+ pcmpgtd xmm14,xmm15
+ aesenclast xmm4,xmm0
+ pxor xmm15,xmm9
+ aesenclast xmm5,xmm0
+ aesenclast xmm6,xmm0
+ aesenclast xmm7,xmm0
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm13,xmm15 ; next batch: tweak for block 3
+ paddq xmm15,xmm15
+ xorps xmm2,XMMWORD PTR[rsp] ; output = cipher result ^ the tweak spilled earlier
+ pand xmm9,xmm8
+ xorps xmm3,XMMWORD PTR[16+rsp]
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+
+ xorps xmm4,XMMWORD PTR[32+rsp]
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm5,XMMWORD PTR[48+rsp]
+ movups XMMWORD PTR[16+rsi],xmm3
+ xorps xmm6,XMMWORD PTR[64+rsp]
+ movups XMMWORD PTR[32+rsi],xmm4
+ xorps xmm7,XMMWORD PTR[80+rsp]
+ movups XMMWORD PTR[48+rsi],xmm5
+ mov eax,r10d
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ lea rsi,QWORD PTR[96+rsi]
+ sub rdx,16*6
+ jnc $L$xts_enc_grandloop ; another six whole blocks available
+
+ lea eax,DWORD PTR[3+rax*1+rax] ; eax = 2*eax+3: undo the shr/sub above to recover the full round counter
+ mov rcx,r11
+ mov r10d,eax
+
+$L$xts_enc_short::
+ add rdx,16*6 ; rdx = remaining whole-block bytes (0..80)
+ jz $L$xts_enc_done
+
+ cmp rdx,020h
+ jb $L$xts_enc_one
+ je $L$xts_enc_two
+
+ cmp rdx,040h
+ jb $L$xts_enc_three
+ je $L$xts_enc_four
+
+ pshufd xmm9,xmm14,013h ; five blocks: derive one more tweak, then use the 6-way helper (sixth lane result unused)
+ movdqa xmm14,xmm15
+ paddq xmm15,xmm15
+ movdqu xmm2,XMMWORD PTR[rdi]
+ pand xmm9,xmm8
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ pxor xmm15,xmm9
+
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ pxor xmm3,xmm11
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ lea rdi,QWORD PTR[80+rdi]
+ pxor xmm4,xmm12
+ pxor xmm5,xmm13
+ pxor xmm6,xmm14
+
+ call _aesni_encrypt6
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm15 ; xmm10 = next unused tweak, needed by the partial-block tail
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movdqu XMMWORD PTR[rsi],xmm2
+ xorps xmm5,xmm13
+ movdqu XMMWORD PTR[16+rsi],xmm3
+ xorps xmm6,xmm14
+ movdqu XMMWORD PTR[32+rsi],xmm4
+ movdqu XMMWORD PTR[48+rsi],xmm5
+ movdqu XMMWORD PTR[64+rsi],xmm6
+ lea rsi,QWORD PTR[80+rsi]
+ jmp $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_one::
+ movups xmm2,XMMWORD PTR[rdi]
+ lea rdi,QWORD PTR[16+rdi]
+ xorps xmm2,xmm10 ; input ^= tweak
+ movups xmm0,XMMWORD PTR[rcx] ; one-key-per-iteration loop
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_9::
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_9
+ aesenclast xmm2,xmm1
+ xorps xmm2,xmm10 ; result ^= tweak
+ movdqa xmm10,xmm11 ; xmm10 = next unused tweak
+ movups XMMWORD PTR[rsi],xmm2
+ lea rsi,QWORD PTR[16+rsi]
+ jmp $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_two::
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ lea rdi,QWORD PTR[32+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+
+ call _aesni_encrypt3
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm12 ; xmm10 = next unused tweak
+ xorps xmm3,xmm11
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ lea rsi,QWORD PTR[32+rsi]
+ jmp $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_three::
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movups xmm4,XMMWORD PTR[32+rdi]
+ lea rdi,QWORD PTR[48+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+
+ call _aesni_encrypt3
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm13 ; xmm10 = next unused tweak
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ lea rsi,QWORD PTR[48+rsi]
+ jmp $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_four::
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movups xmm4,XMMWORD PTR[32+rdi]
+ xorps xmm2,xmm10
+ movups xmm5,XMMWORD PTR[48+rdi]
+ lea rdi,QWORD PTR[64+rdi]
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ xorps xmm5,xmm13
+
+ call _aesni_encrypt4
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm15 ; xmm10 = next unused tweak
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm5,xmm13
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ lea rsi,QWORD PTR[64+rsi]
+ jmp $L$xts_enc_done
+
+ALIGN 16
+$L$xts_enc_done::
+ and r9,15 ; r9 = number of trailing bytes beyond the last whole block
+ jz $L$xts_enc_ret
+ mov rdx,r9
+
+$L$xts_enc_steal::
+ movzx eax,BYTE PTR[rdi] ; per byte: move the previous output block's byte to the tail position and put the trailing input byte in its place
+ movzx ecx,BYTE PTR[((-16))+rsi]
+ lea rdi,QWORD PTR[1+rdi]
+ mov BYTE PTR[((-16))+rsi],al
+ mov BYTE PTR[rsi],cl
+ lea rsi,QWORD PTR[1+rsi]
+ sub rdx,1
+ jnz $L$xts_enc_steal
+
+ sub rsi,r9 ; rewind rsi to just past the patched block
+ mov rcx,r11 ; restore key pointer and round counter
+ mov eax,r10d
+
+ movups xmm2,XMMWORD PTR[((-16))+rsi] ; re-encrypt the patched block with the next tweak (xmm10)
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_enc1_10::
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_10
+ aesenclast xmm2,xmm1
+ xorps xmm2,xmm10
+ movups XMMWORD PTR[(-16)+rsi],xmm2
+
+$L$xts_enc_ret::
+ movaps xmm6,XMMWORD PTR[96+rsp] ; restore callee-saved xmm6..xmm15 and release the frame
+ movaps xmm7,XMMWORD PTR[112+rsp]
+ movaps xmm8,XMMWORD PTR[128+rsp]
+ movaps xmm9,XMMWORD PTR[144+rsp]
+ movaps xmm10,XMMWORD PTR[160+rsp]
+ movaps xmm11,XMMWORD PTR[176+rsp]
+ movaps xmm12,XMMWORD PTR[192+rsp]
+ movaps xmm13,XMMWORD PTR[208+rsp]
+ movaps xmm14,XMMWORD PTR[224+rsp]
+ movaps xmm15,XMMWORD PTR[240+rsp]
+ lea rsp,QWORD PTR[264+rsp]
+$L$xts_enc_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_xts_encrypt::
+aesni_xts_encrypt ENDP
+PUBLIC aesni_xts_decrypt
+ ; aesni_xts_decrypt: AES-NI XTS decryption, 6 blocks per main-loop pass, with ciphertext stealing for a trailing partial block (Win64 entry point).
+ALIGN 16
+aesni_xts_decrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue: park rdi/rsi (callee-saved on Win64) in the caller-provided home slots
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_xts_decrypt::
+ mov rdi,rcx ; rdi = arg1: input pointer (blocks are loaded from [rdi])
+ mov rsi,rdx ; rsi = arg2: output pointer (results are stored to [rsi])
+ mov rdx,r8 ; rdx = arg3: length in bytes
+ mov rcx,r9 ; rcx = arg4: key schedule for the data blocks (count read from offset 240)
+ mov r8,QWORD PTR[40+rsp] ; r8 = arg5: key schedule used to encrypt the initial tweak
+ mov r9,QWORD PTR[48+rsp] ; r9 = arg6: pointer to the 16-byte initial tweak input
+ ; Frame: 264 bytes. [rsp..rsp+95] holds six tweaks during the main loop, [rsp+96..rsp+255] holds xmm6-xmm15.
+
+ lea rsp,QWORD PTR[((-264))+rsp]
+ movaps XMMWORD PTR[96+rsp],xmm6 ; xmm6-xmm15 are callee-saved on Win64
+ movaps XMMWORD PTR[112+rsp],xmm7
+ movaps XMMWORD PTR[128+rsp],xmm8
+ movaps XMMWORD PTR[144+rsp],xmm9
+ movaps XMMWORD PTR[160+rsp],xmm10
+ movaps XMMWORD PTR[176+rsp],xmm11
+ movaps XMMWORD PTR[192+rsp],xmm12
+ movaps XMMWORD PTR[208+rsp],xmm13
+ movaps XMMWORD PTR[224+rsp],xmm14
+ movaps XMMWORD PTR[240+rsp],xmm15
+$L$xts_dec_body:: ; start of the range covered by xts_se_handler (see .xdata)
+ movups xmm15,XMMWORD PTR[r9] ; xmm15 = initial tweak input
+ mov eax,DWORD PTR[240+r8] ; eax = loop count for the tweak key schedule
+ mov r10d,DWORD PTR[240+rcx] ; r10d = loop count for the data key schedule
+ movups xmm0,XMMWORD PTR[r8]
+ movups xmm1,XMMWORD PTR[16+r8]
+ lea r8,QWORD PTR[32+r8]
+ xorps xmm15,xmm0
+$L$oop_enc1_11:: ; single-block ENCRYPT of the tweak with the r8 key schedule
+ aesenc xmm15,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[r8]
+ lea r8,QWORD PTR[16+r8]
+ jnz $L$oop_enc1_11 ; flags still come from dec eax (movups/lea leave them alone)
+ aesenclast xmm15,xmm1
+ xor eax,eax
+ test rdx,15
+ setnz al
+ shl rax,4 ; rax = 16 if the length is not a multiple of 16, else 0
+ sub rdx,rax ; hold back one full block when there is a partial tail (needed for stealing)
+
+ mov r11,rcx ; r11 = saved data key schedule pointer
+ mov eax,r10d
+ mov r9,rdx ; r9 = adjusted length; its low 4 bits are the partial-tail size
+ and rdx,-16 ; rdx = bytes to process as whole blocks
+
+ movdqa xmm8,XMMWORD PTR[$L$xts_magic] ; xmm8 = dwords {0x87,0,1,0}: reduction constant and cross-qword carry bit
+ pxor xmm14,xmm14
+ pcmpgtd xmm14,xmm15 ; xmm14 = per-dword sign mask of the current tweak
+ pshufd xmm9,xmm14,013h ; route sign of dword3 to dword0 and sign of dword1 to dword2
+ pxor xmm14,xmm14
+ movdqa xmm10,xmm15 ; xmm10 = tweak for block 0
+ paddq xmm15,xmm15 ; shift each 64-bit half left by one
+ pand xmm9,xmm8 ; keep 0x87 (if bit 127 was set) and the carry into the high half
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9 ; xmm15 = tweak doubled in GF(2^128)
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm11,xmm15 ; xmm11 = tweak for block 1
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm12,xmm15 ; xmm12 = tweak for block 2
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm13,xmm15 ; xmm13 = tweak for block 3
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15 ; xmm14 = sign mask of the block-4 tweak, consumed by the next doubling
+ pxor xmm15,xmm9 ; xmm15 = tweak for block 4
+ sub rdx,16*6
+ jc $L$xts_dec_short ; fewer than six whole blocks
+
+ shr eax,1
+ sub eax,1
+ mov r10d,eax ; r10d = count/2 - 1 = iterations of the two-rounds-per-pass loop
+ jmp $L$xts_dec_grandloop
+
+ALIGN 16
+$L$xts_dec_grandloop:: ; six blocks per pass
+ pshufd xmm9,xmm14,013h
+ movdqa xmm14,xmm15 ; xmm14 = tweak for block 4
+ paddq xmm15,xmm15
+ movdqu xmm2,XMMWORD PTR[rdi]
+ pand xmm9,xmm8
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ pxor xmm15,xmm9 ; xmm15 = tweak for block 5
+
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ pxor xmm2,xmm10 ; pre-whiten each input block with its tweak
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ pxor xmm3,xmm11
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ pxor xmm4,xmm12
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ lea rdi,QWORD PTR[96+rdi]
+ pxor xmm5,xmm13
+ movups xmm0,XMMWORD PTR[r11]
+ pxor xmm6,xmm14
+ pxor xmm7,xmm15
+
+ ; First round is issued inline while the six tweaks are spilled to [rsp..rsp+95].
+
+ movups xmm1,XMMWORD PTR[16+r11]
+ pxor xmm2,xmm0
+ pxor xmm3,xmm0
+ movdqa XMMWORD PTR[rsp],xmm10
+ aesdec xmm2,xmm1
+ lea rcx,QWORD PTR[32+r11]
+ pxor xmm4,xmm0
+ movdqa XMMWORD PTR[16+rsp],xmm11
+ aesdec xmm3,xmm1
+ pxor xmm5,xmm0
+ movdqa XMMWORD PTR[32+rsp],xmm12
+ aesdec xmm4,xmm1
+ pxor xmm6,xmm0
+ movdqa XMMWORD PTR[48+rsp],xmm13
+ aesdec xmm5,xmm1
+ pxor xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ dec eax ; sets ZF for the jnz at the bottom of loop6 (no flag-writing instruction in between)
+ movdqa XMMWORD PTR[64+rsp],xmm14
+ aesdec xmm6,xmm1
+ movdqa XMMWORD PTR[80+rsp],xmm15
+ aesdec xmm7,xmm1
+ pxor xmm14,xmm14
+ pcmpgtd xmm14,xmm15 ; sign mask for the next tweak doubling
+ jmp $L$xts_dec_loop6_enter
+
+ALIGN 16
+$L$xts_dec_loop6:: ; two AES rounds per iteration over six blocks
+ aesdec xmm2,xmm1
+ aesdec xmm3,xmm1
+ dec eax
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+$L$xts_dec_loop6_enter::
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesdec xmm2,xmm0
+ aesdec xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesdec xmm4,xmm0
+ aesdec xmm5,xmm0
+ aesdec xmm6,xmm0
+ aesdec xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$xts_dec_loop6
+ ; Final rounds, interleaved with computing the tweaks for the next six blocks.
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ paddq xmm15,xmm15
+ aesdec xmm2,xmm1
+ pand xmm9,xmm8
+ aesdec xmm3,xmm1
+ pcmpgtd xmm14,xmm15
+ aesdec xmm4,xmm1
+ pxor xmm15,xmm9
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm10,xmm15 ; next pass: tweak for block 0
+ paddq xmm15,xmm15
+ aesdec xmm2,xmm0
+ pand xmm9,xmm8
+ aesdec xmm3,xmm0
+ pcmpgtd xmm14,xmm15
+ aesdec xmm4,xmm0
+ pxor xmm15,xmm9
+ aesdec xmm5,xmm0
+ aesdec xmm6,xmm0
+ aesdec xmm7,xmm0
+ movups xmm0,XMMWORD PTR[32+rcx]
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm11,xmm15 ; next pass: tweak for block 1
+ paddq xmm15,xmm15
+ aesdec xmm2,xmm1
+ pand xmm9,xmm8
+ aesdec xmm3,xmm1
+ pcmpgtd xmm14,xmm15
+ aesdec xmm4,xmm1
+ pxor xmm15,xmm9
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm12,xmm15 ; next pass: tweak for block 2
+ paddq xmm15,xmm15
+ aesdeclast xmm2,xmm0
+ pand xmm9,xmm8
+ aesdeclast xmm3,xmm0
+ pcmpgtd xmm14,xmm15
+ aesdeclast xmm4,xmm0
+ pxor xmm15,xmm9
+ aesdeclast xmm5,xmm0
+ aesdeclast xmm6,xmm0
+ aesdeclast xmm7,xmm0
+
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm13,xmm15 ; next pass: tweak for block 3
+ paddq xmm15,xmm15
+ xorps xmm2,XMMWORD PTR[rsp] ; post-whiten with the tweaks spilled at the top of this pass
+ pand xmm9,xmm8
+ xorps xmm3,XMMWORD PTR[16+rsp]
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9 ; next pass: tweak for block 4
+
+ xorps xmm4,XMMWORD PTR[32+rsp]
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm5,XMMWORD PTR[48+rsp]
+ movups XMMWORD PTR[16+rsi],xmm3
+ xorps xmm6,XMMWORD PTR[64+rsp]
+ movups XMMWORD PTR[32+rsi],xmm4
+ xorps xmm7,XMMWORD PTR[80+rsp]
+ movups XMMWORD PTR[48+rsi],xmm5
+ mov eax,r10d ; reload the loop counter for the next pass
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ lea rsi,QWORD PTR[96+rsi]
+ sub rdx,16*6
+ jnc $L$xts_dec_grandloop
+
+ lea eax,DWORD PTR[3+rax*1+rax] ; eax = 2*eax + 3: undoes the shr 1 / sub 1 above (exact for odd counts)
+ mov rcx,r11 ; rcx = start of the data key schedule again
+ mov r10d,eax ; r10d = full count for the short paths and the stealing code
+
+$L$xts_dec_short:: ; 0..5 whole blocks remain; rdx is (remaining - 96) here
+ add rdx,16*6
+ jz $L$xts_dec_done
+
+ cmp rdx,020h
+ jb $L$xts_dec_one
+ je $L$xts_dec_two
+
+ cmp rdx,040h
+ jb $L$xts_dec_three
+ je $L$xts_dec_four
+ ; Fall-through: five blocks remain. xmm14 takes the block-4 tweak, xmm15 becomes the following one.
+ pshufd xmm9,xmm14,013h
+ movdqa xmm14,xmm15
+ paddq xmm15,xmm15
+ movdqu xmm2,XMMWORD PTR[rdi]
+ pand xmm9,xmm8
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ pxor xmm15,xmm9
+
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ pxor xmm3,xmm11
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ lea rdi,QWORD PTR[80+rdi]
+ pxor xmm4,xmm12
+ pxor xmm5,xmm13
+ pxor xmm6,xmm14
+
+ call _aesni_decrypt6 ; helper defined outside this section; the xmm7 lane result is not used here
+
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movdqu XMMWORD PTR[rsi],xmm2
+ xorps xmm5,xmm13
+ movdqu XMMWORD PTR[16+rsi],xmm3
+ xorps xmm6,xmm14
+ movdqu XMMWORD PTR[32+rsi],xmm4
+ pxor xmm14,xmm14
+ movdqu XMMWORD PTR[48+rsi],xmm5
+ pcmpgtd xmm14,xmm15
+ movdqu XMMWORD PTR[64+rsi],xmm6
+ lea rsi,QWORD PTR[80+rsi]
+ pshufd xmm11,xmm14,013h
+ and r9,15 ; r9 = size of the partial tail
+ jz $L$xts_dec_ret
+ ; Partial tail present: xmm10 = next tweak, xmm11 = the one after it, as $L$xts_dec_done2 expects.
+ movdqa xmm10,xmm15
+ paddq xmm15,xmm15
+ pand xmm11,xmm8
+ pxor xmm11,xmm15
+ jmp $L$xts_dec_done2
+
+ALIGN 16
+$L$xts_dec_one:: ; exactly one whole block remains
+ movups xmm2,XMMWORD PTR[rdi]
+ lea rdi,QWORD PTR[16+rdi]
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_12::
+ aesdec xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_12
+ aesdeclast xmm2,xmm1
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm11 ; slide the next two tweaks into xmm10/xmm11 for the stealing code
+ movups XMMWORD PTR[rsi],xmm2
+ movdqa xmm11,xmm12
+ lea rsi,QWORD PTR[16+rsi]
+ jmp $L$xts_dec_done
+
+ALIGN 16
+$L$xts_dec_two:: ; two whole blocks remain
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ lea rdi,QWORD PTR[32+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+
+ call _aesni_decrypt3 ; three-lane helper; only the xmm2/xmm3 results are stored below
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm12
+ xorps xmm3,xmm11
+ movdqa xmm11,xmm13
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ lea rsi,QWORD PTR[32+rsi]
+ jmp $L$xts_dec_done
+
+ALIGN 16
+$L$xts_dec_three:: ; three whole blocks remain
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movups xmm4,XMMWORD PTR[32+rdi]
+ lea rdi,QWORD PTR[48+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+
+ call _aesni_decrypt3
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm13
+ xorps xmm3,xmm11
+ movdqa xmm11,xmm15
+ xorps xmm4,xmm12
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ lea rsi,QWORD PTR[48+rsi]
+ jmp $L$xts_dec_done
+
+ALIGN 16
+$L$xts_dec_four:: ; four whole blocks remain; also derive one more tweak so xmm14/xmm15 can feed the stealing code
+ pshufd xmm9,xmm14,013h
+ movdqa xmm14,xmm15
+ paddq xmm15,xmm15
+ movups xmm2,XMMWORD PTR[rdi]
+ pand xmm9,xmm8
+ movups xmm3,XMMWORD PTR[16+rdi]
+ pxor xmm15,xmm9
+
+ movups xmm4,XMMWORD PTR[32+rdi]
+ xorps xmm2,xmm10
+ movups xmm5,XMMWORD PTR[48+rdi]
+ lea rdi,QWORD PTR[64+rdi]
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ xorps xmm5,xmm13
+
+ call _aesni_decrypt4
+
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm14
+ xorps xmm3,xmm11
+ movdqa xmm11,xmm15
+ xorps xmm4,xmm12
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm5,xmm13
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ lea rsi,QWORD PTR[64+rsi]
+ jmp $L$xts_dec_done
+
+ALIGN 16
+$L$xts_dec_done:: ; whole blocks finished; xmm10/xmm11 hold the next two tweaks
+ and r9,15
+ jz $L$xts_dec_ret ; no partial tail
+$L$xts_dec_done2:: ; ciphertext stealing: r9 = tail size (1..15)
+ mov rdx,r9
+ mov rcx,r11
+ mov eax,r10d
+
+ movups xmm2,XMMWORD PTR[rdi] ; last whole input block, decrypted with the LATER tweak (xmm11)
+ xorps xmm2,xmm11
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_13::
+ aesdec xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_13
+ aesdeclast xmm2,xmm1
+ xorps xmm2,xmm11
+ movups XMMWORD PTR[rsi],xmm2
+
+$L$xts_dec_steal:: ; per tail byte i: out[16+i] = old out[i], out[i] = in[16+i]
+ movzx eax,BYTE PTR[16+rdi]
+ movzx ecx,BYTE PTR[rsi]
+ lea rdi,QWORD PTR[1+rdi]
+ mov BYTE PTR[rsi],al
+ mov BYTE PTR[16+rsi],cl
+ lea rsi,QWORD PTR[1+rsi]
+ sub rdx,1
+ jnz $L$xts_dec_steal
+
+ sub rsi,r9 ; rewind rsi to the start of the block just patched
+ mov rcx,r11
+ mov eax,r10d
+
+ movups xmm2,XMMWORD PTR[rsi] ; decrypt the patched block in place with the EARLIER tweak (xmm10)
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_14::
+ aesdec xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_14
+ aesdeclast xmm2,xmm1
+ xorps xmm2,xmm10
+ movups XMMWORD PTR[rsi],xmm2
+
+$L$xts_dec_ret:: ; restore the Win64 callee-saved xmm registers and release the frame
+ movaps xmm6,XMMWORD PTR[96+rsp]
+ movaps xmm7,XMMWORD PTR[112+rsp]
+ movaps xmm8,XMMWORD PTR[128+rsp]
+ movaps xmm9,XMMWORD PTR[144+rsp]
+ movaps xmm10,XMMWORD PTR[160+rsp]
+ movaps xmm11,XMMWORD PTR[176+rsp]
+ movaps xmm12,XMMWORD PTR[192+rsp]
+ movaps xmm13,XMMWORD PTR[208+rsp]
+ movaps xmm14,XMMWORD PTR[224+rsp]
+ movaps xmm15,XMMWORD PTR[240+rsp]
+ lea rsp,QWORD PTR[264+rsp]
+$L$xts_dec_epilogue:: ; end of the range covered by xts_se_handler (see .xdata)
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_xts_decrypt::
+aesni_xts_decrypt ENDP
+PUBLIC aesni_cbc_encrypt
+ ; aesni_cbc_encrypt: AES-NI CBC encryption (one block at a time) or decryption (eight blocks per pass), selected by the sixth argument (Win64 entry point).
+ALIGN 16
+aesni_cbc_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue: park rdi/rsi (callee-saved on Win64) in the caller-provided home slots
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_cbc_encrypt::
+ mov rdi,rcx ; rdi = arg1: input pointer
+ mov rsi,rdx ; rsi = arg2: output pointer
+ mov rdx,r8 ; rdx = arg3: length in bytes
+ mov rcx,r9 ; rcx = arg4: key schedule (count read from offset 240)
+ mov r8,QWORD PTR[40+rsp] ; r8 = arg5: pointer to the 16-byte IV, updated on exit
+ mov r9,QWORD PTR[48+rsp] ; r9d = arg6: nonzero selects encryption, zero selects decryption
+
+
+ test rdx,rdx
+ jz $L$cbc_ret ; zero length: nothing to do
+
+ mov r10d,DWORD PTR[240+rcx] ; r10d = saved loop count
+ mov r11,rcx ; r11 = saved key schedule pointer
+ test r9d,r9d
+ jz $L$cbc_decrypt
+
+ movups xmm2,XMMWORD PTR[r8] ; xmm2 = chaining value (IV)
+ mov eax,r10d
+ cmp rdx,16
+ jb $L$cbc_enc_tail ; less than one block: pad it first
+ sub rdx,16
+ jmp $L$cbc_enc_loop
+ALIGN 16
+$L$cbc_enc_loop:: ; one block per iteration; rdx is biased by -16
+ movups xmm3,XMMWORD PTR[rdi]
+ lea rdi,QWORD PTR[16+rdi]
+
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ xorps xmm3,xmm0 ; fold round key 0 into the input block
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm3 ; xmm2 = chain ^ input ^ round key 0
+$L$oop_enc1_15::
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_15
+ aesenclast xmm2,xmm1
+ mov eax,r10d
+ mov rcx,r11
+ movups XMMWORD PTR[rsi],xmm2
+ lea rsi,QWORD PTR[16+rsi]
+ sub rdx,16
+ jnc $L$cbc_enc_loop
+ add rdx,16
+ jnz $L$cbc_enc_tail ; 1..15 bytes left over
+ movups XMMWORD PTR[r8],xmm2 ; write the final chaining value back to the IV buffer
+ jmp $L$cbc_ret
+
+$L$cbc_enc_tail:: ; copy the rdx leftover bytes to the output, zero-pad to 16, then encrypt that block in place
+ mov rcx,rdx
+ xchg rsi,rdi ; rdi = output, rsi = input for the string copy
+ DD 09066A4F3h ; encoded rep movsb (bytes F3 A4, followed by 66 90 filler)
+ mov ecx,16
+ sub rcx,rdx
+ xor eax,eax
+ DD 09066AAF3h ; encoded rep stosb (bytes F3 AA, followed by 66 90 filler): zero the rest of the block
+ lea rdi,QWORD PTR[((-16))+rdi] ; back to the start of the padded block
+ mov eax,r10d
+ mov rsi,rdi ; input = output: the loop reads and rewrites the padded block
+ mov rcx,r11
+ xor rdx,rdx ; one final loop pass, then exit through the IV store
+ jmp $L$cbc_enc_loop
+
+ALIGN 16
+$L$cbc_decrypt:: ; decryption path: 88-byte frame, xmm6-xmm9 at [rsp..rsp+63], scratch block at [rsp+64]
+ lea rsp,QWORD PTR[((-88))+rsp]
+ movaps XMMWORD PTR[rsp],xmm6
+ movaps XMMWORD PTR[16+rsp],xmm7
+ movaps XMMWORD PTR[32+rsp],xmm8
+ movaps XMMWORD PTR[48+rsp],xmm9
+$L$cbc_decrypt_body::
+ movups xmm9,XMMWORD PTR[r8] ; xmm9 = chaining value (IV)
+ mov eax,r10d
+ cmp rdx,070h
+ jbe $L$cbc_dec_tail ; 112 bytes or fewer: tail code only
+ shr r10d,1 ; r10d = count/2 from here on (the 8-block helper runs two rounds per pass)
+ sub rdx,070h
+ mov eax,r10d
+ movaps XMMWORD PTR[64+rsp],xmm9 ; stash the chaining value for the first block of this pass
+ jmp $L$cbc_dec_loop8_enter
+ALIGN 16
+$L$cbc_dec_loop8::
+ movaps XMMWORD PTR[64+rsp],xmm0 ; xmm0 = last input block of the previous pass = new chaining value
+ movups XMMWORD PTR[rsi],xmm9 ; deferred store of the eighth output block of the previous pass
+ lea rsi,QWORD PTR[16+rsi]
+$L$cbc_dec_loop8_enter::
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movups xmm1,XMMWORD PTR[16+rcx]
+
+ lea rcx,QWORD PTR[32+rcx]
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ xorps xmm2,xmm0
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ xorps xmm3,xmm0
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ aesdec xmm2,xmm1
+ pxor xmm4,xmm0
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ aesdec xmm3,xmm1
+ pxor xmm5,xmm0
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ aesdec xmm4,xmm1
+ pxor xmm6,xmm0
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ aesdec xmm5,xmm1
+ pxor xmm7,xmm0
+ dec eax
+ aesdec xmm6,xmm1
+ pxor xmm8,xmm0
+ aesdec xmm7,xmm1
+ pxor xmm9,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ aesdec xmm8,xmm1
+ aesdec xmm9,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+
+ call $L$dec_loop8_enter ; label defined outside this section; presumably the mid-loop entry of _aesni_decrypt8 - confirm
+
+ movups xmm1,XMMWORD PTR[rdi] ; reload input blocks: each output is XORed with the preceding input block
+ movups xmm0,XMMWORD PTR[16+rdi]
+ xorps xmm2,XMMWORD PTR[64+rsp] ; first block uses the stashed chaining value
+ xorps xmm3,xmm1
+ movups xmm1,XMMWORD PTR[32+rdi]
+ xorps xmm4,xmm0
+ movups xmm0,XMMWORD PTR[48+rdi]
+ xorps xmm5,xmm1
+ movups xmm1,XMMWORD PTR[64+rdi]
+ xorps xmm6,xmm0
+ movups xmm0,XMMWORD PTR[80+rdi]
+ xorps xmm7,xmm1
+ movups xmm1,XMMWORD PTR[96+rdi]
+ xorps xmm8,xmm0
+ movups xmm0,XMMWORD PTR[112+rdi] ; xmm0 = eighth input block, the next chaining value
+ xorps xmm9,xmm1
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ mov eax,r10d
+ movups XMMWORD PTR[64+rsi],xmm6
+ mov rcx,r11
+ movups XMMWORD PTR[80+rsi],xmm7
+ lea rdi,QWORD PTR[128+rdi]
+ movups XMMWORD PTR[96+rsi],xmm8
+ lea rsi,QWORD PTR[112+rsi] ; only seven blocks stored so far; the eighth (xmm9) is stored later
+ sub rdx,080h
+ ja $L$cbc_dec_loop8
+
+ movaps xmm2,xmm9 ; xmm2 = pending eighth output block
+ movaps xmm9,xmm0 ; xmm9 = chaining value for whatever follows
+ add rdx,070h
+ jle $L$cbc_dec_tail_collected ; nothing left beyond the pending block
+ movups XMMWORD PTR[rsi],xmm2
+ lea eax,DWORD PTR[1+r10*1+r10] ; eax = 2*r10 + 1: undoes the shr above (exact for odd counts)
+ lea rsi,QWORD PTR[16+rsi]
+$L$cbc_dec_tail:: ; 1..112 bytes remain; dispatch on the number of 16-byte blocks needed
+ movups xmm2,XMMWORD PTR[rdi]
+ movaps xmm8,xmm2 ; keep input copies: they are the chaining values for the following blocks
+ cmp rdx,010h
+ jbe $L$cbc_dec_one
+
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movaps xmm7,xmm3
+ cmp rdx,020h
+ jbe $L$cbc_dec_two
+
+ movups xmm4,XMMWORD PTR[32+rdi]
+ movaps xmm6,xmm4
+ cmp rdx,030h
+ jbe $L$cbc_dec_three
+
+ movups xmm5,XMMWORD PTR[48+rdi]
+ cmp rdx,040h
+ jbe $L$cbc_dec_four
+
+ movups xmm6,XMMWORD PTR[64+rdi]
+ cmp rdx,050h
+ jbe $L$cbc_dec_five
+
+ movups xmm7,XMMWORD PTR[80+rdi]
+ cmp rdx,060h
+ jbe $L$cbc_dec_six
+ ; Fall-through: seven blocks. The eighth helper lane (xmm9) is decrypted too but its result is discarded.
+ movups xmm8,XMMWORD PTR[96+rdi]
+ movaps XMMWORD PTR[64+rsp],xmm9 ; stash the chaining value; xmm9 is overwritten by the helper
+ call _aesni_decrypt8
+ movups xmm1,XMMWORD PTR[rdi]
+ movups xmm0,XMMWORD PTR[16+rdi]
+ xorps xmm2,XMMWORD PTR[64+rsp]
+ xorps xmm3,xmm1
+ movups xmm1,XMMWORD PTR[32+rdi]
+ xorps xmm4,xmm0
+ movups xmm0,XMMWORD PTR[48+rdi]
+ xorps xmm5,xmm1
+ movups xmm1,XMMWORD PTR[64+rdi]
+ xorps xmm6,xmm0
+ movups xmm0,XMMWORD PTR[80+rdi]
+ xorps xmm7,xmm1
+ movups xmm9,XMMWORD PTR[96+rdi] ; xmm9 = last input block = IV to hand back
+ xorps xmm8,xmm0
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ lea rsi,QWORD PTR[96+rsi]
+ movaps xmm2,xmm8 ; last output block is left in xmm2 for $L$cbc_dec_tail_collected
+ sub rdx,070h
+ jmp $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_one:: ; single block, decrypted inline
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+$L$oop_dec1_16::
+ aesdec xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_16
+ aesdeclast xmm2,xmm1
+ xorps xmm2,xmm9
+ movaps xmm9,xmm8 ; new chaining value = the input block just consumed
+ sub rdx,010h
+ jmp $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_two::
+ xorps xmm4,xmm4 ; clear the unused third lane of the three-lane helper
+ call _aesni_decrypt3
+ xorps xmm2,xmm9
+ xorps xmm3,xmm8
+ movups XMMWORD PTR[rsi],xmm2
+ movaps xmm9,xmm7
+ movaps xmm2,xmm3
+ lea rsi,QWORD PTR[16+rsi]
+ sub rdx,020h
+ jmp $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_three::
+ call _aesni_decrypt3
+ xorps xmm2,xmm9
+ xorps xmm3,xmm8
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm4,xmm7
+ movups XMMWORD PTR[16+rsi],xmm3
+ movaps xmm9,xmm6
+ movaps xmm2,xmm4
+ lea rsi,QWORD PTR[32+rsi]
+ sub rdx,030h
+ jmp $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_four::
+ call _aesni_decrypt4
+ xorps xmm2,xmm9
+ movups xmm9,XMMWORD PTR[48+rdi] ; reload the last input block as the new chaining value
+ xorps xmm3,xmm8
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm4,xmm7
+ movups XMMWORD PTR[16+rsi],xmm3
+ xorps xmm5,xmm6
+ movups XMMWORD PTR[32+rsi],xmm4
+ movaps xmm2,xmm5
+ lea rsi,QWORD PTR[48+rsi]
+ sub rdx,040h
+ jmp $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_five::
+ xorps xmm7,xmm7 ; clear the unused sixth lane of the six-lane helper
+ call _aesni_decrypt6
+ movups xmm1,XMMWORD PTR[16+rdi]
+ movups xmm0,XMMWORD PTR[32+rdi]
+ xorps xmm2,xmm9
+ xorps xmm3,xmm8
+ xorps xmm4,xmm1
+ movups xmm1,XMMWORD PTR[48+rdi]
+ xorps xmm5,xmm0
+ movups xmm9,XMMWORD PTR[64+rdi]
+ xorps xmm6,xmm1
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ lea rsi,QWORD PTR[64+rsi]
+ movaps xmm2,xmm6
+ sub rdx,050h
+ jmp $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_six::
+ call _aesni_decrypt6
+ movups xmm1,XMMWORD PTR[16+rdi]
+ movups xmm0,XMMWORD PTR[32+rdi]
+ xorps xmm2,xmm9
+ xorps xmm3,xmm8
+ xorps xmm4,xmm1
+ movups xmm1,XMMWORD PTR[48+rdi]
+ xorps xmm5,xmm0
+ movups xmm0,XMMWORD PTR[64+rdi]
+ xorps xmm6,xmm1
+ movups xmm9,XMMWORD PTR[80+rdi]
+ xorps xmm7,xmm0
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ lea rsi,QWORD PTR[80+rsi]
+ movaps xmm2,xmm7
+ sub rdx,060h
+ jmp $L$cbc_dec_tail_collected
+ALIGN 16
+$L$cbc_dec_tail_collected:: ; xmm2 = last output block (not yet stored), xmm9 = IV to hand back
+ and rdx,15 ; ZF from this AND drives the jnz below (movups does not touch flags)
+ movups XMMWORD PTR[r8],xmm9
+ jnz $L$cbc_dec_tail_partial
+ movups XMMWORD PTR[rsi],xmm2
+ jmp $L$cbc_dec_ret
+ALIGN 16
+$L$cbc_dec_tail_partial:: ; length was not a multiple of 16: copy part of the last block via the stack scratch slot
+ movaps XMMWORD PTR[64+rsp],xmm2
+ mov rcx,16
+ mov rdi,rsi
+ sub rcx,rdx ; rcx = 16 - (rdx AND 15). NOTE(review): count is 16 minus the residue, not the residue itself - confirm intended
+ lea rsi,QWORD PTR[64+rsp]
+ DD 09066A4F3h ; encoded rep movsb (bytes F3 A4, followed by 66 90 filler)
+
+$L$cbc_dec_ret:: ; restore xmm6-xmm9 and release the decrypt frame
+ movaps xmm6,XMMWORD PTR[rsp]
+ movaps xmm7,XMMWORD PTR[16+rsp]
+ movaps xmm8,XMMWORD PTR[32+rsp]
+ movaps xmm9,XMMWORD PTR[48+rsp]
+ lea rsp,QWORD PTR[88+rsp]
+$L$cbc_ret::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_cbc_encrypt::
+aesni_cbc_encrypt ENDP
+PUBLIC aesni_set_decrypt_key
+ ; aesni_set_decrypt_key: build the encryption schedule via __aesni_set_encrypt_key, then convert it in place (reverse round-key order, AESIMC on the inner keys).
+ALIGN 16
+aesni_set_decrypt_key PROC PUBLIC
+ sub rsp,8 ; 8-byte stack adjustment, described by $L$SEH_info_key in .xdata
+ call __aesni_set_encrypt_key ; uses rcx/edx/r8 as passed in; returns status in eax and the stored count in edx
+ shl edx,4 ; edx = count * 16 = byte distance used to find the last round key
+ test eax,eax
+ jnz $L$dec_key_ret ; propagate a nonzero status unchanged
+ lea rcx,QWORD PTR[16+rdx*1+r8] ; rcx -> last round key of the schedule at r8
+
+ movups xmm0,XMMWORD PTR[r8] ; swap the first and last round keys (no AESIMC on these two)
+ movups xmm1,XMMWORD PTR[rcx]
+ movups XMMWORD PTR[rcx],xmm0
+ movups XMMWORD PTR[r8],xmm1
+ lea r8,QWORD PTR[16+r8]
+ lea rcx,QWORD PTR[((-16))+rcx]
+
+$L$dec_key_inverse:: ; walk inward from both ends: AESIMC each key and swap the pair
+ movups xmm0,XMMWORD PTR[r8]
+ movups xmm1,XMMWORD PTR[rcx]
+ aesimc xmm0,xmm0
+ aesimc xmm1,xmm1
+ lea r8,QWORD PTR[16+r8]
+ lea rcx,QWORD PTR[((-16))+rcx]
+ movups XMMWORD PTR[16+rcx],xmm0
+ movups XMMWORD PTR[(-16)+r8],xmm1
+ cmp rcx,r8
+ ja $L$dec_key_inverse
+
+ movups xmm0,XMMWORD PTR[r8] ; middle round key: the loop exits with rcx == r8, transform it in place
+ aesimc xmm0,xmm0
+ movups XMMWORD PTR[rcx],xmm0
+$L$dec_key_ret::
+ add rsp,8
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_set_decrypt_key::
+aesni_set_decrypt_key ENDP
+PUBLIC aesni_set_encrypt_key
+ ; aesni_set_encrypt_key: expand a 128/192/256-bit user key into a round-key schedule. In: rcx = user key, edx = key size in bits, r8 = schedule buffer. Out: rax = 0, -1 (null pointer) or -2 (unsupported size).
+ALIGN 16
+aesni_set_encrypt_key PROC PUBLIC
+__aesni_set_encrypt_key:: ; internal alias, called by aesni_set_decrypt_key
+ sub rsp,8
+ mov rax,-1 ; status returned if either pointer is null
+ test rcx,rcx
+ jz $L$enc_key_ret
+ test r8,r8
+ jz $L$enc_key_ret
+
+ movups xmm0,XMMWORD PTR[rcx] ; xmm0 = first 16 bytes of the user key
+ xorps xmm4,xmm4 ; xmm4 is scratch for the helpers and starts out zero
+ lea rax,QWORD PTR[16+r8] ; rax = write cursor, starting at the second round-key slot
+ cmp edx,256
+ je $L$14rounds
+ cmp edx,192
+ je $L$12rounds
+ cmp edx,128
+ jne $L$bad_keybits
+
+$L$10rounds:: ; 128-bit key; the aeskeygenassist immediates are the successive round constants
+ mov edx,9 ; value stored in the schedule's count field at offset 240
+ movups XMMWORD PTR[r8],xmm0
+ aeskeygenassist xmm1,xmm0,01h
+ call $L$key_expansion_128_cold ; first step: nothing to flush yet, so skip the store
+ aeskeygenassist xmm1,xmm0,02h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,04h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,08h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,010h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,020h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,040h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,080h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,01bh
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,036h
+ call $L$key_expansion_128
+ movups XMMWORD PTR[rax],xmm0 ; flush the last round key (rax = r8+160 here)
+ mov DWORD PTR[80+rax],edx ; count field: r8+160+80 = r8+240
+ xor eax,eax ; return 0
+ jmp $L$enc_key_ret
+
+ALIGN 16
+$L$12rounds:: ; 192-bit key: xmm0 = key bytes 0..15, low half of xmm2 = key bytes 16..23
+ movq xmm2,QWORD PTR[16+rcx]
+ mov edx,11
+ movups XMMWORD PTR[r8],xmm0
+ aeskeygenassist xmm1,xmm2,01h
+ call $L$key_expansion_192a_cold
+ aeskeygenassist xmm1,xmm2,02h
+ call $L$key_expansion_192b
+ aeskeygenassist xmm1,xmm2,04h
+ call $L$key_expansion_192a
+ aeskeygenassist xmm1,xmm2,08h
+ call $L$key_expansion_192b
+ aeskeygenassist xmm1,xmm2,010h
+ call $L$key_expansion_192a
+ aeskeygenassist xmm1,xmm2,020h
+ call $L$key_expansion_192b
+ aeskeygenassist xmm1,xmm2,040h
+ call $L$key_expansion_192a
+ aeskeygenassist xmm1,xmm2,080h
+ call $L$key_expansion_192b
+ movups XMMWORD PTR[rax],xmm0
+ mov DWORD PTR[48+rax],edx ; count field (schedule offset 240)
+ xor rax,rax ; return 0
+ jmp $L$enc_key_ret
+
+ALIGN 16
+$L$14rounds:: ; 256-bit key: xmm0 = key bytes 0..15, xmm2 = key bytes 16..31
+ movups xmm2,XMMWORD PTR[16+rcx]
+ mov edx,13
+ lea rax,QWORD PTR[16+rax] ; the first two round-key slots are written directly below
+ movups XMMWORD PTR[r8],xmm0
+ movups XMMWORD PTR[16+r8],xmm2
+ aeskeygenassist xmm1,xmm2,01h
+ call $L$key_expansion_256a_cold
+ aeskeygenassist xmm1,xmm0,01h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,02h
+ call $L$key_expansion_256a
+ aeskeygenassist xmm1,xmm0,02h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,04h
+ call $L$key_expansion_256a
+ aeskeygenassist xmm1,xmm0,04h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,08h
+ call $L$key_expansion_256a
+ aeskeygenassist xmm1,xmm0,08h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,010h
+ call $L$key_expansion_256a
+ aeskeygenassist xmm1,xmm0,010h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,020h
+ call $L$key_expansion_256a
+ aeskeygenassist xmm1,xmm0,020h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,040h
+ call $L$key_expansion_256a
+ movups XMMWORD PTR[rax],xmm0
+ mov DWORD PTR[16+rax],edx ; count field (schedule offset 240)
+ xor rax,rax ; return 0
+ jmp $L$enc_key_ret
+
+ALIGN 16
+$L$bad_keybits::
+ mov rax,-2 ; key size was not 128, 192 or 256
+$L$enc_key_ret::
+ add rsp,8
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_set_encrypt_key:: ; end of the range registered in .pdata; the helpers below lie outside it
+
+ALIGN 16
+$L$key_expansion_128:: ; store the current round key (xmm0) at [rax], advance, then derive the next one
+ movups XMMWORD PTR[rax],xmm0
+ lea rax,QWORD PTR[16+rax]
+$L$key_expansion_128_cold:: ; entry without the store
+ shufps xmm4,xmm0,16 ; two shufps/xorps pairs fold the lower words of xmm0 into the higher ones
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ xorps xmm0,xmm4
+ shufps xmm1,xmm1,255 ; broadcast dword 3 of the aeskeygenassist result
+ xorps xmm0,xmm1
+ DB 0F3h,0C3h ;repret
+
+ALIGN 16
+$L$key_expansion_192a:: ; store xmm0, advance by 16, then run one 192-bit expansion step
+ movups XMMWORD PTR[rax],xmm0
+ lea rax,QWORD PTR[16+rax]
+$L$key_expansion_192a_cold::
+ movaps xmm5,xmm2 ; keep the current xmm2 for the next 192b step, which packs it with xmm0
+$L$key_expansion_192b_warm:: ; shared body: updates xmm0 (four words) and xmm2 (two words)
+ shufps xmm4,xmm0,16
+ movdqa xmm3,xmm2
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ pslldq xmm3,4
+ xorps xmm0,xmm4
+ pshufd xmm1,xmm1,85 ; broadcast dword 1 of the aeskeygenassist result
+ pxor xmm2,xmm3
+ pxor xmm0,xmm1
+ pshufd xmm3,xmm0,255 ; broadcast dword 3 of the new xmm0
+ pxor xmm2,xmm3
+ DB 0F3h,0C3h ;repret
+
+ALIGN 16
+$L$key_expansion_192b:: ; repack the saved xmm5 with xmm0/xmm2 into two 16-byte round keys, store both, then run the shared step
+ movaps xmm3,xmm0
+ shufps xmm5,xmm0,68
+ movups XMMWORD PTR[rax],xmm5
+ shufps xmm3,xmm2,78
+ movups XMMWORD PTR[16+rax],xmm3
+ lea rax,QWORD PTR[32+rax]
+ jmp $L$key_expansion_192b_warm
+
+ALIGN 16
+$L$key_expansion_256a:: ; store xmm2 at [rax], advance, then derive the next xmm0
+ movups XMMWORD PTR[rax],xmm2
+ lea rax,QWORD PTR[16+rax]
+$L$key_expansion_256a_cold:: ; entry without the store
+ shufps xmm4,xmm0,16
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ xorps xmm0,xmm4
+ shufps xmm1,xmm1,255 ; broadcast dword 3 of the aeskeygenassist result
+ xorps xmm0,xmm1
+ DB 0F3h,0C3h ;repret
+
+ALIGN 16
+$L$key_expansion_256b:: ; store xmm0 at [rax], advance, then derive the next xmm2
+ movups XMMWORD PTR[rax],xmm0
+ lea rax,QWORD PTR[16+rax]
+
+ shufps xmm4,xmm2,16
+ xorps xmm2,xmm4
+ shufps xmm4,xmm2,140
+ xorps xmm2,xmm4
+ shufps xmm1,xmm1,170 ; broadcast dword 2 of the aeskeygenassist result
+ xorps xmm2,xmm1
+ DB 0F3h,0C3h ;repret
+aesni_set_encrypt_key ENDP
+ ; Constant tables placed after the code, 64-byte aligned.
+ALIGN 64
+$L$bswap_mask:: ; byte indices 15..0, i.e. a full 16-byte reversal pattern (not referenced in this section; presumably a pshufb mask - confirm)
+DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+$L$increment32:: ; dwords {6,6,6,0}; not referenced in this section
+ DD 6,6,6,0
+$L$increment64:: ; dwords {1,0,0,0}; not referenced in this section
+ DD 1,0,0,0
+$L$xts_magic:: ; dwords {0x87,0,1,0}: the mask ANDed into every XTS tweak doubling
+ DD 087h,0,1,0
+ ; ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated
+DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
+DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+DB 115,108,46,111,114,103,62,0
+ALIGN 64
+EXTERN __imp_RtlVirtualUnwind:NEAR
+ ; ecb_se_handler: Win64 language-specific exception handler for aesni_ecb_encrypt (see $L$SEH_info_ecb). On entry r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT.
+ALIGN 16
+ecb_se_handler PROC PRIVATE
+ push rsi ; save every nonvolatile GPR plus flags; $L$common_seh_tail pops them in reverse
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64 ; shadow space plus four stack arguments for RtlVirtualUnwind
+
+ mov rax,QWORD PTR[152+r8] ; rax = context->Rsp, used as-is: no frame to unwind for this function
+
+ jmp $L$common_seh_tail ; shared tail lives inside cbc_se_handler
+ecb_se_handler ENDP
+
+ ; ccm64_se_handler: Win64 exception handler shared by the two CCM64 routines. HandlerData (see .xdata) holds two image-relative labels bounding the region where the frame is live.
+ALIGN 16
+ccm64_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8] ; rax = context->Rax (the routines copy rsp into rax in their prologue)
+ mov rbx,QWORD PTR[248+r8] ; rbx = context->Rip, the faulting address
+
+ mov rsi,QWORD PTR[8+r9] ; rsi = disp->ImageBase
+ mov r11,QWORD PTR[56+r9] ; r11 = disp->HandlerData
+
+ mov r10d,DWORD PTR[r11] ; HandlerData[0] = RVA of the body label
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_seh_tail ; fault before the frame was set up: the caller's rsp is still in rax
+
+ mov rax,QWORD PTR[152+r8] ; rax = context->Rsp
+
+ mov r10d,DWORD PTR[4+r11] ; HandlerData[1] = RVA of the return label
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$common_seh_tail ; fault after the frame was released: rsp is already the caller's
+
+ lea rsi,QWORD PTR[rax] ; inside the body: the saved xmm registers start at [rsp]
+ lea rdi,QWORD PTR[512+r8] ; &context->Xmm6
+ mov ecx,8 ; 8 qwords = four xmm registers
+ DD 0a548f3fch ; encoded cld; rep movsq (bytes FC F3 48 A5)
+ lea rax,QWORD PTR[88+rax] ; step over the 88-byte frame
+
+ jmp $L$common_seh_tail
+ccm64_se_handler ENDP
+
+ ; ctr32_se_handler: Win64 exception handler for aesni_ctr32_encrypt_blocks; the live-frame region is bounded by $L$ctr32_body and $L$ctr32_ret (defined outside this section).
+ALIGN 16
+ctr32_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8] ; rax = context->Rax
+ mov rbx,QWORD PTR[248+r8] ; rbx = context->Rip
+
+ lea r10,QWORD PTR[$L$ctr32_body]
+ cmp rbx,r10
+ jb $L$common_seh_tail ; fault before the frame was set up
+
+ mov rax,QWORD PTR[152+r8] ; rax = context->Rsp
+
+ lea r10,QWORD PTR[$L$ctr32_ret]
+ cmp rbx,r10
+ jae $L$common_seh_tail ; fault after the frame was released
+
+ lea rsi,QWORD PTR[32+rax] ; saved xmm registers start at [rsp+32]
+ lea rdi,QWORD PTR[512+r8] ; &context->Xmm6
+ mov ecx,20 ; 20 qwords = ten xmm registers
+ DD 0a548f3fch ; encoded cld; rep movsq (bytes FC F3 48 A5)
+ lea rax,QWORD PTR[200+rax] ; step over the 200-byte frame
+
+ jmp $L$common_seh_tail
+ctr32_se_handler ENDP
+
+ ; xts_se_handler: Win64 exception handler shared by aesni_xts_encrypt and aesni_xts_decrypt; HandlerData holds the RVAs of the body and epilogue labels (see .xdata).
+ALIGN 16
+xts_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[120+r8] ; rax = context->Rax (the XTS prologue copies rsp into rax)
+ mov rbx,QWORD PTR[248+r8] ; rbx = context->Rip
+
+ mov rsi,QWORD PTR[8+r9] ; rsi = disp->ImageBase
+ mov r11,QWORD PTR[56+r9] ; r11 = disp->HandlerData
+
+ mov r10d,DWORD PTR[r11] ; HandlerData[0] = RVA of the body label
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_seh_tail ; fault before the frame was fully set up
+
+ mov rax,QWORD PTR[152+r8] ; rax = context->Rsp
+
+ mov r10d,DWORD PTR[4+r11] ; HandlerData[1] = RVA of the epilogue label
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$common_seh_tail ; fault after the frame was released
+
+ lea rsi,QWORD PTR[96+rax] ; saved xmm6-xmm15 start at [rsp+96], matching the XTS prologue
+ lea rdi,QWORD PTR[512+r8] ; &context->Xmm6
+ mov ecx,20 ; 20 qwords = ten xmm registers
+ DD 0a548f3fch ; encoded cld; rep movsq (bytes FC F3 48 A5)
+ lea rax,QWORD PTR[((104+160))+rax] ; step over the 264-byte frame
+
+ jmp $L$common_seh_tail
+xts_se_handler ENDP
+ ; cbc_se_handler: Win64 exception handler for aesni_cbc_encrypt. It also hosts $L$common_seh_tail, the shared exit that every *_se_handler in this file jumps to.
+ALIGN 16
+cbc_se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+
+ mov rax,QWORD PTR[152+r8] ; rax = context->Rsp
+ mov rbx,QWORD PTR[248+r8] ; rbx = context->Rip
+
+ lea r10,QWORD PTR[$L$cbc_decrypt]
+ cmp rbx,r10
+ jb $L$common_seh_tail ; encrypt path or entry code: no frame, rsp is already the caller's
+
+ lea r10,QWORD PTR[$L$cbc_decrypt_body]
+ cmp rbx,r10
+ jb $L$restore_cbc_rax ; frame only partly built: take the rsp copy the prologue left in rax
+
+ lea r10,QWORD PTR[$L$cbc_ret]
+ cmp rbx,r10
+ jae $L$common_seh_tail ; frame already released
+
+ lea rsi,QWORD PTR[rax] ; decrypt body: the saved xmm registers start at [rsp]
+ lea rdi,QWORD PTR[512+r8] ; &context->Xmm6
+ mov ecx,8 ; 8 qwords = four xmm registers
+ DD 0a548f3fch ; encoded cld; rep movsq (bytes FC F3 48 A5)
+ lea rax,QWORD PTR[88+rax] ; step over the 88-byte decrypt frame
+ jmp $L$common_seh_tail
+
+$L$restore_cbc_rax::
+ mov rax,QWORD PTR[120+r8] ; rax = context->Rax
+
+$L$common_seh_tail:: ; on entry rax = the function's rsp at entry (rdi/rsi were parked at [8+rax]/[16+rax] by its prologue)
+ mov rdi,QWORD PTR[8+rax]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax ; context->Rsp
+ mov QWORD PTR[168+r8],rsi ; context->Rsi
+ mov QWORD PTR[176+r8],rdi ; context->Rdi
+
+ mov rdi,QWORD PTR[40+r9] ; rdi = disp->ContextRecord
+ mov rsi,r8
+ mov ecx,154 ; 154 qwords = 1232 bytes
+ DD 0a548f3fch ; encoded cld; rep movsq: copy the patched context into disp->ContextRecord
+
+ mov rsi,r9
+ xor rcx,rcx ; arg1 = 0
+ mov rdx,QWORD PTR[8+rsi] ; arg2 = disp->ImageBase
+ mov r8,QWORD PTR[rsi] ; arg3 = disp->ControlPc
+ mov r9,QWORD PTR[16+rsi] ; arg4 = disp->FunctionEntry
+ mov r10,QWORD PTR[40+rsi] ; disp->ContextRecord
+ lea r11,QWORD PTR[56+rsi] ; &disp->HandlerData
+ lea r12,QWORD PTR[24+rsi] ; &disp->EstablisherFrame
+ mov QWORD PTR[32+rsp],r10 ; arg5
+ mov QWORD PTR[40+rsp],r11 ; arg6
+ mov QWORD PTR[48+rsp],r12 ; arg7
+ mov QWORD PTR[56+rsp],rcx ; arg8 = NULL
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1 ; return 1 (ExceptionContinueSearch in the Win64 disposition enum)
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+cbc_se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4) ; function table: one (begin RVA, end RVA, unwind-info RVA) triple per routine
+ALIGN 4
+ DD imagerel $L$SEH_begin_aesni_ecb_encrypt
+ DD imagerel $L$SEH_end_aesni_ecb_encrypt
+ DD imagerel $L$SEH_info_ecb
+ ; aesni_ccm64_encrypt_blocks
+ DD imagerel $L$SEH_begin_aesni_ccm64_encrypt_blocks
+ DD imagerel $L$SEH_end_aesni_ccm64_encrypt_blocks
+ DD imagerel $L$SEH_info_ccm64_enc
+ ; aesni_ccm64_decrypt_blocks
+ DD imagerel $L$SEH_begin_aesni_ccm64_decrypt_blocks
+ DD imagerel $L$SEH_end_aesni_ccm64_decrypt_blocks
+ DD imagerel $L$SEH_info_ccm64_dec
+ ; aesni_ctr32_encrypt_blocks
+ DD imagerel $L$SEH_begin_aesni_ctr32_encrypt_blocks
+ DD imagerel $L$SEH_end_aesni_ctr32_encrypt_blocks
+ DD imagerel $L$SEH_info_ctr32
+ ; aesni_xts_encrypt
+ DD imagerel $L$SEH_begin_aesni_xts_encrypt
+ DD imagerel $L$SEH_end_aesni_xts_encrypt
+ DD imagerel $L$SEH_info_xts_enc
+ ; aesni_xts_decrypt, immediately followed by aesni_cbc_encrypt
+ DD imagerel $L$SEH_begin_aesni_xts_decrypt
+ DD imagerel $L$SEH_end_aesni_xts_decrypt
+ DD imagerel $L$SEH_info_xts_dec
+ DD imagerel $L$SEH_begin_aesni_cbc_encrypt
+ DD imagerel $L$SEH_end_aesni_cbc_encrypt
+ DD imagerel $L$SEH_info_cbc
+ ; the two key-setup routines share one unwind-info record
+ DD imagerel aesni_set_decrypt_key
+ DD imagerel $L$SEH_end_set_decrypt_key
+ DD imagerel $L$SEH_info_key
+ ; range ends at $L$SEH_end_set_encrypt_key, before the key-expansion helpers
+ DD imagerel aesni_set_encrypt_key
+ DD imagerel $L$SEH_end_set_encrypt_key
+ DD imagerel $L$SEH_info_key
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8) ; unwind-info records referenced from .pdata
+ALIGN 8
+$L$SEH_info_ecb::
+DB 9,0,0,0 ; header byte 9 = version 1 with the exception-handler flag; no prologue bytes, no unwind codes
+ DD imagerel ecb_se_handler
+$L$SEH_info_ccm64_enc::
+DB 9,0,0,0
+ DD imagerel ccm64_se_handler
+ DD imagerel $L$ccm64_enc_body,imagerel $L$ccm64_enc_ret ; HandlerData: bounds read by ccm64_se_handler
+$L$SEH_info_ccm64_dec::
+DB 9,0,0,0
+ DD imagerel ccm64_se_handler
+ DD imagerel $L$ccm64_dec_body,imagerel $L$ccm64_dec_ret ; HandlerData: bounds read by ccm64_se_handler
+$L$SEH_info_ctr32::
+DB 9,0,0,0
+ DD imagerel ctr32_se_handler
+$L$SEH_info_xts_enc::
+DB 9,0,0,0
+ DD imagerel xts_se_handler
+ DD imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue ; HandlerData: bounds read by xts_se_handler
+$L$SEH_info_xts_dec::
+DB 9,0,0,0
+ DD imagerel xts_se_handler
+ DD imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue ; HandlerData: bounds read by xts_se_handler
+$L$SEH_info_cbc::
+DB 9,0,0,0
+ DD imagerel cbc_se_handler
+$L$SEH_info_key:: ; plain unwind codes, no handler: matches the sub rsp,8 prologue of both key-setup routines
+DB 001h,004h,001h,000h ; version 1, no flags; prologue size 4; one unwind code; no frame register
+DB 004h,002h,000h,000h ; at prologue offset 4: small stack allocation of 8 bytes (op 2, info 0); then padding
+
+.xdata ENDS
+END
+
diff --git a/ext/libressl/crypto/aes/aesni-mingw64-x86_64.S b/ext/libressl/crypto/aes/aesni-mingw64-x86_64.S
new file mode 100644
index 0000000..0a82a10
--- /dev/null
+++ b/ext/libressl/crypto/aes/aesni-mingw64-x86_64.S
@@ -0,0 +1,3008 @@
+#include "x86_arch.h"
+.text
+/*
+ * aesni_encrypt -- encrypt a single 16-byte block.
+ *
+ * The Win64 argument registers are used directly:
+ *   %rcx = address of the input block
+ *   %rdx = address of the output block
+ *   %r8  = address of the key schedule
+ * The loop count is the 32-bit value at offset 240 of the key schedule
+ * (presumably the rounds field of the key structure, which is declared
+ * outside this file).  The routine performs that many AESENC rounds
+ * followed by one AESENCLAST.
+ *
+ * Clobbers %eax, %r8, %xmm0-%xmm2 and the flags.
+ */
+.globl aesni_encrypt
+.def aesni_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+aesni_encrypt:
+ movups (%rcx),%xmm2
+ movl 240(%r8),%eax
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+/* initial whitening: block ^= round key 0 */
+ xorps %xmm0,%xmm2
+.Loop_enc1_1:
+ aesenc %xmm1,%xmm2
+/* decl sets ZF for the jnz below; movups/leaq leave the flags alone */
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_1
+ aesenclast %xmm1,%xmm2
+ movups %xmm2,(%rdx)
+ retq
+
+
+/*
+ * aesni_decrypt -- decrypt a single 16-byte block.
+ *
+ * Same register contract as aesni_encrypt:
+ *   %rcx = address of the input block
+ *   %rdx = address of the output block
+ *   %r8  = address of the (decryption) key schedule
+ * The loop count is read from offset 240 of the key schedule; that many
+ * AESDEC rounds are executed, followed by one AESDECLAST.
+ *
+ * Clobbers %eax, %r8, %xmm0-%xmm2 and the flags.
+ */
+.globl aesni_decrypt
+.def aesni_decrypt; .scl 2; .type 32; .endef
+.p2align 4
+aesni_decrypt:
+ movups (%rcx),%xmm2
+ movl 240(%r8),%eax
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+/* initial whitening: block ^= round key 0 */
+ xorps %xmm0,%xmm2
+.Loop_dec1_2:
+ aesdec %xmm1,%xmm2
+/* the flags written by decl are consumed by jnz two instructions later */
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_dec1_2
+ aesdeclast %xmm1,%xmm2
+ movups %xmm2,(%rdx)
+ retq
+
+/*
+ * _aesni_encrypt3 -- file-local helper: encrypt three blocks in parallel.
+ *
+ * In:   %rcx = key schedule, %eax = loop count as stored in the key
+ *       schedule, %xmm2-%xmm4 = the three blocks.
+ * Out:  %xmm2-%xmm4 = encrypted blocks.
+ * Clobbers %eax, %rcx, %xmm0, %xmm1 and the flags.
+ *
+ * The count is halved up front because each loop iteration applies two
+ * round keys (%xmm1, then %xmm0).
+ */
+.def _aesni_encrypt3; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_encrypt3:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+/* whitening with round key 0 */
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+
+.Lenc_loop3:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+/* ZF from this decl is tested by the jnz at the bottom of the loop */
+ decl %eax
+ aesenc %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop3
+
+/* last two round keys: one more AESENC, then AESENCLAST */
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ retq
+
+/*
+ * _aesni_decrypt3 -- file-local helper: decrypt three blocks in parallel.
+ *
+ * In:   %rcx = key schedule, %eax = loop count as stored in the key
+ *       schedule, %xmm2-%xmm4 = the three blocks.
+ * Out:  %xmm2-%xmm4 = decrypted blocks.
+ * Clobbers %eax, %rcx, %xmm0, %xmm1 and the flags.
+ *
+ * Mirrors _aesni_encrypt3 with AESDEC/AESDECLAST; two round keys are
+ * consumed per loop iteration, hence the initial halving of %eax.
+ */
+.def _aesni_decrypt3; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_decrypt3:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+/* whitening with round key 0 */
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+
+.Ldec_loop3:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+/* ZF from this decl is tested by the jnz at the bottom of the loop */
+ decl %eax
+ aesdec %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop3
+
+/* last two round keys: one more AESDEC, then AESDECLAST */
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ retq
+
+/*
+ * _aesni_encrypt4 -- file-local helper: encrypt four blocks in parallel.
+ *
+ * In:   %rcx = key schedule, %eax = loop count as stored in the key
+ *       schedule, %xmm2-%xmm5 = the four blocks.
+ * Out:  %xmm2-%xmm5 = encrypted blocks.
+ * Clobbers %eax, %rcx, %xmm0, %xmm1 and the flags.
+ */
+.def _aesni_encrypt4; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_encrypt4:
+ movups (%rcx),%xmm0
+/* two round keys are applied per loop iteration */
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+/* whitening with round key 0 */
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+
+.Lenc_loop4:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+/* ZF from this decl is tested by the jnz at the bottom of the loop */
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop4
+
+/* last two round keys: one more AESENC, then AESENCLAST */
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ retq
+
+/*
+ * _aesni_decrypt4 -- file-local helper: decrypt four blocks in parallel.
+ *
+ * In:   %rcx = key schedule, %eax = loop count as stored in the key
+ *       schedule, %xmm2-%xmm5 = the four blocks.
+ * Out:  %xmm2-%xmm5 = decrypted blocks.
+ * Clobbers %eax, %rcx, %xmm0, %xmm1 and the flags.
+ */
+.def _aesni_decrypt4; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_decrypt4:
+ movups (%rcx),%xmm0
+/* two round keys are applied per loop iteration */
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+/* whitening with round key 0 */
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+
+.Ldec_loop4:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+/* ZF from this decl is tested by the jnz at the bottom of the loop */
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop4
+
+/* last two round keys: one more AESDEC, then AESDECLAST */
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ retq
+
+/*
+ * _aesni_encrypt6 -- file-local helper: encrypt six blocks in parallel.
+ *
+ * In:   %rcx = key schedule, %eax = loop count as stored in the key
+ *       schedule, %xmm2-%xmm7 = the six blocks.
+ * Out:  %xmm2-%xmm7 = encrypted blocks.
+ * Clobbers %eax, %rcx, %xmm0, %xmm1 and the flags.
+ *
+ * The preamble interleaves the round-key-0 whitening of later blocks
+ * with the first AESENC of earlier ones, then jumps into the middle of
+ * the loop; the decl in the preamble accounts for that first round pair.
+ */
+.def _aesni_encrypt6; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_encrypt6:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+/* flags from this decl survive until the jnz after .Lenc_loop6_enter */
+ decl %eax
+ aesenc %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm7
+ jmp .Lenc_loop6_enter
+.p2align 4
+.Lenc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+.Lenc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop6
+
+/* last two round keys: one more AESENC, then AESENCLAST */
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ retq
+
+/*
+ * _aesni_decrypt6 -- file-local helper: decrypt six blocks in parallel.
+ *
+ * In:   %rcx = key schedule, %eax = loop count as stored in the key
+ *       schedule, %xmm2-%xmm7 = the six blocks.
+ * Out:  %xmm2-%xmm7 = decrypted blocks.
+ * Clobbers %eax, %rcx, %xmm0, %xmm1 and the flags.
+ *
+ * Same shape as _aesni_encrypt6: whitening and the first AESDEC are
+ * interleaved in the preamble, which then enters the loop half-way.
+ */
+.def _aesni_decrypt6; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_decrypt6:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+/* flags from this decl survive until the jnz after .Ldec_loop6_enter */
+ decl %eax
+ aesdec %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm7
+ jmp .Ldec_loop6_enter
+.p2align 4
+.Ldec_loop6:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+.Ldec_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop6
+
+/* last two round keys: one more AESDEC, then AESDECLAST */
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ retq
+
+/*
+ * _aesni_encrypt8 -- file-local helper: encrypt eight blocks in parallel.
+ *
+ * In:   %rcx = key schedule, %eax = loop count as stored in the key
+ *       schedule, %xmm2-%xmm9 = the eight blocks.
+ * Out:  %xmm2-%xmm9 = encrypted blocks.
+ * Clobbers %eax, %rcx, %xmm0, %xmm1 and the flags.  It does not save
+ * %xmm6-%xmm9 itself; any saving is left to the caller.
+ *
+ * The preamble interleaves whitening with the first AESENC and then
+ * enters the loop half-way, after having preloaded the next %xmm1.
+ */
+.def _aesni_encrypt8; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_encrypt8:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+/* flags from this decl survive until the jnz after .Lenc_loop8_enter */
+ decl %eax
+ aesenc %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesenc %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp .Lenc_loop8_enter
+.p2align 4
+.Lenc_loop8:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+.Lenc_loop8_enter:
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ aesenc %xmm0,%xmm8
+ aesenc %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop8
+
+/* last two round keys: one more AESENC, then AESENCLAST */
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ aesenclast %xmm0,%xmm8
+ aesenclast %xmm0,%xmm9
+ retq
+
+/*
+ * _aesni_decrypt8 -- file-local helper: decrypt eight blocks in parallel.
+ *
+ * In:   %rcx = key schedule, %eax = loop count as stored in the key
+ *       schedule, %xmm2-%xmm9 = the eight blocks.
+ * Out:  %xmm2-%xmm9 = decrypted blocks.
+ * Clobbers %eax, %rcx, %xmm0, %xmm1 and the flags.  It does not save
+ * %xmm6-%xmm9 itself; any saving is left to the caller.
+ *
+ * Same shape as _aesni_encrypt8, using AESDEC/AESDECLAST.
+ */
+.def _aesni_decrypt8; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_decrypt8:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+/* flags from this decl survive until the jnz after .Ldec_loop8_enter */
+ decl %eax
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp .Ldec_loop8_enter
+.p2align 4
+.Ldec_loop8:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+.Ldec_loop8_enter:
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ aesdec %xmm0,%xmm8
+ aesdec %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop8
+
+/* last two round keys: one more AESDEC, then AESDECLAST */
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ aesdeclast %xmm0,%xmm8
+ aesdeclast %xmm0,%xmm9
+ retq
+
+/*
+ * aesni_ecb_encrypt -- ECB-mode encryption or decryption of a buffer.
+ *
+ * Win64 entry.  The prologue parks %rdi/%rsi in the caller-provided
+ * shadow space and moves the arguments into the register layout the
+ * body uses:
+ *   %rdi = input, %rsi = output, %rdx = length in bytes,
+ *   %rcx = key schedule, %r8d = fifth argument (from 40(%rsp)):
+ *          non-zero selects encryption, zero selects decryption.
+ * The length is rounded down to a multiple of 16; nothing is done when
+ * the result is zero.  Blocks are processed eight at a time, then a
+ * tail of one to seven blocks is dispatched to the narrowest helper.
+ * %r11 and %r10d keep copies of the key pointer and loop count because
+ * the helpers consume %rcx and %eax.
+ *
+ * NOTE(review): this routine writes %xmm6-%xmm9 (eight-block path and
+ * the five/six/seven-block tails) but never saves or restores them,
+ * unlike the ccm64/ctr32/xts entry points in this file.  Those
+ * registers are callee-saved in the Microsoft x64 convention.  Fixing
+ * it needs a stack frame plus matching changes to the unwind handler
+ * and .pdata/.xdata records, which live outside this block.
+ */
+.globl aesni_ecb_encrypt
+.def aesni_ecb_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+aesni_ecb_encrypt:
+/* save the callee-saved %rdi/%rsi in the caller's shadow space */
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+/* %rax is overwritten below; presumably kept for the generated SEH prologue shape */
+ movq %rsp,%rax
+.LSEH_begin_aesni_ecb_encrypt:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+
+/* drop any partial trailing block; return if nothing is left */
+ andq $-16,%rdx
+ jz .Lecb_ret
+
+ movl 240(%rcx),%eax
+ movups (%rcx),%xmm0
+ movq %rcx,%r11
+ movl %eax,%r10d
+ testl %r8d,%r8d
+ jz .Lecb_decrypt
+
+/* ---- encryption ---- */
+ cmpq $128,%rdx
+ jb .Lecb_enc_tail
+
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp .Lecb_enc_loop8_enter
+.p2align 4
+/* store the previous eight results while loading the next eight inputs */
+.Lecb_enc_loop8:
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+.Lecb_enc_loop8_enter:
+
+ call _aesni_encrypt8
+
+/* loop while at least 128 more bytes remain (no borrow) */
+ subq $128,%rdx
+ jnc .Lecb_enc_loop8
+
+/* flush the final eight results and reload key pointer / loop count */
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+/* undo the over-subtraction; zero means the length was a multiple of 128 */
+ addq $128,%rdx
+ jz .Lecb_ret
+
+/*
+ * Tail of 16..112 bytes.  Each cmpq feeds two branches (jb, then je);
+ * the movups between them does not alter the flags.
+ */
+.Lecb_enc_tail:
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb .Lecb_enc_one
+ movups 16(%rdi),%xmm3
+ je .Lecb_enc_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb .Lecb_enc_three
+ movups 48(%rdi),%xmm5
+ je .Lecb_enc_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb .Lecb_enc_five
+ movups 80(%rdi),%xmm7
+ je .Lecb_enc_six
+/* seven blocks: the eighth lane (%xmm9) is not loaded and its result is not stored */
+ movdqu 96(%rdi),%xmm8
+ call _aesni_encrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* single block: inlined one-block encryption using the full count in %eax */
+.Lecb_enc_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_3:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_3
+ aesenclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* two blocks: zero the unused third lane and use the 3-block helper */
+.Lecb_enc_two:
+ xorps %xmm4,%xmm4
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+.Lecb_enc_three:
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+.Lecb_enc_four:
+ call _aesni_encrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* five blocks: zero the unused sixth lane and use the 6-block helper */
+.Lecb_enc_five:
+ xorps %xmm7,%xmm7
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+.Lecb_enc_six:
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ jmp .Lecb_ret
+
+/* ---- decryption: same structure as the encryption half ---- */
+.p2align 4
+.Lecb_decrypt:
+ cmpq $128,%rdx
+ jb .Lecb_dec_tail
+
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp .Lecb_dec_loop8_enter
+.p2align 4
+/* store the previous eight results while loading the next eight inputs */
+.Lecb_dec_loop8:
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+.Lecb_dec_loop8_enter:
+
+ call _aesni_decrypt8
+
+/* reload round key 0 into %xmm0; movups leaves the flags for jnc intact */
+ movups (%r11),%xmm0
+ subq $128,%rdx
+ jnc .Lecb_dec_loop8
+
+/* flush the final eight results and reload key pointer / loop count */
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx
+ jz .Lecb_ret
+
+/* tail of 16..112 bytes; each cmpq feeds a jb and a later je */
+.Lecb_dec_tail:
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb .Lecb_dec_one
+ movups 16(%rdi),%xmm3
+ je .Lecb_dec_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb .Lecb_dec_three
+ movups 48(%rdi),%xmm5
+ je .Lecb_dec_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb .Lecb_dec_five
+ movups 80(%rdi),%xmm7
+ je .Lecb_dec_six
+/* seven blocks: the eighth lane (%xmm9) is not loaded and its result is not stored */
+ movups 96(%rdi),%xmm8
+ movups (%rcx),%xmm0
+ call _aesni_decrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* single block: inlined one-block decryption using the full count in %eax */
+.Lecb_dec_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_4:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_4
+ aesdeclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* two blocks: zero the unused third lane and use the 3-block helper */
+.Lecb_dec_two:
+ xorps %xmm4,%xmm4
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+.Lecb_dec_three:
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+.Lecb_dec_four:
+ call _aesni_decrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* five blocks: zero the unused sixth lane and use the 6-block helper */
+.Lecb_dec_five:
+ xorps %xmm7,%xmm7
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* six blocks: falls through into the common return */
+.Lecb_dec_six:
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+
+/* common exit: restore %rdi/%rsi from the shadow space */
+.Lecb_ret:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_aesni_ecb_encrypt:
+/*
+ * aesni_ccm64_encrypt_blocks -- combined counter-mode encryption and
+ * CBC-MAC update, one 16-byte block per outer iteration.
+ *
+ * Win64 entry; after the prologue shuffle:
+ *   %rdi = input, %rsi = output, %rdx = number of blocks,
+ *   %rcx = key schedule, %r8 = counter block (read only here),
+ *   %r9  = running MAC block (read at entry, written back at exit).
+ * %xmm6-%xmm9 are saved in an 88-byte frame and restored before return.
+ *
+ * Register roles in the body:
+ *   %xmm9 = counter, kept in shuffled (.Lbswap_mask) form so it can be
+ *           stepped with paddq and .Lincrement64
+ *   %xmm2 = counter block being encrypted, %xmm3 = MAC state
+ *   %xmm8 = current input / output block
+ *   %r11 = key schedule, %r10d = halved loop count
+ * The two constants are defined outside this block.
+ *
+ * The block count is decremented before it is tested, so the caller
+ * must pass at least one block.
+ */
+.globl aesni_ccm64_encrypt_blocks
+.def aesni_ccm64_encrypt_blocks; .scl 2; .type 32; .endef
+.p2align 4
+aesni_ccm64_encrypt_blocks:
+/* save the callee-saved %rdi/%rsi in the caller's shadow space */
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_aesni_ccm64_encrypt_blocks:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+/* fifth and sixth arguments live above the shadow space */
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+
+/* frame for the four callee-saved XMM registers used below */
+ leaq -88(%rsp),%rsp
+ movaps %xmm6,(%rsp)
+ movaps %xmm7,16(%rsp)
+ movaps %xmm8,32(%rsp)
+ movaps %xmm9,48(%rsp)
+.Lccm64_enc_body:
+ movl 240(%rcx),%eax
+ movdqu (%r8),%xmm9
+ movdqa .Lincrement64(%rip),%xmm6
+ movdqa .Lbswap_mask(%rip),%xmm7
+
+/* the inner loop applies two round keys per iteration */
+ shrl $1,%eax
+ leaq 0(%rcx),%r11
+ movdqu (%r9),%xmm3
+ movdqa %xmm9,%xmm2
+ movl %eax,%r10d
+/* pshufb %xmm7,%xmm9 : shuffle the counter with the bswap mask */
+.byte 102,68,15,56,0,207
+ jmp .Lccm64_enc_outer
+.p2align 4
+.Lccm64_enc_outer:
+ movups (%r11),%xmm0
+ movl %r10d,%eax
+ movups (%rdi),%xmm8
+
+/* %xmm2 ^= key0 ; %xmm3 ^= input ^ key0 (MAC absorbs the plaintext) */
+ xorps %xmm0,%xmm2
+ movups 16(%r11),%xmm1
+ xorps %xmm8,%xmm0
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm3
+ movups (%rcx),%xmm0
+
+/* encrypt the counter block and the MAC block side by side */
+.Lccm64_enc2_loop:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz .Lccm64_enc2_loop
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+/* step the counter for the next block */
+ paddq %xmm6,%xmm9
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+
+/* ZF from decq is consumed by the jnz below; nothing in between writes flags */
+ decq %rdx
+ leaq 16(%rdi),%rdi
+/* output = input ^ encrypted counter */
+ xorps %xmm2,%xmm8
+ movdqa %xmm9,%xmm2
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+/* pshufb %xmm7,%xmm2 : shuffle the next counter back into block form */
+.byte 102,15,56,0,215
+ jnz .Lccm64_enc_outer
+
+/* write back the MAC and restore the saved XMM registers */
+ movups %xmm3,(%r9)
+ movaps (%rsp),%xmm6
+ movaps 16(%rsp),%xmm7
+ movaps 32(%rsp),%xmm8
+ movaps 48(%rsp),%xmm9
+ leaq 88(%rsp),%rsp
+.Lccm64_enc_ret:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_aesni_ccm64_encrypt_blocks:
+/*
+ * aesni_ccm64_decrypt_blocks -- combined counter-mode decryption and
+ * CBC-MAC update.
+ *
+ * Win64 entry; after the prologue shuffle:
+ *   %rdi = input, %rsi = output, %rdx = number of blocks,
+ *   %rcx = key schedule, %r8 = counter block (read only here),
+ *   %r9  = running MAC block (read at entry, written back at exit).
+ * %xmm6-%xmm9 are saved in an 88-byte frame and restored before return.
+ *
+ * The MAC is taken over the decrypted data, so the work is pipelined:
+ * the first counter block is encrypted on its own, and each pass of the
+ * outer loop then (1) produces a plaintext block, (2) encrypts the next
+ * counter block together with the MAC update for the plaintext just
+ * produced.  The final MAC update is done alone at .Lccm64_dec_break.
+ *
+ * %r10d holds the full loop count, %r11 the key schedule, %xmm9 the
+ * counter in shuffled form, %xmm6/%xmm7 the .Lincrement64 and
+ * .Lbswap_mask constants (both defined outside this block).
+ */
+.globl aesni_ccm64_decrypt_blocks
+.def aesni_ccm64_decrypt_blocks; .scl 2; .type 32; .endef
+.p2align 4
+aesni_ccm64_decrypt_blocks:
+/* save the callee-saved %rdi/%rsi in the caller's shadow space */
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_aesni_ccm64_decrypt_blocks:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+/* fifth and sixth arguments live above the shadow space */
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+
+/* frame for the four callee-saved XMM registers used below */
+ leaq -88(%rsp),%rsp
+ movaps %xmm6,(%rsp)
+ movaps %xmm7,16(%rsp)
+ movaps %xmm8,32(%rsp)
+ movaps %xmm9,48(%rsp)
+.Lccm64_dec_body:
+ movl 240(%rcx),%eax
+ movups (%r8),%xmm9
+ movdqu (%r9),%xmm3
+ movdqa .Lincrement64(%rip),%xmm6
+ movdqa .Lbswap_mask(%rip),%xmm7
+
+ movaps %xmm9,%xmm2
+ movl %eax,%r10d
+ movq %rcx,%r11
+/* pshufb %xmm7,%xmm9 : shuffle the counter with the bswap mask */
+.byte 102,68,15,56,0,207
+/* encrypt the first counter block on its own */
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_5:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_5
+ aesenclast %xmm1,%xmm2
+ movups (%rdi),%xmm8
+ paddq %xmm6,%xmm9
+ leaq 16(%rdi),%rdi
+ jmp .Lccm64_dec_outer
+.p2align 4
+.Lccm64_dec_outer:
+/* plaintext = ciphertext ^ encrypted counter */
+ xorps %xmm2,%xmm8
+ movdqa %xmm9,%xmm2
+ movl %r10d,%eax
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+/* pshufb %xmm7,%xmm2 : shuffle the next counter back into block form */
+.byte 102,15,56,0,215
+
+ subq $1,%rdx
+ jz .Lccm64_dec_break
+
+/* more blocks follow: encrypt next counter and update the MAC together */
+ movups (%r11),%xmm0
+ shrl $1,%eax
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm2
+/* %xmm3 ^= plaintext ^ key0 */
+ xorps %xmm8,%xmm3
+ movups (%rcx),%xmm0
+
+.Lccm64_dec2_loop:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz .Lccm64_dec2_loop
+/* fetch the next ciphertext block and step the counter while finishing */
+ movups (%rdi),%xmm8
+ paddq %xmm6,%xmm9
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ leaq 16(%rdi),%rdi
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ jmp .Lccm64_dec_outer
+
+.p2align 4
+.Lccm64_dec_break:
+
+/* last block: only the MAC update remains; %eax holds the full count */
+ movups (%r11),%xmm0
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%r11
+ xorps %xmm8,%xmm3
+.Loop_enc1_6:
+ aesenc %xmm1,%xmm3
+ decl %eax
+ movups (%r11),%xmm1
+ leaq 16(%r11),%r11
+ jnz .Loop_enc1_6
+ aesenclast %xmm1,%xmm3
+/* write back the MAC and restore the saved XMM registers */
+ movups %xmm3,(%r9)
+ movaps (%rsp),%xmm6
+ movaps 16(%rsp),%xmm7
+ movaps 32(%rsp),%xmm8
+ movaps 48(%rsp),%xmm9
+ leaq 88(%rsp),%rsp
+.Lccm64_dec_ret:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_aesni_ccm64_decrypt_blocks:
+/*
+ * aesni_ctr32_encrypt_blocks -- counter-mode encryption where only the
+ * last 32-bit word of the counter block is incremented.
+ *
+ * Win64 entry; after the prologue shuffle:
+ *   %rdi = input, %rsi = output, %rdx = number of blocks,
+ *   %rcx = key schedule, %r8 = counter block (read only here).
+ * A 200-byte frame is allocated: 0(%rsp) and 16(%rsp) hold two vectors
+ * of host-order counter values, 32..191(%rsp) hold %xmm6-%xmm15.
+ *
+ * Register roles in the main path:
+ *   %xmm14 = counter block with its last dword cleared
+ *   %xmm15 = .Lbswap_mask
+ *   %xmm12 = counters n, n+1, n+2   %xmm13 = counters n+3, n+4, n+5
+ *            (stored in host order, then shuffled with %xmm15)
+ *   %r11 = key schedule, %r10d = halved loop count (inside the loop)
+ * Six blocks are processed per iteration of .Lctr32_loop6; one to five
+ * remaining blocks go through .Lctr32_tail.  A single-block request is
+ * short-circuited through .Lctr32_one_shortcut.
+ */
+.globl aesni_ctr32_encrypt_blocks
+.def aesni_ctr32_encrypt_blocks; .scl 2; .type 32; .endef
+.p2align 4
+aesni_ctr32_encrypt_blocks:
+/* save the callee-saved %rdi/%rsi in the caller's shadow space */
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_aesni_ctr32_encrypt_blocks:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+
+/* frame: 32 bytes of counter scratch plus ten callee-saved XMM registers */
+ leaq -200(%rsp),%rsp
+ movaps %xmm6,32(%rsp)
+ movaps %xmm7,48(%rsp)
+ movaps %xmm8,64(%rsp)
+ movaps %xmm9,80(%rsp)
+ movaps %xmm10,96(%rsp)
+ movaps %xmm11,112(%rsp)
+ movaps %xmm12,128(%rsp)
+ movaps %xmm13,144(%rsp)
+ movaps %xmm14,160(%rsp)
+ movaps %xmm15,176(%rsp)
+.Lctr32_body:
+ cmpq $1,%rdx
+ je .Lctr32_one_shortcut
+
+ movdqu (%r8),%xmm14
+ movdqa .Lbswap_mask(%rip),%xmm15
+ xorl %eax,%eax
+/* pextrd $3,%xmm14,%r10d : pull out the counter word */
+.byte 102,69,15,58,22,242,3
+/* pinsrd $3,%eax,%xmm14 : clear the counter word in the template */
+.byte 102,68,15,58,34,240,3
+
+ movl 240(%rcx),%eax
+/* convert the counter word to host byte order for arithmetic */
+ bswapl %r10d
+ pxor %xmm12,%xmm12
+ pxor %xmm13,%xmm13
+/* pinsrd $0,%r10d,%xmm12 : counter n */
+.byte 102,69,15,58,34,226,0
+ leaq 3(%r10),%r11
+/* pinsrd $0,%r11d,%xmm13 : counter n+3 */
+.byte 102,69,15,58,34,235,0
+ incl %r10d
+/* pinsrd $1,%r10d,%xmm12 : counter n+1 */
+.byte 102,69,15,58,34,226,1
+ incq %r11
+/* pinsrd $1,%r11d,%xmm13 : counter n+4 */
+.byte 102,69,15,58,34,235,1
+ incl %r10d
+/* pinsrd $2,%r10d,%xmm12 : counter n+2 */
+.byte 102,69,15,58,34,226,2
+ incq %r11
+/* pinsrd $2,%r11d,%xmm13 : counter n+5 */
+.byte 102,69,15,58,34,235,2
+/* keep host-order copies on the stack, shuffled copies in registers */
+ movdqa %xmm12,0(%rsp)
+/* pshufb %xmm15,%xmm12 */
+.byte 102,69,15,56,0,231
+ movdqa %xmm13,16(%rsp)
+/* pshufb %xmm15,%xmm13 */
+.byte 102,69,15,56,0,239
+
+/* spread the first three counters into the top dword of %xmm2-%xmm4 */
+ pshufd $192,%xmm12,%xmm2
+ pshufd $128,%xmm12,%xmm3
+ pshufd $64,%xmm12,%xmm4
+ cmpq $6,%rdx
+ jb .Lctr32_tail
+ shrl $1,%eax
+ movq %rcx,%r11
+ movl %eax,%r10d
+ subq $6,%rdx
+ jmp .Lctr32_loop6
+
+.p2align 4
+.Lctr32_loop6:
+/* build six counter blocks: counter dword OR-ed into the template */
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm2
+ movups (%r11),%xmm0
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm3
+ movups 16(%r11),%xmm1
+ pshufd $64,%xmm13,%xmm7
+ por %xmm14,%xmm4
+ por %xmm14,%xmm5
+ xorps %xmm0,%xmm2
+ por %xmm14,%xmm6
+ por %xmm14,%xmm7
+
+
+
+
+/* whitening and first round, interleaved with counter bookkeeping loads */
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ movdqa .Lincrement32(%rip),%xmm13
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ movdqa 0(%rsp),%xmm12
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+/* flags from this decl survive until the jnz after .Lctr32_enc_loop6_enter */
+ decl %eax
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ jmp .Lctr32_enc_loop6_enter
+.p2align 4
+.Lctr32_enc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+.Lctr32_enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lctr32_enc_loop6
+
+/*
+ * Penultimate round, interleaved with advancing both counter vectors
+ * by .Lincrement32, saving them in host order and re-shuffling them.
+ */
+ aesenc %xmm1,%xmm2
+ paddd %xmm13,%xmm12
+ aesenc %xmm1,%xmm3
+ paddd 16(%rsp),%xmm13
+ aesenc %xmm1,%xmm4
+ movdqa %xmm12,0(%rsp)
+ aesenc %xmm1,%xmm5
+ movdqa %xmm13,16(%rsp)
+ aesenc %xmm1,%xmm6
+/* pshufb %xmm15,%xmm12 */
+.byte 102,69,15,56,0,231
+ aesenc %xmm1,%xmm7
+/* pshufb %xmm15,%xmm13 */
+.byte 102,69,15,56,0,239
+
+/* final round interleaved with loading six input blocks */
+ aesenclast %xmm0,%xmm2
+ movups (%rdi),%xmm8
+ aesenclast %xmm0,%xmm3
+ movups 16(%rdi),%xmm9
+ aesenclast %xmm0,%xmm4
+ movups 32(%rdi),%xmm10
+ aesenclast %xmm0,%xmm5
+ movups 48(%rdi),%xmm11
+ aesenclast %xmm0,%xmm6
+ movups 64(%rdi),%xmm1
+ aesenclast %xmm0,%xmm7
+ movups 80(%rdi),%xmm0
+ leaq 96(%rdi),%rdi
+
+/* output = input ^ keystream; also pre-build the next three counters */
+ xorps %xmm2,%xmm8
+ pshufd $192,%xmm12,%xmm2
+ xorps %xmm3,%xmm9
+ pshufd $128,%xmm12,%xmm3
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ pshufd $64,%xmm12,%xmm4
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ xorps %xmm7,%xmm0
+ movups %xmm1,64(%rsi)
+ movups %xmm0,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movl %r10d,%eax
+ subq $6,%rdx
+ jnc .Lctr32_loop6
+
+ addq $6,%rdx
+ jz .Lctr32_done
+ movq %r11,%rcx
+/* %eax = 2*%eax + 1: undo the halving (exact when the stored count is odd) */
+ leal 1(%rax,%rax,1),%eax
+
+/* one to five blocks left; %eax holds the un-halved count here */
+.Lctr32_tail:
+ por %xmm14,%xmm2
+ movups (%rdi),%xmm8
+ cmpq $2,%rdx
+ jb .Lctr32_one
+
+ por %xmm14,%xmm3
+ movups 16(%rdi),%xmm9
+ je .Lctr32_two
+
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm4
+ movups 32(%rdi),%xmm10
+ cmpq $4,%rdx
+ jb .Lctr32_three
+
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm5
+ movups 48(%rdi),%xmm11
+ je .Lctr32_four
+
+/* five blocks: zero the unused sixth lane and use the 6-block helper */
+ por %xmm14,%xmm6
+ xorps %xmm7,%xmm7
+
+ call _aesni_encrypt6
+
+ movups 64(%rdi),%xmm1
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ movups %xmm1,64(%rsi)
+ jmp .Lctr32_done
+
+.p2align 4
+/* exactly one block requested: use the counter block as supplied */
+.Lctr32_one_shortcut:
+ movups (%r8),%xmm2
+ movups (%rdi),%xmm8
+ movl 240(%rcx),%eax
+.Lctr32_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_7:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_7
+ aesenclast %xmm1,%xmm2
+ xorps %xmm2,%xmm8
+ movups %xmm8,(%rsi)
+ jmp .Lctr32_done
+
+.p2align 4
+/* two blocks: zero the unused third lane and use the 3-block helper */
+.Lctr32_two:
+ xorps %xmm4,%xmm4
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ movups %xmm9,16(%rsi)
+ jmp .Lctr32_done
+
+.p2align 4
+.Lctr32_three:
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ movups %xmm10,32(%rsi)
+ jmp .Lctr32_done
+
+.p2align 4
+/* four blocks: falls through into the common epilogue */
+.Lctr32_four:
+ call _aesni_encrypt4
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ movups %xmm11,48(%rsi)
+
+/* restore the callee-saved XMM registers and release the frame */
+.Lctr32_done:
+ movaps 32(%rsp),%xmm6
+ movaps 48(%rsp),%xmm7
+ movaps 64(%rsp),%xmm8
+ movaps 80(%rsp),%xmm9
+ movaps 96(%rsp),%xmm10
+ movaps 112(%rsp),%xmm11
+ movaps 128(%rsp),%xmm12
+ movaps 144(%rsp),%xmm13
+ movaps 160(%rsp),%xmm14
+ movaps 176(%rsp),%xmm15
+ leaq 200(%rsp),%rsp
+.Lctr32_ret:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_aesni_ctr32_encrypt_blocks:
+/*
+ * aesni_xts_encrypt -- XTS-mode encryption with ciphertext stealing for
+ * a trailing partial block.
+ *
+ * Win64 entry; after the prologue shuffle:
+ *   %rdi = input, %rsi = output, %rdx = length in bytes,
+ *   %rcx = key schedule used for the data blocks,
+ *   %r8  = key schedule used to encrypt the initial tweak,
+ *   %r9  = 16-byte initial tweak input.
+ * A 264-byte frame is allocated: 0..95(%rsp) hold the six tweaks of the
+ * current batch, 96..255(%rsp) hold %xmm6-%xmm15.
+ *
+ * Register roles in the body:
+ *   %xmm10-%xmm13 = tweaks for blocks 0..3 of the batch
+ *   %xmm14        = tweak 4 inside a batch, sign-mask scratch otherwise
+ *   %xmm15        = running tweak (next one to hand out)
+ *   %xmm8         = .Lxts_magic constant (defined outside this block)
+ *   %xmm9         = scratch for the tweak-doubling carry
+ *   %r11 = data key schedule, %r10d = saved loop count,
+ *   %r9  = original byte length (after the tweak has been read)
+ *
+ * Tweak doubling sequence used throughout: pcmpgtd builds a per-dword
+ * sign mask, pshufd $19 moves the mask dwords, pand selects bits of
+ * .Lxts_magic, paddq doubles each 64-bit half, and pxor folds the
+ * selected bits back in.
+ */
+.globl aesni_xts_encrypt
+.def aesni_xts_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+aesni_xts_encrypt:
+/* save the callee-saved %rdi/%rsi in the caller's shadow space */
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_aesni_xts_encrypt:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+/* fifth and sixth arguments live above the shadow space */
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+
+/* frame: 96 bytes of tweak scratch plus ten callee-saved XMM registers */
+ leaq -264(%rsp),%rsp
+ movaps %xmm6,96(%rsp)
+ movaps %xmm7,112(%rsp)
+ movaps %xmm8,128(%rsp)
+ movaps %xmm9,144(%rsp)
+ movaps %xmm10,160(%rsp)
+ movaps %xmm11,176(%rsp)
+ movaps %xmm12,192(%rsp)
+ movaps %xmm13,208(%rsp)
+ movaps %xmm14,224(%rsp)
+ movaps %xmm15,240(%rsp)
+.Lxts_enc_body:
+/* encrypt the tweak input with the schedule at %r8 -> %xmm15 */
+ movups (%r9),%xmm15
+ movl 240(%r8),%eax
+ movl 240(%rcx),%r10d
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+.Loop_enc1_8:
+ aesenc %xmm1,%xmm15
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_8
+ aesenclast %xmm1,%xmm15
+ movq %rcx,%r11
+ movl %r10d,%eax
+/* keep the exact length in %r9; %rdx becomes the whole-block byte count */
+ movq %rdx,%r9
+ andq $-16,%rdx
+
+/* derive the first four tweaks into %xmm10-%xmm13; %xmm15 runs ahead */
+ movdqa .Lxts_magic(%rip),%xmm8
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+/* fewer than six whole blocks: go straight to the short path */
+ subq $96,%rdx
+ jc .Lxts_enc_short
+
+/*
+ * Loop count for the six-block loop: (count >> 1) - 1, because the
+ * last four round keys are applied by hand after the loop, interleaved
+ * with tweak generation for the next batch.
+ */
+ shrl $1,%eax
+ subl $1,%eax
+ movl %eax,%r10d
+ jmp .Lxts_enc_grandloop
+
+.p2align 4
+.Lxts_enc_grandloop:
+/* finish tweaks 4 (%xmm14) and 5 (%xmm15) while loading six blocks */
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+
+
+
+/* whitening and first round; the six tweaks are parked on the stack */
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp)
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+/* flags from this decl survive until the jnz after .Lxts_enc_loop6_enter */
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+ aesenc %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesenc %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp .Lxts_enc_loop6_enter
+
+.p2align 4
+.Lxts_enc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+.Lxts_enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lxts_enc_loop6
+
+/* hand-unrolled final rounds; each is interleaved with one tweak doubling */
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+
+/* next batch, tweak 0 -> %xmm10 */
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ aesenc %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups 32(%rcx),%xmm0
+
+/* next batch, tweak 1 -> %xmm11 */
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+
+/* next batch, tweak 2 -> %xmm12; last AES round */
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ aesenclast %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesenclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+
+/* next batch, tweak 3 -> %xmm13; start XOR-ing the saved tweaks back in */
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc .Lxts_enc_grandloop
+
+/* %eax = 2*%eax + 3: undo "(count >> 1) - 1" (exact when the count is odd) */
+ leal 3(%rax,%rax,1),%eax
+ movq %r11,%rcx
+ movl %eax,%r10d
+
+/* zero to five whole blocks remain; %eax/%r10d hold the un-halved count */
+.Lxts_enc_short:
+ addq $96,%rdx
+ jz .Lxts_enc_done
+
+ cmpq $32,%rdx
+ jb .Lxts_enc_one
+ je .Lxts_enc_two
+
+ cmpq $64,%rdx
+ jb .Lxts_enc_three
+ je .Lxts_enc_four
+
+/* five blocks: complete tweak 4 in %xmm14 and advance %xmm15 */
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+
+ call _aesni_encrypt6
+
+/* %xmm10 is reloaded with the next unused tweak for the stealing step */
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ movdqu %xmm5,48(%rsi)
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.p2align 4
+/* one block: tweak, inlined single-block encryption, tweak again */
+.Lxts_enc_one:
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_9:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_9
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+/* next unused tweak -> %xmm10 */
+ movdqa %xmm11,%xmm10
+ movups %xmm2,(%rsi)
+ leaq 16(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.p2align 4
+/* two blocks via the 3-block helper; the third lane is not loaded or stored */
+.Lxts_enc_two:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+
+ call _aesni_encrypt3
+
+ xorps %xmm10,%xmm2
+/* next unused tweak -> %xmm10 */
+ movdqa %xmm12,%xmm10
+ xorps %xmm11,%xmm3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.p2align 4
+.Lxts_enc_three:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+
+ call _aesni_encrypt3
+
+ xorps %xmm10,%xmm2
+/* next unused tweak -> %xmm10 */
+ movdqa %xmm13,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.p2align 4
+.Lxts_enc_four:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+
+ call _aesni_encrypt4
+
+ xorps %xmm10,%xmm2
+/* next unused tweak (the running value in %xmm15) -> %xmm10 */
+ movdqa %xmm15,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.p2align 4
+/* whole blocks done; handle a trailing partial block, if any */
+.Lxts_enc_done:
+ andq $15,%r9
+ jz .Lxts_enc_ret
+ movq %r9,%rdx
+
+/*
+ * Ciphertext stealing, byte by byte: each leftover input byte replaces
+ * a byte of the last full output block, and the byte it displaces is
+ * written to the partial output block.
+ */
+.Lxts_enc_steal:
+ movzbl (%rdi),%eax
+ movzbl -16(%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,-16(%rsi)
+ movb %cl,0(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz .Lxts_enc_steal
+
+/* rewind %rsi and re-encrypt the patched block with the tweak in %xmm10 */
+ subq %r9,%rsi
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups -16(%rsi),%xmm2
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_10:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_10
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,-16(%rsi)
+
+/* restore the callee-saved XMM registers and release the frame */
+.Lxts_enc_ret:
+ movaps 96(%rsp),%xmm6
+ movaps 112(%rsp),%xmm7
+ movaps 128(%rsp),%xmm8
+ movaps 144(%rsp),%xmm9
+ movaps 160(%rsp),%xmm10
+ movaps 176(%rsp),%xmm11
+ movaps 192(%rsp),%xmm12
+ movaps 208(%rsp),%xmm13
+ movaps 224(%rsp),%xmm14
+ movaps 240(%rsp),%xmm15
+ leaq 264(%rsp),%rsp
+.Lxts_enc_epilogue:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_aesni_xts_encrypt:
+.globl aesni_xts_decrypt /* XTS-mode decrypt entry; arguments arrive in rcx, rdx, r8, r9 and two stack slots (see the moves below) */
+.def aesni_xts_decrypt; .scl 2; .type 32; .endef
+.p2align 4
+aesni_xts_decrypt:
+ movq %rdi,8(%rsp) /* park rdi and rsi just above the return address; reloaded at .Lxts_dec_epilogue */
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_aesni_xts_decrypt:
+ movq %rcx,%rdi /* rdi = source pointer (blocks are loaded from it) */
+ movq %rdx,%rsi /* rsi = destination pointer (blocks are stored to it) */
+ movq %r8,%rdx /* rdx = byte count */
+ movq %r9,%rcx /* rcx = key schedule used for the data blocks */
+ movq 40(%rsp),%r8 /* r8 = key schedule used only to encrypt the tweak */
+ movq 48(%rsp),%r9 /* r9 = pointer to the 16-byte initial tweak */
+
+ leaq -264(%rsp),%rsp /* frame: 0..95 tweak spill area, 96..255 save area for xmm6-xmm15 */
+ movaps %xmm6,96(%rsp)
+ movaps %xmm7,112(%rsp)
+ movaps %xmm8,128(%rsp)
+ movaps %xmm9,144(%rsp)
+ movaps %xmm10,160(%rsp)
+ movaps %xmm11,176(%rsp)
+ movaps %xmm12,192(%rsp)
+ movaps %xmm13,208(%rsp)
+ movaps %xmm14,224(%rsp)
+ movaps %xmm15,240(%rsp)
+.Lxts_dec_body:
+ movups (%r9),%xmm15 /* xmm15 = initial tweak value */
+ movl 240(%r8),%eax /* eax = loop count stored at offset 240 of the tweak key schedule */
+ movl 240(%rcx),%r10d /* r10d = loop count stored at offset 240 of the data key schedule */
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+.Loop_enc1_11: /* single-block AES encryption of the tweak with the r8 key schedule */
+ aesenc %xmm1,%xmm15
+ decl %eax
+ movups (%r8),%xmm1 /* movups and leaq leave the flags from decl intact for the jnz */
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_11
+ aesenclast %xmm1,%xmm15
+ xorl %eax,%eax
+ testq $15,%rdx /* is the length a multiple of 16? */
+ setnz %al
+ shlq $4,%rax /* rax = 16 if there is a partial final block, else 0 */
+ subq %rax,%rdx /* hold one full block back for the stealing step */
+
+ movq %rcx,%r11 /* r11 = data key schedule base, kept for reloading rcx */
+ movl %r10d,%eax
+ movq %rdx,%r9 /* r9 = adjusted length; its low 4 bits are tested at .Lxts_dec_done */
+ andq $-16,%rdx /* rdx = whole-block byte count */
+
+ movdqa .Lxts_magic(%rip),%xmm8 /* xmm8 = reduction constants used by every tweak doubling */
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14 /* xmm14 = per-dword sign mask of the current tweak */
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10 /* xmm10 = tweak for block 0 */
+ paddq %xmm15,%xmm15 /* shift both 64-bit halves left by one bit */
+ pand %xmm8,%xmm9 /* keep only the carry bits selected by .Lxts_magic */
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15 /* fold the carries back in: xmm15 = next tweak */
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11 /* xmm11 = tweak for block 1 */
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12 /* xmm12 = tweak for block 2 */
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13 /* xmm13 = tweak for block 3 */
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15 /* xmm15 = tweak for block 4; xmm14 holds its sign mask */
+ subq $96,%rdx
+ jc .Lxts_dec_short /* fewer than six whole blocks: skip the 6-way loop */
+
+ shrl $1,%eax
+ subl $1,%eax
+ movl %eax,%r10d /* r10d = count/2 - 1, the counter for the two-rounds-per-pass loop6 */
+ jmp .Lxts_dec_grandloop
+
+.p2align 4
+.Lxts_dec_grandloop: /* six blocks per iteration */
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14 /* xmm14 = tweak for block 4 */
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15 /* xmm15 = tweak for block 5 */
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+
+
+
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp) /* spill the six tweaks; they are xored back in after the last round */
+ aesdec %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+ aesdec %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesdec %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp .Lxts_dec_loop6_enter
+
+.p2align 4
+.Lxts_dec_loop6: /* two rounds per pass, alternating round keys in xmm1 and xmm0 */
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+.Lxts_dec_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lxts_dec_loop6 /* flags still come from the decl above; SSE moves and leaq do not touch them */
+
+ pshufd $19,%xmm14,%xmm9 /* remaining rounds are interleaved with the tweak doublings for the next iteration */
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10 /* xmm10 = first tweak of the next batch */
+ paddq %xmm15,%xmm15
+ aesdec %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups 32(%rcx),%xmm0
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ aesdeclast %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesdeclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdeclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2 /* undo the tweak masking with the spilled copies */
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax /* reload the loop6 counter for the next iteration */
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc .Lxts_dec_grandloop
+
+ leal 3(%rax,%rax,1),%eax /* eax = 2*eax + 3, which undoes the count/2 - 1 applied before the loop */
+ movq %r11,%rcx
+ movl %eax,%r10d
+
+.Lxts_dec_short: /* 0..5 whole blocks remain */
+ addq $96,%rdx /* rdx = remaining whole-block bytes */
+ jz .Lxts_dec_done
+
+ cmpq $32,%rdx
+ jb .Lxts_dec_one
+ je .Lxts_dec_two
+
+ cmpq $64,%rdx
+ jb .Lxts_dec_three
+ je .Lxts_dec_four
+
+ pshufd $19,%xmm14,%xmm9 /* five-block case: derive one more tweak into xmm14 */
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+
+ call _aesni_decrypt6 /* helper defined outside this view; presumably decrypts xmm2..xmm7 with rcx/eax - confirm */
+
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ pxor %xmm14,%xmm14
+ movdqu %xmm5,48(%rsi)
+ pcmpgtd %xmm15,%xmm14
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ pshufd $19,%xmm14,%xmm11
+ andq $15,%r9 /* r9 = number of trailing bytes in the partial block */
+ jz .Lxts_dec_ret
+
+ movdqa %xmm15,%xmm10 /* xmm10 = current tweak, used for the second decryption of the stealing step */
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm11
+ pxor %xmm15,%xmm11 /* xmm11 = following tweak, used for the first decryption of the stealing step */
+ jmp .Lxts_dec_done2
+
+.p2align 4
+.Lxts_dec_one:
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_12: /* single-block decryption */
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_12
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10 /* advance: xmm10 and xmm11 become the next two unused tweaks */
+ movups %xmm2,(%rsi)
+ movdqa %xmm12,%xmm11
+ leaq 16(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.p2align 4
+.Lxts_dec_two:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+
+ call _aesni_decrypt3 /* helper defined outside this view; only xmm2 and xmm3 results are used here */
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm13,%xmm11
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.p2align 4
+.Lxts_dec_three:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+
+ call _aesni_decrypt3 /* helper defined outside this view */
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.p2align 4
+.Lxts_dec_four:
+ pshufd $19,%xmm14,%xmm9 /* one more doubling so that two unused tweaks exist after four blocks */
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movups (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movups 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+
+ call _aesni_decrypt4 /* helper defined outside this view */
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm14,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.p2align 4
+.Lxts_dec_done:
+ andq $15,%r9 /* any trailing partial block? */
+ jz .Lxts_dec_ret
+.Lxts_dec_done2: /* stealing step: xmm11 masks the first decryption, xmm10 the second */
+ movq %r9,%rdx /* rdx = byte counter for the swap loop */
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups (%rdi),%xmm2 /* the full block that was held back */
+ xorps %xmm11,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_13:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_13
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm11,%xmm2
+ movups %xmm2,(%rsi)
+
+.Lxts_dec_steal: /* per byte: move the output byte to offset +16 and put the trailing input byte in its place */
+ movzbl 16(%rdi),%eax
+ movzbl (%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,(%rsi)
+ movb %cl,16(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz .Lxts_dec_steal
+
+ subq %r9,%rsi /* rewind rsi to the start of the recombined block */
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups (%rsi),%xmm2
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_14:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_14
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,(%rsi)
+
+.Lxts_dec_ret: /* restore xmm6-xmm15 from the save area and release the frame */
+ movaps 96(%rsp),%xmm6
+ movaps 112(%rsp),%xmm7
+ movaps 128(%rsp),%xmm8
+ movaps 144(%rsp),%xmm9
+ movaps 160(%rsp),%xmm10
+ movaps 176(%rsp),%xmm11
+ movaps 192(%rsp),%xmm12
+ movaps 208(%rsp),%xmm13
+ movaps 224(%rsp),%xmm14
+ movaps 240(%rsp),%xmm15
+ leaq 264(%rsp),%rsp
+.Lxts_dec_epilogue:
+ movq 8(%rsp),%rdi /* reload the rdi and rsi values parked at entry */
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_aesni_xts_decrypt:
+.globl aesni_cbc_encrypt /* CBC encrypt or decrypt, selected by the sixth argument; arguments arrive in rcx, rdx, r8, r9 and two stack slots */
+.def aesni_cbc_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+aesni_cbc_encrypt:
+ movq %rdi,8(%rsp) /* park rdi and rsi just above the return address; reloaded at .Lcbc_ret */
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_aesni_cbc_encrypt:
+ movq %rcx,%rdi /* rdi = source pointer */
+ movq %rdx,%rsi /* rsi = destination pointer */
+ movq %r8,%rdx /* rdx = byte count */
+ movq %r9,%rcx /* rcx = key schedule */
+ movq 40(%rsp),%r8 /* r8 = pointer to the 16-byte chaining value, read at start and written back at the end */
+ movq 48(%rsp),%r9 /* r9d = direction flag: zero selects decryption */
+
+ testq %rdx,%rdx
+ jz .Lcbc_ret /* nothing to do for a zero length */
+
+ movl 240(%rcx),%r10d /* r10d = loop count stored at offset 240 of the key schedule */
+ movq %rcx,%r11 /* r11 = key schedule base, kept for reloading rcx */
+ testl %r9d,%r9d
+ jz .Lcbc_decrypt
+
+ movups (%r8),%xmm2 /* xmm2 = running chaining value */
+ movl %r10d,%eax
+ cmpq $16,%rdx
+ jb .Lcbc_enc_tail
+ subq $16,%rdx
+ jmp .Lcbc_enc_loop
+.p2align 4
+.Lcbc_enc_loop: /* one block per iteration; each block depends on the previous output */
+ movups (%rdi),%xmm3
+ leaq 16(%rdi),%rdi
+
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ xorps %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ xorps %xmm3,%xmm2 /* xmm2 = chaining value xor input xor first round key */
+.Loop_enc1_15:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_15
+ aesenclast %xmm1,%xmm2
+ movl %r10d,%eax
+ movq %r11,%rcx
+ movups %xmm2,0(%rsi)
+ leaq 16(%rsi),%rsi
+ subq $16,%rdx
+ jnc .Lcbc_enc_loop
+ addq $16,%rdx
+ jnz .Lcbc_enc_tail /* 1..15 bytes left over */
+ movups %xmm2,(%r8) /* write the final chaining value back */
+ jmp .Lcbc_ret
+
+.Lcbc_enc_tail: /* build a zero-padded final block inside the destination buffer, then encrypt it in place */
+ movq %rdx,%rcx
+ xchgq %rdi,%rsi
+.long 0x9066A4F3 /* bytes F3 A4 66 90: rep movsb followed by a two-byte nop */
+ movl $16,%ecx
+ subq %rdx,%rcx
+ xorl %eax,%eax
+.long 0x9066AAF3 /* bytes F3 AA 66 90: rep stosb (zero fill to 16 bytes) followed by a two-byte nop */
+ leaq -16(%rdi),%rdi /* back to the start of the padded block */
+ movl %r10d,%eax
+ movq %rdi,%rsi /* source and destination are now the same block */
+ movq %r11,%rcx
+ xorq %rdx,%rdx /* rdx = 0, so the loop runs exactly once more */
+ jmp .Lcbc_enc_loop
+
+.p2align 4
+.Lcbc_decrypt:
+ leaq -88(%rsp),%rsp /* frame: 0..63 save area for xmm6-xmm9, 64..79 scratch block */
+ movaps %xmm6,(%rsp)
+ movaps %xmm7,16(%rsp)
+ movaps %xmm8,32(%rsp)
+ movaps %xmm9,48(%rsp)
+.Lcbc_decrypt_body:
+ movups (%r8),%xmm9 /* xmm9 = incoming chaining value */
+ movl %r10d,%eax
+ cmpq $112,%rdx
+ jbe .Lcbc_dec_tail /* at most seven blocks: no 8-way loop */
+ shrl $1,%r10d /* halved count for the two-rounds-per-pass helper loop */
+ subq $112,%rdx
+ movl %r10d,%eax
+ movaps %xmm9,64(%rsp) /* scratch slot holds the chaining value for block 0 of the batch */
+ jmp .Lcbc_dec_loop8_enter
+.p2align 4
+.Lcbc_dec_loop8:
+ movaps %xmm0,64(%rsp) /* xmm0 = last input block of the previous batch, the next chaining value */
+ movups %xmm9,(%rsi) /* store the eighth output block deferred from the previous batch */
+ leaq 16(%rsi),%rsi
+.Lcbc_dec_loop8_enter:
+ movups (%rcx),%xmm0
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 16(%rcx),%xmm1
+
+ leaq 32(%rcx),%rcx
+ movdqu 32(%rdi),%xmm4
+ xorps %xmm0,%xmm2
+ movdqu 48(%rdi),%xmm5
+ xorps %xmm0,%xmm3
+ movdqu 64(%rdi),%xmm6
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ movdqu 80(%rdi),%xmm7
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqu 96(%rdi),%xmm8
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqu 112(%rdi),%xmm9
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+
+ call .Ldec_loop8_enter /* label defined outside this view; presumably the middle of the 8-block decrypt helper - confirm */
+
+ movups (%rdi),%xmm1 /* reload the input blocks to use as chaining values */
+ movups 16(%rdi),%xmm0
+ xorps 64(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm1
+ xorps %xmm0,%xmm8
+ movups 112(%rdi),%xmm0
+ xorps %xmm1,%xmm9
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movq %r11,%rcx
+ movups %xmm7,80(%rsi)
+ leaq 128(%rdi),%rdi
+ movups %xmm8,96(%rsi)
+ leaq 112(%rsi),%rsi /* only seven blocks stored so far; xmm9 is stored later */
+ subq $128,%rdx
+ ja .Lcbc_dec_loop8
+
+ movaps %xmm9,%xmm2 /* xmm2 = output block still waiting to be stored */
+ movaps %xmm0,%xmm9 /* xmm9 = new chaining value */
+ addq $112,%rdx
+ jle .Lcbc_dec_tail_collected
+ movups %xmm2,(%rsi)
+ leal 1(%r10,%r10,1),%eax /* eax = 2*r10d + 1, which undoes the halving above for the odd counts this file stores */
+ leaq 16(%rsi),%rsi
+.Lcbc_dec_tail: /* 1..7 blocks remain; dispatch on rdx */
+ movups (%rdi),%xmm2
+ movaps %xmm2,%xmm8 /* keep input copies in xmm8, xmm7, xmm6 as chaining values for the short cases */
+ cmpq $16,%rdx
+ jbe .Lcbc_dec_one
+
+ movups 16(%rdi),%xmm3
+ movaps %xmm3,%xmm7
+ cmpq $32,%rdx
+ jbe .Lcbc_dec_two
+
+ movups 32(%rdi),%xmm4
+ movaps %xmm4,%xmm6
+ cmpq $48,%rdx
+ jbe .Lcbc_dec_three
+
+ movups 48(%rdi),%xmm5
+ cmpq $64,%rdx
+ jbe .Lcbc_dec_four
+
+ movups 64(%rdi),%xmm6
+ cmpq $80,%rdx
+ jbe .Lcbc_dec_five
+
+ movups 80(%rdi),%xmm7
+ cmpq $96,%rdx
+ jbe .Lcbc_dec_six
+
+ movups 96(%rdi),%xmm8
+ movaps %xmm9,64(%rsp)
+ call _aesni_decrypt8 /* helper defined outside this view */
+ movups (%rdi),%xmm1
+ movups 16(%rdi),%xmm0
+ xorps 64(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm9
+ xorps %xmm0,%xmm8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movaps %xmm8,%xmm2
+ subq $112,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+.Lcbc_dec_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_16:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_16
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm9,%xmm2
+ movaps %xmm8,%xmm9
+ subq $16,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+.Lcbc_dec_two:
+ xorps %xmm4,%xmm4 /* clear the unused third lane before the 3-block helper */
+ call _aesni_decrypt3 /* helper defined outside this view */
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ movaps %xmm7,%xmm9
+ movaps %xmm3,%xmm2
+ leaq 16(%rsi),%rsi
+ subq $32,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+.Lcbc_dec_three:
+ call _aesni_decrypt3 /* helper defined outside this view */
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ movaps %xmm6,%xmm9
+ movaps %xmm4,%xmm2
+ leaq 32(%rsi),%rsi
+ subq $48,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+.Lcbc_dec_four:
+ call _aesni_decrypt4 /* helper defined outside this view */
+ xorps %xmm9,%xmm2
+ movups 48(%rdi),%xmm9
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ xorps %xmm6,%xmm5
+ movups %xmm4,32(%rsi)
+ movaps %xmm5,%xmm2
+ leaq 48(%rsi),%rsi
+ subq $64,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+.Lcbc_dec_five:
+ xorps %xmm7,%xmm7 /* clear the unused sixth lane before the 6-block helper */
+ call _aesni_decrypt6 /* helper defined outside this view */
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm9
+ xorps %xmm1,%xmm6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ movaps %xmm6,%xmm2
+ subq $80,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+.Lcbc_dec_six:
+ call _aesni_decrypt6 /* helper defined outside this view */
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm0
+ xorps %xmm1,%xmm6
+ movups 80(%rdi),%xmm9
+ xorps %xmm0,%xmm7
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ movaps %xmm7,%xmm2
+ subq $96,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+.Lcbc_dec_tail_collected: /* here xmm2 = last output block not yet stored, xmm9 = chaining value to hand back */
+ andq $15,%rdx
+ movups %xmm9,(%r8) /* SSE store does not disturb the flags from andq */
+ jnz .Lcbc_dec_tail_partial
+ movups %xmm2,(%rsi)
+ jmp .Lcbc_dec_ret
+.p2align 4
+.Lcbc_dec_tail_partial:
+ movaps %xmm2,64(%rsp) /* stage the block in the scratch slot so it can be copied bytewise */
+ movq $16,%rcx
+ movq %rsi,%rdi
+ subq %rdx,%rcx /* NOTE(review): copies 16 - (rdx & 15) bytes, and rdx went negative in the tail cases; confirm intent for lengths that are not a multiple of 16 */
+ leaq 64(%rsp),%rsi
+.long 0x9066A4F3 /* bytes F3 A4 66 90: rep movsb followed by a two-byte nop */
+
+.Lcbc_dec_ret: /* restore xmm6-xmm9 and release the decrypt frame */
+ movaps (%rsp),%xmm6
+ movaps 16(%rsp),%xmm7
+ movaps 32(%rsp),%xmm8
+ movaps 48(%rsp),%xmm9
+ leaq 88(%rsp),%rsp
+.Lcbc_ret:
+ movq 8(%rsp),%rdi /* reload the rdi and rsi values parked at entry */
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_aesni_cbc_encrypt:
+.globl aesni_set_decrypt_key /* builds a decryption key schedule: expand as for encryption, then reverse the round-key order and apply aesimc to the inner keys */
+.def aesni_set_decrypt_key; .scl 2; .type 32; .endef
+.p2align 4
+aesni_set_decrypt_key:
+ subq $8,%rsp
+ call __aesni_set_encrypt_key /* leaves status in eax, the stored loop count in edx, and r8 still pointing at the schedule */
+ shll $4,%edx /* edx = count * 16 */
+ testl %eax,%eax
+ jnz .Ldec_key_ret /* propagate a non-zero status unchanged */
+ leaq 16(%r8,%rdx,1),%rcx /* rcx = address of the last round key */
+
+ movups (%r8),%xmm0 /* swap the first and last round keys without transforming them */
+ movups (%rcx),%xmm1
+ movups %xmm0,(%rcx)
+ movups %xmm1,(%r8)
+ leaq 16(%r8),%r8
+ leaq -16(%rcx),%rcx
+
+.Ldec_key_inverse: /* walk inward from both ends, swapping each pair after aesimc */
+ movups (%r8),%xmm0
+ movups (%rcx),%xmm1
+ aesimc %xmm0,%xmm0
+ aesimc %xmm1,%xmm1
+ leaq 16(%r8),%r8
+ leaq -16(%rcx),%rcx
+ movups %xmm0,16(%rcx) /* offsets compensate for the pointer updates just above */
+ movups %xmm1,-16(%r8)
+ cmpq %r8,%rcx
+ ja .Ldec_key_inverse
+
+ movups (%r8),%xmm0 /* middle round key: transformed and stored back (rcx equals r8 when the loop exits on equality) */
+ aesimc %xmm0,%xmm0
+ movups %xmm0,(%rcx)
+.Ldec_key_ret:
+ addq $8,%rsp
+ retq
+.LSEH_end_set_decrypt_key:
+
+.globl aesni_set_encrypt_key /* key expansion: rcx = user key, edx = key size in bits (128, 192 or 256), r8 = output schedule */
+.def aesni_set_encrypt_key; .scl 2; .type 32; .endef
+.p2align 4
+aesni_set_encrypt_key:
+__aesni_set_encrypt_key: /* second name for the same entry; called by aesni_set_decrypt_key */
+ subq $8,%rsp
+ movq $-1,%rax /* status -1 is returned when either pointer is null */
+ testq %rcx,%rcx
+ jz .Lenc_key_ret
+ testq %r8,%r8
+ jz .Lenc_key_ret
+
+ movups (%rcx),%xmm0 /* xmm0 = first 16 bytes of the user key */
+ xorps %xmm4,%xmm4 /* xmm4 = zeroed scratch consumed by the shufps steps in the helpers */
+ leaq 16(%r8),%rax /* rax = write cursor for the next round key */
+ cmpl $256,%edx
+ je .L14rounds
+ cmpl $192,%edx
+ je .L12rounds
+ cmpl $128,%edx
+ jne .Lbad_keybits
+
+.L10rounds:
+ movl $9,%edx /* value later stored at offset 240 of the schedule; also left in edx for the caller */
+ movups %xmm0,(%r8)
+ aeskeygenassist $1,%xmm0,%xmm1
+ call .Lkey_expansion_128_cold /* cold entry skips the store because key 0 was written just above */
+ aeskeygenassist $2,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $4,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $8,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $16,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $32,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $64,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $128,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $27,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $54,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ movups %xmm0,(%rax) /* final round key at r8+160 */
+ movl %edx,80(%rax) /* r8+160+80 = r8+240 */
+ xorl %eax,%eax /* status 0 */
+ jmp .Lenc_key_ret
+
+.p2align 4
+.L12rounds:
+ movq 16(%rcx),%xmm2 /* xmm2 = remaining 8 bytes of the 24-byte user key */
+ movl $11,%edx
+ movups %xmm0,(%r8)
+ aeskeygenassist $1,%xmm2,%xmm1
+ call .Lkey_expansion_192a_cold
+ aeskeygenassist $2,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $8,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $32,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $128,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ movups %xmm0,(%rax)
+ movl %edx,48(%rax) /* presumably also resolves to offset 240 of the schedule - confirm */
+ xorq %rax,%rax /* status 0 */
+ jmp .Lenc_key_ret
+
+.p2align 4
+.L14rounds:
+ movups 16(%rcx),%xmm2 /* xmm2 = second 16 bytes of the 32-byte user key */
+ movl $13,%edx
+ leaq 16(%rax),%rax /* two round keys are written directly below, so the cursor starts at r8+32 */
+ movups %xmm0,(%r8)
+ movups %xmm2,16(%r8)
+ aeskeygenassist $1,%xmm2,%xmm1
+ call .Lkey_expansion_256a_cold
+ aeskeygenassist $1,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $2,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $2,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $4,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $8,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $8,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $16,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $32,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $32,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ movups %xmm0,(%rax)
+ movl %edx,16(%rax) /* presumably also resolves to offset 240 of the schedule - confirm */
+ xorq %rax,%rax /* status 0 */
+ jmp .Lenc_key_ret
+
+.p2align 4
+.Lbad_keybits:
+ movq $-2,%rax /* status -2: edx was not 128, 192 or 256 */
+.Lenc_key_ret:
+ addq $8,%rsp
+ retq
+.LSEH_end_set_encrypt_key:
+
+.p2align 4
+.Lkey_expansion_128: /* in: xmm0 = current round key, xmm1 = aeskeygenassist result, xmm4 = scratch, rax = write cursor; out: xmm0 = next round key */
+ movups %xmm0,(%rax) /* store the current round key and advance the cursor */
+ leaq 16(%rax),%rax
+.Lkey_expansion_128_cold: /* entry used when the current key has already been stored */
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1 /* broadcast dword 3 of the aeskeygenassist result */
+ xorps %xmm1,%xmm0
+ retq
+
+.p2align 4
+.Lkey_expansion_192a: /* in: xmm0 and xmm2 = current key material, xmm1 = aeskeygenassist result, rax = write cursor; updates xmm0 and xmm2 */
+ movups %xmm0,(%rax) /* store xmm0 as a complete round key and advance the cursor */
+ leaq 16(%rax),%rax
+.Lkey_expansion_192a_cold: /* entry used when xmm0 has already been stored */
+ movaps %xmm2,%xmm5 /* keep a copy of xmm2 for the following .Lkey_expansion_192b call */
+.Lkey_expansion_192b_warm: /* shared tail, also reached by jmp from .Lkey_expansion_192b */
+ shufps $16,%xmm0,%xmm4
+ movdqa %xmm2,%xmm3
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ pslldq $4,%xmm3
+ xorps %xmm4,%xmm0
+ pshufd $85,%xmm1,%xmm1 /* broadcast dword 1 of the aeskeygenassist result */
+ pxor %xmm3,%xmm2
+ pxor %xmm1,%xmm0
+ pshufd $255,%xmm0,%xmm3 /* broadcast dword 3 of the new xmm0 into the xmm2 update */
+ pxor %xmm3,%xmm2
+ retq
+
+.p2align 4
+.Lkey_expansion_192b: /* writes two round keys assembled from xmm5, xmm0 and xmm2, then continues in the shared 192-bit tail */
+ movaps %xmm0,%xmm3
+ shufps $68,%xmm0,%xmm5 /* xmm5 = its own low qword followed by the low qword of xmm0 */
+ movups %xmm5,(%rax)
+ shufps $78,%xmm2,%xmm3 /* xmm3 = high qword of xmm0 followed by the low qword of xmm2 */
+ movups %xmm3,16(%rax)
+ leaq 32(%rax),%rax /* cursor advances by two round keys */
+ jmp .Lkey_expansion_192b_warm
+
+.p2align 4
+.Lkey_expansion_256a: /* in: xmm0 and xmm2 = the two most recent round keys, xmm1 = aeskeygenassist result, rax = write cursor; out: xmm0 = next round key */
+ movups %xmm2,(%rax) /* store xmm2 and advance the cursor */
+ leaq 16(%rax),%rax
+.Lkey_expansion_256a_cold: /* entry used when xmm2 has already been stored */
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1 /* broadcast dword 3 of the aeskeygenassist result */
+ xorps %xmm1,%xmm0
+ retq
+
+.p2align 4
+.Lkey_expansion_256b: /* counterpart of .Lkey_expansion_256a: stores xmm0, then updates xmm2 */
+ movups %xmm0,(%rax) /* store xmm0 and advance the cursor */
+ leaq 16(%rax),%rax
+
+ shufps $16,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $140,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $170,%xmm1,%xmm1 /* broadcast dword 2 of the aeskeygenassist result (the 256a path uses dword 3) */
+ xorps %xmm1,%xmm2
+ retq
+
+
+.p2align 6 /* 64-byte alignment for the constant pool */
+.Lbswap_mask: /* byte indices 15 down to 0; consumers are outside this view */
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.Lincrement32: /* consumers are outside this view */
+.long 6,6,6,0
+.Lincrement64: /* consumers are outside this view */
+.long 1,0,0,0
+.Lxts_magic: /* loaded into xmm8 by the XTS code: 0x87 for dword 0 and 1 for dword 2, selected by the carry masks */
+.long 0x87,0,1,0
+
+.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 /* ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>", NUL terminated */
+.p2align 6
+
+.def ecb_se_handler; .scl 3; .type 32; .endef /* handler referenced from .LSEH_info_ecb; r8 and r9 are presumably the CONTEXT and DISPATCHER_CONTEXT pointers - confirm */
+.p2align 4
+ecb_se_handler:
+ pushq %rsi /* save every register the shared tail may touch; popped in reverse in .Lcommon_seh_tail */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp /* room for the outgoing call arguments written at 32..56(%rsp) in the tail */
+
+ movq 152(%r8),%rax /* rax = qword at offset 152 of the record in r8 (presumably CONTEXT.Rsp - confirm) */
+
+ jmp .Lcommon_seh_tail
+
+
+.def ccm64_se_handler; .scl 3; .type 32; .endef /* handler shared by both ccm64 entries; its two bounds come from the RVAs stored after the handler in .xdata */
+.p2align 4
+ccm64_se_handler:
+ pushq %rsi /* save every register the shared tail may touch; popped in reverse in .Lcommon_seh_tail */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax /* presumably CONTEXT.Rax, which the prologues set to the entry rsp - confirm */
+ movq 248(%r8),%rbx /* presumably CONTEXT.Rip - confirm */
+
+ movq 8(%r9),%rsi /* base added to the RVAs below (presumably the image base - confirm) */
+ movq 56(%r9),%r11 /* r11 = pointer to the two RVAs that follow the handler entry in .xdata */
+
+ movl 0(%r11),%r10d /* first RVA: start of the function body */
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jb .Lcommon_seh_tail /* still in the prologue: no frame has been allocated yet */
+
+ movq 152(%r8),%rax /* presumably CONTEXT.Rsp - confirm */
+
+ movl 4(%r11),%r10d /* second RVA: end of the region that has a live frame */
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lcommon_seh_tail
+
+ leaq 0(%rax),%rsi /* copy 8 qwords from the frame base to offset 512 of the record in r8 */
+ leaq 512(%r8),%rdi
+ movl $8,%ecx
+.long 0xa548f3fc /* bytes FC F3 48 A5: cld then rep movsq */
+ leaq 88(%rax),%rax /* step over the 88-byte frame */
+
+ jmp .Lcommon_seh_tail
+
+
+.def ctr32_se_handler; .scl 3; .type 32; .endef /* handler for the ctr32 entry; bounds are the fixed labels .Lctr32_body and .Lctr32_ret */
+.p2align 4
+ctr32_se_handler:
+ pushq %rsi /* save every register the shared tail may touch; popped in reverse in .Lcommon_seh_tail */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax /* presumably CONTEXT.Rax - confirm */
+ movq 248(%r8),%rbx /* presumably CONTEXT.Rip - confirm */
+
+ leaq .Lctr32_body(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lcommon_seh_tail /* before the body label: no frame has been allocated yet */
+
+ movq 152(%r8),%rax /* presumably CONTEXT.Rsp - confirm */
+
+ leaq .Lctr32_ret(%rip),%r10
+ cmpq %r10,%rbx
+ jae .Lcommon_seh_tail
+
+ leaq 32(%rax),%rsi /* copy 20 qwords (ten 16-byte registers) from frame offset 32 to offset 512 of the record in r8 */
+ leaq 512(%r8),%rdi
+ movl $20,%ecx
+.long 0xa548f3fc /* bytes FC F3 48 A5: cld then rep movsq */
+ leaq 200(%rax),%rax /* step over the 200-byte frame */
+
+ jmp .Lcommon_seh_tail
+
+
+.def xts_se_handler; .scl 3; .type 32; .endef /* handler shared by the XTS encrypt and decrypt entries; bounds come from the RVAs in .xdata */
+.p2align 4
+xts_se_handler:
+ pushq %rsi /* save every register the shared tail may touch; popped in reverse in .Lcommon_seh_tail */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax /* presumably CONTEXT.Rax, which the XTS prologue sets to the entry rsp - confirm */
+ movq 248(%r8),%rbx /* presumably CONTEXT.Rip - confirm */
+
+ movq 8(%r9),%rsi /* base added to the RVAs below (presumably the image base - confirm) */
+ movq 56(%r9),%r11 /* r11 = pointer to the two RVAs that follow the handler entry in .xdata */
+
+ movl 0(%r11),%r10d /* first RVA: body label */
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jb .Lcommon_seh_tail /* still in the prologue */
+
+ movq 152(%r8),%rax /* presumably CONTEXT.Rsp - confirm */
+
+ movl 4(%r11),%r10d /* second RVA: epilogue label */
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lcommon_seh_tail
+
+ leaq 96(%rax),%rsi /* matches the xmm6-xmm15 save area at 96(%rsp) in the XTS frame */
+ leaq 512(%r8),%rdi
+ movl $20,%ecx /* 20 qwords = ten 16-byte registers */
+.long 0xa548f3fc /* bytes FC F3 48 A5: cld then rep movsq */
+ leaq 104+160(%rax),%rax /* 264 bytes, the size of the XTS frame */
+
+ jmp .Lcommon_seh_tail
+
+.def cbc_se_handler; .scl 3; .type 32; .endef /* handler for aesni_cbc_encrypt; also contains .Lcommon_seh_tail, which every handler in this file jumps to */
+.p2align 4
+cbc_se_handler:
+ pushq %rsi /* save every register the tail may touch; popped in reverse at the end */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 152(%r8),%rax /* presumably CONTEXT.Rsp - confirm */
+ movq 248(%r8),%rbx /* presumably CONTEXT.Rip - confirm */
+
+ leaq .Lcbc_decrypt(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lcommon_seh_tail /* encrypt path: no frame is allocated there */
+
+ leaq .Lcbc_decrypt_body(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lrestore_cbc_rax /* inside the decrypt prologue, while the frame is being set up */
+
+ leaq .Lcbc_ret(%rip),%r10
+ cmpq %r10,%rbx
+ jae .Lcommon_seh_tail
+
+ leaq 0(%rax),%rsi /* matches the xmm6-xmm9 save area at 0(%rsp) in the decrypt frame */
+ leaq 512(%r8),%rdi
+ movl $8,%ecx /* 8 qwords = four 16-byte registers */
+.long 0xa548f3fc /* bytes FC F3 48 A5: cld then rep movsq */
+ leaq 88(%rax),%rax /* step over the 88-byte decrypt frame */
+ jmp .Lcommon_seh_tail
+
+.Lrestore_cbc_rax:
+ movq 120(%r8),%rax /* presumably CONTEXT.Rax, which the prologue set to the entry rsp - confirm */
+
+.Lcommon_seh_tail: /* in: rax = stack pointer value at function entry */
+ movq 8(%rax),%rdi /* the prologues parked rdi at 8(%rsp) and rsi at 16(%rsp) */
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8) /* write the recovered values back into the record in r8 */
+ movq %rsi,168(%r8)
+ movq %rdi,176(%r8)
+
+ movq 40(%r9),%rdi /* destination record pointer taken from offset 40 of the r9 structure */
+ movq %r8,%rsi
+ movl $154,%ecx /* 154 qwords = 1232 bytes */
+.long 0xa548f3fc /* bytes FC F3 48 A5: cld then rep movsq */
+
+ movq %r9,%rsi
+ xorq %rcx,%rcx /* first call argument = 0 */
+ movq 8(%rsi),%rdx
+ movq 0(%rsi),%r8
+ movq 16(%rsi),%r9
+ movq 40(%rsi),%r10
+ leaq 56(%rsi),%r11
+ leaq 24(%rsi),%r12
+ movq %r10,32(%rsp) /* fifth to eighth arguments go on the stack above the 32-byte shadow area */
+ movq %r11,40(%rsp)
+ movq %r12,48(%rsp)
+ movq %rcx,56(%rsp)
+ call *__imp_RtlVirtualUnwind(%rip)
+
+ movl $1,%eax /* handler return value 1 (presumably ExceptionContinueSearch - confirm) */
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+
+
+.section .pdata /* function table: one (begin RVA, end RVA, unwind-info RVA) triple per function */
+.p2align 2
+.rva .LSEH_begin_aesni_ecb_encrypt
+.rva .LSEH_end_aesni_ecb_encrypt
+.rva .LSEH_info_ecb
+
+.rva .LSEH_begin_aesni_ccm64_encrypt_blocks
+.rva .LSEH_end_aesni_ccm64_encrypt_blocks
+.rva .LSEH_info_ccm64_enc
+
+.rva .LSEH_begin_aesni_ccm64_decrypt_blocks
+.rva .LSEH_end_aesni_ccm64_decrypt_blocks
+.rva .LSEH_info_ccm64_dec
+
+.rva .LSEH_begin_aesni_ctr32_encrypt_blocks
+.rva .LSEH_end_aesni_ctr32_encrypt_blocks
+.rva .LSEH_info_ctr32
+
+.rva .LSEH_begin_aesni_xts_encrypt
+.rva .LSEH_end_aesni_xts_encrypt
+.rva .LSEH_info_xts_enc
+
+.rva .LSEH_begin_aesni_xts_decrypt
+.rva .LSEH_end_aesni_xts_decrypt
+.rva .LSEH_info_xts_dec
+.rva .LSEH_begin_aesni_cbc_encrypt
+.rva .LSEH_end_aesni_cbc_encrypt
+.rva .LSEH_info_cbc
+
+.rva aesni_set_decrypt_key /* the two key-setup entries start at the function symbol and share .LSEH_info_key */
+.rva .LSEH_end_set_decrypt_key
+.rva .LSEH_info_key
+
+.rva aesni_set_encrypt_key
+.rva .LSEH_end_set_encrypt_key
+.rva .LSEH_info_key
+.section .xdata /* unwind descriptors referenced by the third field of each .pdata triple */
+.p2align 3
+.LSEH_info_ecb:
+.byte 9,0,0,0 /* header byte 9 with no unwind codes, followed by a handler RVA (presumably version 1 plus the exception-handler flag - confirm) */
+.rva ecb_se_handler
+.LSEH_info_ccm64_enc:
+.byte 9,0,0,0
+.rva ccm64_se_handler
+.rva .Lccm64_enc_body,.Lccm64_enc_ret /* the two RVAs the handler reads at 0(%r11) and 4(%r11) */
+.LSEH_info_ccm64_dec:
+.byte 9,0,0,0
+.rva ccm64_se_handler
+.rva .Lccm64_dec_body,.Lccm64_dec_ret /* the two RVAs the handler reads at 0(%r11) and 4(%r11) */
+.LSEH_info_ctr32:
+.byte 9,0,0,0
+.rva ctr32_se_handler
+.LSEH_info_xts_enc:
+.byte 9,0,0,0
+.rva xts_se_handler
+.rva .Lxts_enc_body,.Lxts_enc_epilogue /* the two RVAs the handler reads at 0(%r11) and 4(%r11) */
+.LSEH_info_xts_dec:
+.byte 9,0,0,0
+.rva xts_se_handler
+.rva .Lxts_dec_body,.Lxts_dec_epilogue /* the two RVAs the handler reads at 0(%r11) and 4(%r11) */
+.LSEH_info_cbc:
+.byte 9,0,0,0
+.rva cbc_se_handler
+.LSEH_info_key: /* handler-less descriptor shared by both key-setup functions */
+.byte 0x01,0x04,0x01,0x00 /* presumably version 1, 4-byte prologue, one unwind code - confirm against the UNWIND_INFO layout */
+.byte 0x04,0x02,0x00,0x00 /* presumably one small-allocation code at prologue offset 4, matching the subq $8,%rsp - confirm */
diff --git a/ext/libressl/crypto/aes/aesni-sha1-elf-x86_64.S b/ext/libressl/crypto/aes/aesni-sha1-elf-x86_64.S
new file mode 100644
index 0000000..c0b3e5f
--- /dev/null
+++ b/ext/libressl/crypto/aes/aesni-sha1-elf-x86_64.S
@@ -0,0 +1,1401 @@
+#include "x86_arch.h"
+.text
+
+.hidden OPENSSL_ia32cap_P
+
+.globl aesni_cbc_sha1_enc
+.type aesni_cbc_sha1_enc,@function
+.align 16
+aesni_cbc_sha1_enc: /* exported entry point: forwards unconditionally to aesni_cbc_sha1_enc_ssse3 with every argument register and the stack untouched */
+
+ movl OPENSSL_ia32cap_P+0(%rip),%r10d /* capability word 0 is loaded but never tested in this function */
+ movl OPENSSL_ia32cap_P+4(%rip),%r11d /* NOTE(review): both loads look dead - the target overwrites r10 (movq 8(%rsp)) and r11 (leaq K_XX_XX) before reading them */
+ jmp aesni_cbc_sha1_enc_ssse3 /* tail jump: the target returns directly to our caller */
+ retq /* unreachable: follows an unconditional jmp */
+.size aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc
+.type aesni_cbc_sha1_enc_ssse3,@function
+.align 16
+aesni_cbc_sha1_enc_ssse3: /* interleaved AES-CBC encryption and SHA-1; reads rdi, rsi, rdx, rcx, r8, r9 plus one stack argument; this part builds the frame and primes the first 64-byte SHA block */
+ movq 8(%rsp),%r10 /* r10 = first stack argument: pointer to the byte stream that is hashed (read 64 bytes at a time below) */
+
+
+ pushq %rbx /* save the six callee-saved registers used as working registers */
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -104(%rsp),%rsp /* 104-byte local area; return address + 48 pushed + 104 = 160, so rsp is 16-byte aligned for the movdqa stack accesses if the caller followed the SysV call alignment */
+
+
+ movq %rdi,%r12 /* r12 = AES input pointer */
+ movq %rsi,%r13 /* r13 = AES output pointer (turned into an offset below) */
+ movq %rdx,%r14 /* r14 = count of 64-byte units */
+ movq %rcx,%r15 /* r15 = base of the AES round keys (read at offsets 0..224, plus a dword at 240) */
+ movdqu (%r8),%xmm11 /* xmm11 = 16-byte chaining value loaded from r8 */
+ movq %r8,88(%rsp) /* keep that pointer: r8 is reused below and the final block is written back through it */
+ shlq $6,%r14 /* count * 64 = byte length */
+ subq %r12,%r13 /* r13 = out - in, so (%r13,%r12,1) addresses the output while r12 walks the input */
+ movl 240(%r15),%r8d /* r8d = dword at key+240; compared with 11 later to choose how many AES rounds to run */
+ addq %r10,%r14 /* r14 = end of the hashed stream; NOTE(review): no zero-count guard is visible - with rdx == 0 the cmpq %r14,%r10 exit test would not match after the first 64-byte advance, so callers presumably pass a non-zero count */
+
+ leaq K_XX_XX(%rip),%r11 /* r11 = constant table defined after this function */
+ movl 0(%r9),%eax /* load the five 32-bit state words from r9 into eax, ebx, ecx, edx, ebp */
+ movl 4(%r9),%ebx
+ movl 8(%r9),%ecx
+ movl 12(%r9),%edx
+ movl %ebx,%esi /* esi starts as a copy of the second state word */
+ movl 16(%r9),%ebp
+
+ movdqa 64(%r11),%xmm6 /* xmm6 = shuffle mask stored at K_XX_XX+64 */
+ movdqa 0(%r11),%xmm9 /* xmm9 = first constant row of K_XX_XX */
+ movdqu 0(%r10),%xmm0 /* load the first 64 bytes to hash into xmm0..xmm3 */
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198 /* pshufb %xmm6,%xmm0 - reorder bytes with the mask */
+ addq $64,%r10 /* advance the hash input pointer */
+.byte 102,15,56,0,206 /* pshufb %xmm6,%xmm1 */
+.byte 102,15,56,0,214 /* pshufb %xmm6,%xmm2 */
+.byte 102,15,56,0,222 /* pshufb %xmm6,%xmm3 */
+ paddd %xmm9,%xmm0 /* add the constant to the first three vectors ... */
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp) /* ... park the sums in the 64-byte ring at 0..63(%rsp) that the scalar rounds read ... */
+ psubd %xmm9,%xmm0 /* ... and subtract again so the registers keep the plain words */
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2 /* the fourth slot, 48(%rsp), is filled inside the loop from xmm3 */
+ movups (%r15),%xmm13 /* xmm13 = round key 0, kept for the whole function */
+ movups 16(%r15),%xmm14 /* xmm14 = round key 1, ready for the first aesenc */
+ jmp .Loop_ssse3
+.align 16
+.Loop_ssse3: /* loop head: each pass encrypts 64 bytes (four 16-byte blocks via r12) and hashes 64 bytes (via r10); this stretch holds SHA-1 rounds 0-15 interleaved with AES block 0 */
+ movdqa %xmm1,%xmm4
+ addl 0(%rsp),%ebp /* SHA-1 round 0: every addl N(%rsp) below starts one round by adding a word+constant from the stack ring */
+ movups 0(%r12),%xmm12 /* AES block 0: load input bytes 0..15 */
+ xorps %xmm13,%xmm12 /* xor with round key 0 */
+ xorps %xmm12,%xmm11 /* CBC chaining: fold into the previous result held in xmm11 */
+ aesenc %xmm14,%xmm11 /* AES round 1 */
+ movups 32(%r15),%xmm15 /* round keys are fetched one round ahead, alternating xmm14/xmm15 */
+ xorl %edx,%ecx
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8 /* palignr $8,%xmm0,%xmm4 */
+ movl %eax,%edi
+ roll $5,%eax
+ paddd %xmm3,%xmm9 /* xmm9 = xmm3 + constant, stored to 48(%rsp) below */
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%ebp
+ pxor %xmm2,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pxor %xmm8,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm9,48(%rsp) /* fill the fourth ring slot */
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11 /* AES round 2 */
+ movups 48(%r15),%xmm14
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm10,%xmm9
+ rorl $7,%ebp
+ addl %esi,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11 /* AES round 3 */
+ movups 64(%r15),%xmm15
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa 0(%r11),%xmm10 /* constant row 0 of K_XX_XX */
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm4
+ rorl $7,%edx
+ addl %edi,%ebx
+ movdqa %xmm2,%xmm5
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8 /* palignr $8,%xmm1,%xmm5 */
+ movl %ebx,%edi
+ roll $5,%ebx
+ paddd %xmm4,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrldq $4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm9
+ addl 20(%rsp),%ebp
+ aesenc %xmm15,%xmm11 /* AES round 4 */
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm9,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm10,0(%rsp) /* refill ring slot 0 with newly scheduled words + constant */
+ xorl %edx,%edi
+ addl %eax,%ebp
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ roll $5,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm9
+ xorl %ecx,%esi
+ aesenc %xmm14,%xmm11 /* AES round 5 */
+ movups 96(%r15),%xmm15
+ addl %ebp,%edx
+ movdqa %xmm8,%xmm10
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ movdqa 16(%r11),%xmm8 /* constant row 1 of K_XX_XX */
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ pxor %xmm10,%xmm5
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movdqa %xmm3,%xmm6 /* xmm6 (the shuffle mask) is reused as a schedule register from here on */
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8 /* palignr $8,%xmm2,%xmm6 */
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11 /* AES round 6 */
+ movups 112(%r15),%xmm14
+ paddd %xmm5,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ psrldq $4,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm10,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm8,16(%rsp) /* refill ring slot 1 */
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+ aesenc %xmm14,%xmm11 /* AES round 7 */
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm10
+ xorl %edx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa 16(%r11),%xmm9 /* constant row 1 of K_XX_XX */
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11 /* AES round 8 */
+ movups 144(%r15),%xmm14
+ addl %ebp,%edx
+ pxor %xmm8,%xmm6
+ rorl $7,%eax
+ addl %edi,%edx
+ movdqa %xmm4,%xmm7
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8 /* palignr $8,%xmm3,%xmm7 */
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm6,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm5,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11 /* AES round 9 */
+ movups 160(%r15),%xmm15
+ pxor %xmm8,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm9,32(%rsp) /* refill ring slot 2 */
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrld $31,%xmm8
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ movdqa %xmm10,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ addl 60(%rsp),%ebp /* SHA-1 round 15 */
+ cmpl $11,%r8d /* pick the AES tail by the value loaded from key+240; the SSE moves and aesenc below leave the flags intact for the je */
+ jb .Laesenclast1 /* below 11: finish with the key at offset 160 (10 rounds in total) */
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11 /* AES round 10 */
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11 /* AES round 11 */
+ je .Laesenclast1 /* equal to 11: finish with the key at offset 192 (12 rounds in total) */
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11 /* AES round 12 */
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11 /* AES round 13; the final round then uses the key at offset 224 (14 rounds in total) */
+.Laesenclast1: /* finish AES block 0, then run SHA-1 rounds 16-37 interleaved with AES block 1 */
+ aesenclast %xmm15,%xmm11 /* final AES round of block 0; xmm11 is now the encrypted block */
+ movups 16(%r15),%xmm14 /* reload round key 1 for the next block */
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa 16(%r11),%xmm10 /* constant row 1 of K_XX_XX */
+ xorl %edx,%edi
+ addl %eax,%ebp
+ pxor %xmm9,%xmm7
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movdqa %xmm7,%xmm9
+ addl 0(%rsp),%edx /* SHA-1 round 16 */
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8 /* palignr $8,%xmm6,%xmm9 */
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm1,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ xorl %ecx,%esi
+ movups 16(%r12),%xmm12 /* AES block 1: load input bytes 16..31 */
+ xorps %xmm13,%xmm12 /* xor with round key 0 */
+ movups %xmm11,0(%r13,%r12,1) /* store encrypted block 0 at out+0 (r13 = out - in) */
+ xorps %xmm12,%xmm11 /* CBC chaining with the block just produced */
+ aesenc %xmm14,%xmm11 /* AES round 1 */
+ movups 32(%r15),%xmm15
+ addl %ebp,%edx
+ pxor %xmm9,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp) /* refill ring slot 3 */
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ psrld $30,%xmm9
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11 /* AES round 2 */
+ movups 48(%r15),%xmm14
+ por %xmm9,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ movdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax /* SHA-1 round 19: last round of the and/xor mixing form */
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 16(%rsp),%ebp /* SHA-1 round 20: rounds 20-39 mix the three state words with xor only */
+ aesenc %xmm14,%xmm11 /* AES round 3 */
+ movups 64(%r15),%xmm15
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8 /* palignr $8,%xmm7,%xmm10 */
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp) /* refill ring slot 0 */
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11 /* AES round 4 */
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8 /* palignr $8,%xmm0,%xmm8 */
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 32(%r11),%xmm10 /* constant row 2 of K_XX_XX */
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ aesenc %xmm14,%xmm11 /* AES round 5 */
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp) /* refill ring slot 1 */
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11 /* AES round 6 */
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8 /* palignr $8,%xmm1,%xmm9 */
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp) /* refill ring slot 2 */
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ aesenc %xmm14,%xmm11 /* AES round 7 */
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 0(%rsp),%ecx /* SHA-1 round 32 */
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8 /* palignr $8,%xmm2,%xmm10 */
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11 /* AES round 8 */
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp) /* refill ring slot 3 */
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ aesenc %xmm14,%xmm11 /* AES round 9 */
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8 /* palignr $8,%xmm3,%xmm8 */
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx /* SHA-1 round 37 */
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp) /* refill ring slot 0 */
+ xorl %eax,%edi
+ cmpl $11,%r8d /* same AES tail selection as for block 0 */
+ jb .Laesenclast2 /* below 11: final round uses the key at offset 160 */
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11 /* AES round 10 */
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11 /* AES round 11 */
+ je .Laesenclast2 /* equal to 11: final round uses the key at offset 192 */
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11 /* AES round 12 */
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11 /* AES round 13; final round uses the key at offset 224 */
+.Laesenclast2: /* finish AES block 1, then run SHA-1 rounds 38-54 interleaved with AES block 2 */
+ aesenclast %xmm15,%xmm11 /* final AES round of block 1 */
+ movups 16(%r15),%xmm14 /* reload round key 1 for the next block */
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx /* SHA-1 round 38 */
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax /* SHA-1 round 39: last of the xor-only rounds 20-39 */
+ xorl %ebp,%edi
+ movdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ecx,%edi
+ movups 32(%r12),%xmm12 /* AES block 2: load input bytes 32..47 */
+ xorps %xmm13,%xmm12 /* xor with round key 0 */
+ movups %xmm11,16(%r13,%r12,1) /* store encrypted block 1 at out+16 */
+ xorps %xmm12,%xmm11 /* CBC chaining with the block just produced */
+ aesenc %xmm14,%xmm11 /* AES round 1 */
+ movups 32(%r15),%xmm15
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8 /* palignr $8,%xmm4,%xmm9 */
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp /* SHA-1 round 40: rounds 40-59 combine the state words with two and terms per round */
+ andl %edx,%edi
+ pxor %xmm7,%xmm6
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ pxor %xmm9,%xmm6
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp) /* refill ring slot 1 */
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%edi
+ rorl $7,%eax
+ psrld $30,%xmm9
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11 /* AES round 2 */
+ movups 48(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ por %xmm9,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11 /* AES round 3 */
+ movups 64(%r15),%xmm15
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%edi
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8 /* palignr $8,%xmm5,%xmm10 */
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ pxor %xmm0,%xmm7
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 48(%r11),%xmm9 /* constant row 3 of K_XX_XX */
+ paddd %xmm6,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ pxor %xmm10,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp) /* refill ring slot 2 */
+ movl %ecx,%esi
+ aesenc %xmm15,%xmm11 /* AES round 4 */
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%edi
+ rorl $7,%ebx
+ psrld $30,%xmm10
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ por %xmm10,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ aesenc %xmm14,%xmm11 /* AES round 5 */
+ movups 96(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%edi
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8 /* palignr $8,%xmm6,%xmm8 */
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx /* SHA-1 round 48 */
+ andl %eax,%edi
+ pxor %xmm1,%xmm0
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11 /* AES round 6 */
+ movups 112(%r15),%xmm14
+ rorl $7,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ pxor %xmm8,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp) /* refill ring slot 3 */
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ pslld $2,%xmm0
+ andl %edx,%edi
+ rorl $7,%ecx
+ psrld $30,%xmm8
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ por %xmm8,%xmm0
+ movl %ecx,%edi
+ aesenc %xmm14,%xmm11 /* AES round 7 */
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11 /* AES round 8 */
+ movups 144(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%edi
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8 /* palignr $8,%xmm7,%xmm9 */
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ pxor %xmm2,%xmm1
+ andl %eax,%esi
+ rorl $7,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ pxor %xmm9,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp) /* refill ring slot 0 */
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11 /* AES round 9 */
+ movups 160(%r15),%xmm15
+ rorl $7,%edx
+ psrld $30,%xmm9
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ por %xmm9,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax /* SHA-1 round 54 */
+ andl %ebp,%edi
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ cmpl $11,%r8d /* same AES tail selection as for the earlier blocks */
+ jb .Laesenclast3 /* below 11: final round uses the key at offset 160 */
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11 /* AES round 10 */
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11 /* AES round 11 */
+ je .Laesenclast3 /* equal to 11: final round uses the key at offset 192 */
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11 /* AES round 12 */
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11 /* AES round 13; final round uses the key at offset 224 */
+.Laesenclast3: /* finish AES block 2, then run SHA-1 rounds 55-77 interleaved with AES block 3; also holds the loop exit test and the load of the next 64 bytes to hash */
+ aesenclast %xmm15,%xmm11 /* final AES round of block 2 */
+ movups 16(%r15),%xmm14 /* reload round key 1 for the next block */
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp /* SHA-1 round 55 */
+ andl %edx,%esi
+ andl %ecx,%edi
+ rorl $7,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%edi
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8 /* palignr $8,%xmm0,%xmm10 */
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ pxor %xmm3,%xmm2
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ pxor %xmm10,%xmm2
+ roll $5,%ebp
+ movups 48(%r12),%xmm12 /* AES block 3: load input bytes 48..63 */
+ xorps %xmm13,%xmm12 /* xor with round key 0 */
+ movups %xmm11,32(%r13,%r12,1) /* store encrypted block 2 at out+32 */
+ xorps %xmm12,%xmm11 /* CBC chaining with the block just produced */
+ aesenc %xmm14,%xmm11 /* AES round 1 */
+ movups 32(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp) /* refill ring slot 1 */
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%edi
+ rorl $7,%ebp
+ psrld $30,%xmm10
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm10,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11 /* AES round 2 */
+ movups 48(%r15),%xmm14
+ rorl $7,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax /* SHA-1 round 59: last of the rounds that use and terms */
+ andl %ebp,%esi
+ andl %edx,%edi
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp /* SHA-1 round 60: rounds 60-79 are xor-only again */
+ aesenc %xmm14,%xmm11 /* AES round 3 */
+ movups 64(%r15),%xmm15
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8 /* palignr $8,%xmm1,%xmm8 */
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp) /* refill ring slot 2 */
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11 /* AES round 4 */
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax /* SHA-1 round 64 */
+ paddd %xmm3,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp) /* refill ring slot 3: last schedule store for this 64-byte block */
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+ aesenc %xmm14,%xmm11 /* AES round 5 */
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx /* SHA-1 round 67 */
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11 /* AES round 6 */
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ cmpq %r14,%r10 /* has the hash input pointer reached the end pointer computed in the prologue? */
+ je .Ldone_ssse3 /* yes: finish the remaining rounds without loading more input */
+ movdqa 64(%r11),%xmm6 /* reload the shuffle mask (xmm6 was reused as a schedule register) */
+ movdqa 0(%r11),%xmm9 /* constant row 0 of K_XX_XX */
+ movdqu 0(%r10),%xmm0 /* load the next 64 bytes to hash while the current block's last rounds complete */
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198 /* pshufb %xmm6,%xmm0 */
+ addq $64,%r10 /* advance the hash input pointer */
+ addl 16(%rsp),%ebx /* SHA-1 round 68 */
+ xorl %eax,%esi
+.byte 102,15,56,0,206 /* pshufb %xmm6,%xmm1 */
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,0(%rsp) /* ring slot 0 now holds words of the next block (its last read for the current block was round 67) */
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ psubd %xmm9,%xmm0 /* restore the plain words in the register */
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11 /* AES round 7 */
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214 /* pshufb %xmm6,%xmm2 */
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm9,%xmm1
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11 /* AES round 8 */
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movdqa %xmm1,16(%rsp) /* ring slot 1 for the next block */
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ psubd %xmm9,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11 /* AES round 9 */
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222 /* pshufb %xmm6,%xmm3 */
+ movl %ebp,%edi
+ roll $5,%ebp
+ paddd %xmm9,%xmm2
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%rsp) /* ring slot 2 for the next block */
+ addl 52(%rsp),%ecx /* SHA-1 round 77 */
+ xorl %ebx,%edi
+ psubd %xmm9,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d /* same AES tail selection as for the earlier blocks */
+ jb .Laesenclast4 /* below 11: final round uses the key at offset 160 */
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11 /* AES round 10 */
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11 /* AES round 11 */
+ je .Laesenclast4 /* equal to 11: final round uses the key at offset 192 */
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11 /* AES round 12 */
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11 /* AES round 13; final round uses the key at offset 224 */
+.Laesenclast4: /* loop tail: finish AES block 3 and SHA-1 rounds 78-79, store the last ciphertext block, fold the working words into the state at r9, and loop */
+ aesenclast %xmm15,%xmm11 /* final AES round of block 3 */
+ movups 16(%r15),%xmm14 /* round key 1, ready for the first aesenc of the next pass */
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx /* SHA-1 round 78 */
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax /* SHA-1 round 79 */
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1) /* store encrypted block 3 at out+48 */
+ leaq 64(%r12),%r12 /* advance the AES input pointer by 64 bytes (output follows through r13 = out - in) */
+
+ addl 0(%r9),%eax /* add the previous state words at r9 to the five working words ... */
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ addl 12(%r9),%edx
+ movl %eax,0(%r9) /* ... and write the sums back */
+ addl 16(%r9),%ebp
+ movl %esi,4(%r9)
+ movl %esi,%ebx /* ebx and esi both hold the second state word again, the same relation the function sets up before entering the loop */
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ jmp .Loop_ssse3 /* next 64 bytes */
+
+.align 16
+.Ldone_ssse3: /* exit path, entered once the hash input pointer equals the end pointer: SHA-1 rounds 68-79 and AES rounds 7.. of block 3 without loading more input, then write back results and unwind the frame */
+ addl 16(%rsp),%ebx /* SHA-1 round 68 */
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11 /* AES round 7 (xmm14 = key at offset 112, loaded before the branch here) */
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11 /* AES round 8 */
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11 /* AES round 9 */
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d /* choose the AES tail by the value loaded from key+240 */
+ jb .Laesenclast5 /* below 11: final round uses the key at offset 160 */
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11 /* AES round 10 */
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11 /* AES round 11 */
+ je .Laesenclast5 /* equal to 11: final round uses the key at offset 192 */
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11 /* AES round 12 */
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11 /* AES round 13; final round uses the key at offset 224 */
+.Laesenclast5:
+ aesenclast %xmm15,%xmm11 /* final AES round of the last block */
+ movups 16(%r15),%xmm14 /* NOTE(review): this reload looks unused on the exit path - no further aesenc follows */
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx /* SHA-1 round 78 */
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax /* SHA-1 round 79 */
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1) /* store the last encrypted block at out+48 */
+ movq 88(%rsp),%r8 /* recover the chaining-value pointer saved in the prologue */
+
+ addl 0(%r9),%eax /* fold the five working words into the state at r9 and write it back */
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ movl %eax,0(%r9)
+ addl 12(%r9),%edx
+ movl %esi,4(%r9)
+ addl 16(%r9),%ebp
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ movups %xmm11,(%r8) /* write the last ciphertext block back through the pointer the initial chaining value was read from */
+ leaq 104(%rsp),%rsi /* rsi = base of the six saved registers (just above the 104-byte local area) */
+ movq 0(%rsi),%r15 /* restore in reverse push order */
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp /* drop the frame: rsp points at the return address again */
+.Lepilogue_ssse3:
+ retq
+.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3
+.align 64
+K_XX_XX: /* constant table addressed through r11 in aesni_cbc_sha1_enc_ssse3; it lives in .text and is only read there */
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 /* offset 0: constant added to schedule words for the first group of rounds, replicated across four lanes */
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 /* offset 16: second round constant */
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc /* offset 32: third round constant */
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 /* offset 48: fourth round constant */
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f /* offset 64: pshufb mask loaded into xmm6; reverses the bytes inside each 32-bit word */
+
+.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 /* NUL-terminated ASCII banner: "AESNI-CBC+SHA1 stitch for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
+.align 64
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/aes/aesni-sha1-macosx-x86_64.S b/ext/libressl/crypto/aes/aesni-sha1-macosx-x86_64.S
new file mode 100644
index 0000000..3e88b1a
--- /dev/null
+++ b/ext/libressl/crypto/aes/aesni-sha1-macosx-x86_64.S
@@ -0,0 +1,1398 @@
+#include "x86_arch.h"
+.text
+
+.private_extern _OPENSSL_ia32cap_P
+
+.globl _aesni_cbc_sha1_enc
+
+.p2align 4
+_aesni_cbc_sha1_enc: /* exported entry point: forwards unconditionally to the SSSE3 implementation below */
+
+ movl _OPENSSL_ia32cap_P+0(%rip),%r10d /* loads capability word 0; the value is not tested here, and r10 is overwritten at the top of aesni_cbc_sha1_enc_ssse3 */
+ movl _OPENSSL_ia32cap_P+4(%rip),%r11d /* loads capability word 1; likewise unused -- r11 is reloaded with the K_XX_XX address before any read */
+ jmp aesni_cbc_sha1_enc_ssse3 /* tail jump: argument registers and the stack are passed through untouched, the callee returns straight to our caller */
+ retq /* never reached: the jmp above is unconditional */
+
+
+.p2align 4
+aesni_cbc_sha1_enc_ssse3: /* setup for the stitched AES-CBC encrypt + SHA-1 loop; inputs are rdi, rsi, rdx, rcx, r8, r9 and one stack argument */
+ movq 8(%rsp),%r10 /* r10 = first stack-passed argument (just above the return address): pointer to the 64-byte blocks that are hashed */
+/* frame after the prologue: 0..63(%rsp) = four 16-byte slots of precomputed W+K words, 88(%rsp) = saved r8, 104(%rsp).. = the six pushed registers */
+
+ pushq %rbx /* save the six callee-saved GPRs this routine uses */
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -104(%rsp),%rsp /* reserve 104 bytes of locals; with the 48 bytes pushed above this leaves rsp 16-byte aligned for the movdqa stores, assuming the ABI-standard rsp%16==8 at entry */
+
+
+ movq %rdi,%r12 /* r12 = arg1: source of the 16-byte blocks fed to AES (read via movups N(%r12) in the loop) */
+ movq %rsi,%r13 /* r13 = arg2: destination for the ciphertext (turned into an offset from r12 below) */
+ movq %rdx,%r14 /* r14 = arg3: count of 64-byte blocks (scaled by 64 below) */
+ movq %rcx,%r15 /* r15 = arg4: AES round-key array; the 32-bit word at +240 selects the round count */
+ movdqu (%r8),%xmm11 /* xmm11 = 16 bytes at arg5: initial CBC chaining value */
+ movq %r8,88(%rsp) /* keep the chaining-value pointer; r8 is reused below and the pointer is reloaded before the final write-back */
+ shlq $6,%r14 /* r14 = block count * 64 = byte length of the hashed input */
+ subq %r12,%r13 /* r13 = out - in, so (%r13,%r12,1) addresses the output while only r12 advances */
+ movl 240(%r15),%r8d /* r8d = round-count field of the key schedule, compared against 11 at each aesenclast site */
+ addq %r10,%r14 /* r14 = end-of-input pointer; the loop exits when r10 reaches it */
+
+ leaq K_XX_XX(%rip),%r11 /* r11 = base of the constant table (RIP-relative, position independent) */
+ movl 0(%r9),%eax /* load the five 32-bit chaining words from arg6 into eax, ebx, ecx, edx, ebp */
+ movl 4(%r9),%ebx
+ movl 8(%r9),%ecx
+ movl 12(%r9),%edx
+ movl %ebx,%esi /* esi = working copy of ebx, consumed by the and/xor of the first round */
+ movl 16(%r9),%ebp
+
+ movdqa 64(%r11),%xmm6 /* xmm6 = fifth table row: per-dword byte-reversal mask for pshufb */
+ movdqa 0(%r11),%xmm9 /* xmm9 = first table row: round constant 0x5a827999 in all four lanes */
+ movdqu 0(%r10),%xmm0 /* load the first 64-byte input block (unaligned) into xmm0..xmm3 */
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198 /* 66 0F 38 00 C6 = pshufb %xmm6,%xmm0: byte-swap each dword of xmm0 */
+ addq $64,%r10 /* advance the hash-input pointer past the block just loaded */
+.byte 102,15,56,0,206 /* pshufb %xmm6,%xmm1 */
+.byte 102,15,56,0,214 /* pshufb %xmm6,%xmm2 */
+.byte 102,15,56,0,222 /* pshufb %xmm6,%xmm3 */
+ paddd %xmm9,%xmm0 /* add the round constant to the first twelve message words ... */
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp) /* ... park W+K on the stack for the scalar rounds ... */
+ psubd %xmm9,%xmm0 /* ... and subtract it again so xmm0..xmm2 keep the plain message words */
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ movups (%r15),%xmm13 /* xmm13 = round key 0, xored into every input block in the loop */
+ movups 16(%r15),%xmm14 /* xmm14 = round key 1, preloaded for the first aesenc */
+ jmp L$oop_ssse3 /* enter the main loop (the label follows the alignment padding) */
+.p2align 4
+L$oop_ssse3:
+ movdqa %xmm1,%xmm4
+ addl 0(%rsp),%ebp
+ movups 0(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ roll $5,%eax
+ paddd %xmm3,%xmm9
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%ebp
+ pxor %xmm2,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pxor %xmm8,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm9,48(%rsp)
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm10,%xmm9
+ rorl $7,%ebp
+ addl %esi,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa 0(%r11),%xmm10
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm4
+ rorl $7,%edx
+ addl %edi,%ebx
+ movdqa %xmm2,%xmm5
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ roll $5,%ebx
+ paddd %xmm4,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrldq $4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm9
+ addl 20(%rsp),%ebp
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm9,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm10,0(%rsp)
+ xorl %edx,%edi
+ addl %eax,%ebp
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ roll $5,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm9
+ xorl %ecx,%esi
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ addl %ebp,%edx
+ movdqa %xmm8,%xmm10
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ movdqa 16(%r11),%xmm8
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ pxor %xmm10,%xmm5
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movdqa %xmm3,%xmm6
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ paddd %xmm5,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ psrldq $4,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm10,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm8,16(%rsp)
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm10
+ xorl %edx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa 16(%r11),%xmm9
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %ebp,%edx
+ pxor %xmm8,%xmm6
+ rorl $7,%eax
+ addl %edi,%edx
+ movdqa %xmm4,%xmm7
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm6,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm5,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ pxor %xmm8,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm9,32(%rsp)
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrld $31,%xmm8
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ movdqa %xmm10,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ addl 60(%rsp),%ebp
+ cmpl $11,%r8d
+ jb L$aesenclast1
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je L$aesenclast1
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+L$aesenclast1:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa 16(%r11),%xmm10
+ xorl %edx,%edi
+ addl %eax,%ebp
+ pxor %xmm9,%xmm7
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movdqa %xmm7,%xmm9
+ addl 0(%rsp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm1,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ xorl %ecx,%esi
+ movups 16(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,0(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ addl %ebp,%edx
+ pxor %xmm9,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ psrld $30,%xmm9
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ por %xmm9,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ movdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 16(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 32(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb L$aesenclast2
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je L$aesenclast2
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+L$aesenclast2:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ xorl %ebp,%edi
+ movdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ecx,%edi
+ movups 32(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,16(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp
+ andl %edx,%edi
+ pxor %xmm7,%xmm6
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ pxor %xmm9,%xmm6
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%edi
+ rorl $7,%eax
+ psrld $30,%xmm9
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ por %xmm9,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%edi
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ pxor %xmm0,%xmm7
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 48(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ pxor %xmm10,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ movl %ecx,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%edi
+ rorl $7,%ebx
+ psrld $30,%xmm10
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ por %xmm10,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%edi
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx
+ andl %eax,%edi
+ pxor %xmm1,%xmm0
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ rorl $7,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ pxor %xmm8,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ pslld $2,%xmm0
+ andl %edx,%edi
+ rorl $7,%ecx
+ psrld $30,%xmm8
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ por %xmm8,%xmm0
+ movl %ecx,%edi
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%edi
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ pxor %xmm2,%xmm1
+ andl %eax,%esi
+ rorl $7,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ pxor %xmm9,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ rorl $7,%edx
+ psrld $30,%xmm9
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ por %xmm9,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax
+ andl %ebp,%edi
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ cmpl $11,%r8d
+ jb L$aesenclast3
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je L$aesenclast3
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+L$aesenclast3:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp
+ andl %edx,%esi
+ andl %ecx,%edi
+ rorl $7,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%edi
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ pxor %xmm3,%xmm2
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ pxor %xmm10,%xmm2
+ roll $5,%ebp
+ movups 48(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,32(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%edi
+ rorl $7,%ebp
+ psrld $30,%xmm10
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm10,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ rorl $7,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax
+ andl %ebp,%esi
+ andl %edx,%edi
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ cmpq %r14,%r10
+ je L$done_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,0(%rsp)
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ psubd %xmm9,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm9,%xmm1
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movdqa %xmm1,16(%rsp)
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ psubd %xmm9,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ paddd %xmm9,%xmm2
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%rsp)
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ psubd %xmm9,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb L$aesenclast4
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je L$aesenclast4
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+L$aesenclast4:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ leaq 64(%r12),%r12
+
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ addl 12(%r9),%edx
+ movl %eax,0(%r9)
+ addl 16(%r9),%ebp
+ movl %esi,4(%r9)
+ movl %esi,%ebx
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ jmp L$oop_ssse3
+
+.p2align 4
+L$done_ssse3: /* reached from the loop when r10 == r14: finish the current block without loading or scheduling another one */
+ addl 16(%rsp),%ebx /* scalar rounds continue over the precomputed words left at 16..60(%rsp) */
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11 /* AES round on the fourth 16-byte block, using the key left in xmm14 by the loop */
+ movups 128(%r15),%xmm15 /* fetch the key at +128 for the next aesenc */
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11 /* AES round with the key from +128 */
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11 /* AES round with the key from +144 */
+ movups 160(%r15),%xmm15 /* key at +160: the last-round key of a 10-round schedule, an ordinary round key otherwise */
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d /* dispatch on the round-count field read from 240(%r15) in the prologue */
+ jb L$aesenclast5 /* below 11: the key at +160 is the final one (10 rounds in total) */
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je L$aesenclast5 /* flags still come from the cmpl (movups/aesenc leave them alone); equal to 11: the key at +192 is final (12 rounds) */
+ movups 208(%r15),%xmm14 /* above 11: two more rounds, the final key is at +224 (14 rounds) */
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+L$aesenclast5:
+ aesenclast %xmm15,%xmm11 /* final AES round: xmm11 is now the ciphertext of the last 16-byte block */
+ movups 16(%r15),%xmm14 /* reload round key 1, mirroring the in-loop aesenclast sites */
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax /* last precomputed word of the block */
+ xorl %ebp,%edi
+ movl %ebx,%esi /* esi keeps ebx as it was before the rotate below; it is what gets folded into the word at 4(%r9) */
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1) /* store the fourth ciphertext block at out+48 (r13 holds out-in, r12 the current input position) */
+ movq 88(%rsp),%r8 /* recover the chaining-value pointer saved in the prologue */
+
+ addl 0(%r9),%eax /* add the previous chaining words to the working registers and write the sums back to the state at r9 */
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ movl %eax,0(%r9)
+ addl 12(%r9),%edx
+ movl %esi,4(%r9)
+ addl 16(%r9),%ebp
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ movups %xmm11,(%r8) /* write the last ciphertext block back through the chaining-value pointer so a later call can continue the CBC chain */
+ leaq 104(%rsp),%rsi /* rsi = base of the six registers pushed in the prologue (104 bytes of locals sit below them) */
+ movq 0(%rsi),%r15 /* restore in reverse push order: r15 was pushed last, so it sits lowest */
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp /* drop locals and the save area in one step; rsp points at the return address again */
+L$epilogue_ssse3:
+ retq /* return to the caller of _aesni_cbc_sha1_enc (which entered here via jmp) */
+
+.p2align 6 /* 64-byte alignment; the table is read with movdqa, which needs at least 16 */
+K_XX_XX: /* constant table addressed through r11 by aesni_cbc_sha1_enc_ssse3: five 16-byte rows */
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 /* 0(%r11): SHA-1 constant K for rounds 0-19 (FIPS 180-4), replicated x4 */
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 /* 16(%r11): SHA-1 constant K for rounds 20-39, replicated x4 */
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc /* 32(%r11): SHA-1 constant K for rounds 40-59, replicated x4 */
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 /* 48(%r11): SHA-1 constant K for rounds 60-79, replicated x4 */
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f /* 64(%r11): pshufb mask loaded into xmm6; bytes 03 02 01 00 07 06 05 04 ... reverse the byte order of each dword */
+
+.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 /* ASCII "AESNI-CBC+SHA1 stitch for x86_64, CRYPTOGAMS by <appro@openssl.org>" plus a NUL terminator */
+.p2align 6 /* pad to the next 64-byte boundary after the ID string */
diff --git a/ext/libressl/crypto/aes/aesni-sha1-masm-x86_64.S b/ext/libressl/crypto/aes/aesni-sha1-masm-x86_64.S
new file mode 100644
index 0000000..db95881
--- /dev/null
+++ b/ext/libressl/crypto/aes/aesni-sha1-masm-x86_64.S
@@ -0,0 +1,1616 @@
+; 1 "crypto/aes/aesni-sha1-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/aes/aesni-sha1-masm-x86_64.S.tmp" 2
+OPTION DOTNAME
+
+; 1 "./crypto/x86_arch.h" 1
+
+
+; 16 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+; 40 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 "crypto/aes/aesni-sha1-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+EXTERN OPENSSL_ia32cap_P:NEAR
+
+
+PUBLIC aesni_cbc_sha1_enc
+
+ALIGN 16
+aesni_cbc_sha1_enc PROC PUBLIC ;exported entry point: forwards unconditionally to aesni_cbc_sha1_enc_ssse3
+
+ mov r10d,DWORD PTR[((OPENSSL_ia32cap_P+0))] ;loads capability word 0; not tested here, and r10 is reloaded from the stack in the callee prologue
+ mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+4))] ;loads capability word 1; likewise unused, r11 is overwritten with the K_XX_XX address in the callee
+ jmp aesni_cbc_sha1_enc_ssse3 ;tail jump: argument registers and stack are passed through unchanged
+ DB 0F3h,0C3h ;rep ret (F3 C3); never reached because the jmp above is unconditional
+aesni_cbc_sha1_enc ENDP
+
+ALIGN 16
+aesni_cbc_sha1_enc_ssse3 PROC PRIVATE
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+$L$SEH_begin_aesni_cbc_sha1_enc_ssse3::
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+
+
+ mov r10,QWORD PTR[56+rsp]
+
+
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-264))+rsp]
+
+
+ movaps XMMWORD PTR[(96+0)+rsp],xmm6
+ movaps XMMWORD PTR[(96+16)+rsp],xmm7
+ movaps XMMWORD PTR[(96+32)+rsp],xmm8
+ movaps XMMWORD PTR[(96+48)+rsp],xmm9
+ movaps XMMWORD PTR[(96+64)+rsp],xmm10
+ movaps XMMWORD PTR[(96+80)+rsp],xmm11
+ movaps XMMWORD PTR[(96+96)+rsp],xmm12
+ movaps XMMWORD PTR[(96+112)+rsp],xmm13
+ movaps XMMWORD PTR[(96+128)+rsp],xmm14
+ movaps XMMWORD PTR[(96+144)+rsp],xmm15
+$L$prologue_ssse3::
+ mov r12,rdi
+ mov r13,rsi
+ mov r14,rdx
+ mov r15,rcx
+ movdqu xmm11,XMMWORD PTR[r8]
+ mov QWORD PTR[88+rsp],r8
+ shl r14,6
+ sub r13,r12
+ mov r8d,DWORD PTR[240+r15]
+ add r14,r10
+
+ lea r11,QWORD PTR[K_XX_XX]
+ mov eax,DWORD PTR[r9]
+ mov ebx,DWORD PTR[4+r9]
+ mov ecx,DWORD PTR[8+r9]
+ mov edx,DWORD PTR[12+r9]
+ mov esi,ebx
+ mov ebp,DWORD PTR[16+r9]
+
+ movdqa xmm6,XMMWORD PTR[64+r11]
+ movdqa xmm9,XMMWORD PTR[r11]
+ movdqu xmm0,XMMWORD PTR[r10]
+ movdqu xmm1,XMMWORD PTR[16+r10]
+ movdqu xmm2,XMMWORD PTR[32+r10]
+ movdqu xmm3,XMMWORD PTR[48+r10]
+DB 102,15,56,0,198
+ add r10,64
+DB 102,15,56,0,206
+DB 102,15,56,0,214
+DB 102,15,56,0,222
+ paddd xmm0,xmm9
+ paddd xmm1,xmm9
+ paddd xmm2,xmm9
+ movdqa XMMWORD PTR[rsp],xmm0
+ psubd xmm0,xmm9
+ movdqa XMMWORD PTR[16+rsp],xmm1
+ psubd xmm1,xmm9
+ movdqa XMMWORD PTR[32+rsp],xmm2
+ psubd xmm2,xmm9
+ movups xmm13,XMMWORD PTR[r15]
+ movups xmm14,XMMWORD PTR[16+r15]
+ jmp $L$oop_ssse3
+ALIGN 16
+$L$oop_ssse3::
+ movdqa xmm4,xmm1
+ add ebp,DWORD PTR[rsp]
+ movups xmm12,XMMWORD PTR[r12]
+ xorps xmm12,xmm13
+ xorps xmm11,xmm12
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[32+r15]
+ xor ecx,edx
+ movdqa xmm8,xmm3
+DB 102,15,58,15,224,8
+ mov edi,eax
+ rol eax,5
+ paddd xmm9,xmm3
+ and esi,ecx
+ xor ecx,edx
+ psrldq xmm8,4
+ xor esi,edx
+ add ebp,eax
+ pxor xmm4,xmm0
+ ror ebx,2
+ add ebp,esi
+ pxor xmm8,xmm2
+ add edx,DWORD PTR[4+rsp]
+ xor ebx,ecx
+ mov esi,ebp
+ rol ebp,5
+ pxor xmm4,xmm8
+ and edi,ebx
+ xor ebx,ecx
+ movdqa XMMWORD PTR[48+rsp],xmm9
+ xor edi,ecx
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[48+r15]
+ add edx,ebp
+ movdqa xmm10,xmm4
+ movdqa xmm8,xmm4
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD PTR[8+rsp]
+ xor eax,ebx
+ pslldq xmm10,12
+ paddd xmm4,xmm4
+ mov edi,edx
+ rol edx,5
+ and esi,eax
+ xor eax,ebx
+ psrld xmm8,31
+ xor esi,ebx
+ add ecx,edx
+ movdqa xmm9,xmm10
+ ror ebp,7
+ add ecx,esi
+ psrld xmm10,30
+ por xmm4,xmm8
+ add ebx,DWORD PTR[12+rsp]
+ xor ebp,eax
+ mov esi,ecx
+ rol ecx,5
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[64+r15]
+ pslld xmm9,2
+ pxor xmm4,xmm10
+ and edi,ebp
+ xor ebp,eax
+ movdqa xmm10,XMMWORD PTR[r11]
+ xor edi,eax
+ add ebx,ecx
+ pxor xmm4,xmm9
+ ror edx,7
+ add ebx,edi
+ movdqa xmm5,xmm2
+ add eax,DWORD PTR[16+rsp]
+ xor edx,ebp
+ movdqa xmm9,xmm4
+DB 102,15,58,15,233,8
+ mov edi,ebx
+ rol ebx,5
+ paddd xmm10,xmm4
+ and esi,edx
+ xor edx,ebp
+ psrldq xmm9,4
+ xor esi,ebp
+ add eax,ebx
+ pxor xmm5,xmm1
+ ror ecx,7
+ add eax,esi
+ pxor xmm9,xmm3
+ add ebp,DWORD PTR[20+rsp]
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[80+r15]
+ xor ecx,edx
+ mov esi,eax
+ rol eax,5
+ pxor xmm5,xmm9
+ and edi,ecx
+ xor ecx,edx
+ movdqa XMMWORD PTR[rsp],xmm10
+ xor edi,edx
+ add ebp,eax
+ movdqa xmm8,xmm5
+ movdqa xmm9,xmm5
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[24+rsp]
+ xor ebx,ecx
+ pslldq xmm8,12
+ paddd xmm5,xmm5
+ mov edi,ebp
+ rol ebp,5
+ and esi,ebx
+ xor ebx,ecx
+ psrld xmm9,31
+ xor esi,ecx
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[96+r15]
+ add edx,ebp
+ movdqa xmm10,xmm8
+ ror eax,7
+ add edx,esi
+ psrld xmm8,30
+ por xmm5,xmm9
+ add ecx,DWORD PTR[28+rsp]
+ xor eax,ebx
+ mov esi,edx
+ rol edx,5
+ pslld xmm10,2
+ pxor xmm5,xmm8
+ and edi,eax
+ xor eax,ebx
+ movdqa xmm8,XMMWORD PTR[16+r11]
+ xor edi,ebx
+ add ecx,edx
+ pxor xmm5,xmm10
+ ror ebp,7
+ add ecx,edi
+ movdqa xmm6,xmm3
+ add ebx,DWORD PTR[32+rsp]
+ xor ebp,eax
+ movdqa xmm10,xmm5
+DB 102,15,58,15,242,8
+ mov edi,ecx
+ rol ecx,5
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[112+r15]
+ paddd xmm8,xmm5
+ and esi,ebp
+ xor ebp,eax
+ psrldq xmm10,4
+ xor esi,eax
+ add ebx,ecx
+ pxor xmm6,xmm2
+ ror edx,7
+ add ebx,esi
+ pxor xmm10,xmm4
+ add eax,DWORD PTR[36+rsp]
+ xor edx,ebp
+ mov esi,ebx
+ rol ebx,5
+ pxor xmm6,xmm10
+ and edi,edx
+ xor edx,ebp
+ movdqa XMMWORD PTR[16+rsp],xmm8
+ xor edi,ebp
+ add eax,ebx
+ movdqa xmm9,xmm6
+ movdqa xmm10,xmm6
+ ror ecx,7
+ add eax,edi
+ add ebp,DWORD PTR[40+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[128+r15]
+ xor ecx,edx
+ pslldq xmm9,12
+ paddd xmm6,xmm6
+ mov edi,eax
+ rol eax,5
+ and esi,ecx
+ xor ecx,edx
+ psrld xmm10,31
+ xor esi,edx
+ add ebp,eax
+ movdqa xmm8,xmm9
+ ror ebx,7
+ add ebp,esi
+ psrld xmm9,30
+ por xmm6,xmm10
+ add edx,DWORD PTR[44+rsp]
+ xor ebx,ecx
+ mov esi,ebp
+ rol ebp,5
+ pslld xmm8,2
+ pxor xmm6,xmm9
+ and edi,ebx
+ xor ebx,ecx
+ movdqa xmm9,XMMWORD PTR[16+r11]
+ xor edi,ecx
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[144+r15]
+ add edx,ebp
+ pxor xmm6,xmm8
+ ror eax,7
+ add edx,edi
+ movdqa xmm7,xmm4
+ add ecx,DWORD PTR[48+rsp]
+ xor eax,ebx
+ movdqa xmm8,xmm6
+DB 102,15,58,15,251,8
+ mov edi,edx
+ rol edx,5
+ paddd xmm9,xmm6
+ and esi,eax
+ xor eax,ebx
+ psrldq xmm8,4
+ xor esi,ebx
+ add ecx,edx
+ pxor xmm7,xmm3
+ ror ebp,7
+ add ecx,esi
+ pxor xmm8,xmm5
+ add ebx,DWORD PTR[52+rsp]
+ xor ebp,eax
+ mov esi,ecx
+ rol ecx,5
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[160+r15]
+ pxor xmm7,xmm8
+ and edi,ebp
+ xor ebp,eax
+ movdqa XMMWORD PTR[32+rsp],xmm9
+ xor edi,eax
+ add ebx,ecx
+ movdqa xmm10,xmm7
+ movdqa xmm8,xmm7
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[56+rsp]
+ xor edx,ebp
+ pslldq xmm10,12
+ paddd xmm7,xmm7
+ mov edi,ebx
+ rol ebx,5
+ and esi,edx
+ xor edx,ebp
+ psrld xmm8,31
+ xor esi,ebp
+ add eax,ebx
+ movdqa xmm9,xmm10
+ ror ecx,7
+ add eax,esi
+ psrld xmm10,30
+ por xmm7,xmm8
+ add ebp,DWORD PTR[60+rsp]
+ cmp r8d,11
+ jb $L$aesenclast1
+ movups xmm14,XMMWORD PTR[176+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[192+r15]
+ aesenc xmm11,xmm14
+ je $L$aesenclast1
+ movups xmm14,XMMWORD PTR[208+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[224+r15]
+ aesenc xmm11,xmm14
+$L$aesenclast1::
+ aesenclast xmm11,xmm15
+ movups xmm14,XMMWORD PTR[16+r15]
+ xor ecx,edx
+ mov esi,eax
+ rol eax,5
+ pslld xmm9,2
+ pxor xmm7,xmm10
+ and edi,ecx
+ xor ecx,edx
+ movdqa xmm10,XMMWORD PTR[16+r11]
+ xor edi,edx
+ add ebp,eax
+ pxor xmm7,xmm9
+ ror ebx,7
+ add ebp,edi
+ movdqa xmm9,xmm7
+ add edx,DWORD PTR[rsp]
+ pxor xmm0,xmm4
+DB 102,68,15,58,15,206,8
+ xor ebx,ecx
+ mov edi,ebp
+ rol ebp,5
+ pxor xmm0,xmm1
+ and esi,ebx
+ xor ebx,ecx
+ movdqa xmm8,xmm10
+ paddd xmm10,xmm7
+ xor esi,ecx
+ movups xmm12,XMMWORD PTR[16+r12]
+ xorps xmm12,xmm13
+ movups XMMWORD PTR[r12*1+r13],xmm11
+ xorps xmm11,xmm12
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[32+r15]
+ add edx,ebp
+ pxor xmm0,xmm9
+ ror eax,7
+ add edx,esi
+ add ecx,DWORD PTR[4+rsp]
+ xor eax,ebx
+ movdqa xmm9,xmm0
+ movdqa XMMWORD PTR[48+rsp],xmm10
+ mov esi,edx
+ rol edx,5
+ and edi,eax
+ xor eax,ebx
+ pslld xmm0,2
+ xor edi,ebx
+ add ecx,edx
+ psrld xmm9,30
+ ror ebp,7
+ add ecx,edi
+ add ebx,DWORD PTR[8+rsp]
+ xor ebp,eax
+ mov edi,ecx
+ rol ecx,5
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[48+r15]
+ por xmm0,xmm9
+ and esi,ebp
+ xor ebp,eax
+ movdqa xmm10,xmm0
+ xor esi,eax
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ add eax,DWORD PTR[12+rsp]
+ xor edx,ebp
+ mov esi,ebx
+ rol ebx,5
+ and edi,edx
+ xor edx,ebp
+ xor edi,ebp
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ add ebp,DWORD PTR[16+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[64+r15]
+ pxor xmm1,xmm5
+DB 102,68,15,58,15,215,8
+ xor esi,edx
+ mov edi,eax
+ rol eax,5
+ pxor xmm1,xmm2
+ xor esi,ecx
+ add ebp,eax
+ movdqa xmm9,xmm8
+ paddd xmm8,xmm0
+ ror ebx,7
+ add ebp,esi
+ pxor xmm1,xmm10
+ add edx,DWORD PTR[20+rsp]
+ xor edi,ecx
+ mov esi,ebp
+ rol ebp,5
+ movdqa xmm10,xmm1
+ movdqa XMMWORD PTR[rsp],xmm8
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ pslld xmm1,2
+ add ecx,DWORD PTR[24+rsp]
+ xor esi,ebx
+ psrld xmm10,30
+ mov edi,edx
+ rol edx,5
+ xor esi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[80+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,esi
+ por xmm1,xmm10
+ add ebx,DWORD PTR[28+rsp]
+ xor edi,eax
+ movdqa xmm8,xmm1
+ mov esi,ecx
+ rol ecx,5
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[32+rsp]
+ pxor xmm2,xmm6
+DB 102,68,15,58,15,192,8
+ xor esi,ebp
+ mov edi,ebx
+ rol ebx,5
+ pxor xmm2,xmm3
+ xor esi,edx
+ add eax,ebx
+ movdqa xmm10,XMMWORD PTR[32+r11]
+ paddd xmm9,xmm1
+ ror ecx,7
+ add eax,esi
+ pxor xmm2,xmm8
+ add ebp,DWORD PTR[36+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[96+r15]
+ xor edi,edx
+ mov esi,eax
+ rol eax,5
+ movdqa xmm8,xmm2
+ movdqa XMMWORD PTR[16+rsp],xmm9
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ pslld xmm2,2
+ add edx,DWORD PTR[40+rsp]
+ xor esi,ecx
+ psrld xmm8,30
+ mov edi,ebp
+ rol ebp,5
+ xor esi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,esi
+ por xmm2,xmm8
+ add ecx,DWORD PTR[44+rsp]
+ xor edi,ebx
+ movdqa xmm9,xmm2
+ mov esi,edx
+ rol edx,5
+ xor edi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[112+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ add ebx,DWORD PTR[48+rsp]
+ pxor xmm3,xmm7
+DB 102,68,15,58,15,201,8
+ xor esi,eax
+ mov edi,ecx
+ rol ecx,5
+ pxor xmm3,xmm4
+ xor esi,ebp
+ add ebx,ecx
+ movdqa xmm8,xmm10
+ paddd xmm10,xmm2
+ ror edx,7
+ add ebx,esi
+ pxor xmm3,xmm9
+ add eax,DWORD PTR[52+rsp]
+ xor edi,ebp
+ mov esi,ebx
+ rol ebx,5
+ movdqa xmm9,xmm3
+ movdqa XMMWORD PTR[32+rsp],xmm10
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ pslld xmm3,2
+ add ebp,DWORD PTR[56+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[128+r15]
+ xor esi,edx
+ psrld xmm9,30
+ mov edi,eax
+ rol eax,5
+ xor esi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,esi
+ por xmm3,xmm9
+ add edx,DWORD PTR[60+rsp]
+ xor edi,ecx
+ movdqa xmm10,xmm3
+ mov esi,ebp
+ rol ebp,5
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD PTR[rsp]
+ pxor xmm4,xmm0
+DB 102,68,15,58,15,210,8
+ xor esi,ebx
+ mov edi,edx
+ rol edx,5
+ pxor xmm4,xmm5
+ xor esi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[144+r15]
+ add ecx,edx
+ movdqa xmm9,xmm8
+ paddd xmm8,xmm3
+ ror ebp,7
+ add ecx,esi
+ pxor xmm4,xmm10
+ add ebx,DWORD PTR[4+rsp]
+ xor edi,eax
+ mov esi,ecx
+ rol ecx,5
+ movdqa xmm10,xmm4
+ movdqa XMMWORD PTR[48+rsp],xmm8
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ pslld xmm4,2
+ add eax,DWORD PTR[8+rsp]
+ xor esi,ebp
+ psrld xmm10,30
+ mov edi,ebx
+ rol ebx,5
+ xor esi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,esi
+ por xmm4,xmm10
+ add ebp,DWORD PTR[12+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[160+r15]
+ xor edi,edx
+ movdqa xmm8,xmm4
+ mov esi,eax
+ rol eax,5
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[16+rsp]
+ pxor xmm5,xmm1
+DB 102,68,15,58,15,195,8
+ xor esi,ecx
+ mov edi,ebp
+ rol ebp,5
+ pxor xmm5,xmm6
+ xor esi,ebx
+ add edx,ebp
+ movdqa xmm10,xmm9
+ paddd xmm9,xmm4
+ ror eax,7
+ add edx,esi
+ pxor xmm5,xmm8
+ add ecx,DWORD PTR[20+rsp]
+ xor edi,ebx
+ mov esi,edx
+ rol edx,5
+ movdqa xmm8,xmm5
+ movdqa XMMWORD PTR[rsp],xmm9
+ xor edi,eax
+ cmp r8d,11
+ jb $L$aesenclast2
+ movups xmm14,XMMWORD PTR[176+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[192+r15]
+ aesenc xmm11,xmm14
+ je $L$aesenclast2
+ movups xmm14,XMMWORD PTR[208+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[224+r15]
+ aesenc xmm11,xmm14
+$L$aesenclast2::
+ aesenclast xmm11,xmm15
+ movups xmm14,XMMWORD PTR[16+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ pslld xmm5,2
+ add ebx,DWORD PTR[24+rsp]
+ xor esi,eax
+ psrld xmm8,30
+ mov edi,ecx
+ rol ecx,5
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ por xmm5,xmm8
+ add eax,DWORD PTR[28+rsp]
+ xor edi,ebp
+ movdqa xmm9,xmm5
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ mov edi,ecx
+ movups xmm12,XMMWORD PTR[32+r12]
+ xorps xmm12,xmm13
+ movups XMMWORD PTR[16+r12*1+r13],xmm11
+ xorps xmm11,xmm12
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[32+r15]
+ pxor xmm6,xmm2
+DB 102,68,15,58,15,204,8
+ xor ecx,edx
+ add ebp,DWORD PTR[32+rsp]
+ and edi,edx
+ pxor xmm6,xmm7
+ and esi,ecx
+ ror ebx,7
+ movdqa xmm8,xmm10
+ paddd xmm10,xmm5
+ add ebp,edi
+ mov edi,eax
+ pxor xmm6,xmm9
+ rol eax,5
+ add ebp,esi
+ xor ecx,edx
+ add ebp,eax
+ movdqa xmm9,xmm6
+ movdqa XMMWORD PTR[16+rsp],xmm10
+ mov esi,ebx
+ xor ebx,ecx
+ add edx,DWORD PTR[36+rsp]
+ and esi,ecx
+ pslld xmm6,2
+ and edi,ebx
+ ror eax,7
+ psrld xmm9,30
+ add edx,esi
+ mov esi,ebp
+ rol ebp,5
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[48+r15]
+ add edx,edi
+ xor ebx,ecx
+ add edx,ebp
+ por xmm6,xmm9
+ mov edi,eax
+ xor eax,ebx
+ movdqa xmm10,xmm6
+ add ecx,DWORD PTR[40+rsp]
+ and edi,ebx
+ and esi,eax
+ ror ebp,7
+ add ecx,edi
+ mov edi,edx
+ rol edx,5
+ add ecx,esi
+ xor eax,ebx
+ add ecx,edx
+ mov esi,ebp
+ xor ebp,eax
+ add ebx,DWORD PTR[44+rsp]
+ and esi,eax
+ and edi,ebp
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[64+r15]
+ ror edx,7
+ add ebx,esi
+ mov esi,ecx
+ rol ecx,5
+ add ebx,edi
+ xor ebp,eax
+ add ebx,ecx
+ mov edi,edx
+ pxor xmm7,xmm3
+DB 102,68,15,58,15,213,8
+ xor edx,ebp
+ add eax,DWORD PTR[48+rsp]
+ and edi,ebp
+ pxor xmm7,xmm0
+ and esi,edx
+ ror ecx,7
+ movdqa xmm9,XMMWORD PTR[48+r11]
+ paddd xmm8,xmm6
+ add eax,edi
+ mov edi,ebx
+ pxor xmm7,xmm10
+ rol ebx,5
+ add eax,esi
+ xor edx,ebp
+ add eax,ebx
+ movdqa xmm10,xmm7
+ movdqa XMMWORD PTR[32+rsp],xmm8
+ mov esi,ecx
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[80+r15]
+ xor ecx,edx
+ add ebp,DWORD PTR[52+rsp]
+ and esi,edx
+ pslld xmm7,2
+ and edi,ecx
+ ror ebx,7
+ psrld xmm10,30
+ add ebp,esi
+ mov esi,eax
+ rol eax,5
+ add ebp,edi
+ xor ecx,edx
+ add ebp,eax
+ por xmm7,xmm10
+ mov edi,ebx
+ xor ebx,ecx
+ movdqa xmm8,xmm7
+ add edx,DWORD PTR[56+rsp]
+ and edi,ecx
+ and esi,ebx
+ ror eax,7
+ add edx,edi
+ mov edi,ebp
+ rol ebp,5
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[96+r15]
+ add edx,esi
+ xor ebx,ecx
+ add edx,ebp
+ mov esi,eax
+ xor eax,ebx
+ add ecx,DWORD PTR[60+rsp]
+ and esi,ebx
+ and edi,eax
+ ror ebp,7
+ add ecx,esi
+ mov esi,edx
+ rol edx,5
+ add ecx,edi
+ xor eax,ebx
+ add ecx,edx
+ mov edi,ebp
+ pxor xmm0,xmm4
+DB 102,68,15,58,15,198,8
+ xor ebp,eax
+ add ebx,DWORD PTR[rsp]
+ and edi,eax
+ pxor xmm0,xmm1
+ and esi,ebp
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[112+r15]
+ ror edx,7
+ movdqa xmm10,xmm9
+ paddd xmm9,xmm7
+ add ebx,edi
+ mov edi,ecx
+ pxor xmm0,xmm8
+ rol ecx,5
+ add ebx,esi
+ xor ebp,eax
+ add ebx,ecx
+ movdqa xmm8,xmm0
+ movdqa XMMWORD PTR[48+rsp],xmm9
+ mov esi,edx
+ xor edx,ebp
+ add eax,DWORD PTR[4+rsp]
+ and esi,ebp
+ pslld xmm0,2
+ and edi,edx
+ ror ecx,7
+ psrld xmm8,30
+ add eax,esi
+ mov esi,ebx
+ rol ebx,5
+ add eax,edi
+ xor edx,ebp
+ add eax,ebx
+ por xmm0,xmm8
+ mov edi,ecx
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[128+r15]
+ xor ecx,edx
+ movdqa xmm9,xmm0
+ add ebp,DWORD PTR[8+rsp]
+ and edi,edx
+ and esi,ecx
+ ror ebx,7
+ add ebp,edi
+ mov edi,eax
+ rol eax,5
+ add ebp,esi
+ xor ecx,edx
+ add ebp,eax
+ mov esi,ebx
+ xor ebx,ecx
+ add edx,DWORD PTR[12+rsp]
+ and esi,ecx
+ and edi,ebx
+ ror eax,7
+ add edx,esi
+ mov esi,ebp
+ rol ebp,5
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[144+r15]
+ add edx,edi
+ xor ebx,ecx
+ add edx,ebp
+ mov edi,eax
+ pxor xmm1,xmm5
+DB 102,68,15,58,15,207,8
+ xor eax,ebx
+ add ecx,DWORD PTR[16+rsp]
+ and edi,ebx
+ pxor xmm1,xmm2
+ and esi,eax
+ ror ebp,7
+ movdqa xmm8,xmm10
+ paddd xmm10,xmm0
+ add ecx,edi
+ mov edi,edx
+ pxor xmm1,xmm9
+ rol edx,5
+ add ecx,esi
+ xor eax,ebx
+ add ecx,edx
+ movdqa xmm9,xmm1
+ movdqa XMMWORD PTR[rsp],xmm10
+ mov esi,ebp
+ xor ebp,eax
+ add ebx,DWORD PTR[20+rsp]
+ and esi,eax
+ pslld xmm1,2
+ and edi,ebp
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[160+r15]
+ ror edx,7
+ psrld xmm9,30
+ add ebx,esi
+ mov esi,ecx
+ rol ecx,5
+ add ebx,edi
+ xor ebp,eax
+ add ebx,ecx
+ por xmm1,xmm9
+ mov edi,edx
+ xor edx,ebp
+ movdqa xmm10,xmm1
+ add eax,DWORD PTR[24+rsp]
+ and edi,ebp
+ and esi,edx
+ ror ecx,7
+ add eax,edi
+ mov edi,ebx
+ rol ebx,5
+ add eax,esi
+ xor edx,ebp
+ add eax,ebx
+ mov esi,ecx
+ cmp r8d,11
+ jb $L$aesenclast3
+ movups xmm14,XMMWORD PTR[176+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[192+r15]
+ aesenc xmm11,xmm14
+ je $L$aesenclast3
+ movups xmm14,XMMWORD PTR[208+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[224+r15]
+ aesenc xmm11,xmm14
+$L$aesenclast3::
+ aesenclast xmm11,xmm15
+ movups xmm14,XMMWORD PTR[16+r15]
+ xor ecx,edx
+ add ebp,DWORD PTR[28+rsp]
+ and esi,edx
+ and edi,ecx
+ ror ebx,7
+ add ebp,esi
+ mov esi,eax
+ rol eax,5
+ add ebp,edi
+ xor ecx,edx
+ add ebp,eax
+ mov edi,ebx
+ pxor xmm2,xmm6
+DB 102,68,15,58,15,208,8
+ xor ebx,ecx
+ add edx,DWORD PTR[32+rsp]
+ and edi,ecx
+ pxor xmm2,xmm3
+ and esi,ebx
+ ror eax,7
+ movdqa xmm9,xmm8
+ paddd xmm8,xmm1
+ add edx,edi
+ mov edi,ebp
+ pxor xmm2,xmm10
+ rol ebp,5
+ movups xmm12,XMMWORD PTR[48+r12]
+ xorps xmm12,xmm13
+ movups XMMWORD PTR[32+r12*1+r13],xmm11
+ xorps xmm11,xmm12
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[32+r15]
+ add edx,esi
+ xor ebx,ecx
+ add edx,ebp
+ movdqa xmm10,xmm2
+ movdqa XMMWORD PTR[16+rsp],xmm8
+ mov esi,eax
+ xor eax,ebx
+ add ecx,DWORD PTR[36+rsp]
+ and esi,ebx
+ pslld xmm2,2
+ and edi,eax
+ ror ebp,7
+ psrld xmm10,30
+ add ecx,esi
+ mov esi,edx
+ rol edx,5
+ add ecx,edi
+ xor eax,ebx
+ add ecx,edx
+ por xmm2,xmm10
+ mov edi,ebp
+ xor ebp,eax
+ movdqa xmm8,xmm2
+ add ebx,DWORD PTR[40+rsp]
+ and edi,eax
+ and esi,ebp
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[48+r15]
+ ror edx,7
+ add ebx,edi
+ mov edi,ecx
+ rol ecx,5
+ add ebx,esi
+ xor ebp,eax
+ add ebx,ecx
+ mov esi,edx
+ xor edx,ebp
+ add eax,DWORD PTR[44+rsp]
+ and esi,ebp
+ and edi,edx
+ ror ecx,7
+ add eax,esi
+ mov esi,ebx
+ rol ebx,5
+ add eax,edi
+ xor edx,ebp
+ add eax,ebx
+ add ebp,DWORD PTR[48+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[64+r15]
+ pxor xmm3,xmm7
+DB 102,68,15,58,15,193,8
+ xor esi,edx
+ mov edi,eax
+ rol eax,5
+ pxor xmm3,xmm4
+ xor esi,ecx
+ add ebp,eax
+ movdqa xmm10,xmm9
+ paddd xmm9,xmm2
+ ror ebx,7
+ add ebp,esi
+ pxor xmm3,xmm8
+ add edx,DWORD PTR[52+rsp]
+ xor edi,ecx
+ mov esi,ebp
+ rol ebp,5
+ movdqa xmm8,xmm3
+ movdqa XMMWORD PTR[32+rsp],xmm9
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ pslld xmm3,2
+ add ecx,DWORD PTR[56+rsp]
+ xor esi,ebx
+ psrld xmm8,30
+ mov edi,edx
+ rol edx,5
+ xor esi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[80+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,esi
+ por xmm3,xmm8
+ add ebx,DWORD PTR[60+rsp]
+ xor edi,eax
+ mov esi,ecx
+ rol ecx,5
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[rsp]
+ paddd xmm10,xmm3
+ xor esi,ebp
+ mov edi,ebx
+ rol ebx,5
+ xor esi,edx
+ movdqa XMMWORD PTR[48+rsp],xmm10
+ add eax,ebx
+ ror ecx,7
+ add eax,esi
+ add ebp,DWORD PTR[4+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[96+r15]
+ xor edi,edx
+ mov esi,eax
+ rol eax,5
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[8+rsp]
+ xor esi,ecx
+ mov edi,ebp
+ rol ebp,5
+ xor esi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,esi
+ add ecx,DWORD PTR[12+rsp]
+ xor edi,ebx
+ mov esi,edx
+ rol edx,5
+ xor edi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[112+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ cmp r10,r14
+ je $L$done_ssse3
+ movdqa xmm6,XMMWORD PTR[64+r11]
+ movdqa xmm9,XMMWORD PTR[r11]
+ movdqu xmm0,XMMWORD PTR[r10]
+ movdqu xmm1,XMMWORD PTR[16+r10]
+ movdqu xmm2,XMMWORD PTR[32+r10]
+ movdqu xmm3,XMMWORD PTR[48+r10]
+DB 102,15,56,0,198
+ add r10,64
+ add ebx,DWORD PTR[16+rsp]
+ xor esi,eax
+DB 102,15,56,0,206
+ mov edi,ecx
+ rol ecx,5
+ paddd xmm0,xmm9
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ movdqa XMMWORD PTR[rsp],xmm0
+ add eax,DWORD PTR[20+rsp]
+ xor edi,ebp
+ psubd xmm0,xmm9
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ add ebp,DWORD PTR[24+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[128+r15]
+ xor esi,edx
+ mov edi,eax
+ rol eax,5
+ xor esi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,esi
+ add edx,DWORD PTR[28+rsp]
+ xor edi,ecx
+ mov esi,ebp
+ rol ebp,5
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD PTR[32+rsp]
+ xor esi,ebx
+DB 102,15,56,0,214
+ mov edi,edx
+ rol edx,5
+ paddd xmm1,xmm9
+ xor esi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[144+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,esi
+ movdqa XMMWORD PTR[16+rsp],xmm1
+ add ebx,DWORD PTR[36+rsp]
+ xor edi,eax
+ psubd xmm1,xmm9
+ mov esi,ecx
+ rol ecx,5
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[40+rsp]
+ xor esi,ebp
+ mov edi,ebx
+ rol ebx,5
+ xor esi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,esi
+ add ebp,DWORD PTR[44+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[160+r15]
+ xor edi,edx
+ mov esi,eax
+ rol eax,5
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[48+rsp]
+ xor esi,ecx
+DB 102,15,56,0,222
+ mov edi,ebp
+ rol ebp,5
+ paddd xmm2,xmm9
+ xor esi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,esi
+ movdqa XMMWORD PTR[32+rsp],xmm2
+ add ecx,DWORD PTR[52+rsp]
+ xor edi,ebx
+ psubd xmm2,xmm9
+ mov esi,edx
+ rol edx,5
+ xor edi,eax
+ cmp r8d,11
+ jb $L$aesenclast4
+ movups xmm14,XMMWORD PTR[176+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[192+r15]
+ aesenc xmm11,xmm14
+ je $L$aesenclast4
+ movups xmm14,XMMWORD PTR[208+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[224+r15]
+ aesenc xmm11,xmm14
+$L$aesenclast4::
+ aesenclast xmm11,xmm15
+ movups xmm14,XMMWORD PTR[16+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ add ebx,DWORD PTR[56+rsp]
+ xor esi,eax
+ mov edi,ecx
+ rol ecx,5
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ add eax,DWORD PTR[60+rsp]
+ xor edi,ebp
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ movups XMMWORD PTR[48+r12*1+r13],xmm11
+ lea r12,QWORD PTR[64+r12]
+
+ add eax,DWORD PTR[r9]
+ add esi,DWORD PTR[4+r9]
+ add ecx,DWORD PTR[8+r9]
+ add edx,DWORD PTR[12+r9]
+ mov DWORD PTR[r9],eax
+ add ebp,DWORD PTR[16+r9]
+ mov DWORD PTR[4+r9],esi
+ mov ebx,esi
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+ mov DWORD PTR[16+r9],ebp
+ jmp $L$oop_ssse3
+
+ALIGN 16
+$L$done_ssse3::
+ add ebx,DWORD PTR[16+rsp]
+ xor esi,eax
+ mov edi,ecx
+ rol ecx,5
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ add eax,DWORD PTR[20+rsp]
+ xor edi,ebp
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ add ebp,DWORD PTR[24+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[128+r15]
+ xor esi,edx
+ mov edi,eax
+ rol eax,5
+ xor esi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,esi
+ add edx,DWORD PTR[28+rsp]
+ xor edi,ecx
+ mov esi,ebp
+ rol ebp,5
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD PTR[32+rsp]
+ xor esi,ebx
+ mov edi,edx
+ rol edx,5
+ xor esi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[144+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,esi
+ add ebx,DWORD PTR[36+rsp]
+ xor edi,eax
+ mov esi,ecx
+ rol ecx,5
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[40+rsp]
+ xor esi,ebp
+ mov edi,ebx
+ rol ebx,5
+ xor esi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,esi
+ add ebp,DWORD PTR[44+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[160+r15]
+ xor edi,edx
+ mov esi,eax
+ rol eax,5
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[48+rsp]
+ xor esi,ecx
+ mov edi,ebp
+ rol ebp,5
+ xor esi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,esi
+ add ecx,DWORD PTR[52+rsp]
+ xor edi,ebx
+ mov esi,edx
+ rol edx,5
+ xor edi,eax
+ cmp r8d,11
+ jb $L$aesenclast5
+ movups xmm14,XMMWORD PTR[176+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[192+r15]
+ aesenc xmm11,xmm14
+ je $L$aesenclast5
+ movups xmm14,XMMWORD PTR[208+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[224+r15]
+ aesenc xmm11,xmm14
+$L$aesenclast5::
+ aesenclast xmm11,xmm15
+ movups xmm14,XMMWORD PTR[16+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ add ebx,DWORD PTR[56+rsp]
+ xor esi,eax
+ mov edi,ecx
+ rol ecx,5
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ add eax,DWORD PTR[60+rsp]
+ xor edi,ebp
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ movups XMMWORD PTR[48+r12*1+r13],xmm11
+ mov r8,QWORD PTR[88+rsp]
+
+ add eax,DWORD PTR[r9]
+ add esi,DWORD PTR[4+r9]
+ add ecx,DWORD PTR[8+r9]
+ mov DWORD PTR[r9],eax
+ add edx,DWORD PTR[12+r9]
+ mov DWORD PTR[4+r9],esi
+ add ebp,DWORD PTR[16+r9]
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+ mov DWORD PTR[16+r9],ebp
+ movups XMMWORD PTR[r8],xmm11
+ movaps xmm6,XMMWORD PTR[((96+0))+rsp]
+ movaps xmm7,XMMWORD PTR[((96+16))+rsp]
+ movaps xmm8,XMMWORD PTR[((96+32))+rsp]
+ movaps xmm9,XMMWORD PTR[((96+48))+rsp]
+ movaps xmm10,XMMWORD PTR[((96+64))+rsp]
+ movaps xmm11,XMMWORD PTR[((96+80))+rsp]
+ movaps xmm12,XMMWORD PTR[((96+96))+rsp]
+ movaps xmm13,XMMWORD PTR[((96+112))+rsp]
+ movaps xmm14,XMMWORD PTR[((96+128))+rsp]
+ movaps xmm15,XMMWORD PTR[((96+144))+rsp]
+ lea rsi,QWORD PTR[264+rsp]
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+$L$epilogue_ssse3::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_aesni_cbc_sha1_enc_ssse3::
+aesni_cbc_sha1_enc_ssse3 ENDP
+ALIGN 64
+K_XX_XX:: ; constant pool for the SSSE3 SHA-1 code: four broadcast round constants followed by a pshufb mask
+ DD 05a827999h,05a827999h,05a827999h,05a827999h ; offset 0: SHA-1 K for rounds 0-19 (FIPS 180-4), replicated into all four dword lanes
+ DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h ; offset 16: SHA-1 K for rounds 20-39
+ DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch ; offset 32: SHA-1 K for rounds 40-59
+ DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h ; offset 48: SHA-1 K for rounds 60-79
+ DD 000010203h,004050607h,008090a0bh,00c0d0e0fh ; offset 64: pshufb control that reverses the bytes inside each dword (big-endian message words -> host order)
+
+DB 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115 ; NUL-terminated ASCII banner, not referenced by code:
+DB 116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52 ; "AESNI-CBC+SHA1 stitch for x86_64, CRYPTOGAMS by <appro@openssl.org>"
+DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
+DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
+DB 114,103,62,0
+ALIGN 64
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+ssse3_handler PROC PRIVATE ; Win64 SEH language-specific handler named by the .xdata record of aesni_cbc_sha1_enc_ssse3; r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT* (offsets below follow the Windows x64 structure layouts)
+ push rsi ; preserve every nonvolatile GPR (and flags) that this handler overwrites
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64 ; 32 bytes of shadow space + four stack-argument slots for the RtlVirtualUnwind call; 9 pushes + 64 keep rsp 16-byte aligned there
+
+ mov rax,QWORD PTR[120+r8] ; rax = context->Rax (the function copies its entry rsp into rax before its begin label)
+ mov rbx,QWORD PTR[248+r8] ; rbx = context->Rip, the faulting instruction address
+
+ mov rsi,QWORD PTR[8+r9] ; rsi = disp->ImageBase
+ mov r11,QWORD PTR[56+r9] ; r11 = disp->HandlerData (the two imagerel labels stored after the handler RVA in .xdata)
+
+ mov r10d,DWORD PTR[r11] ; HandlerData[0] = RVA of the prologue-end label
+ lea r10,QWORD PTR[r10*1+rsi] ; convert the RVA to an absolute address
+ cmp rbx,r10
+ jb $L$common_seh_tail ; fault before the frame was fully built: rax already holds the entry rsp
+
+ mov rax,QWORD PTR[152+r8] ; rax = context->Rsp
+
+ mov r10d,DWORD PTR[4+r11] ; HandlerData[1] = RVA of the epilogue label
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$common_seh_tail ; fault at/after the epilogue label: frame already torn down, context->Rsp is the entry rsp
+
+ lea rsi,QWORD PTR[96+rax] ; source: xmm6-xmm15 save area at 96(rsp) in the function's frame
+ lea rdi,QWORD PTR[512+r8] ; destination: context->Xmm6
+ mov ecx,20 ; 20 qwords = ten 16-byte XMM registers
+ DD 0a548f3fch ; hand-encoded bytes FC F3 48 A5 = cld; rep movsq
+ lea rax,QWORD PTR[264+rax] ; step over the 264-byte local area to the six pushed GPRs
+
+ mov r15,QWORD PTR[rax] ; reload the saved GPRs in the same order the function's epilogue uses
+ mov r14,QWORD PTR[8+rax]
+ mov r13,QWORD PTR[16+rax]
+ mov r12,QWORD PTR[24+rax]
+ mov rbp,QWORD PTR[32+rax]
+ mov rbx,QWORD PTR[40+rax]
+ lea rax,QWORD PTR[48+rax] ; rax = rsp value at function entry
+ mov QWORD PTR[144+r8],rbx ; context->Rbx
+ mov QWORD PTR[160+r8],rbp ; context->Rbp
+ mov QWORD PTR[216+r8],r12 ; context->R12
+ mov QWORD PTR[224+r8],r13 ; context->R13
+ mov QWORD PTR[232+r8],r14 ; context->R14
+ mov QWORD PTR[240+r8],r15 ; context->R15
+
+$L$common_seh_tail:: ; here rax = the function's entry rsp on every path
+ mov rdi,QWORD PTR[8+rax] ; rdi/rsi were spilled to 8(rsp)/16(rsp) at entry by the WIN64 prologue
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax ; context->Rsp
+ mov QWORD PTR[168+r8],rsi ; context->Rsi
+ mov QWORD PTR[176+r8],rdi ; context->Rdi
+
+ mov rdi,QWORD PTR[40+r9] ; destination: disp->ContextRecord
+ mov rsi,r8 ; source: the patched CONTEXT
+ mov ecx,154 ; 154 qwords = 1232 bytes, i.e. sizeof(CONTEXT) on x64
+ DD 0a548f3fch ; cld; rep movsq (same hand-encoded bytes as above)
+
+ mov rsi,r9 ; rsi = DISPATCHER_CONTEXT*, kept while r8/r9 are reloaded as call arguments
+ xor rcx,rcx ; arg1 HandlerType = 0 (UNW_FLAG_NHANDLER)
+ mov rdx,QWORD PTR[8+rsi] ; arg2 = disp->ImageBase
+ mov r8,QWORD PTR[rsi] ; arg3 = disp->ControlPc
+ mov r9,QWORD PTR[16+rsi] ; arg4 = disp->FunctionEntry
+ mov r10,QWORD PTR[40+rsi] ; disp->ContextRecord
+ lea r11,QWORD PTR[56+rsi] ; &disp->HandlerData
+ lea r12,QWORD PTR[24+rsi] ; &disp->EstablisherFrame
+ mov QWORD PTR[32+rsp],r10 ; arg5 = ContextRecord
+ mov QWORD PTR[40+rsp],r11 ; arg6 = &HandlerData
+ mov QWORD PTR[48+rsp],r12 ; arg7 = &EstablisherFrame
+ mov QWORD PTR[56+rsp],rcx ; arg8 = NULL (no ContextPointers)
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1 ; return value 1 = ExceptionContinueSearch
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+ssse3_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4)
+ALIGN 4
+ DD imagerel $L$SEH_begin_aesni_cbc_sha1_enc_ssse3 ; RUNTIME_FUNCTION.BeginAddress: start of the range covered by this unwind record
+ DD imagerel $L$SEH_end_aesni_cbc_sha1_enc_ssse3 ; RUNTIME_FUNCTION.EndAddress
+ DD imagerel $L$SEH_info_aesni_cbc_sha1_enc_ssse3 ; RUNTIME_FUNCTION.UnwindData: RVA of the UNWIND_INFO record in .xdata
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8)
+ALIGN 8
+$L$SEH_info_aesni_cbc_sha1_enc_ssse3::
+DB 9,0,0,0 ; UNWIND_INFO header: version 1 with UNW_FLAG_EHANDLER (byte 9), prolog size 0, zero unwind codes, no frame register
+ DD imagerel ssse3_handler ; RVA of the language-specific handler; all unwinding is done by that routine since no unwind codes are given
+ DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3 ; handler data read by ssse3_handler as HandlerData[0] / HandlerData[1] (prologue-end and epilogue RVAs)
+
+.xdata ENDS
+END
+
diff --git a/ext/libressl/crypto/aes/aesni-sha1-mingw64-x86_64.S b/ext/libressl/crypto/aes/aesni-sha1-mingw64-x86_64.S
new file mode 100644
index 0000000..c7a2d5c
--- /dev/null
+++ b/ext/libressl/crypto/aes/aesni-sha1-mingw64-x86_64.S
@@ -0,0 +1,1536 @@
+#include "x86_arch.h"
+.text
+
+
+
+.globl aesni_cbc_sha1_enc /* exported entry point; it only forwards to the SSSE3 implementation below */
+.def aesni_cbc_sha1_enc; .scl 2; .type 32; .endef /* COFF symbol record: storage class 2 (external), type 32 (function) */
+.p2align 4
+aesni_cbc_sha1_enc:
+
+ movl OPENSSL_ia32cap_P+0(%rip),%r10d /* capability word 0 is loaded but never tested here */
+ movl OPENSSL_ia32cap_P+4(%rip),%r11d /* capability word 1, likewise unused: the jump target reloads both r10 and r11 before reading them */
+ jmp aesni_cbc_sha1_enc_ssse3 /* unconditional tail jump: arguments and return address are passed through untouched */
+ retq /* unreachable: control never falls through the jmp above */
+
+.def aesni_cbc_sha1_enc_ssse3; .scl 3; .type 32; .endef
+.p2align 4
+aesni_cbc_sha1_enc_ssse3:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_aesni_cbc_sha1_enc_ssse3:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+
+ movq 56(%rsp),%r10
+
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -264(%rsp),%rsp
+
+
+ movaps %xmm6,96+0(%rsp)
+ movaps %xmm7,96+16(%rsp)
+ movaps %xmm8,96+32(%rsp)
+ movaps %xmm9,96+48(%rsp)
+ movaps %xmm10,96+64(%rsp)
+ movaps %xmm11,96+80(%rsp)
+ movaps %xmm12,96+96(%rsp)
+ movaps %xmm13,96+112(%rsp)
+ movaps %xmm14,96+128(%rsp)
+ movaps %xmm15,96+144(%rsp)
+.Lprologue_ssse3:
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+ movdqu (%r8),%xmm11
+ movq %r8,88(%rsp)
+ shlq $6,%r14
+ subq %r12,%r13
+ movl 240(%r15),%r8d
+ addq %r10,%r14
+
+ leaq K_XX_XX(%rip),%r11
+ movl 0(%r9),%eax
+ movl 4(%r9),%ebx
+ movl 8(%r9),%ecx
+ movl 12(%r9),%edx
+ movl %ebx,%esi
+ movl 16(%r9),%ebp
+
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ movups (%r15),%xmm13
+ movups 16(%r15),%xmm14
+ jmp .Loop_ssse3
+.p2align 4
+.Loop_ssse3:
+ movdqa %xmm1,%xmm4
+ addl 0(%rsp),%ebp
+ movups 0(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ roll $5,%eax
+ paddd %xmm3,%xmm9
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%ebp
+ pxor %xmm2,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pxor %xmm8,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm9,48(%rsp)
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm10,%xmm9
+ rorl $7,%ebp
+ addl %esi,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa 0(%r11),%xmm10
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm4
+ rorl $7,%edx
+ addl %edi,%ebx
+ movdqa %xmm2,%xmm5
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ roll $5,%ebx
+ paddd %xmm4,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrldq $4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm9
+ addl 20(%rsp),%ebp
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm9,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm10,0(%rsp)
+ xorl %edx,%edi
+ addl %eax,%ebp
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ roll $5,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm9
+ xorl %ecx,%esi
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ addl %ebp,%edx
+ movdqa %xmm8,%xmm10
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ movdqa 16(%r11),%xmm8
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ pxor %xmm10,%xmm5
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movdqa %xmm3,%xmm6
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ paddd %xmm5,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ psrldq $4,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm10,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm8,16(%rsp)
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm10
+ xorl %edx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa 16(%r11),%xmm9
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %ebp,%edx
+ pxor %xmm8,%xmm6
+ rorl $7,%eax
+ addl %edi,%edx
+ movdqa %xmm4,%xmm7
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm6,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm5,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ pxor %xmm8,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm9,32(%rsp)
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrld $31,%xmm8
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ movdqa %xmm10,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ addl 60(%rsp),%ebp
+ cmpl $11,%r8d
+ jb .Laesenclast1
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast1
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+.Laesenclast1:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa 16(%r11),%xmm10
+ xorl %edx,%edi
+ addl %eax,%ebp
+ pxor %xmm9,%xmm7
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movdqa %xmm7,%xmm9
+ addl 0(%rsp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm1,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ xorl %ecx,%esi
+ movups 16(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,0(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ addl %ebp,%edx
+ pxor %xmm9,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ psrld $30,%xmm9
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ por %xmm9,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ movdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 16(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 32(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast2
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast2
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+.Laesenclast2:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ xorl %ebp,%edi
+ movdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ecx,%edi
+ movups 32(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,16(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp
+ andl %edx,%edi
+ pxor %xmm7,%xmm6
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ pxor %xmm9,%xmm6
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%edi
+ rorl $7,%eax
+ psrld $30,%xmm9
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ por %xmm9,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%edi
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ pxor %xmm0,%xmm7
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 48(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ pxor %xmm10,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ movl %ecx,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%edi
+ rorl $7,%ebx
+ psrld $30,%xmm10
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ por %xmm10,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%edi
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx
+ andl %eax,%edi
+ pxor %xmm1,%xmm0
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ rorl $7,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ pxor %xmm8,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ pslld $2,%xmm0
+ andl %edx,%edi
+ rorl $7,%ecx
+ psrld $30,%xmm8
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ por %xmm8,%xmm0
+ movl %ecx,%edi
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%edi
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ pxor %xmm2,%xmm1
+ andl %eax,%esi
+ rorl $7,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ pxor %xmm9,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ rorl $7,%edx
+ psrld $30,%xmm9
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ por %xmm9,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax
+ andl %ebp,%edi
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ cmpl $11,%r8d
+ jb .Laesenclast3
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast3
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+.Laesenclast3:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp
+ andl %edx,%esi
+ andl %ecx,%edi
+ rorl $7,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%edi
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ pxor %xmm3,%xmm2
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ pxor %xmm10,%xmm2
+ roll $5,%ebp
+ movups 48(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,32(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%edi
+ rorl $7,%ebp
+ psrld $30,%xmm10
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm10,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ rorl $7,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax
+ andl %ebp,%esi
+ andl %edx,%edi
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ cmpq %r14,%r10
+ je .Ldone_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,0(%rsp)
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ psubd %xmm9,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm9,%xmm1
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movdqa %xmm1,16(%rsp)
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ psubd %xmm9,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ paddd %xmm9,%xmm2
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%rsp)
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ psubd %xmm9,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast4
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast4
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+.Laesenclast4:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ leaq 64(%r12),%r12
+
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ addl 12(%r9),%edx
+ movl %eax,0(%r9)
+ addl 16(%r9),%ebp
+ movl %esi,4(%r9)
+ movl %esi,%ebx
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ jmp .Loop_ssse3
+
+.p2align 4
+.Ldone_ssse3:
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast5
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast5
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+.Laesenclast5:
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ movq 88(%rsp),%r8
+
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ movl %eax,0(%r9)
+ addl 12(%r9),%edx
+ movl %esi,4(%r9)
+ addl 16(%r9),%ebp
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ movups %xmm11,(%r8)
+ movaps 96+0(%rsp),%xmm6
+ movaps 96+16(%rsp),%xmm7
+ movaps 96+32(%rsp),%xmm8
+ movaps 96+48(%rsp),%xmm9
+ movaps 96+64(%rsp),%xmm10
+ movaps 96+80(%rsp),%xmm11
+ movaps 96+96(%rsp),%xmm12
+ movaps 96+112(%rsp),%xmm13
+ movaps 96+128(%rsp),%xmm14
+ movaps 96+144(%rsp),%xmm15
+ leaq 264(%rsp),%rsi
+ movq 0(%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue_ssse3:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_aesni_cbc_sha1_enc_ssse3:
+.p2align 6 /* 64-byte align the constant table */
+K_XX_XX: /* constant table; each .long row below is one 16-byte vector */
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 /* SHA-1 round constant K for rounds 0-19, replicated in all 4 lanes */
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 /* SHA-1 K for rounds 20-39 */
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc /* SHA-1 K for rounds 40-59 */
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 /* SHA-1 K for rounds 60-79 */
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f /* byte-shuffle mask; presumably the pshufb control used to byte-swap message words - confirm against the loads in the function body */
+
+.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 /* NUL-terminated ASCII: "AESNI-CBC+SHA1 stitch for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
+.p2align 6
+
+.def ssse3_handler; .scl 3; .type 32; .endef /* COFF symbol record: storage class 3 (static), type 32 (function) */
+.p2align 4
+ssse3_handler: /* Win64 SEH language handler registered in .xdata for aesni_cbc_sha1_enc_ssse3. Uses r8 as the CONTEXT record and r9 as the DISPATCHER_CONTEXT; the field names in the comments below follow the public Win64 structure layouts - verify against winnt.h. Rebuilds the caller's non-volatile registers in the CONTEXT, calls RtlVirtualUnwind, returns eax = 1. */
+ pushq %rsi /* save every Win64 non-volatile GPR this handler touches */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq /* flags are saved because the cld below changes DF */
+ subq $64,%rsp /* 32 bytes shadow space + 4 stack-argument slots for the call below; 9 pushes + 64 keep rsp 16-byte aligned at the call */
+
+ movq 120(%r8),%rax /* rax = context->Rax; used as the frame base if Rip is still inside the prologue (presumably the prologue keeps the entry rsp in rax - confirm) */
+ movq 248(%r8),%rbx /* rbx = context->Rip */
+
+ movq 8(%r9),%rsi /* rsi = disp->ImageBase */
+ movq 56(%r9),%r11 /* r11 = disp->HandlerData: the two RVAs stored after the handler in .xdata */
+
+ movl 0(%r11),%r10d /* HandlerData[0]: RVA of the end-of-prologue label */
+ leaq (%rsi,%r10,1),%r10 /* RVA -> absolute address */
+ cmpq %r10,%rbx
+ jb .Lcommon_seh_tail /* Rip before that label: frame not set up, nothing to restore from the stack */
+
+ movq 152(%r8),%rax /* rax = context->Rsp */
+
+ movl 4(%r11),%r10d /* HandlerData[1]: RVA of the epilogue label */
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lcommon_seh_tail /* Rip at or past the epilogue label: registers and rsp already restored */
+
+ leaq 96(%rax),%rsi /* source: xmm6-xmm15 save area at frame+96 (same offset the function epilogue reloads from) */
+ leaq 512(%r8),%rdi /* destination: context->Xmm6 */
+ movl $20,%ecx /* 20 qwords = 10 XMM registers */
+.long 0xa548f3fc /* encoded bytes fc f3 48 a5 = cld; rep movsq */
+ leaq 264(%rax),%rax /* rax -> saved GPR block (same offset the function epilogue uses) */
+
+ movq 0(%rax),%r15
+ movq 8(%rax),%r14
+ movq 16(%rax),%r13
+ movq 24(%rax),%r12
+ movq 32(%rax),%rbp
+ movq 40(%rax),%rbx
+ leaq 48(%rax),%rax /* rax = stack pointer as it was on function entry (points at the return address) */
+ movq %rbx,144(%r8) /* context->Rbx */
+ movq %rbp,160(%r8) /* context->Rbp */
+ movq %r12,216(%r8) /* context->R12 */
+ movq %r13,224(%r8) /* context->R13 */
+ movq %r14,232(%r8) /* context->R14 */
+ movq %r15,240(%r8) /* context->R15 */
+
+.Lcommon_seh_tail: /* here rax = entry rsp of the interrupted function */
+ movq 8(%rax),%rdi /* rdi/rsi were spilled just above the return address (the epilogue reloads them from the same slots) */
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8) /* context->Rsp */
+ movq %rsi,168(%r8) /* context->Rsi */
+ movq %rdi,176(%r8) /* context->Rdi */
+
+ movq 40(%r9),%rdi /* rdi = disp->ContextRecord (copy destination) */
+ movq %r8,%rsi /* rsi = the context fixed up above (copy source) */
+ movl $154,%ecx /* 154 qwords = 1232 bytes, presumably sizeof(CONTEXT) - confirm */
+.long 0xa548f3fc /* cld; rep movsq */
+
+ movq %r9,%rsi /* rsi = disp; r8/r9 are about to be reused as call arguments */
+ xorq %rcx,%rcx /* arg1 HandlerType = 0 (UNW_FLAG_NHANDLER) */
+ movq 8(%rsi),%rdx /* arg2 = disp->ImageBase */
+ movq 0(%rsi),%r8 /* arg3 = disp->ControlPc */
+ movq 16(%rsi),%r9 /* arg4 = disp->FunctionEntry */
+ movq 40(%rsi),%r10 /* disp->ContextRecord */
+ leaq 56(%rsi),%r11 /* &disp->HandlerData */
+ leaq 24(%rsi),%r12 /* &disp->EstablisherFrame */
+ movq %r10,32(%rsp) /* arg5 */
+ movq %r11,40(%rsp) /* arg6 */
+ movq %r12,48(%rsp) /* arg7 */
+ movq %rcx,56(%rsp) /* arg8 = NULL */
+ call *__imp_RtlVirtualUnwind(%rip)
+
+ movl $1,%eax /* return value 1 = ExceptionContinueSearch */
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+
+
+.section .pdata /* Win64 function table: one entry of three RVAs (function begin, function end, unwind info) */
+.p2align 2
+.rva .LSEH_begin_aesni_cbc_sha1_enc_ssse3
+.rva .LSEH_end_aesni_cbc_sha1_enc_ssse3
+.rva .LSEH_info_aesni_cbc_sha1_enc_ssse3
+.section .xdata /* unwind information referenced from the .pdata entry above */
+.p2align 3
+.LSEH_info_aesni_cbc_sha1_enc_ssse3:
+.byte 9,0,0,0 /* UNWIND_INFO header: byte 0 = version 1 with UNW_FLAG_EHANDLER; prologue size, unwind-code count and frame register all 0 */
+.rva ssse3_handler /* language-specific handler */
+.rva .Lprologue_ssse3,.Lepilogue_ssse3 /* handler data: the two RVAs ssse3_handler reads at offsets 0 and 4 of HandlerData */
diff --git a/ext/libressl/crypto/aes/bsaes-elf-x86_64.S b/ext/libressl/crypto/aes/bsaes-elf-x86_64.S
new file mode 100644
index 0000000..903e374
--- /dev/null
+++ b/ext/libressl/crypto/aes/bsaes-elf-x86_64.S
@@ -0,0 +1,2502 @@
+#include "x86_arch.h"
+.text
+
+
+
+
+.type _bsaes_encrypt8,@function
+.align 64
+_bsaes_encrypt8: /* Internal helper: encrypts the eight 128-bit blocks held in xmm15, xmm0-xmm6 with a bit-sliced AES round loop. In: rax = key schedule (first 16 bytes XORed into every block, then 128 bytes consumed per loop iteration, then 16 bytes for the final XOR); r10d = number of rounds. Out: results stay in xmm15, xmm0-xmm6 - NOTE(review): the register-to-block order on return is permuted, confirm against callers. Clobbers rax, r10d, r11, xmm7-xmm14, flags. */
+ leaq .LBS0(%rip),%r11 /* r11 = base of the constant/mask tables (defined outside this excerpt) */
+
+ movdqa (%rax),%xmm8 /* xmm8 = first round key */
+ leaq 16(%rax),%rax
+ movdqa 80(%r11),%xmm7 /* pshufb control applied to every input block */
+ pxor %xmm8,%xmm15 /* initial key addition, interleaved with the byte permutation */
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255 /* pshufb %xmm7,%xmm15 */
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199 /* pshufb %xmm7,%xmm0 */
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207 /* pshufb %xmm7,%xmm1 */
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215 /* pshufb %xmm7,%xmm2 */
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223 /* pshufb %xmm7,%xmm3 */
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231 /* pshufb %xmm7,%xmm4 */
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239 /* pshufb %xmm7,%xmm5 */
+.byte 102,15,56,0,247 /* pshufb %xmm7,%xmm6 */
+_bsaes_encrypt8_bitslice: /* alternate entry that skips the key addition/permutation above; r11, rax and r10d must already be set up as described for _bsaes_encrypt8 */
+ movdqa 0(%r11),%xmm7 /* mask for the shift-by-1 exchange steps */
+ movdqa 16(%r11),%xmm8 /* mask for the shift-by-2 exchange steps */
+ movdqa %xmm5,%xmm9 /* bit transposition: masked exchanges between register pairs with shifts 1, 2 and 4 */
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7 /* mask for the shift-by-4 exchange steps (loaded early; the shift-by-2 steps below use xmm8) */
+ movdqa %xmm4,%xmm9
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d /* round counter; the first round enters at the S-box because its key addition/permutation was done before the transposition */
+ jmp .Lenc_sbox
+.align 16
+.Lenc_loop: /* start of a round: XOR in 8 x 16 bytes of round key and pshufb each register with the mask in xmm7 */
+ pxor 0(%rax),%xmm15
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255 /* pshufb %xmm7,%xmm15 */
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199 /* pshufb %xmm7,%xmm0 */
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207 /* pshufb %xmm7,%xmm1 */
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215 /* pshufb %xmm7,%xmm2 */
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223 /* pshufb %xmm7,%xmm3 */
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231 /* pshufb %xmm7,%xmm4 */
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239 /* pshufb %xmm7,%xmm5 */
+ leaq 128(%rax),%rax /* advance to the next round's key material */
+.byte 102,15,56,0,247 /* pshufb %xmm7,%xmm6 */
+.Lenc_sbox: /* S-box stage (per the label): a pure pxor/pand/por network over xmm0-xmm6, xmm15 using xmm7-xmm14 as temporaries; no memory access */
+ pxor %xmm5,%xmm4
+ pxor %xmm0,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm1,%xmm5
+ pxor %xmm15,%xmm4
+
+ pxor %xmm2,%xmm5
+ pxor %xmm6,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm3,%xmm2
+ pxor %xmm4,%xmm3
+ pxor %xmm0,%xmm2
+
+ pxor %xmm6,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm6,%xmm10
+ movdqa %xmm0,%xmm9
+ movdqa %xmm4,%xmm8
+ movdqa %xmm1,%xmm12
+ movdqa %xmm5,%xmm11
+
+ pxor %xmm3,%xmm10
+ pxor %xmm1,%xmm9
+ pxor %xmm2,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm3,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm15,%xmm11
+ movdqa %xmm10,%xmm14
+
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm2,%xmm11
+ pxor %xmm15,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm6,%xmm12
+ movdqa %xmm4,%xmm11
+ pxor %xmm0,%xmm12
+ pxor %xmm5,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm1,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm3,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm0,%xmm13
+ pand %xmm2,%xmm11
+ movdqa %xmm6,%xmm14
+ pand %xmm15,%xmm12
+ pand %xmm4,%xmm13
+ por %xmm5,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+
+
+
+
+
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+
+ pxor %xmm9,%xmm10
+
+ pand %xmm10,%xmm12
+
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+
+ pand %xmm7,%xmm9
+
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+
+ pand %xmm14,%xmm13
+
+ pxor %xmm11,%xmm13
+ movdqa %xmm5,%xmm11
+ movdqa %xmm4,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm5,%xmm9
+ pxor %xmm4,%xmm5
+ pand %xmm14,%xmm4
+ pand %xmm13,%xmm5
+ pxor %xmm4,%xmm5
+ pxor %xmm9,%xmm4
+ pxor %xmm15,%xmm11
+ pxor %xmm2,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm2,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm2
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm2,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm2
+ pxor %xmm11,%xmm5
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm2
+
+ movdqa %xmm6,%xmm11
+ movdqa %xmm0,%xmm7
+ pxor %xmm3,%xmm11
+ pxor %xmm1,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm3,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm1,%xmm3
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm1
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm3
+ pxor %xmm11,%xmm7
+ pxor %xmm1,%xmm3
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm1
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm6,%xmm10
+ pxor %xmm0,%xmm6
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm6
+ pxor %xmm0,%xmm6
+ pxor %xmm10,%xmm0
+ pxor %xmm11,%xmm6
+ pxor %xmm11,%xmm3
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm1
+ pxor %xmm15,%xmm6
+ pxor %xmm5,%xmm0
+ pxor %xmm6,%xmm3
+ pxor %xmm15,%xmm5
+ pxor %xmm0,%xmm15
+
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ pxor %xmm2,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm4,%xmm3
+
+ pxor %xmm2,%xmm5
+ decl %r10d /* one round consumed; the flags set here are also tested by the jnz further down */
+ jl .Lenc_done /* counter went negative: that was the last round, skip the mixing stage */
+ pshufd $147,%xmm15,%xmm7 /* mixing stage: pshufd $147 rotates the four dwords by one lane, pshufd $78 swaps the 64-bit halves - presumably MixColumns in bit-sliced form */
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm3,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm5,%xmm10
+ pxor %xmm9,%xmm3
+ pshufd $147,%xmm2,%xmm11
+ pxor %xmm10,%xmm5
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm2
+ pshufd $147,%xmm1,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm1
+ pxor %xmm14,%xmm4
+
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm2,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm5,%xmm11
+ pshufd $78,%xmm2,%xmm7
+ pxor %xmm1,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm3,%xmm10
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm1,%xmm5
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm12,%xmm8
+ pxor %xmm10,%xmm2
+ pxor %xmm14,%xmm6
+ pxor %xmm13,%xmm5
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm1
+ movdqa %xmm8,%xmm4
+ movdqa 48(%r11),%xmm7 /* pshufb mask for an ordinary round */
+ jnz .Lenc_loop /* ZF is still from the decl above (none of the SSE instructions in between write flags): counter != 0 */
+ movdqa 64(%r11),%xmm7 /* counter == 0: the final round uses a different pshufb mask */
+ jmp .Lenc_loop
+.align 16
+.Lenc_done: /* reverse the bit transposition (same exchange steps, on the register arrangement left by the round loop) */
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm2,%xmm10
+ psrlq $1,%xmm2
+ pxor %xmm4,%xmm1
+ pxor %xmm6,%xmm2
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm2
+ pxor %xmm1,%xmm4
+ psllq $1,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $1,%xmm2
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm2
+ movdqa %xmm3,%xmm9
+ psrlq $1,%xmm3
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm5,%xmm3
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm15
+ pxor %xmm3,%xmm5
+ psllq $1,%xmm3
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9
+ psrlq $2,%xmm6
+ movdqa %xmm2,%xmm10
+ psrlq $2,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm1,%xmm2
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm2
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm2,%xmm1
+ psllq $2,%xmm2
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm2
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm5,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm5
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm5,%xmm9
+ psrlq $4,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $4,%xmm3
+ pxor %xmm4,%xmm5
+ pxor %xmm1,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm4
+ psllq $4,%xmm5
+ pxor %xmm3,%xmm1
+ psllq $4,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm2,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm2
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7 /* final 16-byte round key, XORed into all eight result registers */
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq
+.size _bsaes_encrypt8,.-_bsaes_encrypt8
+
+.type _bsaes_decrypt8,@function # Internal helper: runs the decryption rounds on eight 16-byte blocks held in registers.
+.align 64 # In: xmm15,xmm0..xmm6 = the eight blocks; rax = key schedule (16 bytes, then 128 bytes per loop pass, then 16 bytes); r10d = round count.
+_bsaes_decrypt8: # Out: results in xmm15,xmm0,xmm5,xmm3,xmm1,xmm6,xmm2,xmm4 (the order callers store them). Overwrites rax, r10d, r11, xmm7-xmm14, flags.
+ leaq .LBS0(%rip),%r11 # r11 = address of .LBS0; all masks below are addressed relative to it (table is outside this chunk)
+
+ movdqa (%rax),%xmm8 # xmm8 = first 16-byte key block
+ leaq 16(%rax),%rax # rax now points at the per-round 128-byte key blocks
+ movdqa -48(%r11),%xmm7 # byte-shuffle mask stored 48 bytes before .LBS0
+ pxor %xmm8,%xmm15 # XOR the first key block into each of the eight lanes ...
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255 # pshufb %xmm7,%xmm15 (hand-encoded) ... and permute the bytes of each lane
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199 # pshufb %xmm7,%xmm0
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207 # pshufb %xmm7,%xmm1
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215 # pshufb %xmm7,%xmm2
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223 # pshufb %xmm7,%xmm3
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231 # pshufb %xmm7,%xmm4
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239 # pshufb %xmm7,%xmm5
+.byte 102,15,56,0,247 # pshufb %xmm7,%xmm6
+ movdqa 0(%r11),%xmm7 # mask used with the shift-by-1 exchanges
+ movdqa 16(%r11),%xmm8 # mask used with the shift-by-2 exchanges
+ movdqa %xmm5,%xmm9 # Exchange step pattern: t = ((a >> n) ^ b) & mask; b ^= t; a ^= t << n.
+ psrlq $1,%xmm5 # Shift 1, pairs (xmm5,xmm6) and (xmm3,xmm4); presumably part of the bit-slice transposition - TODO confirm
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9 # shift 1, pairs (xmm1,xmm2) and (xmm15,xmm0)
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7 # mask used with the shift-by-4 exchanges
+ movdqa %xmm4,%xmm9 # shift 2, pairs (xmm4,xmm6) and (xmm3,xmm5)
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9 # shift 2, pairs (xmm0,xmm2) and (xmm15,xmm1)
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9 # shift 4, pairs (xmm2,xmm6) and (xmm1,xmm5)
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9 # shift 4, pairs (xmm0,xmm4) and (xmm15,xmm3)
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d # one round is accounted for before entering the loop
+ jmp .Ldec_sbox # first pass skips the key XOR / shuffle at the loop top
+.align 16
+.Ldec_loop: # Loop top: XOR the next 128 bytes of key material into the eight lanes and shuffle each lane by xmm7.
+ pxor 0(%rax),%xmm15
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255 # pshufb %xmm7,%xmm15
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199 # pshufb %xmm7,%xmm0
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207 # pshufb %xmm7,%xmm1
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215 # pshufb %xmm7,%xmm2
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223 # pshufb %xmm7,%xmm3
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231 # pshufb %xmm7,%xmm4
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239 # pshufb %xmm7,%xmm5
+ leaq 128(%rax),%rax # advance to the next 128-byte key block
+.byte 102,15,56,0,247 # pshufb %xmm7,%xmm6
+.Ldec_sbox: # Boolean circuit over the eight lanes using only pxor/pand/por/movdqa; presumably the bit-sliced inverse S-box - TODO confirm against the generator script
+ pxor %xmm3,%xmm2
+
+ pxor %xmm6,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm3,%xmm5
+ pxor %xmm5,%xmm6
+ pxor %xmm6,%xmm0
+
+ pxor %xmm0,%xmm15
+ pxor %xmm4,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm15,%xmm4
+ pxor %xmm2,%xmm0
+ movdqa %xmm2,%xmm10 # xmm7-xmm14 serve as temporaries from here to the end of the circuit
+ movdqa %xmm6,%xmm9
+ movdqa %xmm0,%xmm8
+ movdqa %xmm3,%xmm12
+ movdqa %xmm4,%xmm11
+
+ pxor %xmm15,%xmm10
+ pxor %xmm3,%xmm9
+ pxor %xmm5,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm15,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm1,%xmm11
+ movdqa %xmm10,%xmm14
+
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm5,%xmm11
+ pxor %xmm1,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm2,%xmm12
+ movdqa %xmm0,%xmm11
+ pxor %xmm6,%xmm12
+ pxor %xmm4,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm3,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm15,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm6,%xmm13
+ pand %xmm5,%xmm11
+ movdqa %xmm2,%xmm14
+ pand %xmm1,%xmm12
+ pand %xmm0,%xmm13
+ por %xmm4,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+
+
+
+
+
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+
+ pxor %xmm9,%xmm10
+
+ pand %xmm10,%xmm12
+
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+
+ pand %xmm7,%xmm9
+
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+
+ pand %xmm14,%xmm13
+
+ pxor %xmm11,%xmm13
+ movdqa %xmm4,%xmm11
+ movdqa %xmm0,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm4,%xmm9
+ pxor %xmm0,%xmm4
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm4
+ pxor %xmm0,%xmm4
+ pxor %xmm9,%xmm0
+ pxor %xmm1,%xmm11
+ pxor %xmm5,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm1,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm5,%xmm1
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm5
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm1
+ pxor %xmm11,%xmm7
+ pxor %xmm5,%xmm1
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm5
+ pxor %xmm11,%xmm4
+ pxor %xmm11,%xmm1
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm5
+
+ movdqa %xmm2,%xmm11
+ movdqa %xmm6,%xmm7
+ pxor %xmm15,%xmm11
+ pxor %xmm3,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm3,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm3
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm3,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm3
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm2,%xmm10
+ pxor %xmm6,%xmm2
+ pand %xmm14,%xmm6
+ pand %xmm13,%xmm2
+ pxor %xmm6,%xmm2
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm3
+ pxor %xmm6,%xmm0
+ pxor %xmm4,%xmm5
+
+ pxor %xmm0,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm6,%xmm4
+ pxor %xmm1,%xmm3
+ pxor %xmm15,%xmm6
+ pxor %xmm4,%xmm3
+ pxor %xmm5,%xmm2
+ pxor %xmm0,%xmm5
+ pxor %xmm3,%xmm2
+
+ pxor %xmm15,%xmm3
+ pxor %xmm2,%xmm6
+ decl %r10d # count down the rounds; the resulting flags are also consumed by the jnz further below
+ jl .Ldec_done # counter went negative: the last round skips the mixing step
+
+ pshufd $78,%xmm15,%xmm7 # Linear mixing: pshufd $78 swaps the 64-bit halves, $147 rotates the dwords by one position; presumably the inverse MixColumns - TODO confirm
+ pshufd $78,%xmm2,%xmm13
+ pxor %xmm15,%xmm7
+ pshufd $78,%xmm4,%xmm14
+ pxor %xmm2,%xmm13
+ pshufd $78,%xmm0,%xmm8
+ pxor %xmm4,%xmm14
+ pshufd $78,%xmm5,%xmm9
+ pxor %xmm0,%xmm8
+ pshufd $78,%xmm3,%xmm10
+ pxor %xmm5,%xmm9
+ pxor %xmm13,%xmm15
+ pxor %xmm13,%xmm0
+ pshufd $78,%xmm1,%xmm11
+ pxor %xmm3,%xmm10
+ pxor %xmm7,%xmm5
+ pxor %xmm8,%xmm3
+ pshufd $78,%xmm6,%xmm12
+ pxor %xmm1,%xmm11
+ pxor %xmm14,%xmm0
+ pxor %xmm9,%xmm1
+ pxor %xmm6,%xmm12
+
+ pxor %xmm14,%xmm5
+ pxor %xmm13,%xmm3
+ pxor %xmm13,%xmm1
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm14,%xmm1
+ pxor %xmm14,%xmm6
+ pxor %xmm12,%xmm4
+ pshufd $147,%xmm15,%xmm7
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm5,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm3,%xmm10
+ pxor %xmm9,%xmm5
+ pshufd $147,%xmm1,%xmm11
+ pxor %xmm10,%xmm3
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm1
+ pshufd $147,%xmm2,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm2
+ pxor %xmm14,%xmm4
+
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm1,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm3,%xmm11
+ pshufd $78,%xmm1,%xmm7
+ pxor %xmm2,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm5,%xmm10
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm2,%xmm3
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm12,%xmm8
+ pxor %xmm1,%xmm10
+ pxor %xmm14,%xmm6
+ pxor %xmm3,%xmm13
+ movdqa %xmm7,%xmm3 # move the mixed values back into the lane registers expected at the loop top
+ pxor %xmm9,%xmm2
+ movdqa %xmm13,%xmm5
+ movdqa %xmm8,%xmm4
+ movdqa %xmm2,%xmm1
+ movdqa %xmm10,%xmm2
+ movdqa -16(%r11),%xmm7 # shuffle mask for an ordinary round (16 bytes before .LBS0)
+ jnz .Ldec_loop # flags still come from the decl above; the SSE instructions in between leave them untouched
+ movdqa -32(%r11),%xmm7 # counter reached zero: use the mask 32 bytes before .LBS0 for the final pass
+ jmp .Ldec_loop
+.align 16
+.Ldec_done: # Same exchange-step pattern as at entry, applied to the output lanes with the masks at 0/16/32(%r11).
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm2,%xmm9 # shift 1, pairs (xmm2,xmm4) and (xmm1,xmm6)
+ psrlq $1,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $1,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm6,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm4
+ psllq $1,%xmm2
+ pxor %xmm1,%xmm6
+ psllq $1,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm5,%xmm9 # shift 1, pairs (xmm5,xmm3) and (xmm15,xmm0)
+ psrlq $1,%xmm5
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm3,%xmm5
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm15
+ pxor %xmm5,%xmm3
+ psllq $1,%xmm5
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9 # shift 2, pairs (xmm6,xmm4) and (xmm1,xmm2)
+ psrlq $2,%xmm6
+ movdqa %xmm1,%xmm10
+ psrlq $2,%xmm1
+ pxor %xmm4,%xmm6
+ pxor %xmm2,%xmm1
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm1
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm1,%xmm2
+ psllq $2,%xmm1
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9 # shift 2, pairs (xmm0,xmm3) and (xmm15,xmm5)
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm3,%xmm0
+ pxor %xmm5,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm3
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm5
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm3,%xmm9 # shift 4, pairs (xmm3,xmm4) and (xmm5,xmm2)
+ psrlq $4,%xmm3
+ movdqa %xmm5,%xmm10
+ psrlq $4,%xmm5
+ pxor %xmm4,%xmm3
+ pxor %xmm2,%xmm5
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $4,%xmm3
+ pxor %xmm5,%xmm2
+ psllq $4,%xmm5
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm5
+ movdqa %xmm0,%xmm9 # shift 4, pairs (xmm0,xmm6) and (xmm15,xmm1)
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7 # final 16-byte key block, located right after the last 128-byte block
+ pxor %xmm7,%xmm5 # XOR it into all eight result lanes
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq
+.size _bsaes_decrypt8,.-_bsaes_decrypt8
+.type _bsaes_key_convert,@function # Internal helper: expands a conventional key schedule into the 128-bytes-per-round form consumed by the 8-block routines.
+.align 16 # In: rcx = source schedule (unaligned loads), rax = destination (movdqa stores, so 16-byte aligned), r10d = round count.
+_bsaes_key_convert: # Out: rax = next free destination slot, xmm6 = last source round key (not converted), xmm7 = constant from 80(.Lmasks). Overwrites rcx, r10d, r11, xmm0-xmm15, flags.
+ leaq .Lmasks(%rip),%r11 # r11 = address of the .Lmasks table (defined outside this chunk)
+ movdqu (%rcx),%xmm7 # round-0 key, copied through unchanged
+ leaq 16(%rcx),%rcx
+ movdqa 0(%r11),%xmm0 # xmm0-xmm3 = four bit-selection masks
+ movdqa 16(%r11),%xmm1
+ movdqa 32(%r11),%xmm2
+ movdqa 48(%r11),%xmm3
+ movdqa 64(%r11),%xmm4 # xmm4 = byte-shuffle mask applied to every round key
+ pcmpeqd %xmm5,%xmm5 # xmm5 = all ones, used to invert selected planes
+
+ movdqu (%rcx),%xmm6 # preload round key 1
+ movdqa %xmm7,(%rax) # store the round-0 key as the first 16 bytes of the output
+ leaq 16(%rax),%rax
+ decl %r10d # the loop below runs (round count - 1) times; assumes the count is at least 2
+ jmp .Lkey_loop
+.align 16
+.Lkey_loop: # Per round key: eight 16-byte planes, each byte 0xFF where the masked key bits are all set, else 0x00.
+.byte 102,15,56,0,244 # pshufb %xmm4,%xmm6 (hand-encoded)
+
+ movdqa %xmm0,%xmm8
+ movdqa %xmm1,%xmm9
+
+ pand %xmm6,%xmm8
+ pand %xmm6,%xmm9
+ movdqa %xmm2,%xmm10
+ pcmpeqb %xmm0,%xmm8 # plane 0: (key & mask0) == mask0, per byte
+ psllq $4,%xmm0 # shift the masks left by 4 to address the other four planes
+ movdqa %xmm3,%xmm11
+ pcmpeqb %xmm1,%xmm9 # plane 1
+ psllq $4,%xmm1
+
+ pand %xmm6,%xmm10
+ pand %xmm6,%xmm11
+ movdqa %xmm0,%xmm12
+ pcmpeqb %xmm2,%xmm10 # plane 2
+ psllq $4,%xmm2
+ movdqa %xmm1,%xmm13
+ pcmpeqb %xmm3,%xmm11 # plane 3
+ psllq $4,%xmm3
+
+ movdqa %xmm2,%xmm14
+ movdqa %xmm3,%xmm15
+ pxor %xmm5,%xmm8 # planes 0 and 1 are stored inverted (planes 5 and 6 likewise below); presumably folds a constant into the key - TODO confirm
+ pxor %xmm5,%xmm9
+
+ pand %xmm6,%xmm12
+ pand %xmm6,%xmm13
+ movdqa %xmm8,0(%rax)
+ pcmpeqb %xmm0,%xmm12 # plane 4
+ psrlq $4,%xmm0 # restore the masks for the next iteration
+ movdqa %xmm9,16(%rax)
+ pcmpeqb %xmm1,%xmm13 # plane 5
+ psrlq $4,%xmm1
+ leaq 16(%rcx),%rcx # advance to the next source round key
+
+ pand %xmm6,%xmm14
+ pand %xmm6,%xmm15
+ movdqa %xmm10,32(%rax)
+ pcmpeqb %xmm2,%xmm14 # plane 6
+ psrlq $4,%xmm2
+ movdqa %xmm11,48(%rax)
+ pcmpeqb %xmm3,%xmm15 # plane 7
+ psrlq $4,%xmm3
+ movdqu (%rcx),%xmm6 # load the next round key; after the final iteration this is the last key, returned in xmm6
+
+ pxor %xmm5,%xmm13 # invert plane 5
+ pxor %xmm5,%xmm14 # invert plane 6
+ movdqa %xmm12,64(%rax)
+ movdqa %xmm13,80(%rax)
+ movdqa %xmm14,96(%rax)
+ movdqa %xmm15,112(%rax)
+ leaq 128(%rax),%rax # 128 output bytes per converted round key
+ decl %r10d
+ jnz .Lkey_loop
+
+ movdqa 80(%r11),%xmm7 # constant the callers XOR into either the first or the last round key
+
+ retq
+.size _bsaes_key_convert,.-_bsaes_key_convert
+
+.globl bsaes_cbc_encrypt # CBC entry point. The code in this function handles only the r9d == 0 case with at least 128 bytes; everything else is forwarded.
+.type bsaes_cbc_encrypt,@function # Register use: rdi = input, rsi = output, rdx = length in bytes, rcx = key (round count read at offset 240), r8 = 16-byte IV (read and updated), r9d = mode flag.
+.align 16
+bsaes_cbc_encrypt:
+ cmpl $0,%r9d
+ jne asm_AES_cbc_encrypt # nonzero flag (presumably encrypt): tail-jump with all arguments untouched
+ cmpq $128,%rdx
+ jb asm_AES_cbc_encrypt # fewer than eight blocks: same fallback
+
+ movq %rsp,%rax # entry rsp; rax is overwritten below before any use in this function
+.Lcbc_dec_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp # 72-byte scratch area; saved registers end up at 72..112(%rbp)
+ movq %rsp,%rbp # rbp = frame base; 32(%rbp) is the IV save slot
+ movl 240(%rcx),%eax # eax = round count from the key structure
+ movq %rdi,%r12 # r12 = input pointer
+ movq %rsi,%r13 # r13 = output pointer
+ movq %rdx,%r14 # r14 = length
+ movq %rcx,%r15 # r15 = key
+ movq %r8,%rbx # rbx = IV pointer
+ shrq $4,%r14 # bytes -> number of 16-byte blocks
+
+ movl %eax,%edx # edx keeps the round count for the calls below
+ shlq $7,%rax # reserve rounds*128 - 96 bytes on the stack for the converted schedule
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax # destination for the converted schedule
+ movq %r15,%rcx # source key
+ movl %edx,%r10d # round count
+ call _bsaes_key_convert
+ pxor (%rsp),%xmm7 # fold the constant returned in xmm7 into the round-0 key ...
+ movdqa %xmm6,(%rax) # ... append the last round key (returned in xmm6) unchanged ...
+ movdqa %xmm7,(%rsp) # ... and write the adjusted round-0 key back
+
+ movdqu (%rbx),%xmm14 # xmm14 = current IV
+ subq $8,%r14 # bias the count so the loop can test the borrow
+.Lcbc_dec_loop: # Main loop: eight blocks per pass.
+ movdqu 0(%r12),%xmm15
+ movdqu 16(%r12),%xmm0
+ movdqu 32(%r12),%xmm1
+ movdqu 48(%r12),%xmm2
+ movdqu 64(%r12),%xmm3
+ movdqu 80(%r12),%xmm4
+ movq %rsp,%rax # key schedule for _bsaes_decrypt8
+ movdqu 96(%r12),%xmm5
+ movl %edx,%r10d # round count for _bsaes_decrypt8
+ movdqu 112(%r12),%xmm6
+ movdqa %xmm14,32(%rbp) # save the IV; _bsaes_decrypt8 overwrites xmm14
+
+ call _bsaes_decrypt8
+
+ pxor 32(%rbp),%xmm15 # block 0 ^= IV
+ movdqu 0(%r12),%xmm7 # reload the ciphertext: each block is chained with the previous ciphertext block
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm2
+ movdqu 112(%r12),%xmm14 # last ciphertext block becomes the next IV
+ pxor %xmm13,%xmm4
+ movdqu %xmm15,0(%r13) # results are stored in the register order xmm15,xmm0,xmm5,xmm3,xmm1,xmm6,xmm2,xmm4
+ leaq 128(%r12),%r12
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ subq $8,%r14
+ jnc .Lcbc_dec_loop # continue while at least eight blocks remained
+
+ addq $8,%r14 # undo the bias: r14 = 0..7 remaining blocks
+ jz .Lcbc_dec_done
+
+ movdqu 0(%r12),%xmm15 # Tail: load only the blocks that exist, then branch to the matching handler.
+ movq %rsp,%rax
+ movl %edx,%r10d
+ cmpq $2,%r14
+ jb .Lcbc_dec_one
+ movdqu 16(%r12),%xmm0
+ je .Lcbc_dec_two # flags still from the cmpq above; movdqu does not change them
+ movdqu 32(%r12),%xmm1
+ cmpq $4,%r14
+ jb .Lcbc_dec_three
+ movdqu 48(%r12),%xmm2
+ je .Lcbc_dec_four
+ movdqu 64(%r12),%xmm3
+ cmpq $6,%r14
+ jb .Lcbc_dec_five
+ movdqu 80(%r12),%xmm4
+ je .Lcbc_dec_six
+ movdqu 96(%r12),%xmm5 # seven blocks remain; the eighth lane is processed but never stored
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm14 # last ciphertext block = IV to hand back
+ pxor %xmm12,%xmm2
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_six: # six remaining blocks
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm14
+ pxor %xmm11,%xmm6
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_five: # five remaining blocks
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm14
+ pxor %xmm10,%xmm1
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_four: # four remaining blocks
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm14
+ pxor %xmm9,%xmm3
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_three: # three remaining blocks
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm14
+ pxor %xmm8,%xmm5
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_two: # two remaining blocks
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm14
+ pxor %xmm7,%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_one: # Single remaining block: use the one-block routine with the original (unconverted) key.
+ leaq (%r12),%rdi # input = the ciphertext block
+ leaq 32(%rbp),%rsi # output = scratch slot
+ leaq (%r15),%rdx # key
+ call asm_AES_decrypt # NOTE(review): xmm14 (IV) and xmm15 (ciphertext) are used after this call, so it is assumed not to touch them - confirm
+ pxor 32(%rbp),%xmm14 # plaintext = decrypted block ^ IV
+ movdqu %xmm14,(%r13)
+ movdqa %xmm15,%xmm14 # this ciphertext block is the IV to hand back
+
+.Lcbc_dec_done:
+ movdqu %xmm14,(%rbx) # write the updated IV back to the caller
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lcbc_dec_bzero: # Zero the stack from rsp up to rbp (the converted key schedule), 32 bytes per pass.
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lcbc_dec_bzero
+
+ leaq (%rbp),%rsp # drop the key schedule area
+ movq 72(%rsp),%r15 # restore the registers pushed in the prologue
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax # saved rbp, moved into place after rsp is adjusted
+ leaq 120(%rsp),%rsp # 72 bytes scratch + 6 saved registers
+ movq %rax,%rbp
+.Lcbc_dec_epilogue:
+ retq
+.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
+
+.globl bsaes_ctr32_encrypt_blocks # Counter-mode entry point: XORs the input with encrypted counter blocks, eight blocks per pass when possible.
+.type bsaes_ctr32_encrypt_blocks,@function # Register use: rdi = input, rsi = output, rdx = number of 16-byte blocks, rcx = key (round count at offset 240), r8 = 16-byte counter block.
+.align 16
+bsaes_ctr32_encrypt_blocks:
+ movq %rsp,%rax # entry rsp; rax is overwritten below before any use in this function
+.Lctr_enc_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp # 72-byte scratch area; saved registers end up at 72..112(%rbp)
+ movq %rsp,%rbp # rbp = frame base
+ movdqu (%r8),%xmm0 # load the caller counter block
+ movl 240(%rcx),%eax # eax = round count
+ movq %rdi,%r12 # r12 = input pointer
+ movq %rsi,%r13 # r13 = output pointer
+ movq %rdx,%r14 # r14 = blocks remaining
+ movq %rcx,%r15 # r15 = key
+ movdqa %xmm0,32(%rbp) # working copy of the counter block at 32(%rbp)
+ cmpq $8,%rdx
+ jb .Lctr_enc_short # fewer than eight blocks: one-block-at-a-time path. NOTE(review): a count of 0 also lands there and is not guarded - callers presumably never pass 0
+
+ movl %eax,%ebx # ebx keeps the round count for the calls below
+ shlq $7,%rax # reserve rounds*128 - 96 bytes on the stack for the converted schedule
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax # destination for the converted schedule
+ movq %r15,%rcx # source key
+ movl %ebx,%r10d # round count
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7 # last round key (xmm6) combined with the constant returned in xmm7 ...
+ movdqa %xmm7,(%rax) # ... stored as the final 16 bytes of the schedule
+
+ movdqa (%rsp),%xmm8 # round-0 key
+ leaq .LADD1(%rip),%r11 # r11 = address of .LADD1; the counter increments and nearby shuffle masks are relative to it (table not visible here)
+ movdqa 32(%rbp),%xmm15 # counter block
+ movdqa -32(%r11),%xmm7 # shuffle mask 32 bytes before .LADD1
+.byte 102,68,15,56,0,199 # pshufb %xmm7,%xmm8 (hand-encoded): permute the round-0 key
+.byte 102,68,15,56,0,255 # pshufb %xmm7,%xmm15: permute the counter the same way
+ movdqa %xmm8,(%rsp) # keep the permuted round-0 key in the schedule
+ jmp .Lctr_enc_loop
+.align 16
+.Lctr_enc_loop: # Build eight counter values: xmm15 is the base, xmm0..xmm6 get the constants at .LADD1+0..96 added (presumably +1..+7 - TODO confirm).
+ movdqa %xmm15,32(%rbp) # remember the base counter for the next pass
+ movdqa %xmm15,%xmm0
+ movdqa %xmm15,%xmm1
+ paddd 0(%r11),%xmm0
+ movdqa %xmm15,%xmm2
+ paddd 16(%r11),%xmm1
+ movdqa %xmm15,%xmm3
+ paddd 32(%r11),%xmm2
+ movdqa %xmm15,%xmm4
+ paddd 48(%r11),%xmm3
+ movdqa %xmm15,%xmm5
+ paddd 64(%r11),%xmm4
+ movdqa %xmm15,%xmm6
+ paddd 80(%r11),%xmm5
+ paddd 96(%r11),%xmm6
+
+
+
+ movdqa (%rsp),%xmm8 # round-0 key (permuted above)
+ leaq 16(%rsp),%rax # rax = first 128-byte round key block
+ movdqa -16(%r11),%xmm7 # shuffle mask 16 bytes before .LADD1
+ pxor %xmm8,%xmm15 # XOR the round-0 key into each counter and permute its bytes
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255 # pshufb %xmm7,%xmm15
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199 # pshufb %xmm7,%xmm0
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207 # pshufb %xmm7,%xmm1
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215 # pshufb %xmm7,%xmm2
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223 # pshufb %xmm7,%xmm3
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231 # pshufb %xmm7,%xmm4
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239 # pshufb %xmm7,%xmm5
+ leaq .LBS0(%rip),%r11 # r11 is set to .LBS0 here by the caller before the call below
+.byte 102,15,56,0,247 # pshufb %xmm7,%xmm6
+ movl %ebx,%r10d # round count
+
+ call _bsaes_encrypt8_bitslice # defined outside this chunk; presumably the entry into the 8-block encryptor after its own key XOR and shuffle - TODO confirm
+
+ subq $8,%r14
+ jc .Lctr_enc_loop_done # fewer than eight blocks were left: store only what is needed
+
+ movdqu 0(%r12),%xmm7 # full pass: load eight input blocks
+ movdqu 16(%r12),%xmm8
+ movdqu 32(%r12),%xmm9
+ movdqu 48(%r12),%xmm10
+ movdqu 64(%r12),%xmm11
+ movdqu 80(%r12),%xmm12
+ movdqu 96(%r12),%xmm13
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ pxor %xmm15,%xmm7 # keystream lanes are consumed in the order xmm15,xmm0,xmm3,xmm5,xmm2,xmm6,xmm1,xmm4
+ movdqa 32(%rbp),%xmm15 # reload the base counter
+ pxor %xmm8,%xmm0
+ movdqu %xmm7,0(%r13)
+ pxor %xmm9,%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor %xmm10,%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor %xmm11,%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor %xmm12,%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor %xmm13,%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor %xmm14,%xmm4
+ movdqu %xmm1,96(%r13)
+ leaq .LADD1(%rip),%r11 # point r11 back at the counter constants
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ paddd 112(%r11),%xmm15 # advance the base counter by the constant at .LADD1+112 (presumably 8 - TODO confirm)
+ jnz .Lctr_enc_loop # flags still from the subq $8 above; the SSE moves and lea in between do not change them
+
+ jmp .Lctr_enc_done
+.align 16
+.Lctr_enc_loop_done: # Partial last pass: 1..7 blocks remain after undoing the subtraction.
+ addq $8,%r14
+ movdqu 0(%r12),%xmm7
+ pxor %xmm7,%xmm15
+ movdqu %xmm15,0(%r13)
+ cmpq $2,%r14
+ jb .Lctr_enc_done
+ movdqu 16(%r12),%xmm8
+ pxor %xmm8,%xmm0
+ movdqu %xmm0,16(%r13)
+ je .Lctr_enc_done # flags still from the cmpq $2 above
+ movdqu 32(%r12),%xmm9
+ pxor %xmm9,%xmm3
+ movdqu %xmm3,32(%r13)
+ cmpq $4,%r14
+ jb .Lctr_enc_done
+ movdqu 48(%r12),%xmm10
+ pxor %xmm10,%xmm5
+ movdqu %xmm5,48(%r13)
+ je .Lctr_enc_done
+ movdqu 64(%r12),%xmm11
+ pxor %xmm11,%xmm2
+ movdqu %xmm2,64(%r13)
+ cmpq $6,%r14
+ jb .Lctr_enc_done
+ movdqu 80(%r12),%xmm12
+ pxor %xmm12,%xmm6
+ movdqu %xmm6,80(%r13)
+ je .Lctr_enc_done
+ movdqu 96(%r12),%xmm13
+ pxor %xmm13,%xmm1
+ movdqu %xmm1,96(%r13)
+ jmp .Lctr_enc_done
+
+.align 16
+.Lctr_enc_short: # Short path: one block per iteration with the one-block routine and the original key; no schedule conversion.
+ leaq 32(%rbp),%rdi # input = counter block
+ leaq 48(%rbp),%rsi # output = keystream block at 48(%rbp)
+ leaq (%r15),%rdx # key
+ call asm_AES_encrypt
+ movdqu (%r12),%xmm0
+ leaq 16(%r12),%r12
+ movl 44(%rbp),%eax # last four bytes of the counter block
+ bswapl %eax # byte-swap so the increment acts on a big-endian 32-bit value
+ pxor 48(%rbp),%xmm0 # data ^= keystream
+ incl %eax
+ movdqu %xmm0,(%r13)
+ bswapl %eax
+ leaq 16(%r13),%r13
+ movl %eax,44(%rsp) # rsp equals rbp on this path (no schedule was allocated), so this updates the same slot read above
+ decq %r14
+ jnz .Lctr_enc_short
+
+.Lctr_enc_done:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lctr_enc_bzero: # Zero the stack from rsp upward in 32-byte steps until rax reaches or passes rbp.
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lctr_enc_bzero
+
+ leaq (%rbp),%rsp # drop the key schedule area
+ movq 72(%rsp),%r15 # restore the registers pushed in the prologue
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax # saved rbp, moved into place after rsp is adjusted
+ leaq 120(%rsp),%rsp # 72 bytes scratch + 6 saved registers
+ movq %rax,%rbp
+.Lctr_enc_epilogue:
+ retq
+.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
+.globl bsaes_xts_encrypt # XTS encryption entry point: eight blocks per pass, a 1..7 block tail, and a byte-stealing step for a trailing partial block.
+.type bsaes_xts_encrypt,@function # Register use: rdi = input, rsi = output, rdx = length in bytes, rcx = data key, r8 = key used to encrypt the initial tweak, r9 = 16-byte tweak input.
+.align 16
+bsaes_xts_encrypt:
+ movq %rsp,%rax # entry rsp; rax is overwritten below before any use in this function
+.Lxts_enc_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp # 72-byte scratch area; saved registers end up at 72..112(%rbp)
+ movq %rsp,%rbp # rbp = frame base; 32(%rbp) is a 16-byte scratch block
+ movq %rdi,%r12 # r12 = input pointer
+ movq %rsi,%r13 # r13 = output pointer
+ movq %rdx,%r14 # r14 = length
+ movq %rcx,%r15 # r15 = data key
+
+ leaq (%r9),%rdi # input = block at r9
+ leaq 32(%rbp),%rsi # output = 32(%rbp)
+ leaq (%r8),%rdx # key = r8
+ call asm_AES_encrypt # 32(%rbp) = initial tweak
+
+ movl 240(%r15),%eax # round count of the data key
+ movq %r14,%rbx # rbx keeps the full byte length; its low 4 bits are tested after the block processing
+
+ movl %eax,%edx # edx keeps the round count for the calls below
+ shlq $7,%rax # reserve rounds*128 - 96 bytes on the stack for the converted schedule
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax # destination for the converted schedule
+ movq %r15,%rcx # source key
+ movl %edx,%r10d # round count
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7 # last round key (xmm6) combined with the constant returned in xmm7 ...
+ movdqa %xmm7,(%rax) # ... stored as the final 16 bytes of the schedule
+
+ andq $-16,%r14 # r14 = length rounded down to whole blocks, in bytes
+ subq $128,%rsp # eight 16-byte tweak slots at 0..112(%rsp); the schedule now starts at 128(%rsp)
+ movdqa 32(%rbp),%xmm6 # xmm6 = current tweak
+
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12 # constant used by the tweak update (defined outside this chunk)
+ pcmpgtd %xmm6,%xmm14 # xmm14 = per-dword mask of the sign bits of the tweak
+
+ subq $128,%r14
+ jc .Lxts_enc_short # fewer than eight whole blocks
+ jmp .Lxts_enc_loop
+
+.align 16
+.Lxts_enc_loop: # Each 8-instruction group saves the current tweak, then derives the next one: double both 64-bit halves (paddq) and XOR in the masked constant. Presumably multiplication by x in GF(2^128) - TODO confirm.
+ pshufd $19,%xmm14,%xmm13 # rearrange the sign masks for the update
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15 # tweak 0 -> lane xmm15
+ movdqa %xmm6,0(%rsp) # and saved for the post-encryption XOR
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14 # sign masks of the new value, for the following update
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0 # tweak 1
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7 # input loads are interleaved with the tweak computation
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1 # tweak 2
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15 # lane 0 = input block 0 ^ tweak 0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2 # tweak 3
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3 # tweak 4
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4 # tweak 5
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5 # tweak 6
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12 # xmm12 is reused for data from here on; the constant is reloaded after the call
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp) # tweak 7 saved
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax # key schedule for _bsaes_encrypt8
+ pxor %xmm14,%xmm6 # lane 7 = input block 7 ^ tweak 7
+ movl %edx,%r10d # round count
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15 # XOR each result with its saved tweak; result lanes come back in the order xmm15,xmm0,xmm3,xmm5,xmm2,xmm6,xmm1,xmm4
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm1,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6 # resume from tweak 7 and derive the tweak for the next pass
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12 # reload the constant overwritten by the data load above
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+
+ subq $128,%r14
+ jnc .Lxts_enc_loop # continue while at least 128 bytes remained
+
+.Lxts_enc_short: # Tail: r14 + 128 = 0..112 bytes of whole blocks left.
+ addq $128,%r14
+ jz .Lxts_enc_done
+ pshufd $19,%xmm14,%xmm13 # same tweak derivation as in the main loop, with an early exit after each block count
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15 # tweak 0
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0 # tweak 1 (also the tweak for the stealing step when exactly one block remains)
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je .Lxts_enc_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1 # tweak 2
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je .Lxts_enc_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2 # tweak 3
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je .Lxts_enc_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3 # tweak 4
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je .Lxts_enc_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4 # tweak 5
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je .Lxts_enc_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5 # tweak 6
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je .Lxts_enc_6
+ pxor %xmm11,%xmm3 # seven blocks remain
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp) # next tweak, kept for the stealing step
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm1,96(%r13)
+ leaq 112(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6 # xmm6 = tweak following the last processed block
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_6: # six remaining blocks
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+
+ movdqa 96(%rsp),%xmm6 # tweak following the last processed block
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_5: # five remaining blocks
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ movdqu %xmm2,64(%r13)
+ leaq 80(%r13),%r13
+
+ movdqa 80(%rsp),%xmm6 # tweak following the last processed block
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_4: # four remaining blocks
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ movdqu %xmm5,48(%r13)
+ leaq 64(%r13),%r13
+
+ movdqa 64(%rsp),%xmm6 # tweak following the last processed block
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_3: # three remaining blocks
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm3,32(%r13)
+ leaq 48(%r13),%r13
+
+ movdqa 48(%rsp),%xmm6 # tweak following the last processed block
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_2: # two remaining blocks
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+
+ movdqa 32(%rsp),%xmm6 # tweak following the last processed block
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_1: # Single remaining block: use the one-block routine with the original (unconverted) data key.
+ pxor %xmm15,%xmm7 # input block ^ tweak 0
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rdi # encrypt in place in the scratch block
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx
+ call asm_AES_encrypt # NOTE(review): xmm15 (tweak) is used after this call, so it is assumed not to touch it - confirm
+ pxor 32(%rbp),%xmm15 # result ^ tweak 0
+
+
+
+
+
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+
+ movdqa 16(%rsp),%xmm6 # tweak following the last processed block
+
+.Lxts_enc_done:
+ andl $15,%ebx # trailing bytes beyond the last whole block
+ jz .Lxts_enc_ret
+ movq %r13,%rdx # rdx walks the output; -16(%rdx) is the last block just written
+
+.Lxts_enc_steal: # Byte by byte: move a byte of the last output block to the trailing partial output and put the remaining input byte in its place.
+ movzbl (%r12),%eax
+ movzbl -16(%rdx),%ecx
+ leaq 1(%r12),%r12
+ movb %al,-16(%rdx)
+ movb %cl,0(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz .Lxts_enc_steal
+
+ movdqu -16(%r13),%xmm15 # re-encrypt the patched last full block with the next tweak (xmm6)
+ leaq 32(%rbp),%rdi
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call asm_AES_encrypt # NOTE(review): xmm6 is used after this call, so it is assumed not to touch it - confirm
+ pxor 32(%rbp),%xmm6
+ movdqu %xmm6,-16(%r13)
+
+.Lxts_enc_ret:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lxts_enc_bzero: # Zero the stack from rsp up to rbp (tweak slots and converted key schedule), 32 bytes per pass.
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lxts_enc_bzero
+
+ leaq (%rbp),%rsp # drop the tweak and key schedule areas
+ movq 72(%rsp),%r15 # restore the registers pushed in the prologue
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax # saved rbp, moved into place after rsp is adjusted
+ leaq 120(%rsp),%rsp # 72 bytes scratch + 6 saved registers
+ movq %rax,%rbp
+.Lxts_enc_epilogue:
+ retq
+.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
+
+.globl bsaes_xts_decrypt
+.type bsaes_xts_decrypt,@function
+.align 16
+bsaes_xts_decrypt:
+ movq %rsp,%rax
+.Lxts_dec_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+
+ leaq (%r9),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r8),%rdx
+ call asm_AES_encrypt
+
+ movl 240(%r15),%eax
+ movq %r14,%rbx
+
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor (%rsp),%xmm7
+ movdqa %xmm6,(%rax)
+ movdqa %xmm7,(%rsp)
+
+ xorl %eax,%eax
+ andq $-16,%r14
+ testl $15,%ebx
+ setnz %al
+ shlq $4,%rax
+ subq %rax,%r14
+
+ subq $128,%rsp
+ movdqa 32(%rbp),%xmm6
+
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+
+ subq $128,%r14
+ jc .Lxts_dec_short
+ jmp .Lxts_dec_loop
+
+.align 16
+.Lxts_dec_loop:
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp)
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ pxor %xmm14,%xmm6
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+
+ subq $128,%r14
+ jnc .Lxts_dec_loop
+
+.Lxts_dec_short:
+ addq $128,%r14
+ jz .Lxts_dec_done
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je .Lxts_dec_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je .Lxts_dec_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je .Lxts_dec_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je .Lxts_dec_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je .Lxts_dec_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je .Lxts_dec_6
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp)
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ leaq 112(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_6:
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+
+ movdqa 96(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_5:
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ leaq 80(%r13),%r13
+
+ movdqa 80(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_4:
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ leaq 64(%r13),%r13
+
+ movdqa 64(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_3:
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ leaq 48(%r13),%r13
+
+ movdqa 48(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_2:
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+
+ movdqa 32(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_1:
+ pxor %xmm15,%xmm7
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm15
+
+
+
+
+
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+
+ movdqa 16(%rsp),%xmm6
+
+.Lxts_dec_done:
+ andl $15,%ebx
+ jz .Lxts_dec_ret
+
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ movdqa %xmm6,%xmm5
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ movdqu (%r12),%xmm15
+ pxor %xmm13,%xmm6
+
+ leaq 32(%rbp),%rdi
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm6
+ movq %r13,%rdx
+ movdqu %xmm6,(%r13)
+
+.Lxts_dec_steal:
+ movzbl 16(%r12),%eax
+ movzbl (%rdx),%ecx
+ leaq 1(%r12),%r12
+ movb %al,(%rdx)
+ movb %cl,16(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz .Lxts_dec_steal
+
+ movdqu (%r13),%xmm15
+ leaq 32(%rbp),%rdi
+ pxor %xmm5,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm5
+ movdqu %xmm5,(%r13)
+
+.Lxts_dec_ret:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lxts_dec_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lxts_dec_bzero
+
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lxts_dec_epilogue:
+ retq
+.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
+/*
+ * _bsaes_const -- table of 16-byte constants, aligned to 64 bytes.
+ *
+ * NOTE(review): the macOS twin of this file reaches several entries by a
+ * fixed byte offset from a neighbouring label (e.g. 80(%r11) with
+ * %r11 = L$BS0); presumably the ELF code does the same, so the order and
+ * size of the entries should be treated as part of the interface.
+ */
+.type _bsaes_const,@object
+.align 64
+_bsaes_const:
+/* 16-byte tables. NOTE(review): presumably pshufb byte-permutation masks -- confirm against the users. */
+.LM0ISR:
+.quad 0x0a0e0206070b0f03, 0x0004080c0d010509
+.LISRM0:
+.quad 0x01040b0e0205080f, 0x0306090c00070a0d
+.LISR:
+.quad 0x0504070602010003, 0x0f0e0d0c080b0a09
+/* Bit masks selecting every other 1-bit (.LBS0), 2-bit (.LBS1) and 4-bit (.LBS2) group. */
+.LBS0:
+.quad 0x5555555555555555, 0x5555555555555555
+.LBS1:
+.quad 0x3333333333333333, 0x3333333333333333
+.LBS2:
+.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+/* More 16-byte tables. NOTE(review): presumably pshufb masks as above -- confirm. */
+.LSR:
+.quad 0x0504070600030201, 0x0f0e0d0c0a09080b
+.LSRM0:
+.quad 0x0304090e00050a0f, 0x01060b0c0207080d
+.LM0SR:
+.quad 0x0a0e02060f03070b, 0x0004080c05090d01
+.LSWPUP:
+.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
+.LSWPUPM0SR:
+.quad 0x0a0d02060c03070b, 0x0004080f05090e01
+/*
+ * .LADDn: only the 32-bit lane at byte offset 12 is non-zero and holds n,
+ * so a paddd with .LADDn adds n to that lane and leaves the others alone.
+ */
+.LADD1:
+.quad 0x0000000000000000, 0x0000000100000000
+.LADD2:
+.quad 0x0000000000000000, 0x0000000200000000
+.LADD3:
+.quad 0x0000000000000000, 0x0000000300000000
+.LADD4:
+.quad 0x0000000000000000, 0x0000000400000000
+.LADD5:
+.quad 0x0000000000000000, 0x0000000500000000
+.LADD6:
+.quad 0x0000000000000000, 0x0000000600000000
+.LADD7:
+.quad 0x0000000000000000, 0x0000000700000000
+.LADD8:
+.quad 0x0000000000000000, 0x0000000800000000
+/*
+ * Loaded into %xmm12 by the XTS code: it is ANDed with a shuffled sign mask
+ * and XORed into the tweak after the tweak has been doubled with paddq.
+ */
+.Lxts_magic:
+.long 0x87,0,1,0
+/* Four rows, each replicating a single-bit byte mask: 0x01, 0x02, 0x04, 0x08. */
+.Lmasks:
+.quad 0x0101010101010101, 0x0101010101010101
+.quad 0x0202020202020202, 0x0202020202020202
+.quad 0x0404040404040404, 0x0404040404040404
+.quad 0x0808080808080808, 0x0808080808080808
+/* NOTE(review): presumably another pshufb mask (it sits at .Lmasks+64) -- confirm. */
+.LM0:
+.quad 0x02060a0e03070b0f, 0x0004080c0105090d
+/* The byte 0x63 replicated 16 times (it sits at .Lmasks+80). */
+.L63:
+.quad 0x6363636363636363, 0x6363636363636363
+/*
+ * NUL-terminated credit string (UTF-8):
+ * "Bit-sliced AES for x86_64/SSSE3, Emilia Kasper, Peter Schwabe, Andy Polyakov"
+ * (the 'a' in Kasper is encoded as the two bytes 195,164, i.e. a-umlaut).
+ */
+.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0
+.align 64
+.size _bsaes_const,.-_bsaes_const
+/*
+ * When HAVE_GNU_STACK is defined, emit an empty .note.GNU-stack section
+ * (the conventional marker that this object needs no executable stack).
+ */
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/aes/bsaes-macosx-x86_64.S b/ext/libressl/crypto/aes/bsaes-macosx-x86_64.S
new file mode 100644
index 0000000..5f780f0
--- /dev/null
+++ b/ext/libressl/crypto/aes/bsaes-macosx-x86_64.S
@@ -0,0 +1,2499 @@
+#include "x86_arch.h"
+.text
+
+
+
+
+
+/*
+ * _bsaes_encrypt8 -- file-local routine (not exported with .globl) that
+ * transforms eight 128-bit values held in %xmm15 and %xmm0-%xmm6.
+ *
+ * In:   %rax  = pointer to the key material. It is read with movdqa, so it
+ *               must be 16-byte aligned. Layout consumed here: 16 bytes,
+ *               then 128 bytes per L$enc_loop iteration, then 16 bytes.
+ *       %r10d = pass counter N. It is decremented once before the first
+ *               L$enc_sbox pass and once after every pass, so the S-box
+ *               stage runs N times and L$enc_loop runs N-1 times.
+ * Out:  the eight results are spread over %xmm15 and %xmm0-%xmm6; the
+ *       caller visible in this file stores them in the register order
+ *       15, 0, 3, 5, 2, 6, 1, 4.
+ * Clobbers: %rax, %r10d, %r11, %xmm7-%xmm14, flags.
+ *
+ * The .byte sequences are hand-encoded SSSE3 instructions:
+ *   102,68,15,56,0,255 = pshufb %xmm7,%xmm15
+ *   102,15,56,0,199 / 207 / 215 / 223 / 231 / 239 / 247
+ *                      = pshufb %xmm7,%xmm0 / 1 / 2 / 3 / 4 / 5 / 6
+ *
+ * Second entry point _bsaes_encrypt8_bitslice: skips the initial key XOR
+ * and byte shuffle; the caller must have set %r11 = L$BS0 and %rax to the
+ * first 128-byte key group itself.
+ */
+.p2align 6
+_bsaes_encrypt8:
+ leaq L$BS0(%rip),%r11
+
+/* XOR the first 16 key bytes into all eight values, then shuffle each with the table at L$BS0+80. */
+ movdqa (%rax),%xmm8
+ leaq 16(%rax),%rax
+ movdqa 80(%r11),%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+.byte 102,15,56,0,247
+_bsaes_encrypt8_bitslice:
+/*
+ * Regroup bits across register pairs. Each step computes
+ *   t = ((a >> n) ^ b) & mask;  b ^= t;  a ^= t << n
+ * for n = 1, 2, 4 using the masks at L$BS0+0, +16 and +32.
+ */
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm4,%xmm9
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+/* The first pass enters at L$enc_sbox directly: no 128-byte key group is applied yet. */
+ decl %r10d
+ jmp L$enc_sbox
+.p2align 4
+L$enc_loop:
+/* XOR the next 128 bytes of key material into the state and shuffle every register with %xmm7. */
+ pxor 0(%rax),%xmm15
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239
+ leaq 128(%rax),%rax
+.byte 102,15,56,0,247
+L$enc_sbox:
+/*
+ * Straight-line boolean circuit (pxor/pand/por only, no table lookups or
+ * data-dependent branches). NOTE(review): the label suggests this is the
+ * AES S-box evaluated on bit-sliced data -- confirm against the generator.
+ */
+ pxor %xmm5,%xmm4
+ pxor %xmm0,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm1,%xmm5
+ pxor %xmm15,%xmm4
+
+ pxor %xmm2,%xmm5
+ pxor %xmm6,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm3,%xmm2
+ pxor %xmm4,%xmm3
+ pxor %xmm0,%xmm2
+
+ pxor %xmm6,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm6,%xmm10
+ movdqa %xmm0,%xmm9
+ movdqa %xmm4,%xmm8
+ movdqa %xmm1,%xmm12
+ movdqa %xmm5,%xmm11
+
+ pxor %xmm3,%xmm10
+ pxor %xmm1,%xmm9
+ pxor %xmm2,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm3,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm15,%xmm11
+ movdqa %xmm10,%xmm14
+
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm2,%xmm11
+ pxor %xmm15,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm6,%xmm12
+ movdqa %xmm4,%xmm11
+ pxor %xmm0,%xmm12
+ pxor %xmm5,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm1,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm3,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm0,%xmm13
+ pand %xmm2,%xmm11
+ movdqa %xmm6,%xmm14
+ pand %xmm15,%xmm12
+ pand %xmm4,%xmm13
+ por %xmm5,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+
+
+
+
+
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+
+ pxor %xmm9,%xmm10
+
+ pand %xmm10,%xmm12
+
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+
+ pand %xmm7,%xmm9
+
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+
+ pand %xmm14,%xmm13
+
+ pxor %xmm11,%xmm13
+ movdqa %xmm5,%xmm11
+ movdqa %xmm4,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm5,%xmm9
+ pxor %xmm4,%xmm5
+ pand %xmm14,%xmm4
+ pand %xmm13,%xmm5
+ pxor %xmm4,%xmm5
+ pxor %xmm9,%xmm4
+ pxor %xmm15,%xmm11
+ pxor %xmm2,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm2,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm2
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm2,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm2
+ pxor %xmm11,%xmm5
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm2
+
+ movdqa %xmm6,%xmm11
+ movdqa %xmm0,%xmm7
+ pxor %xmm3,%xmm11
+ pxor %xmm1,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm3,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm1,%xmm3
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm1
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm3
+ pxor %xmm11,%xmm7
+ pxor %xmm1,%xmm3
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm1
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm6,%xmm10
+ pxor %xmm0,%xmm6
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm6
+ pxor %xmm0,%xmm6
+ pxor %xmm10,%xmm0
+ pxor %xmm11,%xmm6
+ pxor %xmm11,%xmm3
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm1
+ pxor %xmm15,%xmm6
+ pxor %xmm5,%xmm0
+ pxor %xmm6,%xmm3
+ pxor %xmm15,%xmm5
+ pxor %xmm0,%xmm15
+
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ pxor %xmm2,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm4,%xmm3
+
+ pxor %xmm2,%xmm5
+/* Counter went negative: every pass is done, skip the mixing step below and finish. */
+ decl %r10d
+ jl L$enc_done
+/*
+ * Linear mixing built from dword rotations (pshufd $147 / $78) and XORs.
+ * NOTE(review): presumably the bit-sliced MixColumns step -- confirm.
+ * The results are renamed back into %xmm15,%xmm0-%xmm6 for the next pass.
+ */
+ pshufd $147,%xmm15,%xmm7
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm3,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm5,%xmm10
+ pxor %xmm9,%xmm3
+ pshufd $147,%xmm2,%xmm11
+ pxor %xmm10,%xmm5
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm2
+ pshufd $147,%xmm1,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm1
+ pxor %xmm14,%xmm4
+
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm2,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm5,%xmm11
+ pshufd $78,%xmm2,%xmm7
+ pxor %xmm1,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm3,%xmm10
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm1,%xmm5
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm12,%xmm8
+ pxor %xmm10,%xmm2
+ pxor %xmm14,%xmm6
+ pxor %xmm13,%xmm5
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm1
+ movdqa %xmm8,%xmm4
+/*
+ * Pick the shuffle table for the next L$enc_loop pass. ZF is still the
+ * result of the decl above (SSE moves/XORs/shuffles leave EFLAGS alone):
+ * use L$BS0+48 normally and L$BS0+64 when the counter has just hit zero,
+ * i.e. for the last pass.
+ */
+ movdqa 48(%r11),%xmm7
+ jnz L$enc_loop
+ movdqa 64(%r11),%xmm7
+ jmp L$enc_loop
+.p2align 4
+L$enc_done:
+/*
+ * Same masked-exchange steps as at entry (n = 1, 2, 4), applied to the
+ * post-S-box register assignment, followed by an XOR of the 16 bytes at
+ * (%rax) into all eight registers.
+ */
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm2,%xmm10
+ psrlq $1,%xmm2
+ pxor %xmm4,%xmm1
+ pxor %xmm6,%xmm2
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm2
+ pxor %xmm1,%xmm4
+ psllq $1,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $1,%xmm2
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm2
+ movdqa %xmm3,%xmm9
+ psrlq $1,%xmm3
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm5,%xmm3
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm15
+ pxor %xmm3,%xmm5
+ psllq $1,%xmm3
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9
+ psrlq $2,%xmm6
+ movdqa %xmm2,%xmm10
+ psrlq $2,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm1,%xmm2
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm2
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm2,%xmm1
+ psllq $2,%xmm2
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm2
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm5,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm5
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm5,%xmm9
+ psrlq $4,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $4,%xmm3
+ pxor %xmm4,%xmm5
+ pxor %xmm1,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm4
+ psllq $4,%xmm5
+ pxor %xmm3,%xmm1
+ psllq $4,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm2,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm2
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq
+
+
+
+/*
+ * _bsaes_decrypt8 -- file-local routine (not exported with .globl) that
+ * transforms eight 128-bit values held in %xmm15 and %xmm0-%xmm6; it is
+ * the decryption counterpart called by _bsaes_cbc_encrypt.
+ *
+ * In:   %rax  = pointer to the key material. It is read with movdqa, so it
+ *               must be 16-byte aligned. Layout consumed here: 16 bytes,
+ *               then 128 bytes per L$dec_loop iteration, then 16 bytes.
+ *       %r10d = pass counter N. It is decremented once before the first
+ *               L$dec_sbox pass and once after every pass, so the S-box
+ *               stage runs N times and L$dec_loop runs N-1 times.
+ * Out:  the eight results are spread over %xmm15 and %xmm0-%xmm6; the CBC
+ *       caller consumes them in the register order 15, 0, 5, 3, 1, 6, 2, 4.
+ * Clobbers: %rax, %r10d, %r11, %xmm7-%xmm14, flags.
+ *
+ * The .byte sequences are hand-encoded SSSE3 instructions:
+ *   102,68,15,56,0,255 = pshufb %xmm7,%xmm15
+ *   102,15,56,0,199 / 207 / 215 / 223 / 231 / 239 / 247
+ *                      = pshufb %xmm7,%xmm0 / 1 / 2 / 3 / 4 / 5 / 6
+ */
+.p2align 6
+_bsaes_decrypt8:
+ leaq L$BS0(%rip),%r11
+
+/* XOR the first 16 key bytes into all eight values, then shuffle each with the table at L$BS0-48. */
+ movdqa (%rax),%xmm8
+ leaq 16(%rax),%rax
+ movdqa -48(%r11),%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+.byte 102,15,56,0,247
+/*
+ * Regroup bits across register pairs. Each step computes
+ *   t = ((a >> n) ^ b) & mask;  b ^= t;  a ^= t << n
+ * for n = 1, 2, 4 using the masks at L$BS0+0, +16 and +32.
+ */
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm4,%xmm9
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+/* The first pass enters at L$dec_sbox directly: no 128-byte key group is applied yet. */
+ decl %r10d
+ jmp L$dec_sbox
+.p2align 4
+L$dec_loop:
+/* XOR the next 128 bytes of key material into the state and shuffle every register with %xmm7. */
+ pxor 0(%rax),%xmm15
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239
+ leaq 128(%rax),%rax
+.byte 102,15,56,0,247
+L$dec_sbox:
+/*
+ * Straight-line boolean circuit (pxor/pand/por only, no table lookups or
+ * data-dependent branches). NOTE(review): the label suggests this is the
+ * inverse AES S-box evaluated on bit-sliced data -- confirm against the generator.
+ */
+ pxor %xmm3,%xmm2
+
+ pxor %xmm6,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm3,%xmm5
+ pxor %xmm5,%xmm6
+ pxor %xmm6,%xmm0
+
+ pxor %xmm0,%xmm15
+ pxor %xmm4,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm15,%xmm4
+ pxor %xmm2,%xmm0
+ movdqa %xmm2,%xmm10
+ movdqa %xmm6,%xmm9
+ movdqa %xmm0,%xmm8
+ movdqa %xmm3,%xmm12
+ movdqa %xmm4,%xmm11
+
+ pxor %xmm15,%xmm10
+ pxor %xmm3,%xmm9
+ pxor %xmm5,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm15,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm1,%xmm11
+ movdqa %xmm10,%xmm14
+
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm5,%xmm11
+ pxor %xmm1,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm2,%xmm12
+ movdqa %xmm0,%xmm11
+ pxor %xmm6,%xmm12
+ pxor %xmm4,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm3,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm15,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm6,%xmm13
+ pand %xmm5,%xmm11
+ movdqa %xmm2,%xmm14
+ pand %xmm1,%xmm12
+ pand %xmm0,%xmm13
+ por %xmm4,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+
+
+
+
+
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+
+ pxor %xmm9,%xmm10
+
+ pand %xmm10,%xmm12
+
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+
+ pand %xmm7,%xmm9
+
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+
+ pand %xmm14,%xmm13
+
+ pxor %xmm11,%xmm13
+ movdqa %xmm4,%xmm11
+ movdqa %xmm0,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm4,%xmm9
+ pxor %xmm0,%xmm4
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm4
+ pxor %xmm0,%xmm4
+ pxor %xmm9,%xmm0
+ pxor %xmm1,%xmm11
+ pxor %xmm5,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm1,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm5,%xmm1
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm5
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm1
+ pxor %xmm11,%xmm7
+ pxor %xmm5,%xmm1
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm5
+ pxor %xmm11,%xmm4
+ pxor %xmm11,%xmm1
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm5
+
+ movdqa %xmm2,%xmm11
+ movdqa %xmm6,%xmm7
+ pxor %xmm15,%xmm11
+ pxor %xmm3,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm3,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm3
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm3,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm3
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm2,%xmm10
+ pxor %xmm6,%xmm2
+ pand %xmm14,%xmm6
+ pand %xmm13,%xmm2
+ pxor %xmm6,%xmm2
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm3
+ pxor %xmm6,%xmm0
+ pxor %xmm4,%xmm5
+
+ pxor %xmm0,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm6,%xmm4
+ pxor %xmm1,%xmm3
+ pxor %xmm15,%xmm6
+ pxor %xmm4,%xmm3
+ pxor %xmm5,%xmm2
+ pxor %xmm0,%xmm5
+ pxor %xmm3,%xmm2
+
+ pxor %xmm15,%xmm3
+ pxor %xmm2,%xmm6
+/* Counter went negative: every pass is done, skip the mixing step below and finish. */
+ decl %r10d
+ jl L$dec_done
+
+/*
+ * Linear mixing built from dword rotations (pshufd $78 / $147) and XORs.
+ * NOTE(review): presumably the bit-sliced InvMixColumns step -- confirm.
+ * The results are renamed back into %xmm15,%xmm0-%xmm6 for the next pass.
+ */
+ pshufd $78,%xmm15,%xmm7
+ pshufd $78,%xmm2,%xmm13
+ pxor %xmm15,%xmm7
+ pshufd $78,%xmm4,%xmm14
+ pxor %xmm2,%xmm13
+ pshufd $78,%xmm0,%xmm8
+ pxor %xmm4,%xmm14
+ pshufd $78,%xmm5,%xmm9
+ pxor %xmm0,%xmm8
+ pshufd $78,%xmm3,%xmm10
+ pxor %xmm5,%xmm9
+ pxor %xmm13,%xmm15
+ pxor %xmm13,%xmm0
+ pshufd $78,%xmm1,%xmm11
+ pxor %xmm3,%xmm10
+ pxor %xmm7,%xmm5
+ pxor %xmm8,%xmm3
+ pshufd $78,%xmm6,%xmm12
+ pxor %xmm1,%xmm11
+ pxor %xmm14,%xmm0
+ pxor %xmm9,%xmm1
+ pxor %xmm6,%xmm12
+
+ pxor %xmm14,%xmm5
+ pxor %xmm13,%xmm3
+ pxor %xmm13,%xmm1
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm14,%xmm1
+ pxor %xmm14,%xmm6
+ pxor %xmm12,%xmm4
+ pshufd $147,%xmm15,%xmm7
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm5,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm3,%xmm10
+ pxor %xmm9,%xmm5
+ pshufd $147,%xmm1,%xmm11
+ pxor %xmm10,%xmm3
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm1
+ pshufd $147,%xmm2,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm2
+ pxor %xmm14,%xmm4
+
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm1,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm3,%xmm11
+ pshufd $78,%xmm1,%xmm7
+ pxor %xmm2,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm5,%xmm10
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm2,%xmm3
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm12,%xmm8
+ pxor %xmm1,%xmm10
+ pxor %xmm14,%xmm6
+ pxor %xmm3,%xmm13
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm2
+ movdqa %xmm13,%xmm5
+ movdqa %xmm8,%xmm4
+ movdqa %xmm2,%xmm1
+ movdqa %xmm10,%xmm2
+/*
+ * Pick the shuffle table for the next L$dec_loop pass. ZF is still the
+ * result of the decl above (SSE moves/XORs/shuffles leave EFLAGS alone):
+ * use L$BS0-16 normally and L$BS0-32 when the counter has just hit zero,
+ * i.e. for the last pass.
+ */
+ movdqa -16(%r11),%xmm7
+ jnz L$dec_loop
+ movdqa -32(%r11),%xmm7
+ jmp L$dec_loop
+.p2align 4
+L$dec_done:
+/*
+ * Same masked-exchange steps as at entry (n = 1, 2, 4), applied to the
+ * post-S-box register assignment, followed by an XOR of the 16 bytes at
+ * (%rax) into all eight registers.
+ */
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm2,%xmm9
+ psrlq $1,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $1,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm6,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm4
+ psllq $1,%xmm2
+ pxor %xmm1,%xmm6
+ psllq $1,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm3,%xmm5
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm15
+ pxor %xmm5,%xmm3
+ psllq $1,%xmm5
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9
+ psrlq $2,%xmm6
+ movdqa %xmm1,%xmm10
+ psrlq $2,%xmm1
+ pxor %xmm4,%xmm6
+ pxor %xmm2,%xmm1
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm1
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm1,%xmm2
+ psllq $2,%xmm1
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm3,%xmm0
+ pxor %xmm5,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm3
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm5
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm3,%xmm9
+ psrlq $4,%xmm3
+ movdqa %xmm5,%xmm10
+ psrlq $4,%xmm5
+ pxor %xmm4,%xmm3
+ pxor %xmm2,%xmm5
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $4,%xmm3
+ pxor %xmm5,%xmm2
+ psllq $4,%xmm5
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm5
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq
+
+
+/*
+ * _bsaes_key_convert -- file-local helper that expands a key schedule into
+ * the per-bit mask layout read by _bsaes_encrypt8 / _bsaes_decrypt8.
+ *
+ * In:   %rcx  = source key schedule, read 16 bytes at a time with movdqu.
+ *       %rax  = destination, written with movdqa (must be 16-byte aligned).
+ *       %r10d = count N; the loop body runs N-1 times.
+ * Out:  destination holds the first 16 source bytes verbatim, followed by
+ *       128 bytes for each of the next N-1 source blocks.
+ *       %rax  = first byte after the data written.
+ *       %xmm6 = the source block following the last converted one (loaded
+ *               but not converted; the callers store it themselves).
+ *       %xmm7 = the 16-byte constant at L$masks+80.
+ * Clobbers: %rcx, %r10d, %r11, %xmm0-%xmm15, flags.
+ */
+.p2align 4
+_bsaes_key_convert:
+ leaq L$masks(%rip),%r11
+ movdqu (%rcx),%xmm7
+ leaq 16(%rcx),%rcx
+/* %xmm0-%xmm3 = single-bit byte masks from L$masks+0..+48, %xmm4 = shuffle table at L$masks+64. */
+ movdqa 0(%r11),%xmm0
+ movdqa 16(%r11),%xmm1
+ movdqa 32(%r11),%xmm2
+ movdqa 48(%r11),%xmm3
+ movdqa 64(%r11),%xmm4
+/* %xmm5 = all ones, used below to complement selected bit planes. */
+ pcmpeqd %xmm5,%xmm5
+
+/* Copy the first 16 source bytes through unchanged and preload the next block. */
+ movdqu (%rcx),%xmm6
+ movdqa %xmm7,(%rax)
+ leaq 16(%rax),%rax
+ decl %r10d
+/* The jump only skips the alignment padding in front of the loop head. */
+ jmp L$key_loop
+.p2align 4
+L$key_loop:
+/* Hand-encoded pshufb %xmm4,%xmm6: permute the bytes of the current source block. */
+.byte 102,15,56,0,244
+
+/*
+ * For each bit position b = 0..7 build a 16-byte plane whose byte i is
+ * 0xff when bit b of source byte i is set and 0x00 otherwise (pand with
+ * the single-bit mask, then pcmpeqb against that mask). Shifting the masks
+ * left by 4 turns 0x01/0x02/0x04/0x08 into 0x10/0x20/0x40/0x80 for bits
+ * 4..7; they are shifted back afterwards for the next iteration.
+ */
+ movdqa %xmm0,%xmm8
+ movdqa %xmm1,%xmm9
+
+ pand %xmm6,%xmm8
+ pand %xmm6,%xmm9
+ movdqa %xmm2,%xmm10
+ pcmpeqb %xmm0,%xmm8
+ psllq $4,%xmm0
+ movdqa %xmm3,%xmm11
+ pcmpeqb %xmm1,%xmm9
+ psllq $4,%xmm1
+
+ pand %xmm6,%xmm10
+ pand %xmm6,%xmm11
+ movdqa %xmm0,%xmm12
+ pcmpeqb %xmm2,%xmm10
+ psllq $4,%xmm2
+ movdqa %xmm1,%xmm13
+ pcmpeqb %xmm3,%xmm11
+ psllq $4,%xmm3
+
+ movdqa %xmm2,%xmm14
+ movdqa %xmm3,%xmm15
+/* Complement the planes for bits 0 and 1 (bits 5 and 6 follow below); 0,1,5,6 are the set bits of 0x63. */
+ pxor %xmm5,%xmm8
+ pxor %xmm5,%xmm9
+
+ pand %xmm6,%xmm12
+ pand %xmm6,%xmm13
+ movdqa %xmm8,0(%rax)
+ pcmpeqb %xmm0,%xmm12
+ psrlq $4,%xmm0
+ movdqa %xmm9,16(%rax)
+ pcmpeqb %xmm1,%xmm13
+ psrlq $4,%xmm1
+ leaq 16(%rcx),%rcx
+
+ pand %xmm6,%xmm14
+ pand %xmm6,%xmm15
+ movdqa %xmm10,32(%rax)
+ pcmpeqb %xmm2,%xmm14
+ psrlq $4,%xmm2
+ movdqa %xmm11,48(%rax)
+ pcmpeqb %xmm3,%xmm15
+ psrlq $4,%xmm3
+/* Preload the next source block; after the last iteration this is the block returned in %xmm6. */
+ movdqu (%rcx),%xmm6
+
+/* Complement the planes for bits 5 and 6. */
+ pxor %xmm5,%xmm13
+ pxor %xmm5,%xmm14
+ movdqa %xmm12,64(%rax)
+ movdqa %xmm13,80(%rax)
+ movdqa %xmm14,96(%rax)
+ movdqa %xmm15,112(%rax)
+ leaq 128(%rax),%rax
+ decl %r10d
+ jnz L$key_loop
+
+/* Hand the constant at L$masks+80 back to the caller in %xmm7. */
+ movdqa 80(%r11),%xmm7
+
+ retq
+
+
+/*
+ * _bsaes_cbc_encrypt -- exported CBC entry point; register use follows the
+ * System V AMD64 argument order.
+ *
+ * In:   %rdi = input pointer             (kept in %r12)
+ *       %rsi = output pointer            (kept in %r13)
+ *       %rdx = length in bytes           (%r14 = length >> 4 = 16-byte blocks;
+ *                                         a trailing partial block is ignored)
+ *       %rcx = key structure, 32-bit count read from offset 240 (kept in %r15)
+ *       %r8  = 16-byte IV, read on entry and updated on exit (kept in %rbx)
+ *       %r9d = direction flag
+ *
+ * Only the case %r9d == 0 with length >= 128 bytes is handled here, using
+ * _bsaes_decrypt8 on up to eight blocks at a time. Every other case is
+ * tail-jumped to _asm_AES_cbc_encrypt with the arguments untouched.
+ *
+ * Frame: six callee-saved registers are pushed, then 72 bytes are reserved;
+ * %rbp points at the bottom of that area and 32(%rbp) is a 16-byte scratch
+ * slot. The converted key schedule lives below %rbp and is zeroed before
+ * returning.
+ */
+.globl _bsaes_cbc_encrypt
+
+.p2align 4
+_bsaes_cbc_encrypt:
+ cmpl $0,%r9d
+ jne _asm_AES_cbc_encrypt
+ cmpq $128,%rdx
+ jb _asm_AES_cbc_encrypt
+
+ movq %rsp,%rax
+L$cbc_dec_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movl 240(%rcx),%eax
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+ movq %r8,%rbx
+ shrq $4,%r14
+
+/* Reserve count*128 - 96 bytes for the converted key schedule; %edx keeps the count for later calls. */
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+/*
+ * Fix up both ends of the converted schedule: XOR the constant returned in
+ * %xmm7 into the first 16 bytes, and store the last source block (returned
+ * in %xmm6) unchanged at the end.
+ */
+ pxor (%rsp),%xmm7
+ movdqa %xmm6,(%rax)
+ movdqa %xmm7,(%rsp)
+
+/* %xmm14 carries the chaining value (initially the IV) across iterations. */
+ movdqu (%rbx),%xmm14
+ subq $8,%r14
+L$cbc_dec_loop:
+/* Main loop: eight blocks per iteration. */
+ movdqu 0(%r12),%xmm15
+ movdqu 16(%r12),%xmm0
+ movdqu 32(%r12),%xmm1
+ movdqu 48(%r12),%xmm2
+ movdqu 64(%r12),%xmm3
+ movdqu 80(%r12),%xmm4
+ movq %rsp,%rax
+ movdqu 96(%r12),%xmm5
+ movl %edx,%r10d
+ movdqu 112(%r12),%xmm6
+/* _bsaes_decrypt8 clobbers %xmm14, so park the chaining value in the scratch slot. */
+ movdqa %xmm14,32(%rbp)
+
+ call _bsaes_decrypt8
+
+/*
+ * CBC chaining: XOR block 0 with the saved chaining value and every later
+ * block with the preceding input block, reloaded from the input buffer.
+ * Results come back in register order 15,0,5,3,1,6,2,4. The last input
+ * block becomes the next chaining value in %xmm14.
+ */
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm2
+ movdqu 112(%r12),%xmm14
+ pxor %xmm13,%xmm4
+ movdqu %xmm15,0(%r13)
+ leaq 128(%r12),%r12
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ subq $8,%r14
+ jnc L$cbc_dec_loop
+
+/* Undo the last subtraction: %r14 = 0..7 remaining blocks. */
+ addq $8,%r14
+ jz L$cbc_dec_done
+
+/*
+ * Tail dispatch. Each cmpq feeds two branches (jb, then je); the movdqu in
+ * between does not modify the flags. Only the blocks that exist are loaded;
+ * the remaining registers hold stale data whose results are never stored.
+ */
+ movdqu 0(%r12),%xmm15
+ movq %rsp,%rax
+ movl %edx,%r10d
+ cmpq $2,%r14
+ jb L$cbc_dec_one
+ movdqu 16(%r12),%xmm0
+ je L$cbc_dec_two
+ movdqu 32(%r12),%xmm1
+ cmpq $4,%r14
+ jb L$cbc_dec_three
+ movdqu 48(%r12),%xmm2
+ je L$cbc_dec_four
+ movdqu 64(%r12),%xmm3
+ cmpq $6,%r14
+ jb L$cbc_dec_five
+ movdqu 80(%r12),%xmm4
+ je L$cbc_dec_six
+/* Seven remaining blocks. */
+ movdqu 96(%r12),%xmm5
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm14
+ pxor %xmm12,%xmm2
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ jmp L$cbc_dec_done
+.p2align 4
+L$cbc_dec_six:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm14
+ pxor %xmm11,%xmm6
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ jmp L$cbc_dec_done
+.p2align 4
+L$cbc_dec_five:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm14
+ pxor %xmm10,%xmm1
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ jmp L$cbc_dec_done
+.p2align 4
+L$cbc_dec_four:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm14
+ pxor %xmm9,%xmm3
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ jmp L$cbc_dec_done
+.p2align 4
+L$cbc_dec_three:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm14
+ pxor %xmm8,%xmm5
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ jmp L$cbc_dec_done
+.p2align 4
+L$cbc_dec_two:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm14
+ pxor %xmm7,%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ jmp L$cbc_dec_done
+.p2align 4
+L$cbc_dec_one:
+/*
+ * Single remaining block: decrypt it with _asm_AES_decrypt(in = %r12,
+ * out = scratch slot, key = original key in %r15) instead of the 8-way path.
+ * NOTE(review): this relies on _asm_AES_decrypt leaving %xmm14 (chaining
+ * value) and %xmm15 (copy of the input block) intact -- confirm in its source.
+ */
+ leaq (%r12),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx
+ call _asm_AES_decrypt
+ pxor 32(%rbp),%xmm14
+ movdqu %xmm14,(%r13)
+ movdqa %xmm15,%xmm14
+
+L$cbc_dec_done:
+/* Write the final chaining value back through the IV pointer. */
+ movdqu %xmm14,(%rbx)
+/* Zero the stack from %rsp up to %rbp (the converted key schedule), 32 bytes per step. */
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+L$cbc_dec_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja L$cbc_dec_bzero
+
+/* Drop the frame and reload the callee-saved registers from above the 72-byte area. */
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+L$cbc_dec_epilogue:
+ retq
+
+
+.globl _bsaes_ctr32_encrypt_blocks /* CTR mode: XORs 16-byte blocks with encrypted counter blocks, 8 at a time when possible. */
+ /* In: %rdi = source, %rsi = destination, %rdx = number of 16-byte blocks, */
+.p2align 4 /* %rcx = key structure (32-bit value at +240 is used as the round count), %r8 = 16-byte counter block. */
+_bsaes_ctr32_encrypt_blocks:
+ movq %rsp,%rax /* entry %rsp; %eax is overwritten below before this value is read */
+L$ctr_enc_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp /* 72 bytes of locals below the six saved registers */
+ movq %rsp,%rbp /* %rbp = frame base; saved registers live at 72..112(%rbp) */
+ movdqu (%r8),%xmm0 /* load the 16-byte counter block */
+ movl 240(%rcx),%eax /* %eax = 32-bit field at key+240 (round count) */
+ movq %rdi,%r12 /* %r12 = source pointer */
+ movq %rsi,%r13 /* %r13 = destination pointer */
+ movq %rdx,%r14 /* %r14 = blocks remaining */
+ movq %rcx,%r15 /* %r15 = key pointer */
+ movdqa %xmm0,32(%rbp) /* working copy of the counter block */
+ cmpq $8,%rdx
+ jb L$ctr_enc_short /* fewer than 8 blocks: single-block path, no bit-sliced schedule */
+
+ movl %eax,%ebx /* %ebx = round count, reloaded into %r10d before each call */
+ shlq $7,%rax
+ subq $96,%rax /* %rax = rounds*128 - 96 bytes */
+ subq %rax,%rsp /* reserve that much stack for the converted key schedule */
+
+ movq %rsp,%rax /* register set-up for _bsaes_key_convert: %rax = schedule buffer, */
+ movq %r15,%rcx /* %rcx = key pointer, */
+ movl %ebx,%r10d /* %r10d = round count */
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7 /* %xmm6, %xmm7 and %rax are as left by _bsaes_key_convert (defined outside this block) */
+ movdqa %xmm7,(%rax)
+
+ movdqa (%rsp),%xmm8 /* first 16 bytes of the converted schedule */
+ leaq L$ADD1(%rip),%r11
+ movdqa 32(%rbp),%xmm15 /* counter block */
+ movdqa -32(%r11),%xmm7 /* L$SWPUP mask: reverses the bytes of the top dword only */
+.byte 102,68,15,56,0,199 /* pshufb %xmm7,%xmm8 */
+.byte 102,68,15,56,0,255 /* pshufb %xmm7,%xmm15: top counter dword is now little-endian for paddd */
+ movdqa %xmm8,(%rsp) /* write the shuffled first schedule entry back */
+ jmp L$ctr_enc_loop
+.p2align 4
+L$ctr_enc_loop:
+ movdqa %xmm15,32(%rbp) /* remember the base counter of this batch */
+ movdqa %xmm15,%xmm0
+ movdqa %xmm15,%xmm1
+ paddd 0(%r11),%xmm0 /* L$ADD1..L$ADD7 follow: %xmm0..%xmm6 = base counter + 1..7 (top dword) */
+ movdqa %xmm15,%xmm2
+ paddd 16(%r11),%xmm1
+ movdqa %xmm15,%xmm3
+ paddd 32(%r11),%xmm2
+ movdqa %xmm15,%xmm4
+ paddd 48(%r11),%xmm3
+ movdqa %xmm15,%xmm5
+ paddd 64(%r11),%xmm4
+ movdqa %xmm15,%xmm6
+ paddd 80(%r11),%xmm5
+ paddd 96(%r11),%xmm6
+
+ /* XOR the first schedule entry into all eight counters and shuffle them with L$SWPUPM0SR, */
+ /* then enter the encryptor at its _bitslice entry point (defined outside this block). */
+ movdqa (%rsp),%xmm8
+ leaq 16(%rsp),%rax /* %rax = schedule address past its first 16 bytes */
+ movdqa -16(%r11),%xmm7 /* L$SWPUPM0SR */
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255 /* pshufb %xmm7,%xmm15 */
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199 /* pshufb %xmm7,%xmm0 */
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207 /* pshufb %xmm7,%xmm1 */
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215 /* pshufb %xmm7,%xmm2 */
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223 /* pshufb %xmm7,%xmm3 */
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231 /* pshufb %xmm7,%xmm4 */
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239 /* pshufb %xmm7,%xmm5 */
+ leaq L$BS0(%rip),%r11 /* %r11 = L$BS0 for the callee */
+.byte 102,15,56,0,247 /* pshufb %xmm7,%xmm6 */
+ movl %ebx,%r10d /* %r10d = round count */
+
+ call _bsaes_encrypt8_bitslice
+
+ subq $8,%r14
+ jc L$ctr_enc_loop_done /* fewer than 8 blocks were left: partial batch */
+
+ movdqu 0(%r12),%xmm7 /* load 8 input blocks */
+ movdqu 16(%r12),%xmm8
+ movdqu 32(%r12),%xmm9
+ movdqu 48(%r12),%xmm10
+ movdqu 64(%r12),%xmm11
+ movdqu 80(%r12),%xmm12
+ movdqu 96(%r12),%xmm13
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ pxor %xmm15,%xmm7 /* results are consumed in register order 15,0,3,5,2,6,1,4 */
+ movdqa 32(%rbp),%xmm15 /* reload the base counter of this batch */
+ pxor %xmm8,%xmm0
+ movdqu %xmm7,0(%r13)
+ pxor %xmm9,%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor %xmm10,%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor %xmm11,%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor %xmm12,%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor %xmm13,%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor %xmm14,%xmm4
+ movdqu %xmm1,96(%r13)
+ leaq L$ADD1(%rip),%r11 /* restore %r11 for the next iteration */
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ paddd 112(%r11),%xmm15 /* L$ADD8: advance the counter by 8 */
+ jnz L$ctr_enc_loop /* flags are still those of subq $8,%r14; the SSE and lea instructions in between leave them alone */
+
+ jmp L$ctr_enc_done
+.p2align 4
+L$ctr_enc_loop_done:
+ addq $8,%r14 /* %r14 = 1..7 blocks in the final partial batch */
+ movdqu 0(%r12),%xmm7
+ pxor %xmm7,%xmm15
+ movdqu %xmm15,0(%r13)
+ cmpq $2,%r14
+ jb L$ctr_enc_done /* exactly 1 block */
+ movdqu 16(%r12),%xmm8
+ pxor %xmm8,%xmm0
+ movdqu %xmm0,16(%r13)
+ je L$ctr_enc_done /* exactly 2 blocks (flags from cmpq $2) */
+ movdqu 32(%r12),%xmm9
+ pxor %xmm9,%xmm3
+ movdqu %xmm3,32(%r13)
+ cmpq $4,%r14
+ jb L$ctr_enc_done /* exactly 3 blocks */
+ movdqu 48(%r12),%xmm10
+ pxor %xmm10,%xmm5
+ movdqu %xmm5,48(%r13)
+ je L$ctr_enc_done /* exactly 4 blocks */
+ movdqu 64(%r12),%xmm11
+ pxor %xmm11,%xmm2
+ movdqu %xmm2,64(%r13)
+ cmpq $6,%r14
+ jb L$ctr_enc_done /* exactly 5 blocks */
+ movdqu 80(%r12),%xmm12
+ pxor %xmm12,%xmm6
+ movdqu %xmm6,80(%r13)
+ je L$ctr_enc_done /* exactly 6 blocks */
+ movdqu 96(%r12),%xmm13
+ pxor %xmm13,%xmm1
+ movdqu %xmm1,96(%r13)
+ jmp L$ctr_enc_done
+ /* Short path (fewer than 8 blocks in total): encrypt the counter block one block at a time. */
+.p2align 4
+L$ctr_enc_short:
+ leaq 32(%rbp),%rdi /* %rdi = counter block */
+ leaq 48(%rbp),%rsi /* %rsi = scratch that is XORed into the data below */
+ leaq (%r15),%rdx /* %rdx = key pointer */
+ call _asm_AES_encrypt
+ movdqu (%r12),%xmm0
+ leaq 16(%r12),%r12
+ movl 44(%rbp),%eax /* last 4 bytes of the counter block */
+ bswapl %eax
+ pxor 48(%rbp),%xmm0
+ incl %eax /* increment as a big-endian 32-bit value */
+ movdqu %xmm0,(%r13)
+ bswapl %eax
+ leaq 16(%r13),%r13
+ movl %eax,44(%rsp) /* same slot as 44(%rbp): %rsp has not moved since movq %rsp,%rbp on this path */
+ decq %r14
+ jnz L$ctr_enc_short
+
+L$ctr_enc_done:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+L$ctr_enc_bzero: /* zero the stack from %rsp upward in 32-byte steps until %rax reaches %rbp (always at least one step) */
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja L$ctr_enc_bzero
+
+ leaq (%rbp),%rsp /* drop the key schedule area */
+ movq 72(%rsp),%r15 /* restore callee-saved registers pushed in the prologue */
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax /* saved %rbp, moved into %rbp below */
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+L$ctr_enc_epilogue:
+ retq
+
+.globl _bsaes_xts_encrypt /* XTS-style encryption: 8 tweaked blocks per iteration, byte stealing for a partial last block. */
+ /* In: %rdi = source, %rsi = destination, %rdx = length in bytes, %rcx = data key structure, */
+.p2align 4 /* %r8 = key handed to _asm_AES_encrypt for the initial tweak, %r9 = 16-byte block that is encrypted into the initial tweak. */
+_bsaes_xts_encrypt:
+ movq %rsp,%rax /* entry %rsp; %eax is overwritten below before this value is read */
+L$xts_enc_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp /* 72 bytes of locals below the six saved registers */
+ movq %rsp,%rbp /* %rbp = frame base; saved registers live at 72..112(%rbp) */
+ movq %rdi,%r12 /* %r12 = source pointer */
+ movq %rsi,%r13 /* %r13 = destination pointer */
+ movq %rdx,%r14 /* %r14 = length in bytes */
+ movq %rcx,%r15 /* %r15 = data key pointer */
+ /* Initial tweak: _asm_AES_encrypt(%rdi = block at %r9, %rsi = 32(%rbp), %rdx = key at %r8). */
+ leaq (%r9),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r8),%rdx
+ call _asm_AES_encrypt
+
+ movl 240(%r15),%eax /* 32-bit field at key+240 (round count) */
+ movq %r14,%rbx /* %rbx keeps the original length for the len mod 16 test at the end */
+
+ movl %eax,%edx /* %edx = round count, reloaded into %r10d before each 8-block call */
+ shlq $7,%rax
+ subq $96,%rax /* %rax = rounds*128 - 96 bytes */
+ subq %rax,%rsp /* reserve that much stack for the converted key schedule */
+
+ movq %rsp,%rax /* register set-up for _bsaes_key_convert: %rax = schedule buffer, */
+ movq %r15,%rcx /* %rcx = key pointer, */
+ movl %edx,%r10d /* %r10d = round count */
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7 /* %xmm6, %xmm7 and %rax are as left by _bsaes_key_convert (defined outside this block) */
+ movdqa %xmm7,(%rax)
+
+ andq $-16,%r14 /* whole blocks only; the partial tail is handled at L$xts_enc_done */
+ subq $128,%rsp /* 0..127(%rsp) = eight tweak slots; key schedule now starts at 128(%rsp) */
+ movdqa 32(%rbp),%xmm6 /* %xmm6 = current tweak */
+
+ pxor %xmm14,%xmm14
+ movdqa L$xts_magic(%rip),%xmm12 /* dwords 0x87,0,1,0 */
+ pcmpgtd %xmm6,%xmm14 /* per-dword sign mask of the tweak (0 > x) */
+
+ subq $128,%r14
+ jc L$xts_enc_short /* fewer than 8 whole blocks */
+ jmp L$xts_enc_loop
+
+.p2align 4
+L$xts_enc_loop:
+ pshufd $19,%xmm14,%xmm13 /* move sign masks: dword 3 to dword 0, dword 1 to dword 2 */
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15 /* lane 0 takes tweak 0 */
+ movdqa %xmm6,0(%rsp) /* tweak 0 saved for the post-XOR */
+ paddq %xmm6,%xmm6 /* shift both 64-bit halves left by one bit */
+ pand %xmm12,%xmm13 /* keep 0x87 (wrap-around of the top bit) and 1 (carry from low to high half) */
+ pcmpgtd %xmm6,%xmm14 /* sign mask of the new value, for the next doubling */
+ pxor %xmm13,%xmm6 /* %xmm6 = next tweak; the same 8-instruction pattern repeats below */
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp) /* tweak 1 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7 /* input block 0 */
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp) /* tweak 2 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8 /* input block 1 */
+ pxor %xmm7,%xmm15 /* lane 0 = input 0 XOR tweak 0 */
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp) /* tweak 3 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9 /* input block 2 */
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp) /* tweak 4 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10 /* input block 3 */
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp) /* tweak 5 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11 /* input block 4 */
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp) /* tweak 6 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12 /* input block 5; %xmm12 no longer holds L$xts_magic (reloaded after the call) */
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13 /* input block 6 */
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14 /* input block 7 */
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp) /* tweak 7 */
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax /* %rax = converted key schedule */
+ pxor %xmm14,%xmm6 /* lane 7 = input 7 XOR tweak 7 */
+ movl %edx,%r10d /* %r10d = round count */
+
+ call _bsaes_encrypt8
+ /* Results are consumed in register order 15,0,3,5,2,6,1,4; XOR each with its saved tweak and store. */
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm1,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ /* Derive the first tweak of the next batch from tweak 7 and leave its sign mask in %xmm14. */
+ movdqa 112(%rsp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa L$xts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+
+ subq $128,%r14
+ jnc L$xts_enc_loop /* at least 8 more whole blocks remain */
+
+L$xts_enc_short:
+ addq $128,%r14 /* %r14 = bytes of whole blocks left, 0..112 */
+ jz L$xts_enc_done
+ pshufd $19,%xmm14,%xmm13 /* same tweak-doubling pattern as in L$xts_enc_loop */
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp) /* tweak 0 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp) /* tweak 1 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je L$xts_enc_1 /* exactly 1 block */
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp) /* tweak 2 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je L$xts_enc_2 /* exactly 2 blocks */
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp) /* tweak 3 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je L$xts_enc_3 /* exactly 3 blocks */
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp) /* tweak 4 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je L$xts_enc_4 /* exactly 4 blocks */
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp) /* tweak 5 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je L$xts_enc_5 /* exactly 5 blocks */
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp) /* tweak 6 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je L$xts_enc_6 /* exactly 6 blocks */
+ pxor %xmm11,%xmm3 /* fall through: 7 blocks */
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp) /* tweak 7, kept for a possible partial tail */
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax /* %rax = converted key schedule */
+ movl %edx,%r10d /* %r10d = round count */
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm1,96(%r13)
+ leaq 112(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6 /* %xmm6 = tweak following the last block written */
+ jmp L$xts_enc_done
+.p2align 4
+L$xts_enc_6:
+ pxor %xmm11,%xmm3 /* finish the input XORs that the early exit skipped */
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+
+ movdqa 96(%rsp),%xmm6 /* tweak 6 = next unused tweak */
+ jmp L$xts_enc_done
+.p2align 4
+L$xts_enc_5:
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ movdqu %xmm2,64(%r13)
+ leaq 80(%r13),%r13
+
+ movdqa 80(%rsp),%xmm6 /* tweak 5 = next unused tweak */
+ jmp L$xts_enc_done
+.p2align 4
+L$xts_enc_4:
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ movdqu %xmm5,48(%r13)
+ leaq 64(%r13),%r13
+
+ movdqa 64(%rsp),%xmm6 /* tweak 4 = next unused tweak */
+ jmp L$xts_enc_done
+.p2align 4
+L$xts_enc_3:
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm3,32(%r13)
+ leaq 48(%r13),%r13
+
+ movdqa 48(%rsp),%xmm6 /* tweak 3 = next unused tweak */
+ jmp L$xts_enc_done
+.p2align 4
+L$xts_enc_2:
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+
+ movdqa 32(%rsp),%xmm6 /* tweak 2 = next unused tweak */
+ jmp L$xts_enc_done
+.p2align 4
+L$xts_enc_1:
+ pxor %xmm15,%xmm7 /* single block: input XOR tweak 0, encrypted in place at 32(%rbp) */
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx /* overwrites the round count in %edx; it is not used again after this point */
+ call _asm_AES_encrypt
+ pxor 32(%rbp),%xmm15 /* result XOR tweak 0 */
+
+
+
+
+
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+
+ movdqa 16(%rsp),%xmm6 /* tweak 1 = next unused tweak */
+
+L$xts_enc_done:
+ andl $15,%ebx /* %ebx = length mod 16 */
+ jz L$xts_enc_ret /* no partial tail */
+ movq %r13,%rdx /* %rdx = destination just past the last whole block written */
+ /* Byte stealing: each tail input byte replaces a byte of the last written block, whose old byte moves to the tail output. */
+L$xts_enc_steal:
+ movzbl (%r12),%eax /* tail input byte */
+ movzbl -16(%rdx),%ecx /* byte of the last written block */
+ leaq 1(%r12),%r12
+ movb %al,-16(%rdx)
+ movb %cl,0(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz L$xts_enc_steal
+
+ movdqu -16(%r13),%xmm15 /* re-encrypt the patched block with the next tweak in %xmm6 */
+ leaq 32(%rbp),%rdi
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call _asm_AES_encrypt
+ pxor 32(%rbp),%xmm6
+ movdqu %xmm6,-16(%r13)
+
+L$xts_enc_ret:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+L$xts_enc_bzero: /* zero the stack from %rsp (tweak slots, then key schedule) up to %rbp in 32-byte steps */
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja L$xts_enc_bzero
+
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15 /* restore callee-saved registers pushed in the prologue */
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax /* saved %rbp, moved into %rbp below */
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+L$xts_enc_epilogue:
+ retq
+
+
+.globl _bsaes_xts_decrypt /* XTS-style decryption: 8 tweaked blocks per iteration, byte stealing for a partial last block. */
+ /* In: %rdi = source, %rsi = destination, %rdx = length in bytes, %rcx = data key structure, */
+.p2align 4 /* %r8 = key handed to _asm_AES_encrypt for the initial tweak, %r9 = 16-byte block that is encrypted into the initial tweak. */
+_bsaes_xts_decrypt:
+ movq %rsp,%rax /* entry %rsp; %eax is overwritten below before this value is read */
+L$xts_dec_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp /* 72 bytes of locals below the six saved registers */
+ movq %rsp,%rbp /* %rbp = frame base; saved registers live at 72..112(%rbp) */
+ movq %rdi,%r12 /* %r12 = source pointer */
+ movq %rsi,%r13 /* %r13 = destination pointer */
+ movq %rdx,%r14 /* %r14 = length in bytes */
+ movq %rcx,%r15 /* %r15 = data key pointer */
+ /* Initial tweak: _asm_AES_encrypt (encrypt, also on the decrypt side) of the block at %r9 into 32(%rbp). */
+ leaq (%r9),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r8),%rdx
+ call _asm_AES_encrypt
+
+ movl 240(%r15),%eax /* 32-bit field at key+240 (round count) */
+ movq %r14,%rbx /* %rbx keeps the original length for the len mod 16 tests */
+
+ movl %eax,%edx /* %edx = round count, reloaded into %r10d before each 8-block call */
+ shlq $7,%rax
+ subq $96,%rax /* %rax = rounds*128 - 96 bytes */
+ subq %rax,%rsp /* reserve that much stack for the converted key schedule */
+
+ movq %rsp,%rax /* register set-up for _bsaes_key_convert: %rax = schedule buffer, */
+ movq %r15,%rcx /* %rcx = key pointer, */
+ movl %edx,%r10d /* %r10d = round count */
+ call _bsaes_key_convert
+ pxor (%rsp),%xmm7 /* fix-up differs from the encrypt routine: %xmm7 XOR first schedule entry goes to (%rsp), */
+ movdqa %xmm6,(%rax) /* and %xmm6 goes to the slot %rax points at after the call */
+ movdqa %xmm7,(%rsp)
+
+ xorl %eax,%eax
+ andq $-16,%r14 /* whole blocks only */
+ testl $15,%ebx
+ setnz %al
+ shlq $4,%rax /* %rax = 16 if there is a partial tail, else 0 */
+ subq %rax,%r14 /* hold back one whole block for the stealing step */
+
+ subq $128,%rsp /* 0..127(%rsp) = eight tweak slots; key schedule now starts at 128(%rsp) */
+ movdqa 32(%rbp),%xmm6 /* %xmm6 = current tweak */
+
+ pxor %xmm14,%xmm14
+ movdqa L$xts_magic(%rip),%xmm12 /* dwords 0x87,0,1,0 */
+ pcmpgtd %xmm6,%xmm14 /* per-dword sign mask of the tweak (0 > x) */
+
+ subq $128,%r14
+ jc L$xts_dec_short /* fewer than 8 whole blocks */
+ jmp L$xts_dec_loop
+
+.p2align 4
+L$xts_dec_loop:
+ pshufd $19,%xmm14,%xmm13 /* move sign masks: dword 3 to dword 0, dword 1 to dword 2 */
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15 /* lane 0 takes tweak 0 */
+ movdqa %xmm6,0(%rsp) /* tweak 0 saved for the post-XOR */
+ paddq %xmm6,%xmm6 /* shift both 64-bit halves left by one bit */
+ pand %xmm12,%xmm13 /* keep 0x87 (wrap-around of the top bit) and 1 (carry from low to high half) */
+ pcmpgtd %xmm6,%xmm14 /* sign mask of the new value, for the next doubling */
+ pxor %xmm13,%xmm6 /* %xmm6 = next tweak; the same 8-instruction pattern repeats below */
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp) /* tweak 1 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7 /* input block 0 */
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp) /* tweak 2 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8 /* input block 1 */
+ pxor %xmm7,%xmm15 /* lane 0 = input 0 XOR tweak 0 */
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp) /* tweak 3 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9 /* input block 2 */
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp) /* tweak 4 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10 /* input block 3 */
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp) /* tweak 5 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11 /* input block 4 */
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp) /* tweak 6 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12 /* input block 5; %xmm12 no longer holds L$xts_magic (reloaded after the call) */
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13 /* input block 6 */
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14 /* input block 7 */
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp) /* tweak 7 */
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax /* %rax = converted key schedule */
+ pxor %xmm14,%xmm6 /* lane 7 = input 7 XOR tweak 7 */
+ movl %edx,%r10d /* %r10d = round count */
+
+ call _bsaes_decrypt8
+ /* Results are consumed in register order 15,0,5,3,1,6,2,4; XOR each with its saved tweak and store. */
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ /* Derive the first tweak of the next batch from tweak 7 and leave its sign mask in %xmm14. */
+ movdqa 112(%rsp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa L$xts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+
+ subq $128,%r14
+ jnc L$xts_dec_loop /* at least 8 more whole blocks remain */
+
+L$xts_dec_short:
+ addq $128,%r14 /* %r14 = bytes of whole blocks left to do here, 0..112 */
+ jz L$xts_dec_done
+ pshufd $19,%xmm14,%xmm13 /* same tweak-doubling pattern as in L$xts_dec_loop */
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp) /* tweak 0 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp) /* tweak 1 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je L$xts_dec_1 /* exactly 1 block */
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp) /* tweak 2 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je L$xts_dec_2 /* exactly 2 blocks */
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp) /* tweak 3 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je L$xts_dec_3 /* exactly 3 blocks */
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp) /* tweak 4 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je L$xts_dec_4 /* exactly 4 blocks */
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp) /* tweak 5 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je L$xts_dec_5 /* exactly 5 blocks */
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp) /* tweak 6 */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je L$xts_dec_6 /* exactly 6 blocks */
+ pxor %xmm11,%xmm3 /* fall through: 7 blocks */
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp) /* tweak 7, kept for a possible partial tail */
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax /* %rax = converted key schedule */
+ movl %edx,%r10d /* %r10d = round count */
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ leaq 112(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6 /* %xmm6 = tweak following the last block written */
+ jmp L$xts_dec_done
+.p2align 4
+L$xts_dec_6:
+ pxor %xmm11,%xmm3 /* finish the input XORs that the early exit skipped */
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+
+ movdqa 96(%rsp),%xmm6 /* tweak 6 = next unused tweak */
+ jmp L$xts_dec_done
+.p2align 4
+L$xts_dec_5:
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ leaq 80(%r13),%r13
+
+ movdqa 80(%rsp),%xmm6 /* tweak 5 = next unused tweak */
+ jmp L$xts_dec_done
+.p2align 4
+L$xts_dec_4:
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ leaq 64(%r13),%r13
+
+ movdqa 64(%rsp),%xmm6 /* tweak 4 = next unused tweak */
+ jmp L$xts_dec_done
+.p2align 4
+L$xts_dec_3:
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ leaq 48(%r13),%r13
+
+ movdqa 48(%rsp),%xmm6 /* tweak 3 = next unused tweak */
+ jmp L$xts_dec_done
+.p2align 4
+L$xts_dec_2:
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+
+ movdqa 32(%rsp),%xmm6 /* tweak 2 = next unused tweak */
+ jmp L$xts_dec_done
+.p2align 4
+L$xts_dec_1:
+ pxor %xmm15,%xmm7 /* single block: input XOR tweak 0, decrypted in place at 32(%rbp) */
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx /* overwrites the round count in %edx; it is not used again after this point */
+ call _asm_AES_decrypt
+ pxor 32(%rbp),%xmm15 /* result XOR tweak 0 */
+
+
+
+
+
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+
+ movdqa 16(%rsp),%xmm6 /* tweak 1 = next unused tweak */
+
+L$xts_dec_done:
+ andl $15,%ebx /* %ebx = length mod 16 */
+ jz L$xts_dec_ret /* no partial tail */
+ /* Partial tail: the held-back whole block is decrypted with the tweak AFTER the current one; the current tweak is kept in %xmm5 for the final block. */
+ pxor %xmm14,%xmm14
+ movdqa L$xts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ movdqa %xmm6,%xmm5 /* %xmm5 = current tweak */
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ movdqu (%r12),%xmm15 /* held-back whole input block */
+ pxor %xmm13,%xmm6 /* %xmm6 = following tweak */
+
+ leaq 32(%rbp),%rdi
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call _asm_AES_decrypt
+ pxor 32(%rbp),%xmm6
+ movq %r13,%rdx
+ movdqu %xmm6,(%r13) /* provisional output block, patched by the loop below */
+
+L$xts_dec_steal:
+ movzbl 16(%r12),%eax /* tail input byte (follows the held-back block) */
+ movzbl (%rdx),%ecx /* byte of the provisional output block */
+ leaq 1(%r12),%r12
+ movb %al,(%rdx)
+ movb %cl,16(%rdx) /* becomes the tail output byte */
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz L$xts_dec_steal
+
+ movdqu (%r13),%xmm15 /* decrypt the patched block with the kept tweak in %xmm5 */
+ leaq 32(%rbp),%rdi
+ pxor %xmm5,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call _asm_AES_decrypt
+ pxor 32(%rbp),%xmm5
+ movdqu %xmm5,(%r13)
+
+L$xts_dec_ret:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+L$xts_dec_bzero: /* zero the stack from %rsp (tweak slots, then key schedule) up to %rbp in 32-byte steps */
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja L$xts_dec_bzero
+
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15 /* restore callee-saved registers pushed in the prologue */
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax /* saved %rbp, moved into %rbp below */
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+L$xts_dec_epilogue:
+ retq
+
+
+.p2align 6
+_bsaes_const: /* 16-byte constants; code addresses them by fixed offsets from L$BS0 and L$ADD1, so order and size must not change */
+L$M0ISR: /* pshufb byte-index masks (this and the other shuffle masks below) */
+.quad 0x0a0e0206070b0f03, 0x0004080c0d010509
+L$ISRM0:
+.quad 0x01040b0e0205080f, 0x0306090c00070a0d
+L$ISR:
+.quad 0x0504070602010003, 0x0f0e0d0c080b0a09
+L$BS0: /* alternating single bits */
+.quad 0x5555555555555555, 0x5555555555555555
+L$BS1: /* alternating bit pairs */
+.quad 0x3333333333333333, 0x3333333333333333
+L$BS2: /* alternating nibbles */
+.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+L$SR:
+.quad 0x0504070600030201, 0x0f0e0d0c0a09080b
+L$SRM0:
+.quad 0x0304090e00050a0f, 0x01060b0c0207080d
+L$M0SR:
+.quad 0x0a0e02060f03070b, 0x0004080c05090d01
+L$SWPUP: /* identity on bytes 0..11, byte-reverses the top dword; reached as L$ADD1-32 */
+.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
+L$SWPUPM0SR: /* reached as L$ADD1-16 */
+.quad 0x0a0d02060c03070b, 0x0004080f05090e01
+L$ADD1: /* L$ADD1..L$ADD8: paddd operands that add 1..8 to the top dword only */
+.quad 0x0000000000000000, 0x0000000100000000
+L$ADD2:
+.quad 0x0000000000000000, 0x0000000200000000
+L$ADD3:
+.quad 0x0000000000000000, 0x0000000300000000
+L$ADD4:
+.quad 0x0000000000000000, 0x0000000400000000
+L$ADD5:
+.quad 0x0000000000000000, 0x0000000500000000
+L$ADD6:
+.quad 0x0000000000000000, 0x0000000600000000
+L$ADD7:
+.quad 0x0000000000000000, 0x0000000700000000
+L$ADD8: /* reached as L$ADD1+112 */
+.quad 0x0000000000000000, 0x0000000800000000
+L$xts_magic: /* tweak doubling: 0x87 folds the shifted-out top bit into dword 0, 1 carries from the low to the high 64-bit half */
+.long 0x87,0,1,0
+L$masks: /* one byte-replicated mask per bit 0..3; not referenced by the routines in this part of the file */
+.quad 0x0101010101010101, 0x0101010101010101
+.quad 0x0202020202020202, 0x0202020202020202
+.quad 0x0404040404040404, 0x0404040404040404
+.quad 0x0808080808080808, 0x0808080808080808
+L$M0:
+.quad 0x02060a0e03070b0f, 0x0004080c0105090d
+L$63: /* 0x63 in every byte */
+.quad 0x6363636363636363, 0x6363636363636363
+.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0 /* NUL-terminated UTF-8 credit string: Bit-sliced AES for x86_64/SSSE3, Emilia Kasper (a-umlaut in the original), Peter Schwabe, Andy Polyakov */
+.p2align 6
+
diff --git a/ext/libressl/crypto/aes/bsaes-masm-x86_64.S b/ext/libressl/crypto/aes/bsaes-masm-x86_64.S
new file mode 100644
index 0000000..6b1a97d
--- /dev/null
+++ b/ext/libressl/crypto/aes/bsaes-masm-x86_64.S
@@ -0,0 +1,2803 @@
+; 1 "crypto/aes/bsaes-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/aes/bsaes-masm-x86_64.S.tmp" 2
+OPTION DOTNAME
+
+; 1 "./crypto/x86_arch.h" 1
+
+
+; 16 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+; 40 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 "crypto/aes/bsaes-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+
+EXTERN asm_AES_encrypt:NEAR
+EXTERN asm_AES_decrypt:NEAR
+
+
+ALIGN 64
+_bsaes_encrypt8 PROC PRIVATE ; encrypts eight 16-byte blocks held in xmm15,xmm0..xmm6; rax = key schedule, r10d = round count
+ lea r11,QWORD PTR[$L$BS0] ; r11 = constant table base used for all mask loads below
+ ; Add the first 16-byte schedule entry to every block and apply the shuffle mask at $L$BS0+80.
+ movdqa xmm8,XMMWORD PTR[rax]
+ lea rax,QWORD PTR[16+rax] ; rax now points past the first schedule entry
+ movdqa xmm7,XMMWORD PTR[80+r11] ; NOTE(review): table layout is outside this view; presumably the M0SR mask - confirm
+ pxor xmm15,xmm8
+ pxor xmm0,xmm8
+DB 102,68,15,56,0,255 ; pshufb xmm15,xmm7
+ pxor xmm1,xmm8
+DB 102,15,56,0,199 ; pshufb xmm0,xmm7
+ pxor xmm2,xmm8
+DB 102,15,56,0,207 ; pshufb xmm1,xmm7
+ pxor xmm3,xmm8
+DB 102,15,56,0,215 ; pshufb xmm2,xmm7
+ pxor xmm4,xmm8
+DB 102,15,56,0,223 ; pshufb xmm3,xmm7
+ pxor xmm5,xmm8
+DB 102,15,56,0,231 ; pshufb xmm4,xmm7
+ pxor xmm6,xmm8
+DB 102,15,56,0,239 ; pshufb xmm5,xmm7
+DB 102,15,56,0,247 ; pshufb xmm6,xmm7
+_bsaes_encrypt8_bitslice:: ; second entry point: caller has already done the key add and shuffle, and set r11
+ movdqa xmm7,XMMWORD PTR[r11] ; mask used with the shift-by-1 exchanges
+ movdqa xmm8,XMMWORD PTR[16+r11] ; mask used with the shift-by-2 exchanges
+ movdqa xmm9,xmm5 ; exchange masked bits between (xmm5 shr 1) and xmm6; same shape for every pair below
+ psrlq xmm5,1
+ movdqa xmm10,xmm3
+ psrlq xmm3,1
+ pxor xmm5,xmm6
+ pxor xmm3,xmm4
+ pand xmm5,xmm7
+ pand xmm3,xmm7
+ pxor xmm6,xmm5
+ psllq xmm5,1
+ pxor xmm4,xmm3
+ psllq xmm3,1
+ pxor xmm5,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm1
+ psrlq xmm1,1
+ movdqa xmm10,xmm15
+ psrlq xmm15,1
+ pxor xmm1,xmm2
+ pxor xmm15,xmm0
+ pand xmm1,xmm7
+ pand xmm15,xmm7
+ pxor xmm2,xmm1
+ psllq xmm1,1
+ pxor xmm0,xmm15
+ psllq xmm15,1
+ pxor xmm1,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[32+r11] ; mask used with the shift-by-4 exchanges
+ movdqa xmm9,xmm4
+ psrlq xmm4,2
+ movdqa xmm10,xmm3
+ psrlq xmm3,2
+ pxor xmm4,xmm6
+ pxor xmm3,xmm5
+ pand xmm4,xmm8
+ pand xmm3,xmm8
+ pxor xmm6,xmm4
+ psllq xmm4,2
+ pxor xmm5,xmm3
+ psllq xmm3,2
+ pxor xmm4,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,2
+ movdqa xmm10,xmm15
+ psrlq xmm15,2
+ pxor xmm0,xmm2
+ pxor xmm15,xmm1
+ pand xmm0,xmm8
+ pand xmm15,xmm8
+ pxor xmm2,xmm0
+ psllq xmm0,2
+ pxor xmm1,xmm15
+ psllq xmm15,2
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm9,xmm2
+ psrlq xmm2,4
+ movdqa xmm10,xmm1
+ psrlq xmm1,4
+ pxor xmm2,xmm6
+ pxor xmm1,xmm5
+ pand xmm2,xmm7
+ pand xmm1,xmm7
+ pxor xmm6,xmm2
+ psllq xmm2,4
+ pxor xmm5,xmm1
+ psllq xmm1,4
+ pxor xmm2,xmm9
+ pxor xmm1,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,4
+ movdqa xmm10,xmm15
+ psrlq xmm15,4
+ pxor xmm0,xmm4
+ pxor xmm15,xmm3
+ pand xmm0,xmm7
+ pand xmm15,xmm7
+ pxor xmm4,xmm0
+ psllq xmm0,4
+ pxor xmm3,xmm15
+ psllq xmm15,4
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ dec r10d ; one round is accounted for before the loop
+ jmp $L$enc_sbox ; first pass skips the per-round key add
+ALIGN 16
+$L$enc_loop:: ; per round: XOR 128 bytes of schedule into the eight registers, shuffle each with xmm7
+ pxor xmm15,XMMWORD PTR[rax]
+ pxor xmm0,XMMWORD PTR[16+rax]
+DB 102,68,15,56,0,255 ; pshufb xmm15,xmm7
+ pxor xmm1,XMMWORD PTR[32+rax]
+DB 102,15,56,0,199 ; pshufb xmm0,xmm7
+ pxor xmm2,XMMWORD PTR[48+rax]
+DB 102,15,56,0,207 ; pshufb xmm1,xmm7
+ pxor xmm3,XMMWORD PTR[64+rax]
+DB 102,15,56,0,215 ; pshufb xmm2,xmm7
+ pxor xmm4,XMMWORD PTR[80+rax]
+DB 102,15,56,0,223 ; pshufb xmm3,xmm7
+ pxor xmm5,XMMWORD PTR[96+rax]
+DB 102,15,56,0,231 ; pshufb xmm4,xmm7
+ pxor xmm6,XMMWORD PTR[112+rax]
+DB 102,15,56,0,239 ; pshufb xmm5,xmm7
+ lea rax,QWORD PTR[128+rax] ; advance to the next round's 128 bytes
+DB 102,15,56,0,247 ; pshufb xmm6,xmm7
+$L$enc_sbox:: ; straight-line XOR/AND/OR network over the eight registers (presumably the bit-sliced S-box, per the label); xmm7..xmm14 are scratch
+ pxor xmm4,xmm5
+ pxor xmm1,xmm0
+ pxor xmm2,xmm15
+ pxor xmm5,xmm1
+ pxor xmm4,xmm15
+
+ pxor xmm5,xmm2
+ pxor xmm2,xmm6
+ pxor xmm6,xmm4
+ pxor xmm2,xmm3
+ pxor xmm3,xmm4
+ pxor xmm2,xmm0
+
+ pxor xmm1,xmm6
+ pxor xmm0,xmm4
+ movdqa xmm10,xmm6
+ movdqa xmm9,xmm0
+ movdqa xmm8,xmm4
+ movdqa xmm12,xmm1
+ movdqa xmm11,xmm5
+
+ pxor xmm10,xmm3
+ pxor xmm9,xmm1
+ pxor xmm8,xmm2
+ movdqa xmm13,xmm10
+ pxor xmm12,xmm3
+ movdqa xmm7,xmm9
+ pxor xmm11,xmm15
+ movdqa xmm14,xmm10
+
+ por xmm9,xmm8
+ por xmm10,xmm11
+ pxor xmm14,xmm7
+ pand xmm13,xmm11
+ pxor xmm11,xmm8
+ pand xmm7,xmm8
+ pand xmm14,xmm11
+ movdqa xmm11,xmm2
+ pxor xmm11,xmm15
+ pand xmm12,xmm11
+ pxor xmm10,xmm12
+ pxor xmm9,xmm12
+ movdqa xmm12,xmm6
+ movdqa xmm11,xmm4
+ pxor xmm12,xmm0
+ pxor xmm11,xmm5
+ movdqa xmm8,xmm12
+ pand xmm12,xmm11
+ por xmm8,xmm11
+ pxor xmm7,xmm12
+ pxor xmm10,xmm14
+ pxor xmm9,xmm13
+ pxor xmm8,xmm14
+ movdqa xmm11,xmm1
+ pxor xmm7,xmm13
+ movdqa xmm12,xmm3
+ pxor xmm8,xmm13
+ movdqa xmm13,xmm0
+ pand xmm11,xmm2
+ movdqa xmm14,xmm6
+ pand xmm12,xmm15
+ pand xmm13,xmm4
+ por xmm14,xmm5
+ pxor xmm10,xmm11
+ pxor xmm9,xmm12
+ pxor xmm8,xmm13
+ pxor xmm7,xmm14
+
+
+
+
+
+ movdqa xmm11,xmm10
+ pand xmm10,xmm8
+ pxor xmm11,xmm9
+
+ movdqa xmm13,xmm7
+ movdqa xmm14,xmm11
+ pxor xmm13,xmm10
+ pand xmm14,xmm13
+
+ movdqa xmm12,xmm8
+ pxor xmm14,xmm9
+ pxor xmm12,xmm7
+
+ pxor xmm10,xmm9
+
+ pand xmm12,xmm10
+
+ movdqa xmm9,xmm13
+ pxor xmm12,xmm7
+
+ pxor xmm9,xmm12
+ pxor xmm8,xmm12
+
+ pand xmm9,xmm7
+
+ pxor xmm13,xmm9
+ pxor xmm8,xmm9
+
+ pand xmm13,xmm14
+
+ pxor xmm13,xmm11
+ movdqa xmm11,xmm5
+ movdqa xmm7,xmm4
+ movdqa xmm9,xmm14
+ pxor xmm9,xmm13
+ pand xmm9,xmm5
+ pxor xmm5,xmm4
+ pand xmm4,xmm14
+ pand xmm5,xmm13
+ pxor xmm5,xmm4
+ pxor xmm4,xmm9
+ pxor xmm11,xmm15
+ pxor xmm7,xmm2
+ pxor xmm14,xmm12
+ pxor xmm13,xmm8
+ movdqa xmm10,xmm14
+ movdqa xmm9,xmm12
+ pxor xmm10,xmm13
+ pxor xmm9,xmm8
+ pand xmm10,xmm11
+ pand xmm9,xmm15
+ pxor xmm11,xmm7
+ pxor xmm15,xmm2
+ pand xmm7,xmm14
+ pand xmm2,xmm12
+ pand xmm11,xmm13
+ pand xmm15,xmm8
+ pxor xmm7,xmm11
+ pxor xmm15,xmm2
+ pxor xmm11,xmm10
+ pxor xmm2,xmm9
+ pxor xmm5,xmm11
+ pxor xmm15,xmm11
+ pxor xmm4,xmm7
+ pxor xmm2,xmm7
+
+ movdqa xmm11,xmm6
+ movdqa xmm7,xmm0
+ pxor xmm11,xmm3
+ pxor xmm7,xmm1
+ movdqa xmm10,xmm14
+ movdqa xmm9,xmm12
+ pxor xmm10,xmm13
+ pxor xmm9,xmm8
+ pand xmm10,xmm11
+ pand xmm9,xmm3
+ pxor xmm11,xmm7
+ pxor xmm3,xmm1
+ pand xmm7,xmm14
+ pand xmm1,xmm12
+ pand xmm11,xmm13
+ pand xmm3,xmm8
+ pxor xmm7,xmm11
+ pxor xmm3,xmm1
+ pxor xmm11,xmm10
+ pxor xmm1,xmm9
+ pxor xmm14,xmm12
+ pxor xmm13,xmm8
+ movdqa xmm10,xmm14
+ pxor xmm10,xmm13
+ pand xmm10,xmm6
+ pxor xmm6,xmm0
+ pand xmm0,xmm14
+ pand xmm6,xmm13
+ pxor xmm6,xmm0
+ pxor xmm0,xmm10
+ pxor xmm6,xmm11
+ pxor xmm3,xmm11
+ pxor xmm0,xmm7
+ pxor xmm1,xmm7
+ pxor xmm6,xmm15
+ pxor xmm0,xmm5
+ pxor xmm3,xmm6
+ pxor xmm5,xmm15
+ pxor xmm15,xmm0
+
+ pxor xmm0,xmm4
+ pxor xmm4,xmm1
+ pxor xmm1,xmm2
+ pxor xmm2,xmm4
+ pxor xmm3,xmm4
+
+ pxor xmm5,xmm2
+ dec r10d ; round counter; its flags drive both jl here and jnz at the end of this section
+ jl $L$enc_done ; counter went negative: the network above was the last one, skip the mixing step
+ pshufd xmm7,xmm15,093h ; dword rotations (093h, 04Eh) XORed together across registers (presumably the bit-sliced column mix)
+ pshufd xmm8,xmm0,093h
+ pxor xmm15,xmm7
+ pshufd xmm9,xmm3,093h
+ pxor xmm0,xmm8
+ pshufd xmm10,xmm5,093h
+ pxor xmm3,xmm9
+ pshufd xmm11,xmm2,093h
+ pxor xmm5,xmm10
+ pshufd xmm12,xmm6,093h
+ pxor xmm2,xmm11
+ pshufd xmm13,xmm1,093h
+ pxor xmm6,xmm12
+ pshufd xmm14,xmm4,093h
+ pxor xmm1,xmm13
+ pxor xmm4,xmm14
+
+ pxor xmm8,xmm15
+ pxor xmm7,xmm4
+ pxor xmm8,xmm4
+ pshufd xmm15,xmm15,04Eh
+ pxor xmm9,xmm0
+ pshufd xmm0,xmm0,04Eh
+ pxor xmm12,xmm2
+ pxor xmm15,xmm7
+ pxor xmm13,xmm6
+ pxor xmm0,xmm8
+ pxor xmm11,xmm5
+ pshufd xmm7,xmm2,04Eh
+ pxor xmm14,xmm1
+ pshufd xmm8,xmm6,04Eh
+ pxor xmm10,xmm3
+ pshufd xmm2,xmm5,04Eh
+ pxor xmm10,xmm4
+ pshufd xmm6,xmm4,04Eh
+ pxor xmm11,xmm4
+ pshufd xmm5,xmm1,04Eh
+ pxor xmm7,xmm11
+ pshufd xmm1,xmm3,04Eh
+ pxor xmm8,xmm12
+ pxor xmm2,xmm10
+ pxor xmm6,xmm14
+ pxor xmm5,xmm13
+ movdqa xmm3,xmm7
+ pxor xmm1,xmm9
+ movdqa xmm4,xmm8
+ movdqa xmm7,XMMWORD PTR[48+r11] ; shuffle mask for the next key-add step
+ jnz $L$enc_loop ; flags still from dec r10d; the SSE instructions in between do not touch them
+ movdqa xmm7,XMMWORD PTR[64+r11] ; counter reached zero: use the mask at +64 for the final pass
+ jmp $L$enc_loop
+ALIGN 16
+$L$enc_done:: ; same bit-exchange steps as at entry, with a different register pairing
+ movdqa xmm7,XMMWORD PTR[r11]
+ movdqa xmm8,XMMWORD PTR[16+r11]
+ movdqa xmm9,xmm1
+ psrlq xmm1,1
+ movdqa xmm10,xmm2
+ psrlq xmm2,1
+ pxor xmm1,xmm4
+ pxor xmm2,xmm6
+ pand xmm1,xmm7
+ pand xmm2,xmm7
+ pxor xmm4,xmm1
+ psllq xmm1,1
+ pxor xmm6,xmm2
+ psllq xmm2,1
+ pxor xmm1,xmm9
+ pxor xmm2,xmm10
+ movdqa xmm9,xmm3
+ psrlq xmm3,1
+ movdqa xmm10,xmm15
+ psrlq xmm15,1
+ pxor xmm3,xmm5
+ pxor xmm15,xmm0
+ pand xmm3,xmm7
+ pand xmm15,xmm7
+ pxor xmm5,xmm3
+ psllq xmm3,1
+ pxor xmm0,xmm15
+ psllq xmm15,1
+ pxor xmm3,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[32+r11]
+ movdqa xmm9,xmm6
+ psrlq xmm6,2
+ movdqa xmm10,xmm2
+ psrlq xmm2,2
+ pxor xmm6,xmm4
+ pxor xmm2,xmm1
+ pand xmm6,xmm8
+ pand xmm2,xmm8
+ pxor xmm4,xmm6
+ psllq xmm6,2
+ pxor xmm1,xmm2
+ psllq xmm2,2
+ pxor xmm6,xmm9
+ pxor xmm2,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,2
+ movdqa xmm10,xmm15
+ psrlq xmm15,2
+ pxor xmm0,xmm5
+ pxor xmm15,xmm3
+ pand xmm0,xmm8
+ pand xmm15,xmm8
+ pxor xmm5,xmm0
+ psllq xmm0,2
+ pxor xmm3,xmm15
+ psllq xmm15,2
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm9,xmm5
+ psrlq xmm5,4
+ movdqa xmm10,xmm3
+ psrlq xmm3,4
+ pxor xmm5,xmm4
+ pxor xmm3,xmm1
+ pand xmm5,xmm7
+ pand xmm3,xmm7
+ pxor xmm4,xmm5
+ psllq xmm5,4
+ pxor xmm1,xmm3
+ psllq xmm3,4
+ pxor xmm5,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm0
+ psrlq xmm0,4
+ movdqa xmm10,xmm15
+ psrlq xmm15,4
+ pxor xmm0,xmm6
+ pxor xmm15,xmm2
+ pand xmm0,xmm7
+ pand xmm15,xmm7
+ pxor xmm6,xmm0
+ psllq xmm0,4
+ pxor xmm2,xmm15
+ psllq xmm15,4
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[rax] ; final 16-byte schedule entry, XORed into all eight result registers
+ pxor xmm3,xmm7
+ pxor xmm5,xmm7
+ pxor xmm2,xmm7
+ pxor xmm6,xmm7
+ pxor xmm1,xmm7
+ pxor xmm4,xmm7
+ pxor xmm15,xmm7
+ pxor xmm0,xmm7
+ DB 0F3h,0C3h ;repret (the two bytes encode rep ret)
+_bsaes_encrypt8 ENDP
+
+
+ALIGN 64
+_bsaes_decrypt8 PROC PRIVATE ; Bit-sliced decrypt helper for eight 16-byte blocks held in xmm15,xmm0..xmm6. In: rax -> converted key schedule, r10d = round count. Scratch: xmm7..xmm14, r11.
+ lea r11,QWORD PTR[$L$BS0] ; r11 = address of $L$BS0 (constant table defined outside this routine)
+
+ movdqa xmm8,XMMWORD PTR[rax] ; first 16 bytes of the key schedule
+ lea rax,QWORD PTR[16+rax] ; step rax past that key
+ movdqa xmm7,XMMWORD PTR[((-48))+r11] ; byte-shuffle mask for the pshufb instructions below
+ pxor xmm15,xmm8 ; XOR the first key into each of the eight blocks, interleaved with the shuffles
+ pxor xmm0,xmm8
+DB 102,68,15,56,0,255 ; pshufb xmm15,xmm7 (hand-encoded)
+ pxor xmm1,xmm8
+DB 102,15,56,0,199 ; pshufb xmm0,xmm7
+ pxor xmm2,xmm8
+DB 102,15,56,0,207 ; pshufb xmm1,xmm7
+ pxor xmm3,xmm8
+DB 102,15,56,0,215 ; pshufb xmm2,xmm7
+ pxor xmm4,xmm8
+DB 102,15,56,0,223 ; pshufb xmm3,xmm7
+ pxor xmm5,xmm8
+DB 102,15,56,0,231 ; pshufb xmm4,xmm7
+ pxor xmm6,xmm8
+DB 102,15,56,0,239 ; pshufb xmm5,xmm7
+DB 102,15,56,0,247 ; pshufb xmm6,xmm7
+ movdqa xmm7,XMMWORD PTR[r11] ; mask for the 1-bit swap passes
+ movdqa xmm8,XMMWORD PTR[16+r11] ; mask for the 2-bit swap passes
+ movdqa xmm9,xmm5 ; 1-bit masked swap: pairs (xmm6,xmm5) and (xmm4,xmm3); xmm9/xmm10 keep the unshifted copies
+ psrlq xmm5,1
+ movdqa xmm10,xmm3
+ psrlq xmm3,1
+ pxor xmm5,xmm6
+ pxor xmm3,xmm4
+ pand xmm5,xmm7
+ pand xmm3,xmm7
+ pxor xmm6,xmm5
+ psllq xmm5,1
+ pxor xmm4,xmm3
+ psllq xmm3,1
+ pxor xmm5,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm1 ; 1-bit masked swap: pairs (xmm2,xmm1) and (xmm0,xmm15)
+ psrlq xmm1,1
+ movdqa xmm10,xmm15
+ psrlq xmm15,1
+ pxor xmm1,xmm2
+ pxor xmm15,xmm0
+ pand xmm1,xmm7
+ pand xmm15,xmm7
+ pxor xmm2,xmm1
+ psllq xmm1,1
+ pxor xmm0,xmm15
+ psllq xmm15,1
+ pxor xmm1,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[32+r11] ; mask for the 4-bit swap passes (loaded early; the 2-bit passes use xmm8)
+ movdqa xmm9,xmm4 ; 2-bit masked swap: pairs (xmm6,xmm4) and (xmm5,xmm3)
+ psrlq xmm4,2
+ movdqa xmm10,xmm3
+ psrlq xmm3,2
+ pxor xmm4,xmm6
+ pxor xmm3,xmm5
+ pand xmm4,xmm8
+ pand xmm3,xmm8
+ pxor xmm6,xmm4
+ psllq xmm4,2
+ pxor xmm5,xmm3
+ psllq xmm3,2
+ pxor xmm4,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm0 ; 2-bit masked swap: pairs (xmm2,xmm0) and (xmm1,xmm15)
+ psrlq xmm0,2
+ movdqa xmm10,xmm15
+ psrlq xmm15,2
+ pxor xmm0,xmm2
+ pxor xmm15,xmm1
+ pand xmm0,xmm8
+ pand xmm15,xmm8
+ pxor xmm2,xmm0
+ psllq xmm0,2
+ pxor xmm1,xmm15
+ psllq xmm15,2
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm9,xmm2 ; 4-bit masked swap: pairs (xmm6,xmm2) and (xmm5,xmm1)
+ psrlq xmm2,4
+ movdqa xmm10,xmm1
+ psrlq xmm1,4
+ pxor xmm2,xmm6
+ pxor xmm1,xmm5
+ pand xmm2,xmm7
+ pand xmm1,xmm7
+ pxor xmm6,xmm2
+ psllq xmm2,4
+ pxor xmm5,xmm1
+ psllq xmm1,4
+ pxor xmm2,xmm9
+ pxor xmm1,xmm10
+ movdqa xmm9,xmm0 ; 4-bit masked swap: pairs (xmm4,xmm0) and (xmm3,xmm15)
+ psrlq xmm0,4
+ movdqa xmm10,xmm15
+ psrlq xmm15,4
+ pxor xmm0,xmm4
+ pxor xmm15,xmm3
+ pand xmm0,xmm7
+ pand xmm15,xmm7
+ pxor xmm4,xmm0
+ psllq xmm0,4
+ pxor xmm3,xmm15
+ psllq xmm15,4
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ dec r10d ; one round is accounted for before entering the loop
+ jmp $L$dec_sbox ; first pass enters at the S-box stage, skipping the key-XOR/shuffle at $L$dec_loop
+ALIGN 16
+$L$dec_loop:: ; per round: XOR 128 bytes of round key from [rax], then pshufb every state register by xmm7
+ pxor xmm15,XMMWORD PTR[rax]
+ pxor xmm0,XMMWORD PTR[16+rax]
+DB 102,68,15,56,0,255 ; pshufb xmm15,xmm7
+ pxor xmm1,XMMWORD PTR[32+rax]
+DB 102,15,56,0,199 ; pshufb xmm0,xmm7
+ pxor xmm2,XMMWORD PTR[48+rax]
+DB 102,15,56,0,207 ; pshufb xmm1,xmm7
+ pxor xmm3,XMMWORD PTR[64+rax]
+DB 102,15,56,0,215 ; pshufb xmm2,xmm7
+ pxor xmm4,XMMWORD PTR[80+rax]
+DB 102,15,56,0,223 ; pshufb xmm3,xmm7
+ pxor xmm5,XMMWORD PTR[96+rax]
+DB 102,15,56,0,231 ; pshufb xmm4,xmm7
+ pxor xmm6,XMMWORD PTR[112+rax]
+DB 102,15,56,0,239 ; pshufb xmm5,xmm7
+ lea rax,QWORD PTR[128+rax] ; advance to the next 128-byte round key
+DB 102,15,56,0,247 ; pshufb xmm6,xmm7
+$L$dec_sbox:: ; XOR/AND/OR-only network over xmm15,xmm0..xmm6 with xmm7..xmm14 as temporaries (presumably the inverse S-box, per the label)
+ pxor xmm2,xmm3 ; linear (XOR-only) input stage
+
+ pxor xmm3,xmm6
+ pxor xmm1,xmm6
+ pxor xmm5,xmm3
+ pxor xmm6,xmm5
+ pxor xmm0,xmm6
+
+ pxor xmm15,xmm0
+ pxor xmm1,xmm4
+ pxor xmm2,xmm15
+ pxor xmm4,xmm15
+ pxor xmm0,xmm2
+ movdqa xmm10,xmm2 ; non-linear stage starts: temporaries xmm7..xmm14 are seeded from the state
+ movdqa xmm9,xmm6
+ movdqa xmm8,xmm0
+ movdqa xmm12,xmm3
+ movdqa xmm11,xmm4
+
+ pxor xmm10,xmm15
+ pxor xmm9,xmm3
+ pxor xmm8,xmm5
+ movdqa xmm13,xmm10
+ pxor xmm12,xmm15
+ movdqa xmm7,xmm9
+ pxor xmm11,xmm1
+ movdqa xmm14,xmm10
+
+ por xmm9,xmm8
+ por xmm10,xmm11
+ pxor xmm14,xmm7
+ pand xmm13,xmm11
+ pxor xmm11,xmm8
+ pand xmm7,xmm8
+ pand xmm14,xmm11
+ movdqa xmm11,xmm5
+ pxor xmm11,xmm1
+ pand xmm12,xmm11
+ pxor xmm10,xmm12
+ pxor xmm9,xmm12
+ movdqa xmm12,xmm2
+ movdqa xmm11,xmm0
+ pxor xmm12,xmm6
+ pxor xmm11,xmm4
+ movdqa xmm8,xmm12
+ pand xmm12,xmm11
+ por xmm8,xmm11
+ pxor xmm7,xmm12
+ pxor xmm10,xmm14
+ pxor xmm9,xmm13
+ pxor xmm8,xmm14
+ movdqa xmm11,xmm3
+ pxor xmm7,xmm13
+ movdqa xmm12,xmm15
+ pxor xmm8,xmm13
+ movdqa xmm13,xmm6
+ pand xmm11,xmm5
+ movdqa xmm14,xmm2
+ pand xmm12,xmm1
+ pand xmm13,xmm0
+ por xmm14,xmm4
+ pxor xmm10,xmm11
+ pxor xmm9,xmm12
+ pxor xmm8,xmm13
+ pxor xmm7,xmm14
+
+
+
+
+
+ movdqa xmm11,xmm10 ; second non-linear stage: works only on the temporaries xmm7..xmm14
+ pand xmm10,xmm8
+ pxor xmm11,xmm9
+
+ movdqa xmm13,xmm7
+ movdqa xmm14,xmm11
+ pxor xmm13,xmm10
+ pand xmm14,xmm13
+
+ movdqa xmm12,xmm8
+ pxor xmm14,xmm9
+ pxor xmm12,xmm7
+
+ pxor xmm10,xmm9
+
+ pand xmm12,xmm10
+
+ movdqa xmm9,xmm13
+ pxor xmm12,xmm7
+
+ pxor xmm9,xmm12
+ pxor xmm8,xmm12
+
+ pand xmm9,xmm7
+
+ pxor xmm13,xmm9
+ pxor xmm8,xmm9
+
+ pand xmm13,xmm14
+
+ pxor xmm13,xmm11
+ movdqa xmm11,xmm4 ; combine the temporaries xmm14,xmm13,xmm12,xmm8 back into the state registers
+ movdqa xmm7,xmm0
+ movdqa xmm9,xmm14
+ pxor xmm9,xmm13
+ pand xmm9,xmm4
+ pxor xmm4,xmm0
+ pand xmm0,xmm14
+ pand xmm4,xmm13
+ pxor xmm4,xmm0
+ pxor xmm0,xmm9
+ pxor xmm11,xmm1
+ pxor xmm7,xmm5
+ pxor xmm14,xmm12
+ pxor xmm13,xmm8
+ movdqa xmm10,xmm14
+ movdqa xmm9,xmm12
+ pxor xmm10,xmm13
+ pxor xmm9,xmm8
+ pand xmm10,xmm11
+ pand xmm9,xmm1
+ pxor xmm11,xmm7
+ pxor xmm1,xmm5
+ pand xmm7,xmm14
+ pand xmm5,xmm12
+ pand xmm11,xmm13
+ pand xmm1,xmm8
+ pxor xmm7,xmm11
+ pxor xmm1,xmm5
+ pxor xmm11,xmm10
+ pxor xmm5,xmm9
+ pxor xmm4,xmm11
+ pxor xmm1,xmm11
+ pxor xmm0,xmm7
+ pxor xmm5,xmm7
+
+ movdqa xmm11,xmm2 ; same combination applied to the other half of the state (xmm2,xmm6,xmm15,xmm3)
+ movdqa xmm7,xmm6
+ pxor xmm11,xmm15
+ pxor xmm7,xmm3
+ movdqa xmm10,xmm14
+ movdqa xmm9,xmm12
+ pxor xmm10,xmm13
+ pxor xmm9,xmm8
+ pand xmm10,xmm11
+ pand xmm9,xmm15
+ pxor xmm11,xmm7
+ pxor xmm15,xmm3
+ pand xmm7,xmm14
+ pand xmm3,xmm12
+ pand xmm11,xmm13
+ pand xmm15,xmm8
+ pxor xmm7,xmm11
+ pxor xmm15,xmm3
+ pxor xmm11,xmm10
+ pxor xmm3,xmm9
+ pxor xmm14,xmm12
+ pxor xmm13,xmm8
+ movdqa xmm10,xmm14
+ pxor xmm10,xmm13
+ pand xmm10,xmm2
+ pxor xmm2,xmm6
+ pand xmm6,xmm14
+ pand xmm2,xmm13
+ pxor xmm2,xmm6
+ pxor xmm6,xmm10
+ pxor xmm2,xmm11
+ pxor xmm15,xmm11
+ pxor xmm6,xmm7
+ pxor xmm3,xmm7
+ pxor xmm0,xmm6 ; linear (XOR-only) output stage
+ pxor xmm5,xmm4
+
+ pxor xmm3,xmm0
+ pxor xmm1,xmm6
+ pxor xmm4,xmm6
+ pxor xmm3,xmm1
+ pxor xmm6,xmm15
+ pxor xmm3,xmm4
+ pxor xmm2,xmm5
+ pxor xmm5,xmm0
+ pxor xmm2,xmm3
+
+ pxor xmm3,xmm15
+ pxor xmm6,xmm2
+ dec r10d ; count this round
+ jl $L$dec_done ; counter went negative: skip the column mixing and finish
+
+ pshufd xmm7,xmm15,04Eh ; 04Eh swaps the two 64-bit halves; each temp = state ^ swapped(state) after the following pxor
+ pshufd xmm13,xmm2,04Eh
+ pxor xmm7,xmm15
+ pshufd xmm14,xmm4,04Eh
+ pxor xmm13,xmm2
+ pshufd xmm8,xmm0,04Eh
+ pxor xmm14,xmm4
+ pshufd xmm9,xmm5,04Eh
+ pxor xmm8,xmm0
+ pshufd xmm10,xmm3,04Eh
+ pxor xmm9,xmm5
+ pxor xmm15,xmm13
+ pxor xmm0,xmm13
+ pshufd xmm11,xmm1,04Eh
+ pxor xmm10,xmm3
+ pxor xmm5,xmm7
+ pxor xmm3,xmm8
+ pshufd xmm12,xmm6,04Eh
+ pxor xmm11,xmm1
+ pxor xmm0,xmm14
+ pxor xmm1,xmm9
+ pxor xmm12,xmm6
+
+ pxor xmm5,xmm14
+ pxor xmm3,xmm13
+ pxor xmm1,xmm13
+ pxor xmm6,xmm10
+ pxor xmm2,xmm11
+ pxor xmm1,xmm14
+ pxor xmm6,xmm14
+ pxor xmm4,xmm12
+ pshufd xmm7,xmm15,093h ; 093h rotates the four dwords by one position (32 bits); each state ^= its rotation
+ pshufd xmm8,xmm0,093h
+ pxor xmm15,xmm7
+ pshufd xmm9,xmm5,093h
+ pxor xmm0,xmm8
+ pshufd xmm10,xmm3,093h
+ pxor xmm5,xmm9
+ pshufd xmm11,xmm1,093h
+ pxor xmm3,xmm10
+ pshufd xmm12,xmm6,093h
+ pxor xmm1,xmm11
+ pshufd xmm13,xmm2,093h
+ pxor xmm6,xmm12
+ pshufd xmm14,xmm4,093h
+ pxor xmm2,xmm13
+ pxor xmm4,xmm14
+
+ pxor xmm8,xmm15 ; cross-register XOR mixing of rotated and half-swapped values
+ pxor xmm7,xmm4
+ pxor xmm8,xmm4
+ pshufd xmm15,xmm15,04Eh
+ pxor xmm9,xmm0
+ pshufd xmm0,xmm0,04Eh
+ pxor xmm12,xmm1
+ pxor xmm15,xmm7
+ pxor xmm13,xmm6
+ pxor xmm0,xmm8
+ pxor xmm11,xmm3
+ pshufd xmm7,xmm1,04Eh
+ pxor xmm14,xmm2
+ pshufd xmm8,xmm6,04Eh
+ pxor xmm10,xmm5
+ pshufd xmm1,xmm3,04Eh
+ pxor xmm10,xmm4
+ pshufd xmm6,xmm4,04Eh
+ pxor xmm11,xmm4
+ pshufd xmm3,xmm2,04Eh
+ pxor xmm7,xmm11
+ pshufd xmm2,xmm5,04Eh
+ pxor xmm8,xmm12
+ pxor xmm10,xmm1
+ pxor xmm6,xmm14
+ pxor xmm13,xmm3
+ movdqa xmm3,xmm7 ; move results back into the register order expected at $L$dec_loop
+ pxor xmm2,xmm9
+ movdqa xmm5,xmm13
+ movdqa xmm4,xmm8
+ movdqa xmm1,xmm2
+ movdqa xmm2,xmm10
+ movdqa xmm7,XMMWORD PTR[((-16))+r11] ; pshufb mask for an ordinary next round
+ jnz $L$dec_loop ; flags still come from the dec r10d above (the SSE instructions in between leave EFLAGS alone)
+ movdqa xmm7,XMMWORD PTR[((-32))+r11] ; r10d reached 0: use the alternate mask for the last pass through the loop
+ jmp $L$dec_loop
+ALIGN 16
+$L$dec_done:: ; undo the bit-slicing with the same 1/2/4-bit masked swaps, then add the last key
+ movdqa xmm7,XMMWORD PTR[r11] ; 1-bit swap mask
+ movdqa xmm8,XMMWORD PTR[16+r11] ; 2-bit swap mask
+ movdqa xmm9,xmm2 ; 1-bit masked swap: pairs (xmm4,xmm2) and (xmm6,xmm1)
+ psrlq xmm2,1
+ movdqa xmm10,xmm1
+ psrlq xmm1,1
+ pxor xmm2,xmm4
+ pxor xmm1,xmm6
+ pand xmm2,xmm7
+ pand xmm1,xmm7
+ pxor xmm4,xmm2
+ psllq xmm2,1
+ pxor xmm6,xmm1
+ psllq xmm1,1
+ pxor xmm2,xmm9
+ pxor xmm1,xmm10
+ movdqa xmm9,xmm5 ; 1-bit masked swap: pairs (xmm3,xmm5) and (xmm0,xmm15)
+ psrlq xmm5,1
+ movdqa xmm10,xmm15
+ psrlq xmm15,1
+ pxor xmm5,xmm3
+ pxor xmm15,xmm0
+ pand xmm5,xmm7
+ pand xmm15,xmm7
+ pxor xmm3,xmm5
+ psllq xmm5,1
+ pxor xmm0,xmm15
+ psllq xmm15,1
+ pxor xmm5,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[32+r11] ; 4-bit swap mask
+ movdqa xmm9,xmm6 ; 2-bit masked swap: pairs (xmm4,xmm6) and (xmm2,xmm1)
+ psrlq xmm6,2
+ movdqa xmm10,xmm1
+ psrlq xmm1,2
+ pxor xmm6,xmm4
+ pxor xmm1,xmm2
+ pand xmm6,xmm8
+ pand xmm1,xmm8
+ pxor xmm4,xmm6
+ psllq xmm6,2
+ pxor xmm2,xmm1
+ psllq xmm1,2
+ pxor xmm6,xmm9
+ pxor xmm1,xmm10
+ movdqa xmm9,xmm0 ; 2-bit masked swap: pairs (xmm3,xmm0) and (xmm5,xmm15)
+ psrlq xmm0,2
+ movdqa xmm10,xmm15
+ psrlq xmm15,2
+ pxor xmm0,xmm3
+ pxor xmm15,xmm5
+ pand xmm0,xmm8
+ pand xmm15,xmm8
+ pxor xmm3,xmm0
+ psllq xmm0,2
+ pxor xmm5,xmm15
+ psllq xmm15,2
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm9,xmm3 ; 4-bit masked swap: pairs (xmm4,xmm3) and (xmm2,xmm5)
+ psrlq xmm3,4
+ movdqa xmm10,xmm5
+ psrlq xmm5,4
+ pxor xmm3,xmm4
+ pxor xmm5,xmm2
+ pand xmm3,xmm7
+ pand xmm5,xmm7
+ pxor xmm4,xmm3
+ psllq xmm3,4
+ pxor xmm2,xmm5
+ psllq xmm5,4
+ pxor xmm3,xmm9
+ pxor xmm5,xmm10
+ movdqa xmm9,xmm0 ; 4-bit masked swap: pairs (xmm6,xmm0) and (xmm1,xmm15)
+ psrlq xmm0,4
+ movdqa xmm10,xmm15
+ psrlq xmm15,4
+ pxor xmm0,xmm6
+ pxor xmm15,xmm1
+ pand xmm0,xmm7
+ pand xmm15,xmm7
+ pxor xmm6,xmm0
+ psllq xmm0,4
+ pxor xmm1,xmm15
+ psllq xmm15,4
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[rax] ; final 16-byte key at the current rax
+ pxor xmm5,xmm7 ; XOR it into all eight results
+ pxor xmm3,xmm7
+ pxor xmm1,xmm7
+ pxor xmm6,xmm7
+ pxor xmm2,xmm7
+ pxor xmm4,xmm7
+ pxor xmm15,xmm7
+ pxor xmm0,xmm7
+ DB 0F3h,0C3h ;repret
+_bsaes_decrypt8 ENDP
+
+ALIGN 16
+_bsaes_key_convert PROC PRIVATE ; Expands a key schedule into bit-sliced form. In: rcx -> source round keys, rax -> destination, r10d = round count. Out: xmm6 = last source key (unconverted), xmm7 = [$L$masks+80], rax -> next free destination slot.
+ lea r11,QWORD PTR[$L$masks] ; r11 = constant table ($L$masks is defined outside this routine)
+ movdqu xmm7,XMMWORD PTR[rcx] ; first round key, copied verbatim below
+ lea rcx,QWORD PTR[16+rcx]
+ movdqa xmm0,XMMWORD PTR[r11] ; xmm0..xmm3 = four bit-select masks
+ movdqa xmm1,XMMWORD PTR[16+r11]
+ movdqa xmm2,XMMWORD PTR[32+r11]
+ movdqa xmm3,XMMWORD PTR[48+r11]
+ movdqa xmm4,XMMWORD PTR[64+r11] ; xmm4 = byte-shuffle mask applied to every round key
+ pcmpeqd xmm5,xmm5 ; xmm5 = all ones, used to invert selected planes
+
+ movdqu xmm6,XMMWORD PTR[rcx] ; preload the second round key
+ movdqa XMMWORD PTR[rax],xmm7 ; store the first key unchanged
+ lea rax,QWORD PTR[16+rax]
+ dec r10d ; the loop body runs r10d-1 times
+ jmp $L$key_loop
+ALIGN 16
+$L$key_loop:: ; each pass turns one 16-byte key in xmm6 into eight 16-byte bit planes (128 bytes)
+DB 102,15,56,0,244 ; pshufb xmm6,xmm4 (hand-encoded)
+
+ movdqa xmm8,xmm0
+ movdqa xmm9,xmm1
+
+ pand xmm8,xmm6 ; isolate the bit chosen by each mask ...
+ pand xmm9,xmm6
+ movdqa xmm10,xmm2
+ pcmpeqb xmm8,xmm0 ; ... and widen it to a full 0x00/0xFF byte
+ psllq xmm0,4 ; shift the masks up by 4 to select the next four bits
+ movdqa xmm11,xmm3
+ pcmpeqb xmm9,xmm1
+ psllq xmm1,4
+
+ pand xmm10,xmm6
+ pand xmm11,xmm6
+ movdqa xmm12,xmm0
+ pcmpeqb xmm10,xmm2
+ psllq xmm2,4
+ movdqa xmm13,xmm1
+ pcmpeqb xmm11,xmm3
+ psllq xmm3,4
+
+ movdqa xmm14,xmm2
+ movdqa xmm15,xmm3
+ pxor xmm8,xmm5 ; planes 0 and 1 are stored inverted
+ pxor xmm9,xmm5
+
+ pand xmm12,xmm6
+ pand xmm13,xmm6
+ movdqa XMMWORD PTR[rax],xmm8
+ pcmpeqb xmm12,xmm0
+ psrlq xmm0,4 ; restore the masks for the next iteration
+ movdqa XMMWORD PTR[16+rax],xmm9
+ pcmpeqb xmm13,xmm1
+ psrlq xmm1,4
+ lea rcx,QWORD PTR[16+rcx] ; advance the source pointer
+
+ pand xmm14,xmm6
+ pand xmm15,xmm6
+ movdqa XMMWORD PTR[32+rax],xmm10
+ pcmpeqb xmm14,xmm2
+ psrlq xmm2,4
+ movdqa XMMWORD PTR[48+rax],xmm11
+ pcmpeqb xmm15,xmm3
+ psrlq xmm3,4
+ movdqu xmm6,XMMWORD PTR[rcx] ; load the following round key; on the last pass this is the final key, left in xmm6 for the caller
+
+ pxor xmm13,xmm5 ; planes 5 and 6 are stored inverted
+ pxor xmm14,xmm5
+ movdqa XMMWORD PTR[64+rax],xmm12
+ movdqa XMMWORD PTR[80+rax],xmm13
+ movdqa XMMWORD PTR[96+rax],xmm14
+ movdqa XMMWORD PTR[112+rax],xmm15
+ lea rax,QWORD PTR[128+rax] ; next 128-byte output slot
+ dec r10d
+ jnz $L$key_loop
+
+ movdqa xmm7,XMMWORD PTR[80+r11] ; constant handed back in xmm7; callers in this file XOR it with a round key
+
+ DB 0F3h,0C3h ;repret
+_bsaes_key_convert ENDP
+EXTERN asm_AES_cbc_encrypt:NEAR
+PUBLIC bsaes_cbc_encrypt
+
+ALIGN 16
+bsaes_cbc_encrypt PROC PUBLIC ; CBC entry point (Win64 ABI). In: rcx = input, rdx = output, r8 = length in bytes, r9 = key (round count at +240), [rsp+40] = IV pointer, [rsp+48] = direction flag.
+ mov r11d,DWORD PTR[48+rsp] ; sixth argument
+ cmp r11d,0
+ jne asm_AES_cbc_encrypt ; non-zero flag: tail-jump to the external routine; only flag == 0 is handled below
+ cmp r8,128
+ jb asm_AES_cbc_encrypt ; fewer than 128 bytes: also left to the external routine
+
+ mov rax,rsp
+$L$cbc_dec_prologue::
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-72))+rsp]
+ mov r10,QWORD PTR[160+rsp] ; fifth argument (IV pointer): 160 = 48 pushed + 72 reserved + 40
+ lea rsp,QWORD PTR[((-160))+rsp] ; room for the ten non-volatile XMM registers
+ movaps XMMWORD PTR[64+rsp],xmm6 ; save xmm6..xmm15 (callee-saved under the Win64 ABI)
+ movaps XMMWORD PTR[80+rsp],xmm7
+ movaps XMMWORD PTR[96+rsp],xmm8
+ movaps XMMWORD PTR[112+rsp],xmm9
+ movaps XMMWORD PTR[128+rsp],xmm10
+ movaps XMMWORD PTR[144+rsp],xmm11
+ movaps XMMWORD PTR[160+rsp],xmm12
+ movaps XMMWORD PTR[176+rsp],xmm13
+ movaps XMMWORD PTR[192+rsp],xmm14
+ movaps XMMWORD PTR[208+rsp],xmm15
+$L$cbc_dec_body::
+ mov rbp,rsp ; rbp = frame base; [32+rbp] is the 16-byte IV scratch slot
+ mov eax,DWORD PTR[240+r9] ; round count
+ mov r12,rcx ; r12 = input pointer
+ mov r13,rdx ; r13 = output pointer
+ mov r14,r8
+ mov r15,r9 ; r15 = key
+ mov rbx,r10 ; rbx = IV pointer
+ shr r14,4 ; r14 = number of 16-byte blocks
+
+ mov edx,eax ; edx keeps the round count for every later call
+ shl rax,7
+ sub rax,96
+ sub rsp,rax ; reserve rounds*128-96 bytes below the frame for the converted key schedule
+
+ mov rax,rsp
+ mov rcx,r15
+ mov r10d,edx
+ call _bsaes_key_convert
+ pxor xmm7,XMMWORD PTR[rsp] ; xmm7 (constant returned by key_convert) ^= first stored round key
+ movdqa XMMWORD PTR[rax],xmm6 ; last source round key goes at the end of the schedule, unmodified
+ movdqa XMMWORD PTR[rsp],xmm7 ; adjusted key is written to the start of the schedule
+
+ movdqu xmm14,XMMWORD PTR[rbx] ; xmm14 = current IV
+ sub r14,8 ; r14 is kept biased by -8 while the 8-block loop runs
+$L$cbc_dec_loop:: ; eight blocks per iteration
+ movdqu xmm15,XMMWORD PTR[r12]
+ movdqu xmm0,XMMWORD PTR[16+r12]
+ movdqu xmm1,XMMWORD PTR[32+r12]
+ movdqu xmm2,XMMWORD PTR[48+r12]
+ movdqu xmm3,XMMWORD PTR[64+r12]
+ movdqu xmm4,XMMWORD PTR[80+r12]
+ mov rax,rsp ; key schedule for the helper
+ movdqu xmm5,XMMWORD PTR[96+r12]
+ mov r10d,edx ; round count for the helper
+ movdqu xmm6,XMMWORD PTR[112+r12]
+ movdqa XMMWORD PTR[32+rbp],xmm14 ; park the IV; the helper overwrites xmm14
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[32+rbp] ; first result ^= IV
+ movdqu xmm7,XMMWORD PTR[r12] ; reload the input blocks: each later result is XORed with the previous input block
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm1,xmm10
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ pxor xmm6,xmm11
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm2,xmm12
+ movdqu xmm14,XMMWORD PTR[112+r12] ; last input block becomes the next IV
+ pxor xmm4,xmm13
+ movdqu XMMWORD PTR[r13],xmm15 ; results are stored in the order xmm15,xmm0,xmm5,xmm3,xmm1,xmm6,xmm2,xmm4
+ lea r12,QWORD PTR[128+r12]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ movdqu XMMWORD PTR[80+r13],xmm6
+ movdqu XMMWORD PTR[96+r13],xmm2
+ movdqu XMMWORD PTR[112+r13],xmm4
+ lea r13,QWORD PTR[128+r13]
+ sub r14,8
+ jnc $L$cbc_dec_loop ; no borrow: at least eight more blocks remain
+
+ add r14,8 ; undo the bias: r14 = 0..7 leftover blocks
+ jz $L$cbc_dec_done
+
+ movdqu xmm15,XMMWORD PTR[r12] ; tail: load only as many blocks as remain, then dispatch on the count
+ mov rax,rsp
+ mov r10d,edx
+ cmp r14,2
+ jb $L$cbc_dec_one
+ movdqu xmm0,XMMWORD PTR[16+r12]
+ je $L$cbc_dec_two ; movdqu does not alter flags, so this still tests the cmp above
+ movdqu xmm1,XMMWORD PTR[32+r12]
+ cmp r14,4
+ jb $L$cbc_dec_three
+ movdqu xmm2,XMMWORD PTR[48+r12]
+ je $L$cbc_dec_four
+ movdqu xmm3,XMMWORD PTR[64+r12]
+ cmp r14,6
+ jb $L$cbc_dec_five
+ movdqu xmm4,XMMWORD PTR[80+r12]
+ je $L$cbc_dec_six
+ movdqu xmm5,XMMWORD PTR[96+r12] ; seven blocks left; xmm6 is not reloaded and its result is never stored
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm1,xmm10
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ pxor xmm6,xmm11
+ movdqu xmm14,XMMWORD PTR[96+r12] ; seventh input block = IV handed back to the caller
+ pxor xmm2,xmm12
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ movdqu XMMWORD PTR[80+r13],xmm6
+ movdqu XMMWORD PTR[96+r13],xmm2
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_six:: ; six leftover blocks
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm1,xmm10
+ movdqu xmm14,XMMWORD PTR[80+r12] ; sixth input block = next IV
+ pxor xmm6,xmm11
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ movdqu XMMWORD PTR[80+r13],xmm6
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_five:: ; five leftover blocks
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu xmm14,XMMWORD PTR[64+r12] ; fifth input block = next IV
+ pxor xmm1,xmm10
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_four:: ; four leftover blocks
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm14,XMMWORD PTR[48+r12] ; fourth input block = next IV
+ pxor xmm3,xmm9
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_three:: ; three leftover blocks
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm14,XMMWORD PTR[32+r12] ; third input block = next IV
+ pxor xmm5,xmm8
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_two:: ; two leftover blocks
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm14,XMMWORD PTR[16+r12] ; second input block = next IV
+ pxor xmm0,xmm7
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ jmp $L$cbc_dec_done
+ALIGN 16
+$L$cbc_dec_one:: ; single leftover block: handled by the external one-block routine with the original key
+ lea rcx,QWORD PTR[r12] ; arg1 = input block
+ lea rdx,QWORD PTR[32+rbp] ; arg2 = output, written to the scratch slot
+ lea r8,QWORD PTR[r15] ; arg3 = key
+ call asm_AES_decrypt ; relies on the callee preserving xmm14/xmm15 (non-volatile under Win64)
+ pxor xmm14,XMMWORD PTR[32+rbp] ; IV ^ block result
+ movdqu XMMWORD PTR[r13],xmm14
+ movdqa xmm14,xmm15 ; the input block loaded before the dispatch becomes the next IV
+
+$L$cbc_dec_done::
+ movdqu XMMWORD PTR[rbx],xmm14 ; write the updated IV back through the caller's pointer
+ lea rax,QWORD PTR[rsp]
+ pxor xmm0,xmm0
+$L$cbc_dec_bzero:: ; zero the stack from rsp up to rbp, 32 bytes per pass, wiping the converted key schedule
+ movdqa XMMWORD PTR[rax],xmm0
+ movdqa XMMWORD PTR[16+rax],xmm0
+ lea rax,QWORD PTR[32+rax]
+ cmp rbp,rax
+ ja $L$cbc_dec_bzero
+
+ lea rsp,QWORD PTR[rbp] ; drop the key-schedule area
+ movaps xmm6,XMMWORD PTR[64+rbp] ; restore the saved XMM registers
+ movaps xmm7,XMMWORD PTR[80+rbp]
+ movaps xmm8,XMMWORD PTR[96+rbp]
+ movaps xmm9,XMMWORD PTR[112+rbp]
+ movaps xmm10,XMMWORD PTR[128+rbp]
+ movaps xmm11,XMMWORD PTR[144+rbp]
+ movaps xmm12,XMMWORD PTR[160+rbp]
+ movaps xmm13,XMMWORD PTR[176+rbp]
+ movaps xmm14,XMMWORD PTR[192+rbp]
+ movaps xmm15,XMMWORD PTR[208+rbp]
+ lea rsp,QWORD PTR[160+rbp]
+ mov r15,QWORD PTR[72+rsp] ; reload the pushed integer registers (the 72-byte pad sits below them)
+ mov r14,QWORD PTR[80+rsp]
+ mov r13,QWORD PTR[88+rsp]
+ mov r12,QWORD PTR[96+rsp]
+ mov rbx,QWORD PTR[104+rsp]
+ mov rax,QWORD PTR[112+rsp] ; saved rbp, moved via rax because rsp is adjusted first
+ lea rsp,QWORD PTR[120+rsp]
+ mov rbp,rax
+$L$cbc_dec_epilogue::
+ DB 0F3h,0C3h ;repret
+bsaes_cbc_encrypt ENDP
+
+PUBLIC bsaes_ctr32_encrypt_blocks
+
+ALIGN 16
+bsaes_ctr32_encrypt_blocks PROC PUBLIC ; Counter-mode entry point (Win64 ABI). In: rcx = input, rdx = output, r8 = number of 16-byte blocks, r9 = key (round count at +240), [rsp+40] = pointer to the 16-byte counter block.
+ mov rax,rsp
+$L$ctr_enc_prologue::
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-72))+rsp]
+ mov r10,QWORD PTR[160+rsp] ; fifth argument: 160 = 48 pushed + 72 reserved + 40
+ lea rsp,QWORD PTR[((-160))+rsp] ; room for the ten non-volatile XMM registers
+ movaps XMMWORD PTR[64+rsp],xmm6 ; save xmm6..xmm15 (callee-saved under the Win64 ABI)
+ movaps XMMWORD PTR[80+rsp],xmm7
+ movaps XMMWORD PTR[96+rsp],xmm8
+ movaps XMMWORD PTR[112+rsp],xmm9
+ movaps XMMWORD PTR[128+rsp],xmm10
+ movaps XMMWORD PTR[144+rsp],xmm11
+ movaps XMMWORD PTR[160+rsp],xmm12
+ movaps XMMWORD PTR[176+rsp],xmm13
+ movaps XMMWORD PTR[192+rsp],xmm14
+ movaps XMMWORD PTR[208+rsp],xmm15
+$L$ctr_enc_body::
+ mov rbp,rsp ; rbp = frame base; [32+rbp] holds the counter block
+ movdqu xmm0,XMMWORD PTR[r10] ; load the caller's counter block
+ mov eax,DWORD PTR[240+r9] ; round count
+ mov r12,rcx ; r12 = input pointer
+ mov r13,rdx ; r13 = output pointer
+ mov r14,r8 ; r14 = blocks remaining
+ mov r15,r9 ; r15 = key
+ movdqa XMMWORD PTR[32+rbp],xmm0
+ cmp r8,8
+ jb $L$ctr_enc_short ; fewer than eight blocks: one-block-at-a-time path
+
+ mov ebx,eax ; ebx keeps the round count
+ shl rax,7
+ sub rax,96
+ sub rsp,rax ; reserve rounds*128-96 bytes for the converted key schedule
+
+ mov rax,rsp
+ mov rcx,r15
+ mov r10d,ebx
+ call _bsaes_key_convert
+ pxor xmm7,xmm6 ; last round key ^ constant returned by key_convert
+ movdqa XMMWORD PTR[rax],xmm7 ; stored at the end of the schedule
+
+ movdqa xmm8,XMMWORD PTR[rsp] ; first round key
+ lea r11,QWORD PTR[$L$ADD1] ; r11 = table of counter increments ($L$ADD1 is defined outside this routine)
+ movdqa xmm15,XMMWORD PTR[32+rbp] ; counter block
+ movdqa xmm7,XMMWORD PTR[((-32))+r11] ; shuffle mask applied to both the first key and the counter
+DB 102,68,15,56,0,199 ; pshufb xmm8,xmm7 (hand-encoded)
+DB 102,68,15,56,0,255 ; pshufb xmm15,xmm7
+ movdqa XMMWORD PTR[rsp],xmm8 ; keep the shuffled first key in the schedule
+ jmp $L$ctr_enc_loop
+ALIGN 16
+$L$ctr_enc_loop:: ; eight counter blocks per iteration
+ movdqa XMMWORD PTR[32+rbp],xmm15 ; remember the base counter of this batch
+ movdqa xmm0,xmm15
+ movdqa xmm1,xmm15
+ paddd xmm0,XMMWORD PTR[r11] ; xmm0..xmm6 = base counter + successive table entries (presumably +1..+7 -- table not visible here)
+ movdqa xmm2,xmm15
+ paddd xmm1,XMMWORD PTR[16+r11]
+ movdqa xmm3,xmm15
+ paddd xmm2,XMMWORD PTR[32+r11]
+ movdqa xmm4,xmm15
+ paddd xmm3,XMMWORD PTR[48+r11]
+ movdqa xmm5,xmm15
+ paddd xmm4,XMMWORD PTR[64+r11]
+ movdqa xmm6,xmm15
+ paddd xmm5,XMMWORD PTR[80+r11]
+ paddd xmm6,XMMWORD PTR[96+r11]
+
+
+
+ movdqa xmm8,XMMWORD PTR[rsp] ; shuffled first round key
+ lea rax,QWORD PTR[16+rsp] ; rax -> rest of the key schedule for the helper
+ movdqa xmm7,XMMWORD PTR[((-16))+r11] ; shuffle mask for the pshufb instructions below
+ pxor xmm15,xmm8 ; XOR the first key into all eight counters, interleaved with the shuffles
+ pxor xmm0,xmm8
+DB 102,68,15,56,0,255 ; pshufb xmm15,xmm7
+ pxor xmm1,xmm8
+DB 102,15,56,0,199 ; pshufb xmm0,xmm7
+ pxor xmm2,xmm8
+DB 102,15,56,0,207 ; pshufb xmm1,xmm7
+ pxor xmm3,xmm8
+DB 102,15,56,0,215 ; pshufb xmm2,xmm7
+ pxor xmm4,xmm8
+DB 102,15,56,0,223 ; pshufb xmm3,xmm7
+ pxor xmm5,xmm8
+DB 102,15,56,0,231 ; pshufb xmm4,xmm7
+ pxor xmm6,xmm8
+DB 102,15,56,0,239 ; pshufb xmm5,xmm7
+ lea r11,QWORD PTR[$L$BS0] ; the helper entry used below expects r11 = $L$BS0
+DB 102,15,56,0,247 ; pshufb xmm6,xmm7
+ mov r10d,ebx ; round count for the helper
+
+ call _bsaes_encrypt8_bitslice ; entry point defined outside this routine; the key XOR and shuffle were done above
+
+ sub r14,8
+ jc $L$ctr_enc_loop_done ; fewer than eight blocks were left: partial store
+
+ movdqu xmm7,XMMWORD PTR[r12] ; load eight input blocks
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ movdqu xmm14,XMMWORD PTR[112+r12]
+ lea r12,QWORD PTR[128+r12]
+ pxor xmm7,xmm15 ; input ^ keystream; keystream order is xmm15,xmm0,xmm3,xmm5,xmm2,xmm6,xmm1,xmm4
+ movdqa xmm15,XMMWORD PTR[32+rbp] ; reload the batch's base counter
+ pxor xmm0,xmm8
+ movdqu XMMWORD PTR[r13],xmm7
+ pxor xmm3,xmm9
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,xmm10
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,xmm11
+ movdqu XMMWORD PTR[48+r13],xmm5
+ pxor xmm6,xmm12
+ movdqu XMMWORD PTR[64+r13],xmm2
+ pxor xmm1,xmm13
+ movdqu XMMWORD PTR[80+r13],xmm6
+ pxor xmm4,xmm14
+ movdqu XMMWORD PTR[96+r13],xmm1
+ lea r11,QWORD PTR[$L$ADD1] ; restore r11 for the next batch
+ movdqu XMMWORD PTR[112+r13],xmm4
+ lea r13,QWORD PTR[128+r13]
+ paddd xmm15,XMMWORD PTR[112+r11] ; advance the base counter by the eighth table entry
+ jnz $L$ctr_enc_loop ; flags still come from sub r14,8 (nothing in between alters EFLAGS): loop while blocks remain
+
+ jmp $L$ctr_enc_done
+ALIGN 16
+$L$ctr_enc_loop_done:: ; 1..7 blocks left: store only that many, in the same keystream order
+ add r14,8 ; undo the subtraction: r14 = remaining block count
+ movdqu xmm7,XMMWORD PTR[r12]
+ pxor xmm15,xmm7
+ movdqu XMMWORD PTR[r13],xmm15
+ cmp r14,2
+ jb $L$ctr_enc_done
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm8
+ movdqu XMMWORD PTR[16+r13],xmm0
+ je $L$ctr_enc_done ; still testing the cmp r14,2 above
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm3,xmm9
+ movdqu XMMWORD PTR[32+r13],xmm3
+ cmp r14,4
+ jb $L$ctr_enc_done
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm5,xmm10
+ movdqu XMMWORD PTR[48+r13],xmm5
+ je $L$ctr_enc_done
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm2,xmm11
+ movdqu XMMWORD PTR[64+r13],xmm2
+ cmp r14,6
+ jb $L$ctr_enc_done
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ pxor xmm6,xmm12
+ movdqu XMMWORD PTR[80+r13],xmm6
+ je $L$ctr_enc_done
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm1,xmm13
+ movdqu XMMWORD PTR[96+r13],xmm1
+ jmp $L$ctr_enc_done
+
+ALIGN 16
+$L$ctr_enc_short:: ; fewer than eight blocks in total: one block per pass via the external routine. NOTE(review): a block count of 0 would wrap at the dec r14 below -- callers presumably never pass 0.
+ lea rcx,QWORD PTR[32+rbp] ; arg1 = counter block
+ lea rdx,QWORD PTR[48+rbp] ; arg2 = keystream output slot
+ lea r8,QWORD PTR[r15] ; arg3 = key
+ call asm_AES_encrypt
+ movdqu xmm0,XMMWORD PTR[r12]
+ lea r12,QWORD PTR[16+r12]
+ mov eax,DWORD PTR[44+rbp] ; last 32 bits of the counter block
+ bswap eax ; treated as a big-endian integer
+ pxor xmm0,XMMWORD PTR[48+rbp] ; input ^ keystream
+ inc eax
+ movdqu XMMWORD PTR[r13],xmm0
+ bswap eax
+ lea r13,QWORD PTR[16+r13]
+ mov DWORD PTR[44+rsp],eax ; store the incremented counter; rsp equals rbp on this path (no key-schedule area was reserved), so this is the same slot as [44+rbp]
+ dec r14
+ jnz $L$ctr_enc_short
+
+$L$ctr_enc_done::
+ lea rax,QWORD PTR[rsp]
+ pxor xmm0,xmm0
+$L$ctr_enc_bzero:: ; zero the stack from rsp towards rbp in 32-byte steps; at least one step always runs
+ movdqa XMMWORD PTR[rax],xmm0
+ movdqa XMMWORD PTR[16+rax],xmm0
+ lea rax,QWORD PTR[32+rax]
+ cmp rbp,rax
+ ja $L$ctr_enc_bzero
+
+ lea rsp,QWORD PTR[rbp] ; drop the key-schedule area
+ movaps xmm6,XMMWORD PTR[64+rbp] ; restore the saved XMM registers
+ movaps xmm7,XMMWORD PTR[80+rbp]
+ movaps xmm8,XMMWORD PTR[96+rbp]
+ movaps xmm9,XMMWORD PTR[112+rbp]
+ movaps xmm10,XMMWORD PTR[128+rbp]
+ movaps xmm11,XMMWORD PTR[144+rbp]
+ movaps xmm12,XMMWORD PTR[160+rbp]
+ movaps xmm13,XMMWORD PTR[176+rbp]
+ movaps xmm14,XMMWORD PTR[192+rbp]
+ movaps xmm15,XMMWORD PTR[208+rbp]
+ lea rsp,QWORD PTR[160+rbp]
+ mov r15,QWORD PTR[72+rsp] ; reload the pushed integer registers
+ mov r14,QWORD PTR[80+rsp]
+ mov r13,QWORD PTR[88+rsp]
+ mov r12,QWORD PTR[96+rsp]
+ mov rbx,QWORD PTR[104+rsp]
+ mov rax,QWORD PTR[112+rsp] ; saved rbp
+ lea rsp,QWORD PTR[120+rsp]
+ mov rbp,rax
+$L$ctr_enc_epilogue::
+ DB 0F3h,0C3h ;repret
+bsaes_ctr32_encrypt_blocks ENDP
+PUBLIC bsaes_xts_encrypt
+
+ALIGN 16
+bsaes_xts_encrypt PROC PUBLIC ; XTS encrypt entry point (Win64 ABI). In: rcx = input, rdx = output, r8 = length in bytes, r9 = data key (round count at +240), [rsp+40] = tweak key, [rsp+48] = pointer to the 16-byte initial tweak input.
+ mov rax,rsp
+$L$xts_enc_prologue::
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-72))+rsp]
+ mov r10,QWORD PTR[160+rsp] ; fifth argument (second key)
+ mov r11,QWORD PTR[168+rsp] ; sixth argument (block to encrypt into the initial tweak)
+ lea rsp,QWORD PTR[((-160))+rsp] ; room for the ten non-volatile XMM registers
+ movaps XMMWORD PTR[64+rsp],xmm6 ; save xmm6..xmm15 (callee-saved under the Win64 ABI)
+ movaps XMMWORD PTR[80+rsp],xmm7
+ movaps XMMWORD PTR[96+rsp],xmm8
+ movaps XMMWORD PTR[112+rsp],xmm9
+ movaps XMMWORD PTR[128+rsp],xmm10
+ movaps XMMWORD PTR[144+rsp],xmm11
+ movaps XMMWORD PTR[160+rsp],xmm12
+ movaps XMMWORD PTR[176+rsp],xmm13
+ movaps XMMWORD PTR[192+rsp],xmm14
+ movaps XMMWORD PTR[208+rsp],xmm15
+$L$xts_enc_body::
+ mov rbp,rsp ; rbp = frame base; [32+rbp] is a 16-byte scratch slot
+ mov r12,rcx ; r12 = input pointer
+ mov r13,rdx ; r13 = output pointer
+ mov r14,r8 ; r14 = byte length
+ mov r15,r9 ; r15 = data key
+
+ lea rcx,QWORD PTR[r11] ; arg1 = sixth argument
+ lea rdx,QWORD PTR[32+rbp] ; arg2 = scratch slot
+ lea r8,QWORD PTR[r10] ; arg3 = second key
+ call asm_AES_encrypt ; initial tweak = that block encrypted under the second key
+
+ mov eax,DWORD PTR[240+r15] ; round count of the data key
+ mov rbx,r14 ; rbx keeps the original length for the trailing-bytes check
+
+ mov edx,eax ; edx keeps the round count
+ shl rax,7
+ sub rax,96
+ sub rsp,rax ; reserve rounds*128-96 bytes for the converted key schedule
+
+ mov rax,rsp
+ mov rcx,r15
+ mov r10d,edx
+ call _bsaes_key_convert
+ pxor xmm7,xmm6 ; last round key ^ constant returned by key_convert
+ movdqa XMMWORD PTR[rax],xmm7 ; stored at the end of the schedule
+
+ and r14,-16 ; r14 = length rounded down to whole blocks
+ sub rsp,080h ; 128 more bytes at [rsp]: eight saved tweaks; the key schedule now starts at [128+rsp]
+ movdqa xmm6,XMMWORD PTR[32+rbp] ; xmm6 = running tweak
+
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic] ; carry-fixup mask ($L$xts_magic is defined outside this routine)
+ pcmpgtd xmm14,xmm6 ; xmm14 = per-dword sign mask of the tweak (0 > x)
+
+ sub r14,080h
+ jc $L$xts_enc_short ; fewer than eight whole blocks
+ jmp $L$xts_enc_loop
+
+ALIGN 16
+$L$xts_enc_loop:: ; eight blocks per iteration; tweak i is saved at [16*i+rsp] and XORed into block i
+ pshufd xmm13,xmm14,013h ; tweak step: reposition the sign masks (dword 3 -> lane 0, dword 1 -> lane 2) ...
+ pxor xmm14,xmm14
+ movdqa xmm15,xmm6 ; block 0 register starts as tweak 0
+ movdqa XMMWORD PTR[rsp],xmm6
+ paddq xmm6,xmm6 ; ... double both 64-bit halves ...
+ pand xmm13,xmm12 ; ... keep only the carry bits selected by the mask ...
+ pcmpgtd xmm14,xmm6 ; (sign mask for the next step)
+ pxor xmm6,xmm13 ; ... and fold them in: xmm6 = next tweak (presumably the XTS multiply-by-x)
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm0,xmm6
+ movdqa XMMWORD PTR[16+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm7,XMMWORD PTR[r12] ; input loads are interleaved with the tweak updates
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm1,xmm6
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm15,xmm7 ; block 0 = input 0 ^ tweak 0
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm2,xmm6
+ movdqa XMMWORD PTR[48+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm0,xmm8
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm3,xmm6
+ movdqa XMMWORD PTR[64+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm1,xmm9
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm4,xmm6
+ movdqa XMMWORD PTR[80+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm2,xmm10
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm5,xmm6
+ movdqa XMMWORD PTR[96+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm12,XMMWORD PTR[80+r12] ; xmm12 (the mask) is overwritten here and reloaded after the call
+ pxor xmm3,xmm11
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ movdqu xmm14,XMMWORD PTR[112+r12]
+ lea r12,QWORD PTR[128+r12]
+ movdqa XMMWORD PTR[112+rsp],xmm6 ; tweak 7
+ pxor xmm5,xmm13
+ lea rax,QWORD PTR[128+rsp] ; key schedule sits just above the eight saved tweaks
+ pxor xmm6,xmm14 ; block 7 = input 7 ^ tweak 7
+ mov r10d,edx ; round count for the helper
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp] ; results ^= saved tweaks; result order is xmm15,xmm0,xmm3,xmm5,xmm2,xmm6,xmm1,xmm4
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm5
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm2
+ pxor xmm1,XMMWORD PTR[96+rsp]
+ movdqu XMMWORD PTR[80+r13],xmm6
+ pxor xmm4,XMMWORD PTR[112+rsp]
+ movdqu XMMWORD PTR[96+r13],xmm1
+ movdqu XMMWORD PTR[112+r13],xmm4
+ lea r13,QWORD PTR[128+r13]
+
+ movdqa xmm6,XMMWORD PTR[112+rsp] ; resume from tweak 7 and step it once for the next batch
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic] ; reload the mask clobbered above
+ pcmpgtd xmm14,xmm6
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+
+ sub r14,080h
+ jnc $L$xts_enc_loop ; no borrow: another full batch of eight blocks
+
+$L$xts_enc_short:: ; 0..7 whole blocks left
+ add r14,080h ; undo the subtraction: r14 = remaining whole-block bytes
+ jz $L$xts_enc_done
+ pshufd xmm13,xmm14,013h ; same tweak stepping as the main loop, with an early exit after each block
+ pxor xmm14,xmm14
+ movdqa xmm15,xmm6
+ movdqa XMMWORD PTR[rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm0,xmm6
+ movdqa XMMWORD PTR[16+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm7,XMMWORD PTR[r12]
+ cmp r14,16
+ je $L$xts_enc_1
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm1,xmm6
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ cmp r14,32
+ je $L$xts_enc_2
+ pxor xmm15,xmm7
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm2,xmm6
+ movdqa XMMWORD PTR[48+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ cmp r14,48
+ je $L$xts_enc_3
+ pxor xmm0,xmm8
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm3,xmm6
+ movdqa XMMWORD PTR[64+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ cmp r14,64
+ je $L$xts_enc_4
+ pxor xmm1,xmm9
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm4,xmm6
+ movdqa XMMWORD PTR[80+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ cmp r14,80
+ je $L$xts_enc_5
+ pxor xmm2,xmm10
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm5,xmm6
+ movdqa XMMWORD PTR[96+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ cmp r14,96
+ je $L$xts_enc_6
+ pxor xmm3,xmm11 ; seven blocks left
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ movdqa XMMWORD PTR[112+rsp],xmm6 ; tweak that follows the seventh block
+ lea r12,QWORD PTR[112+r12]
+ pxor xmm5,xmm13
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm5
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm2
+ pxor xmm1,XMMWORD PTR[96+rsp]
+ movdqu XMMWORD PTR[80+r13],xmm6
+ movdqu XMMWORD PTR[96+r13],xmm1
+ lea r13,QWORD PTR[112+r13]
+
+ movdqa xmm6,XMMWORD PTR[112+rsp] ; xmm6 = next unused tweak, needed if trailing bytes remain
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_6:: ; six blocks left: finish the two pending input^tweak XORs, then encrypt
+ pxor xmm3,xmm11
+ lea r12,QWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm5
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm2
+ movdqu XMMWORD PTR[80+r13],xmm6
+ lea r13,QWORD PTR[96+r13]
+
+ movdqa xmm6,XMMWORD PTR[96+rsp] ; next unused tweak
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_5:: ; five blocks left
+ pxor xmm2,xmm10
+ lea r12,QWORD PTR[80+r12]
+ pxor xmm3,xmm11
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm5
+ movdqu XMMWORD PTR[64+r13],xmm2
+ lea r13,QWORD PTR[80+r13]
+
+ movdqa xmm6,XMMWORD PTR[80+rsp] ; next unused tweak
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_4:: ; four blocks left
+ pxor xmm1,xmm9
+ lea r12,QWORD PTR[64+r12]
+ pxor xmm2,xmm10
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ movdqu XMMWORD PTR[48+r13],xmm5
+ lea r13,QWORD PTR[64+r13]
+
+ movdqa xmm6,XMMWORD PTR[64+rsp] ; next unused tweak
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_3:: ; three blocks left
+ pxor xmm0,xmm8
+ lea r12,QWORD PTR[48+r12]
+ pxor xmm1,xmm9
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm3
+ lea r13,QWORD PTR[48+r13]
+
+ movdqa xmm6,XMMWORD PTR[48+rsp] ; next unused tweak
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_2:: ; two blocks left
+ pxor xmm15,xmm7
+ lea r12,QWORD PTR[32+r12]
+ pxor xmm0,xmm8
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_encrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ lea r13,QWORD PTR[32+r13]
+
+ movdqa xmm6,XMMWORD PTR[32+rsp] ; next unused tweak
+ jmp $L$xts_enc_done
+ALIGN 16
+$L$xts_enc_1:: ; one block left: use the external one-block routine with the original key
+ pxor xmm7,xmm15 ; input ^ tweak 0 (xmm15 still holds the tweak)
+ lea r12,QWORD PTR[16+r12]
+ movdqa XMMWORD PTR[32+rbp],xmm7
+ lea rcx,QWORD PTR[32+rbp] ; encrypt the scratch slot in place
+ lea rdx,QWORD PTR[32+rbp]
+ lea r8,QWORD PTR[r15]
+ call asm_AES_encrypt ; relies on the callee preserving xmm15 (non-volatile under Win64)
+ pxor xmm15,XMMWORD PTR[32+rbp] ; tweak ^ encrypted block
+
+
+
+
+
+ movdqu XMMWORD PTR[r13],xmm15
+ lea r13,QWORD PTR[16+r13]
+
+ movdqa xmm6,XMMWORD PTR[16+rsp] ; next unused tweak
+
+$L$xts_enc_done::
+ and ebx,15 ; trailing bytes beyond the last whole block
+ jz $L$xts_enc_ret
+ mov rdx,r13
+
+$L$xts_enc_steal:: ; per trailing byte: move the matching byte of the last output block to the tail, and put the input byte in its place
+ movzx eax,BYTE PTR[r12]
+ movzx ecx,BYTE PTR[((-16))+rdx]
+ lea r12,QWORD PTR[1+r12]
+ mov BYTE PTR[((-16))+rdx],al
+ mov BYTE PTR[rdx],cl
+ lea rdx,QWORD PTR[1+rdx]
+ sub ebx,1
+ jnz $L$xts_enc_steal
+
+ movdqu xmm15,XMMWORD PTR[((-16))+r13] ; re-encrypt the patched last block with the next tweak in xmm6
+ lea rcx,QWORD PTR[32+rbp]
+ pxor xmm15,xmm6
+ lea rdx,QWORD PTR[32+rbp]
+ movdqa XMMWORD PTR[32+rbp],xmm15
+ lea r8,QWORD PTR[r15]
+ call asm_AES_encrypt ; relies on the callee preserving xmm6
+ pxor xmm6,XMMWORD PTR[32+rbp]
+ movdqu XMMWORD PTR[(-16)+r13],xmm6
+
+$L$xts_enc_ret::
+ lea rax,QWORD PTR[rsp]
+ pxor xmm0,xmm0
+$L$xts_enc_bzero:: ; zero the stack from rsp up to rbp (saved tweaks and converted key schedule), 32 bytes per pass
+ movdqa XMMWORD PTR[rax],xmm0
+ movdqa XMMWORD PTR[16+rax],xmm0
+ lea rax,QWORD PTR[32+rax]
+ cmp rbp,rax
+ ja $L$xts_enc_bzero
+
+ lea rsp,QWORD PTR[rbp] ; drop the tweak and key-schedule areas
+ movaps xmm6,XMMWORD PTR[64+rbp] ; restore the saved XMM registers
+ movaps xmm7,XMMWORD PTR[80+rbp]
+ movaps xmm8,XMMWORD PTR[96+rbp]
+ movaps xmm9,XMMWORD PTR[112+rbp]
+ movaps xmm10,XMMWORD PTR[128+rbp]
+ movaps xmm11,XMMWORD PTR[144+rbp]
+ movaps xmm12,XMMWORD PTR[160+rbp]
+ movaps xmm13,XMMWORD PTR[176+rbp]
+ movaps xmm14,XMMWORD PTR[192+rbp]
+ movaps xmm15,XMMWORD PTR[208+rbp]
+ lea rsp,QWORD PTR[160+rbp]
+ mov r15,QWORD PTR[72+rsp] ; reload the pushed integer registers
+ mov r14,QWORD PTR[80+rsp]
+ mov r13,QWORD PTR[88+rsp]
+ mov r12,QWORD PTR[96+rsp]
+ mov rbx,QWORD PTR[104+rsp]
+ mov rax,QWORD PTR[112+rsp] ; saved rbp
+ lea rsp,QWORD PTR[120+rsp]
+ mov rbp,rax
+$L$xts_enc_epilogue::
+ DB 0F3h,0C3h ;repret
+bsaes_xts_encrypt ENDP
+
+PUBLIC bsaes_xts_decrypt
+ ; bsaes_xts_decrypt: XTS-style decryption, eight blocks per call to _bsaes_decrypt8, short tails via smaller batches, partial last block via ciphertext stealing
+ALIGN 16
+bsaes_xts_decrypt PROC PUBLIC ; Win64 ABI: rcx=input, rdx=output, r8=length in bytes, r9=data key; 5th/6th stack arguments feed the initial-tweak encryption
+ mov rax,rsp ; rax = entry rsp; se_handler uses context Rax as the frame while still in the prologue
+$L$xts_dec_prologue::
+ push rbp
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-72))+rsp] ; 48 bytes of pushes + 72 = 120 bytes below the entry rsp
+ mov r10,QWORD PTR[160+rsp] ; r10 = 5th argument (entry rsp+40)
+ mov r11,QWORD PTR[168+rsp] ; r11 = 6th argument (entry rsp+48)
+ lea rsp,QWORD PTR[((-160))+rsp] ; room for the ten xmm saves at 64..223 above the low scratch area
+ movaps XMMWORD PTR[64+rsp],xmm6 ; save xmm6-xmm15 (callee-saved under the Microsoft x64 convention)
+ movaps XMMWORD PTR[80+rsp],xmm7
+ movaps XMMWORD PTR[96+rsp],xmm8
+ movaps XMMWORD PTR[112+rsp],xmm9
+ movaps XMMWORD PTR[128+rsp],xmm10
+ movaps XMMWORD PTR[144+rsp],xmm11
+ movaps XMMWORD PTR[160+rsp],xmm12
+ movaps XMMWORD PTR[176+rsp],xmm13
+ movaps XMMWORD PTR[192+rsp],xmm14
+ movaps XMMWORD PTR[208+rsp],xmm15
+$L$xts_dec_body::
+ mov rbp,rsp ; rbp = frame base; epilogue, stack wipe and se_handler all work from it
+ mov r12,rcx ; r12 = input pointer
+ mov r13,rdx ; r13 = output pointer
+ mov r14,r8 ; r14 = length in bytes
+ mov r15,r9 ; r15 = data key pointer
+ ; initial tweak: asm_AES_encrypt is called with rcx=6th arg, rdx=rbp+32, r8=5th arg (presumably in, out, key - confirm against its definition)
+ lea rcx,QWORD PTR[r11]
+ lea rdx,QWORD PTR[32+rbp]
+ lea r8,QWORD PTR[r10]
+ call asm_AES_encrypt
+
+ mov eax,DWORD PTR[240+r15] ; dword at key+240, used below as the round count
+ mov rbx,r14 ; rbx = original length; its low four bits decide on ciphertext stealing
+
+ mov edx,eax ; edx = round count, reloaded into r10d before every 8-block call
+ shl rax,7
+ sub rax,96
+ sub rsp,rax ; reserve rounds*128-96 bytes of stack for the converted key schedule
+
+ mov rax,rsp ; rax = start of that area
+ mov rcx,r15
+ mov r10d,edx
+ call _bsaes_key_convert
+ pxor xmm7,XMMWORD PTR[rsp] ; NOTE(review): first/last round key fix-up using what _bsaes_key_convert leaves in xmm6, xmm7 and rax - helper is not visible here
+ movdqa XMMWORD PTR[rax],xmm6
+ movdqa XMMWORD PTR[rsp],xmm7
+
+ xor eax,eax
+ and r14,-16 ; r14 = length rounded down to a multiple of 16
+ test ebx,15
+ setnz al
+ shl rax,4 ; rax = 16 if a partial block exists, else 0
+ sub r14,rax ; hold back one full block for the ciphertext-stealing path
+
+ sub rsp,080h ; 128 bytes at [rsp] for eight tweaks; the key schedule now starts at 128+rsp
+ movdqa xmm6,XMMWORD PTR[32+rbp] ; xmm6 = current tweak (result of the encryption above)
+
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic] ; xmm12 = dwords 087h,0,1,0
+ pcmpgtd xmm14,xmm6 ; xmm14 = all-ones in every dword of the tweak whose top bit is set
+
+ sub r14,080h
+ jc $L$xts_dec_short ; fewer than eight whole blocks
+ jmp $L$xts_dec_loop
+
+ALIGN 16
+$L$xts_dec_loop:: ; eight blocks per iteration
+ pshufd xmm13,xmm14,013h ; route the top-bit masks of dwords 3 and 1 into dwords 0 and 2
+ pxor xmm14,xmm14
+ movdqa xmm15,xmm6 ; tweak 0
+ movdqa XMMWORD PTR[rsp],xmm6
+ paddq xmm6,xmm6 ; shift each 64-bit half left by one
+ pand xmm13,xmm12 ; 087h into dword 0 if bit 127 was set, 1 into dword 2 if bit 63 was set
+ pcmpgtd xmm14,xmm6 ; top-bit masks for the next doubling
+ pxor xmm6,xmm13 ; xmm6 = doubled tweak
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm0,xmm6 ; tweak 1
+ movdqa XMMWORD PTR[16+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm7,XMMWORD PTR[r12] ; input block 0
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm1,xmm6 ; tweak 2
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm8,XMMWORD PTR[16+r12] ; input block 1
+ pxor xmm15,xmm7 ; block 0 xor tweak 0
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm2,xmm6 ; tweak 3
+ movdqa XMMWORD PTR[48+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm9,XMMWORD PTR[32+r12] ; input block 2
+ pxor xmm0,xmm8
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm3,xmm6 ; tweak 4
+ movdqa XMMWORD PTR[64+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm10,XMMWORD PTR[48+r12] ; input block 3
+ pxor xmm1,xmm9
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm4,xmm6 ; tweak 5
+ movdqa XMMWORD PTR[80+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm11,XMMWORD PTR[64+r12] ; input block 4
+ pxor xmm2,xmm10
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm5,xmm6 ; tweak 6
+ movdqa XMMWORD PTR[96+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm12,XMMWORD PTR[80+r12] ; input block 5; overwrites the magic mask, which is reloaded after the call
+ pxor xmm3,xmm11
+ movdqu xmm13,XMMWORD PTR[96+r12] ; input block 6
+ pxor xmm4,xmm12
+ movdqu xmm14,XMMWORD PTR[112+r12] ; input block 7
+ lea r12,QWORD PTR[128+r12]
+ movdqa XMMWORD PTR[112+rsp],xmm6 ; tweak 7 stays in xmm6 and is xored with block 7 below
+ pxor xmm5,xmm13
+ lea rax,QWORD PTR[128+rsp] ; rax = converted key schedule
+ pxor xmm6,xmm14
+ mov r10d,edx ; r10d = round count
+
+ call _bsaes_decrypt8
+ ; results are taken from xmm15,xmm0,xmm5,xmm3,xmm1,xmm6,xmm2,xmm4 in that block order; each is xored with its saved tweak and stored
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ pxor xmm1,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm3
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm1
+ pxor xmm2,XMMWORD PTR[96+rsp]
+ movdqu XMMWORD PTR[80+r13],xmm6
+ pxor xmm4,XMMWORD PTR[112+rsp]
+ movdqu XMMWORD PTR[96+r13],xmm2
+ movdqu XMMWORD PTR[112+r13],xmm4
+ lea r13,QWORD PTR[128+r13]
+ ; derive the tweak for the next batch by doubling tweak 7
+ movdqa xmm6,XMMWORD PTR[112+rsp]
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic]
+ pcmpgtd xmm14,xmm6
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+
+ sub r14,080h
+ jnc $L$xts_dec_loop ; loop while at least eight more whole blocks remain
+
+$L$xts_dec_short:: ; 0..7 whole blocks left
+ add r14,080h ; undo the last subtraction: r14 = remaining whole-block bytes
+ jz $L$xts_dec_done
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm15,xmm6 ; tweak 0
+ movdqa XMMWORD PTR[rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm0,xmm6 ; tweak 1
+ movdqa XMMWORD PTR[16+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm7,XMMWORD PTR[r12] ; input block 0
+ cmp r14,16
+ je $L$xts_dec_1 ; exactly one block: handled with the single-block routine
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm1,xmm6 ; tweak 2
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm8,XMMWORD PTR[16+r12] ; input block 1
+ cmp r14,32
+ je $L$xts_dec_2
+ pxor xmm15,xmm7
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm2,xmm6 ; tweak 3
+ movdqa XMMWORD PTR[48+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm9,XMMWORD PTR[32+r12] ; input block 2
+ cmp r14,48
+ je $L$xts_dec_3
+ pxor xmm0,xmm8
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm3,xmm6 ; tweak 4
+ movdqa XMMWORD PTR[64+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm10,XMMWORD PTR[48+r12] ; input block 3
+ cmp r14,64
+ je $L$xts_dec_4
+ pxor xmm1,xmm9
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm4,xmm6 ; tweak 5
+ movdqa XMMWORD PTR[80+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm11,XMMWORD PTR[64+r12] ; input block 4
+ cmp r14,80
+ je $L$xts_dec_5
+ pxor xmm2,xmm10
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm5,xmm6 ; tweak 6
+ movdqa XMMWORD PTR[96+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm12,XMMWORD PTR[80+r12] ; input block 5 (replaces the magic mask in xmm12)
+ cmp r14,96
+ je $L$xts_dec_6
+ pxor xmm3,xmm11 ; fall-through: seven blocks
+ movdqu xmm13,XMMWORD PTR[96+r12] ; input block 6
+ pxor xmm4,xmm12
+ movdqa XMMWORD PTR[112+rsp],xmm6 ; tweak that follows the seven used here
+ lea r12,QWORD PTR[112+r12]
+ pxor xmm5,xmm13
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ pxor xmm1,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm3
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm1
+ pxor xmm2,XMMWORD PTR[96+rsp]
+ movdqu XMMWORD PTR[80+r13],xmm6
+ movdqu XMMWORD PTR[96+r13],xmm2
+ lea r13,QWORD PTR[112+r13]
+
+ movdqa xmm6,XMMWORD PTR[112+rsp] ; xmm6 = next unused tweak for the stealing path
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_6:: ; six blocks: finish the pending xors, run the 8-block core, store six results
+ pxor xmm3,xmm11
+ lea r12,QWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ pxor xmm1,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm3
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm1
+ movdqu XMMWORD PTR[80+r13],xmm6
+ lea r13,QWORD PTR[96+r13]
+
+ movdqa xmm6,XMMWORD PTR[96+rsp] ; next unused tweak
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_5:: ; five blocks
+ pxor xmm2,xmm10
+ lea r12,QWORD PTR[80+r12]
+ pxor xmm3,xmm11
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ pxor xmm1,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ lea r13,QWORD PTR[80+r13]
+
+ movdqa xmm6,XMMWORD PTR[80+rsp] ; next unused tweak
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_4:: ; four blocks
+ pxor xmm1,xmm9
+ lea r12,QWORD PTR[64+r12]
+ pxor xmm2,xmm10
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ lea r13,QWORD PTR[64+r13]
+
+ movdqa xmm6,XMMWORD PTR[64+rsp] ; next unused tweak
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_3:: ; three blocks
+ pxor xmm0,xmm8
+ lea r12,QWORD PTR[48+r12]
+ pxor xmm1,xmm9
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ lea r13,QWORD PTR[48+r13]
+
+ movdqa xmm6,XMMWORD PTR[48+rsp] ; next unused tweak
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_2:: ; two blocks
+ pxor xmm15,xmm7
+ lea r12,QWORD PTR[32+r12]
+ pxor xmm0,xmm8
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+
+ call _bsaes_decrypt8
+
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ lea r13,QWORD PTR[32+r13]
+
+ movdqa xmm6,XMMWORD PTR[32+rsp] ; next unused tweak
+ jmp $L$xts_dec_done
+ALIGN 16
+$L$xts_dec_1:: ; one block: decrypted in place at 32+rbp by asm_AES_decrypt
+ pxor xmm7,xmm15 ; xmm7 = input block xor tweak 0 (xmm15 still holds the tweak)
+ lea r12,QWORD PTR[16+r12]
+ movdqa XMMWORD PTR[32+rbp],xmm7
+ lea rcx,QWORD PTR[32+rbp]
+ lea rdx,QWORD PTR[32+rbp]
+ lea r8,QWORD PTR[r15]
+ call asm_AES_decrypt
+ pxor xmm15,XMMWORD PTR[32+rbp] ; tweak xor decrypted block; xmm15 must survive the call (callee-saved on Win64)
+
+
+
+
+
+ movdqu XMMWORD PTR[r13],xmm15
+ lea r13,QWORD PTR[16+r13]
+
+ movdqa xmm6,XMMWORD PTR[16+rsp] ; next unused tweak
+
+$L$xts_dec_done::
+ and ebx,15 ; ebx = number of bytes in the partial last block
+ jz $L$xts_dec_ret
+ ; ciphertext stealing: the held-back full block is decrypted with the following tweak first, the rebuilt block with the current one
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic]
+ pcmpgtd xmm14,xmm6
+ pshufd xmm13,xmm14,013h
+ movdqa xmm5,xmm6 ; xmm5 = current tweak, kept for the second decryption
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ movdqu xmm15,XMMWORD PTR[r12] ; held-back full input block
+ pxor xmm6,xmm13 ; xmm6 = following tweak
+
+ lea rcx,QWORD PTR[32+rbp]
+ pxor xmm15,xmm6
+ lea rdx,QWORD PTR[32+rbp]
+ movdqa XMMWORD PTR[32+rbp],xmm15
+ lea r8,QWORD PTR[r15]
+ call asm_AES_decrypt ; NOTE(review): xmm5 is volatile on Win64; this relies on asm_AES_decrypt not using it - confirm
+ pxor xmm6,XMMWORD PTR[32+rbp]
+ mov rdx,r13
+ movdqu XMMWORD PTR[r13],xmm6 ; provisional output block at [r13]
+
+$L$xts_dec_steal:: ; per tail byte: out[i] takes in[16+i], out[16+i] takes the old out[i]
+ movzx eax,BYTE PTR[16+r12]
+ movzx ecx,BYTE PTR[rdx]
+ lea r12,QWORD PTR[1+r12]
+ mov BYTE PTR[rdx],al
+ mov BYTE PTR[16+rdx],cl
+ lea rdx,QWORD PTR[1+rdx]
+ sub ebx,1
+ jnz $L$xts_dec_steal
+
+ movdqu xmm15,XMMWORD PTR[r13] ; rebuilt block
+ lea rcx,QWORD PTR[32+rbp]
+ pxor xmm15,xmm5
+ lea rdx,QWORD PTR[32+rbp]
+ movdqa XMMWORD PTR[32+rbp],xmm15
+ lea r8,QWORD PTR[r15]
+ call asm_AES_decrypt
+ pxor xmm5,XMMWORD PTR[32+rbp]
+ movdqu XMMWORD PTR[r13],xmm5
+
+$L$xts_dec_ret::
+ lea rax,QWORD PTR[rsp]
+ pxor xmm0,xmm0
+$L$xts_dec_bzero:: ; zero the stack from rsp up to rbp (tweak slots and converted key schedule), 32 bytes per pass
+ movdqa XMMWORD PTR[rax],xmm0
+ movdqa XMMWORD PTR[16+rax],xmm0
+ lea rax,QWORD PTR[32+rax]
+ cmp rbp,rax
+ ja $L$xts_dec_bzero
+
+ lea rsp,QWORD PTR[rbp]
+ movaps xmm6,XMMWORD PTR[64+rbp] ; restore xmm6-xmm15 from the slots filled in the prologue
+ movaps xmm7,XMMWORD PTR[80+rbp]
+ movaps xmm8,XMMWORD PTR[96+rbp]
+ movaps xmm9,XMMWORD PTR[112+rbp]
+ movaps xmm10,XMMWORD PTR[128+rbp]
+ movaps xmm11,XMMWORD PTR[144+rbp]
+ movaps xmm12,XMMWORD PTR[160+rbp]
+ movaps xmm13,XMMWORD PTR[176+rbp]
+ movaps xmm14,XMMWORD PTR[192+rbp]
+ movaps xmm15,XMMWORD PTR[208+rbp]
+ lea rsp,QWORD PTR[160+rbp]
+ mov r15,QWORD PTR[72+rsp] ; reload the pushed registers in reverse push order
+ mov r14,QWORD PTR[80+rsp]
+ mov r13,QWORD PTR[88+rsp]
+ mov r12,QWORD PTR[96+rsp]
+ mov rbx,QWORD PTR[104+rsp]
+ mov rax,QWORD PTR[112+rsp] ; saved rbp, moved into place after rsp is final
+ lea rsp,QWORD PTR[120+rsp] ; rsp back at its entry value
+ mov rbp,rax
+$L$xts_dec_epilogue::
+ DB 0F3h,0C3h ;repret
+bsaes_xts_decrypt ENDP
+
+ALIGN 64
+_bsaes_const:: ; read-only 16-byte constants shared by the routines in this file
+$L$M0ISR:: ; byte-index vector (each value 0..15 once); name suggests M0 combined with inverse ShiftRows - confirm at the use site
+ DQ 00a0e0206070b0f03h,00004080c0d010509h
+$L$ISRM0:: ; byte-index vector; presumably inverse ShiftRows combined with the M0 layout
+ DQ 001040b0e0205080fh,00306090c00070a0dh
+$L$ISR:: ; byte-index vector; presumably inverse ShiftRows
+ DQ 00504070602010003h,00f0e0d0c080b0a09h
+$L$BS0:: ; alternating-bit mask 55h
+ DQ 05555555555555555h,05555555555555555h
+$L$BS1:: ; bit-pair mask 33h
+ DQ 03333333333333333h,03333333333333333h
+$L$BS2:: ; nibble mask 0fh
+ DQ 00f0f0f0f0f0f0f0fh,00f0f0f0f0f0f0f0fh
+$L$SR:: ; byte-index vector; presumably ShiftRows
+ DQ 00504070600030201h,00f0e0d0c0a09080bh
+$L$SRM0:: ; byte-index vector; presumably ShiftRows combined with the M0 layout
+ DQ 00304090e00050a0fh,001060b0c0207080dh
+$L$M0SR:: ; byte-index vector; presumably M0 combined with ShiftRows
+ DQ 00a0e02060f03070bh,00004080c05090d01h
+$L$SWPUP:: ; byte-index vector: identity on bytes 0..11, reverses bytes 12..15
+ DQ 00706050403020100h,00c0d0e0f0b0a0908h
+$L$SWPUPM0SR:: ; byte-index vector; name suggests SWPUP combined with M0SR
+ DQ 00a0d02060c03070bh,00004080f05090e01h
+$L$ADD1:: ; ADD1..ADD8: value n in the highest dword, all other dwords zero (presumably counter increments)
+ DQ 00000000000000000h,00000000100000000h
+$L$ADD2::
+ DQ 00000000000000000h,00000000200000000h
+$L$ADD3::
+ DQ 00000000000000000h,00000000300000000h
+$L$ADD4::
+ DQ 00000000000000000h,00000000400000000h
+$L$ADD5::
+ DQ 00000000000000000h,00000000500000000h
+$L$ADD6::
+ DQ 00000000000000000h,00000000600000000h
+$L$ADD7::
+ DQ 00000000000000000h,00000000700000000h
+$L$ADD8::
+ DQ 00000000000000000h,00000000800000000h
+$L$xts_magic:: ; mask for the tweak doubling in the xts routines: 087h folded into dword 0, carry bit 1 into dword 2
+ DD 087h,0,1,0
+$L$masks:: ; four vectors with a single bit (01h, 02h, 04h, 08h) replicated in every byte
+ DQ 00101010101010101h,00101010101010101h
+ DQ 00202020202020202h,00202020202020202h
+ DQ 00404040404040404h,00404040404040404h
+ DQ 00808080808080808h,00808080808080808h
+$L$M0:: ; byte-index vector; the M0 byte layout on its own
+ DQ 002060a0e03070b0fh,00004080c0105090dh
+$L$63:: ; 63h replicated in every byte
+ DQ 06363636363636363h,06363636363636363h
+DB 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102 ; NUL-terminated ASCII/UTF-8 banner: "Bit-sliced AES for x86_64/SSSE3, Emilia K(a-umlaut)sper, Peter Schwabe, Andy Polyakov"
+DB 111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44
+DB 32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44
+DB 32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32
+DB 65,110,100,121,32,80,111,108,121,97,107,111,118,0
+ALIGN 64
+
+EXTERN __imp_RtlVirtualUnwind:NEAR
+
+ALIGN 16
+se_handler PROC PRIVATE ; Win64 language-specific exception handler named by every UNWIND_INFO record in .xdata; r8 = CONTEXT, r9 = DISPATCHER_CONTEXT
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64 ; shadow space plus four stack arguments for RtlVirtualUnwind
+ ; offsets below follow the Win64 CONTEXT and DISPATCHER_CONTEXT layouts
+ mov rax,QWORD PTR[120+r8] ; context Rax (the covered functions copy their entry rsp into rax first)
+ mov rbx,QWORD PTR[248+r8] ; context Rip
+
+ mov rsi,QWORD PTR[8+r9] ; dispatcher ImageBase
+ mov r11,QWORD PTR[56+r9] ; dispatcher HandlerData: two image-relative dwords, body label and epilogue label
+
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi] ; absolute address of the body label
+ cmp rbx,r10
+ jb $L$in_prologue ; fault before the body label: frame not built yet, rax already holds the entry rsp
+
+ mov rax,QWORD PTR[152+r8] ; context Rsp
+
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi] ; absolute address of the epilogue label
+ cmp rbx,r10
+ jae $L$in_prologue ; fault at or past the epilogue label: registers already restored, Rsp is the frame
+
+ mov rax,QWORD PTR[160+r8] ; context Rbp = frame base set up by the function body
+
+ lea rsi,QWORD PTR[64+rax] ; saved xmm6-xmm15 in the frame
+ lea rdi,QWORD PTR[512+r8] ; context Xmm6
+ mov ecx,20 ; 20 qwords = ten xmm registers
+ DD 0a548f3fch ; encoded bytes fc f3 48 a5 = cld; rep movsq
+ lea rax,QWORD PTR[160+rax]
+
+ mov rbp,QWORD PTR[112+rax] ; pull the pushed general registers out of the frame
+ mov rbx,QWORD PTR[104+rax]
+ mov r12,QWORD PTR[96+rax]
+ mov r13,QWORD PTR[88+rax]
+ mov r14,QWORD PTR[80+rax]
+ mov r15,QWORD PTR[72+rax]
+ lea rax,QWORD PTR[120+rax] ; rax = rsp value at function entry
+ mov QWORD PTR[144+r8],rbx ; write them back into context Rbx, Rbp, R12..R15
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+
+$L$in_prologue::
+ mov QWORD PTR[152+r8],rax ; context Rsp = recovered stack pointer
+
+ mov rdi,QWORD PTR[40+r9] ; dispatcher ContextRecord
+ mov rsi,r8
+ mov ecx,154 ; 154 qwords = 1232 bytes, the whole CONTEXT
+ DD 0a548f3fch ; cld; rep movsq
+ ; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry, ContextRecord, &HandlerData, &EstablisherFrame, NULL)
+ mov rsi,r9
+ xor rcx,rcx
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1 ; return value 1 (ExceptionContinueSearch in the Win64 EXCEPTION_DISPOSITION enum)
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4) ; function table: one (begin, end, unwind info) triple of image-relative addresses per covered routine
+ALIGN 4
+ DD imagerel $L$cbc_dec_prologue
+ DD imagerel $L$cbc_dec_epilogue
+ DD imagerel $L$cbc_dec_info
+
+ DD imagerel $L$ctr_enc_prologue
+ DD imagerel $L$ctr_enc_epilogue
+ DD imagerel $L$ctr_enc_info
+
+ DD imagerel $L$xts_enc_prologue
+ DD imagerel $L$xts_enc_epilogue
+ DD imagerel $L$xts_enc_info
+
+ DD imagerel $L$xts_dec_prologue
+ DD imagerel $L$xts_dec_epilogue
+ DD imagerel $L$xts_dec_info
+
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8) ; unwind info records referenced from .pdata
+ALIGN 8
+$L$cbc_dec_info:: ; each record: header byte 9 (version 1, exception-handler flag) with no unwind codes, handler address, then two dwords of handler data
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$cbc_dec_body,imagerel $L$cbc_dec_epilogue ; se_handler reads these as the body and epilogue boundaries
+$L$ctr_enc_info::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$ctr_enc_body,imagerel $L$ctr_enc_epilogue
+$L$xts_enc_info::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue
+$L$xts_dec_info::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue
+
+.xdata ENDS
+END
+END
+
diff --git a/ext/libressl/crypto/aes/bsaes-mingw64-x86_64.S b/ext/libressl/crypto/aes/bsaes-mingw64-x86_64.S
new file mode 100644
index 0000000..f0b07cb
--- /dev/null
+++ b/ext/libressl/crypto/aes/bsaes-mingw64-x86_64.S
@@ -0,0 +1,2725 @@
+#include "x86_arch.h"
+.text
+
+
+
+
+.def _bsaes_encrypt8; .scl 3; .type 32; .endef
+.p2align 6 # 64-byte alignment for the entry point
+_bsaes_encrypt8: # in: rax = key schedule, r10d = round count, xmm15 and xmm0-xmm6 = eight 16-byte states; out: same eight registers; also writes xmm7-xmm14, rax, r10d, r11
+ leaq .LBS0(%rip),%r11 # r11 = base for all constant loads below (table is defined elsewhere in this file)
+
+ movdqa (%rax),%xmm8 # first 16-byte round key
+ leaq 16(%rax),%rax
+ movdqa 80(%r11),%xmm7 # shuffle control 80 bytes past .LBS0 (presumably .LM0SR - confirm against the table)
+ pxor %xmm8,%xmm15 # xor the first round key into every state, then byte-shuffle each state with xmm7
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255 # pshufb %xmm7,%xmm15
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199 # pshufb %xmm7,%xmm0
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207 # pshufb %xmm7,%xmm1
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215 # pshufb %xmm7,%xmm2
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223 # pshufb %xmm7,%xmm3
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231 # pshufb %xmm7,%xmm4
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239 # pshufb %xmm7,%xmm5
+.byte 102,15,56,0,247 # pshufb %xmm7,%xmm6
+_bsaes_encrypt8_bitslice: # second entry label that skips the key xor and shuffle above (callers are outside this view)
+ movdqa 0(%r11),%xmm7 # mask at .LBS0+0, used with 1-bit shifts
+ movdqa 16(%r11),%xmm8 # mask at .LBS0+16, used with 2-bit shifts
+ movdqa %xmm5,%xmm9 # swap-move step on the pair (xmm5,xmm6), interleaved with the pair (xmm3,xmm4): keep a copy of a
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5 # (a >> 1) ^ b
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5 # t = ((a >> 1) ^ b) & mask
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6 # b ^= t
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5 # a ^= t << 1
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9 # same 1-bit step on the pairs (xmm1,xmm2) and (xmm15,xmm0)
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7 # mask at .LBS0+32, used with the 4-bit shifts further down
+ movdqa %xmm4,%xmm9 # 2-bit step (mask in xmm8) on the pairs (xmm4,xmm6) and (xmm3,xmm5)
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9 # 2-bit step on the pairs (xmm0,xmm2) and (xmm15,xmm1)
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9 # 4-bit step (mask in xmm7) on the pairs (xmm2,xmm6) and (xmm1,xmm5)
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9 # 4-bit step on the pairs (xmm0,xmm4) and (xmm15,xmm3)
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d # round counter; the first pass enters at the S-box stage
+ jmp .Lenc_sbox
+.p2align 4
+.Lenc_loop: # per round: xor eight 16-byte key slices from (rax), shuffle every state with xmm7
+ pxor 0(%rax),%xmm15
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255 # pshufb %xmm7,%xmm15
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199 # pshufb %xmm7,%xmm0
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207 # pshufb %xmm7,%xmm1
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215 # pshufb %xmm7,%xmm2
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223 # pshufb %xmm7,%xmm3
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231 # pshufb %xmm7,%xmm4
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239 # pshufb %xmm7,%xmm5
+ leaq 128(%rax),%rax # advance to the next 128-byte round key
+.byte 102,15,56,0,247 # pshufb %xmm7,%xmm6
+.Lenc_sbox: # substitution stage as a branch-free sequence of pxor/pand/por/movdqa, with xmm7-xmm14 as temporaries (per the label, the AES S-box)
+ pxor %xmm5,%xmm4
+ pxor %xmm0,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm1,%xmm5
+ pxor %xmm15,%xmm4
+
+ pxor %xmm2,%xmm5
+ pxor %xmm6,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm3,%xmm2
+ pxor %xmm4,%xmm3
+ pxor %xmm0,%xmm2
+
+ pxor %xmm6,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm6,%xmm10 # from here the temporaries xmm7-xmm14 are built from the eight state registers
+ movdqa %xmm0,%xmm9
+ movdqa %xmm4,%xmm8
+ movdqa %xmm1,%xmm12
+ movdqa %xmm5,%xmm11
+
+ pxor %xmm3,%xmm10
+ pxor %xmm1,%xmm9
+ pxor %xmm2,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm3,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm15,%xmm11
+ movdqa %xmm10,%xmm14
+
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm2,%xmm11
+ pxor %xmm15,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm6,%xmm12
+ movdqa %xmm4,%xmm11
+ pxor %xmm0,%xmm12
+ pxor %xmm5,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm1,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm3,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm0,%xmm13
+ pand %xmm2,%xmm11
+ movdqa %xmm6,%xmm14
+ pand %xmm15,%xmm12
+ pand %xmm4,%xmm13
+ por %xmm5,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+
+
+
+
+
+ movdqa %xmm10,%xmm11 # this stretch works only on the temporaries xmm7-xmm14
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+
+ pxor %xmm9,%xmm10
+
+ pand %xmm10,%xmm12
+
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+
+ pand %xmm7,%xmm9
+
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+
+ pand %xmm14,%xmm13
+
+ pxor %xmm11,%xmm13
+ movdqa %xmm5,%xmm11 # the temporaries are now combined back into the state registers
+ movdqa %xmm4,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm5,%xmm9
+ pxor %xmm4,%xmm5
+ pand %xmm14,%xmm4
+ pand %xmm13,%xmm5
+ pxor %xmm4,%xmm5
+ pxor %xmm9,%xmm4
+ pxor %xmm15,%xmm11
+ pxor %xmm2,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm2,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm2
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm2,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm2
+ pxor %xmm11,%xmm5
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm2
+
+ movdqa %xmm6,%xmm11
+ movdqa %xmm0,%xmm7
+ pxor %xmm3,%xmm11
+ pxor %xmm1,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm3,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm1,%xmm3
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm1
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm3
+ pxor %xmm11,%xmm7
+ pxor %xmm1,%xmm3
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm1
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm6,%xmm10
+ pxor %xmm0,%xmm6
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm6
+ pxor %xmm0,%xmm6
+ pxor %xmm10,%xmm0
+ pxor %xmm11,%xmm6
+ pxor %xmm11,%xmm3
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm1
+ pxor %xmm15,%xmm6
+ pxor %xmm5,%xmm0
+ pxor %xmm6,%xmm3
+ pxor %xmm15,%xmm5
+ pxor %xmm0,%xmm15
+
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ pxor %xmm2,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm4,%xmm3
+
+ pxor %xmm2,%xmm5
+ decl %r10d
+ jl .Lenc_done # counter went negative: that was the last substitution stage, skip the mixing below
+ pshufd $147,%xmm15,%xmm7 # mixing stage (presumably MixColumns): $147 rotates the four dwords by one position, $78 swaps the 64-bit halves
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm3,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm5,%xmm10
+ pxor %xmm9,%xmm3
+ pshufd $147,%xmm2,%xmm11
+ pxor %xmm10,%xmm5
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm2
+ pshufd $147,%xmm1,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm1
+ pxor %xmm14,%xmm4
+
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm2,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm5,%xmm11
+ pshufd $78,%xmm2,%xmm7
+ pxor %xmm1,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm3,%xmm10
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm1,%xmm5
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm12,%xmm8
+ pxor %xmm10,%xmm2
+ pxor %xmm14,%xmm6
+ pxor %xmm13,%xmm5
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm1
+ movdqa %xmm8,%xmm4
+ movdqa 48(%r11),%xmm7 # shuffle control for ordinary rounds, at .LBS0+48
+ jnz .Lenc_loop # flags are still those of the decl above: counter not yet zero
+ movdqa 64(%r11),%xmm7 # counter reached zero: the final pass uses the control at .LBS0+64 instead
+ jmp .Lenc_loop
+.p2align 4
+.Lenc_done: # undo the bit-sliced layout with the same swap-move steps on different register pairs, then xor the last round key
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm1,%xmm9 # 1-bit step on the pairs (xmm1,xmm4) and (xmm2,xmm6)
+ psrlq $1,%xmm1
+ movdqa %xmm2,%xmm10
+ psrlq $1,%xmm2
+ pxor %xmm4,%xmm1
+ pxor %xmm6,%xmm2
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm2
+ pxor %xmm1,%xmm4
+ psllq $1,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $1,%xmm2
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm2
+ movdqa %xmm3,%xmm9 # 1-bit step on the pairs (xmm3,xmm5) and (xmm15,xmm0)
+ psrlq $1,%xmm3
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm5,%xmm3
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm15
+ pxor %xmm3,%xmm5
+ psllq $1,%xmm3
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9 # 2-bit step on the pairs (xmm6,xmm4) and (xmm2,xmm1)
+ psrlq $2,%xmm6
+ movdqa %xmm2,%xmm10
+ psrlq $2,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm1,%xmm2
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm2
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm2,%xmm1
+ psllq $2,%xmm2
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm2
+ movdqa %xmm0,%xmm9 # 2-bit step on the pairs (xmm0,xmm5) and (xmm15,xmm3)
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm5,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm5
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm5,%xmm9 # 4-bit step on the pairs (xmm5,xmm4) and (xmm3,xmm1)
+ psrlq $4,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $4,%xmm3
+ pxor %xmm4,%xmm5
+ pxor %xmm1,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm4
+ psllq $4,%xmm5
+ pxor %xmm3,%xmm1
+ psllq $4,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9 # 4-bit step on the pairs (xmm0,xmm6) and (xmm15,xmm2)
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm2,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm2
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7 # last 16-byte round key, xored into all eight result registers
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq
+
+
+.def _bsaes_decrypt8; .scl 3; .type 32; .endef
+.p2align 6
+_bsaes_decrypt8:
+ leaq .LBS0(%rip),%r11
+
+ movdqa (%rax),%xmm8
+ leaq 16(%rax),%rax
+ movdqa -48(%r11),%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+.byte 102,15,56,0,247
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm4,%xmm9
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d
+ jmp .Ldec_sbox
+.p2align 4
+.Ldec_loop:
+ pxor 0(%rax),%xmm15
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239
+ leaq 128(%rax),%rax
+.byte 102,15,56,0,247
+.Ldec_sbox:
+ pxor %xmm3,%xmm2
+
+ pxor %xmm6,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm3,%xmm5
+ pxor %xmm5,%xmm6
+ pxor %xmm6,%xmm0
+
+ pxor %xmm0,%xmm15
+ pxor %xmm4,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm15,%xmm4
+ pxor %xmm2,%xmm0
+ movdqa %xmm2,%xmm10
+ movdqa %xmm6,%xmm9
+ movdqa %xmm0,%xmm8
+ movdqa %xmm3,%xmm12
+ movdqa %xmm4,%xmm11
+
+ pxor %xmm15,%xmm10
+ pxor %xmm3,%xmm9
+ pxor %xmm5,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm15,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm1,%xmm11
+ movdqa %xmm10,%xmm14
+
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm5,%xmm11
+ pxor %xmm1,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm2,%xmm12
+ movdqa %xmm0,%xmm11
+ pxor %xmm6,%xmm12
+ pxor %xmm4,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm3,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm15,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm6,%xmm13
+ pand %xmm5,%xmm11
+ movdqa %xmm2,%xmm14
+ pand %xmm1,%xmm12
+ pand %xmm0,%xmm13
+ por %xmm4,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+
+
+
+
+
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+
+ pxor %xmm9,%xmm10
+
+ pand %xmm10,%xmm12
+
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+
+ pand %xmm7,%xmm9
+
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+
+ pand %xmm14,%xmm13
+
+ pxor %xmm11,%xmm13
+ movdqa %xmm4,%xmm11
+ movdqa %xmm0,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm4,%xmm9
+ pxor %xmm0,%xmm4
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm4
+ pxor %xmm0,%xmm4
+ pxor %xmm9,%xmm0
+ pxor %xmm1,%xmm11
+ pxor %xmm5,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm1,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm5,%xmm1
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm5
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm1
+ pxor %xmm11,%xmm7
+ pxor %xmm5,%xmm1
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm5
+ pxor %xmm11,%xmm4
+ pxor %xmm11,%xmm1
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm5
+
+ movdqa %xmm2,%xmm11
+ movdqa %xmm6,%xmm7
+ pxor %xmm15,%xmm11
+ pxor %xmm3,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm3,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm3
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm3,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm3
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm2,%xmm10
+ pxor %xmm6,%xmm2
+ pand %xmm14,%xmm6
+ pand %xmm13,%xmm2
+ pxor %xmm6,%xmm2
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm3
+ pxor %xmm6,%xmm0
+ pxor %xmm4,%xmm5
+
+ pxor %xmm0,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm6,%xmm4
+ pxor %xmm1,%xmm3
+ pxor %xmm15,%xmm6
+ pxor %xmm4,%xmm3
+ pxor %xmm5,%xmm2
+ pxor %xmm0,%xmm5
+ pxor %xmm3,%xmm2
+
+ pxor %xmm15,%xmm3
+ pxor %xmm2,%xmm6
+ decl %r10d
+ jl .Ldec_done
+
+ pshufd $78,%xmm15,%xmm7
+ pshufd $78,%xmm2,%xmm13
+ pxor %xmm15,%xmm7
+ pshufd $78,%xmm4,%xmm14
+ pxor %xmm2,%xmm13
+ pshufd $78,%xmm0,%xmm8
+ pxor %xmm4,%xmm14
+ pshufd $78,%xmm5,%xmm9
+ pxor %xmm0,%xmm8
+ pshufd $78,%xmm3,%xmm10
+ pxor %xmm5,%xmm9
+ pxor %xmm13,%xmm15
+ pxor %xmm13,%xmm0
+ pshufd $78,%xmm1,%xmm11
+ pxor %xmm3,%xmm10
+ pxor %xmm7,%xmm5
+ pxor %xmm8,%xmm3
+ pshufd $78,%xmm6,%xmm12
+ pxor %xmm1,%xmm11
+ pxor %xmm14,%xmm0
+ pxor %xmm9,%xmm1
+ pxor %xmm6,%xmm12
+
+ pxor %xmm14,%xmm5
+ pxor %xmm13,%xmm3
+ pxor %xmm13,%xmm1
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm14,%xmm1
+ pxor %xmm14,%xmm6
+ pxor %xmm12,%xmm4
+ pshufd $147,%xmm15,%xmm7
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm5,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm3,%xmm10
+ pxor %xmm9,%xmm5
+ pshufd $147,%xmm1,%xmm11
+ pxor %xmm10,%xmm3
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm1
+ pshufd $147,%xmm2,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm2
+ pxor %xmm14,%xmm4
+
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm1,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm3,%xmm11
+ pshufd $78,%xmm1,%xmm7
+ pxor %xmm2,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm5,%xmm10
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm2,%xmm3
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm12,%xmm8
+ pxor %xmm1,%xmm10
+ pxor %xmm14,%xmm6
+ pxor %xmm3,%xmm13
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm2
+ movdqa %xmm13,%xmm5
+ movdqa %xmm8,%xmm4
+ movdqa %xmm2,%xmm1
+ movdqa %xmm10,%xmm2
+ movdqa -16(%r11),%xmm7
+ jnz .Ldec_loop
+ movdqa -32(%r11),%xmm7
+ jmp .Ldec_loop
+.p2align 4
+.Ldec_done:
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm2,%xmm9
+ psrlq $1,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $1,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm6,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm4
+ psllq $1,%xmm2
+ pxor %xmm1,%xmm6
+ psllq $1,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm3,%xmm5
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm15
+ pxor %xmm5,%xmm3
+ psllq $1,%xmm5
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9
+ psrlq $2,%xmm6
+ movdqa %xmm1,%xmm10
+ psrlq $2,%xmm1
+ pxor %xmm4,%xmm6
+ pxor %xmm2,%xmm1
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm1
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm1,%xmm2
+ psllq $2,%xmm1
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm3,%xmm0
+ pxor %xmm5,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm3
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm5
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm3,%xmm9
+ psrlq $4,%xmm3
+ movdqa %xmm5,%xmm10
+ psrlq $4,%xmm5
+ pxor %xmm4,%xmm3
+ pxor %xmm2,%xmm5
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $4,%xmm3
+ pxor %xmm5,%xmm2
+ psllq $4,%xmm5
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm5
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq
+
+.def _bsaes_key_convert; .scl 3; .type 32; .endef /* file-local helper: expands 16-byte key blocks read from (%rcx) into eight 16-byte per-bit masks each, written at (%rax) */
+.p2align 4
+_bsaes_key_convert: /* in: %rcx = source key blocks, %rax = 16-byte-aligned output, %r10d = count (callers pass the dword at key+240, presumably the round count); out: %xmm6 = last block loaded (not converted), %xmm7 = constant at .Lmasks+80, %rax = end of output */
+ leaq .Lmasks(%rip),%r11
+ movdqu (%rcx),%xmm7 /* first 16-byte key block, copied through unchanged below */
+ leaq 16(%rcx),%rcx
+ movdqa 0(%r11),%xmm0 /* xmm0..xmm3: four bit-select masks from the .Lmasks table (values not visible here) */
+ movdqa 16(%r11),%xmm1
+ movdqa 32(%r11),%xmm2
+ movdqa 48(%r11),%xmm3
+ movdqa 64(%r11),%xmm4 /* byte-shuffle control applied to every converted block */
+ pcmpeqd %xmm5,%xmm5 /* xmm5 = all ones, used to invert selected masks */
+
+ movdqu (%rcx),%xmm6 /* second key block: first one to be converted */
+ movdqa %xmm7,(%rax) /* slot 0 of the output holds the first block as-is */
+ leaq 16(%rax),%rax
+ decl %r10d /* loop below runs count-1 times */
+ jmp .Lkey_loop
+.p2align 4
+.Lkey_loop: /* per iteration: consumes 16 input bytes from xmm6, emits 128 output bytes */
+.byte 102,15,56,0,244 /* encodes pshufb %xmm4,%xmm6 */
+
+ movdqa %xmm0,%xmm8
+ movdqa %xmm1,%xmm9
+
+ pand %xmm6,%xmm8
+ pand %xmm6,%xmm9
+ movdqa %xmm2,%xmm10
+ pcmpeqb %xmm0,%xmm8 /* byte becomes 0xff where the key byte has every bit of mask xmm0 set */
+ psllq $4,%xmm0 /* shift masks xmm0..xmm3 up by 4 bits to test the next four bit positions */
+ movdqa %xmm3,%xmm11
+ pcmpeqb %xmm1,%xmm9
+ psllq $4,%xmm1
+
+ pand %xmm6,%xmm10
+ pand %xmm6,%xmm11
+ movdqa %xmm0,%xmm12
+ pcmpeqb %xmm2,%xmm10
+ psllq $4,%xmm2
+ movdqa %xmm1,%xmm13
+ pcmpeqb %xmm3,%xmm11
+ psllq $4,%xmm3
+
+ movdqa %xmm2,%xmm14
+ movdqa %xmm3,%xmm15
+ pxor %xmm5,%xmm8 /* masks 0 and 1 are stored inverted */
+ pxor %xmm5,%xmm9
+
+ pand %xmm6,%xmm12
+ pand %xmm6,%xmm13
+ movdqa %xmm8,0(%rax)
+ pcmpeqb %xmm0,%xmm12
+ psrlq $4,%xmm0 /* restore masks xmm0..xmm3 for the next iteration */
+ movdqa %xmm9,16(%rax)
+ pcmpeqb %xmm1,%xmm13
+ psrlq $4,%xmm1
+ leaq 16(%rcx),%rcx
+
+ pand %xmm6,%xmm14
+ pand %xmm6,%xmm15
+ movdqa %xmm10,32(%rax)
+ pcmpeqb %xmm2,%xmm14
+ psrlq $4,%xmm2
+ movdqa %xmm11,48(%rax)
+ pcmpeqb %xmm3,%xmm15
+ psrlq $4,%xmm3
+ movdqu (%rcx),%xmm6 /* fetch the next key block; after the final iteration this is the block returned in xmm6 */
+
+ pxor %xmm5,%xmm13 /* masks 5 and 6 are stored inverted as well */
+ pxor %xmm5,%xmm14
+ movdqa %xmm12,64(%rax)
+ movdqa %xmm13,80(%rax)
+ movdqa %xmm14,96(%rax)
+ movdqa %xmm15,112(%rax)
+ leaq 128(%rax),%rax
+ decl %r10d
+ jnz .Lkey_loop
+
+ movdqa 80(%r11),%xmm7 /* constant handed back to the caller, which combines it with a boundary round key */
+
+ retq
+
+
+.globl bsaes_cbc_encrypt
+.def bsaes_cbc_encrypt; .scl 2; .type 32; .endef /* CBC entry point, Win64 ABI: %rcx = in, %rdx = out, %r8 = length in bytes, %r9 = key, 40(%rsp) = IV pointer, 48(%rsp) = direction flag */
+.p2align 4
+bsaes_cbc_encrypt: /* only decryption of at least 128 bytes is handled here; everything else tail-jumps to asm_AES_cbc_encrypt */
+ movl 48(%rsp),%r11d /* sixth argument: nonzero selects the fallback routine */
+ cmpl $0,%r11d
+ jne asm_AES_cbc_encrypt
+ cmpq $128,%r8 /* fewer than 8 blocks: also use the fallback */
+ jb asm_AES_cbc_encrypt
+
+ movq %rsp,%rax
+.Lcbc_dec_prologue: /* NOTE(review): prologue/body/epilogue labels are presumably consumed by unwind data elsewhere in the file - confirm */
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq 160(%rsp),%r10 /* 48 pushed + 72 = 120 bytes below entry, so this is entry 40(%rsp): the IV pointer */
+ leaq -160(%rsp),%rsp /* room for the ten nonvolatile xmm registers saved below */
+ movaps %xmm6,64(%rsp)
+ movaps %xmm7,80(%rsp)
+ movaps %xmm8,96(%rsp)
+ movaps %xmm9,112(%rsp)
+ movaps %xmm10,128(%rsp)
+ movaps %xmm11,144(%rsp)
+ movaps %xmm12,160(%rsp)
+ movaps %xmm13,176(%rsp)
+ movaps %xmm14,192(%rsp)
+ movaps %xmm15,208(%rsp)
+.Lcbc_dec_body:
+ movq %rsp,%rbp /* rbp = frame base; 32(%rbp) is a 16-byte scratch slot for the chaining value */
+ movl 240(%r9),%eax /* dword at key+240, presumably the round count */
+ movq %rcx,%r12 /* r12 = input cursor */
+ movq %rdx,%r13 /* r13 = output cursor */
+ movq %r8,%r14
+ movq %r9,%r15 /* r15 = key */
+ movq %r10,%rbx /* rbx = IV pointer */
+ shrq $4,%r14 /* r14 = number of 16-byte blocks */
+
+ movl %eax,%edx /* edx keeps the round count for every cipher call */
+ shlq $7,%rax
+ subq $96,%rax /* converted schedule size: 128*rounds - 96 bytes */
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor (%rsp),%xmm7 /* fold the returned constant into the first schedule slot */
+ movdqa %xmm6,(%rax) /* last key block is stored unmodified at the end of the schedule */
+ movdqa %xmm7,(%rsp)
+
+ movdqu (%rbx),%xmm14 /* xmm14 = current chaining value (IV) */
+ subq $8,%r14
+.Lcbc_dec_loop: /* eight blocks per iteration */
+ movdqu 0(%r12),%xmm15
+ movdqu 16(%r12),%xmm0
+ movdqu 32(%r12),%xmm1
+ movdqu 48(%r12),%xmm2
+ movdqu 64(%r12),%xmm3
+ movdqu 80(%r12),%xmm4
+ movq %rsp,%rax /* rax = converted key schedule */
+ movdqu 96(%r12),%xmm5
+ movl %edx,%r10d
+ movdqu 112(%r12),%xmm6
+ movdqa %xmm14,32(%rbp) /* park the chaining value while the cipher uses xmm14 */
+
+ call _bsaes_decrypt8
+
+ pxor 32(%rbp),%xmm15 /* results come back in the order xmm15,xmm0,xmm5,xmm3,xmm1,xmm6,xmm2,xmm4 */
+ movdqu 0(%r12),%xmm7 /* reload ciphertext blocks to xor with the following plaintext */
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm2
+ movdqu 112(%r12),%xmm14 /* last ciphertext block becomes the next chaining value */
+ pxor %xmm13,%xmm4
+ movdqu %xmm15,0(%r13)
+ leaq 128(%r12),%r12
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ subq $8,%r14
+ jnc .Lcbc_dec_loop
+
+ addq $8,%r14 /* r14 = leftover block count, 0..7 */
+ jz .Lcbc_dec_done
+
+ movdqu 0(%r12),%xmm15
+ movq %rsp,%rax
+ movl %edx,%r10d
+ cmpq $2,%r14 /* each compare feeds two branches; the movdqu in between leaves flags intact */
+ jb .Lcbc_dec_one
+ movdqu 16(%r12),%xmm0
+ je .Lcbc_dec_two
+ movdqu 32(%r12),%xmm1
+ cmpq $4,%r14
+ jb .Lcbc_dec_three
+ movdqu 48(%r12),%xmm2
+ je .Lcbc_dec_four
+ movdqu 64(%r12),%xmm3
+ cmpq $6,%r14
+ jb .Lcbc_dec_five
+ movdqu 80(%r12),%xmm4
+ je .Lcbc_dec_six
+ movdqu 96(%r12),%xmm5 /* fall through: seven leftover blocks */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm14 /* final ciphertext block is the chaining value written back at .Lcbc_dec_done */
+ pxor %xmm12,%xmm2
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4
+.Lcbc_dec_six: /* six leftover blocks; same pattern with one fewer output */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm14
+ pxor %xmm11,%xmm6
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4
+.Lcbc_dec_five: /* five leftover blocks */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm14
+ pxor %xmm10,%xmm1
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4
+.Lcbc_dec_four: /* four leftover blocks */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm14
+ pxor %xmm9,%xmm3
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4
+.Lcbc_dec_three: /* three leftover blocks */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm14
+ pxor %xmm8,%xmm5
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4
+.Lcbc_dec_two: /* two leftover blocks */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm14
+ pxor %xmm7,%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4
+.Lcbc_dec_one: /* single leftover block: use the one-block routine with the original key in r15 */
+ leaq (%r12),%rcx
+ leaq 32(%rbp),%rdx /* decrypt into the scratch slot */
+ leaq (%r15),%r8
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm14 /* plaintext = decrypted block xor chaining value */
+ movdqu %xmm14,(%r13)
+ movdqa %xmm15,%xmm14 /* xmm15 still holds the ciphertext block: it is the new chaining value */
+
+.Lcbc_dec_done:
+ movdqu %xmm14,(%rbx) /* write the updated chaining value back through the IV pointer */
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lcbc_dec_bzero: /* zero the stack from rsp up to rbp (the converted key schedule), 32 bytes per pass */
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lcbc_dec_bzero
+
+ leaq (%rbp),%rsp
+ movaps 64(%rbp),%xmm6 /* restore the nonvolatile xmm registers saved in the prologue */
+ movaps 80(%rbp),%xmm7
+ movaps 96(%rbp),%xmm8
+ movaps 112(%rbp),%xmm9
+ movaps 128(%rbp),%xmm10
+ movaps 144(%rbp),%xmm11
+ movaps 160(%rbp),%xmm12
+ movaps 176(%rbp),%xmm13
+ movaps 192(%rbp),%xmm14
+ movaps 208(%rbp),%xmm15
+ leaq 160(%rbp),%rsp
+ movq 72(%rsp),%r15 /* pushed registers sit above the 72-byte gap, in reverse push order */
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax /* saved rbp, moved into place after rsp is final */
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lcbc_dec_epilogue:
+ retq
+
+
+.globl bsaes_ctr32_encrypt_blocks
+.def bsaes_ctr32_encrypt_blocks; .scl 2; .type 32; .endef /* CTR entry point, Win64 ABI: %rcx = in, %rdx = out, %r8 = number of 16-byte blocks, %r9 = key, 40(%rsp) = pointer to the 16-byte counter block */
+.p2align 4
+bsaes_ctr32_encrypt_blocks: /* eight or more blocks use the 8-way path; fewer go one block at a time through asm_AES_encrypt */
+ movq %rsp,%rax
+.Lctr_enc_prologue: /* NOTE(review): prologue/body/epilogue labels are presumably consumed by unwind data elsewhere in the file - confirm */
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq 160(%rsp),%r10 /* 120 bytes below entry, so this is entry 40(%rsp): the counter block pointer */
+ leaq -160(%rsp),%rsp
+ movaps %xmm6,64(%rsp) /* save the ten nonvolatile xmm registers */
+ movaps %xmm7,80(%rsp)
+ movaps %xmm8,96(%rsp)
+ movaps %xmm9,112(%rsp)
+ movaps %xmm10,128(%rsp)
+ movaps %xmm11,144(%rsp)
+ movaps %xmm12,160(%rsp)
+ movaps %xmm13,176(%rsp)
+ movaps %xmm14,192(%rsp)
+ movaps %xmm15,208(%rsp)
+.Lctr_enc_body:
+ movq %rsp,%rbp /* rbp = frame base; 32(%rbp) holds the counter block, 48(%rbp) is scratch for the short path */
+ movdqu (%r10),%xmm0
+ movl 240(%r9),%eax /* dword at key+240, presumably the round count */
+ movq %rcx,%r12 /* r12 = input cursor */
+ movq %rdx,%r13 /* r13 = output cursor */
+ movq %r8,%r14 /* r14 = blocks remaining */
+ movq %r9,%r15 /* r15 = key */
+ movdqa %xmm0,32(%rbp)
+ cmpq $8,%r8
+ jb .Lctr_enc_short
+
+ movl %eax,%ebx /* ebx keeps the round count across cipher calls */
+ shlq $7,%rax
+ subq $96,%rax /* converted schedule size: 128*rounds - 96 bytes */
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %ebx,%r10d
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7 /* last key block xor returned constant, stored at the end of the schedule */
+ movdqa %xmm7,(%rax)
+
+ movdqa (%rsp),%xmm8 /* first schedule slot (unconverted first key block) */
+ leaq .LADD1(%rip),%r11
+ movdqa 32(%rbp),%xmm15 /* xmm15 = running counter block */
+ movdqa -32(%r11),%xmm7 /* shuffle control stored 32 bytes before .LADD1 (value not visible here) */
+.byte 102,68,15,56,0,199 /* encodes pshufb %xmm7,%xmm8 */
+.byte 102,68,15,56,0,255 /* encodes pshufb %xmm7,%xmm15 */
+ movdqa %xmm8,(%rsp) /* keep the shuffled first key block in the schedule */
+ jmp .Lctr_enc_loop
+.p2align 4
+.Lctr_enc_loop: /* builds eight counter blocks, encrypts them, xors with input */
+ movdqa %xmm15,32(%rbp) /* remember the base counter for the next iteration */
+ movdqa %xmm15,%xmm0
+ movdqa %xmm15,%xmm1
+ paddd 0(%r11),%xmm0 /* xmm0..xmm6 = base + table entries at .LADD1+0..96, presumably +1..+7 - confirm against the table */
+ movdqa %xmm15,%xmm2
+ paddd 16(%r11),%xmm1
+ movdqa %xmm15,%xmm3
+ paddd 32(%r11),%xmm2
+ movdqa %xmm15,%xmm4
+ paddd 48(%r11),%xmm3
+ movdqa %xmm15,%xmm5
+ paddd 64(%r11),%xmm4
+ movdqa %xmm15,%xmm6
+ paddd 80(%r11),%xmm5
+ paddd 96(%r11),%xmm6
+
+
+
+ movdqa (%rsp),%xmm8 /* first round key, xored into all eight blocks here */
+ leaq 16(%rsp),%rax /* rax = schedule past the first slot, as expected by the alternate cipher entry */
+ movdqa -16(%r11),%xmm7 /* shuffle control stored 16 bytes before .LADD1 */
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255 /* encodes pshufb %xmm7,%xmm15 */
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199 /* encodes pshufb %xmm7,%xmm0 */
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207 /* encodes pshufb %xmm7,%xmm1 */
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215 /* encodes pshufb %xmm7,%xmm2 */
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223 /* encodes pshufb %xmm7,%xmm3 */
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231 /* encodes pshufb %xmm7,%xmm4 */
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239 /* encodes pshufb %xmm7,%xmm5 */
+ leaq .LBS0(%rip),%r11 /* r11 = constant table base for the cipher core */
+.byte 102,15,56,0,247 /* encodes pshufb %xmm7,%xmm6 */
+ movl %ebx,%r10d
+
+ call _bsaes_encrypt8_bitslice
+
+ subq $8,%r14
+ jc .Lctr_enc_loop_done /* fewer than eight blocks were left: partial output */
+
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ movdqu 32(%r12),%xmm9
+ movdqu 48(%r12),%xmm10
+ movdqu 64(%r12),%xmm11
+ movdqu 80(%r12),%xmm12
+ movdqu 96(%r12),%xmm13
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ pxor %xmm15,%xmm7 /* keystream arrives in the order xmm15,xmm0,xmm3,xmm5,xmm2,xmm6,xmm1,xmm4 */
+ movdqa 32(%rbp),%xmm15 /* reload the base counter */
+ pxor %xmm8,%xmm0
+ movdqu %xmm7,0(%r13)
+ pxor %xmm9,%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor %xmm10,%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor %xmm11,%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor %xmm12,%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor %xmm13,%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor %xmm14,%xmm4
+ movdqu %xmm1,96(%r13)
+ leaq .LADD1(%rip),%r11
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ paddd 112(%r11),%xmm15 /* advance the base counter by the entry at .LADD1+112, presumably 8 */
+ jnz .Lctr_enc_loop /* tests the flags of the subq above; only SSE and lea instructions ran in between */
+
+ jmp .Lctr_enc_done
+.p2align 4
+.Lctr_enc_loop_done: /* 1..7 blocks remain; keystream for all eight is already computed */
+ addq $8,%r14
+ movdqu 0(%r12),%xmm7
+ pxor %xmm7,%xmm15
+ movdqu %xmm15,0(%r13)
+ cmpq $2,%r14 /* each compare feeds two branches; the SSE instructions in between leave flags intact */
+ jb .Lctr_enc_done
+ movdqu 16(%r12),%xmm8
+ pxor %xmm8,%xmm0
+ movdqu %xmm0,16(%r13)
+ je .Lctr_enc_done
+ movdqu 32(%r12),%xmm9
+ pxor %xmm9,%xmm3
+ movdqu %xmm3,32(%r13)
+ cmpq $4,%r14
+ jb .Lctr_enc_done
+ movdqu 48(%r12),%xmm10
+ pxor %xmm10,%xmm5
+ movdqu %xmm5,48(%r13)
+ je .Lctr_enc_done
+ movdqu 64(%r12),%xmm11
+ pxor %xmm11,%xmm2
+ movdqu %xmm2,64(%r13)
+ cmpq $6,%r14
+ jb .Lctr_enc_done
+ movdqu 80(%r12),%xmm12
+ pxor %xmm12,%xmm6
+ movdqu %xmm6,80(%r13)
+ je .Lctr_enc_done
+ movdqu 96(%r12),%xmm13
+ pxor %xmm13,%xmm1
+ movdqu %xmm1,96(%r13)
+ jmp .Lctr_enc_done
+
+.p2align 4
+.Lctr_enc_short: /* fewer than eight blocks in total: one asm_AES_encrypt call per block */
+ leaq 32(%rbp),%rcx /* input = counter block */
+ leaq 48(%rbp),%rdx /* output = keystream block */
+ leaq (%r15),%r8
+ call asm_AES_encrypt
+ movdqu (%r12),%xmm0
+ leaq 16(%r12),%r12
+ movl 44(%rbp),%eax /* last four bytes of the counter block */
+ bswapl %eax /* counter word is stored big-endian */
+ pxor 48(%rbp),%xmm0
+ incl %eax
+ movdqu %xmm0,(%r13)
+ bswapl %eax
+ leaq 16(%r13),%r13
+ movl %eax,44(%rsp) /* rsp equals rbp on this path (no schedule was allocated), so this updates the counter at 44(%rbp) */
+ decq %r14
+ jnz .Lctr_enc_short
+
+.Lctr_enc_done:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lctr_enc_bzero: /* zero the stack from rsp up to rbp, 32 bytes per pass; always runs at least once */
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lctr_enc_bzero
+
+ leaq (%rbp),%rsp
+ movaps 64(%rbp),%xmm6 /* restore the nonvolatile xmm registers saved in the prologue */
+ movaps 80(%rbp),%xmm7
+ movaps 96(%rbp),%xmm8
+ movaps 112(%rbp),%xmm9
+ movaps 128(%rbp),%xmm10
+ movaps 144(%rbp),%xmm11
+ movaps 160(%rbp),%xmm12
+ movaps 176(%rbp),%xmm13
+ movaps 192(%rbp),%xmm14
+ movaps 208(%rbp),%xmm15
+ leaq 160(%rbp),%rsp
+ movq 72(%rsp),%r15 /* pushed registers sit above the 72-byte gap, in reverse push order */
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax /* saved rbp, moved into place after rsp is final */
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lctr_enc_epilogue:
+ retq
+
+.globl bsaes_xts_encrypt
+.def bsaes_xts_encrypt; .scl 2; .type 32; .endef /* XTS encryption, Win64 ABI: %rcx = in, %rdx = out, %r8 = length in bytes, %r9 = data key, 40(%rsp) = tweak key, 48(%rsp) = 16-byte initial tweak input */
+.p2align 4
+bsaes_xts_encrypt: /* processes 8 blocks per pass, then 1..7 leftover blocks, then a partial final block by swapping bytes with the last full block */
+ movq %rsp,%rax
+.Lxts_enc_prologue: /* NOTE(review): prologue/body/epilogue labels are presumably consumed by unwind data elsewhere in the file - confirm */
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq 160(%rsp),%r10 /* 120 bytes below entry, so this is entry 40(%rsp): the tweak key */
+ movq 168(%rsp),%r11 /* entry 48(%rsp): pointer to the initial tweak input */
+ leaq -160(%rsp),%rsp
+ movaps %xmm6,64(%rsp) /* save the ten nonvolatile xmm registers */
+ movaps %xmm7,80(%rsp)
+ movaps %xmm8,96(%rsp)
+ movaps %xmm9,112(%rsp)
+ movaps %xmm10,128(%rsp)
+ movaps %xmm11,144(%rsp)
+ movaps %xmm12,160(%rsp)
+ movaps %xmm13,176(%rsp)
+ movaps %xmm14,192(%rsp)
+ movaps %xmm15,208(%rsp)
+.Lxts_enc_body:
+ movq %rsp,%rbp /* rbp = frame base; 32(%rbp) is a 16-byte scratch slot */
+ movq %rcx,%r12 /* r12 = input cursor */
+ movq %rdx,%r13 /* r13 = output cursor */
+ movq %r8,%r14
+ movq %r9,%r15 /* r15 = data key */
+
+ leaq (%r11),%rcx /* encrypt the tweak input with the tweak key into 32(%rbp) */
+ leaq 32(%rbp),%rdx
+ leaq (%r10),%r8
+ call asm_AES_encrypt
+
+ movl 240(%r15),%eax /* dword at key+240, presumably the round count */
+ movq %r14,%rbx /* rbx keeps the original length; its low 4 bits give the partial-block size */
+
+ movl %eax,%edx /* edx keeps the round count for every cipher call */
+ shlq $7,%rax
+ subq $96,%rax /* converted schedule size: 128*rounds - 96 bytes */
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7 /* last key block xor returned constant, stored at the end of the schedule */
+ movdqa %xmm7,(%rax)
+
+ andq $-16,%r14 /* r14 = length rounded down to whole blocks */
+ subq $128,%rsp /* 0..127(%rsp): eight saved tweaks; the schedule now starts at 128(%rsp) */
+ movdqa 32(%rbp),%xmm6 /* xmm6 = current tweak */
+
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12 /* constant used when advancing the tweak (value not visible here) */
+ pcmpgtd %xmm6,%xmm14 /* xmm14 = per-dword sign mask of the tweak, input to the next advance step */
+
+ subq $128,%r14
+ jc .Lxts_enc_short
+ jmp .Lxts_enc_loop
+
+.p2align 4
+.Lxts_enc_loop: /* each 8-instruction group saves tweak i, copies it to a block register, then advances xmm6 (paddq doubling plus masked xor with .Lxts_magic; presumably GF(2^128) multiply by x) */
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7 /* input loads and tweak xors are interleaved with the tweak computation */
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12 /* xmm12..xmm14 are reused for input from here; the magic constant is reloaded after the cipher call */
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp) /* eighth tweak */
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax /* rax = converted key schedule */
+ pxor %xmm14,%xmm6 /* eighth block = input xor eighth tweak, held in xmm6 itself */
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15 /* results come back in the order xmm15,xmm0,xmm3,xmm5,xmm2,xmm6,xmm1,xmm4; xor each with its saved tweak */
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm1,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6 /* resume from the eighth tweak and advance it once for the next pass */
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+
+ subq $128,%r14
+ jnc .Lxts_enc_loop
+
+.Lxts_enc_short: /* 0..7 whole blocks remain; the xor of input into each block register is deferred past the dispatch branch and completed at the branch target */
+ addq $128,%r14
+ jz .Lxts_enc_done
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je .Lxts_enc_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je .Lxts_enc_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je .Lxts_enc_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je .Lxts_enc_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je .Lxts_enc_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je .Lxts_enc_6
+ pxor %xmm11,%xmm3 /* fall through: seven leftover blocks */
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp) /* tweak following the seventh block, reloaded after the call */
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm1,96(%r13)
+ leaq 112(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6 /* xmm6 = next unused tweak, needed if a partial block follows */
+ jmp .Lxts_enc_done
+.p2align 4
+.Lxts_enc_6: /* six leftover blocks: finish the two deferred xors, then same pattern */
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+
+ movdqa 96(%rsp),%xmm6 /* next unused tweak */
+ jmp .Lxts_enc_done
+.p2align 4
+.Lxts_enc_5: /* five leftover blocks */
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ movdqu %xmm2,64(%r13)
+ leaq 80(%r13),%r13
+
+ movdqa 80(%rsp),%xmm6 /* next unused tweak */
+ jmp .Lxts_enc_done
+.p2align 4
+.Lxts_enc_4: /* four leftover blocks */
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ movdqu %xmm5,48(%r13)
+ leaq 64(%r13),%r13
+
+ movdqa 64(%rsp),%xmm6 /* next unused tweak */
+ jmp .Lxts_enc_done
+.p2align 4
+.Lxts_enc_3: /* three leftover blocks */
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm3,32(%r13)
+ leaq 48(%r13),%r13
+
+ movdqa 48(%rsp),%xmm6 /* next unused tweak */
+ jmp .Lxts_enc_done
+.p2align 4
+.Lxts_enc_2: /* two leftover blocks */
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+
+ movdqa 32(%rsp),%xmm6 /* next unused tweak */
+ jmp .Lxts_enc_done
+.p2align 4
+.Lxts_enc_1: /* single leftover block: use the one-block routine with the original key in r15 */
+ pxor %xmm15,%xmm7 /* xmm15 = tweak, xmm7 = input xor tweak */
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rcx /* encrypt the scratch slot in place */
+ leaq 32(%rbp),%rdx
+ leaq (%r15),%r8
+ call asm_AES_encrypt
+ pxor 32(%rbp),%xmm15 /* output = encrypted block xor tweak */
+
+
+
+
+
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+
+ movdqa 16(%rsp),%xmm6 /* next unused tweak */
+
+.Lxts_enc_done:
+ andl $15,%ebx /* ebx = number of trailing bytes beyond the last whole block */
+ jz .Lxts_enc_ret
+ movq %r13,%rdx
+
+.Lxts_enc_steal: /* byte by byte: move the head of the last output block to the partial output, and put the trailing input bytes in its place */
+ movzbl (%r12),%eax
+ movzbl -16(%rdx),%ecx
+ leaq 1(%r12),%r12
+ movb %al,-16(%rdx)
+ movb %cl,0(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz .Lxts_enc_steal
+
+ movdqu -16(%r13),%xmm15 /* re-encrypt the patched last full block with the tweak in xmm6 */
+ leaq 32(%rbp),%rcx
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rdx
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%r8
+ call asm_AES_encrypt
+ pxor 32(%rbp),%xmm6
+ movdqu %xmm6,-16(%r13)
+
+.Lxts_enc_ret:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lxts_enc_bzero: /* zero the stack from rsp up to rbp (saved tweaks and converted key schedule), 32 bytes per pass */
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lxts_enc_bzero
+
+ leaq (%rbp),%rsp
+ movaps 64(%rbp),%xmm6 /* restore the nonvolatile xmm registers saved in the prologue */
+ movaps 80(%rbp),%xmm7
+ movaps 96(%rbp),%xmm8
+ movaps 112(%rbp),%xmm9
+ movaps 128(%rbp),%xmm10
+ movaps 144(%rbp),%xmm11
+ movaps 160(%rbp),%xmm12
+ movaps 176(%rbp),%xmm13
+ movaps 192(%rbp),%xmm14
+ movaps 208(%rbp),%xmm15
+ leaq 160(%rbp),%rsp
+ movq 72(%rsp),%r15 /* pushed registers sit above the 72-byte gap, in reverse push order */
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax /* saved rbp, moved into place after rsp is final */
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lxts_enc_epilogue:
+ retq
+
+
+.globl bsaes_xts_decrypt
+.def bsaes_xts_decrypt; .scl 2; .type 32; .endef
+.p2align 4
+bsaes_xts_decrypt:
+ movq %rsp,%rax
+.Lxts_dec_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq 160(%rsp),%r10
+ movq 168(%rsp),%r11
+ leaq -160(%rsp),%rsp
+ movaps %xmm6,64(%rsp)
+ movaps %xmm7,80(%rsp)
+ movaps %xmm8,96(%rsp)
+ movaps %xmm9,112(%rsp)
+ movaps %xmm10,128(%rsp)
+ movaps %xmm11,144(%rsp)
+ movaps %xmm12,160(%rsp)
+ movaps %xmm13,176(%rsp)
+ movaps %xmm14,192(%rsp)
+ movaps %xmm15,208(%rsp)
+.Lxts_dec_body:
+ movq %rsp,%rbp
+ movq %rcx,%r12
+ movq %rdx,%r13
+ movq %r8,%r14
+ movq %r9,%r15
+
+ leaq (%r11),%rcx
+ leaq 32(%rbp),%rdx
+ leaq (%r10),%r8
+ call asm_AES_encrypt
+
+ movl 240(%r15),%eax
+ movq %r14,%rbx
+
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor (%rsp),%xmm7
+ movdqa %xmm6,(%rax)
+ movdqa %xmm7,(%rsp)
+
+ xorl %eax,%eax
+ andq $-16,%r14
+ testl $15,%ebx
+ setnz %al
+ shlq $4,%rax
+ subq %rax,%r14
+
+ subq $128,%rsp
+ movdqa 32(%rbp),%xmm6
+
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+
+ subq $128,%r14
+ jc .Lxts_dec_short
+ jmp .Lxts_dec_loop
+
+.p2align 4
+.Lxts_dec_loop:
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp)
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ pxor %xmm14,%xmm6
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+
+ subq $128,%r14
+ jnc .Lxts_dec_loop
+
+.Lxts_dec_short:
+ addq $128,%r14
+ jz .Lxts_dec_done
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je .Lxts_dec_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je .Lxts_dec_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je .Lxts_dec_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je .Lxts_dec_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je .Lxts_dec_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je .Lxts_dec_6
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp)
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ leaq 112(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+.Lxts_dec_6:
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+
+ movdqa 96(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+.Lxts_dec_5:
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ leaq 80(%r13),%r13
+
+ movdqa 80(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+.Lxts_dec_4:
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ leaq 64(%r13),%r13
+
+ movdqa 64(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+.Lxts_dec_3:
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ leaq 48(%r13),%r13
+
+ movdqa 48(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+.Lxts_dec_2:
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+
+ movdqa 32(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+.Lxts_dec_1:
+ pxor %xmm15,%xmm7
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rcx
+ leaq 32(%rbp),%rdx
+ leaq (%r15),%r8
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm15
+
+
+
+
+
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+
+ movdqa 16(%rsp),%xmm6
+
+.Lxts_dec_done:
+ andl $15,%ebx
+ jz .Lxts_dec_ret
+
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ movdqa %xmm6,%xmm5
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ movdqu (%r12),%xmm15
+ pxor %xmm13,%xmm6
+
+ leaq 32(%rbp),%rcx
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rdx
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%r8
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm6
+ movq %r13,%rdx
+ movdqu %xmm6,(%r13)
+
+.Lxts_dec_steal:
+ movzbl 16(%r12),%eax
+ movzbl (%rdx),%ecx
+ leaq 1(%r12),%r12
+ movb %al,(%rdx)
+ movb %cl,16(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz .Lxts_dec_steal
+
+ movdqu (%r13),%xmm15
+ leaq 32(%rbp),%rcx
+ pxor %xmm5,%xmm15
+ leaq 32(%rbp),%rdx
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%r8
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm5
+ movdqu %xmm5,(%r13)
+
+.Lxts_dec_ret:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lxts_dec_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lxts_dec_bzero
+
+ leaq (%rbp),%rsp
+ movaps 64(%rbp),%xmm6
+ movaps 80(%rbp),%xmm7
+ movaps 96(%rbp),%xmm8
+ movaps 112(%rbp),%xmm9
+ movaps 128(%rbp),%xmm10
+ movaps 144(%rbp),%xmm11
+ movaps 160(%rbp),%xmm12
+ movaps 176(%rbp),%xmm13
+ movaps 192(%rbp),%xmm14
+ movaps 208(%rbp),%xmm15
+ leaq 160(%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lxts_dec_epilogue:
+ retq
+
+
+.p2align 6
+_bsaes_const: /* start of the 64-byte-aligned constant pool of this file */
+.LM0ISR: /* 16 distinct byte indices 0..15, i.e. a byte permutation; presumably a pshufb mask (users are outside this view) */
+.quad 0x0a0e0206070b0f03, 0x0004080c0d010509
+.LISRM0:
+.quad 0x01040b0e0205080f, 0x0306090c00070a0d
+.LISR:
+.quad 0x0504070602010003, 0x0f0e0d0c080b0a09
+.LBS0: /* 0x55 in every byte: alternating single bits */
+.quad 0x5555555555555555, 0x5555555555555555
+.LBS1: /* 0x33 in every byte: alternating bit pairs */
+.quad 0x3333333333333333, 0x3333333333333333
+.LBS2: /* 0x0f in every byte: low nibble of each byte */
+.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+.LSR:
+.quad 0x0504070600030201, 0x0f0e0d0c0a09080b
+.LSRM0:
+.quad 0x0304090e00050a0f, 0x01060b0c0207080d
+.LM0SR:
+.quad 0x0a0e02060f03070b, 0x0004080c05090d01
+.LSWPUP:
+.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
+.LSWPUPM0SR:
+.quad 0x0a0d02060c03070b, 0x0004080f05090e01
+.LADD1: /* .LADD1 .. .LADD8: 128-bit values whose most significant dword is 1..8 and whose other dwords are 0 */
+.quad 0x0000000000000000, 0x0000000100000000
+.LADD2:
+.quad 0x0000000000000000, 0x0000000200000000
+.LADD3:
+.quad 0x0000000000000000, 0x0000000300000000
+.LADD4:
+.quad 0x0000000000000000, 0x0000000400000000
+.LADD5:
+.quad 0x0000000000000000, 0x0000000500000000
+.LADD6:
+.quad 0x0000000000000000, 0x0000000600000000
+.LADD7:
+.quad 0x0000000000000000, 0x0000000700000000
+.LADD8:
+.quad 0x0000000000000000, 0x0000000800000000
+.Lxts_magic: /* loaded into xmm12 by the XTS code and ANDed into the tweak-doubling carry mask; 0x87 is presumably the GF(2^128) reduction constant - confirm */
+.long 0x87,0,1,0
+.Lmasks: /* four rows with 0x01, 0x02, 0x04, 0x08 replicated in every byte */
+.quad 0x0101010101010101, 0x0101010101010101
+.quad 0x0202020202020202, 0x0202020202020202
+.quad 0x0404040404040404, 0x0404040404040404
+.quad 0x0808080808080808, 0x0808080808080808
+.LM0:
+.quad 0x02060a0e03070b0f, 0x0004080c0105090d
+.L63: /* 0x63 replicated in every byte */
+.quad 0x6363636363636363, 0x6363636363636363
+.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0 /* NUL-terminated UTF-8 credit string: "Bit-sliced AES for x86_64/SSSE3, Emilia Kasper, Peter Schwabe, Andy Polyakov" (bytes 195,164 encode a-umlaut) */
+.p2align 6
+/* se_handler: Win64 exception handler named by every .xdata record of this file. It rebuilds the interrupted function's non-volatile registers inside the context record, then calls RtlVirtualUnwind. */
+/* Only r8 and r9 of the incoming arguments are read; presumably r8 = CONTEXT* and r9 = DISPATCHER_CONTEXT* per the Win64 SEH convention. NOTE(review): field names given for the numeric offsets below are assumptions - confirm against winnt.h. */
+.def se_handler; .scl 3; .type 32; .endef
+.p2align 4
+se_handler:
+ pushq %rsi /* save every register the handler modifies, then the flags */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp /* 32 bytes of shadow space plus the four stack arguments stored at 32..56(%rsp) below */
+
+ movq 120(%r8),%rax /* presumably context->Rax */
+ movq 248(%r8),%rbx /* presumably context->Rip, the address being unwound */
+
+ movq 8(%r9),%rsi /* presumably disp->ImageBase */
+ movq 56(%r9),%r11 /* presumably disp->HandlerData, i.e. the pair of .rva words that follows the handler RVA in .xdata */
+
+ movl 0(%r11),%r10d /* first word: RVA of the function's *_body label */
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jb .Lin_prologue /* rip below the body label: skip the restore, rax (context->Rax) becomes the recorded rsp */
+
+ movq 152(%r8),%rax /* presumably context->Rsp */
+
+ movl 4(%r11),%r10d /* second word: RVA of the function's *_epilogue label */
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lin_prologue /* rip at or past the epilogue label: skip the restore, recorded rsp stays as it is */
+
+ movq 160(%r8),%rax /* presumably context->Rbp; the functions use rbp as frame base */
+
+ leaq 64(%rax),%rsi /* source = 64(rbp), the same xmm save area the function epilogues reload from */
+ leaq 512(%r8),%rdi /* destination, presumably context->Xmm6 */
+ movl $20,%ecx /* 20 quadwords = 160 bytes = ten xmm registers */
+.long 0xa548f3fc /* bytes fc f3 48 a5 = cld ; rep movsq */
+ leaq 160(%rax),%rax /* step over the xmm save area, mirroring leaq 160(%rbp),%rsp in the epilogues */
+
+ movq 112(%rax),%rbp /* reload the saved integer registers from the offsets the epilogues use */
+ movq 104(%rax),%rbx
+ movq 96(%rax),%r12
+ movq 88(%rax),%r13
+ movq 80(%rax),%r14
+ movq 72(%rax),%r15
+ leaq 120(%rax),%rax /* rax = stack pointer value after the epilogue's final leaq 120(%rsp),%rsp */
+ movq %rbx,144(%r8) /* write the recovered values into the context record (presumably Rbx, Rbp, R12..R15) */
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+ movq %r13,224(%r8)
+ movq %r14,232(%r8)
+ movq %r15,240(%r8)
+
+.Lin_prologue:
+ movq %rax,152(%r8) /* store the chosen stack pointer value (presumably context->Rsp) */
+
+ movq 40(%r9),%rdi /* destination pointer read from 40(r9), presumably disp->ContextRecord */
+ movq %r8,%rsi
+ movl $154,%ecx /* 154 quadwords = 1232 bytes; presumably sizeof(CONTEXT) */
+.long 0xa548f3fc /* cld ; rep movsq: copy the edited context */
+
+ movq %r9,%rsi /* rsi = dispatcher context; build the RtlVirtualUnwind argument list from it */
+ xorq %rcx,%rcx /* arg1 = 0 (presumably UNW_FLAG_NHANDLER) */
+ movq 8(%rsi),%rdx /* arg2 = 8(disp) */
+ movq 0(%rsi),%r8 /* arg3 = 0(disp) */
+ movq 16(%rsi),%r9 /* arg4 = 16(disp) */
+ movq 40(%rsi),%r10
+ leaq 56(%rsi),%r11
+ leaq 24(%rsi),%r12
+ movq %r10,32(%rsp) /* arg5 = value of 40(disp) */
+ movq %r11,40(%rsp) /* arg6 = address of 56(disp) */
+ movq %r12,48(%rsp) /* arg7 = address of 24(disp) */
+ movq %rcx,56(%rsp) /* arg8 = 0 */
+ call *__imp_RtlVirtualUnwind(%rip)
+
+ movl $1,%eax /* return value 1; presumably ExceptionContinueSearch */
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+
+
+.section .pdata /* one record of three RVAs per function: start label, end label, unwind-info label in .xdata */
+.p2align 2
+.rva .Lcbc_dec_prologue
+.rva .Lcbc_dec_epilogue
+.rva .Lcbc_dec_info
+
+.rva .Lctr_enc_prologue
+.rva .Lctr_enc_epilogue
+.rva .Lctr_enc_info
+
+.rva .Lxts_enc_prologue
+.rva .Lxts_enc_epilogue
+.rva .Lxts_enc_info
+
+.rva .Lxts_dec_prologue
+.rva .Lxts_dec_epilogue
+.rva .Lxts_dec_info
+
+.section .xdata /* unwind-info records: 4 header bytes, handler RVA, then two RVAs that se_handler reads as 0(%r11) and 4(%r11) */
+.p2align 3
+.Lcbc_dec_info:
+.byte 9,0,0,0 /* first byte 9: presumably version 1 with the exception-handler flag set; remaining header bytes are 0 */
+.rva se_handler
+.rva .Lcbc_dec_body,.Lcbc_dec_epilogue /* range in which se_handler restores registers from the frame */
+.Lctr_enc_info:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lctr_enc_body,.Lctr_enc_epilogue
+.Lxts_enc_info:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lxts_enc_body,.Lxts_enc_epilogue
+.Lxts_dec_info:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lxts_dec_body,.Lxts_dec_epilogue
diff --git a/ext/libressl/crypto/aes/vpaes-elf-x86_64.S b/ext/libressl/crypto/aes/vpaes-elf-x86_64.S
new file mode 100644
index 0000000..1e1a6e8
--- /dev/null
+++ b/ext/libressl/crypto/aes/vpaes-elf-x86_64.S
@@ -0,0 +1,832 @@
+#include "x86_arch.h"
+.text
+
+
+
+
+
+
+
+
+
+
+
+
+/* _vpaes_encrypt_core: encrypt the single 16-byte block held in xmm0; the result is returned in xmm0. */
+/* In: rdx = key schedule (round keys from offset 0, 32-bit round counter at offset 240); xmm9-xmm15 = constants as loaded by _vpaes_preheat. */
+/* Writes rax, r9, r10, r11 and xmm0-xmm5. Makes no calls and does not touch the stack. */
+/* The .byte sequences are hand-encoded SSSE3 instructions; each one is decoded in the comment next to it. */
+.type _vpaes_encrypt_core,@function
+.align 16
+_vpaes_encrypt_core:
+ movq %rdx,%r9 /* r9 = running round-key pointer */
+ movq $16,%r11 /* r11 = byte offset into the 64-byte permutation tables; stepped by 16 modulo 64 each round */
+ movl 240(%rdx),%eax /* eax = round counter from the key schedule */
+ movdqa %xmm9,%xmm1
+ movdqa .Lk_ipt(%rip),%xmm2
+ pandn %xmm0,%xmm1 /* xmm1 = input AND NOT xmm9: the high nibbles when xmm9 holds 0x0F bytes */
+ movdqu (%r9),%xmm5 /* first round key */
+ psrld $4,%xmm1 /* move the high nibbles down into nibble position 0 */
+ pand %xmm9,%xmm0 /* xmm0 = low nibbles */
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 : .Lk_ipt row 0 indexed by low nibbles */
+ movdqa .Lk_ipt+16(%rip),%xmm0
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 : .Lk_ipt row 1 indexed by high nibbles */
+ pxor %xmm5,%xmm2
+ pxor %xmm2,%xmm0 /* xmm0 = transformed input XOR first round key */
+ addq $16,%r9
+ leaq .Lk_mc_backward(%rip),%r10 /* r10 = base for the table addressing below */
+ jmp .Lenc_entry /* ZF is still clear from addq $16,%r9 (leaq and jmp leave flags alone), so the first jnz is taken */
+
+.align 16
+.Lenc_loop:
+
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm5,%xmm4 /* fold in the round key loaded at .Lenc_entry */
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+ movdqa %xmm15,%xmm5
+.byte 102,15,56,0,234 /* pshufb %xmm2,%xmm5 */
+ movdqa -64(%r11,%r10,1),%xmm1 /* .Lk_mc_forward row at offset r11 (that table sits 64 bytes before .Lk_mc_backward) */
+ movdqa %xmm14,%xmm2
+.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
+ pxor %xmm5,%xmm2
+ movdqa (%r11,%r10,1),%xmm4 /* .Lk_mc_backward row at offset r11 */
+ movdqa %xmm0,%xmm3
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ addq $16,%r9 /* advance to the next round key */
+ pxor %xmm2,%xmm0
+.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
+ addq $16,%r11
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ andq $48,%r11 /* keep r11 in {0,16,32,48} */
+ pxor %xmm3,%xmm0
+ subq $1,%rax /* sets ZF for the jnz at the end of .Lenc_entry; nothing in between modifies flags */
+
+.Lenc_entry:
+
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1 /* xmm1 = high nibbles */
+ pand %xmm9,%xmm0 /* xmm0 = low nibbles */
+ movdqa %xmm11,%xmm5
+.byte 102,15,56,0,232 /* pshufb %xmm0,%xmm5 */
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm5,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
+ pxor %xmm5,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+ movdqu (%r9),%xmm5 /* next round key */
+.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
+ pxor %xmm1,%xmm3
+ jnz .Lenc_loop /* loop until the round counter reaches zero */
+
+
+ movdqa -96(%r10),%xmm4 /* .Lk_sbo row 0 (96 bytes before .Lk_mc_backward) */
+ movdqa -80(%r10),%xmm0 /* .Lk_sbo row 1 */
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm5,%xmm4 /* last round key */
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ movdqa 64(%r11,%r10,1),%xmm1 /* .Lk_sr row at offset r11 (64 bytes after .Lk_mc_backward) */
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 : final byte permutation */
+ retq
+.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
+
+
+/* _vpaes_decrypt_core: decrypt the single 16-byte block held in xmm0; the result is returned in xmm0. */
+/* In: rdx = key schedule (round keys from offset 0, 32-bit round counter at offset 240); xmm9, xmm10, xmm11 = constants as loaded by _vpaes_preheat. */
+/* Writes rax, r9, r10, r11 and xmm0-xmm5. Makes no calls and does not touch the stack. */
+/* The .byte sequences are hand-encoded SSSE3 instructions; each one is decoded in the comment next to it. */
+.type _vpaes_decrypt_core,@function
+.align 16
+_vpaes_decrypt_core:
+ movq %rdx,%r9 /* r9 = running round-key pointer */
+ movl 240(%rdx),%eax /* eax = round counter from the key schedule */
+ movdqa %xmm9,%xmm1
+ movdqa .Lk_dipt(%rip),%xmm2
+ pandn %xmm0,%xmm1
+ movq %rax,%r11
+ psrld $4,%xmm1 /* xmm1 = high nibbles of the input */
+ movdqu (%r9),%xmm5 /* first round key */
+ shlq $4,%r11
+ pand %xmm9,%xmm0 /* xmm0 = low nibbles of the input */
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
+ movdqa .Lk_dipt+16(%rip),%xmm0
+ xorq $48,%r11
+ leaq .Lk_dsbd(%rip),%r10 /* r10 = base for the table addressing below */
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ andq $48,%r11 /* r11 = ((rounds * 16) XOR 48) AND 48 */
+ pxor %xmm5,%xmm2
+ movdqa .Lk_mc_forward+48(%rip),%xmm5 /* xmm5 = last row of .Lk_mc_forward; rotated at the bottom of every loop pass */
+ pxor %xmm2,%xmm0
+ addq $16,%r9
+ addq %r10,%r11 /* r11 = .Lk_dsbd + offset; only used by the -352(%r11) load at the end. Leaves ZF clear for the first jnz */
+ jmp .Ldec_entry
+
+.align 16
+.Ldec_loop:
+
+
+
+ movdqa -32(%r10),%xmm4 /* .Lk_dsb9 row 0 */
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4 /* xmm0 holds the round key loaded at .Ldec_entry */
+ movdqa -16(%r10),%xmm0 /* .Lk_dsb9 row 1 */
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+ addq $16,%r9 /* advance to the next round key */
+
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
+ movdqa 0(%r10),%xmm4 /* .Lk_dsbd row 0 */
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4
+ movdqa 16(%r10),%xmm0 /* .Lk_dsbd row 1 */
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+ subq $1,%rax /* sets ZF for the jnz at the end of .Ldec_entry; nothing in between modifies flags */
+
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
+ movdqa 32(%r10),%xmm4 /* .Lk_dsbb row 0 */
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4
+ movdqa 48(%r10),%xmm0 /* .Lk_dsbb row 1 */
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
+ movdqa 64(%r10),%xmm4 /* .Lk_dsbe row 0 */
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4
+ movdqa 80(%r10),%xmm0 /* .Lk_dsbe row 1 */
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+
+.byte 102,15,58,15,237,12 /* palignr $12,%xmm5,%xmm5 : rotate the permutation in xmm5 by 12 bytes */
+
+.Ldec_entry:
+
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1 /* xmm1 = high nibbles */
+ pand %xmm9,%xmm0 /* xmm0 = low nibbles */
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
+ pxor %xmm1,%xmm3
+ movdqu (%r9),%xmm0 /* next round key */
+ jnz .Ldec_loop /* loop until the round counter reaches zero */
+
+
+ movdqa 96(%r10),%xmm4 /* .Lk_dsbo row 0 */
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4 /* last round key */
+ movdqa 112(%r10),%xmm0 /* .Lk_dsbo row 1 */
+ movdqa -352(%r11),%xmm2 /* .Lk_sr row: .Lk_sr sits 352 bytes before .Lk_dsbd, r11 adds the row offset */
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,194 /* pshufb %xmm2,%xmm0 : final byte permutation */
+ retq
+.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
+
+
+/* _vpaes_schedule_core: expand a user key into a round-key schedule; shared by vpaes_set_encrypt_key and vpaes_set_decrypt_key. */
+/* In: rdi = user key bytes, esi = key size in bits (192 selects the 192 path, above 192 the 256 path, anything else the 128 path), */
+/*     rdx = address where the first round key is stored, rcx = 0 for an encryption schedule and nonzero for decryption, r8 = row offset into .Lk_sr. */
+/* Modifies rsi, rdx, r8, r10, r11 and xmm0-xmm15 (via _vpaes_preheat); xmm0-xmm7 are zeroed before returning. */
+.type _vpaes_schedule_core,@function
+.align 16
+_vpaes_schedule_core:
+
+
+
+
+
+ call _vpaes_preheat /* load the shared constants into xmm9-xmm15 */
+ movdqa .Lk_rcon(%rip),%xmm8 /* xmm8 = round-constant vector, rotated by _vpaes_schedule_round */
+ movdqu (%rdi),%xmm0 /* first 16 bytes of the user key */
+
+
+ movdqa %xmm0,%xmm3 /* keep an untransformed copy for the decryption path */
+ leaq .Lk_ipt(%rip),%r11
+ call _vpaes_schedule_transform /* xmm0 = key passed through the .Lk_ipt nibble tables */
+ movdqa %xmm0,%xmm7 /* xmm7 = running key state */
+
+ leaq .Lk_sr(%rip),%r10 /* r10 = base of .Lk_sr, indexed by r8 here and in _vpaes_schedule_mangle */
+ testq %rcx,%rcx
+ jnz .Lschedule_am_decrypting
+
+
+ movdqu %xmm0,(%rdx) /* encrypting: store the transformed key as the first round key */
+ jmp .Lschedule_go
+
+.Lschedule_am_decrypting:
+
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 : permute the raw key by the selected .Lk_sr row */
+ movdqu %xmm3,(%rdx) /* decrypting: store the permuted raw key */
+ xorq $48,%r8
+
+.Lschedule_go:
+ cmpl $192,%esi
+ ja .Lschedule_256
+ je .Lschedule_192
+
+
+
+
+
+
+
+
+
+/* 128-bit path: 10 calls to _vpaes_schedule_round, each but the last followed by _vpaes_schedule_mangle. */
+.Lschedule_128:
+ movl $10,%esi /* esi is reused as the loop counter (movl also clears the upper half of rsi) */
+
+.Loop_schedule_128:
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last
+ call _vpaes_schedule_mangle
+ jmp .Loop_schedule_128
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/* 192-bit path: 4 loop passes, each producing three round keys except the last, which exits after its second round. */
+.align 16
+.Lschedule_192:
+ movdqu 8(%rdi),%xmm0 /* user key bytes 8..23 */
+ call _vpaes_schedule_transform /* r11 still points at .Lk_ipt */
+ movdqa %xmm0,%xmm6
+ pxor %xmm4,%xmm4
+ movhlps %xmm4,%xmm6 /* clear the low 64 bits of xmm6, keeping its high half */
+ movl $4,%esi
+
+.Loop_schedule_192:
+ call _vpaes_schedule_round
+.byte 102,15,58,15,198,8 /* palignr $8,%xmm6,%xmm0 */
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ jmp .Loop_schedule_192
+
+
+
+
+
+
+
+
+
+
+/* 256-bit path: 7 loop passes alternating a full round with a low round on the other key half. */
+.align 16
+.Lschedule_256:
+ movdqu 16(%rdi),%xmm0 /* user key bytes 16..31 */
+ call _vpaes_schedule_transform /* r11 still points at .Lk_ipt */
+ movl $7,%esi
+
+.Loop_schedule_256:
+ call _vpaes_schedule_mangle
+ movdqa %xmm0,%xmm6 /* remember this half for the low round below */
+
+
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last
+ call _vpaes_schedule_mangle
+
+
+ pshufd $255,%xmm0,%xmm0 /* broadcast the top dword */
+ movdqa %xmm7,%xmm5 /* park xmm7 while the low round runs on the saved half */
+ movdqa %xmm6,%xmm7
+ call _vpaes_schedule_low_round
+ movdqa %xmm5,%xmm7
+
+ jmp .Loop_schedule_256
+
+
+
+
+
+
+
+
+
+
+
+/* Final round key: transformed with .Lk_opt when encrypting, .Lk_deskew when decrypting. */
+.align 16
+.Lschedule_mangle_last:
+
+ leaq .Lk_deskew(%rip),%r11
+ testq %rcx,%rcx
+ jnz .Lschedule_mangle_last_dec
+
+
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 : permute by the current .Lk_sr row */
+ leaq .Lk_opt(%rip),%r11
+ addq $32,%rdx /* together with the -16 below, the encryption schedule advances by 16 */
+
+.Lschedule_mangle_last_dec:
+ addq $-16,%rdx
+ pxor .Lk_s63(%rip),%xmm0
+ call _vpaes_schedule_transform
+ movdqu %xmm0,(%rdx) /* store the last round key */
+
+
+ pxor %xmm0,%xmm0 /* clear xmm0-xmm7, presumably so no key material is left in registers */
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+ retq
+.size _vpaes_schedule_core,.-_vpaes_schedule_core
+
+
+
+
+
+
+
+
+
+
+
+
+/* _vpaes_schedule_192_smear: mixing step used only by the 192-bit key schedule. */
+/* In: xmm6, xmm7. Computes xmm6 = xmm6 XOR pshufd($128, xmm6) XOR pshufd($254, xmm7). */
+/* Out: xmm0 = that value; xmm6 = the same value with its low 64 bits cleared. Clobbers xmm1 (left as zero). */
+.type _vpaes_schedule_192_smear,@function
+.align 16
+_vpaes_schedule_192_smear:
+ pshufd $128,%xmm6,%xmm0 /* dwords (d0,d0,d0,d2) of xmm6, listed from dword 0 upward */
+ pxor %xmm0,%xmm6
+ pshufd $254,%xmm7,%xmm0 /* dwords (d2,d3,d3,d3) of xmm7, listed from dword 0 upward */
+ pxor %xmm0,%xmm6
+ movdqa %xmm6,%xmm0
+ pxor %xmm1,%xmm1
+ movhlps %xmm1,%xmm6 /* low quadword of xmm6 = high quadword of xmm1 = 0 */
+ retq
+.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/* _vpaes_schedule_round: one key-schedule round. In: xmm0 = input word source, xmm7 = running key state, xmm8 = round-constant vector, xmm9-xmm13 = constants from _vpaes_preheat. */
+/* Out: xmm0 = xmm7 = new key state; xmm8 rotated by one byte. Clobbers xmm1-xmm4. */
+/* Second entry point _vpaes_schedule_low_round skips the round-constant and rotation steps and falls into the same body. */
+.type _vpaes_schedule_round,@function
+.align 16
+_vpaes_schedule_round:
+
+ pxor %xmm1,%xmm1
+.byte 102,65,15,58,15,200,15 /* palignr $15,%xmm8,%xmm1 : xmm1 = top byte of xmm8 in byte 0, zeros elsewhere */
+.byte 102,69,15,58,15,192,15 /* palignr $15,%xmm8,%xmm8 : rotate xmm8 by one byte for the next round */
+ pxor %xmm1,%xmm7 /* add the round constant to the key state */
+
+
+ pshufd $255,%xmm0,%xmm0 /* broadcast the top dword of xmm0 */
+.byte 102,15,58,15,192,1 /* palignr $1,%xmm0,%xmm0 : rotate xmm0 by one byte */
+
+
+
+
+_vpaes_schedule_low_round:
+
+ movdqa %xmm7,%xmm1
+ pslldq $4,%xmm7
+ pxor %xmm1,%xmm7 /* xmm7 ^= xmm7 shifted up by 4 bytes */
+ movdqa %xmm7,%xmm1
+ pslldq $8,%xmm7
+ pxor %xmm1,%xmm7 /* ... then ^= itself shifted up by 8 bytes */
+ pxor .Lk_s63(%rip),%xmm7
+
+
+ movdqa %xmm9,%xmm1 /* from here: the same nibble-split and xmm10/xmm11 lookup sequence as .Lenc_entry in _vpaes_encrypt_core */
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
+ pxor %xmm1,%xmm3
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+
+
+ pxor %xmm7,%xmm0 /* combine the substituted word with the smeared key state */
+ movdqa %xmm0,%xmm7 /* result is returned in both xmm0 and xmm7 */
+ retq
+.size _vpaes_schedule_round,.-_vpaes_schedule_round
+
+
+
+
+
+
+
+/* _vpaes_schedule_transform: byte-wise table transform of xmm0 using two 16-byte tables at (r11) and 16(r11). */
+/* Each output byte = table0[low nibble] XOR table1[high nibble], assuming xmm9 holds 0x0F in every byte (as _vpaes_preheat loads it). */
+/* In: xmm0, r11, xmm9. Out: xmm0. Clobbers xmm1 and xmm2; r11 is not modified. */
+.type _vpaes_schedule_transform,@function
+.align 16
+_vpaes_schedule_transform:
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1 /* xmm1 = high nibbles */
+ pand %xmm9,%xmm0 /* xmm0 = low nibbles */
+ movdqa (%r11),%xmm2
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 : table0 indexed by low nibbles */
+ movdqa 16(%r11),%xmm0
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 : table1 indexed by high nibbles */
+ pxor %xmm2,%xmm0
+ retq
+.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/* _vpaes_schedule_mangle: convert the key state in xmm0 into stored round-key form and write it to the schedule. */
+/* In: xmm0 = key state (left unchanged), rcx = 0 for encryption or nonzero for decryption, rdx = schedule cursor, r8 = .Lk_sr row offset, r10 = .Lk_sr base, xmm9 = nibble mask. */
+/* Effects: rdx moves by +16 (encryption) or -16 (decryption) before the 16-byte store to (rdx); r8 = (r8 - 16) AND 48. Clobbers xmm1-xmm5 and, when decrypting, r11. */
+.type _vpaes_schedule_mangle,@function
+.align 16
+_vpaes_schedule_mangle:
+ movdqa %xmm0,%xmm4 /* work on a copy so xmm0 survives */
+ movdqa .Lk_mc_forward(%rip),%xmm5 /* first row of .Lk_mc_forward */
+ testq %rcx,%rcx
+ jnz .Lschedule_mangle_dec
+
+
+ addq $16,%rdx
+ pxor .Lk_s63(%rip),%xmm4
+.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
+ movdqa %xmm4,%xmm3
+.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
+ pxor %xmm4,%xmm3
+.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
+ pxor %xmm4,%xmm3 /* xmm3 = XOR of the value permuted once, twice and three times */
+
+ jmp .Lschedule_mangle_both
+.align 16
+.Lschedule_mangle_dec:
+
+ leaq .Lk_dksd(%rip),%r11 /* r11 walks .Lk_dksd, .Lk_dksb, .Lk_dkse, .Lk_dks9 (32 bytes each, contiguous) */
+ movdqa %xmm9,%xmm1
+ pandn %xmm4,%xmm1
+ psrld $4,%xmm1 /* xmm1 = high nibbles */
+ pand %xmm9,%xmm4 /* xmm4 = low nibbles */
+
+ movdqa 0(%r11),%xmm2
+.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
+ movdqa 16(%r11),%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
+
+ movdqa 32(%r11),%xmm2
+.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
+ pxor %xmm3,%xmm2
+ movdqa 48(%r11),%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
+
+ movdqa 64(%r11),%xmm2
+.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
+ pxor %xmm3,%xmm2
+ movdqa 80(%r11),%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
+
+ movdqa 96(%r11),%xmm2
+.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
+ pxor %xmm3,%xmm2
+ movdqa 112(%r11),%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+
+ addq $-16,%rdx /* the decryption schedule is filled from high addresses downward */
+
+.Lschedule_mangle_both:
+ movdqa (%r8,%r10,1),%xmm1 /* current .Lk_sr row */
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ addq $-16,%r8
+ andq $48,%r8 /* step the row offset backwards modulo 64 */
+ movdqu %xmm3,(%rdx)
+ retq
+.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
+
+/* vpaes_set_encrypt_key: build an encryption key schedule. Exported (.globl). */
+/* In: rdi = user key bytes, esi = key size in bits, rdx = key schedule to fill (both passed through to _vpaes_schedule_core). */
+/* Stores bits/32 + 5 as a 32-bit value at offset 240 of the schedule. Always returns 0 in eax; the bit count is not validated here. */
+.globl vpaes_set_encrypt_key
+.type vpaes_set_encrypt_key,@function
+.align 16
+vpaes_set_encrypt_key:
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax
+ movl %eax,240(%rdx) /* round counter later read by the encrypt/decrypt cores */
+
+ movl $0,%ecx /* rcx = 0: encryption schedule */
+ movl $48,%r8d /* initial .Lk_sr row offset */
+ call _vpaes_schedule_core
+ xorl %eax,%eax /* return 0 */
+ retq
+.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
+/* vpaes_set_decrypt_key: build a decryption key schedule. In: rdi = user key bytes, esi = key size in bits, rdx = key schedule to fill. Always returns 0 in eax. */
+.globl vpaes_set_decrypt_key
+.type vpaes_set_decrypt_key,@function
+.align 16
+vpaes_set_decrypt_key:
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax
+ movl %eax,240(%rdx) /* round counter = bits/32 + 5, stored at offset 240 */
+ shll $4,%eax
+ leaq 16(%rdx,%rax,1),%rdx /* rdx = schedule + 16 + 16*counter: the schedule is written from this end downward */
+
+ movl $1,%ecx /* rcx = 1: decryption schedule */
+ movl %esi,%r8d
+ shrl $1,%r8d
+ andl $32,%r8d
+ xorl $32,%r8d /* r8 = ((bits / 2) AND 32) XOR 32: initial .Lk_sr row offset, 0 or 32 */
+ call _vpaes_schedule_core
+ xorl %eax,%eax /* return 0 */
+ retq
+.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
+/* vpaes_encrypt: encrypt one 16-byte block. In: rdi = input block, rsi = output block, rdx = key schedule (consumed by _vpaes_encrypt_core). Neither pointer needs alignment (movdqu). */
+.globl vpaes_encrypt
+.type vpaes_encrypt,@function
+.align 16
+vpaes_encrypt:
+ movdqu (%rdi),%xmm0 /* load the input block */
+ call _vpaes_preheat /* load constants into xmm9-xmm15 */
+ call _vpaes_encrypt_core
+ movdqu %xmm0,(%rsi) /* store the result */
+ retq
+.size vpaes_encrypt,.-vpaes_encrypt
+/* vpaes_decrypt: decrypt one 16-byte block. In: rdi = input block, rsi = output block, rdx = key schedule (consumed by _vpaes_decrypt_core). Neither pointer needs alignment (movdqu). */
+.globl vpaes_decrypt
+.type vpaes_decrypt,@function
+.align 16
+vpaes_decrypt:
+ movdqu (%rdi),%xmm0 /* load the input block */
+ call _vpaes_preheat /* load constants into xmm9-xmm15 */
+ call _vpaes_decrypt_core
+ movdqu %xmm0,(%rsi) /* store the result */
+ retq
+.size vpaes_decrypt,.-vpaes_decrypt
+.globl vpaes_cbc_encrypt /* CBC-mode bulk routine: encrypts when r9d is nonzero, decrypts when r9d is zero */
+.type vpaes_cbc_encrypt,@function /* In: rdi = input, rsi = output, rdx = length in bytes, rcx = key schedule, r8 = 16-byte IV (updated on return) */
+.align 16 /* Processes floor(length/16) blocks; a trailing partial block is ignored. Uses xmm6 and xmm7 in addition to the core's registers */
+vpaes_cbc_encrypt:
+ xchgq %rcx,%rdx /* rcx = length, rdx = key schedule (where the cores expect it) */
+ subq $16,%rcx
+ jc .Lcbc_abort /* length below 16: return without touching output or IV */
+ movdqu (%r8),%xmm6 /* xmm6 = chaining value, starts as the IV */
+ subq %rdi,%rsi /* rsi = output - input, so (rsi,rdi) addresses the output as rdi advances */
+ call _vpaes_preheat
+ cmpl $0,%r9d
+ je .Lcbc_dec_loop
+ jmp .Lcbc_enc_loop
+.align 16
+.Lcbc_enc_loop:
+ movdqu (%rdi),%xmm0
+ pxor %xmm6,%xmm0 /* plaintext XOR previous ciphertext (or IV) */
+ call _vpaes_encrypt_core
+ movdqa %xmm0,%xmm6 /* new chaining value = this ciphertext block */
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi /* leaq keeps the flags out of the picture */
+ subq $16,%rcx
+ jnc .Lcbc_enc_loop /* continue while at least one more full block remained */
+ jmp .Lcbc_done
+.align 16
+.Lcbc_dec_loop:
+ movdqu (%rdi),%xmm0
+ movdqa %xmm0,%xmm7 /* keep the ciphertext: it is the next chaining value */
+ call _vpaes_decrypt_core
+ pxor %xmm6,%xmm0 /* decrypted block XOR previous ciphertext (or IV) */
+ movdqa %xmm7,%xmm6
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi
+ subq $16,%rcx
+ jnc .Lcbc_dec_loop
+.Lcbc_done:
+ movdqu %xmm6,(%r8) /* write the final chaining value back as the new IV */
+.Lcbc_abort:
+ retq
+.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
+
+
+
+/* _vpaes_preheat: load the constants shared by the cores and the key schedule into xmm9-xmm15. */
+/* All loads are relative to .Lk_s0F, so they depend on the table order inside _vpaes_consts. */
+/* Writes r10 (left pointing at .Lk_s0F) and xmm9-xmm15; no other register is touched. */
+.type _vpaes_preheat,@function
+.align 16
+_vpaes_preheat:
+ leaq .Lk_s0F(%rip),%r10
+ movdqa -32(%r10),%xmm10 /* .Lk_inv row 0 */
+ movdqa -16(%r10),%xmm11 /* .Lk_inv row 1 */
+ movdqa 0(%r10),%xmm9 /* .Lk_s0F: 0x0F in every byte (nibble mask) */
+ movdqa 48(%r10),%xmm13 /* .Lk_sb1 row 0 */
+ movdqa 64(%r10),%xmm12 /* .Lk_sb1 row 1 */
+ movdqa 80(%r10),%xmm15 /* .Lk_sb2 row 0 */
+ movdqa 96(%r10),%xmm14 /* .Lk_sb2 row 1 */
+ retq
+.size _vpaes_preheat,.-_vpaes_preheat
+
+
+
+
+/* _vpaes_consts: constant tables. Several are addressed by fixed offsets from a neighbouring label, so their order and sizes must not change. */
+.type _vpaes_consts,@object
+.align 64
+_vpaes_consts:
+.Lk_inv: /* two rows; loaded into xmm10 and xmm11 by _vpaes_preheat as -32 and -16 from .Lk_s0F */
+.quad 0x0E05060F0D080180, 0x040703090A0B0C02
+.quad 0x01040A060F0B0780, 0x030D0E0C02050809
+
+.Lk_s0F: /* nibble mask, loaded into xmm9 */
+.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
+
+.Lk_ipt: /* input transform tables: row 0 indexed by low nibbles, row 1 by high nibbles */
+.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
+.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
+
+.Lk_sb1: /* loaded into xmm13 (row 0) and xmm12 (row 1) */
+.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
+.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
+.Lk_sb2: /* loaded into xmm15 (row 0) and xmm14 (row 1) */
+.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
+.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
+.Lk_sbo: /* used for the last encryption round, addressed as -96 and -80 from .Lk_mc_backward */
+.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
+.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
+
+.Lk_mc_forward: /* four 16-byte byte-permutation rows, selected by an offset of 0, 16, 32 or 48 */
+.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
+.quad 0x080B0A0904070605, 0x000302010C0F0E0D
+.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
+.quad 0x000302010C0F0E0D, 0x080B0A0904070605
+
+.Lk_mc_backward: /* four rows; also the base register value (r10) in _vpaes_encrypt_core */
+.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
+.quad 0x020100030E0D0C0F, 0x0A09080B06050407
+.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
+.quad 0x0A09080B06050407, 0x020100030E0D0C0F
+
+.Lk_sr: /* four byte-permutation rows; row 0 is the identity permutation */
+.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
+.quad 0x030E09040F0A0500, 0x0B06010C07020D08
+.quad 0x0F060D040B020900, 0x070E050C030A0108
+.quad 0x0B0E0104070A0D00, 0x0306090C0F020508
+
+.Lk_rcon: /* loaded into xmm8 by _vpaes_schedule_core and consumed one byte per round */
+.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
+
+.Lk_s63: /* every byte is 0x5B */
+.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
+
+.Lk_opt: /* transform tables for the last round key of an encryption schedule */
+.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
+.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
+
+.Lk_deskew: /* transform tables for the last round key of a decryption schedule */
+.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
+.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
+
+
+
+
+
+.Lk_dksd: /* .Lk_dksd, .Lk_dksb, .Lk_dkse, .Lk_dks9 are read as one 128-byte block by _vpaes_schedule_mangle */
+.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
+.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
+.Lk_dksb:
+.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
+.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
+.Lk_dkse:
+.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
+.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
+.Lk_dks9:
+.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
+.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
+
+
+
+
+
+.Lk_dipt: /* decryption input transform tables */
+.quad 0x0F505B040B545F00, 0x154A411E114E451A
+.quad 0x86E383E660056500, 0x12771772F491F194
+
+.Lk_dsb9: /* .Lk_dsb9 .. .Lk_dsbo are addressed as -32 .. 112 from .Lk_dsbd in _vpaes_decrypt_core */
+.quad 0x851C03539A86D600, 0xCAD51F504F994CC9
+.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
+.Lk_dsbd:
+.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
+.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
+.Lk_dsbb:
+.quad 0xD022649296B44200, 0x602646F6B0F2D404
+.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
+.Lk_dsbe:
+.quad 0x46F2929626D4D000, 0x2242600464B4F6B0
+.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
+.Lk_dsbo:
+.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
+.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
+.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 /* NUL-terminated ASCII credit string: "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" */
+.align 64
+.size _vpaes_consts,.-_vpaes_consts
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/aes/vpaes-macosx-x86_64.S b/ext/libressl/crypto/aes/vpaes-macosx-x86_64.S
new file mode 100644
index 0000000..0a892a9
--- /dev/null
+++ b/ext/libressl/crypto/aes/vpaes-macosx-x86_64.S
@@ -0,0 +1,829 @@
+#include "x86_arch.h"
+.text
+
+
+
+
+
+
+
+
+
+
+
+
+
+/* _vpaes_encrypt_core: encrypt the single 16-byte block held in xmm0; the result is returned in xmm0. */
+/* In: rdx = key schedule (round keys from offset 0, 32-bit round counter at offset 240); xmm9-xmm15 = constants as loaded by _vpaes_preheat. */
+/* Writes rax, r9, r10, r11 and xmm0-xmm5. Makes no calls and does not touch the stack. */
+/* The .byte sequences are hand-encoded SSSE3 instructions; each one is decoded in the comment next to it. */
+.p2align 4
+_vpaes_encrypt_core:
+ movq %rdx,%r9 /* r9 = running round-key pointer */
+ movq $16,%r11 /* r11 = byte offset into the 64-byte permutation tables; stepped by 16 modulo 64 each round */
+ movl 240(%rdx),%eax /* eax = round counter from the key schedule */
+ movdqa %xmm9,%xmm1
+ movdqa L$k_ipt(%rip),%xmm2
+ pandn %xmm0,%xmm1 /* xmm1 = input AND NOT xmm9: the high nibbles when xmm9 holds 0x0F bytes */
+ movdqu (%r9),%xmm5 /* first round key */
+ psrld $4,%xmm1 /* move the high nibbles down into nibble position 0 */
+ pand %xmm9,%xmm0 /* xmm0 = low nibbles */
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 : L$k_ipt row 0 indexed by low nibbles */
+ movdqa L$k_ipt+16(%rip),%xmm0
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 : L$k_ipt row 1 indexed by high nibbles */
+ pxor %xmm5,%xmm2
+ pxor %xmm2,%xmm0 /* xmm0 = transformed input XOR first round key */
+ addq $16,%r9
+ leaq L$k_mc_backward(%rip),%r10 /* r10 = base for the table addressing below */
+ jmp L$enc_entry /* ZF is still clear from addq $16,%r9 (leaq and jmp leave flags alone), so the first jnz is taken */
+
+.p2align 4
+L$enc_loop:
+
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm5,%xmm4 /* fold in the round key loaded at L$enc_entry */
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+ movdqa %xmm15,%xmm5
+.byte 102,15,56,0,234 /* pshufb %xmm2,%xmm5 */
+ movdqa -64(%r11,%r10,1),%xmm1 /* L$k_mc_forward row at offset r11 (that table sits 64 bytes before L$k_mc_backward) */
+ movdqa %xmm14,%xmm2
+.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
+ pxor %xmm5,%xmm2
+ movdqa (%r11,%r10,1),%xmm4 /* L$k_mc_backward row at offset r11 */
+ movdqa %xmm0,%xmm3
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ addq $16,%r9 /* advance to the next round key */
+ pxor %xmm2,%xmm0
+.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
+ addq $16,%r11
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ andq $48,%r11 /* keep r11 in {0,16,32,48} */
+ pxor %xmm3,%xmm0
+ subq $1,%rax /* sets ZF for the jnz at the end of L$enc_entry; nothing in between modifies flags */
+
+L$enc_entry:
+
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1 /* xmm1 = high nibbles */
+ pand %xmm9,%xmm0 /* xmm0 = low nibbles */
+ movdqa %xmm11,%xmm5
+.byte 102,15,56,0,232 /* pshufb %xmm0,%xmm5 */
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm5,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
+ pxor %xmm5,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+ movdqu (%r9),%xmm5 /* next round key */
+.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
+ pxor %xmm1,%xmm3
+ jnz L$enc_loop /* loop until the round counter reaches zero */
+
+
+ movdqa -96(%r10),%xmm4 /* L$k_sbo row 0 (96 bytes before L$k_mc_backward) */
+ movdqa -80(%r10),%xmm0 /* L$k_sbo row 1 */
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm5,%xmm4 /* last round key */
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ movdqa 64(%r11,%r10,1),%xmm1 /* L$k_sr row at offset r11 (64 bytes after L$k_mc_backward) */
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 : final byte permutation */
+ retq
+
+
+
+
+/* _vpaes_decrypt_core: decrypt the single 16-byte block held in xmm0; the result is returned in xmm0. */
+/* In: rdx = key schedule (round keys from offset 0, 32-bit round counter at offset 240); xmm9, xmm10, xmm11 = constants as loaded by _vpaes_preheat. */
+/* Writes rax, r9, r10, r11 and xmm0-xmm5. Makes no calls and does not touch the stack. */
+/* The .byte sequences are hand-encoded SSSE3 instructions; each one is decoded in the comment next to it. */
+.p2align 4
+_vpaes_decrypt_core:
+ movq %rdx,%r9 /* r9 = running round-key pointer */
+ movl 240(%rdx),%eax /* eax = round counter from the key schedule */
+ movdqa %xmm9,%xmm1
+ movdqa L$k_dipt(%rip),%xmm2
+ pandn %xmm0,%xmm1
+ movq %rax,%r11
+ psrld $4,%xmm1 /* xmm1 = high nibbles of the input */
+ movdqu (%r9),%xmm5 /* first round key */
+ shlq $4,%r11
+ pand %xmm9,%xmm0 /* xmm0 = low nibbles of the input */
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
+ movdqa L$k_dipt+16(%rip),%xmm0
+ xorq $48,%r11
+ leaq L$k_dsbd(%rip),%r10 /* r10 = base for the table addressing below */
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ andq $48,%r11 /* r11 = ((rounds * 16) XOR 48) AND 48 */
+ pxor %xmm5,%xmm2
+ movdqa L$k_mc_forward+48(%rip),%xmm5 /* xmm5 = last row of L$k_mc_forward; rotated at the bottom of every loop pass */
+ pxor %xmm2,%xmm0
+ addq $16,%r9
+ addq %r10,%r11 /* r11 = L$k_dsbd + offset; only used by the -352(%r11) load at the end. Leaves ZF clear for the first jnz */
+ jmp L$dec_entry
+
+.p2align 4
+L$dec_loop:
+
+
+
+ movdqa -32(%r10),%xmm4 /* L$k_dsb9 row 0 */
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4 /* xmm0 holds the round key loaded at L$dec_entry */
+ movdqa -16(%r10),%xmm0 /* L$k_dsb9 row 1 */
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+ addq $16,%r9 /* advance to the next round key */
+
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
+ movdqa 0(%r10),%xmm4 /* L$k_dsbd row 0 */
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4
+ movdqa 16(%r10),%xmm0 /* L$k_dsbd row 1 */
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+ subq $1,%rax /* sets ZF for the jnz at the end of L$dec_entry; nothing in between modifies flags */
+
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
+ movdqa 32(%r10),%xmm4 /* L$k_dsbb row 0 */
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4
+ movdqa 48(%r10),%xmm0 /* L$k_dsbb row 1 */
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
+ movdqa 64(%r10),%xmm4 /* L$k_dsbe row 0 */
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4
+ movdqa 80(%r10),%xmm0 /* L$k_dsbe row 1 */
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+
+.byte 102,15,58,15,237,12 /* palignr $12,%xmm5,%xmm5 : rotate the permutation in xmm5 by 12 bytes */
+
+L$dec_entry:
+
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1 /* xmm1 = high nibbles */
+ pand %xmm9,%xmm0 /* xmm0 = low nibbles */
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
+ pxor %xmm1,%xmm3
+ movdqu (%r9),%xmm0 /* next round key */
+ jnz L$dec_loop /* loop until the round counter reaches zero */
+
+
+ movdqa 96(%r10),%xmm4 /* L$k_dsbo row 0 */
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4 /* last round key */
+ movdqa 112(%r10),%xmm0 /* L$k_dsbo row 1 */
+ movdqa -352(%r11),%xmm2 /* L$k_sr row: L$k_sr sits 352 bytes before L$k_dsbd, r11 adds the row offset */
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,194 /* pshufb %xmm2,%xmm0 : final byte permutation */
+ retq
+
+
+
+
+/* _vpaes_schedule_core: expand a user key into a round-key schedule; shared by _vpaes_set_encrypt_key and _vpaes_set_decrypt_key. */
+/* In: rdi = user key bytes, esi = key size in bits (192 selects the 192 path, above 192 the 256 path, anything else the 128 path), */
+/*     rdx = address where the first round key is stored, rcx = 0 for an encryption schedule and nonzero for decryption, r8 = row offset into L$k_sr. */
+/* Modifies rsi, rdx, r8, r10, r11 and xmm0-xmm15 (via _vpaes_preheat); xmm0-xmm7 are zeroed before returning. */
+.p2align 4
+_vpaes_schedule_core:
+
+
+
+
+
+ call _vpaes_preheat /* load the shared constants into xmm9-xmm15 */
+ movdqa L$k_rcon(%rip),%xmm8 /* xmm8 = round-constant vector, rotated by _vpaes_schedule_round */
+ movdqu (%rdi),%xmm0 /* first 16 bytes of the user key */
+
+
+ movdqa %xmm0,%xmm3 /* keep an untransformed copy for the decryption path */
+ leaq L$k_ipt(%rip),%r11
+ call _vpaes_schedule_transform /* xmm0 = key passed through the L$k_ipt nibble tables */
+ movdqa %xmm0,%xmm7 /* xmm7 = running key state */
+
+ leaq L$k_sr(%rip),%r10 /* r10 = base of L$k_sr, indexed by r8 here and in _vpaes_schedule_mangle */
+ testq %rcx,%rcx
+ jnz L$schedule_am_decrypting
+
+
+ movdqu %xmm0,(%rdx) /* encrypting: store the transformed key as the first round key */
+ jmp L$schedule_go
+
+L$schedule_am_decrypting:
+
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 : permute the raw key by the selected L$k_sr row */
+ movdqu %xmm3,(%rdx) /* decrypting: store the permuted raw key */
+ xorq $48,%r8
+
+L$schedule_go:
+ cmpl $192,%esi
+ ja L$schedule_256
+ je L$schedule_192
+
+
+
+
+
+
+
+
+
+/* 128-bit path: 10 calls to _vpaes_schedule_round, each but the last followed by _vpaes_schedule_mangle. */
+L$schedule_128:
+ movl $10,%esi /* esi is reused as the loop counter (movl also clears the upper half of rsi) */
+
+L$oop_schedule_128:
+ call _vpaes_schedule_round
+ decq %rsi
+ jz L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+ jmp L$oop_schedule_128
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/* 192-bit path: 4 loop passes, each producing three round keys except the last, which exits after its second round. */
+.p2align 4
+L$schedule_192:
+ movdqu 8(%rdi),%xmm0 /* user key bytes 8..23 */
+ call _vpaes_schedule_transform /* r11 still points at L$k_ipt */
+ movdqa %xmm0,%xmm6
+ pxor %xmm4,%xmm4
+ movhlps %xmm4,%xmm6 /* clear the low 64 bits of xmm6, keeping its high half */
+ movl $4,%esi
+
+L$oop_schedule_192:
+ call _vpaes_schedule_round
+.byte 102,15,58,15,198,8 /* palignr $8,%xmm6,%xmm0 */
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_round
+ decq %rsi
+ jz L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ jmp L$oop_schedule_192
+
+
+
+
+
+
+
+
+
+
+/* 256-bit path: 7 loop passes alternating a full round with a low round on the other key half. */
+.p2align 4
+L$schedule_256:
+ movdqu 16(%rdi),%xmm0 /* user key bytes 16..31 */
+ call _vpaes_schedule_transform /* r11 still points at L$k_ipt */
+ movl $7,%esi
+
+L$oop_schedule_256:
+ call _vpaes_schedule_mangle
+ movdqa %xmm0,%xmm6 /* remember this half for the low round below */
+
+
+ call _vpaes_schedule_round
+ decq %rsi
+ jz L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+
+
+ pshufd $255,%xmm0,%xmm0 /* broadcast the top dword */
+ movdqa %xmm7,%xmm5 /* park xmm7 while the low round runs on the saved half */
+ movdqa %xmm6,%xmm7
+ call _vpaes_schedule_low_round
+ movdqa %xmm5,%xmm7
+
+ jmp L$oop_schedule_256
+
+
+
+
+
+
+
+
+
+
+
+/* Final round key: transformed with L$k_opt when encrypting, L$k_deskew when decrypting. */
+.p2align 4
+L$schedule_mangle_last:
+
+ leaq L$k_deskew(%rip),%r11
+ testq %rcx,%rcx
+ jnz L$schedule_mangle_last_dec
+
+
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 : permute by the current L$k_sr row */
+ leaq L$k_opt(%rip),%r11
+ addq $32,%rdx /* together with the -16 below, the encryption schedule advances by 16 */
+
+L$schedule_mangle_last_dec:
+ addq $-16,%rdx
+ pxor L$k_s63(%rip),%xmm0
+ call _vpaes_schedule_transform
+ movdqu %xmm0,(%rdx) /* store the last round key */
+
+
+ pxor %xmm0,%xmm0 /* clear xmm0-xmm7, presumably so no key material is left in registers */
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+ retq
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/* _vpaes_schedule_192_smear: mixing step used only by the 192-bit key schedule. */
+/* In: xmm6, xmm7. Computes xmm6 = xmm6 XOR pshufd($128, xmm6) XOR pshufd($254, xmm7). */
+/* Out: xmm0 = that value; xmm6 = the same value with its low 64 bits cleared. Clobbers xmm1 (left as zero). */
+.p2align 4
+_vpaes_schedule_192_smear:
+ pshufd $128,%xmm6,%xmm0 /* dwords (d0,d0,d0,d2) of xmm6, listed from dword 0 upward */
+ pxor %xmm0,%xmm6
+ pshufd $254,%xmm7,%xmm0 /* dwords (d2,d3,d3,d3) of xmm7, listed from dword 0 upward */
+ pxor %xmm0,%xmm6
+ movdqa %xmm6,%xmm0
+ pxor %xmm1,%xmm1
+ movhlps %xmm1,%xmm6 /* low quadword of xmm6 = high quadword of xmm1 = 0 */
+ retq
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/* _vpaes_schedule_round: one key-schedule round. In: xmm0 = input word source, xmm7 = running key state, xmm8 = round-constant vector, xmm9-xmm13 = constants from _vpaes_preheat. */
+/* Out: xmm0 = xmm7 = new key state; xmm8 rotated by one byte. Clobbers xmm1-xmm4. */
+/* Second entry point _vpaes_schedule_low_round skips the round-constant and rotation steps and falls into the same body. */
+.p2align 4
+_vpaes_schedule_round:
+
+ pxor %xmm1,%xmm1
+.byte 102,65,15,58,15,200,15 /* palignr $15,%xmm8,%xmm1 : xmm1 = top byte of xmm8 in byte 0, zeros elsewhere */
+.byte 102,69,15,58,15,192,15 /* palignr $15,%xmm8,%xmm8 : rotate xmm8 by one byte for the next round */
+ pxor %xmm1,%xmm7 /* add the round constant to the key state */
+
+
+ pshufd $255,%xmm0,%xmm0 /* broadcast the top dword of xmm0 */
+.byte 102,15,58,15,192,1 /* palignr $1,%xmm0,%xmm0 : rotate xmm0 by one byte */
+
+
+
+
+_vpaes_schedule_low_round:
+
+ movdqa %xmm7,%xmm1
+ pslldq $4,%xmm7
+ pxor %xmm1,%xmm7 /* xmm7 ^= xmm7 shifted up by 4 bytes */
+ movdqa %xmm7,%xmm1
+ pslldq $8,%xmm7
+ pxor %xmm1,%xmm7 /* ... then ^= itself shifted up by 8 bytes */
+ pxor L$k_s63(%rip),%xmm7
+
+
+ movdqa %xmm9,%xmm1 /* from here: the same nibble-split and xmm10/xmm11 lookup sequence as L$enc_entry in _vpaes_encrypt_core */
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
+ pxor %xmm1,%xmm3
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+
+
+ pxor %xmm7,%xmm0 /* combine the substituted word with the smeared key state */
+ movdqa %xmm0,%xmm7 /* result is returned in both xmm0 and xmm7 */
+ retq
+
+
+
+
+
+
+
+
+
+/* _vpaes_schedule_transform: byte-wise table transform of xmm0 using two 16-byte tables at (r11) and 16(r11). */
+/* Each output byte = table0[low nibble] XOR table1[high nibble], assuming xmm9 holds 0x0F in every byte (as _vpaes_preheat loads it). */
+/* In: xmm0, r11, xmm9. Out: xmm0. Clobbers xmm1 and xmm2; r11 is not modified. */
+.p2align 4
+_vpaes_schedule_transform:
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1 /* xmm1 = high nibbles */
+ pand %xmm9,%xmm0 /* xmm0 = low nibbles */
+ movdqa (%r11),%xmm2
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 : table0 indexed by low nibbles */
+ movdqa 16(%r11),%xmm0
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 : table1 indexed by high nibbles */
+ pxor %xmm2,%xmm0
+ retq
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/* _vpaes_schedule_mangle: convert the key state in xmm0 into stored round-key form and write it to the schedule. */
+/* In: xmm0 = key state (left unchanged), rcx = 0 for encryption or nonzero for decryption, rdx = schedule cursor, r8 = L$k_sr row offset, r10 = L$k_sr base, xmm9 = nibble mask. */
+/* Effects: rdx moves by +16 (encryption) or -16 (decryption) before the 16-byte store to (rdx); r8 = (r8 - 16) AND 48. Clobbers xmm1-xmm5 and, when decrypting, r11. */
+.p2align 4
+_vpaes_schedule_mangle:
+ movdqa %xmm0,%xmm4 /* work on a copy so xmm0 survives */
+ movdqa L$k_mc_forward(%rip),%xmm5 /* first row of L$k_mc_forward */
+ testq %rcx,%rcx
+ jnz L$schedule_mangle_dec
+
+
+ addq $16,%rdx
+ pxor L$k_s63(%rip),%xmm4
+.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
+ movdqa %xmm4,%xmm3
+.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
+ pxor %xmm4,%xmm3
+.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
+ pxor %xmm4,%xmm3 /* xmm3 = XOR of the value permuted once, twice and three times */
+
+ jmp L$schedule_mangle_both
+.p2align 4
+L$schedule_mangle_dec:
+
+ leaq L$k_dksd(%rip),%r11 /* r11 walks L$k_dksd, L$k_dksb, L$k_dkse, L$k_dks9 (32 bytes each, contiguous) */
+ movdqa %xmm9,%xmm1
+ pandn %xmm4,%xmm1
+ psrld $4,%xmm1 /* xmm1 = high nibbles */
+ pand %xmm9,%xmm4 /* xmm4 = low nibbles */
+
+ movdqa 0(%r11),%xmm2
+.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
+ movdqa 16(%r11),%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
+
+ movdqa 32(%r11),%xmm2
+.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
+ pxor %xmm3,%xmm2
+ movdqa 48(%r11),%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
+
+ movdqa 64(%r11),%xmm2
+.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
+ pxor %xmm3,%xmm2
+ movdqa 80(%r11),%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
+
+ movdqa 96(%r11),%xmm2
+.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
+ pxor %xmm3,%xmm2
+ movdqa 112(%r11),%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+
+ addq $-16,%rdx /* the decryption schedule is filled from high addresses downward */
+
+L$schedule_mangle_both:
+ movdqa (%r8,%r10,1),%xmm1 /* current L$k_sr row */
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ addq $-16,%r8
+ andq $48,%r8 /* step the row offset backwards modulo 64 */
+ movdqu %xmm3,(%rdx)
+ retq
+
+
+/* _vpaes_set_encrypt_key: build an encryption key schedule. Exported (.globl). */
+/* In: rdi = user key bytes, esi = key size in bits, rdx = key schedule to fill (both passed through to _vpaes_schedule_core). */
+/* Stores bits/32 + 5 as a 32-bit value at offset 240 of the schedule. Always returns 0 in eax; the bit count is not validated here. */
+.globl _vpaes_set_encrypt_key
+
+.p2align 4
+_vpaes_set_encrypt_key:
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax
+ movl %eax,240(%rdx) /* round counter later read by the encrypt/decrypt cores */
+
+ movl $0,%ecx /* rcx = 0: encryption schedule */
+ movl $48,%r8d /* initial L$k_sr row offset */
+ call _vpaes_schedule_core
+ xorl %eax,%eax /* return 0 */
+ retq
+
+/* _vpaes_set_decrypt_key: build a decryption key schedule. In: rdi = user key bytes, esi = key size in bits, rdx = key schedule to fill. Always returns 0 in eax. */
+.globl _vpaes_set_decrypt_key
+
+.p2align 4
+_vpaes_set_decrypt_key:
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax
+ movl %eax,240(%rdx) /* round counter = bits/32 + 5, stored at offset 240 */
+ shll $4,%eax
+ leaq 16(%rdx,%rax,1),%rdx /* rdx = schedule + 16 + 16*counter: the schedule is written from this end downward */
+
+ movl $1,%ecx /* rcx = 1: decryption schedule */
+ movl %esi,%r8d
+ shrl $1,%r8d
+ andl $32,%r8d
+ xorl $32,%r8d /* r8 = ((bits / 2) AND 32) XOR 32: initial L$k_sr row offset, 0 or 32 */
+ call _vpaes_schedule_core
+ xorl %eax,%eax /* return 0 */
+ retq
+
+/* _vpaes_encrypt: encrypt one 16-byte block. In: rdi = input block, rsi = output block, rdx = key schedule (consumed by _vpaes_encrypt_core). Neither pointer needs alignment (movdqu). */
+.globl _vpaes_encrypt
+
+.p2align 4
+_vpaes_encrypt:
+ movdqu (%rdi),%xmm0 /* load the input block */
+ call _vpaes_preheat /* load constants into xmm9-xmm15 */
+ call _vpaes_encrypt_core
+ movdqu %xmm0,(%rsi) /* store the result */
+ retq
+
+/* _vpaes_decrypt: decrypt one 16-byte block. In: rdi = input block, rsi = output block, rdx = key schedule (consumed by _vpaes_decrypt_core). Neither pointer needs alignment (movdqu). */
+.globl _vpaes_decrypt
+
+.p2align 4
+_vpaes_decrypt:
+ movdqu (%rdi),%xmm0 /* load the input block */
+ call _vpaes_preheat /* load constants into xmm9-xmm15 */
+ call _vpaes_decrypt_core
+ movdqu %xmm0,(%rsi) /* store the result */
+ retq
+/* _vpaes_cbc_encrypt: CBC-mode bulk routine; encrypts when r9d is nonzero, decrypts when r9d is zero. In: rdi = input, rsi = output, rdx = length in bytes, rcx = key schedule, r8 = 16-byte IV (updated on return). */
+.globl _vpaes_cbc_encrypt
+/* Processes floor(length/16) blocks; a trailing partial block is ignored. Uses xmm6 and xmm7 in addition to the core's registers. */
+.p2align 4
+_vpaes_cbc_encrypt:
+ xchgq %rcx,%rdx /* rcx = length, rdx = key schedule (where the cores expect it) */
+ subq $16,%rcx
+ jc L$cbc_abort /* length below 16: return without touching output or IV */
+ movdqu (%r8),%xmm6 /* xmm6 = chaining value, starts as the IV */
+ subq %rdi,%rsi /* rsi = output - input, so (rsi,rdi) addresses the output as rdi advances */
+ call _vpaes_preheat
+ cmpl $0,%r9d
+ je L$cbc_dec_loop
+ jmp L$cbc_enc_loop
+.p2align 4
+L$cbc_enc_loop:
+ movdqu (%rdi),%xmm0
+ pxor %xmm6,%xmm0 /* plaintext XOR previous ciphertext (or IV) */
+ call _vpaes_encrypt_core
+ movdqa %xmm0,%xmm6 /* new chaining value = this ciphertext block */
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi /* leaq keeps the flags out of the picture */
+ subq $16,%rcx
+ jnc L$cbc_enc_loop /* continue while at least one more full block remained */
+ jmp L$cbc_done
+.p2align 4
+L$cbc_dec_loop:
+ movdqu (%rdi),%xmm0
+ movdqa %xmm0,%xmm7 /* keep the ciphertext: it is the next chaining value */
+ call _vpaes_decrypt_core
+ pxor %xmm6,%xmm0 /* decrypted block XOR previous ciphertext (or IV) */
+ movdqa %xmm7,%xmm6
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi
+ subq $16,%rcx
+ jnc L$cbc_dec_loop
+L$cbc_done:
+ movdqu %xmm6,(%r8) /* write the final chaining value back as the new IV */
+L$cbc_abort:
+ retq
+
+
+
+
+
+/* _vpaes_preheat: load the constants shared by the cores and the key schedule into xmm9-xmm15. */
+/* All loads are relative to L$k_s0F, so they depend on the table order inside _vpaes_consts. */
+/* Writes r10 (left pointing at L$k_s0F) and xmm9-xmm15; no other register is touched. */
+.p2align 4
+_vpaes_preheat:
+ leaq L$k_s0F(%rip),%r10
+ movdqa -32(%r10),%xmm10 /* L$k_inv row 0 */
+ movdqa -16(%r10),%xmm11 /* L$k_inv row 1 */
+ movdqa 0(%r10),%xmm9 /* L$k_s0F: 0x0F in every byte (nibble mask) */
+ movdqa 48(%r10),%xmm13 /* L$k_sb1 row 0 */
+ movdqa 64(%r10),%xmm12 /* L$k_sb1 row 1 */
+ movdqa 80(%r10),%xmm15 /* L$k_sb2 row 0 */
+ movdqa 96(%r10),%xmm14 /* L$k_sb2 row 1 */
+ retq
+
+
+
+
+
+/* _vpaes_consts: constant tables. Several are addressed by fixed offsets from a neighbouring label, so their order and sizes must not change. */
+
+.p2align 6
+_vpaes_consts:
+L$k_inv: /* two rows; loaded into xmm10 and xmm11 by _vpaes_preheat as -32 and -16 from L$k_s0F */
+.quad 0x0E05060F0D080180, 0x040703090A0B0C02
+.quad 0x01040A060F0B0780, 0x030D0E0C02050809
+
+L$k_s0F: /* nibble mask, loaded into xmm9 */
+.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
+
+L$k_ipt: /* input transform tables: row 0 indexed by low nibbles, row 1 by high nibbles */
+.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
+.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
+
+L$k_sb1: /* loaded into xmm13 (row 0) and xmm12 (row 1) */
+.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
+.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
+L$k_sb2: /* loaded into xmm15 (row 0) and xmm14 (row 1) */
+.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
+.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
+L$k_sbo: /* used for the last encryption round, addressed as -96 and -80 from L$k_mc_backward */
+.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
+.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
+
+L$k_mc_forward: /* four 16-byte byte-permutation rows, selected by an offset of 0, 16, 32 or 48 */
+.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
+.quad 0x080B0A0904070605, 0x000302010C0F0E0D
+.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
+.quad 0x000302010C0F0E0D, 0x080B0A0904070605
+
+L$k_mc_backward: /* four rows; also the base register value (r10) in _vpaes_encrypt_core */
+.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
+.quad 0x020100030E0D0C0F, 0x0A09080B06050407
+.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
+.quad 0x0A09080B06050407, 0x020100030E0D0C0F
+
+L$k_sr: /* four byte-permutation rows; row 0 is the identity permutation */
+.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
+.quad 0x030E09040F0A0500, 0x0B06010C07020D08
+.quad 0x0F060D040B020900, 0x070E050C030A0108
+.quad 0x0B0E0104070A0D00, 0x0306090C0F020508
+
+L$k_rcon: /* loaded into xmm8 by _vpaes_schedule_core and consumed one byte per round */
+.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
+
+L$k_s63: /* every byte is 0x5B */
+.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
+
+L$k_opt: /* transform tables for the last round key of an encryption schedule */
+.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
+.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
+
+L$k_deskew: /* transform tables for the last round key of a decryption schedule */
+.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
+.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
+
+
+
+
+
+L$k_dksd: /* L$k_dksd, L$k_dksb, L$k_dkse, L$k_dks9 are read as one 128-byte block by _vpaes_schedule_mangle */
+.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
+.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
+L$k_dksb:
+.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
+.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
+L$k_dkse:
+.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
+.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
+L$k_dks9:
+.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
+.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
+
+
+
+
+
+L$k_dipt: /* decryption input transform tables */
+.quad 0x0F505B040B545F00, 0x154A411E114E451A
+.quad 0x86E383E660056500, 0x12771772F491F194
+
+L$k_dsb9: /* L$k_dsb9 .. L$k_dsbo are addressed as -32 .. 112 from L$k_dsbd in _vpaes_decrypt_core */
+.quad 0x851C03539A86D600, 0xCAD51F504F994CC9
+.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
+L$k_dsbd:
+.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
+.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
+L$k_dsbb:
+.quad 0xD022649296B44200, 0x602646F6B0F2D404
+.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
+L$k_dsbe:
+.quad 0x46F2929626D4D000, 0x2242600464B4F6B0
+.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
+L$k_dsbo:
+.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
+.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
+.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 /* NUL-terminated ASCII credit string: "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" */
+.p2align 6
+
diff --git a/ext/libressl/crypto/aes/vpaes-masm-x86_64.S b/ext/libressl/crypto/aes/vpaes-masm-x86_64.S
new file mode 100644
index 0000000..e10d98d
--- /dev/null
+++ b/ext/libressl/crypto/aes/vpaes-masm-x86_64.S
@@ -0,0 +1,1213 @@
+; 1 "crypto/aes/vpaes-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/aes/vpaes-masm-x86_64.S.tmp" 2
+OPTION DOTNAME
+
+; 1 "./crypto/x86_arch.h" 1
+
+
+; 16 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+; 40 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 "crypto/aes/vpaes-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+
+
+
+
+
+
+
+
+
+
+
+; _vpaes_encrypt_core -- runs the encryption rounds on the block in xmm0.
+; In:  xmm0 = data block; rdx = key schedule, round counter at [240+rdx];
+;      xmm9-xmm15 = lookup tables loaded by _vpaes_preheat.
+; Out: xmm0 = result.  Scratch: rax, r9, r10, r11, xmm1-xmm5.
+; The DB byte strings are hand-encoded SSSE3 pshufb (66 0F 38 00 /r);
+; the decoded instruction is given beside each one.
+ALIGN 16
+_vpaes_encrypt_core PROC PRIVATE
+ mov r9,rdx ; r9 walks the round keys
+ mov r11,16 ; r11 = table offset, kept in {0,16,32,48} by the "and" in the loop
+ mov eax,DWORD PTR[240+rdx] ; loop counter taken from the key structure
+ movdqa xmm1,xmm9 ; xmm9 = 0x0F in every byte ($L$k_s0F)
+ movdqa xmm2,XMMWORD PTR[$L$k_ipt]
+ pandn xmm1,xmm0 ; xmm1 = high nibble of each input byte
+ movdqu xmm5,XMMWORD PTR[r9] ; first round key
+ psrld xmm1,4 ; shift the high nibbles down to bits 0-3
+ pand xmm0,xmm9 ; xmm0 = low nibble of each input byte
+DB 102,15,56,0,208 ; pshufb xmm2,xmm0
+ movdqa xmm0,XMMWORD PTR[(($L$k_ipt+16))]
+DB 102,15,56,0,193 ; pshufb xmm0,xmm1
+ pxor xmm2,xmm5 ; mix in the first round key
+ pxor xmm0,xmm2
+ add r9,16 ; r9 -> next round key
+ lea r10,QWORD PTR[$L$k_mc_backward] ; base for the displaced table loads below
+ jmp $L$enc_entry
+
+ALIGN 16
+$L$enc_loop::
+
+ movdqa xmm4,xmm13 ; xmm13 = $L$k_sb1
+DB 102,15,56,0,226 ; pshufb xmm4,xmm2
+ pxor xmm4,xmm5 ; xmm5 = current round key
+ movdqa xmm0,xmm12 ; xmm12 = $L$k_sb1+16
+DB 102,15,56,0,195 ; pshufb xmm0,xmm3
+ pxor xmm0,xmm4
+ movdqa xmm5,xmm15 ; xmm15 = $L$k_sb2
+DB 102,15,56,0,234 ; pshufb xmm5,xmm2
+ movdqa xmm1,XMMWORD PTR[((-64))+r10*1+r11] ; $L$k_mc_forward[r11] (64 bytes before $L$k_mc_backward)
+ movdqa xmm2,xmm14 ; xmm14 = $L$k_sb2+16
+DB 102,15,56,0,211 ; pshufb xmm2,xmm3
+ pxor xmm2,xmm5
+ movdqa xmm4,XMMWORD PTR[r10*1+r11] ; $L$k_mc_backward[r11]
+ movdqa xmm3,xmm0
+DB 102,15,56,0,193 ; pshufb xmm0,xmm1
+ add r9,16 ; r9 -> next round key
+ pxor xmm0,xmm2
+DB 102,15,56,0,220 ; pshufb xmm3,xmm4
+ add r11,16
+ pxor xmm3,xmm0
+DB 102,15,56,0,193 ; pshufb xmm0,xmm1
+ and r11,030h ; wrap the table offset modulo 64
+ pxor xmm0,xmm3
+ sub rax,1 ; sets ZF for the jnz at the end of $L$enc_entry
+
+$L$enc_entry::
+
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm0 ; split xmm0 into high nibbles (xmm1) ...
+ psrld xmm1,4
+ pand xmm0,xmm9 ; ... and low nibbles (xmm0)
+ movdqa xmm5,xmm11 ; xmm11 = $L$k_inv+16
+DB 102,15,56,0,232 ; pshufb xmm5,xmm0
+ pxor xmm0,xmm1
+ movdqa xmm3,xmm10 ; xmm10 = $L$k_inv
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ pxor xmm3,xmm5
+ movdqa xmm4,xmm10
+DB 102,15,56,0,224 ; pshufb xmm4,xmm0
+ pxor xmm4,xmm5
+ movdqa xmm2,xmm10
+DB 102,15,56,0,211 ; pshufb xmm2,xmm3
+ pxor xmm2,xmm0
+ movdqa xmm3,xmm10
+ movdqu xmm5,XMMWORD PTR[r9] ; fetch the next round key
+DB 102,15,56,0,220 ; pshufb xmm3,xmm4
+ pxor xmm3,xmm1
+ jnz $L$enc_loop ; ZF comes from "sub rax,1" (or "add r9,16" on first entry); the SSE ops above do not write EFLAGS
+
+
+ movdqa xmm4,XMMWORD PTR[((-96))+r10] ; $L$k_sbo (96 bytes before $L$k_mc_backward)
+ movdqa xmm0,XMMWORD PTR[((-80))+r10] ; $L$k_sbo+16
+DB 102,15,56,0,226 ; pshufb xmm4,xmm2
+ pxor xmm4,xmm5 ; last round key
+DB 102,15,56,0,195 ; pshufb xmm0,xmm3
+ movdqa xmm1,XMMWORD PTR[64+r10*1+r11] ; $L$k_sr[r11] (64 bytes after $L$k_mc_backward)
+ pxor xmm0,xmm4
+DB 102,15,56,0,193 ; pshufb xmm0,xmm1
+ DB 0F3h,0C3h ;repret
+_vpaes_encrypt_core ENDP
+
+
+; _vpaes_decrypt_core -- runs the decryption rounds on the block in xmm0.
+; In:  xmm0 = data block; rdx = key schedule, round counter at [240+rdx];
+;      xmm9-xmm11 = tables loaded by _vpaes_preheat.
+; Out: xmm0 = result.  Scratch: rax, r9, r10, r11, xmm1-xmm5.
+; DB 102,15,56,0,xx = pshufb; DB 102,15,58,15,xx,n = palignr (hand-encoded).
+ALIGN 16
+_vpaes_decrypt_core PROC PRIVATE
+ mov r9,rdx ; r9 walks the round keys
+ mov eax,DWORD PTR[240+rdx] ; loop counter taken from the key structure
+ movdqa xmm1,xmm9 ; xmm9 = 0x0F in every byte ($L$k_s0F)
+ movdqa xmm2,XMMWORD PTR[$L$k_dipt]
+ pandn xmm1,xmm0 ; xmm1 = high nibble of each input byte
+ mov r11,rax
+ psrld xmm1,4 ; shift the high nibbles down to bits 0-3
+ movdqu xmm5,XMMWORD PTR[r9] ; first round key
+ shl r11,4
+ pand xmm0,xmm9 ; xmm0 = low nibble of each input byte
+DB 102,15,56,0,208 ; pshufb xmm2,xmm0
+ movdqa xmm0,XMMWORD PTR[(($L$k_dipt+16))]
+ xor r11,030h
+ lea r10,QWORD PTR[$L$k_dsbd] ; base for the displaced table loads below
+DB 102,15,56,0,193 ; pshufb xmm0,xmm1
+ and r11,030h ; r11 = ((counter*16) xor 0x30) and 0x30
+ pxor xmm2,xmm5 ; mix in the first round key
+ movdqa xmm5,XMMWORD PTR[(($L$k_mc_forward+48))] ; xmm5 = permutation applied between table pairs
+ pxor xmm0,xmm2
+ add r9,16 ; r9 -> next round key
+ add r11,r10 ; r11 = $L$k_dsbd + offset, used only for the final $L$k_sr load
+ jmp $L$dec_entry
+
+ALIGN 16
+$L$dec_loop::
+
+
+
+ movdqa xmm4,XMMWORD PTR[((-32))+r10] ; $L$k_dsb9
+DB 102,15,56,0,226 ; pshufb xmm4,xmm2
+ pxor xmm4,xmm0 ; xmm0 = round key loaded in $L$dec_entry
+ movdqa xmm0,XMMWORD PTR[((-16))+r10] ; $L$k_dsb9+16
+DB 102,15,56,0,195 ; pshufb xmm0,xmm3
+ pxor xmm0,xmm4
+ add r9,16 ; r9 -> next round key
+
+DB 102,15,56,0,197 ; pshufb xmm0,xmm5
+ movdqa xmm4,XMMWORD PTR[r10] ; $L$k_dsbd
+DB 102,15,56,0,226 ; pshufb xmm4,xmm2
+ pxor xmm4,xmm0
+ movdqa xmm0,XMMWORD PTR[16+r10] ; $L$k_dsbd+16
+DB 102,15,56,0,195 ; pshufb xmm0,xmm3
+ pxor xmm0,xmm4
+ sub rax,1 ; sets ZF for the jnz at the end of $L$dec_entry
+
+DB 102,15,56,0,197 ; pshufb xmm0,xmm5
+ movdqa xmm4,XMMWORD PTR[32+r10] ; $L$k_dsbb
+DB 102,15,56,0,226 ; pshufb xmm4,xmm2
+ pxor xmm4,xmm0
+ movdqa xmm0,XMMWORD PTR[48+r10] ; $L$k_dsbb+16
+DB 102,15,56,0,195 ; pshufb xmm0,xmm3
+ pxor xmm0,xmm4
+
+DB 102,15,56,0,197 ; pshufb xmm0,xmm5
+ movdqa xmm4,XMMWORD PTR[64+r10] ; $L$k_dsbe
+DB 102,15,56,0,226 ; pshufb xmm4,xmm2
+ pxor xmm4,xmm0
+ movdqa xmm0,XMMWORD PTR[80+r10] ; $L$k_dsbe+16
+DB 102,15,56,0,195 ; pshufb xmm0,xmm3
+ pxor xmm0,xmm4
+
+DB 102,15,58,15,237,12 ; palignr xmm5,xmm5,12 (byte-rotate xmm5 for the next round)
+
+$L$dec_entry::
+
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm0 ; split xmm0 into high nibbles (xmm1) ...
+ psrld xmm1,4
+ pand xmm0,xmm9 ; ... and low nibbles (xmm0)
+ movdqa xmm2,xmm11 ; xmm11 = $L$k_inv+16
+DB 102,15,56,0,208 ; pshufb xmm2,xmm0
+ pxor xmm0,xmm1
+ movdqa xmm3,xmm10 ; xmm10 = $L$k_inv
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ pxor xmm3,xmm2
+ movdqa xmm4,xmm10
+DB 102,15,56,0,224 ; pshufb xmm4,xmm0
+ pxor xmm4,xmm2
+ movdqa xmm2,xmm10
+DB 102,15,56,0,211 ; pshufb xmm2,xmm3
+ pxor xmm2,xmm0
+ movdqa xmm3,xmm10
+DB 102,15,56,0,220 ; pshufb xmm3,xmm4
+ pxor xmm3,xmm1
+ movdqu xmm0,XMMWORD PTR[r9] ; fetch the next round key
+ jnz $L$dec_loop ; ZF comes from "sub rax,1" (or "add r11,r10" on first entry); the SSE ops above do not write EFLAGS
+
+
+ movdqa xmm4,XMMWORD PTR[96+r10] ; $L$k_dsbo
+DB 102,15,56,0,226 ; pshufb xmm4,xmm2
+ pxor xmm4,xmm0 ; last round key
+ movdqa xmm0,XMMWORD PTR[112+r10] ; $L$k_dsbo+16
+ movdqa xmm2,XMMWORD PTR[((-352))+r11] ; entry of $L$k_sr: that table starts 352 bytes before $L$k_dsbd
+DB 102,15,56,0,195 ; pshufb xmm0,xmm3
+ pxor xmm0,xmm4
+DB 102,15,56,0,194 ; pshufb xmm0,xmm2
+ DB 0F3h,0C3h ;repret
+_vpaes_decrypt_core ENDP
+
+
+; _vpaes_schedule_core -- expands the user key into the round-key schedule.
+; In:  rdi = user key, esi = key size selector compared against 192
+;      (the public wrappers pass the key length in bits), rdx = address of
+;      the first round key to write, rcx = 0 for encryption / non-zero for
+;      decryption, r8 = offset into $L$k_sr.  Clears xmm0-xmm7 on exit.
+ALIGN 16
+_vpaes_schedule_core PROC PRIVATE
+
+
+
+
+
+ call _vpaes_preheat ; load the xmm9-xmm15 tables
+ movdqa xmm8,XMMWORD PTR[$L$k_rcon] ; xmm8 = round-constant state used by _vpaes_schedule_round
+ movdqu xmm0,XMMWORD PTR[rdi] ; first 16 bytes of the user key
+
+
+ movdqa xmm3,xmm0 ; untransformed copy, stored by the decrypt path below
+ lea r11,QWORD PTR[$L$k_ipt] ; table argument for _vpaes_schedule_transform
+ call _vpaes_schedule_transform
+ movdqa xmm7,xmm0 ; xmm7 = running key state
+
+ lea r10,QWORD PTR[$L$k_sr] ; r10 stays the $L$k_sr base for the mangle helpers
+ test rcx,rcx
+ jnz $L$schedule_am_decrypting
+
+
+ movdqu XMMWORD PTR[rdx],xmm0 ; encrypt: store the transformed key as round key 0
+ jmp $L$schedule_go
+
+$L$schedule_am_decrypting::
+
+ movdqa xmm1,XMMWORD PTR[r10*1+r8] ; $L$k_sr[r8]
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ movdqu XMMWORD PTR[rdx],xmm3 ; decrypt: store the permuted raw key
+ xor r8,030h
+
+$L$schedule_go::
+ cmp esi,192
+ ja $L$schedule_256 ; above 192
+ je $L$schedule_192 ; exactly 192; anything lower falls through
+
+
+
+
+
+
+
+
+
+
+$L$schedule_128::
+ mov esi,10 ; esi is reused as the iteration counter
+
+$L$oop_schedule_128::
+ call _vpaes_schedule_round
+ dec rsi
+ jz $L$schedule_mangle_last ; the final key takes the special last-key path
+ call _vpaes_schedule_mangle
+ jmp $L$oop_schedule_128
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+$L$schedule_192::
+ movdqu xmm0,XMMWORD PTR[8+rdi] ; user key bytes 8..23
+ call _vpaes_schedule_transform
+ movdqa xmm6,xmm0
+ pxor xmm4,xmm4
+ movhlps xmm6,xmm4 ; zero the low qword of xmm6 (xmm4 is all zero)
+ mov esi,4 ; iteration counter
+
+$L$oop_schedule_192::
+ call _vpaes_schedule_round
+DB 102,15,58,15,198,8 ; palignr xmm0,xmm6,8
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_round
+ dec rsi
+ jz $L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ jmp $L$oop_schedule_192
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+$L$schedule_256::
+ movdqu xmm0,XMMWORD PTR[16+rdi] ; user key bytes 16..31
+ call _vpaes_schedule_transform
+ mov esi,7 ; iteration counter
+
+$L$oop_schedule_256::
+ call _vpaes_schedule_mangle
+ movdqa xmm6,xmm0 ; keep this key for the low round below
+
+
+ call _vpaes_schedule_round
+ dec rsi
+ jz $L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+
+
+ pshufd xmm0,xmm0,0FFh ; broadcast dword 3 of xmm0
+ movdqa xmm5,xmm7 ; park xmm7 ...
+ movdqa xmm7,xmm6 ; ... so the low round works on the saved key
+ call _vpaes_schedule_low_round
+ movdqa xmm7,xmm5 ; restore xmm7
+
+ jmp $L$oop_schedule_256
+
+
+
+
+
+
+
+
+
+
+
+
+ALIGN 16
+$L$schedule_mangle_last::
+
+ lea r11,QWORD PTR[$L$k_deskew] ; table used when decrypting
+ test rcx,rcx
+ jnz $L$schedule_mangle_last_dec
+
+
+ movdqa xmm1,XMMWORD PTR[r10*1+r8] ; $L$k_sr[r8]
+DB 102,15,56,0,193 ; pshufb xmm0,xmm1
+ lea r11,QWORD PTR[$L$k_opt] ; encrypting uses $L$k_opt instead
+ add rdx,32 ; net +16 once the shared "add rdx,-16" below runs
+
+$L$schedule_mangle_last_dec::
+ add rdx,-16
+ pxor xmm0,XMMWORD PTR[$L$k_s63]
+ call _vpaes_schedule_transform
+ movdqu XMMWORD PTR[rdx],xmm0 ; store the last round key
+
+
+ pxor xmm0,xmm0 ; wipe xmm0-xmm7 before returning
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ pxor xmm6,xmm6
+ pxor xmm7,xmm7
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_core ENDP
+
+
+
+
+
+
+
+
+
+
+
+
+
+; _vpaes_schedule_192_smear -- helper for the 192-bit schedule path.
+; XORs shuffled dwords of xmm6 and xmm7 into xmm6, copies the result to
+; xmm0, then zeroes the low qword of xmm6.  Clobbers xmm1.
+ALIGN 16
+_vpaes_schedule_192_smear PROC PRIVATE
+ pshufd xmm0,xmm6,080h ; xmm0 = dwords {0,0,0,2} of xmm6 (low to high)
+ pxor xmm6,xmm0
+ pshufd xmm0,xmm7,0FEh ; xmm0 = dwords {2,3,3,3} of xmm7 (low to high)
+ pxor xmm6,xmm0
+ movdqa xmm0,xmm6 ; result returned in xmm0
+ pxor xmm1,xmm1
+ movhlps xmm6,xmm1 ; low qword of xmm6 = 0
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_192_smear ENDP
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; _vpaes_schedule_round -- one key-expansion round.
+; In:  xmm7 = previous key state, xmm0 = word source for the table step,
+;      xmm8 = round-constant state (rotated here), xmm9-xmm13 = tables.
+; Out: new key in both xmm0 and xmm7.  Clobbers xmm1-xmm4.
+; _vpaes_schedule_low_round is a second entry that skips the first part.
+ALIGN 16
+_vpaes_schedule_round PROC PRIVATE
+
+ pxor xmm1,xmm1
+DB 102,65,15,58,15,200,15 ; palignr xmm1,xmm8,15 (byte 15 of xmm8 -> byte 0 of xmm1, rest zero)
+DB 102,69,15,58,15,192,15 ; palignr xmm8,xmm8,15 (rotate xmm8 by one byte)
+ pxor xmm7,xmm1 ; fold the extracted constant byte into the key state
+
+
+ pshufd xmm0,xmm0,0FFh ; broadcast dword 3 of xmm0
+DB 102,15,58,15,192,1 ; palignr xmm0,xmm0,1 (rotate xmm0 by one byte)
+
+
+
+
+_vpaes_schedule_low_round::
+
+ movdqa xmm1,xmm7
+ pslldq xmm7,4
+ pxor xmm7,xmm1 ; xmm7 ^= xmm7 shifted up by 4 bytes
+ movdqa xmm1,xmm7
+ pslldq xmm7,8
+ pxor xmm7,xmm1 ; ... and again by 8 bytes
+ pxor xmm7,XMMWORD PTR[$L$k_s63]
+
+
+ movdqa xmm1,xmm9 ; xmm9 = 0x0F in every byte
+ pandn xmm1,xmm0 ; split xmm0 into high nibbles (xmm1) ...
+ psrld xmm1,4
+ pand xmm0,xmm9 ; ... and low nibbles (xmm0)
+ movdqa xmm2,xmm11 ; xmm11 = $L$k_inv+16
+DB 102,15,56,0,208 ; pshufb xmm2,xmm0
+ pxor xmm0,xmm1
+ movdqa xmm3,xmm10 ; xmm10 = $L$k_inv
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ pxor xmm3,xmm2
+ movdqa xmm4,xmm10
+DB 102,15,56,0,224 ; pshufb xmm4,xmm0
+ pxor xmm4,xmm2
+ movdqa xmm2,xmm10
+DB 102,15,56,0,211 ; pshufb xmm2,xmm3
+ pxor xmm2,xmm0
+ movdqa xmm3,xmm10
+DB 102,15,56,0,220 ; pshufb xmm3,xmm4
+ pxor xmm3,xmm1
+ movdqa xmm4,xmm13 ; xmm13 = $L$k_sb1
+DB 102,15,56,0,226 ; pshufb xmm4,xmm2
+ movdqa xmm0,xmm12 ; xmm12 = $L$k_sb1+16
+DB 102,15,56,0,195 ; pshufb xmm0,xmm3
+ pxor xmm0,xmm4
+
+
+ pxor xmm0,xmm7 ; combine with the smeared key state
+ movdqa xmm7,xmm0 ; result is left in both registers
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_round ENDP
+
+
+
+
+
+
+
+; _vpaes_schedule_transform -- nibble-wise table transform of xmm0.
+; In:  xmm0 = value, r11 = address of two 16-byte tables, xmm9 = 0x0F mask.
+; Out: xmm0 = table [r11] indexed by the low nibbles XOR table [r11+16]
+;      indexed by the high nibbles.  Clobbers xmm1, xmm2.
+ALIGN 16
+_vpaes_schedule_transform PROC PRIVATE
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm0 ; xmm1 = high nibble of each byte
+ psrld xmm1,4 ; moved down to bits 0-3
+ pand xmm0,xmm9 ; xmm0 = low nibble of each byte
+ movdqa xmm2,XMMWORD PTR[r11]
+DB 102,15,56,0,208 ; pshufb xmm2,xmm0
+ movdqa xmm0,XMMWORD PTR[16+r11]
+DB 102,15,56,0,193 ; pshufb xmm0,xmm1
+ pxor xmm0,xmm2
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_transform ENDP
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; _vpaes_schedule_mangle -- converts the key in xmm0 to its stored form and
+; writes it to the schedule at rdx; xmm0 itself is not modified.
+; rcx = 0: rdx moves forward 16 bytes; rcx != 0: rdx moves back 16 bytes.
+; r8 steps by -16 modulo 64.  Clobbers xmm1-xmm5 (and r11 when rcx != 0).
+ALIGN 16
+_vpaes_schedule_mangle PROC PRIVATE
+ movdqa xmm4,xmm0 ; work on a copy
+ movdqa xmm5,XMMWORD PTR[$L$k_mc_forward]
+ test rcx,rcx
+ jnz $L$schedule_mangle_dec
+
+
+ add rdx,16 ; encrypt: next slot is higher in memory
+ pxor xmm4,XMMWORD PTR[$L$k_s63]
+DB 102,15,56,0,229 ; pshufb xmm4,xmm5
+ movdqa xmm3,xmm4
+DB 102,15,56,0,229 ; pshufb xmm4,xmm5
+ pxor xmm3,xmm4
+DB 102,15,56,0,229 ; pshufb xmm4,xmm5
+ pxor xmm3,xmm4 ; xmm3 = XOR of the three successive permutations
+
+ jmp $L$schedule_mangle_both
+ALIGN 16
+$L$schedule_mangle_dec::
+
+ lea r11,QWORD PTR[$L$k_dksd] ; base of the dksd/dksb/dkse/dks9 table run
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm4 ; xmm1 = high nibbles ...
+ psrld xmm1,4
+ pand xmm4,xmm9 ; ... xmm4 = low nibbles
+
+ movdqa xmm2,XMMWORD PTR[r11] ; $L$k_dksd
+DB 102,15,56,0,212 ; pshufb xmm2,xmm4
+ movdqa xmm3,XMMWORD PTR[16+r11]
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ pxor xmm3,xmm2
+DB 102,15,56,0,221 ; pshufb xmm3,xmm5
+
+ movdqa xmm2,XMMWORD PTR[32+r11] ; $L$k_dksb
+DB 102,15,56,0,212 ; pshufb xmm2,xmm4
+ pxor xmm2,xmm3
+ movdqa xmm3,XMMWORD PTR[48+r11]
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ pxor xmm3,xmm2
+DB 102,15,56,0,221 ; pshufb xmm3,xmm5
+
+ movdqa xmm2,XMMWORD PTR[64+r11] ; $L$k_dkse
+DB 102,15,56,0,212 ; pshufb xmm2,xmm4
+ pxor xmm2,xmm3
+ movdqa xmm3,XMMWORD PTR[80+r11]
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ pxor xmm3,xmm2
+DB 102,15,56,0,221 ; pshufb xmm3,xmm5
+
+ movdqa xmm2,XMMWORD PTR[96+r11] ; $L$k_dks9
+DB 102,15,56,0,212 ; pshufb xmm2,xmm4
+ pxor xmm2,xmm3
+ movdqa xmm3,XMMWORD PTR[112+r11]
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ pxor xmm3,xmm2
+
+ add rdx,-16 ; decrypt: next slot is lower in memory
+
+$L$schedule_mangle_both::
+ movdqa xmm1,XMMWORD PTR[r10*1+r8] ; r10 + r8 ($L$k_sr[r8] when called from _vpaes_schedule_core)
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ add r8,-16
+ and r8,030h ; keep r8 in {0,16,32,48}
+ movdqu XMMWORD PTR[rdx],xmm3 ; store the mangled round key
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_mangle ENDP
+; vpaes_set_encrypt_key -- Win64 entry point that builds an encryption schedule.
+; In (Win64 order): rcx = user key, edx = key length in bits, r8 = key structure.
+; Stores (bits >> 5) + 5 at [240+key], then runs _vpaes_schedule_core with
+; rcx = 0 and r8 = 030h.  Always returns eax = 0.
+PUBLIC vpaes_set_encrypt_key
+
+ALIGN 16
+vpaes_set_encrypt_key PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi ; rdi/rsi are parked in the slots just above the return address
+ mov rax,rsp ; se_handler reads this copy of rsp if a fault hits the prologue
+$L$SEH_begin_vpaes_set_encrypt_key::
+ mov rdi,rcx ; shuffle the Win64 arguments into the registers the core code uses
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ lea rsp,QWORD PTR[((-184))+rsp] ; room for the ten xmm saves below
+ movaps XMMWORD PTR[16+rsp],xmm6 ; xmm6-xmm15 must be preserved for the Win64 caller
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+$L$enc_key_body::
+ mov eax,esi
+ shr eax,5
+ add eax,5 ; eax = bits/32 + 5
+ mov DWORD PTR[240+rdx],eax ; the counter later read by the encrypt/decrypt cores
+
+ mov ecx,0 ; rcx = 0 selects the encryption paths in the schedule code
+ mov r8d,030h
+ call _vpaes_schedule_core
+ movaps xmm6,XMMWORD PTR[16+rsp] ; restore the caller's xmm6-xmm15
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+$L$enc_key_epilogue::
+ xor eax,eax ; return value 0
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_vpaes_set_encrypt_key::
+vpaes_set_encrypt_key ENDP
+; vpaes_set_decrypt_key -- Win64 entry point that builds a decryption schedule (rcx = user key, edx = bits, r8 = key structure); returns eax = 0.
+PUBLIC vpaes_set_decrypt_key
+
+ALIGN 16
+vpaes_set_decrypt_key PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi ; rdi/rsi are parked in the slots just above the return address
+ mov rax,rsp ; se_handler reads this copy of rsp if a fault hits the prologue
+$L$SEH_begin_vpaes_set_decrypt_key::
+ mov rdi,rcx ; shuffle the Win64 arguments into the registers the core code uses
+ mov rsi,rdx
+ mov rdx,r8
+
+
+ lea rsp,QWORD PTR[((-184))+rsp] ; room for the ten xmm saves below
+ movaps XMMWORD PTR[16+rsp],xmm6 ; xmm6-xmm15 must be preserved for the Win64 caller
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+$L$dec_key_body::
+ mov eax,esi
+ shr eax,5
+ add eax,5 ; eax = bits/32 + 5
+ mov DWORD PTR[240+rdx],eax ; the counter later read by the encrypt/decrypt cores
+ shl eax,4
+ lea rdx,QWORD PTR[16+rax*1+rdx] ; rdx = key + 16 + counter*16: the decrypt schedule is written from here downwards
+
+ mov ecx,1 ; rcx != 0 selects the decryption paths in the schedule code
+ mov r8d,esi
+ shr r8d,1
+ and r8d,32
+ xor r8d,32 ; r8d = ((bits >> 1) and 32) xor 32
+ call _vpaes_schedule_core
+ movaps xmm6,XMMWORD PTR[16+rsp] ; restore the caller's xmm6-xmm15
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+$L$dec_key_epilogue::
+ xor eax,eax ; return value 0
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_vpaes_set_decrypt_key::
+vpaes_set_decrypt_key ENDP
+; vpaes_encrypt -- Win64 entry point: reads 16 bytes at rcx, runs _vpaes_encrypt_core with the key schedule in r8, writes 16 bytes to rdx.
+PUBLIC vpaes_encrypt
+
+ALIGN 16
+vpaes_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi ; rdi/rsi are parked in the slots just above the return address
+ mov rax,rsp ; se_handler reads this copy of rsp if a fault hits the prologue
+$L$SEH_begin_vpaes_encrypt::
+ mov rdi,rcx ; rdi = input
+ mov rsi,rdx ; rsi = output
+ mov rdx,r8 ; rdx = key schedule, as the core expects
+
+
+ lea rsp,QWORD PTR[((-184))+rsp] ; room for the ten xmm saves below
+ movaps XMMWORD PTR[16+rsp],xmm6 ; xmm6-xmm15 must be preserved for the Win64 caller
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+$L$enc_body::
+ movdqu xmm0,XMMWORD PTR[rdi] ; load the input block (no alignment required)
+ call _vpaes_preheat ; load the xmm9-xmm15 tables
+ call _vpaes_encrypt_core
+ movdqu XMMWORD PTR[rsi],xmm0 ; store the result
+ movaps xmm6,XMMWORD PTR[16+rsp] ; restore the caller's xmm6-xmm15
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+$L$enc_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_vpaes_encrypt::
+vpaes_encrypt ENDP
+; vpaes_decrypt -- Win64 entry point: reads 16 bytes at rcx, runs _vpaes_decrypt_core with the key schedule in r8, writes 16 bytes to rdx.
+PUBLIC vpaes_decrypt
+
+ALIGN 16
+vpaes_decrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi ; rdi/rsi are parked in the slots just above the return address
+ mov rax,rsp ; se_handler reads this copy of rsp if a fault hits the prologue
+$L$SEH_begin_vpaes_decrypt::
+ mov rdi,rcx ; rdi = input
+ mov rsi,rdx ; rsi = output
+ mov rdx,r8 ; rdx = key schedule, as the core expects
+
+
+ lea rsp,QWORD PTR[((-184))+rsp] ; room for the ten xmm saves below
+ movaps XMMWORD PTR[16+rsp],xmm6 ; xmm6-xmm15 must be preserved for the Win64 caller
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+$L$dec_body::
+ movdqu xmm0,XMMWORD PTR[rdi] ; load the input block (no alignment required)
+ call _vpaes_preheat ; load the xmm9-xmm15 tables
+ call _vpaes_decrypt_core
+ movdqu XMMWORD PTR[rsi],xmm0 ; store the result
+ movaps xmm6,XMMWORD PTR[16+rsp] ; restore the caller's xmm6-xmm15
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+$L$dec_epilogue::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_vpaes_decrypt::
+vpaes_decrypt ENDP
+PUBLIC vpaes_cbc_encrypt
+; vpaes_cbc_encrypt -- Win64 CBC entry point: rcx = in, rdx = out, r8 = length in bytes, r9 = key schedule, [40+rsp] = IV pointer, [48+rsp] = mode (0 = decrypt, otherwise encrypt).
+ALIGN 16
+vpaes_cbc_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi ; rdi/rsi are parked in the slots just above the return address
+ mov rax,rsp ; se_handler reads this copy of rsp if a fault hits the prologue
+$L$SEH_begin_vpaes_cbc_encrypt::
+ mov rdi,rcx ; rdi = in
+ mov rsi,rdx ; rsi = out
+ mov rdx,r8 ; rdx = length (swapped with rcx below)
+ mov rcx,r9 ; rcx = key schedule
+ mov r8,QWORD PTR[40+rsp] ; fifth argument: IV pointer
+ mov r9,QWORD PTR[48+rsp] ; sixth argument: mode flag
+
+
+ xchg rdx,rcx ; now rcx = length, rdx = key schedule (what the cores expect)
+ sub rcx,16
+ jc $L$cbc_abort ; fewer than 16 bytes: return without touching anything
+ lea rsp,QWORD PTR[((-184))+rsp] ; room for the ten xmm saves below
+ movaps XMMWORD PTR[16+rsp],xmm6 ; xmm6-xmm15 must be preserved for the Win64 caller
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+$L$cbc_body::
+ movdqu xmm6,XMMWORD PTR[r8] ; xmm6 = chaining value, initially the IV
+ sub rsi,rdi ; rsi = out - in, so [rdi+rsi] addresses the output block
+ call _vpaes_preheat ; load the xmm9-xmm15 tables
+ cmp r9d,0
+ je $L$cbc_dec_loop ; mode 0 -> decrypt
+ jmp $L$cbc_enc_loop
+ALIGN 16
+$L$cbc_enc_loop::
+ movdqu xmm0,XMMWORD PTR[rdi] ; next input block
+ pxor xmm0,xmm6 ; XOR with the previous output block (or the IV)
+ call _vpaes_encrypt_core
+ movdqa xmm6,xmm0 ; this output becomes the next chaining value
+ movdqu XMMWORD PTR[rdi*1+rsi],xmm0
+ lea rdi,QWORD PTR[16+rdi]
+ sub rcx,16
+ jnc $L$cbc_enc_loop ; loop while another full 16-byte block remains
+ jmp $L$cbc_done
+ALIGN 16
+$L$cbc_dec_loop::
+ movdqu xmm0,XMMWORD PTR[rdi] ; next input block
+ movdqa xmm7,xmm0 ; keep the input: it is the chaining value for the next block
+ call _vpaes_decrypt_core
+ pxor xmm0,xmm6 ; XOR with the previous input block (or the IV)
+ movdqa xmm6,xmm7
+ movdqu XMMWORD PTR[rdi*1+rsi],xmm0
+ lea rdi,QWORD PTR[16+rdi]
+ sub rcx,16
+ jnc $L$cbc_dec_loop ; loop while another full 16-byte block remains
+$L$cbc_done::
+ movdqu XMMWORD PTR[r8],xmm6 ; write the final chaining value back through the IV pointer
+ movaps xmm6,XMMWORD PTR[16+rsp] ; restore the caller's xmm6-xmm15
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+$L$cbc_epilogue::
+$L$cbc_abort::
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+$L$SEH_end_vpaes_cbc_encrypt::
+vpaes_cbc_encrypt ENDP
+
+
+
+
+; _vpaes_preheat -- loads the shared lookup tables into xmm9-xmm15.
+; All loads are displaced from $L$k_s0F; the table named beside each load
+; follows from the layout of _vpaes_consts.  Clobbers r10.
+ALIGN 16
+_vpaes_preheat PROC PRIVATE
+ lea r10,QWORD PTR[$L$k_s0F]
+ movdqa xmm10,XMMWORD PTR[((-32))+r10] ; $L$k_inv
+ movdqa xmm11,XMMWORD PTR[((-16))+r10] ; $L$k_inv+16
+ movdqa xmm9,XMMWORD PTR[r10] ; $L$k_s0F (0x0F nibble mask)
+ movdqa xmm13,XMMWORD PTR[48+r10] ; $L$k_sb1
+ movdqa xmm12,XMMWORD PTR[64+r10] ; $L$k_sb1+16
+ movdqa xmm15,XMMWORD PTR[80+r10] ; $L$k_sb2
+ movdqa xmm14,XMMWORD PTR[96+r10] ; $L$k_sb2+16
+ DB 0F3h,0C3h ;repret
+_vpaes_preheat ENDP
+
+
+
+; _vpaes_consts -- 64-byte aligned constant tables for the vpaes routines.
+; The code reaches several tables by a fixed displacement from a
+; neighbouring label, so the order and size of every entry must not change.
+ALIGN 64
+_vpaes_consts::
+$L$k_inv:: ; loaded into xmm10/xmm11 by _vpaes_preheat
+ DQ 00E05060F0D080180h,0040703090A0B0C02h
+ DQ 001040A060F0B0780h,0030D0E0C02050809h
+
+$L$k_s0F:: ; low-nibble mask, loaded into xmm9 by _vpaes_preheat
+ DQ 00F0F0F0F0F0F0F0Fh,00F0F0F0F0F0F0F0Fh
+
+$L$k_ipt:: ; input transform pair used by the encrypt core and the key schedule
+ DQ 0C2B2E8985A2A7000h,0CABAE09052227808h
+ DQ 04C01307D317C4D00h,0CD80B1FCB0FDCC81h
+
+$L$k_sb1:: ; loaded into xmm13/xmm12 by _vpaes_preheat
+ DQ 0B19BE18FCB503E00h,0A5DF7A6E142AF544h
+ DQ 03618D415FAE22300h,03BF7CCC10D2ED9EFh
+$L$k_sb2:: ; loaded into xmm15/xmm14 by _vpaes_preheat
+ DQ 0E27A93C60B712400h,05EB7E955BC982FCDh
+ DQ 069EB88400AE12900h,0C2A163C8AB82234Ah
+$L$k_sbo:: ; read as [-96+r10]/[-80+r10] at the end of _vpaes_encrypt_core
+ DQ 0D0D26D176FBDC700h,015AABF7AC502A878h
+ DQ 0CFE474A55FBB6A00h,08E1E90D1412B35FAh
+
+$L$k_mc_forward:: ; four 16-byte byte-permutation masks
+ DQ 00407060500030201h,00C0F0E0D080B0A09h
+ DQ 0080B0A0904070605h,0000302010C0F0E0Dh
+ DQ 00C0F0E0D080B0A09h,00407060500030201h
+ DQ 0000302010C0F0E0Dh,0080B0A0904070605h
+
+$L$k_mc_backward:: ; four 16-byte byte-permutation masks; r10 base in _vpaes_encrypt_core
+ DQ 00605040702010003h,00E0D0C0F0A09080Bh
+ DQ 0020100030E0D0C0Fh,00A09080B06050407h
+ DQ 00E0D0C0F0A09080Bh,00605040702010003h
+ DQ 00A09080B06050407h,0020100030E0D0C0Fh
+
+$L$k_sr:: ; four 16-byte byte-permutation masks selected by r8 / r11
+ DQ 00706050403020100h,00F0E0D0C0B0A0908h
+ DQ 0030E09040F0A0500h,00B06010C07020D08h
+ DQ 00F060D040B020900h,0070E050C030A0108h
+ DQ 00B0E0104070A0D00h,00306090C0F020508h
+
+$L$k_rcon:: ; loaded into xmm8 by _vpaes_schedule_core
+ DQ 01F8391B9AF9DEEB6h,0702A98084D7C7D81h
+
+$L$k_s63:: ; 0x5B in every byte; XORed into keys by the schedule code
+ DQ 05B5B5B5B5B5B5B5Bh,05B5B5B5B5B5B5B5Bh
+
+$L$k_opt:: ; transform pair for the last encryption round key
+ DQ 0FF9F4929D6B66000h,0F7974121DEBE6808h
+ DQ 001EDBD5150BCEC00h,0E10D5DB1B05C0CE0h
+
+$L$k_deskew:: ; transform pair for the last decryption round key
+ DQ 007E4A34047A4E300h,01DFEB95A5DBEF91Ah
+ DQ 05F36B5DC83EA6900h,02841C2ABF49D1E77h
+
+
+
+
+
+$L$k_dksd:: ; start of the 128-byte run read by the decrypt branch of _vpaes_schedule_mangle
+ DQ 0FEB91A5DA3E44700h,00740E3A45A1DBEF9h
+ DQ 041C277F4B5368300h,05FDC69EAAB289D1Eh
+$L$k_dksb::
+ DQ 09A4FCA1F8550D500h,003D653861CC94C99h
+ DQ 0115BEDA7B6FC4A00h,0D993256F7E3482C8h
+$L$k_dkse::
+ DQ 0D5031CCA1FC9D600h,053859A4C994F5086h
+ DQ 0A23196054FDC7BE8h,0CD5EF96A20B31487h
+$L$k_dks9::
+ DQ 0B6116FC87ED9A700h,04AED933482255BFCh
+ DQ 04576516227143300h,08BB89FACE9DAFDCEh
+
+
+
+
+
+$L$k_dipt:: ; input transform pair used by _vpaes_decrypt_core
+ DQ 00F505B040B545F00h,0154A411E114E451Ah
+ DQ 086E383E660056500h,012771772F491F194h
+
+$L$k_dsb9:: ; read as [-32+r10]/[-16+r10] in _vpaes_decrypt_core
+ DQ 0851C03539A86D600h,0CAD51F504F994CC9h
+ DQ 0C03B1789ECD74900h,0725E2C9EB2FBA565h
+$L$k_dsbd:: ; r10 base in _vpaes_decrypt_core
+ DQ 07D57CCDFE6B1A200h,0F56E9B13882A4439h
+ DQ 03CE2FAF724C6CB00h,02931180D15DEEFD3h
+$L$k_dsbb::
+ DQ 0D022649296B44200h,0602646F6B0F2D404h
+ DQ 0C19498A6CD596700h,0F3FF0C3E3255AA6Bh
+$L$k_dsbe::
+ DQ 046F2929626D4D000h,02242600464B4F6B0h
+ DQ 00C55A6CDFFAAC100h,09467F36B98593E32h
+$L$k_dsbo:: ; read as [96+r10]/[112+r10] at the end of _vpaes_decrypt_core
+ DQ 01387EA537EF94000h,0C7AA6DB9D4943E2Dh
+ DQ 012D7560F93441D00h,0CA4B8159D8C58E9Ch
+DB 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 ; NUL-terminated ASCII banner: Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)
+DB 111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54
+DB 52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97
+DB 109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32
+DB 85,110,105,118,101,114,115,105,116,121,41,0
+ALIGN 64
+; se_handler -- exception handler named in the .xdata records of the five public vpaes entry points.
+EXTERN __imp_RtlVirtualUnwind:NEAR
+; Patches rsp, rsi, rdi and (inside a function body) xmm6-xmm15 in the context at r8, copies it out, calls RtlVirtualUnwind, returns 1.
+ALIGN 16
+se_handler PROC PRIVATE
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64 ; 32 bytes of shadow space + four stack arguments for the call below
+; NOTE(review): r8/r9 offsets below are read as CONTEXT / DISPATCHER_CONTEXT fields -- confirm against winnt.h.
+ mov rax,QWORD PTR[120+r8] ; presumably context->Rax (the entry points copy rsp into rax first thing)
+ mov rbx,QWORD PTR[248+r8] ; presumably context->Rip
+
+ mov rsi,QWORD PTR[8+r9] ; presumably disp->ImageBase
+ mov r11,QWORD PTR[56+r9] ; presumably disp->HandlerData: the two label RVAs from .xdata
+
+ mov r10d,DWORD PTR[r11] ; first RVA: the *_body label
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$in_prologue ; fault before the body label: frame not built yet, rax still holds the entry rsp
+
+ mov rax,QWORD PTR[152+r8] ; presumably context->Rsp
+
+ mov r10d,DWORD PTR[4+r11] ; second RVA: the *_epilogue label
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_prologue ; fault at or after the epilogue label: frame already released
+
+ lea rsi,QWORD PTR[16+rax] ; source: the xmm6 save slot in the function frame
+ lea rdi,QWORD PTR[512+r8] ; destination: presumably context->Xmm6
+ mov ecx,20 ; 20 qwords = ten 16-byte registers
+ DD 0a548f3fch ; bytes FC F3 48 A5 = cld ; rep movsq
+ lea rax,QWORD PTR[184+rax] ; step over the 184-byte frame to the entry rsp
+
+$L$in_prologue::
+ mov rdi,QWORD PTR[8+rax] ; rdi/rsi as parked by the function prologue
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax ; presumably context->Rsp
+ mov QWORD PTR[168+r8],rsi ; presumably context->Rsi
+ mov QWORD PTR[176+r8],rdi ; presumably context->Rdi
+
+ mov rdi,QWORD PTR[40+r9] ; presumably disp->ContextRecord
+ mov rsi,r8
+ mov ecx,154 ; 154 qwords = 1232 bytes
+ DD 0a548f3fch ; bytes FC F3 48 A5 = cld ; rep movsq
+
+ mov rsi,r9 ; keep the dispatcher pointer while r9 is reloaded as an argument
+ xor rcx,rcx ; first argument = 0
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10 ; stack arguments five to eight
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+
+ mov eax,1 ; return value 1 (presumably ExceptionContinueSearch)
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+se_handler ENDP
+
+.text$ ENDS
+.pdata SEGMENT READONLY ALIGN(4) ; one record per public routine: begin RVA, end RVA, RVA of its .xdata entry
+ALIGN 4
+ DD imagerel $L$SEH_begin_vpaes_set_encrypt_key
+ DD imagerel $L$SEH_end_vpaes_set_encrypt_key
+ DD imagerel $L$SEH_info_vpaes_set_encrypt_key
+
+ DD imagerel $L$SEH_begin_vpaes_set_decrypt_key
+ DD imagerel $L$SEH_end_vpaes_set_decrypt_key
+ DD imagerel $L$SEH_info_vpaes_set_decrypt_key
+
+ DD imagerel $L$SEH_begin_vpaes_encrypt
+ DD imagerel $L$SEH_end_vpaes_encrypt
+ DD imagerel $L$SEH_info_vpaes_encrypt
+
+ DD imagerel $L$SEH_begin_vpaes_decrypt
+ DD imagerel $L$SEH_end_vpaes_decrypt
+ DD imagerel $L$SEH_info_vpaes_decrypt
+
+ DD imagerel $L$SEH_begin_vpaes_cbc_encrypt
+ DD imagerel $L$SEH_end_vpaes_cbc_encrypt
+ DD imagerel $L$SEH_info_vpaes_cbc_encrypt
+
+.pdata ENDS
+.xdata SEGMENT READONLY ALIGN(8) ; per routine: 4 header bytes, handler RVA, then the body/epilogue RVAs that se_handler reads
+ALIGN 8
+$L$SEH_info_vpaes_set_encrypt_key::
+DB 9,0,0,0 ; NOTE(review): presumably UNWIND_INFO version 1 + exception-handler flag, no unwind codes -- confirm
+ DD imagerel se_handler
+ DD imagerel $L$enc_key_body,imagerel $L$enc_key_epilogue
+$L$SEH_info_vpaes_set_decrypt_key::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$dec_key_body,imagerel $L$dec_key_epilogue
+$L$SEH_info_vpaes_encrypt::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$enc_body,imagerel $L$enc_epilogue
+$L$SEH_info_vpaes_decrypt::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$dec_body,imagerel $L$dec_epilogue
+$L$SEH_info_vpaes_cbc_encrypt::
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$cbc_body,imagerel $L$cbc_epilogue
+
+.xdata ENDS
+END
+
diff --git a/ext/libressl/crypto/aes/vpaes-mingw64-x86_64.S b/ext/libressl/crypto/aes/vpaes-mingw64-x86_64.S
new file mode 100644
index 0000000..d6cb860
--- /dev/null
+++ b/ext/libressl/crypto/aes/vpaes-mingw64-x86_64.S
@@ -0,0 +1,1125 @@
+#include "x86_arch.h"
+.text
+
+
+
+
+
+
+
+
+
+
+
+# _vpaes_encrypt_core: runs the encryption rounds on the block in %xmm0.
+# In:  %xmm0 = data block; %rdx = key schedule, round counter at 240(%rdx);
+#      %xmm9-%xmm15 = lookup tables (presumably set up by _vpaes_preheat).
+# Out: %xmm0 = result.  Scratch: %rax, %r9, %r10, %r11, %xmm1-%xmm5.
+# The .byte strings are hand-encoded SSSE3 pshufb (66 0F 38 00 /r).
+.def _vpaes_encrypt_core; .scl 3; .type 32; .endef
+.p2align 4
+_vpaes_encrypt_core:
+ movq %rdx,%r9 # r9 walks the round keys
+ movq $16,%r11 # r11 = table offset, kept in {0,16,32,48} by the andq in the loop
+ movl 240(%rdx),%eax # loop counter taken from the key structure
+ movdqa %xmm9,%xmm1 # xmm9 is used as the nibble mask (presumably 0x0F bytes)
+ movdqa .Lk_ipt(%rip),%xmm2
+ pandn %xmm0,%xmm1 # xmm1 = input bits outside the mask
+ movdqu (%r9),%xmm5 # first round key
+ psrld $4,%xmm1 # shift them down by four bits
+ pand %xmm9,%xmm0 # xmm0 = input bits inside the mask
+.byte 102,15,56,0,208 # pshufb %xmm0,%xmm2
+ movdqa .Lk_ipt+16(%rip),%xmm0
+.byte 102,15,56,0,193 # pshufb %xmm1,%xmm0
+ pxor %xmm5,%xmm2 # mix in the first round key
+ pxor %xmm2,%xmm0
+ addq $16,%r9 # r9 -> next round key
+ leaq .Lk_mc_backward(%rip),%r10 # base for the displaced table loads below
+ jmp .Lenc_entry
+
+.p2align 4
+.Lenc_loop:
+
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226 # pshufb %xmm2,%xmm4
+ pxor %xmm5,%xmm4 # xmm5 = current round key
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195 # pshufb %xmm3,%xmm0
+ pxor %xmm4,%xmm0
+ movdqa %xmm15,%xmm5
+.byte 102,15,56,0,234 # pshufb %xmm2,%xmm5
+ movdqa -64(%r11,%r10,1),%xmm1 # table 64 bytes below .Lk_mc_backward, entry r11
+ movdqa %xmm14,%xmm2
+.byte 102,15,56,0,211 # pshufb %xmm3,%xmm2
+ pxor %xmm5,%xmm2
+ movdqa (%r11,%r10,1),%xmm4 # .Lk_mc_backward entry r11
+ movdqa %xmm0,%xmm3
+.byte 102,15,56,0,193 # pshufb %xmm1,%xmm0
+ addq $16,%r9 # r9 -> next round key
+ pxor %xmm2,%xmm0
+.byte 102,15,56,0,220 # pshufb %xmm4,%xmm3
+ addq $16,%r11
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,193 # pshufb %xmm1,%xmm0
+ andq $48,%r11 # wrap the table offset modulo 64
+ pxor %xmm3,%xmm0
+ subq $1,%rax # sets ZF for the jnz at the end of .Lenc_entry
+
+.Lenc_entry:
+
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1 # split xmm0 by the mask: outside bits to xmm1 ...
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0 # ... inside bits stay in xmm0
+ movdqa %xmm11,%xmm5
+.byte 102,15,56,0,232 # pshufb %xmm0,%xmm5
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217 # pshufb %xmm1,%xmm3
+ pxor %xmm5,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224 # pshufb %xmm0,%xmm4
+ pxor %xmm5,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211 # pshufb %xmm3,%xmm2
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+ movdqu (%r9),%xmm5 # fetch the next round key
+.byte 102,15,56,0,220 # pshufb %xmm4,%xmm3
+ pxor %xmm1,%xmm3
+ jnz .Lenc_loop # ZF comes from subq $1 (or addq $16,%r9 on first entry); the SSE ops above do not write EFLAGS
+
+
+ movdqa -96(%r10),%xmm4 # table pair 96 bytes below .Lk_mc_backward
+ movdqa -80(%r10),%xmm0
+.byte 102,15,56,0,226 # pshufb %xmm2,%xmm4
+ pxor %xmm5,%xmm4 # last round key
+.byte 102,15,56,0,195 # pshufb %xmm3,%xmm0
+ movdqa 64(%r11,%r10,1),%xmm1 # table 64 bytes above .Lk_mc_backward, entry r11
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,193 # pshufb %xmm1,%xmm0
+ retq
+
+
+
+# _vpaes_decrypt_core: runs the decryption rounds on the block in %xmm0.
+# In:  %xmm0 = data block; %rdx = key schedule, round counter at 240(%rdx);
+#      %xmm9-%xmm11 = lookup tables (presumably set up by _vpaes_preheat).
+# Out: %xmm0 = result.  Scratch: %rax, %r9, %r10, %r11, %xmm1-%xmm5.
+.def _vpaes_decrypt_core; .scl 3; .type 32; .endef
+.p2align 4
+_vpaes_decrypt_core:
+ movq %rdx,%r9 # r9 walks the round keys
+ movl 240(%rdx),%eax # loop counter taken from the key structure
+ movdqa %xmm9,%xmm1 # xmm9 is used as the nibble mask (presumably 0x0F bytes)
+ movdqa .Lk_dipt(%rip),%xmm2
+ pandn %xmm0,%xmm1 # xmm1 = input bits outside the mask
+ movq %rax,%r11
+ psrld $4,%xmm1 # shift them down by four bits
+ movdqu (%r9),%xmm5 # first round key
+ shlq $4,%r11
+ pand %xmm9,%xmm0 # xmm0 = input bits inside the mask
+.byte 102,15,56,0,208 # pshufb %xmm0,%xmm2
+ movdqa .Lk_dipt+16(%rip),%xmm0
+ xorq $48,%r11
+ leaq .Lk_dsbd(%rip),%r10 # base for the displaced table loads below
+.byte 102,15,56,0,193 # pshufb %xmm1,%xmm0
+ andq $48,%r11 # r11 = ((counter*16) xor 48) and 48
+ pxor %xmm5,%xmm2 # mix in the first round key
+ movdqa .Lk_mc_forward+48(%rip),%xmm5 # xmm5 = permutation applied between table pairs
+ pxor %xmm2,%xmm0
+ addq $16,%r9 # r9 -> next round key
+ addq %r10,%r11 # r11 = .Lk_dsbd + offset, used only for the final -352 load
+ jmp .Ldec_entry
+
+.p2align 4
+.Ldec_loop:
+
+
+
+ movdqa -32(%r10),%xmm4 # table pair just below .Lk_dsbd
+.byte 102,15,56,0,226 # pshufb %xmm2,%xmm4
+ pxor %xmm0,%xmm4 # xmm0 = round key loaded in .Ldec_entry
+ movdqa -16(%r10),%xmm0
+.byte 102,15,56,0,195 # pshufb %xmm3,%xmm0
+ pxor %xmm4,%xmm0
+ addq $16,%r9 # r9 -> next round key
+
+.byte 102,15,56,0,197 # pshufb %xmm5,%xmm0
+ movdqa 0(%r10),%xmm4 # .Lk_dsbd pair
+.byte 102,15,56,0,226 # pshufb %xmm2,%xmm4
+ pxor %xmm0,%xmm4
+ movdqa 16(%r10),%xmm0
+.byte 102,15,56,0,195 # pshufb %xmm3,%xmm0
+ pxor %xmm4,%xmm0
+ subq $1,%rax # sets ZF for the jnz at the end of .Ldec_entry
+
+.byte 102,15,56,0,197 # pshufb %xmm5,%xmm0
+ movdqa 32(%r10),%xmm4 # next table pair
+.byte 102,15,56,0,226 # pshufb %xmm2,%xmm4
+ pxor %xmm0,%xmm4
+ movdqa 48(%r10),%xmm0
+.byte 102,15,56,0,195 # pshufb %xmm3,%xmm0
+ pxor %xmm4,%xmm0
+
+.byte 102,15,56,0,197 # pshufb %xmm5,%xmm0
+ movdqa 64(%r10),%xmm4 # next table pair
+.byte 102,15,56,0,226 # pshufb %xmm2,%xmm4
+ pxor %xmm0,%xmm4
+ movdqa 80(%r10),%xmm0
+.byte 102,15,56,0,195 # pshufb %xmm3,%xmm0
+ pxor %xmm4,%xmm0
+
+.byte 102,15,58,15,237,12 # palignr $12,%xmm5,%xmm5 (byte-rotate xmm5 for the next round)
+
+.Ldec_entry:
+
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1 # split xmm0 by the mask: outside bits to xmm1 ...
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0 # ... inside bits stay in xmm0
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208 # pshufb %xmm0,%xmm2
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217 # pshufb %xmm1,%xmm3
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224 # pshufb %xmm0,%xmm4
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211 # pshufb %xmm3,%xmm2
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220 # pshufb %xmm4,%xmm3
+ pxor %xmm1,%xmm3
+ movdqu (%r9),%xmm0 # fetch the next round key
+ jnz .Ldec_loop # ZF comes from subq $1 (or addq %r10,%r11 on first entry); the SSE ops above do not write EFLAGS
+
+
+ movdqa 96(%r10),%xmm4 # final table pair
+.byte 102,15,56,0,226 # pshufb %xmm2,%xmm4
+ pxor %xmm0,%xmm4 # last round key
+ movdqa 112(%r10),%xmm0
+ movdqa -352(%r11),%xmm2 # permutation mask 352 bytes below .Lk_dsbd, plus the r11 offset
+.byte 102,15,56,0,195 # pshufb %xmm3,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,194 # pshufb %xmm2,%xmm0
+ retq
+
+
+# _vpaes_schedule_core: expands the user key into the round-key schedule.
+# In:  %rdi = user key, %esi = key size selector compared against 192
+#      (the public wrappers pass the key length in bits), %rdx = address of
+#      the first round key to write, %rcx = 0 for encryption or non-zero for
+#      decryption, %r8 = offset into .Lk_sr.  Clears %xmm0-%xmm7 on exit.
+.def _vpaes_schedule_core; .scl 3; .type 32; .endef
+.p2align 4
+_vpaes_schedule_core:
+
+
+
+
+
+ call _vpaes_preheat # set up the shared xmm tables
+ movdqa .Lk_rcon(%rip),%xmm8 # xmm8 = round-constant state used by _vpaes_schedule_round
+ movdqu (%rdi),%xmm0 # first 16 bytes of the user key
+
+
+ movdqa %xmm0,%xmm3 # untransformed copy, stored by the decrypt path below
+ leaq .Lk_ipt(%rip),%r11 # table argument for _vpaes_schedule_transform
+ call _vpaes_schedule_transform
+ movdqa %xmm0,%xmm7 # xmm7 = running key state
+
+ leaq .Lk_sr(%rip),%r10 # r10 stays the .Lk_sr base for the mangle helpers
+ testq %rcx,%rcx
+ jnz .Lschedule_am_decrypting
+
+
+ movdqu %xmm0,(%rdx) # encrypt: store the transformed key as round key 0
+ jmp .Lschedule_go
+
+.Lschedule_am_decrypting:
+
+ movdqa (%r8,%r10,1),%xmm1 # .Lk_sr entry selected by r8
+.byte 102,15,56,0,217 # pshufb %xmm1,%xmm3
+ movdqu %xmm3,(%rdx) # decrypt: store the permuted raw key
+ xorq $48,%r8
+
+.Lschedule_go:
+ cmpl $192,%esi
+ ja .Lschedule_256 # above 192
+ je .Lschedule_192 # exactly 192; anything lower falls through
+
+
+
+
+
+
+
+
+
+
+.Lschedule_128:
+ movl $10,%esi # esi is reused as the iteration counter
+
+.Loop_schedule_128:
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last # the final key takes the special last-key path
+ call _vpaes_schedule_mangle
+ jmp .Loop_schedule_128
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.p2align 4
+.Lschedule_192:
+ movdqu 8(%rdi),%xmm0 # user key bytes 8..23
+ call _vpaes_schedule_transform
+ movdqa %xmm0,%xmm6
+ pxor %xmm4,%xmm4
+ movhlps %xmm4,%xmm6 # zero the low qword of xmm6 (xmm4 is all zero)
+ movl $4,%esi # iteration counter
+
+.Loop_schedule_192:
+ call _vpaes_schedule_round
+.byte 102,15,58,15,198,8 # palignr $8,%xmm6,%xmm0
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ jmp .Loop_schedule_192
+
+
+
+
+
+
+
+
+
+
+
+.p2align 4
+.Lschedule_256:
+ movdqu 16(%rdi),%xmm0 # user key bytes 16..31
+ call _vpaes_schedule_transform
+ movl $7,%esi # iteration counter
+
+.Loop_schedule_256:
+ call _vpaes_schedule_mangle
+ movdqa %xmm0,%xmm6 # keep this key for the low round below
+
+
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last
+ call _vpaes_schedule_mangle
+
+
+ pshufd $255,%xmm0,%xmm0 # broadcast dword 3 of xmm0
+ movdqa %xmm7,%xmm5 # park xmm7 ...
+ movdqa %xmm6,%xmm7 # ... so the low round works on the saved key
+ call _vpaes_schedule_low_round
+ movdqa %xmm5,%xmm7 # restore xmm7
+
+ jmp .Loop_schedule_256
+
+
+
+
+
+
+
+
+
+
+
+
+.p2align 4
+.Lschedule_mangle_last:
+
+ leaq .Lk_deskew(%rip),%r11 # table used when decrypting
+ testq %rcx,%rcx
+ jnz .Lschedule_mangle_last_dec
+
+
+ movdqa (%r8,%r10,1),%xmm1 # .Lk_sr entry selected by r8
+.byte 102,15,56,0,193 # pshufb %xmm1,%xmm0
+ leaq .Lk_opt(%rip),%r11 # encrypting uses .Lk_opt instead
+ addq $32,%rdx # net +16 once the shared addq $-16 below runs
+
+.Lschedule_mangle_last_dec:
+ addq $-16,%rdx
+ pxor .Lk_s63(%rip),%xmm0
+ call _vpaes_schedule_transform
+ movdqu %xmm0,(%rdx) # store the last round key
+
+
+ pxor %xmm0,%xmm0 # wipe xmm0-xmm7 before returning
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+ retq
+
+
+
+
+
+
+
+
+
+
+
+
+
+# _vpaes_schedule_192_smear: helper for the 192-bit schedule path.
+# XORs shuffled dwords of %xmm6 and %xmm7 into %xmm6, copies the result to
+# %xmm0, then zeroes the low qword of %xmm6.  Clobbers %xmm1.
+.def _vpaes_schedule_192_smear; .scl 3; .type 32; .endef
+.p2align 4
+_vpaes_schedule_192_smear:
+ pshufd $128,%xmm6,%xmm0 # xmm0 = dwords {0,0,0,2} of xmm6 (low to high)
+ pxor %xmm0,%xmm6
+ pshufd $254,%xmm7,%xmm0 # xmm0 = dwords {2,3,3,3} of xmm7 (low to high)
+ pxor %xmm0,%xmm6
+ movdqa %xmm6,%xmm0 # result returned in xmm0
+ pxor %xmm1,%xmm1
+ movhlps %xmm1,%xmm6 # low qword of xmm6 = 0
+ retq
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# _vpaes_schedule_round: one key-expansion round.
+# In:  %xmm7 = previous key state, %xmm0 = word source for the table step,
+#      %xmm8 = round-constant state (rotated here), %xmm9-%xmm13 = tables.
+# Out: new key in both %xmm0 and %xmm7.  Clobbers %xmm1-%xmm4.
+# _vpaes_schedule_low_round is a second entry that skips the first part.
+.def _vpaes_schedule_round; .scl 3; .type 32; .endef
+.p2align 4
+_vpaes_schedule_round:
+
+ pxor %xmm1,%xmm1
+.byte 102,65,15,58,15,200,15 # palignr $15,%xmm8,%xmm1 (byte 15 of xmm8 to byte 0 of xmm1, rest zero)
+.byte 102,69,15,58,15,192,15 # palignr $15,%xmm8,%xmm8 (rotate xmm8 by one byte)
+ pxor %xmm1,%xmm7 # fold the extracted constant byte into the key state
+
+
+ pshufd $255,%xmm0,%xmm0 # broadcast dword 3 of xmm0
+.byte 102,15,58,15,192,1 # palignr $1,%xmm0,%xmm0 (rotate xmm0 by one byte)
+
+
+
+
+_vpaes_schedule_low_round:
+
+ movdqa %xmm7,%xmm1
+ pslldq $4,%xmm7
+ pxor %xmm1,%xmm7 # xmm7 ^= xmm7 shifted up by 4 bytes
+ movdqa %xmm7,%xmm1
+ pslldq $8,%xmm7
+ pxor %xmm1,%xmm7 # ... and again by 8 bytes
+ pxor .Lk_s63(%rip),%xmm7
+
+
+ movdqa %xmm9,%xmm1 # xmm9 is used as the nibble mask
+ pandn %xmm0,%xmm1 # split xmm0 by the mask: outside bits to xmm1 ...
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0 # ... inside bits stay in xmm0
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208 # pshufb %xmm0,%xmm2
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217 # pshufb %xmm1,%xmm3
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224 # pshufb %xmm0,%xmm4
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211 # pshufb %xmm3,%xmm2
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220 # pshufb %xmm4,%xmm3
+ pxor %xmm1,%xmm3
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226 # pshufb %xmm2,%xmm4
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195 # pshufb %xmm3,%xmm0
+ pxor %xmm4,%xmm0
+
+
+ pxor %xmm7,%xmm0 # combine with the smeared key state
+ movdqa %xmm0,%xmm7 # result is left in both registers
+ retq
+
+
+
+
+
+
+
+# _vpaes_schedule_transform: mask-split table transform of %xmm0.
+# In:  %xmm0 = value, %r11 = address of two 16-byte tables, %xmm9 = mask.
+# Out: %xmm0 = table (%r11) indexed by the masked bits XOR table 16(%r11)
+#      indexed by the remaining bits shifted down by 4.  Clobbers %xmm1, %xmm2.
+.def _vpaes_schedule_transform; .scl 3; .type 32; .endef
+.p2align 4
+_vpaes_schedule_transform:
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1 # xmm1 = bits of xmm0 outside the mask
+ psrld $4,%xmm1 # moved down by four bits
+ pand %xmm9,%xmm0 # xmm0 = bits of xmm0 inside the mask
+ movdqa (%r11),%xmm2
+.byte 102,15,56,0,208 # pshufb %xmm0,%xmm2
+ movdqa 16(%r11),%xmm0
+.byte 102,15,56,0,193 # pshufb %xmm1,%xmm0
+ pxor %xmm2,%xmm0
+ retq
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# _vpaes_schedule_mangle: converts the key in %xmm0 to its stored form and
+# writes it to the schedule at %rdx; %xmm0 itself is not modified.
+# %rcx = 0: %rdx moves forward 16 bytes; otherwise %rdx moves back 16 bytes.
+# %r8 steps by -16 modulo 64.  Clobbers %xmm1-%xmm5 (and %r11 when decrypting).
+.def _vpaes_schedule_mangle; .scl 3; .type 32; .endef
+.p2align 4
+_vpaes_schedule_mangle:
+ movdqa %xmm0,%xmm4 # work on a copy
+ movdqa .Lk_mc_forward(%rip),%xmm5
+ testq %rcx,%rcx
+ jnz .Lschedule_mangle_dec
+
+
+ addq $16,%rdx # encrypt: next slot is higher in memory
+ pxor .Lk_s63(%rip),%xmm4
+.byte 102,15,56,0,229 # pshufb %xmm5,%xmm4
+ movdqa %xmm4,%xmm3
+.byte 102,15,56,0,229 # pshufb %xmm5,%xmm4
+ pxor %xmm4,%xmm3
+.byte 102,15,56,0,229 # pshufb %xmm5,%xmm4
+ pxor %xmm4,%xmm3 # xmm3 = XOR of the three successive permutations
+
+ jmp .Lschedule_mangle_both
+.p2align 4
+.Lschedule_mangle_dec:
+
+ leaq .Lk_dksd(%rip),%r11 # base of the 128-byte table run read below
+ movdqa %xmm9,%xmm1
+ pandn %xmm4,%xmm1 # split xmm4 by the xmm9 mask: outside bits to xmm1 ...
+ psrld $4,%xmm1
+ pand %xmm9,%xmm4 # ... inside bits stay in xmm4
+
+ movdqa 0(%r11),%xmm2 # first table pair
+.byte 102,15,56,0,212 # pshufb %xmm4,%xmm2
+ movdqa 16(%r11),%xmm3
+.byte 102,15,56,0,217 # pshufb %xmm1,%xmm3
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221 # pshufb %xmm5,%xmm3
+
+ movdqa 32(%r11),%xmm2 # second table pair
+.byte 102,15,56,0,212 # pshufb %xmm4,%xmm2
+ pxor %xmm3,%xmm2
+ movdqa 48(%r11),%xmm3
+.byte 102,15,56,0,217 # pshufb %xmm1,%xmm3
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221 # pshufb %xmm5,%xmm3
+
+ movdqa 64(%r11),%xmm2 # third table pair
+.byte 102,15,56,0,212 # pshufb %xmm4,%xmm2
+ pxor %xmm3,%xmm2
+ movdqa 80(%r11),%xmm3
+.byte 102,15,56,0,217 # pshufb %xmm1,%xmm3
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221 # pshufb %xmm5,%xmm3
+
+ movdqa 96(%r11),%xmm2 # fourth table pair
+.byte 102,15,56,0,212 # pshufb %xmm4,%xmm2
+ pxor %xmm3,%xmm2
+ movdqa 112(%r11),%xmm3
+.byte 102,15,56,0,217 # pshufb %xmm1,%xmm3
+ pxor %xmm2,%xmm3
+
+ addq $-16,%rdx # decrypt: next slot is lower in memory
+
+.Lschedule_mangle_both:
+ movdqa (%r8,%r10,1),%xmm1 # r10 + r8 (.Lk_sr entry when called from _vpaes_schedule_core)
+.byte 102,15,56,0,217 # pshufb %xmm1,%xmm3
+ addq $-16,%r8
+ andq $48,%r8 # keep r8 in {0,16,32,48}
+ movdqu %xmm3,(%rdx) # store the mangled round key
+ retq
+
+# vpaes_set_encrypt_key: Win64 entry point that builds an encryption schedule.
+# In (Win64 order): %rcx = user key, %edx = key length in bits, %r8 = key structure.
+# Stores (bits >> 5) + 5 at 240(key), then runs _vpaes_schedule_core with
+# %rcx = 0 and %r8 = 48.  Always returns %eax = 0.
+.globl vpaes_set_encrypt_key
+.def vpaes_set_encrypt_key; .scl 2; .type 32; .endef
+.p2align 4
+vpaes_set_encrypt_key:
+ movq %rdi,8(%rsp) # rdi/rsi are parked in the slots just above the return address
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax # copy of the entry rsp
+.LSEH_begin_vpaes_set_encrypt_key:
+ movq %rcx,%rdi # shuffle the Win64 arguments into the registers the core code uses
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+ leaq -184(%rsp),%rsp # room for the ten xmm saves below
+ movaps %xmm6,16(%rsp) # xmm6-xmm15 must be preserved for the Win64 caller
+ movaps %xmm7,32(%rsp)
+ movaps %xmm8,48(%rsp)
+ movaps %xmm9,64(%rsp)
+ movaps %xmm10,80(%rsp)
+ movaps %xmm11,96(%rsp)
+ movaps %xmm12,112(%rsp)
+ movaps %xmm13,128(%rsp)
+ movaps %xmm14,144(%rsp)
+ movaps %xmm15,160(%rsp)
+.Lenc_key_body:
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax # eax = bits/32 + 5
+ movl %eax,240(%rdx) # the counter later read by the encrypt/decrypt cores
+
+ movl $0,%ecx # rcx = 0 selects the encryption paths in the schedule code
+ movl $48,%r8d
+ call _vpaes_schedule_core
+ movaps 16(%rsp),%xmm6 # restore the xmm6-xmm15 values saved above
+ movaps 32(%rsp),%xmm7
+ movaps 48(%rsp),%xmm8
+ movaps 64(%rsp),%xmm9
+ movaps 80(%rsp),%xmm10
+ movaps 96(%rsp),%xmm11
+ movaps 112(%rsp),%xmm12
+ movaps 128(%rsp),%xmm13
+ movaps 144(%rsp),%xmm14
+ movaps 160(%rsp),%xmm15
+ leaq 184(%rsp),%rsp
+.Lenc_key_epilogue:
+ xorl %eax,%eax # return value 0
+ movq 8(%rsp),%rdi # restore rdi/rsi from their parking slots
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_vpaes_set_encrypt_key:
+
+.globl vpaes_set_decrypt_key  # exported Win64 wrapper: calls _vpaes_schedule_core with ecx = 1 and the output cursor placed past the end, then returns 0 in eax
+.def vpaes_set_decrypt_key; .scl 2; .type 32; .endef  # Win64 arguments arrive in rcx (arg1), edx (arg2), r8 (arg3)
+.p2align 4  # NOTE(review): arg2 looks like the key length in bits and arg3 like the key schedule structure - confirm against the C prototype
+vpaes_set_decrypt_key:
+ movq %rdi,8(%rsp)  # rdi and rsi are callee-saved on Win64: park them in the caller-provided home slots
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax  # rax = entry rsp; se_handler uses CONTEXT.Rax as the frame base while Rip is before .Ldec_key_body
+.LSEH_begin_vpaes_set_decrypt_key:
+ movq %rcx,%rdi  # arg1 -> rdi
+ movq %rdx,%rsi  # arg2 -> rsi
+ movq %r8,%rdx  # arg3 -> rdx
+# Frame: 184 bytes (8 mod 16, so rsp becomes 16-byte aligned for movaps); xmm6-xmm15 are callee-saved on Win64 and are spilled at 16..175(%rsp).
+ leaq -184(%rsp),%rsp
+ movaps %xmm6,16(%rsp)
+ movaps %xmm7,32(%rsp)
+ movaps %xmm8,48(%rsp)
+ movaps %xmm9,64(%rsp)
+ movaps %xmm10,80(%rsp)
+ movaps %xmm11,96(%rsp)
+ movaps %xmm12,112(%rsp)
+ movaps %xmm13,128(%rsp)
+ movaps %xmm14,144(%rsp)
+ movaps %xmm15,160(%rsp)
+.Ldec_key_body:  # between this label and .Ldec_key_epilogue se_handler assumes the full frame exists
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax  # eax = arg2/32 + 5
+ movl %eax,240(%rdx)  # stored as a 32-bit field at offset 240 of arg3
+ shll $4,%eax  # eax = 16 * that count
+ leaq 16(%rdx,%rax,1),%rdx  # rdx = arg3 + 16 + 16*count: the cursor starts high because the rcx != 0 path of _vpaes_schedule_mangle steps rdx down by 16
+# ecx = 1 selects the rcx != 0 path; r8d = ((arg2 >> 1) & 32) ^ 32, which is 0 when arg2 is 192 and 32 when it is 128 or 256.
+ movl $1,%ecx
+ movl %esi,%r8d
+ shrl $1,%r8d
+ andl $32,%r8d
+ xorl $32,%r8d
+ call _vpaes_schedule_core
+ movaps 16(%rsp),%xmm6  # reload the callee-saved xmm registers
+ movaps 32(%rsp),%xmm7
+ movaps 48(%rsp),%xmm8
+ movaps 64(%rsp),%xmm9
+ movaps 80(%rsp),%xmm10
+ movaps 96(%rsp),%xmm11
+ movaps 112(%rsp),%xmm12
+ movaps 128(%rsp),%xmm13
+ movaps 144(%rsp),%xmm14
+ movaps 160(%rsp),%xmm15
+ leaq 184(%rsp),%rsp  # release the frame; rsp is back at its entry value
+.Ldec_key_epilogue:
+ xorl %eax,%eax  # return 0
+ movq 8(%rsp),%rdi  # restore rdi and rsi from the home slots
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_vpaes_set_decrypt_key:
+
+.globl vpaes_encrypt  # exported Win64 wrapper: loads 16 bytes from arg1, runs _vpaes_encrypt_core, stores 16 bytes to arg2
+.def vpaes_encrypt; .scl 2; .type 32; .endef  # Win64 arguments arrive in rcx (arg1 = input), rdx (arg2 = output), r8 (arg3)
+.p2align 4  # NOTE(review): arg3 is passed through in rdx to the core and is presumably the key schedule - confirm; eax is not set on return
+vpaes_encrypt:
+ movq %rdi,8(%rsp)  # rdi and rsi are callee-saved on Win64: park them in the caller-provided home slots
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax  # rax = entry rsp; se_handler uses CONTEXT.Rax as the frame base while Rip is before .Lenc_body
+.LSEH_begin_vpaes_encrypt:
+ movq %rcx,%rdi  # arg1 -> rdi
+ movq %rdx,%rsi  # arg2 -> rsi
+ movq %r8,%rdx  # arg3 -> rdx
+# Frame: 184 bytes (8 mod 16, so rsp becomes 16-byte aligned for movaps); xmm6-xmm15 are callee-saved on Win64 and are spilled at 16..175(%rsp).
+ leaq -184(%rsp),%rsp
+ movaps %xmm6,16(%rsp)
+ movaps %xmm7,32(%rsp)
+ movaps %xmm8,48(%rsp)
+ movaps %xmm9,64(%rsp)
+ movaps %xmm10,80(%rsp)
+ movaps %xmm11,96(%rsp)
+ movaps %xmm12,112(%rsp)
+ movaps %xmm13,128(%rsp)
+ movaps %xmm14,144(%rsp)
+ movaps %xmm15,160(%rsp)
+.Lenc_body:  # between this label and .Lenc_epilogue se_handler assumes the full frame exists
+ movdqu (%rdi),%xmm0  # unaligned load of the 16-byte input block
+ call _vpaes_preheat  # load the constant tables into xmm9-xmm15
+ call _vpaes_encrypt_core
+ movdqu %xmm0,(%rsi)  # unaligned store of the 16-byte result
+ movaps 16(%rsp),%xmm6  # reload the callee-saved xmm registers
+ movaps 32(%rsp),%xmm7
+ movaps 48(%rsp),%xmm8
+ movaps 64(%rsp),%xmm9
+ movaps 80(%rsp),%xmm10
+ movaps 96(%rsp),%xmm11
+ movaps 112(%rsp),%xmm12
+ movaps 128(%rsp),%xmm13
+ movaps 144(%rsp),%xmm14
+ movaps 160(%rsp),%xmm15
+ leaq 184(%rsp),%rsp  # release the frame; rsp is back at its entry value
+.Lenc_epilogue:
+ movq 8(%rsp),%rdi  # restore rdi and rsi from the home slots
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_vpaes_encrypt:
+
+.globl vpaes_decrypt  # exported Win64 wrapper: loads 16 bytes from arg1, runs _vpaes_decrypt_core, stores 16 bytes to arg2
+.def vpaes_decrypt; .scl 2; .type 32; .endef  # Win64 arguments arrive in rcx (arg1 = input), rdx (arg2 = output), r8 (arg3)
+.p2align 4  # NOTE(review): arg3 is passed through in rdx to the core and is presumably the key schedule - confirm; eax is not set on return
+vpaes_decrypt:
+ movq %rdi,8(%rsp)  # rdi and rsi are callee-saved on Win64: park them in the caller-provided home slots
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax  # rax = entry rsp; se_handler uses CONTEXT.Rax as the frame base while Rip is before .Ldec_body
+.LSEH_begin_vpaes_decrypt:
+ movq %rcx,%rdi  # arg1 -> rdi
+ movq %rdx,%rsi  # arg2 -> rsi
+ movq %r8,%rdx  # arg3 -> rdx
+# Frame: 184 bytes (8 mod 16, so rsp becomes 16-byte aligned for movaps); xmm6-xmm15 are callee-saved on Win64 and are spilled at 16..175(%rsp).
+ leaq -184(%rsp),%rsp
+ movaps %xmm6,16(%rsp)
+ movaps %xmm7,32(%rsp)
+ movaps %xmm8,48(%rsp)
+ movaps %xmm9,64(%rsp)
+ movaps %xmm10,80(%rsp)
+ movaps %xmm11,96(%rsp)
+ movaps %xmm12,112(%rsp)
+ movaps %xmm13,128(%rsp)
+ movaps %xmm14,144(%rsp)
+ movaps %xmm15,160(%rsp)
+.Ldec_body:  # between this label and .Ldec_epilogue se_handler assumes the full frame exists
+ movdqu (%rdi),%xmm0  # unaligned load of the 16-byte input block
+ call _vpaes_preheat  # load the constant tables into xmm9-xmm15
+ call _vpaes_decrypt_core
+ movdqu %xmm0,(%rsi)  # unaligned store of the 16-byte result
+ movaps 16(%rsp),%xmm6  # reload the callee-saved xmm registers
+ movaps 32(%rsp),%xmm7
+ movaps 48(%rsp),%xmm8
+ movaps 64(%rsp),%xmm9
+ movaps 80(%rsp),%xmm10
+ movaps 96(%rsp),%xmm11
+ movaps 112(%rsp),%xmm12
+ movaps 128(%rsp),%xmm13
+ movaps 144(%rsp),%xmm14
+ movaps 160(%rsp),%xmm15
+ leaq 184(%rsp),%rsp  # release the frame; rsp is back at its entry value
+.Ldec_epilogue:
+ movq 8(%rsp),%rdi  # restore rdi and rsi from the home slots
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_vpaes_decrypt:
+.globl vpaes_cbc_encrypt  # exported Win64 wrapper: CBC-chained processing of whole 16-byte blocks; arg6 == 0 selects the decrypt loop, anything else the encrypt loop
+.def vpaes_cbc_encrypt; .scl 2; .type 32; .endef  # Win64 args: rcx = input, rdx = output, r8 = byte length, r9 = arg4 (handed to the cores in rdx), 40(%rsp) = chaining block pointer, 48(%rsp) = direction flag
+.p2align 4  # Lengths below 16 return immediately without touching anything; a trailing partial block is not processed
+vpaes_cbc_encrypt:
+ movq %rdi,8(%rsp)  # rdi and rsi are callee-saved on Win64: park them in the caller-provided home slots
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax  # rax = entry rsp; se_handler uses CONTEXT.Rax as the frame base while Rip is before .Lcbc_body
+.LSEH_begin_vpaes_cbc_encrypt:
+ movq %rcx,%rdi  # arg1 -> rdi (input)
+ movq %rdx,%rsi  # arg2 -> rsi (output)
+ movq %r8,%rdx  # arg3 -> rdx (length, swapped into rcx below)
+ movq %r9,%rcx  # arg4 -> rcx
+ movq 40(%rsp),%r8  # arg5 from the stack: past the return address and the 32-byte home area
+ movq 48(%rsp),%r9  # arg6 from the stack
+# After the swap: rcx = length, rdx = arg4. NOTE(review): arg4 is presumably the key schedule - confirm.
+ xchgq %rcx,%rdx
+ subq $16,%rcx  # rcx = length - 16; a borrow means there is not even one full block
+ jc .Lcbc_abort  # taken before the frame is built, so the abort path only restores rdi and rsi
+ leaq -184(%rsp),%rsp  # 184-byte frame (8 mod 16 keeps rsp 16-byte aligned for movaps)
+ movaps %xmm6,16(%rsp)  # xmm6-xmm15 are callee-saved on Win64
+ movaps %xmm7,32(%rsp)
+ movaps %xmm8,48(%rsp)
+ movaps %xmm9,64(%rsp)
+ movaps %xmm10,80(%rsp)
+ movaps %xmm11,96(%rsp)
+ movaps %xmm12,112(%rsp)
+ movaps %xmm13,128(%rsp)
+ movaps %xmm14,144(%rsp)
+ movaps %xmm15,160(%rsp)
+.Lcbc_body:  # between this label and .Lcbc_epilogue se_handler assumes the full frame exists
+ movdqu (%r8),%xmm6  # xmm6 = current chaining block
+ subq %rdi,%rsi  # rsi = output - input, so (%rsi,%rdi,1) addresses the output while only rdi advances
+ call _vpaes_preheat  # load the constant tables into xmm9-xmm15 (xmm6 is not touched)
+ cmpl $0,%r9d
+ je .Lcbc_dec_loop  # arg6 == 0: decrypt
+ jmp .Lcbc_enc_loop
+.p2align 4
+.Lcbc_enc_loop:
+ movdqu (%rdi),%xmm0  # next input block
+ pxor %xmm6,%xmm0  # XOR with the chaining block before the core
+ call _vpaes_encrypt_core
+ movdqa %xmm0,%xmm6  # the output becomes the next chaining block
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi  # advance the input pointer
+ subq $16,%rcx
+ jnc .Lcbc_enc_loop  # keep going while at least one more full block remains
+ jmp .Lcbc_done
+.p2align 4
+.Lcbc_dec_loop:
+ movdqu (%rdi),%xmm0  # next input block
+ movdqa %xmm0,%xmm7  # keep a copy; NOTE(review): relies on _vpaes_decrypt_core leaving xmm6 and xmm7 intact - confirm
+ call _vpaes_decrypt_core
+ pxor %xmm6,%xmm0  # XOR the core output with the chaining block
+ movdqa %xmm7,%xmm6  # the saved input block becomes the next chaining block
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi  # advance the input pointer
+ subq $16,%rcx
+ jnc .Lcbc_dec_loop  # keep going while at least one more full block remains
+.Lcbc_done:
+ movdqu %xmm6,(%r8)  # write the final chaining block back through arg5
+ movaps 16(%rsp),%xmm6  # reload the callee-saved xmm registers
+ movaps 32(%rsp),%xmm7
+ movaps 48(%rsp),%xmm8
+ movaps 64(%rsp),%xmm9
+ movaps 80(%rsp),%xmm10
+ movaps 96(%rsp),%xmm11
+ movaps 112(%rsp),%xmm12
+ movaps 128(%rsp),%xmm13
+ movaps 144(%rsp),%xmm14
+ movaps 160(%rsp),%xmm15
+ leaq 184(%rsp),%rsp  # release the frame; rsp is back at its entry value
+.Lcbc_epilogue:
+.Lcbc_abort:  # reached with rsp at its entry value on both paths
+ movq 8(%rsp),%rdi  # restore rdi and rsi from the home slots
+ movq 16(%rsp),%rsi
+ retq
+.LSEH_end_vpaes_cbc_encrypt:
+
+
+
+
+
+
+.def _vpaes_preheat; .scl 3; .type 32; .endef  # file-local helper: load the constant tables around .Lk_s0F into xmm9-xmm15
+.p2align 4  # Out: r10 = address of .Lk_s0F, xmm9-xmm15 as listed below; no other registers are written
+_vpaes_preheat:  # The offsets depend on the table order inside _vpaes_consts
+ leaq .Lk_s0F(%rip),%r10
+ movdqa -32(%r10),%xmm10  # .Lk_inv, first 16 bytes
+ movdqa -16(%r10),%xmm11  # .Lk_inv, second 16 bytes
+ movdqa 0(%r10),%xmm9  # .Lk_s0F: 0x0F in every byte
+ movdqa 48(%r10),%xmm13  # .Lk_sb1, first 16 bytes (16 for .Lk_s0F + 32 for .Lk_ipt = 48)
+ movdqa 64(%r10),%xmm12  # .Lk_sb1, second 16 bytes
+ movdqa 80(%r10),%xmm15  # .Lk_sb2, first 16 bytes
+ movdqa 96(%r10),%xmm14  # .Lk_sb2, second 16 bytes
+ retq
+
+
+
+
+
+
+
+.p2align 6  # 64-byte alignment; every table below is a multiple of 16 bytes, so all of them stay 16-byte aligned for movdqa
+_vpaes_consts:  # Constant tables. Do not reorder: _vpaes_preheat and _vpaes_schedule_mangle address several of them by fixed offset from a neighbouring label
+.Lk_inv:  # 32 bytes at .Lk_s0F-32; loaded into xmm10 and xmm11 by _vpaes_preheat
+.quad 0x0E05060F0D080180, 0x040703090A0B0C02
+.quad 0x01040A060F0B0780, 0x030D0E0C02050809
+
+.Lk_s0F:  # 0x0F in every byte; loaded into xmm9 by _vpaes_preheat
+.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
+
+.Lk_ipt:  # 32 bytes at .Lk_s0F+16; referenced outside this part of the file
+.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
+.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
+
+.Lk_sb1:  # at .Lk_s0F+48; loaded into xmm13 and xmm12 by _vpaes_preheat
+.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
+.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
+.Lk_sb2:  # at .Lk_s0F+80; loaded into xmm15 and xmm14 by _vpaes_preheat
+.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
+.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
+.Lk_sbo:  # referenced outside this part of the file
+.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
+.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
+
+.Lk_mc_forward:  # four pshufb masks; row 0 rotates every 4-byte group by one byte and is what _vpaes_schedule_mangle loads into xmm5
+.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
+.quad 0x080B0A0904070605, 0x000302010C0F0E0D
+.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
+.quad 0x000302010C0F0E0D, 0x080B0A0904070605
+
+.Lk_mc_backward:  # four pshufb masks; referenced outside this part of the file
+.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
+.quad 0x020100030E0D0C0F, 0x0A09080B06050407
+.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
+.quad 0x0A09080B06050407, 0x020100030E0D0C0F
+
+.Lk_sr:  # four 16-byte pshufb masks; row 0 is the identity permutation 00..0F
+.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
+.quad 0x030E09040F0A0500, 0x0B06010C07020D08
+.quad 0x0F060D040B020900, 0x070E050C030A0108
+.quad 0x0B0E0104070A0D00, 0x0306090C0F020508
+
+.Lk_rcon:  # referenced outside this part of the file
+.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
+
+.Lk_s63:  # 0x5B in every byte; XORed into the key on the rcx == 0 path of _vpaes_schedule_mangle
+.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
+
+.Lk_opt:  # referenced outside this part of the file
+.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
+.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
+
+.Lk_deskew:  # referenced outside this part of the file
+.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
+.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
+
+
+
+# Four consecutive 32-byte table pairs. _vpaes_schedule_mangle (rcx != 0 path) reads all of
+# them through r11 = .Lk_dksd at offsets 0, 32, 64 and 96, so they must stay adjacent and in this order.
+.Lk_dksd:
+.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
+.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
+.Lk_dksb:
+.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
+.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
+.Lk_dkse:
+.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
+.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
+.Lk_dks9:
+.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
+.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
+
+
+
+# Remaining tables are not referenced by any code in this part of the file.
+# NOTE(review): judging by the names they serve the decryption core - confirm there.
+.Lk_dipt:
+.quad 0x0F505B040B545F00, 0x154A411E114E451A
+.quad 0x86E383E660056500, 0x12771772F491F194
+
+.Lk_dsb9:
+.quad 0x851C03539A86D600, 0xCAD51F504F994CC9
+.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
+.Lk_dsbd:
+.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
+.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
+.Lk_dsbb:
+.quad 0xD022649296B44200, 0x602646F6B0F2D404
+.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
+.Lk_dsbe:
+.quad 0x46F2929626D4D000, 0x2242600464B4F6B0
+.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
+.Lk_dsbo:
+.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
+.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
+.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0  # NUL-terminated ASCII: Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)
+.p2align 6
+
+
+.def se_handler; .scl 3; .type 32; .endef  # file-local Win64 language-specific exception handler named by every .xdata record of this file
+.p2align 4  # In (handler convention): r8 = CONTEXT pointer, r9 = DISPATCHER_CONTEXT pointer; rcx and rdx are not read
+se_handler:  # Rebuilds Rsp/Rsi/Rdi/Xmm6-15 in the CONTEXT as they were at function entry, calls RtlVirtualUnwind, returns 1 in eax
+ pushq %rsi  # save every callee-saved integer register that is used below, plus the flags
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp  # 32 bytes of home space plus four stack argument slots for the call below; rsp is 16-byte aligned after this
+# Default frame base: CONTEXT.Rax, which the wrappers set to their entry rsp before building the frame.
+ movq 120(%r8),%rax  # CONTEXT.Rax
+ movq 248(%r8),%rbx  # CONTEXT.Rip
+
+ movq 8(%r9),%rsi  # DISPATCHER_CONTEXT.ImageBase
+ movq 56(%r9),%r11  # DISPATCHER_CONTEXT.HandlerData: the two RVAs stored after the handler RVA in .xdata
+
+ movl 0(%r11),%r10d  # HandlerData[0]: RVA of the body label
+ leaq (%rsi,%r10,1),%r10  # turned into an address
+ cmpq %r10,%rbx
+ jb .Lin_prologue  # Rip before the body label: the frame is not built yet, rax already is the entry rsp
+
+ movq 152(%r8),%rax  # CONTEXT.Rsp
+
+ movl 4(%r11),%r10d  # HandlerData[1]: RVA of the epilogue label
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lin_prologue  # Rip at or past the epilogue label: the frame is already released, Rsp is the entry rsp
+# Fault inside the body: copy the spilled xmm6-xmm15 back into the CONTEXT and step over the 184-byte frame.
+ leaq 16(%rax),%rsi  # source: xmm spill area at 16(frame)
+ leaq 512(%r8),%rdi  # destination: CONTEXT.Xmm6
+ movl $20,%ecx  # 20 quadwords = 10 registers of 16 bytes
+.long 0xa548f3fc  # bytes FC F3 48 A5 = cld; rep movsq
+ leaq 184(%rax),%rax  # rax = rsp as it was at function entry
+
+.Lin_prologue:  # here rax = entry rsp of the interrupted function
+ movq 8(%rax),%rdi  # rdi value parked in the first home slot by the wrapper
+ movq 16(%rax),%rsi  # rsi value parked in the second home slot
+ movq %rax,152(%r8)  # CONTEXT.Rsp
+ movq %rsi,168(%r8)  # CONTEXT.Rsi
+ movq %rdi,176(%r8)  # CONTEXT.Rdi
+# Copy the patched CONTEXT over DISPATCHER_CONTEXT.ContextRecord.
+ movq 40(%r9),%rdi  # destination: ContextRecord
+ movq %r8,%rsi  # source: the CONTEXT passed to the handler
+ movl $154,%ecx  # 154 quadwords = 1232 bytes
+.long 0xa548f3fc  # cld; rep movsq
+# RtlVirtualUnwind(HandlerType, ImageBase, ControlPc, FunctionEntry, ContextRecord, HandlerData, EstablisherFrame, ContextPointers)
+ movq %r9,%rsi  # rsi = DISPATCHER_CONTEXT, because r8 and r9 are about to become arguments
+ xorq %rcx,%rcx  # arg1 = 0 (UNW_FLAG_NHANDLER)
+ movq 8(%rsi),%rdx  # arg2 = ImageBase
+ movq 0(%rsi),%r8  # arg3 = ControlPc
+ movq 16(%rsi),%r9  # arg4 = FunctionEntry
+ movq 40(%rsi),%r10  # ContextRecord
+ leaq 56(%rsi),%r11  # address of the HandlerData field
+ leaq 24(%rsi),%r12  # address of the EstablisherFrame field
+ movq %r10,32(%rsp)  # arg5
+ movq %r11,40(%rsp)  # arg6
+ movq %r12,48(%rsp)  # arg7
+ movq %rcx,56(%rsp)  # arg8 = NULL
+ call *__imp_RtlVirtualUnwind(%rip)
+
+ movl $1,%eax  # return value 1 (ExceptionContinueSearch)
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+
+
+.section .pdata  # Win64 function table: one RUNTIME_FUNCTION record of three image-relative addresses per exported function
+.p2align 2
+.rva .LSEH_begin_vpaes_set_encrypt_key  # BeginAddress
+.rva .LSEH_end_vpaes_set_encrypt_key  # EndAddress
+.rva .LSEH_info_vpaes_set_encrypt_key  # unwind info record in .xdata
+
+.rva .LSEH_begin_vpaes_set_decrypt_key  # BeginAddress
+.rva .LSEH_end_vpaes_set_decrypt_key  # EndAddress
+.rva .LSEH_info_vpaes_set_decrypt_key  # unwind info record in .xdata
+
+.rva .LSEH_begin_vpaes_encrypt  # BeginAddress
+.rva .LSEH_end_vpaes_encrypt  # EndAddress
+.rva .LSEH_info_vpaes_encrypt  # unwind info record in .xdata
+
+.rva .LSEH_begin_vpaes_decrypt  # BeginAddress
+.rva .LSEH_end_vpaes_decrypt  # EndAddress
+.rva .LSEH_info_vpaes_decrypt  # unwind info record in .xdata
+
+.rva .LSEH_begin_vpaes_cbc_encrypt  # BeginAddress
+.rva .LSEH_end_vpaes_cbc_encrypt  # EndAddress
+.rva .LSEH_info_vpaes_cbc_encrypt  # unwind info record in .xdata
+
+.section .xdata  # unwind info records referenced from .pdata; all of them delegate to se_handler
+.p2align 3
+.LSEH_info_vpaes_set_encrypt_key:
+.byte 9,0,0,0  # UNWIND_INFO header: version 1 with flags 1 (UNW_FLAG_EHANDLER), prolog size 0, no unwind codes, no frame register
+.rva se_handler  # language-specific handler
+.rva .Lenc_key_body,.Lenc_key_epilogue  # HandlerData[0] and HandlerData[1]: the range in which se_handler treats the frame as fully built
+.LSEH_info_vpaes_set_decrypt_key:
+.byte 9,0,0,0  # same header as above
+.rva se_handler
+.rva .Ldec_key_body,.Ldec_key_epilogue  # HandlerData[0], HandlerData[1]
+.LSEH_info_vpaes_encrypt:
+.byte 9,0,0,0  # same header as above
+.rva se_handler
+.rva .Lenc_body,.Lenc_epilogue  # HandlerData[0], HandlerData[1]
+.LSEH_info_vpaes_decrypt:
+.byte 9,0,0,0  # same header as above
+.rva se_handler
+.rva .Ldec_body,.Ldec_epilogue  # HandlerData[0], HandlerData[1]
+.LSEH_info_vpaes_cbc_encrypt:
+.byte 9,0,0,0  # same header as above
+.rva se_handler
+.rva .Lcbc_body,.Lcbc_epilogue  # HandlerData[0], HandlerData[1]; .Lcbc_abort shares the epilogue address