diff options
author: Uros Majstorovic <> 2022-02-02 06:03:27 +0100
committer: Uros Majstorovic <> 2022-02-02 06:03:27 +0100
commit: c5d4f45893cd3a70c2387f13fcf5ac97550bbfa0 (patch)
parent: e1e02465a8bc6e0c8babf22e8320bc1f6c44652b (diff)
added aes and blowfish to ssl
73 files changed, 55850 insertions, 19 deletions
diff --git a/crypto/libressl/Makefile b/crypto/libressl/Makefile
index 60d9ce6..56eabfc 100644
--- a/crypto/libressl/Makefile
+++ b/crypto/libressl/Makefile
@@ -1,6 +1,3 @@
-pwd := $(abspath $(dir $(firstword $(MAKEFILE_LIST))))
-MAKEFLAGS += -I$(pwd)
diff --git a/crypto/libressl/crypto/aead/Makefile b/crypto/libressl/crypto/aead/Makefile
index 2148699..6bf1ccf 100644
--- a/crypto/libressl/crypto/aead/Makefile
+++ b/crypto/libressl/crypto/aead/Makefile
@@ -1,4 +1,4 @@
+include ../../
obj = e_chacha20poly1305.o
diff --git a/crypto/libressl/crypto/aes/Makefile b/crypto/libressl/crypto/aes/Makefile
new file mode 100644
index 0000000..2b3c04c
--- /dev/null
+++ b/crypto/libressl/crypto/aes/Makefile
@@ -0,0 +1,14 @@
+include ../../ # NOTE(review): the included file name looks truncated in this view - confirm
+obj = aes_core.o aes_ecb.o aes_cbc.o aes_cfb.o aes_ctr.o aes_ige.o aes_ofb.o
+all: $(obj) # builds every object named in obj
+dep: all # dep simply depends on all
+%.o: %.c # compile one C source into one object. NOTE(review): as shown, the rm line below is part of this recipe, a clean target line appears to be missing from this view - confirm
+ $(CC) $(CFLAGS) -c $<
+ rm -f *.o *.a
diff --git a/crypto/libressl/crypto/aes/aes-elf-armv4.S b/crypto/libressl/crypto/aes/aes-elf-armv4.S
new file mode 100644
index 0000000..8164b53
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aes-elf-armv4.S
@@ -0,0 +1,1074 @@
+#include "arm_arch.h"
+.code 32
+.type AES_Te,%object @ encryption lookup data: 256 words, then 256 Te4 bytes, then the rcon words
+.align 5 @ 2^5 = 32-byte alignment
+.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d @ word table: the round code indexes it with a byte scaled by 4 and rotates the result to get the Te1-Te3 values named in its comments
+.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
+.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
+.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
+.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
+.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
+.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
+.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
+.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
+.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
+.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
+.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
+.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
+.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
+.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
+.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
+.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
+.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
+.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
+.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
+.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
+.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
+.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
+.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
+.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
+.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
+.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
+.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
+.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
+.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
+.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
+.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
+.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
+.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
+.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
+.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
+.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
+.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
+.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
+.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
+.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
+.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
+.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
+.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
+.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
+.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
+.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
+.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
+.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
+.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
+.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
+.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
+.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
+.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
+.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
+.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
+.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
+.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
+.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
+.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
+.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
+.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
+.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
+.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
+@ Te4[256]: byte table, 1024 bytes past the start of the words above. The key setup code reaches it as AES_Te+1024
+.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
+.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
+.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
+.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
+.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
+.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
+.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
+.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
+.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
+.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
+.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
+.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
+.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
+.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
+.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
+.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
+.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
+.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
+.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
+.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
+.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
+.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
+.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
+.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
+.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
+.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
+.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
+.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
+.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
+.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
+.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
+.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+@ rcon[]: round constants, read by the key setup code starting at Te4+256
+.word 0x01000000, 0x02000000, 0x04000000, 0x08000000
+.word 0x10000000, 0x20000000, 0x40000000, 0x80000000
+.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
+.size AES_Te,.-AES_Te @ NOTE(review): no AES_Te label line is visible in this view - confirm against the full file
+@ void AES_encrypt(const unsigned char *in, unsigned char *out,
+@ const AES_KEY *key) { AES_encrypt
+.type AES_encrypt,%function @ public entry: r0 = in, r1 = out, r2 = key. Reads 16 bytes at in, runs the core routine, writes 16 bytes at out
+.align 5 @ 2^5 = 32-byte alignment
+ sub r3,pc,#8 @ AES_encrypt (pc reads 8 bytes ahead, so r3 = address of this instruction)
+ stmdb sp!,{r1,r4-r12,lr} @ save the out pointer, r4-r12 and the return address
+ mov r12,r0 @ inp
+ mov r11,r2 @ key schedule pointer, consumed by the core routine
+ sub r10,r3,#AES_encrypt-AES_Te @ Te (table base computed relative to r3, no absolute address needed)
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r0,[r12,#3] @ load input data in endian-neutral
+ ldrb r4,[r12,#2] @ manner...
+ ldrb r5,[r12,#1]
+ ldrb r6,[r12,#0]
+ orr r0,r0,r4,lsl#8
+ ldrb r1,[r12,#7]
+ orr r0,r0,r5,lsl#16
+ ldrb r4,[r12,#6]
+ orr r0,r0,r6,lsl#24 @ r0 = input bytes 0-3, byte 0 in the top bits
+ ldrb r5,[r12,#5]
+ ldrb r6,[r12,#4]
+ orr r1,r1,r4,lsl#8
+ ldrb r2,[r12,#11]
+ orr r1,r1,r5,lsl#16
+ ldrb r4,[r12,#10]
+ orr r1,r1,r6,lsl#24 @ r1 = input bytes 4-7
+ ldrb r5,[r12,#9]
+ ldrb r6,[r12,#8]
+ orr r2,r2,r4,lsl#8
+ ldrb r3,[r12,#15]
+ orr r2,r2,r5,lsl#16
+ ldrb r4,[r12,#14]
+ orr r2,r2,r6,lsl#24 @ r2 = input bytes 8-11
+ ldrb r5,[r12,#13]
+ ldrb r6,[r12,#12]
+ orr r3,r3,r4,lsl#8
+ orr r3,r3,r5,lsl#16
+ orr r3,r3,r6,lsl#24 @ r3 = input bytes 12-15
+ ldr r0,[r12,#0] @ word-load variant of the same input read. NOTE(review): the else and endif directives that separate it from the byte loads are not visible in this view
+ ldr r1,[r12,#4]
+ ldr r2,[r12,#8]
+ ldr r3,[r12,#12]
+#ifdef __ARMEL__
+ rev r0,r0 @ little-endian only: byte-swap so that byte 0 lands in the top bits, as in the byte-load variant
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+ bl _armv4_AES_encrypt @ run the rounds: state in r0-r3, key in r11, table in r10
+ ldr r12,[sp],#4 @ pop out (the r1 saved by the stmdb above)
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+#ifdef __ARMEL__
+ rev r0,r0 @ little-endian only: swap back before the word stores
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+ str r0,[r12,#0] @ word-store variant of the output write
+ str r1,[r12,#4]
+ str r2,[r12,#8]
+ str r3,[r12,#12]
+ mov r4,r0,lsr#24 @ write output in endian-neutral
+ mov r5,r0,lsr#16 @ manner...
+ mov r6,r0,lsr#8
+ strb r4,[r12,#0]
+ strb r5,[r12,#1]
+ mov r4,r1,lsr#24
+ strb r6,[r12,#2]
+ mov r5,r1,lsr#16
+ strb r0,[r12,#3]
+ mov r6,r1,lsr#8
+ strb r4,[r12,#4]
+ strb r5,[r12,#5]
+ mov r4,r2,lsr#24
+ strb r6,[r12,#6]
+ mov r5,r2,lsr#16
+ strb r1,[r12,#7]
+ mov r6,r2,lsr#8
+ strb r4,[r12,#8]
+ strb r5,[r12,#9]
+ mov r4,r3,lsr#24
+ strb r6,[r12,#10]
+ mov r5,r3,lsr#16
+ strb r2,[r12,#11]
+ mov r6,r3,lsr#8
+ strb r4,[r12,#12]
+ strb r5,[r12,#13]
+ strb r6,[r12,#14]
+ strb r3,[r12,#15]
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r12,pc} @ restore registers and return by loading pc
+ ldmia sp!,{r4-r12,lr} @ alternative return path (its else directive is not visible in this view): restore, then return below
+ tst lr,#1 @ bit 0 of lr set means the caller is Thumb code
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-) (this word is the encoding of bx lr)
+.size AES_encrypt,.-AES_encrypt
+.type _armv4_AES_encrypt,%function @ core rounds: state in and out through r0-r3, r11 = round keys, r10 = Te. Uses r4-r9, r12 and lr as scratch
+.align 2
+ str lr,[sp,#-4]! @ push lr
+ ldmia r11!,{r4-r7} @ first four round key words, r11 advances by 16
+ eor r0,r0,r4 @ xor the state with the first round key
+ ldr r12,[r11,#240-16] @ round count stored at key+240 (r11 is already 16 past the key base)
+ eor r1,r1,r5
+ eor r2,r2,r6
+ eor r3,r3,r7
+ sub r12,r12,#1 @ loop runs rounds-1 times, the last round is handled separately below
+ mov lr,#255 @ byte mask, lr is free because it was pushed above
+ and r7,lr,r0
+ and r8,lr,r0,lsr#8
+ and r9,lr,r0,lsr#16
+ mov r0,r0,lsr#24
+ ldr r4,[r10,r7,lsl#2] @ Te3[s0>>0]
+ and r7,lr,r1,lsr#16 @ i0
+ ldr r5,[r10,r8,lsl#2] @ Te2[s0>>8]
+ and r8,lr,r1
+ ldr r6,[r10,r9,lsl#2] @ Te1[s0>>16]
+ and r9,lr,r1,lsr#8
+ ldr r0,[r10,r0,lsl#2] @ Te0[s0>>24]
+ mov r1,r1,lsr#24
+ ldr r7,[r10,r7,lsl#2] @ Te1[s1>>16]
+ ldr r8,[r10,r8,lsl#2] @ Te3[s1>>0]
+ ldr r9,[r10,r9,lsl#2] @ Te2[s1>>8]
+ eor r0,r0,r7,ror#8 @ the ror amounts turn the single word table into the Te1-Te3 values
+ ldr r1,[r10,r1,lsl#2] @ Te0[s1>>24]
+ and r7,lr,r2,lsr#8 @ i0
+ eor r5,r5,r8,ror#8
+ and r8,lr,r2,lsr#16 @ i1
+ eor r6,r6,r9,ror#8
+ and r9,lr,r2
+ ldr r7,[r10,r7,lsl#2] @ Te2[s2>>8]
+ eor r1,r1,r4,ror#24
+ ldr r8,[r10,r8,lsl#2] @ Te1[s2>>16]
+ mov r2,r2,lsr#24
+ ldr r9,[r10,r9,lsl#2] @ Te3[s2>>0]
+ eor r0,r0,r7,ror#16
+ ldr r2,[r10,r2,lsl#2] @ Te0[s2>>24]
+ and r7,lr,r3 @ i0
+ eor r1,r1,r8,ror#8
+ and r8,lr,r3,lsr#8 @ i1
+ eor r6,r6,r9,ror#16
+ and r9,lr,r3,lsr#16 @ i2
+ ldr r7,[r10,r7,lsl#2] @ Te3[s3>>0]
+ eor r2,r2,r5,ror#16
+ ldr r8,[r10,r8,lsl#2] @ Te2[s3>>8]
+ mov r3,r3,lsr#24
+ ldr r9,[r10,r9,lsl#2] @ Te1[s3>>16]
+ eor r0,r0,r7,ror#24
+ ldr r7,[r11],#16 @ next round key word 0, r11 advances by 16
+ eor r1,r1,r8,ror#16
+ ldr r3,[r10,r3,lsl#2] @ Te0[s3>>24]
+ eor r2,r2,r9,ror#8
+ ldr r4,[r11,#-12] @ next round key word 1
+ eor r3,r3,r6,ror#8
+ ldr r5,[r11,#-8] @ next round key word 2
+ eor r0,r0,r7
+ ldr r6,[r11,#-4] @ next round key word 3
+ and r7,lr,r0
+ eor r1,r1,r4
+ and r8,lr,r0,lsr#8
+ eor r2,r2,r5
+ and r9,lr,r0,lsr#16
+ eor r3,r3,r6
+ mov r0,r0,lsr#24
+ subs r12,r12,#1
+ bne .Lenc_loop @ NOTE(review): the .Lenc_loop label line is not visible in this view
+ add r10,r10,#2 @ last round: the byte loads below read byte offset 2 of each table word as Te4
+ ldrb r4,[r10,r7,lsl#2] @ Te4[s0>>0]
+ and r7,lr,r1,lsr#16 @ i0
+ ldrb r5,[r10,r8,lsl#2] @ Te4[s0>>8]
+ and r8,lr,r1
+ ldrb r6,[r10,r9,lsl#2] @ Te4[s0>>16]
+ and r9,lr,r1,lsr#8
+ ldrb r0,[r10,r0,lsl#2] @ Te4[s0>>24]
+ mov r1,r1,lsr#24
+ ldrb r7,[r10,r7,lsl#2] @ Te4[s1>>16]
+ ldrb r8,[r10,r8,lsl#2] @ Te4[s1>>0]
+ ldrb r9,[r10,r9,lsl#2] @ Te4[s1>>8]
+ eor r0,r7,r0,lsl#8
+ ldrb r1,[r10,r1,lsl#2] @ Te4[s1>>24]
+ and r7,lr,r2,lsr#8 @ i0
+ eor r5,r8,r5,lsl#8
+ and r8,lr,r2,lsr#16 @ i1
+ eor r6,r9,r6,lsl#8
+ and r9,lr,r2
+ ldrb r7,[r10,r7,lsl#2] @ Te4[s2>>8]
+ eor r1,r4,r1,lsl#24
+ ldrb r8,[r10,r8,lsl#2] @ Te4[s2>>16]
+ mov r2,r2,lsr#24
+ ldrb r9,[r10,r9,lsl#2] @ Te4[s2>>0]
+ eor r0,r7,r0,lsl#8
+ ldrb r2,[r10,r2,lsl#2] @ Te4[s2>>24]
+ and r7,lr,r3 @ i0
+ eor r1,r1,r8,lsl#16
+ and r8,lr,r3,lsr#8 @ i1
+ eor r6,r9,r6,lsl#8
+ and r9,lr,r3,lsr#16 @ i2
+ ldrb r7,[r10,r7,lsl#2] @ Te4[s3>>0]
+ eor r2,r5,r2,lsl#24
+ ldrb r8,[r10,r8,lsl#2] @ Te4[s3>>8]
+ mov r3,r3,lsr#24
+ ldrb r9,[r10,r9,lsl#2] @ Te4[s3>>16]
+ eor r0,r7,r0,lsl#8
+ ldr r7,[r11,#0] @ last round key, r11 is not advanced this time
+ ldrb r3,[r10,r3,lsl#2] @ Te4[s3>>24]
+ eor r1,r1,r8,lsl#8
+ ldr r4,[r11,#4]
+ eor r2,r2,r9,lsl#16
+ ldr r5,[r11,#8]
+ eor r3,r6,r3,lsl#24
+ ldr r6,[r11,#12]
+ eor r0,r0,r7
+ eor r1,r1,r4
+ eor r2,r2,r5
+ eor r3,r3,r6
+ sub r10,r10,#2 @ undo the +2 so r10 is the table base again
+ ldr pc,[sp],#4 @ pop and return
+.size _armv4_AES_encrypt,.-_armv4_AES_encrypt
+ AES_set_encrypt_key
+.type AES_set_encrypt_key,%function @ key setup: r0 = user key bytes, r1 = bits, r2 = key schedule. Returns r0 = 0, or -1 for a null pointer or a bit count other than 128, 192, 256
+.align 5 @ 2^5 = 32-byte alignment
+ sub r3,pc,#8 @ AES_set_encrypt_key
+ teq r0,#0 @ null user key pointer
+ moveq r0,#-1
+ beq .Labrt
+ teq r2,#0 @ null key schedule pointer
+ moveq r0,#-1
+ beq .Labrt
+ teq r1,#128 @ accept only 128, 192 or 256 bits
+ beq .Lok
+ teq r1,#192
+ beq .Lok
+ teq r1,#256
+ movne r0,#-1
+ bne .Labrt
+.Lok: stmdb sp!,{r4-r12,lr} @ arguments are valid: save registers (the early exits above push nothing)
+ sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 = AES_Te+1024. NOTE(review): the _armv4_AES_set_encrypt_key label is not visible in this view, presumably it marks this entry point
+ mov r12,r0 @ inp
+ mov lr,r1 @ bits
+ mov r11,r2 @ key
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r0,[r12,#3] @ load input data in endian-neutral
+ ldrb r4,[r12,#2] @ manner...
+ ldrb r5,[r12,#1]
+ ldrb r6,[r12,#0]
+ orr r0,r0,r4,lsl#8
+ ldrb r1,[r12,#7]
+ orr r0,r0,r5,lsl#16
+ ldrb r4,[r12,#6]
+ orr r0,r0,r6,lsl#24
+ ldrb r5,[r12,#5]
+ ldrb r6,[r12,#4]
+ orr r1,r1,r4,lsl#8
+ ldrb r2,[r12,#11]
+ orr r1,r1,r5,lsl#16
+ ldrb r4,[r12,#10]
+ orr r1,r1,r6,lsl#24
+ ldrb r5,[r12,#9]
+ ldrb r6,[r12,#8]
+ orr r2,r2,r4,lsl#8
+ ldrb r3,[r12,#15]
+ orr r2,r2,r5,lsl#16
+ ldrb r4,[r12,#14]
+ orr r2,r2,r6,lsl#24
+ ldrb r5,[r12,#13]
+ ldrb r6,[r12,#12]
+ orr r3,r3,r4,lsl#8
+ str r0,[r11],#16 @ store the first four schedule words, r11 advances by 16
+ orr r3,r3,r5,lsl#16
+ str r1,[r11,#-12]
+ orr r3,r3,r6,lsl#24
+ str r2,[r11,#-8]
+ str r3,[r11,#-4]
+ ldr r0,[r12,#0] @ word-load variant of the same read. NOTE(review): the else and endif directives around it are not visible in this view
+ ldr r1,[r12,#4]
+ ldr r2,[r12,#8]
+ ldr r3,[r12,#12]
+#ifdef __ARMEL__
+ rev r0,r0 @ little-endian only: byte-swap to match the byte-load variant
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+ str r0,[r11],#16
+ str r1,[r11,#-12]
+ str r2,[r11,#-8]
+ str r3,[r11,#-4]
+ teq lr,#128
+ bne .Lnot128
+ mov r12,#10 @ 128-bit keys: 10 rounds
+ str r12,[r11,#240-16] @ round count goes to key+240 (r11 is 16 past the key base)
+ add r6,r10,#256 @ rcon
+ mov lr,#255 @ byte mask, the bit count in lr is no longer needed
+ and r5,lr,r3,lsr#24 @ take the previous word in r3 apart byte by byte
+ and r7,lr,r3,lsr#16
+ ldrb r5,[r10,r5] @ substitute each byte through Te4
+ and r8,lr,r3,lsr#8
+ ldrb r7,[r10,r7]
+ and r9,lr,r3
+ ldrb r8,[r10,r8]
+ orr r5,r5,r7,lsl#24 @ reassemble with every byte moved up one position (word rotated left by 8 bits)
+ ldrb r9,[r10,r9]
+ orr r5,r5,r8,lsl#16
+ ldr r4,[r6],#4 @ rcon[i++]
+ orr r5,r5,r9,lsl#8
+ eor r5,r5,r4 @ xor in the round constant
+ eor r0,r0,r5 @ rk[4]=rk[0]^...
+ eor r1,r1,r0 @ rk[5]=rk[1]^rk[4]
+ str r0,[r11],#16
+ eor r2,r2,r1 @ rk[6]=rk[2]^rk[5]
+ str r1,[r11,#-12]
+ eor r3,r3,r2 @ rk[7]=rk[3]^rk[6]
+ str r2,[r11,#-8]
+ subs r12,r12,#1 @ ten iterations, four words each
+ str r3,[r11,#-4]
+ bne .L128_loop @ NOTE(review): the .L128_loop label line is not visible in this view
+ sub r2,r11,#176 @ r2 = key base again (16 + 10*16 bytes were written)
+ b .Ldone
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r8,[r12,#19] @ 192 and 256-bit keys: read key bytes 16-23. NOTE(review): the .Lnot128 label line is not visible in this view
+ ldrb r4,[r12,#18]
+ ldrb r5,[r12,#17]
+ ldrb r6,[r12,#16]
+ orr r8,r8,r4,lsl#8
+ ldrb r9,[r12,#23]
+ orr r8,r8,r5,lsl#16
+ ldrb r4,[r12,#22]
+ orr r8,r8,r6,lsl#24
+ ldrb r5,[r12,#21]
+ ldrb r6,[r12,#20]
+ orr r9,r9,r4,lsl#8
+ orr r9,r9,r5,lsl#16
+ str r8,[r11],#8 @ schedule words 4 and 5, r11 advances by 8
+ orr r9,r9,r6,lsl#24
+ str r9,[r11,#-4]
+ ldr r8,[r12,#16] @ word-load variant of the same read
+ ldr r9,[r12,#20]
+#ifdef __ARMEL__
+ rev r8,r8
+ rev r9,r9
+ str r8,[r11],#8
+ str r9,[r11,#-4]
+ teq lr,#192
+ bne .Lnot192
+ mov r12,#12 @ 192-bit keys: 12 rounds
+ str r12,[r11,#240-24] @ round count at key+240 (r11 is 24 past the key base)
+ add r6,r10,#256 @ rcon
+ mov lr,#255
+ mov r12,#8 @ eight iterations, r11 advances by 24 in each
+ and r5,lr,r9,lsr#24 @ same substitute and rotate step as the 128-bit case, applied to r9
+ and r7,lr,r9,lsr#16
+ ldrb r5,[r10,r5]
+ and r8,lr,r9,lsr#8
+ ldrb r7,[r10,r7]
+ and r9,lr,r9
+ ldrb r8,[r10,r8]
+ orr r5,r5,r7,lsl#24
+ ldrb r9,[r10,r9]
+ orr r5,r5,r8,lsl#16
+ ldr r4,[r6],#4 @ rcon[i++]
+ orr r5,r5,r9,lsl#8
+ eor r9,r5,r4
+ eor r0,r0,r9 @ rk[6]=rk[0]^...
+ eor r1,r1,r0 @ rk[7]=rk[1]^rk[6]
+ str r0,[r11],#24
+ eor r2,r2,r1 @ rk[8]=rk[2]^rk[7]
+ str r1,[r11,#-20]
+ eor r3,r3,r2 @ rk[9]=rk[3]^rk[8]
+ str r2,[r11,#-16]
+ subs r12,r12,#1
+ str r3,[r11,#-12]
+ subeq r2,r11,#216 @ on the last iteration: r2 = key base again (24 + 8*24 bytes)
+ beq .Ldone
+ ldr r7,[r11,#-32]
+ ldr r8,[r11,#-28]
+ eor r7,r7,r3 @ rk[10]=rk[4]^rk[9]
+ eor r9,r8,r7 @ rk[11]=rk[5]^rk[10]
+ str r7,[r11,#-8]
+ str r9,[r11,#-4]
+ b .L192_loop @ NOTE(review): the .L192_loop label line is not visible in this view
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r8,[r12,#27] @ 256-bit keys: read key bytes 24-31. NOTE(review): the .Lnot192 label line is not visible in this view
+ ldrb r4,[r12,#26]
+ ldrb r5,[r12,#25]
+ ldrb r6,[r12,#24]
+ orr r8,r8,r4,lsl#8
+ ldrb r9,[r12,#31]
+ orr r8,r8,r5,lsl#16
+ ldrb r4,[r12,#30]
+ orr r8,r8,r6,lsl#24
+ ldrb r5,[r12,#29]
+ ldrb r6,[r12,#28]
+ orr r9,r9,r4,lsl#8
+ orr r9,r9,r5,lsl#16
+ str r8,[r11],#8 @ schedule words 6 and 7, r11 advances by 8
+ orr r9,r9,r6,lsl#24
+ str r9,[r11,#-4]
+ ldr r8,[r12,#24] @ word-load variant of the same read
+ ldr r9,[r12,#28]
+#ifdef __ARMEL__
+ rev r8,r8
+ rev r9,r9
+ str r8,[r11],#8
+ str r9,[r11,#-4]
+ mov r12,#14 @ 256-bit keys: 14 rounds (no bit test needed, the entry checks left only this case)
+ str r12,[r11,#240-32] @ round count at key+240 (r11 is 32 past the key base)
+ add r6,r10,#256 @ rcon
+ mov lr,#255
+ mov r12,#7 @ seven iterations, r11 advances by 32 in each
+ and r5,lr,r9,lsr#24 @ substitute and rotate step applied to r9
+ and r7,lr,r9,lsr#16
+ ldrb r5,[r10,r5]
+ and r8,lr,r9,lsr#8
+ ldrb r7,[r10,r7]
+ and r9,lr,r9
+ ldrb r8,[r10,r8]
+ orr r5,r5,r7,lsl#24
+ ldrb r9,[r10,r9]
+ orr r5,r5,r8,lsl#16
+ ldr r4,[r6],#4 @ rcon[i++]
+ orr r5,r5,r9,lsl#8
+ eor r9,r5,r4
+ eor r0,r0,r9 @ rk[8]=rk[0]^...
+ eor r1,r1,r0 @ rk[9]=rk[1]^rk[8]
+ str r0,[r11],#32
+ eor r2,r2,r1 @ rk[10]=rk[2]^rk[9]
+ str r1,[r11,#-28]
+ eor r3,r3,r2 @ rk[11]=rk[3]^rk[10]
+ str r2,[r11,#-24]
+ subs r12,r12,#1
+ str r3,[r11,#-20]
+ subeq r2,r11,#256 @ on the last iteration: r2 = key base again (32 + 7*32 bytes)
+ beq .Ldone
+ and r5,lr,r3 @ second half of the step: substitute the bytes of r3 through Te4
+ and r7,lr,r3,lsr#8
+ ldrb r5,[r10,r5]
+ and r8,lr,r3,lsr#16
+ ldrb r7,[r10,r7]
+ and r9,lr,r3,lsr#24
+ ldrb r8,[r10,r8]
+ orr r5,r5,r7,lsl#8 @ bytes keep their positions here: no rotation and no round constant
+ ldrb r9,[r10,r9]
+ orr r5,r5,r8,lsl#16
+ ldr r4,[r11,#-48]
+ orr r5,r5,r9,lsl#24
+ ldr r7,[r11,#-44]
+ ldr r8,[r11,#-40]
+ eor r4,r4,r5 @ rk[12]=rk[4]^...
+ ldr r9,[r11,#-36]
+ eor r7,r7,r4 @ rk[13]=rk[5]^rk[12]
+ str r4,[r11,#-16]
+ eor r8,r8,r7 @ rk[14]=rk[6]^rk[13]
+ str r7,[r11,#-12]
+ eor r9,r9,r8 @ rk[15]=rk[7]^rk[14]
+ str r8,[r11,#-8]
+ str r9,[r11,#-4]
+ b .L256_loop @ NOTE(review): the .L256_loop label line is not visible in this view
+.Ldone: mov r0,#0 @ success
+ ldmia sp!,{r4-r12,lr}
+.Labrt: tst lr,#1 @ common exit, also branched to by AES_set_decrypt_key. Bit 0 of lr set means a Thumb caller
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-) (this word is the encoding of bx lr)
+.size AES_set_encrypt_key,.-AES_set_encrypt_key
+ AES_set_decrypt_key
+.type AES_set_decrypt_key,%function @ same arguments as the encrypt key setup. Builds the encrypt schedule, reverses the round key order, then transforms the inner round keys in place
+.align 5 @ 2^5 = 32-byte alignment
+ str lr,[sp,#-4]! @ push lr
+ bl _armv4_AES_set_encrypt_key @ build the encryption schedule first
+ teq r0,#0
+ ldrne lr,[sp],#4 @ pop lr
+ bne .Labrt @ failure: leave through the encrypt setup exit with its -1 still in r0
+ stmdb sp!,{r4-r12} @ lr is already on the stack, so the final ldmia below can load pc
+ ldr r12,[r2,#240] @ AES_set_encrypt_key preserves r2,
+ mov r11,r2 @ which is AES_KEY *key
+ mov r7,r2 @ r7 = first round key
+ add r8,r2,r12,lsl#4 @ r8 = last round key (key + 16*rounds)
+.Linv: ldr r0,[r7] @ swap the 16-byte round keys at r7 and r8, walking inward
+ ldr r1,[r7,#4]
+ ldr r2,[r7,#8]
+ ldr r3,[r7,#12]
+ ldr r4,[r8]
+ ldr r5,[r8,#4]
+ ldr r6,[r8,#8]
+ ldr r9,[r8,#12]
+ str r0,[r8],#-16 @ r8 steps back by 16, so the next three stores add 16 to their offsets
+ str r1,[r8,#16+4]
+ str r2,[r8,#16+8]
+ str r3,[r8,#16+12]
+ str r4,[r7],#16
+ str r5,[r7,#-12]
+ str r6,[r7,#-8]
+ str r9,[r7,#-4]
+ teq r7,r8 @ stop when the two pointers meet
+ bne .Linv
+ ldr r0,[r11,#16]! @ prefetch tp1
+ mov r7,#0x80
+ mov r8,#0x1b
+ orr r7,r7,#0x8000
+ orr r8,r8,#0x1b00
+ orr r7,r7,r7,lsl#16 @ r7 = 0x80808080, top bit of each byte
+ orr r8,r8,r8,lsl#16 @ r8 = 0x1b1b1b1b, value xored into bytes whose top bit was set
+ sub r12,r12,#1
+ mvn r9,r7 @ r9 = 0x7f7f7f7f, low seven bits of each byte
+ mov r12,r12,lsl#2 @ (rounds-1)*4
+.Lmix: and r4,r0,r7 @ one schedule word per iteration. Top bits of the four packed bytes
+ and r1,r0,r9
+ sub r4,r4,r4,lsr#7 @ 0x80 becomes 0x7f in each byte that had its top bit set
+ and r4,r4,r8 @ and that becomes 0x1b
+ eor r1,r4,r1,lsl#1 @ tp2
+ and r4,r1,r7
+ and r2,r1,r9
+ sub r4,r4,r4,lsr#7
+ and r4,r4,r8
+ eor r2,r4,r2,lsl#1 @ tp4
+ and r4,r2,r7
+ and r3,r2,r9
+ sub r4,r4,r4,lsr#7
+ and r4,r4,r8
+ eor r3,r4,r3,lsl#1 @ tp8
+ eor r4,r1,r2
+ eor r5,r0,r3 @ tp9
+ eor r4,r4,r3 @ tpe
+ eor r4,r4,r1,ror#24
+ eor r4,r4,r5,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8)
+ eor r4,r4,r2,ror#16
+ eor r4,r4,r5,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16)
+ eor r4,r4,r5,ror#8 @ ^= ROTATE(tp9,24)
+ ldr r0,[r11,#4] @ prefetch tp1
+ str r4,[r11],#4 @ write the transformed word back in place
+ subs r12,r12,#1
+ bne .Lmix
+ mov r0,#0 @ success
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r12,pc} @ restore registers and return by loading pc
+ ldmia sp!,{r4-r12,lr} @ alternative return path (its else directive is not visible in this view)
+ tst lr,#1 @ bit 0 of lr set means the caller is Thumb code
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-) (this word is the encoding of bx lr)
+.size AES_set_decrypt_key,.-AES_set_decrypt_key
+.type AES_Td,%object @ decryption lookup data: 256 words, then 256 Td4 bytes
+.align 5 @ 2^5 = 32-byte alignment
+.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 @ word table: the round code indexes it with a byte scaled by 4 and rotates the result to get the Td1-Td3 values named in its comments
+.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
+.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
+.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
+.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
+.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
+.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
+.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
+.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
+.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
+.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
+.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
+.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
+.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
+.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
+.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
+.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
+.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
+.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
+.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
+.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
+.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
+.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
+.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
+.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
+.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
+.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
+.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
+.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
+.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
+.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
+.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
+.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
+.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
+.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
+.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
+.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
+.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
+.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
+.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
+.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
+.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
+.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
+.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
+.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
+.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
+.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
+.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
+.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
+.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
+.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
+.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
+.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
+.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
+.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
+.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
+.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
+.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
+.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
+.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
+.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
+.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
+.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
+.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
+@ Td4[256]: byte table, 1024 bytes past the start of the words above. The last decryption round reads it with plain byte indices
+.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+.size AES_Td,.-AES_Td @ NOTE(review): no AES_Td label line is visible in this view - confirm against the full file
+@ void AES_decrypt(const unsigned char *in, unsigned char *out,
+@ const AES_KEY *key) { AES_decrypt
+.type AES_decrypt,%function @ public entry: r0 = in, r1 = out, r2 = key. Reads 16 bytes at in, runs the core routine, writes 16 bytes at out
+.align 5 @ 2^5 = 32-byte alignment
+ sub r3,pc,#8 @ AES_decrypt (pc reads 8 bytes ahead, so r3 = address of this instruction)
+ stmdb sp!,{r1,r4-r12,lr} @ save the out pointer, r4-r12 and the return address
+ mov r12,r0 @ inp
+ mov r11,r2 @ key schedule pointer, consumed by the core routine
+ sub r10,r3,#AES_decrypt-AES_Td @ Td (table base computed relative to r3, no absolute address needed)
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+ ldrb r0,[r12,#3] @ load input data in endian-neutral
+ ldrb r4,[r12,#2] @ manner...
+ ldrb r5,[r12,#1]
+ ldrb r6,[r12,#0]
+ orr r0,r0,r4,lsl#8
+ ldrb r1,[r12,#7]
+ orr r0,r0,r5,lsl#16
+ ldrb r4,[r12,#6]
+ orr r0,r0,r6,lsl#24 @ r0 = input bytes 0-3, byte 0 in the top bits
+ ldrb r5,[r12,#5]
+ ldrb r6,[r12,#4]
+ orr r1,r1,r4,lsl#8
+ ldrb r2,[r12,#11]
+ orr r1,r1,r5,lsl#16
+ ldrb r4,[r12,#10]
+ orr r1,r1,r6,lsl#24 @ r1 = input bytes 4-7
+ ldrb r5,[r12,#9]
+ ldrb r6,[r12,#8]
+ orr r2,r2,r4,lsl#8
+ ldrb r3,[r12,#15]
+ orr r2,r2,r5,lsl#16
+ ldrb r4,[r12,#14]
+ orr r2,r2,r6,lsl#24 @ r2 = input bytes 8-11
+ ldrb r5,[r12,#13]
+ ldrb r6,[r12,#12]
+ orr r3,r3,r4,lsl#8
+ orr r3,r3,r5,lsl#16
+ orr r3,r3,r6,lsl#24 @ r3 = input bytes 12-15
+ ldr r0,[r12,#0] @ word-load variant of the same input read. NOTE(review): the else and endif directives that separate it from the byte loads are not visible in this view
+ ldr r1,[r12,#4]
+ ldr r2,[r12,#8]
+ ldr r3,[r12,#12]
+#ifdef __ARMEL__
+ rev r0,r0 @ little-endian only: byte-swap so that byte 0 lands in the top bits, as in the byte-load variant
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+ bl _armv4_AES_decrypt @ run the rounds: state in r0-r3, key in r11, table in r10
+ ldr r12,[sp],#4 @ pop out (the r1 saved by the stmdb above)
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+#ifdef __ARMEL__
+ rev r0,r0 @ little-endian only: swap back before the word stores
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+ str r0,[r12,#0] @ word-store variant of the output write
+ str r1,[r12,#4]
+ str r2,[r12,#8]
+ str r3,[r12,#12]
+ mov r4,r0,lsr#24 @ write output in endian-neutral
+ mov r5,r0,lsr#16 @ manner...
+ mov r6,r0,lsr#8
+ strb r4,[r12,#0]
+ strb r5,[r12,#1]
+ mov r4,r1,lsr#24
+ strb r6,[r12,#2]
+ mov r5,r1,lsr#16
+ strb r0,[r12,#3]
+ mov r6,r1,lsr#8
+ strb r4,[r12,#4]
+ strb r5,[r12,#5]
+ mov r4,r2,lsr#24
+ strb r6,[r12,#6]
+ mov r5,r2,lsr#16
+ strb r1,[r12,#7]
+ mov r6,r2,lsr#8
+ strb r4,[r12,#8]
+ strb r5,[r12,#9]
+ mov r4,r3,lsr#24
+ strb r6,[r12,#10]
+ mov r5,r3,lsr#16
+ strb r2,[r12,#11]
+ mov r6,r3,lsr#8
+ strb r4,[r12,#12]
+ strb r5,[r12,#13]
+ strb r6,[r12,#14]
+ strb r3,[r12,#15]
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r12,pc} @ restore registers and return by loading pc
+ ldmia sp!,{r4-r12,lr} @ alternative return path (its else directive is not visible in this view): restore, then return below
+ tst lr,#1 @ bit 0 of lr set means the caller is Thumb code
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-) (this word is the encoding of bx lr)
+.size AES_decrypt,.-AES_decrypt
+.type _armv4_AES_decrypt,%function @ core rounds: state in and out through r0-r3, r11 = round keys, r10 = Td. Uses r4-r9, r12 and lr as scratch
+.align 2
+ str lr,[sp,#-4]! @ push lr
+ ldmia r11!,{r4-r7} @ first four round key words, r11 advances by 16
+ eor r0,r0,r4 @ xor the state with the first round key
+ ldr r12,[r11,#240-16] @ round count stored at key+240 (r11 is already 16 past the key base)
+ eor r1,r1,r5
+ eor r2,r2,r6
+ eor r3,r3,r7
+ sub r12,r12,#1 @ loop runs rounds-1 times, the last round is handled separately below
+ mov lr,#255 @ byte mask, lr is free because it was pushed above
+ and r7,lr,r0,lsr#16
+ and r8,lr,r0,lsr#8
+ and r9,lr,r0
+ mov r0,r0,lsr#24
+ ldr r4,[r10,r7,lsl#2] @ Td1[s0>>16]
+ and r7,lr,r1 @ i0
+ ldr r5,[r10,r8,lsl#2] @ Td2[s0>>8]
+ and r8,lr,r1,lsr#16
+ ldr r6,[r10,r9,lsl#2] @ Td3[s0>>0]
+ and r9,lr,r1,lsr#8
+ ldr r0,[r10,r0,lsl#2] @ Td0[s0>>24]
+ mov r1,r1,lsr#24
+ ldr r7,[r10,r7,lsl#2] @ Td3[s1>>0]
+ ldr r8,[r10,r8,lsl#2] @ Td1[s1>>16]
+ ldr r9,[r10,r9,lsl#2] @ Td2[s1>>8]
+ eor r0,r0,r7,ror#24 @ the ror amounts turn the single word table into the Td1-Td3 values
+ ldr r1,[r10,r1,lsl#2] @ Td0[s1>>24]
+ and r7,lr,r2,lsr#8 @ i0
+ eor r5,r8,r5,ror#8
+ and r8,lr,r2 @ i1
+ eor r6,r9,r6,ror#8
+ and r9,lr,r2,lsr#16
+ ldr r7,[r10,r7,lsl#2] @ Td2[s2>>8]
+ eor r1,r1,r4,ror#8
+ ldr r8,[r10,r8,lsl#2] @ Td3[s2>>0]
+ mov r2,r2,lsr#24
+ ldr r9,[r10,r9,lsl#2] @ Td1[s2>>16]
+ eor r0,r0,r7,ror#16
+ ldr r2,[r10,r2,lsl#2] @ Td0[s2>>24]
+ and r7,lr,r3,lsr#16 @ i0
+ eor r1,r1,r8,ror#24
+ and r8,lr,r3,lsr#8 @ i1
+ eor r6,r9,r6,ror#8
+ and r9,lr,r3 @ i2
+ ldr r7,[r10,r7,lsl#2] @ Td1[s3>>16]
+ eor r2,r2,r5,ror#8
+ ldr r8,[r10,r8,lsl#2] @ Td2[s3>>8]
+ mov r3,r3,lsr#24
+ ldr r9,[r10,r9,lsl#2] @ Td3[s3>>0]
+ eor r0,r0,r7,ror#8
+ ldr r7,[r11],#16 @ next round key word 0, r11 advances by 16
+ eor r1,r1,r8,ror#16
+ ldr r3,[r10,r3,lsl#2] @ Td0[s3>>24]
+ eor r2,r2,r9,ror#24
+ ldr r4,[r11,#-12] @ next round key word 1
+ eor r0,r0,r7
+ ldr r5,[r11,#-8] @ next round key word 2
+ eor r3,r3,r6,ror#8
+ ldr r6,[r11,#-4] @ next round key word 3
+ and r7,lr,r0,lsr#16
+ eor r1,r1,r4
+ and r8,lr,r0,lsr#8
+ eor r2,r2,r5
+ and r9,lr,r0
+ eor r3,r3,r6
+ mov r0,r0,lsr#24
+ subs r12,r12,#1
+ bne .Ldec_loop @ NOTE(review): the .Ldec_loop label line is not visible in this view
+ add r10,r10,#1024 @ last round: r10 = Td4 byte table, which follows the 256 words
+ ldr r5,[r10,#0] @ prefetch Td4
+ ldr r6,[r10,#32] @ one load per 32 bytes of the 256-byte table
+ ldr r4,[r10,#64]
+ ldr r5,[r10,#96]
+ ldr r6,[r10,#128]
+ ldr r4,[r10,#160]
+ ldr r5,[r10,#192]
+ ldr r6,[r10,#224] @ the loaded values are overwritten below before any use
+ ldrb r0,[r10,r0] @ Td4[s0>>24]
+ ldrb r4,[r10,r7] @ Td4[s0>>16]
+ and r7,lr,r1 @ i0
+ ldrb r5,[r10,r8] @ Td4[s0>>8]
+ and r8,lr,r1,lsr#16
+ ldrb r6,[r10,r9] @ Td4[s0>>0]
+ and r9,lr,r1,lsr#8
+ ldrb r7,[r10,r7] @ Td4[s1>>0]
+ ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24]
+ ldrb r8,[r10,r8] @ Td4[s1>>16]
+ eor r0,r7,r0,lsl#24
+ ldrb r9,[r10,r9] @ Td4[s1>>8]
+ eor r1,r4,r1,lsl#8
+ and r7,lr,r2,lsr#8 @ i0
+ eor r5,r5,r8,lsl#8
+ and r8,lr,r2 @ i1
+ ldrb r7,[r10,r7] @ Td4[s2>>8]
+ eor r6,r6,r9,lsl#8
+ ldrb r8,[r10,r8] @ Td4[s2>>0]
+ and r9,lr,r2,lsr#16
+ ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24]
+ eor r0,r0,r7,lsl#8
+ ldrb r9,[r10,r9] @ Td4[s2>>16]
+ eor r1,r8,r1,lsl#16
+ and r7,lr,r3,lsr#16 @ i0
+ eor r2,r5,r2,lsl#16
+ and r8,lr,r3,lsr#8 @ i1
+ ldrb r7,[r10,r7] @ Td4[s3>>16]
+ eor r6,r6,r9,lsl#16
+ ldrb r8,[r10,r8] @ Td4[s3>>8]
+ and r9,lr,r3 @ i2
+ ldrb r9,[r10,r9] @ Td4[s3>>0]
+ ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24]
+ eor r0,r0,r7,lsl#16
+ ldr r7,[r11,#0] @ last round key, r11 is not advanced this time
+ eor r1,r1,r8,lsl#8
+ ldr r4,[r11,#4]
+ eor r2,r9,r2,lsl#8
+ ldr r5,[r11,#8]
+ eor r3,r6,r3,lsl#24
+ ldr r6,[r11,#12]
+ eor r0,r0,r7
+ eor r1,r1,r4
+ eor r2,r2,r5
+ eor r3,r3,r6
+ sub r10,r10,#1024 @ undo the +1024 so r10 is the table base again
+ ldr pc,[sp],#4 @ pop and return
+.size _armv4_AES_decrypt,.-_armv4_AES_decrypt
+.asciz "AES for ARMv4, CRYPTOGAMS by <>"
+.align 2
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
diff --git a/crypto/libressl/crypto/aes/aes-elf-x86_64.S b/crypto/libressl/crypto/aes/aes-elf-x86_64.S
new file mode 100644
index 0000000..83c0053
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aes-elf-x86_64.S
@@ -0,0 +1,2547 @@
+#include "x86_arch.h"
+/*
+ * _x86_64_AES_encrypt -- internal, table-driven AES block encryption.
+ *
+ * In:   %eax,%ebx,%ecx,%edx  four 32-bit state words
+ *       %r15                 key schedule (round count at offset 240)
+ *       %r14                 lookup table base (callers in this file
+ *                            load .LAES_Te here)
+ * Out:  %eax,%ebx,%ecx,%edx  resulting state words
+ * Clobbers %rsi,%rdi,%rbp,%r8,%r10-%r13; %r15 is advanced by 16 bytes
+ * per loop iteration.
+ *
+ * Table entries are addressed with an 8-byte stride; displacements
+ * 1..3 read an entry at a byte offset (the table data in this file
+ * stores every 32-bit word twice, so these reads presumably yield
+ * byte-rotated variants of the entry).
+ */
+.type _x86_64_AES_encrypt,@function
+.align 16
+_x86_64_AES_encrypt:
+/* Initial round-key addition. */
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+/* The loop below runs (rounds - 1) times. */
+ movl 240(%r15),%r13d
+ subl $1,%r13d
+ jmp .Lenc_loop
+.align 16
+.Lenc_loop:
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+ movzbl %dh,%esi
+ shrl $16,%ecx
+ movzbl %ah,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+ shrl $16,%ebx
+/* Step to the next 16-byte round key. */
+ leaq 16(%r15),%r15
+ shrl $16,%eax
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+ movl 12(%r15),%edx
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rdi,8),%r12d
+ xorl 1(%r14,%rbp,8),%r8d
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+/* New state = table output XOR round key. */
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ subl $1,%r13d
+ jnz .Lenc_loop
+/*
+ * Last round: single bytes are picked out of table entries, either
+ * with movzbl at displacement 2 or with the masks 0x0000ff00 (65280),
+ * 0x00ff0000 (16711680) and 0xff000000 (4278190080).
+ */
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl 2(%r14,%rsi,8),%r10d
+ movzbl 2(%r14,%rdi,8),%r11d
+ movzbl 2(%r14,%rbp,8),%r12d
+ movzbl %dl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl 2(%r14,%rsi,8),%r8d
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+ andl $65280,%edi
+ andl $65280,%ebp
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%ecx
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ shrl $16,%edx
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+ andl $65280,%esi
+ andl $65280,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%eax
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+ andl $16711680,%esi
+ andl $16711680,%edi
+ andl $16711680,%ebp
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+ movzbl %bl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 2(%r14,%rbp,8),%ebp
+ andl $16711680,%esi
+ andl $4278190080,%edi
+ andl $4278190080,%ebp
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+/* %r15 was not advanced for this round, hence the 16+ displacements. */
+ movl 16+12(%r15),%edx
+ movl 2(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 16+0(%r15),%eax
+ andl $4278190080,%esi
+ andl $4278190080,%edi
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq
+.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt
+/*
+ * _x86_64_AES_encrypt_compact -- internal AES block encryption that
+ * uses only a 256-byte table (byte-indexed loads from (%r14)).
+ *
+ * In:   %eax,%ebx,%ecx,%edx  four 32-bit state words
+ *       %r15                 key schedule
+ *       %r14                 base of the 256-byte table
+ *       16(%rsp)             (as seen inside this routine) address of
+ *                            the last round key; callers in this file
+ *                            store it at 8(%rsp) before the call
+ * Out:  %eax,%ebx,%ecx,%edx  resulting state words
+ * Clobbers %rsi,%rdi,%rbp,%r8-%r13; %r15 is advanced.
+ */
+.type _x86_64_AES_encrypt_compact,@function
+.align 16
+_x86_64_AES_encrypt_compact:
+/* Touch the table at 32-byte steps; the loaded values are not used. */
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp .Lenc_loop_compact
+.align 16
+.Lenc_loop_compact:
+/* Add the round key, then advance to the next one. */
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15
+/* Byte-wise table substitution; output bytes are reassembled below. */
+ movzbl %al,%r10d
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+ movzbl %dl,%r8d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+ movzbl %dh,%ebp
+ movzbl %ah,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+ movzbl %cl,%edi
+ shll $8,%r9d
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+ movzbl %bl,%ebp
+ movzbl %dh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+ movzbl %ah,%edi
+ shrl $8,%ecx
+ shrl $8,%ebx
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rcx,1),%edx
+ movzbl (%r14,%rbx,1),%ecx
+ shll $16,%r9d
+ shll $16,%r13d
+ shll $16,%ebp
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+ xorl %ebp,%r8d
+ shll $24,%esi
+ shll $24,%edi
+ shll $24,%edx
+ xorl %esi,%r10d
+ shll $24,%ecx
+ xorl %edi,%r11d
+ movl %r10d,%eax
+ movl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+/* Stop once %r15 reaches the last round key; that round skips the mixing step. */
+ cmpq 16(%rsp),%r15
+ je .Lenc_compact_done
+/*
+ * Column mixing on packed bytes.  The doubling of four bytes at once
+ * uses 0x80808080 (2155905152), 0xfefefefe (4278124286) and
+ * 0x1b1b1b1b (454761243); rotations combine the terms.
+ */
+ movl %eax,%esi
+ movl %ebx,%edi
+ andl $2155905152,%esi
+ andl $2155905152,%edi
+ movl %esi,%r10d
+ movl %edi,%r11d
+ shrl $7,%r10d
+ leal (%rax,%rax,1),%r8d
+ shrl $7,%r11d
+ leal (%rbx,%rbx,1),%r9d
+ subl %r10d,%esi
+ subl %r11d,%edi
+ andl $4278124286,%r8d
+ andl $4278124286,%r9d
+ andl $454761243,%esi
+ andl $454761243,%edi
+ movl %eax,%r10d
+ movl %ebx,%r11d
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movl %ecx,%esi
+ movl %edx,%edi
+ roll $24,%eax
+ roll $24,%ebx
+ andl $2155905152,%esi
+ andl $2155905152,%edi
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movl %esi,%r12d
+ movl %edi,%ebp
+ rorl $16,%r10d
+ rorl $16,%r11d
+ shrl $7,%r12d
+ leal (%rcx,%rcx,1),%r8d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ shrl $7,%ebp
+ leal (%rdx,%rdx,1),%r9d
+ rorl $8,%r10d
+ rorl $8,%r11d
+ subl %r12d,%esi
+ subl %ebp,%edi
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ andl $4278124286,%r8d
+ andl $4278124286,%r9d
+ andl $454761243,%esi
+ andl $454761243,%edi
+ movl %ecx,%r12d
+ movl %edx,%ebp
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ roll $24,%ecx
+ roll $24,%edx
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+/* The loads from (%r14) below are overwritten before use (presumably cache touches). */
+ movl 0(%r14),%esi
+ rorl $16,%r12d
+ rorl $16,%ebp
+ movl 64(%r14),%edi
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ movl 128(%r14),%r8d
+ rorl $8,%r12d
+ rorl $8,%ebp
+ movl 192(%r14),%r9d
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ jmp .Lenc_loop_compact
+.align 16
+.Lenc_compact_done:
+/* Final round-key addition. */
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq
+.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
+/*
+ * AES_encrypt / asm_AES_encrypt -- encrypt one 16-byte block.
+ *
+ * In (as used below):  %rdi = input block, %rsi = output block,
+ *                      %rdx = key schedule (round count at offset 240).
+ * Saves and restores %rbx,%rbp,%r12-%r15 and builds a 64-byte aligned
+ * scratch frame:
+ *   0(%rsp)  key schedule     8(%rsp)  address of the last round key
+ *   16(%rsp) output pointer   24(%rsp) caller stack pointer
+ */
+.globl AES_encrypt
+.type AES_encrypt,@function
+.align 16
+.globl asm_AES_encrypt
+.hidden asm_AES_encrypt
+asm_AES_encrypt:
+AES_encrypt:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+/*
+ * Align the frame to 64 bytes and shift it by a multiple of 64
+ * (mask 960 = 0x3c0) derived from the key address; presumably this
+ * keeps the frame and key schedule from aliasing in the cache.
+ */
+ movq %rsp,%r10
+ leaq -63(%rdx),%rcx
+ andq $-64,%rsp
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx
+ subq %rcx,%rsp
+ subq $32,%rsp
+ movq %rsi,16(%rsp)
+ movq %r10,24(%rsp)
+ movq %rdx,%r15
+ movl 240(%r15),%r13d
+/* Load the four state words from the input block. */
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+/* %rbp = key + 16 * rounds, i.e. the last round key. */
+ shll $4,%r13d
+ leaq (%r15,%r13,1),%rbp
+ movq %r15,(%rsp)
+ movq %rbp,8(%rsp)
+/*
+ * Pick one of four table positions 256 bytes apart (offset 0, 256,
+ * 512 or 768 past .LAES_Te+2048) based on the stack address.
+ */
+ leaq .LAES_Te+2048(%rip),%r14
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp
+ leaq (%r14,%rbp,1),%r14
+ call _x86_64_AES_encrypt_compact
+/* Store the result, then unwind via the saved stack pointer. */
+ movq 16(%rsp),%r9
+ movq 24(%rsp),%rsi
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+ retq
+.size AES_encrypt,.-AES_encrypt
+/*
+ * _x86_64_AES_decrypt -- internal, table-driven AES block decryption.
+ *
+ * In:   %eax,%ebx,%ecx,%edx  four 32-bit state words
+ *       %r15                 key schedule (round count at offset 240)
+ *       %r14                 lookup table base (callers in this file
+ *                            load .LAES_Td here)
+ * Out:  %eax,%ebx,%ecx,%edx  resulting state words
+ * Clobbers %rsi,%rdi,%rbp,%r8,%r10-%r13; %r15 is advanced by 16 bytes
+ * per loop iteration.  %r14 is moved temporarily and restored.
+ */
+.type _x86_64_AES_decrypt,@function
+.align 16
+_x86_64_AES_decrypt:
+/* Initial round-key addition. */
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+/* The loop below runs (rounds - 1) times. */
+ movl 240(%r15),%r13d
+ subl $1,%r13d
+ jmp .Ldec_loop
+.align 16
+.Ldec_loop:
+/* Table entries use an 8-byte stride; displacements 1..3 read at byte offsets. */
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+ movzbl %bh,%esi
+ shrl $16,%eax
+ movzbl %ch,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+ shrl $16,%ebx
+/* Step to the next 16-byte round key. */
+ leaq 16(%r15),%r15
+ shrl $16,%ecx
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+ movzbl %dh,%esi
+ movl 12(%r15),%edx
+ movzbl %ah,%ebp
+ xorl 1(%r14,%rsi,8),%r12d
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rbp,8),%r8d
+ xorl %r10d,%eax
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r12d,%ecx
+ xorl %r11d,%ebx
+ xorl %r8d,%edx
+ subl $1,%r13d
+ jnz .Ldec_loop
+/*
+ * Last round: switch %r14 to the byte table located 2048 bytes past
+ * the word table, substitute byte by byte, then restore %r14.
+ */
+ leaq 2048(%r14),%r14
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl (%r14,%rsi,1),%r10d
+ movzbl (%r14,%rdi,1),%r11d
+ movzbl (%r14,%rbp,1),%r12d
+ movzbl %dl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movzbl (%r14,%rsi,1),%r8d
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+ shll $8,%edi
+ shll $8,%ebp
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%edx
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ shrl $16,%eax
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ shll $8,%esi
+ shll $8,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%ecx
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+ shll $16,%esi
+ shll $16,%edi
+ shll $16,%ebp
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+ movzbl %bl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+ shll $16,%esi
+ shll $24,%edi
+ shll $24,%ebp
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+/* %r15 was not advanced for this round, hence the 16+ displacements. */
+ movl 16+12(%r15),%edx
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movl 16+0(%r15),%eax
+ shll $24,%esi
+ shll $24,%edi
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ leaq -2048(%r14),%r14
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq
+.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt
+/*
+ * _x86_64_AES_decrypt_compact -- internal AES block decryption that
+ * uses only a 256-byte table plus three 64-bit mask constants read
+ * from offsets 256, 264 and 272 past %r14.
+ *
+ * In:   %eax,%ebx,%ecx,%edx  four 32-bit state words
+ *       %r15                 key schedule
+ *       %r14                 base of the 256-byte table
+ *       16(%rsp)             (as seen inside this routine) address of
+ *                            the last round key; callers in this file
+ *                            store it at 8(%rsp) before the call
+ * Out:  %eax,%ebx,%ecx,%edx  resulting state words
+ * Clobbers %rsi,%rdi,%rbp,%r8-%r13; %r15 is advanced.
+ */
+.type _x86_64_AES_decrypt_compact,@function
+.align 16
+_x86_64_AES_decrypt_compact:
+/* Touch the table at 32-byte steps; the loaded values are not used. */
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp .Ldec_loop_compact
+.align 16
+.Ldec_loop_compact:
+/* Add the round key, then advance to the next one. */
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15
+/* Byte-wise table substitution; output bytes are reassembled below. */
+ movzbl %al,%r10d
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+ movzbl %dl,%r8d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+ movzbl %bh,%ebp
+ movzbl %ch,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+ movzbl %cl,%edi
+ shll $8,%r9d
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+ movzbl %bl,%ebp
+ movzbl %bh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+ movzbl %ch,%edi
+ shll $16,%r9d
+ shll $16,%r13d
+ movzbl (%r14,%rdi,1),%ebx
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+ movzbl %dh,%edi
+ shrl $8,%eax
+ shll $16,%ebp
+ movzbl (%r14,%rdi,1),%ecx
+ movzbl (%r14,%rax,1),%edx
+ xorl %ebp,%r8d
+ shll $24,%esi
+ shll $24,%ebx
+ shll $24,%ecx
+ xorl %esi,%r10d
+ shll $24,%edx
+ xorl %r11d,%ebx
+ movl %r10d,%eax
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+/* Stop once %r15 reaches the last round key; that round skips the mixing step. */
+ cmpq 16(%rsp),%r15
+ je .Ldec_compact_done
+/*
+ * Column mixing, two state words per 64-bit register
+ * (%rax = ebx:eax, %rcx = edx:ecx).  %rsi,%rdi,%rbp hold the mask
+ * constants; they are used in the same roles as 0x80.., 0xfe.. and
+ * 0x1b.. in the encrypt routine (values not visible in this excerpt).
+ * Three successive packed doublings are computed: first %r8/%r11,
+ * then %r9/%r12, then %r10/%r13.
+ */
+ movq 256+0(%r14),%rsi
+ shlq $32,%rbx
+ shlq $32,%rdx
+ movq 256+8(%r14),%rdi
+ orq %rbx,%rax
+ orq %rdx,%rcx
+ movq 256+16(%r14),%rbp
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8
+ movq %rdx,%r11
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10
+ xorq %rdx,%r13
+ xorq %r10,%rax
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+/* Split the 64-bit pairs back into four 32-bit words and combine with rotations. */
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+/* The loads from (%r14) below are overwritten before use (presumably cache touches). */
+ movq 0(%r14),%rsi
+ shrq $32,%r8
+ shrq $32,%r11
+ movq 64(%r14),%rdi
+ roll $16,%r9d
+ roll $16,%r12d
+ movq 128(%r14),%rbp
+ roll $16,%r8d
+ roll $16,%r11d
+ movq 192(%r14),%r10
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+ movq 256(%r14),%r13
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ jmp .Ldec_loop_compact
+.align 16
+.Ldec_compact_done:
+/* Final round-key addition. */
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq
+.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
+/*
+ * AES_decrypt / asm_AES_decrypt -- decrypt one 16-byte block.
+ *
+ * In (as used below):  %rdi = input block, %rsi = output block,
+ *                      %rdx = key schedule (round count at offset 240).
+ * Saves and restores %rbx,%rbp,%r12-%r15 and builds a 64-byte aligned
+ * scratch frame:
+ *   0(%rsp)  key schedule     8(%rsp)  address of the last round key
+ *   16(%rsp) output pointer   24(%rsp) caller stack pointer
+ */
+.globl AES_decrypt
+.type AES_decrypt,@function
+.align 16
+.globl asm_AES_decrypt
+.hidden asm_AES_decrypt
+asm_AES_decrypt:
+AES_decrypt:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+/*
+ * Align the frame to 64 bytes and shift it by a multiple of 64
+ * (mask 960 = 0x3c0) derived from the key address; presumably this
+ * keeps the frame and key schedule from aliasing in the cache.
+ */
+ movq %rsp,%r10
+ leaq -63(%rdx),%rcx
+ andq $-64,%rsp
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx
+ subq %rcx,%rsp
+ subq $32,%rsp
+ movq %rsi,16(%rsp)
+ movq %r10,24(%rsp)
+ movq %rdx,%r15
+ movl 240(%r15),%r13d
+/* Load the four state words from the input block. */
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+/* %rbp = key + 16 * rounds, i.e. the last round key. */
+ shll $4,%r13d
+ leaq (%r15,%r13,1),%rbp
+ movq %r15,(%rsp)
+ movq %rbp,8(%rsp)
+/*
+ * Pick a table position past .LAES_Td+2048 based on the stack
+ * address: offset k*256 plus k*32 (the shrq/addq pair), i.e.
+ * 0, 288, 576 or 864 bytes.
+ */
+ leaq .LAES_Td+2048(%rip),%r14
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp
+ leaq (%r14,%rbp,1),%r14
+ shrq $3,%rbp
+ addq %rbp,%r14
+ call _x86_64_AES_decrypt_compact
+/* Store the result, then unwind via the saved stack pointer. */
+ movq 16(%rsp),%r9
+ movq 24(%rsp),%rsi
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+ retq
+.size AES_decrypt,.-AES_decrypt
+/*
+ * AES_set_encrypt_key -- public wrapper around
+ * _x86_64_AES_set_encrypt_key.  Arguments in %rdi, %esi and %rdx are
+ * passed through untouched; the result is returned in %rax.
+ * Saves %rbx,%rbp,%r12-%r15 around the call.
+ */
+.globl AES_set_encrypt_key
+.type AES_set_encrypt_key,@function
+.align 16
+AES_set_encrypt_key:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+/* Reserve 8 extra bytes: frame is 56 bytes, released in one step below. */
+ subq $8,%rsp
+ call _x86_64_AES_set_encrypt_key
+ movq 8(%rsp),%r15
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp
+ retq
+.size AES_set_encrypt_key,.-AES_set_encrypt_key
+/*
+ * _x86_64_AES_set_encrypt_key -- expand a user key into a key schedule.
+ *
+ * In:   %rdi = user key bytes, %esi = key length in bits (128, 192 or
+ *       256), %rdx = output key schedule.
+ * Out:  %rax = 0 on success, -1 if either pointer is NULL, -2 if the
+ *       bit length is not supported.  On success the round count
+ *       (10, 12 or 14) is stored at offset 240 of the schedule.
+ * Clobbers %rbx,%rcx,%rdx,%rsi,%rdi,%rbp,%r8.
+ *
+ * %rbp points 128 bytes into a byte table at .LAES_Te+2048 (hence the
+ * -128 displacements); round constants are read from 1024 bytes
+ * further on, indexed by the iteration counter in %ecx.
+ */
+.type _x86_64_AES_set_encrypt_key,@function
+.align 16
+_x86_64_AES_set_encrypt_key:
+/* Rearrange: %ecx = bits, %rsi = user key, %rdi = key schedule. */
+ movl %esi,%ecx
+ movq %rdi,%rsi
+ movq %rdx,%rdi
+ testq $-1,%rsi
+ jz .Lbadpointer
+ testq $-1,%rdi
+ jz .Lbadpointer
+ leaq .LAES_Te(%rip),%rbp
+ leaq 2048+128(%rbp),%rbp
+/* Touch the byte table at 32-byte steps; the loaded values are not used. */
+ movl 0-128(%rbp),%eax
+ movl 32-128(%rbp),%ebx
+ movl 64-128(%rbp),%r8d
+ movl 96-128(%rbp),%edx
+ movl 128-128(%rbp),%eax
+ movl 160-128(%rbp),%ebx
+ movl 192-128(%rbp),%r8d
+ movl 224-128(%rbp),%edx
+ cmpl $128,%ecx
+ je .L10rounds
+ cmpl $192,%ecx
+ je .L12rounds
+ cmpl $256,%ecx
+ je .L14rounds
+/* Unsupported key length. */
+ movq $-2,%rax
+ jmp .Lexit
+/* ---- 128-bit key: 10 rounds ---- */
+.L10rounds:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rdx,8(%rdi)
+/* %eax = word 0 and %edx = word 3 are already loaded, so skip the reloads. */
+ shrq $32,%rdx
+ xorl %ecx,%ecx
+ jmp .L10shortcut
+.align 4
+.L10loop:
+ movl 0(%rdi),%eax
+ movl 12(%rdi),%edx
+.L10shortcut:
+/* Substitute each byte of %edx through the table, repositioned, into %eax. */
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+/* Mix in the round constant for iteration %ecx. */
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,16(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,20(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,24(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,28(%rdi)
+ addl $1,%ecx
+ leaq 16(%rdi),%rdi
+ cmpl $10,%ecx
+ jl .L10loop
+/* %rdi advanced 10*16 = 160 bytes, so 80(%rdi) is schedule offset 240. */
+ movl $10,80(%rdi)
+ xorq %rax,%rax
+ jmp .Lexit
+/* ---- 192-bit key: 12 rounds ---- */
+.L12rounds:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rdx,16(%rdi)
+/* %eax = word 0 and %edx = word 5 are already loaded. */
+ shrq $32,%rdx
+ xorl %ecx,%ecx
+ jmp .L12shortcut
+.align 4
+.L12loop:
+ movl 0(%rdi),%eax
+ movl 20(%rdi),%edx
+.L12shortcut:
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,24(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,28(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,32(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,36(%rdi)
+/* The eighth pass (counter 7) stops after four words. */
+ cmpl $7,%ecx
+ je .L12break
+ addl $1,%ecx
+ xorl 16(%rdi),%eax
+ movl %eax,40(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,44(%rdi)
+ leaq 24(%rdi),%rdi
+ jmp .L12loop
+.L12break:
+/* %rdi advanced 7*24 = 168 bytes, so 72(%rdi) is schedule offset 240. */
+ movl $12,72(%rdi)
+ xorq %rax,%rax
+ jmp .Lexit
+/* ---- 256-bit key: 14 rounds ---- */
+.L14rounds:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rcx
+ movq 24(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+/* %eax = word 0 and %edx = word 7 are already loaded. */
+ shrq $32,%rdx
+ xorl %ecx,%ecx
+ jmp .L14shortcut
+.align 4
+.L14loop:
+ movl 0(%rdi),%eax
+ movl 28(%rdi),%edx
+.L14shortcut:
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,32(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,36(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,40(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,44(%rdi)
+/* The seventh pass (counter 6) stops after four words. */
+ cmpl $6,%ecx
+ je .L14break
+ addl $1,%ecx
+/* Second half: substitute the last word without repositioning or round constant. */
+ movl %eax,%edx
+ movl 16(%rdi),%eax
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ shll $8,%ebx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $16,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $24,%ebx
+ xorl %ebx,%eax
+ movl %eax,48(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,52(%rdi)
+ xorl 24(%rdi),%eax
+ movl %eax,56(%rdi)
+ xorl 28(%rdi),%eax
+ movl %eax,60(%rdi)
+ leaq 32(%rdi),%rdi
+ jmp .L14loop
+.L14break:
+/* %rdi advanced 6*32 = 192 bytes, so 48(%rdi) is schedule offset 240. */
+ movl $14,48(%rdi)
+ xorq %rax,%rax
+ jmp .Lexit
+.Lbadpointer:
+ movq $-1,%rax
+.Lexit:
+ retq
+.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
+/*
+ * AES_set_decrypt_key -- build a decryption key schedule.
+ *
+ * Arguments are those of _x86_64_AES_set_encrypt_key (%rdi, %esi,
+ * %rdx), which is called first.  If it returns non-zero, that value is
+ * returned unchanged.  Otherwise the 16-byte round keys are reversed
+ * in place and round keys 1 .. rounds-1 are run through a packed
+ * column transform (the same arithmetic as the mixing step of
+ * _x86_64_AES_decrypt_compact); 0 is returned.
+ * Saves and restores %rbx,%rbp,%r12-%r15.
+ */
+.globl AES_set_decrypt_key
+.type AES_set_decrypt_key,@function
+.align 16
+AES_set_decrypt_key:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+/* Keep the schedule pointer across the call (also completes the 56-byte frame). */
+ pushq %rdx
+ call _x86_64_AES_set_encrypt_key
+ movq (%rsp),%r8
+ cmpl $0,%eax
+ jne .Labort
+/* %rsi = first round key, %rdi = last round key (key + 16 * rounds). */
+ movl 240(%r8),%r14d
+ xorq %rdi,%rdi
+ leaq (%rdi,%r14,4),%rcx
+ movq %r8,%rsi
+ leaq (%r8,%rcx,4),%rdi
+.align 4
+.Linvert:
+/* Swap the 16-byte round keys at %rsi and %rdi, moving inwards until they meet. */
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 0(%rdi),%rcx
+ movq 8(%rdi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,0(%rsi)
+ movq %rdx,8(%rsi)
+ leaq 16(%rsi),%rsi
+ leaq -16(%rdi),%rdi
+ cmpq %rsi,%rdi
+ jne .Linvert
+/* Load three 64-bit mask constants (values not visible in this excerpt). */
+ leaq .LAES_Te+2048+1024(%rip),%rax
+ movq 40(%rax),%rsi
+ movq 48(%rax),%rdi
+ movq 56(%rax),%rbp
+ movq %r8,%r15
+/* Transform rounds-1 keys, starting with the second one. */
+ subl $1,%r14d
+.align 4
+.Lpermute:
+ leaq 16(%r15),%r15
+ movq 0(%r15),%rax
+ movq 8(%r15),%rcx
+/* Three successive packed doublings: %r8/%r11, then %r9/%r12, then %r10/%r13. */
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8
+ movq %rdx,%r11
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10
+ xorq %rdx,%r13
+ xorq %r10,%rax
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+/* Split back into four 32-bit words and combine with rotations. */
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+ shrq $32,%r8
+ shrq $32,%r11
+ roll $16,%r9d
+ roll $16,%r12d
+ roll $16,%r8d
+ roll $16,%r11d
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+/* Write the transformed round key back in place. */
+ movl %eax,0(%r15)
+ movl %ebx,4(%r15)
+ movl %ecx,8(%r15)
+ movl %edx,12(%r15)
+ subl $1,%r14d
+ jnz .Lpermute
+ xorq %rax,%rax
+.Labort:
+/* Reached with %rax = 0 on success or the callee's error code on failure. */
+ movq 8(%rsp),%r15
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp
+ retq
+.size AES_set_decrypt_key,.-AES_set_decrypt_key
+/*
+ * AES_cbc_encrypt / asm_AES_cbc_encrypt -- CBC-mode encryption or
+ * decryption.
+ *
+ * In (as used below):  %rdi = input, %rsi = output, %rdx = length in
+ *                      bytes, %rcx = key schedule, %r8 = 16-byte IV
+ *                      (updated on exit), %r9d = non-zero to encrypt,
+ *                      zero to decrypt.
+ * Returns immediately when the length is 0.
+ *
+ * Two paths:
+ *   fast  - table-driven routines; taken when length >= 512, length
+ *           is a multiple of 16 and the IA32CAP_BIT0_HT capability
+ *           bit is clear.
+ *   slow  - compact routines; also handles a trailing partial block.
+ *
+ * Fast-path frame:  0 key pointer, 16 caller %rsp, 24 in, 32 out,
+ *   40 length, 48 key, 56 IV pointer, 64 IV copy / previous-block
+ *   pointer, 80.. optional 240-byte key copy, 80+240 copy marker.
+ * Slow-path frame:  0 key, 8 last round key, 16 caller %rsp, 24 in,
+ *   32 out, 40 length, 56 IV pointer, 64 IV copy.
+ */
+.globl AES_cbc_encrypt
+.type AES_cbc_encrypt,@function
+.align 16
+.hidden OPENSSL_ia32cap_P
+.globl asm_AES_cbc_encrypt
+.hidden asm_AES_cbc_encrypt
+asm_AES_cbc_encrypt:
+AES_cbc_encrypt:
+ cmpq $0,%rdx
+ je .Lcbc_epilogue
+ pushfq
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ cld
+/* Zero-extend the 32-bit direction flag. */
+ movl %r9d,%r9d
+/* %r14 = .LAES_Te when encrypting, .LAES_Td when decrypting. */
+ leaq .LAES_Te(%rip),%r14
+ cmpq $0,%r9
+ jne .Lcbc_picked_te
+ leaq .LAES_Td(%rip),%r14
+.Lcbc_picked_te:
+ movl OPENSSL_ia32cap_P(%rip),%r10d
+ cmpq $512,%rdx
+ jb .Lcbc_slow_prologue
+ testq $15,%rdx
+ jnz .Lcbc_slow_prologue
+ btl $IA32CAP_BIT0_HT,%r10d
+ jc .Lcbc_slow_prologue
+/*
+ * Fast path: place a 64-byte aligned frame so that its offset within
+ * a 4096-byte page does not fall inside the 2304-byte table window.
+ */
+ leaq -88-248(%rsp),%r15
+ andq $-64,%r15
+ movq %r14,%r10
+ leaq 2304(%r14),%r11
+ movq %r15,%r12
+ andq $4095,%r10
+ andq $4095,%r11
+ andq $4095,%r12
+ cmpq %r11,%r12
+ jb .Lcbc_te_break_out
+ subq %r11,%r12
+ subq %r12,%r15
+ jmp .Lcbc_te_ok
+.Lcbc_te_break_out:
+ subq %r10,%r12
+ andq $4095,%r12
+ addq $320,%r12
+ subq %r12,%r15
+.align 4
+.Lcbc_te_ok:
+/* Switch to the new frame; the caller stack pointer goes to 16(%rsp). */
+ xchgq %rsp,%r15
+ movq %r15,16(%rsp)
+ movq %rdi,24(%rsp)
+ movq %rsi,32(%rsp)
+ movq %rdx,40(%rsp)
+ movq %rcx,48(%rsp)
+ movq %r8,56(%rsp)
+/* Copy marker: stays 0 unless the key is copied onto the stack below. */
+ movl $0,80+240(%rsp)
+ movq %r8,%rbp
+ movq %r9,%rbx
+ movq %rsi,%r9
+ movq %rdi,%r8
+ movq %rcx,%r15
+ movl 240(%r15),%eax
+/*
+ * Copy the key schedule onto the stack when its page offset relative
+ * to the table is below 2304 or at least 4096-248.
+ */
+ movq %r15,%r10
+ subq %r14,%r10
+ andq $4095,%r10
+ cmpq $2304,%r10
+ jb .Lcbc_do_ecopy
+ cmpq $4096-248,%r10
+ jb .Lcbc_skip_ecopy
+.align 4
+.Lcbc_do_ecopy:
+ movq %r15,%rsi
+ leaq 80(%rsp),%rdi
+ leaq 80(%rsp),%r15
+ movl $30,%ecx
+/* rep movsq; nop -- copies 30 qwords (240 bytes). */
+.long 0x90A548F3
+/* Store the round count at offset 240 of the copy; doubles as the copy marker. */
+ movl %eax,(%rdi)
+.Lcbc_skip_ecopy:
+ movq %r15,0(%rsp)
+/* Touch the 2304-byte table: 18 iterations of 128 bytes. */
+ movl $18,%ecx
+.align 4
+.Lcbc_prefetch_te:
+ movq 0(%r14),%r10
+ movq 32(%r14),%r11
+ movq 64(%r14),%r12
+ movq 96(%r14),%r13
+ leaq 128(%r14),%r14
+ subl $1,%ecx
+ jnz .Lcbc_prefetch_te
+ leaq -2304(%r14),%r14
+/* %rbx holds the direction flag: zero selects decryption. */
+ cmpq $0,%rbx
+ je .LFAST_DECRYPT
+/* ---- fast encrypt: state starts as the IV ---- */
+ movl 0(%rbp),%eax
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+.align 4
+.Lcbc_fast_enc_loop:
+/* XOR the previous ciphertext (or IV) with the next input block. */
+ xorl 0(%r8),%eax
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ call _x86_64_AES_encrypt
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10
+ movq %r10,40(%rsp)
+ jnz .Lcbc_fast_enc_loop
+/* Write the last ciphertext block back as the new IV. */
+ movq 56(%rsp),%rbp
+ movl %eax,0(%rbp)
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+ jmp .Lcbc_fast_cleanup
+.align 16
+.LFAST_DECRYPT:
+/* In-place operation needs the ciphertext saved before it is overwritten. */
+ cmpq %r8,%r9
+ je .Lcbc_fast_dec_in_place
+/* 64(%rsp) = pointer to the previous ciphertext block (initially the IV). */
+ movq %rbp,64(%rsp)
+.align 4
+.Lcbc_fast_dec_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ call _x86_64_AES_decrypt
+ movq 64(%rsp),%rbp
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0(%rbp),%eax
+ xorl 4(%rbp),%ebx
+ xorl 8(%rbp),%ecx
+ xorl 12(%rbp),%edx
+ movq %r8,%rbp
+/* The flags of this subq are consumed by the jnz at the end of the loop. */
+ subq $16,%r10
+ movq %r10,40(%rsp)
+ movq %rbp,64(%rsp)
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jnz .Lcbc_fast_dec_loop
+/* The last ciphertext block becomes the new IV. */
+ movq 56(%rsp),%r12
+ movq 0(%rbp),%r10
+ movq 8(%rbp),%r11
+ movq %r10,0(%r12)
+ movq %r11,8(%r12)
+ jmp .Lcbc_fast_cleanup
+.align 16
+.Lcbc_fast_dec_in_place:
+/* Keep a copy of the IV at 64(%rsp). */
+ movq 0(%rbp),%r10
+ movq 8(%rbp),%r11
+ movq %r10,0+64(%rsp)
+ movq %r11,8+64(%rsp)
+.align 4
+.Lcbc_fast_dec_in_place_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ call _x86_64_AES_decrypt
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+/* Save the current ciphertext block before the output store overwrites it. */
+ movq 0(%r8),%r11
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jz .Lcbc_fast_dec_in_place_done
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ movq %r10,40(%rsp)
+ jmp .Lcbc_fast_dec_in_place_loop
+.Lcbc_fast_dec_in_place_done:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+.align 4
+.Lcbc_fast_cleanup:
+/* If the key was copied onto the stack, wipe the copy. */
+ cmpl $0,80+240(%rsp)
+ leaq 80(%rsp),%rdi
+ je .Lcbc_exit
+ movl $30,%ecx
+ xorq %rax,%rax
+/* rep stosq; nop -- zeroes 30 qwords (240 bytes). */
+.long 0x90AB48F3
+ jmp .Lcbc_exit
+.align 16
+.Lcbc_slow_prologue:
+/* Slow path: 64-byte aligned frame, shifted by a multiple of 64 derived from the key address. */
+ leaq -88(%rsp),%rbp
+ andq $-64,%rbp
+ leaq -88-63(%rcx),%r10
+ subq %rbp,%r10
+ negq %r10
+ andq $960,%r10
+ subq %r10,%rbp
+ xchgq %rsp,%rbp
+ movq %rbp,16(%rsp)
+ movq %r8,56(%rsp)
+ movq %r8,%rbp
+ movq %r9,%rbx
+ movq %rsi,%r9
+ movq %rdi,%r8
+ movq %rcx,%r15
+ movq %rdx,%r10
+ movl 240(%r15),%eax
+ movq %r15,0(%rsp)
+/* 8(%rsp) = key + 16 * rounds; the compact routines read it as 16(%rsp). */
+ shll $4,%eax
+ leaq (%r15,%rax,1),%rax
+ movq %rax,8(%rsp)
+/* Choose a table position 0, 256, 512 or 768 bytes past table+2048. */
+ leaq 2048(%r14),%r14
+ leaq 768-8(%rsp),%rax
+ subq %r14,%rax
+ andq $768,%rax
+ leaq (%r14,%rax,1),%r14
+/* %rbx holds the direction flag; %rax keeps the chosen offset for the decrypt path. */
+ cmpq $0,%rbx
+ je .LSLOW_DECRYPT
+/* ---- slow encrypt ---- */
+ testq $-16,%r10
+ movl 0(%rbp),%eax
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+/* Fewer than 16 bytes in total: go straight to the padding code. */
+ jz .Lcbc_slow_enc_tail
+.align 4
+.Lcbc_slow_enc_loop:
+ xorl 0(%r8),%eax
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+ call _x86_64_AES_encrypt_compact
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10
+ jnz .Lcbc_slow_enc_loop
+ testq $15,%r10
+ jnz .Lcbc_slow_enc_tail
+/* Write the last ciphertext block back as the new IV. */
+ movq 56(%rsp),%rbp
+ movl %eax,0(%rbp)
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+ jmp .Lcbc_exit
+.align 4
+.Lcbc_slow_enc_tail:
+/*
+ * Partial final block: copy the remaining %r10 bytes to the output,
+ * zero-fill up to 16 bytes, then encrypt that block in place
+ * (in = out, length = 16).  %rax and %rcx carry chaining state and
+ * are preserved in %r11/%r12 around the string instructions.
+ */
+ movq %rax,%r11
+ movq %rcx,%r12
+ movq %r10,%rcx
+ movq %r8,%rsi
+ movq %r9,%rdi
+/* rep movsb; 2-byte nop */
+.long 0x9066A4F3
+ movq $16,%rcx
+ subq %r10,%rcx
+ xorq %rax,%rax
+/* rep stosb; 2-byte nop */
+.long 0x9066AAF3
+ movq %r9,%r8
+ movq $16,%r10
+ movq %r11,%rax
+ movq %r12,%rcx
+ jmp .Lcbc_slow_enc_loop
+.align 16
+.LSLOW_DECRYPT:
+/* Add offset/8 to the table pointer, as AES_decrypt does. */
+ shrq $3,%rax
+ addq %rax,%r14
+/* Keep a copy of the IV at 64(%rsp). */
+ movq 0(%rbp),%r11
+ movq 8(%rbp),%r12
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+.align 4
+.Lcbc_slow_dec_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+ call _x86_64_AES_decrypt_compact
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+/* Save the current ciphertext block; it is the next chaining value. */
+ movq 0(%r8),%r11
+ movq 8(%r8),%r12
+/* Borrow: fewer than 16 bytes were left.  Zero: this was the last block. */
+ subq $16,%r10
+ jc .Lcbc_slow_dec_partial
+ jz .Lcbc_slow_dec_done
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jmp .Lcbc_slow_dec_loop
+.Lcbc_slow_dec_done:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ jmp .Lcbc_exit
+.align 4
+.Lcbc_slow_dec_partial:
+/* Update the IV, stage the plaintext at 64(%rsp), copy only 16 + %r10 bytes out. */
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+ movl %eax,0+64(%rsp)
+ movl %ebx,4+64(%rsp)
+ movl %ecx,8+64(%rsp)
+ movl %edx,12+64(%rsp)
+ movq %r9,%rdi
+ leaq 64(%rsp),%rsi
+ leaq 16(%r10),%rcx
+/* rep movsb; 2-byte nop */
+.long 0x9066A4F3
+ jmp .Lcbc_exit
+.align 16
+.Lcbc_exit:
+/* Restore callee-saved registers and flags from the caller stack. */
+ movq 16(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+ popfq
+.Lcbc_epilogue:
+ retq
+.size AES_cbc_encrypt,.-AES_cbc_encrypt
+.align 64
+.long 0xa56363c6,0xa56363c6
+.long 0x847c7cf8,0x847c7cf8
+.long 0x997777ee,0x997777ee
+.long 0x8d7b7bf6,0x8d7b7bf6
+.long 0x0df2f2ff,0x0df2f2ff
+.long 0xbd6b6bd6,0xbd6b6bd6
+.long 0xb16f6fde,0xb16f6fde
+.long 0x54c5c591,0x54c5c591
+.long 0x50303060,0x50303060
+.long 0x03010102,0x03010102
+.long 0xa96767ce,0xa96767ce
+.long 0x7d2b2b56,0x7d2b2b56
+.long 0x19fefee7,0x19fefee7
+.long 0x62d7d7b5,0x62d7d7b5
+.long 0xe6abab4d,0xe6abab4d
+.long 0x9a7676ec,0x9a7676ec
+.long 0x45caca8f,0x45caca8f
+.long 0x9d82821f,0x9d82821f
+.long 0x40c9c989,0x40c9c989
+.long 0x877d7dfa,0x877d7dfa
+.long 0x15fafaef,0x15fafaef
+.long 0xeb5959b2,0xeb5959b2
+.long 0xc947478e,0xc947478e
+.long 0x0bf0f0fb,0x0bf0f0fb
+.long 0xecadad41,0xecadad41
+.long 0x67d4d4b3,0x67d4d4b3
+.long 0xfda2a25f,0xfda2a25f
+.long 0xeaafaf45,0xeaafaf45
+.long 0xbf9c9c23,0xbf9c9c23
+.long 0xf7a4a453,0xf7a4a453
+.long 0x967272e4,0x967272e4
+.long 0x5bc0c09b,0x5bc0c09b
+.long 0xc2b7b775,0xc2b7b775
+.long 0x1cfdfde1,0x1cfdfde1
+.long 0xae93933d,0xae93933d
+.long 0x6a26264c,0x6a26264c
+.long 0x5a36366c,0x5a36366c
+.long 0x413f3f7e,0x413f3f7e
+.long 0x02f7f7f5,0x02f7f7f5
+.long 0x4fcccc83,0x4fcccc83
+.long 0x5c343468,0x5c343468
+.long 0xf4a5a551,0xf4a5a551
+.long 0x34e5e5d1,0x34e5e5d1
+.long 0x08f1f1f9,0x08f1f1f9
+.long 0x937171e2,0x937171e2
+.long 0x73d8d8ab,0x73d8d8ab
+.long 0x53313162,0x53313162
+.long 0x3f15152a,0x3f15152a
+.long 0x0c040408,0x0c040408
+.long 0x52c7c795,0x52c7c795
+.long 0x65232346,0x65232346
+.long 0x5ec3c39d,0x5ec3c39d
+.long 0x28181830,0x28181830
+.long 0xa1969637,0xa1969637
+.long 0x0f05050a,0x0f05050a
+.long 0xb59a9a2f,0xb59a9a2f
+.long 0x0907070e,0x0907070e
+.long 0x36121224,0x36121224
+.long 0x9b80801b,0x9b80801b
+.long 0x3de2e2df,0x3de2e2df
+.long 0x26ebebcd,0x26ebebcd
+.long 0x6927274e,0x6927274e
+.long 0xcdb2b27f,0xcdb2b27f
+.long 0x9f7575ea,0x9f7575ea
+.long 0x1b090912,0x1b090912
+.long 0x9e83831d,0x9e83831d
+.long 0x742c2c58,0x742c2c58
+.long 0x2e1a1a34,0x2e1a1a34
+.long 0x2d1b1b36,0x2d1b1b36
+.long 0xb26e6edc,0xb26e6edc
+.long 0xee5a5ab4,0xee5a5ab4
+.long 0xfba0a05b,0xfba0a05b
+.long 0xf65252a4,0xf65252a4
+.long 0x4d3b3b76,0x4d3b3b76
+.long 0x61d6d6b7,0x61d6d6b7
+.long 0xceb3b37d,0xceb3b37d
+.long 0x7b292952,0x7b292952
+.long 0x3ee3e3dd,0x3ee3e3dd
+.long 0x712f2f5e,0x712f2f5e
+.long 0x97848413,0x97848413
+.long 0xf55353a6,0xf55353a6
+.long 0x68d1d1b9,0x68d1d1b9
+.long 0x00000000,0x00000000
+.long 0x2cededc1,0x2cededc1
+.long 0x60202040,0x60202040
+.long 0x1ffcfce3,0x1ffcfce3
+.long 0xc8b1b179,0xc8b1b179
+.long 0xed5b5bb6,0xed5b5bb6
+.long 0xbe6a6ad4,0xbe6a6ad4
+.long 0x46cbcb8d,0x46cbcb8d
+.long 0xd9bebe67,0xd9bebe67
+.long 0x4b393972,0x4b393972
+.long 0xde4a4a94,0xde4a4a94
+.long 0xd44c4c98,0xd44c4c98
+.long 0xe85858b0,0xe85858b0
+.long 0x4acfcf85,0x4acfcf85
+.long 0x6bd0d0bb,0x6bd0d0bb
+.long 0x2aefefc5,0x2aefefc5
+.long 0xe5aaaa4f,0xe5aaaa4f
+.long 0x16fbfbed,0x16fbfbed
+.long 0xc5434386,0xc5434386
+.long 0xd74d4d9a,0xd74d4d9a
+.long 0x55333366,0x55333366
+.long 0x94858511,0x94858511
+.long 0xcf45458a,0xcf45458a
+.long 0x10f9f9e9,0x10f9f9e9
+.long 0x06020204,0x06020204
+.long 0x817f7ffe,0x817f7ffe
+.long 0xf05050a0,0xf05050a0
+.long 0x443c3c78,0x443c3c78
+.long 0xba9f9f25,0xba9f9f25
+.long 0xe3a8a84b,0xe3a8a84b
+.long 0xf35151a2,0xf35151a2
+.long 0xfea3a35d,0xfea3a35d
+.long 0xc0404080,0xc0404080
+.long 0x8a8f8f05,0x8a8f8f05
+.long 0xad92923f,0xad92923f
+.long 0xbc9d9d21,0xbc9d9d21
+.long 0x48383870,0x48383870
+.long 0x04f5f5f1,0x04f5f5f1
+.long 0xdfbcbc63,0xdfbcbc63
+.long 0xc1b6b677,0xc1b6b677
+.long 0x75dadaaf,0x75dadaaf
+.long 0x63212142,0x63212142
+.long 0x30101020,0x30101020
+.long 0x1affffe5,0x1affffe5
+.long 0x0ef3f3fd,0x0ef3f3fd
+.long 0x6dd2d2bf,0x6dd2d2bf
+.long 0x4ccdcd81,0x4ccdcd81
+.long 0x140c0c18,0x140c0c18
+.long 0x35131326,0x35131326
+.long 0x2fececc3,0x2fececc3
+.long 0xe15f5fbe,0xe15f5fbe
+.long 0xa2979735,0xa2979735
+.long 0xcc444488,0xcc444488
+.long 0x3917172e,0x3917172e
+.long 0x57c4c493,0x57c4c493
+.long 0xf2a7a755,0xf2a7a755
+.long 0x827e7efc,0x827e7efc
+.long 0x473d3d7a,0x473d3d7a
+.long 0xac6464c8,0xac6464c8
+.long 0xe75d5dba,0xe75d5dba
+.long 0x2b191932,0x2b191932
+.long 0x957373e6,0x957373e6
+.long 0xa06060c0,0xa06060c0
+.long 0x98818119,0x98818119
+.long 0xd14f4f9e,0xd14f4f9e
+.long 0x7fdcdca3,0x7fdcdca3
+.long 0x66222244,0x66222244
+.long 0x7e2a2a54,0x7e2a2a54
+.long 0xab90903b,0xab90903b
+.long 0x8388880b,0x8388880b
+.long 0xca46468c,0xca46468c
+.long 0x29eeeec7,0x29eeeec7
+.long 0xd3b8b86b,0xd3b8b86b
+.long 0x3c141428,0x3c141428
+.long 0x79dedea7,0x79dedea7
+.long 0xe25e5ebc,0xe25e5ebc
+.long 0x1d0b0b16,0x1d0b0b16
+.long 0x76dbdbad,0x76dbdbad
+.long 0x3be0e0db,0x3be0e0db
+.long 0x56323264,0x56323264
+.long 0x4e3a3a74,0x4e3a3a74
+.long 0x1e0a0a14,0x1e0a0a14
+.long 0xdb494992,0xdb494992
+.long 0x0a06060c,0x0a06060c
+.long 0x6c242448,0x6c242448
+.long 0xe45c5cb8,0xe45c5cb8
+.long 0x5dc2c29f,0x5dc2c29f
+.long 0x6ed3d3bd,0x6ed3d3bd
+.long 0xefacac43,0xefacac43
+.long 0xa66262c4,0xa66262c4
+.long 0xa8919139,0xa8919139
+.long 0xa4959531,0xa4959531
+.long 0x37e4e4d3,0x37e4e4d3
+.long 0x8b7979f2,0x8b7979f2
+.long 0x32e7e7d5,0x32e7e7d5
+.long 0x43c8c88b,0x43c8c88b
+.long 0x5937376e,0x5937376e
+.long 0xb76d6dda,0xb76d6dda
+.long 0x8c8d8d01,0x8c8d8d01
+.long 0x64d5d5b1,0x64d5d5b1
+.long 0xd24e4e9c,0xd24e4e9c
+.long 0xe0a9a949,0xe0a9a949
+.long 0xb46c6cd8,0xb46c6cd8
+.long 0xfa5656ac,0xfa5656ac
+.long 0x07f4f4f3,0x07f4f4f3
+.long 0x25eaeacf,0x25eaeacf
+.long 0xaf6565ca,0xaf6565ca
+.long 0x8e7a7af4,0x8e7a7af4
+.long 0xe9aeae47,0xe9aeae47
+.long 0x18080810,0x18080810
+.long 0xd5baba6f,0xd5baba6f
+.long 0x887878f0,0x887878f0
+.long 0x6f25254a,0x6f25254a
+.long 0x722e2e5c,0x722e2e5c
+.long 0x241c1c38,0x241c1c38
+.long 0xf1a6a657,0xf1a6a657
+.long 0xc7b4b473,0xc7b4b473
+.long 0x51c6c697,0x51c6c697
+.long 0x23e8e8cb,0x23e8e8cb
+.long 0x7cdddda1,0x7cdddda1
+.long 0x9c7474e8,0x9c7474e8
+.long 0x211f1f3e,0x211f1f3e
+.long 0xdd4b4b96,0xdd4b4b96
+.long 0xdcbdbd61,0xdcbdbd61
+.long 0x868b8b0d,0x868b8b0d
+.long 0x858a8a0f,0x858a8a0f
+.long 0x907070e0,0x907070e0
+.long 0x423e3e7c,0x423e3e7c
+.long 0xc4b5b571,0xc4b5b571
+.long 0xaa6666cc,0xaa6666cc
+.long 0xd8484890,0xd8484890
+.long 0x05030306,0x05030306
+.long 0x01f6f6f7,0x01f6f6f7
+.long 0x120e0e1c,0x120e0e1c
+.long 0xa36161c2,0xa36161c2
+.long 0x5f35356a,0x5f35356a
+.long 0xf95757ae,0xf95757ae
+.long 0xd0b9b969,0xd0b9b969
+.long 0x91868617,0x91868617
+.long 0x58c1c199,0x58c1c199
+.long 0x271d1d3a,0x271d1d3a
+.long 0xb99e9e27,0xb99e9e27
+.long 0x38e1e1d9,0x38e1e1d9
+.long 0x13f8f8eb,0x13f8f8eb
+.long 0xb398982b,0xb398982b
+.long 0x33111122,0x33111122
+.long 0xbb6969d2,0xbb6969d2
+.long 0x70d9d9a9,0x70d9d9a9
+.long 0x898e8e07,0x898e8e07
+.long 0xa7949433,0xa7949433
+.long 0xb69b9b2d,0xb69b9b2d
+.long 0x221e1e3c,0x221e1e3c
+.long 0x92878715,0x92878715
+.long 0x20e9e9c9,0x20e9e9c9
+.long 0x49cece87,0x49cece87
+.long 0xff5555aa,0xff5555aa
+.long 0x78282850,0x78282850
+.long 0x7adfdfa5,0x7adfdfa5
+.long 0x8f8c8c03,0x8f8c8c03
+.long 0xf8a1a159,0xf8a1a159
+.long 0x80898909,0x80898909
+.long 0x170d0d1a,0x170d0d1a
+.long 0xdabfbf65,0xdabfbf65
+.long 0x31e6e6d7,0x31e6e6d7
+.long 0xc6424284,0xc6424284
+.long 0xb86868d0,0xb86868d0
+.long 0xc3414182,0xc3414182
+.long 0xb0999929,0xb0999929
+.long 0x772d2d5a,0x772d2d5a
+.long 0x110f0f1e,0x110f0f1e
+.long 0xcbb0b07b,0xcbb0b07b
+.long 0xfc5454a8,0xfc5454a8
+.long 0xd6bbbb6d,0xd6bbbb6d
+.long 0x3a16162c,0x3a16162c
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.long 0x00000001, 0x00000002, 0x00000004, 0x00000008
+.long 0x00000010, 0x00000020, 0x00000040, 0x00000080
+.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080
+.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
+.align 64
+.long 0x50a7f451,0x50a7f451
+.long 0x5365417e,0x5365417e
+.long 0xc3a4171a,0xc3a4171a
+.long 0x965e273a,0x965e273a
+.long 0xcb6bab3b,0xcb6bab3b
+.long 0xf1459d1f,0xf1459d1f
+.long 0xab58faac,0xab58faac
+.long 0x9303e34b,0x9303e34b
+.long 0x55fa3020,0x55fa3020
+.long 0xf66d76ad,0xf66d76ad
+.long 0x9176cc88,0x9176cc88
+.long 0x254c02f5,0x254c02f5
+.long 0xfcd7e54f,0xfcd7e54f
+.long 0xd7cb2ac5,0xd7cb2ac5
+.long 0x80443526,0x80443526
+.long 0x8fa362b5,0x8fa362b5
+.long 0x495ab1de,0x495ab1de
+.long 0x671bba25,0x671bba25
+.long 0x980eea45,0x980eea45
+.long 0xe1c0fe5d,0xe1c0fe5d
+.long 0x02752fc3,0x02752fc3
+.long 0x12f04c81,0x12f04c81
+.long 0xa397468d,0xa397468d
+.long 0xc6f9d36b,0xc6f9d36b
+.long 0xe75f8f03,0xe75f8f03
+.long 0x959c9215,0x959c9215
+.long 0xeb7a6dbf,0xeb7a6dbf
+.long 0xda595295,0xda595295
+.long 0x2d83bed4,0x2d83bed4
+.long 0xd3217458,0xd3217458
+.long 0x2969e049,0x2969e049
+.long 0x44c8c98e,0x44c8c98e
+.long 0x6a89c275,0x6a89c275
+.long 0x78798ef4,0x78798ef4
+.long 0x6b3e5899,0x6b3e5899
+.long 0xdd71b927,0xdd71b927
+.long 0xb64fe1be,0xb64fe1be
+.long 0x17ad88f0,0x17ad88f0
+.long 0x66ac20c9,0x66ac20c9
+.long 0xb43ace7d,0xb43ace7d
+.long 0x184adf63,0x184adf63
+.long 0x82311ae5,0x82311ae5
+.long 0x60335197,0x60335197
+.long 0x457f5362,0x457f5362
+.long 0xe07764b1,0xe07764b1
+.long 0x84ae6bbb,0x84ae6bbb
+.long 0x1ca081fe,0x1ca081fe
+.long 0x942b08f9,0x942b08f9
+.long 0x58684870,0x58684870
+.long 0x19fd458f,0x19fd458f
+.long 0x876cde94,0x876cde94
+.long 0xb7f87b52,0xb7f87b52
+.long 0x23d373ab,0x23d373ab
+.long 0xe2024b72,0xe2024b72
+.long 0x578f1fe3,0x578f1fe3
+.long 0x2aab5566,0x2aab5566
+.long 0x0728ebb2,0x0728ebb2
+.long 0x03c2b52f,0x03c2b52f
+.long 0x9a7bc586,0x9a7bc586
+.long 0xa50837d3,0xa50837d3
+.long 0xf2872830,0xf2872830
+.long 0xb2a5bf23,0xb2a5bf23
+.long 0xba6a0302,0xba6a0302
+.long 0x5c8216ed,0x5c8216ed
+.long 0x2b1ccf8a,0x2b1ccf8a
+.long 0x92b479a7,0x92b479a7
+.long 0xf0f207f3,0xf0f207f3
+.long 0xa1e2694e,0xa1e2694e
+.long 0xcdf4da65,0xcdf4da65
+.long 0xd5be0506,0xd5be0506
+.long 0x1f6234d1,0x1f6234d1
+.long 0x8afea6c4,0x8afea6c4
+.long 0x9d532e34,0x9d532e34
+.long 0xa055f3a2,0xa055f3a2
+.long 0x32e18a05,0x32e18a05
+.long 0x75ebf6a4,0x75ebf6a4
+.long 0x39ec830b,0x39ec830b
+.long 0xaaef6040,0xaaef6040
+.long 0x069f715e,0x069f715e
+.long 0x51106ebd,0x51106ebd
+.long 0xf98a213e,0xf98a213e
+.long 0x3d06dd96,0x3d06dd96
+.long 0xae053edd,0xae053edd
+.long 0x46bde64d,0x46bde64d
+.long 0xb58d5491,0xb58d5491
+.long 0x055dc471,0x055dc471
+.long 0x6fd40604,0x6fd40604
+.long 0xff155060,0xff155060
+.long 0x24fb9819,0x24fb9819
+.long 0x97e9bdd6,0x97e9bdd6
+.long 0xcc434089,0xcc434089
+.long 0x779ed967,0x779ed967
+.long 0xbd42e8b0,0xbd42e8b0
+.long 0x888b8907,0x888b8907
+.long 0x385b19e7,0x385b19e7
+.long 0xdbeec879,0xdbeec879
+.long 0x470a7ca1,0x470a7ca1
+.long 0xe90f427c,0xe90f427c
+.long 0xc91e84f8,0xc91e84f8
+.long 0x00000000,0x00000000
+.long 0x83868009,0x83868009
+.long 0x48ed2b32,0x48ed2b32
+.long 0xac70111e,0xac70111e
+.long 0x4e725a6c,0x4e725a6c
+.long 0xfbff0efd,0xfbff0efd
+.long 0x5638850f,0x5638850f
+.long 0x1ed5ae3d,0x1ed5ae3d
+.long 0x27392d36,0x27392d36
+.long 0x64d90f0a,0x64d90f0a
+.long 0x21a65c68,0x21a65c68
+.long 0xd1545b9b,0xd1545b9b
+.long 0x3a2e3624,0x3a2e3624
+.long 0xb1670a0c,0xb1670a0c
+.long 0x0fe75793,0x0fe75793
+.long 0xd296eeb4,0xd296eeb4
+.long 0x9e919b1b,0x9e919b1b
+.long 0x4fc5c080,0x4fc5c080
+.long 0xa220dc61,0xa220dc61
+.long 0x694b775a,0x694b775a
+.long 0x161a121c,0x161a121c
+.long 0x0aba93e2,0x0aba93e2
+.long 0xe52aa0c0,0xe52aa0c0
+.long 0x43e0223c,0x43e0223c
+.long 0x1d171b12,0x1d171b12
+.long 0x0b0d090e,0x0b0d090e
+.long 0xadc78bf2,0xadc78bf2
+.long 0xb9a8b62d,0xb9a8b62d
+.long 0xc8a91e14,0xc8a91e14
+.long 0x8519f157,0x8519f157
+.long 0x4c0775af,0x4c0775af
+.long 0xbbdd99ee,0xbbdd99ee
+.long 0xfd607fa3,0xfd607fa3
+.long 0x9f2601f7,0x9f2601f7
+.long 0xbcf5725c,0xbcf5725c
+.long 0xc53b6644,0xc53b6644
+.long 0x347efb5b,0x347efb5b
+.long 0x7629438b,0x7629438b
+.long 0xdcc623cb,0xdcc623cb
+.long 0x68fcedb6,0x68fcedb6
+.long 0x63f1e4b8,0x63f1e4b8
+.long 0xcadc31d7,0xcadc31d7
+.long 0x10856342,0x10856342
+.long 0x40229713,0x40229713
+.long 0x2011c684,0x2011c684
+.long 0x7d244a85,0x7d244a85
+.long 0xf83dbbd2,0xf83dbbd2
+.long 0x1132f9ae,0x1132f9ae
+.long 0x6da129c7,0x6da129c7
+.long 0x4b2f9e1d,0x4b2f9e1d
+.long 0xf330b2dc,0xf330b2dc
+.long 0xec52860d,0xec52860d
+.long 0xd0e3c177,0xd0e3c177
+.long 0x6c16b32b,0x6c16b32b
+.long 0x99b970a9,0x99b970a9
+.long 0xfa489411,0xfa489411
+.long 0x2264e947,0x2264e947
+.long 0xc48cfca8,0xc48cfca8
+.long 0x1a3ff0a0,0x1a3ff0a0
+.long 0xd82c7d56,0xd82c7d56
+.long 0xef903322,0xef903322
+.long 0xc74e4987,0xc74e4987
+.long 0xc1d138d9,0xc1d138d9
+.long 0xfea2ca8c,0xfea2ca8c
+.long 0x360bd498,0x360bd498
+.long 0xcf81f5a6,0xcf81f5a6
+.long 0x28de7aa5,0x28de7aa5
+.long 0x268eb7da,0x268eb7da
+.long 0xa4bfad3f,0xa4bfad3f
+.long 0xe49d3a2c,0xe49d3a2c
+.long 0x0d927850,0x0d927850
+.long 0x9bcc5f6a,0x9bcc5f6a
+.long 0x62467e54,0x62467e54
+.long 0xc2138df6,0xc2138df6
+.long 0xe8b8d890,0xe8b8d890
+.long 0x5ef7392e,0x5ef7392e
+.long 0xf5afc382,0xf5afc382
+.long 0xbe805d9f,0xbe805d9f
+.long 0x7c93d069,0x7c93d069
+.long 0xa92dd56f,0xa92dd56f
+.long 0xb31225cf,0xb31225cf
+.long 0x3b99acc8,0x3b99acc8
+.long 0xa77d1810,0xa77d1810
+.long 0x6e639ce8,0x6e639ce8
+.long 0x7bbb3bdb,0x7bbb3bdb
+.long 0x097826cd,0x097826cd
+.long 0xf418596e,0xf418596e
+.long 0x01b79aec,0x01b79aec
+.long 0xa89a4f83,0xa89a4f83
+.long 0x656e95e6,0x656e95e6
+.long 0x7ee6ffaa,0x7ee6ffaa
+.long 0x08cfbc21,0x08cfbc21
+.long 0xe6e815ef,0xe6e815ef
+.long 0xd99be7ba,0xd99be7ba
+.long 0xce366f4a,0xce366f4a
+.long 0xd4099fea,0xd4099fea
+.long 0xd67cb029,0xd67cb029
+.long 0xafb2a431,0xafb2a431
+.long 0x31233f2a,0x31233f2a
+.long 0x3094a5c6,0x3094a5c6
+.long 0xc066a235,0xc066a235
+.long 0x37bc4e74,0x37bc4e74
+.long 0xa6ca82fc,0xa6ca82fc
+.long 0xb0d090e0,0xb0d090e0
+.long 0x15d8a733,0x15d8a733
+.long 0x4a9804f1,0x4a9804f1
+.long 0xf7daec41,0xf7daec41
+.long 0x0e50cd7f,0x0e50cd7f
+.long 0x2ff69117,0x2ff69117
+.long 0x8dd64d76,0x8dd64d76
+.long 0x4db0ef43,0x4db0ef43
+.long 0x544daacc,0x544daacc
+.long 0xdf0496e4,0xdf0496e4
+.long 0xe3b5d19e,0xe3b5d19e
+.long 0x1b886a4c,0x1b886a4c
+.long 0xb81f2cc1,0xb81f2cc1
+.long 0x7f516546,0x7f516546
+.long 0x04ea5e9d,0x04ea5e9d
+.long 0x5d358c01,0x5d358c01
+.long 0x737487fa,0x737487fa
+.long 0x2e410bfb,0x2e410bfb
+.long 0x5a1d67b3,0x5a1d67b3
+.long 0x52d2db92,0x52d2db92
+.long 0x335610e9,0x335610e9
+.long 0x1347d66d,0x1347d66d
+.long 0x8c61d79a,0x8c61d79a
+.long 0x7a0ca137,0x7a0ca137
+.long 0x8e14f859,0x8e14f859
+.long 0x893c13eb,0x893c13eb
+.long 0xee27a9ce,0xee27a9ce
+.long 0x35c961b7,0x35c961b7
+.long 0xede51ce1,0xede51ce1
+.long 0x3cb1477a,0x3cb1477a
+.long 0x59dfd29c,0x59dfd29c
+.long 0x3f73f255,0x3f73f255
+.long 0x79ce1418,0x79ce1418
+.long 0xbf37c773,0xbf37c773
+.long 0xeacdf753,0xeacdf753
+.long 0x5baafd5f,0x5baafd5f
+.long 0x146f3ddf,0x146f3ddf
+.long 0x86db4478,0x86db4478
+.long 0x81f3afca,0x81f3afca
+.long 0x3ec468b9,0x3ec468b9
+.long 0x2c342438,0x2c342438
+.long 0x5f40a3c2,0x5f40a3c2
+.long 0x72c31d16,0x72c31d16
+.long 0x0c25e2bc,0x0c25e2bc
+.long 0x8b493c28,0x8b493c28
+.long 0x41950dff,0x41950dff
+.long 0x7101a839,0x7101a839
+.long 0xdeb30c08,0xdeb30c08
+.long 0x9ce4b4d8,0x9ce4b4d8
+.long 0x90c15664,0x90c15664
+.long 0x6184cb7b,0x6184cb7b
+.long 0x70b632d5,0x70b632d5
+.long 0x745c6c48,0x745c6c48
+.long 0x4257b8d0,0x4257b8d0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
diff --git a/crypto/libressl/crypto/aes/aes-macosx-x86_64.S b/crypto/libressl/crypto/aes/aes-macosx-x86_64.S
new file mode 100644
index 0000000..8a9c36e
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aes-macosx-x86_64.S
@@ -0,0 +1,2544 @@
+#include "x86_arch.h"
+.p2align 4
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ movl 240(%r15),%r13d
+ subl $1,%r13d
+ jmp L$enc_loop
+.p2align 4
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+ movzbl %dh,%esi
+ shrl $16,%ecx
+ movzbl %ah,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+ shrl $16,%ebx
+ leaq 16(%r15),%r15
+ shrl $16,%eax
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+ movl 12(%r15),%edx
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rdi,8),%r12d
+ xorl 1(%r14,%rbp,8),%r8d
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ subl $1,%r13d
+ jnz L$enc_loop
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl 2(%r14,%rsi,8),%r10d
+ movzbl 2(%r14,%rdi,8),%r11d
+ movzbl 2(%r14,%rbp,8),%r12d
+ movzbl %dl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl 2(%r14,%rsi,8),%r8d
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+ andl $65280,%edi
+ andl $65280,%ebp
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%ecx
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ shrl $16,%edx
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+ andl $65280,%esi
+ andl $65280,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%eax
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+ andl $16711680,%esi
+ andl $16711680,%edi
+ andl $16711680,%ebp
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+ movzbl %bl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 2(%r14,%rbp,8),%ebp
+ andl $16711680,%esi
+ andl $4278190080,%edi
+ andl $4278190080,%ebp
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movl 16+12(%r15),%edx
+ movl 2(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 16+0(%r15),%eax
+ andl $4278190080,%esi
+ andl $4278190080,%edi
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq
+.p2align 4
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp L$enc_loop_compact
+.p2align 4
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15
+ movzbl %al,%r10d
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+ movzbl %dl,%r8d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+ movzbl %dh,%ebp
+ movzbl %ah,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+ movzbl %cl,%edi
+ shll $8,%r9d
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+ movzbl %bl,%ebp
+ movzbl %dh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+ movzbl %ah,%edi
+ shrl $8,%ecx
+ shrl $8,%ebx
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rcx,1),%edx
+ movzbl (%r14,%rbx,1),%ecx
+ shll $16,%r9d
+ shll $16,%r13d
+ shll $16,%ebp
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+ xorl %ebp,%r8d
+ shll $24,%esi
+ shll $24,%edi
+ shll $24,%edx
+ xorl %esi,%r10d
+ shll $24,%ecx
+ xorl %edi,%r11d
+ movl %r10d,%eax
+ movl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ cmpq 16(%rsp),%r15
+ je L$enc_compact_done
+ movl %eax,%esi
+ movl %ebx,%edi
+ andl $2155905152,%esi
+ andl $2155905152,%edi
+ movl %esi,%r10d
+ movl %edi,%r11d
+ shrl $7,%r10d
+ leal (%rax,%rax,1),%r8d
+ shrl $7,%r11d
+ leal (%rbx,%rbx,1),%r9d
+ subl %r10d,%esi
+ subl %r11d,%edi
+ andl $4278124286,%r8d
+ andl $4278124286,%r9d
+ andl $454761243,%esi
+ andl $454761243,%edi
+ movl %eax,%r10d
+ movl %ebx,%r11d
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movl %ecx,%esi
+ movl %edx,%edi
+ roll $24,%eax
+ roll $24,%ebx
+ andl $2155905152,%esi
+ andl $2155905152,%edi
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movl %esi,%r12d
+ movl %edi,%ebp
+ rorl $16,%r10d
+ rorl $16,%r11d
+ shrl $7,%r12d
+ leal (%rcx,%rcx,1),%r8d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ shrl $7,%ebp
+ leal (%rdx,%rdx,1),%r9d
+ rorl $8,%r10d
+ rorl $8,%r11d
+ subl %r12d,%esi
+ subl %ebp,%edi
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ andl $4278124286,%r8d
+ andl $4278124286,%r9d
+ andl $454761243,%esi
+ andl $454761243,%edi
+ movl %ecx,%r12d
+ movl %edx,%ebp
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ roll $24,%ecx
+ roll $24,%edx
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ movl 0(%r14),%esi
+ rorl $16,%r12d
+ rorl $16,%ebp
+ movl 64(%r14),%edi
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ movl 128(%r14),%r8d
+ rorl $8,%r12d
+ rorl $8,%ebp
+ movl 192(%r14),%r9d
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ jmp L$enc_loop_compact
+.p2align 4
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq
+.globl _AES_encrypt
+.p2align 4
+.globl _asm_AES_encrypt
+.private_extern _asm_AES_encrypt
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r10
+ leaq -63(%rdx),%rcx
+ andq $-64,%rsp
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx
+ subq %rcx,%rsp
+ subq $32,%rsp
+ movq %rsi,16(%rsp)
+ movq %r10,24(%rsp)
+ movq %rdx,%r15
+ movl 240(%r15),%r13d
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ shll $4,%r13d
+ leaq (%r15,%r13,1),%rbp
+ movq %r15,(%rsp)
+ movq %rbp,8(%rsp)
+ leaq L$AES_Te+2048(%rip),%r14
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp
+ leaq (%r14,%rbp,1),%r14
+ call _x86_64_AES_encrypt_compact
+ movq 16(%rsp),%r9
+ movq 24(%rsp),%rsi
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+ retq
+.p2align 4 /* Table-driven decrypt core on the state in eax, ebx, ecx, edx; r15 = round keys, r14 = lookup table. Presumably _x86_64_AES_decrypt (called from the CBC code) - its label line is not visible in this listing. */
+ xorl 0(%r15),%eax /* XOR the state with the first 16 bytes of round-key material */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ movl 240(%r15),%r13d /* r13d = value stored at key+240 */
+ subl $1,%r13d /* loop counter = that value - 1 */
+ jmp L$dec_loop
+.p2align 4 /* the loop body follows; L$dec_loop is presumably defined here - label line not visible */
+ movzbl %al,%esi /* low bytes of three state words become table indices */
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d /* 8-byte table stride; offset 0 reads the first word of the entry */
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+ movzbl %dh,%esi /* second bytes */
+ movzbl %ah,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d /* offset 3 reads a word starting 3 bytes into the 8-byte entry */
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+ movzbl %bh,%esi
+ shrl $16,%eax /* bring the upper halves of the state words down */
+ movzbl %ch,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+ shrl $16,%ebx
+ leaq 16(%r15),%r15 /* advance to the next 16 bytes of round key */
+ shrl $16,%ecx
+ movzbl %cl,%esi /* third bytes */
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d /* offset 2 */
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+ movzbl %bh,%esi /* fourth bytes */
+ movzbl %ch,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d /* offset 1 */
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+ movzbl %dh,%esi
+ movl 12(%r15),%edx /* edx is consumed; reload it with round-key word 3 */
+ movzbl %ah,%ebp
+ xorl 1(%r14,%rsi,8),%r12d
+ movl 0(%r15),%eax /* round-key word 0 */
+ xorl 1(%r14,%rbp,8),%r8d
+ xorl %r10d,%eax /* new state = round key XOR accumulated table words */
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r12d,%ecx
+ xorl %r11d,%ebx
+ xorl %r8d,%edx
+ subl $1,%r13d
+ jnz L$dec_loop /* repeat until the counter reaches zero */
+ leaq 2048(%r14),%r14 /* last round: switch to the byte-wide table 2048 bytes further on */
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl (%r14,%rsi,1),%r10d /* single-byte lookups from here on */
+ movzbl (%r14,%rdi,1),%r11d
+ movzbl (%r14,%rbp,1),%r12d
+ movzbl %dl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movzbl (%r14,%rsi,1),%r8d
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+ shll $8,%edi /* place the looked-up bytes in bits 8..15 */
+ shll $8,%ebp
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%edx
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ shrl $16,%eax
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ shll $8,%esi
+ shll $8,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%ecx
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+ shll $16,%esi /* bits 16..23 */
+ shll $16,%edi
+ shll $16,%ebp
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+ movzbl %bl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+ shll $16,%esi
+ shll $24,%edi /* bits 24..31 */
+ shll $24,%ebp
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movl 16+12(%r15),%edx /* load the following 16 bytes of round key */
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movl 16+0(%r15),%eax
+ shll $24,%esi
+ shll $24,%edi
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ leaq -2048(%r14),%r14 /* restore r14 to the value it had on entry */
+ xorl %r10d,%eax /* final state = round key XOR substituted bytes */
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq /* result is left in eax, ebx, ecx, edx */
+.p2align 4 /* Compact decrypt core: state in eax, ebx, ecx, edx; r15 = round keys; r14 = 256-byte byte table. Presumably _x86_64_AES_decrypt_compact (called from _AES_decrypt and the CBC slow path) - its label line is not visible in this listing. */
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi /* eight loads 32 bytes apart spanning the 256-byte table; every destination is overwritten before use, so presumably a cache warm-up - TODO confirm */
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp L$dec_loop_compact
+.p2align 4 /* round loop body; L$dec_loop_compact is presumably defined here - label line not visible */
+ xorl 0(%r15),%eax /* XOR the state with the current round key */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15 /* step to the next round key */
+ movzbl %al,%r10d /* byte-wise table substitution of all 16 state bytes follows */
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+ movzbl %dl,%r8d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+ movzbl %bh,%ebp
+ movzbl %ch,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+ movzbl %cl,%edi
+ shll $8,%r9d
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+ movzbl %bl,%ebp
+ movzbl %bh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+ movzbl %ch,%edi
+ shll $16,%r9d
+ shll $16,%r13d
+ movzbl (%r14,%rdi,1),%ebx
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+ movzbl %dh,%edi
+ shrl $8,%eax
+ shll $16,%ebp
+ movzbl (%r14,%rdi,1),%ecx
+ movzbl (%r14,%rax,1),%edx
+ xorl %ebp,%r8d
+ shll $24,%esi
+ shll $24,%ebx
+ shll $24,%ecx
+ xorl %esi,%r10d
+ shll $24,%edx
+ xorl %r11d,%ebx /* reassemble the four substituted state words */
+ movl %r10d,%eax
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ cmpq 16(%rsp),%r15 /* compare r15 with the stack slot 16(%rsp); seen from the callers this is their 8(%rsp), where they store key + 16 * (value at key+240) */
+ je L$dec_compact_done /* reached the end pointer: skip the mixing step and finish */
+ movq 256+0(%r14),%rsi /* three 64-bit constants stored right after the 256-byte table; presumably the 0x80.. / 0xfe.. / 0x1b.. byte masks - not visible in this listing */
+ shlq $32,%rbx
+ shlq $32,%rdx
+ movq 256+8(%r14),%rdi
+ orq %rbx,%rax /* rax = ebx:eax, two state words packed in one register */
+ orq %rdx,%rcx /* rcx = edx:ecx */
+ movq 256+16(%r14),%rbp
+ movq %rax,%rbx /* first mask/shift/subtract/xor pass; has the shape of a packed GF(2^8) doubling if the masks are as presumed */
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8 /* r8 / r11 = result of the first pass */
+ movq %rdx,%r11
+ andq %rsi,%rbx /* second pass, applied to the first result */
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9 /* r9 / r12 = result of the second pass */
+ movq %rdx,%r12
+ andq %rsi,%rbx /* third pass, applied to the second result */
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10 /* r10 / r13 = result of the third pass */
+ xorq %rdx,%r13
+ xorq %r10,%rax /* combine the three results with the original words, using 8/16/24-bit rotations */
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx /* unpack the high halves back into ebx / edx */
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+ movq 0(%r14),%rsi /* loads at 0/64/128/192/256 from the table base; the destinations are overwritten at the top of the loop, so presumably another cache touch - TODO confirm */
+ shrq $32,%r8
+ shrq $32,%r11
+ movq 64(%r14),%rdi
+ roll $16,%r9d
+ roll $16,%r12d
+ movq 128(%r14),%rbp
+ roll $16,%r8d
+ roll $16,%r11d
+ movq 192(%r14),%r10
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+ movq 256(%r14),%r13
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ jmp L$dec_loop_compact
+.p2align 4 /* exit path; L$dec_compact_done is presumably defined here - label line not visible */
+ xorl 0(%r15),%eax /* XOR with the round key r15 now points at */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq /* result is left in eax, ebx, ecx, edx */
+.globl _AES_decrypt /* Exported entry _AES_decrypt: loads 16 bytes from (%rdi), runs _x86_64_AES_decrypt_compact with the key schedule at %rdx, stores the 16-byte result to (%rsi). */
+.p2align 4 /* align to 2^4 = 16 bytes */
+.globl _asm_AES_decrypt /* the same code is exported under a second name */
+.private_extern _asm_AES_decrypt /* NOTE(review): no "_AES_decrypt:" / "_asm_AES_decrypt:" label lines are visible in this listing - confirm they were not dropped */
+ pushq %rbx /* save the six registers that the epilogue reloads */
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r10 /* r10 = stack pointer after the pushes, kept for the epilogue */
+ leaq -63(%rdx),%rcx /* rcx = key pointer - 63 */
+ andq $-64,%rsp /* round rsp down to a 64-byte boundary */
+ subq %rsp,%rcx
+ negq %rcx /* rcx = rsp - (key - 63) */
+ andq $960,%rcx /* keep bits 6..9 only (960 = 0x3c0) */
+ subq %rcx,%rsp /* lower the frame by that amount; presumably avoids cache aliasing between frame and key schedule - TODO confirm */
+ subq $32,%rsp /* 32-byte local area: 0=key, 8=key end, 16=output pointer, 24=saved rsp */
+ movq %rsi,16(%rsp) /* remember the output pointer */
+ movq %r10,24(%rsp) /* remember the pre-alignment stack pointer */
+ movq %rdx,%r15 /* r15 = key schedule pointer */
+ movl 240(%r15),%r13d /* r13d = 32-bit value stored at key+240 */
+ movl 0(%rdi),%eax /* load the 16 input bytes into eax, ebx, ecx, edx */
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ shll $4,%r13d /* scale the value from key+240 by 16 */
+ leaq (%r15,%r13,1),%rbp /* rbp = key + 16 * that value */
+ movq %r15,(%rsp) /* local 0: key schedule start */
+ movq %rbp,8(%rsp) /* local 8: end pointer, compared against r15 inside the compact core */
+ leaq L$AES_Td+2048(%rip),%r14 /* r14 = decrypt table base + 2048 */
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp /* rbp = (rsp + 768 - r14) & 768, i.e. 0, 256, 512 or 768 */
+ leaq (%r14,%rbp,1),%r14
+ shrq $3,%rbp /* add a further rbp/8 (0, 32, 64 or 96) ... */
+ addq %rbp,%r14 /* ... so the selected positions are 288 bytes apart; presumably each table copy is followed by 32 bytes of constants - TODO confirm */
+ call _x86_64_AES_decrypt_compact /* works on eax..edx; result comes back in the same registers */
+ movq 16(%rsp),%r9 /* r9 = output pointer saved above */
+ movq 24(%rsp),%rsi /* rsi = stack pointer as it was right after the pushes */
+ movl %eax,0(%r9) /* store the 16 result bytes */
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movq (%rsi),%r15 /* reload the pushed registers via rsi, in reverse push order */
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp /* drop the aligned frame and the six saved slots in one step */
+ retq
+.globl _AES_set_encrypt_key /* Exported wrapper: saves six registers, calls _x86_64_AES_set_encrypt_key with the argument registers untouched, restores, and returns the callee's rax. */
+.p2align 4 /* NOTE(review): no "_AES_set_encrypt_key:" label line is visible in this listing - confirm it was not dropped */
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $8,%rsp /* one extra 8-byte slot, so 56 bytes are reserved in total; presumably for 16-byte stack alignment at the call - TODO confirm */
+ call _x86_64_AES_set_encrypt_key
+ movq 8(%rsp),%r15 /* reload the saved registers, skipping the extra slot at 0(%rsp) */
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp /* release the extra slot plus the six saved slots */
+ retq /* rax is whatever the callee left there */
+.p2align 4 /* Key-expansion core. In: rdi = user key, esi = key size in bits, rdx = output schedule. Out: rax = 0 on success, -1 if a pointer is null, -2 if the size is not 128/192/256. Presumably _x86_64_AES_set_encrypt_key - its label line is not visible in this listing. */
+ movl %esi,%ecx /* ecx = key size in bits */
+ movq %rdi,%rsi /* rsi = user key */
+ movq %rdx,%rdi /* rdi = output schedule */
+ testq $-1,%rsi /* null user key? */
+ jz L$badpointer
+ testq $-1,%rdi /* null schedule? */
+ jz L$badpointer
+ leaq L$AES_Te(%rip),%rbp
+ leaq 2048+128(%rbp),%rbp /* rbp = table + 2048 + 128; lookups below use -128(%rbp,...) to undo the bias */
+ movl 0-128(%rbp),%eax /* eight loads 32 bytes apart spanning 256 bytes; the values are overwritten before use, so presumably a cache warm-up - TODO confirm */
+ movl 32-128(%rbp),%ebx
+ movl 64-128(%rbp),%r8d
+ movl 96-128(%rbp),%edx
+ movl 128-128(%rbp),%eax
+ movl 160-128(%rbp),%ebx
+ movl 192-128(%rbp),%r8d
+ movl 224-128(%rbp),%edx
+ cmpl $128,%ecx /* dispatch on key size */
+ je L$10rounds
+ cmpl $192,%ecx
+ je L$12rounds
+ cmpl $256,%ecx
+ je L$14rounds
+ movq $-2,%rax /* unsupported key size */
+ jmp L$exit
+ movq 0(%rsi),%rax /* 128-bit path (presumably L$10rounds - label not visible): copy the 16 key bytes into the schedule */
+ movq 8(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rdx,8(%rdi)
+ shrq $32,%rdx /* edx = last key word (bytes 12..15) */
+ xorl %ecx,%ecx /* ecx = iteration / round-constant index */
+ jmp L$10shortcut
+.p2align 2
+ movl 0(%rdi),%eax /* eax = first word of the current 16-byte group */
+ movl 12(%rdi),%edx /* edx = last word of the current group */
+ movzbl %dl,%esi /* substitute each byte of edx through the byte table and XOR into eax with a byte rotation: byte0 -> bits 24..31 */
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx /* byte1 -> bits 0..7 */
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx /* byte2 -> bits 8..15 */
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx /* byte3 -> bits 16..23 */
+ shll $16,%ebx
+ xorl %ebx,%eax
+ xorl 1024-128(%rbp,%rcx,4),%eax /* XOR the ecx-th 32-bit constant stored 1024 bytes past the byte table */
+ movl %eax,16(%rdi) /* each new word is the previous new word XOR the word 16 bytes earlier */
+ xorl 4(%rdi),%eax
+ movl %eax,20(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,24(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,28(%rdi)
+ addl $1,%ecx
+ leaq 16(%rdi),%rdi /* advance one 16-byte group */
+ cmpl $10,%ecx
+ jl L$10loop /* ten iterations */
+ movl $10,80(%rdi) /* rdi has advanced 160 bytes, so this writes 10 at schedule+240 */
+ xorq %rax,%rax /* return 0 */
+ jmp L$exit
+ movq 0(%rsi),%rax /* 192-bit path (presumably L$12rounds - label not visible): copy the 24 key bytes */
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rdx,16(%rdi)
+ shrq $32,%rdx /* edx = last key word (bytes 20..23) */
+ xorl %ecx,%ecx
+ jmp L$12shortcut
+.p2align 2
+ movl 0(%rdi),%eax /* eax = first word of the current 24-byte group */
+ movl 20(%rdi),%edx /* edx = last word of the current group */
+ movzbl %dl,%esi /* same substitute-and-rotate sequence as in the 128-bit path */
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+ xorl 1024-128(%rbp,%rcx,4),%eax /* XOR the ecx-th 32-bit constant */
+ movl %eax,24(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,28(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,32(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,36(%rdi)
+ cmpl $7,%ecx /* on the eighth pass only four words are produced */
+ je L$12break
+ addl $1,%ecx
+ xorl 16(%rdi),%eax /* two more words complete the 24-byte group */
+ movl %eax,40(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,44(%rdi)
+ leaq 24(%rdi),%rdi
+ jmp L$12loop
+ movl $12,72(%rdi) /* rdi has advanced 7*24 = 168 bytes, so this writes 12 at schedule+240 */
+ xorq %rax,%rax /* return 0 */
+ jmp L$exit
+ movq 0(%rsi),%rax /* 256-bit path (presumably L$14rounds - label not visible): copy the 32 key bytes */
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rcx
+ movq 24(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+ shrq $32,%rdx /* edx = last key word (bytes 28..31) */
+ xorl %ecx,%ecx /* rcx was only a copy temporary; it becomes the iteration index */
+ jmp L$14shortcut
+.p2align 2
+ movl 0(%rdi),%eax /* eax = first word of the current 32-byte group */
+ movl 28(%rdi),%edx /* edx = last word of the current group */
+ movzbl %dl,%esi /* substitute-and-rotate, as in the other paths */
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+ xorl 1024-128(%rbp,%rcx,4),%eax /* XOR the ecx-th 32-bit constant */
+ movl %eax,32(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,36(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,40(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,44(%rdi)
+ cmpl $6,%ecx /* on the seventh pass only the first four words are produced */
+ je L$14break
+ addl $1,%ecx
+ movl %eax,%edx /* second half: substitute the word just produced, with no rotation and no constant */
+ movl 16(%rdi),%eax
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx /* byte0 stays in bits 0..7 */
+ movzbl %dh,%esi
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx /* byte1 -> bits 8..15 */
+ shrl $16,%edx
+ shll $8,%ebx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx /* byte2 -> bits 16..23 */
+ movzbl %dh,%esi
+ shll $16,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx /* byte3 -> bits 24..31 */
+ shll $24,%ebx
+ xorl %ebx,%eax
+ movl %eax,48(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,52(%rdi)
+ xorl 24(%rdi),%eax
+ movl %eax,56(%rdi)
+ xorl 28(%rdi),%eax
+ movl %eax,60(%rdi)
+ leaq 32(%rdi),%rdi
+ jmp L$14loop
+ movl $14,48(%rdi) /* rdi has advanced 6*32 = 192 bytes, so this writes 14 at schedule+240 */
+ xorq %rax,%rax /* return 0 */
+ jmp L$exit
+ movq $-1,%rax /* null-pointer result (presumably L$badpointer - label not visible) */
+ retq /* common return (presumably L$exit - label not visible) */
+.globl _AES_set_decrypt_key /* Exported entry: builds the encryption schedule via _x86_64_AES_set_encrypt_key, reverses the order of its 16-byte round keys, then transforms every round key except the first and last in place. Returns 0 in rax, or the helper's non-zero result. */
+.p2align 4 /* NOTE(review): no "_AES_set_decrypt_key:" label line is visible in this listing - confirm it was not dropped */
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rdx /* keep the schedule pointer (third argument) across the call */
+ call _x86_64_AES_set_encrypt_key
+ movq (%rsp),%r8 /* r8 = schedule pointer saved above */
+ cmpl $0,%eax
+ jne L$abort /* helper failed: return its result unchanged */
+ movl 240(%r8),%r14d /* r14d = value the helper stored at schedule+240 (10, 12 or 14) */
+ xorq %rdi,%rdi
+ leaq (%rdi,%r14,4),%rcx /* rcx = 4 * r14 */
+ movq %r8,%rsi /* rsi = first round key */
+ leaq (%r8,%rcx,4),%rdi /* rdi = schedule + 16 * r14 = last round key */
+.p2align 2 /* swap loop; L$invert is presumably defined here - label line not visible */
+ movq 0(%rsi),%rax /* exchange the 16-byte blocks at rsi and rdi */
+ movq 8(%rsi),%rbx
+ movq 0(%rdi),%rcx
+ movq 8(%rdi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,0(%rsi)
+ movq %rdx,8(%rsi)
+ leaq 16(%rsi),%rsi /* move both cursors inward */
+ leaq -16(%rdi),%rdi
+ cmpq %rsi,%rdi
+ jne L$invert /* stop when the cursors meet */
+ leaq L$AES_Te+2048+1024(%rip),%rax /* rax = address 3072 bytes past L$AES_Te */
+ movq 40(%rax),%rsi /* three 64-bit constants at +40/+48/+56; presumably the 0x80.. / 0xfe.. / 0x1b.. byte masks - not visible in this listing */
+ movq 48(%rax),%rdi
+ movq 56(%rax),%rbp
+ movq %r8,%r15 /* r15 = schedule cursor */
+ subl $1,%r14d /* transform r14 - 1 round keys */
+.p2align 2 /* transform loop; L$permute is presumably defined here - label line not visible */
+ leaq 16(%r15),%r15 /* pre-increment: the first round key is left untouched */
+ movq 0(%r15),%rax /* rax / rcx = the 16 bytes of this round key */
+ movq 8(%r15),%rcx
+ movq %rax,%rbx /* first mask/shift/subtract/xor pass; has the shape of a packed GF(2^8) doubling if the masks are as presumed */
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8 /* r8 / r11 = result of the first pass */
+ movq %rdx,%r11
+ andq %rsi,%rbx /* second pass */
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9 /* r9 / r12 = result of the second pass */
+ movq %rdx,%r12
+ andq %rsi,%rbx /* third pass */
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10 /* r10 / r13 = result of the third pass */
+ xorq %rdx,%r13
+ xorq %r10,%rax /* combine the three results with the original words, using 8/16/24-bit rotations */
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx /* split each 64-bit half into two 32-bit words */
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+ shrq $32,%r8
+ shrq $32,%r11
+ roll $16,%r9d
+ roll $16,%r12d
+ roll $16,%r8d
+ roll $16,%r11d
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ movl %eax,0(%r15) /* write the transformed round key back in place */
+ movl %ebx,4(%r15)
+ movl %ecx,8(%r15)
+ movl %edx,12(%r15)
+ subl $1,%r14d
+ jnz L$permute
+ xorq %rax,%rax /* success: return 0 */
+ movq 8(%rsp),%r15 /* common exit (presumably L$abort - label not visible): reload saved registers, skipping the rdx slot at 0(%rsp) */
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp /* release the rdx slot plus the six saved slots */
+ retq
+.globl _AES_cbc_encrypt /* Exported CBC entry. Registers on entry: rdi = input, rsi = output, rdx = length in bytes, rcx = key schedule, r8 = 16-byte IV (updated on return), r9d = non-zero selects the encrypt table. A fast path uses the large tables; a slow path uses the compact cores. */
+.p2align 4
+.private_extern _OPENSSL_ia32cap_P /* capability word read below */
+.globl _asm_AES_cbc_encrypt /* the same code is exported under a second name */
+.private_extern _asm_AES_cbc_encrypt /* NOTE(review): no label definition lines are visible anywhere in this routine - confirm they were not dropped */
+ cmpq $0,%rdx /* zero length: nothing to do */
+ je L$cbc_epilogue
+ pushfq /* flags are saved because cld below changes the direction flag */
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ cld /* string instructions must run forward */
+ movl %r9d,%r9d /* zero-extend the 32-bit direction argument */
+ leaq L$AES_Te(%rip),%r14 /* default to the encrypt table */
+ cmpq $0,%r9
+ jne L$cbc_picked_te
+ leaq L$AES_Td(%rip),%r14 /* direction == 0: use the decrypt table */
+ movl _OPENSSL_ia32cap_P(%rip),%r10d /* r10d = capability word */
+ cmpq $512,%rdx /* fewer than 512 bytes -> slow path */
+ jb L$cbc_slow_prologue
+ testq $15,%rdx /* length not a multiple of 16 -> slow path */
+ jnz L$cbc_slow_prologue
+ btl $IA32CAP_BIT0_HT,%r10d /* capability bit IA32CAP_BIT0_HT set -> slow path (macro defined outside this listing; presumably the hyper-threading flag) */
+ jc L$cbc_slow_prologue
+ leaq -88-248(%rsp),%r15 /* fast path: carve a frame of 88 + 248 bytes ... */
+ andq $-64,%r15 /* ... aligned down to 64 bytes */
+ movq %r14,%r10
+ leaq 2304(%r14),%r11 /* r11 = table + 2304 */
+ movq %r15,%r12
+ andq $4095,%r10 /* compare table start, table end and frame by their offsets within a 4 KiB page */
+ andq $4095,%r11
+ andq $4095,%r12
+ cmpq %r11,%r12
+ jb L$cbc_te_break_out
+ subq %r11,%r12 /* frame offset is at or past table end: lower the frame to the table-end offset */
+ subq %r12,%r15
+ jmp L$cbc_te_ok
+ subq %r10,%r12 /* otherwise lower the frame relative to the table start, plus 320 bytes */
+ andq $4095,%r12
+ addq $320,%r12
+ subq %r12,%r15
+.p2align 2
+ xchgq %rsp,%r15 /* switch to the new frame; r15 = previous stack pointer */
+ movq %r15,16(%rsp) /* frame layout: 16=old rsp, 24=in, 32=out, 40=length, 48=key, 56=ivec */
+ movq %rdi,24(%rsp)
+ movq %rsi,32(%rsp)
+ movq %rdx,40(%rsp)
+ movq %rcx,48(%rsp)
+ movq %r8,56(%rsp)
+ movl $0,80+240(%rsp) /* flag word: stays 0 unless the key schedule is copied to the frame */
+ movq %r8,%rbp /* rbp = ivec */
+ movq %r9,%rbx /* rbx = direction */
+ movq %rsi,%r9 /* r9 = out */
+ movq %rdi,%r8 /* r8 = in */
+ movq %rcx,%r15 /* r15 = key */
+ movl 240(%r15),%eax /* eax = value at key+240 */
+ movq %r15,%r10
+ subq %r14,%r10
+ andq $4095,%r10 /* r10 = (key - table) mod 4096 */
+ cmpq $2304,%r10
+ jb L$cbc_do_ecopy /* below 2304: copy the key to the frame */
+ cmpq $4096-248,%r10
+ jb L$cbc_skip_ecopy /* in [2304, 4096-248): use the key in place */
+.p2align 2
+ movq %r15,%rsi /* copy source = key schedule */
+ leaq 80(%rsp),%rdi /* copy destination = frame + 80 */
+ leaq 80(%rsp),%r15 /* from now on the key pointer refers to the copy */
+ movl $30,%ecx /* 30 quadwords = 240 bytes */
+.long 0x90A548F3 /* bytes F3 48 A5 90: rep movsq followed by nop */
+ movl %eax,(%rdi) /* rdi is now frame+80+240: store the value from key+240, which also makes the flag word non-zero */
+ movq %r15,0(%rsp) /* frame slot 0 = key pointer actually used */
+ movl $18,%ecx /* 18 iterations x 128 bytes = 2304 bytes */
+.p2align 2
+ movq 0(%r14),%r10 /* read through the whole table; the loaded values are not used afterwards, so presumably a cache warm-up - TODO confirm */
+ movq 32(%r14),%r11
+ movq 64(%r14),%r12
+ movq 96(%r14),%r13
+ leaq 128(%r14),%r14
+ subl $1,%ecx
+ jnz L$cbc_prefetch_te
+ leaq -2304(%r14),%r14 /* rewind r14 to the table start */
+ cmpq $0,%rbx /* NOTE(review): nothing visible consumes this compare before the xorl below overwrites the flags; a conditional branch to the decrypt path appears to be missing here - verify against the upstream file */
+ movl 0(%rbp),%eax /* load the IV into eax, ebx, ecx, edx */
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+.p2align 2 /* fast encrypt loop (target of the jnz L$cbc_fast_enc_loop below) */
+ xorl 0(%r8),%eax /* chain: XOR the input block into the previous output (or IV) */
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15 /* reload the key pointer */
+ movq %r8,24(%rsp) /* the input pointer is saved across the call */
+ call _x86_64_AES_encrypt
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10 /* r10 = remaining length */
+ movl %eax,0(%r9) /* store the output block */
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10 /* at least one full block left? (movq below leaves the flags alone) */
+ movq %r10,40(%rsp)
+ jnz L$cbc_fast_enc_loop
+ movq 56(%rsp),%rbp /* write the last output block back as the new IV */
+ movl %eax,0(%rbp)
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+ jmp L$cbc_fast_cleanup
+.p2align 4 /* fast decrypt entry - how control reaches here is not visible in this listing */
+ cmpq %r8,%r9 /* in == out needs the in-place variant */
+ je L$cbc_fast_dec_in_place
+ movq %rbp,64(%rsp) /* frame slot 64 = pointer to the current chaining block (initially the IV) */
+.p2align 2
+ movl 0(%r8),%eax /* load one input block */
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ call _x86_64_AES_decrypt
+ movq 64(%rsp),%rbp
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0(%rbp),%eax /* XOR with the chaining block */
+ xorl 4(%rbp),%ebx
+ xorl 8(%rbp),%ecx
+ xorl 12(%rbp),%edx
+ movq %r8,%rbp /* the input block just consumed becomes the next chaining block */
+ subq $16,%r10 /* sets ZF for the jnz below; the intervening mov/lea do not change flags */
+ movq %r10,40(%rsp)
+ movq %rbp,64(%rsp)
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jnz L$cbc_fast_dec_loop
+ movq 56(%rsp),%r12 /* copy the last input block to ivec */
+ movq 0(%rbp),%r10
+ movq 8(%rbp),%r11
+ movq %r10,0(%r12)
+ movq %r11,8(%r12)
+ jmp L$cbc_fast_cleanup
+.p2align 4 /* in-place fast decrypt: the chaining block is kept by value at frame+64 */
+ movq 0(%rbp),%r10
+ movq 8(%rbp),%r11
+ movq %r10,0+64(%rsp)
+ movq %r11,8+64(%rsp)
+.p2align 2
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ call _x86_64_AES_decrypt
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax /* XOR with the saved chaining block */
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+ movq 0(%r8),%r11 /* grab the input block before the output store overwrites it */
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jz L$cbc_fast_dec_in_place_done
+ movq %r11,0+64(%rsp) /* it becomes the next chaining block */
+ movq %r12,8+64(%rsp)
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ movq %r10,40(%rsp)
+ jmp L$cbc_fast_dec_in_place_loop
+ movq 56(%rsp),%rdi /* last block: write the final input block to ivec, then store the output */
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+.p2align 2 /* fast-path cleanup */
+ cmpl $0,80+240(%rsp) /* was the key schedule copied to the frame? */
+ leaq 80(%rsp),%rdi
+ je L$cbc_exit
+ movl $30,%ecx /* yes: overwrite the 240-byte copy with zeros */
+ xorq %rax,%rax
+.long 0x90AB48F3 /* bytes F3 48 AB 90: rep stosq followed by nop */
+ jmp L$cbc_exit
+.p2align 4 /* slow path prologue */
+ leaq -88(%rsp),%rbp /* frame of 88 bytes, 64-byte aligned, then shifted relative to the key address */
+ andq $-64,%rbp
+ leaq -88-63(%rcx),%r10
+ subq %rbp,%r10
+ negq %r10
+ andq $960,%r10
+ subq %r10,%rbp
+ xchgq %rsp,%rbp /* switch frames; rbp = previous stack pointer */
+ movq %rbp,16(%rsp) /* 16 = old rsp */
+ movq %r8,56(%rsp) /* 56 = ivec */
+ movq %r8,%rbp
+ movq %r9,%rbx /* rbx = direction */
+ movq %rsi,%r9 /* r9 = out */
+ movq %rdi,%r8 /* r8 = in */
+ movq %rcx,%r15 /* r15 = key */
+ movq %rdx,%r10 /* r10 = length */
+ movl 240(%r15),%eax
+ movq %r15,0(%rsp) /* 0 = key */
+ shll $4,%eax
+ leaq (%r15,%rax,1),%rax
+ movq %rax,8(%rsp) /* 8 = key + 16 * (value at key+240), the end pointer used by the compact cores */
+ leaq 2048(%r14),%r14 /* r14 = table + 2048 */
+ leaq 768-8(%rsp),%rax
+ subq %r14,%rax
+ andq $768,%rax /* rax = 0, 256, 512 or 768, chosen from the stack address */
+ leaq (%r14,%rax,1),%r14
+ cmpq $0,%rbx /* NOTE(review): this result is overwritten by the testq on the next line; a conditional branch to the slow decrypt path appears to be missing here - verify against the upstream file */
+ testq $-16,%r10 /* ZF set when fewer than 16 bytes are to be processed */
+ movl 0(%rbp),%eax /* load the IV (mov does not change flags) */
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+ jz L$cbc_slow_enc_tail
+.p2align 2 /* slow encrypt loop */
+ xorl 0(%r8),%eax
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp) /* in, out and length are saved across the call */
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+ call _x86_64_AES_encrypt_compact
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10 /* another full block? */
+ jnz L$cbc_slow_enc_loop
+ testq $15,%r10 /* a partial block left? */
+ jnz L$cbc_slow_enc_tail
+ movq 56(%rsp),%rbp /* done: write the last output block to ivec */
+ movl %eax,0(%rbp)
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+ jmp L$cbc_exit
+.p2align 2 /* partial final block on encrypt */
+ movq %rax,%r11 /* park rax and rcx (half of the chaining state) while the string ops use them */
+ movq %rcx,%r12
+ movq %r10,%rcx /* rcx = number of leftover bytes */
+ movq %r8,%rsi
+ movq %r9,%rdi
+.long 0x9066A4F3 /* bytes F3 A4 66 90: rep movsb (copy leftover input to output) followed by a 2-byte nop */
+ movq $16,%rcx
+ subq %r10,%rcx /* rcx = 16 - leftover */
+ xorq %rax,%rax
+.long 0x9066AAF3 /* bytes F3 AA 66 90: rep stosb (zero-fill the rest of the block) followed by a 2-byte nop */
+ movq %r9,%r8 /* encrypt the padded block in place in the output buffer */
+ movq $16,%r10
+ movq %r11,%rax
+ movq %r12,%rcx
+ jmp L$cbc_slow_enc_loop
+.p2align 4 /* slow decrypt entry - how control reaches here is not visible in this listing */
+ shrq $3,%rax /* NOTE(review): presumably rax still holds the 0/256/512/768 offset from the slow prologue - confirm */
+ addq %rax,%r14 /* add offset/8 to the table pointer */
+ movq 0(%rbp),%r11 /* keep the chaining block by value at frame+64 */
+ movq 8(%rbp),%r12
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+.p2align 2
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+ call _x86_64_AES_decrypt_compact
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax /* XOR with the chaining block */
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+ movq 0(%r8),%r11 /* r11:r12 = the input block just consumed */
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jc L$cbc_slow_dec_partial /* fewer than 16 bytes were left */
+ jz L$cbc_slow_dec_done /* exactly 16 bytes were left */
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jmp L$cbc_slow_dec_loop
+ movq 56(%rsp),%rdi /* last full block: update ivec, store the output */
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ jmp L$cbc_exit
+.p2align 2 /* partial final block on decrypt */
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi) /* update ivec */
+ movq %r12,8(%rdi)
+ movl %eax,0+64(%rsp) /* stage the decrypted block in the frame */
+ movl %ebx,4+64(%rsp)
+ movl %ecx,8+64(%rsp)
+ movl %edx,12+64(%rsp)
+ movq %r9,%rdi
+ leaq 64(%rsp),%rsi
+ leaq 16(%r10),%rcx /* r10 is negative here; r10 + 16 = number of bytes to emit */
+.long 0x9066A4F3 /* bytes F3 A4 66 90: rep movsb followed by a 2-byte nop */
+ jmp L$cbc_exit
+.p2align 4 /* common exit */
+ movq 16(%rsp),%rsi /* rsi = stack pointer saved by the prologue */
+ movq (%rsi),%r15 /* reload the pushed registers in reverse push order */
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+ popfq /* restore the caller's flags, including the direction flag */
+ retq /* also the landing point of the zero-length early exit (presumably L$cbc_epilogue - label not visible) */
+.p2align 6
+.long 0xa56363c6,0xa56363c6
+.long 0x847c7cf8,0x847c7cf8
+.long 0x997777ee,0x997777ee
+.long 0x8d7b7bf6,0x8d7b7bf6
+.long 0x0df2f2ff,0x0df2f2ff
+.long 0xbd6b6bd6,0xbd6b6bd6
+.long 0xb16f6fde,0xb16f6fde
+.long 0x54c5c591,0x54c5c591
+.long 0x50303060,0x50303060
+.long 0x03010102,0x03010102
+.long 0xa96767ce,0xa96767ce
+.long 0x7d2b2b56,0x7d2b2b56
+.long 0x19fefee7,0x19fefee7
+.long 0x62d7d7b5,0x62d7d7b5
+.long 0xe6abab4d,0xe6abab4d
+.long 0x9a7676ec,0x9a7676ec
+.long 0x45caca8f,0x45caca8f
+.long 0x9d82821f,0x9d82821f
+.long 0x40c9c989,0x40c9c989
+.long 0x877d7dfa,0x877d7dfa
+.long 0x15fafaef,0x15fafaef
+.long 0xeb5959b2,0xeb5959b2
+.long 0xc947478e,0xc947478e
+.long 0x0bf0f0fb,0x0bf0f0fb
+.long 0xecadad41,0xecadad41
+.long 0x67d4d4b3,0x67d4d4b3
+.long 0xfda2a25f,0xfda2a25f
+.long 0xeaafaf45,0xeaafaf45
+.long 0xbf9c9c23,0xbf9c9c23
+.long 0xf7a4a453,0xf7a4a453
+.long 0x967272e4,0x967272e4
+.long 0x5bc0c09b,0x5bc0c09b
+.long 0xc2b7b775,0xc2b7b775
+.long 0x1cfdfde1,0x1cfdfde1
+.long 0xae93933d,0xae93933d
+.long 0x6a26264c,0x6a26264c
+.long 0x5a36366c,0x5a36366c
+.long 0x413f3f7e,0x413f3f7e
+.long 0x02f7f7f5,0x02f7f7f5
+.long 0x4fcccc83,0x4fcccc83
+.long 0x5c343468,0x5c343468
+.long 0xf4a5a551,0xf4a5a551
+.long 0x34e5e5d1,0x34e5e5d1
+.long 0x08f1f1f9,0x08f1f1f9
+.long 0x937171e2,0x937171e2
+.long 0x73d8d8ab,0x73d8d8ab
+.long 0x53313162,0x53313162
+.long 0x3f15152a,0x3f15152a
+.long 0x0c040408,0x0c040408
+.long 0x52c7c795,0x52c7c795
+.long 0x65232346,0x65232346
+.long 0x5ec3c39d,0x5ec3c39d
+.long 0x28181830,0x28181830
+.long 0xa1969637,0xa1969637
+.long 0x0f05050a,0x0f05050a
+.long 0xb59a9a2f,0xb59a9a2f
+.long 0x0907070e,0x0907070e
+.long 0x36121224,0x36121224
+.long 0x9b80801b,0x9b80801b
+.long 0x3de2e2df,0x3de2e2df
+.long 0x26ebebcd,0x26ebebcd
+.long 0x6927274e,0x6927274e
+.long 0xcdb2b27f,0xcdb2b27f
+.long 0x9f7575ea,0x9f7575ea
+.long 0x1b090912,0x1b090912
+.long 0x9e83831d,0x9e83831d
+.long 0x742c2c58,0x742c2c58
+.long 0x2e1a1a34,0x2e1a1a34
+.long 0x2d1b1b36,0x2d1b1b36
+.long 0xb26e6edc,0xb26e6edc
+.long 0xee5a5ab4,0xee5a5ab4
+.long 0xfba0a05b,0xfba0a05b
+.long 0xf65252a4,0xf65252a4
+.long 0x4d3b3b76,0x4d3b3b76
+.long 0x61d6d6b7,0x61d6d6b7
+.long 0xceb3b37d,0xceb3b37d
+.long 0x7b292952,0x7b292952
+.long 0x3ee3e3dd,0x3ee3e3dd
+.long 0x712f2f5e,0x712f2f5e
+.long 0x97848413,0x97848413
+.long 0xf55353a6,0xf55353a6
+.long 0x68d1d1b9,0x68d1d1b9
+.long 0x00000000,0x00000000
+.long 0x2cededc1,0x2cededc1
+.long 0x60202040,0x60202040
+.long 0x1ffcfce3,0x1ffcfce3
+.long 0xc8b1b179,0xc8b1b179
+.long 0xed5b5bb6,0xed5b5bb6
+.long 0xbe6a6ad4,0xbe6a6ad4
+.long 0x46cbcb8d,0x46cbcb8d
+.long 0xd9bebe67,0xd9bebe67
+.long 0x4b393972,0x4b393972
+.long 0xde4a4a94,0xde4a4a94
+.long 0xd44c4c98,0xd44c4c98
+.long 0xe85858b0,0xe85858b0
+.long 0x4acfcf85,0x4acfcf85
+.long 0x6bd0d0bb,0x6bd0d0bb
+.long 0x2aefefc5,0x2aefefc5
+.long 0xe5aaaa4f,0xe5aaaa4f
+.long 0x16fbfbed,0x16fbfbed
+.long 0xc5434386,0xc5434386
+.long 0xd74d4d9a,0xd74d4d9a
+.long 0x55333366,0x55333366
+.long 0x94858511,0x94858511
+.long 0xcf45458a,0xcf45458a
+.long 0x10f9f9e9,0x10f9f9e9
+.long 0x06020204,0x06020204
+.long 0x817f7ffe,0x817f7ffe
+.long 0xf05050a0,0xf05050a0
+.long 0x443c3c78,0x443c3c78
+.long 0xba9f9f25,0xba9f9f25
+.long 0xe3a8a84b,0xe3a8a84b
+.long 0xf35151a2,0xf35151a2
+.long 0xfea3a35d,0xfea3a35d
+.long 0xc0404080,0xc0404080
+.long 0x8a8f8f05,0x8a8f8f05
+.long 0xad92923f,0xad92923f
+.long 0xbc9d9d21,0xbc9d9d21
+.long 0x48383870,0x48383870
+.long 0x04f5f5f1,0x04f5f5f1
+.long 0xdfbcbc63,0xdfbcbc63
+.long 0xc1b6b677,0xc1b6b677
+.long 0x75dadaaf,0x75dadaaf
+.long 0x63212142,0x63212142
+.long 0x30101020,0x30101020
+.long 0x1affffe5,0x1affffe5
+.long 0x0ef3f3fd,0x0ef3f3fd
+.long 0x6dd2d2bf,0x6dd2d2bf
+.long 0x4ccdcd81,0x4ccdcd81
+.long 0x140c0c18,0x140c0c18
+.long 0x35131326,0x35131326
+.long 0x2fececc3,0x2fececc3
+.long 0xe15f5fbe,0xe15f5fbe
+.long 0xa2979735,0xa2979735
+.long 0xcc444488,0xcc444488
+.long 0x3917172e,0x3917172e
+.long 0x57c4c493,0x57c4c493
+.long 0xf2a7a755,0xf2a7a755
+.long 0x827e7efc,0x827e7efc
+.long 0x473d3d7a,0x473d3d7a
+.long 0xac6464c8,0xac6464c8
+.long 0xe75d5dba,0xe75d5dba
+.long 0x2b191932,0x2b191932
+.long 0x957373e6,0x957373e6
+.long 0xa06060c0,0xa06060c0
+.long 0x98818119,0x98818119
+.long 0xd14f4f9e,0xd14f4f9e
+.long 0x7fdcdca3,0x7fdcdca3
+.long 0x66222244,0x66222244
+.long 0x7e2a2a54,0x7e2a2a54
+.long 0xab90903b,0xab90903b
+.long 0x8388880b,0x8388880b
+.long 0xca46468c,0xca46468c
+.long 0x29eeeec7,0x29eeeec7
+.long 0xd3b8b86b,0xd3b8b86b
+.long 0x3c141428,0x3c141428
+.long 0x79dedea7,0x79dedea7
+.long 0xe25e5ebc,0xe25e5ebc
+.long 0x1d0b0b16,0x1d0b0b16
+.long 0x76dbdbad,0x76dbdbad
+.long 0x3be0e0db,0x3be0e0db
+.long 0x56323264,0x56323264
+.long 0x4e3a3a74,0x4e3a3a74
+.long 0x1e0a0a14,0x1e0a0a14
+.long 0xdb494992,0xdb494992
+.long 0x0a06060c,0x0a06060c
+.long 0x6c242448,0x6c242448
+.long 0xe45c5cb8,0xe45c5cb8
+.long 0x5dc2c29f,0x5dc2c29f
+.long 0x6ed3d3bd,0x6ed3d3bd
+.long 0xefacac43,0xefacac43
+.long 0xa66262c4,0xa66262c4
+.long 0xa8919139,0xa8919139
+.long 0xa4959531,0xa4959531
+.long 0x37e4e4d3,0x37e4e4d3
+.long 0x8b7979f2,0x8b7979f2
+.long 0x32e7e7d5,0x32e7e7d5
+.long 0x43c8c88b,0x43c8c88b
+.long 0x5937376e,0x5937376e
+.long 0xb76d6dda,0xb76d6dda
+.long 0x8c8d8d01,0x8c8d8d01
+.long 0x64d5d5b1,0x64d5d5b1
+.long 0xd24e4e9c,0xd24e4e9c
+.long 0xe0a9a949,0xe0a9a949
+.long 0xb46c6cd8,0xb46c6cd8
+.long 0xfa5656ac,0xfa5656ac
+.long 0x07f4f4f3,0x07f4f4f3
+.long 0x25eaeacf,0x25eaeacf
+.long 0xaf6565ca,0xaf6565ca
+.long 0x8e7a7af4,0x8e7a7af4
+.long 0xe9aeae47,0xe9aeae47
+.long 0x18080810,0x18080810
+.long 0xd5baba6f,0xd5baba6f
+.long 0x887878f0,0x887878f0
+.long 0x6f25254a,0x6f25254a
+.long 0x722e2e5c,0x722e2e5c
+.long 0x241c1c38,0x241c1c38
+.long 0xf1a6a657,0xf1a6a657
+.long 0xc7b4b473,0xc7b4b473
+.long 0x51c6c697,0x51c6c697
+.long 0x23e8e8cb,0x23e8e8cb
+.long 0x7cdddda1,0x7cdddda1
+.long 0x9c7474e8,0x9c7474e8
+.long 0x211f1f3e,0x211f1f3e
+.long 0xdd4b4b96,0xdd4b4b96
+.long 0xdcbdbd61,0xdcbdbd61
+.long 0x868b8b0d,0x868b8b0d
+.long 0x858a8a0f,0x858a8a0f
+.long 0x907070e0,0x907070e0
+.long 0x423e3e7c,0x423e3e7c
+.long 0xc4b5b571,0xc4b5b571
+.long 0xaa6666cc,0xaa6666cc
+.long 0xd8484890,0xd8484890
+.long 0x05030306,0x05030306
+.long 0x01f6f6f7,0x01f6f6f7
+.long 0x120e0e1c,0x120e0e1c
+.long 0xa36161c2,0xa36161c2
+.long 0x5f35356a,0x5f35356a
+.long 0xf95757ae,0xf95757ae
+.long 0xd0b9b969,0xd0b9b969
+.long 0x91868617,0x91868617
+.long 0x58c1c199,0x58c1c199
+.long 0x271d1d3a,0x271d1d3a
+.long 0xb99e9e27,0xb99e9e27
+.long 0x38e1e1d9,0x38e1e1d9
+.long 0x13f8f8eb,0x13f8f8eb
+.long 0xb398982b,0xb398982b
+.long 0x33111122,0x33111122
+.long 0xbb6969d2,0xbb6969d2
+.long 0x70d9d9a9,0x70d9d9a9
+.long 0x898e8e07,0x898e8e07
+.long 0xa7949433,0xa7949433
+.long 0xb69b9b2d,0xb69b9b2d
+.long 0x221e1e3c,0x221e1e3c
+.long 0x92878715,0x92878715
+.long 0x20e9e9c9,0x20e9e9c9
+.long 0x49cece87,0x49cece87
+.long 0xff5555aa,0xff5555aa
+.long 0x78282850,0x78282850
+.long 0x7adfdfa5,0x7adfdfa5
+.long 0x8f8c8c03,0x8f8c8c03
+.long 0xf8a1a159,0xf8a1a159
+.long 0x80898909,0x80898909
+.long 0x170d0d1a,0x170d0d1a
+.long 0xdabfbf65,0xdabfbf65
+.long 0x31e6e6d7,0x31e6e6d7
+.long 0xc6424284,0xc6424284
+.long 0xb86868d0,0xb86868d0
+.long 0xc3414182,0xc3414182
+.long 0xb0999929,0xb0999929
+.long 0x772d2d5a,0x772d2d5a
+.long 0x110f0f1e,0x110f0f1e
+.long 0xcbb0b07b,0xcbb0b07b
+.long 0xfc5454a8,0xfc5454a8
+.long 0xd6bbbb6d,0xd6bbbb6d
+.long 0x3a16162c,0x3a16162c
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.long 0x00000001, 0x00000002, 0x00000004, 0x00000008
+.long 0x00000010, 0x00000020, 0x00000040, 0x00000080
+.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080
+.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
+.p2align 6
+.long 0x50a7f451,0x50a7f451
+.long 0x5365417e,0x5365417e
+.long 0xc3a4171a,0xc3a4171a
+.long 0x965e273a,0x965e273a
+.long 0xcb6bab3b,0xcb6bab3b
+.long 0xf1459d1f,0xf1459d1f
+.long 0xab58faac,0xab58faac
+.long 0x9303e34b,0x9303e34b
+.long 0x55fa3020,0x55fa3020
+.long 0xf66d76ad,0xf66d76ad
+.long 0x9176cc88,0x9176cc88
+.long 0x254c02f5,0x254c02f5
+.long 0xfcd7e54f,0xfcd7e54f
+.long 0xd7cb2ac5,0xd7cb2ac5
+.long 0x80443526,0x80443526
+.long 0x8fa362b5,0x8fa362b5
+.long 0x495ab1de,0x495ab1de
+.long 0x671bba25,0x671bba25
+.long 0x980eea45,0x980eea45
+.long 0xe1c0fe5d,0xe1c0fe5d
+.long 0x02752fc3,0x02752fc3
+.long 0x12f04c81,0x12f04c81
+.long 0xa397468d,0xa397468d
+.long 0xc6f9d36b,0xc6f9d36b
+.long 0xe75f8f03,0xe75f8f03
+.long 0x959c9215,0x959c9215
+.long 0xeb7a6dbf,0xeb7a6dbf
+.long 0xda595295,0xda595295
+.long 0x2d83bed4,0x2d83bed4
+.long 0xd3217458,0xd3217458
+.long 0x2969e049,0x2969e049
+.long 0x44c8c98e,0x44c8c98e
+.long 0x6a89c275,0x6a89c275
+.long 0x78798ef4,0x78798ef4
+.long 0x6b3e5899,0x6b3e5899
+.long 0xdd71b927,0xdd71b927
+.long 0xb64fe1be,0xb64fe1be
+.long 0x17ad88f0,0x17ad88f0
+.long 0x66ac20c9,0x66ac20c9
+.long 0xb43ace7d,0xb43ace7d
+.long 0x184adf63,0x184adf63
+.long 0x82311ae5,0x82311ae5
+.long 0x60335197,0x60335197
+.long 0x457f5362,0x457f5362
+.long 0xe07764b1,0xe07764b1
+.long 0x84ae6bbb,0x84ae6bbb
+.long 0x1ca081fe,0x1ca081fe
+.long 0x942b08f9,0x942b08f9
+.long 0x58684870,0x58684870
+.long 0x19fd458f,0x19fd458f
+.long 0x876cde94,0x876cde94
+.long 0xb7f87b52,0xb7f87b52
+.long 0x23d373ab,0x23d373ab
+.long 0xe2024b72,0xe2024b72
+.long 0x578f1fe3,0x578f1fe3
+.long 0x2aab5566,0x2aab5566
+.long 0x0728ebb2,0x0728ebb2
+.long 0x03c2b52f,0x03c2b52f
+.long 0x9a7bc586,0x9a7bc586
+.long 0xa50837d3,0xa50837d3
+.long 0xf2872830,0xf2872830
+.long 0xb2a5bf23,0xb2a5bf23
+.long 0xba6a0302,0xba6a0302
+.long 0x5c8216ed,0x5c8216ed
+.long 0x2b1ccf8a,0x2b1ccf8a
+.long 0x92b479a7,0x92b479a7
+.long 0xf0f207f3,0xf0f207f3
+.long 0xa1e2694e,0xa1e2694e
+.long 0xcdf4da65,0xcdf4da65
+.long 0xd5be0506,0xd5be0506
+.long 0x1f6234d1,0x1f6234d1
+.long 0x8afea6c4,0x8afea6c4
+.long 0x9d532e34,0x9d532e34
+.long 0xa055f3a2,0xa055f3a2
+.long 0x32e18a05,0x32e18a05
+.long 0x75ebf6a4,0x75ebf6a4
+.long 0x39ec830b,0x39ec830b
+.long 0xaaef6040,0xaaef6040
+.long 0x069f715e,0x069f715e
+.long 0x51106ebd,0x51106ebd
+.long 0xf98a213e,0xf98a213e
+.long 0x3d06dd96,0x3d06dd96
+.long 0xae053edd,0xae053edd
+.long 0x46bde64d,0x46bde64d
+.long 0xb58d5491,0xb58d5491
+.long 0x055dc471,0x055dc471
+.long 0x6fd40604,0x6fd40604
+.long 0xff155060,0xff155060
+.long 0x24fb9819,0x24fb9819
+.long 0x97e9bdd6,0x97e9bdd6
+.long 0xcc434089,0xcc434089
+.long 0x779ed967,0x779ed967
+.long 0xbd42e8b0,0xbd42e8b0
+.long 0x888b8907,0x888b8907
+.long 0x385b19e7,0x385b19e7
+.long 0xdbeec879,0xdbeec879
+.long 0x470a7ca1,0x470a7ca1
+.long 0xe90f427c,0xe90f427c
+.long 0xc91e84f8,0xc91e84f8
+.long 0x00000000,0x00000000
+.long 0x83868009,0x83868009
+.long 0x48ed2b32,0x48ed2b32
+.long 0xac70111e,0xac70111e
+.long 0x4e725a6c,0x4e725a6c
+.long 0xfbff0efd,0xfbff0efd
+.long 0x5638850f,0x5638850f
+.long 0x1ed5ae3d,0x1ed5ae3d
+.long 0x27392d36,0x27392d36
+.long 0x64d90f0a,0x64d90f0a
+.long 0x21a65c68,0x21a65c68
+.long 0xd1545b9b,0xd1545b9b
+.long 0x3a2e3624,0x3a2e3624
+.long 0xb1670a0c,0xb1670a0c
+.long 0x0fe75793,0x0fe75793
+.long 0xd296eeb4,0xd296eeb4
+.long 0x9e919b1b,0x9e919b1b
+.long 0x4fc5c080,0x4fc5c080
+.long 0xa220dc61,0xa220dc61
+.long 0x694b775a,0x694b775a
+.long 0x161a121c,0x161a121c
+.long 0x0aba93e2,0x0aba93e2
+.long 0xe52aa0c0,0xe52aa0c0
+.long 0x43e0223c,0x43e0223c
+.long 0x1d171b12,0x1d171b12
+.long 0x0b0d090e,0x0b0d090e
+.long 0xadc78bf2,0xadc78bf2
+.long 0xb9a8b62d,0xb9a8b62d
+.long 0xc8a91e14,0xc8a91e14
+.long 0x8519f157,0x8519f157
+.long 0x4c0775af,0x4c0775af
+.long 0xbbdd99ee,0xbbdd99ee
+.long 0xfd607fa3,0xfd607fa3
+.long 0x9f2601f7,0x9f2601f7
+.long 0xbcf5725c,0xbcf5725c
+.long 0xc53b6644,0xc53b6644
+.long 0x347efb5b,0x347efb5b
+.long 0x7629438b,0x7629438b
+.long 0xdcc623cb,0xdcc623cb
+.long 0x68fcedb6,0x68fcedb6
+.long 0x63f1e4b8,0x63f1e4b8
+.long 0xcadc31d7,0xcadc31d7
+.long 0x10856342,0x10856342
+.long 0x40229713,0x40229713
+.long 0x2011c684,0x2011c684
+.long 0x7d244a85,0x7d244a85
+.long 0xf83dbbd2,0xf83dbbd2
+.long 0x1132f9ae,0x1132f9ae
+.long 0x6da129c7,0x6da129c7
+.long 0x4b2f9e1d,0x4b2f9e1d
+.long 0xf330b2dc,0xf330b2dc
+.long 0xec52860d,0xec52860d
+.long 0xd0e3c177,0xd0e3c177
+.long 0x6c16b32b,0x6c16b32b
+.long 0x99b970a9,0x99b970a9
+.long 0xfa489411,0xfa489411
+.long 0x2264e947,0x2264e947
+.long 0xc48cfca8,0xc48cfca8
+.long 0x1a3ff0a0,0x1a3ff0a0
+.long 0xd82c7d56,0xd82c7d56
+.long 0xef903322,0xef903322
+.long 0xc74e4987,0xc74e4987
+.long 0xc1d138d9,0xc1d138d9
+.long 0xfea2ca8c,0xfea2ca8c
+.long 0x360bd498,0x360bd498
+.long 0xcf81f5a6,0xcf81f5a6
+.long 0x28de7aa5,0x28de7aa5
+.long 0x268eb7da,0x268eb7da
+.long 0xa4bfad3f,0xa4bfad3f
+.long 0xe49d3a2c,0xe49d3a2c
+.long 0x0d927850,0x0d927850
+.long 0x9bcc5f6a,0x9bcc5f6a
+.long 0x62467e54,0x62467e54
+.long 0xc2138df6,0xc2138df6
+.long 0xe8b8d890,0xe8b8d890
+.long 0x5ef7392e,0x5ef7392e
+.long 0xf5afc382,0xf5afc382
+.long 0xbe805d9f,0xbe805d9f
+.long 0x7c93d069,0x7c93d069
+.long 0xa92dd56f,0xa92dd56f
+.long 0xb31225cf,0xb31225cf
+.long 0x3b99acc8,0x3b99acc8
+.long 0xa77d1810,0xa77d1810
+.long 0x6e639ce8,0x6e639ce8
+.long 0x7bbb3bdb,0x7bbb3bdb
+.long 0x097826cd,0x097826cd
+.long 0xf418596e,0xf418596e
+.long 0x01b79aec,0x01b79aec
+.long 0xa89a4f83,0xa89a4f83
+.long 0x656e95e6,0x656e95e6
+.long 0x7ee6ffaa,0x7ee6ffaa
+.long 0x08cfbc21,0x08cfbc21
+.long 0xe6e815ef,0xe6e815ef
+.long 0xd99be7ba,0xd99be7ba
+.long 0xce366f4a,0xce366f4a
+.long 0xd4099fea,0xd4099fea
+.long 0xd67cb029,0xd67cb029
+.long 0xafb2a431,0xafb2a431
+.long 0x31233f2a,0x31233f2a
+.long 0x3094a5c6,0x3094a5c6
+.long 0xc066a235,0xc066a235
+.long 0x37bc4e74,0x37bc4e74
+.long 0xa6ca82fc,0xa6ca82fc
+.long 0xb0d090e0,0xb0d090e0
+.long 0x15d8a733,0x15d8a733
+.long 0x4a9804f1,0x4a9804f1
+.long 0xf7daec41,0xf7daec41
+.long 0x0e50cd7f,0x0e50cd7f
+.long 0x2ff69117,0x2ff69117
+.long 0x8dd64d76,0x8dd64d76
+.long 0x4db0ef43,0x4db0ef43
+.long 0x544daacc,0x544daacc
+.long 0xdf0496e4,0xdf0496e4
+.long 0xe3b5d19e,0xe3b5d19e
+.long 0x1b886a4c,0x1b886a4c
+.long 0xb81f2cc1,0xb81f2cc1
+.long 0x7f516546,0x7f516546
+.long 0x04ea5e9d,0x04ea5e9d
+.long 0x5d358c01,0x5d358c01
+.long 0x737487fa,0x737487fa
+.long 0x2e410bfb,0x2e410bfb
+.long 0x5a1d67b3,0x5a1d67b3
+.long 0x52d2db92,0x52d2db92
+.long 0x335610e9,0x335610e9
+.long 0x1347d66d,0x1347d66d
+.long 0x8c61d79a,0x8c61d79a
+.long 0x7a0ca137,0x7a0ca137
+.long 0x8e14f859,0x8e14f859
+.long 0x893c13eb,0x893c13eb
+.long 0xee27a9ce,0xee27a9ce
+.long 0x35c961b7,0x35c961b7
+.long 0xede51ce1,0xede51ce1
+.long 0x3cb1477a,0x3cb1477a
+.long 0x59dfd29c,0x59dfd29c
+.long 0x3f73f255,0x3f73f255
+.long 0x79ce1418,0x79ce1418
+.long 0xbf37c773,0xbf37c773
+.long 0xeacdf753,0xeacdf753
+.long 0x5baafd5f,0x5baafd5f
+.long 0x146f3ddf,0x146f3ddf
+.long 0x86db4478,0x86db4478
+.long 0x81f3afca,0x81f3afca
+.long 0x3ec468b9,0x3ec468b9
+.long 0x2c342438,0x2c342438
+.long 0x5f40a3c2,0x5f40a3c2
+.long 0x72c31d16,0x72c31d16
+.long 0x0c25e2bc,0x0c25e2bc
+.long 0x8b493c28,0x8b493c28
+.long 0x41950dff,0x41950dff
+.long 0x7101a839,0x7101a839
+.long 0xdeb30c08,0xdeb30c08
+.long 0x9ce4b4d8,0x9ce4b4d8
+.long 0x90c15664,0x90c15664
+.long 0x6184cb7b,0x6184cb7b
+.long 0x70b632d5,0x70b632d5
+.long 0x745c6c48,0x745c6c48
+.long 0x4257b8d0,0x4257b8d0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 /* ASCII, NUL-terminated: "AES for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
+.p2align 6
diff --git a/crypto/libressl/crypto/aes/aes-masm-x86_64.S b/crypto/libressl/crypto/aes/aes-masm-x86_64.S
new file mode 100644
index 0000000..9094c72
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aes-masm-x86_64.S
@@ -0,0 +1,2948 @@
+; 1 "crypto/aes/aes-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/aes/aes-masm-x86_64.S.tmp" 2
+; 1 "./crypto/x86_arch.h" 1
+; 16 "./crypto/x86_arch.h"
+; 40 "./crypto/x86_arch.h"
+; 3 "crypto/aes/aes-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+; _x86_64_AES_encrypt: table-driven transform of the four 32-bit state
+; words held in eax, ebx, ecx, edx.
+;   In:  eax..edx = state words
+;        r15      = key material, consumed 16 bytes per step; a 32-bit
+;                   count is read from byte offset 240
+;        r14      = base of a lookup table indexed with scale 8
+;   Out: eax..edx = transformed state
+;   Written and not restored here: esi, edi, ebp, r8d, r10d-r13d, r15, flags.
+; Dword reads at byte offsets 1..3 inside an 8-byte table entry are used
+; next to the offset-0 read; presumably each entry stores its 32-bit word
+; twice so that these reads yield byte-rotated variants - TODO confirm
+; against the table data.
+_x86_64_AES_encrypt PROC PRIVATE
+ xor eax,DWORD PTR[r15] ; XOR the state with the first 16 bytes at r15
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+ mov r13d,DWORD PTR[240+r15] ; loop counter = count at offset 240 ...
+ sub r13d,1 ; ... minus one; the last step is handled after the loop
+ jmp $L$enc_loop ; NOTE(review): label $L$enc_loop is not defined in the text shown here - confirm it exists in the generated file
+ movzx esi,al
+ movzx edi,bl
+ movzx ebp,cl
+ mov r10d,DWORD PTR[rsi*8+r14]
+ mov r11d,DWORD PTR[rdi*8+r14]
+ mov r12d,DWORD PTR[rbp*8+r14]
+ movzx esi,bh
+ movzx edi,ch
+ movzx ebp,dl
+ xor r10d,DWORD PTR[3+rsi*8+r14]
+ xor r11d,DWORD PTR[3+rdi*8+r14]
+ mov r8d,DWORD PTR[rbp*8+r14]
+ movzx esi,dh
+ shr ecx,16
+ movzx ebp,ah
+ xor r12d,DWORD PTR[3+rsi*8+r14]
+ shr edx,16
+ xor r8d,DWORD PTR[3+rbp*8+r14]
+ shr ebx,16
+ lea r15,QWORD PTR[16+r15] ; advance to the next 16 bytes of key material
+ shr eax,16
+ movzx esi,cl
+ movzx edi,dl
+ movzx ebp,al
+ xor r10d,DWORD PTR[2+rsi*8+r14]
+ xor r11d,DWORD PTR[2+rdi*8+r14]
+ xor r12d,DWORD PTR[2+rbp*8+r14]
+ movzx esi,dh
+ movzx edi,ah
+ movzx ebp,bl
+ xor r10d,DWORD PTR[1+rsi*8+r14]
+ xor r11d,DWORD PTR[1+rdi*8+r14]
+ xor r8d,DWORD PTR[2+rbp*8+r14]
+ mov edx,DWORD PTR[12+r15]
+ movzx edi,bh
+ movzx ebp,ch
+ mov eax,DWORD PTR[r15]
+ xor r12d,DWORD PTR[1+rdi*8+r14]
+ xor r8d,DWORD PTR[1+rbp*8+r14]
+ mov ebx,DWORD PTR[4+r15]
+ mov ecx,DWORD PTR[8+r15]
+ xor eax,r10d ; new state = key words XOR accumulated table values
+ xor ebx,r11d
+ xor ecx,r12d
+ xor edx,r8d
+ sub r13d,1
+ jnz $L$enc_loop
+ movzx esi,al ; final step: only one byte of each looked-up word is kept (byte loads or 0ff00h/0ff0000h/0ff000000h masks)
+ movzx edi,bl
+ movzx ebp,cl
+ movzx r10d,BYTE PTR[2+rsi*8+r14]
+ movzx r11d,BYTE PTR[2+rdi*8+r14]
+ movzx r12d,BYTE PTR[2+rbp*8+r14]
+ movzx esi,dl
+ movzx edi,bh
+ movzx ebp,ch
+ movzx r8d,BYTE PTR[2+rsi*8+r14]
+ mov edi,DWORD PTR[rdi*8+r14]
+ mov ebp,DWORD PTR[rbp*8+r14]
+ and edi,00000ff00h
+ and ebp,00000ff00h
+ xor r10d,edi
+ xor r11d,ebp
+ shr ecx,16
+ movzx esi,dh
+ movzx edi,ah
+ shr edx,16
+ mov esi,DWORD PTR[rsi*8+r14]
+ mov edi,DWORD PTR[rdi*8+r14]
+ and esi,00000ff00h
+ and edi,00000ff00h
+ shr ebx,16
+ xor r12d,esi
+ xor r8d,edi
+ shr eax,16
+ movzx esi,cl
+ movzx edi,dl
+ movzx ebp,al
+ mov esi,DWORD PTR[rsi*8+r14]
+ mov edi,DWORD PTR[rdi*8+r14]
+ mov ebp,DWORD PTR[rbp*8+r14]
+ and esi,000ff0000h
+ and edi,000ff0000h
+ and ebp,000ff0000h
+ xor r10d,esi
+ xor r11d,edi
+ xor r12d,ebp
+ movzx esi,bl
+ movzx edi,dh
+ movzx ebp,ah
+ mov esi,DWORD PTR[rsi*8+r14]
+ mov edi,DWORD PTR[2+rdi*8+r14]
+ mov ebp,DWORD PTR[2+rbp*8+r14]
+ and esi,000ff0000h
+ and edi,0ff000000h
+ and ebp,0ff000000h
+ xor r8d,esi
+ xor r10d,edi
+ xor r11d,ebp
+ movzx esi,bh
+ movzx edi,ch
+ mov edx,DWORD PTR[((16+12))+r15] ; last key words are read 16 bytes past r15 (r15 is not advanced in this step)
+ mov esi,DWORD PTR[2+rsi*8+r14]
+ mov edi,DWORD PTR[2+rdi*8+r14]
+ mov eax,DWORD PTR[((16+0))+r15]
+ and esi,0ff000000h
+ and edi,0ff000000h
+ xor r12d,esi
+ xor r8d,edi
+ mov ebx,DWORD PTR[((16+4))+r15]
+ mov ecx,DWORD PTR[((16+8))+r15]
+ xor eax,r10d
+ xor ebx,r11d
+ xor ecx,r12d
+ xor edx,r8d
+ DB 0F3h,0C3h ;repret
+_x86_64_AES_encrypt ENDP
+; _x86_64_AES_encrypt_compact: transform of the state in eax, ebx, ecx, edx
+; that uses only single-byte lookups (index scale 1) in a 256-byte table at
+; r14; the per-column mixing is computed arithmetically with the masks
+; 080808080h / 0fefefefeh / 01b1b1b1bh instead of wide table entries.
+;   In:  eax..edx = state words
+;        r15      = key material, consumed 16 bytes per step
+;        r14      = base of the 256-byte table
+;        [16+rsp] = key address at which the loop stops (the visible
+;                   caller stores it at its own [8+rsp] before the call)
+;   Out: eax..edx = transformed state
+;   Written and not restored here: esi, edi, ebp, r8-r13, r15, flags.
+_x86_64_AES_encrypt_compact PROC PRIVATE
+ lea r8,QWORD PTR[128+r14]
+ mov edi,DWORD PTR[((0-128))+r8] ; the next eight loads read r14+0 .. r14+224 in 32-byte steps; their results are overwritten before use - presumably a cache prefetch of the table
+ mov ebp,DWORD PTR[((32-128))+r8]
+ mov r10d,DWORD PTR[((64-128))+r8]
+ mov r11d,DWORD PTR[((96-128))+r8]
+ mov edi,DWORD PTR[((128-128))+r8]
+ mov ebp,DWORD PTR[((160-128))+r8]
+ mov r10d,DWORD PTR[((192-128))+r8]
+ mov r11d,DWORD PTR[((224-128))+r8]
+ jmp $L$enc_loop_compact ; NOTE(review): label $L$enc_loop_compact is not defined in the text shown here - confirm it exists in the generated file
+ xor eax,DWORD PTR[r15] ; XOR the state with the current 16 bytes of key material
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+ lea r15,QWORD PTR[16+r15]
+ movzx r10d,al ; replace every state byte through the table and reassemble four words
+ movzx r11d,bl
+ movzx r12d,cl
+ movzx r10d,BYTE PTR[r10*1+r14]
+ movzx r11d,BYTE PTR[r11*1+r14]
+ movzx r12d,BYTE PTR[r12*1+r14]
+ movzx r8d,dl
+ movzx esi,bh
+ movzx edi,ch
+ movzx r8d,BYTE PTR[r8*1+r14]
+ movzx r9d,BYTE PTR[rsi*1+r14]
+ movzx r13d,BYTE PTR[rdi*1+r14]
+ movzx ebp,dh
+ movzx esi,ah
+ shr ecx,16
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ movzx esi,BYTE PTR[rsi*1+r14]
+ shr edx,16
+ movzx edi,cl
+ shl r9d,8
+ shl r13d,8
+ movzx edi,BYTE PTR[rdi*1+r14]
+ xor r10d,r9d
+ xor r11d,r13d
+ movzx r9d,dl
+ shr eax,16
+ shr ebx,16
+ movzx r13d,al
+ shl ebp,8
+ shl esi,8
+ movzx r9d,BYTE PTR[r9*1+r14]
+ movzx r13d,BYTE PTR[r13*1+r14]
+ xor r12d,ebp
+ xor r8d,esi
+ movzx ebp,bl
+ movzx esi,dh
+ shl edi,16
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ movzx esi,BYTE PTR[rsi*1+r14]
+ xor r10d,edi
+ movzx edi,ah
+ shr ecx,8
+ shr ebx,8
+ movzx edi,BYTE PTR[rdi*1+r14]
+ movzx edx,BYTE PTR[rcx*1+r14]
+ movzx ecx,BYTE PTR[rbx*1+r14]
+ shl r9d,16
+ shl r13d,16
+ shl ebp,16
+ xor r11d,r9d
+ xor r12d,r13d
+ xor r8d,ebp
+ shl esi,24
+ shl edi,24
+ shl edx,24
+ xor r10d,esi
+ shl ecx,24
+ xor r11d,edi
+ mov eax,r10d
+ mov ebx,r11d
+ xor ecx,r12d
+ xor edx,r8d
+ cmp r15,QWORD PTR[16+rsp] ; stop once r15 has reached the end-of-key address
+ je $L$enc_compact_done ; NOTE(review): label $L$enc_compact_done is not defined in the text shown here
+ mov esi,eax ; column mixing for eax/ebx: first isolate the top bit of every byte
+ mov edi,ebx
+ and esi,080808080h
+ and edi,080808080h
+ mov r10d,esi
+ mov r11d,edi
+ shr r10d,7
+ lea r8d,DWORD PTR[rax*1+rax]
+ shr r11d,7
+ lea r9d,DWORD PTR[rbx*1+rbx]
+ sub esi,r10d ; (hi - hi>>7) gives 07fh in every byte whose top bit was set ...
+ sub edi,r11d
+ and r8d,0fefefefeh ; x+x with the bit carried into the neighbouring byte cleared
+ and r9d,0fefefefeh
+ and esi,01b1b1b1bh ; ... which is reduced to 01bh for those bytes
+ and edi,01b1b1b1bh
+ mov r10d,eax
+ mov r11d,ebx
+ xor r8d,esi ; r8d/r9d = every byte of eax/ebx doubled in GF(2^8) (reduction constant 01bh)
+ xor r9d,edi
+ xor eax,r8d
+ xor ebx,r9d
+ mov esi,ecx ; same doubling sequence for ecx/edx, interleaved with the rotates for eax/ebx
+ mov edi,edx
+ rol eax,24
+ rol ebx,24
+ and esi,080808080h
+ and edi,080808080h
+ xor eax,r8d
+ xor ebx,r9d
+ mov r12d,esi
+ mov ebp,edi
+ ror r10d,16
+ ror r11d,16
+ shr r12d,7
+ lea r8d,DWORD PTR[rcx*1+rcx]
+ xor eax,r10d
+ xor ebx,r11d
+ shr ebp,7
+ lea r9d,DWORD PTR[rdx*1+rdx]
+ ror r10d,8
+ ror r11d,8
+ sub esi,r12d
+ sub edi,ebp
+ xor eax,r10d
+ xor ebx,r11d
+ and r8d,0fefefefeh
+ and r9d,0fefefefeh
+ and esi,01b1b1b1bh
+ and edi,01b1b1b1bh
+ mov r12d,ecx
+ mov ebp,edx
+ xor r8d,esi
+ xor r9d,edi
+ xor ecx,r8d
+ xor edx,r9d
+ rol ecx,24
+ rol edx,24
+ xor ecx,r8d
+ xor edx,r9d
+ mov esi,DWORD PTR[r14] ; this load and the ones at 64/128/192 below write registers that the code after the initial jmp overwrites before reading - presumably a table prefetch
+ ror r12d,16
+ ror ebp,16
+ mov edi,DWORD PTR[64+r14]
+ xor ecx,r12d
+ xor edx,ebp
+ mov r8d,DWORD PTR[128+r14]
+ ror r12d,8
+ ror ebp,8
+ mov r9d,DWORD PTR[192+r14]
+ xor ecx,r12d
+ xor edx,ebp
+ jmp $L$enc_loop_compact
+ xor eax,DWORD PTR[r15] ; final XOR with the 16 bytes of key material at r15
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+ DB 0F3h,0C3h ;repret
+_x86_64_AES_encrypt_compact ENDP
+; AES_encrypt / asm_AES_encrypt: public entry point that runs
+; _x86_64_AES_encrypt_compact on one 16-byte block.
+;   Arguments arrive in rcx, rdx, r8 and are copied to rdi, rsi, rdx:
+;     rdi = 16-byte input (read as four dwords)
+;     rsi = 16-byte output (written as four dwords)
+;     rdx = key structure (32-bit count at byte offset 240)
+;   rbx, rbp, r12-r15 are pushed and restored; rdi and rsi are saved in
+;   the two slots directly above the return address and restored on exit.
+; NOTE(review): the "AES_encrypt PROC PUBLIC" line and the asm_AES_encrypt
+; label that the ENDP/PUBLIC lines imply are not present in the text shown
+; here - confirm they exist in the generated file.
+PUBLIC AES_encrypt
+PUBLIC asm_AES_encrypt
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx ; move the three register arguments to rdi/rsi/rdx, which the body uses
+ mov rsi,rdx
+ mov rdx,r8
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ mov r10,rsp ; r10 = rsp after the pushes; used on exit to find the saved registers
+ lea rcx,QWORD PTR[((-63))+rdx] ; the next six instructions align rsp down to 64 and lower it by a further multiple of 64 (at most 03c0h) derived from its distance to the key address - presumably to keep the frame from aliasing the key in cache
+ and rsp,-64
+ sub rcx,rsp
+ neg rcx
+ and rcx,03c0h
+ sub rsp,rcx
+ sub rsp,32 ; 32-byte local area: [rsp]=key, [8+rsp]=key end, [16+rsp]=output, [24+rsp]=saved rsp
+ mov QWORD PTR[16+rsp],rsi
+ mov QWORD PTR[24+rsp],r10
+ mov r15,rdx
+ mov r13d,DWORD PTR[240+r15]
+ mov eax,DWORD PTR[rdi] ; load the input block into eax..edx
+ mov ebx,DWORD PTR[4+rdi]
+ mov ecx,DWORD PTR[8+rdi]
+ mov edx,DWORD PTR[12+rdi]
+ shl r13d,4
+ lea rbp,QWORD PTR[r13*1+r15] ; rbp = key + 16 * count = address at which the callee's loop stops
+ mov QWORD PTR[rsp],r15
+ mov QWORD PTR[8+rsp],rbp ; the callee sees this slot at [16+rsp] because the call pushes a return address
+ lea r14,QWORD PTR[(($L$AES_Te+2048))]
+ lea rbp,QWORD PTR[768+rsp]
+ sub rbp,r14
+ and rbp,0300h ; offset is 0, 0100h, 0200h or 0300h ...
+ lea r14,QWORD PTR[rbp*1+r14] ; ... so r14 selects one of four positions 256 bytes apart, chosen from the stack address
+ call _x86_64_AES_encrypt_compact
+ mov r9,QWORD PTR[16+rsp] ; r9 = output pointer saved above
+ mov rsi,QWORD PTR[24+rsp] ; rsi = rsp value recorded right after the pushes
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+ mov r15,QWORD PTR[rsi] ; reload the pushed registers in reverse push order
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi] ; rsp back to its value at entry
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+AES_encrypt ENDP
+; _x86_64_AES_decrypt: table-driven transform of the four 32-bit state
+; words held in eax, ebx, ecx, edx.
+;   In:  eax..edx = state words
+;        r15      = key material, consumed 16 bytes per step; a 32-bit
+;                   count is read from byte offset 240
+;        r14      = base of a lookup table indexed with scale 8; a byte
+;                   table indexed with scale 1 is used 2048 bytes after it
+;   Out: eax..edx = transformed state
+;   Written and not restored here: esi, edi, ebp, r8d, r10d-r13d, r15,
+;   flags. r14 is moved by +2048 for the last step and moved back.
+; Dword reads at byte offsets 1..3 inside an 8-byte table entry are used
+; next to the offset-0 read; presumably each entry stores its 32-bit word
+; twice so that these reads yield byte-rotated variants - TODO confirm
+; against the table data.
+_x86_64_AES_decrypt PROC PRIVATE
+ xor eax,DWORD PTR[r15] ; XOR the state with the first 16 bytes at r15
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+ mov r13d,DWORD PTR[240+r15] ; loop counter = count at offset 240 ...
+ sub r13d,1 ; ... minus one; the last step is handled after the loop
+ jmp $L$dec_loop ; NOTE(review): label $L$dec_loop is not defined in the text shown here - confirm it exists in the generated file
+ movzx esi,al
+ movzx edi,bl
+ movzx ebp,cl
+ mov r10d,DWORD PTR[rsi*8+r14]
+ mov r11d,DWORD PTR[rdi*8+r14]
+ mov r12d,DWORD PTR[rbp*8+r14]
+ movzx esi,dh
+ movzx edi,ah
+ movzx ebp,dl
+ xor r10d,DWORD PTR[3+rsi*8+r14]
+ xor r11d,DWORD PTR[3+rdi*8+r14]
+ mov r8d,DWORD PTR[rbp*8+r14]
+ movzx esi,bh
+ shr eax,16
+ movzx ebp,ch
+ xor r12d,DWORD PTR[3+rsi*8+r14]
+ shr edx,16
+ xor r8d,DWORD PTR[3+rbp*8+r14]
+ shr ebx,16
+ lea r15,QWORD PTR[16+r15] ; advance to the next 16 bytes of key material
+ shr ecx,16
+ movzx esi,cl
+ movzx edi,dl
+ movzx ebp,al
+ xor r10d,DWORD PTR[2+rsi*8+r14]
+ xor r11d,DWORD PTR[2+rdi*8+r14]
+ xor r12d,DWORD PTR[2+rbp*8+r14]
+ movzx esi,bh
+ movzx edi,ch
+ movzx ebp,bl
+ xor r10d,DWORD PTR[1+rsi*8+r14]
+ xor r11d,DWORD PTR[1+rdi*8+r14]
+ xor r8d,DWORD PTR[2+rbp*8+r14]
+ movzx esi,dh
+ mov edx,DWORD PTR[12+r15]
+ movzx ebp,ah
+ xor r12d,DWORD PTR[1+rsi*8+r14]
+ mov eax,DWORD PTR[r15]
+ xor r8d,DWORD PTR[1+rbp*8+r14]
+ xor eax,r10d ; new state = key words XOR accumulated table values
+ mov ebx,DWORD PTR[4+r15]
+ mov ecx,DWORD PTR[8+r15]
+ xor ecx,r12d
+ xor ebx,r11d
+ xor edx,r8d
+ sub r13d,1
+ jnz $L$dec_loop
+ lea r14,QWORD PTR[2048+r14] ; last step: switch to the byte table located 2048 bytes (256 entries * 8) further on
+ movzx esi,al
+ movzx edi,bl
+ movzx ebp,cl
+ movzx r10d,BYTE PTR[rsi*1+r14]
+ movzx r11d,BYTE PTR[rdi*1+r14]
+ movzx r12d,BYTE PTR[rbp*1+r14]
+ movzx esi,dl
+ movzx edi,dh
+ movzx ebp,ah
+ movzx r8d,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ shl edi,8
+ shl ebp,8
+ xor r10d,edi
+ xor r11d,ebp
+ shr edx,16
+ movzx esi,bh
+ movzx edi,ch
+ shr eax,16
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ shl esi,8
+ shl edi,8
+ shr ebx,16
+ xor r12d,esi
+ xor r8d,edi
+ shr ecx,16
+ movzx esi,cl
+ movzx edi,dl
+ movzx ebp,al
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ shl esi,16
+ shl edi,16
+ shl ebp,16
+ xor r10d,esi
+ xor r11d,edi
+ xor r12d,ebp
+ movzx esi,bl
+ movzx edi,bh
+ movzx ebp,ch
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ shl esi,16
+ shl edi,24
+ shl ebp,24
+ xor r8d,esi
+ xor r10d,edi
+ xor r11d,ebp
+ movzx esi,dh
+ movzx edi,ah
+ mov edx,DWORD PTR[((16+12))+r15] ; last key words are read 16 bytes past r15 (r15 is not advanced in this step)
+ movzx esi,BYTE PTR[rsi*1+r14]
+ movzx edi,BYTE PTR[rdi*1+r14]
+ mov eax,DWORD PTR[((16+0))+r15]
+ shl esi,24
+ shl edi,24
+ xor r12d,esi
+ xor r8d,edi
+ mov ebx,DWORD PTR[((16+4))+r15]
+ mov ecx,DWORD PTR[((16+8))+r15]
+ lea r14,QWORD PTR[((-2048))+r14] ; undo the +2048 so r14 leaves with its entry value
+ xor eax,r10d
+ xor ebx,r11d
+ xor ecx,r12d
+ xor edx,r8d
+ DB 0F3h,0C3h ;repret
+_x86_64_AES_decrypt ENDP
+; _x86_64_AES_decrypt_compact: transform of the state in eax, ebx, ecx, edx
+; that uses only single-byte lookups (index scale 1) in a 256-byte table at
+; r14. The per-column mixing is computed arithmetically on two 64-bit
+; registers (ebx packed above eax, edx packed above ecx) with three masks
+; loaded from [256+r14], [264+r14] and [272+r14].
+;   In:  eax..edx = state words
+;        r15      = key material, consumed 16 bytes per step
+;        r14      = base of the 256-byte table, with the three 8-byte
+;                   masks directly after it
+;        [16+rsp] = key address at which the loop stops (the visible
+;                   caller stores it at its own [8+rsp] before the call)
+;   Out: eax..edx = transformed state
+;   Written and not restored here: rsi, rdi, rbp, r8-r13, r15, flags.
+_x86_64_AES_decrypt_compact PROC PRIVATE
+ lea r8,QWORD PTR[128+r14]
+ mov edi,DWORD PTR[((0-128))+r8] ; the next eight loads read r14+0 .. r14+224 in 32-byte steps; their results are overwritten before use - presumably a cache prefetch of the table
+ mov ebp,DWORD PTR[((32-128))+r8]
+ mov r10d,DWORD PTR[((64-128))+r8]
+ mov r11d,DWORD PTR[((96-128))+r8]
+ mov edi,DWORD PTR[((128-128))+r8]
+ mov ebp,DWORD PTR[((160-128))+r8]
+ mov r10d,DWORD PTR[((192-128))+r8]
+ mov r11d,DWORD PTR[((224-128))+r8]
+ jmp $L$dec_loop_compact ; NOTE(review): label $L$dec_loop_compact is not defined in the text shown here - confirm it exists in the generated file
+ xor eax,DWORD PTR[r15] ; XOR the state with the current 16 bytes of key material
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+ lea r15,QWORD PTR[16+r15]
+ movzx r10d,al ; replace every state byte through the table and reassemble four words
+ movzx r11d,bl
+ movzx r12d,cl
+ movzx r10d,BYTE PTR[r10*1+r14]
+ movzx r11d,BYTE PTR[r11*1+r14]
+ movzx r12d,BYTE PTR[r12*1+r14]
+ movzx r8d,dl
+ movzx esi,dh
+ movzx edi,ah
+ movzx r8d,BYTE PTR[r8*1+r14]
+ movzx r9d,BYTE PTR[rsi*1+r14]
+ movzx r13d,BYTE PTR[rdi*1+r14]
+ movzx ebp,bh
+ movzx esi,ch
+ shr ecx,16
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ movzx esi,BYTE PTR[rsi*1+r14]
+ shr edx,16
+ movzx edi,cl
+ shl r9d,8
+ shl r13d,8
+ movzx edi,BYTE PTR[rdi*1+r14]
+ xor r10d,r9d
+ xor r11d,r13d
+ movzx r9d,dl
+ shr eax,16
+ shr ebx,16
+ movzx r13d,al
+ shl ebp,8
+ shl esi,8
+ movzx r9d,BYTE PTR[r9*1+r14]
+ movzx r13d,BYTE PTR[r13*1+r14]
+ xor r12d,ebp
+ xor r8d,esi
+ movzx ebp,bl
+ movzx esi,bh
+ shl edi,16
+ movzx ebp,BYTE PTR[rbp*1+r14]
+ movzx esi,BYTE PTR[rsi*1+r14]
+ xor r10d,edi
+ movzx edi,ch
+ shl r9d,16
+ shl r13d,16
+ movzx ebx,BYTE PTR[rdi*1+r14]
+ xor r11d,r9d
+ xor r12d,r13d
+ movzx edi,dh
+ shr eax,8
+ shl ebp,16
+ movzx ecx,BYTE PTR[rdi*1+r14]
+ movzx edx,BYTE PTR[rax*1+r14]
+ xor r8d,ebp
+ shl esi,24
+ shl ebx,24
+ shl ecx,24
+ xor r10d,esi
+ shl edx,24
+ xor ebx,r11d
+ mov eax,r10d
+ xor ecx,r12d
+ xor edx,r8d
+ cmp r15,QWORD PTR[16+rsp] ; stop once r15 has reached the end-of-key address
+ je $L$dec_compact_done ; NOTE(review): label $L$dec_compact_done is not defined in the text shown here
+ mov rsi,QWORD PTR[((256+0))+r14] ; rsi/rdi/rbp = the three masks stored after the table; they are used below where the encrypt variant uses 080808080h, 0fefefefeh and 01b1b1b1bh - presumably the same values widened to 64 bits
+ shl rbx,32
+ shl rdx,32
+ mov rdi,QWORD PTR[((256+8))+r14]
+ or rax,rbx ; rax = ebx:eax, rcx = edx:ecx - two state words per 64-bit register
+ or rcx,rdx
+ mov rbp,QWORD PTR[((256+16))+r14]
+ mov rbx,rax ; first packed doubling step (x -> 2x per byte)
+ mov rdx,rcx
+ and rbx,rsi
+ and rdx,rsi
+ mov r9,rbx
+ mov r12,rdx
+ shr r9,7
+ lea r8,QWORD PTR[rax*1+rax]
+ shr r12,7
+ lea r11,QWORD PTR[rcx*1+rcx]
+ sub rbx,r9
+ sub rdx,r12
+ and r8,rdi
+ and r11,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor rbx,r8
+ xor rdx,r11
+ mov r8,rbx ; r8/r11 = 2x; second doubling step follows (2x -> 4x)
+ mov r11,rdx
+ and rbx,rsi
+ and rdx,rsi
+ mov r10,rbx
+ mov r13,rdx
+ shr r10,7
+ lea r9,QWORD PTR[r8*1+r8]
+ shr r13,7
+ lea r12,QWORD PTR[r11*1+r11]
+ sub rbx,r10
+ sub rdx,r13
+ and r9,rdi
+ and r12,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor rbx,r9
+ xor rdx,r12
+ mov r9,rbx ; r9/r12 = 4x; third doubling step follows (4x -> 8x in r10/r13)
+ mov r12,rdx
+ and rbx,rsi
+ and rdx,rsi
+ mov r10,rbx
+ mov r13,rdx
+ shr r10,7
+ xor r8,rax
+ shr r13,7
+ xor r11,rcx
+ sub rbx,r10
+ sub rdx,r13
+ lea r10,QWORD PTR[r9*1+r9]
+ lea r13,QWORD PTR[r12*1+r12]
+ xor r9,rax
+ xor r12,rcx
+ and r10,rdi
+ and r13,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor r10,rbx
+ xor r13,rdx
+ xor rax,r10 ; from here the XORs combine x, 2x, 4x, 8x into 9x (rax/rcx), 11x (r8/r11), 13x (r9/r12) and 14x (r10/r13)
+ xor rcx,r13
+ xor r8,r10
+ xor r11,r13
+ mov rbx,rax
+ mov rdx,rcx
+ xor r9,r10
+ xor r12,r13
+ shr rbx,32 ; split the packed halves back into 32-bit words and apply the per-word rotates
+ shr rdx,32
+ xor r10,r8
+ xor r13,r11
+ rol eax,8
+ rol ecx,8
+ xor r10,r9
+ xor r13,r12
+ rol ebx,8
+ rol edx,8
+ xor eax,r10d
+ xor ecx,r13d
+ shr r10,32
+ shr r13,32
+ xor ebx,r10d
+ xor edx,r13d
+ mov r10,r8
+ mov r13,r11
+ shr r10,32
+ shr r13,32
+ rol r8d,24
+ rol r11d,24
+ rol r10d,24
+ rol r13d,24
+ xor eax,r8d
+ xor ecx,r11d
+ mov r8,r9
+ mov r11,r12
+ xor ebx,r10d
+ xor edx,r13d
+ mov rsi,QWORD PTR[r14] ; this load and the ones at 64/128/192/256 below write registers that the code after the initial jmp overwrites before reading - presumably a table prefetch
+ shr r8,32
+ shr r11,32
+ mov rdi,QWORD PTR[64+r14]
+ rol r9d,16
+ rol r12d,16
+ mov rbp,QWORD PTR[128+r14]
+ rol r8d,16
+ rol r11d,16
+ mov r10,QWORD PTR[192+r14]
+ xor eax,r9d
+ xor ecx,r12d
+ mov r13,QWORD PTR[256+r14]
+ xor ebx,r8d
+ xor edx,r11d
+ jmp $L$dec_loop_compact
+ xor eax,DWORD PTR[r15] ; final XOR with the 16 bytes of key material at r15
+ xor ebx,DWORD PTR[4+r15]
+ xor ecx,DWORD PTR[8+r15]
+ xor edx,DWORD PTR[12+r15]
+ DB 0F3h,0C3h ;repret
+_x86_64_AES_decrypt_compact ENDP
+; AES_decrypt / asm_AES_decrypt: public entry point that runs
+; _x86_64_AES_decrypt_compact on one 16-byte block.
+;   Arguments arrive in rcx, rdx, r8 and are copied to rdi, rsi, rdx:
+;     rdi = 16-byte input (read as four dwords)
+;     rsi = 16-byte output (written as four dwords)
+;     rdx = key structure (32-bit count at byte offset 240)
+;   rbx, rbp, r12-r15 are pushed and restored; rdi and rsi are saved in
+;   the two slots directly above the return address and restored on exit.
+; NOTE(review): the "AES_decrypt PROC PUBLIC" line and the asm_AES_decrypt
+; label that the ENDP/PUBLIC lines imply are not present in the text shown
+; here - confirm they exist in the generated file.
+PUBLIC AES_decrypt
+PUBLIC asm_AES_decrypt
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx ; move the three register arguments to rdi/rsi/rdx, which the body uses
+ mov rsi,rdx
+ mov rdx,r8
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ mov r10,rsp ; r10 = rsp after the pushes; used on exit to find the saved registers
+ lea rcx,QWORD PTR[((-63))+rdx] ; the next six instructions align rsp down to 64 and lower it by a further multiple of 64 (at most 03c0h) derived from its distance to the key address - presumably to keep the frame from aliasing the key in cache
+ and rsp,-64
+ sub rcx,rsp
+ neg rcx
+ and rcx,03c0h
+ sub rsp,rcx
+ sub rsp,32 ; 32-byte local area: [rsp]=key, [8+rsp]=key end, [16+rsp]=output, [24+rsp]=saved rsp
+ mov QWORD PTR[16+rsp],rsi
+ mov QWORD PTR[24+rsp],r10
+ mov r15,rdx
+ mov r13d,DWORD PTR[240+r15]
+ mov eax,DWORD PTR[rdi] ; load the input block into eax..edx
+ mov ebx,DWORD PTR[4+rdi]
+ mov ecx,DWORD PTR[8+rdi]
+ mov edx,DWORD PTR[12+rdi]
+ shl r13d,4
+ lea rbp,QWORD PTR[r13*1+r15] ; rbp = key + 16 * count = address at which the callee's loop stops
+ mov QWORD PTR[rsp],r15
+ mov QWORD PTR[8+rsp],rbp ; the callee sees this slot at [16+rsp] because the call pushes a return address
+ lea r14,QWORD PTR[(($L$AES_Td+2048))]
+ lea rbp,QWORD PTR[768+rsp]
+ sub rbp,r14
+ and rbp,0300h ; rbp = k * 256 with k in 0..3, chosen from the stack address
+ lea r14,QWORD PTR[rbp*1+r14]
+ shr rbp,3 ; k * 32
+ add r14,rbp ; r14 = $L$AES_Td+2048 + k * 288: the selectable positions are 256+32 bytes apart (presumably a 256-byte table followed by 32 bytes of masks)
+ call _x86_64_AES_decrypt_compact
+ mov r9,QWORD PTR[16+rsp] ; r9 = output pointer saved above
+ mov rsi,QWORD PTR[24+rsp] ; rsi = rsp value recorded right after the pushes
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+ mov r15,QWORD PTR[rsi] ; reload the pushed registers in reverse push order
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi] ; rsp back to its value at entry
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+AES_decrypt ENDP
+; AES_set_encrypt_key: public wrapper around _x86_64_AES_set_encrypt_key.
+;   Arguments arrive in rcx, rdx, r8 and are copied to rdi, rsi, rdx before
+;   the call. rbx, rbp, r12-r15 are pushed and reloaded afterwards; rdi and
+;   rsi are saved in the two slots directly above the return address and
+;   restored on exit. rax is not written after the call, so the helper's
+;   rax value is what the caller receives.
+PUBLIC AES_set_encrypt_key
+AES_set_encrypt_key PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx ; move the three register arguments to rdi/rsi/rdx for the helper
+ mov rsi,rdx
+ mov rdx,r8
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp,8 ; 6 pushes (48 bytes) + 8 bytes of padding = the 56 bytes released below
+ call _x86_64_AES_set_encrypt_key
+ mov r15,QWORD PTR[8+rsp] ; reload the pushed registers (offset 8 skips the padding slot)
+ mov r14,QWORD PTR[16+rsp]
+ mov r13,QWORD PTR[24+rsp]
+ mov r12,QWORD PTR[32+rsp]
+ mov rbp,QWORD PTR[40+rsp]
+ mov rbx,QWORD PTR[48+rsp]
+ add rsp,56 ; rsp back to its value at entry
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+AES_set_encrypt_key ENDP
+_x86_64_AES_set_encrypt_key PROC PRIVATE ; key-expansion worker: rdi = user key, esi = size in bits, rdx = schedule; result code in rax. NOTE(review): the $L$ labels it jumps to are not defined in this text; positions named below are inferred
+ mov ecx,esi ; ecx = key size in bits
+ mov rsi,rdi ; rsi = user key
+ mov rdi,rdx ; rdi = key schedule being written
+ test rsi,-1
+ jz $L$badpointer ; null user key
+ test rdi,-1
+ jz $L$badpointer ; null schedule
+ lea rbp,QWORD PTR[$L$AES_Te]
+ lea rbp,QWORD PTR[((2048+128))+rbp] ; rbp = $L$AES_Te+2048+128; the byte table is then addressed as [rbp-128+index]
+ mov eax,DWORD PTR[((0-128))+rbp] ; eight loads 32 bytes apart across the 256-byte table; the loaded values are never used - presumably a cache warm-up
+ mov ebx,DWORD PTR[((32-128))+rbp]
+ mov r8d,DWORD PTR[((64-128))+rbp]
+ mov edx,DWORD PTR[((96-128))+rbp]
+ mov eax,DWORD PTR[((128-128))+rbp]
+ mov ebx,DWORD PTR[((160-128))+rbp]
+ mov r8d,DWORD PTR[((192-128))+rbp]
+ mov edx,DWORD PTR[((224-128))+rbp]
+ cmp ecx,128 ; dispatch on key size
+ je $L$10rounds
+ cmp ecx,192
+ je $L$12rounds
+ cmp ecx,256
+ je $L$14rounds
+ mov rax,-2 ; any other size: return -2
+ jmp $L$exit
+ mov rax,QWORD PTR[rsi] ; 128-bit key (presumably $L$10rounds): copy the 16 key bytes as round key 0
+ mov rdx,QWORD PTR[8+rsi]
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rdx
+ shr rdx,32 ; edx = last word of the key
+ xor ecx,ecx ; ecx = round-constant index, starts at 0
+ jmp $L$10shortcut ; eax/edx already hold what the loop top would reload
+ mov eax,DWORD PTR[rdi] ; loop top (presumably $L$10loop): eax = first word of the previous round key
+ mov edx,DWORD PTR[12+rdi] ; edx = its last word
+ movzx esi,dl ; substitute each byte of edx through the byte table and XOR it into eax one byte position lower:
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,24 ; byte 0 -> bits 24-31
+ xor eax,ebx
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] ; byte 1 -> bits 0-7 (no shift)
+ shr edx,16
+ movzx esi,dl
+ xor eax,ebx
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,8 ; byte 2 -> bits 8-15
+ xor eax,ebx
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shl ebx,16 ; byte 3 -> bits 16-23
+ xor eax,ebx
+ xor eax,DWORD PTR[((1024-128))+rcx*4+rbp] ; XOR DWORD number ecx from $L$AES_Te+3072 (the per-round constant)
+ mov DWORD PTR[16+rdi],eax ; new round key: each word = running value XOR the word 16 bytes earlier
+ xor eax,DWORD PTR[4+rdi]
+ mov DWORD PTR[20+rdi],eax
+ xor eax,DWORD PTR[8+rdi]
+ mov DWORD PTR[24+rdi],eax
+ xor eax,DWORD PTR[12+rdi]
+ mov DWORD PTR[28+rdi],eax
+ add ecx,1
+ lea rdi,QWORD PTR[16+rdi] ; advance to the round key just written
+ cmp ecx,10
+ jl $L$10loop ; ten iterations
+ mov DWORD PTR[80+rdi],10 ; rdi has advanced 160 bytes, so this writes 10 at schedule+240
+ xor rax,rax ; return 0
+ jmp $L$exit
+ mov rax,QWORD PTR[rsi] ; 192-bit key (presumably $L$12rounds): copy the 24 key bytes
+ mov rbx,QWORD PTR[8+rsi]
+ mov rdx,QWORD PTR[16+rsi]
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rbx
+ mov QWORD PTR[16+rdi],rdx
+ shr rdx,32 ; edx = last word of the key
+ xor ecx,ecx
+ jmp $L$12shortcut
+ mov eax,DWORD PTR[rdi] ; loop top (presumably $L$12loop)
+ mov edx,DWORD PTR[20+rdi] ; last word of the previous 24-byte group
+ movzx esi,dl ; same byte substitution and placement as in the 128-bit loop
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,24
+ xor eax,ebx
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shr edx,16
+ movzx esi,dl
+ xor eax,ebx
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,8
+ xor eax,ebx
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shl ebx,16
+ xor eax,ebx
+ xor eax,DWORD PTR[((1024-128))+rcx*4+rbp] ; per-round constant
+ mov DWORD PTR[24+rdi],eax ; first four words of the next 24-byte group
+ xor eax,DWORD PTR[4+rdi]
+ mov DWORD PTR[28+rdi],eax
+ xor eax,DWORD PTR[8+rdi]
+ mov DWORD PTR[32+rdi],eax
+ xor eax,DWORD PTR[12+rdi]
+ mov DWORD PTR[36+rdi],eax
+ cmp ecx,7
+ je $L$12break ; the last pass writes only those four words
+ add ecx,1
+ xor eax,DWORD PTR[16+rdi] ; remaining two words of the group
+ mov DWORD PTR[40+rdi],eax
+ xor eax,DWORD PTR[20+rdi]
+ mov DWORD PTR[44+rdi],eax
+ lea rdi,QWORD PTR[24+rdi]
+ jmp $L$12loop
+ mov DWORD PTR[72+rdi],12 ; rdi has advanced 7*24 = 168 bytes, so this writes 12 at schedule+240
+ xor rax,rax ; return 0
+ jmp $L$exit
+ mov rax,QWORD PTR[rsi] ; 256-bit key (presumably $L$14rounds): copy the 32 key bytes
+ mov rbx,QWORD PTR[8+rsi]
+ mov rcx,QWORD PTR[16+rsi]
+ mov rdx,QWORD PTR[24+rsi]
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rbx
+ mov QWORD PTR[16+rdi],rcx
+ mov QWORD PTR[24+rdi],rdx
+ shr rdx,32 ; edx = last word of the key
+ xor ecx,ecx
+ jmp $L$14shortcut
+ mov eax,DWORD PTR[rdi] ; loop top (presumably $L$14loop)
+ mov edx,DWORD PTR[28+rdi] ; last word of the previous 32-byte group
+ movzx esi,dl ; same byte substitution and placement as in the 128-bit loop
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,24
+ xor eax,ebx
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shr edx,16
+ movzx esi,dl
+ xor eax,ebx
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,8
+ xor eax,ebx
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shl ebx,16
+ xor eax,ebx
+ xor eax,DWORD PTR[((1024-128))+rcx*4+rbp] ; per-round constant
+ mov DWORD PTR[32+rdi],eax ; first half (four words) of the next 32-byte group
+ xor eax,DWORD PTR[4+rdi]
+ mov DWORD PTR[36+rdi],eax
+ xor eax,DWORD PTR[8+rdi]
+ mov DWORD PTR[40+rdi],eax
+ xor eax,DWORD PTR[12+rdi]
+ mov DWORD PTR[44+rdi],eax
+ cmp ecx,6
+ je $L$14break ; the last pass writes only the first half
+ add ecx,1
+ mov edx,eax ; second half: substitute the word just written, bytes kept in place, no round constant
+ mov eax,DWORD PTR[16+rdi]
+ movzx esi,dl
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ xor eax,ebx ; byte 0 -> bits 0-7
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shr edx,16
+ shl ebx,8 ; byte 1 -> bits 8-15
+ movzx esi,dl
+ xor eax,ebx
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ movzx esi,dh
+ shl ebx,16 ; byte 2 -> bits 16-23
+ xor eax,ebx
+ movzx ebx,BYTE PTR[((-128))+rsi*1+rbp]
+ shl ebx,24 ; byte 3 -> bits 24-31
+ xor eax,ebx
+ mov DWORD PTR[48+rdi],eax
+ xor eax,DWORD PTR[20+rdi]
+ mov DWORD PTR[52+rdi],eax
+ xor eax,DWORD PTR[24+rdi]
+ mov DWORD PTR[56+rdi],eax
+ xor eax,DWORD PTR[28+rdi]
+ mov DWORD PTR[60+rdi],eax
+ lea rdi,QWORD PTR[32+rdi]
+ jmp $L$14loop
+ mov DWORD PTR[48+rdi],14 ; rdi has advanced 6*32 = 192 bytes, so this writes 14 at schedule+240
+ xor rax,rax ; return 0
+ jmp $L$exit
+ mov rax,-1 ; presumably the $L$badpointer target: return -1 for a null argument
+ DB 0F3h,0C3h ;repret
+_x86_64_AES_set_encrypt_key ENDP
+PUBLIC AES_set_decrypt_key ; builds an encryption schedule, reverses its round-key order, then transforms the inner round keys in place
+AES_set_decrypt_key PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx ; 1st argument: user key bytes
+ mov rsi,rdx ; 2nd argument: key size in bits
+ mov rdx,r8 ; 3rd argument: key schedule
+ push rbx ; save six registers
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ push rdx ; keep the schedule pointer at [rsp] across the call
+ call _x86_64_AES_set_encrypt_key
+ mov r8,QWORD PTR[rsp] ; r8 = schedule pointer
+ cmp eax,0
+ jne $L$abort ; nonzero result from the worker: give up (NOTE(review): label not defined in this text; presumably the register-restore code at the end, leaving rax as returned)
+ mov r14d,DWORD PTR[240+r8] ; r14d = DWORD at schedule+240 (10, 12 or 14)
+ xor rdi,rdi
+ lea rcx,QWORD PTR[r14*4+rdi] ; rcx = 4 * rounds
+ mov rsi,r8 ; rsi = first round key
+ lea rdi,QWORD PTR[rcx*4+r8] ; rdi = schedule + 16*rounds = last round key
+ mov rax,QWORD PTR[rsi] ; swap the 16-byte round keys at rsi and rdi (loop top, presumably $L$invert)
+ mov rbx,QWORD PTR[8+rsi]
+ mov rcx,QWORD PTR[rdi]
+ mov rdx,QWORD PTR[8+rdi]
+ mov QWORD PTR[rdi],rax
+ mov QWORD PTR[8+rdi],rbx
+ mov QWORD PTR[rsi],rcx
+ mov QWORD PTR[8+rsi],rdx
+ lea rsi,QWORD PTR[16+rsi] ; move both ends toward the middle
+ lea rdi,QWORD PTR[((-16))+rdi]
+ cmp rdi,rsi
+ jne $L$invert ; stop when the two pointers meet
+ lea rax,QWORD PTR[(($L$AES_Te+2048+1024))]
+ mov rsi,QWORD PTR[40+rax] ; rsi = 80h in every byte
+ mov rdi,QWORD PTR[48+rax] ; rdi = feh in every byte
+ mov rbp,QWORD PTR[56+rax] ; rbp = 1bh in every byte
+ mov r15,r8
+ sub r14d,1 ; rounds-1 round keys are transformed (the first and last are left alone)
+ lea r15,QWORD PTR[16+r15] ; next round key (loop top, presumably $L$permute)
+ mov rax,QWORD PTR[r15] ; rax/rcx = the round key x, as two qwords
+ mov rcx,QWORD PTR[8+r15]
+ mov rbx,rax
+ mov rdx,rcx
+ and rbx,rsi ; isolate the top bit of every byte
+ and rdx,rsi
+ mov r9,rbx
+ mov r12,rdx
+ shr r9,7
+ lea r8,QWORD PTR[rax*1+rax] ; x + x
+ shr r12,7
+ lea r11,QWORD PTR[rcx*1+rcx]
+ sub rbx,r9 ; 7fh in each byte whose top bit was set
+ sub rdx,r12
+ and r8,rdi ; drop the bit carried in from the byte below
+ and r11,rdi
+ and rbx,rbp ; 1bh in each byte whose top bit was set
+ and rdx,rbp
+ xor rbx,r8 ; rbx/rdx = every byte of x doubled, with 1bh folded in on overflow (2x)
+ xor rdx,r11
+ mov r8,rbx ; r8/r11 = 2x
+ mov r11,rdx
+ and rbx,rsi ; same doubling step applied to 2x
+ and rdx,rsi
+ mov r10,rbx
+ mov r13,rdx
+ shr r10,7
+ lea r9,QWORD PTR[r8*1+r8]
+ shr r13,7
+ lea r12,QWORD PTR[r11*1+r11]
+ sub rbx,r10
+ sub rdx,r13
+ and r9,rdi
+ and r12,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor rbx,r9 ; rbx/rdx = 4x
+ xor rdx,r12
+ mov r9,rbx ; r9/r12 = 4x
+ mov r12,rdx
+ and rbx,rsi ; doubling step applied to 4x, interleaved with the first combining XORs
+ and rdx,rsi
+ mov r10,rbx
+ mov r13,rdx
+ shr r10,7
+ xor r8,rax ; r8/r11 = 2x ^ x
+ shr r13,7
+ xor r11,rcx
+ sub rbx,r10
+ sub rdx,r13
+ lea r10,QWORD PTR[r9*1+r9]
+ lea r13,QWORD PTR[r12*1+r12]
+ xor r9,rax ; r9/r12 = 4x ^ x
+ xor r12,rcx
+ and r10,rdi
+ and r13,rdi
+ and rbx,rbp
+ and rdx,rbp
+ xor r10,rbx ; r10/r13 = 8x
+ xor r13,rdx
+ xor rax,r10 ; fold 8x into x, 2x^x and 4x^x
+ xor rcx,r13
+ xor r8,r10
+ xor r11,r13
+ mov rbx,rax
+ mov rdx,rcx
+ xor r9,r10
+ xor r12,r13
+ shr rbx,32 ; split each qword into its two 32-bit words: eax/ebx and ecx/edx
+ shr rdx,32
+ xor r10,r8
+ xor r13,r11
+ rol eax,8 ; the terms are rotated by 8, 24 and 16 bits before being XORed into each word (presumably the inverse MixColumns step - TODO confirm)
+ rol ecx,8
+ xor r10,r9
+ xor r13,r12
+ rol ebx,8
+ rol edx,8
+ xor eax,r10d
+ xor ecx,r13d
+ shr r10,32
+ shr r13,32
+ xor ebx,r10d
+ xor edx,r13d
+ mov r10,r8
+ mov r13,r11
+ shr r10,32
+ shr r13,32
+ rol r8d,24
+ rol r11d,24
+ rol r10d,24
+ rol r13d,24
+ xor eax,r8d
+ xor ecx,r11d
+ mov r8,r9
+ mov r11,r12
+ xor ebx,r10d
+ xor edx,r13d
+ shr r8,32
+ shr r11,32
+ rol r9d,16
+ rol r12d,16
+ rol r8d,16
+ rol r11d,16
+ xor eax,r9d
+ xor ecx,r12d
+ xor ebx,r8d
+ xor edx,r11d
+ mov DWORD PTR[r15],eax ; write the transformed round key back in place
+ mov DWORD PTR[4+r15],ebx
+ mov DWORD PTR[8+r15],ecx
+ mov DWORD PTR[12+r15],edx
+ sub r14d,1
+ jnz $L$permute
+ xor rax,rax ; return 0
+ mov r15,QWORD PTR[8+rsp] ; reload the saved registers; [rsp] still holds the pushed schedule pointer
+ mov r14,QWORD PTR[16+rsp]
+ mov r13,QWORD PTR[24+rsp]
+ mov r12,QWORD PTR[32+rsp]
+ mov rbp,QWORD PTR[40+rsp]
+ mov rbx,QWORD PTR[48+rsp]
+ add rsp,56 ; drop the schedule pointer and the six saved registers
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+AES_set_decrypt_key ENDP
+PUBLIC AES_cbc_encrypt ; CBC-mode AES over a buffer; six arguments: rcx, rdx, r8, r9, [40+rsp], [48+rsp]
+PUBLIC asm_AES_cbc_encrypt ; NOTE(review): the $L$ branch targets used below (and an asm_AES_cbc_encrypt label) are not defined in this text; positions named in comments are inferred
+AES_cbc_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx ; rdi = input buffer
+ mov rsi,rdx ; rsi = output buffer
+ mov rdx,r8 ; rdx = length in bytes
+ mov rcx,r9 ; rcx = key schedule
+ mov r8,QWORD PTR[40+rsp] ; r8 = 5th argument: 16-byte IV, read at the start and rewritten before return
+ mov r9,QWORD PTR[48+rsp] ; r9 = 6th argument: nonzero selects $L$AES_Te, zero selects $L$AES_Td
+ cmp rdx,0
+ je $L$cbc_epilogue ; zero length: nothing to do
+ pushfq ; flags are saved here and restored by popfq at the end (cld below changes the direction flag)
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ cld ; the string instructions below must count upward
+ mov r9d,r9d ; clear the upper 32 bits of r9
+ lea r14,QWORD PTR[$L$AES_Te]
+ cmp r9,0
+ jne $L$cbc_picked_te
+ lea r14,QWORD PTR[$L$AES_Td]
+ mov r10d,DWORD PTR[OPENSSL_ia32cap_P]
+ cmp rdx,512
+ jb $L$cbc_slow_prologue ; inputs shorter than 512 bytes take the slow path
+ test rdx,15
+ jnz $L$cbc_slow_prologue ; so do lengths that are not a multiple of 16
+ bt r10d,28
+ jc $L$cbc_slow_prologue ; and CPUs with capability bit 28 set (presumably the hyper-threading flag - TODO confirm)
+ lea r15,QWORD PTR[((-88-248))+rsp] ; fast path: reserve room for locals plus a stack copy of the key schedule
+ and r15,-64
+ mov r10,r14 ; compare the page offsets (low 12 bits) of the table start, table end and the new frame
+ lea r11,QWORD PTR[2304+r14]
+ mov r12,r15
+ and r10,0FFFh
+ and r11,0FFFh
+ and r12,0FFFh
+ cmp r12,r11
+ jb $L$cbc_te_break_out
+ sub r12,r11
+ sub r15,r12 ; move the frame down relative to the 2304-byte table - presumably to avoid cache aliasing; TODO confirm
+ jmp $L$cbc_te_ok
+ sub r12,r10
+ and r12,0FFFh
+ add r12,320
+ sub r15,r12
+ xchg r15,rsp ; switch to the new frame; r15 = previous rsp
+ mov QWORD PTR[16+rsp],r15 ; [16] = previous rsp, used by the exit code
+ mov QWORD PTR[24+rsp],rdi ; [24] = input pointer
+ mov QWORD PTR[32+rsp],rsi ; [32] = output pointer
+ mov QWORD PTR[40+rsp],rdx ; [40] = remaining length
+ mov QWORD PTR[48+rsp],rcx ; [48] = key schedule
+ mov QWORD PTR[56+rsp],r8 ; [56] = IV pointer
+ mov DWORD PTR[((80+240))+rsp],0 ; marker: stays 0 unless the key is copied to the stack below
+ mov rbp,r8 ; rbp = IV, rbx = direction flag, r9 = output, r8 = input, r15 = key
+ mov rbx,r9
+ mov r9,rsi
+ mov r8,rdi
+ mov r15,rcx
+ mov eax,DWORD PTR[240+r15] ; eax = DWORD at key+240
+ mov r10,r15
+ sub r10,r14
+ and r10,0fffh ; r10 = (key - table) modulo 4096
+ cmp r10,2304
+ jb $L$cbc_do_ecopy ; below 2304: copy the key to the stack
+ cmp r10,4096-248
+ jb $L$cbc_skip_ecopy ; otherwise copy only when r10 >= 4096-248
+ mov rsi,r15
+ lea rdi,QWORD PTR[80+rsp]
+ lea r15,QWORD PTR[80+rsp] ; from here on r15 points at the stack copy
+ mov ecx,240/8
+ DD 090A548F3h ; bytes F3 48 A5 90: rep movsq, nop - copies 240 bytes of key schedule to [80+rsp]
+ mov DWORD PTR[rdi],eax ; rdi is now 80+240+rsp: store the DWORD from key+240 there, overwriting the marker
+ mov QWORD PTR[rsp],r15 ; [0] = key pointer used by the loops (original or stack copy)
+ mov ecx,18
+ mov r10,QWORD PTR[r14] ; touch the table every 32 bytes, 128 bytes per pass; loaded values are not used
+ mov r11,QWORD PTR[32+r14]
+ mov r12,QWORD PTR[64+r14]
+ mov r13,QWORD PTR[96+r14]
+ lea r14,QWORD PTR[128+r14]
+ sub ecx,1
+ jnz $L$cbc_prefetch_te
+ lea r14,QWORD PTR[((-2304))+r14] ; undo the 18*128 advance
+ cmp rbx,0 ; NOTE(review): nothing consumes this compare in this text (the xor below overwrites the flags); a conditional jump to the decrypt path appears to be missing
+ mov eax,DWORD PTR[rbp] ; encrypt: start the chain from the IV
+ mov ebx,DWORD PTR[4+rbp]
+ mov ecx,DWORD PTR[8+rbp]
+ mov edx,DWORD PTR[12+rbp]
+ xor eax,DWORD PTR[r8] ; XOR the next input block into the chaining value (loop top, presumably $L$cbc_fast_enc_loop)
+ xor ebx,DWORD PTR[4+r8]
+ xor ecx,DWORD PTR[8+r8]
+ xor edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8 ; input pointer is saved and reloaded around the call
+ call _x86_64_AES_encrypt
+ mov r8,QWORD PTR[24+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ mov DWORD PTR[r9],eax ; store the output block; eax..edx stay live as the next chaining value
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ sub r10,16
+ test r10,-16
+ mov QWORD PTR[40+rsp],r10
+ jnz $L$cbc_fast_enc_loop ; loop while at least 16 bytes remain
+ mov rbp,QWORD PTR[56+rsp]
+ mov DWORD PTR[rbp],eax ; write the last output block back as the IV
+ mov DWORD PTR[4+rbp],ebx
+ mov DWORD PTR[8+rbp],ecx
+ mov DWORD PTR[12+rbp],edx
+ jmp $L$cbc_fast_cleanup
+ cmp r9,r8 ; decrypt path (presumably $L$FAST_DECRYPT): is the output buffer the input buffer?
+ je $L$cbc_fast_dec_in_place
+ mov QWORD PTR[64+rsp],rbp ; [64] = pointer to the previous input block (initially the IV)
+ mov eax,DWORD PTR[r8] ; load the next input block (loop top, presumably $L$cbc_fast_dec_loop)
+ mov ebx,DWORD PTR[4+r8]
+ mov ecx,DWORD PTR[8+r8]
+ mov edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8
+ call _x86_64_AES_decrypt
+ mov rbp,QWORD PTR[64+rsp]
+ mov r8,QWORD PTR[24+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ xor eax,DWORD PTR[rbp] ; XOR with the previous input block
+ xor ebx,DWORD PTR[4+rbp]
+ xor ecx,DWORD PTR[8+rbp]
+ xor edx,DWORD PTR[12+rbp]
+ mov rbp,r8 ; this input block is the chaining value for the next one
+ sub r10,16 ; sets ZF for the jnz below; the mov/lea in between leave the flags alone
+ mov QWORD PTR[40+rsp],r10
+ mov QWORD PTR[64+rsp],rbp
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ jnz $L$cbc_fast_dec_loop
+ mov r12,QWORD PTR[56+rsp]
+ mov r10,QWORD PTR[rbp] ; copy the last input block into the IV
+ mov r11,QWORD PTR[8+rbp]
+ mov QWORD PTR[r12],r10
+ mov QWORD PTR[8+r12],r11
+ jmp $L$cbc_fast_cleanup
+ mov r10,QWORD PTR[rbp] ; in-place decrypt: copy the IV to [64..79+rsp]
+ mov r11,QWORD PTR[8+rbp]
+ mov QWORD PTR[((0+64))+rsp],r10
+ mov QWORD PTR[((8+64))+rsp],r11
+ mov eax,DWORD PTR[r8] ; load the next input block (loop top, presumably $L$cbc_fast_dec_in_place_loop)
+ mov ebx,DWORD PTR[4+r8]
+ mov ecx,DWORD PTR[8+r8]
+ mov edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8
+ call _x86_64_AES_decrypt
+ mov r8,QWORD PTR[24+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ xor eax,DWORD PTR[((0+64))+rsp] ; XOR with the saved chaining value
+ xor ebx,DWORD PTR[((4+64))+rsp]
+ xor ecx,DWORD PTR[((8+64))+rsp]
+ xor edx,DWORD PTR[((12+64))+rsp]
+ mov r11,QWORD PTR[r8] ; keep this input block: the store through r9 below overwrites it (r9 == r8 on this path)
+ mov r12,QWORD PTR[8+r8]
+ sub r10,16
+ jz $L$cbc_fast_dec_in_place_done
+ mov QWORD PTR[((0+64))+rsp],r11 ; it becomes the chaining value for the next block
+ mov QWORD PTR[((8+64))+rsp],r12
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ mov QWORD PTR[40+rsp],r10
+ jmp $L$cbc_fast_dec_in_place_loop
+ mov rdi,QWORD PTR[56+rsp] ; last block: write the kept input block to the IV, then store the output
+ mov QWORD PTR[rdi],r11
+ mov QWORD PTR[8+rdi],r12
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+ cmp DWORD PTR[((80+240))+rsp],0 ; cleanup (presumably $L$cbc_fast_cleanup): was the key copied to the stack?
+ lea rdi,QWORD PTR[80+rsp]
+ je $L$cbc_exit
+ mov ecx,240/8
+ xor rax,rax
+ DD 090AB48F3h ; bytes F3 48 AB 90: rep stosq, nop - zero the 240-byte key copy
+ jmp $L$cbc_exit
+ lea rbp,QWORD PTR[((-88))+rsp] ; slow path (presumably $L$cbc_slow_prologue): smaller frame, no key copy
+ and rbp,-64
+ lea r10,QWORD PTR[((-88-63))+rcx]
+ sub r10,rbp
+ neg r10
+ and r10,03c0h
+ sub rbp,r10 ; shift the frame by a multiple of 64 derived from the key address - presumably to avoid cache aliasing; TODO confirm
+ xchg rbp,rsp ; switch to the new frame; rbp = previous rsp
+ mov QWORD PTR[16+rsp],rbp ; [16] = previous rsp
+ mov QWORD PTR[56+rsp],r8 ; [56] = IV pointer
+ mov rbp,r8 ; rbp = IV, rbx = direction flag, r9 = output, r8 = input, r15 = key, r10 = length
+ mov rbx,r9
+ mov r9,rsi
+ mov r8,rdi
+ mov r15,rcx
+ mov r10,rdx
+ mov eax,DWORD PTR[240+r15]
+ mov QWORD PTR[rsp],r15 ; [0] = key
+ shl eax,4
+ lea rax,QWORD PTR[rax*1+r15]
+ mov QWORD PTR[8+rsp],rax ; [8] = key + 16 * (DWORD at key+240)
+ lea r14,QWORD PTR[2048+r14] ; step past the first 2048 bytes of the selected table
+ lea rax,QWORD PTR[((768-8))+rsp]
+ sub rax,r14
+ and rax,0300h ; k*100h with k in 0..3, taken from the distance between frame and table
+ lea r14,QWORD PTR[rax*1+r14]
+ cmp rbx,0 ; NOTE(review): nothing consumes this compare in this text (the test below overwrites the flags); a conditional jump to the decrypt path appears to be missing
+ test r10,-16
+ mov eax,DWORD PTR[rbp] ; encrypt: start the chain from the IV
+ mov ebx,DWORD PTR[4+rbp]
+ mov ecx,DWORD PTR[8+rbp]
+ mov edx,DWORD PTR[12+rbp]
+ jz $L$cbc_slow_enc_tail ; ZF from the test above: fewer than 16 bytes in total
+ xor eax,DWORD PTR[r8] ; XOR the next input block into the chaining value (loop top, presumably $L$cbc_slow_enc_loop)
+ xor ebx,DWORD PTR[4+r8]
+ xor ecx,DWORD PTR[8+r8]
+ xor edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8 ; r8, r9 and r10 are saved and reloaded around the call
+ mov QWORD PTR[32+rsp],r9
+ mov QWORD PTR[40+rsp],r10
+ call _x86_64_AES_encrypt_compact
+ mov r8,QWORD PTR[24+rsp]
+ mov r9,QWORD PTR[32+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ sub r10,16
+ test r10,-16
+ jnz $L$cbc_slow_enc_loop ; loop while at least 16 bytes remain
+ test r10,15
+ jnz $L$cbc_slow_enc_tail ; 1..15 bytes left over
+ mov rbp,QWORD PTR[56+rsp]
+ mov DWORD PTR[rbp],eax ; write the last output block back as the IV
+ mov DWORD PTR[4+rbp],ebx
+ mov DWORD PTR[8+rbp],ecx
+ mov DWORD PTR[12+rbp],edx
+ jmp $L$cbc_exit
+ mov r11,rax ; partial final block (presumably $L$cbc_slow_enc_tail): park rax/rcx, which the string ops need
+ mov r12,rcx
+ mov rcx,r10
+ mov rsi,r8
+ mov rdi,r9
+ DD 09066A4F3h ; bytes F3 A4 66 90: rep movsb, 2-byte nop - copy the r10 leftover input bytes to the output buffer
+ mov rcx,16
+ sub rcx,r10
+ xor rax,rax
+ DD 09066AAF3h ; bytes F3 AA 66 90: rep stosb, 2-byte nop - zero-fill the rest of the 16-byte block
+ mov r8,r9 ; the padded block in the output buffer is now the input
+ mov r10,16
+ mov rax,r11
+ mov rcx,r12
+ jmp $L$cbc_slow_enc_loop ; encrypt it as one more full block
+ shr rax,3 ; decrypt path (presumably $L$SLOW_DECRYPT); rax presumably still holds the 300h-masked offset computed above - TODO confirm
+ add r14,rax
+ mov r11,QWORD PTR[rbp] ; copy the IV to [64..79+rsp]
+ mov r12,QWORD PTR[8+rbp]
+ mov QWORD PTR[((0+64))+rsp],r11
+ mov QWORD PTR[((8+64))+rsp],r12
+ mov eax,DWORD PTR[r8] ; load the next input block (loop top, presumably $L$cbc_slow_dec_loop)
+ mov ebx,DWORD PTR[4+r8]
+ mov ecx,DWORD PTR[8+r8]
+ mov edx,DWORD PTR[12+r8]
+ mov r15,QWORD PTR[rsp]
+ mov QWORD PTR[24+rsp],r8
+ mov QWORD PTR[32+rsp],r9
+ mov QWORD PTR[40+rsp],r10
+ call _x86_64_AES_decrypt_compact
+ mov r8,QWORD PTR[24+rsp]
+ mov r9,QWORD PTR[32+rsp]
+ mov r10,QWORD PTR[40+rsp]
+ xor eax,DWORD PTR[((0+64))+rsp] ; XOR with the saved chaining value
+ xor ebx,DWORD PTR[((4+64))+rsp]
+ xor ecx,DWORD PTR[((8+64))+rsp]
+ xor edx,DWORD PTR[((12+64))+rsp]
+ mov r11,QWORD PTR[r8] ; keep this input block as the next chaining value / new IV
+ mov r12,QWORD PTR[8+r8]
+ sub r10,16
+ jc $L$cbc_slow_dec_partial ; fewer than 16 bytes were left
+ jz $L$cbc_slow_dec_done ; exactly 16 bytes were left
+ mov QWORD PTR[((0+64))+rsp],r11
+ mov QWORD PTR[((8+64))+rsp],r12
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+ lea r8,QWORD PTR[16+r8]
+ lea r9,QWORD PTR[16+r9]
+ jmp $L$cbc_slow_dec_loop
+ mov rdi,QWORD PTR[56+rsp] ; last full block: update the IV, then store the output
+ mov QWORD PTR[rdi],r11
+ mov QWORD PTR[8+rdi],r12
+ mov DWORD PTR[r9],eax
+ mov DWORD PTR[4+r9],ebx
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+ jmp $L$cbc_exit
+ mov rdi,QWORD PTR[56+rsp] ; partial last block: update the IV
+ mov QWORD PTR[rdi],r11
+ mov QWORD PTR[8+rdi],r12
+ mov DWORD PTR[((0+64))+rsp],eax ; stage the 16 result bytes on the stack
+ mov DWORD PTR[((4+64))+rsp],ebx
+ mov DWORD PTR[((8+64))+rsp],ecx
+ mov DWORD PTR[((12+64))+rsp],edx
+ mov rdi,r9
+ lea rsi,QWORD PTR[64+rsp]
+ lea rcx,QWORD PTR[16+r10] ; r10 is negative here, so rcx = number of bytes that were actually left
+ DD 09066A4F3h ; bytes F3 A4 66 90: rep movsb, 2-byte nop - copy only that many bytes to the output
+ jmp $L$cbc_exit
+ mov rsi,QWORD PTR[16+rsp] ; common exit (presumably $L$cbc_exit): rsi = rsp as it was after the pushes
+ mov r15,QWORD PTR[rsi] ; reload the pushed registers
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi] ; rsp now points at the flags saved by pushfq
+ popfq ; restore the caller's flags, including the direction flag
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+AES_cbc_encrypt ENDP
+ DD 0a56363c6h,0a56363c6h
+ DD 0847c7cf8h,0847c7cf8h
+ DD 0997777eeh,0997777eeh
+ DD 08d7b7bf6h,08d7b7bf6h
+ DD 00df2f2ffh,00df2f2ffh
+ DD 0bd6b6bd6h,0bd6b6bd6h
+ DD 0b16f6fdeh,0b16f6fdeh
+ DD 054c5c591h,054c5c591h
+ DD 050303060h,050303060h
+ DD 003010102h,003010102h
+ DD 0a96767ceh,0a96767ceh
+ DD 07d2b2b56h,07d2b2b56h
+ DD 019fefee7h,019fefee7h
+ DD 062d7d7b5h,062d7d7b5h
+ DD 0e6abab4dh,0e6abab4dh
+ DD 09a7676ech,09a7676ech
+ DD 045caca8fh,045caca8fh
+ DD 09d82821fh,09d82821fh
+ DD 040c9c989h,040c9c989h
+ DD 0877d7dfah,0877d7dfah
+ DD 015fafaefh,015fafaefh
+ DD 0eb5959b2h,0eb5959b2h
+ DD 0c947478eh,0c947478eh
+ DD 00bf0f0fbh,00bf0f0fbh
+ DD 0ecadad41h,0ecadad41h
+ DD 067d4d4b3h,067d4d4b3h
+ DD 0fda2a25fh,0fda2a25fh
+ DD 0eaafaf45h,0eaafaf45h
+ DD 0bf9c9c23h,0bf9c9c23h
+ DD 0f7a4a453h,0f7a4a453h
+ DD 0967272e4h,0967272e4h
+ DD 05bc0c09bh,05bc0c09bh
+ DD 0c2b7b775h,0c2b7b775h
+ DD 01cfdfde1h,01cfdfde1h
+ DD 0ae93933dh,0ae93933dh
+ DD 06a26264ch,06a26264ch
+ DD 05a36366ch,05a36366ch
+ DD 0413f3f7eh,0413f3f7eh
+ DD 002f7f7f5h,002f7f7f5h
+ DD 04fcccc83h,04fcccc83h
+ DD 05c343468h,05c343468h
+ DD 0f4a5a551h,0f4a5a551h
+ DD 034e5e5d1h,034e5e5d1h
+ DD 008f1f1f9h,008f1f1f9h
+ DD 0937171e2h,0937171e2h
+ DD 073d8d8abh,073d8d8abh
+ DD 053313162h,053313162h
+ DD 03f15152ah,03f15152ah
+ DD 00c040408h,00c040408h
+ DD 052c7c795h,052c7c795h
+ DD 065232346h,065232346h
+ DD 05ec3c39dh,05ec3c39dh
+ DD 028181830h,028181830h
+ DD 0a1969637h,0a1969637h
+ DD 00f05050ah,00f05050ah
+ DD 0b59a9a2fh,0b59a9a2fh
+ DD 00907070eh,00907070eh
+ DD 036121224h,036121224h
+ DD 09b80801bh,09b80801bh
+ DD 03de2e2dfh,03de2e2dfh
+ DD 026ebebcdh,026ebebcdh
+ DD 06927274eh,06927274eh
+ DD 0cdb2b27fh,0cdb2b27fh
+ DD 09f7575eah,09f7575eah
+ DD 01b090912h,01b090912h
+ DD 09e83831dh,09e83831dh
+ DD 0742c2c58h,0742c2c58h
+ DD 02e1a1a34h,02e1a1a34h
+ DD 02d1b1b36h,02d1b1b36h
+ DD 0b26e6edch,0b26e6edch
+ DD 0ee5a5ab4h,0ee5a5ab4h
+ DD 0fba0a05bh,0fba0a05bh
+ DD 0f65252a4h,0f65252a4h
+ DD 04d3b3b76h,04d3b3b76h
+ DD 061d6d6b7h,061d6d6b7h
+ DD 0ceb3b37dh,0ceb3b37dh
+ DD 07b292952h,07b292952h
+ DD 03ee3e3ddh,03ee3e3ddh
+ DD 0712f2f5eh,0712f2f5eh
+ DD 097848413h,097848413h
+ DD 0f55353a6h,0f55353a6h
+ DD 068d1d1b9h,068d1d1b9h
+ DD 000000000h,000000000h
+ DD 02cededc1h,02cededc1h
+ DD 060202040h,060202040h
+ DD 01ffcfce3h,01ffcfce3h
+ DD 0c8b1b179h,0c8b1b179h
+ DD 0ed5b5bb6h,0ed5b5bb6h
+ DD 0be6a6ad4h,0be6a6ad4h
+ DD 046cbcb8dh,046cbcb8dh
+ DD 0d9bebe67h,0d9bebe67h
+ DD 04b393972h,04b393972h
+ DD 0de4a4a94h,0de4a4a94h
+ DD 0d44c4c98h,0d44c4c98h
+ DD 0e85858b0h,0e85858b0h
+ DD 04acfcf85h,04acfcf85h
+ DD 06bd0d0bbh,06bd0d0bbh
+ DD 02aefefc5h,02aefefc5h
+ DD 0e5aaaa4fh,0e5aaaa4fh
+ DD 016fbfbedh,016fbfbedh
+ DD 0c5434386h,0c5434386h
+ DD 0d74d4d9ah,0d74d4d9ah
+ DD 055333366h,055333366h
+ DD 094858511h,094858511h
+ DD 0cf45458ah,0cf45458ah
+ DD 010f9f9e9h,010f9f9e9h
+ DD 006020204h,006020204h
+ DD 0817f7ffeh,0817f7ffeh
+ DD 0f05050a0h,0f05050a0h
+ DD 0443c3c78h,0443c3c78h
+ DD 0ba9f9f25h,0ba9f9f25h
+ DD 0e3a8a84bh,0e3a8a84bh
+ DD 0f35151a2h,0f35151a2h
+ DD 0fea3a35dh,0fea3a35dh
+ DD 0c0404080h,0c0404080h
+ DD 08a8f8f05h,08a8f8f05h
+ DD 0ad92923fh,0ad92923fh
+ DD 0bc9d9d21h,0bc9d9d21h
+ DD 048383870h,048383870h
+ DD 004f5f5f1h,004f5f5f1h
+ DD 0dfbcbc63h,0dfbcbc63h
+ DD 0c1b6b677h,0c1b6b677h
+ DD 075dadaafh,075dadaafh
+ DD 063212142h,063212142h
+ DD 030101020h,030101020h
+ DD 01affffe5h,01affffe5h
+ DD 00ef3f3fdh,00ef3f3fdh
+ DD 06dd2d2bfh,06dd2d2bfh
+ DD 04ccdcd81h,04ccdcd81h
+ DD 0140c0c18h,0140c0c18h
+ DD 035131326h,035131326h
+ DD 02fececc3h,02fececc3h
+ DD 0e15f5fbeh,0e15f5fbeh
+ DD 0a2979735h,0a2979735h
+ DD 0cc444488h,0cc444488h
+ DD 03917172eh,03917172eh
+ DD 057c4c493h,057c4c493h
+ DD 0f2a7a755h,0f2a7a755h
+ DD 0827e7efch,0827e7efch
+ DD 0473d3d7ah,0473d3d7ah
+ DD 0ac6464c8h,0ac6464c8h
+ DD 0e75d5dbah,0e75d5dbah
+ DD 02b191932h,02b191932h
+ DD 0957373e6h,0957373e6h
+ DD 0a06060c0h,0a06060c0h
+ DD 098818119h,098818119h
+ DD 0d14f4f9eh,0d14f4f9eh
+ DD 07fdcdca3h,07fdcdca3h
+ DD 066222244h,066222244h
+ DD 07e2a2a54h,07e2a2a54h
+ DD 0ab90903bh,0ab90903bh
+ DD 08388880bh,08388880bh
+ DD 0ca46468ch,0ca46468ch
+ DD 029eeeec7h,029eeeec7h
+ DD 0d3b8b86bh,0d3b8b86bh
+ DD 03c141428h,03c141428h
+ DD 079dedea7h,079dedea7h
+ DD 0e25e5ebch,0e25e5ebch
+ DD 01d0b0b16h,01d0b0b16h
+ DD 076dbdbadh,076dbdbadh
+ DD 03be0e0dbh,03be0e0dbh
+ DD 056323264h,056323264h
+ DD 04e3a3a74h,04e3a3a74h
+ DD 01e0a0a14h,01e0a0a14h
+ DD 0db494992h,0db494992h
+ DD 00a06060ch,00a06060ch
+ DD 06c242448h,06c242448h
+ DD 0e45c5cb8h,0e45c5cb8h
+ DD 05dc2c29fh,05dc2c29fh
+ DD 06ed3d3bdh,06ed3d3bdh
+ DD 0efacac43h,0efacac43h
+ DD 0a66262c4h,0a66262c4h
+ DD 0a8919139h,0a8919139h
+ DD 0a4959531h,0a4959531h
+ DD 037e4e4d3h,037e4e4d3h
+ DD 08b7979f2h,08b7979f2h
+ DD 032e7e7d5h,032e7e7d5h
+ DD 043c8c88bh,043c8c88bh
+ DD 05937376eh,05937376eh
+ DD 0b76d6ddah,0b76d6ddah
+ DD 08c8d8d01h,08c8d8d01h
+ DD 064d5d5b1h,064d5d5b1h
+ DD 0d24e4e9ch,0d24e4e9ch
+ DD 0e0a9a949h,0e0a9a949h
+ DD 0b46c6cd8h,0b46c6cd8h
+ DD 0fa5656ach,0fa5656ach
+ DD 007f4f4f3h,007f4f4f3h
+ DD 025eaeacfh,025eaeacfh
+ DD 0af6565cah,0af6565cah
+ DD 08e7a7af4h,08e7a7af4h
+ DD 0e9aeae47h,0e9aeae47h
+ DD 018080810h,018080810h
+ DD 0d5baba6fh,0d5baba6fh
+ DD 0887878f0h,0887878f0h
+ DD 06f25254ah,06f25254ah
+ DD 0722e2e5ch,0722e2e5ch
+ DD 0241c1c38h,0241c1c38h
+ DD 0f1a6a657h,0f1a6a657h
+ DD 0c7b4b473h,0c7b4b473h
+ DD 051c6c697h,051c6c697h
+ DD 023e8e8cbh,023e8e8cbh
+ DD 07cdddda1h,07cdddda1h
+ DD 09c7474e8h,09c7474e8h
+ DD 0211f1f3eh,0211f1f3eh
+ DD 0dd4b4b96h,0dd4b4b96h
+ DD 0dcbdbd61h,0dcbdbd61h
+ DD 0868b8b0dh,0868b8b0dh
+ DD 0858a8a0fh,0858a8a0fh
+ DD 0907070e0h,0907070e0h
+ DD 0423e3e7ch,0423e3e7ch
+ DD 0c4b5b571h,0c4b5b571h
+ DD 0aa6666cch,0aa6666cch
+ DD 0d8484890h,0d8484890h
+ DD 005030306h,005030306h
+ DD 001f6f6f7h,001f6f6f7h
+ DD 0120e0e1ch,0120e0e1ch
+ DD 0a36161c2h,0a36161c2h
+ DD 05f35356ah,05f35356ah
+ DD 0f95757aeh,0f95757aeh
+ DD 0d0b9b969h,0d0b9b969h
+ DD 091868617h,091868617h
+ DD 058c1c199h,058c1c199h
+ DD 0271d1d3ah,0271d1d3ah
+ DD 0b99e9e27h,0b99e9e27h
+ DD 038e1e1d9h,038e1e1d9h
+ DD 013f8f8ebh,013f8f8ebh
+ DD 0b398982bh,0b398982bh
+ DD 033111122h,033111122h
+ DD 0bb6969d2h,0bb6969d2h
+ DD 070d9d9a9h,070d9d9a9h
+ DD 0898e8e07h,0898e8e07h
+ DD 0a7949433h,0a7949433h
+ DD 0b69b9b2dh,0b69b9b2dh
+ DD 0221e1e3ch,0221e1e3ch
+ DD 092878715h,092878715h
+ DD 020e9e9c9h,020e9e9c9h
+ DD 049cece87h,049cece87h
+ DD 0ff5555aah,0ff5555aah
+ DD 078282850h,078282850h
+ DD 07adfdfa5h,07adfdfa5h
+ DD 08f8c8c03h,08f8c8c03h
+ DD 0f8a1a159h,0f8a1a159h
+ DD 080898909h,080898909h
+ DD 0170d0d1ah,0170d0d1ah
+ DD 0dabfbf65h,0dabfbf65h
+ DD 031e6e6d7h,031e6e6d7h
+ DD 0c6424284h,0c6424284h
+ DD 0b86868d0h,0b86868d0h
+ DD 0c3414182h,0c3414182h
+ DD 0b0999929h,0b0999929h
+ DD 0772d2d5ah,0772d2d5ah
+ DD 0110f0f1eh,0110f0f1eh
+ DD 0cbb0b07bh,0cbb0b07bh
+ DD 0fc5454a8h,0fc5454a8h
+ DD 0d6bbbb6dh,0d6bbbb6dh
+ DD 03a16162ch,03a16162ch
+DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h
+DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h
+DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h
+DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h
+DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch
+DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h
+DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah
+DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h
+DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h
+DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h
+DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh
+DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh
+DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h
+DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h
+DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h
+DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h
+DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h
+DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h
+DB 060h,081h,04fh,0dch,022h,02ah,090h,088h
+DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh
+DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch
+DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h
+DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h
+DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h
+DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h
+DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah
+DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh
+DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh
+DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h
+DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh
+DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h
+DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h
+DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h
+DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h
+DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h
+DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h
+DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch
+DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h
+DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah
+DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h
+DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h
+DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h
+DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh
+DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh
+DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h
+DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h
+DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h
+DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h
+DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h
+DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h
+DB 060h,081h,04fh,0dch,022h,02ah,090h,088h
+DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh
+DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch
+DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h
+DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h
+DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h
+DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h
+DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah
+DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh
+DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh
+DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h
+DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh
+DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h
+DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h
+DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h
+DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h
+DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h
+DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h
+DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch
+DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h
+DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah
+DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h
+DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h
+DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h
+DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh
+DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh
+DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h
+DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h
+DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h
+DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h
+DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h
+DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h
+DB 060h,081h,04fh,0dch,022h,02ah,090h,088h
+DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh
+DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch
+DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h
+DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h
+DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h
+DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h
+DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah
+DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh
+DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh
+DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h
+DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh
+DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h
+DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h
+DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h
+DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h
+DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h
+DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h
+DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch
+DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h
+DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah
+DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h
+DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h
+DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h
+DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh
+DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh
+DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h
+DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h
+DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h
+DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h
+DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h
+DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h
+DB 060h,081h,04fh,0dch,022h,02ah,090h,088h
+DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh
+DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch
+DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h
+DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h
+DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h
+DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h
+DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah
+DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh
+DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh
+DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h
+DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh
+DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h
+DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h
+ DD 000000001h,000000002h,000000004h,000000008h ; ten DWORD constants (01h..36h) that the key-expansion code indexes as [$L$AES_Te+3072+4*i]; assumes the $L$AES_Te label, not visible here, sits 3072 bytes above this line
+ DD 000000010h,000000020h,000000040h,000000080h
+ DD 00000001bh,000000036h,080808080h,080808080h ; from offset 40: 64-bit mask with 80h in every byte, loaded by AES_set_decrypt_key
+ DD 0fefefefeh,0fefefefeh,01b1b1b1bh,01b1b1b1bh ; offset 48: feh in every byte; offset 56: 1bh in every byte (same user)
+ DD 050a7f451h,050a7f451h
+ DD 05365417eh,05365417eh
+ DD 0c3a4171ah,0c3a4171ah
+ DD 0965e273ah,0965e273ah
+ DD 0cb6bab3bh,0cb6bab3bh
+ DD 0f1459d1fh,0f1459d1fh
+ DD 0ab58faach,0ab58faach
+ DD 09303e34bh,09303e34bh
+ DD 055fa3020h,055fa3020h
+ DD 0f66d76adh,0f66d76adh
+ DD 09176cc88h,09176cc88h
+ DD 0254c02f5h,0254c02f5h
+ DD 0fcd7e54fh,0fcd7e54fh
+ DD 0d7cb2ac5h,0d7cb2ac5h
+ DD 080443526h,080443526h
+ DD 08fa362b5h,08fa362b5h
+ DD 0495ab1deh,0495ab1deh
+ DD 0671bba25h,0671bba25h
+ DD 0980eea45h,0980eea45h
+ DD 0e1c0fe5dh,0e1c0fe5dh
+ DD 002752fc3h,002752fc3h
+ DD 012f04c81h,012f04c81h
+ DD 0a397468dh,0a397468dh
+ DD 0c6f9d36bh,0c6f9d36bh
+ DD 0e75f8f03h,0e75f8f03h
+ DD 0959c9215h,0959c9215h
+ DD 0eb7a6dbfh,0eb7a6dbfh
+ DD 0da595295h,0da595295h
+ DD 02d83bed4h,02d83bed4h
+ DD 0d3217458h,0d3217458h
+ DD 02969e049h,02969e049h
+ DD 044c8c98eh,044c8c98eh
+ DD 06a89c275h,06a89c275h
+ DD 078798ef4h,078798ef4h
+ DD 06b3e5899h,06b3e5899h
+ DD 0dd71b927h,0dd71b927h
+ DD 0b64fe1beh,0b64fe1beh
+ DD 017ad88f0h,017ad88f0h
+ DD 066ac20c9h,066ac20c9h
+ DD 0b43ace7dh,0b43ace7dh
+ DD 0184adf63h,0184adf63h
+ DD 082311ae5h,082311ae5h
+ DD 060335197h,060335197h
+ DD 0457f5362h,0457f5362h
+ DD 0e07764b1h,0e07764b1h
+ DD 084ae6bbbh,084ae6bbbh
+ DD 01ca081feh,01ca081feh
+ DD 0942b08f9h,0942b08f9h
+ DD 058684870h,058684870h
+ DD 019fd458fh,019fd458fh
+ DD 0876cde94h,0876cde94h
+ DD 0b7f87b52h,0b7f87b52h
+ DD 023d373abh,023d373abh
+ DD 0e2024b72h,0e2024b72h
+ DD 0578f1fe3h,0578f1fe3h
+ DD 02aab5566h,02aab5566h
+ DD 00728ebb2h,00728ebb2h
+ DD 003c2b52fh,003c2b52fh
+ DD 09a7bc586h,09a7bc586h
+ DD 0a50837d3h,0a50837d3h
+ DD 0f2872830h,0f2872830h
+ DD 0b2a5bf23h,0b2a5bf23h
+ DD 0ba6a0302h,0ba6a0302h
+ DD 05c8216edh,05c8216edh
+ DD 02b1ccf8ah,02b1ccf8ah
+ DD 092b479a7h,092b479a7h
+ DD 0f0f207f3h,0f0f207f3h
+ DD 0a1e2694eh,0a1e2694eh
+ DD 0cdf4da65h,0cdf4da65h
+ DD 0d5be0506h,0d5be0506h
+ DD 01f6234d1h,01f6234d1h
+ DD 08afea6c4h,08afea6c4h
+ DD 09d532e34h,09d532e34h
+ DD 0a055f3a2h,0a055f3a2h
+ DD 032e18a05h,032e18a05h
+ DD 075ebf6a4h,075ebf6a4h
+ DD 039ec830bh,039ec830bh
+ DD 0aaef6040h,0aaef6040h
+ DD 0069f715eh,0069f715eh
+ DD 051106ebdh,051106ebdh
+ DD 0f98a213eh,0f98a213eh
+ DD 03d06dd96h,03d06dd96h
+ DD 0ae053eddh,0ae053eddh
+ DD 046bde64dh,046bde64dh
+ DD 0b58d5491h,0b58d5491h
+ DD 0055dc471h,0055dc471h
+ DD 06fd40604h,06fd40604h
+ DD 0ff155060h,0ff155060h
+ DD 024fb9819h,024fb9819h
+ DD 097e9bdd6h,097e9bdd6h
+ DD 0cc434089h,0cc434089h
+ DD 0779ed967h,0779ed967h
+ DD 0bd42e8b0h,0bd42e8b0h
+ DD 0888b8907h,0888b8907h
+ DD 0385b19e7h,0385b19e7h
+ DD 0dbeec879h,0dbeec879h
+ DD 0470a7ca1h,0470a7ca1h
+ DD 0e90f427ch,0e90f427ch
+ DD 0c91e84f8h,0c91e84f8h
+ DD 000000000h,000000000h
+ DD 083868009h,083868009h
+ DD 048ed2b32h,048ed2b32h
+ DD 0ac70111eh,0ac70111eh
+ DD 04e725a6ch,04e725a6ch
+ DD 0fbff0efdh,0fbff0efdh
+ DD 05638850fh,05638850fh
+ DD 01ed5ae3dh,01ed5ae3dh
+ DD 027392d36h,027392d36h
+ DD 064d90f0ah,064d90f0ah
+ DD 021a65c68h,021a65c68h
+ DD 0d1545b9bh,0d1545b9bh
+ DD 03a2e3624h,03a2e3624h
+ DD 0b1670a0ch,0b1670a0ch
+ DD 00fe75793h,00fe75793h
+ DD 0d296eeb4h,0d296eeb4h
+ DD 09e919b1bh,09e919b1bh
+ DD 04fc5c080h,04fc5c080h
+ DD 0a220dc61h,0a220dc61h
+ DD 0694b775ah,0694b775ah
+ DD 0161a121ch,0161a121ch
+ DD 00aba93e2h,00aba93e2h
+ DD 0e52aa0c0h,0e52aa0c0h
+ DD 043e0223ch,043e0223ch
+ DD 01d171b12h,01d171b12h
+ DD 00b0d090eh,00b0d090eh
+ DD 0adc78bf2h,0adc78bf2h
+ DD 0b9a8b62dh,0b9a8b62dh
+ DD 0c8a91e14h,0c8a91e14h
+ DD 08519f157h,08519f157h
+ DD 04c0775afh,04c0775afh
+ DD 0bbdd99eeh,0bbdd99eeh
+ DD 0fd607fa3h,0fd607fa3h
+ DD 09f2601f7h,09f2601f7h
+ DD 0bcf5725ch,0bcf5725ch
+ DD 0c53b6644h,0c53b6644h
+ DD 0347efb5bh,0347efb5bh
+ DD 07629438bh,07629438bh
+ DD 0dcc623cbh,0dcc623cbh
+ DD 068fcedb6h,068fcedb6h
+ DD 063f1e4b8h,063f1e4b8h
+ DD 0cadc31d7h,0cadc31d7h
+ DD 010856342h,010856342h
+ DD 040229713h,040229713h
+ DD 02011c684h,02011c684h
+ DD 07d244a85h,07d244a85h
+ DD 0f83dbbd2h,0f83dbbd2h
+ DD 01132f9aeh,01132f9aeh
+ DD 06da129c7h,06da129c7h
+ DD 04b2f9e1dh,04b2f9e1dh
+ DD 0f330b2dch,0f330b2dch
+ DD 0ec52860dh,0ec52860dh
+ DD 0d0e3c177h,0d0e3c177h
+ DD 06c16b32bh,06c16b32bh
+ DD 099b970a9h,099b970a9h
+ DD 0fa489411h,0fa489411h
+ DD 02264e947h,02264e947h
+ DD 0c48cfca8h,0c48cfca8h
+ DD 01a3ff0a0h,01a3ff0a0h
+ DD 0d82c7d56h,0d82c7d56h
+ DD 0ef903322h,0ef903322h
+ DD 0c74e4987h,0c74e4987h
+ DD 0c1d138d9h,0c1d138d9h
+ DD 0fea2ca8ch,0fea2ca8ch
+ DD 0360bd498h,0360bd498h
+ DD 0cf81f5a6h,0cf81f5a6h
+ DD 028de7aa5h,028de7aa5h
+ DD 0268eb7dah,0268eb7dah
+ DD 0a4bfad3fh,0a4bfad3fh
+ DD 0e49d3a2ch,0e49d3a2ch
+ DD 00d927850h,00d927850h
+ DD 09bcc5f6ah,09bcc5f6ah
+ DD 062467e54h,062467e54h
+ DD 0c2138df6h,0c2138df6h
+ DD 0e8b8d890h,0e8b8d890h
+ DD 05ef7392eh,05ef7392eh
+ DD 0f5afc382h,0f5afc382h
+ DD 0be805d9fh,0be805d9fh
+ DD 07c93d069h,07c93d069h
+ DD 0a92dd56fh,0a92dd56fh
+ DD 0b31225cfh,0b31225cfh
+ DD 03b99acc8h,03b99acc8h
+ DD 0a77d1810h,0a77d1810h
+ DD 06e639ce8h,06e639ce8h
+ DD 07bbb3bdbh,07bbb3bdbh
+ DD 0097826cdh,0097826cdh
+ DD 0f418596eh,0f418596eh
+ DD 001b79aech,001b79aech
+ DD 0a89a4f83h,0a89a4f83h
+ DD 0656e95e6h,0656e95e6h
+ DD 07ee6ffaah,07ee6ffaah
+ DD 008cfbc21h,008cfbc21h
+ DD 0e6e815efh,0e6e815efh
+ DD 0d99be7bah,0d99be7bah
+ DD 0ce366f4ah,0ce366f4ah
+ DD 0d4099feah,0d4099feah
+ DD 0d67cb029h,0d67cb029h
+ DD 0afb2a431h,0afb2a431h
+ DD 031233f2ah,031233f2ah
+ DD 03094a5c6h,03094a5c6h
+ DD 0c066a235h,0c066a235h
+ DD 037bc4e74h,037bc4e74h
+ DD 0a6ca82fch,0a6ca82fch
+ DD 0b0d090e0h,0b0d090e0h
+ DD 015d8a733h,015d8a733h
+ DD 04a9804f1h,04a9804f1h
+ DD 0f7daec41h,0f7daec41h
+ DD 00e50cd7fh,00e50cd7fh
+ DD 02ff69117h,02ff69117h
+ DD 08dd64d76h,08dd64d76h
+ DD 04db0ef43h,04db0ef43h
+ DD 0544daacch,0544daacch
+ DD 0df0496e4h,0df0496e4h
+ DD 0e3b5d19eh,0e3b5d19eh
+ DD 01b886a4ch,01b886a4ch
+ DD 0b81f2cc1h,0b81f2cc1h
+ DD 07f516546h,07f516546h
+ DD 004ea5e9dh,004ea5e9dh
+ DD 05d358c01h,05d358c01h
+ DD 0737487fah,0737487fah
+ DD 02e410bfbh,02e410bfbh
+ DD 05a1d67b3h,05a1d67b3h
+ DD 052d2db92h,052d2db92h
+ DD 0335610e9h,0335610e9h
+ DD 01347d66dh,01347d66dh
+ DD 08c61d79ah,08c61d79ah
+ DD 07a0ca137h,07a0ca137h
+ DD 08e14f859h,08e14f859h
+ DD 0893c13ebh,0893c13ebh
+ DD 0ee27a9ceh,0ee27a9ceh
+ DD 035c961b7h,035c961b7h
+ DD 0ede51ce1h,0ede51ce1h
+ DD 03cb1477ah,03cb1477ah
+ DD 059dfd29ch,059dfd29ch
+ DD 03f73f255h,03f73f255h
+ DD 079ce1418h,079ce1418h
+ DD 0bf37c773h,0bf37c773h
+ DD 0eacdf753h,0eacdf753h
+ DD 05baafd5fh,05baafd5fh
+ DD 0146f3ddfh,0146f3ddfh
+ DD 086db4478h,086db4478h
+ DD 081f3afcah,081f3afcah
+ DD 03ec468b9h,03ec468b9h
+ DD 02c342438h,02c342438h
+ DD 05f40a3c2h,05f40a3c2h
+ DD 072c31d16h,072c31d16h
+ DD 00c25e2bch,00c25e2bch
+ DD 08b493c28h,08b493c28h
+ DD 041950dffh,041950dffh
+ DD 07101a839h,07101a839h
+ DD 0deb30c08h,0deb30c08h
+ DD 09ce4b4d8h,09ce4b4d8h
+ DD 090c15664h,090c15664h
+ DD 06184cb7bh,06184cb7bh
+ DD 070b632d5h,070b632d5h
+ DD 0745c6c48h,0745c6c48h
+ DD 04257b8d0h,04257b8d0h
+DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h
+DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh
+DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h
+DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh
+DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh
+DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh
+DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h
+DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h
+DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h
+DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h
+DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah
+DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h
+DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah
+DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h
+DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h
+DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh
+DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah
+DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h
+DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h
+DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh
+DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h
+DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh
+DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h
+DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h
+DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h
+DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh
+DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh
+DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh
+DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h
+DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h
+DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h
+DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh
+ DD 080808080h,080808080h,0fefefefeh,0fefefefeh
+ DD 01b1b1b1bh,01b1b1b1bh,0,0
+DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h
+DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh
+DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h
+DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh
+DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh
+DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh
+DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h
+DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h
+DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h
+DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h
+DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah
+DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h
+DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah
+DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h
+DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h
+DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh
+DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah
+DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h
+DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h
+DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh
+DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h
+DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh
+DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h
+DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h
+DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h
+DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh
+DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh
+DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh
+DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h
+DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h
+DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h
+DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh
+ DD 080808080h,080808080h,0fefefefeh,0fefefefeh
+ DD 01b1b1b1bh,01b1b1b1bh,0,0
+DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h
+DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh
+DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h
+DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh
+DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh
+DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh
+DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h
+DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h
+DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h
+DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h
+DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah
+DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h
+DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah
+DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h
+DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h
+DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh
+DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah
+DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h
+DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h
+DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh
+DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h
+DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh
+DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h
+DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h
+DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h
+DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh
+DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh
+DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh
+DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h
+DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h
+DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h
+DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh
+ DD 080808080h,080808080h,0fefefefeh,0fefefefeh
+ DD 01b1b1b1bh,01b1b1b1bh,0,0
+DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h
+DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh
+DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h
+DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh
+DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh
+DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh
+DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h
+DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h
+DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h
+DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h
+DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah
+DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h
+DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah
+DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h
+DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h
+DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh
+DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah
+DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h
+DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h
+DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh
+DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h
+DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh
+DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h
+DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h
+DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h
+DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh
+DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh
+DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh
+DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h
+DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h
+DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h
+DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh
+ DD 080808080h,080808080h,0fefefefeh,0fefefefeh
+ DD 01b1b1b1bh,01b1b1b1bh,0,0
+DB 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32 ; ASCII "AES for x86_64, "
+DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 ; ASCII "CRYPTOGAMS by ", then 60 (less-than sign) and "a"
+DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 ; ASCII "ppro@openssl.org"
+DB 62,0 ; 62 (greater-than sign) followed by the NUL terminator
+EXTERN __imp_RtlVirtualUnwind:NEAR
+block_se_handler PROC PRIVATE ; handler named by the .xdata records that carry the $L$enc_* and $L$dec_* prologue/epilogue label pairs
+ push rsi ; save every register this handler overwrites ...
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq ; ... and the flags
+ sub rsp,64 ; reserve 64 bytes of stack
+ mov rax,QWORD PTR[120+r8] ; rax = qword at r8+120 (CONTEXT.Rax if r8 is the CONTEXT record -- assumed, confirm)
+ mov rbx,QWORD PTR[248+r8] ; rbx = qword at r8+248 (presumably CONTEXT.Rip, the faulting address)
+ mov rsi,QWORD PTR[8+r9] ; rsi = qword at r9+8 (presumably DISPATCHER_CONTEXT.ImageBase)
+ mov r11,QWORD PTR[56+r9] ; r11 = qword at r9+56 (presumably HandlerData: the two RVAs stored after the handler RVA in .xdata)
+ mov r10d,DWORD PTR[r11] ; first 32-bit value of that data
+ lea r10,QWORD PTR[r10*1+rsi] ; add rsi to turn it into an address
+ cmp rbx,r10
+ jb $L$in_block_prologue ; rbx is below the first address; the target label line is absent from this extract
+ mov rax,QWORD PTR[152+r8] ; rax = qword at r8+152 (presumably CONTEXT.Rsp)
+ mov r10d,DWORD PTR[4+r11] ; second 32-bit value of the handler data
+ lea r10,QWORD PTR[r10*1+rsi] ; again rebased by rsi
+ cmp rbx,r10
+ jae $L$in_block_prologue ; rbx is at or past the second address
+ mov rax,QWORD PTR[24+rax] ; follow the pointer kept at offset 24 of that stack area
+ lea rax,QWORD PTR[48+rax] ; step over six 8-byte slots
+ mov rbx,QWORD PTR[((-8))+rax] ; pick the six saved values back up, highest slot first
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov QWORD PTR[144+r8],rbx ; write them into the record at r8 (offsets 144/160/216..240; presumably the Rbx/Rbp/R12..R15 fields)
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+ mov rdi,QWORD PTR[8+rax] ; reload rdi and rsi from 8 and 16 bytes above rax
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax ; store rax, rsi, rdi at offsets 152/168/176 of the record (presumably Rsp/Rsi/Rdi)
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+ jmp $L$common_seh_exit ; shared exit; its label line is absent from this extract (presumably the tail of cbc_se_handler)
+block_se_handler ENDP
+key_se_handler PROC PRIVATE ; handler named by the .xdata records that carry the $L$enc_key_* and $L$dec_key_* label pairs
+ push rsi ; save every register this handler overwrites ...
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq ; ... and the flags
+ sub rsp,64 ; reserve 64 bytes of stack
+ mov rax,QWORD PTR[120+r8] ; rax = qword at r8+120 (CONTEXT.Rax if r8 is the CONTEXT record -- assumed, confirm)
+ mov rbx,QWORD PTR[248+r8] ; rbx = qword at r8+248 (presumably CONTEXT.Rip, the faulting address)
+ mov rsi,QWORD PTR[8+r9] ; rsi = qword at r9+8 (presumably DISPATCHER_CONTEXT.ImageBase)
+ mov r11,QWORD PTR[56+r9] ; r11 = qword at r9+56 (presumably HandlerData: the two RVAs stored after the handler RVA in .xdata)
+ mov r10d,DWORD PTR[r11] ; first 32-bit value of that data
+ lea r10,QWORD PTR[r10*1+rsi] ; add rsi to turn it into an address
+ cmp rbx,r10
+ jb $L$in_key_prologue ; rbx is below the first address; the target label line is absent from this extract
+ mov rax,QWORD PTR[152+r8] ; rax = qword at r8+152 (presumably CONTEXT.Rsp)
+ mov r10d,DWORD PTR[4+r11] ; second 32-bit value of the handler data
+ lea r10,QWORD PTR[r10*1+rsi] ; again rebased by rsi
+ cmp rbx,r10
+ jae $L$in_key_prologue ; rbx is at or past the second address
+ lea rax,QWORD PTR[56+rax] ; step over 56 bytes; unlike block_se_handler no pointer is followed first
+ mov rbx,QWORD PTR[((-8))+rax] ; pick six saved values back up from the slots just below rax
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov r13,QWORD PTR[((-32))+rax]
+ mov r14,QWORD PTR[((-40))+rax]
+ mov r15,QWORD PTR[((-48))+rax]
+ mov QWORD PTR[144+r8],rbx ; write them into the record at r8 (offsets 144/160/216..240; presumably the Rbx/Rbp/R12..R15 fields)
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+ mov rdi,QWORD PTR[8+rax] ; reload rdi and rsi from 8 and 16 bytes above rax
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax ; store rax, rsi, rdi at offsets 152/168/176 of the record (presumably Rsp/Rsi/Rdi)
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+ jmp $L$common_seh_exit ; shared exit; its label line is absent from this extract (presumably the tail of cbc_se_handler)
+key_se_handler ENDP
+cbc_se_handler PROC PRIVATE ; handler named by the last .xdata record; compares against $L$cbc_* labels directly instead of reading handler data
+ push rsi ; save every register this handler overwrites ...
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq ; ... and the flags
+ sub rsp,64 ; reserve 64 bytes; offsets 32..56 are filled in before the call below
+ mov rax,QWORD PTR[120+r8] ; rax = qword at r8+120 (CONTEXT.Rax if r8 is the CONTEXT record -- assumed, confirm)
+ mov rbx,QWORD PTR[248+r8] ; rbx = qword at r8+248 (presumably CONTEXT.Rip, the faulting address)
+ lea r10,QWORD PTR[$L$cbc_prologue] ; classify rbx against a series of code labels
+ cmp rbx,r10
+ jb $L$in_cbc_prologue ; note: none of the $L$in_cbc_* target label lines is present in this extract
+ lea r10,QWORD PTR[$L$cbc_fast_body]
+ cmp rbx,r10
+ jb $L$in_cbc_frame_setup
+ lea r10,QWORD PTR[$L$cbc_slow_prologue]
+ cmp rbx,r10
+ jb $L$in_cbc_body
+ lea r10,QWORD PTR[$L$cbc_slow_body]
+ cmp rbx,r10
+ jb $L$in_cbc_frame_setup
+ mov rax,QWORD PTR[152+r8] ; rax = qword at r8+152 (presumably CONTEXT.Rsp)
+ lea r10,QWORD PTR[$L$cbc_epilogue]
+ cmp rbx,r10
+ jae $L$in_cbc_prologue
+ lea rax,QWORD PTR[8+rax] ; skip one 8-byte slot
+ lea r10,QWORD PTR[$L$cbc_popfq]
+ cmp rbx,r10
+ jae $L$in_cbc_prologue
+ mov rax,QWORD PTR[8+rax] ; follow the pointer stored 8 bytes above
+ lea rax,QWORD PTR[56+rax] ; step over 56 bytes
+ mov rbx,QWORD PTR[((-16))+rax] ; six saved values, here starting at -16 rather than -8
+ mov rbp,QWORD PTR[((-24))+rax]
+ mov r12,QWORD PTR[((-32))+rax]
+ mov r13,QWORD PTR[((-40))+rax]
+ mov r14,QWORD PTR[((-48))+rax]
+ mov r15,QWORD PTR[((-56))+rax]
+ mov QWORD PTR[144+r8],rbx ; write them into the record at r8 (offsets 144/160/216..240; presumably the Rbx/Rbp/R12..R15 fields)
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+ mov rdi,QWORD PTR[8+rax] ; reload rdi and rsi from 8 and 16 bytes above rax
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax ; store rax, rsi, rdi at offsets 152/168/176 of the record (presumably Rsp/Rsi/Rdi)
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+ mov rdi,QWORD PTR[40+r9] ; copy destination = qword at r9+40 (presumably DISPATCHER_CONTEXT.ContextRecord)
+ mov rsi,r8 ; copy source = the record at r8
+ mov ecx,154 ; 154 qwords = 1232 bytes
+ DD 0a548f3fch ; bytes FC F3 48 A5 = cld, rep movsq: performs the copy set up above
+ mov rsi,r9 ; keep r9 in rsi while the argument registers are loaded
+ xor rcx,rcx ; 1st argument = 0
+ mov rdx,QWORD PTR[8+rsi] ; 2nd argument = qword at offset 8
+ mov r8,QWORD PTR[rsi] ; 3rd argument = qword at offset 0
+ mov r9,QWORD PTR[16+rsi] ; 4th argument = qword at offset 16
+ mov r10,QWORD PTR[40+rsi] ; value of the qword at offset 40
+ lea r11,QWORD PTR[56+rsi] ; address of the field at offset 56
+ lea r12,QWORD PTR[24+rsi] ; address of the field at offset 24
+ mov QWORD PTR[32+rsp],r10 ; stack arguments 5..8
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx ; last argument = 0
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+ mov eax,1 ; handler returns 1 (presumably ExceptionContinueSearch -- confirm against the SDK)
+ add rsp,64 ; undo the entry sequence in reverse order
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+cbc_se_handler ENDP
+.text$ ENDS
+ DD imagerel $L$SEH_begin_AES_encrypt ; one begin/end/info triple of image-relative addresses per exported routine
+ DD imagerel $L$SEH_end_AES_encrypt
+ DD imagerel $L$SEH_info_AES_encrypt ; the $L$SEH_info_* labels presumably mark the .xdata records; their label lines are absent from this extract
+ DD imagerel $L$SEH_begin_AES_decrypt
+ DD imagerel $L$SEH_end_AES_decrypt
+ DD imagerel $L$SEH_info_AES_decrypt
+ DD imagerel $L$SEH_begin_AES_set_encrypt_key
+ DD imagerel $L$SEH_end_AES_set_encrypt_key
+ DD imagerel $L$SEH_info_AES_set_encrypt_key
+ DD imagerel $L$SEH_begin_AES_set_decrypt_key
+ DD imagerel $L$SEH_end_AES_set_decrypt_key
+ DD imagerel $L$SEH_info_AES_set_decrypt_key
+ DD imagerel $L$SEH_begin_AES_cbc_encrypt
+ DD imagerel $L$SEH_end_AES_cbc_encrypt
+ DD imagerel $L$SEH_info_AES_cbc_encrypt
+.pdata ENDS ; the matching SEGMENT line is absent from this extract
+DB 9,0,0,0 ; 4-byte record header; 9 is presumably UNWIND_INFO version 1 with the exception-handler flag, no unwind codes -- confirm
+ DD imagerel block_se_handler ; handler for this record
+ DD imagerel $L$enc_prologue,imagerel $L$enc_epilogue ; the two addresses block_se_handler reads as its handler data
+DB 9,0,0,0
+ DD imagerel block_se_handler
+ DD imagerel $L$dec_prologue,imagerel $L$dec_epilogue
+DB 9,0,0,0
+ DD imagerel key_se_handler
+ DD imagerel $L$enc_key_prologue,imagerel $L$enc_key_epilogue ; read by key_se_handler in the same way
+DB 9,0,0,0
+ DD imagerel key_se_handler
+ DD imagerel $L$dec_key_prologue,imagerel $L$dec_key_epilogue
+DB 9,0,0,0
+ DD imagerel cbc_se_handler ; no address pair follows: cbc_se_handler compares against code labels itself
+.xdata ENDS ; the matching SEGMENT line is absent from this extract
diff --git a/crypto/libressl/crypto/aes/aes-mingw64-x86_64.S b/crypto/libressl/crypto/aes/aes-mingw64-x86_64.S
new file mode 100644
index 0000000..ca2d60f
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aes-mingw64-x86_64.S
@@ -0,0 +1,2861 @@
+#include "x86_arch.h"
+.def _x86_64_AES_encrypt; .scl 3; .type 32; .endef /* no .globl: table-driven block routine. State in eax/ebx/ecx/edx, r15 = round keys, r14 = table base */
+.p2align 4 /* the routine's own label line is absent from this extract */
+ xorl 0(%r15),%eax /* XOR the four key words at r15 into the state */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ movl 240(%r15),%r13d /* r13d = dword at key+240 (the value 10/12/14 that the key-setup routine stores there) */
+ subl $1,%r13d /* one less loop pass: the code after the loop handles the final step */
+ jmp .Lenc_loop /* the .Lenc_loop label line is absent from this extract */
+.p2align 4
+ movzbl %al,%esi /* loop body: each state byte indexes the table at r14 */
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d /* entries are 8 bytes apart; displacements 3, 2, 1 below read the same entry at other byte offsets */
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+ movzbl %dh,%esi
+ shrl $16,%ecx /* expose the upper two bytes of each state word */
+ movzbl %ah,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+ shrl $16,%ebx
+ leaq 16(%r15),%r15 /* advance to the next 16 bytes of key material */
+ shrl $16,%eax
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+ movl 12(%r15),%edx /* reload the state registers with the next key words ... */
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rdi,8),%r12d
+ xorl 1(%r14,%rbp,8),%r8d
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r10d,%eax /* ... and fold in the accumulated table values */
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ subl $1,%r13d /* count down and repeat */
+ jnz .Lenc_loop
+ movzbl %al,%esi /* final step: single bytes are extracted from table entries by byte loads and masks */
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl 2(%r14,%rsi,8),%r10d /* byte at offset 2 of the entry becomes bits 0..7 */
+ movzbl 2(%r14,%rdi,8),%r11d
+ movzbl 2(%r14,%rbp,8),%r12d
+ movzbl %dl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl 2(%r14,%rsi,8),%r8d
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+ andl $65280,%edi /* 65280 = 0x0000ff00 */
+ andl $65280,%ebp
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%ecx
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ shrl $16,%edx
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+ andl $65280,%esi
+ andl $65280,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%eax
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+ andl $16711680,%esi /* 16711680 = 0x00ff0000 */
+ andl $16711680,%edi
+ andl $16711680,%ebp
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+ movzbl %bl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 2(%r14,%rbp,8),%ebp
+ andl $16711680,%esi
+ andl $4278190080,%edi /* 4278190080 = 0xff000000 */
+ andl $4278190080,%ebp
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movl 16+12(%r15),%edx /* last key words live 16 bytes past the current r15 */
+ movl 2(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 16+0(%r15),%eax
+ andl $4278190080,%esi
+ andl $4278190080,%edi
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ xorl %r10d,%eax /* result is left in eax/ebx/ecx/edx */
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq
+.def _x86_64_AES_encrypt_compact; .scl 3; .type 32; .endef /* no .globl: variant using a 256-byte table at r14 with byte loads. State in eax/ebx/ecx/edx, r15 = round keys */
+.p2align 4 /* the routine's own label line is absent from this extract */
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi /* eight loads 32 bytes apart covering r14..r14+255; every destination is overwritten before being read, so they only touch the table (presumably a prefetch) */
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp .Lenc_loop_compact /* the .Lenc_loop_compact label line is absent from this extract */
+.p2align 4
+ xorl 0(%r15),%eax /* XOR the next four key words into the state */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15 /* advance the key pointer by 16 bytes */
+ movzbl %al,%r10d /* each state byte is replaced by the table byte it indexes ... */
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+ movzbl %dl,%r8d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+ movzbl %dh,%ebp
+ movzbl %ah,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+ movzbl %cl,%edi
+ shll $8,%r9d /* ... and shifted to byte position 1, 2 or 3 of an output word */
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+ movzbl %bl,%ebp
+ movzbl %dh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+ movzbl %ah,%edi
+ shrl $8,%ecx
+ shrl $8,%ebx
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rcx,1),%edx
+ movzbl (%r14,%rbx,1),%ecx
+ shll $16,%r9d
+ shll $16,%r13d
+ shll $16,%ebp
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+ xorl %ebp,%r8d
+ shll $24,%esi
+ shll $24,%edi
+ shll $24,%edx
+ xorl %esi,%r10d
+ shll $24,%ecx
+ xorl %edi,%r11d
+ movl %r10d,%eax
+ movl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ cmpq 16(%rsp),%r15 /* compare with the end pointer the caller stored at its 8(%rsp); 16 here because of the return address */
+ je .Lenc_compact_done /* the .Lenc_compact_done label line is absent from this extract */
+ movl %eax,%esi /* byte-wise GF(2^8) doubling of eax and ebx into r8d and r9d */
+ movl %ebx,%edi
+ andl $2155905152,%esi /* 2155905152 = 0x80808080: top bit of every byte */
+ andl $2155905152,%edi
+ movl %esi,%r10d
+ movl %edi,%r11d
+ shrl $7,%r10d
+ leal (%rax,%rax,1),%r8d /* r8d = 2 * eax */
+ shrl $7,%r11d
+ leal (%rbx,%rbx,1),%r9d
+ subl %r10d,%esi /* 0x80 - 0x01 = 0x7f in each byte whose top bit was set */
+ subl %r11d,%edi
+ andl $4278124286,%r8d /* 4278124286 = 0xfefefefe: drop bits carried across byte boundaries */
+ andl $4278124286,%r9d
+ andl $454761243,%esi /* 454761243 = 0x1b1b1b1b: reduction constant for those bytes */
+ andl $454761243,%edi
+ movl %eax,%r10d /* keep copies of the undoubled words */
+ movl %ebx,%r11d
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+ xorl %r8d,%eax /* combine original, doubled value and rotations by 24, 16 and 8 bits */
+ xorl %r9d,%ebx
+ movl %ecx,%esi /* same computation for ecx and edx, interleaved with the above */
+ movl %edx,%edi
+ roll $24,%eax
+ roll $24,%ebx
+ andl $2155905152,%esi
+ andl $2155905152,%edi
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movl %esi,%r12d
+ movl %edi,%ebp
+ rorl $16,%r10d
+ rorl $16,%r11d
+ shrl $7,%r12d
+ leal (%rcx,%rcx,1),%r8d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ shrl $7,%ebp
+ leal (%rdx,%rdx,1),%r9d
+ rorl $8,%r10d
+ rorl $8,%r11d
+ subl %r12d,%esi
+ subl %ebp,%edi
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ andl $4278124286,%r8d
+ andl $4278124286,%r9d
+ andl $454761243,%esi
+ andl $454761243,%edi
+ movl %ecx,%r12d
+ movl %edx,%ebp
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ roll $24,%ecx
+ roll $24,%edx
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ movl 0(%r14),%esi /* this and the loads at 64/128/192 go to registers that are overwritten before use: table touches only */
+ rorl $16,%r12d
+ rorl $16,%ebp
+ movl 64(%r14),%edi
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ movl 128(%r14),%r8d
+ rorl $8,%r12d
+ rorl $8,%ebp
+ movl 192(%r14),%r9d
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ jmp .Lenc_loop_compact
+.p2align 4
+ xorl 0(%r15),%eax /* final XOR with the key words at r15; result stays in eax/ebx/ecx/edx */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq
+.globl AES_encrypt /* exported entry: reads 16 bytes via the 1st argument, writes 16 bytes via the 2nd, key schedule in the 3rd */
+.def AES_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+.globl asm_AES_encrypt /* second exported name; both label lines are absent from this extract */
+ movq %rdi,8(%rsp) /* park rdi and rsi just above the return address; reloaded before retq */
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi /* move arguments from rcx/rdx/r8 into rdi/rsi/rdx */
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ pushq %rbx /* save six registers; restored from this area at exit */
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r10 /* remember rsp as it is right after the pushes */
+ leaq -63(%rdx),%rcx /* next five instructions: align rsp to 64, then lower it by ((rsp - (key - 63)) & 960) */
+ andq $-64,%rsp
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx /* 960 = 0x3c0; presumably positions the frame relative to the key schedule -- intent not stated in the code */
+ subq %rcx,%rsp
+ subq $32,%rsp /* four 8-byte local slots */
+ movq %rsi,16(%rsp) /* slot 16: output pointer */
+ movq %r10,24(%rsp) /* slot 24: rsp value saved above */
+ movq %rdx,%r15 /* r15 = key schedule */
+ movl 240(%r15),%r13d /* dword at key+240 (the value 10/12/14 stored by the key-setup routine) */
+ movl 0(%rdi),%eax /* load the 16 input bytes as four dwords */
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ shll $4,%r13d /* times 16 */
+ leaq (%r15,%r13,1),%rbp /* rbp = key + 16 * that value */
+ movq %r15,(%rsp) /* slot 0: key start */
+ movq %rbp,8(%rsp) /* slot 8: end pointer compared against by the compact routine */
+ leaq .LAES_Te+2048(%rip),%r14 /* r14 = address 2048 bytes into .LAES_Te */
+ leaq 768(%rsp),%rbp /* next four instructions: r14 += (rsp + 768 - r14) & 768, i.e. one of +0/+256/+512/+768 depending on the stack address */
+ subq %r14,%rbp
+ andq $768,%rbp
+ leaq (%r14,%rbp,1),%r14
+ call _x86_64_AES_encrypt_compact
+ movq 16(%rsp),%r9 /* output pointer */
+ movq 24(%rsp),%rsi /* saved rsp */
+ movl %eax,0(%r9) /* store the four result dwords */
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movq (%rsi),%r15 /* reload the six pushed registers; r15 was pushed last, so it sits lowest */
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp /* rsp back to its value at entry */
+ movq 8(%rsp),%rdi /* restore rdi and rsi parked at entry */
+ movq 16(%rsp),%rsi
+ retq
+.def _x86_64_AES_decrypt; .scl 3; .type 32; .endef /* no .globl: table-driven block routine. State in eax/ebx/ecx/edx, r15 = round keys, r14 = table base */
+.p2align 4 /* the routine's own label line is absent from this extract */
+ xorl 0(%r15),%eax /* XOR the four key words at r15 into the state */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ movl 240(%r15),%r13d /* r13d = dword at key+240, used as the loop counter */
+ subl $1,%r13d /* one less loop pass: the code after the loop handles the final step */
+ jmp .Ldec_loop /* the .Ldec_loop label line is absent from this extract */
+.p2align 4
+ movzbl %al,%esi /* loop body: each state byte indexes the table at r14 */
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d /* entries are 8 bytes apart; displacements 3, 2, 1 below read the same entry at other byte offsets */
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+ movzbl %bh,%esi
+ shrl $16,%eax /* expose the upper two bytes of each state word */
+ movzbl %ch,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+ shrl $16,%ebx
+ leaq 16(%r15),%r15 /* advance to the next 16 bytes of key material */
+ shrl $16,%ecx
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+ movzbl %dh,%esi
+ movl 12(%r15),%edx /* reload the state registers with the next key words ... */
+ movzbl %ah,%ebp
+ xorl 1(%r14,%rsi,8),%r12d
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rbp,8),%r8d
+ xorl %r10d,%eax /* ... and fold in the accumulated table values */
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r12d,%ecx
+ xorl %r11d,%ebx
+ xorl %r8d,%edx
+ subl $1,%r13d /* count down and repeat */
+ jnz .Ldec_loop
+ leaq 2048(%r14),%r14 /* final step uses the byte-indexed table 2048 bytes further on; undone before returning */
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl (%r14,%rsi,1),%r10d /* one table byte per state byte, shifted into place below */
+ movzbl (%r14,%rdi,1),%r11d
+ movzbl (%r14,%rbp,1),%r12d
+ movzbl %dl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movzbl (%r14,%rsi,1),%r8d
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+ shll $8,%edi
+ shll $8,%ebp
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%edx
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ shrl $16,%eax
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ shll $8,%esi
+ shll $8,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%ecx
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+ shll $16,%esi
+ shll $16,%edi
+ shll $16,%ebp
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+ movzbl %bl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+ shll $16,%esi
+ shll $24,%edi
+ shll $24,%ebp
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movl 16+12(%r15),%edx /* last key words live 16 bytes past the current r15 */
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movl 16+0(%r15),%eax
+ shll $24,%esi
+ shll $24,%edi
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ leaq -2048(%r14),%r14 /* restore r14 to the value it had on entry */
+ xorl %r10d,%eax /* result is left in eax/ebx/ecx/edx */
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ retq
+.def _x86_64_AES_decrypt_compact; .scl 3; .type 32; .endef /* no .globl: variant using a 256-byte table at r14 with byte loads. State in eax/ebx/ecx/edx, r15 = round keys */
+.p2align 4 /* the routine's own label line is absent from this extract */
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi /* eight loads 32 bytes apart covering r14..r14+255; every destination is overwritten before being read, so they only touch the table (presumably a prefetch) */
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp .Ldec_loop_compact /* the .Ldec_loop_compact label line is absent from this extract */
+.p2align 4
+ xorl 0(%r15),%eax /* XOR the next four key words into the state */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15 /* advance the key pointer by 16 bytes */
+ movzbl %al,%r10d /* each state byte is replaced by the table byte it indexes ... */
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+ movzbl %dl,%r8d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+ movzbl %bh,%ebp
+ movzbl %ch,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+ movzbl %cl,%edi
+ shll $8,%r9d /* ... and shifted to byte position 1, 2 or 3 of an output word */
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+ movzbl %bl,%ebp
+ movzbl %bh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+ movzbl %ch,%edi
+ shll $16,%r9d
+ shll $16,%r13d
+ movzbl (%r14,%rdi,1),%ebx
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+ movzbl %dh,%edi
+ shrl $8,%eax
+ shll $16,%ebp
+ movzbl (%r14,%rdi,1),%ecx
+ movzbl (%r14,%rax,1),%edx
+ xorl %ebp,%r8d
+ shll $24,%esi
+ shll $24,%ebx
+ shll $24,%ecx
+ xorl %esi,%r10d
+ shll $24,%edx
+ xorl %r11d,%ebx
+ movl %r10d,%eax
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ cmpq 16(%rsp),%r15 /* compare with the end pointer the caller stored at its 8(%rsp); 16 here because of the return address */
+ je .Ldec_compact_done /* the .Ldec_compact_done label line is absent from this extract */
+ movq 256+0(%r14),%rsi /* three 64-bit masks stored right after the 256 table bytes (0x80.., 0xfe.., 0x1b.. patterns in the MASM twin of this table) */
+ shlq $32,%rbx /* pack the state into two 64-bit registers: rax = ebx:eax, rcx = edx:ecx */
+ shlq $32,%rdx
+ movq 256+8(%r14),%rdi
+ orq %rbx,%rax
+ orq %rdx,%rcx
+ movq 256+16(%r14),%rbp
+ movq %rax,%rbx /* first byte-wise doubling: results end up in r8 and r11 */
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8
+ movq %rdx,%r11
+ andq %rsi,%rbx /* second doubling (of the doubled value): results end up in r9 and r12 */
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ andq %rsi,%rbx /* third doubling: results end up in r10 and r13; XORs with the original are interleaved */
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10
+ xorq %rdx,%r13
+ xorq %r10,%rax /* combine the multiples; the 32-bit rotates by 8, 24 and 16 below are applied per state word */
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx /* split the packed pairs back into 32-bit words */
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+ movq 0(%r14),%rsi /* this and the loads at 64/128/192/256 go to registers that are overwritten before use: table touches only */
+ shrq $32,%r8
+ shrq $32,%r11
+ movq 64(%r14),%rdi
+ roll $16,%r9d
+ roll $16,%r12d
+ movq 128(%r14),%rbp
+ roll $16,%r8d
+ roll $16,%r11d
+ movq 192(%r14),%r10
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+ movq 256(%r14),%r13
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ jmp .Ldec_loop_compact
+.p2align 4
+ xorl 0(%r15),%eax /* final XOR with the key words at r15; result stays in eax/ebx/ecx/edx */
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ retq
+.globl AES_decrypt /* exported entry: reads 16 bytes via the 1st argument, writes 16 bytes via the 2nd, key schedule in the 3rd */
+.def AES_decrypt; .scl 2; .type 32; .endef
+.p2align 4
+.globl asm_AES_decrypt /* second exported name; both label lines are absent from this extract */
+ movq %rdi,8(%rsp) /* park rdi and rsi just above the return address; reloaded before retq */
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi /* move arguments from rcx/rdx/r8 into rdi/rsi/rdx */
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ pushq %rbx /* save six registers; restored from this area at exit */
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r10 /* remember rsp as it is right after the pushes */
+ leaq -63(%rdx),%rcx /* next five instructions: align rsp to 64, then lower it by ((rsp - (key - 63)) & 960) */
+ andq $-64,%rsp
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx /* 960 = 0x3c0; presumably positions the frame relative to the key schedule -- intent not stated in the code */
+ subq %rcx,%rsp
+ subq $32,%rsp /* four 8-byte local slots */
+ movq %rsi,16(%rsp) /* slot 16: output pointer */
+ movq %r10,24(%rsp) /* slot 24: rsp value saved above */
+ movq %rdx,%r15 /* r15 = key schedule */
+ movl 240(%r15),%r13d /* dword at key+240 */
+ movl 0(%rdi),%eax /* load the 16 input bytes as four dwords */
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ shll $4,%r13d /* times 16 */
+ leaq (%r15,%r13,1),%rbp /* rbp = key + 16 * that value */
+ movq %r15,(%rsp) /* slot 0: key start */
+ movq %rbp,8(%rsp) /* slot 8: end pointer compared against by the compact routine */
+ leaq .LAES_Td+2048(%rip),%r14 /* r14 = address 2048 bytes into .LAES_Td */
+ leaq 768(%rsp),%rbp /* rbp = (rsp + 768 - r14) & 768, i.e. 0/256/512/768 depending on the stack address */
+ subq %r14,%rbp
+ andq $768,%rbp
+ leaq (%r14,%rbp,1),%r14
+ shrq $3,%rbp /* plus rbp/8 (0/32/64/96): the table copies are 288 bytes apart, 256 table bytes plus 32 bytes of masks in the MASM twin */
+ addq %rbp,%r14
+ call _x86_64_AES_decrypt_compact
+ movq 16(%rsp),%r9 /* output pointer */
+ movq 24(%rsp),%rsi /* saved rsp */
+ movl %eax,0(%r9) /* store the four result dwords */
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movq (%rsi),%r15 /* reload the six pushed registers; r15 was pushed last, so it sits lowest */
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp /* rsp back to its value at entry */
+ movq 8(%rsp),%rdi /* restore rdi and rsi parked at entry */
+ movq 16(%rsp),%rsi
+ retq
+.globl AES_set_encrypt_key /* exported wrapper: rearranges the arguments, saves registers and calls _x86_64_AES_set_encrypt_key */
+.def AES_set_encrypt_key; .scl 2; .type 32; .endef
+.p2align 4 /* the routine's own label line is absent from this extract */
+ movq %rdi,8(%rsp) /* park rdi and rsi just above the return address; reloaded before retq */
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi /* move arguments from rcx/rdx/r8 into rdi/rsi/rdx */
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ pushq %rbx /* save six registers */
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $8,%rsp /* one extra 8-byte slot; the frame is 56 bytes in total */
+ call _x86_64_AES_set_encrypt_key /* rax is not touched afterwards, so the callee's return value is passed through */
+ movq 8(%rsp),%r15 /* reload the saved registers from above the extra slot */
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp /* release the 56-byte frame */
+ movq 8(%rsp),%rdi /* restore rdi and rsi parked at entry */
+ movq 16(%rsp),%rsi
+ retq
+.def _x86_64_AES_set_encrypt_key; .scl 3; .type 32; .endef /* no .globl: key expansion. In: rdi = user key, esi = key length in bits, rdx = output schedule. Out: rax = 0, -1 or -2 */
+.p2align 4 /* the routine's own label line is absent from this extract */
+ movl %esi,%ecx /* ecx = bit length */
+ movq %rdi,%rsi /* rsi = user key */
+ movq %rdx,%rdi /* rdi = output schedule */
+ testq $-1,%rsi /* reject a zero key pointer */
+ jz .Lbadpointer /* label line absent from this extract; presumably the movq $-1 near the end */
+ testq $-1,%rdi /* reject a zero schedule pointer */
+ jz .Lbadpointer
+ leaq .LAES_Te(%rip),%rbp
+ leaq 2048+128(%rbp),%rbp /* rbp = .LAES_Te + 2048 + 128; the byte table is addressed as -128(%rbp,index) below */
+ movl 0-128(%rbp),%eax /* eight loads 32 bytes apart over that 256-byte table; results are overwritten before use (presumably a prefetch) */
+ movl 32-128(%rbp),%ebx
+ movl 64-128(%rbp),%r8d
+ movl 96-128(%rbp),%edx
+ movl 128-128(%rbp),%eax
+ movl 160-128(%rbp),%ebx
+ movl 192-128(%rbp),%r8d
+ movl 224-128(%rbp),%edx
+ cmpl $128,%ecx /* dispatch on the bit length */
+ je .L10rounds
+ cmpl $192,%ecx
+ je .L12rounds
+ cmpl $256,%ecx
+ je .L14rounds
+ movq $-2,%rax /* any other length: return -2 */
+ jmp .Lexit /* the .Lexit label line is absent from this extract */
+ movq 0(%rsi),%rax /* 128-bit case: copy the 16 key bytes to the schedule */
+ movq 8(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rdx,8(%rdi)
+ shrq $32,%rdx /* edx = last dword of the key; eax already holds the first */
+ xorl %ecx,%ecx /* ecx = iteration index */
+ jmp .L10shortcut /* label line absent; presumably skips the two reloads below on the first pass */
+.p2align 2
+ movl 0(%rdi),%eax /* first dword of the current 16-byte group */
+ movl 12(%rdi),%edx /* last dword of the current group */
+ movzbl %dl,%esi /* substitute each byte of edx through the table and place it one byte position rotated */
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+ xorl 1024-128(%rbp,%rcx,4),%eax /* XOR a per-iteration dword from the array 1024 bytes past the byte table (presumably the round constants) */
+ movl %eax,16(%rdi) /* emit four new dwords, each XORed with the dword 16 bytes earlier */
+ xorl 4(%rdi),%eax
+ movl %eax,20(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,24(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,28(%rdi)
+ addl $1,%ecx
+ leaq 16(%rdi),%rdi /* advance by one 16-byte group */
+ cmpl $10,%ecx
+ jl .L10loop /* label line absent from this extract */
+ movl $10,80(%rdi) /* rdi has advanced 10*16 = 160 bytes, so this stores 10 at offset 240 of the schedule */
+ xorq %rax,%rax /* return 0 */
+ jmp .Lexit
+ movq 0(%rsi),%rax /* 192-bit case: copy the 24 key bytes to the schedule */
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rdx,16(%rdi)
+ shrq $32,%rdx /* edx = last dword of the key */
+ xorl %ecx,%ecx
+ jmp .L12shortcut /* label line absent from this extract */
+.p2align 2
+ movl 0(%rdi),%eax /* first dword of the current 24-byte group */
+ movl 20(%rdi),%edx /* last dword of the current group */
+ movzbl %dl,%esi /* same substitute-and-rotate step as in the 128-bit case */
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+ xorl 1024-128(%rbp,%rcx,4),%eax /* per-iteration dword, as above */
+ movl %eax,24(%rdi) /* emit four dwords, each XORed with the dword 24 bytes earlier */
+ xorl 4(%rdi),%eax
+ movl %eax,28(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,32(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,36(%rdi)
+ cmpl $7,%ecx /* on the pass with ecx = 7 only these four dwords are written */
+ je .L12break
+ addl $1,%ecx
+ xorl 16(%rdi),%eax /* otherwise two more dwords complete the 24-byte group */
+ movl %eax,40(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,44(%rdi)
+ leaq 24(%rdi),%rdi
+ jmp .L12loop
+ movl $12,72(%rdi) /* rdi has advanced 7*24 = 168 bytes, so this stores 12 at offset 240 of the schedule */
+ xorq %rax,%rax /* return 0 */
+ jmp .Lexit
+ movq 0(%rsi),%rax /* 256-bit case: copy the 32 key bytes to the schedule */
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rcx
+ movq 24(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+ shrq $32,%rdx /* edx = last dword of the key */
+ xorl %ecx,%ecx
+ jmp .L14shortcut /* label line absent from this extract */
+.p2align 2
+ movl 0(%rdi),%eax /* first dword of the current 32-byte group */
+ movl 28(%rdi),%edx /* last dword of the current group */
+ movzbl %dl,%esi /* same substitute-and-rotate step as in the 128-bit case */
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+ xorl 1024-128(%rbp,%rcx,4),%eax /* per-iteration dword, as above */
+ movl %eax,32(%rdi) /* emit four dwords, each XORed with the dword 32 bytes earlier */
+ xorl 4(%rdi),%eax
+ movl %eax,36(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,40(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,44(%rdi)
+ cmpl $6,%ecx /* on the pass with ecx = 6 only these four dwords are written */
+ je .L14break
+ addl $1,%ecx
+ movl %eax,%edx /* second half: substitute the bytes of the dword just written, with no rotation and no per-iteration dword */
+ movl 16(%rdi),%eax
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ shll $8,%ebx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $16,%ebx
+ xorl %ebx,%eax
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $24,%ebx
+ xorl %ebx,%eax
+ movl %eax,48(%rdi) /* four more dwords complete the 32-byte group */
+ xorl 20(%rdi),%eax
+ movl %eax,52(%rdi)
+ xorl 24(%rdi),%eax
+ movl %eax,56(%rdi)
+ xorl 28(%rdi),%eax
+ movl %eax,60(%rdi)
+ leaq 32(%rdi),%rdi
+ jmp .L14loop
+ movl $14,48(%rdi) /* rdi has advanced 6*32 = 192 bytes, so this stores 14 at offset 240 of the schedule */
+ xorq %rax,%rax /* return 0 */
+ jmp .Lexit
+ movq $-1,%rax /* return -1 (presumably the .Lbadpointer target) */
+ retq
+.globl AES_set_decrypt_key  /* AES_set_decrypt_key: runs _x86_64_AES_set_encrypt_key, then reverses the order of the 16-byte round keys in place and rewrites every round key except the first and last. Returns 0 in rax on the straight-line path. */
+.def AES_set_decrypt_key; .scl 2; .type 32; .endef  /* NOTE(review): no label definitions (entry point, loop heads, .Labort) are visible in this excerpt even though the branches below reference them - confirm against the full file. */
+.p2align 4
+ movq %rdi,8(%rsp)  /* stash rdi/rsi just above the return address; both are reloaded right before retq */
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi  /* move the incoming rcx, rdx, r8 into rdi, rsi, rdx - presumably Win64 argument registers remapped to what the shared helper expects */
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ pushq %rbx  /* save six registers that are clobbered below */
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rdx  /* keep the third argument (the key schedule pointer) on top of the stack */
+ call _x86_64_AES_set_encrypt_key
+ movq (%rsp),%r8  /* r8 = key schedule pointer saved by the last push */
+ cmpl $0,%eax
+ jne .Labort  /* helper returned non-zero: skip the transformation (target label not visible here) */
+ movl 240(%r8),%r14d  /* r14d = dword at schedule+240; the encrypt-key helper stores the round count (10/12/14) there */
+ xorq %rdi,%rdi
+ leaq (%rdi,%r14,4),%rcx  /* rcx = 4 * rounds */
+ movq %r8,%rsi  /* rsi -> first round key */
+ leaq (%r8,%rcx,4),%rdi  /* rdi -> schedule + 16 * rounds, i.e. the last round key */
+.p2align 2
+ movq 0(%rsi),%rax  /* swap the 16-byte round keys at rsi and rdi */
+ movq 8(%rsi),%rbx
+ movq 0(%rdi),%rcx
+ movq 8(%rdi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,0(%rsi)
+ movq %rdx,8(%rsi)
+ leaq 16(%rsi),%rsi  /* walk the two pointers towards each other */
+ leaq -16(%rdi),%rdi
+ cmpq %rsi,%rdi
+ jne .Linvert  /* repeat until both pointers meet at the middle key */
+ leaq .LAES_Te+2048+1024(%rip),%rax  /* rax -> constant area 3072 bytes into .LAES_Te */
+ movq 40(%rax),%rsi  /* rsi, rdi, rbp = three 64-bit masks; presumably 0x80.., 0xfe.., 0x1b.. replicated per byte - confirm against the table layout */
+ movq 48(%rax),%rdi
+ movq 56(%rax),%rbp
+ movq %r8,%r15  /* r15 = schedule base; advanced by 16 before each use, so the first key is never touched */
+ subl $1,%r14d  /* rounds - 1 keys get transformed, so the last key is left alone as well */
+.p2align 2
+ leaq 16(%r15),%r15  /* next round key */
+ movq 0(%r15),%rax  /* rax = words 0-1, rcx = words 2-3; both halves are processed in lockstep */
+ movq 8(%r15),%rcx
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ andq %rsi,%rbx  /* isolate the bits selected by the first mask */
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8  /* r8 = value doubled as a 64-bit add */
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx  /* with a 0x80 mask each flagged byte becomes 0x80 - 0x01 = 0x7f */
+ subq %r12,%rdx
+ andq %rdi,%r8  /* second mask drops bits carried across byte boundaries by the doubling */
+ andq %rdi,%r11
+ andq %rbp,%rbx  /* third mask turns the per-byte 0x7f into the reduction constant */
+ andq %rbp,%rdx
+ xorq %r8,%rbx  /* rbx/rdx = x2: every byte doubled with reduction (given the assumed mask values) */
+ xorq %r11,%rdx
+ movq %rbx,%r8  /* r8/r11 keep x2 */
+ movq %rdx,%r11
+ andq %rsi,%rbx  /* same doubling step applied to x2 */
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx  /* rbx/rdx = x4 */
+ xorq %r12,%rdx
+ movq %rbx,%r9  /* r9/r12 keep x4 */
+ movq %rdx,%r12
+ andq %rsi,%rbx  /* third doubling step, interleaved with the first combinations */
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8  /* r8/r11 = x1 ^ x2 */
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9  /* r9/r12 = x1 ^ x4 */
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10  /* r10/r13 = x8 */
+ xorq %rdx,%r13
+ xorq %r10,%rax  /* rax/rcx = x1 ^ x8 */
+ xorq %r13,%rcx
+ xorq %r10,%r8  /* r8/r11 = x1 ^ x2 ^ x8 */
+ xorq %r13,%r11
+ movq %rax,%rbx  /* rbx/rdx will carry the upper 32-bit word of each half */
+ movq %rcx,%rdx
+ xorq %r10,%r9  /* r9/r12 = x1 ^ x4 ^ x8 */
+ xorq %r13,%r12
+ shrq $32,%rbx
+ shrq $32,%rdx
+ xorq %r8,%r10  /* r10/r13 = x1 ^ x2 */
+ xorq %r11,%r13
+ roll $8,%eax  /* rotate the (x1 ^ x8) words left by 8 bits */
+ roll $8,%ecx
+ xorq %r9,%r10  /* r10/r13 = x2 ^ x4 ^ x8 */
+ xorq %r12,%r13
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax  /* fold in the unrotated (x2 ^ x4 ^ x8) term, low words then high words */
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d  /* (x1 ^ x2 ^ x8) contributes rotated left by 24 bits */
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+ shrq $32,%r8
+ shrq $32,%r11
+ roll $16,%r9d  /* (x1 ^ x4 ^ x8) contributes rotated left by 16 bits */
+ roll $16,%r12d
+ roll $16,%r8d
+ roll $16,%r11d
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ movl %eax,0(%r15)  /* write the four transformed words back over the round key */
+ movl %ebx,4(%r15)
+ movl %ecx,8(%r15)
+ movl %edx,12(%r15)
+ subl $1,%r14d
+ jnz .Lpermute  /* loop while inner round keys remain */
+ xorq %rax,%rax  /* return value 0 */
+ movq 8(%rsp),%r15  /* reload the six saved registers; slot 0 still holds the pushed key pointer */
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp  /* drop all seven pushed quadwords */
+ movq 8(%rsp),%rdi  /* restore rdi/rsi saved at entry */
+ movq 16(%rsp),%rsi
+ retq
+.globl AES_cbc_encrypt  /* AES_cbc_encrypt: block-chained AES over a buffer. Judging by register use below, the six arguments are input pointer, output pointer, byte length, key schedule, 16-byte chaining value (read at entry, updated at exit) and an encrypt flag. */
+.def AES_cbc_encrypt; .scl 2; .type 32; .endef  /* NOTE(review): none of the labels that the branches below target are defined in this excerpt; comments describe the straight-line code and name targets only as written. */
+.p2align 4
+.globl asm_AES_cbc_encrypt  /* second exported name for the same code */
+ movq %rdi,8(%rsp)  /* stash rdi/rsi above the return address; reloaded before retq */
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi  /* rdi = arg 1 (input) */
+ movq %rdx,%rsi  /* rsi = arg 2 (output) */
+ movq %r8,%rdx  /* rdx = arg 3 (length in bytes) */
+ movq %r9,%rcx  /* rcx = arg 4 (key schedule) */
+ movq 40(%rsp),%r8  /* r8 = arg 5, taken from the stack (chaining value pointer) */
+ movq 48(%rsp),%r9  /* r9 = arg 6, taken from the stack (encrypt flag) */
+ cmpq $0,%rdx
+ je .Lcbc_epilogue  /* zero length: nothing to do */
+ pushfq  /* flags are saved because cld below changes the direction flag */
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ cld  /* string instructions below must run forwards */
+ movl %r9d,%r9d  /* keep only the low 32 bits of the flag */
+ leaq .LAES_Te(%rip),%r14  /* default table: .LAES_Te */
+ cmpq $0,%r9
+ jne .Lcbc_picked_te  /* non-zero flag keeps .LAES_Te */
+ leaq .LAES_Td(%rip),%r14  /* zero flag: use .LAES_Td instead */
+ movl OPENSSL_ia32cap_P(%rip),%r10d  /* first dword of the CPU capability vector */
+ cmpq $512,%rdx
+ jb .Lcbc_slow_prologue  /* fewer than 512 bytes: compact path */
+ testq $15,%rdx
+ jnz .Lcbc_slow_prologue  /* length not a multiple of 16: compact path */
+ btl $IA32CAP_BIT0_HT,%r10d
+ jc .Lcbc_slow_prologue  /* capability bit set: compact path */
+ leaq -88-248(%rsp),%r15  /* candidate frame 336 bytes below the current stack pointer ... */
+ andq $-64,%r15  /* ... rounded down to a 64-byte boundary */
+ movq %r14,%r10  /* r10 = table start */
+ leaq 2304(%r14),%r11  /* r11 = table start + 2304 */
+ movq %r15,%r12  /* r12 = candidate frame */
+ andq $4095,%r10  /* compare all three by their offset within a 4 KiB page */
+ andq $4095,%r11
+ andq $4095,%r12
+ cmpq %r11,%r12
+ jb .Lcbc_te_break_out  /* frame offset below the table end offset: take the other adjustment */
+ subq %r11,%r12  /* otherwise lower the frame so its page offset equals the table end offset */
+ subq %r12,%r15
+ jmp .Lcbc_te_ok
+ subq %r10,%r12  /* alternative adjustment: distance from table start, modulo 4096, plus 320 bytes */
+ andq $4095,%r12
+ addq $320,%r12
+ subq %r12,%r15  /* presumably both adjustments keep the frame from aliasing the table at 4 KiB granularity - confirm */
+.p2align 2
+ xchgq %rsp,%r15  /* switch to the new frame; r15 = previous stack pointer */
+ movq %r15,16(%rsp)  /* frame+16: previous stack pointer */
+ movq %rdi,24(%rsp)  /* frame+24: input pointer */
+ movq %rsi,32(%rsp)  /* frame+32: output pointer */
+ movq %rdx,40(%rsp)  /* frame+40: remaining length */
+ movq %rcx,48(%rsp)  /* frame+48: key schedule */
+ movq %r8,56(%rsp)  /* frame+56: chaining value pointer */
+ movl $0,80+240(%rsp)  /* frame+320: stays 0 unless the key is copied to frame+80 below */
+ movq %r8,%rbp  /* working registers: rbp = chaining pointer, rbx = flag, r9 = output, r8 = input, r15 = key */
+ movq %r9,%rbx
+ movq %rsi,%r9
+ movq %rdi,%r8
+ movq %rcx,%r15
+ movl 240(%r15),%eax  /* eax = dword stored at key+240 */
+ movq %r15,%r10
+ subq %r14,%r10
+ andq $4095,%r10  /* r10 = (key - table) modulo 4096 */
+ cmpq $2304,%r10
+ jb .Lcbc_do_ecopy  /* key within 2304 bytes (mod 4096) after the table start: copy it */
+ cmpq $4096-248,%r10
+ jb .Lcbc_skip_ecopy  /* comfortably clear of the table: use the key in place */
+.p2align 2
+ movq %r15,%rsi  /* copy source: caller key schedule */
+ leaq 80(%rsp),%rdi  /* copy destination: frame+80 */
+ leaq 80(%rsp),%r15  /* from now on the key pointer refers to the copy */
+ movl $30,%ecx  /* 30 quadwords = 240 bytes */
+.long 0x90A548F3  /* bytes F3 48 A5 90: rep movsq followed by nop */
+ movl %eax,(%rdi)  /* rdi now points at frame+320: store the key+240 dword, which also marks the copy as present */
+ movq %r15,0(%rsp)  /* frame+0: key pointer actually used by the block calls */
+ movl $18,%ecx  /* 18 iterations of 128 bytes = 2304 bytes */
+.p2align 2
+ movq 0(%r14),%r10  /* read one quadword from each 32-byte chunk; the values are never used - presumably a cache warm-up */
+ movq 32(%r14),%r11
+ movq 64(%r14),%r12
+ movq 96(%r14),%r13
+ leaq 128(%r14),%r14
+ subl $1,%ecx
+ jnz .Lcbc_prefetch_te
+ leaq -2304(%r14),%r14  /* rewind r14 to the table start */
+ cmpq $0,%rbx  /* NOTE(review): no branch consumes this comparison in the excerpt; a jump to the decrypt code was presumably lost - confirm against the full file */
+ movl 0(%rbp),%eax  /* load the 16-byte chaining value into eax, ebx, ecx, edx */
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+.p2align 2
+ xorl 0(%r8),%eax  /* mix the next input block into the chaining value */
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15  /* key pointer for the block routine */
+ movq %r8,24(%rsp)  /* input pointer is spilled across the call */
+ call _x86_64_AES_encrypt
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10  /* r10 = remaining length */
+ movl %eax,0(%r9)  /* write the result block to the output */
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ leaq 16(%r8),%r8  /* advance both pointers by one block */
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10  /* any full block left? the mov below leaves the flags intact */
+ movq %r10,40(%rsp)
+ jnz .Lcbc_fast_enc_loop
+ movq 56(%rsp),%rbp  /* store the last result block back through the chaining pointer */
+ movl %eax,0(%rbp)
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+ jmp .Lcbc_fast_cleanup
+.p2align 4
+ cmpq %r8,%r9  /* decrypt side of the fast path (entry label not visible) */
+ je .Lcbc_fast_dec_in_place  /* input and output are the same buffer */
+ movq %rbp,64(%rsp)  /* frame+64: pointer to the block to XOR with (starts at the chaining value) */
+.p2align 2
+ movl 0(%r8),%eax  /* load the next input block */
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ call _x86_64_AES_decrypt
+ movq 64(%rsp),%rbp  /* rbp -> previous block */
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0(%rbp),%eax  /* XOR the block-routine output with the previous block */
+ xorl 4(%rbp),%ebx
+ xorl 8(%rbp),%ecx
+ xorl 12(%rbp),%edx
+ movq %r8,%rbp  /* the current input block becomes the next previous block */
+ subq $16,%r10  /* sets the flags tested by jnz below; the mov/lea instructions in between do not alter them */
+ movq %r10,40(%rsp)
+ movq %rbp,64(%rsp)
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jnz .Lcbc_fast_dec_loop
+ movq 56(%rsp),%r12  /* copy the last input block out through the chaining pointer */
+ movq 0(%rbp),%r10
+ movq 8(%rbp),%r11
+ movq %r10,0(%r12)
+ movq %r11,8(%r12)
+ jmp .Lcbc_fast_cleanup
+.p2align 4
+ movq 0(%rbp),%r10  /* in-place variant: keep a copy of the chaining value at frame+64 */
+ movq 8(%rbp),%r11
+ movq %r10,0+64(%rsp)
+ movq %r11,8+64(%rsp)
+.p2align 2
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ call _x86_64_AES_decrypt
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax  /* XOR with the saved previous block */
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+ movq 0(%r8),%r11  /* grab the current input block before the output store overwrites it */
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jz .Lcbc_fast_dec_in_place_done
+ movq %r11,0+64(%rsp)  /* it becomes the previous block for the next round */
+ movq %r12,8+64(%rsp)
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ movq %r10,40(%rsp)
+ jmp .Lcbc_fast_dec_in_place_loop
+ movq 56(%rsp),%rdi  /* final block: hand the last input block back through the chaining pointer, then store the output */
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+.p2align 2
+ cmpl $0,80+240(%rsp)  /* was the key copied into the frame? lea below does not alter the flags */
+ leaq 80(%rsp),%rdi
+ je .Lcbc_exit  /* no copy: nothing to wipe */
+ movl $30,%ecx  /* 30 quadwords = the 240 copied bytes */
+ xorq %rax,%rax
+.long 0x90AB48F3  /* bytes F3 48 AB 90: rep stosq followed by nop - zeroes the key copy */
+ jmp .Lcbc_exit
+.p2align 4
+ leaq -88(%rsp),%rbp  /* compact path: frame 88 bytes below the stack pointer, 64-byte aligned */
+ andq $-64,%rbp
+ leaq -88-63(%rcx),%r10  /* lower the frame by a multiple of 64 (mask 960) derived from the key address; presumably to separate frame and key in the cache - confirm */
+ subq %rbp,%r10
+ negq %r10
+ andq $960,%r10
+ subq %r10,%rbp
+ xchgq %rsp,%rbp  /* switch stacks; rbp = previous stack pointer */
+ movq %rbp,16(%rsp)  /* frame+16: previous stack pointer */
+ movq %r8,56(%rsp)  /* frame+56: chaining value pointer */
+ movq %r8,%rbp  /* working registers as on the fast path, plus r10 = length */
+ movq %r9,%rbx
+ movq %rsi,%r9
+ movq %rdi,%r8
+ movq %rcx,%r15
+ movq %rdx,%r10
+ movl 240(%r15),%eax  /* eax = dword stored at key+240 */
+ movq %r15,0(%rsp)  /* frame+0: key pointer */
+ shll $4,%eax
+ leaq (%r15,%rax,1),%rax
+ movq %rax,8(%rsp)  /* frame+8: key + 16 * (dword at key+240) */
+ leaq 2048(%r14),%r14  /* skip the first 2048 bytes of the table */
+ leaq 768-8(%rsp),%rax
+ subq %r14,%rax
+ andq $768,%rax  /* offset 0, 256, 512 or 768, chosen from the frame and table addresses */
+ leaq (%r14,%rax,1),%r14  /* r14 -> one of four 256-byte regions */
+ cmpq $0,%rbx  /* NOTE(review): result is overwritten by the testq on the next line; a branch to the decrypt code was presumably lost from this excerpt */
+ testq $-16,%r10  /* at least one full block? the loads below keep the flags */
+ movl 0(%rbp),%eax  /* load the chaining value */
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+ jz .Lcbc_slow_enc_tail  /* fewer than 16 bytes in total */
+.p2align 2
+ xorl 0(%r8),%eax  /* mix the next input block into the chaining value */
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)  /* input, output and length are spilled across the call */
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+ call _x86_64_AES_encrypt_compact
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10
+ jnz .Lcbc_slow_enc_loop  /* another full block remains */
+ testq $15,%r10
+ jnz .Lcbc_slow_enc_tail  /* 1..15 trailing bytes remain */
+ movq 56(%rsp),%rbp  /* store the last result block back through the chaining pointer */
+ movl %eax,0(%rbp)
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+ jmp .Lcbc_exit
+.p2align 2
+ movq %rax,%r11  /* tail: park rax/rcx because the string instructions need them */
+ movq %rcx,%r12
+ movq %r10,%rcx  /* byte count = remaining length */
+ movq %r8,%rsi
+ movq %r9,%rdi
+.long 0x9066A4F3  /* bytes F3 A4 66 90: rep movsb plus a two-byte nop - copy the tail bytes from input to output */
+ movq $16,%rcx
+ subq %r10,%rcx  /* 16 - tail length */
+ xorq %rax,%rax
+.long 0x9066AAF3  /* bytes F3 AA 66 90: rep stosb plus a two-byte nop - zero-fill the rest of the block */
+ movq %r9,%r8  /* process the padded block in place in the output buffer */
+ movq $16,%r10
+ movq %r11,%rax
+ movq %r12,%rcx
+ jmp .Lcbc_slow_enc_loop
+.p2align 4
+ shrq $3,%rax  /* decrypt side of the compact path (entry label not visible): add rax / 8 to the table pointer */
+ addq %rax,%r14
+ movq 0(%rbp),%r11  /* keep a copy of the chaining value at frame+64 */
+ movq 8(%rbp),%r12
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+.p2align 2
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+ call _x86_64_AES_decrypt_compact
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax  /* XOR with the saved previous block */
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+ movq 0(%r8),%r11  /* current input block, needed as the next previous block */
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jc .Lcbc_slow_dec_partial  /* borrow: fewer than 16 bytes were left */
+ jz .Lcbc_slow_dec_done  /* exactly one block was left */
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jmp .Lcbc_slow_dec_loop
+ movq 56(%rsp),%rdi  /* last full block: update the chaining value, then store the output */
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ jmp .Lcbc_exit
+.p2align 2
+ movq 56(%rsp),%rdi  /* partial final block: update the chaining value ... */
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+ movl %eax,0+64(%rsp)  /* ... place the 16 result bytes at frame+64 ... */
+ movl %ebx,4+64(%rsp)
+ movl %ecx,8+64(%rsp)
+ movl %edx,12+64(%rsp)
+ movq %r9,%rdi
+ leaq 64(%rsp),%rsi
+ leaq 16(%r10),%rcx  /* r10 is negative here, so r10 + 16 is the number of bytes still owed */
+.long 0x9066A4F3  /* bytes F3 A4 66 90: rep movsb plus a two-byte nop - copy only that many bytes to the output */
+ jmp .Lcbc_exit
+.p2align 4
+ movq 16(%rsp),%rsi  /* rsi = stack pointer as it was after the register pushes */
+ movq (%rsi),%r15  /* reload the saved registers in reverse push order */
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp  /* rsp -> saved flags */
+ popfq
+ movq 8(%rsp),%rdi  /* restore rdi/rsi saved at entry */
+ movq 16(%rsp),%rsi
+ retq
+.p2align 6
+.long 0xa56363c6,0xa56363c6
+.long 0x847c7cf8,0x847c7cf8
+.long 0x997777ee,0x997777ee
+.long 0x8d7b7bf6,0x8d7b7bf6
+.long 0x0df2f2ff,0x0df2f2ff
+.long 0xbd6b6bd6,0xbd6b6bd6
+.long 0xb16f6fde,0xb16f6fde
+.long 0x54c5c591,0x54c5c591
+.long 0x50303060,0x50303060
+.long 0x03010102,0x03010102
+.long 0xa96767ce,0xa96767ce
+.long 0x7d2b2b56,0x7d2b2b56
+.long 0x19fefee7,0x19fefee7
+.long 0x62d7d7b5,0x62d7d7b5
+.long 0xe6abab4d,0xe6abab4d
+.long 0x9a7676ec,0x9a7676ec
+.long 0x45caca8f,0x45caca8f
+.long 0x9d82821f,0x9d82821f
+.long 0x40c9c989,0x40c9c989
+.long 0x877d7dfa,0x877d7dfa
+.long 0x15fafaef,0x15fafaef
+.long 0xeb5959b2,0xeb5959b2
+.long 0xc947478e,0xc947478e
+.long 0x0bf0f0fb,0x0bf0f0fb
+.long 0xecadad41,0xecadad41
+.long 0x67d4d4b3,0x67d4d4b3
+.long 0xfda2a25f,0xfda2a25f
+.long 0xeaafaf45,0xeaafaf45
+.long 0xbf9c9c23,0xbf9c9c23
+.long 0xf7a4a453,0xf7a4a453
+.long 0x967272e4,0x967272e4
+.long 0x5bc0c09b,0x5bc0c09b
+.long 0xc2b7b775,0xc2b7b775
+.long 0x1cfdfde1,0x1cfdfde1
+.long 0xae93933d,0xae93933d
+.long 0x6a26264c,0x6a26264c
+.long 0x5a36366c,0x5a36366c
+.long 0x413f3f7e,0x413f3f7e
+.long 0x02f7f7f5,0x02f7f7f5
+.long 0x4fcccc83,0x4fcccc83
+.long 0x5c343468,0x5c343468
+.long 0xf4a5a551,0xf4a5a551
+.long 0x34e5e5d1,0x34e5e5d1
+.long 0x08f1f1f9,0x08f1f1f9
+.long 0x937171e2,0x937171e2
+.long 0x73d8d8ab,0x73d8d8ab
+.long 0x53313162,0x53313162
+.long 0x3f15152a,0x3f15152a
+.long 0x0c040408,0x0c040408
+.long 0x52c7c795,0x52c7c795
+.long 0x65232346,0x65232346
+.long 0x5ec3c39d,0x5ec3c39d
+.long 0x28181830,0x28181830
+.long 0xa1969637,0xa1969637
+.long 0x0f05050a,0x0f05050a
+.long 0xb59a9a2f,0xb59a9a2f
+.long 0x0907070e,0x0907070e
+.long 0x36121224,0x36121224
+.long 0x9b80801b,0x9b80801b
+.long 0x3de2e2df,0x3de2e2df
+.long 0x26ebebcd,0x26ebebcd
+.long 0x6927274e,0x6927274e
+.long 0xcdb2b27f,0xcdb2b27f
+.long 0x9f7575ea,0x9f7575ea
+.long 0x1b090912,0x1b090912
+.long 0x9e83831d,0x9e83831d
+.long 0x742c2c58,0x742c2c58
+.long 0x2e1a1a34,0x2e1a1a34
+.long 0x2d1b1b36,0x2d1b1b36
+.long 0xb26e6edc,0xb26e6edc
+.long 0xee5a5ab4,0xee5a5ab4
+.long 0xfba0a05b,0xfba0a05b
+.long 0xf65252a4,0xf65252a4
+.long 0x4d3b3b76,0x4d3b3b76
+.long 0x61d6d6b7,0x61d6d6b7
+.long 0xceb3b37d,0xceb3b37d
+.long 0x7b292952,0x7b292952
+.long 0x3ee3e3dd,0x3ee3e3dd
+.long 0x712f2f5e,0x712f2f5e
+.long 0x97848413,0x97848413
+.long 0xf55353a6,0xf55353a6
+.long 0x68d1d1b9,0x68d1d1b9
+.long 0x00000000,0x00000000
+.long 0x2cededc1,0x2cededc1
+.long 0x60202040,0x60202040
+.long 0x1ffcfce3,0x1ffcfce3
+.long 0xc8b1b179,0xc8b1b179
+.long 0xed5b5bb6,0xed5b5bb6
+.long 0xbe6a6ad4,0xbe6a6ad4
+.long 0x46cbcb8d,0x46cbcb8d
+.long 0xd9bebe67,0xd9bebe67
+.long 0x4b393972,0x4b393972
+.long 0xde4a4a94,0xde4a4a94
+.long 0xd44c4c98,0xd44c4c98
+.long 0xe85858b0,0xe85858b0
+.long 0x4acfcf85,0x4acfcf85
+.long 0x6bd0d0bb,0x6bd0d0bb
+.long 0x2aefefc5,0x2aefefc5
+.long 0xe5aaaa4f,0xe5aaaa4f
+.long 0x16fbfbed,0x16fbfbed
+.long 0xc5434386,0xc5434386
+.long 0xd74d4d9a,0xd74d4d9a
+.long 0x55333366,0x55333366
+.long 0x94858511,0x94858511
+.long 0xcf45458a,0xcf45458a
+.long 0x10f9f9e9,0x10f9f9e9
+.long 0x06020204,0x06020204
+.long 0x817f7ffe,0x817f7ffe
+.long 0xf05050a0,0xf05050a0
+.long 0x443c3c78,0x443c3c78
+.long 0xba9f9f25,0xba9f9f25
+.long 0xe3a8a84b,0xe3a8a84b
+.long 0xf35151a2,0xf35151a2
+.long 0xfea3a35d,0xfea3a35d
+.long 0xc0404080,0xc0404080
+.long 0x8a8f8f05,0x8a8f8f05
+.long 0xad92923f,0xad92923f
+.long 0xbc9d9d21,0xbc9d9d21
+.long 0x48383870,0x48383870
+.long 0x04f5f5f1,0x04f5f5f1
+.long 0xdfbcbc63,0xdfbcbc63
+.long 0xc1b6b677,0xc1b6b677
+.long 0x75dadaaf,0x75dadaaf
+.long 0x63212142,0x63212142
+.long 0x30101020,0x30101020
+.long 0x1affffe5,0x1affffe5
+.long 0x0ef3f3fd,0x0ef3f3fd
+.long 0x6dd2d2bf,0x6dd2d2bf
+.long 0x4ccdcd81,0x4ccdcd81
+.long 0x140c0c18,0x140c0c18
+.long 0x35131326,0x35131326
+.long 0x2fececc3,0x2fececc3
+.long 0xe15f5fbe,0xe15f5fbe
+.long 0xa2979735,0xa2979735
+.long 0xcc444488,0xcc444488
+.long 0x3917172e,0x3917172e
+.long 0x57c4c493,0x57c4c493
+.long 0xf2a7a755,0xf2a7a755
+.long 0x827e7efc,0x827e7efc
+.long 0x473d3d7a,0x473d3d7a
+.long 0xac6464c8,0xac6464c8
+.long 0xe75d5dba,0xe75d5dba
+.long 0x2b191932,0x2b191932
+.long 0x957373e6,0x957373e6
+.long 0xa06060c0,0xa06060c0
+.long 0x98818119,0x98818119
+.long 0xd14f4f9e,0xd14f4f9e
+.long 0x7fdcdca3,0x7fdcdca3
+.long 0x66222244,0x66222244
+.long 0x7e2a2a54,0x7e2a2a54
+.long 0xab90903b,0xab90903b
+.long 0x8388880b,0x8388880b
+.long 0xca46468c,0xca46468c
+.long 0x29eeeec7,0x29eeeec7
+.long 0xd3b8b86b,0xd3b8b86b
+.long 0x3c141428,0x3c141428
+.long 0x79dedea7,0x79dedea7
+.long 0xe25e5ebc,0xe25e5ebc
+.long 0x1d0b0b16,0x1d0b0b16
+.long 0x76dbdbad,0x76dbdbad
+.long 0x3be0e0db,0x3be0e0db
+.long 0x56323264,0x56323264
+.long 0x4e3a3a74,0x4e3a3a74
+.long 0x1e0a0a14,0x1e0a0a14
+.long 0xdb494992,0xdb494992
+.long 0x0a06060c,0x0a06060c
+.long 0x6c242448,0x6c242448
+.long 0xe45c5cb8,0xe45c5cb8
+.long 0x5dc2c29f,0x5dc2c29f
+.long 0x6ed3d3bd,0x6ed3d3bd
+.long 0xefacac43,0xefacac43
+.long 0xa66262c4,0xa66262c4
+.long 0xa8919139,0xa8919139
+.long 0xa4959531,0xa4959531
+.long 0x37e4e4d3,0x37e4e4d3
+.long 0x8b7979f2,0x8b7979f2
+.long 0x32e7e7d5,0x32e7e7d5
+.long 0x43c8c88b,0x43c8c88b
+.long 0x5937376e,0x5937376e
+.long 0xb76d6dda,0xb76d6dda
+.long 0x8c8d8d01,0x8c8d8d01
+.long 0x64d5d5b1,0x64d5d5b1
+.long 0xd24e4e9c,0xd24e4e9c
+.long 0xe0a9a949,0xe0a9a949
+.long 0xb46c6cd8,0xb46c6cd8
+.long 0xfa5656ac,0xfa5656ac
+.long 0x07f4f4f3,0x07f4f4f3
+.long 0x25eaeacf,0x25eaeacf
+.long 0xaf6565ca,0xaf6565ca
+.long 0x8e7a7af4,0x8e7a7af4
+.long 0xe9aeae47,0xe9aeae47
+.long 0x18080810,0x18080810
+.long 0xd5baba6f,0xd5baba6f
+.long 0x887878f0,0x887878f0
+.long 0x6f25254a,0x6f25254a
+.long 0x722e2e5c,0x722e2e5c
+.long 0x241c1c38,0x241c1c38
+.long 0xf1a6a657,0xf1a6a657
+.long 0xc7b4b473,0xc7b4b473
+.long 0x51c6c697,0x51c6c697
+.long 0x23e8e8cb,0x23e8e8cb
+.long 0x7cdddda1,0x7cdddda1
+.long 0x9c7474e8,0x9c7474e8
+.long 0x211f1f3e,0x211f1f3e
+.long 0xdd4b4b96,0xdd4b4b96
+.long 0xdcbdbd61,0xdcbdbd61
+.long 0x868b8b0d,0x868b8b0d
+.long 0x858a8a0f,0x858a8a0f
+.long 0x907070e0,0x907070e0
+.long 0x423e3e7c,0x423e3e7c
+.long 0xc4b5b571,0xc4b5b571
+.long 0xaa6666cc,0xaa6666cc
+.long 0xd8484890,0xd8484890
+.long 0x05030306,0x05030306
+.long 0x01f6f6f7,0x01f6f6f7
+.long 0x120e0e1c,0x120e0e1c
+.long 0xa36161c2,0xa36161c2
+.long 0x5f35356a,0x5f35356a
+.long 0xf95757ae,0xf95757ae
+.long 0xd0b9b969,0xd0b9b969
+.long 0x91868617,0x91868617
+.long 0x58c1c199,0x58c1c199
+.long 0x271d1d3a,0x271d1d3a
+.long 0xb99e9e27,0xb99e9e27
+.long 0x38e1e1d9,0x38e1e1d9
+.long 0x13f8f8eb,0x13f8f8eb
+.long 0xb398982b,0xb398982b
+.long 0x33111122,0x33111122
+.long 0xbb6969d2,0xbb6969d2
+.long 0x70d9d9a9,0x70d9d9a9
+.long 0x898e8e07,0x898e8e07
+.long 0xa7949433,0xa7949433
+.long 0xb69b9b2d,0xb69b9b2d
+.long 0x221e1e3c,0x221e1e3c
+.long 0x92878715,0x92878715
+.long 0x20e9e9c9,0x20e9e9c9
+.long 0x49cece87,0x49cece87
+.long 0xff5555aa,0xff5555aa
+.long 0x78282850,0x78282850
+.long 0x7adfdfa5,0x7adfdfa5
+.long 0x8f8c8c03,0x8f8c8c03
+.long 0xf8a1a159,0xf8a1a159
+.long 0x80898909,0x80898909
+.long 0x170d0d1a,0x170d0d1a
+.long 0xdabfbf65,0xdabfbf65
+.long 0x31e6e6d7,0x31e6e6d7
+.long 0xc6424284,0xc6424284
+.long 0xb86868d0,0xb86868d0
+.long 0xc3414182,0xc3414182
+.long 0xb0999929,0xb0999929
+.long 0x772d2d5a,0x772d2d5a
+.long 0x110f0f1e,0x110f0f1e
+.long 0xcbb0b07b,0xcbb0b07b
+.long 0xfc5454a8,0xfc5454a8
+.long 0xd6bbbb6d,0xd6bbbb6d
+.long 0x3a16162c,0x3a16162c
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.long 0x00000001, 0x00000002, 0x00000004, 0x00000008  /* start of ten 32-bit constants (0x01 .. 0x36). The key-expansion loops XOR the dword at .LAES_Te+2048+1024 + 4*counter into the first word of each new round key; presumably this is that table (the AES round constants) - the label is not visible here, confirm */
+.long 0x00000010, 0x00000020, 0x00000040, 0x00000080
+.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080  /* 0x1b and 0x36 complete the ten constants (40 bytes); the 0x80808080 pair therefore sits at byte offset 40 */
+.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b  /* byte offsets 48 and 56; AES_set_decrypt_key loads quadwords from offsets 40, 48 and 56 of .LAES_Te+2048+1024 as its three masks (assuming this block lives at that address) */
+.p2align 6
+.long 0x50a7f451,0x50a7f451
+.long 0x5365417e,0x5365417e
+.long 0xc3a4171a,0xc3a4171a
+.long 0x965e273a,0x965e273a
+.long 0xcb6bab3b,0xcb6bab3b
+.long 0xf1459d1f,0xf1459d1f
+.long 0xab58faac,0xab58faac
+.long 0x9303e34b,0x9303e34b
+.long 0x55fa3020,0x55fa3020
+.long 0xf66d76ad,0xf66d76ad
+.long 0x9176cc88,0x9176cc88
+.long 0x254c02f5,0x254c02f5
+.long 0xfcd7e54f,0xfcd7e54f
+.long 0xd7cb2ac5,0xd7cb2ac5
+.long 0x80443526,0x80443526
+.long 0x8fa362b5,0x8fa362b5
+.long 0x495ab1de,0x495ab1de
+.long 0x671bba25,0x671bba25
+.long 0x980eea45,0x980eea45
+.long 0xe1c0fe5d,0xe1c0fe5d
+.long 0x02752fc3,0x02752fc3
+.long 0x12f04c81,0x12f04c81
+.long 0xa397468d,0xa397468d
+.long 0xc6f9d36b,0xc6f9d36b
+.long 0xe75f8f03,0xe75f8f03
+.long 0x959c9215,0x959c9215
+.long 0xeb7a6dbf,0xeb7a6dbf
+.long 0xda595295,0xda595295
+.long 0x2d83bed4,0x2d83bed4
+.long 0xd3217458,0xd3217458
+.long 0x2969e049,0x2969e049
+.long 0x44c8c98e,0x44c8c98e
+.long 0x6a89c275,0x6a89c275
+.long 0x78798ef4,0x78798ef4
+.long 0x6b3e5899,0x6b3e5899
+.long 0xdd71b927,0xdd71b927
+.long 0xb64fe1be,0xb64fe1be
+.long 0x17ad88f0,0x17ad88f0
+.long 0x66ac20c9,0x66ac20c9
+.long 0xb43ace7d,0xb43ace7d
+.long 0x184adf63,0x184adf63
+.long 0x82311ae5,0x82311ae5
+.long 0x60335197,0x60335197
+.long 0x457f5362,0x457f5362
+.long 0xe07764b1,0xe07764b1
+.long 0x84ae6bbb,0x84ae6bbb
+.long 0x1ca081fe,0x1ca081fe
+.long 0x942b08f9,0x942b08f9
+.long 0x58684870,0x58684870
+.long 0x19fd458f,0x19fd458f
+.long 0x876cde94,0x876cde94
+.long 0xb7f87b52,0xb7f87b52
+.long 0x23d373ab,0x23d373ab
+.long 0xe2024b72,0xe2024b72
+.long 0x578f1fe3,0x578f1fe3
+.long 0x2aab5566,0x2aab5566
+.long 0x0728ebb2,0x0728ebb2
+.long 0x03c2b52f,0x03c2b52f
+.long 0x9a7bc586,0x9a7bc586
+.long 0xa50837d3,0xa50837d3
+.long 0xf2872830,0xf2872830
+.long 0xb2a5bf23,0xb2a5bf23
+.long 0xba6a0302,0xba6a0302
+.long 0x5c8216ed,0x5c8216ed
+.long 0x2b1ccf8a,0x2b1ccf8a
+.long 0x92b479a7,0x92b479a7
+.long 0xf0f207f3,0xf0f207f3
+.long 0xa1e2694e,0xa1e2694e
+.long 0xcdf4da65,0xcdf4da65
+.long 0xd5be0506,0xd5be0506
+.long 0x1f6234d1,0x1f6234d1
+.long 0x8afea6c4,0x8afea6c4
+.long 0x9d532e34,0x9d532e34
+.long 0xa055f3a2,0xa055f3a2
+.long 0x32e18a05,0x32e18a05
+.long 0x75ebf6a4,0x75ebf6a4
+.long 0x39ec830b,0x39ec830b
+.long 0xaaef6040,0xaaef6040
+.long 0x069f715e,0x069f715e
+.long 0x51106ebd,0x51106ebd
+.long 0xf98a213e,0xf98a213e
+.long 0x3d06dd96,0x3d06dd96
+.long 0xae053edd,0xae053edd
+.long 0x46bde64d,0x46bde64d
+.long 0xb58d5491,0xb58d5491
+.long 0x055dc471,0x055dc471
+.long 0x6fd40604,0x6fd40604
+.long 0xff155060,0xff155060
+.long 0x24fb9819,0x24fb9819
+.long 0x97e9bdd6,0x97e9bdd6
+.long 0xcc434089,0xcc434089
+.long 0x779ed967,0x779ed967
+.long 0xbd42e8b0,0xbd42e8b0
+.long 0x888b8907,0x888b8907
+.long 0x385b19e7,0x385b19e7
+.long 0xdbeec879,0xdbeec879
+.long 0x470a7ca1,0x470a7ca1
+.long 0xe90f427c,0xe90f427c
+.long 0xc91e84f8,0xc91e84f8
+.long 0x00000000,0x00000000
+.long 0x83868009,0x83868009
+.long 0x48ed2b32,0x48ed2b32
+.long 0xac70111e,0xac70111e
+.long 0x4e725a6c,0x4e725a6c
+.long 0xfbff0efd,0xfbff0efd
+.long 0x5638850f,0x5638850f
+.long 0x1ed5ae3d,0x1ed5ae3d
+.long 0x27392d36,0x27392d36
+.long 0x64d90f0a,0x64d90f0a
+.long 0x21a65c68,0x21a65c68
+.long 0xd1545b9b,0xd1545b9b
+.long 0x3a2e3624,0x3a2e3624
+.long 0xb1670a0c,0xb1670a0c
+.long 0x0fe75793,0x0fe75793
+.long 0xd296eeb4,0xd296eeb4
+.long 0x9e919b1b,0x9e919b1b
+.long 0x4fc5c080,0x4fc5c080
+.long 0xa220dc61,0xa220dc61
+.long 0x694b775a,0x694b775a
+.long 0x161a121c,0x161a121c
+.long 0x0aba93e2,0x0aba93e2
+.long 0xe52aa0c0,0xe52aa0c0
+.long 0x43e0223c,0x43e0223c
+.long 0x1d171b12,0x1d171b12
+.long 0x0b0d090e,0x0b0d090e
+.long 0xadc78bf2,0xadc78bf2
+.long 0xb9a8b62d,0xb9a8b62d
+.long 0xc8a91e14,0xc8a91e14
+.long 0x8519f157,0x8519f157
+.long 0x4c0775af,0x4c0775af
+.long 0xbbdd99ee,0xbbdd99ee
+.long 0xfd607fa3,0xfd607fa3
+.long 0x9f2601f7,0x9f2601f7
+.long 0xbcf5725c,0xbcf5725c
+.long 0xc53b6644,0xc53b6644
+.long 0x347efb5b,0x347efb5b
+.long 0x7629438b,0x7629438b
+.long 0xdcc623cb,0xdcc623cb
+.long 0x68fcedb6,0x68fcedb6
+.long 0x63f1e4b8,0x63f1e4b8
+.long 0xcadc31d7,0xcadc31d7
+.long 0x10856342,0x10856342
+.long 0x40229713,0x40229713
+.long 0x2011c684,0x2011c684
+.long 0x7d244a85,0x7d244a85
+.long 0xf83dbbd2,0xf83dbbd2
+.long 0x1132f9ae,0x1132f9ae
+.long 0x6da129c7,0x6da129c7
+.long 0x4b2f9e1d,0x4b2f9e1d
+.long 0xf330b2dc,0xf330b2dc
+.long 0xec52860d,0xec52860d
+.long 0xd0e3c177,0xd0e3c177
+.long 0x6c16b32b,0x6c16b32b
+.long 0x99b970a9,0x99b970a9
+.long 0xfa489411,0xfa489411
+.long 0x2264e947,0x2264e947
+.long 0xc48cfca8,0xc48cfca8
+.long 0x1a3ff0a0,0x1a3ff0a0
+.long 0xd82c7d56,0xd82c7d56
+.long 0xef903322,0xef903322
+.long 0xc74e4987,0xc74e4987
+.long 0xc1d138d9,0xc1d138d9
+.long 0xfea2ca8c,0xfea2ca8c
+.long 0x360bd498,0x360bd498
+.long 0xcf81f5a6,0xcf81f5a6
+.long 0x28de7aa5,0x28de7aa5
+.long 0x268eb7da,0x268eb7da
+.long 0xa4bfad3f,0xa4bfad3f
+.long 0xe49d3a2c,0xe49d3a2c
+.long 0x0d927850,0x0d927850
+.long 0x9bcc5f6a,0x9bcc5f6a
+.long 0x62467e54,0x62467e54
+.long 0xc2138df6,0xc2138df6
+.long 0xe8b8d890,0xe8b8d890
+.long 0x5ef7392e,0x5ef7392e
+.long 0xf5afc382,0xf5afc382
+.long 0xbe805d9f,0xbe805d9f
+.long 0x7c93d069,0x7c93d069
+.long 0xa92dd56f,0xa92dd56f
+.long 0xb31225cf,0xb31225cf
+.long 0x3b99acc8,0x3b99acc8
+.long 0xa77d1810,0xa77d1810
+.long 0x6e639ce8,0x6e639ce8
+.long 0x7bbb3bdb,0x7bbb3bdb
+.long 0x097826cd,0x097826cd
+.long 0xf418596e,0xf418596e
+.long 0x01b79aec,0x01b79aec
+.long 0xa89a4f83,0xa89a4f83
+.long 0x656e95e6,0x656e95e6
+.long 0x7ee6ffaa,0x7ee6ffaa
+.long 0x08cfbc21,0x08cfbc21
+.long 0xe6e815ef,0xe6e815ef
+.long 0xd99be7ba,0xd99be7ba
+.long 0xce366f4a,0xce366f4a
+.long 0xd4099fea,0xd4099fea
+.long 0xd67cb029,0xd67cb029
+.long 0xafb2a431,0xafb2a431
+.long 0x31233f2a,0x31233f2a
+.long 0x3094a5c6,0x3094a5c6
+.long 0xc066a235,0xc066a235
+.long 0x37bc4e74,0x37bc4e74
+.long 0xa6ca82fc,0xa6ca82fc
+.long 0xb0d090e0,0xb0d090e0
+.long 0x15d8a733,0x15d8a733
+.long 0x4a9804f1,0x4a9804f1
+.long 0xf7daec41,0xf7daec41
+.long 0x0e50cd7f,0x0e50cd7f
+.long 0x2ff69117,0x2ff69117
+.long 0x8dd64d76,0x8dd64d76
+.long 0x4db0ef43,0x4db0ef43
+.long 0x544daacc,0x544daacc
+.long 0xdf0496e4,0xdf0496e4
+.long 0xe3b5d19e,0xe3b5d19e
+.long 0x1b886a4c,0x1b886a4c
+.long 0xb81f2cc1,0xb81f2cc1
+.long 0x7f516546,0x7f516546
+.long 0x04ea5e9d,0x04ea5e9d
+.long 0x5d358c01,0x5d358c01
+.long 0x737487fa,0x737487fa
+.long 0x2e410bfb,0x2e410bfb
+.long 0x5a1d67b3,0x5a1d67b3
+.long 0x52d2db92,0x52d2db92
+.long 0x335610e9,0x335610e9
+.long 0x1347d66d,0x1347d66d
+.long 0x8c61d79a,0x8c61d79a
+.long 0x7a0ca137,0x7a0ca137
+.long 0x8e14f859,0x8e14f859
+.long 0x893c13eb,0x893c13eb
+.long 0xee27a9ce,0xee27a9ce
+.long 0x35c961b7,0x35c961b7
+.long 0xede51ce1,0xede51ce1
+.long 0x3cb1477a,0x3cb1477a
+.long 0x59dfd29c,0x59dfd29c
+.long 0x3f73f255,0x3f73f255
+.long 0x79ce1418,0x79ce1418
+.long 0xbf37c773,0xbf37c773
+.long 0xeacdf753,0xeacdf753
+.long 0x5baafd5f,0x5baafd5f
+.long 0x146f3ddf,0x146f3ddf
+.long 0x86db4478,0x86db4478
+.long 0x81f3afca,0x81f3afca
+.long 0x3ec468b9,0x3ec468b9
+.long 0x2c342438,0x2c342438
+.long 0x5f40a3c2,0x5f40a3c2
+.long 0x72c31d16,0x72c31d16
+.long 0x0c25e2bc,0x0c25e2bc
+.long 0x8b493c28,0x8b493c28
+.long 0x41950dff,0x41950dff
+.long 0x7101a839,0x7101a839
+.long 0xdeb30c08,0xdeb30c08
+.long 0x9ce4b4d8,0x9ce4b4d8
+.long 0x90c15664,0x90c15664
+.long 0x6184cb7b,0x6184cb7b
+.long 0x70b632d5,0x70b632d5
+.long 0x745c6c48,0x745c6c48
+.long 0x4257b8d0,0x4257b8d0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 /* ASCII, NUL-terminated: "AES for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
+.p2align 6 /* pad to the next 64-byte boundary */
+.def block_se_handler; .scl 3; .type 32; .endef /* COFF symbol record for block_se_handler, the handler named by the first two .xdata entries; its label line and the branch-target labels are not visible in this view */
+.p2align 4 /* 16-byte alignment */
+ pushq %rsi /* save eight registers, then the flags; the shared exit path pops them in reverse */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp /* 64 bytes of scratch; presumably released by the addq $64 in the shared exit path */
+ movq 120(%r8),%rax /* NOTE(review): %r8 is presumably the Win64 CONTEXT record and offset 120 its Rax field - confirm */
+ movq 248(%r8),%rbx /* presumably CONTEXT.Rip, the address being unwound - confirm */
+ movq 8(%r9),%rsi /* base value from the record in %r9 (presumably the dispatcher context image base) */
+ movq 56(%r9),%r11 /* pointer to per-function data (presumably the two .rva words after the handler in .xdata) */
+ movl 0(%r11),%r10d /* first 32-bit word of that data */
+ leaq (%rsi,%r10,1),%r10 /* base + first word = lower bound address */
+ cmpq %r10,%rbx
+ jb .Lin_block_prologue /* taken when %rbx is below the lower bound */
+ movq 152(%r8),%rax /* presumably CONTEXT.Rsp - confirm */
+ movl 4(%r11),%r10d /* second 32-bit word of the per-function data */
+ leaq (%rsi,%r10,1),%r10 /* base + second word = upper bound address */
+ cmpq %r10,%rbx
+ jae .Lin_block_prologue /* taken when %rbx is at or above the upper bound */
+ movq 24(%rax),%rax /* follow the pointer stored at offset 24 from %rax */
+ leaq 48(%rax),%rax /* advance by six 8-byte slots so the loads below use negative offsets */
+ movq -8(%rax),%rbx /* read six saved 64-bit values from just below %rax ... */
+ movq -16(%rax),%rbp
+ movq -24(%rax),%r12
+ movq -32(%rax),%r13
+ movq -40(%rax),%r14
+ movq -48(%rax),%r15
+ movq %rbx,144(%r8) /* ... and write them into the record at %r8 (presumably its Rbx, Rbp and R12-R15 fields) */
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+ movq %r13,224(%r8)
+ movq %r14,232(%r8)
+ movq %r15,240(%r8)
+ movq 8(%rax),%rdi /* two more values, taken from above %rax */
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8) /* store the (possibly adjusted) %rax back at offset 152 */
+ movq %rsi,168(%r8) /* presumably CONTEXT.Rsi */
+ movq %rdi,176(%r8) /* presumably CONTEXT.Rdi */
+ jmp .Lcommon_seh_exit /* continue in the shared exit path; that label is not visible in this view */
+.def key_se_handler; .scl 3; .type 32; .endef /* COFF symbol record for key_se_handler, the handler named by the two key-setup .xdata entries; its label line and the branch-target labels are not visible in this view */
+.p2align 4 /* 16-byte alignment */
+ pushq %rsi /* save eight registers, then the flags; the shared exit path pops them in reverse */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp /* 64 bytes of scratch; presumably released by the addq $64 in the shared exit path */
+ movq 120(%r8),%rax /* NOTE(review): %r8 is presumably the Win64 CONTEXT record and offset 120 its Rax field - confirm */
+ movq 248(%r8),%rbx /* presumably CONTEXT.Rip, the address being unwound - confirm */
+ movq 8(%r9),%rsi /* base value from the record in %r9 (presumably the dispatcher context image base) */
+ movq 56(%r9),%r11 /* pointer to per-function data (presumably the two .rva words after the handler in .xdata) */
+ movl 0(%r11),%r10d /* first 32-bit word of that data */
+ leaq (%rsi,%r10,1),%r10 /* base + first word = lower bound address */
+ cmpq %r10,%rbx
+ jb .Lin_key_prologue /* taken when %rbx is below the lower bound */
+ movq 152(%r8),%rax /* presumably CONTEXT.Rsp - confirm */
+ movl 4(%r11),%r10d /* second 32-bit word of the per-function data */
+ leaq (%rsi,%r10,1),%r10 /* base + second word = upper bound address */
+ cmpq %r10,%rbx
+ jae .Lin_key_prologue /* taken when %rbx is at or above the upper bound */
+ leaq 56(%rax),%rax /* advance by seven 8-byte slots; unlike block_se_handler no pointer is followed first */
+ movq -8(%rax),%rbx /* read six saved 64-bit values from just below %rax ... */
+ movq -16(%rax),%rbp
+ movq -24(%rax),%r12
+ movq -32(%rax),%r13
+ movq -40(%rax),%r14
+ movq -48(%rax),%r15
+ movq %rbx,144(%r8) /* ... and write them into the record at %r8 (presumably its Rbx, Rbp and R12-R15 fields) */
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+ movq %r13,224(%r8)
+ movq %r14,232(%r8)
+ movq %r15,240(%r8)
+ movq 8(%rax),%rdi /* two more values, taken from above %rax */
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8) /* store the (possibly adjusted) %rax back at offset 152 */
+ movq %rsi,168(%r8) /* presumably CONTEXT.Rsi */
+ movq %rdi,176(%r8) /* presumably CONTEXT.Rdi */
+ jmp .Lcommon_seh_exit /* continue in the shared exit path; that label is not visible in this view */
+.def cbc_se_handler; .scl 3; .type 32; .endef /* COFF symbol record for cbc_se_handler, the handler named by the last .xdata entry; its label line and the branch-target labels are not visible in this view */
+.p2align 4 /* 16-byte alignment */
+ pushq %rsi /* save eight registers, then the flags; popped in reverse before the retq below */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp /* 64 bytes of scratch; offsets 32..56 are filled before the call below */
+ movq 120(%r8),%rax /* NOTE(review): %r8 is presumably the Win64 CONTEXT record and offset 120 its Rax field - confirm */
+ movq 248(%r8),%rbx /* presumably CONTEXT.Rip, the address being unwound - confirm */
+ leaq .Lcbc_prologue(%rip),%r10 /* the checks below compare %rbx against code labels directly instead of .xdata words */
+ cmpq %r10,%rbx
+ jb .Lin_cbc_prologue /* taken when %rbx is below .Lcbc_prologue */
+ leaq .Lcbc_fast_body(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lin_cbc_frame_setup /* taken when %rbx is below .Lcbc_fast_body */
+ leaq .Lcbc_slow_prologue(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lin_cbc_body /* taken when %rbx is below .Lcbc_slow_prologue */
+ leaq .Lcbc_slow_body(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lin_cbc_frame_setup /* taken when %rbx is below .Lcbc_slow_body */
+ movq 152(%r8),%rax /* presumably CONTEXT.Rsp; NOTE(review): .Lin_cbc_body presumably labels this line but the label is not visible */
+ leaq .Lcbc_epilogue(%rip),%r10
+ cmpq %r10,%rbx
+ jae .Lin_cbc_prologue /* taken when %rbx is at or above .Lcbc_epilogue */
+ leaq 8(%rax),%rax /* skip one 8-byte slot */
+ leaq .Lcbc_popfq(%rip),%r10
+ cmpq %r10,%rbx
+ jae .Lin_cbc_prologue /* taken when %rbx is at or above .Lcbc_popfq */
+ movq 8(%rax),%rax /* follow the pointer stored at offset 8 from %rax */
+ leaq 56(%rax),%rax /* advance by seven 8-byte slots so the loads below use negative offsets */
+ movq -16(%rax),%rbx /* read six saved 64-bit values; NOTE(review): .Lin_cbc_frame_setup presumably labels this line */
+ movq -24(%rax),%rbp
+ movq -32(%rax),%r12
+ movq -40(%rax),%r13
+ movq -48(%rax),%r14
+ movq -56(%rax),%r15
+ movq %rbx,144(%r8) /* write them into the record at %r8 (presumably its Rbx, Rbp and R12-R15 fields) */
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+ movq %r13,224(%r8)
+ movq %r14,232(%r8)
+ movq %r15,240(%r8)
+ movq 8(%rax),%rdi /* two more values from above %rax; NOTE(review): .Lin_cbc_prologue presumably labels this line */
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8) /* store the (possibly adjusted) %rax back at offset 152 */
+ movq %rsi,168(%r8) /* presumably CONTEXT.Rsi */
+ movq %rdi,176(%r8) /* presumably CONTEXT.Rdi */
+ movq 40(%r9),%rdi /* copy destination: pointer stored at 40(%r9); NOTE(review): presumably the .Lcommon_seh_exit path the other handlers jump to starts here */
+ movq %r8,%rsi /* copy source: the record at %r8 */
+ movl $154,%ecx /* 154 quadwords = 1232 bytes */
+.long 0xa548f3fc /* raw opcode bytes fc f3 48 a5 = cld; rep movsq (performs the copy set up above) */
+ movq %r9,%rsi /* keep the %r9 record in %rsi while the argument registers are loaded */
+ xorq %rcx,%rcx /* first argument = 0 */
+ movq 8(%rsi),%rdx /* second argument: value at offset 8 */
+ movq 0(%rsi),%r8 /* third argument: value at offset 0 */
+ movq 16(%rsi),%r9 /* fourth argument: value at offset 16 */
+ movq 40(%rsi),%r10 /* value at offset 40 (the copy destination used above) */
+ leaq 56(%rsi),%r11 /* address of the field at offset 56 */
+ leaq 24(%rsi),%r12 /* address of the field at offset 24 */
+ movq %r10,32(%rsp) /* stack slots 32..56: presumably arguments five to eight under the Win64 calling convention */
+ movq %r11,40(%rsp)
+ movq %r12,48(%rsp)
+ movq %rcx,56(%rsp) /* %rcx is still zero here */
+ call *__imp_RtlVirtualUnwind(%rip) /* indirect call through the import pointer for RtlVirtualUnwind */
+ movl $1,%eax /* return value 1 (presumably ExceptionContinueSearch - confirm) */
+ addq $64,%rsp /* release the scratch area */
+ popfq /* restore flags and registers in reverse push order */
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+.section .pdata /* NOTE(review): presumably the Win64 function table - three image-relative words (begin, end, unwind info) per function; the .LSEH_begin/.LSEH_end/.LSEH_info labels are defined outside this view */
+.p2align 2 /* 4-byte alignment */
+.rva .LSEH_begin_AES_encrypt /* entry for AES_encrypt */
+.rva .LSEH_end_AES_encrypt
+.rva .LSEH_info_AES_encrypt
+.rva .LSEH_begin_AES_decrypt /* entry for AES_decrypt */
+.rva .LSEH_end_AES_decrypt
+.rva .LSEH_info_AES_decrypt
+.rva .LSEH_begin_AES_set_encrypt_key /* entry for AES_set_encrypt_key */
+.rva .LSEH_end_AES_set_encrypt_key
+.rva .LSEH_info_AES_set_encrypt_key
+.rva .LSEH_begin_AES_set_decrypt_key /* entry for AES_set_decrypt_key */
+.rva .LSEH_end_AES_set_decrypt_key
+.rva .LSEH_info_AES_set_decrypt_key
+.rva .LSEH_begin_AES_cbc_encrypt /* entry for AES_cbc_encrypt */
+.rva .LSEH_end_AES_cbc_encrypt
+.rva .LSEH_info_AES_cbc_encrypt
+.section .xdata /* per-function unwind records; NOTE(review): the .LSEH_info labels referenced from .pdata are not visible in this view - presumably one precedes each .byte line */
+.p2align 3 /* 8-byte alignment */
+.byte 9,0,0,0 /* NOTE(review): presumably an UNWIND_INFO header (version 1 with the exception-handler flag, no unwind codes) - confirm */
+.rva block_se_handler /* image-relative address of the handler */
+.rva .Lenc_prologue,.Lenc_epilogue /* two words of handler data; block_se_handler reads two 32-bit words through 56(%r9) - presumably these */
+.byte 9,0,0,0 /* same header bytes as above */
+.rva block_se_handler
+.rva .Ldec_prologue,.Ldec_epilogue /* bounds pair for the decrypt routine */
+.byte 9,0,0,0
+.rva key_se_handler
+.rva .Lenc_key_prologue,.Lenc_key_epilogue /* bounds pair for the encrypt-key setup routine */
+.byte 9,0,0,0
+.rva key_se_handler
+.rva .Ldec_key_prologue,.Ldec_key_epilogue /* bounds pair for the decrypt-key setup routine */
+.byte 9,0,0,0
+.rva cbc_se_handler /* no bounds pair follows: cbc_se_handler compares against code labels directly */
diff --git a/crypto/libressl/crypto/aes/aes_cbc.c b/crypto/libressl/crypto/aes/aes_cbc.c
new file mode 100644
index 0000000..5e76f6e
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aes_cbc.c
@@ -0,0 +1,65 @@
+/* $OpenBSD: aes_cbc.c,v 1.12 2014/06/12 15:49:27 deraadt Exp $ */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * ====================================================================
+ *
+ */
+#include <openssl/aes.h>
+#include <openssl/modes.h>
+/*
+ * AES in CBC mode. Thin wrapper that forwards the buffers, length, key
+ * schedule and IV to the generic 128-bit CBC routines: a non-zero enc
+ * selects CRYPTO_cbc128_encrypt with AES_encrypt as the block function,
+ * zero selects CRYPTO_cbc128_decrypt with AES_decrypt.
+ */
+void
+AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+    size_t len, const AES_KEY *key, unsigned char *ivec, const int enc)
+{
+	if (enc)
+		CRYPTO_cbc128_encrypt(in, out, len, key, ivec,
+		    (block128_f)AES_encrypt);
+	else
+		CRYPTO_cbc128_decrypt(in, out, len, key, ivec,
+		    (block128_f)AES_decrypt);
+}
diff --git a/crypto/libressl/crypto/aes/aes_cfb.c b/crypto/libressl/crypto/aes/aes_cfb.c
new file mode 100644
index 0000000..a6384f9
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aes_cfb.c
@@ -0,0 +1,84 @@
+/* $OpenBSD: aes_cfb.c,v 1.8 2014/06/12 15:49:27 deraadt Exp $ */
+/* ====================================================================
+ * Copyright (c) 2002-2006 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * ====================================================================
+ *
+ */
+#include <openssl/aes.h>
+#include <openssl/modes.h>
+/*
+ * AES in 128-bit CFB mode. The input and output are processed as though
+ * 128-bit CFB mode is being used; the extra state recording how much of
+ * the current 128-bit block has been consumed is kept in *num.
+ *
+ * All arguments are forwarded unchanged to CRYPTO_cfb128_encrypt, and
+ * AES_encrypt is passed as the block function whatever the value of enc.
+ */
+void
+AES_cfb128_encrypt(const unsigned char *in, unsigned char *out, size_t length,
+    const AES_KEY *key, unsigned char *ivec, int *num, const int enc)
+{
+	CRYPTO_cfb128_encrypt(in, out, length, key, ivec, num, enc,
+	    (block128_f)AES_encrypt);
+}
+/*
+ * AES in 1-bit CFB mode.
+ * N.B. This expects the input to be packed, MS bit first.
+ *
+ * All arguments are forwarded unchanged to CRYPTO_cfb128_1_encrypt, and
+ * AES_encrypt is passed as the block function whatever the value of enc.
+ * NOTE(review): length is presumably a count of bits rather than bytes
+ * here - confirm against CRYPTO_cfb128_1_encrypt.
+ */
+void
+AES_cfb1_encrypt(const unsigned char *in, unsigned char *out, size_t length,
+    const AES_KEY *key, unsigned char *ivec, int *num, const int enc)
+{
+	CRYPTO_cfb128_1_encrypt(in, out, length, key, ivec, num, enc,
+	    (block128_f)AES_encrypt);
+}
+/*
+ * AES in 8-bit CFB mode. All arguments are forwarded unchanged to
+ * CRYPTO_cfb128_8_encrypt, and AES_encrypt is passed as the block
+ * function whatever the value of enc.
+ */
+void
+AES_cfb8_encrypt(const unsigned char *in, unsigned char *out, size_t length,
+    const AES_KEY *key, unsigned char *ivec, int *num, const int enc)
+{
+	CRYPTO_cfb128_8_encrypt(in, out, length, key, ivec, num, enc,
+	    (block128_f)AES_encrypt);
+}
diff --git a/crypto/libressl/crypto/aes/aes_core.c b/crypto/libressl/crypto/aes/aes_core.c
new file mode 100644
index 0000000..1b8a24c
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aes_core.c
@@ -0,0 +1,1374 @@
+/* $OpenBSD: aes_core.c,v 1.13 2015/11/05 21:59:13 miod Exp $ */
+/**
+ * rijndael-alg-fst.c
+ *
+ * @version 3.0 (December 2000)
+ *
+ * Optimised ANSI C code for the Rijndael cipher (now AES)
+ *
+ * @author Vincent Rijmen <>
+ * @author Antoon Bosselaers <>
+ * @author Paulo Barreto <>
+ *
+ * This code is hereby placed in the public domain.
+ *
+ */
+/* Note: rewritten a little bit to provide error control and an OpenSSL-
+ compatible API */
+/*
+ * Unless AES_DEBUG is defined, make sure NDEBUG is defined before the
+ * headers below are included.
+ */
+#ifndef AES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#endif
+#include <stdlib.h>
+#include <openssl/aes.h>
+#include "aes_locl.h"
+#ifndef AES_ASM
+/*
+Te0[x] = S [x].[02, 01, 01, 03];
+Te1[x] = S [x].[03, 02, 01, 01];
+Te2[x] = S [x].[01, 03, 02, 01];
+Te3[x] = S [x].[01, 01, 03, 02];
+Td0[x] = Si[x].[0e, 09, 0d, 0b];
+Td1[x] = Si[x].[0b, 0e, 09, 0d];
+Td2[x] = Si[x].[0d, 0b, 0e, 09];
+Td3[x] = Si[x].[09, 0d, 0b, 0e];
+Td4[x] = Si[x].[01];
+*/
+/*
+ * Te0[x] = S[x].[02, 01, 01, 03]: one 32-bit word per byte value x.
+ * From the most significant byte down, each entry holds 02*S[x], S[x],
+ * S[x] and 03*S[x], the multiples being taken in the AES field GF(2^8)
+ * (e.g. Te0[0] = 0xc66363a5 for S[0] = 0x63).
+ */
+static const u32 Te0[256] = {
+	0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
+	0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
+	0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
+	0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
+	0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
+	0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
+	0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
+	0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
+	0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
+	0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
+	0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
+	0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
+	0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
+	0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
+	0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
+	0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
+	0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
+	0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
+	0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
+	0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
+	0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
+	0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
+	0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
+	0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
+	0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
+	0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
+	0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
+	0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
+	0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
+	0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
+	0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
+	0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
+	0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
+	0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
+	0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
+	0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
+	0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
+	0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
+	0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
+	0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
+	0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
+	0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
+	0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
+	0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
+	0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
+	0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
+	0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
+	0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
+	0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
+	0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
+	0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
+	0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
+	0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
+	0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
+	0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
+	0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
+	0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
+	0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
+	0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
+	0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
+	0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
+	0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
+	0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
+	0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
+};
+/*
+ * Te1[x] = S[x].[03, 02, 01, 01]: one 32-bit word per byte value x.
+ * From the most significant byte down, each entry holds 03*S[x], 02*S[x],
+ * S[x] and S[x], the multiples being taken in the AES field GF(2^8)
+ * (e.g. Te1[0] = 0xa5c66363 for S[0] = 0x63).
+ */
+static const u32 Te1[256] = {
+	0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
+	0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
+	0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
+	0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
+	0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
+	0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
+	0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
+	0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
+	0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
+	0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
+	0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
+	0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
+	0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
+	0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
+	0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
+	0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
+	0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
+	0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
+	0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
+	0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
+	0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
+	0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
+	0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
+	0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
+	0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
+	0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
+	0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
+	0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
+	0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
+	0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
+	0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
+	0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
+	0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
+	0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
+	0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
+	0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
+	0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
+	0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
+	0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
+	0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
+	0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
+	0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
+	0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
+	0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
+	0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
+	0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
+	0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
+	0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
+	0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
+	0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
+	0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
+	0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
+	0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
+	0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
+	0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
+	0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
+	0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
+	0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
+	0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
+	0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
+	0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
+	0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
+	0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
+	0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
+};
+/*
+ * Te2[x] = S[x].[01, 03, 02, 01]: one 32-bit word per byte value x.
+ * From the most significant byte down, each entry holds S[x], 03*S[x],
+ * 02*S[x] and S[x], the multiples being taken in the AES field GF(2^8)
+ * (e.g. Te2[0] = 0x63a5c663 for S[0] = 0x63).
+ */
+static const u32 Te2[256] = {
+	0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
+	0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
+	0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
+	0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
+	0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
+	0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
+	0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
+	0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
+	0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
+	0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
+	0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
+	0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
+	0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
+	0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
+	0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
+	0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
+	0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
+	0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
+	0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
+	0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
+	0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
+	0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
+	0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
+	0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
+	0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
+	0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
+	0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
+	0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
+	0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
+	0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
+	0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
+	0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
+	0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
+	0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
+	0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
+	0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
+	0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
+	0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
+	0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
+	0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
+	0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
+	0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
+	0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
+	0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
+	0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
+	0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
+	0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
+	0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
+	0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
+	0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
+	0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
+	0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
+	0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
+	0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
+	0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
+	0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
+	0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
+	0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
+	0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
+	0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
+	0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
+	0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
+	0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
+	0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
+};
+/*
+ * Te3[x] = S[x].[01, 01, 03, 02]: one 32-bit word per byte value x.
+ * From the most significant byte down, each entry holds S[x], S[x],
+ * 03*S[x] and 02*S[x], the multiples being taken in the AES field GF(2^8)
+ * (e.g. Te3[0] = 0x6363a5c6 for S[0] = 0x63).
+ */
+static const u32 Te3[256] = {
+	0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
+	0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
+	0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
+	0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
+	0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
+	0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
+	0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
+	0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
+	0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
+	0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
+	0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
+	0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
+	0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
+	0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
+	0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
+	0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
+	0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
+	0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
+	0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
+	0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
+	0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
+	0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
+	0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
+	0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
+	0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
+	0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
+	0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
+	0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
+	0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
+	0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
+	0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
+	0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
+	0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
+	0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
+	0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
+	0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
+	0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
+	0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
+	0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
+	0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
+	0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
+	0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
+	0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
+	0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
+	0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
+	0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
+	0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
+	0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
+	0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
+	0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
+	0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
+	0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
+	0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
+	0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
+	0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
+	0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
+	0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
+	0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
+	0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
+	0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
+	0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
+	0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
+	0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
+	0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
+};
+static const u32 Td0[256] = {
+ 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
+ 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
+ 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
+ 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
+ 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
+ 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
+ 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
+ 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
+ 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
+ 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
+ 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
+ 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
+ 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
+ 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
+ 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
+ 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
+ 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
+ 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
+ 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
+ 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
+ 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
+ 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
+ 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
+ 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
+ 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
+ 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
+ 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
+ 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
+ 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
+ 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
+ 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
+ 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
+ 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
+ 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
+ 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
+ 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
+ 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
+ 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
+ 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
+ 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
+ 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
+ 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
+ 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
+ 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
+ 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
+ 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
+ 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
+ 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
+ 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
+ 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
+ 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
+ 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
+ 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
+ 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
+ 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
+ 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
+ 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
+ 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
+ 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
+ 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
+ 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
+ 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
+ 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
+ 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
+static const u32 Td1[256] = { /* decryption round table indexed with (x >> 16) & 0xff; the leading entries equal the matching Td0 entries rotated right by 8 bits */
+ 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
+ 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
+ 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
+ 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
+ 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
+ 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
+ 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
+ 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
+ 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
+ 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
+ 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
+ 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
+ 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
+ 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
+ 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
+ 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
+ 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
+ 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
+ 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
+ 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
+ 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
+ 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
+ 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
+ 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
+ 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
+ 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
+ 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
+ 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
+ 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
+ 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
+ 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
+ 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
+ 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
+ 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
+ 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
+ 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
+ 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
+ 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
+ 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
+ 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
+ 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
+ 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
+ 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
+ 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
+ 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
+ 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
+ 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
+ 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
+ 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
+ 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
+ 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
+ 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
+ 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
+ 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
+ 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
+ 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
+ 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
+ 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
+ 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
+ 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
+ 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
+ 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
+ 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
+ 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
+static const u32 Td2[256] = { /* decryption round table indexed with (x >> 8) & 0xff; the leading entries equal the matching Td0 entries rotated right by 16 bits */
+ 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
+ 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
+ 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
+ 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
+ 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
+ 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
+ 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
+ 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
+ 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
+ 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
+ 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
+ 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
+ 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
+ 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
+ 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
+ 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
+ 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
+ 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
+ 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
+ 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
+ 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
+ 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
+ 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
+ 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
+ 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
+ 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
+ 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
+ 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
+ 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
+ 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
+ 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
+ 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
+ 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
+ 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
+ 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
+ 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
+ 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
+ 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
+ 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
+ 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
+ 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
+ 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
+ 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
+ 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
+ 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
+ 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
+ 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
+ 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
+ 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
+ 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
+ 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
+ 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
+ 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
+ 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
+ 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
+ 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
+ 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
+ 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
+ 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
+ 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
+ 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
+ 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
+ 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
+ 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
+static const u32 Td3[256] = { /* decryption round table indexed with x & 0xff; the leading entries equal the matching Td0 entries rotated right by 24 bits */
+ 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
+ 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
+ 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
+ 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
+ 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
+ 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
+ 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
+ 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
+ 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
+ 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
+ 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
+ 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
+ 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
+ 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
+ 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
+ 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
+ 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
+ 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
+ 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
+ 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
+ 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
+ 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
+ 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
+ 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
+ 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
+ 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
+ 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
+ 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
+ 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
+ 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
+ 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
+ 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
+ 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
+ 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
+ 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
+ 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
+ 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
+ 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
+ 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
+ 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
+ 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
+ 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
+ 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
+ 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
+ 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
+ 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
+ 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
+ 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
+ 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
+ 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
+ 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
+ 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
+ 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
+ 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
+ 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
+ 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
+ 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
+ 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
+ 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
+ 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
+ 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
+ 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
+ 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
+ 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
+static const u8 Td4[256] = { /* byte-wide table, not referenced in the part of the file shown here. NOTE(review): the values look like the AES inverse S-box, presumably used by the final decryption round -- confirm */
+ 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
+ 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
+ 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
+ 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
+ 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
+ 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
+ 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
+ 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
+ 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
+ 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
+ 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
+ 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
+ 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
+ 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
+ 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
+ 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
+ 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
+ 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
+ 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
+ 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
+ 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
+ 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
+ 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
+ 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
+ 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
+ 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
+ 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
+ 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
+ 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
+ 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
+ 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
+ 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
+static const u32 rcon[] = { /* round constants; AES_set_encrypt_key XORs rcon[i] into the first new word of key-schedule step i */
+ 0x01000000, 0x02000000, 0x04000000, 0x08000000,
+ 0x10000000, 0x20000000, 0x40000000, 0x80000000,
+ 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+ * Expand the cipher key into the encryption key schedule.
+ */
+AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key) /*
+ * Fills key->rd_key with the encryption round keys derived from userKey
+ * and sets key->rounds to 10, 12 or 14 for a 128-, 192- or 256-bit key.
+ *
+ * userKey: raw key bytes; bits / 8 bytes are read through GETU32.
+ * bits:    key size in bits; must be 128, 192 or 256.
+ * key:     schedule to fill.
+ *
+ * Returns 0 on success, -1 if userKey or key is NULL, and -2 if bits is
+ * not one of the supported sizes.
+ */
+ u32 *rk;
+ int i = 0; /* index of the next rcon value, i.e. the key-schedule step number */
+ u32 temp;
+ if (!userKey || !key)
+ return -1;
+ if (bits != 128 && bits != 192 && bits != 256)
+ return -2;
+ rk = key->rd_key;
+ if (bits == 128)
+ key->rounds = 10;
+ else if (bits == 192)
+ key->rounds = 12;
+ else
+ key->rounds = 14;
+ rk[0] = GETU32(userKey); /* GETU32 is defined outside this chunk; presumably a big-endian 32-bit load -- confirm */
+ rk[1] = GETU32(userKey + 4);
+ rk[2] = GETU32(userKey + 8);
+ rk[3] = GETU32(userKey + 12);
+ if (bits == 128) {
+ while (1) { /* each pass derives four new words, rk[4..7], from rk[0..3] */
+ temp = rk[3]; /* last word of the previous group */
+ rk[4] = rk[0] ^
+ (Te2[(temp >> 16) & 0xff] & 0xff000000) ^ /* each lookup keeps a single byte; the source byte sits one position below the destination byte, */
+ (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^ /* so temp is rotated by one byte while being substituted */
+ (Te0[(temp) & 0xff] & 0x0000ff00) ^
+ (Te1[(temp >> 24)] & 0x000000ff) ^
+ rcon[i];
+ rk[5] = rk[1] ^ rk[4];
+ rk[6] = rk[2] ^ rk[5];
+ rk[7] = rk[3] ^ rk[6];
+ if (++i == 10) { /* ten passes complete the 44-word schedule, 4 * (10 + 1) */
+ return 0;
+ }
+ rk += 4; /* slide the window so the words just written become rk[0..3] */
+ }
+ }
+ rk[4] = GETU32(userKey + 16);
+ rk[5] = GETU32(userKey + 20);
+ if (bits == 192) {
+ while (1) { /* each pass derives up to six new words, rk[6..11], from rk[0..5] */
+ temp = rk[5];
+ rk[6] = rk[ 0] ^
+ (Te2[(temp >> 16) & 0xff] & 0xff000000) ^ /* same rotate-and-substitute step as in the 128-bit case */
+ (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te0[(temp) & 0xff] & 0x0000ff00) ^
+ (Te1[(temp >> 24)] & 0x000000ff) ^
+ rcon[i];
+ rk[7] = rk[1] ^ rk[6];
+ rk[8] = rk[2] ^ rk[7];
+ rk[9] = rk[3] ^ rk[8];
+ if (++i == 8) { /* the eighth pass stops after four words: 6 + 7 * 6 + 4 = 52 = 4 * (12 + 1) */
+ return 0;
+ }
+ rk[10] = rk[4] ^ rk[9];
+ rk[11] = rk[5] ^ rk[10];
+ rk += 6;
+ }
+ }
+ rk[6] = GETU32(userKey + 24);
+ rk[7] = GETU32(userKey + 28);
+ if (bits == 256) {
+ while (1) { /* each pass derives up to eight new words, rk[8..15], from rk[0..7] */
+ temp = rk[7];
+ rk[8] = rk[0] ^
+ (Te2[(temp >> 16) & 0xff] & 0xff000000) ^ /* first half-step: rotate-and-substitute plus rcon */
+ (Te3[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te0[(temp) & 0xff] & 0x0000ff00) ^
+ (Te1[(temp >> 24)] & 0x000000ff) ^
+ rcon[i];
+ rk[9] = rk[1] ^ rk[8];
+ rk[10] = rk[2] ^ rk[9];
+ rk[11] = rk[3] ^ rk[10];
+ if (++i == 7) { /* the seventh pass stops after four words: 8 + 6 * 8 + 4 = 60 = 4 * (14 + 1) */
+ return 0;
+ }
+ temp = rk[11];
+ rk[12] = rk[4] ^
+ (Te2[(temp >> 24)] & 0xff000000) ^ /* second half-step: source and destination byte positions match, */
+ (Te3[(temp >> 16) & 0xff] & 0x00ff0000) ^ /* so temp is substituted without rotation, and no rcon is applied */
+ (Te0[(temp >> 8) & 0xff] & 0x0000ff00) ^
+ (Te1[(temp) & 0xff] & 0x000000ff);
+ rk[13] = rk[5] ^ rk[12];
+ rk[14] = rk[6] ^ rk[13];
+ rk[15] = rk[7] ^ rk[14];
+ rk += 8;
+ }
+ }
+ return 0; /* every validated key size returns from its loop above, so this is a fallback only */
+ * Expand the cipher key into the decryption key schedule.
+ */
+AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key) /*
+ * Fills key->rd_key with the decryption round keys: it builds the
+ * encryption schedule, reverses the order of the 4-word round keys, and
+ * then transforms every round key except the first and the last.
+ *
+ * Parameters are passed unchanged to AES_set_encrypt_key.
+ *
+ * Returns 0 on success, or the negative status reported by
+ * AES_set_encrypt_key.
+ */
+ u32 *rk;
+ int i, j, status;
+ u32 temp;
+ /* first, start with an encryption schedule */
+ status = AES_set_encrypt_key(userKey, bits, key);
+ if (status < 0)
+ return status;
+ rk = key->rd_key;
+ /* invert the order of the round keys: */
+ for (i = 0, j = 4 * (key->rounds); i < j; i += 4, j -= 4) { /* i walks up from the first round key, j down from the last; the middle key (i == j) stays in place */
+ temp = rk[i];
+ rk[i] = rk[j];
+ rk[j] = temp;
+ temp = rk[i + 1];
+ rk[i + 1] = rk[j + 1];
+ rk[j + 1] = temp;
+ temp = rk[i + 2];
+ rk[i + 2] = rk[j + 2];
+ rk[j + 2] = temp;
+ temp = rk[i + 3];
+ rk[i + 3] = rk[j + 3];
+ rk[j + 3] = temp;
+ }
+ /* apply the inverse MixColumn transform to all round keys but the first and the last: */
+ for (i = 1; i < (key->rounds); i++) {
+ rk += 4; /* step to round key i */
+ rk[0] =
+ Td0[Te1[(rk[0] >> 24)] & 0xff] ^ /* Te1[b] & 0xff feeds a substituted byte into TdN; presumably this cancels the inverse substitution */
+ Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^ /* folded into the Td tables so that only the inverse MixColumn part remains -- confirm against the table definitions */
+ Td2[Te1[(rk[0] >> 8) & 0xff] & 0xff] ^
+ Td3[Te1[(rk[0]) & 0xff] & 0xff];
+ rk[1] =
+ Td0[Te1[(rk[1] >> 24)] & 0xff] ^
+ Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
+ Td2[Te1[(rk[1] >> 8) & 0xff] & 0xff] ^
+ Td3[Te1[(rk[1]) & 0xff] & 0xff];
+ rk[2] =
+ Td0[Te1[(rk[2] >> 24)] & 0xff] ^
+ Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
+ Td2[Te1[(rk[2] >> 8) & 0xff] & 0xff] ^
+ Td3[Te1[(rk[2]) & 0xff] & 0xff];
+ rk[3] =
+ Td0[Te1[(rk[3] >> 24)] & 0xff] ^
+ Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
+ Td2[Te1[(rk[3] >> 8) & 0xff] & 0xff] ^
+ Td3[Te1[(rk[3]) & 0xff] & 0xff];
+ }
+ return 0;
+ * Encrypt a single block
+ * in and out can overlap
+ */
+AES_encrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key) /*
+ * Encrypts one block: four words are loaded from in (offsets 0, 4, 8
+ * and 12) with GETU32, run through key->rounds rounds using the round
+ * keys in key->rd_key, and stored to out with PUTU32.
+ *
+ * All four input words are loaded before the first store to out, which
+ * is why in and out may overlap.
+ *
+ * Two equivalent bodies exist, chosen by FULL_UNROLL: a fully unrolled
+ * one and a loop that performs two rounds per pass. NOTE(review): the
+ * conditional that opens the unrolled variant (paired with the #else
+ * below) is not visible in this extract.
+ */
+ const u32 *rk;
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+ rk = key->rd_key;
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(in ) ^ rk[0];
+ s1 = GETU32(in + 4) ^ rk[1];
+ s2 = GETU32(in + 8) ^ rk[2];
+ s3 = GETU32(in + 12) ^ rk[3];
+ /* round 1: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
+ if (key->rounds > 10) { /* schedules with more than 10 rounds need rounds 10 and 11 */
+ /* round 10: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
+ if (key->rounds > 12) { /* schedules with more than 12 rounds need rounds 12 and 13 as well */
+ /* round 12: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
+ }
+ }
+ rk += key->rounds << 2; /* point rk at the last round key, 4 * rounds words in */
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ r = key->rounds >> 1; /* every loop pass performs two rounds, except the last pass which performs one */
+ for (;;) {
+ t0 =
+ Te0[(s0 >> 24)] ^
+ Te1[(s1 >> 16) & 0xff] ^
+ Te2[(s2 >> 8) & 0xff] ^
+ Te3[(s3) & 0xff] ^
+ rk[4];
+ t1 =
+ Te0[(s1 >> 24)] ^
+ Te1[(s2 >> 16) & 0xff] ^
+ Te2[(s3 >> 8) & 0xff] ^
+ Te3[(s0) & 0xff] ^
+ rk[5];
+ t2 =
+ Te0[(s2 >> 24)] ^
+ Te1[(s3 >> 16) & 0xff] ^
+ Te2[(s0 >> 8) & 0xff] ^
+ Te3[(s1) & 0xff] ^
+ rk[6];
+ t3 =
+ Te0[(s3 >> 24)] ^
+ Te1[(s0 >> 16) & 0xff] ^
+ Te2[(s1 >> 8) & 0xff] ^
+ Te3[(s2) & 0xff] ^
+ rk[7];
+ rk += 8; /* rk[0..3] is now the round key that follows the one just used */
+ if (--r == 0) { /* leave with the state in t0..t3 and rk at the last round key */
+ break;
+ }
+ s0 =
+ Te0[(t0 >> 24)] ^
+ Te1[(t1 >> 16) & 0xff] ^
+ Te2[(t2 >> 8) & 0xff] ^
+ Te3[(t3) & 0xff] ^
+ rk[0];
+ s1 =
+ Te0[(t1 >> 24)] ^
+ Te1[(t2 >> 16) & 0xff] ^
+ Te2[(t3 >> 8) & 0xff] ^
+ Te3[(t0) & 0xff] ^
+ rk[1];
+ s2 =
+ Te0[(t2 >> 24)] ^
+ Te1[(t3 >> 16) & 0xff] ^
+ Te2[(t0 >> 8) & 0xff] ^
+ Te3[(t1) & 0xff] ^
+ rk[2];
+ s3 =
+ Te0[(t3 >> 24)] ^
+ Te1[(t0 >> 16) & 0xff] ^
+ Te2[(t1 >> 8) & 0xff] ^
+ Te3[(t2) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ s0 =
+ (Te2[(t0 >> 24)] & 0xff000000) ^ /* each lookup is masked down to one byte whose position matches its source byte; */
+ (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^ /* presumably that byte is the plain substitution value, so this round mixes no columns -- confirm */
+ (Te0[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te1[(t3) & 0xff] & 0x000000ff) ^
+ rk[0];
+ PUTU32(out, s0);
+ s1 =
+ (Te2[(t1 >> 24)] & 0xff000000) ^
+ (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te0[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te1[(t0) & 0xff] & 0x000000ff) ^
+ rk[1];
+ PUTU32(out + 4, s1);
+ s2 =
+ (Te2[(t2 >> 24)] & 0xff000000) ^
+ (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te0[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te1[(t1) & 0xff] & 0x000000ff) ^
+ rk[2];
+ PUTU32(out + 8, s2);
+ s3 =
+ (Te2[(t3 >> 24)] & 0xff000000) ^
+ (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te0[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te1[(t2) & 0xff] & 0x000000ff) ^
+ rk[3];
+ PUTU32(out + 12, s3);
+ * Decrypt a single block
+ * in and out can overlap
+ */
+AES_decrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key)
+ const u32 *rk;
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+ rk = key->rd_key;
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(in) ^ rk[0];
+ s1 = GETU32(in + 4) ^ rk[1];
+ s2 = GETU32(in + 8) ^ rk[2];
+ s3 = GETU32(in + 12) ^ rk[3];
+ /* round 1: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
+ if (key->rounds > 10) {
+ /* round 10: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
+ if (key->rounds > 12) {
+ /* round 12: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
+ }
+ }
+ rk += key->rounds << 2;
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ r = key->rounds >> 1;
+ for (;;) {
+ t0 =
+ Td0[(s0 >> 24)] ^
+ Td1[(s3 >> 16) & 0xff] ^
+ Td2[(s2 >> 8) & 0xff] ^
+ Td3[(s1) & 0xff] ^
+ rk[4];
+ t1 =
+ Td0[(s1 >> 24)] ^
+ Td1[(s0 >> 16) & 0xff] ^
+ Td2[(s3 >> 8) & 0xff] ^
+ Td3[(s2) & 0xff] ^
+ rk[5];
+ t2 =
+ Td0[(s2 >> 24)] ^
+ Td1[(s1 >> 16) & 0xff] ^
+ Td2[(s0 >> 8) & 0xff] ^
+ Td3[(s3) & 0xff] ^
+ rk[6];
+ t3 =
+ Td0[(s3 >> 24)] ^
+ Td1[(s2 >> 16) & 0xff] ^
+ Td2[(s1 >> 8) & 0xff] ^
+ Td3[(s0) & 0xff] ^
+ rk[7];
+ rk += 8;
+ if (--r == 0) {
+ break;
+ }
+ s0 =
+ Td0[(t0 >> 24)] ^
+ Td1[(t3 >> 16) & 0xff] ^
+ Td2[(t2 >> 8) & 0xff] ^
+ Td3[(t1) & 0xff] ^
+ rk[0];
+ s1 =
+ Td0[(t1 >> 24)] ^
+ Td1[(t0 >> 16) & 0xff] ^
+ Td2[(t3 >> 8) & 0xff] ^
+ Td3[(t2) & 0xff] ^
+ rk[1];
+ s2 =
+ Td0[(t2 >> 24)] ^
+ Td1[(t1 >> 16) & 0xff] ^
+ Td2[(t0 >> 8) & 0xff] ^
+ Td3[(t3) & 0xff] ^
+ rk[2];
+ s3 =
+ Td0[(t3 >> 24)] ^
+ Td1[(t2 >> 16) & 0xff] ^
+ Td2[(t1 >> 8) & 0xff] ^
+ Td3[(t0) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ s0 =
+ (((uint32_t)Td4[(t0 >> 24)]) << 24) ^
+ (Td4[(t3 >> 16) & 0xff] << 16) ^
+ (Td4[(t2 >> 8) & 0xff] << 8) ^
+ (Td4[(t1) & 0xff]) ^
+ rk[0];
+ PUTU32(out, s0);
+ s1 =
+ (((uint32_t)Td4[(t1 >> 24)]) << 24) ^
+ (Td4[(t0 >> 16) & 0xff] << 16) ^
+ (Td4[(t3 >> 8) & 0xff] << 8) ^
+ (Td4[(t2) & 0xff]) ^
+ rk[1];
+ PUTU32(out + 4, s1);
+ s2 =
+ (((uint32_t)Td4[(t2 >> 24)]) << 24) ^
+ (Td4[(t1 >> 16) & 0xff] << 16) ^
+ (Td4[(t0 >> 8) & 0xff] << 8) ^
+ (Td4[(t3) & 0xff]) ^
+ rk[2];
+ PUTU32(out + 8, s2);
+ s3 =
+ (((uint32_t)Td4[(t3 >> 24)]) << 24) ^
+ (Td4[(t2 >> 16) & 0xff] << 16) ^
+ (Td4[(t1 >> 8) & 0xff] << 8) ^
+ (Td4[(t0) & 0xff]) ^
+ rk[3];
+ PUTU32(out + 12, s3);
+#else /* AES_ASM */
+/*
+ * Te4: 256-entry byte substitution table (the AES S-box values).
+ * AES_set_encrypt_key() indexes it with single bytes of a schedule word
+ * while expanding the key.
+ */
+static const u8 Te4[256] = {
+	0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
+	0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
+	0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
+	0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
+	0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
+	0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
+	0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
+	0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
+	0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
+	0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
+	0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
+	0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
+	0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
+	0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
+	0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
+	0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
+	0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
+	0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
+	0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
+	0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
+	0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
+	0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
+	0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
+	0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
+	0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
+	0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
+	0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
+	0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
+	0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
+	0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
+	0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
+	0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
+};
+/*
+ * Round constants XORed into the first word produced by each key-expansion
+ * step; AES_set_encrypt_key() reads them as rcon[i].
+ */
+static const u32 rcon[] = {
+	0x01000000, 0x02000000, 0x04000000, 0x08000000,
+	0x10000000, 0x20000000, 0x40000000, 0x80000000,
+	0x1B000000, 0x36000000,
+	/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+};
+/**
+ * Expand the cipher key into the encryption key schedule.
+ *
+ * userKey supplies bits/8 key bytes, read as big-endian 32-bit words via
+ * GETU32. On success key->rounds is 10, 12 or 14 (for 128-, 192- and
+ * 256-bit keys) and key->rd_key holds 4 * (rounds + 1) schedule words.
+ *
+ * Returns 0 on success, -1 if userKey or key is NULL, and -2 if bits is
+ * not 128, 192 or 256.
+ */
+int
+AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key)
+{
+	u32 *rk;
+	int i = 0;
+	u32 temp;
+
+	if (!userKey || !key)
+		return -1;
+	if (bits != 128 && bits != 192 && bits != 256)
+		return -2;
+
+	rk = key->rd_key;
+
+	if (bits == 128)
+		key->rounds = 10;
+	else if (bits == 192)
+		key->rounds = 12;
+	else
+		key->rounds = 14;
+
+	rk[0] = GETU32(userKey);
+	rk[1] = GETU32(userKey + 4);
+	rk[2] = GETU32(userKey + 8);
+	rk[3] = GETU32(userKey + 12);
+	if (bits == 128) {
+		while (1) {
+			/*
+			 * Substitute each byte of the previous word through
+			 * Te4 and rotate it left by one byte, then add the
+			 * round constant. The u32 cast keeps the << 24 out of
+			 * the sign bit of a promoted int.
+			 */
+			temp = rk[3];
+			rk[4] = rk[0] ^
+			    ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
+			    ((u32)Te4[(temp >> 8) & 0xff] << 16) ^
+			    ((u32)Te4[(temp) & 0xff] << 8) ^
+			    ((u32)Te4[(temp >> 24)]) ^
+			    rcon[i];
+			rk[5] = rk[1] ^ rk[4];
+			rk[6] = rk[2] ^ rk[5];
+			rk[7] = rk[3] ^ rk[6];
+			if (++i == 10) {
+				return 0;
+			}
+			rk += 4;
+		}
+	}
+	rk[4] = GETU32(userKey + 16);
+	rk[5] = GETU32(userKey + 20);
+	if (bits == 192) {
+		while (1) {
+			temp = rk[5];
+			rk[6] = rk[0] ^
+			    ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
+			    ((u32)Te4[(temp >> 8) & 0xff] << 16) ^
+			    ((u32)Te4[(temp) & 0xff] << 8) ^
+			    ((u32)Te4[(temp >> 24)]) ^
+			    rcon[i];
+			rk[7] = rk[1] ^ rk[6];
+			rk[8] = rk[2] ^ rk[7];
+			rk[9] = rk[3] ^ rk[8];
+			/* the last pass only needs four more words */
+			if (++i == 8) {
+				return 0;
+			}
+			rk[10] = rk[4] ^ rk[9];
+			rk[11] = rk[5] ^ rk[10];
+			rk += 6;
+		}
+	}
+	rk[6] = GETU32(userKey + 24);
+	rk[7] = GETU32(userKey + 28);
+	if (bits == 256) {
+		while (1) {
+			temp = rk[7];
+			rk[8] = rk[0] ^
+			    ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
+			    ((u32)Te4[(temp >> 8) & 0xff] << 16) ^
+			    ((u32)Te4[(temp) & 0xff] << 8) ^
+			    ((u32)Te4[(temp >> 24)]) ^
+			    rcon[i];
+			rk[9] = rk[1] ^ rk[8];
+			rk[10] = rk[2] ^ rk[9];
+			rk[11] = rk[3] ^ rk[10];
+			if (++i == 7) {
+				return 0;
+			}
+			/*
+			 * Second half of a 256-bit step: byte substitution
+			 * only, with no rotation and no round constant.
+			 */
+			temp = rk[11];
+			rk[12] = rk[4] ^
+			    ((u32)Te4[(temp >> 24)] << 24) ^
+			    ((u32)Te4[(temp >> 16) & 0xff] << 16) ^
+			    ((u32)Te4[(temp >> 8) & 0xff] << 8) ^
+			    ((u32)Te4[(temp) & 0xff]);
+			rk[13] = rk[5] ^ rk[12];
+			rk[14] = rk[6] ^ rk[13];
+			rk[15] = rk[7] ^ rk[14];
+
+			rk += 8;
+		}
+	}
+	return 0;
+}
+
+/**
+ * Expand the cipher key into the decryption key schedule.
+ *
+ * Builds the encryption schedule with AES_set_encrypt_key(), reverses the
+ * order of the 4-word round keys in place, then applies the inverse
+ * MixColumn transform to every round key except the first and the last.
+ *
+ * Returns 0 on success, or the negative status reported by
+ * AES_set_encrypt_key().
+ */
+int
+AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+    AES_KEY *key)
+{
+	u32 *rk;
+	int i, j, status;
+	u32 temp;
+
+	/* first, start with an encryption schedule */
+	status = AES_set_encrypt_key(userKey, bits, key);
+	if (status < 0)
+		return status;
+
+	rk = key->rd_key;
+
+	/* invert the order of the round keys: */
+	for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
+		temp = rk[i];
+		rk[i] = rk[j];
+		rk[j] = temp;
+		temp = rk[i + 1];
+		rk[i + 1] = rk[j + 1];
+		rk[j + 1] = temp;
+		temp = rk[i + 2];
+		rk[i + 2] = rk[j + 2];
+		rk[j + 2] = temp;
+		temp = rk[i + 3];
+		rk[i + 3] = rk[j + 3];
+		rk[j + 3] = temp;
+	}
+	/* apply the inverse MixColumn transform to all round keys but the first and the last: */
+	for (i = 1; i < (key->rounds); i++) {
+		rk += 4;
+		for (j = 0; j < 4; j++) {
+			u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
+
+			/*
+			 * Double all four bytes of the word at once: shift the
+			 * low seven bits left and XOR 0x1b into every byte
+			 * whose top bit was set ((m - (m >> 7)) yields 0x7f in
+			 * exactly those bytes).
+			 */
+			tp1 = rk[j];
+			m = tp1 & 0x80808080;
+			tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
+			    ((m - (m >> 7)) & 0x1b1b1b1b);
+			m = tp2 & 0x80808080;
+			tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
+			    ((m - (m >> 7)) & 0x1b1b1b1b);
+			m = tp4 & 0x80808080;
+			tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
+			    ((m - (m >> 7)) & 0x1b1b1b1b);
+			/* multiples 9, 11, 13 and 14 of the original bytes */
+			tp9 = tp8 ^ tp1;
+			tpb = tp9 ^ tp2;
+			tpd = tp9 ^ tp4;
+			tpe = tp8 ^ tp4 ^ tp2;
+#if defined(ROTATE)
+			rk[j] = tpe ^ ROTATE(tpd, 16) ^
+			    ROTATE(tp9, 24) ^ ROTATE(tpb, 8);
+#else
+			/* same left rotations by 16, 24 and 8 bits, spelled out */
+			rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
+			    (tp9 >> 8) ^ (tp9 << 24) ^
+			    (tpb >> 24) ^ (tpb << 8);
+#endif
+		}
+	}
+	return 0;
+}
+#endif /* AES_ASM */
diff --git a/crypto/libressl/crypto/aes/aes_ctr.c b/crypto/libressl/crypto/aes/aes_ctr.c
new file mode 100644
index 0000000..6079145
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aes_ctr.c
@@ -0,0 +1,62 @@
+/* $OpenBSD: aes_ctr.c,v 1.9 2014/06/12 15:49:27 deraadt Exp $ */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * ====================================================================
+ *
+ */
+#include <openssl/aes.h>
+#include <openssl/modes.h>
+/*
+ * AES counter-mode entry point: passes every argument through unchanged to
+ * CRYPTO_ctr128_encrypt(), supplying AES_encrypt as the block function.
+ */
+void
+AES_ctr128_encrypt(const unsigned char *in, unsigned char *out,
+    size_t length, const AES_KEY *key, unsigned char ivec[AES_BLOCK_SIZE],
+    unsigned char ecount_buf[AES_BLOCK_SIZE], unsigned int *num)
+{
+	CRYPTO_ctr128_encrypt(in, out, length, key, ivec, ecount_buf, num,
+	    (block128_f)AES_encrypt);
+}
diff --git a/crypto/libressl/crypto/aes/aes_ecb.c b/crypto/libressl/crypto/aes/aes_ecb.c
new file mode 100644
index 0000000..b05e539
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aes_ecb.c
@@ -0,0 +1,69 @@
+/* $OpenBSD: aes_ecb.c,v 1.6 2015/02/10 09:46:30 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * ====================================================================
+ *
+ */
+#ifndef AES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#include <openssl/aes.h>
+#include "aes_locl.h"
+/*
+ * Process one block in ECB mode: calls AES_encrypt() when enc equals
+ * AES_ENCRYPT and AES_decrypt() for any other value.
+ */
+void
+AES_ecb_encrypt(const unsigned char *in, unsigned char *out,
+    const AES_KEY *key, const int enc)
+{
+	if (AES_ENCRYPT == enc)
+		AES_encrypt(in, out, key);
+	else
+		AES_decrypt(in, out, key);
+}
diff --git a/crypto/libressl/crypto/aes/aes_ige.c b/crypto/libressl/crypto/aes/aes_ige.c
new file mode 100644
index 0000000..85b7f69
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aes_ige.c
@@ -0,0 +1,194 @@
+/* $OpenBSD: aes_ige.c,v 1.7 2015/02/10 09:46:30 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 2006 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * ====================================================================
+ *
+ */
+#include <openssl/aes.h>
+#include <openssl/crypto.h>
+#include "aes_locl.h"
+/* One AES block viewed as an array of unsigned long words. */
+#define N_WORDS (AES_BLOCK_SIZE / sizeof(unsigned long))
+typedef struct {
+	unsigned long data[N_WORDS];
+} aes_block_t;
+
+/* XXX: probably some better way to do this */
+#if defined(__i386__) || defined(__x86_64__)
+#define UNALIGNED_MEMOPS_ARE_FAST 1
+#else
+#define UNALIGNED_MEMOPS_ARE_FAST 0
+#endif
+
+/*
+ * load_block/store_block move one block between a byte pointer and an
+ * aes_block_t: by direct struct assignment where unaligned access is
+ * treated as cheap, by memcpy everywhere else.
+ */
+#if UNALIGNED_MEMOPS_ARE_FAST
+#define load_block(d, s) (d) = *(const aes_block_t *)(s)
+#define store_block(d, s) *(aes_block_t *)(d) = (s)
+#else
+#define load_block(d, s) memcpy((d).data, (s), AES_BLOCK_SIZE)
+#define store_block(d, s) memcpy((d), (s).data, AES_BLOCK_SIZE)
+#endif
+
+/*
+ * Encrypt (enc == AES_ENCRYPT) or decrypt (any other value) length bytes
+ * from in to out, chaining each block with both the previous input block
+ * and the previous output block.
+ *
+ * N.B. The IV for this mode is _twice_ the block size: the first
+ * AES_BLOCK_SIZE bytes of ivec seed one chain and the next AES_BLOCK_SIZE
+ * bytes seed the other. Both halves are overwritten with the final chaining
+ * values before returning.
+ *
+ * length must be a multiple of AES_BLOCK_SIZE (enforced by OPENSSL_assert).
+ */
+void
+AES_ige_encrypt(const unsigned char *in, unsigned char *out, size_t length,
+    const AES_KEY *key, unsigned char *ivec, const int enc)
+{
+	size_t n;
+	size_t len;
+
+	OPENSSL_assert((length % AES_BLOCK_SIZE) == 0);
+
+	len = length / AES_BLOCK_SIZE;
+
+	if (AES_ENCRYPT == enc) {
+		/*
+		 * Word-wise path: only when in and out are distinct buffers
+		 * and either unaligned access is cheap or all three pointers
+		 * are long-aligned.
+		 */
+		if (in != out && (UNALIGNED_MEMOPS_ARE_FAST ||
+		    ((size_t)in|(size_t)out|(size_t)ivec) %
+		    sizeof(long) == 0)) {
+			aes_block_t *ivp = (aes_block_t *)ivec;
+			aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE);
+
+			while (len) {
+				aes_block_t *inp = (aes_block_t *)in;
+				aes_block_t *outp = (aes_block_t *)out;
+
+				for (n = 0; n < N_WORDS; ++n)
+					outp->data[n] = inp->data[n] ^ ivp->data[n];
+				AES_encrypt((unsigned char *)outp->data, (unsigned char *)outp->data, key);
+				for (n = 0; n < N_WORDS; ++n)
+					outp->data[n] ^= iv2p->data[n];
+				/* chain on the block just written and the block just read */
+				ivp = outp;
+				iv2p = inp;
+				--len;
+				in += AES_BLOCK_SIZE;
+				out += AES_BLOCK_SIZE;
+			}
+			memcpy(ivec, ivp->data, AES_BLOCK_SIZE);
+			memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE);
+		} else {
+			/* copy-based path: works in place and on unaligned buffers */
+			aes_block_t tmp, tmp2;
+			aes_block_t iv;
+			aes_block_t iv2;
+
+			load_block(iv, ivec);
+			load_block(iv2, ivec + AES_BLOCK_SIZE);
+
+			while (len) {
+				load_block(tmp, in);
+				for (n = 0; n < N_WORDS; ++n)
+					tmp2.data[n] = tmp.data[n] ^ iv.data[n];
+				AES_encrypt((unsigned char *)tmp2.data,
+				    (unsigned char *)tmp2.data, key);
+				for (n = 0; n < N_WORDS; ++n)
+					tmp2.data[n] ^= iv2.data[n];
+				store_block(out, tmp2);
+				iv = tmp2;
+				iv2 = tmp;
+				--len;
+				in += AES_BLOCK_SIZE;
+				out += AES_BLOCK_SIZE;
+			}
+			memcpy(ivec, iv.data, AES_BLOCK_SIZE);
+			memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
+		}
+	} else {
+		if (in != out && (UNALIGNED_MEMOPS_ARE_FAST ||
+		    ((size_t)in|(size_t)out|(size_t)ivec) %
+		    sizeof(long) == 0)) {
+			aes_block_t *ivp = (aes_block_t *)ivec;
+			aes_block_t *iv2p = (aes_block_t *)(ivec + AES_BLOCK_SIZE);
+
+			while (len) {
+				aes_block_t tmp;
+				aes_block_t *inp = (aes_block_t *)in;
+				aes_block_t *outp = (aes_block_t *)out;
+
+				/* the two chaining values swap roles relative to encryption */
+				for (n = 0; n < N_WORDS; ++n)
+					tmp.data[n] = inp->data[n] ^ iv2p->data[n];
+				AES_decrypt((unsigned char *)tmp.data,
+				    (unsigned char *)outp->data, key);
+				for (n = 0; n < N_WORDS; ++n)
+					outp->data[n] ^= ivp->data[n];
+				ivp = inp;
+				iv2p = outp;
+				--len;
+				in += AES_BLOCK_SIZE;
+				out += AES_BLOCK_SIZE;
+			}
+			memcpy(ivec, ivp->data, AES_BLOCK_SIZE);
+			memcpy(ivec + AES_BLOCK_SIZE, iv2p->data, AES_BLOCK_SIZE);
+		} else {
+			aes_block_t tmp, tmp2;
+			aes_block_t iv;
+			aes_block_t iv2;
+
+			load_block(iv, ivec);
+			load_block(iv2, ivec + AES_BLOCK_SIZE);
+
+			while (len) {
+				load_block(tmp, in);
+				/* keep the untouched input block for the next round's chain */
+				tmp2 = tmp;
+				for (n = 0; n < N_WORDS; ++n)
+					tmp.data[n] ^= iv2.data[n];
+				AES_decrypt((unsigned char *)tmp.data,
+				    (unsigned char *)tmp.data, key);
+				for (n = 0; n < N_WORDS; ++n)
+					tmp.data[n] ^= iv.data[n];
+				store_block(out, tmp);
+				iv = tmp2;
+				iv2 = tmp;
+				--len;
+				in += AES_BLOCK_SIZE;
+				out += AES_BLOCK_SIZE;
+			}
+			memcpy(ivec, iv.data, AES_BLOCK_SIZE);
+			memcpy(ivec + AES_BLOCK_SIZE, iv2.data, AES_BLOCK_SIZE);
+		}
+	}
+}
diff --git a/crypto/libressl/crypto/aes/aes_locl.h b/crypto/libressl/crypto/aes/aes_locl.h
new file mode 100644
index 0000000..c47f65d
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aes_locl.h
@@ -0,0 +1,83 @@
+/* $OpenBSD: aes_locl.h,v 1.11 2016/12/21 15:49:29 jsing Exp $ */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * ====================================================================
+ *
+ */
+#ifndef HEADER_AES_LOCL_H
+#define HEADER_AES_LOCL_H
+
+#include <openssl/opensslconf.h>
+
+/* Refuse to build the AES sources when the configuration disables AES. */
+#ifdef OPENSSL_NO_AES
+#error AES is disabled.
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Read/write a 32-bit word from/to four bytes, most significant byte first. */
+#define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3]))
+#define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); }
+
+typedef unsigned int u32;
+typedef unsigned short u16;
+typedef unsigned char u8;
+
+#define MAXKC (256/32)
+#define MAXKB (256/8)
+#define MAXNR 14
+
+/* This controls loop-unrolling in aes_core.c */
+#undef FULL_UNROLL
+
+#endif /* !HEADER_AES_LOCL_H */
diff --git a/crypto/libressl/crypto/aes/aes_misc.c b/crypto/libressl/crypto/aes/aes_misc.c
new file mode 100644
index 0000000..6c1506d
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aes_misc.c
@@ -0,0 +1,65 @@
+/* $OpenBSD: aes_misc.c,v 1.10 2014/07/09 11:10:50 bcook Exp $ */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * ====================================================================
+ *
+ */
+#include <openssl/opensslv.h>
+#include <openssl/crypto.h>
+#include <openssl/aes.h>
+#include "aes_locl.h"
+const char *
+ return "aes(full)";
+ return "aes(partial)";
diff --git a/crypto/libressl/crypto/aes/aes_ofb.c b/crypto/libressl/crypto/aes/aes_ofb.c
new file mode 100644
index 0000000..f8dc03a
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aes_ofb.c
@@ -0,0 +1,61 @@
+/* $OpenBSD: aes_ofb.c,v 1.6 2014/06/12 15:49:27 deraadt Exp $ */
+/* ====================================================================
+ * Copyright (c) 2002-2006 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * ====================================================================
+ *
+ */
+#include <openssl/aes.h>
+#include <openssl/modes.h>
+/*
+ * AES output-feedback entry point: passes every argument through unchanged
+ * to CRYPTO_ofb128_encrypt(), supplying AES_encrypt as the block function.
+ */
+void
+AES_ofb128_encrypt(const unsigned char *in, unsigned char *out, size_t length,
+    const AES_KEY *key, unsigned char *ivec, int *num)
+{
+	CRYPTO_ofb128_encrypt(in, out, length, key, ivec, num,
+	    (block128_f)AES_encrypt);
+}
diff --git a/crypto/libressl/crypto/aes/aes_wrap.c b/crypto/libressl/crypto/aes/aes_wrap.c
new file mode 100644
index 0000000..b30630f
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aes_wrap.c
@@ -0,0 +1,133 @@
+/* $OpenBSD: aes_wrap.c,v 1.12 2018/11/07 18:31:16 tb Exp $ */
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
+ * project.
+ */
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * ====================================================================
+ */
+#include <string.h>
+#include <openssl/aes.h>
+#include <openssl/bio.h>
+/* 8-byte initial value used by the key-wrap routines when the caller passes iv == NULL. */
+static const unsigned char default_iv[] = {
+	0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6,
+};
+
+/*
+ * Wrap inlen bytes of key material from in into out using AES key wrap.
+ *
+ * key   - AES key passed to AES_encrypt()
+ * iv    - 8-byte initial value, or NULL to use default_iv
+ * out   - receives inlen + 8 bytes; may overlap in (the copy uses memmove)
+ * inlen - must be a multiple of 8 and at least 16
+ *
+ * Returns inlen + 8 on success, -1 if inlen is rejected.
+ */
+int
+AES_wrap_key(AES_KEY *key, const unsigned char *iv, unsigned char *out,
+    const unsigned char *in, unsigned int inlen)
+{
+	unsigned char *A, B[16], *R;
+	unsigned int i, j, t;
+
+	if ((inlen & 0x7) || (inlen < 16))
+		return -1;
+	/* A (the running integrity register) is the first half of B */
+	A = B;
+	t = 1;
+	memmove(out + 8, in, inlen);
+	if (!iv)
+		iv = default_iv;
+
+	memcpy(A, iv, 8);
+
+	/* six passes over every 8-byte block; t counts steps across all passes */
+	for (j = 0; j < 6; j++) {
+		R = out + 8;
+		for (i = 0; i < inlen; i += 8, t++, R += 8) {
+			memcpy(B + 8, R, 8);
+			AES_encrypt(B, B, key);
+			/* XOR the step counter into the low bytes of A, big-endian */
+			A[7] ^= (unsigned char)(t & 0xff);
+			if (t > 0xff) {
+				A[6] ^= (unsigned char)((t >> 8) & 0xff);
+				A[5] ^= (unsigned char)((t >> 16) & 0xff);
+				A[4] ^= (unsigned char)((t >> 24) & 0xff);
+			}
+			memcpy(R, B + 8, 8);
+		}
+	}
+	memcpy(out, A, 8);
+	return inlen + 8;
+}
+
+/*
+ * Unwrap inlen bytes of wrapped key material from in into out.
+ *
+ * key   - AES key passed to AES_decrypt()
+ * iv    - expected 8-byte initial value, or NULL to use default_iv
+ * out   - receives inlen - 8 bytes; may overlap in (the copy uses memmove)
+ * inlen - must be a multiple of 8 and at least 24
+ *
+ * Returns inlen - 8 on success, -1 if inlen is rejected, and 0 if the
+ * recovered initial value does not match iv; in that case the output
+ * buffer is cleared with explicit_bzero() before returning.
+ */
+int
+AES_unwrap_key(AES_KEY *key, const unsigned char *iv, unsigned char *out,
+    const unsigned char *in, unsigned int inlen)
+{
+	unsigned char *A, B[16], *R;
+	unsigned int i, j, t;
+
+	if ((inlen & 0x7) || (inlen < 24))
+		return -1;
+	inlen -= 8;
+	A = B;
+	/* start from the last step counter value and count back down */
+	t = 6 * (inlen >> 3);
+	memcpy(A, in, 8);
+	memmove(out, in + 8, inlen);
+	for (j = 0; j < 6; j++) {
+		/* walk the blocks from last to first */
+		R = out + inlen - 8;
+		for (i = 0; i < inlen; i += 8, t--, R -= 8) {
+			A[7] ^= (unsigned char)(t & 0xff);
+			if (t > 0xff) {
+				A[6] ^= (unsigned char)((t >> 8) & 0xff);
+				A[5] ^= (unsigned char)((t >> 16) & 0xff);
+				A[4] ^= (unsigned char)((t >> 24) & 0xff);
+			}
+			memcpy(B + 8, R, 8);
+			AES_decrypt(B, B, key);
+			memcpy(R, B + 8, 8);
+		}
+	}
+	if (!iv)
+		iv = default_iv;
+	/* NOTE(review): memcmp is not a constant-time comparison -- confirm this is acceptable here */
+	if (memcmp(A, iv, 8)) {
+		explicit_bzero(out, inlen);
+		return 0;
+	}
+	return inlen;
+}
diff --git a/crypto/libressl/crypto/aes/aesni-elf-x86_64.S b/crypto/libressl/crypto/aes/aesni-elf-x86_64.S
new file mode 100644
index 0000000..3b3dabf
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aesni-elf-x86_64.S
@@ -0,0 +1,2539 @@
+#include "x86_arch.h"
+.globl aesni_encrypt  /* Encrypts one 16-byte block: input at (%rdi), output to (%rsi), key structure at %rdx. Clobbers %eax, %rdx, %xmm0-%xmm2. */
+.type aesni_encrypt,@function  /* NOTE: label lines (entry point, loop head) are absent from this rendering of the patch. */
+.align 16
+ movups (%rdi),%xmm2  /* xmm2 = input block (unaligned load) */
+ movl 240(%rdx),%eax  /* eax = loop counter stored at offset 240 of the key structure (written by key setup, outside this excerpt) */
+ movups (%rdx),%xmm0  /* first round key */
+ movups 16(%rdx),%xmm1  /* second round key */
+ leaq 32(%rdx),%rdx
+ xorps %xmm0,%xmm2  /* initial key whitening */
+ aesenc %xmm1,%xmm2  /* one middle round per loop iteration */
+ decl %eax
+ movups (%rdx),%xmm1  /* fetch the next round key; movups/leaq leave the flags from decl intact */
+ leaq 16(%rdx),%rdx
+ jnz .Loop_enc1_1  /* loop until the counter reaches zero (target label not shown here; presumably the aesenc above) */
+ aesenclast %xmm1,%xmm2  /* final round */
+ movups %xmm2,(%rsi)  /* store the result */
+ retq
+.size aesni_encrypt,.-aesni_encrypt
+.globl aesni_decrypt  /* Decrypts one 16-byte block: input at (%rdi), output to (%rsi), key structure at %rdx. Clobbers %eax, %rdx, %xmm0-%xmm2. */
+.type aesni_decrypt,@function  /* NOTE: label lines (entry point, loop head) are absent from this rendering of the patch. */
+.align 16
+ movups (%rdi),%xmm2  /* xmm2 = input block */
+ movl 240(%rdx),%eax  /* eax = loop counter stored at offset 240 of the key structure */
+ movups (%rdx),%xmm0  /* first round key */
+ movups 16(%rdx),%xmm1  /* second round key */
+ leaq 32(%rdx),%rdx
+ xorps %xmm0,%xmm2  /* initial key whitening */
+ aesdec %xmm1,%xmm2  /* one middle round per loop iteration */
+ decl %eax
+ movups (%rdx),%xmm1  /* fetch the next round key; flags from decl survive */
+ leaq 16(%rdx),%rdx
+ jnz .Loop_dec1_2  /* loop until the counter reaches zero (target label not shown here) */
+ aesdeclast %xmm1,%xmm2  /* final round */
+ movups %xmm2,(%rsi)  /* store the result */
+ retq
+.size aesni_decrypt, .-aesni_decrypt
+.type _aesni_encrypt3,@function  /* File-local helper (no .globl): encrypts the three blocks in %xmm2-%xmm4 in parallel. In: %rcx = key schedule, %eax = loop counter. Out: %xmm2-%xmm4. Clobbers %eax, %rcx, %xmm0, %xmm1. */
+.align 16
+ movups (%rcx),%xmm0  /* first round key */
+ shrl $1,%eax  /* halve the counter: each loop iteration below applies two rounds */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2  /* whiten all three lanes */
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm2  /* round with xmm1 on every lane */
+ aesenc %xmm1,%xmm3
+ decl %eax  /* sets the flags consumed by the jnz below; SSE/AES/lea instructions do not touch them */
+ aesenc %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2  /* round with xmm0 on every lane */
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop3  /* target label not shown in this rendering */
+ aesenc %xmm1,%xmm2  /* last middle round */
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenclast %xmm0,%xmm2  /* final round */
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ retq
+.size _aesni_encrypt3,.-_aesni_encrypt3
+.type _aesni_decrypt3,@function  /* File-local helper: decrypts the three blocks in %xmm2-%xmm4 in parallel. In: %rcx = key schedule, %eax = loop counter. Out: %xmm2-%xmm4. Clobbers %eax, %rcx, %xmm0, %xmm1. */
+.align 16
+ movups (%rcx),%xmm0  /* first round key */
+ shrl $1,%eax  /* two rounds per loop iteration */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2  /* whiten all three lanes */
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax  /* flags consumed by the jnz below */
+ aesdec %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop3  /* target label not shown in this rendering */
+ aesdec %xmm1,%xmm2  /* last middle round */
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdeclast %xmm0,%xmm2  /* final round */
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ retq
+.size _aesni_decrypt3,.-_aesni_decrypt3
+.type _aesni_encrypt4,@function  /* File-local helper: encrypts the four blocks in %xmm2-%xmm5 in parallel. In: %rcx = key schedule, %eax = loop counter. Out: %xmm2-%xmm5. Clobbers %eax, %rcx, %xmm0, %xmm1. */
+.align 16
+ movups (%rcx),%xmm0  /* first round key */
+ shrl $1,%eax  /* two rounds per loop iteration */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2  /* whiten all four lanes */
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax  /* flags consumed by the jnz below */
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop4  /* target label not shown in this rendering */
+ aesenc %xmm1,%xmm2  /* last middle round */
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenclast %xmm0,%xmm2  /* final round */
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ retq
+.size _aesni_encrypt4,.-_aesni_encrypt4
+.type _aesni_decrypt4,@function  /* File-local helper: decrypts the four blocks in %xmm2-%xmm5 in parallel. In: %rcx = key schedule, %eax = loop counter. Out: %xmm2-%xmm5. Clobbers %eax, %rcx, %xmm0, %xmm1. */
+.align 16
+ movups (%rcx),%xmm0  /* first round key */
+ shrl $1,%eax  /* two rounds per loop iteration */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2  /* whiten all four lanes */
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax  /* flags consumed by the jnz below */
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop4  /* target label not shown in this rendering */
+ aesdec %xmm1,%xmm2  /* last middle round */
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdeclast %xmm0,%xmm2  /* final round */
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ retq
+.size _aesni_decrypt4,.-_aesni_decrypt4
+.type _aesni_encrypt6,@function  /* File-local helper: encrypts the six blocks in %xmm2-%xmm7 in parallel. In: %rcx = key schedule, %eax = loop counter. Out: %xmm2-%xmm7. Clobbers %eax, %rcx, %xmm0, %xmm1. */
+.align 16
+ movups (%rcx),%xmm0  /* first round key */
+ shrl $1,%eax  /* two rounds per loop iteration */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2  /* whitening of each lane is interleaved with the first round of the previous lane */
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesenc %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm7
+ jmp .Lenc_loop6_enter  /* first round already done: enter the loop part-way (target label not shown; presumably just before the xmm0 rounds below) */
+.align 16
+ aesenc %xmm1,%xmm2  /* loop body: round with xmm1 on every lane ... */
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2  /* ... then round with xmm0 on every lane */
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop6  /* flags come from the most recent decl */
+ aesenc %xmm1,%xmm2  /* last middle round */
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenclast %xmm0,%xmm2  /* final round */
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ retq
+.size _aesni_encrypt6,.-_aesni_encrypt6
+.type _aesni_decrypt6,@function  /* File-local helper: decrypts the six blocks in %xmm2-%xmm7 in parallel. In: %rcx = key schedule, %eax = loop counter. Out: %xmm2-%xmm7. Clobbers %eax, %rcx, %xmm0, %xmm1. */
+.align 16
+ movups (%rcx),%xmm0  /* first round key */
+ shrl $1,%eax  /* two rounds per loop iteration */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2  /* whitening of each lane is interleaved with the first round of the previous lane */
+ pxor %xmm0,%xmm3
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm7
+ jmp .Ldec_loop6_enter  /* first round already done: enter the loop part-way (target label not shown in this rendering) */
+.align 16
+ aesdec %xmm1,%xmm2  /* loop body: round with xmm1 on every lane ... */
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2  /* ... then round with xmm0 on every lane */
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop6  /* flags come from the most recent decl */
+ aesdec %xmm1,%xmm2  /* last middle round */
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdeclast %xmm0,%xmm2  /* final round */
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ retq
+.size _aesni_decrypt6,.-_aesni_decrypt6
+.type _aesni_encrypt8,@function  /* File-local helper: encrypts the eight blocks in %xmm2-%xmm9 in parallel. In: %rcx = key schedule, %eax = loop counter. Out: %xmm2-%xmm9. Clobbers %eax, %rcx, %xmm0, %xmm1. */
+.align 16
+ movups (%rcx),%xmm0  /* first round key */
+ shrl $1,%eax  /* two rounds per loop iteration */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2  /* whitening of each lane is interleaved with the first round of earlier lanes */
+ xorps %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesenc %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesenc %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp .Lenc_loop8_enter  /* first round already done: enter the loop part-way (target label not shown in this rendering) */
+.align 16
+ aesenc %xmm1,%xmm2  /* loop body: round with xmm1 on every lane ... */
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2  /* ... then round with xmm0 on every lane */
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ aesenc %xmm0,%xmm8
+ aesenc %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop8  /* flags come from the most recent decl */
+ aesenc %xmm1,%xmm2  /* last middle round */
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ aesenclast %xmm0,%xmm2  /* final round */
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ aesenclast %xmm0,%xmm8
+ aesenclast %xmm0,%xmm9
+ retq
+.size _aesni_encrypt8,.-_aesni_encrypt8
+.type _aesni_decrypt8,@function  /* File-local helper: decrypts the eight blocks in %xmm2-%xmm9 in parallel. In: %rcx = key schedule, %eax = loop counter. Out: %xmm2-%xmm9. Clobbers %eax, %rcx, %xmm0, %xmm1. */
+.align 16
+ movups (%rcx),%xmm0  /* first round key */
+ shrl $1,%eax  /* two rounds per loop iteration */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2  /* whitening of each lane is interleaved with the first round of earlier lanes */
+ xorps %xmm0,%xmm3
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp .Ldec_loop8_enter  /* first round already done: enter the loop part-way (target label not shown in this rendering) */
+.align 16
+ aesdec %xmm1,%xmm2  /* loop body: round with xmm1 on every lane ... */
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2  /* ... then round with xmm0 on every lane */
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ aesdec %xmm0,%xmm8
+ aesdec %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop8  /* flags come from the most recent decl */
+ aesdec %xmm1,%xmm2  /* last middle round */
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ aesdeclast %xmm0,%xmm2  /* final round */
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ aesdeclast %xmm0,%xmm8
+ aesdeclast %xmm0,%xmm9
+ retq
+.size _aesni_decrypt8,.-_aesni_decrypt8
+.globl aesni_ecb_encrypt  /* ECB over a buffer: %rdi = input, %rsi = output, %rdx = byte length, %rcx = key structure, %r8d = direction (non-zero encrypts, zero decrypts). */
+.type aesni_ecb_encrypt,@function  /* NOTE: all label lines of this routine are absent from this rendering; section notes below are inferred from the instructions that follow each .align. */
+.align 16
+ andq $-16,%rdx  /* round the length down to whole 16-byte blocks */
+ jz .Lecb_ret  /* nothing to do */
+ movl 240(%rcx),%eax  /* loop counter from the key structure */
+ movups (%rcx),%xmm0
+ movq %rcx,%r11  /* keep pristine copies: the helpers advance %rcx and consume %eax */
+ movl %eax,%r10d
+ testl %r8d,%r8d
+ jz .Lecb_decrypt  /* zero direction flag selects the decrypt path */
+ cmpq $128,%rdx
+ jb .Lecb_enc_tail  /* fewer than 8 blocks: handle in the tail dispatcher */
+ movdqu (%rdi),%xmm2  /* preload the first 8 blocks */
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp .Lecb_enc_loop8_enter  /* skip the store half of the loop on the first pass */
+.align 16
+ movups %xmm2,(%rsi)  /* 8-block loop: store the previous results while loading the next 8 inputs */
+ movq %r11,%rcx  /* restore key pointer and counter for the next helper call */
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ call _aesni_encrypt8
+ subq $128,%rdx
+ jnc .Lecb_enc_loop8  /* continue while at least 8 more blocks remain (no borrow) */
+ movups %xmm2,(%rsi)  /* flush the last batch of 8 results */
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx  /* undo the last subtraction: %rdx = leftover bytes (0..112) */
+ jz .Lecb_ret
+ movups (%rdi),%xmm2  /* encrypt tail: load blocks one at a time and branch on the remaining length */
+ cmpq $32,%rdx
+ jb .Lecb_enc_one
+ movups 16(%rdi),%xmm3
+ je .Lecb_enc_two  /* movups leaves the flags from cmpq intact */
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb .Lecb_enc_three
+ movups 48(%rdi),%xmm5
+ je .Lecb_enc_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb .Lecb_enc_five
+ movups 80(%rdi),%xmm7
+ je .Lecb_enc_six
+ movdqu 96(%rdi),%xmm8  /* seven blocks left: run the 8-lane helper; the eighth lane's result is never stored */
+ call _aesni_encrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp .Lecb_ret
+.align 16
+ movups (%rcx),%xmm0  /* one block left: inline single-block encrypt */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_3
+ aesenclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp .Lecb_ret
+.align 16
+ xorps %xmm4,%xmm4  /* two blocks left: zero the unused third lane, use the 3-lane helper */
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp .Lecb_ret
+.align 16
+ call _aesni_encrypt3  /* three blocks left */
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp .Lecb_ret
+.align 16
+ call _aesni_encrypt4  /* four blocks left */
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp .Lecb_ret
+.align 16
+ xorps %xmm7,%xmm7  /* five blocks left: zero the unused sixth lane, use the 6-lane helper */
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp .Lecb_ret
+.align 16
+ call _aesni_encrypt6  /* six blocks left */
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ jmp .Lecb_ret
+.align 16
+ cmpq $128,%rdx  /* decrypt path: same structure as the encrypt path above, using the decrypt helpers */
+ jb .Lecb_dec_tail
+ movdqu (%rdi),%xmm2  /* preload the first 8 blocks */
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp .Lecb_dec_loop8_enter  /* skip the store half of the loop on the first pass */
+.align 16
+ movups %xmm2,(%rsi)  /* 8-block loop: store previous results while loading the next 8 inputs */
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ call _aesni_decrypt8
+ movups (%r11),%xmm0  /* reload the first round key from the saved key pointer */
+ subq $128,%rdx
+ jnc .Lecb_dec_loop8  /* continue while at least 8 more blocks remain */
+ movups %xmm2,(%rsi)  /* flush the last batch of 8 results */
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx  /* %rdx = leftover bytes */
+ jz .Lecb_ret
+ movups (%rdi),%xmm2  /* decrypt tail: branch on the remaining length */
+ cmpq $32,%rdx
+ jb .Lecb_dec_one
+ movups 16(%rdi),%xmm3
+ je .Lecb_dec_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb .Lecb_dec_three
+ movups 48(%rdi),%xmm5
+ je .Lecb_dec_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb .Lecb_dec_five
+ movups 80(%rdi),%xmm7
+ je .Lecb_dec_six
+ movups 96(%rdi),%xmm8  /* seven blocks left: 8-lane helper, eighth lane's result is never stored */
+ movups (%rcx),%xmm0
+ call _aesni_decrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp .Lecb_ret
+.align 16
+ movups (%rcx),%xmm0  /* one block left: inline single-block decrypt */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_4
+ aesdeclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp .Lecb_ret
+.align 16
+ xorps %xmm4,%xmm4  /* two blocks left: zero the unused third lane */
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp .Lecb_ret
+.align 16
+ call _aesni_decrypt3  /* three blocks left */
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp .Lecb_ret
+.align 16
+ call _aesni_decrypt4  /* four blocks left */
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp .Lecb_ret
+.align 16
+ xorps %xmm7,%xmm7  /* five blocks left: zero the unused sixth lane */
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp .Lecb_ret
+.align 16
+ call _aesni_decrypt6  /* six blocks left; falls through to the common return below */
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ retq  /* common exit (presumably the .Lecb_ret target; its label line is not shown) */
+.size aesni_ecb_encrypt,.-aesni_ecb_encrypt
+.globl aesni_ccm64_encrypt_blocks  /* CCM bulk encrypt: %rdi = input, %rsi = output, %rdx = number of 16-byte blocks, %rcx = key structure, %r8 = counter block, %r9 = running MAC block (read at entry, written back at exit). */
+.type aesni_ccm64_encrypt_blocks,@function  /* NOTE: label lines are absent from this rendering; .Lincrement64 and .Lbswap_mask are defined outside this excerpt. */
+.align 16
+ movl 240(%rcx),%eax  /* loop counter from the key structure */
+ movdqu (%r8),%xmm9  /* xmm9 = counter block */
+ movdqa .Lincrement64(%rip),%xmm6  /* per-block counter increment */
+ movdqa .Lbswap_mask(%rip),%xmm7  /* byte-shuffle mask used by the pshufb encodings below */
+ shrl $1,%eax  /* two rounds per inner-loop iteration */
+ leaq 0(%rcx),%r11  /* r11 = saved key pointer */
+ movdqu (%r9),%xmm3  /* xmm3 = running MAC */
+ movdqa %xmm9,%xmm2
+ movl %eax,%r10d  /* r10d = saved (halved) counter */
+.byte 102,68,15,56,0,207  /* hand-encoded pshufb %xmm7,%xmm9: keep the counter in byte-shuffled form for paddq */
+ jmp .Lccm64_enc_outer
+.align 16
+ movups (%r11),%xmm0  /* outer loop, one block per pass: xmm2 = counter block, xmm3 = MAC */
+ movl %r10d,%eax
+ movups (%rdi),%xmm8  /* xmm8 = input block */
+ xorps %xmm0,%xmm2  /* whiten the counter block */
+ movups 16(%r11),%xmm1
+ xorps %xmm8,%xmm0
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm3  /* MAC ^= input ^ round key 0 (whitening and input absorption in one step) */
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm2  /* inner loop: counter and MAC lanes are encrypted together */
+ decl %eax
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz .Lccm64_enc2_loop
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ paddq %xmm6,%xmm9  /* advance the counter */
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ decq %rdx  /* one block consumed; flags survive until the jnz below */
+ leaq 16(%rdi),%rdi
+ xorps %xmm2,%xmm8  /* output = input ^ encrypted counter */
+ movdqa %xmm9,%xmm2
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215  /* hand-encoded pshufb %xmm7,%xmm2: shuffle the next counter back to block byte order */
+ jnz .Lccm64_enc_outer
+ movups %xmm3,(%r9)  /* write the updated MAC back to the caller */
+ retq
+.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
+.globl aesni_ccm64_decrypt_blocks  /* CCM bulk decrypt: %rdi = input, %rsi = output, %rdx = number of 16-byte blocks, %rcx = key structure, %r8 = counter block, %r9 = running MAC block (read at entry, written back at exit). */
+.type aesni_ccm64_decrypt_blocks,@function  /* NOTE: label lines are absent from this rendering; .Lincrement64 and .Lbswap_mask are defined outside this excerpt. */
+.align 16
+ movl 240(%rcx),%eax  /* loop counter from the key structure */
+ movups (%r8),%xmm9  /* xmm9 = counter block */
+ movdqu (%r9),%xmm3  /* xmm3 = running MAC */
+ movdqa .Lincrement64(%rip),%xmm6
+ movdqa .Lbswap_mask(%rip),%xmm7
+ movaps %xmm9,%xmm2
+ movl %eax,%r10d  /* r10d = saved (unhalved) counter */
+ movq %rcx,%r11  /* r11 = saved key pointer */
+.byte 102,68,15,56,0,207  /* hand-encoded pshufb %xmm7,%xmm9 */
+ movups (%rcx),%xmm0  /* encrypt the first counter block on its own (single-block loop) */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_5
+ aesenclast %xmm1,%xmm2
+ movups (%rdi),%xmm8  /* first input block */
+ paddq %xmm6,%xmm9  /* advance the counter */
+ leaq 16(%rdi),%rdi
+ jmp .Lccm64_dec_outer
+.align 16
+ xorps %xmm2,%xmm8  /* outer loop: output = input ^ encrypted counter */
+ movdqa %xmm9,%xmm2
+ movl %r10d,%eax
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215  /* hand-encoded pshufb %xmm7,%xmm2: next counter in block byte order */
+ subq $1,%rdx
+ jz .Lccm64_dec_break  /* last block: only the MAC still needs a cipher pass */
+ movups (%r11),%xmm0
+ shrl $1,%eax  /* two rounds per inner-loop iteration */
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm2  /* whiten the next counter block */
+ xorps %xmm8,%xmm3  /* MAC ^= output block ^ round key 0 */
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm2  /* inner loop: next counter and MAC lanes encrypted together */
+ decl %eax
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz .Lccm64_dec2_loop
+ movups (%rdi),%xmm8  /* fetch the next input block while the last rounds complete */
+ paddq %xmm6,%xmm9
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ leaq 16(%rdi),%rdi
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ jmp .Lccm64_dec_outer
+.align 16
+ movups (%r11),%xmm0  /* break path: absorb the final output block into the MAC and encrypt the MAC alone */
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%r11
+ xorps %xmm8,%xmm3
+ aesenc %xmm1,%xmm3
+ decl %eax
+ movups (%r11),%xmm1
+ leaq 16(%r11),%r11
+ jnz .Loop_enc1_6
+ aesenclast %xmm1,%xmm3
+ movups %xmm3,(%r9)  /* write the updated MAC back to the caller */
+ retq
+.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
+.globl aesni_ctr32_encrypt_blocks  /* CTR mode with a 32-bit counter in the last dword of the IV: %rdi = input, %rsi = output, %rdx = number of 16-byte blocks, %rcx = key structure, %r8 = counter block. */
+.type aesni_ctr32_encrypt_blocks,@function  /* NOTE: label lines are absent from this rendering; .Lbswap_mask and .Lincrement32 are defined outside this excerpt. */
+.align 16
+ cmpq $1,%rdx
+ je .Lctr32_one_shortcut  /* single block: skip all counter-vector setup */
+ movdqu (%r8),%xmm14  /* xmm14 = counter block */
+ movdqa .Lbswap_mask(%rip),%xmm15
+ xorl %eax,%eax
+.byte 102,69,15,58,22,242,3  /* hand-encoded pextrd $3,%xmm14,%r10d: pull out the counter dword */
+.byte 102,68,15,58,34,240,3  /* hand-encoded pinsrd $3,%eax,%xmm14: zero the counter dword so counters can be OR-ed in */
+ movl 240(%rcx),%eax  /* loop counter from the key structure */
+ bswapl %r10d  /* counter to host byte order for arithmetic */
+ pxor %xmm12,%xmm12
+ pxor %xmm13,%xmm13
+.byte 102,69,15,58,34,226,0  /* pinsrd $0,%r10d,%xmm12: xmm12 collects counter+0, +1, +2 */
+ leaq 3(%r10),%r11
+.byte 102,69,15,58,34,235,0  /* pinsrd $0,%r11d,%xmm13: xmm13 collects counter+3, +4, +5 */
+ incl %r10d
+.byte 102,69,15,58,34,226,1  /* pinsrd $1,%r10d,%xmm12 */
+ incq %r11
+.byte 102,69,15,58,34,235,1  /* pinsrd $1,%r11d,%xmm13 */
+ incl %r10d
+.byte 102,69,15,58,34,226,2  /* pinsrd $2,%r10d,%xmm12 */
+ incq %r11
+.byte 102,69,15,58,34,235,2  /* pinsrd $2,%r11d,%xmm13 */
+ movdqa %xmm12,-40(%rsp)  /* keep the host-order counter vectors in scratch space below %rsp (no stack adjustment is made) */
+.byte 102,69,15,56,0,231  /* pshufb %xmm15,%xmm12: byte-shuffle the counters for use in blocks */
+ movdqa %xmm13,-24(%rsp)
+.byte 102,69,15,56,0,239  /* pshufb %xmm15,%xmm13 */
+ pshufd $192,%xmm12,%xmm2  /* broadcast one counter dword into the top lane of each of xmm2..xmm4 */
+ pshufd $128,%xmm12,%xmm3
+ pshufd $64,%xmm12,%xmm4
+ cmpq $6,%rdx
+ jb .Lctr32_tail  /* fewer than 6 blocks */
+ shrl $1,%eax  /* two rounds per inner-loop iteration */
+ movq %rcx,%r11  /* r11 = saved key pointer */
+ movl %eax,%r10d  /* r10d = saved (halved) counter */
+ subq $6,%rdx
+ jmp .Lctr32_loop6
+.align 16
+ pshufd $192,%xmm13,%xmm5  /* 6-block loop: finish building the six counter blocks and start round 1 */
+ por %xmm14,%xmm2  /* merge the IV's other 12 bytes into each counter block */
+ movups (%r11),%xmm0
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm3
+ movups 16(%r11),%xmm1
+ pshufd $64,%xmm13,%xmm7
+ por %xmm14,%xmm4
+ por %xmm14,%xmm5
+ xorps %xmm0,%xmm2
+ por %xmm14,%xmm6
+ por %xmm14,%xmm7
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ movdqa .Lincrement32(%rip),%xmm13  /* xmm13 is reused for the increment; its counter values are reloaded from the stack copy later */
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ movdqa -40(%rsp),%xmm12  /* host-order counters for the paddd after the loop */
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ jmp .Lctr32_enc_loop6_enter  /* first round done: enter the round loop part-way */
+.align 16
+ aesenc %xmm1,%xmm2  /* round loop over six lanes */
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lctr32_enc_loop6
+ aesenc %xmm1,%xmm2  /* last middle round, interleaved with advancing both counter vectors */
+ paddd %xmm13,%xmm12
+ aesenc %xmm1,%xmm3
+ paddd -24(%rsp),%xmm13
+ aesenc %xmm1,%xmm4
+ movdqa %xmm12,-40(%rsp)
+ aesenc %xmm1,%xmm5
+ movdqa %xmm13,-24(%rsp)
+ aesenc %xmm1,%xmm6
+.byte 102,69,15,56,0,231  /* pshufb %xmm15,%xmm12 */
+ aesenc %xmm1,%xmm7
+.byte 102,69,15,56,0,239  /* pshufb %xmm15,%xmm13 */
+ aesenclast %xmm0,%xmm2  /* final round, interleaved with loading the six input blocks */
+ movups (%rdi),%xmm8
+ aesenclast %xmm0,%xmm3
+ movups 16(%rdi),%xmm9
+ aesenclast %xmm0,%xmm4
+ movups 32(%rdi),%xmm10
+ aesenclast %xmm0,%xmm5
+ movups 48(%rdi),%xmm11
+ aesenclast %xmm0,%xmm6
+ movups 64(%rdi),%xmm1
+ aesenclast %xmm0,%xmm7
+ movups 80(%rdi),%xmm0
+ leaq 96(%rdi),%rdi
+ xorps %xmm2,%xmm8  /* output = input ^ keystream; next counter blocks are prepared in the freed registers */
+ pshufd $192,%xmm12,%xmm2
+ xorps %xmm3,%xmm9
+ pshufd $128,%xmm12,%xmm3
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ pshufd $64,%xmm12,%xmm4
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ xorps %xmm7,%xmm0
+ movups %xmm1,64(%rsi)
+ movups %xmm0,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movl %r10d,%eax
+ subq $6,%rdx
+ jnc .Lctr32_loop6  /* continue while at least 6 more blocks remain */
+ addq $6,%rdx  /* %rdx = leftover blocks (0..5) */
+ jz .Lctr32_done
+ movq %r11,%rcx
+ leal 1(%rax,%rax,1),%eax  /* eax = 2*eax + 1: undo the earlier shrl $1 before calling the helpers */
+ por %xmm14,%xmm2  /* tail: build only as many counter blocks as are needed and branch on the count */
+ movups (%rdi),%xmm8
+ cmpq $2,%rdx
+ jb .Lctr32_one
+ por %xmm14,%xmm3
+ movups 16(%rdi),%xmm9
+ je .Lctr32_two
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm4
+ movups 32(%rdi),%xmm10
+ cmpq $4,%rdx
+ jb .Lctr32_three
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm5
+ movups 48(%rdi),%xmm11
+ je .Lctr32_four
+ por %xmm14,%xmm6  /* five blocks left: zero the unused sixth lane and use the 6-lane helper */
+ xorps %xmm7,%xmm7
+ call _aesni_encrypt6
+ movups 64(%rdi),%xmm1
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ movups %xmm1,64(%rsi)
+ jmp .Lctr32_done
+.align 16
+ movups (%r8),%xmm2  /* one-block paths: the counter block is used as-is (the shortcut entry reloads it, input and counter here) */
+ movups (%rdi),%xmm8
+ movl 240(%rcx),%eax
+ movups (%rcx),%xmm0  /* inline single-block encrypt of the counter block */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_7
+ aesenclast %xmm1,%xmm2
+ xorps %xmm2,%xmm8
+ movups %xmm8,(%rsi)
+ jmp .Lctr32_done
+.align 16
+ xorps %xmm4,%xmm4  /* two blocks left: zero the unused third lane */
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ movups %xmm9,16(%rsi)
+ jmp .Lctr32_done
+.align 16
+ call _aesni_encrypt3  /* three blocks left */
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ movups %xmm10,32(%rsi)
+ jmp .Lctr32_done
+.align 16
+ call _aesni_encrypt4  /* four blocks left; falls through to the common return */
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ movups %xmm11,48(%rsi)
+ retq  /* common exit (presumably the .Lctr32_done target; its label line is not shown) */
+.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
+.globl aesni_xts_encrypt  /* XTS encrypt: %rdi = input, %rsi = output, %rdx = byte length, %rcx = data key structure, %r8 = tweak key structure, %r9 = 16-byte IV. A trailing partial block is handled by byte-swapping with the previous block (see the end of the routine). */
+.type aesni_xts_encrypt,@function  /* NOTE: label lines are absent from this rendering; .Lxts_magic is defined outside this excerpt. */
+.align 16
+ leaq -104(%rsp),%rsp  /* reserve stack for six saved tweaks (lea leaves flags untouched) */
+ movups (%r9),%xmm15  /* xmm15 = IV */
+ movl 240(%r8),%eax  /* loop counter of the tweak key */
+ movl 240(%rcx),%r10d  /* loop counter of the data key */
+ movups (%r8),%xmm0  /* encrypt the IV under the tweak key to obtain the initial tweak */
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+ aesenc %xmm1,%xmm15
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_8
+ aesenclast %xmm1,%xmm15
+ movq %rcx,%r11  /* r11 = saved data-key pointer */
+ movl %r10d,%eax
+ movq %rdx,%r9  /* r9 = original length; its low 4 bits are needed at the end */
+ andq $-16,%rdx  /* rdx = length of the whole blocks */
+ movdqa .Lxts_magic(%rip),%xmm8  /* constant used in the carry fix-up of each tweak doubling */
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14  /* xmm14 = per-dword sign mask of the tweak (0 > x) */
+ pshufd $19,%xmm14,%xmm9  /* tweak update step, repeated below: save the current tweak, double it with paddq, and XOR in the shuffled, masked sign bits as carry fix-up (presumably the XTS multiply-by-x - confirm against .Lxts_magic) */
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10  /* xmm10 = tweak for block 0 */
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11  /* xmm11 = tweak for block 1 */
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12  /* xmm12 = tweak for block 2 */
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13  /* xmm13 = tweak for block 3 */
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ subq $96,%rdx
+ jc .Lxts_enc_short  /* fewer than 6 whole blocks */
+ shrl $1,%eax  /* counter for the 6-lane loop: halved, minus one because three rounds are unrolled after the loop */
+ subl $1,%eax
+ movl %eax,%r10d
+ jmp .Lxts_enc_grandloop
+.align 16
+ pshufd $19,%xmm14,%xmm9  /* grand loop, 6 blocks per pass: derive tweaks 4 and 5 while loading input */
+ movdqa %xmm15,%xmm14  /* xmm14 = tweak for block 4 */
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15  /* xmm15 = tweak for block 5 */
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2  /* pre-whiten each input block with its tweak */
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2  /* key whitening, interleaved with round 1 and with saving the six tweaks on the stack */
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp)
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+ aesenc %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesenc %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp .Lxts_enc_loop6_enter  /* round 1 done: enter the round loop part-way */
+.align 16
+ aesenc %xmm1,%xmm2  /* round loop over six lanes, two rounds per iteration */
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lxts_enc_loop6
+ pshufd $19,%xmm14,%xmm9  /* remaining rounds are unrolled and interleaved with computing the next pass's tweaks */
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10  /* next pass: tweak for block 0 */
+ paddq %xmm15,%xmm15
+ aesenc %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups 32(%rcx),%xmm0
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11  /* next pass: tweak for block 1 */
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12  /* next pass: tweak for block 2 */
+ paddq %xmm15,%xmm15
+ aesenclast %xmm0,%xmm2  /* final round */
+ pand %xmm8,%xmm9
+ aesenclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13  /* next pass: tweak for block 3 */
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2  /* post-whiten each result with the tweak saved on the stack, then store */
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc .Lxts_enc_grandloop  /* continue while at least 6 more blocks remain */
+ leal 3(%rax,%rax,1),%eax  /* eax = 2*eax + 3: undo the shrl $1 / subl $1 applied before the grand loop */
+ movq %r11,%rcx
+ movl %eax,%r10d
+ addq $96,%rdx  /* short path: rdx = leftover whole-block bytes (0..80) */
+ jz .Lxts_enc_done
+ cmpq $32,%rdx
+ jb .Lxts_enc_one
+ je .Lxts_enc_two
+ cmpq $64,%rdx
+ jb .Lxts_enc_three
+ je .Lxts_enc_four
+ pshufd $19,%xmm14,%xmm9  /* five blocks left: derive the fifth tweak, then use the 6-lane helper (sixth lane's result is not stored) */
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+ call _aesni_encrypt6
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10  /* xmm10 = next unused tweak, needed if a partial block follows */
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ movdqu %xmm5,48(%rsi)
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ jmp .Lxts_enc_done
+.align 16
+ movups (%rdi),%xmm2  /* one block left: inline single-block encrypt between tweak XORs */
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_9
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10  /* xmm10 = next unused tweak */
+ movups %xmm2,(%rsi)
+ leaq 16(%rsi),%rsi
+ jmp .Lxts_enc_done
+.align 16
+ movups (%rdi),%xmm2  /* two blocks left: 3-lane helper, third lane's result is not stored */
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ call _aesni_encrypt3
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10  /* xmm10 = next unused tweak */
+ xorps %xmm11,%xmm3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp .Lxts_enc_done
+.align 16
+ movups (%rdi),%xmm2  /* three blocks left */
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ call _aesni_encrypt3
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10  /* xmm10 = next unused tweak */
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp .Lxts_enc_done
+.align 16
+ movups (%rdi),%xmm2  /* four blocks left */
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+ call _aesni_encrypt4
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10  /* xmm10 = next unused tweak */
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp .Lxts_enc_done
+.align 16
+ andq $15,%r9  /* done with whole blocks: r9 = number of trailing bytes (original length mod 16) */
+ jz .Lxts_enc_ret
+ movq %r9,%rdx
+ movzbl (%rdi),%eax  /* byte-swap loop: each trailing input byte replaces a byte of the last output block, and the displaced output byte becomes the trailing output */
+ movzbl -16(%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,-16(%rsi)
+ movb %cl,0(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz .Lxts_enc_steal
+ subq %r9,%rsi  /* rewind so -16(%rsi) addresses the patched last block again */
+ movq %r11,%rcx
+ movl %r10d,%eax
+ movups -16(%rsi),%xmm2  /* re-encrypt the patched block with the next tweak (xmm10) */
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_10
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,-16(%rsi)
+ leaq 104(%rsp),%rsp  /* common exit: release the stack reserved at entry */
+ retq
+.size aesni_xts_encrypt,.-aesni_xts_encrypt
+.globl aesni_xts_decrypt
+.type aesni_xts_decrypt,@function
+.align 16
+ leaq -104(%rsp),%rsp
+ movups (%r9),%xmm15
+ movl 240(%r8),%eax
+ movl 240(%rcx),%r10d
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+ aesenc %xmm1,%xmm15
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_11
+ aesenclast %xmm1,%xmm15
+ xorl %eax,%eax
+ testq $15,%rdx
+ setnz %al
+ shlq $4,%rax
+ subq %rax,%rdx
+ movq %rcx,%r11
+ movl %r10d,%eax
+ movq %rdx,%r9
+ andq $-16,%rdx
+ movdqa .Lxts_magic(%rip),%xmm8
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ subq $96,%rdx
+ jc .Lxts_dec_short
+ shrl $1,%eax
+ subl $1,%eax
+ movl %eax,%r10d
+ jmp .Lxts_dec_grandloop
+.align 16
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp)
+ aesdec %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+ aesdec %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesdec %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp .Lxts_dec_loop6_enter
+.align 16
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lxts_dec_loop6
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ aesdec %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups 32(%rcx),%xmm0
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ aesdeclast %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesdeclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdeclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc .Lxts_dec_grandloop
+ leal 3(%rax,%rax,1),%eax
+ movq %r11,%rcx
+ movl %eax,%r10d
+ addq $96,%rdx
+ jz .Lxts_dec_done
+ cmpq $32,%rdx
+ jb .Lxts_dec_one
+ je .Lxts_dec_two
+ cmpq $64,%rdx
+ jb .Lxts_dec_three
+ je .Lxts_dec_four
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+ call _aesni_decrypt6
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ pxor %xmm14,%xmm14
+ movdqu %xmm5,48(%rsi)
+ pcmpgtd %xmm15,%xmm14
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ pshufd $19,%xmm14,%xmm11
+ andq $15,%r9
+ jz .Lxts_dec_ret
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm11
+ pxor %xmm15,%xmm11
+ jmp .Lxts_dec_done2
+.align 16
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_12
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10
+ movups %xmm2,(%rsi)
+ movdqa %xmm12,%xmm11
+ leaq 16(%rsi),%rsi
+ jmp .Lxts_dec_done
+.align 16
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ call _aesni_decrypt3
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm13,%xmm11
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp .Lxts_dec_done
+.align 16
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ call _aesni_decrypt3
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp .Lxts_dec_done
+.align 16
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movups (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movups 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+ call _aesni_decrypt4
+ xorps %xmm10,%xmm2
+ movdqa %xmm14,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp .Lxts_dec_done
+.align 16
+ andq $15,%r9
+ jz .Lxts_dec_ret
+ movq %r9,%rdx
+ movq %r11,%rcx
+ movl %r10d,%eax
+ movups (%rdi),%xmm2
+ xorps %xmm11,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_13
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm11,%xmm2
+ movups %xmm2,(%rsi)
+ movzbl 16(%rdi),%eax
+ movzbl (%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,(%rsi)
+ movb %cl,16(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz .Lxts_dec_steal
+ subq %r9,%rsi
+ movq %r11,%rcx
+ movl %r10d,%eax
+ movups (%rsi),%xmm2
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_14
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,(%rsi)
+ leaq 104(%rsp),%rsp
+ retq
+.size aesni_xts_decrypt,.-aesni_xts_decrypt
+.globl aesni_cbc_encrypt /* AES-CBC: %rdi=in, %rsi=out, %rdx=byte length, %rcx=key schedule, %r8=IV (read and written back), %r9d=0 selects decrypt */
+.type aesni_cbc_encrypt,@function
+.align 16
+aesni_cbc_encrypt:
+ testq %rdx,%rdx
+ jz .Lcbc_ret /* zero length: nothing to do */
+ movl 240(%rcx),%r10d /* round counter stored at offset 240 of the key schedule */
+ movq %rcx,%r11 /* keep the key pointer; %rcx is advanced while walking round keys */
+ testl %r9d,%r9d
+ jz .Lcbc_decrypt
+ movups (%r8),%xmm2 /* %xmm2 = IV / previous ciphertext block */
+ movl %r10d,%eax
+ cmpq $16,%rdx
+ jb .Lcbc_enc_tail
+ subq $16,%rdx
+ jmp .Lcbc_enc_loop
+.align 16
+.Lcbc_enc_loop:
+ movups (%rdi),%xmm3 /* next plaintext block */
+ leaq 16(%rdi),%rdi
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ xorps %xmm0,%xmm3 /* fold round key 0 into the plaintext ... */
+ leaq 32(%rcx),%rcx
+ xorps %xmm3,%xmm2 /* ... then chain with the previous ciphertext */
+.Loop_enc1_15:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_15
+ aesenclast %xmm1,%xmm2
+ movl %r10d,%eax /* reset round counter and key pointer for the next block */
+ movq %r11,%rcx
+ movups %xmm2,0(%rsi)
+ leaq 16(%rsi),%rsi
+ subq $16,%rdx
+ jnc .Lcbc_enc_loop
+ addq $16,%rdx /* undo the last subtraction; non-zero means a partial block remains */
+ jnz .Lcbc_enc_tail
+ movups %xmm2,(%r8) /* write the last ciphertext block back as the new IV */
+ jmp .Lcbc_ret
+.Lcbc_enc_tail:
+ movq %rdx,%rcx /* %rcx = number of trailing bytes */
+ xchgq %rdi,%rsi /* %rdi = out, %rsi = in for the string ops below */
+.long 0x9066A4F3 /* bytes F3 A4 66 90: rep movsb + 2-byte nop - copy the partial block to out */
+ movl $16,%ecx
+ subq %rdx,%rcx /* %rcx = 16 - tail length */
+ xorl %eax,%eax
+.long 0x9066AAF3 /* bytes F3 AA 66 90: rep stosb + 2-byte nop - zero-pad the block in out */
+ leaq -16(%rdi),%rdi /* back to the start of the padded block */
+ movl %r10d,%eax
+ movq %rdi,%rsi /* encrypt the padded block in place (in == out) */
+ movq %r11,%rcx
+ xorq %rdx,%rdx /* no bytes remain after this block */
+ jmp .Lcbc_enc_loop
+.align 16
+.Lcbc_decrypt:
+ movups (%r8),%xmm9 /* %xmm9 = IV */
+ movl %r10d,%eax
+ cmpq $112,%rdx
+ jbe .Lcbc_dec_tail /* at most 7 blocks: handled by the tail code */
+ shrl $1,%r10d /* the 8-way code consumes two rounds per loop iteration */
+ subq $112,%rdx
+ movl %r10d,%eax
+ movaps %xmm9,-24(%rsp) /* IV parked below the stack pointer */
+ jmp .Lcbc_dec_loop8_enter
+.align 16
+.Lcbc_dec_loop8:
+ movaps %xmm0,-24(%rsp) /* last ciphertext of the previous batch becomes the chaining value */
+ movups %xmm9,(%rsi) /* store the 8th plaintext block of the previous batch */
+ leaq 16(%rsi),%rsi
+.Lcbc_dec_loop8_enter:
+ movups (%rcx),%xmm0
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ movdqu 32(%rdi),%xmm4
+ xorps %xmm0,%xmm2
+ movdqu 48(%rdi),%xmm5
+ xorps %xmm0,%xmm3
+ movdqu 64(%rdi),%xmm6
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ movdqu 80(%rdi),%xmm7
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqu 96(%rdi),%xmm8
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqu 112(%rdi),%xmm9
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ call .Ldec_loop8_enter /* NOTE(review): target is defined outside this block, presumably inside _aesni_decrypt8 */
+ movups (%rdi),%xmm1 /* reload ciphertext blocks to undo the CBC chaining */
+ movups 16(%rdi),%xmm0
+ xorps -24(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm1
+ xorps %xmm0,%xmm8
+ movups 112(%rdi),%xmm0 /* last ciphertext block of this batch = next chaining value */
+ xorps %xmm1,%xmm9
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movq %r11,%rcx
+ movups %xmm7,80(%rsi)
+ leaq 128(%rdi),%rdi
+ movups %xmm8,96(%rsi)
+ leaq 112(%rsi),%rsi /* only 7 blocks stored; the 8th (%xmm9) is deferred */
+ subq $128,%rdx
+ ja .Lcbc_dec_loop8
+ movaps %xmm9,%xmm2 /* %xmm2 = pending plaintext block */
+ movaps %xmm0,%xmm9 /* %xmm9 = chaining value */
+ addq $112,%rdx
+ jle .Lcbc_dec_tail_collected
+ movups %xmm2,(%rsi)
+ leal 1(%r10,%r10,1),%eax /* 2*%r10+1: restore the full round counter halved above */
+ leaq 16(%rsi),%rsi
+.Lcbc_dec_tail:
+ movups (%rdi),%xmm2
+ movaps %xmm2,%xmm8 /* keep ciphertext copies for the chaining XOR */
+ cmpq $16,%rdx
+ jbe .Lcbc_dec_one
+ movups 16(%rdi),%xmm3
+ movaps %xmm3,%xmm7
+ cmpq $32,%rdx
+ jbe .Lcbc_dec_two
+ movups 32(%rdi),%xmm4
+ movaps %xmm4,%xmm6
+ cmpq $48,%rdx
+ jbe .Lcbc_dec_three
+ movups 48(%rdi),%xmm5
+ cmpq $64,%rdx
+ jbe .Lcbc_dec_four
+ movups 64(%rdi),%xmm6
+ cmpq $80,%rdx
+ jbe .Lcbc_dec_five
+ movups 80(%rdi),%xmm7
+ cmpq $96,%rdx
+ jbe .Lcbc_dec_six
+ movups 96(%rdi),%xmm8
+ movaps %xmm9,-24(%rsp)
+ call _aesni_decrypt8 /* 7 live blocks; the 8th lane (%xmm9) is computed but not used */
+ movups (%rdi),%xmm1
+ movups 16(%rdi),%xmm0
+ xorps -24(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm9
+ xorps %xmm0,%xmm8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movaps %xmm8,%xmm2
+ subq $112,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_16:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_16
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm9,%xmm2
+ movaps %xmm8,%xmm9
+ subq $16,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_two:
+ xorps %xmm4,%xmm4 /* clear the unused third lane */
+ call _aesni_decrypt3
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ movaps %xmm7,%xmm9
+ movaps %xmm3,%xmm2
+ leaq 16(%rsi),%rsi
+ subq $32,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_three:
+ call _aesni_decrypt3
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ movaps %xmm6,%xmm9
+ movaps %xmm4,%xmm2
+ leaq 32(%rsi),%rsi
+ subq $48,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_four:
+ call _aesni_decrypt4
+ xorps %xmm9,%xmm2
+ movups 48(%rdi),%xmm9
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ xorps %xmm6,%xmm5
+ movups %xmm4,32(%rsi)
+ movaps %xmm5,%xmm2
+ leaq 48(%rsi),%rsi
+ subq $64,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_five:
+ xorps %xmm7,%xmm7 /* clear the unused sixth lane */
+ call _aesni_decrypt6
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm9
+ xorps %xmm1,%xmm6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ movaps %xmm6,%xmm2
+ subq $80,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_six:
+ call _aesni_decrypt6
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm0
+ xorps %xmm1,%xmm6
+ movups 80(%rdi),%xmm9
+ xorps %xmm0,%xmm7
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ movaps %xmm7,%xmm2
+ subq $96,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_tail_collected:
+ andq $15,%rdx /* non-zero when the input length was not a multiple of 16 */
+ movups %xmm9,(%r8) /* write the chaining value back as the new IV (movups leaves flags intact) */
+ jnz .Lcbc_dec_tail_partial
+ movups %xmm2,(%rsi) /* store the final, deferred plaintext block */
+ jmp .Lcbc_dec_ret
+.align 16
+.Lcbc_dec_tail_partial:
+ movaps %xmm2,-24(%rsp) /* spill the last block, then copy part of it to out */
+ movq $16,%rcx
+ movq %rsi,%rdi
+ subq %rdx,%rcx /* %rcx = 16 - (len mod 16) bytes are copied */
+ leaq -24(%rsp),%rsi
+.long 0x9066A4F3 /* bytes F3 A4 66 90: rep movsb + 2-byte nop */
+.Lcbc_dec_ret:
+.Lcbc_ret:
+ retq
+.size aesni_cbc_encrypt,.-aesni_cbc_encrypt
+.globl aesni_set_decrypt_key /* builds a decryption key schedule: runs the encrypt-key setup, then reverses the round keys and applies aesimc to the inner ones */
+.type aesni_set_decrypt_key,@function
+.align 16
+aesni_set_decrypt_key:
+ subq $8,%rsp
+ call __aesni_set_encrypt_key /* same arguments; on success %eax=0 and %esi holds the round counter */
+ shll $4,%esi /* round counter * 16 = byte offset */
+ testl %eax,%eax
+ jnz .Ldec_key_ret /* propagate the setup error code unchanged */
+ leaq 16(%rdx,%rsi,1),%rdi /* %rdi = last round key, %rdx = first round key */
+ movups (%rdx),%xmm0
+ movups (%rdi),%xmm1
+ movups %xmm0,(%rdi) /* swap first and last round keys (no aesimc on these two) */
+ movups %xmm1,(%rdx)
+ leaq 16(%rdx),%rdx
+ leaq -16(%rdi),%rdi
+.Ldec_key_inverse:
+ movups (%rdx),%xmm0
+ movups (%rdi),%xmm1
+ aesimc %xmm0,%xmm0
+ aesimc %xmm1,%xmm1
+ leaq 16(%rdx),%rdx
+ leaq -16(%rdi),%rdi
+ movups %xmm0,16(%rdi) /* store transformed keys swapped, at the pre-advance positions */
+ movups %xmm1,-16(%rdx)
+ cmpq %rdx,%rdi
+ ja .Ldec_key_inverse /* continue until the two pointers meet */
+ movups (%rdx),%xmm0 /* middle round key: transform in place */
+ aesimc %xmm0,%xmm0
+ movups %xmm0,(%rdi)
+.Ldec_key_ret:
+ addq $8,%rsp
+ retq
+.size aesni_set_decrypt_key,.-aesni_set_decrypt_key
+.globl aesni_set_encrypt_key /* key expansion: %rdi=user key, %esi=key bits (128/192/256), %rdx=key schedule out; returns 0, -1 (NULL argument) or -2 (bad bit count) in %rax */
+.type aesni_set_encrypt_key,@function
+.align 16
+aesni_set_encrypt_key:
+__aesni_set_encrypt_key:
+ subq $8,%rsp
+ movq $-1,%rax
+ testq %rdi,%rdi
+ jz .Lenc_key_ret
+ testq %rdx,%rdx
+ jz .Lenc_key_ret
+ movups (%rdi),%xmm0 /* first 16 key bytes */
+ xorps %xmm4,%xmm4 /* %xmm4 is scratch for the shufps-based word folding */
+ leaq 16(%rdx),%rax /* %rax = write cursor, starting at round key 1 */
+ cmpl $256,%esi
+ je .L14rounds
+ cmpl $192,%esi
+ je .L12rounds
+ cmpl $128,%esi
+ jne .Lbad_keybits
+.L10rounds:
+ movl $9,%esi /* round counter stored in the schedule (and left in %esi for the caller) */
+ movups %xmm0,(%rdx)
+ aeskeygenassist $1,%xmm0,%xmm1
+ call .Lkey_expansion_128_cold /* first step skips the store: round key 0 was just written */
+ aeskeygenassist $2,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $4,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $8,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $16,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $32,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $64,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $128,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $27,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $54,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ movups %xmm0,(%rax) /* final round key */
+ movl %esi,80(%rax) /* cursor is at schedule+160 here, so this writes offset 240 */
+ xorl %eax,%eax
+ jmp .Lenc_key_ret
+.align 16
+.L12rounds:
+ movq 16(%rdi),%xmm2 /* remaining 8 key bytes */
+ movl $11,%esi
+ movups %xmm0,(%rdx)
+ aeskeygenassist $1,%xmm2,%xmm1
+ call .Lkey_expansion_192a_cold
+ aeskeygenassist $2,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $8,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $32,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $128,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ movups %xmm0,(%rax)
+ movl %esi,48(%rax)
+ xorq %rax,%rax
+ jmp .Lenc_key_ret
+.align 16
+.L14rounds:
+ movups 16(%rdi),%xmm2 /* second 16 key bytes */
+ movl $13,%esi
+ leaq 16(%rax),%rax /* round keys 0 and 1 are the raw key, so start at round key 2 */
+ movups %xmm0,(%rdx)
+ movups %xmm2,16(%rdx)
+ aeskeygenassist $1,%xmm2,%xmm1
+ call .Lkey_expansion_256a_cold
+ aeskeygenassist $1,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $2,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $2,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $4,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $8,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $8,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $16,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $32,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $32,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ movups %xmm0,(%rax)
+ movl %esi,16(%rax)
+ xorq %rax,%rax
+ jmp .Lenc_key_ret
+.align 16
+.Lbad_keybits:
+ movq $-2,%rax
+.Lenc_key_ret:
+ addq $8,%rsp
+ retq
+.align 16
+.Lkey_expansion_128: /* store the current round key, then derive the next one in %xmm0 */
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_128_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1 /* broadcast dword 3 of the aeskeygenassist result */
+ xorps %xmm1,%xmm0
+ retq
+.align 16
+.Lkey_expansion_192a:
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_192a_cold:
+ movaps %xmm2,%xmm5 /* keep the current %xmm2 words; 192b stores them later */
+.Lkey_expansion_192b_warm:
+ shufps $16,%xmm0,%xmm4
+ movdqa %xmm2,%xmm3
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ pslldq $4,%xmm3
+ xorps %xmm4,%xmm0
+ pshufd $85,%xmm1,%xmm1 /* broadcast dword 1 of the aeskeygenassist result */
+ pxor %xmm3,%xmm2
+ pxor %xmm1,%xmm0
+ pshufd $255,%xmm0,%xmm3
+ pxor %xmm3,%xmm2
+ retq
+.align 16
+.Lkey_expansion_192b: /* repack the 24-byte stride into two 16-byte round keys before continuing */
+ movaps %xmm0,%xmm3
+ shufps $68,%xmm0,%xmm5
+ movups %xmm5,(%rax)
+ shufps $78,%xmm2,%xmm3
+ movups %xmm3,16(%rax)
+ leaq 32(%rax),%rax
+ jmp .Lkey_expansion_192b_warm
+.align 16
+.Lkey_expansion_256a: /* store %xmm2, then derive the next even round key in %xmm0 */
+ movups %xmm2,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_256a_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ retq
+.align 16
+.Lkey_expansion_256b: /* store %xmm0, then derive the next odd round key in %xmm2 */
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+ shufps $16,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $140,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $170,%xmm1,%xmm1 /* broadcast dword 2 of the aeskeygenassist result */
+ xorps %xmm1,%xmm2
+ retq
+.size aesni_set_encrypt_key,.-aesni_set_encrypt_key
+.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
+.align 64 /* constant tables; NOTE(review): label names below are presumed from the L$bswap_mask / L$increment32 / L$increment64 references in the macosx variant - confirm against the users earlier in this file */
+.Lbswap_mask:
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 /* pshufb control that reverses all 16 bytes */
+.Lincrement32:
+.long 6,6,6,0 /* adds 6 to each of three 32-bit lanes */
+.Lincrement64:
+.long 1,0,0,0 /* adds 1 to the low 64-bit lane */
+.Lxts_magic:
+.long 0x87,0,1,0
+.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 /* ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
+.align 64
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/crypto/libressl/crypto/aes/aesni-macosx-x86_64.S b/crypto/libressl/crypto/aes/aesni-macosx-x86_64.S
new file mode 100644
index 0000000..6b3216b
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aesni-macosx-x86_64.S
@@ -0,0 +1,2536 @@
+#include "x86_arch.h"
+.globl _aesni_encrypt /* encrypt one 16-byte block: %rdi=in, %rsi=out, %rdx=key schedule */
+.p2align 4
+_aesni_encrypt:
+ movups (%rdi),%xmm2
+ movl 240(%rdx),%eax /* round counter stored at offset 240 of the key schedule */
+ movups (%rdx),%xmm0
+ movups 16(%rdx),%xmm1
+ leaq 32(%rdx),%rdx
+ xorps %xmm0,%xmm2 /* initial round-key whitening */
+L$oop_enc1_1:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rdx),%xmm1 /* movups/leaq leave the flags from decl intact for the jnz */
+ leaq 16(%rdx),%rdx
+ jnz L$oop_enc1_1
+ aesenclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ retq
+.globl _aesni_decrypt /* decrypt one 16-byte block: %rdi=in, %rsi=out, %rdx=key schedule */
+.p2align 4
+_aesni_decrypt:
+ movups (%rdi),%xmm2
+ movl 240(%rdx),%eax /* round counter stored at offset 240 of the key schedule */
+ movups (%rdx),%xmm0
+ movups 16(%rdx),%xmm1
+ leaq 32(%rdx),%rdx
+ xorps %xmm0,%xmm2 /* initial round-key whitening */
+L$oop_dec1_2:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rdx),%xmm1 /* movups/leaq leave the flags from decl intact for the jnz */
+ leaq 16(%rdx),%rdx
+ jnz L$oop_dec1_2
+ aesdeclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ retq
+.p2align 4 /* internal helper: encrypts %xmm2-%xmm4 in parallel; in: %rcx=key schedule, %eax=round counter; clobbers %xmm0, %xmm1, %rcx, %eax */
+_aesni_encrypt3:
+ movups (%rcx),%xmm0
+ shrl $1,%eax /* each loop iteration performs two rounds */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+L$enc_loop3:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz L$enc_loop3
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ retq
+.p2align 4 /* internal helper: decrypts %xmm2-%xmm4 in parallel; in: %rcx=key schedule, %eax=round counter; clobbers %xmm0, %xmm1, %rcx, %eax */
+_aesni_decrypt3:
+ movups (%rcx),%xmm0
+ shrl $1,%eax /* each loop iteration performs two rounds */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+L$dec_loop3:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz L$dec_loop3
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ retq
+.p2align 4 /* internal helper: encrypts %xmm2-%xmm5 in parallel; in: %rcx=key schedule, %eax=round counter; clobbers %xmm0, %xmm1, %rcx, %eax */
+_aesni_encrypt4:
+ movups (%rcx),%xmm0
+ shrl $1,%eax /* each loop iteration performs two rounds */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+L$enc_loop4:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz L$enc_loop4
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ retq
+.p2align 4 /* internal helper: decrypts %xmm2-%xmm5 in parallel; in: %rcx=key schedule, %eax=round counter; clobbers %xmm0, %xmm1, %rcx, %eax */
+_aesni_decrypt4:
+ movups (%rcx),%xmm0
+ shrl $1,%eax /* each loop iteration performs two rounds */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+L$dec_loop4:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz L$dec_loop4
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ retq
+.p2align 4 /* internal helper: encrypts %xmm2-%xmm7 in parallel; in: %rcx=key schedule, %eax=round counter; clobbers %xmm0, %xmm1, %rcx, %eax */
+_aesni_encrypt6:
+ movups (%rcx),%xmm0
+ shrl $1,%eax /* each loop iteration performs two rounds */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2 /* first round is interleaved with the whitening of the other lanes */
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesenc %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm7
+ jmp L$enc_loop6_enter /* first %xmm1 round already done: join the loop at its midpoint */
+.p2align 4
+L$enc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+L$enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz L$enc_loop6
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ retq
+.p2align 4 /* internal helper: decrypts %xmm2-%xmm7 in parallel; in: %rcx=key schedule, %eax=round counter; clobbers %xmm0, %xmm1, %rcx, %eax */
+_aesni_decrypt6:
+ movups (%rcx),%xmm0
+ shrl $1,%eax /* each loop iteration performs two rounds */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ aesdec %xmm1,%xmm2 /* first round is interleaved with the whitening of the other lanes */
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm7
+ jmp L$dec_loop6_enter /* first %xmm1 round already done: join the loop at its midpoint */
+.p2align 4
+L$dec_loop6:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+L$dec_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz L$dec_loop6
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ retq
+.p2align 4 /* internal helper: encrypts %xmm2-%xmm9 in parallel; in: %rcx=key schedule, %eax=round counter; clobbers %xmm0, %xmm1, %rcx, %eax */
+_aesni_encrypt8:
+ movups (%rcx),%xmm0
+ shrl $1,%eax /* each loop iteration performs two rounds */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ aesenc %xmm1,%xmm2 /* first round is interleaved with the whitening of the other lanes */
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesenc %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesenc %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp L$enc_loop8_enter /* next %xmm1 already loaded: join the loop after its reload */
+.p2align 4
+L$enc_loop8:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+L$enc_loop8_enter:
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ aesenc %xmm0,%xmm8
+ aesenc %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ jnz L$enc_loop8
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ aesenclast %xmm0,%xmm8
+ aesenclast %xmm0,%xmm9
+ retq
+.p2align 4 /* internal helper: decrypts %xmm2-%xmm9 in parallel; in: %rcx=key schedule, %eax=round counter; clobbers %xmm0, %xmm1, %rcx, %eax */
+_aesni_decrypt8:
+ movups (%rcx),%xmm0
+ shrl $1,%eax /* each loop iteration performs two rounds */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ aesdec %xmm1,%xmm2 /* first round is interleaved with the whitening of the other lanes */
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp L$dec_loop8_enter /* next %xmm1 already loaded: join the loop after its reload */
+.p2align 4
+L$dec_loop8:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+L$dec_loop8_enter:
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ aesdec %xmm0,%xmm8
+ aesdec %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ jnz L$dec_loop8
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ aesdeclast %xmm0,%xmm8
+ aesdeclast %xmm0,%xmm9
+ retq
+.globl _aesni_ecb_encrypt /* AES-ECB: %rdi=in, %rsi=out, %rdx=byte length, %rcx=key schedule, %r8d=0 selects decrypt */
+.p2align 4
+_aesni_ecb_encrypt:
+ andq $-16,%rdx /* round the length down to whole 16-byte blocks */
+ jz L$ecb_ret
+ movl 240(%rcx),%eax /* round counter stored at offset 240 of the key schedule */
+ movups (%rcx),%xmm0
+ movq %rcx,%r11 /* keep key pointer and round counter; the helpers clobber %rcx/%eax */
+ movl %eax,%r10d
+ testl %r8d,%r8d
+ jz L$ecb_decrypt
+ cmpq $128,%rdx
+ jb L$ecb_enc_tail
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp L$ecb_enc_loop8_enter
+.p2align 4
+L$ecb_enc_loop8:
+ movups %xmm2,(%rsi) /* store the previous batch while loading the next 8 blocks */
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+L$ecb_enc_loop8_enter:
+ call _aesni_encrypt8
+ subq $128,%rdx
+ jnc L$ecb_enc_loop8
+ movups %xmm2,(%rsi) /* flush the final full batch */
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx /* undo the last subtraction: %rdx = remaining bytes (0..112) */
+ jz L$ecb_ret
+L$ecb_enc_tail:
+ movups (%rdi),%xmm2 /* dispatch on 1..7 remaining blocks; movups leaves cmpq flags intact for the je */
+ cmpq $32,%rdx
+ jb L$ecb_enc_one
+ movups 16(%rdi),%xmm3
+ je L$ecb_enc_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb L$ecb_enc_three
+ movups 48(%rdi),%xmm5
+ je L$ecb_enc_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb L$ecb_enc_five
+ movups 80(%rdi),%xmm7
+ je L$ecb_enc_six
+ movdqu 96(%rdi),%xmm8
+ call _aesni_encrypt8 /* 7 live blocks; the %xmm9 lane is computed but never stored */
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_enc1_3:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_3
+ aesenclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_two:
+ xorps %xmm4,%xmm4 /* clear the unused third lane */
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_three:
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_four:
+ call _aesni_encrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_five:
+ xorps %xmm7,%xmm7 /* clear the unused sixth lane */
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_enc_six:
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_decrypt:
+ cmpq $128,%rdx
+ jb L$ecb_dec_tail
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp L$ecb_dec_loop8_enter
+.p2align 4
+L$ecb_dec_loop8:
+ movups %xmm2,(%rsi) /* store the previous batch while loading the next 8 blocks */
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+L$ecb_dec_loop8_enter:
+ call _aesni_decrypt8
+ movups (%r11),%xmm0
+ subq $128,%rdx
+ jnc L$ecb_dec_loop8
+ movups %xmm2,(%rsi) /* flush the final full batch */
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx /* undo the last subtraction: %rdx = remaining bytes (0..112) */
+ jz L$ecb_ret
+L$ecb_dec_tail:
+ movups (%rdi),%xmm2 /* dispatch on 1..7 remaining blocks */
+ cmpq $32,%rdx
+ jb L$ecb_dec_one
+ movups 16(%rdi),%xmm3
+ je L$ecb_dec_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb L$ecb_dec_three
+ movups 48(%rdi),%xmm5
+ je L$ecb_dec_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb L$ecb_dec_five
+ movups 80(%rdi),%xmm7
+ je L$ecb_dec_six
+ movups 96(%rdi),%xmm8
+ movups (%rcx),%xmm0
+ call _aesni_decrypt8 /* 7 live blocks; the %xmm9 lane is computed but never stored */
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_dec1_4:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_dec1_4
+ aesdeclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_two:
+ xorps %xmm4,%xmm4 /* clear the unused third lane */
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_three:
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_four:
+ call _aesni_decrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_five:
+ xorps %xmm7,%xmm7 /* clear the unused sixth lane */
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp L$ecb_ret
+.p2align 4
+L$ecb_dec_six:
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+L$ecb_ret:
+ retq
+.globl _aesni_ccm64_encrypt_blocks /* CCM encrypt: %rdi=in, %rsi=out, %rdx=block count, %rcx=key schedule, %r8=counter block, %r9=16-byte MAC state (read and written back) */
+.p2align 4
+_aesni_ccm64_encrypt_blocks:
+ movl 240(%rcx),%eax /* round counter stored at offset 240 of the key schedule */
+ movdqu (%r8),%xmm9 /* %xmm9 = counter block */
+ movdqa L$increment64(%rip),%xmm6
+ movdqa L$bswap_mask(%rip),%xmm7
+ shrl $1,%eax /* inner loop performs two rounds per iteration */
+ leaq 0(%rcx),%r11
+ movdqu (%r9),%xmm3 /* %xmm3 = running MAC */
+ movdqa %xmm9,%xmm2
+ movl %eax,%r10d
+.byte 102,68,15,56,0,207 /* encodes pshufb %xmm7,%xmm9: byte-swap the counter so paddq can increment it */
+ jmp L$ccm64_enc_outer
+.p2align 4
+L$ccm64_enc_outer:
+ movups (%r11),%xmm0
+ movl %r10d,%eax
+ movups (%rdi),%xmm8 /* plaintext block */
+ xorps %xmm0,%xmm2
+ movups 16(%r11),%xmm1
+ xorps %xmm8,%xmm0
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm3 /* MAC ^= plaintext ^ round key 0 */
+ movups (%rcx),%xmm0
+L$ccm64_enc2_loop:
+ aesenc %xmm1,%xmm2 /* counter and MAC lanes are encrypted side by side */
+ decl %eax
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz L$ccm64_enc2_loop
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ paddq %xmm6,%xmm9 /* advance the byte-swapped counter */
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ decq %rdx
+ leaq 16(%rdi),%rdi
+ xorps %xmm2,%xmm8 /* ciphertext = plaintext ^ keystream */
+ movdqa %xmm9,%xmm2
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215 /* encodes pshufb %xmm7,%xmm2: next counter block back in memory byte order */
+ jnz L$ccm64_enc_outer /* flags still come from the decq above */
+ movups %xmm3,(%r9) /* write the MAC state back */
+ retq
+.globl _aesni_ccm64_decrypt_blocks /* CCM decrypt: %rdi=in, %rsi=out, %rdx=block count, %rcx=key schedule, %r8=counter block, %r9=16-byte MAC state (read and written back) */
+.p2align 4
+_aesni_ccm64_decrypt_blocks:
+ movl 240(%rcx),%eax /* round counter stored at offset 240 of the key schedule */
+ movups (%r8),%xmm9 /* %xmm9 = counter block */
+ movdqu (%r9),%xmm3 /* %xmm3 = running MAC */
+ movdqa L$increment64(%rip),%xmm6
+ movdqa L$bswap_mask(%rip),%xmm7
+ movaps %xmm9,%xmm2
+ movl %eax,%r10d
+ movq %rcx,%r11
+.byte 102,68,15,56,0,207 /* encodes pshufb %xmm7,%xmm9: byte-swap the counter so paddq can increment it */
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+L$oop_enc1_5:
+ aesenc %xmm1,%xmm2 /* keystream for the first block */
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_5
+ aesenclast %xmm1,%xmm2
+ movups (%rdi),%xmm8 /* first ciphertext block */
+ paddq %xmm6,%xmm9
+ leaq 16(%rdi),%rdi
+ jmp L$ccm64_dec_outer
+.p2align 4
+L$ccm64_dec_outer:
+ xorps %xmm2,%xmm8 /* plaintext = ciphertext ^ keystream */
+ movdqa %xmm9,%xmm2
+ movl %r10d,%eax
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215 /* encodes pshufb %xmm7,%xmm2: next counter block back in memory byte order */
+ subq $1,%rdx
+ jz L$ccm64_dec_break /* last block: only the MAC update remains */
+ movups (%r11),%xmm0
+ shrl $1,%eax /* two rounds per iteration in the paired loop */
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm8,%xmm3 /* MAC ^= plaintext ^ round key 0 */
+ movups (%rcx),%xmm0
+L$ccm64_dec2_loop:
+ aesenc %xmm1,%xmm2 /* next keystream block and MAC are encrypted side by side */
+ decl %eax
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz L$ccm64_dec2_loop
+ movups (%rdi),%xmm8
+ paddq %xmm6,%xmm9
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ leaq 16(%rdi),%rdi
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ jmp L$ccm64_dec_outer
+.p2align 4
+L$ccm64_dec_break:
+ movups (%r11),%xmm0 /* %eax still holds the full round counter reloaded in the outer loop */
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%r11
+ xorps %xmm8,%xmm3
+L$oop_enc1_6:
+ aesenc %xmm1,%xmm3 /* final single-lane MAC encryption */
+ decl %eax
+ movups (%r11),%xmm1
+ leaq 16(%r11),%r11
+ jnz L$oop_enc1_6
+ aesenclast %xmm1,%xmm3
+ movups %xmm3,(%r9) /* write the MAC state back */
+ retq
+.globl _aesni_ctr32_encrypt_blocks /* CTR mode with a 32-bit counter in dword 3 of the IV: rdi=input, rsi=output, rdx=number of 16-byte blocks, rcx=key schedule, r8=IV/counter block */
+.p2align 4
+ cmpq $1,%rdx
+ je L$ctr32_one_shortcut /* exactly one block: skip the vector counter setup */
+ movdqu (%r8),%xmm14 /* xmm14 = IV */
+ movdqa L$bswap_mask(%rip),%xmm15
+ xorl %eax,%eax
+.byte 102,69,15,58,22,242,3 /* pextrd $3,%xmm14,%r10d : pull out the counter dword */
+.byte 102,68,15,58,34,240,3 /* pinsrd $3,%eax,%xmm14 : zero the counter dword inside the IV */
+ movl 240(%rcx),%eax /* eax = loop count read from key+240 */
+ bswapl %r10d /* byte-swap the counter so it can be incremented with integer ops */
+ pxor %xmm12,%xmm12
+ pxor %xmm13,%xmm13
+.byte 102,69,15,58,34,226,0 /* pinsrd $0,%r10d,%xmm12 : counter+0 */
+ leaq 3(%r10),%r11
+.byte 102,69,15,58,34,235,0 /* pinsrd $0,%r11d,%xmm13 : counter+3 */
+ incl %r10d
+.byte 102,69,15,58,34,226,1 /* pinsrd $1,%r10d,%xmm12 : counter+1 */
+ incq %r11
+.byte 102,69,15,58,34,235,1 /* pinsrd $1,%r11d,%xmm13 : counter+4 */
+ incl %r10d
+.byte 102,69,15,58,34,226,2 /* pinsrd $2,%r10d,%xmm12 : counter+2 */
+ incq %r11
+.byte 102,69,15,58,34,235,2 /* pinsrd $2,%r11d,%xmm13 : counter+5 */
+ movdqa %xmm12,-40(%rsp) /* park the un-shuffled counters below rsp */
+.byte 102,69,15,56,0,231 /* pshufb %xmm15,%xmm12 */
+ movdqa %xmm13,-24(%rsp)
+.byte 102,69,15,56,0,239 /* pshufb %xmm15,%xmm13 */
+ pshufd $192,%xmm12,%xmm2 /* each pshufd picks one shuffled counter into dword 3, ready to be OR-ed with the IV */
+ pshufd $128,%xmm12,%xmm3
+ pshufd $64,%xmm12,%xmm4
+ cmpq $6,%rdx
+ jb L$ctr32_tail /* fewer than six blocks: go straight to the tail code */
+ shrl $1,%eax /* six-block loop applies two rounds per iteration */
+ movq %rcx,%r11 /* r11 = key pointer, r10d = halved count, both reloaded each pass */
+ movl %eax,%r10d
+ subq $6,%rdx
+ jmp L$ctr32_loop6
+.p2align 4
+ pshufd $192,%xmm13,%xmm5 /* six-block pass: build counter blocks 4..6 while merging the IV into 1..3 */
+ por %xmm14,%xmm2
+ movups (%r11),%xmm0
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm3
+ movups 16(%r11),%xmm1
+ pshufd $64,%xmm13,%xmm7
+ por %xmm14,%xmm4
+ por %xmm14,%xmm5
+ xorps %xmm0,%xmm2 /* round key 0 whitening for all six blocks, interleaved with round 1 */
+ por %xmm14,%xmm6
+ por %xmm14,%xmm7
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ movdqa L$increment32(%rip),%xmm13
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ movdqa -40(%rsp),%xmm12 /* reload the un-shuffled first counter triple */
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ jmp L$ctr32_enc_loop6_enter
+.p2align 4
+ aesenc %xmm1,%xmm2 /* round loop: two round keys (xmm1, xmm0) applied to six blocks per iteration */
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz L$ctr32_enc_loop6
+ aesenc %xmm1,%xmm2
+ paddd %xmm13,%xmm12 /* first counter triple += L$increment32 */
+ aesenc %xmm1,%xmm3
+ paddd -24(%rsp),%xmm13 /* second counter triple += L$increment32 */
+ aesenc %xmm1,%xmm4
+ movdqa %xmm12,-40(%rsp) /* save the advanced counters for the next pass */
+ aesenc %xmm1,%xmm5
+ movdqa %xmm13,-24(%rsp)
+ aesenc %xmm1,%xmm6
+.byte 102,69,15,56,0,231 /* pshufb %xmm15,%xmm12 */
+ aesenc %xmm1,%xmm7
+.byte 102,69,15,56,0,239 /* pshufb %xmm15,%xmm13 */
+ aesenclast %xmm0,%xmm2 /* last round interleaved with loading six input blocks */
+ movups (%rdi),%xmm8
+ aesenclast %xmm0,%xmm3
+ movups 16(%rdi),%xmm9
+ aesenclast %xmm0,%xmm4
+ movups 32(%rdi),%xmm10
+ aesenclast %xmm0,%xmm5
+ movups 48(%rdi),%xmm11
+ aesenclast %xmm0,%xmm6
+ movups 64(%rdi),%xmm1
+ aesenclast %xmm0,%xmm7
+ movups 80(%rdi),%xmm0
+ leaq 96(%rdi),%rdi
+ xorps %xmm2,%xmm8 /* input ^ keystream; xmm2..xmm4 are immediately refilled with the next counters */
+ pshufd $192,%xmm12,%xmm2
+ xorps %xmm3,%xmm9
+ pshufd $128,%xmm12,%xmm3
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ pshufd $64,%xmm12,%xmm4
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ xorps %xmm7,%xmm0
+ movups %xmm1,64(%rsi)
+ movups %xmm0,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movl %r10d,%eax
+ subq $6,%rdx
+ jnc L$ctr32_loop6 /* at least six more blocks remained before the subtraction */
+ addq $6,%rdx /* rdx = leftover block count (0..5) */
+ jz L$ctr32_done
+ movq %r11,%rcx
+ leal 1(%rax,%rax,1),%eax /* eax = 2*eax+1: undo the earlier shrl for an odd count */
+ por %xmm14,%xmm2 /* tail: merge the IV into as many counter blocks as are needed */
+ movups (%rdi),%xmm8
+ cmpq $2,%rdx
+ jb L$ctr32_one
+ por %xmm14,%xmm3
+ movups 16(%rdi),%xmm9
+ je L$ctr32_two
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm4
+ movups 32(%rdi),%xmm10
+ cmpq $4,%rdx
+ jb L$ctr32_three
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm5
+ movups 48(%rdi),%xmm11
+ je L$ctr32_four
+ por %xmm14,%xmm6
+ xorps %xmm7,%xmm7 /* five blocks: sixth lane of the 6-way helper is fed zeros */
+ call _aesni_encrypt6
+ movups 64(%rdi),%xmm1
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ movups %xmm1,64(%rsi)
+ jmp L$ctr32_done
+.p2align 4
+ movups (%r8),%xmm2 /* single-block path: the IV itself is the counter block */
+ movups (%rdi),%xmm8
+ movl 240(%rcx),%eax
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_7
+ aesenclast %xmm1,%xmm2
+ xorps %xmm2,%xmm8
+ movups %xmm8,(%rsi)
+ jmp L$ctr32_done
+.p2align 4
+ xorps %xmm4,%xmm4 /* two blocks: third lane of the 3-way helper is fed zeros */
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ movups %xmm9,16(%rsi)
+ jmp L$ctr32_done
+.p2align 4
+ call _aesni_encrypt3 /* three blocks */
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ movups %xmm10,32(%rsi)
+ jmp L$ctr32_done
+.p2align 4
+ call _aesni_encrypt4 /* four blocks */
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ movups %xmm11,48(%rsi)
+ retq
+.globl _aesni_xts_encrypt /* XTS encrypt: rdi=input, rsi=output, rdx=length in bytes, rcx=data key schedule, r8=tweak key schedule, r9=16-byte IV that is encrypted with the r8 key to form the initial tweak */
+.p2align 4
+ leaq -104(%rsp),%rsp /* reserve 104 bytes; 0..95(%rsp) hold six saved tweaks in the main loop */
+ movups (%r9),%xmm15 /* xmm15 = IV */
+ movl 240(%r8),%eax /* loop count of the tweak key */
+ movl 240(%rcx),%r10d /* loop count of the data key */
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+ aesenc %xmm1,%xmm15
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz L$oop_enc1_8
+ aesenclast %xmm1,%xmm15 /* xmm15 = initial tweak */
+ movq %rcx,%r11 /* r11 = data key pointer, reloaded into rcx as needed */
+ movl %r10d,%eax
+ movq %rdx,%r9 /* r9 = original length; its low 4 bits drive the stealing step at the end */
+ andq $-16,%rdx /* rdx = length rounded down to whole blocks */
+ movdqa L$xts_magic(%rip),%xmm8
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14 /* xmm14 = all-ones in every dword of the tweak whose sign bit is set */
+ pshufd $19,%xmm14,%xmm9 /* tweak update step: shuffle sign masks, AND with L$xts_magic, double both qwords (paddq), XOR the masked constants in */
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10 /* xmm10 = tweak for block 0 */
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11 /* xmm11 = tweak for block 1 */
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12 /* xmm12 = tweak for block 2 */
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13 /* xmm13 = tweak for block 3 */
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ subq $96,%rdx
+ jc L$xts_enc_short /* fewer than six whole blocks */
+ shrl $1,%eax
+ subl $1,%eax /* main loop count = (count>>1)-1; the remaining rounds are unrolled after the loop */
+ movl %eax,%r10d
+ jmp L$xts_enc_grandloop
+.p2align 4
+ pshufd $19,%xmm14,%xmm9 /* six-block pass: finish tweaks 4 (xmm14) and 5 (xmm15) while loading input */
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2 /* input ^ tweak, per block */
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp) /* spill the six tweaks; xmm10..xmm15 are reused for the next batch */
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+ aesenc %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesenc %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp L$xts_enc_loop6_enter
+.p2align 4
+ aesenc %xmm1,%xmm2 /* round loop: two round keys applied to six blocks per iteration */
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz L$xts_enc_loop6
+ pshufd $19,%xmm14,%xmm9 /* unrolled final rounds, interleaved with computing the next batch of tweaks */
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ aesenc %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups 32(%rcx),%xmm0
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ aesenclast %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesenclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2 /* second tweak XOR, using the values spilled at the top of the pass */
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc L$xts_enc_grandloop
+ leal 3(%rax,%rax,1),%eax /* eax = 2*eax+3: rebuild the full count from the main-loop count */
+ movq %r11,%rcx
+ movl %eax,%r10d
+ addq $96,%rdx /* rdx = leftover whole-block bytes (0..80) */
+ jz L$xts_enc_done
+ cmpq $32,%rdx
+ jb L$xts_enc_one
+ je L$xts_enc_two
+ cmpq $64,%rdx
+ jb L$xts_enc_three
+ je L$xts_enc_four
+ pshufd $19,%xmm14,%xmm9 /* five blocks left: derive the fifth tweak into xmm14 and the following one into xmm15 */
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+ call _aesni_encrypt6
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10 /* xmm10 = next unused tweak, consumed by the stealing step */
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ movdqu %xmm5,48(%rsi)
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ jmp L$xts_enc_done
+.p2align 4
+ movups (%rdi),%xmm2 /* one block left */
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_9
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10 /* next unused tweak -> xmm10 */
+ movups %xmm2,(%rsi)
+ leaq 16(%rsi),%rsi
+ jmp L$xts_enc_done
+.p2align 4
+ movups (%rdi),%xmm2 /* two blocks left */
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ call _aesni_encrypt3
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10 /* next unused tweak -> xmm10 */
+ xorps %xmm11,%xmm3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp L$xts_enc_done
+.p2align 4
+ movups (%rdi),%xmm2 /* three blocks left */
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ call _aesni_encrypt3
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10 /* next unused tweak -> xmm10 */
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp L$xts_enc_done
+.p2align 4
+ movups (%rdi),%xmm2 /* four blocks left */
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+ call _aesni_encrypt4
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10 /* next unused tweak -> xmm10 */
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp L$xts_enc_done
+.p2align 4
+ andq $15,%r9 /* r9 = number of trailing bytes beyond the last whole block */
+ jz L$xts_enc_ret
+ movq %r9,%rdx
+ movzbl (%rdi),%eax /* stealing loop: swap each trailing input byte with the matching byte of the previous output block */
+ movzbl -16(%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,-16(%rsi)
+ movb %cl,0(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz L$xts_enc_steal
+ subq %r9,%rsi /* rewind rsi to the end of the patched block */
+ movq %r11,%rcx
+ movl %r10d,%eax
+ movups -16(%rsi),%xmm2 /* re-encrypt the patched block with the tweak in xmm10 */
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_10
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,-16(%rsi)
+ leaq 104(%rsp),%rsp /* release the 104-byte frame */
+ retq
+.globl _aesni_xts_decrypt /* XTS decrypt: rdi=input, rsi=output, rdx=length in bytes, rcx=data key schedule (used with aesdec), r8=tweak key schedule (used with aesenc), r9=16-byte IV */
+.p2align 4
+ leaq -104(%rsp),%rsp /* reserve 104 bytes; 0..95(%rsp) hold six saved tweaks in the main loop */
+ movups (%r9),%xmm15
+ movl 240(%r8),%eax
+ movl 240(%rcx),%r10d
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+ aesenc %xmm1,%xmm15 /* the tweak is produced by encryption even on the decrypt path */
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz L$oop_enc1_11
+ aesenclast %xmm1,%xmm15 /* xmm15 = initial tweak */
+ xorl %eax,%eax
+ testq $15,%rdx
+ setnz %al
+ shlq $4,%rax
+ subq %rax,%rdx /* if the length is not a multiple of 16, hold back one whole block for the stealing step */
+ movq %rcx,%r11
+ movl %r10d,%eax
+ movq %rdx,%r9 /* r9 keeps the adjusted length; its low 4 bits are tested later */
+ andq $-16,%rdx
+ movdqa L$xts_magic(%rip),%xmm8
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14 /* xmm14 = all-ones in every dword of the tweak whose sign bit is set */
+ pshufd $19,%xmm14,%xmm9 /* tweak update step: shuffle sign masks, AND with L$xts_magic, double both qwords (paddq), XOR the masked constants in */
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10 /* xmm10..xmm13 = tweaks for blocks 0..3 */
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ subq $96,%rdx
+ jc L$xts_dec_short /* fewer than six whole blocks */
+ shrl $1,%eax
+ subl $1,%eax /* main loop count = (count>>1)-1; the remaining rounds are unrolled after the loop */
+ movl %eax,%r10d
+ jmp L$xts_dec_grandloop
+.p2align 4
+ pshufd $19,%xmm14,%xmm9 /* six-block pass: finish tweaks 4 (xmm14) and 5 (xmm15) while loading input */
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2 /* input ^ tweak, per block */
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp) /* spill the six tweaks; xmm10..xmm15 are reused for the next batch */
+ aesdec %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+ aesdec %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesdec %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp L$xts_dec_loop6_enter
+.p2align 4
+ aesdec %xmm1,%xmm2 /* round loop: two round keys applied to six blocks per iteration */
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz L$xts_dec_loop6
+ pshufd $19,%xmm14,%xmm9 /* unrolled final rounds, interleaved with computing the next batch of tweaks */
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ aesdec %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups 32(%rcx),%xmm0
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ aesdeclast %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesdeclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdeclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2 /* second tweak XOR, using the values spilled at the top of the pass */
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc L$xts_dec_grandloop
+ leal 3(%rax,%rax,1),%eax /* eax = 2*eax+3: rebuild the full count from the main-loop count */
+ movq %r11,%rcx
+ movl %eax,%r10d
+ addq $96,%rdx /* rdx = leftover whole-block bytes (0..80) */
+ jz L$xts_dec_done
+ cmpq $32,%rdx
+ jb L$xts_dec_one
+ je L$xts_dec_two
+ cmpq $64,%rdx
+ jb L$xts_dec_three
+ je L$xts_dec_four
+ pshufd $19,%xmm14,%xmm9 /* five blocks left: derive the fifth tweak into xmm14 and the following one into xmm15 */
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+ call _aesni_decrypt6
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ pxor %xmm14,%xmm14
+ movdqu %xmm5,48(%rsi)
+ pcmpgtd %xmm15,%xmm14
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ pshufd $19,%xmm14,%xmm11
+ andq $15,%r9
+ jz L$xts_dec_ret /* no trailing partial block */
+ movdqa %xmm15,%xmm10 /* stealing needs two tweaks: xmm10 = current, xmm11 = the one after it */
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm11
+ pxor %xmm15,%xmm11
+ jmp L$xts_dec_done2
+.p2align 4
+ movups (%rdi),%xmm2 /* one block left */
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_dec1_12
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10 /* shift the two following tweaks into xmm10/xmm11 for the stealing step */
+ movups %xmm2,(%rsi)
+ movdqa %xmm12,%xmm11
+ leaq 16(%rsi),%rsi
+ jmp L$xts_dec_done
+.p2align 4
+ movups (%rdi),%xmm2 /* two blocks left */
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ call _aesni_decrypt3
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm13,%xmm11
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp L$xts_dec_done
+.p2align 4
+ movups (%rdi),%xmm2 /* three blocks left */
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ call _aesni_decrypt3
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp L$xts_dec_done
+.p2align 4
+ pshufd $19,%xmm14,%xmm9 /* four blocks left: one more tweak update so two spare tweaks exist afterwards */
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movups (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movups 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+ call _aesni_decrypt4
+ xorps %xmm10,%xmm2
+ movdqa %xmm14,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp L$xts_dec_done
+.p2align 4
+ andq $15,%r9 /* r9 = number of trailing bytes beyond the last whole block */
+ jz L$xts_dec_ret
+ movq %r9,%rdx
+ movq %r11,%rcx
+ movl %r10d,%eax
+ movups (%rdi),%xmm2 /* decrypt the held-back whole block with the later tweak (xmm11) first */
+ xorps %xmm11,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_dec1_13
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm11,%xmm2
+ movups %xmm2,(%rsi)
+ movzbl 16(%rdi),%eax /* stealing loop: swap each trailing input byte with the matching byte of the block just written */
+ movzbl (%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,(%rsi)
+ movb %cl,16(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz L$xts_dec_steal
+ subq %r9,%rsi /* rewind rsi to the start of the patched block */
+ movq %r11,%rcx
+ movl %r10d,%eax
+ movups (%rsi),%xmm2 /* decrypt the patched block with the earlier tweak (xmm10) */
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_dec1_14
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,(%rsi)
+ leaq 104(%rsp),%rsp /* release the 104-byte frame */
+ retq
+.globl _aesni_cbc_encrypt /* CBC encrypt/decrypt: rdi=input, rsi=output, rdx=length in bytes, rcx=key schedule, r8=16-byte IV (updated on exit), r9d=nonzero to encrypt, zero to decrypt */
+.p2align 4
+ testq %rdx,%rdx
+ jz L$cbc_ret /* zero length: nothing to do */
+ movl 240(%rcx),%r10d /* r10d = loop count read from key+240 */
+ movq %rcx,%r11 /* r11 = key pointer, reloaded into rcx after each block */
+ testl %r9d,%r9d
+ jz L$cbc_decrypt
+ movups (%r8),%xmm2 /* encrypt path: xmm2 carries the chaining value, starting with the IV */
+ movl %r10d,%eax
+ cmpq $16,%rdx
+ jb L$cbc_enc_tail /* less than one block of input */
+ subq $16,%rdx
+ jmp L$cbc_enc_loop
+.p2align 4
+ movups (%rdi),%xmm3 /* per-block encrypt: xmm2 = E(xmm2 ^ input) */
+ leaq 16(%rdi),%rdi
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ xorps %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ xorps %xmm3,%xmm2
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_enc1_15
+ aesenclast %xmm1,%xmm2
+ movl %r10d,%eax
+ movq %r11,%rcx
+ movups %xmm2,0(%rsi)
+ leaq 16(%rsi),%rsi
+ subq $16,%rdx
+ jnc L$cbc_enc_loop
+ addq $16,%rdx /* rdx = leftover byte count (0..15) */
+ jnz L$cbc_enc_tail
+ movups %xmm2,(%r8) /* store the last ciphertext block as the new IV */
+ jmp L$cbc_ret
+ movq %rdx,%rcx /* short tail: copy rdx input bytes into the output buffer ... */
+ xchgq %rdi,%rsi
+.long 0x9066A4F3 /* bytes F3 A4 66 90: rep movsb, then a 2-byte nop */
+ movl $16,%ecx
+ subq %rdx,%rcx
+ xorl %eax,%eax
+.long 0x9066AAF3 /* bytes F3 AA 66 90: rep stosb (zero-fill up to 16 bytes), then a 2-byte nop */
+ leaq -16(%rdi),%rdi
+ movl %r10d,%eax
+ movq %rdi,%rsi /* ... then encrypt that padded block in place in the output buffer */
+ movq %r11,%rcx
+ xorq %rdx,%rdx
+ jmp L$cbc_enc_loop
+.p2align 4
+ movups (%r8),%xmm9 /* decrypt path: xmm9 carries the previous ciphertext block, starting with the IV */
+ movl %r10d,%eax
+ cmpq $112,%rdx
+ jbe L$cbc_dec_tail /* at most seven blocks: handled by the tail code */
+ shrl $1,%r10d /* eight-block loop applies two rounds per iteration */
+ subq $112,%rdx
+ movl %r10d,%eax
+ movaps %xmm9,-24(%rsp) /* chaining value is kept below rsp while xmm9 is used as a data lane */
+ jmp L$cbc_dec_loop8_enter
+.p2align 4
+ movaps %xmm0,-24(%rsp) /* loop back-edge: save new chaining value, flush the eighth block of the previous pass */
+ movups %xmm9,(%rsi)
+ leaq 16(%rsi),%rsi
+ movups (%rcx),%xmm0
+ movups (%rdi),%xmm2 /* load eight ciphertext blocks into xmm2..xmm9, interleaved with round-key-0 XOR and round 1 */
+ movups 16(%rdi),%xmm3
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ movdqu 32(%rdi),%xmm4
+ xorps %xmm0,%xmm2
+ movdqu 48(%rdi),%xmm5
+ xorps %xmm0,%xmm3
+ movdqu 64(%rdi),%xmm6
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ movdqu 80(%rdi),%xmm7
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqu 96(%rdi),%xmm8
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqu 112(%rdi),%xmm9
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ call L$dec_loop8_enter /* remaining rounds run in code at L$dec_loop8_enter (not shown in this routine; presumably inside the 8-way decrypt helper) */
+ movups (%rdi),%xmm1 /* XOR each decrypted block with the preceding ciphertext block */
+ movups 16(%rdi),%xmm0
+ xorps -24(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm1
+ xorps %xmm0,%xmm8
+ movups 112(%rdi),%xmm0 /* xmm0 = last ciphertext block of this batch = next chaining value */
+ xorps %xmm1,%xmm9
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movq %r11,%rcx
+ movups %xmm7,80(%rsi)
+ leaq 128(%rdi),%rdi
+ movups %xmm8,96(%rsi)
+ leaq 112(%rsi),%rsi /* only seven blocks stored here; the eighth (xmm9) is written on the next pass or below */
+ subq $128,%rdx
+ ja L$cbc_dec_loop8
+ movaps %xmm9,%xmm2 /* xmm2 = pending output block, xmm9 = chaining value again */
+ movaps %xmm0,%xmm9
+ addq $112,%rdx
+ jle L$cbc_dec_tail_collected
+ movups %xmm2,(%rsi)
+ leal 1(%r10,%r10,1),%eax /* eax = 2*r10+1: rebuild the full count from the halved value */
+ leaq 16(%rsi),%rsi
+ movups (%rdi),%xmm2 /* tail: load up to seven blocks, keeping ciphertext copies for the chaining XOR */
+ movaps %xmm2,%xmm8
+ cmpq $16,%rdx
+ jbe L$cbc_dec_one
+ movups 16(%rdi),%xmm3
+ movaps %xmm3,%xmm7
+ cmpq $32,%rdx
+ jbe L$cbc_dec_two
+ movups 32(%rdi),%xmm4
+ movaps %xmm4,%xmm6
+ cmpq $48,%rdx
+ jbe L$cbc_dec_three
+ movups 48(%rdi),%xmm5
+ cmpq $64,%rdx
+ jbe L$cbc_dec_four
+ movups 64(%rdi),%xmm6
+ cmpq $80,%rdx
+ jbe L$cbc_dec_five
+ movups 80(%rdi),%xmm7
+ cmpq $96,%rdx
+ jbe L$cbc_dec_six
+ movups 96(%rdi),%xmm8 /* seven blocks: use the 8-way helper */
+ movaps %xmm9,-24(%rsp)
+ call _aesni_decrypt8
+ movups (%rdi),%xmm1
+ movups 16(%rdi),%xmm0
+ xorps -24(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm9 /* last ciphertext block becomes the new chaining value */
+ xorps %xmm0,%xmm8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movaps %xmm8,%xmm2 /* final block is left in xmm2 for the common exit code */
+ subq $112,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+ movups (%rcx),%xmm0 /* one block */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz L$oop_dec1_16
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm9,%xmm2
+ movaps %xmm8,%xmm9
+ subq $16,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+ xorps %xmm4,%xmm4 /* two blocks: third lane of the 3-way helper is fed zeros */
+ call _aesni_decrypt3
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ movaps %xmm7,%xmm9
+ movaps %xmm3,%xmm2
+ leaq 16(%rsi),%rsi
+ subq $32,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+ call _aesni_decrypt3 /* three blocks */
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ movaps %xmm6,%xmm9
+ movaps %xmm4,%xmm2
+ leaq 32(%rsi),%rsi
+ subq $48,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+ call _aesni_decrypt4 /* four blocks */
+ xorps %xmm9,%xmm2
+ movups 48(%rdi),%xmm9
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ xorps %xmm6,%xmm5
+ movups %xmm4,32(%rsi)
+ movaps %xmm5,%xmm2
+ leaq 48(%rsi),%rsi
+ subq $64,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+ xorps %xmm7,%xmm7 /* five blocks: sixth lane of the 6-way helper is fed zeros */
+ call _aesni_decrypt6
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm9
+ xorps %xmm1,%xmm6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ movaps %xmm6,%xmm2
+ subq $80,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+ call _aesni_decrypt6 /* six blocks */
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm0
+ xorps %xmm1,%xmm6
+ movups 80(%rdi),%xmm9
+ xorps %xmm0,%xmm7
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ movaps %xmm7,%xmm2
+ subq $96,%rdx
+ jmp L$cbc_dec_tail_collected
+.p2align 4
+ andq $15,%rdx /* common decrypt exit: nonzero low bits select the partial-block copy */
+ movups %xmm9,(%r8) /* store the last ciphertext block as the new IV */
+ jnz L$cbc_dec_tail_partial
+ movups %xmm2,(%rsi) /* write the pending final block */
+ jmp L$cbc_dec_ret
+.p2align 4
+ movaps %xmm2,-24(%rsp) /* partial final block: stage xmm2 below rsp and byte-copy rcx = 16 - rdx bytes to the output */
+ movq $16,%rcx
+ movq %rsi,%rdi
+ subq %rdx,%rcx
+ leaq -24(%rsp),%rsi
+.long 0x9066A4F3 /* bytes F3 A4 66 90: rep movsb, then a 2-byte nop */
+ retq
+.globl _aesni_set_decrypt_key /* Build a decryption key schedule in the buffer at rdx: run the encrypt-key expansion, then reverse the round-key order and apply aesimc to the inner keys. eax returned by the expansion is passed through. */
+.p2align 4
+ subq $8,%rsp
+ call __aesni_set_encrypt_key /* NOTE(review): target label is not visible here; presumably an internal entry into the encrypt-key expansion — confirm */
+ shll $4,%esi /* esi *= 16: byte offset derived from the count left in esi by the expansion */
+ testl %eax,%eax
+ jnz L$dec_key_ret /* expansion failed: return its status unchanged */
+ leaq 16(%rdx,%rsi,1),%rdi /* rdi = rdx + esi + 16, the far end of the schedule */
+ movups (%rdx),%xmm0 /* swap the first and last round keys (no aesimc on these two) */
+ movups (%rdi),%xmm1
+ movups %xmm0,(%rdi)
+ movups %xmm1,(%rdx)
+ leaq 16(%rdx),%rdx
+ leaq -16(%rdi),%rdi
+ movups (%rdx),%xmm0 /* walk inward from both ends: aesimc both keys and store them swapped */
+ movups (%rdi),%xmm1
+ aesimc %xmm0,%xmm0
+ aesimc %xmm1,%xmm1
+ leaq 16(%rdx),%rdx
+ leaq -16(%rdi),%rdi
+ movups %xmm0,16(%rdi)
+ movups %xmm1,-16(%rdx)
+ cmpq %rdx,%rdi
+ ja L$dec_key_inverse /* continue while the two pointers have not met */
+ movups (%rdx),%xmm0 /* middle key: aesimc only */
+ aesimc %xmm0,%xmm0
+ movups %xmm0,(%rdi)
+ addq $8,%rsp
+ retq
+.globl _aesni_set_encrypt_key /* Expand a user key: rdi=user key bytes, esi=key size in bits (128, 192 or 256), rdx=output key schedule. Returns rax = 0 on success, -1 if rdi or rdx is NULL, -2 for any other bit size. */
+.p2align 4
+ subq $8,%rsp
+ movq $-1,%rax /* preset the NULL-pointer error code */
+ testq %rdi,%rdi
+ jz L$enc_key_ret
+ testq %rdx,%rdx
+ jz L$enc_key_ret
+ movups (%rdi),%xmm0 /* xmm0 = first 16 bytes of the user key */
+ xorps %xmm4,%xmm4 /* xmm4 is scratch for the shufps-based mixing in the helpers */
+ leaq 16(%rdx),%rax /* rax = write cursor for round keys after the first */
+ cmpl $256,%esi
+ je L$14rounds
+ cmpl $192,%esi
+ je L$12rounds
+ cmpl $128,%esi
+ jne L$bad_keybits
+ movl $9,%esi /* 128-bit key: count value 9 is stored into the schedule below */
+ movups %xmm0,(%rdx) /* round key 0 = the user key itself */
+ aeskeygenassist $1,%xmm0,%xmm1 /* immediates 1,2,4,...,128,27,54 are the per-round constants */
+ call L$key_expansion_128_cold
+ aeskeygenassist $2,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $4,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $8,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $16,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $32,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $64,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $128,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $27,%xmm0,%xmm1
+ call L$key_expansion_128
+ aeskeygenassist $54,%xmm0,%xmm1
+ call L$key_expansion_128
+ movups %xmm0,(%rax) /* store the final round key */
+ movl %esi,80(%rax) /* store the count; presumably lands at schedule+240, the offset the cipher routines read — confirm */
+ xorl %eax,%eax /* return 0 */
+ jmp L$enc_key_ret
+.p2align 4
+ movq 16(%rdi),%xmm2 /* 192-bit key: xmm2 = user key bytes 16..23 */
+ movl $11,%esi
+ movups %xmm0,(%rdx)
+ aeskeygenassist $1,%xmm2,%xmm1
+ call L$key_expansion_192a_cold
+ aeskeygenassist $2,%xmm2,%xmm1
+ call L$key_expansion_192b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call L$key_expansion_192a
+ aeskeygenassist $8,%xmm2,%xmm1
+ call L$key_expansion_192b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call L$key_expansion_192a
+ aeskeygenassist $32,%xmm2,%xmm1
+ call L$key_expansion_192b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call L$key_expansion_192a
+ aeskeygenassist $128,%xmm2,%xmm1
+ call L$key_expansion_192b
+ movups %xmm0,(%rax)
+ movl %esi,48(%rax) /* store the count (11) */
+ xorq %rax,%rax /* return 0 */
+ jmp L$enc_key_ret
+.p2align 4
+ movups 16(%rdi),%xmm2 /* 256-bit key: xmm2 = user key bytes 16..31 */
+ movl $13,%esi
+ leaq 16(%rax),%rax /* two round keys are stored directly, so the cursor starts one slot further */
+ movups %xmm0,(%rdx)
+ movups %xmm2,16(%rdx)
+ aeskeygenassist $1,%xmm2,%xmm1
+ call L$key_expansion_256a_cold
+ aeskeygenassist $1,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $2,%xmm2,%xmm1
+ call L$key_expansion_256a
+ aeskeygenassist $2,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call L$key_expansion_256a
+ aeskeygenassist $4,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $8,%xmm2,%xmm1
+ call L$key_expansion_256a
+ aeskeygenassist $8,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call L$key_expansion_256a
+ aeskeygenassist $16,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $32,%xmm2,%xmm1
+ call L$key_expansion_256a
+ aeskeygenassist $32,%xmm0,%xmm1
+ call L$key_expansion_256b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call L$key_expansion_256a
+ movups %xmm0,(%rax)
+ movl %esi,16(%rax) /* store the count (13) */
+ xorq %rax,%rax /* return 0 */
+ jmp L$enc_key_ret
+.p2align 4
+ movq $-2,%rax /* unsupported key size */
+ addq $8,%rsp /* common exit */
+ retq
+.p2align 4
+ movups %xmm0,(%rax) /* helper (presumably L$key_expansion_128): store xmm0 as a round key and advance the cursor ... */
+ leaq 16(%rax),%rax
+ shufps $16,%xmm0,%xmm4 /* ... then derive the next round key in xmm0 from xmm0 and the aeskeygenassist result in xmm1 */
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1 /* broadcast dword 3 of the aeskeygenassist result */
+ xorps %xmm1,%xmm0
+ retq
+.p2align 4
+ movups %xmm0,(%rax) /* helper (presumably L$key_expansion_192a): store xmm0, then advance both xmm0 and the extra 64 bits in xmm2 */
+ leaq 16(%rax),%rax
+ movaps %xmm2,%xmm5 /* xmm5 keeps the old xmm2 for the companion helper's stores */
+ shufps $16,%xmm0,%xmm4
+ movdqa %xmm2,%xmm3
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ pslldq $4,%xmm3
+ xorps %xmm4,%xmm0
+ pshufd $85,%xmm1,%xmm1 /* broadcast dword 1 of the aeskeygenassist result */
+ pxor %xmm3,%xmm2
+ pxor %xmm1,%xmm0
+ pshufd $255,%xmm0,%xmm3
+ pxor %xmm3,%xmm2
+ retq
+.p2align 4
+ movaps %xmm0,%xmm3 /* helper (presumably L$key_expansion_192b): repack xmm5/xmm0/xmm2 into two 16-byte round keys and store both */
+ shufps $68,%xmm0,%xmm5
+ movups %xmm5,(%rax)
+ shufps $78,%xmm2,%xmm3
+ movups %xmm3,16(%rax)
+ leaq 32(%rax),%rax
+ jmp L$key_expansion_192b_warm /* continue with the shared mixing code */
+.p2align 4
+ movups %xmm2,(%rax) /* helper (presumably L$key_expansion_256a): store xmm2, then update xmm0 */
+ leaq 16(%rax),%rax
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1 /* broadcast dword 3 of the aeskeygenassist result */
+ xorps %xmm1,%xmm0
+ retq
+.p2align 4
+ movups %xmm0,(%rax) /* helper (presumably L$key_expansion_256b): store xmm0, then update xmm2 */
+ leaq 16(%rax),%rax
+ shufps $16,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $140,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $170,%xmm1,%xmm1 /* broadcast dword 2 of the aeskeygenassist result */
+ xorps %xmm1,%xmm2
+ retq
+.p2align 6
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 /* 16-byte reversal mask for pshufb (presumably L$bswap_mask) */
+.long 6,6,6,0 /* presumably L$increment32: adds 6 to three packed 32-bit counters */
+.long 1,0,0,0 /* presumably L$increment64: adds 1 to the low 64-bit lane */
+.long 0x87,0,1,0 /* presumably L$xts_magic: constants ANDed with the sign masks during the XTS tweak update */
+.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 /* ASCII "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated */
+.p2align 6
diff --git a/crypto/libressl/crypto/aes/aesni-masm-x86_64.S b/crypto/libressl/crypto/aes/aesni-masm-x86_64.S
new file mode 100644
index 0000000..f2a2490
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aesni-masm-x86_64.S
@@ -0,0 +1,3099 @@
+; 1 "crypto/aes/aesni-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/aes/aesni-masm-x86_64.S.tmp" 2
+; 1 "./crypto/x86_arch.h" 1
+; 16 "./crypto/x86_arch.h"
+; 40 "./crypto/x86_arch.h"
+; 3 "crypto/aes/aesni-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+PUBLIC aesni_encrypt
+aesni_encrypt PROC PUBLIC ;AES-encrypt one 16-byte block: reads [rcx], writes [rdx], key schedule at [r8]
+ movups xmm2,XMMWORD PTR[rcx] ;xmm2 = input block (unaligned load)
+ mov eax,DWORD PTR[240+r8] ;eax = loop counter read from key schedule offset 240 (presumably the round count - confirm)
+ movups xmm0,XMMWORD PTR[r8] ;xmm0 = first 16 bytes of the key schedule
+ movups xmm1,XMMWORD PTR[16+r8] ;xmm1 = next 16 bytes of the key schedule
+ lea r8,QWORD PTR[32+r8] ;advance r8 past the two keys just loaded
+ xorps xmm2,xmm0 ;initial key XOR
+ aesenc xmm2,xmm1 ;one AES round
+ dec eax ;sets ZF for the jnz below (movups/lea leave flags untouched)
+ movups xmm1,XMMWORD PTR[r8] ;fetch the next round key
+ lea r8,QWORD PTR[16+r8]
+ jnz $L$oop_enc1_1 ;NOTE(review): target label is not defined in the visible text - presumably sits on the aesenc above; confirm
+ aesenclast xmm2,xmm1 ;final round uses the last key fetched
+ movups XMMWORD PTR[rdx],xmm2 ;store result
+ DB 0F3h,0C3h ;repret
+aesni_encrypt ENDP
+PUBLIC aesni_decrypt
+aesni_decrypt PROC PUBLIC ;AES-decrypt one 16-byte block: reads [rcx], writes [rdx], key schedule at [r8]
+ movups xmm2,XMMWORD PTR[rcx] ;xmm2 = input block (unaligned load)
+ mov eax,DWORD PTR[240+r8] ;eax = loop counter read from key schedule offset 240 (presumably the round count - confirm)
+ movups xmm0,XMMWORD PTR[r8] ;xmm0 = first 16 bytes of the key schedule
+ movups xmm1,XMMWORD PTR[16+r8] ;xmm1 = next 16 bytes of the key schedule
+ lea r8,QWORD PTR[32+r8] ;advance r8 past the two keys just loaded
+ xorps xmm2,xmm0 ;initial key XOR
+ aesdec xmm2,xmm1 ;one AES decryption round
+ dec eax ;sets ZF for the jnz below (movups/lea leave flags untouched)
+ movups xmm1,XMMWORD PTR[r8] ;fetch the next round key
+ lea r8,QWORD PTR[16+r8]
+ jnz $L$oop_dec1_2 ;NOTE(review): target label is not defined in the visible text - presumably sits on the aesdec above; confirm
+ aesdeclast xmm2,xmm1 ;final round uses the last key fetched
+ movups XMMWORD PTR[rdx],xmm2 ;store result
+ DB 0F3h,0C3h ;repret
+aesni_decrypt ENDP
+_aesni_encrypt3 PROC PRIVATE ;encrypt xmm2,xmm3,xmm4 in place, interleaved; rcx = key schedule, eax = counter set by caller; clobbers xmm0,xmm1,rcx,eax
+ movups xmm0,XMMWORD PTR[rcx] ;xmm0 = first round key
+ shr eax,1 ;halve the counter - presumably because each pass below applies two round keys (xmm1 then xmm0)
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0 ;initial key XOR on all three lanes
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ aesenc xmm2,xmm1 ;round with key in xmm1
+ aesenc xmm3,xmm1
+ dec eax ;flags consumed by the jnz below; the intervening SSE/lea instructions do not alter them
+ aesenc xmm4,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0 ;round with key in xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$enc_loop3 ;NOTE(review): target label is not defined in the visible text - confirm its position
+ aesenc xmm2,xmm1 ;last full round
+ aesenc xmm3,xmm1
+ aesenc xmm4,xmm1
+ aesenclast xmm2,xmm0 ;final round
+ aesenclast xmm3,xmm0
+ aesenclast xmm4,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_encrypt3 ENDP
+_aesni_decrypt3 PROC PRIVATE ;decrypt xmm2,xmm3,xmm4 in place, interleaved; rcx = key schedule, eax = counter set by caller; clobbers xmm0,xmm1,rcx,eax
+ movups xmm0,XMMWORD PTR[rcx] ;xmm0 = first round key
+ shr eax,1 ;halve the counter - presumably because each pass below applies two round keys (xmm1 then xmm0)
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0 ;initial key XOR on all three lanes
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ aesdec xmm2,xmm1 ;round with key in xmm1
+ aesdec xmm3,xmm1
+ dec eax ;flags consumed by the jnz below; the intervening SSE/lea instructions do not alter them
+ aesdec xmm4,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesdec xmm2,xmm0 ;round with key in xmm0
+ aesdec xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesdec xmm4,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$dec_loop3 ;NOTE(review): target label is not defined in the visible text - confirm its position
+ aesdec xmm2,xmm1 ;last full round
+ aesdec xmm3,xmm1
+ aesdec xmm4,xmm1
+ aesdeclast xmm2,xmm0 ;final round
+ aesdeclast xmm3,xmm0
+ aesdeclast xmm4,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_decrypt3 ENDP
+_aesni_encrypt4 PROC PRIVATE ;encrypt xmm2..xmm5 in place, interleaved; rcx = key schedule, eax = counter set by caller; clobbers xmm0,xmm1,rcx,eax
+ movups xmm0,XMMWORD PTR[rcx] ;xmm0 = first round key
+ shr eax,1 ;halve the counter - presumably because each pass below applies two round keys (xmm1 then xmm0)
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0 ;initial key XOR on all four lanes
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ xorps xmm5,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ aesenc xmm2,xmm1 ;round with key in xmm1
+ aesenc xmm3,xmm1
+ dec eax ;flags consumed by the jnz below; the intervening SSE/lea instructions do not alter them
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0 ;round with key in xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ aesenc xmm5,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$enc_loop4 ;NOTE(review): target label is not defined in the visible text - confirm its position
+ aesenc xmm2,xmm1 ;last full round
+ aesenc xmm3,xmm1
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenclast xmm2,xmm0 ;final round
+ aesenclast xmm3,xmm0
+ aesenclast xmm4,xmm0
+ aesenclast xmm5,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_encrypt4 ENDP
+_aesni_decrypt4 PROC PRIVATE ;decrypt xmm2..xmm5 in place, interleaved; rcx = key schedule, eax = counter set by caller; clobbers xmm0,xmm1,rcx,eax
+ movups xmm0,XMMWORD PTR[rcx] ;xmm0 = first round key
+ shr eax,1 ;halve the counter - presumably because each pass below applies two round keys (xmm1 then xmm0)
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0 ;initial key XOR on all four lanes
+ xorps xmm3,xmm0
+ xorps xmm4,xmm0
+ xorps xmm5,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ aesdec xmm2,xmm1 ;round with key in xmm1
+ aesdec xmm3,xmm1
+ dec eax ;flags consumed by the jnz below; the intervening SSE/lea instructions do not alter them
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesdec xmm2,xmm0 ;round with key in xmm0
+ aesdec xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesdec xmm4,xmm0
+ aesdec xmm5,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$dec_loop4 ;NOTE(review): target label is not defined in the visible text - confirm its position
+ aesdec xmm2,xmm1 ;last full round
+ aesdec xmm3,xmm1
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdeclast xmm2,xmm0 ;final round
+ aesdeclast xmm3,xmm0
+ aesdeclast xmm4,xmm0
+ aesdeclast xmm5,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_decrypt4 ENDP
+_aesni_encrypt6 PROC PRIVATE ;encrypt xmm2..xmm7 in place, interleaved; rcx = key schedule, eax = counter set by caller; clobbers xmm0,xmm1,rcx,eax
+ movups xmm0,XMMWORD PTR[rcx] ;xmm0 = first round key
+ shr eax,1 ;halve the counter - presumably because each pass below applies two round keys (xmm1 then xmm0)
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0 ;initial key XOR, interleaved with the first aesenc round to hide latency
+ pxor xmm3,xmm0
+ aesenc xmm2,xmm1
+ pxor xmm4,xmm0
+ aesenc xmm3,xmm1
+ pxor xmm5,xmm0
+ aesenc xmm4,xmm1
+ pxor xmm6,xmm0
+ aesenc xmm5,xmm1
+ pxor xmm7,xmm0
+ dec eax
+ aesenc xmm6,xmm1
+ movups xmm0,XMMWORD PTR[rcx]
+ aesenc xmm7,xmm1
+ jmp $L$enc_loop6_enter ;NOTE(review): target label is not defined in the visible text - presumably mid-way through the code below, after the xmm1 round; confirm
+ aesenc xmm2,xmm1 ;reachable only by a branch (follows an unconditional jmp); presumably $L$enc_loop6
+ aesenc xmm3,xmm1
+ dec eax ;flags consumed by the jnz below
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0 ;round with key in xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ aesenc xmm5,xmm0
+ aesenc xmm6,xmm0
+ aesenc xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$enc_loop6 ;NOTE(review): target label is not defined in the visible text - confirm
+ aesenc xmm2,xmm1 ;last full round
+ aesenc xmm3,xmm1
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ aesenclast xmm2,xmm0 ;final round
+ aesenclast xmm3,xmm0
+ aesenclast xmm4,xmm0
+ aesenclast xmm5,xmm0
+ aesenclast xmm6,xmm0
+ aesenclast xmm7,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_encrypt6 ENDP
+_aesni_decrypt6 PROC PRIVATE ;decrypt xmm2..xmm7 in place, interleaved; rcx = key schedule, eax = counter set by caller; clobbers xmm0,xmm1,rcx,eax
+ movups xmm0,XMMWORD PTR[rcx] ;xmm0 = first round key
+ shr eax,1 ;halve the counter - presumably because each pass below applies two round keys (xmm1 then xmm0)
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0 ;initial key XOR, interleaved with the first aesdec round to hide latency
+ pxor xmm3,xmm0
+ aesdec xmm2,xmm1
+ pxor xmm4,xmm0
+ aesdec xmm3,xmm1
+ pxor xmm5,xmm0
+ aesdec xmm4,xmm1
+ pxor xmm6,xmm0
+ aesdec xmm5,xmm1
+ pxor xmm7,xmm0
+ dec eax
+ aesdec xmm6,xmm1
+ movups xmm0,XMMWORD PTR[rcx]
+ aesdec xmm7,xmm1
+ jmp $L$dec_loop6_enter ;NOTE(review): target label is not defined in the visible text - presumably mid-way through the code below, after the xmm1 round; confirm
+ aesdec xmm2,xmm1 ;reachable only by a branch (follows an unconditional jmp); presumably $L$dec_loop6
+ aesdec xmm3,xmm1
+ dec eax ;flags consumed by the jnz below
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesdec xmm2,xmm0 ;round with key in xmm0
+ aesdec xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesdec xmm4,xmm0
+ aesdec xmm5,xmm0
+ aesdec xmm6,xmm0
+ aesdec xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$dec_loop6 ;NOTE(review): target label is not defined in the visible text - confirm
+ aesdec xmm2,xmm1 ;last full round
+ aesdec xmm3,xmm1
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+ aesdeclast xmm2,xmm0 ;final round
+ aesdeclast xmm3,xmm0
+ aesdeclast xmm4,xmm0
+ aesdeclast xmm5,xmm0
+ aesdeclast xmm6,xmm0
+ aesdeclast xmm7,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_decrypt6 ENDP
+_aesni_encrypt8 PROC PRIVATE ;encrypt xmm2..xmm9 in place, interleaved; rcx = key schedule, eax = counter set by caller; clobbers xmm0,xmm1,rcx,eax
+ movups xmm0,XMMWORD PTR[rcx] ;xmm0 = first round key
+ shr eax,1 ;halve the counter - presumably because each pass below applies two round keys (xmm1 then xmm0)
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0 ;initial key XOR, interleaved with the first aesenc round to hide latency
+ xorps xmm3,xmm0
+ aesenc xmm2,xmm1
+ pxor xmm4,xmm0
+ aesenc xmm3,xmm1
+ pxor xmm5,xmm0
+ aesenc xmm4,xmm1
+ pxor xmm6,xmm0
+ aesenc xmm5,xmm1
+ pxor xmm7,xmm0
+ dec eax
+ aesenc xmm6,xmm1
+ pxor xmm8,xmm0
+ aesenc xmm7,xmm1
+ pxor xmm9,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ aesenc xmm8,xmm1
+ aesenc xmm9,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ jmp $L$enc_loop8_enter ;NOTE(review): target label is not defined in the visible text - presumably mid-way through the code below, after the xmm1 round; confirm
+ aesenc xmm2,xmm1 ;reachable only by a branch (follows an unconditional jmp); presumably $L$enc_loop8
+ aesenc xmm3,xmm1
+ dec eax ;flags consumed by the jnz below
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ aesenc xmm8,xmm1
+ aesenc xmm9,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0 ;round with key in xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ aesenc xmm5,xmm0
+ aesenc xmm6,xmm0
+ aesenc xmm7,xmm0
+ aesenc xmm8,xmm0
+ aesenc xmm9,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$enc_loop8 ;NOTE(review): target label is not defined in the visible text - confirm
+ aesenc xmm2,xmm1 ;last full round
+ aesenc xmm3,xmm1
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ aesenc xmm8,xmm1
+ aesenc xmm9,xmm1
+ aesenclast xmm2,xmm0 ;final round
+ aesenclast xmm3,xmm0
+ aesenclast xmm4,xmm0
+ aesenclast xmm5,xmm0
+ aesenclast xmm6,xmm0
+ aesenclast xmm7,xmm0
+ aesenclast xmm8,xmm0
+ aesenclast xmm9,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_encrypt8 ENDP
+_aesni_decrypt8 PROC PRIVATE ;decrypt xmm2..xmm9 in place, interleaved; rcx = key schedule, eax = counter set by caller; clobbers xmm0,xmm1,rcx,eax
+ movups xmm0,XMMWORD PTR[rcx] ;xmm0 = first round key
+ shr eax,1 ;halve the counter - presumably because each pass below applies two round keys (xmm1 then xmm0)
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0 ;initial key XOR, interleaved with the first aesdec round to hide latency
+ xorps xmm3,xmm0
+ aesdec xmm2,xmm1
+ pxor xmm4,xmm0
+ aesdec xmm3,xmm1
+ pxor xmm5,xmm0
+ aesdec xmm4,xmm1
+ pxor xmm6,xmm0
+ aesdec xmm5,xmm1
+ pxor xmm7,xmm0
+ dec eax
+ aesdec xmm6,xmm1
+ pxor xmm8,xmm0
+ aesdec xmm7,xmm1
+ pxor xmm9,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ aesdec xmm8,xmm1
+ aesdec xmm9,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ jmp $L$dec_loop8_enter ;NOTE(review): target label is not defined in the visible text - presumably mid-way through the code below, after the xmm1 round; confirm
+ aesdec xmm2,xmm1 ;reachable only by a branch (follows an unconditional jmp); presumably $L$dec_loop8
+ aesdec xmm3,xmm1
+ dec eax ;flags consumed by the jnz below
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+ aesdec xmm8,xmm1
+ aesdec xmm9,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesdec xmm2,xmm0 ;round with key in xmm0
+ aesdec xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesdec xmm4,xmm0
+ aesdec xmm5,xmm0
+ aesdec xmm6,xmm0
+ aesdec xmm7,xmm0
+ aesdec xmm8,xmm0
+ aesdec xmm9,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$dec_loop8 ;NOTE(review): target label is not defined in the visible text - confirm
+ aesdec xmm2,xmm1 ;last full round
+ aesdec xmm3,xmm1
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+ aesdec xmm8,xmm1
+ aesdec xmm9,xmm1
+ aesdeclast xmm2,xmm0 ;final round
+ aesdeclast xmm3,xmm0
+ aesdeclast xmm4,xmm0
+ aesdeclast xmm5,xmm0
+ aesdeclast xmm6,xmm0
+ aesdeclast xmm7,xmm0
+ aesdeclast xmm8,xmm0
+ aesdeclast xmm9,xmm0
+ DB 0F3h,0C3h ;repret
+_aesni_decrypt8 ENDP
+PUBLIC aesni_ecb_encrypt
+aesni_ecb_encrypt PROC PUBLIC ;ECB-mode bulk encrypt/decrypt; Win64 args rcx,rdx,r8,r9,[40+rsp] are remapped below to rdi=in, rsi=out, rdx=length, rcx=key schedule, r8d=direction flag
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp] ;fifth argument, read from the caller's stack
+ and rdx,-16 ;round the byte length down to a multiple of 16
+ jz $L$ecb_ret ;nothing to do for lengths below one block
+ mov eax,DWORD PTR[240+rcx] ;eax = counter read from key schedule offset 240 (presumably the round count - confirm)
+ movups xmm0,XMMWORD PTR[rcx]
+ mov r11,rcx ;r11/r10d keep pristine copies of key pointer and counter; the helpers clobber rcx/eax
+ mov r10d,eax
+ test r8d,r8d ;zero flag value selects the decrypt path
+ jz $L$ecb_decrypt
+ cmp rdx,080h ;080h = 128 bytes = eight blocks
+ jb $L$ecb_enc_tail
+ movdqu xmm2,XMMWORD PTR[rdi] ;load the first eight input blocks
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movdqu xmm6,XMMWORD PTR[64+rdi] ;NOTE(review): xmm6-xmm9 are overwritten in this routine with no save/restore visible; Win64 treats xmm6-xmm15 as non-volatile - confirm
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ lea rdi,QWORD PTR[128+rdi]
+ sub rdx,080h
+ jmp $L$ecb_enc_loop8_enter ;NOTE(review): none of the $L$ labels referenced in this routine are defined in the visible text - confirm against the generated file
+ movups XMMWORD PTR[rsi],xmm2 ;reachable only by a branch; presumably $L$ecb_enc_loop8: store previous eight results while loading the next eight inputs
+ mov rcx,r11 ;restore key pointer for the next helper call
+ movdqu xmm2,XMMWORD PTR[rdi]
+ mov eax,r10d ;restore counter
+ movups XMMWORD PTR[16+rsi],xmm3
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movups XMMWORD PTR[32+rsi],xmm4
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movups XMMWORD PTR[48+rsi],xmm5
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movups XMMWORD PTR[64+rsi],xmm6
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ movups XMMWORD PTR[80+rsi],xmm7
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movups XMMWORD PTR[96+rsi],xmm8
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ movups XMMWORD PTR[112+rsi],xmm9
+ lea rsi,QWORD PTR[128+rsi]
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ lea rdi,QWORD PTR[128+rdi]
+ call _aesni_encrypt8 ;presumably the $L$ecb_enc_loop8_enter target
+ sub rdx,080h
+ jnc $L$ecb_enc_loop8 ;keep going while at least eight more blocks remain
+ movups XMMWORD PTR[rsi],xmm2 ;flush the last eight results
+ mov rcx,r11
+ movups XMMWORD PTR[16+rsi],xmm3
+ mov eax,r10d
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ movups XMMWORD PTR[96+rsi],xmm8
+ movups XMMWORD PTR[112+rsi],xmm9
+ lea rsi,QWORD PTR[128+rsi]
+ add rdx,080h ;undo the last subtraction: rdx = remaining bytes (0..112)
+ jz $L$ecb_ret
+ movups xmm2,XMMWORD PTR[rdi] ;tail: dispatch on 1..7 remaining blocks (presumably $L$ecb_enc_tail)
+ cmp rdx,020h
+ jb $L$ecb_enc_one
+ movups xmm3,XMMWORD PTR[16+rdi] ;movups does not alter flags, so je still tests the cmp above
+ je $L$ecb_enc_two
+ movups xmm4,XMMWORD PTR[32+rdi]
+ cmp rdx,040h
+ jb $L$ecb_enc_three
+ movups xmm5,XMMWORD PTR[48+rdi]
+ je $L$ecb_enc_four
+ movups xmm6,XMMWORD PTR[64+rdi]
+ cmp rdx,060h
+ jb $L$ecb_enc_five
+ movups xmm7,XMMWORD PTR[80+rdi]
+ je $L$ecb_enc_six
+ movdqu xmm8,XMMWORD PTR[96+rdi] ;seven blocks: use the 8-way helper; only xmm2..xmm8 are stored afterwards
+ call _aesni_encrypt8
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ movups XMMWORD PTR[96+rsi],xmm8
+ jmp $L$ecb_ret
+ movups xmm0,XMMWORD PTR[rcx] ;presumably $L$ecb_enc_one: inline single-block encryption
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_3
+ aesenclast xmm2,xmm1
+ movups XMMWORD PTR[rsi],xmm2
+ jmp $L$ecb_ret
+ xorps xmm4,xmm4 ;presumably $L$ecb_enc_two: zero the unused third lane before the 3-way helper
+ call _aesni_encrypt3
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ jmp $L$ecb_ret
+ call _aesni_encrypt3 ;presumably $L$ecb_enc_three
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ jmp $L$ecb_ret
+ call _aesni_encrypt4 ;presumably $L$ecb_enc_four
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ jmp $L$ecb_ret
+ xorps xmm7,xmm7 ;presumably $L$ecb_enc_five: zero the unused sixth lane before the 6-way helper
+ call _aesni_encrypt6
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ jmp $L$ecb_ret
+ call _aesni_encrypt6 ;presumably $L$ecb_enc_six
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ jmp $L$ecb_ret
+ cmp rdx,080h ;presumably $L$ecb_decrypt: decrypt path, same structure as the encrypt path above
+ jb $L$ecb_dec_tail
+ movdqu xmm2,XMMWORD PTR[rdi]
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ lea rdi,QWORD PTR[128+rdi]
+ sub rdx,080h
+ jmp $L$ecb_dec_loop8_enter
+ movups XMMWORD PTR[rsi],xmm2 ;reachable only by a branch; presumably $L$ecb_dec_loop8
+ mov rcx,r11
+ movdqu xmm2,XMMWORD PTR[rdi]
+ mov eax,r10d
+ movups XMMWORD PTR[16+rsi],xmm3
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ movups XMMWORD PTR[32+rsi],xmm4
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ movups XMMWORD PTR[48+rsi],xmm5
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ movups XMMWORD PTR[64+rsi],xmm6
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ movups XMMWORD PTR[80+rsi],xmm7
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ movups XMMWORD PTR[96+rsi],xmm8
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ movups XMMWORD PTR[112+rsi],xmm9
+ lea rsi,QWORD PTR[128+rsi]
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ lea rdi,QWORD PTR[128+rdi]
+ call _aesni_decrypt8 ;presumably the $L$ecb_dec_loop8_enter target
+ movups xmm0,XMMWORD PTR[r11] ;reload the first round key from the saved key pointer
+ sub rdx,080h
+ jnc $L$ecb_dec_loop8
+ movups XMMWORD PTR[rsi],xmm2 ;flush the last eight results
+ mov rcx,r11
+ movups XMMWORD PTR[16+rsi],xmm3
+ mov eax,r10d
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ movups XMMWORD PTR[96+rsi],xmm8
+ movups XMMWORD PTR[112+rsi],xmm9
+ lea rsi,QWORD PTR[128+rsi]
+ add rdx,080h ;rdx = remaining bytes (0..112)
+ jz $L$ecb_ret
+ movups xmm2,XMMWORD PTR[rdi] ;tail: dispatch on 1..7 remaining blocks (presumably $L$ecb_dec_tail)
+ cmp rdx,020h
+ jb $L$ecb_dec_one
+ movups xmm3,XMMWORD PTR[16+rdi]
+ je $L$ecb_dec_two
+ movups xmm4,XMMWORD PTR[32+rdi]
+ cmp rdx,040h
+ jb $L$ecb_dec_three
+ movups xmm5,XMMWORD PTR[48+rdi]
+ je $L$ecb_dec_four
+ movups xmm6,XMMWORD PTR[64+rdi]
+ cmp rdx,060h
+ jb $L$ecb_dec_five
+ movups xmm7,XMMWORD PTR[80+rdi]
+ je $L$ecb_dec_six
+ movups xmm8,XMMWORD PTR[96+rdi] ;seven blocks: use the 8-way helper; only xmm2..xmm8 are stored afterwards
+ movups xmm0,XMMWORD PTR[rcx]
+ call _aesni_decrypt8
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ movups XMMWORD PTR[96+rsi],xmm8
+ jmp $L$ecb_ret
+ movups xmm0,XMMWORD PTR[rcx] ;presumably $L$ecb_dec_one: inline single-block decryption
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ aesdec xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_4
+ aesdeclast xmm2,xmm1
+ movups XMMWORD PTR[rsi],xmm2
+ jmp $L$ecb_ret
+ xorps xmm4,xmm4 ;presumably $L$ecb_dec_two: zero the unused third lane before the 3-way helper
+ call _aesni_decrypt3
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ jmp $L$ecb_ret
+ call _aesni_decrypt3 ;presumably $L$ecb_dec_three
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ jmp $L$ecb_ret
+ call _aesni_decrypt4 ;presumably $L$ecb_dec_four
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ jmp $L$ecb_ret
+ xorps xmm7,xmm7 ;presumably $L$ecb_dec_five: zero the unused sixth lane before the 6-way helper
+ call _aesni_decrypt6
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ jmp $L$ecb_ret
+ call _aesni_decrypt6 ;presumably $L$ecb_dec_six; falls through to the epilogue
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+aesni_ecb_encrypt ENDP
+PUBLIC aesni_ccm64_encrypt_blocks
+aesni_ccm64_encrypt_blocks PROC PUBLIC ;per block: encrypts a counter and a running value kept at [r9] with the same key, XORs the counter keystream into the data; after remap rdi=in, rsi=out, rdx=block count, rcx=key, r8=counter block, r9=running value (presumably the CCM CBC-MAC - confirm)
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp] ;fifth argument
+ mov r9,QWORD PTR[48+rsp] ;sixth argument
+ lea rsp,QWORD PTR[((-88))+rsp] ;reserve 88 bytes of stack for the xmm6-xmm9 save area
+ movaps XMMWORD PTR[rsp],xmm6 ;save the xmm registers this routine overwrites
+ movaps XMMWORD PTR[16+rsp],xmm7
+ movaps XMMWORD PTR[32+rsp],xmm8
+ movaps XMMWORD PTR[48+rsp],xmm9
+ mov eax,DWORD PTR[240+rcx] ;eax = counter read from key schedule offset 240 (presumably the round count - confirm)
+ movdqu xmm9,XMMWORD PTR[r8] ;xmm9 = counter block
+ movdqa xmm6,XMMWORD PTR[$L$increment64] ;xmm6 = increment constant (defined outside this view)
+ movdqa xmm7,XMMWORD PTR[$L$bswap_mask] ;xmm7 = byte-shuffle mask (defined outside this view)
+ shr eax,1 ;the inner loop below applies two round keys per decrement
+ lea r11,QWORD PTR[rcx] ;r11 = saved key pointer
+ movdqu xmm3,XMMWORD PTR[r9] ;xmm3 = running value loaded from [r9]
+ movdqa xmm2,xmm9
+ mov r10d,eax ;r10d = saved (halved) counter
+DB 102,68,15,56,0,207 ;hand-encoded pshufb xmm9,xmm7
+ jmp $L$ccm64_enc_outer ;NOTE(review): none of the $L$ code labels referenced in this routine are defined in the visible text - confirm
+ movups xmm0,XMMWORD PTR[r11] ;presumably $L$ccm64_enc_outer: start of per-block processing
+ mov eax,r10d
+ movups xmm8,XMMWORD PTR[rdi] ;xmm8 = input block
+ xorps xmm2,xmm0 ;counter lane: initial key XOR
+ movups xmm1,XMMWORD PTR[16+r11]
+ xorps xmm0,xmm8
+ lea rcx,QWORD PTR[32+r11]
+ xorps xmm3,xmm0 ;running-value lane: XOR in (round key 0 ^ input block)
+ movups xmm0,XMMWORD PTR[rcx]
+ aesenc xmm2,xmm1 ;two lanes (xmm2, xmm3) advance through the rounds together
+ dec eax
+ aesenc xmm3,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm3,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$ccm64_enc2_loop
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ paddq xmm9,xmm6 ;advance the counter by the increment constant
+ aesenclast xmm2,xmm0
+ aesenclast xmm3,xmm0
+ dec rdx ;one block done; flags consumed by the jnz below
+ lea rdi,QWORD PTR[16+rdi]
+ xorps xmm8,xmm2 ;output = input XOR encrypted counter
+ movdqa xmm2,xmm9
+ movups XMMWORD PTR[rsi],xmm8
+ lea rsi,QWORD PTR[16+rsi]
+DB 102,15,56,0,215 ;hand-encoded pshufb xmm2,xmm7
+ jnz $L$ccm64_enc_outer
+ movups XMMWORD PTR[r9],xmm3 ;write the running value back to [r9]
+ movaps xmm6,XMMWORD PTR[rsp] ;restore saved xmm registers
+ movaps xmm7,XMMWORD PTR[16+rsp]
+ movaps xmm8,XMMWORD PTR[32+rsp]
+ movaps xmm9,XMMWORD PTR[48+rsp]
+ lea rsp,QWORD PTR[88+rsp]
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+aesni_ccm64_encrypt_blocks ENDP
+PUBLIC aesni_ccm64_decrypt_blocks
+aesni_ccm64_decrypt_blocks PROC PUBLIC ;per block: XORs an encrypted counter into the data, then folds the result into a running value kept at [r9]; after remap rdi=in, rsi=out, rdx=block count, rcx=key, r8=counter block, r9=running value (presumably the CCM CBC-MAC - confirm)
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp] ;fifth argument
+ mov r9,QWORD PTR[48+rsp] ;sixth argument
+ lea rsp,QWORD PTR[((-88))+rsp] ;reserve 88 bytes of stack for the xmm6-xmm9 save area
+ movaps XMMWORD PTR[rsp],xmm6 ;save the xmm registers this routine overwrites
+ movaps XMMWORD PTR[16+rsp],xmm7
+ movaps XMMWORD PTR[32+rsp],xmm8
+ movaps XMMWORD PTR[48+rsp],xmm9
+ mov eax,DWORD PTR[240+rcx] ;eax = counter read from key schedule offset 240 (presumably the round count - confirm)
+ movups xmm9,XMMWORD PTR[r8] ;xmm9 = counter block
+ movdqu xmm3,XMMWORD PTR[r9] ;xmm3 = running value loaded from [r9]
+ movdqa xmm6,XMMWORD PTR[$L$increment64] ;xmm6 = increment constant (defined outside this view)
+ movdqa xmm7,XMMWORD PTR[$L$bswap_mask] ;xmm7 = byte-shuffle mask (defined outside this view)
+ movaps xmm2,xmm9
+ mov r10d,eax ;r10d = saved counter (not halved here; halved per block below)
+ mov r11,rcx ;r11 = saved key pointer
+DB 102,68,15,56,0,207 ;hand-encoded pshufb xmm9,xmm7
+ movups xmm0,XMMWORD PTR[rcx] ;encrypt the first counter block (xmm2) on its own
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_5 ;NOTE(review): none of the $L$ code labels referenced in this routine are defined in the visible text - confirm
+ aesenclast xmm2,xmm1
+ movups xmm8,XMMWORD PTR[rdi] ;xmm8 = first input block
+ paddq xmm9,xmm6 ;advance the counter by the increment constant
+ lea rdi,QWORD PTR[16+rdi]
+ jmp $L$ccm64_dec_outer
+ xorps xmm8,xmm2 ;presumably $L$ccm64_dec_outer: output = input XOR encrypted counter
+ movdqa xmm2,xmm9
+ mov eax,r10d
+ movups XMMWORD PTR[rsi],xmm8
+ lea rsi,QWORD PTR[16+rsi]
+DB 102,15,56,0,215 ;hand-encoded pshufb xmm2,xmm7
+ sub rdx,1
+ jz $L$ccm64_dec_break ;last block: finish the running value alone
+ movups xmm0,XMMWORD PTR[r11]
+ shr eax,1 ;two round keys per decrement in the two-lane loop below
+ movups xmm1,XMMWORD PTR[16+r11]
+ xorps xmm8,xmm0
+ lea rcx,QWORD PTR[32+r11]
+ xorps xmm2,xmm0
+ xorps xmm3,xmm8 ;fold (output block ^ round key 0) into the running value
+ movups xmm0,XMMWORD PTR[rcx]
+ aesenc xmm2,xmm1 ;lanes: xmm2 = next counter, xmm3 = running value
+ dec eax
+ aesenc xmm3,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm3,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$ccm64_dec2_loop
+ movups xmm8,XMMWORD PTR[rdi] ;load the next input block
+ paddq xmm9,xmm6
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ lea rdi,QWORD PTR[16+rdi]
+ aesenclast xmm2,xmm0
+ aesenclast xmm3,xmm0
+ jmp $L$ccm64_dec_outer
+ movups xmm0,XMMWORD PTR[r11] ;reachable only by a branch; presumably $L$ccm64_dec_break: single-lane pass over xmm3
+ movups xmm1,XMMWORD PTR[16+r11]
+ xorps xmm8,xmm0
+ lea r11,QWORD PTR[32+r11]
+ xorps xmm3,xmm8
+ aesenc xmm3,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[r11]
+ lea r11,QWORD PTR[16+r11]
+ jnz $L$oop_enc1_6
+ aesenclast xmm3,xmm1
+ movups XMMWORD PTR[r9],xmm3 ;write the running value back to [r9]
+ movaps xmm6,XMMWORD PTR[rsp] ;restore saved xmm registers
+ movaps xmm7,XMMWORD PTR[16+rsp]
+ movaps xmm8,XMMWORD PTR[32+rsp]
+ movaps xmm9,XMMWORD PTR[48+rsp]
+ lea rsp,QWORD PTR[88+rsp]
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+aesni_ccm64_decrypt_blocks ENDP
+PUBLIC aesni_ctr32_encrypt_blocks
+aesni_ctr32_encrypt_blocks PROC PUBLIC ;counter-mode keystream XOR with a 32-bit counter field; after remap rdi=in, rsi=out, rdx=block count, rcx=key schedule, r8=counter block
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp] ;fifth argument
+ lea rsp,QWORD PTR[((-200))+rsp] ;reserve 200 bytes: [rsp]..[31+rsp] counter scratch, [32+rsp].. xmm6-xmm15 save area
+ movaps XMMWORD PTR[32+rsp],xmm6 ;save the xmm registers this routine overwrites
+ movaps XMMWORD PTR[48+rsp],xmm7
+ movaps XMMWORD PTR[64+rsp],xmm8
+ movaps XMMWORD PTR[80+rsp],xmm9
+ movaps XMMWORD PTR[96+rsp],xmm10
+ movaps XMMWORD PTR[112+rsp],xmm11
+ movaps XMMWORD PTR[128+rsp],xmm12
+ movaps XMMWORD PTR[144+rsp],xmm13
+ movaps XMMWORD PTR[160+rsp],xmm14
+ movaps XMMWORD PTR[176+rsp],xmm15
+ cmp rdx,1
+ je $L$ctr32_one_shortcut ;single block: skip the counter-vector setup; NOTE(review): none of the $L$ code labels referenced in this routine are defined in the visible text - confirm
+ movdqu xmm14,XMMWORD PTR[r8] ;xmm14 = counter block
+ movdqa xmm15,XMMWORD PTR[$L$bswap_mask] ;xmm15 = byte-shuffle mask (defined outside this view)
+ xor eax,eax
+DB 102,69,15,58,22,242,3 ;hand-encoded pextrd r10d,xmm14,3 - pull out the top dword (the 32-bit counter)
+DB 102,68,15,58,34,240,3 ;hand-encoded pinsrd xmm14,eax,3 - clear that dword in xmm14 (eax is 0)
+ mov eax,DWORD PTR[240+rcx] ;eax = counter read from key schedule offset 240 (presumably the round count - confirm)
+ bswap r10d ;byte-swap the extracted counter so it can be incremented with integer adds
+ pxor xmm12,xmm12
+ pxor xmm13,xmm13
+DB 102,69,15,58,34,226,0 ;hand-encoded pinsrd xmm12,r10d,0 - counter+0
+ lea r11,QWORD PTR[3+r10] ;r11 = counter+3
+DB 102,69,15,58,34,235,0 ;hand-encoded pinsrd xmm13,r11d,0 - counter+3
+ inc r10d
+DB 102,69,15,58,34,226,1 ;hand-encoded pinsrd xmm12,r10d,1 - counter+1
+ inc r11
+DB 102,69,15,58,34,235,1 ;hand-encoded pinsrd xmm13,r11d,1 - counter+4
+ inc r10d
+DB 102,69,15,58,34,226,2 ;hand-encoded pinsrd xmm12,r10d,2 - counter+2
+ inc r11
+DB 102,69,15,58,34,235,2 ;hand-encoded pinsrd xmm13,r11d,2 - counter+5
+ movdqa XMMWORD PTR[rsp],xmm12 ;keep the un-shuffled counter vectors on the stack for later increments
+DB 102,69,15,56,0,231 ;hand-encoded pshufb xmm12,xmm15
+ movdqa XMMWORD PTR[16+rsp],xmm13
+DB 102,69,15,56,0,239 ;hand-encoded pshufb xmm13,xmm15
+ pshufd xmm2,xmm12,192 ;spread the three shuffled counters of xmm12 into xmm2,xmm3,xmm4
+ pshufd xmm3,xmm12,128
+ pshufd xmm4,xmm12,64
+ cmp rdx,6
+ jb $L$ctr32_tail ;fewer than six blocks: go straight to the tail
+ shr eax,1 ;two round keys per decrement in the 6-way loop
+ mov r11,rcx ;r11/r10d = saved key pointer and halved counter
+ mov r10d,eax
+ sub rdx,6
+ jmp $L$ctr32_loop6
+ pshufd xmm5,xmm13,192 ;presumably $L$ctr32_loop6: build six counter blocks by OR-ing counters into the base in xmm14
+ por xmm2,xmm14
+ movups xmm0,XMMWORD PTR[r11]
+ pshufd xmm6,xmm13,128
+ por xmm3,xmm14
+ movups xmm1,XMMWORD PTR[16+r11]
+ pshufd xmm7,xmm13,64
+ por xmm4,xmm14
+ por xmm5,xmm14
+ xorps xmm2,xmm0
+ por xmm6,xmm14
+ por xmm7,xmm14
+ pxor xmm3,xmm0
+ aesenc xmm2,xmm1
+ lea rcx,QWORD PTR[32+r11]
+ pxor xmm4,xmm0
+ aesenc xmm3,xmm1
+ movdqa xmm13,XMMWORD PTR[$L$increment32] ;xmm13 = increment constant (defined outside this view)
+ pxor xmm5,xmm0
+ aesenc xmm4,xmm1
+ movdqa xmm12,XMMWORD PTR[rsp] ;reload the un-shuffled counters saved earlier
+ pxor xmm6,xmm0
+ aesenc xmm5,xmm1
+ pxor xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ dec eax
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ jmp $L$ctr32_enc_loop6_enter
+ aesenc xmm2,xmm1 ;reachable only by a branch; presumably $L$ctr32_enc_loop6
+ aesenc xmm3,xmm1
+ dec eax
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ aesenc xmm5,xmm0
+ aesenc xmm6,xmm0
+ aesenc xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$ctr32_enc_loop6
+ aesenc xmm2,xmm1 ;last full round, interleaved with advancing both counter vectors for the next pass
+ paddd xmm12,xmm13
+ aesenc xmm3,xmm1
+ paddd xmm13,XMMWORD PTR[16+rsp]
+ aesenc xmm4,xmm1
+ movdqa XMMWORD PTR[rsp],xmm12
+ aesenc xmm5,xmm1
+ movdqa XMMWORD PTR[16+rsp],xmm13
+ aesenc xmm6,xmm1
+DB 102,69,15,56,0,231 ;hand-encoded pshufb xmm12,xmm15
+ aesenc xmm7,xmm1
+DB 102,69,15,56,0,239 ;hand-encoded pshufb xmm13,xmm15
+ aesenclast xmm2,xmm0 ;final round, interleaved with loading six input blocks
+ movups xmm8,XMMWORD PTR[rdi]
+ aesenclast xmm3,xmm0
+ movups xmm9,XMMWORD PTR[16+rdi]
+ aesenclast xmm4,xmm0
+ movups xmm10,XMMWORD PTR[32+rdi]
+ aesenclast xmm5,xmm0
+ movups xmm11,XMMWORD PTR[48+rdi]
+ aesenclast xmm6,xmm0
+ movups xmm1,XMMWORD PTR[64+rdi]
+ aesenclast xmm7,xmm0
+ movups xmm0,XMMWORD PTR[80+rdi]
+ lea rdi,QWORD PTR[96+rdi]
+ xorps xmm8,xmm2 ;output = input XOR keystream; xmm2..xmm4 are immediately re-seeded for the next pass
+ pshufd xmm2,xmm12,192
+ xorps xmm9,xmm3
+ pshufd xmm3,xmm12,128
+ movups XMMWORD PTR[rsi],xmm8
+ xorps xmm10,xmm4
+ pshufd xmm4,xmm12,64
+ movups XMMWORD PTR[16+rsi],xmm9
+ xorps xmm11,xmm5
+ movups XMMWORD PTR[32+rsi],xmm10
+ xorps xmm1,xmm6
+ movups XMMWORD PTR[48+rsi],xmm11
+ xorps xmm0,xmm7
+ movups XMMWORD PTR[64+rsi],xmm1
+ movups XMMWORD PTR[80+rsi],xmm0
+ lea rsi,QWORD PTR[96+rsi]
+ mov eax,r10d
+ sub rdx,6
+ jnc $L$ctr32_loop6 ;loop while at least six more blocks remain
+ add rdx,6 ;rdx = remaining blocks (0..5)
+ jz $L$ctr32_done
+ mov rcx,r11 ;restore key pointer
+ lea eax,DWORD PTR[1+rax*1+rax] ;eax = 2*eax+1, undoing the earlier halving for the helper calls below
+ por xmm2,xmm14 ;tail (presumably $L$ctr32_tail): 1..5 remaining blocks
+ movups xmm8,XMMWORD PTR[rdi]
+ cmp rdx,2
+ jb $L$ctr32_one
+ por xmm3,xmm14
+ movups xmm9,XMMWORD PTR[16+rdi]
+ je $L$ctr32_two
+ pshufd xmm5,xmm13,192
+ por xmm4,xmm14
+ movups xmm10,XMMWORD PTR[32+rdi]
+ cmp rdx,4
+ jb $L$ctr32_three
+ pshufd xmm6,xmm13,128
+ por xmm5,xmm14
+ movups xmm11,XMMWORD PTR[48+rdi]
+ je $L$ctr32_four
+ por xmm6,xmm14 ;five blocks: zero the unused sixth lane and use the 6-way helper
+ xorps xmm7,xmm7
+ call _aesni_encrypt6
+ movups xmm1,XMMWORD PTR[64+rdi]
+ xorps xmm8,xmm2
+ xorps xmm9,xmm3
+ movups XMMWORD PTR[rsi],xmm8
+ xorps xmm10,xmm4
+ movups XMMWORD PTR[16+rsi],xmm9
+ xorps xmm11,xmm5
+ movups XMMWORD PTR[32+rsi],xmm10
+ xorps xmm1,xmm6
+ movups XMMWORD PTR[48+rsi],xmm11
+ movups XMMWORD PTR[64+rsi],xmm1
+ jmp $L$ctr32_done
+ movups xmm2,XMMWORD PTR[r8] ;presumably $L$ctr32_one_shortcut: use the caller's counter block as-is
+ movups xmm8,XMMWORD PTR[rdi]
+ mov eax,DWORD PTR[240+rcx]
+ movups xmm0,XMMWORD PTR[rcx] ;inline single-block encryption of xmm2
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_7
+ aesenclast xmm2,xmm1
+ xorps xmm8,xmm2
+ movups XMMWORD PTR[rsi],xmm8
+ jmp $L$ctr32_done
+ xorps xmm4,xmm4 ;presumably $L$ctr32_two: zero the unused third lane before the 3-way helper
+ call _aesni_encrypt3
+ xorps xmm8,xmm2
+ xorps xmm9,xmm3
+ movups XMMWORD PTR[rsi],xmm8
+ movups XMMWORD PTR[16+rsi],xmm9
+ jmp $L$ctr32_done
+ call _aesni_encrypt3 ;presumably $L$ctr32_three
+ xorps xmm8,xmm2
+ xorps xmm9,xmm3
+ movups XMMWORD PTR[rsi],xmm8
+ xorps xmm10,xmm4
+ movups XMMWORD PTR[16+rsi],xmm9
+ movups XMMWORD PTR[32+rsi],xmm10
+ jmp $L$ctr32_done
+ call _aesni_encrypt4 ;presumably $L$ctr32_four; falls through to the epilogue
+ xorps xmm8,xmm2
+ xorps xmm9,xmm3
+ movups XMMWORD PTR[rsi],xmm8
+ xorps xmm10,xmm4
+ movups XMMWORD PTR[16+rsi],xmm9
+ xorps xmm11,xmm5
+ movups XMMWORD PTR[32+rsi],xmm10
+ movups XMMWORD PTR[48+rsi],xmm11
+ movaps xmm6,XMMWORD PTR[32+rsp] ;presumably $L$ctr32_done: restore saved xmm registers
+ movaps xmm7,XMMWORD PTR[48+rsp]
+ movaps xmm8,XMMWORD PTR[64+rsp]
+ movaps xmm9,XMMWORD PTR[80+rsp]
+ movaps xmm10,XMMWORD PTR[96+rsp]
+ movaps xmm11,XMMWORD PTR[112+rsp]
+ movaps xmm12,XMMWORD PTR[128+rsp]
+ movaps xmm13,XMMWORD PTR[144+rsp]
+ movaps xmm14,XMMWORD PTR[160+rsp]
+ movaps xmm15,XMMWORD PTR[176+rsp]
+ lea rsp,QWORD PTR[200+rsp]
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+aesni_ctr32_encrypt_blocks ENDP
+PUBLIC aesni_xts_encrypt
+aesni_xts_encrypt PROC PUBLIC
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+ lea rsp,QWORD PTR[((-264))+rsp]
+ movaps XMMWORD PTR[96+rsp],xmm6
+ movaps XMMWORD PTR[112+rsp],xmm7
+ movaps XMMWORD PTR[128+rsp],xmm8
+ movaps XMMWORD PTR[144+rsp],xmm9
+ movaps XMMWORD PTR[160+rsp],xmm10
+ movaps XMMWORD PTR[176+rsp],xmm11
+ movaps XMMWORD PTR[192+rsp],xmm12
+ movaps XMMWORD PTR[208+rsp],xmm13
+ movaps XMMWORD PTR[224+rsp],xmm14
+ movaps XMMWORD PTR[240+rsp],xmm15
+ movups xmm15,XMMWORD PTR[r9]
+ mov eax,DWORD PTR[240+r8]
+ mov r10d,DWORD PTR[240+rcx]
+ movups xmm0,XMMWORD PTR[r8]
+ movups xmm1,XMMWORD PTR[16+r8]
+ lea r8,QWORD PTR[32+r8]
+ xorps xmm15,xmm0
+ aesenc xmm15,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[r8]
+ lea r8,QWORD PTR[16+r8]
+ jnz $L$oop_enc1_8
+ aesenclast xmm15,xmm1
+ mov r11,rcx
+ mov eax,r10d
+ mov r9,rdx
+ and rdx,-16
+ movdqa xmm8,XMMWORD PTR[$L$xts_magic]
+ pxor xmm14,xmm14
+ pcmpgtd xmm14,xmm15
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm10,xmm15
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm11,xmm15
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm12,xmm15
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm13,xmm15
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ sub rdx,16*6
+ jc $L$xts_enc_short
+ shr eax,1
+ sub eax,1
+ mov r10d,eax
+ jmp $L$xts_enc_grandloop
+ pshufd xmm9,xmm14,013h
+ movdqa xmm14,xmm15
+ paddq xmm15,xmm15
+ movdqu xmm2,XMMWORD PTR[rdi]
+ pand xmm9,xmm8
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ pxor xmm15,xmm9
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ pxor xmm3,xmm11
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ pxor xmm4,xmm12
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ lea rdi,QWORD PTR[96+rdi]
+ pxor xmm5,xmm13
+ movups xmm0,XMMWORD PTR[r11]
+ pxor xmm6,xmm14
+ pxor xmm7,xmm15
+ movups xmm1,XMMWORD PTR[16+r11]
+ pxor xmm2,xmm0
+ pxor xmm3,xmm0
+ movdqa XMMWORD PTR[rsp],xmm10
+ aesenc xmm2,xmm1
+ lea rcx,QWORD PTR[32+r11]
+ pxor xmm4,xmm0
+ movdqa XMMWORD PTR[16+rsp],xmm11
+ aesenc xmm3,xmm1
+ pxor xmm5,xmm0
+ movdqa XMMWORD PTR[32+rsp],xmm12
+ aesenc xmm4,xmm1
+ pxor xmm6,xmm0
+ movdqa XMMWORD PTR[48+rsp],xmm13
+ aesenc xmm5,xmm1
+ pxor xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ dec eax
+ movdqa XMMWORD PTR[64+rsp],xmm14
+ aesenc xmm6,xmm1
+ movdqa XMMWORD PTR[80+rsp],xmm15
+ aesenc xmm7,xmm1
+ pxor xmm14,xmm14
+ pcmpgtd xmm14,xmm15
+ jmp $L$xts_enc_loop6_enter
+ aesenc xmm2,xmm1
+ aesenc xmm3,xmm1
+ dec eax
+ aesenc xmm4,xmm1
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesenc xmm2,xmm0
+ aesenc xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesenc xmm4,xmm0
+ aesenc xmm5,xmm0
+ aesenc xmm6,xmm0
+ aesenc xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$xts_enc_loop6
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ paddq xmm15,xmm15
+ aesenc xmm2,xmm1
+ pand xmm9,xmm8
+ aesenc xmm3,xmm1
+ pcmpgtd xmm14,xmm15
+ aesenc xmm4,xmm1
+ pxor xmm15,xmm9
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm10,xmm15
+ paddq xmm15,xmm15
+ aesenc xmm2,xmm0
+ pand xmm9,xmm8
+ aesenc xmm3,xmm0
+ pcmpgtd xmm14,xmm15
+ aesenc xmm4,xmm0
+ pxor xmm15,xmm9
+ aesenc xmm5,xmm0
+ aesenc xmm6,xmm0
+ aesenc xmm7,xmm0
+ movups xmm0,XMMWORD PTR[32+rcx]
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm11,xmm15
+ paddq xmm15,xmm15
+ aesenc xmm2,xmm1
+ pand xmm9,xmm8
+ aesenc xmm3,xmm1
+ pcmpgtd xmm14,xmm15
+ aesenc xmm4,xmm1
+ pxor xmm15,xmm9
+ aesenc xmm5,xmm1
+ aesenc xmm6,xmm1
+ aesenc xmm7,xmm1
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm12,xmm15
+ paddq xmm15,xmm15
+ aesenclast xmm2,xmm0
+ pand xmm9,xmm8
+ aesenclast xmm3,xmm0
+ pcmpgtd xmm14,xmm15
+ aesenclast xmm4,xmm0
+ pxor xmm15,xmm9
+ aesenclast xmm5,xmm0
+ aesenclast xmm6,xmm0
+ aesenclast xmm7,xmm0
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm13,xmm15
+ paddq xmm15,xmm15
+ xorps xmm2,XMMWORD PTR[rsp]
+ pand xmm9,xmm8
+ xorps xmm3,XMMWORD PTR[16+rsp]
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ xorps xmm4,XMMWORD PTR[32+rsp]
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm5,XMMWORD PTR[48+rsp]
+ movups XMMWORD PTR[16+rsi],xmm3
+ xorps xmm6,XMMWORD PTR[64+rsp]
+ movups XMMWORD PTR[32+rsi],xmm4
+ xorps xmm7,XMMWORD PTR[80+rsp]
+ movups XMMWORD PTR[48+rsi],xmm5
+ mov eax,r10d
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ lea rsi,QWORD PTR[96+rsi]
+ sub rdx,16*6
+ jnc $L$xts_enc_grandloop
+ lea eax,DWORD PTR[3+rax*1+rax]
+ mov rcx,r11
+ mov r10d,eax
+ add rdx,16*6
+ jz $L$xts_enc_done
+ cmp rdx,020h
+ jb $L$xts_enc_one
+ je $L$xts_enc_two
+ cmp rdx,040h
+ jb $L$xts_enc_three
+ je $L$xts_enc_four
+ pshufd xmm9,xmm14,013h
+ movdqa xmm14,xmm15
+ paddq xmm15,xmm15
+ movdqu xmm2,XMMWORD PTR[rdi]
+ pand xmm9,xmm8
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ pxor xmm15,xmm9
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ pxor xmm3,xmm11
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ lea rdi,QWORD PTR[80+rdi]
+ pxor xmm4,xmm12
+ pxor xmm5,xmm13
+ pxor xmm6,xmm14
+ call _aesni_encrypt6
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm15
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movdqu XMMWORD PTR[rsi],xmm2
+ xorps xmm5,xmm13
+ movdqu XMMWORD PTR[16+rsi],xmm3
+ xorps xmm6,xmm14
+ movdqu XMMWORD PTR[32+rsi],xmm4
+ movdqu XMMWORD PTR[48+rsi],xmm5
+ movdqu XMMWORD PTR[64+rsi],xmm6
+ lea rsi,QWORD PTR[80+rsi]
+ jmp $L$xts_enc_done
+ movups xmm2,XMMWORD PTR[rdi]
+ lea rdi,QWORD PTR[16+rdi]
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_9
+ aesenclast xmm2,xmm1
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm11
+ movups XMMWORD PTR[rsi],xmm2
+ lea rsi,QWORD PTR[16+rsi]
+ jmp $L$xts_enc_done
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ lea rdi,QWORD PTR[32+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+ call _aesni_encrypt3
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm12
+ xorps xmm3,xmm11
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ lea rsi,QWORD PTR[32+rsi]
+ jmp $L$xts_enc_done
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movups xmm4,XMMWORD PTR[32+rdi]
+ lea rdi,QWORD PTR[48+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ call _aesni_encrypt3
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm13
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ lea rsi,QWORD PTR[48+rsi]
+ jmp $L$xts_enc_done
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movups xmm4,XMMWORD PTR[32+rdi]
+ xorps xmm2,xmm10
+ movups xmm5,XMMWORD PTR[48+rdi]
+ lea rdi,QWORD PTR[64+rdi]
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ xorps xmm5,xmm13
+ call _aesni_encrypt4
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm15
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm5,xmm13
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ lea rsi,QWORD PTR[64+rsi]
+ jmp $L$xts_enc_done
+ and r9,15
+ jz $L$xts_enc_ret
+ mov rdx,r9
+ movzx eax,BYTE PTR[rdi]
+ movzx ecx,BYTE PTR[((-16))+rsi]
+ lea rdi,QWORD PTR[1+rdi]
+ mov BYTE PTR[((-16))+rsi],al
+ mov BYTE PTR[rsi],cl
+ lea rsi,QWORD PTR[1+rsi]
+ sub rdx,1
+ jnz $L$xts_enc_steal
+ sub rsi,r9
+ mov rcx,r11
+ mov eax,r10d
+ movups xmm2,XMMWORD PTR[((-16))+rsi]
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_10
+ aesenclast xmm2,xmm1
+ xorps xmm2,xmm10
+ movups XMMWORD PTR[(-16)+rsi],xmm2
+ movaps xmm6,XMMWORD PTR[96+rsp]
+ movaps xmm7,XMMWORD PTR[112+rsp]
+ movaps xmm8,XMMWORD PTR[128+rsp]
+ movaps xmm9,XMMWORD PTR[144+rsp]
+ movaps xmm10,XMMWORD PTR[160+rsp]
+ movaps xmm11,XMMWORD PTR[176+rsp]
+ movaps xmm12,XMMWORD PTR[192+rsp]
+ movaps xmm13,XMMWORD PTR[208+rsp]
+ movaps xmm14,XMMWORD PTR[224+rsp]
+ movaps xmm15,XMMWORD PTR[240+rsp]
+ lea rsp,QWORD PTR[264+rsp]
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+aesni_xts_encrypt ENDP
+PUBLIC aesni_xts_decrypt
+aesni_xts_decrypt PROC PUBLIC ;XTS-style AES-NI decryption, six blocks per main iteration, with byte stealing for a ragged tail; branch-target labels are not present in this view
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue: park rdi/rsi in the slots just above the return address
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx ;arg1 -> rdi: source pointer
+ mov rsi,rdx ;arg2 -> rsi: destination pointer
+ mov rdx,r8 ;arg3 -> rdx: length in bytes
+ mov rcx,r9 ;arg4 -> rcx: key schedule used by the aesdec rounds
+ mov r8,QWORD PTR[40+rsp] ;arg5 (stack): key schedule used to encrypt the tweak seed
+ mov r9,QWORD PTR[48+rsp] ;arg6 (stack): pointer to the 16-byte tweak seed
+ lea rsp,QWORD PTR[((-264))+rsp] ;frame: [0..95] six spilled tweaks, [96..255] save area for xmm6-xmm15
+ movaps XMMWORD PTR[96+rsp],xmm6
+ movaps XMMWORD PTR[112+rsp],xmm7
+ movaps XMMWORD PTR[128+rsp],xmm8
+ movaps XMMWORD PTR[144+rsp],xmm9
+ movaps XMMWORD PTR[160+rsp],xmm10
+ movaps XMMWORD PTR[176+rsp],xmm11
+ movaps XMMWORD PTR[192+rsp],xmm12
+ movaps XMMWORD PTR[208+rsp],xmm13
+ movaps XMMWORD PTR[224+rsp],xmm14
+ movaps XMMWORD PTR[240+rsp],xmm15
+ movups xmm15,XMMWORD PTR[r9] ;xmm15 = tweak seed
+ mov eax,DWORD PTR[240+r8] ;loop count for the tweak cipher, read at offset 240 of the key at r8
+ mov r10d,DWORD PTR[240+rcx] ;same field of the data key
+ movups xmm0,XMMWORD PTR[r8] ;single-block encryption of xmm15 under the key at r8 (aesenc even though this is the decrypt entry)
+ movups xmm1,XMMWORD PTR[16+r8]
+ lea r8,QWORD PTR[32+r8]
+ xorps xmm15,xmm0
+ aesenc xmm15,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[r8]
+ lea r8,QWORD PTR[16+r8]
+ jnz $L$oop_enc1_11 ;loop target label is not shown in this view
+ aesenclast xmm15,xmm1 ;xmm15 = first tweak
+ xor eax,eax ;when length is not a multiple of 16, hold one extra 16-byte block back for the stealing tail
+ test rdx,15
+ setnz al
+ shl rax,4
+ sub rdx,rax
+ mov r11,rcx ;r11 = data key base (rcx is advanced by the round loops)
+ mov eax,r10d
+ mov r9,rdx ;r9 keeps the adjusted length; its low 4 bits are tested at the end
+ and rdx,-16 ;rdx = bytes in whole blocks
+ movdqa xmm8,XMMWORD PTR[$L$xts_magic] ;mask constant; label is defined outside the lines shown (presumably the DD 087h,0,1,0 row - confirm)
+ pxor xmm14,xmm14
+ pcmpgtd xmm14,xmm15 ;xmm14 = per-dword sign mask of the current tweak
+ pshufd xmm9,xmm14,013h ;tweak-doubling step (repeated four times below): route sign masks to the lanes that take the carry
+ pxor xmm14,xmm14
+ movdqa xmm10,xmm15 ;xmm10 = tweak for block 1
+ paddq xmm15,xmm15 ;shift each 64-bit half left by one bit
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9 ;fold the masked carry bits in: xmm15 = next tweak
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm11,xmm15 ;xmm11 = tweak for block 2
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm12,xmm15 ;xmm12 = tweak for block 3
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm13,xmm15 ;xmm13 = tweak for block 4
+ paddq xmm15,xmm15
+ pand xmm9,xmm8
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ sub rdx,16*6 ;fewer than six whole blocks -> short path
+ jc $L$xts_dec_short
+ shr eax,1 ;main loop performs two rounds per iteration: counter = count/2 - 1
+ sub eax,1
+ mov r10d,eax
+ jmp $L$xts_dec_grandloop
+ pshufd xmm9,xmm14,013h ;six-block iteration (its label is not shown): derive the sixth tweak while loading input
+ movdqa xmm14,xmm15 ;xmm14 = tweak for block 5
+ paddq xmm15,xmm15
+ movdqu xmm2,XMMWORD PTR[rdi]
+ pand xmm9,xmm8
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ pxor xmm15,xmm9 ;xmm15 = tweak for block 6
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ pxor xmm2,xmm10 ;mask each input block with its tweak
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ pxor xmm3,xmm11
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ pxor xmm4,xmm12
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ lea rdi,QWORD PTR[96+rdi]
+ pxor xmm5,xmm13
+ movups xmm0,XMMWORD PTR[r11] ;round key 0
+ pxor xmm6,xmm14
+ pxor xmm7,xmm15
+ movups xmm1,XMMWORD PTR[16+r11]
+ pxor xmm2,xmm0
+ pxor xmm3,xmm0
+ movdqa XMMWORD PTR[rsp],xmm10 ;spill the six tweaks; xmm10-xmm13 are reused for the next iteration's tweaks
+ aesdec xmm2,xmm1
+ lea rcx,QWORD PTR[32+r11]
+ pxor xmm4,xmm0
+ movdqa XMMWORD PTR[16+rsp],xmm11
+ aesdec xmm3,xmm1
+ pxor xmm5,xmm0
+ movdqa XMMWORD PTR[32+rsp],xmm12
+ aesdec xmm4,xmm1
+ pxor xmm6,xmm0
+ movdqa XMMWORD PTR[48+rsp],xmm13
+ aesdec xmm5,xmm1
+ pxor xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ dec eax
+ movdqa XMMWORD PTR[64+rsp],xmm14
+ aesdec xmm6,xmm1
+ movdqa XMMWORD PTR[80+rsp],xmm15
+ aesdec xmm7,xmm1
+ pxor xmm14,xmm14
+ pcmpgtd xmm14,xmm15 ;start the carry mask for the tweak that follows block 6
+ jmp $L$xts_dec_loop6_enter
+ aesdec xmm2,xmm1 ;round loop over six blocks, two round keys (xmm1, xmm0) per pass
+ aesdec xmm3,xmm1
+ dec eax
+ aesdec xmm4,xmm1
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ aesdec xmm2,xmm0
+ aesdec xmm3,xmm0
+ lea rcx,QWORD PTR[32+rcx]
+ aesdec xmm4,xmm0
+ aesdec xmm5,xmm0
+ aesdec xmm6,xmm0
+ aesdec xmm7,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ jnz $L$xts_dec_loop6
+ pshufd xmm9,xmm14,013h ;remaining rounds, interleaved with computing the next iteration's tweaks
+ pxor xmm14,xmm14
+ paddq xmm15,xmm15
+ aesdec xmm2,xmm1
+ pand xmm9,xmm8
+ aesdec xmm3,xmm1
+ pcmpgtd xmm14,xmm15
+ aesdec xmm4,xmm1
+ pxor xmm15,xmm9
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm10,xmm15 ;next iteration's tweak 1
+ paddq xmm15,xmm15
+ aesdec xmm2,xmm0
+ pand xmm9,xmm8
+ aesdec xmm3,xmm0
+ pcmpgtd xmm14,xmm15
+ aesdec xmm4,xmm0
+ pxor xmm15,xmm9
+ aesdec xmm5,xmm0
+ aesdec xmm6,xmm0
+ aesdec xmm7,xmm0
+ movups xmm0,XMMWORD PTR[32+rcx]
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm11,xmm15 ;next iteration's tweak 2
+ paddq xmm15,xmm15
+ aesdec xmm2,xmm1
+ pand xmm9,xmm8
+ aesdec xmm3,xmm1
+ pcmpgtd xmm14,xmm15
+ aesdec xmm4,xmm1
+ pxor xmm15,xmm9
+ aesdec xmm5,xmm1
+ aesdec xmm6,xmm1
+ aesdec xmm7,xmm1
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm12,xmm15 ;next iteration's tweak 3
+ paddq xmm15,xmm15
+ aesdeclast xmm2,xmm0
+ pand xmm9,xmm8
+ aesdeclast xmm3,xmm0
+ pcmpgtd xmm14,xmm15
+ aesdeclast xmm4,xmm0
+ pxor xmm15,xmm9
+ aesdeclast xmm5,xmm0
+ aesdeclast xmm6,xmm0
+ aesdeclast xmm7,xmm0
+ pshufd xmm9,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm13,xmm15 ;next iteration's tweak 4
+ paddq xmm15,xmm15
+ xorps xmm2,XMMWORD PTR[rsp] ;remove the tweak masking using the spilled copies
+ pand xmm9,xmm8
+ xorps xmm3,XMMWORD PTR[16+rsp]
+ pcmpgtd xmm14,xmm15
+ pxor xmm15,xmm9
+ xorps xmm4,XMMWORD PTR[32+rsp]
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm5,XMMWORD PTR[48+rsp]
+ movups XMMWORD PTR[16+rsi],xmm3
+ xorps xmm6,XMMWORD PTR[64+rsp]
+ movups XMMWORD PTR[32+rsi],xmm4
+ xorps xmm7,XMMWORD PTR[80+rsp]
+ movups XMMWORD PTR[48+rsi],xmm5
+ mov eax,r10d ;reload the round-loop counter
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ lea rsi,QWORD PTR[96+rsi]
+ sub rdx,16*6
+ jnc $L$xts_dec_grandloop
+ lea eax,DWORD PTR[3+rax*1+rax] ;eax = 2*eax+3: undoes the shr/sub adjustment above when the stored count is odd
+ mov rcx,r11
+ mov r10d,eax
+ add rdx,16*6 ;rdx = leftover whole-block bytes (below 96)
+ jz $L$xts_dec_done
+ cmp rdx,020h ;dispatch on 1, 2, 3 or 4 blocks; otherwise fall through to the five-block case
+ jb $L$xts_dec_one
+ je $L$xts_dec_two
+ cmp rdx,040h
+ jb $L$xts_dec_three
+ je $L$xts_dec_four
+ pshufd xmm9,xmm14,013h ;five blocks: finish the fifth tweak into xmm14
+ movdqa xmm14,xmm15
+ paddq xmm15,xmm15
+ movdqu xmm2,XMMWORD PTR[rdi]
+ pand xmm9,xmm8
+ movdqu xmm3,XMMWORD PTR[16+rdi]
+ pxor xmm15,xmm9
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ pxor xmm2,xmm10
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ pxor xmm3,xmm11
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ lea rdi,QWORD PTR[80+rdi]
+ pxor xmm4,xmm12
+ pxor xmm5,xmm13
+ pxor xmm6,xmm14
+ call _aesni_decrypt6 ;helper defined outside the lines shown
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ movdqu XMMWORD PTR[rsi],xmm2
+ xorps xmm5,xmm13
+ movdqu XMMWORD PTR[16+rsi],xmm3
+ xorps xmm6,xmm14
+ movdqu XMMWORD PTR[32+rsi],xmm4
+ pxor xmm14,xmm14
+ movdqu XMMWORD PTR[48+rsi],xmm5
+ pcmpgtd xmm14,xmm15
+ movdqu XMMWORD PTR[64+rsi],xmm6
+ lea rsi,QWORD PTR[80+rsi]
+ pshufd xmm11,xmm14,013h
+ and r9,15 ;r9 = number of trailing bytes beyond the whole blocks
+ jz $L$xts_dec_ret
+ movdqa xmm10,xmm15 ;xmm10 = next unused tweak
+ paddq xmm15,xmm15
+ pand xmm11,xmm8
+ pxor xmm11,xmm15 ;xmm11 = the tweak after xmm10
+ jmp $L$xts_dec_done2
+ movups xmm2,XMMWORD PTR[rdi] ;one-block case (label not shown)
+ lea rdi,QWORD PTR[16+rdi]
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ aesdec xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_12
+ aesdeclast xmm2,xmm1
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm11 ;shift so xmm10/xmm11 hold the next two unused tweaks for the stealing tail
+ movups XMMWORD PTR[rsi],xmm2
+ movdqa xmm11,xmm12
+ lea rsi,QWORD PTR[16+rsi]
+ jmp $L$xts_dec_done
+ movups xmm2,XMMWORD PTR[rdi] ;two-block case (label not shown)
+ movups xmm3,XMMWORD PTR[16+rdi]
+ lea rdi,QWORD PTR[32+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+ call _aesni_decrypt3 ;three-lane helper; only xmm2 and xmm3 are stored afterwards
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm12
+ xorps xmm3,xmm11
+ movdqa xmm11,xmm13
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ lea rsi,QWORD PTR[32+rsi]
+ jmp $L$xts_dec_done
+ movups xmm2,XMMWORD PTR[rdi] ;three-block case (label not shown)
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movups xmm4,XMMWORD PTR[32+rdi]
+ lea rdi,QWORD PTR[48+rdi]
+ xorps xmm2,xmm10
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ call _aesni_decrypt3
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm13
+ xorps xmm3,xmm11
+ movdqa xmm11,xmm15
+ xorps xmm4,xmm12
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ lea rsi,QWORD PTR[48+rsi]
+ jmp $L$xts_dec_done
+ pshufd xmm9,xmm14,013h ;four-block case (label not shown): also advance one more tweak for a possible stealing tail
+ movdqa xmm14,xmm15
+ paddq xmm15,xmm15
+ movups xmm2,XMMWORD PTR[rdi]
+ pand xmm9,xmm8
+ movups xmm3,XMMWORD PTR[16+rdi]
+ pxor xmm15,xmm9
+ movups xmm4,XMMWORD PTR[32+rdi]
+ xorps xmm2,xmm10
+ movups xmm5,XMMWORD PTR[48+rdi]
+ lea rdi,QWORD PTR[64+rdi]
+ xorps xmm3,xmm11
+ xorps xmm4,xmm12
+ xorps xmm5,xmm13
+ call _aesni_decrypt4
+ xorps xmm2,xmm10
+ movdqa xmm10,xmm14
+ xorps xmm3,xmm11
+ movdqa xmm11,xmm15
+ xorps xmm4,xmm12
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm5,xmm13
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ lea rsi,QWORD PTR[64+rsi]
+ jmp $L$xts_dec_done
+ and r9,15 ;common exit: any trailing bytes?
+ jz $L$xts_dec_ret
+ mov rdx,r9 ;stealing tail: rdx = trailing byte count
+ mov rcx,r11
+ mov eax,r10d
+ movups xmm2,XMMWORD PTR[rdi] ;decrypt the held-back whole block with the later tweak (xmm11)
+ xorps xmm2,xmm11
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ aesdec xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_13
+ aesdeclast xmm2,xmm1
+ xorps xmm2,xmm11
+ movups XMMWORD PTR[rsi],xmm2
+ movzx eax,BYTE PTR[16+rdi] ;byte-swap loop: trailing input byte goes into the block at rsi ...
+ movzx ecx,BYTE PTR[rsi]
+ lea rdi,QWORD PTR[1+rdi]
+ mov BYTE PTR[rsi],al
+ mov BYTE PTR[16+rsi],cl ;... and the byte it displaces becomes trailing output
+ lea rsi,QWORD PTR[1+rsi]
+ sub rdx,1
+ jnz $L$xts_dec_steal
+ sub rsi,r9 ;rewind rsi to the start of the reassembled block
+ mov rcx,r11
+ mov eax,r10d
+ movups xmm2,XMMWORD PTR[rsi] ;decrypt the reassembled block in place with the earlier tweak (xmm10)
+ xorps xmm2,xmm10
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ aesdec xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_14
+ aesdeclast xmm2,xmm1
+ xorps xmm2,xmm10
+ movups XMMWORD PTR[rsi],xmm2
+ movaps xmm6,XMMWORD PTR[96+rsp] ;restore xmm6-xmm15 from the frame
+ movaps xmm7,XMMWORD PTR[112+rsp]
+ movaps xmm8,XMMWORD PTR[128+rsp]
+ movaps xmm9,XMMWORD PTR[144+rsp]
+ movaps xmm10,XMMWORD PTR[160+rsp]
+ movaps xmm11,XMMWORD PTR[176+rsp]
+ movaps xmm12,XMMWORD PTR[192+rsp]
+ movaps xmm13,XMMWORD PTR[208+rsp]
+ movaps xmm14,XMMWORD PTR[224+rsp]
+ movaps xmm15,XMMWORD PTR[240+rsp]
+ lea rsp,QWORD PTR[264+rsp] ;release the 264-byte frame
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue: reload rdi/rsi from the slots written in the prologue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+aesni_xts_decrypt ENDP
+PUBLIC aesni_cbc_encrypt
+aesni_cbc_encrypt PROC PUBLIC ;CBC chaining in both directions, selected by the 6th argument; branch-target labels are not present in this view
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue: park rdi/rsi in the slots just above the return address
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx ;arg1 -> rdi: source pointer
+ mov rsi,rdx ;arg2 -> rsi: destination pointer
+ mov rdx,r8 ;arg3 -> rdx: length in bytes
+ mov rcx,r9 ;arg4 -> rcx: key schedule
+ mov r8,QWORD PTR[40+rsp] ;arg5 (stack): pointer to the 16-byte chaining value, read at entry and written back at exit
+ mov r9,QWORD PTR[48+rsp] ;arg6 (stack): zero selects the decrypt path
+ test rdx,rdx ;nothing to do for zero length
+ jz $L$cbc_ret
+ mov r10d,DWORD PTR[240+rcx] ;loop count stored at offset 240 of the key schedule
+ mov r11,rcx ;r11 = key base (rcx is advanced by the round loops)
+ test r9d,r9d
+ jz $L$cbc_decrypt
+ movups xmm2,XMMWORD PTR[r8] ;encrypt path: xmm2 = chaining value
+ mov eax,r10d
+ cmp rdx,16
+ jb $L$cbc_enc_tail
+ sub rdx,16
+ jmp $L$cbc_enc_loop
+ movups xmm3,XMMWORD PTR[rdi] ;per-block encrypt loop (label not shown): load input block
+ lea rdi,QWORD PTR[16+rdi]
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ xorps xmm3,xmm0 ;input ^ round key 0
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm3 ;... ^ previous output block
+ aesenc xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_enc1_15
+ aesenclast xmm2,xmm1
+ mov eax,r10d
+ mov rcx,r11
+ movups XMMWORD PTR[rsi],xmm2
+ lea rsi,QWORD PTR[16+rsi]
+ sub rdx,16
+ jnc $L$cbc_enc_loop
+ add rdx,16
+ jnz $L$cbc_enc_tail
+ movups XMMWORD PTR[r8],xmm2 ;write the last output block back as the new chaining value
+ jmp $L$cbc_ret
+ mov rcx,rdx ;partial final block (label not shown): rcx = leftover byte count
+ xchg rsi,rdi ;swap so the string copy goes source -> destination
+ DD 09066A4F3h ;bytes F3 A4 66 90: rep movsb, then a two-byte nop
+ mov ecx,16
+ sub rcx,rdx
+ xor eax,eax
+ DD 09066AAF3h ;bytes F3 AA 66 90: rep stosb (zero-fill up to 16 bytes), then a two-byte nop
+ lea rdi,QWORD PTR[((-16))+rdi] ;back to the start of the padded block in the destination
+ mov eax,r10d
+ mov rsi,rdi ;encrypt that block in place
+ mov rcx,r11
+ xor rdx,rdx
+ jmp $L$cbc_enc_loop
+ lea rsp,QWORD PTR[((-88))+rsp] ;decrypt path (label not shown): frame [0..63] saves xmm6-xmm9, [64..79] is scratch
+ movaps XMMWORD PTR[rsp],xmm6
+ movaps XMMWORD PTR[16+rsp],xmm7
+ movaps XMMWORD PTR[32+rsp],xmm8
+ movaps XMMWORD PTR[48+rsp],xmm9
+ movups xmm9,XMMWORD PTR[r8] ;xmm9 = chaining value
+ mov eax,r10d
+ cmp rdx,070h ;112 bytes or fewer -> tail code only
+ jbe $L$cbc_dec_tail
+ shr r10d,1 ;eight-block loop runs two rounds per iteration
+ sub rdx,070h
+ mov eax,r10d
+ movaps XMMWORD PTR[64+rsp],xmm9 ;scratch slot = value to XOR into the first block of this batch
+ jmp $L$cbc_dec_loop8_enter
+ movaps XMMWORD PTR[64+rsp],xmm0 ;loop top: xmm0 holds the previous batch's last input block, which chains into this batch
+ movups XMMWORD PTR[rsi],xmm9 ;store the previous batch's deferred eighth output block
+ lea rsi,QWORD PTR[16+rsi]
+ movups xmm0,XMMWORD PTR[rcx]
+ movups xmm2,XMMWORD PTR[rdi]
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ movdqu xmm4,XMMWORD PTR[32+rdi]
+ xorps xmm2,xmm0
+ movdqu xmm5,XMMWORD PTR[48+rdi]
+ xorps xmm3,xmm0
+ movdqu xmm6,XMMWORD PTR[64+rdi]
+ aesdec xmm2,xmm1
+ pxor xmm4,xmm0
+ movdqu xmm7,XMMWORD PTR[80+rdi]
+ aesdec xmm3,xmm1
+ pxor xmm5,xmm0
+ movdqu xmm8,XMMWORD PTR[96+rdi]
+ aesdec xmm4,xmm1
+ pxor xmm6,xmm0
+ movdqu xmm9,XMMWORD PTR[112+rdi]
+ aesdec xmm5,xmm1
+ pxor xmm7,xmm0
+ dec eax
+ aesdec xmm6,xmm1
+ pxor xmm8,xmm0
+ aesdec xmm7,xmm1
+ pxor xmm9,xmm0
+ movups xmm0,XMMWORD PTR[rcx]
+ aesdec xmm8,xmm1
+ aesdec xmm9,xmm1
+ movups xmm1,XMMWORD PTR[16+rcx]
+ call $L$dec_loop8_enter ;target is defined outside the lines shown; presumably an entry inside the 8-lane decrypt helper - confirm
+ movups xmm1,XMMWORD PTR[rdi] ;reload input blocks to use as chaining values
+ movups xmm0,XMMWORD PTR[16+rdi]
+ xorps xmm2,XMMWORD PTR[64+rsp]
+ xorps xmm3,xmm1
+ movups xmm1,XMMWORD PTR[32+rdi]
+ xorps xmm4,xmm0
+ movups xmm0,XMMWORD PTR[48+rdi]
+ xorps xmm5,xmm1
+ movups xmm1,XMMWORD PTR[64+rdi]
+ xorps xmm6,xmm0
+ movups xmm0,XMMWORD PTR[80+rdi]
+ xorps xmm7,xmm1
+ movups xmm1,XMMWORD PTR[96+rdi]
+ xorps xmm8,xmm0
+ movups xmm0,XMMWORD PTR[112+rdi] ;xmm0 = last input block of this batch (next chaining value)
+ xorps xmm9,xmm1
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ mov eax,r10d
+ movups XMMWORD PTR[64+rsi],xmm6
+ mov rcx,r11
+ movups XMMWORD PTR[80+rsi],xmm7
+ lea rdi,QWORD PTR[128+rdi]
+ movups XMMWORD PTR[96+rsi],xmm8
+ lea rsi,QWORD PTR[112+rsi] ;only seven blocks stored here; xmm9 is written at the next loop top or below
+ sub rdx,080h
+ ja $L$cbc_dec_loop8
+ movaps xmm2,xmm9 ;xmm2 = pending output block
+ movaps xmm9,xmm0 ;xmm9 = chaining value for what follows
+ add rdx,070h
+ jle $L$cbc_dec_tail_collected
+ movups XMMWORD PTR[rsi],xmm2
+ lea eax,DWORD PTR[1+r10*1+r10] ;eax = 2*r10+1: undoes the shr above when the stored count is odd
+ lea rsi,QWORD PTR[16+rsi]
+ movups xmm2,XMMWORD PTR[rdi] ;tail (label not shown): load up to seven blocks, keeping copies needed for chaining
+ movaps xmm8,xmm2
+ cmp rdx,010h
+ jbe $L$cbc_dec_one
+ movups xmm3,XMMWORD PTR[16+rdi]
+ movaps xmm7,xmm3
+ cmp rdx,020h
+ jbe $L$cbc_dec_two
+ movups xmm4,XMMWORD PTR[32+rdi]
+ movaps xmm6,xmm4
+ cmp rdx,030h
+ jbe $L$cbc_dec_three
+ movups xmm5,XMMWORD PTR[48+rdi]
+ cmp rdx,040h
+ jbe $L$cbc_dec_four
+ movups xmm6,XMMWORD PTR[64+rdi]
+ cmp rdx,050h
+ jbe $L$cbc_dec_five
+ movups xmm7,XMMWORD PTR[80+rdi]
+ cmp rdx,060h
+ jbe $L$cbc_dec_six
+ movups xmm8,XMMWORD PTR[96+rdi] ;seven blocks: use the 8-lane helper
+ movaps XMMWORD PTR[64+rsp],xmm9
+ call _aesni_decrypt8
+ movups xmm1,XMMWORD PTR[rdi]
+ movups xmm0,XMMWORD PTR[16+rdi]
+ xorps xmm2,XMMWORD PTR[64+rsp]
+ xorps xmm3,xmm1
+ movups xmm1,XMMWORD PTR[32+rdi]
+ xorps xmm4,xmm0
+ movups xmm0,XMMWORD PTR[48+rdi]
+ xorps xmm5,xmm1
+ movups xmm1,XMMWORD PTR[64+rdi]
+ xorps xmm6,xmm0
+ movups xmm0,XMMWORD PTR[80+rdi]
+ xorps xmm7,xmm1
+ movups xmm9,XMMWORD PTR[96+rdi] ;new chaining value = last input block
+ xorps xmm8,xmm0
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ movups XMMWORD PTR[80+rsi],xmm7
+ lea rsi,QWORD PTR[96+rsi]
+ movaps xmm2,xmm8 ;last output block is left in xmm2 for the collected-tail code
+ sub rdx,070h
+ jmp $L$cbc_dec_tail_collected
+ movups xmm0,XMMWORD PTR[rcx] ;one-block case (label not shown)
+ movups xmm1,XMMWORD PTR[16+rcx]
+ lea rcx,QWORD PTR[32+rcx]
+ xorps xmm2,xmm0
+ aesdec xmm2,xmm1
+ dec eax
+ movups xmm1,XMMWORD PTR[rcx]
+ lea rcx,QWORD PTR[16+rcx]
+ jnz $L$oop_dec1_16
+ aesdeclast xmm2,xmm1
+ xorps xmm2,xmm9
+ movaps xmm9,xmm8
+ sub rdx,010h
+ jmp $L$cbc_dec_tail_collected
+ xorps xmm4,xmm4 ;two-block case (label not shown): clear the unused third lane
+ call _aesni_decrypt3
+ xorps xmm2,xmm9
+ xorps xmm3,xmm8
+ movups XMMWORD PTR[rsi],xmm2
+ movaps xmm9,xmm7
+ movaps xmm2,xmm3
+ lea rsi,QWORD PTR[16+rsi]
+ sub rdx,020h
+ jmp $L$cbc_dec_tail_collected
+ call _aesni_decrypt3 ;three-block case (label not shown)
+ xorps xmm2,xmm9
+ xorps xmm3,xmm8
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm4,xmm7
+ movups XMMWORD PTR[16+rsi],xmm3
+ movaps xmm9,xmm6
+ movaps xmm2,xmm4
+ lea rsi,QWORD PTR[32+rsi]
+ sub rdx,030h
+ jmp $L$cbc_dec_tail_collected
+ call _aesni_decrypt4 ;four-block case (label not shown)
+ xorps xmm2,xmm9
+ movups xmm9,XMMWORD PTR[48+rdi]
+ xorps xmm3,xmm8
+ movups XMMWORD PTR[rsi],xmm2
+ xorps xmm4,xmm7
+ movups XMMWORD PTR[16+rsi],xmm3
+ xorps xmm5,xmm6
+ movups XMMWORD PTR[32+rsi],xmm4
+ movaps xmm2,xmm5
+ lea rsi,QWORD PTR[48+rsi]
+ sub rdx,040h
+ jmp $L$cbc_dec_tail_collected
+ xorps xmm7,xmm7 ;five-block case (label not shown): clear the unused sixth lane
+ call _aesni_decrypt6
+ movups xmm1,XMMWORD PTR[16+rdi]
+ movups xmm0,XMMWORD PTR[32+rdi]
+ xorps xmm2,xmm9
+ xorps xmm3,xmm8
+ xorps xmm4,xmm1
+ movups xmm1,XMMWORD PTR[48+rdi]
+ xorps xmm5,xmm0
+ movups xmm9,XMMWORD PTR[64+rdi]
+ xorps xmm6,xmm1
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ lea rsi,QWORD PTR[64+rsi]
+ movaps xmm2,xmm6
+ sub rdx,050h
+ jmp $L$cbc_dec_tail_collected
+ call _aesni_decrypt6 ;six-block case (label not shown)
+ movups xmm1,XMMWORD PTR[16+rdi]
+ movups xmm0,XMMWORD PTR[32+rdi]
+ xorps xmm2,xmm9
+ xorps xmm3,xmm8
+ xorps xmm4,xmm1
+ movups xmm1,XMMWORD PTR[48+rdi]
+ xorps xmm5,xmm0
+ movups xmm0,XMMWORD PTR[64+rdi]
+ xorps xmm6,xmm1
+ movups xmm9,XMMWORD PTR[80+rdi]
+ xorps xmm7,xmm0
+ movups XMMWORD PTR[rsi],xmm2
+ movups XMMWORD PTR[16+rsi],xmm3
+ movups XMMWORD PTR[32+rsi],xmm4
+ movups XMMWORD PTR[48+rsi],xmm5
+ movups XMMWORD PTR[64+rsi],xmm6
+ lea rsi,QWORD PTR[80+rsi]
+ movaps xmm2,xmm7
+ sub rdx,060h
+ jmp $L$cbc_dec_tail_collected
+ and rdx,15 ;collected tail (label not shown): xmm2 = pending last output block, xmm9 = new chaining value
+ movups XMMWORD PTR[r8],xmm9 ;write the chaining value back through arg5
+ jnz $L$cbc_dec_tail_partial
+ movups XMMWORD PTR[rsi],xmm2
+ jmp $L$cbc_dec_ret
+ movaps XMMWORD PTR[64+rsp],xmm2 ;partial output: stage the block in the scratch slot
+ mov rcx,16
+ mov rdi,rsi
+ sub rcx,rdx ;rcx = 16 - (rdx and 15) bytes to copy
+ lea rsi,QWORD PTR[64+rsp]
+ DD 09066A4F3h ;bytes F3 A4 66 90: rep movsb, then a two-byte nop
+ movaps xmm6,XMMWORD PTR[rsp] ;restore xmm6-xmm9 and drop the 88-byte frame
+ movaps xmm7,XMMWORD PTR[16+rsp]
+ movaps xmm8,XMMWORD PTR[32+rsp]
+ movaps xmm9,XMMWORD PTR[48+rsp]
+ lea rsp,QWORD PTR[88+rsp]
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue: reload rdi/rsi from the slots written in the prologue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+aesni_cbc_encrypt ENDP
+PUBLIC aesni_set_decrypt_key
+aesni_set_decrypt_key PROC PUBLIC ;builds a schedule via the encrypt-key routine, then reverses the round-key order and applies aesimc to the inner keys
+ sub rsp,8
+ call __aesni_set_encrypt_key ;label is defined outside the lines shown; presumably an internal entry of aesni_set_encrypt_key - confirm
+ shl edx,4 ;edx is scaled by 16 to become a byte offset (presumably the count written by the callee - confirm)
+ test eax,eax ;non-zero eax from the callee is passed straight through as an error
+ jnz $L$dec_key_ret
+ lea rcx,QWORD PTR[16+rdx*1+r8] ;rcx = address of the last round key; r8 = first
+ movups xmm0,XMMWORD PTR[r8] ;swap first and last round keys unchanged
+ movups xmm1,XMMWORD PTR[rcx]
+ movups XMMWORD PTR[rcx],xmm0
+ movups XMMWORD PTR[r8],xmm1
+ lea r8,QWORD PTR[16+r8]
+ lea rcx,QWORD PTR[((-16))+rcx]
+ movups xmm0,XMMWORD PTR[r8] ;inner loop (label not shown): swap a pair of keys, transforming both with aesimc
+ movups xmm1,XMMWORD PTR[rcx]
+ aesimc xmm0,xmm0
+ aesimc xmm1,xmm1
+ lea r8,QWORD PTR[16+r8]
+ lea rcx,QWORD PTR[((-16))+rcx]
+ movups XMMWORD PTR[16+rcx],xmm0
+ movups XMMWORD PTR[(-16)+r8],xmm1
+ cmp rcx,r8 ;continue until the two cursors meet
+ ja $L$dec_key_inverse
+ movups xmm0,XMMWORD PTR[r8] ;middle key: transform in place
+ aesimc xmm0,xmm0
+ movups XMMWORD PTR[rcx],xmm0
+ add rsp,8
+ DB 0F3h,0C3h ;repret
+aesni_set_decrypt_key ENDP
+PUBLIC aesni_set_encrypt_key
+aesni_set_encrypt_key PROC PUBLIC ;key expansion: rcx = user key bytes, edx = key size in bits, r8 = schedule to fill; rax = 0 ok, -1 null pointer, -2 unsupported size
+ sub rsp,8
+ mov rax,-1 ;default return for a null argument
+ test rcx,rcx
+ jz $L$enc_key_ret
+ test r8,r8
+ jz $L$enc_key_ret
+ movups xmm0,XMMWORD PTR[rcx] ;xmm0 = first 16 key bytes
+ xorps xmm4,xmm4 ;xmm4 is the shuffle scratch used by the expansion helpers
+ lea rax,QWORD PTR[16+r8] ;rax = write cursor for subsequent round keys
+ cmp edx,256
+ je $L$14rounds
+ cmp edx,192
+ je $L$12rounds
+ cmp edx,128
+ jne $L$bad_keybits
+ mov edx,9 ;128-bit path: value later stored in the schedule's count field
+ movups XMMWORD PTR[r8],xmm0 ;round key 0 = user key
+ aeskeygenassist xmm1,xmm0,01h ;ten expansion steps; immediates are 01h..80h, 1bh, 36h
+ call $L$key_expansion_128_cold ;cold entry skips the store because round key 0 was just written
+ aeskeygenassist xmm1,xmm0,02h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,04h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,08h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,010h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,020h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,040h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,080h
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,01bh
+ call $L$key_expansion_128
+ aeskeygenassist xmm1,xmm0,036h
+ call $L$key_expansion_128
+ movups XMMWORD PTR[rax],xmm0 ;final round key
+ mov DWORD PTR[80+rax],edx ;rax = r8+160 here, so this writes the count at r8+240
+ xor eax,eax ;success
+ jmp $L$enc_key_ret
+ movq xmm2,QWORD PTR[16+rcx] ;192-bit path (label not shown): xmm2 = key bytes 16..23
+ mov edx,11
+ movups XMMWORD PTR[r8],xmm0
+ aeskeygenassist xmm1,xmm2,01h
+ call $L$key_expansion_192a_cold
+ aeskeygenassist xmm1,xmm2,02h
+ call $L$key_expansion_192b
+ aeskeygenassist xmm1,xmm2,04h
+ call $L$key_expansion_192a
+ aeskeygenassist xmm1,xmm2,08h
+ call $L$key_expansion_192b
+ aeskeygenassist xmm1,xmm2,010h
+ call $L$key_expansion_192a
+ aeskeygenassist xmm1,xmm2,020h
+ call $L$key_expansion_192b
+ aeskeygenassist xmm1,xmm2,040h
+ call $L$key_expansion_192a
+ aeskeygenassist xmm1,xmm2,080h
+ call $L$key_expansion_192b
+ movups XMMWORD PTR[rax],xmm0
+ mov DWORD PTR[48+rax],edx ;count field, addressed relative to the cursor
+ xor rax,rax ;success
+ jmp $L$enc_key_ret
+ movups xmm2,XMMWORD PTR[16+rcx] ;256-bit path (label not shown): xmm2 = key bytes 16..31
+ mov edx,13
+ lea rax,QWORD PTR[16+rax]
+ movups XMMWORD PTR[r8],xmm0 ;round keys 0 and 1 are the user key itself
+ movups XMMWORD PTR[16+r8],xmm2
+ aeskeygenassist xmm1,xmm2,01h
+ call $L$key_expansion_256a_cold
+ aeskeygenassist xmm1,xmm0,01h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,02h
+ call $L$key_expansion_256a
+ aeskeygenassist xmm1,xmm0,02h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,04h
+ call $L$key_expansion_256a
+ aeskeygenassist xmm1,xmm0,04h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,08h
+ call $L$key_expansion_256a
+ aeskeygenassist xmm1,xmm0,08h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,010h
+ call $L$key_expansion_256a
+ aeskeygenassist xmm1,xmm0,010h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,020h
+ call $L$key_expansion_256a
+ aeskeygenassist xmm1,xmm0,020h
+ call $L$key_expansion_256b
+ aeskeygenassist xmm1,xmm2,040h
+ call $L$key_expansion_256a
+ movups XMMWORD PTR[rax],xmm0
+ mov DWORD PTR[16+rax],edx ;count field, addressed relative to the cursor
+ xor rax,rax ;success
+ jmp $L$enc_key_ret
+ mov rax,-2 ;unsupported key size (label not shown)
+ add rsp,8 ;common return (label not shown)
+ DB 0F3h,0C3h ;repret
+ movups XMMWORD PTR[rax],xmm0 ;helper (labels not shown; presumably the 128-bit step): store current key, then derive the next one in xmm0
+ lea rax,QWORD PTR[16+rax]
+ shufps xmm4,xmm0,16
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ xorps xmm0,xmm4
+ shufps xmm1,xmm1,255 ;broadcast dword 3 of the aeskeygenassist result
+ xorps xmm0,xmm1
+ DB 0F3h,0C3h ;repret
+ movups XMMWORD PTR[rax],xmm0 ;helper (labels not shown; presumably the 192-bit "a" step): store xmm0, then update xmm0 and xmm2
+ lea rax,QWORD PTR[16+rax]
+ movaps xmm5,xmm2 ;keep the old xmm2 for the following "b" step
+ shufps xmm4,xmm0,16
+ movdqa xmm3,xmm2
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ pslldq xmm3,4
+ xorps xmm0,xmm4
+ pshufd xmm1,xmm1,85 ;broadcast dword 1 of the aeskeygenassist result
+ pxor xmm2,xmm3
+ pxor xmm0,xmm1
+ pshufd xmm3,xmm0,255
+ pxor xmm2,xmm3
+ DB 0F3h,0C3h ;repret
+ movaps xmm3,xmm0 ;helper (label not shown; presumably the 192-bit "b" step): emit two round keys assembled from xmm5, xmm0 and xmm2
+ shufps xmm5,xmm0,68
+ movups XMMWORD PTR[rax],xmm5
+ shufps xmm3,xmm2,78
+ movups XMMWORD PTR[16+rax],xmm3
+ lea rax,QWORD PTR[32+rax]
+ jmp $L$key_expansion_192b_warm ;continues inside the "a" helper above (label not shown)
+ movups XMMWORD PTR[rax],xmm2 ;helper (labels not shown; presumably the 256-bit "a" step): store xmm2, derive next xmm0
+ lea rax,QWORD PTR[16+rax]
+ shufps xmm4,xmm0,16
+ xorps xmm0,xmm4
+ shufps xmm4,xmm0,140
+ xorps xmm0,xmm4
+ shufps xmm1,xmm1,255
+ xorps xmm0,xmm1
+ DB 0F3h,0C3h ;repret
+ movups XMMWORD PTR[rax],xmm0 ;helper (label not shown; presumably the 256-bit "b" step): store xmm0, derive next xmm2
+ lea rax,QWORD PTR[16+rax]
+ shufps xmm4,xmm2,16
+ xorps xmm2,xmm4
+ shufps xmm4,xmm2,140
+ xorps xmm2,xmm4
+ shufps xmm1,xmm1,170 ;broadcast dword 2 of the aeskeygenassist result
+ xorps xmm2,xmm1
+ DB 0F3h,0C3h ;repret
+aesni_set_encrypt_key ENDP
+DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 ;byte indices 15..0 (reverse order); presumably a byte-swap shuffle mask - its label and users are outside this view
+ DD 6,6,6,0 ;constant row; label and users are not visible here
+ DD 1,0,0,0 ;constant row; label and users are not visible here
+ DD 087h,0,1,0 ;presumably the $L$xts_magic mask loaded into xmm8 by the XTS routines - confirm against the label
+DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 ;NUL-terminated ASCII credit string: "AES for Intel AES-NI, CRYPTOGAMS by" plus an e-mail address in angle brackets
+DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+DB 115,108,46,111,114,103,62,0
+EXTERN __imp_RtlVirtualUnwind:NEAR
+ecb_se_handler PROC PRIVATE ;exception handler registered for the ECB routine in the unwind tables; presumably r8 = CONTEXT record and r9 = dispatcher context per the Win64 handler convention - confirm
+ push rsi ;save the non-volatile registers and flags used by the shared tail
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64 ;room for the outgoing call made in the shared tail
+ mov rax,QWORD PTR[152+r8] ;rax = value at offset 152 of the record at r8 (presumably CONTEXT.Rsp - confirm)
+ jmp $L$common_seh_tail ;shared tail label is not shown; it appears to live in cbc_se_handler
+ecb_se_handler ENDP
+ccm64_se_handler PROC PRIVATE ;exception handler for the CCM64 routines; uses a pair of image-relative bounds from the handler data to decide whether the frame is live
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+ mov rax,QWORD PTR[120+r8] ;offset 120 of the record at r8 (presumably CONTEXT.Rax - confirm)
+ mov rbx,QWORD PTR[248+r8] ;offset 248 (presumably CONTEXT.Rip - confirm)
+ mov rsi,QWORD PTR[8+r9] ;base address added to the two 32-bit bounds below
+ mov r11,QWORD PTR[56+r9] ;pointer to the handler data (the two DD imagerel values in the unwind info)
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi] ;absolute address of the first bound
+ cmp rbx,r10
+ jb $L$common_seh_tail ;fault address below the first bound: use rax as loaded above
+ mov rax,QWORD PTR[152+r8] ;offset 152 (presumably CONTEXT.Rsp - confirm)
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi] ;absolute address of the second bound
+ cmp rbx,r10
+ jae $L$common_seh_tail ;fault address at or past the second bound
+ lea rsi,QWORD PTR[rax] ;copy 8 qwords from the faulting frame into the record at offset 512
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,8
+ DD 0a548f3fch ;bytes FC F3 48 A5: cld, rep movsq
+ lea rax,QWORD PTR[88+rax] ;step over an 88-byte frame
+ jmp $L$common_seh_tail
+ccm64_se_handler ENDP
+ctr32_se_handler PROC PRIVATE ;exception handler for the CTR32 routine; bounds are the fixed labels $L$ctr32_body and $L$ctr32_ret
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+ mov rax,QWORD PTR[120+r8] ;offset 120 of the record at r8 (presumably CONTEXT.Rax - confirm)
+ mov rbx,QWORD PTR[248+r8] ;offset 248 (presumably CONTEXT.Rip - confirm)
+ lea r10,QWORD PTR[$L$ctr32_body]
+ cmp rbx,r10
+ jb $L$common_seh_tail ;fault address below the first bound
+ mov rax,QWORD PTR[152+r8] ;offset 152 (presumably CONTEXT.Rsp - confirm)
+ lea r10,QWORD PTR[$L$ctr32_ret]
+ cmp rbx,r10
+ jae $L$common_seh_tail ;fault address at or past the second bound
+ lea rsi,QWORD PTR[32+rax] ;copy 20 qwords starting 32 bytes into the frame to offset 512 of the record
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,20
+ DD 0a548f3fch ;bytes FC F3 48 A5: cld, rep movsq
+ lea rax,QWORD PTR[200+rax] ;step over a 200-byte frame
+ jmp $L$common_seh_tail
+ctr32_se_handler ENDP
+xts_se_handler PROC PRIVATE ;exception handler for the XTS routines; the copy offset (96), count (20 qwords) and 264-byte step mirror the frame the XTS procedures build
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+ mov rax,QWORD PTR[120+r8] ;offset 120 of the record at r8 (presumably CONTEXT.Rax - confirm)
+ mov rbx,QWORD PTR[248+r8] ;offset 248 (presumably CONTEXT.Rip - confirm)
+ mov rsi,QWORD PTR[8+r9] ;base address added to the two 32-bit bounds below
+ mov r11,QWORD PTR[56+r9] ;pointer to the handler data (the two DD imagerel values in the unwind info)
+ mov r10d,DWORD PTR[r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$common_seh_tail ;fault address below the first bound
+ mov rax,QWORD PTR[152+r8] ;offset 152 (presumably CONTEXT.Rsp - confirm)
+ mov r10d,DWORD PTR[4+r11]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$common_seh_tail ;fault address at or past the second bound
+ lea rsi,QWORD PTR[96+rax] ;xmm6-xmm15 save area starts 96 bytes into the XTS frame
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,20 ;20 qwords = ten 16-byte registers
+ DD 0a548f3fch ;bytes FC F3 48 A5: cld, rep movsq
+ lea rax,QWORD PTR[((104+160))+rax] ;step over the 264-byte frame
+ jmp $L$common_seh_tail
+xts_se_handler ENDP
+cbc_se_handler PROC PRIVATE ;exception handler for the CBC routine; this procedure also contains the unwind tail the other handlers jump to (its labels are not shown)
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64
+ mov rax,QWORD PTR[152+r8] ;offset 152 of the record at r8 (presumably CONTEXT.Rsp - confirm)
+ mov rbx,QWORD PTR[248+r8] ;offset 248 (presumably CONTEXT.Rip - confirm)
+ lea r10,QWORD PTR[$L$cbc_decrypt]
+ cmp rbx,r10
+ jb $L$common_seh_tail ;fault before the decrypt path: no frame was allocated
+ lea r10,QWORD PTR[$L$cbc_decrypt_body]
+ cmp rbx,r10
+ jb $L$restore_cbc_rax ;fault between the two labels: take rax from offset 120 instead
+ lea r10,QWORD PTR[$L$cbc_ret]
+ cmp rbx,r10
+ jae $L$common_seh_tail
+ lea rsi,QWORD PTR[rax] ;copy 8 qwords (the xmm6-xmm9 save area of the 88-byte CBC frame) to offset 512 of the record
+ lea rdi,QWORD PTR[512+r8]
+ mov ecx,8
+ DD 0a548f3fch ;bytes FC F3 48 A5: cld, rep movsq
+ lea rax,QWORD PTR[88+rax] ;step over the 88-byte frame
+ jmp $L$common_seh_tail
+ mov rax,QWORD PTR[120+r8] ;presumably the $L$restore_cbc_rax target (label not shown)
+ mov rdi,QWORD PTR[8+rax] ;presumably the $L$common_seh_tail target: reload the rdi/rsi values the prologues stored at [8+rsp]/[16+rsp]
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax ;write the recovered stack pointer and rsi/rdi back into the record (offsets 152/168/176)
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+ mov rdi,QWORD PTR[40+r9] ;copy 154 qwords of the record at r8 to the buffer referenced at offset 40 of r9
+ mov rsi,r8
+ mov ecx,154
+ DD 0a548f3fch ;bytes FC F3 48 A5: cld, rep movsq
+ mov rsi,r9 ;rsi = second structure; its fields become the arguments of the call below
+ xor rcx,rcx ;first argument = 0
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10 ;stack arguments five to eight
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+ mov eax,1 ;handler return value (presumably ExceptionContinueSearch - confirm)
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+cbc_se_handler ENDP
+.text$ ENDS
+ DD imagerel $L$SEH_begin_aesni_ecb_encrypt ;table of image-relative triples, one per routine: start label, end label, unwind-info label (presumably RUNTIME_FUNCTION entries - confirm)
+ DD imagerel $L$SEH_end_aesni_ecb_encrypt
+ DD imagerel $L$SEH_info_ecb
+ DD imagerel $L$SEH_begin_aesni_ccm64_encrypt_blocks
+ DD imagerel $L$SEH_end_aesni_ccm64_encrypt_blocks
+ DD imagerel $L$SEH_info_ccm64_enc
+ DD imagerel $L$SEH_begin_aesni_ccm64_decrypt_blocks
+ DD imagerel $L$SEH_end_aesni_ccm64_decrypt_blocks
+ DD imagerel $L$SEH_info_ccm64_dec
+ DD imagerel $L$SEH_begin_aesni_ctr32_encrypt_blocks
+ DD imagerel $L$SEH_end_aesni_ctr32_encrypt_blocks
+ DD imagerel $L$SEH_info_ctr32
+ DD imagerel $L$SEH_begin_aesni_xts_encrypt
+ DD imagerel $L$SEH_end_aesni_xts_encrypt
+ DD imagerel $L$SEH_info_xts_enc
+ DD imagerel $L$SEH_begin_aesni_xts_decrypt
+ DD imagerel $L$SEH_end_aesni_xts_decrypt
+ DD imagerel $L$SEH_info_xts_dec
+ DD imagerel $L$SEH_begin_aesni_cbc_encrypt
+ DD imagerel $L$SEH_end_aesni_cbc_encrypt
+ DD imagerel $L$SEH_info_cbc
+ DD imagerel aesni_set_decrypt_key ;the two key-setup routines start at their public symbols and share one info record
+ DD imagerel $L$SEH_end_set_decrypt_key
+ DD imagerel $L$SEH_info_key
+ DD imagerel aesni_set_encrypt_key
+ DD imagerel $L$SEH_end_set_encrypt_key
+ DD imagerel $L$SEH_info_key
+.pdata ENDS
+DB 9,0,0,0 ;info record for ECB (its $L$SEH_info_* label is not shown): 4-byte header followed by the handler address; presumably header byte 9 = version 1 with the exception-handler flag - confirm
+ DD imagerel ecb_se_handler
+DB 9,0,0,0 ;record for CCM64 encrypt
+ DD imagerel ccm64_se_handler
+ DD imagerel $L$ccm64_enc_body,imagerel $L$ccm64_enc_ret ;handler data: the two bounds ccm64_se_handler reads through [56+r9]
+DB 9,0,0,0 ;record for CCM64 decrypt
+ DD imagerel ccm64_se_handler
+ DD imagerel $L$ccm64_dec_body,imagerel $L$ccm64_dec_ret
+DB 9,0,0,0 ;record for CTR32 (no handler data; ctr32_se_handler uses fixed labels)
+ DD imagerel ctr32_se_handler
+DB 9,0,0,0 ;record for XTS encrypt
+ DD imagerel xts_se_handler
+ DD imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue ;handler data: the two bounds xts_se_handler reads through [56+r9]
+DB 9,0,0,0 ;record for XTS decrypt
+ DD imagerel xts_se_handler
+ DD imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue
+DB 9,0,0,0 ;record for CBC (no handler data; cbc_se_handler uses fixed labels)
+ DD imagerel cbc_se_handler
+DB 001h,004h,001h,000h ;record shared by the key-setup routines: no handler; presumably prologue size 4 with one unwind code - confirm
+DB 004h,002h,000h,000h ;presumably the unwind code for the 8-byte stack allocation ("sub rsp,8") those routines begin with - confirm
+.xdata ENDS
diff --git a/crypto/libressl/crypto/aes/aesni-mingw64-x86_64.S b/crypto/libressl/crypto/aes/aesni-mingw64-x86_64.S
new file mode 100644
index 0000000..0a82a10
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aesni-mingw64-x86_64.S
@@ -0,0 +1,3008 @@
+#include "x86_arch.h"
+/*
+ * aesni_encrypt: encrypt a single 16-byte block.
+ *   %rcx  address of the input block
+ *   %rdx  address of the output block
+ *   %r8   key schedule: 16-byte round keys from offset 0, loop count
+ *         (number of aesenc rounds) at offset 240
+ * Clobbers %eax, %r8, %xmm0-%xmm2 and the flags.
+ */
+.globl aesni_encrypt
+.def aesni_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+aesni_encrypt:
+ movups (%rcx),%xmm2
+ movl 240(%r8),%eax
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm2
+/* One aesenc round per iteration; the next round key is fetched while
+ * %eax counts down. jnz tests the flags left by decl (the moves and lea
+ * in between do not touch them). */
+.Loop_enc1_1:
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_1
+ aesenclast %xmm1,%xmm2
+ movups %xmm2,(%rdx)
+ retq
+/*
+ * aesni_decrypt: decrypt a single 16-byte block.
+ *   %rcx  address of the input block
+ *   %rdx  address of the output block
+ *   %r8   key schedule: 16-byte round keys from offset 0, loop count
+ *         (number of aesdec rounds) at offset 240
+ * Clobbers %eax, %r8, %xmm0-%xmm2 and the flags.
+ */
+.globl aesni_decrypt
+.def aesni_decrypt; .scl 2; .type 32; .endef
+.p2align 4
+aesni_decrypt:
+ movups (%rcx),%xmm2
+ movl 240(%r8),%eax
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm2
+/* One aesdec round per iteration; the next round key is fetched while
+ * %eax counts down. jnz tests the flags left by decl. */
+.Loop_dec1_2:
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_dec1_2
+ aesdeclast %xmm1,%xmm2
+ movups %xmm2,(%rdx)
+ retq
+/*
+ * _aesni_encrypt3: internal helper, encrypts the three blocks held in
+ * %xmm2-%xmm4 in place.
+ *   %rcx  key schedule (advanced while running)
+ *   %eax  loop count supplied by the caller; halved here because each
+ *         loop iteration applies two round keys
+ * Clobbers %eax, %rcx, %xmm0, %xmm1 and the flags.
+ */
+.def _aesni_encrypt3; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_encrypt3:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+/* Two rounds per iteration, alternating the %xmm1 and %xmm0 round keys. */
+.Lenc_loop3:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop3
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ retq
+/*
+ * _aesni_decrypt3: internal helper, decrypts the three blocks held in
+ * %xmm2-%xmm4 in place.
+ *   %rcx  key schedule (advanced while running)
+ *   %eax  loop count supplied by the caller; halved here because each
+ *         loop iteration applies two round keys
+ * Clobbers %eax, %rcx, %xmm0, %xmm1 and the flags.
+ */
+.def _aesni_decrypt3; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_decrypt3:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+/* Two rounds per iteration, alternating the %xmm1 and %xmm0 round keys. */
+.Ldec_loop3:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop3
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ retq
+/*
+ * _aesni_encrypt4: internal helper, encrypts the four blocks held in
+ * %xmm2-%xmm5 in place.
+ *   %rcx  key schedule (advanced while running)
+ *   %eax  loop count supplied by the caller; halved here because each
+ *         loop iteration applies two round keys
+ * Clobbers %eax, %rcx, %xmm0, %xmm1 and the flags.
+ */
+.def _aesni_encrypt4; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_encrypt4:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+/* Two rounds per iteration, alternating the %xmm1 and %xmm0 round keys. */
+.Lenc_loop4:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop4
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ retq
+/*
+ * _aesni_decrypt4: internal helper, decrypts the four blocks held in
+ * %xmm2-%xmm5 in place.
+ *   %rcx  key schedule (advanced while running)
+ *   %eax  loop count supplied by the caller; halved here because each
+ *         loop iteration applies two round keys
+ * Clobbers %eax, %rcx, %xmm0, %xmm1 and the flags.
+ */
+.def _aesni_decrypt4; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_decrypt4:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+/* Two rounds per iteration, alternating the %xmm1 and %xmm0 round keys. */
+.Ldec_loop4:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop4
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ retq
+/*
+ * _aesni_encrypt6: internal helper, encrypts the six blocks held in
+ * %xmm2-%xmm7 in place.
+ *   %rcx  key schedule (advanced while running)
+ *   %eax  loop count supplied by the caller; halved here because each
+ *         loop iteration applies two round keys
+ * The initial key XOR is interleaved with the first aesenc round, so
+ * execution joins the loop half-way, at .Lenc_loop6_enter.
+ * Clobbers %eax, %rcx, %xmm0, %xmm1 and the flags.
+ */
+.def _aesni_encrypt6; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_encrypt6:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesenc %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm7
+ jmp .Lenc_loop6_enter
+.p2align 4
+.Lenc_loop6:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+/* Entry point for the first pass: the %xmm1 round was already applied
+ * above, so start by loading the next %xmm1 key. */
+.Lenc_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop6
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ retq
+/*
+ * _aesni_decrypt6: internal helper, decrypts the six blocks held in
+ * %xmm2-%xmm7 in place.
+ *   %rcx  key schedule (advanced while running)
+ *   %eax  loop count supplied by the caller; halved here because each
+ *         loop iteration applies two round keys
+ * The initial key XOR is interleaved with the first aesdec round, so
+ * execution joins the loop half-way, at .Ldec_loop6_enter.
+ * Clobbers %eax, %rcx, %xmm0, %xmm1 and the flags.
+ */
+.def _aesni_decrypt6; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_decrypt6:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm7
+ jmp .Ldec_loop6_enter
+.p2align 4
+.Ldec_loop6:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+/* Entry point for the first pass: the %xmm1 round was already applied
+ * above, so start by loading the next %xmm1 key. */
+.Ldec_loop6_enter:
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop6
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ retq
+/*
+ * _aesni_encrypt8: internal helper, encrypts the eight blocks held in
+ * %xmm2-%xmm9 in place.
+ *   %rcx  key schedule (advanced while running)
+ *   %eax  loop count supplied by the caller; halved here because each
+ *         loop iteration applies two round keys
+ * The initial key XOR is interleaved with the first aesenc round and the
+ * next %xmm1 key is preloaded, so execution joins the loop at
+ * .Lenc_loop8_enter, just before the %xmm0 round.
+ * Clobbers %eax, %rcx, %xmm0, %xmm1 and the flags.
+ */
+.def _aesni_encrypt8; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_encrypt8:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesenc %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesenc %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp .Lenc_loop8_enter
+.p2align 4
+.Lenc_loop8:
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+/* Entry point for the first pass: both round keys are already loaded. */
+.Lenc_loop8_enter:
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ aesenc %xmm0,%xmm8
+ aesenc %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop8
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ aesenc %xmm1,%xmm8
+ aesenc %xmm1,%xmm9
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ aesenclast %xmm0,%xmm4
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ aesenclast %xmm0,%xmm8
+ aesenclast %xmm0,%xmm9
+ retq
+/*
+ * _aesni_decrypt8: internal helper, decrypts the eight blocks held in
+ * %xmm2-%xmm9 in place.
+ *   %rcx  key schedule (advanced while running)
+ *   %eax  loop count supplied by the caller; halved here because each
+ *         loop iteration applies two round keys
+ * The initial key XOR is interleaved with the first aesdec round and the
+ * next %xmm1 key is preloaded, so execution joins the loop at
+ * .Ldec_loop8_enter, just before the %xmm0 round.
+ * Clobbers %eax, %rcx, %xmm0, %xmm1 and the flags.
+ */
+.def _aesni_decrypt8; .scl 3; .type 32; .endef
+.p2align 4
+_aesni_decrypt8:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ jmp .Ldec_loop8_enter
+.p2align 4
+.Ldec_loop8:
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+/* Entry point for the first pass: both round keys are already loaded. */
+.Ldec_loop8_enter:
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ aesdec %xmm0,%xmm8
+ aesdec %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop8
+ aesdec %xmm1,%xmm2
+ aesdec %xmm1,%xmm3
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ aesdeclast %xmm0,%xmm2
+ aesdeclast %xmm0,%xmm3
+ aesdeclast %xmm0,%xmm4
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ aesdeclast %xmm0,%xmm8
+ aesdeclast %xmm0,%xmm9
+ retq
+/*
+ * aesni_ecb_encrypt: ECB encrypt or decrypt of whole 16-byte blocks.
+ * The prologue spills %rdi/%rsi to 8(%rsp)/16(%rsp), moves the incoming
+ * %rcx,%rdx,%r8,%r9 into %rdi,%rsi,%rdx,%rcx and loads a fifth argument
+ * from 40(%rsp) into %r8. After that:
+ *   %rdi  input pointer          %rsi  output pointer
+ *   %rdx  byte length (rounded down to a multiple of 16)
+ *   %rcx  key schedule (loop count at offset 240)
+ *   %r8d  non-zero selects encryption, zero selects decryption
+ * %r11/%r10d keep copies of the key pointer and loop count so they can
+ * be reloaded before each helper call.
+ * NOTE(review): every branch target used below (.Lecb_ret, .Lecb_decrypt,
+ * .Lecb_enc_tail, .Loop_enc1_3, ...) and the function's own entry label
+ * have no visible definition in this listing -- confirm they were not lost.
+ */
+.globl aesni_ecb_encrypt
+.def aesni_ecb_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+/* Drop any partial trailing block; nothing to do if no full block remains. */
+ andq $-16,%rdx
+ jz .Lecb_ret
+ movl 240(%rcx),%eax
+ movups (%rcx),%xmm0
+ movq %rcx,%r11
+ movl %eax,%r10d
+ testl %r8d,%r8d
+ jz .Lecb_decrypt
+/* Encrypt path: fewer than 8 blocks goes straight to the tail code. */
+ cmpq $128,%rdx
+ jb .Lecb_enc_tail
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp .Lecb_enc_loop8_enter
+.p2align 4
+/* 8-block loop body: store the eight finished blocks while loading the
+ * next eight, and reload the key pointer / loop count for the helper. */
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ call _aesni_encrypt8
+ subq $128,%rdx
+ jnc .Lecb_enc_loop8
+/* Flush the last full group of eight, then undo the final subtraction to
+ * recover the number of leftover bytes. */
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx
+ jz .Lecb_ret
+/* Encrypt tail: load blocks one at a time and branch on the remaining
+ * length (each cmpq feeds both the jb and the following je). */
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb .Lecb_enc_one
+ movups 16(%rdi),%xmm3
+ je .Lecb_enc_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb .Lecb_enc_three
+ movups 48(%rdi),%xmm5
+ je .Lecb_enc_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb .Lecb_enc_five
+ movups 80(%rdi),%xmm7
+ je .Lecb_enc_six
+/* Fall-through case: seven blocks, run through the 8-block helper and
+ * store only the first seven results. */
+ movdqu 96(%rdi),%xmm8
+ call _aesni_encrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* One block: inline single-block round loop. */
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_3
+ aesenclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* Two blocks: 3-block helper with the unused third lane zeroed. */
+ xorps %xmm4,%xmm4
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* Three blocks. */
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* Four blocks. */
+ call _aesni_encrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* Five blocks: 6-block helper with the unused sixth lane zeroed. */
+ xorps %xmm7,%xmm7
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* Six blocks. */
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* Decrypt path: same structure as the encrypt path above, using the
+ * _aesni_decrypt* helpers. */
+ cmpq $128,%rdx
+ jb .Lecb_dec_tail
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp .Lecb_dec_loop8_enter
+.p2align 4
+/* 8-block decrypt loop body: store previous results, load next inputs. */
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ call _aesni_decrypt8
+ movups (%r11),%xmm0
+ subq $128,%rdx
+ jnc .Lecb_dec_loop8
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx
+ jz .Lecb_ret
+/* Decrypt tail: dispatch on the remaining length, as in the encrypt tail. */
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb .Lecb_dec_one
+ movups 16(%rdi),%xmm3
+ je .Lecb_dec_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb .Lecb_dec_three
+ movups 48(%rdi),%xmm5
+ je .Lecb_dec_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb .Lecb_dec_five
+ movups 80(%rdi),%xmm7
+ je .Lecb_dec_six
+/* Fall-through case: seven blocks. */
+ movups 96(%rdi),%xmm8
+ movups (%rcx),%xmm0
+ call _aesni_decrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* One block: inline single-block round loop. */
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_4
+ aesdeclast %xmm1,%xmm2
+ movups %xmm2,(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* Two blocks: 3-block helper with the unused third lane zeroed. */
+ xorps %xmm4,%xmm4
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* Three blocks. */
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* Four blocks. */
+ call _aesni_decrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* Five blocks: 6-block helper with the unused sixth lane zeroed. */
+ xorps %xmm7,%xmm7
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp .Lecb_ret
+.p2align 4
+/* Six blocks; falls through into the common exit. */
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+/* Common exit: restore the %rdi/%rsi values spilled in the prologue. */
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+/*
+ * aesni_ccm64_encrypt_blocks: per 16-byte block, encrypts a counter block
+ * and a running 16-byte state in two interleaved lanes, XORs the encrypted
+ * counter into the input block and writes the result.
+ * After the prologue's register shuffle (args 5 and 6 come from
+ * 40(%rsp)/48(%rsp)):
+ *   %rdi  input pointer      %rsi  output pointer
+ *   %rdx  number of blocks (decremented once per block)
+ *   %rcx  key schedule (loop count at offset 240)
+ *   %r8   16-byte counter block, loaded into %xmm9
+ *   %r9   16-byte running state, loaded into %xmm3 and written back at the
+ *         end (presumably the CCM CBC-MAC value -- confirm with callers)
+ * 88 bytes of stack hold the saved %xmm6-%xmm9.
+ * NOTE(review): the entry label and the branch targets (.Lccm64_enc_outer,
+ * .Lccm64_enc2_loop) have no visible definition in this listing -- confirm
+ * they were not lost.
+ */
+.globl aesni_ccm64_encrypt_blocks
+.def aesni_ccm64_encrypt_blocks; .scl 2; .type 32; .endef
+.p2align 4
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+ leaq -88(%rsp),%rsp
+ movaps %xmm6,(%rsp)
+ movaps %xmm7,16(%rsp)
+ movaps %xmm8,32(%rsp)
+ movaps %xmm9,48(%rsp)
+ movl 240(%rcx),%eax
+ movdqu (%r8),%xmm9
+ movdqa .Lincrement64(%rip),%xmm6
+ movdqa .Lbswap_mask(%rip),%xmm7
+/* Halve the loop count: the inner loop applies two round keys per pass. */
+ shrl $1,%eax
+ leaq 0(%rcx),%r11
+ movdqu (%r9),%xmm3
+ movdqa %xmm9,%xmm2
+ movl %eax,%r10d
+/* pshufb %xmm7,%xmm9 : shuffle the counter bytes with .Lbswap_mask so
+ * paddq can increment it. */
+.byte 102,68,15,56,0,207
+ jmp .Lccm64_enc_outer
+.p2align 4
+/* Per-block setup: %xmm2 = counter ^ key0, %xmm3 ^= input ^ key0. */
+ movups (%r11),%xmm0
+ movl %r10d,%eax
+ movups (%rdi),%xmm8
+ xorps %xmm0,%xmm2
+ movups 16(%r11),%xmm1
+ xorps %xmm8,%xmm0
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm3
+ movups (%rcx),%xmm0
+/* Two-lane round loop over %xmm2 and %xmm3. */
+ aesenc %xmm1,%xmm2
+ decl %eax
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz .Lccm64_enc2_loop
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+/* Advance the counter while the last rounds complete. */
+ paddq %xmm6,%xmm9
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ decq %rdx
+ leaq 16(%rdi),%rdi
+/* output = input ^ encrypted counter */
+ xorps %xmm2,%xmm8
+ movdqa %xmm9,%xmm2
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+/* pshufb %xmm7,%xmm2 : shuffle the next counter block back through the
+ * mask. The jnz below still tests the flags from decq %rdx. */
+.byte 102,15,56,0,215
+ jnz .Lccm64_enc_outer
+/* Write back the running state and restore saved registers. */
+ movups %xmm3,(%r9)
+ movaps (%rsp),%xmm6
+ movaps 16(%rsp),%xmm7
+ movaps 32(%rsp),%xmm8
+ movaps 48(%rsp),%xmm9
+ leaq 88(%rsp),%rsp
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+/*
+ * aesni_ccm64_decrypt_blocks: decrypt counterpart of
+ * aesni_ccm64_encrypt_blocks. The first counter block is encrypted on its
+ * own; afterwards each pass XORs the encrypted counter into the input
+ * block, stores it, then encrypts the next counter and the running state
+ * (XORed with the block just produced) in two interleaved lanes.
+ * After the prologue's register shuffle (args 5 and 6 come from
+ * 40(%rsp)/48(%rsp)):
+ *   %rdi  input pointer      %rsi  output pointer
+ *   %rdx  number of blocks   %rcx  key schedule (loop count at offset 240)
+ *   %r8   16-byte counter block, loaded into %xmm9
+ *   %r9   16-byte running state, loaded into %xmm3 and written back at the
+ *         end (presumably the CCM CBC-MAC value -- confirm with callers)
+ * 88 bytes of stack hold the saved %xmm6-%xmm9.
+ * NOTE(review): the entry label and the branch targets (.Loop_enc1_5,
+ * .Lccm64_dec_outer, .Lccm64_dec_break, .Lccm64_dec2_loop, .Loop_enc1_6)
+ * have no visible definition in this listing -- confirm they were not lost.
+ */
+.globl aesni_ccm64_decrypt_blocks
+.def aesni_ccm64_decrypt_blocks; .scl 2; .type 32; .endef
+.p2align 4
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+ leaq -88(%rsp),%rsp
+ movaps %xmm6,(%rsp)
+ movaps %xmm7,16(%rsp)
+ movaps %xmm8,32(%rsp)
+ movaps %xmm9,48(%rsp)
+ movl 240(%rcx),%eax
+ movups (%r8),%xmm9
+ movdqu (%r9),%xmm3
+ movdqa .Lincrement64(%rip),%xmm6
+ movdqa .Lbswap_mask(%rip),%xmm7
+ movaps %xmm9,%xmm2
+ movl %eax,%r10d
+ movq %rcx,%r11
+/* pshufb %xmm7,%xmm9 : shuffle the counter bytes with .Lbswap_mask. */
+.byte 102,68,15,56,0,207
+/* Encrypt the first counter block (single-lane round loop). */
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_5
+ aesenclast %xmm1,%xmm2
+ movups (%rdi),%xmm8
+ paddq %xmm6,%xmm9
+ leaq 16(%rdi),%rdi
+ jmp .Lccm64_dec_outer
+.p2align 4
+/* Outer loop: output = input ^ encrypted counter; stop after the last
+ * block. */
+ xorps %xmm2,%xmm8
+ movdqa %xmm9,%xmm2
+ movl %r10d,%eax
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+/* pshufb %xmm7,%xmm2 */
+.byte 102,15,56,0,215
+ subq $1,%rdx
+ jz .Lccm64_dec_break
+/* More blocks follow: run the next counter (%xmm2) and the running state
+ * XORed with the block just written (%xmm3) through the two-lane loop. */
+ movups (%r11),%xmm0
+ shrl $1,%eax
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm8,%xmm3
+ movups (%rcx),%xmm0
+ aesenc %xmm1,%xmm2
+ decl %eax
+ aesenc %xmm1,%xmm3
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm3
+ movups 0(%rcx),%xmm0
+ jnz .Lccm64_dec2_loop
+ movups (%rdi),%xmm8
+ paddq %xmm6,%xmm9
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ leaq 16(%rdi),%rdi
+ aesenclast %xmm0,%xmm2
+ aesenclast %xmm0,%xmm3
+ jmp .Lccm64_dec_outer
+.p2align 4
+/* Last block done: fold it into the running state with one more
+ * single-lane encryption (%eax was reloaded from %r10d above). */
+ movups (%r11),%xmm0
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%r11
+ xorps %xmm8,%xmm3
+ aesenc %xmm1,%xmm3
+ decl %eax
+ movups (%r11),%xmm1
+ leaq 16(%r11),%r11
+ jnz .Loop_enc1_6
+ aesenclast %xmm1,%xmm3
+/* Write back the running state and restore saved registers. */
+ movups %xmm3,(%r9)
+ movaps (%rsp),%xmm6
+ movaps 16(%rsp),%xmm7
+ movaps 32(%rsp),%xmm8
+ movaps 48(%rsp),%xmm9
+ leaq 88(%rsp),%rsp
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+/*
+ * aesni_ctr32_encrypt_blocks: counter-mode keystream XOR over whole
+ * 16-byte blocks, six blocks per main-loop pass.
+ * After the prologue's register shuffle (arg 5 comes from 40(%rsp)):
+ *   %rdi  input pointer      %rsi  output pointer
+ *   %rdx  number of blocks   %rcx  key schedule (loop count at offset 240)
+ *   %r8   16-byte IV/counter block
+ * Dword 3 of the IV is extracted, byte-swapped and used as a 32-bit
+ * counter; the other three dwords stay in %xmm14 and are ORed into every
+ * counter block.
+ * Stack frame (200 bytes): 0(%rsp)/16(%rsp) hold the two counter vectors,
+ * 32..176(%rsp) hold the saved %xmm6-%xmm15.
+ * NOTE(review): the entry label and the branch targets (.Lctr32_loop6,
+ * .Lctr32_tail, .Lctr32_done, .Lctr32_one_shortcut, ...) have no visible
+ * definition in this listing -- confirm they were not lost.
+ */
+.globl aesni_ctr32_encrypt_blocks
+.def aesni_ctr32_encrypt_blocks; .scl 2; .type 32; .endef
+.p2align 4
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+ leaq -200(%rsp),%rsp
+ movaps %xmm6,32(%rsp)
+ movaps %xmm7,48(%rsp)
+ movaps %xmm8,64(%rsp)
+ movaps %xmm9,80(%rsp)
+ movaps %xmm10,96(%rsp)
+ movaps %xmm11,112(%rsp)
+ movaps %xmm12,128(%rsp)
+ movaps %xmm13,144(%rsp)
+ movaps %xmm14,160(%rsp)
+ movaps %xmm15,176(%rsp)
+/* Exactly one block: skip the counter-vector setup. */
+ cmpq $1,%rdx
+ je .Lctr32_one_shortcut
+ movdqu (%r8),%xmm14
+ movdqa .Lbswap_mask(%rip),%xmm15
+ xorl %eax,%eax
+/* pextrd $3,%xmm14,%r10d : pull the counter dword out of the IV */
+.byte 102,69,15,58,22,242,3
+/* pinsrd $3,%eax,%xmm14 : zero that dword in the IV template */
+.byte 102,68,15,58,34,240,3
+ movl 240(%rcx),%eax
+ bswapl %r10d
+ pxor %xmm12,%xmm12
+ pxor %xmm13,%xmm13
+/* Build %xmm12 = {ctr, ctr+1, ctr+2, 0} and %xmm13 = {ctr+3, ctr+4,
+ * ctr+5, 0} with pinsrd from %r10d / %r11d. */
+/* pinsrd $0,%r10d,%xmm12 */
+.byte 102,69,15,58,34,226,0
+ leaq 3(%r10),%r11
+/* pinsrd $0,%r11d,%xmm13 */
+.byte 102,69,15,58,34,235,0
+ incl %r10d
+/* pinsrd $1,%r10d,%xmm12 */
+.byte 102,69,15,58,34,226,1
+ incq %r11
+/* pinsrd $1,%r11d,%xmm13 */
+.byte 102,69,15,58,34,235,1
+ incl %r10d
+/* pinsrd $2,%r10d,%xmm12 */
+.byte 102,69,15,58,34,226,2
+ incq %r11
+/* pinsrd $2,%r11d,%xmm13 */
+.byte 102,69,15,58,34,235,2
+/* Keep the unshuffled counter vectors on the stack, then byte-shuffle
+ * the register copies with .Lbswap_mask. */
+ movdqa %xmm12,0(%rsp)
+/* pshufb %xmm15,%xmm12 */
+.byte 102,69,15,56,0,231
+ movdqa %xmm13,16(%rsp)
+/* pshufb %xmm15,%xmm13 */
+.byte 102,69,15,56,0,239
+/* Spread the first three counters into dword 3 of %xmm2-%xmm4. */
+ pshufd $192,%xmm12,%xmm2
+ pshufd $128,%xmm12,%xmm3
+ pshufd $64,%xmm12,%xmm4
+ cmpq $6,%rdx
+ jb .Lctr32_tail
+ shrl $1,%eax
+ movq %rcx,%r11
+ movl %eax,%r10d
+ subq $6,%rdx
+ jmp .Lctr32_loop6
+.p2align 4
+/* Main loop: finish building six counter blocks (OR in the IV template),
+ * XOR with round key 0 and start the first round. */
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm2
+ movups (%r11),%xmm0
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm3
+ movups 16(%r11),%xmm1
+ pshufd $64,%xmm13,%xmm7
+ por %xmm14,%xmm4
+ por %xmm14,%xmm5
+ xorps %xmm0,%xmm2
+ por %xmm14,%xmm6
+ por %xmm14,%xmm7
+ pxor %xmm0,%xmm3
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ aesenc %xmm1,%xmm3
+ movdqa .Lincrement32(%rip),%xmm13
+ pxor %xmm0,%xmm5
+ aesenc %xmm1,%xmm4
+ movdqa 0(%rsp),%xmm12
+ pxor %xmm0,%xmm6
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ jmp .Lctr32_enc_loop6_enter
+.p2align 4
+/* Six-lane round loop, two round keys per pass. */
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lctr32_enc_loop6
+/* Final two rounds, interleaved with advancing both counter vectors by
+ * .Lincrement32 and loading the six input blocks. */
+ aesenc %xmm1,%xmm2
+ paddd %xmm13,%xmm12
+ aesenc %xmm1,%xmm3
+ paddd 16(%rsp),%xmm13
+ aesenc %xmm1,%xmm4
+ movdqa %xmm12,0(%rsp)
+ aesenc %xmm1,%xmm5
+ movdqa %xmm13,16(%rsp)
+ aesenc %xmm1,%xmm6
+/* pshufb %xmm15,%xmm12 */
+.byte 102,69,15,56,0,231
+ aesenc %xmm1,%xmm7
+/* pshufb %xmm15,%xmm13 */
+.byte 102,69,15,56,0,239
+ aesenclast %xmm0,%xmm2
+ movups (%rdi),%xmm8
+ aesenclast %xmm0,%xmm3
+ movups 16(%rdi),%xmm9
+ aesenclast %xmm0,%xmm4
+ movups 32(%rdi),%xmm10
+ aesenclast %xmm0,%xmm5
+ movups 48(%rdi),%xmm11
+ aesenclast %xmm0,%xmm6
+ movups 64(%rdi),%xmm1
+ aesenclast %xmm0,%xmm7
+ movups 80(%rdi),%xmm0
+ leaq 96(%rdi),%rdi
+/* XOR keystream into the inputs, store, and pre-build the next three
+ * counters in %xmm2-%xmm4. */
+ xorps %xmm2,%xmm8
+ pshufd $192,%xmm12,%xmm2
+ xorps %xmm3,%xmm9
+ pshufd $128,%xmm12,%xmm3
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ pshufd $64,%xmm12,%xmm4
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ xorps %xmm7,%xmm0
+ movups %xmm1,64(%rsi)
+ movups %xmm0,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movl %r10d,%eax
+ subq $6,%rdx
+ jnc .Lctr32_loop6
+/* Undo the last subtraction; restore the key pointer and rebuild the
+ * un-halved loop count (2*eax+1) for the helper calls below. */
+ addq $6,%rdx
+ jz .Lctr32_done
+ movq %r11,%rcx
+ leal 1(%rax,%rax,1),%eax
+/* 1..5 blocks remain: complete as many counter blocks as needed and
+ * dispatch on the remaining count. */
+ por %xmm14,%xmm2
+ movups (%rdi),%xmm8
+ cmpq $2,%rdx
+ jb .Lctr32_one
+ por %xmm14,%xmm3
+ movups 16(%rdi),%xmm9
+ je .Lctr32_two
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm4
+ movups 32(%rdi),%xmm10
+ cmpq $4,%rdx
+ jb .Lctr32_three
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm5
+ movups 48(%rdi),%xmm11
+ je .Lctr32_four
+/* Fall-through case: five blocks via the 6-block helper, sixth lane
+ * zeroed. */
+ por %xmm14,%xmm6
+ xorps %xmm7,%xmm7
+ call _aesni_encrypt6
+ movups 64(%rdi),%xmm1
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ movups %xmm1,64(%rsi)
+ jmp .Lctr32_done
+.p2align 4
+/* Single-block shortcut: encrypt the IV block exactly as given and XOR
+ * it into the one input block. */
+ movups (%r8),%xmm2
+ movups (%rdi),%xmm8
+ movl 240(%rcx),%eax
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_7
+ aesenclast %xmm1,%xmm2
+ xorps %xmm2,%xmm8
+ movups %xmm8,(%rsi)
+ jmp .Lctr32_done
+.p2align 4
+/* Two blocks: 3-block helper with the third lane zeroed. */
+ xorps %xmm4,%xmm4
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ movups %xmm9,16(%rsi)
+ jmp .Lctr32_done
+.p2align 4
+/* Three blocks. */
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ movups %xmm10,32(%rsi)
+ jmp .Lctr32_done
+.p2align 4
+/* Four blocks; falls through into the common exit. */
+ call _aesni_encrypt4
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ movups %xmm11,48(%rsi)
+/* Common exit: restore %xmm6-%xmm15, release the frame, restore
+ * %rdi/%rsi. */
+ movaps 32(%rsp),%xmm6
+ movaps 48(%rsp),%xmm7
+ movaps 64(%rsp),%xmm8
+ movaps 80(%rsp),%xmm9
+ movaps 96(%rsp),%xmm10
+ movaps 112(%rsp),%xmm11
+ movaps 128(%rsp),%xmm12
+ movaps 144(%rsp),%xmm13
+ movaps 160(%rsp),%xmm14
+ movaps 176(%rsp),%xmm15
+ leaq 200(%rsp),%rsp
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+/*
+ * aesni_xts_encrypt: tweaked-block encryption, six blocks per main-loop
+ * pass, with a byte-swapping tail for a length that is not a multiple
+ * of 16 (the loop's branch target is named .Lxts_enc_steal).
+ * After the prologue's register shuffle (args 5 and 6 come from
+ * 40(%rsp)/48(%rsp)):
+ *   %rdi  input pointer      %rsi  output pointer
+ *   %rdx  byte length (original value kept in %r9 for the tail)
+ *   %rcx  key schedule used for the data blocks
+ *   %r8   key schedule used once, to encrypt the initial tweak
+ *   %r9   16-byte initial tweak input
+ * %xmm15 holds the running tweak, %xmm10-%xmm14 the per-block tweaks and
+ * %xmm8 the .Lxts_magic constant (defined outside this listing).
+ * Stack frame (264 bytes): 0..80(%rsp) spill six tweaks, 96..240(%rsp)
+ * hold the saved %xmm6-%xmm15.
+ * NOTE(review): the entry label and the branch targets
+ * (.Lxts_enc_grandloop, .Lxts_enc_short, .Lxts_enc_done, .Lxts_enc_ret,
+ * ...) have no visible definition in this listing -- confirm they were
+ * not lost.
+ */
+.globl aesni_xts_encrypt
+.def aesni_xts_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+ leaq -264(%rsp),%rsp
+ movaps %xmm6,96(%rsp)
+ movaps %xmm7,112(%rsp)
+ movaps %xmm8,128(%rsp)
+ movaps %xmm9,144(%rsp)
+ movaps %xmm10,160(%rsp)
+ movaps %xmm11,176(%rsp)
+ movaps %xmm12,192(%rsp)
+ movaps %xmm13,208(%rsp)
+ movaps %xmm14,224(%rsp)
+ movaps %xmm15,240(%rsp)
+/* Encrypt the initial tweak with the key schedule at %r8. */
+ movups (%r9),%xmm15
+ movl 240(%r8),%eax
+ movl 240(%rcx),%r10d
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+ aesenc %xmm1,%xmm15
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_8
+ aesenclast %xmm1,%xmm15
+ movq %rcx,%r11
+ movl %r10d,%eax
+/* Keep the full length in %r9; %rdx counts whole blocks only. */
+ movq %rdx,%r9
+ andq $-16,%rdx
+ movdqa .Lxts_magic(%rip),%xmm8
+/* Derive four consecutive tweaks into %xmm10-%xmm13. Each step copies
+ * the current tweak, doubles both qwords with paddq and XORs in a
+ * correction built from the sign mask (pcmpgtd), pshufd $19 and
+ * .Lxts_magic -- this looks like the usual XTS multiply-by-x update;
+ * confirm against the .Lxts_magic definition. */
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+/* Fewer than six whole blocks: go to the short path. */
+ subq $96,%rdx
+ jc .Lxts_enc_short
+/* Loop count for the inner loop: rounds/2 - 1 (the remaining rounds are
+ * unrolled after the loop, interleaved with tweak generation). */
+ shrl $1,%eax
+ subl $1,%eax
+ movl %eax,%r10d
+ jmp .Lxts_enc_grandloop
+.p2align 4
+/* Grand loop: produce the 5th and 6th tweaks, load six inputs, XOR each
+ * with its tweak and round key 0, spill the tweaks to the stack and start
+ * the first round. */
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp)
+ aesenc %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesenc %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesenc %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesenc %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+ aesenc %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesenc %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp .Lxts_enc_loop6_enter
+.p2align 4
+/* Six-lane round loop, two round keys per pass. */
+ aesenc %xmm1,%xmm2
+ aesenc %xmm1,%xmm3
+ decl %eax
+ aesenc %xmm1,%xmm4
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+ aesenc %xmm0,%xmm2
+ aesenc %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesenc %xmm0,%xmm4
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lxts_enc_loop6
+/* Remaining rounds, interleaved with generating the next pass's first
+ * four tweaks into %xmm10-%xmm13. */
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ aesenc %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm0,%xmm5
+ aesenc %xmm0,%xmm6
+ aesenc %xmm0,%xmm7
+ movups 32(%rcx),%xmm0
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ aesenc %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesenc %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenc %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesenc %xmm1,%xmm5
+ aesenc %xmm1,%xmm6
+ aesenc %xmm1,%xmm7
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ aesenclast %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesenclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesenclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesenclast %xmm0,%xmm5
+ aesenclast %xmm0,%xmm6
+ aesenclast %xmm0,%xmm7
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+/* XOR the spilled tweaks back into the results and store them. */
+ xorps 0(%rsp),%xmm2
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc .Lxts_enc_grandloop
+/* Leaving the grand loop: rebuild the full loop count (2*eax+3 undoes the
+ * shrl/subl above) and restore the key pointer. */
+ leal 3(%rax,%rax,1),%eax
+ movq %r11,%rcx
+ movl %eax,%r10d
+/* Short path: undo the subtraction and dispatch on 0..5 whole blocks. */
+ addq $96,%rdx
+ jz .Lxts_enc_done
+ cmpq $32,%rdx
+ jb .Lxts_enc_one
+ je .Lxts_enc_two
+ cmpq $64,%rdx
+ jb .Lxts_enc_three
+ je .Lxts_enc_four
+/* Fall-through case: five blocks via the 6-block helper; %xmm10 ends up
+ * holding the next unused tweak. */
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+ call _aesni_encrypt6
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ movdqu %xmm5,48(%rsi)
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ jmp .Lxts_enc_done
+.p2align 4
+/* One block: inline single-block round loop; next tweak moves to %xmm10. */
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_9
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10
+ movups %xmm2,(%rsi)
+ leaq 16(%rsi),%rsi
+ jmp .Lxts_enc_done
+.p2align 4
+/* Two blocks (3-block helper; only two results are stored). */
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ call _aesni_encrypt3
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10
+ xorps %xmm11,%xmm3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp .Lxts_enc_done
+.p2align 4
+/* Three blocks. */
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ call _aesni_encrypt3
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp .Lxts_enc_done
+.p2align 4
+/* Four blocks. */
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+ call _aesni_encrypt4
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp .Lxts_enc_done
+.p2align 4
+/* All whole blocks done: if the original length had 1..15 extra bytes,
+ * swap them in byte by byte -- each trailing input byte replaces a byte
+ * of the last output block, and the displaced byte is written after it. */
+ andq $15,%r9
+ jz .Lxts_enc_ret
+ movq %r9,%rdx
+ movzbl (%rdi),%eax
+ movzbl -16(%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,-16(%rsi)
+ movb %cl,0(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz .Lxts_enc_steal
+/* Re-encrypt the patched last full block in place with the tweak in
+ * %xmm10. */
+ subq %r9,%rsi
+ movq %r11,%rcx
+ movl %r10d,%eax
+ movups -16(%rsi),%xmm2
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_10
+ aesenclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,-16(%rsi)
+/* Common exit: restore %xmm6-%xmm15, release the frame, restore
+ * %rdi/%rsi. */
+ movaps 96(%rsp),%xmm6
+ movaps 112(%rsp),%xmm7
+ movaps 128(%rsp),%xmm8
+ movaps 144(%rsp),%xmm9
+ movaps 160(%rsp),%xmm10
+ movaps 176(%rsp),%xmm11
+ movaps 192(%rsp),%xmm12
+ movaps 208(%rsp),%xmm13
+ movaps 224(%rsp),%xmm14
+ movaps 240(%rsp),%xmm15
+ leaq 264(%rsp),%rsp
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.globl aesni_xts_decrypt /* aesni_xts_decrypt: exported XTS decrypt routine built on AESDEC. NOTE(review): no label definitions (entry or .L* targets) are visible in this excerpt */
+.def aesni_xts_decrypt; .scl 2; .type 32; .endef
+.p2align 4
+ movq %rdi,8(%rsp) /* park caller rdi/rsi at 8(%rsp)/16(%rsp); both are reloaded just before retq */
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi /* remap incoming rcx,rdx,r8,r9 plus stack slots 40/48(%rsp) into rdi,rsi,rdx,rcx,r8,r9 */
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+ leaq -264(%rsp),%rsp /* reserve 264 bytes of stack */
+ movaps %xmm6,96(%rsp) /* save xmm6..xmm15 at 96..240(%rsp); restored at the end of the routine */
+ movaps %xmm7,112(%rsp)
+ movaps %xmm8,128(%rsp)
+ movaps %xmm9,144(%rsp)
+ movaps %xmm10,160(%rsp)
+ movaps %xmm11,176(%rsp)
+ movaps %xmm12,192(%rsp)
+ movaps %xmm13,208(%rsp)
+ movaps %xmm14,224(%rsp)
+ movaps %xmm15,240(%rsp)
+ movups (%r9),%xmm15 /* xmm15 = 16 bytes at (r9); presumably the initial tweak/IV - confirm against caller */
+ movl 240(%r8),%eax /* eax = 32-bit count at offset 240 of the key at r8; drives the aesenc loop below */
+ movl 240(%rcx),%r10d /* r10d = same field of the key at rcx, kept as the reference count for the decrypt paths */
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+ aesenc %xmm1,%xmm15 /* single-block encryption of xmm15 with the key at r8 (loop target .Loop_enc1_11) */
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_11
+ aesenclast %xmm1,%xmm15
+ xorl %eax,%eax /* next five instructions: if rdx is not a multiple of 16, subtract 16 from rdx */
+ testq $15,%rdx
+ setnz %al
+ shlq $4,%rax
+ subq %rax,%rdx
+ movq %rcx,%r11 /* r11 keeps the key pointer so rcx can be rewound after each pass */
+ movl %r10d,%eax
+ movq %rdx,%r9 /* r9 keeps the adjusted length; its low 4 bits are tested later */
+ andq $-16,%rdx /* rdx rounded down to a multiple of 16 */
+ movdqa .Lxts_magic(%rip),%xmm8 /* xmm8 = 16-byte constant used as the mask in every update step below */
+ pxor %xmm14,%xmm14 /* update step, repeated four times: copy xmm15 to xmm10..xmm13, double each 64-bit lane, xor in masked constant. NOTE(review): presumably tweak * x in GF(2^128) */
+ pcmpgtd %xmm15,%xmm14
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ subq $96,%rdx /* fewer than 96 bytes left: take the short path */
+ jc .Lxts_dec_short
+ shrl $1,%eax /* eax = eax/2 - 1, cached in r10d: counter for the two-rounds-per-iteration inner loop */
+ subl $1,%eax
+ movl %eax,%r10d
+ jmp .Lxts_dec_grandloop
+.p2align 4
+ pshufd $19,%xmm14,%xmm9 /* six-block pass: finish the 5th and 6th mask values in xmm14/xmm15 while loading 96 input bytes */
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp) /* the six mask values are spilled to 0..80(%rsp) and xored back in after the last round */
+ aesdec %xmm1,%xmm2
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+ aesdec %xmm1,%xmm6
+ movdqa %xmm15,80(%rsp)
+ aesdec %xmm1,%xmm7
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp .Lxts_dec_loop6_enter
+.p2align 4
+ aesdec %xmm1,%xmm2 /* inner loop: two AES decrypt rounds per iteration across the six blocks, counted down in eax */
+ aesdec %xmm1,%xmm3
+ decl %eax
+ aesdec %xmm1,%xmm4
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+ aesdec %xmm0,%xmm2
+ aesdec %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ aesdec %xmm0,%xmm4
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ jnz .Lxts_dec_loop6
+ pshufd $19,%xmm14,%xmm9 /* remaining rounds are interleaved with computing the next xmm10..xmm13 values */
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ movups 16(%rcx),%xmm1
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ aesdec %xmm0,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm0,%xmm5
+ aesdec %xmm0,%xmm6
+ aesdec %xmm0,%xmm7
+ movups 32(%rcx),%xmm0
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ aesdec %xmm1,%xmm2
+ pand %xmm8,%xmm9
+ aesdec %xmm1,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdec %xmm1,%xmm4
+ pxor %xmm9,%xmm15
+ aesdec %xmm1,%xmm5
+ aesdec %xmm1,%xmm6
+ aesdec %xmm1,%xmm7
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ aesdeclast %xmm0,%xmm2 /* final round for all six blocks */
+ pand %xmm8,%xmm9
+ aesdeclast %xmm0,%xmm3
+ pcmpgtd %xmm15,%xmm14
+ aesdeclast %xmm0,%xmm4
+ pxor %xmm9,%xmm15
+ aesdeclast %xmm0,%xmm5
+ aesdeclast %xmm0,%xmm6
+ aesdeclast %xmm0,%xmm7
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2 /* xor the spilled mask values back in and store 96 output bytes at rsi */
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx /* keep looping while at least 96 more bytes remain */
+ jnc .Lxts_dec_grandloop
+ leal 3(%rax,%rax,1),%eax /* eax = 2*eax + 3: turn the halved loop counter back into the per-round counter */
+ movq %r11,%rcx
+ movl %eax,%r10d
+ addq $96,%rdx /* undo the bias; rdx is now the byte count left (0..80) */
+ jz .Lxts_dec_done
+ cmpq $32,%rdx /* dispatch on remaining size: below 32 one block, 32 two, below 64 three, 64 four, otherwise fall through to five */
+ jb .Lxts_dec_one
+ je .Lxts_dec_two
+ cmpq $64,%rdx
+ jb .Lxts_dec_three
+ je .Lxts_dec_four
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+ call _aesni_decrypt6 /* helper defined outside this excerpt */
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ pxor %xmm14,%xmm14
+ movdqu %xmm5,48(%rsi)
+ pcmpgtd %xmm15,%xmm14
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ pshufd $19,%xmm14,%xmm11
+ andq $15,%r9 /* no leftover partial block: straight to the epilogue */
+ jz .Lxts_dec_ret
+ movdqa %xmm15,%xmm10 /* otherwise set up xmm10/xmm11 for the two-step tail at .Lxts_dec_done2 */
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm11
+ pxor %xmm15,%xmm11
+ jmp .Lxts_dec_done2
+.p2align 4
+ movups (%rdi),%xmm2 /* one-block path: xor with xmm10, decrypt in the .Loop_dec1_12 loop, xor again, store */
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_12
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10 /* shift the next two mask values down into xmm10/xmm11 for the tail */
+ movups %xmm2,(%rsi)
+ movdqa %xmm12,%xmm11
+ leaq 16(%rsi),%rsi
+ jmp .Lxts_dec_done
+.p2align 4
+ movups (%rdi),%xmm2 /* two-block path */
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ call _aesni_decrypt3
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm13,%xmm11
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp .Lxts_dec_done
+.p2align 4
+ movups (%rdi),%xmm2 /* three-block path */
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ call _aesni_decrypt3
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp .Lxts_dec_done
+.p2align 4
+ pshufd $19,%xmm14,%xmm9 /* four-block path: one more update step so xmm14/xmm15 are ready for the tail */
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movups (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movups 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+ call _aesni_decrypt4
+ xorps %xmm10,%xmm2
+ movdqa %xmm14,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp .Lxts_dec_done
+.p2align 4
+ andq $15,%r9 /* r9 = leftover byte count (length mod 16); zero means nothing left to do */
+ jz .Lxts_dec_ret
+ movq %r9,%rdx
+ movq %r11,%rcx
+ movl %r10d,%eax
+ movups (%rdi),%xmm2 /* tail step 1: decrypt the block at (rdi) using xmm11 as the mask, store at (rsi) */
+ xorps %xmm11,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_13
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm11,%xmm2
+ movups %xmm2,(%rsi)
+ movzbl 16(%rdi),%eax /* tail step 2, rdx iterations: out[i+16] = out[i]; out[i] = in[i+16] (byte-wise swap) */
+ movzbl (%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,(%rsi)
+ movb %cl,16(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz .Lxts_dec_steal
+ subq %r9,%rsi /* rewind rsi to the start of the block just patched */
+ movq %r11,%rcx
+ movl %r10d,%eax
+ movups (%rsi),%xmm2 /* tail step 3: decrypt that block in place using xmm10 as the mask */
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_14
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm10,%xmm2
+ movups %xmm2,(%rsi)
+ movaps 96(%rsp),%xmm6 /* epilogue: restore xmm6..xmm15 from the slots filled in the prologue */
+ movaps 112(%rsp),%xmm7
+ movaps 128(%rsp),%xmm8
+ movaps 144(%rsp),%xmm9
+ movaps 160(%rsp),%xmm10
+ movaps 176(%rsp),%xmm11
+ movaps 192(%rsp),%xmm12
+ movaps 208(%rsp),%xmm13
+ movaps 224(%rsp),%xmm14
+ movaps 240(%rsp),%xmm15
+ leaq 264(%rsp),%rsp /* release the 264-byte frame */
+ movq 8(%rsp),%rdi /* reload the rdi/rsi values parked on entry */
+ movq 16(%rsp),%rsi
+ retq
+.globl aesni_cbc_encrypt /* aesni_cbc_encrypt: exported CBC routine; r9d (6th argument after remap) selects encrypt (non-zero) or decrypt (zero). NOTE(review): label definitions are not visible in this excerpt */
+.def aesni_cbc_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+ movq %rdi,8(%rsp) /* park caller rdi/rsi at 8(%rsp)/16(%rsp); reloaded just before retq */
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi /* remap incoming rcx,rdx,r8,r9 plus stack slots 40/48(%rsp) into rdi,rsi,rdx,rcx,r8,r9 */
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+ testq %rdx,%rdx /* zero length: nothing to do */
+ jz .Lcbc_ret
+ movl 240(%rcx),%r10d /* r10d = 32-bit count at offset 240 of the key; reloaded into eax before each block */
+ movq %rcx,%r11 /* r11 keeps the key pointer so rcx can be rewound */
+ testl %r9d,%r9d /* r9d == 0 selects the decrypt branch */
+ jz .Lcbc_decrypt
+ movups (%r8),%xmm2 /* xmm2 = 16-byte chaining value read from (r8); written back there when encryption ends */
+ movl %r10d,%eax
+ cmpq $16,%rdx /* less than one full block: go pad it in the tail code */
+ jb .Lcbc_enc_tail
+ subq $16,%rdx
+ jmp .Lcbc_enc_loop
+.p2align 4
+ movups (%rdi),%xmm3 /* encrypt loop: load one input block */
+ leaq 16(%rdi),%rdi
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ xorps %xmm0,%xmm3 /* fold the first round key into the input, then xor into the chaining value */
+ leaq 32(%rcx),%rcx
+ xorps %xmm3,%xmm2
+ aesenc %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_15
+ aesenclast %xmm1,%xmm2
+ movl %r10d,%eax
+ movq %r11,%rcx
+ movups %xmm2,0(%rsi) /* store the output block; xmm2 stays live as the next chaining value */
+ leaq 16(%rsi),%rsi
+ subq $16,%rdx
+ jnc .Lcbc_enc_loop
+ addq $16,%rdx /* rdx = leftover bytes (0..15); non-zero goes to the tail */
+ jnz .Lcbc_enc_tail
+ movups %xmm2,(%r8) /* write the final chaining value back to (r8) */
+ jmp .Lcbc_ret
+ movq %rdx,%rcx /* encrypt tail: copy rdx input bytes to the output buffer, zero-fill up to 16, then encrypt that block in place */
+ xchgq %rdi,%rsi
+.long 0x9066A4F3 /* bytes F3 A4 66 90: rep movsb followed by a 2-byte nop */
+ movl $16,%ecx
+ subq %rdx,%rcx
+ xorl %eax,%eax
+.long 0x9066AAF3 /* bytes F3 AA 66 90: rep stosb (al = 0) followed by a 2-byte nop */
+ leaq -16(%rdi),%rdi
+ movl %r10d,%eax
+ movq %rdi,%rsi /* input and output both point at the padded block */
+ movq %r11,%rcx
+ xorq %rdx,%rdx /* rdx = 0 so the loop runs exactly once more */
+ jmp .Lcbc_enc_loop
+.p2align 4
+ leaq -88(%rsp),%rsp /* decrypt branch: reserve 88 bytes and save xmm6..xmm9 at 0..48(%rsp) */
+ movaps %xmm6,(%rsp)
+ movaps %xmm7,16(%rsp)
+ movaps %xmm8,32(%rsp)
+ movaps %xmm9,48(%rsp)
+ movups (%r8),%xmm9 /* xmm9 = 16-byte chaining value read from (r8) */
+ movl %r10d,%eax
+ cmpq $112,%rdx /* 112 bytes or fewer: handled entirely by the tail dispatcher */
+ jbe .Lcbc_dec_tail
+ shrl $1,%r10d /* r10d halved for the eight-block path; the tail rebuilds it as 2*r10+1 */
+ subq $112,%rdx
+ movl %r10d,%eax
+ movaps %xmm9,64(%rsp) /* 64(%rsp) holds the value xored into the first block of each pass */
+ jmp .Lcbc_dec_loop8_enter
+.p2align 4
+ movaps %xmm0,64(%rsp) /* loop re-entry: xmm0 (last input block of previous pass) becomes the new chaining value; flush the pending 8th output block */
+ movups %xmm9,(%rsi)
+ leaq 16(%rsi),%rsi
+ movups (%rcx),%xmm0
+ movups (%rdi),%xmm2 /* load eight input blocks into xmm2..xmm9, interleaved with the first round */
+ movups 16(%rdi),%xmm3
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ movdqu 32(%rdi),%xmm4
+ xorps %xmm0,%xmm2
+ movdqu 48(%rdi),%xmm5
+ xorps %xmm0,%xmm3
+ movdqu 64(%rdi),%xmm6
+ aesdec %xmm1,%xmm2
+ pxor %xmm0,%xmm4
+ movdqu 80(%rdi),%xmm7
+ aesdec %xmm1,%xmm3
+ pxor %xmm0,%xmm5
+ movdqu 96(%rdi),%xmm8
+ aesdec %xmm1,%xmm4
+ pxor %xmm0,%xmm6
+ movdqu 112(%rdi),%xmm9
+ aesdec %xmm1,%xmm5
+ pxor %xmm0,%xmm7
+ decl %eax
+ aesdec %xmm1,%xmm6
+ pxor %xmm0,%xmm8
+ aesdec %xmm1,%xmm7
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+ aesdec %xmm1,%xmm8
+ aesdec %xmm1,%xmm9
+ movups 16(%rcx),%xmm1
+ call .Ldec_loop8_enter /* remaining rounds run in code defined outside this excerpt */
+ movups (%rdi),%xmm1 /* xor each decrypted block with the preceding input block (first one with the value at 64(%rsp)) */
+ movups 16(%rdi),%xmm0
+ xorps 64(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm1
+ xorps %xmm0,%xmm8
+ movups 112(%rdi),%xmm0
+ xorps %xmm1,%xmm9
+ movups %xmm2,(%rsi) /* store seven blocks now; the eighth (xmm9) is stored at loop re-entry or after the loop */
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movq %r11,%rcx
+ movups %xmm7,80(%rsi)
+ leaq 128(%rdi),%rdi
+ movups %xmm8,96(%rsi)
+ leaq 112(%rsi),%rsi
+ subq $128,%rdx
+ ja .Lcbc_dec_loop8
+ movaps %xmm9,%xmm2 /* leaving the loop: pending output block moves to xmm2, last input block becomes the chaining value in xmm9 */
+ movaps %xmm0,%xmm9
+ addq $112,%rdx
+ jle .Lcbc_dec_tail_collected
+ movups %xmm2,(%rsi)
+ leal 1(%r10,%r10,1),%eax /* eax = 2*r10 + 1: rebuild the per-round counter from the halved value */
+ leaq 16(%rsi),%rsi
+ movups (%rdi),%xmm2 /* tail dispatcher: load blocks one at a time and branch on rdx in 16-byte steps up to 96 */
+ movaps %xmm2,%xmm8
+ cmpq $16,%rdx
+ jbe .Lcbc_dec_one
+ movups 16(%rdi),%xmm3
+ movaps %xmm3,%xmm7
+ cmpq $32,%rdx
+ jbe .Lcbc_dec_two
+ movups 32(%rdi),%xmm4
+ movaps %xmm4,%xmm6
+ cmpq $48,%rdx
+ jbe .Lcbc_dec_three
+ movups 48(%rdi),%xmm5
+ cmpq $64,%rdx
+ jbe .Lcbc_dec_four
+ movups 64(%rdi),%xmm6
+ cmpq $80,%rdx
+ jbe .Lcbc_dec_five
+ movups 80(%rdi),%xmm7
+ cmpq $96,%rdx
+ jbe .Lcbc_dec_six
+ movups 96(%rdi),%xmm8 /* more than 96 bytes: seven-block case via _aesni_decrypt8 */
+ movaps %xmm9,64(%rsp)
+ call _aesni_decrypt8
+ movups (%rdi),%xmm1
+ movups 16(%rdi),%xmm0
+ xorps 64(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm9
+ xorps %xmm0,%xmm8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movaps %xmm8,%xmm2
+ subq $112,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+ movups (%rcx),%xmm0 /* one-block case: decrypt xmm2 in the .Loop_dec1_16 loop, xor with xmm9, keep the input block as new chaining value */
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ aesdec %xmm1,%xmm2
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_16
+ aesdeclast %xmm1,%xmm2
+ xorps %xmm9,%xmm2
+ movaps %xmm8,%xmm9
+ subq $16,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+ xorps %xmm4,%xmm4 /* two-block case: xmm4 is zeroed because the three-block helper is reused */
+ call _aesni_decrypt3
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ movaps %xmm7,%xmm9
+ movaps %xmm3,%xmm2
+ leaq 16(%rsi),%rsi
+ subq $32,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+ call _aesni_decrypt3 /* three-block case */
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ movaps %xmm6,%xmm9
+ movaps %xmm4,%xmm2
+ leaq 32(%rsi),%rsi
+ subq $48,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+ call _aesni_decrypt4 /* four-block case */
+ xorps %xmm9,%xmm2
+ movups 48(%rdi),%xmm9
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ xorps %xmm6,%xmm5
+ movups %xmm4,32(%rsi)
+ movaps %xmm5,%xmm2
+ leaq 48(%rsi),%rsi
+ subq $64,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+ xorps %xmm7,%xmm7 /* five-block case: xmm7 is zeroed because the six-block helper is reused */
+ call _aesni_decrypt6
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm9
+ xorps %xmm1,%xmm6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ movaps %xmm6,%xmm2
+ subq $80,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+ call _aesni_decrypt6 /* six-block case */
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm0
+ xorps %xmm1,%xmm6
+ movups 80(%rdi),%xmm9
+ xorps %xmm0,%xmm7
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ movaps %xmm7,%xmm2
+ subq $96,%rdx
+ jmp .Lcbc_dec_tail_collected
+.p2align 4
+ andq $15,%rdx /* common tail: xmm2 holds the last output block (not yet stored), xmm9 the chaining value to hand back */
+ movups %xmm9,(%r8)
+ jnz .Lcbc_dec_tail_partial
+ movups %xmm2,(%rsi)
+ jmp .Lcbc_dec_ret
+.p2align 4
+ movaps %xmm2,64(%rsp) /* partial tail: bounce the block through 64(%rsp) and byte-copy part of it to the output */
+ movq $16,%rcx
+ movq %rsi,%rdi
+ subq %rdx,%rcx /* copy count is 16 - rdx, where rdx is the length mod 16. NOTE(review): confirm this is the intended count rather than rdx itself */
+ leaq 64(%rsp),%rsi
+.long 0x9066A4F3 /* bytes F3 A4 66 90: rep movsb followed by a 2-byte nop */
+ movaps (%rsp),%xmm6 /* restore xmm6..xmm9 and release the 88-byte frame */
+ movaps 16(%rsp),%xmm7
+ movaps 32(%rsp),%xmm8
+ movaps 48(%rsp),%xmm9
+ leaq 88(%rsp),%rsp
+ movq 8(%rsp),%rdi /* reload the rdi/rsi values parked on entry */
+ movq 16(%rsp),%rsi
+ retq
+.globl aesni_set_decrypt_key /* aesni_set_decrypt_key: builds an encryption schedule via __aesni_set_encrypt_key, then reverses it and applies AESIMC to the inner entries. NOTE(review): label definitions are not visible in this excerpt */
+.def aesni_set_decrypt_key; .scl 2; .type 32; .endef
+.p2align 4
+ subq $8,%rsp
+ call __aesni_set_encrypt_key /* on return eax is the status and edx the count written into the schedule */
+ shll $4,%edx /* edx *= 16: byte offset corresponding to that count */
+ testl %eax,%eax /* non-zero status: return it unchanged */
+ jnz .Ldec_key_ret
+ leaq 16(%r8,%rdx,1),%rcx /* rcx = r8 + rdx + 16: last 16-byte entry of the schedule; r8 is the first */
+ movups (%r8),%xmm0 /* swap the first and last entries without transforming them */
+ movups (%rcx),%xmm1
+ movups %xmm0,(%rcx)
+ movups %xmm1,(%r8)
+ leaq 16(%r8),%r8
+ leaq -16(%rcx),%rcx
+ movups (%r8),%xmm0 /* loop: take one entry from each end, AESIMC both, store them swapped, move inwards */
+ movups (%rcx),%xmm1
+ aesimc %xmm0,%xmm0
+ aesimc %xmm1,%xmm1
+ leaq 16(%r8),%r8
+ leaq -16(%rcx),%rcx
+ movups %xmm0,16(%rcx)
+ movups %xmm1,-16(%r8)
+ cmpq %r8,%rcx /* continue while the upper pointer is still above the lower one */
+ ja .Ldec_key_inverse
+ movups (%r8),%xmm0 /* middle entry: AESIMC it and store via rcx */
+ aesimc %xmm0,%xmm0
+ movups %xmm0,(%rcx)
+ addq $8,%rsp
+ retq
+.globl aesni_set_encrypt_key /* aesni_set_encrypt_key: expands a 128/192/256-bit user key (rcx = key bytes, edx = bit length, r8 = schedule out). Returns 0, -1 for a null pointer, -2 for an unsupported length. NOTE(review): label definitions are not visible in this excerpt */
+.def aesni_set_encrypt_key; .scl 2; .type 32; .endef
+.p2align 4
+ subq $8,%rsp
+ movq $-1,%rax /* default return value -1, used if either pointer is null */
+ testq %rcx,%rcx
+ jz .Lenc_key_ret
+ testq %r8,%r8
+ jz .Lenc_key_ret
+ movups (%rcx),%xmm0 /* xmm0 = first 16 key bytes */
+ xorps %xmm4,%xmm4 /* xmm4 = 0: scratch register the expansion helpers shuffle through */
+ leaq 16(%r8),%rax /* rax = write cursor, starting at the second 16-byte slot */
+ cmpl $256,%edx /* dispatch on key length in bits */
+ je .L14rounds
+ cmpl $192,%edx
+ je .L12rounds
+ cmpl $128,%edx
+ jne .Lbad_keybits
+ movl $9,%edx /* 128-bit path: value 9 is stored into the schedule after expansion */
+ movups %xmm0,(%r8) /* first schedule entry is the raw key */
+ aeskeygenassist $1,%xmm0,%xmm1 /* ten expansion steps; immediates 1,2,4,...,128,27,54 */
+ call .Lkey_expansion_128_cold
+ aeskeygenassist $2,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $4,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $8,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $16,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $32,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $64,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $128,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $27,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ aeskeygenassist $54,%xmm0,%xmm1
+ call .Lkey_expansion_128
+ movups %xmm0,(%rax) /* store the last entry, then the count 80 bytes past it */
+ movl %edx,80(%rax)
+ xorl %eax,%eax /* return 0 */
+ jmp .Lenc_key_ret
+.p2align 4
+ movq 16(%rcx),%xmm2 /* 192-bit path: xmm2 = key bytes 16..23 */
+ movl $11,%edx
+ movups %xmm0,(%r8)
+ aeskeygenassist $1,%xmm2,%xmm1 /* eight steps alternating the 192a and 192b helpers */
+ call .Lkey_expansion_192a_cold
+ aeskeygenassist $2,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $8,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $32,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call .Lkey_expansion_192a
+ aeskeygenassist $128,%xmm2,%xmm1
+ call .Lkey_expansion_192b
+ movups %xmm0,(%rax) /* store the last entry, then the count 48 bytes past it */
+ movl %edx,48(%rax)
+ xorq %rax,%rax /* return 0 */
+ jmp .Lenc_key_ret
+.p2align 4
+ movups 16(%rcx),%xmm2 /* 256-bit path: xmm2 = key bytes 16..31 */
+ movl $13,%edx
+ leaq 16(%rax),%rax /* the raw key fills two slots, so the cursor starts one slot further on */
+ movups %xmm0,(%r8)
+ movups %xmm2,16(%r8)
+ aeskeygenassist $1,%xmm2,%xmm1 /* thirteen steps alternating the 256a and 256b helpers */
+ call .Lkey_expansion_256a_cold
+ aeskeygenassist $1,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $2,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $2,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $4,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $4,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $8,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $8,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $16,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $16,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $32,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ aeskeygenassist $32,%xmm0,%xmm1
+ call .Lkey_expansion_256b
+ aeskeygenassist $64,%xmm2,%xmm1
+ call .Lkey_expansion_256a
+ movups %xmm0,(%rax) /* store the last entry, then the count 16 bytes past it */
+ movl %edx,16(%rax)
+ xorq %rax,%rax /* return 0 */
+ jmp .Lenc_key_ret
+.p2align 4
+ movq $-2,%rax /* unsupported key length: return -2 */
+ addq $8,%rsp /* common exit */
+ retq
+.p2align 4 /* key-expansion helpers called from aesni_set_encrypt_key. In: xmm1 = aeskeygenassist result, xmm4 = scratch, rax = write cursor. NOTE(review): the helper labels, including the _cold and _warm mid-routine entries, are not visible in this excerpt */
+ movups %xmm0,(%rax) /* 128-bit helper: emit the current entry and advance the cursor */
+ leaq 16(%rax),%rax
+ shufps $16,%xmm0,%xmm4 /* two shufps/xorps pairs mix shuffled copies of xmm0 into itself via xmm4 */
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1 /* broadcast dword 3 of the keygenassist result and fold it in */
+ xorps %xmm1,%xmm0
+ retq
+.p2align 4
+ movups %xmm0,(%rax) /* 192-bit helper A: emit xmm0, then update both xmm0 and the short half kept in xmm2 */
+ leaq 16(%rax),%rax
+ movaps %xmm2,%xmm5 /* xmm5 keeps the pre-update xmm2 for helper B to emit */
+ shufps $16,%xmm0,%xmm4
+ movdqa %xmm2,%xmm3
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ pslldq $4,%xmm3
+ xorps %xmm4,%xmm0
+ pshufd $85,%xmm1,%xmm1 /* broadcast dword 1 of the keygenassist result */
+ pxor %xmm3,%xmm2
+ pxor %xmm1,%xmm0
+ pshufd $255,%xmm0,%xmm3
+ pxor %xmm3,%xmm2
+ retq
+.p2align 4
+ movaps %xmm0,%xmm3 /* 192-bit helper B: assemble two full entries from xmm5/xmm0/xmm2 and emit 32 bytes */
+ shufps $68,%xmm0,%xmm5
+ movups %xmm5,(%rax)
+ shufps $78,%xmm2,%xmm3
+ movups %xmm3,16(%rax)
+ leaq 32(%rax),%rax
+ jmp .Lkey_expansion_192b_warm /* continue inside helper A; it supplies the retq */
+.p2align 4
+ movups %xmm2,(%rax) /* 256-bit helper A: emit xmm2, then update xmm0 using dword 3 of the keygenassist result */
+ leaq 16(%rax),%rax
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ retq
+.p2align 4
+ movups %xmm0,(%rax) /* 256-bit helper B: emit xmm0, then update xmm2 using dword 2 of the keygenassist result */
+ leaq 16(%rax),%rax
+ shufps $16,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $140,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $170,%xmm1,%xmm1
+ xorps %xmm1,%xmm2
+ retq
+.p2align 6 /* 64-byte aligned constant pool. NOTE(review): the labels naming each row are not visible in this excerpt */
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 /* byte indices 15..0 in descending order; presumably a byte-reversal shuffle mask - confirm against its users */
+.long 6,6,6,0 /* four dwords: 6,6,6,0 */
+.long 1,0,0,0 /* four dwords: 1,0,0,0 */
+.long 0x87,0,1,0 /* four dwords: 0x87,0,1,0; presumably the row loaded as .Lxts_magic by the XTS routines - confirm */
+.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 /* NUL-terminated ASCII: AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org> */
+.p2align 6
+.def ecb_se_handler; .scl 3; .type 32; .endef /* ecb_se_handler: file-local handler referenced from the .xdata entry for the ECB routine. NOTE(review): its label definition is not visible in this excerpt */
+.p2align 4
+ pushq %rsi /* save the registers and flags that the shared tail pops again before returning */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp /* 64 bytes of scratch; slots 32..56 later carry stack arguments for RtlVirtualUnwind */
+ movq 152(%r8),%rax /* rax = qword at offset 152 of the record in r8; presumably CONTEXT.Rsp - confirm against the Win64 CONTEXT layout */
+ jmp .Lcommon_seh_tail
+.def ccm64_se_handler; .scl 3; .type 32; .endef /* ccm64_se_handler: file-local handler shared by both CCM64 .xdata entries, which supply the body/ret RVAs read below. NOTE(review): its label definition is not visible in this excerpt */
+.p2align 4
+ pushq %rsi /* save the registers and flags that the shared tail pops again before returning */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+ movq 120(%r8),%rax /* offsets 120/248 of the record in r8; presumably CONTEXT.Rax and CONTEXT.Rip - confirm */
+ movq 248(%r8),%rbx
+ movq 8(%r9),%rsi /* offsets 8/56 of the record in r9; presumably DISPATCHER_CONTEXT ImageBase and HandlerData - confirm */
+ movq 56(%r9),%r11
+ movl 0(%r11),%r10d /* first handler-data dword is an RVA; add the base to get the lower bound */
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx /* fault address below the lower bound: nothing to restore */
+ jb .Lcommon_seh_tail
+ movq 152(%r8),%rax
+ movl 4(%r11),%r10d /* second handler-data dword is the upper bound */
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx /* fault address at or above the upper bound: nothing to restore */
+ jae .Lcommon_seh_tail
+ leaq 0(%rax),%rsi /* copy 8 qwords (64 bytes) from the frame at rax to offset 512 of the record; presumably the saved xmm6..xmm9 - confirm */
+ leaq 512(%r8),%rdi
+ movl $8,%ecx
+.long 0xa548f3fc /* bytes FC F3 48 A5: cld; rep movsq */
+ leaq 88(%rax),%rax /* step rax past the 88-byte frame */
+ jmp .Lcommon_seh_tail
+.def ctr32_se_handler; .scl 3; .type 32; .endef /* ctr32_se_handler: file-local handler for the CTR32 routine; bounds come from the .Lctr32_body/.Lctr32_ret labels rather than handler data. NOTE(review): its label definition is not visible in this excerpt */
+.p2align 4
+ pushq %rsi /* save the registers and flags that the shared tail pops again before returning */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+ movq 120(%r8),%rax /* offsets 120/248 of the record in r8; presumably CONTEXT.Rax and CONTEXT.Rip - confirm */
+ movq 248(%r8),%rbx
+ leaq .Lctr32_body(%rip),%r10
+ cmpq %r10,%rbx /* fault address before the body label: nothing to restore */
+ jb .Lcommon_seh_tail
+ movq 152(%r8),%rax
+ leaq .Lctr32_ret(%rip),%r10
+ cmpq %r10,%rbx /* fault address at or after the ret label: nothing to restore */
+ jae .Lcommon_seh_tail
+ leaq 32(%rax),%rsi /* copy 20 qwords (160 bytes) from rax+32 to offset 512 of the record; presumably the saved xmm6..xmm15 - confirm */
+ leaq 512(%r8),%rdi
+ movl $20,%ecx
+.long 0xa548f3fc /* bytes FC F3 48 A5: cld; rep movsq */
+ leaq 200(%rax),%rax /* step rax past the 200-byte frame */
+ jmp .Lcommon_seh_tail
+.def xts_se_handler; .scl 3; .type 32; .endef /* xts_se_handler: file-local handler shared by the XTS encrypt and decrypt .xdata entries, which supply the body/epilogue RVAs read below. NOTE(review): its label definition is not visible in this excerpt */
+.p2align 4
+ pushq %rsi /* save the registers and flags that the shared tail pops again before returning */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+ movq 120(%r8),%rax /* offsets 120/248 of the record in r8; presumably CONTEXT.Rax and CONTEXT.Rip - confirm */
+ movq 248(%r8),%rbx
+ movq 8(%r9),%rsi /* offsets 8/56 of the record in r9; presumably DISPATCHER_CONTEXT ImageBase and HandlerData - confirm */
+ movq 56(%r9),%r11
+ movl 0(%r11),%r10d /* first handler-data dword is an RVA; add the base to get the lower bound */
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx /* fault address below the lower bound: nothing to restore */
+ jb .Lcommon_seh_tail
+ movq 152(%r8),%rax
+ movl 4(%r11),%r10d /* second handler-data dword is the upper bound */
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx /* fault address at or above the upper bound: nothing to restore */
+ jae .Lcommon_seh_tail
+ leaq 96(%rax),%rsi /* copy 20 qwords (160 bytes) from rax+96, matching the 96..240(%rsp) xmm save slots of the XTS routines */
+ leaq 512(%r8),%rdi
+ movl $20,%ecx
+.long 0xa548f3fc /* bytes FC F3 48 A5: cld; rep movsq */
+ leaq 104+160(%rax),%rax /* step rax past the 264-byte frame */
+ jmp .Lcommon_seh_tail
+.def cbc_se_handler; .scl 3; .type 32; .endef /* cbc_se_handler: file-local handler for the CBC routine, followed by the tail that every handler in this file jumps to. NOTE(review): the .Lrestore_cbc_rax and .Lcommon_seh_tail label definitions are not visible in this excerpt */
+.p2align 4
+ pushq %rsi /* save the registers and flags that the tail pops again before returning */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+ movq 152(%r8),%rax /* offsets 152/248 of the record in r8; presumably CONTEXT.Rsp and CONTEXT.Rip - confirm */
+ movq 248(%r8),%rbx
+ leaq .Lcbc_decrypt(%rip),%r10
+ cmpq %r10,%rbx /* fault address before .Lcbc_decrypt: go straight to the tail */
+ jb .Lcommon_seh_tail
+ leaq .Lcbc_decrypt_body(%rip),%r10
+ cmpq %r10,%rbx /* between .Lcbc_decrypt and .Lcbc_decrypt_body: take the .Lrestore_cbc_rax path */
+ jb .Lrestore_cbc_rax
+ leaq .Lcbc_ret(%rip),%r10
+ cmpq %r10,%rbx /* at or after .Lcbc_ret: go straight to the tail */
+ jae .Lcommon_seh_tail
+ leaq 0(%rax),%rsi /* copy 8 qwords (64 bytes) from the frame at rax to offset 512 of the record, matching the four xmm saves of the decrypt prologue */
+ leaq 512(%r8),%rdi
+ movl $8,%ecx
+.long 0xa548f3fc /* bytes FC F3 48 A5: cld; rep movsq */
+ leaq 88(%rax),%rax /* step rax past the 88-byte decrypt frame */
+ jmp .Lcommon_seh_tail
+ movq 120(%r8),%rax /* rax reloaded from offset 120 of the record */
+ movq 8(%rax),%rdi /* fetch the rdi/rsi values the routines park at 8/16 off their entry rsp */
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8) /* write rax, rsi, rdi back at offsets 152/168/176; presumably CONTEXT.Rsp/Rsi/Rdi - confirm */
+ movq %rsi,168(%r8)
+ movq %rdi,176(%r8)
+ movq 40(%r9),%rdi /* copy 154 qwords (1232 bytes) from the record in r8 to the pointer at offset 40 of r9 */
+ movq %r8,%rsi
+ movl $154,%ecx
+.long 0xa548f3fc /* bytes FC F3 48 A5: cld; rep movsq */
+ movq %r9,%rsi
+ xorq %rcx,%rcx /* set up the RtlVirtualUnwind call: rcx = 0, rdx/r8/r9 from offsets 8/0/16 of the record */
+ movq 8(%rsi),%rdx
+ movq 0(%rsi),%r8
+ movq 16(%rsi),%r9
+ movq 40(%rsi),%r10
+ leaq 56(%rsi),%r11
+ leaq 24(%rsi),%r12
+ movq %r10,32(%rsp) /* remaining four arguments go in stack slots 32..56 */
+ movq %r11,40(%rsp)
+ movq %r12,48(%rsp)
+ movq %rcx,56(%rsp)
+ call *__imp_RtlVirtualUnwind(%rip)
+ movl $1,%eax /* handler returns 1; presumably ExceptionContinueSearch - confirm */
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+.section .pdata /* function table: one (begin RVA, end RVA, unwind-info RVA) triple per routine */
+.p2align 2
+.rva .LSEH_begin_aesni_ecb_encrypt /* ECB */
+.rva .LSEH_end_aesni_ecb_encrypt
+.rva .LSEH_info_ecb
+.rva .LSEH_begin_aesni_ccm64_encrypt_blocks /* CCM64 encrypt */
+.rva .LSEH_end_aesni_ccm64_encrypt_blocks
+.rva .LSEH_info_ccm64_enc
+.rva .LSEH_begin_aesni_ccm64_decrypt_blocks /* CCM64 decrypt */
+.rva .LSEH_end_aesni_ccm64_decrypt_blocks
+.rva .LSEH_info_ccm64_dec
+.rva .LSEH_begin_aesni_ctr32_encrypt_blocks /* CTR32 */
+.rva .LSEH_end_aesni_ctr32_encrypt_blocks
+.rva .LSEH_info_ctr32
+.rva .LSEH_begin_aesni_xts_encrypt /* XTS encrypt */
+.rva .LSEH_end_aesni_xts_encrypt
+.rva .LSEH_info_xts_enc
+.rva .LSEH_begin_aesni_xts_decrypt /* XTS decrypt */
+.rva .LSEH_end_aesni_xts_decrypt
+.rva .LSEH_info_xts_dec
+.rva .LSEH_begin_aesni_cbc_encrypt /* CBC */
+.rva .LSEH_end_aesni_cbc_encrypt
+.rva .LSEH_info_cbc
+.rva aesni_set_decrypt_key /* key setup routines start at their public symbol and share one unwind-info record */
+.rva .LSEH_end_set_decrypt_key
+.rva .LSEH_info_key
+.rva aesni_set_encrypt_key
+.rva .LSEH_end_set_encrypt_key
+.rva .LSEH_info_key
+.section .xdata /* unwind-info records referenced from .pdata. NOTE(review): the .LSEH_info_* labels naming each record are not visible in this excerpt */
+.p2align 3
+.byte 9,0,0,0 /* header byte 9 with zero prolog size and zero codes, followed by the handler RVA; presumably version 1 with an exception-handler flag - confirm against UNWIND_INFO */
+.rva ecb_se_handler
+.byte 9,0,0,0
+.rva ccm64_se_handler
+.rva .Lccm64_enc_body,.Lccm64_enc_ret /* handler data: the two RVAs the handler reads as lower/upper bounds */
+.byte 9,0,0,0
+.rva ccm64_se_handler
+.rva .Lccm64_dec_body,.Lccm64_dec_ret
+.byte 9,0,0,0
+.rva ctr32_se_handler
+.byte 9,0,0,0
+.rva xts_se_handler
+.rva .Lxts_enc_body,.Lxts_enc_epilogue
+.byte 9,0,0,0
+.rva xts_se_handler
+.rva .Lxts_dec_body,.Lxts_dec_epilogue
+.byte 9,0,0,0
+.rva cbc_se_handler
+.byte 0x01,0x04,0x01,0x00 /* handler-less record with one unwind code; presumably the .LSEH_info_key record for the key setup routines - confirm */
+.byte 0x04,0x02,0x00,0x00 /* the unwind code; presumably describes the subq $8,%rsp prologue of those routines - confirm */
diff --git a/crypto/libressl/crypto/aes/aesni-sha1-elf-x86_64.S b/crypto/libressl/crypto/aes/aesni-sha1-elf-x86_64.S
new file mode 100644
index 0000000..c0b3e5f
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aesni-sha1-elf-x86_64.S
@@ -0,0 +1,1401 @@
+#include "x86_arch.h"
+.hidden OPENSSL_ia32cap_P
+.globl aesni_cbc_sha1_enc /* aesni_cbc_sha1_enc: exported entry that forwards to the SSSE3 implementation. NOTE(review): its label definition is not visible in this excerpt */
+.type aesni_cbc_sha1_enc,@function
+.align 16
+ movl OPENSSL_ia32cap_P+0(%rip),%r10d /* load the first two capability words; no visible instruction here tests them */
+ movl OPENSSL_ia32cap_P+4(%rip),%r11d
+ jmp aesni_cbc_sha1_enc_ssse3 /* unconditional tail jump */
+ retq /* not reachable: it directly follows an unconditional jmp */
+.size aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc
+.type aesni_cbc_sha1_enc_ssse3,@function
+.align 16
+ movq 8(%rsp),%r10
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -104(%rsp),%rsp
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+ movdqu (%r8),%xmm11
+ movq %r8,88(%rsp)
+ shlq $6,%r14
+ subq %r12,%r13
+ movl 240(%r15),%r8d
+ addq %r10,%r14
+ leaq K_XX_XX(%rip),%r11
+ movl 0(%r9),%eax
+ movl 4(%r9),%ebx
+ movl 8(%r9),%ecx
+ movl 12(%r9),%edx
+ movl %ebx,%esi
+ movl 16(%r9),%ebp
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ movups (%r15),%xmm13
+ movups 16(%r15),%xmm14
+ jmp .Loop_ssse3
+.align 16
+ movdqa %xmm1,%xmm4
+ addl 0(%rsp),%ebp
+ movups 0(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ roll $5,%eax
+ paddd %xmm3,%xmm9
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%ebp
+ pxor %xmm2,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pxor %xmm8,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm9,48(%rsp)
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm10,%xmm9
+ rorl $7,%ebp
+ addl %esi,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa 0(%r11),%xmm10
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm4
+ rorl $7,%edx
+ addl %edi,%ebx
+ movdqa %xmm2,%xmm5
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ roll $5,%ebx
+ paddd %xmm4,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrldq $4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm9
+ addl 20(%rsp),%ebp
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm9,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm10,0(%rsp)
+ xorl %edx,%edi
+ addl %eax,%ebp
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ roll $5,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm9
+ xorl %ecx,%esi
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ addl %ebp,%edx
+ movdqa %xmm8,%xmm10
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ movdqa 16(%r11),%xmm8
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ pxor %xmm10,%xmm5
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movdqa %xmm3,%xmm6
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ paddd %xmm5,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ psrldq $4,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm10,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm8,16(%rsp)
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm10
+ xorl %edx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa 16(%r11),%xmm9
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %ebp,%edx
+ pxor %xmm8,%xmm6
+ rorl $7,%eax
+ addl %edi,%edx
+ movdqa %xmm4,%xmm7
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm6,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm5,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ pxor %xmm8,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm9,32(%rsp)
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrld $31,%xmm8
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ movdqa %xmm10,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ addl 60(%rsp),%ebp
+ cmpl $11,%r8d
+ jb .Laesenclast1
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast1
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa 16(%r11),%xmm10
+ xorl %edx,%edi
+ addl %eax,%ebp
+ pxor %xmm9,%xmm7
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movdqa %xmm7,%xmm9
+ addl 0(%rsp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm1,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ xorl %ecx,%esi
+ movups 16(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,0(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ addl %ebp,%edx
+ pxor %xmm9,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ psrld $30,%xmm9
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ por %xmm9,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ movdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 16(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 32(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast2
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast2
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ xorl %ebp,%edi
+ movdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ecx,%edi
+ movups 32(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,16(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp
+ andl %edx,%edi
+ pxor %xmm7,%xmm6
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ pxor %xmm9,%xmm6
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%edi
+ rorl $7,%eax
+ psrld $30,%xmm9
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ por %xmm9,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%edi
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ pxor %xmm0,%xmm7
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 48(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ pxor %xmm10,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ movl %ecx,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%edi
+ rorl $7,%ebx
+ psrld $30,%xmm10
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ por %xmm10,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%edi
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx
+ andl %eax,%edi
+ pxor %xmm1,%xmm0
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ rorl $7,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ pxor %xmm8,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ pslld $2,%xmm0
+ andl %edx,%edi
+ rorl $7,%ecx
+ psrld $30,%xmm8
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ por %xmm8,%xmm0
+ movl %ecx,%edi
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%edi
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ pxor %xmm2,%xmm1
+ andl %eax,%esi
+ rorl $7,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ pxor %xmm9,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ rorl $7,%edx
+ psrld $30,%xmm9
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ por %xmm9,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax
+ andl %ebp,%edi
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ cmpl $11,%r8d
+ jb .Laesenclast3
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast3
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp
+ andl %edx,%esi
+ andl %ecx,%edi
+ rorl $7,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%edi
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ pxor %xmm3,%xmm2
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ pxor %xmm10,%xmm2
+ roll $5,%ebp
+ movups 48(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,32(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%edi
+ rorl $7,%ebp
+ psrld $30,%xmm10
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm10,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ rorl $7,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax
+ andl %ebp,%esi
+ andl %edx,%edi
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ cmpq %r14,%r10
+ je .Ldone_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,0(%rsp)
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ psubd %xmm9,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm9,%xmm1
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movdqa %xmm1,16(%rsp)
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ psubd %xmm9,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ paddd %xmm9,%xmm2
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%rsp)
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ psubd %xmm9,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast4
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast4
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ leaq 64(%r12),%r12
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ addl 12(%r9),%edx
+ movl %eax,0(%r9)
+ addl 16(%r9),%ebp
+ movl %esi,4(%r9)
+ movl %esi,%ebx
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ jmp .Loop_ssse3
+.align 16
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast5
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast5
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ movq 88(%rsp),%r8
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ movl %eax,0(%r9)
+ addl 12(%r9),%edx
+ movl %esi,4(%r9)
+ addl 16(%r9),%ebp
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ movups %xmm11,(%r8)
+ leaq 104(%rsp),%rsi
+ movq 0(%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+ retq
+.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3
+.align 64 /* 64-byte-aligned constant table; the SSSE3 routine above reads 16-byte rows at 0/16/32/48/64(%r11). NOTE(review): no label introduces this table in the visible text, while the macOS build of the same routine loads a table named K_XX_XX - confirm the label was not dropped. */
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 /* +0:  SHA-1 round constant K for rounds 0-19 (FIPS 180-4), one copy per 32-bit lane so a single paddd adds it to four message words */
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 /* +16: SHA-1 round constant K for rounds 20-39 */
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc /* +32: SHA-1 round constant K for rounds 40-59 */
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 /* +48: SHA-1 round constant K for rounds 60-79 */
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f /* +64: shuffle control loaded into %xmm6; stored little-endian it reverses the bytes of each 32-bit word when used by the pshufb instructions emitted as ".byte 102,15,56,0,..." */
+.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 /* NUL-terminated ASCII banner: "AESNI-CBC+SHA1 stitch for x86_64, CRYPTOGAMS by" followed by the author's e-mail address in angle brackets */
+.align 64 /* pad to the next 64-byte boundary after the banner */
+/*
+ * When the build defines HAVE_GNU_STACK, emit an empty .note.GNU-stack
+ * section. ELF linkers read its presence (with no "x" flag) as "this object
+ * does not need an executable stack".
+ *
+ * The conditional must be closed before the end of the file; without the
+ * #endif the preprocessor rejects the file as an unterminated #if.
+ */
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/crypto/libressl/crypto/aes/aesni-sha1-macosx-x86_64.S b/crypto/libressl/crypto/aes/aesni-sha1-macosx-x86_64.S
new file mode 100644
index 0000000..3e88b1a
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aesni-sha1-macosx-x86_64.S
@@ -0,0 +1,1398 @@
+#include "x86_arch.h"
+.private_extern _OPENSSL_ia32cap_P /* capability vector is defined in another object; Mach-O private-extern visibility */
+.globl _aesni_cbc_sha1_enc
+.p2align 4
+/*
+ * _aesni_cbc_sha1_enc: exported entry point of the stitched AES-CBC + SHA-1
+ * routine.
+ *
+ * Reads the first two 32-bit words of _OPENSSL_ia32cap_P into %r10d/%r11d
+ * and then jumps unconditionally to aesni_cbc_sha1_enc_ssse3. No other
+ * register is written here, so the caller's arguments and stack reach the
+ * SSSE3 body unchanged. The two capability words are not tested in this
+ * stub.
+ *
+ * The label below is required: .globl only marks the symbol as exported,
+ * it does not define it. Without the label the symbol is undefined at link
+ * time and the stub is unreachable by name.
+ */
+_aesni_cbc_sha1_enc:
+ movl _OPENSSL_ia32cap_P+0(%rip),%r10d /* capability word 0 */
+ movl _OPENSSL_ia32cap_P+4(%rip),%r11d /* capability word 1 */
+ jmp aesni_cbc_sha1_enc_ssse3 /* tail-jump: the SSSE3 body returns directly to our caller */
+ retq /* not reached by fall-through; kept as emitted by the generator */
+.p2align 4
+ movq 8(%rsp),%r10
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -104(%rsp),%rsp
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+ movdqu (%r8),%xmm11
+ movq %r8,88(%rsp)
+ shlq $6,%r14
+ subq %r12,%r13
+ movl 240(%r15),%r8d
+ addq %r10,%r14
+ leaq K_XX_XX(%rip),%r11
+ movl 0(%r9),%eax
+ movl 4(%r9),%ebx
+ movl 8(%r9),%ecx
+ movl 12(%r9),%edx
+ movl %ebx,%esi
+ movl 16(%r9),%ebp
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ movups (%r15),%xmm13
+ movups 16(%r15),%xmm14
+ jmp L$oop_ssse3
+.p2align 4
+ movdqa %xmm1,%xmm4
+ addl 0(%rsp),%ebp
+ movups 0(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ roll $5,%eax
+ paddd %xmm3,%xmm9
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%ebp
+ pxor %xmm2,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pxor %xmm8,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm9,48(%rsp)
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm10,%xmm9
+ rorl $7,%ebp
+ addl %esi,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa 0(%r11),%xmm10
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm4
+ rorl $7,%edx
+ addl %edi,%ebx
+ movdqa %xmm2,%xmm5
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ roll $5,%ebx
+ paddd %xmm4,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrldq $4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm9
+ addl 20(%rsp),%ebp
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm9,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm10,0(%rsp)
+ xorl %edx,%edi
+ addl %eax,%ebp
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ roll $5,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm9
+ xorl %ecx,%esi
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ addl %ebp,%edx
+ movdqa %xmm8,%xmm10
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ movdqa 16(%r11),%xmm8
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ pxor %xmm10,%xmm5
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movdqa %xmm3,%xmm6
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ paddd %xmm5,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ psrldq $4,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm10,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm8,16(%rsp)
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm10
+ xorl %edx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa 16(%r11),%xmm9
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %ebp,%edx
+ pxor %xmm8,%xmm6
+ rorl $7,%eax
+ addl %edi,%edx
+ movdqa %xmm4,%xmm7
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm6,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm5,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ pxor %xmm8,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm9,32(%rsp)
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrld $31,%xmm8
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ movdqa %xmm10,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ addl 60(%rsp),%ebp
+ cmpl $11,%r8d
+ jb L$aesenclast1
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je L$aesenclast1
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa 16(%r11),%xmm10
+ xorl %edx,%edi
+ addl %eax,%ebp
+ pxor %xmm9,%xmm7
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movdqa %xmm7,%xmm9
+ addl 0(%rsp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm1,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ xorl %ecx,%esi
+ movups 16(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,0(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ addl %ebp,%edx
+ pxor %xmm9,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ psrld $30,%xmm9
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ por %xmm9,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ movdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 16(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 32(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb L$aesenclast2
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je L$aesenclast2
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ xorl %ebp,%edi
+ movdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ecx,%edi
+ movups 32(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,16(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp
+ andl %edx,%edi
+ pxor %xmm7,%xmm6
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ pxor %xmm9,%xmm6
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%edi
+ rorl $7,%eax
+ psrld $30,%xmm9
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ por %xmm9,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%edi
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ pxor %xmm0,%xmm7
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 48(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ pxor %xmm10,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ movl %ecx,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%edi
+ rorl $7,%ebx
+ psrld $30,%xmm10
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ por %xmm10,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%edi
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx
+ andl %eax,%edi
+ pxor %xmm1,%xmm0
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ rorl $7,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ pxor %xmm8,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ pslld $2,%xmm0
+ andl %edx,%edi
+ rorl $7,%ecx
+ psrld $30,%xmm8
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ por %xmm8,%xmm0
+ movl %ecx,%edi
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%edi
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ pxor %xmm2,%xmm1
+ andl %eax,%esi
+ rorl $7,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ pxor %xmm9,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ rorl $7,%edx
+ psrld $30,%xmm9
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ por %xmm9,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax
+ andl %ebp,%edi
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ cmpl $11,%r8d
+ jb L$aesenclast3
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je L$aesenclast3
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp
+ andl %edx,%esi
+ andl %ecx,%edi
+ rorl $7,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%edi
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ pxor %xmm3,%xmm2
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ pxor %xmm10,%xmm2
+ roll $5,%ebp
+ movups 48(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,32(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%edi
+ rorl $7,%ebp
+ psrld $30,%xmm10
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm10,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ rorl $7,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax
+ andl %ebp,%esi
+ andl %edx,%edi
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ cmpq %r14,%r10
+ je L$done_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,0(%rsp)
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ psubd %xmm9,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm9,%xmm1
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movdqa %xmm1,16(%rsp)
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ psubd %xmm9,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ paddd %xmm9,%xmm2
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%rsp)
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ psubd %xmm9,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb L$aesenclast4
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je L$aesenclast4
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ leaq 64(%r12),%r12
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ addl 12(%r9),%edx
+ movl %eax,0(%r9)
+ addl 16(%r9),%ebp
+ movl %esi,4(%r9)
+ movl %esi,%ebx
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ jmp L$oop_ssse3
+.p2align 4
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb L$aesenclast5
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je L$aesenclast5
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ movq 88(%rsp),%r8
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ movl %eax,0(%r9)
+ addl 12(%r9),%edx
+ movl %esi,4(%r9)
+ addl 16(%r9),%ebp
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ movups %xmm11,(%r8)
+ leaq 104(%rsp),%rsi
+ movq 0(%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+ retq
+.p2align 6 /* align the constant table below to 2^6 = 64 bytes */
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 /* table offset 0: SHA-1 round constant K for rounds 0-19 (FIPS 180-4), repeated in all four 32-bit lanes */
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 /* table offset 16: SHA-1 K for rounds 20-39, four lanes */
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc /* table offset 32: SHA-1 K for rounds 40-59, four lanes */
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 /* table offset 48: SHA-1 K for rounds 60-79, four lanes */
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f /* table offset 64: shuffle control that reverses the bytes inside each dword; presumably the operand loaded from 64(%r11) ahead of the pshufb byte sequences - confirm that r11 points at this table */
+.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 /* NUL-terminated ASCII: "AESNI-CBC+SHA1 stitch for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
+.p2align 6 /* pad up to the next 64-byte boundary */
diff --git a/crypto/libressl/crypto/aes/aesni-sha1-masm-x86_64.S b/crypto/libressl/crypto/aes/aesni-sha1-masm-x86_64.S
new file mode 100644
index 0000000..db95881
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aesni-sha1-masm-x86_64.S
@@ -0,0 +1,1616 @@
+; 1 "crypto/aes/aesni-sha1-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/aes/aesni-sha1-masm-x86_64.S.tmp" 2
+; 1 "./crypto/x86_arch.h" 1
+; 16 "./crypto/x86_arch.h"
+; 40 "./crypto/x86_arch.h"
+; 3 "crypto/aes/aesni-sha1-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+PUBLIC aesni_cbc_sha1_enc ;make the entry symbol visible to other modules
+aesni_cbc_sha1_enc PROC PUBLIC ;entry stub: every call is forwarded to aesni_cbc_sha1_enc_ssse3
+ mov r10d,DWORD PTR[((OPENSSL_ia32cap_P+0))] ;load dword at OPENSSL_ia32cap_P+0; never tested in this stub (NOTE(review): looks like a leftover of a removed capability dispatch - confirm)
+ mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+4))] ;load dword at OPENSSL_ia32cap_P+4; likewise unused here, and the ssse3 routine reloads r10/r11 before reading them
+ jmp aesni_cbc_sha1_enc_ssse3 ;unconditional tail jump; rsp and the argument registers are left untouched
+ DB 0F3h,0C3h ;repret (F3 C3 = rep ret); not reached by fall-through because the jmp above is unconditional
+aesni_cbc_sha1_enc ENDP
+aesni_cbc_sha1_enc_ssse3 PROC PRIVATE
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp]
+ mov r9,QWORD PTR[48+rsp]
+ mov r10,QWORD PTR[56+rsp]
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-264))+rsp]
+ movaps XMMWORD PTR[(96+0)+rsp],xmm6
+ movaps XMMWORD PTR[(96+16)+rsp],xmm7
+ movaps XMMWORD PTR[(96+32)+rsp],xmm8
+ movaps XMMWORD PTR[(96+48)+rsp],xmm9
+ movaps XMMWORD PTR[(96+64)+rsp],xmm10
+ movaps XMMWORD PTR[(96+80)+rsp],xmm11
+ movaps XMMWORD PTR[(96+96)+rsp],xmm12
+ movaps XMMWORD PTR[(96+112)+rsp],xmm13
+ movaps XMMWORD PTR[(96+128)+rsp],xmm14
+ movaps XMMWORD PTR[(96+144)+rsp],xmm15
+ mov r12,rdi
+ mov r13,rsi
+ mov r14,rdx
+ mov r15,rcx
+ movdqu xmm11,XMMWORD PTR[r8]
+ mov QWORD PTR[88+rsp],r8
+ shl r14,6
+ sub r13,r12
+ mov r8d,DWORD PTR[240+r15]
+ add r14,r10
+ lea r11,QWORD PTR[K_XX_XX]
+ mov eax,DWORD PTR[r9]
+ mov ebx,DWORD PTR[4+r9]
+ mov ecx,DWORD PTR[8+r9]
+ mov edx,DWORD PTR[12+r9]
+ mov esi,ebx
+ mov ebp,DWORD PTR[16+r9]
+ movdqa xmm6,XMMWORD PTR[64+r11]
+ movdqa xmm9,XMMWORD PTR[r11]
+ movdqu xmm0,XMMWORD PTR[r10]
+ movdqu xmm1,XMMWORD PTR[16+r10]
+ movdqu xmm2,XMMWORD PTR[32+r10]
+ movdqu xmm3,XMMWORD PTR[48+r10]
+DB 102,15,56,0,198
+ add r10,64
+DB 102,15,56,0,206
+DB 102,15,56,0,214
+DB 102,15,56,0,222
+ paddd xmm0,xmm9
+ paddd xmm1,xmm9
+ paddd xmm2,xmm9
+ movdqa XMMWORD PTR[rsp],xmm0
+ psubd xmm0,xmm9
+ movdqa XMMWORD PTR[16+rsp],xmm1
+ psubd xmm1,xmm9
+ movdqa XMMWORD PTR[32+rsp],xmm2
+ psubd xmm2,xmm9
+ movups xmm13,XMMWORD PTR[r15]
+ movups xmm14,XMMWORD PTR[16+r15]
+ jmp $L$oop_ssse3
+ movdqa xmm4,xmm1
+ add ebp,DWORD PTR[rsp]
+ movups xmm12,XMMWORD PTR[r12]
+ xorps xmm12,xmm13
+ xorps xmm11,xmm12
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[32+r15]
+ xor ecx,edx
+ movdqa xmm8,xmm3
+DB 102,15,58,15,224,8
+ mov edi,eax
+ rol eax,5
+ paddd xmm9,xmm3
+ and esi,ecx
+ xor ecx,edx
+ psrldq xmm8,4
+ xor esi,edx
+ add ebp,eax
+ pxor xmm4,xmm0
+ ror ebx,2
+ add ebp,esi
+ pxor xmm8,xmm2
+ add edx,DWORD PTR[4+rsp]
+ xor ebx,ecx
+ mov esi,ebp
+ rol ebp,5
+ pxor xmm4,xmm8
+ and edi,ebx
+ xor ebx,ecx
+ movdqa XMMWORD PTR[48+rsp],xmm9
+ xor edi,ecx
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[48+r15]
+ add edx,ebp
+ movdqa xmm10,xmm4
+ movdqa xmm8,xmm4
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD PTR[8+rsp]
+ xor eax,ebx
+ pslldq xmm10,12
+ paddd xmm4,xmm4
+ mov edi,edx
+ rol edx,5
+ and esi,eax
+ xor eax,ebx
+ psrld xmm8,31
+ xor esi,ebx
+ add ecx,edx
+ movdqa xmm9,xmm10
+ ror ebp,7
+ add ecx,esi
+ psrld xmm10,30
+ por xmm4,xmm8
+ add ebx,DWORD PTR[12+rsp]
+ xor ebp,eax
+ mov esi,ecx
+ rol ecx,5
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[64+r15]
+ pslld xmm9,2
+ pxor xmm4,xmm10
+ and edi,ebp
+ xor ebp,eax
+ movdqa xmm10,XMMWORD PTR[r11]
+ xor edi,eax
+ add ebx,ecx
+ pxor xmm4,xmm9
+ ror edx,7
+ add ebx,edi
+ movdqa xmm5,xmm2
+ add eax,DWORD PTR[16+rsp]
+ xor edx,ebp
+ movdqa xmm9,xmm4
+DB 102,15,58,15,233,8
+ mov edi,ebx
+ rol ebx,5
+ paddd xmm10,xmm4
+ and esi,edx
+ xor edx,ebp
+ psrldq xmm9,4
+ xor esi,ebp
+ add eax,ebx
+ pxor xmm5,xmm1
+ ror ecx,7
+ add eax,esi
+ pxor xmm9,xmm3
+ add ebp,DWORD PTR[20+rsp]
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[80+r15]
+ xor ecx,edx
+ mov esi,eax
+ rol eax,5
+ pxor xmm5,xmm9
+ and edi,ecx
+ xor ecx,edx
+ movdqa XMMWORD PTR[rsp],xmm10
+ xor edi,edx
+ add ebp,eax
+ movdqa xmm8,xmm5
+ movdqa xmm9,xmm5
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[24+rsp]
+ xor ebx,ecx
+ pslldq xmm8,12
+ paddd xmm5,xmm5
+ mov edi,ebp
+ rol ebp,5
+ and esi,ebx
+ xor ebx,ecx
+ psrld xmm9,31
+ xor esi,ecx
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[96+r15]
+ add edx,ebp
+ movdqa xmm10,xmm8
+ ror eax,7
+ add edx,esi
+ psrld xmm8,30
+ por xmm5,xmm9
+ add ecx,DWORD PTR[28+rsp]
+ xor eax,ebx
+ mov esi,edx
+ rol edx,5
+ pslld xmm10,2
+ pxor xmm5,xmm8
+ and edi,eax
+ xor eax,ebx
+ movdqa xmm8,XMMWORD PTR[16+r11]
+ xor edi,ebx
+ add ecx,edx
+ pxor xmm5,xmm10
+ ror ebp,7
+ add ecx,edi
+ movdqa xmm6,xmm3
+ add ebx,DWORD PTR[32+rsp]
+ xor ebp,eax
+ movdqa xmm10,xmm5
+DB 102,15,58,15,242,8
+ mov edi,ecx
+ rol ecx,5
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[112+r15]
+ paddd xmm8,xmm5
+ and esi,ebp
+ xor ebp,eax
+ psrldq xmm10,4
+ xor esi,eax
+ add ebx,ecx
+ pxor xmm6,xmm2
+ ror edx,7
+ add ebx,esi
+ pxor xmm10,xmm4
+ add eax,DWORD PTR[36+rsp]
+ xor edx,ebp
+ mov esi,ebx
+ rol ebx,5
+ pxor xmm6,xmm10
+ and edi,edx
+ xor edx,ebp
+ movdqa XMMWORD PTR[16+rsp],xmm8
+ xor edi,ebp
+ add eax,ebx
+ movdqa xmm9,xmm6
+ movdqa xmm10,xmm6
+ ror ecx,7
+ add eax,edi
+ add ebp,DWORD PTR[40+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[128+r15]
+ xor ecx,edx
+ pslldq xmm9,12
+ paddd xmm6,xmm6
+ mov edi,eax
+ rol eax,5
+ and esi,ecx
+ xor ecx,edx
+ psrld xmm10,31
+ xor esi,edx
+ add ebp,eax
+ movdqa xmm8,xmm9
+ ror ebx,7
+ add ebp,esi
+ psrld xmm9,30
+ por xmm6,xmm10
+ add edx,DWORD PTR[44+rsp]
+ xor ebx,ecx
+ mov esi,ebp
+ rol ebp,5
+ pslld xmm8,2
+ pxor xmm6,xmm9
+ and edi,ebx
+ xor ebx,ecx
+ movdqa xmm9,XMMWORD PTR[16+r11]
+ xor edi,ecx
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[144+r15]
+ add edx,ebp
+ pxor xmm6,xmm8
+ ror eax,7
+ add edx,edi
+ movdqa xmm7,xmm4
+ add ecx,DWORD PTR[48+rsp]
+ xor eax,ebx
+ movdqa xmm8,xmm6
+DB 102,15,58,15,251,8
+ mov edi,edx
+ rol edx,5
+ paddd xmm9,xmm6
+ and esi,eax
+ xor eax,ebx
+ psrldq xmm8,4
+ xor esi,ebx
+ add ecx,edx
+ pxor xmm7,xmm3
+ ror ebp,7
+ add ecx,esi
+ pxor xmm8,xmm5
+ add ebx,DWORD PTR[52+rsp]
+ xor ebp,eax
+ mov esi,ecx
+ rol ecx,5
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[160+r15]
+ pxor xmm7,xmm8
+ and edi,ebp
+ xor ebp,eax
+ movdqa XMMWORD PTR[32+rsp],xmm9
+ xor edi,eax
+ add ebx,ecx
+ movdqa xmm10,xmm7
+ movdqa xmm8,xmm7
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[56+rsp]
+ xor edx,ebp
+ pslldq xmm10,12
+ paddd xmm7,xmm7
+ mov edi,ebx
+ rol ebx,5
+ and esi,edx
+ xor edx,ebp
+ psrld xmm8,31
+ xor esi,ebp
+ add eax,ebx
+ movdqa xmm9,xmm10
+ ror ecx,7
+ add eax,esi
+ psrld xmm10,30
+ por xmm7,xmm8
+ add ebp,DWORD PTR[60+rsp]
+ cmp r8d,11
+ jb $L$aesenclast1
+ movups xmm14,XMMWORD PTR[176+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[192+r15]
+ aesenc xmm11,xmm14
+ je $L$aesenclast1
+ movups xmm14,XMMWORD PTR[208+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[224+r15]
+ aesenc xmm11,xmm14
+ aesenclast xmm11,xmm15
+ movups xmm14,XMMWORD PTR[16+r15]
+ xor ecx,edx
+ mov esi,eax
+ rol eax,5
+ pslld xmm9,2
+ pxor xmm7,xmm10
+ and edi,ecx
+ xor ecx,edx
+ movdqa xmm10,XMMWORD PTR[16+r11]
+ xor edi,edx
+ add ebp,eax
+ pxor xmm7,xmm9
+ ror ebx,7
+ add ebp,edi
+ movdqa xmm9,xmm7
+ add edx,DWORD PTR[rsp]
+ pxor xmm0,xmm4
+DB 102,68,15,58,15,206,8
+ xor ebx,ecx
+ mov edi,ebp
+ rol ebp,5
+ pxor xmm0,xmm1
+ and esi,ebx
+ xor ebx,ecx
+ movdqa xmm8,xmm10
+ paddd xmm10,xmm7
+ xor esi,ecx
+ movups xmm12,XMMWORD PTR[16+r12]
+ xorps xmm12,xmm13
+ movups XMMWORD PTR[r12*1+r13],xmm11
+ xorps xmm11,xmm12
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[32+r15]
+ add edx,ebp
+ pxor xmm0,xmm9
+ ror eax,7
+ add edx,esi
+ add ecx,DWORD PTR[4+rsp]
+ xor eax,ebx
+ movdqa xmm9,xmm0
+ movdqa XMMWORD PTR[48+rsp],xmm10
+ mov esi,edx
+ rol edx,5
+ and edi,eax
+ xor eax,ebx
+ pslld xmm0,2
+ xor edi,ebx
+ add ecx,edx
+ psrld xmm9,30
+ ror ebp,7
+ add ecx,edi
+ add ebx,DWORD PTR[8+rsp]
+ xor ebp,eax
+ mov edi,ecx
+ rol ecx,5
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[48+r15]
+ por xmm0,xmm9
+ and esi,ebp
+ xor ebp,eax
+ movdqa xmm10,xmm0
+ xor esi,eax
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ add eax,DWORD PTR[12+rsp]
+ xor edx,ebp
+ mov esi,ebx
+ rol ebx,5
+ and edi,edx
+ xor edx,ebp
+ xor edi,ebp
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ add ebp,DWORD PTR[16+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[64+r15]
+ pxor xmm1,xmm5
+DB 102,68,15,58,15,215,8
+ xor esi,edx
+ mov edi,eax
+ rol eax,5
+ pxor xmm1,xmm2
+ xor esi,ecx
+ add ebp,eax
+ movdqa xmm9,xmm8
+ paddd xmm8,xmm0
+ ror ebx,7
+ add ebp,esi
+ pxor xmm1,xmm10
+ add edx,DWORD PTR[20+rsp]
+ xor edi,ecx
+ mov esi,ebp
+ rol ebp,5
+ movdqa xmm10,xmm1
+ movdqa XMMWORD PTR[rsp],xmm8
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ pslld xmm1,2
+ add ecx,DWORD PTR[24+rsp]
+ xor esi,ebx
+ psrld xmm10,30
+ mov edi,edx
+ rol edx,5
+ xor esi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[80+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,esi
+ por xmm1,xmm10
+ add ebx,DWORD PTR[28+rsp]
+ xor edi,eax
+ movdqa xmm8,xmm1
+ mov esi,ecx
+ rol ecx,5
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[32+rsp]
+ pxor xmm2,xmm6
+DB 102,68,15,58,15,192,8
+ xor esi,ebp
+ mov edi,ebx
+ rol ebx,5
+ pxor xmm2,xmm3
+ xor esi,edx
+ add eax,ebx
+ movdqa xmm10,XMMWORD PTR[32+r11]
+ paddd xmm9,xmm1
+ ror ecx,7
+ add eax,esi
+ pxor xmm2,xmm8
+ add ebp,DWORD PTR[36+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[96+r15]
+ xor edi,edx
+ mov esi,eax
+ rol eax,5
+ movdqa xmm8,xmm2
+ movdqa XMMWORD PTR[16+rsp],xmm9
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ pslld xmm2,2
+ add edx,DWORD PTR[40+rsp]
+ xor esi,ecx
+ psrld xmm8,30
+ mov edi,ebp
+ rol ebp,5
+ xor esi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,esi
+ por xmm2,xmm8
+ add ecx,DWORD PTR[44+rsp]
+ xor edi,ebx
+ movdqa xmm9,xmm2
+ mov esi,edx
+ rol edx,5
+ xor edi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[112+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ add ebx,DWORD PTR[48+rsp]
+ pxor xmm3,xmm7
+DB 102,68,15,58,15,201,8
+ xor esi,eax
+ mov edi,ecx
+ rol ecx,5
+ pxor xmm3,xmm4
+ xor esi,ebp
+ add ebx,ecx
+ movdqa xmm8,xmm10
+ paddd xmm10,xmm2
+ ror edx,7
+ add ebx,esi
+ pxor xmm3,xmm9
+ add eax,DWORD PTR[52+rsp]
+ xor edi,ebp
+ mov esi,ebx
+ rol ebx,5
+ movdqa xmm9,xmm3
+ movdqa XMMWORD PTR[32+rsp],xmm10
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ pslld xmm3,2
+ add ebp,DWORD PTR[56+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[128+r15]
+ xor esi,edx
+ psrld xmm9,30
+ mov edi,eax
+ rol eax,5
+ xor esi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,esi
+ por xmm3,xmm9
+ add edx,DWORD PTR[60+rsp]
+ xor edi,ecx
+ movdqa xmm10,xmm3
+ mov esi,ebp
+ rol ebp,5
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD PTR[rsp]
+ pxor xmm4,xmm0
+DB 102,68,15,58,15,210,8
+ xor esi,ebx
+ mov edi,edx
+ rol edx,5
+ pxor xmm4,xmm5
+ xor esi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[144+r15]
+ add ecx,edx
+ movdqa xmm9,xmm8
+ paddd xmm8,xmm3
+ ror ebp,7
+ add ecx,esi
+ pxor xmm4,xmm10
+ add ebx,DWORD PTR[4+rsp]
+ xor edi,eax
+ mov esi,ecx
+ rol ecx,5
+ movdqa xmm10,xmm4
+ movdqa XMMWORD PTR[48+rsp],xmm8
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ pslld xmm4,2
+ add eax,DWORD PTR[8+rsp]
+ xor esi,ebp
+ psrld xmm10,30
+ mov edi,ebx
+ rol ebx,5
+ xor esi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,esi
+ por xmm4,xmm10
+ add ebp,DWORD PTR[12+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[160+r15]
+ xor edi,edx
+ movdqa xmm8,xmm4
+ mov esi,eax
+ rol eax,5
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[16+rsp]
+ pxor xmm5,xmm1
+DB 102,68,15,58,15,195,8
+ xor esi,ecx
+ mov edi,ebp
+ rol ebp,5
+ pxor xmm5,xmm6
+ xor esi,ebx
+ add edx,ebp
+ movdqa xmm10,xmm9
+ paddd xmm9,xmm4
+ ror eax,7
+ add edx,esi
+ pxor xmm5,xmm8
+ add ecx,DWORD PTR[20+rsp]
+ xor edi,ebx
+ mov esi,edx
+ rol edx,5
+ movdqa xmm8,xmm5
+ movdqa XMMWORD PTR[rsp],xmm9
+ xor edi,eax
+ cmp r8d,11
+ jb $L$aesenclast2
+ movups xmm14,XMMWORD PTR[176+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[192+r15]
+ aesenc xmm11,xmm14
+ je $L$aesenclast2
+ movups xmm14,XMMWORD PTR[208+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[224+r15]
+ aesenc xmm11,xmm14
+ aesenclast xmm11,xmm15
+ movups xmm14,XMMWORD PTR[16+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ pslld xmm5,2
+ add ebx,DWORD PTR[24+rsp]
+ xor esi,eax
+ psrld xmm8,30
+ mov edi,ecx
+ rol ecx,5
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ por xmm5,xmm8
+ add eax,DWORD PTR[28+rsp]
+ xor edi,ebp
+ movdqa xmm9,xmm5
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ mov edi,ecx
+ movups xmm12,XMMWORD PTR[32+r12]
+ xorps xmm12,xmm13
+ movups XMMWORD PTR[16+r12*1+r13],xmm11
+ xorps xmm11,xmm12
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[32+r15]
+ pxor xmm6,xmm2
+DB 102,68,15,58,15,204,8
+ xor ecx,edx
+ add ebp,DWORD PTR[32+rsp]
+ and edi,edx
+ pxor xmm6,xmm7
+ and esi,ecx
+ ror ebx,7
+ movdqa xmm8,xmm10
+ paddd xmm10,xmm5
+ add ebp,edi
+ mov edi,eax
+ pxor xmm6,xmm9
+ rol eax,5
+ add ebp,esi
+ xor ecx,edx
+ add ebp,eax
+ movdqa xmm9,xmm6
+ movdqa XMMWORD PTR[16+rsp],xmm10
+ mov esi,ebx
+ xor ebx,ecx
+ add edx,DWORD PTR[36+rsp]
+ and esi,ecx
+ pslld xmm6,2
+ and edi,ebx
+ ror eax,7
+ psrld xmm9,30
+ add edx,esi
+ mov esi,ebp
+ rol ebp,5
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[48+r15]
+ add edx,edi
+ xor ebx,ecx
+ add edx,ebp
+ por xmm6,xmm9
+ mov edi,eax
+ xor eax,ebx
+ movdqa xmm10,xmm6
+ add ecx,DWORD PTR[40+rsp]
+ and edi,ebx
+ and esi,eax
+ ror ebp,7
+ add ecx,edi
+ mov edi,edx
+ rol edx,5
+ add ecx,esi
+ xor eax,ebx
+ add ecx,edx
+ mov esi,ebp
+ xor ebp,eax
+ add ebx,DWORD PTR[44+rsp]
+ and esi,eax
+ and edi,ebp
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[64+r15]
+ ror edx,7
+ add ebx,esi
+ mov esi,ecx
+ rol ecx,5
+ add ebx,edi
+ xor ebp,eax
+ add ebx,ecx
+ mov edi,edx
+ pxor xmm7,xmm3
+DB 102,68,15,58,15,213,8
+ xor edx,ebp
+ add eax,DWORD PTR[48+rsp]
+ and edi,ebp
+ pxor xmm7,xmm0
+ and esi,edx
+ ror ecx,7
+ movdqa xmm9,XMMWORD PTR[48+r11]
+ paddd xmm8,xmm6
+ add eax,edi
+ mov edi,ebx
+ pxor xmm7,xmm10
+ rol ebx,5
+ add eax,esi
+ xor edx,ebp
+ add eax,ebx
+ movdqa xmm10,xmm7
+ movdqa XMMWORD PTR[32+rsp],xmm8
+ mov esi,ecx
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[80+r15]
+ xor ecx,edx
+ add ebp,DWORD PTR[52+rsp]
+ and esi,edx
+ pslld xmm7,2
+ and edi,ecx
+ ror ebx,7
+ psrld xmm10,30
+ add ebp,esi
+ mov esi,eax
+ rol eax,5
+ add ebp,edi
+ xor ecx,edx
+ add ebp,eax
+ por xmm7,xmm10
+ mov edi,ebx
+ xor ebx,ecx
+ movdqa xmm8,xmm7
+ add edx,DWORD PTR[56+rsp]
+ and edi,ecx
+ and esi,ebx
+ ror eax,7
+ add edx,edi
+ mov edi,ebp
+ rol ebp,5
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[96+r15]
+ add edx,esi
+ xor ebx,ecx
+ add edx,ebp
+ mov esi,eax
+ xor eax,ebx
+ add ecx,DWORD PTR[60+rsp]
+ and esi,ebx
+ and edi,eax
+ ror ebp,7
+ add ecx,esi
+ mov esi,edx
+ rol edx,5
+ add ecx,edi
+ xor eax,ebx
+ add ecx,edx
+ mov edi,ebp
+ pxor xmm0,xmm4
+DB 102,68,15,58,15,198,8
+ xor ebp,eax
+ add ebx,DWORD PTR[rsp]
+ and edi,eax
+ pxor xmm0,xmm1
+ and esi,ebp
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[112+r15]
+ ror edx,7
+ movdqa xmm10,xmm9
+ paddd xmm9,xmm7
+ add ebx,edi
+ mov edi,ecx
+ pxor xmm0,xmm8
+ rol ecx,5
+ add ebx,esi
+ xor ebp,eax
+ add ebx,ecx
+ movdqa xmm8,xmm0
+ movdqa XMMWORD PTR[48+rsp],xmm9
+ mov esi,edx
+ xor edx,ebp
+ add eax,DWORD PTR[4+rsp]
+ and esi,ebp
+ pslld xmm0,2
+ and edi,edx
+ ror ecx,7
+ psrld xmm8,30
+ add eax,esi
+ mov esi,ebx
+ rol ebx,5
+ add eax,edi
+ xor edx,ebp
+ add eax,ebx
+ por xmm0,xmm8
+ mov edi,ecx
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[128+r15]
+ xor ecx,edx
+ movdqa xmm9,xmm0
+ add ebp,DWORD PTR[8+rsp]
+ and edi,edx
+ and esi,ecx
+ ror ebx,7
+ add ebp,edi
+ mov edi,eax
+ rol eax,5
+ add ebp,esi
+ xor ecx,edx
+ add ebp,eax
+ mov esi,ebx
+ xor ebx,ecx
+ add edx,DWORD PTR[12+rsp]
+ and esi,ecx
+ and edi,ebx
+ ror eax,7
+ add edx,esi
+ mov esi,ebp
+ rol ebp,5
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[144+r15]
+ add edx,edi
+ xor ebx,ecx
+ add edx,ebp
+ mov edi,eax
+ pxor xmm1,xmm5
+DB 102,68,15,58,15,207,8
+ xor eax,ebx
+ add ecx,DWORD PTR[16+rsp]
+ and edi,ebx
+ pxor xmm1,xmm2
+ and esi,eax
+ ror ebp,7
+ movdqa xmm8,xmm10
+ paddd xmm10,xmm0
+ add ecx,edi
+ mov edi,edx
+ pxor xmm1,xmm9
+ rol edx,5
+ add ecx,esi
+ xor eax,ebx
+ add ecx,edx
+ movdqa xmm9,xmm1
+ movdqa XMMWORD PTR[rsp],xmm10
+ mov esi,ebp
+ xor ebp,eax
+ add ebx,DWORD PTR[20+rsp]
+ and esi,eax
+ pslld xmm1,2
+ and edi,ebp
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[160+r15]
+ ror edx,7
+ psrld xmm9,30
+ add ebx,esi
+ mov esi,ecx
+ rol ecx,5
+ add ebx,edi
+ xor ebp,eax
+ add ebx,ecx
+ por xmm1,xmm9
+ mov edi,edx
+ xor edx,ebp
+ movdqa xmm10,xmm1
+ add eax,DWORD PTR[24+rsp]
+ and edi,ebp
+ and esi,edx
+ ror ecx,7
+ add eax,edi
+ mov edi,ebx
+ rol ebx,5
+ add eax,esi
+ xor edx,ebp
+ add eax,ebx
+ mov esi,ecx
+ cmp r8d,11
+ jb $L$aesenclast3
+ movups xmm14,XMMWORD PTR[176+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[192+r15]
+ aesenc xmm11,xmm14
+ je $L$aesenclast3
+ movups xmm14,XMMWORD PTR[208+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[224+r15]
+ aesenc xmm11,xmm14
+ aesenclast xmm11,xmm15
+ movups xmm14,XMMWORD PTR[16+r15]
+ xor ecx,edx
+ add ebp,DWORD PTR[28+rsp]
+ and esi,edx
+ and edi,ecx
+ ror ebx,7
+ add ebp,esi
+ mov esi,eax
+ rol eax,5
+ add ebp,edi
+ xor ecx,edx
+ add ebp,eax
+ mov edi,ebx
+ pxor xmm2,xmm6
+DB 102,68,15,58,15,208,8
+ xor ebx,ecx
+ add edx,DWORD PTR[32+rsp]
+ and edi,ecx
+ pxor xmm2,xmm3
+ and esi,ebx
+ ror eax,7
+ movdqa xmm9,xmm8
+ paddd xmm8,xmm1
+ add edx,edi
+ mov edi,ebp
+ pxor xmm2,xmm10
+ rol ebp,5
+ movups xmm12,XMMWORD PTR[48+r12]
+ xorps xmm12,xmm13
+ movups XMMWORD PTR[32+r12*1+r13],xmm11
+ xorps xmm11,xmm12
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[32+r15]
+ add edx,esi
+ xor ebx,ecx
+ add edx,ebp
+ movdqa xmm10,xmm2
+ movdqa XMMWORD PTR[16+rsp],xmm8
+ mov esi,eax
+ xor eax,ebx
+ add ecx,DWORD PTR[36+rsp]
+ and esi,ebx
+ pslld xmm2,2
+ and edi,eax
+ ror ebp,7
+ psrld xmm10,30
+ add ecx,esi
+ mov esi,edx
+ rol edx,5
+ add ecx,edi
+ xor eax,ebx
+ add ecx,edx
+ por xmm2,xmm10
+ mov edi,ebp
+ xor ebp,eax
+ movdqa xmm8,xmm2
+ add ebx,DWORD PTR[40+rsp]
+ and edi,eax
+ and esi,ebp
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[48+r15]
+ ror edx,7
+ add ebx,edi
+ mov edi,ecx
+ rol ecx,5
+ add ebx,esi
+ xor ebp,eax
+ add ebx,ecx
+ mov esi,edx
+ xor edx,ebp
+ add eax,DWORD PTR[44+rsp]
+ and esi,ebp
+ and edi,edx
+ ror ecx,7
+ add eax,esi
+ mov esi,ebx
+ rol ebx,5
+ add eax,edi
+ xor edx,ebp
+ add eax,ebx
+ add ebp,DWORD PTR[48+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[64+r15]
+ pxor xmm3,xmm7
+DB 102,68,15,58,15,193,8
+ xor esi,edx
+ mov edi,eax
+ rol eax,5
+ pxor xmm3,xmm4
+ xor esi,ecx
+ add ebp,eax
+ movdqa xmm10,xmm9
+ paddd xmm9,xmm2
+ ror ebx,7
+ add ebp,esi
+ pxor xmm3,xmm8
+ add edx,DWORD PTR[52+rsp]
+ xor edi,ecx
+ mov esi,ebp
+ rol ebp,5
+ movdqa xmm8,xmm3
+ movdqa XMMWORD PTR[32+rsp],xmm9
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ pslld xmm3,2
+ add ecx,DWORD PTR[56+rsp]
+ xor esi,ebx
+ psrld xmm8,30
+ mov edi,edx
+ rol edx,5
+ xor esi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[80+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,esi
+ por xmm3,xmm8
+ add ebx,DWORD PTR[60+rsp]
+ xor edi,eax
+ mov esi,ecx
+ rol ecx,5
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[rsp]
+ paddd xmm10,xmm3
+ xor esi,ebp
+ mov edi,ebx
+ rol ebx,5
+ xor esi,edx
+ movdqa XMMWORD PTR[48+rsp],xmm10
+ add eax,ebx
+ ror ecx,7
+ add eax,esi
+ add ebp,DWORD PTR[4+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[96+r15]
+ xor edi,edx
+ mov esi,eax
+ rol eax,5
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[8+rsp]
+ xor esi,ecx
+ mov edi,ebp
+ rol ebp,5
+ xor esi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,esi
+ add ecx,DWORD PTR[12+rsp]
+ xor edi,ebx
+ mov esi,edx
+ rol edx,5
+ xor edi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[112+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ cmp r10,r14
+ je $L$done_ssse3
+ movdqa xmm6,XMMWORD PTR[64+r11]
+ movdqa xmm9,XMMWORD PTR[r11]
+ movdqu xmm0,XMMWORD PTR[r10]
+ movdqu xmm1,XMMWORD PTR[16+r10]
+ movdqu xmm2,XMMWORD PTR[32+r10]
+ movdqu xmm3,XMMWORD PTR[48+r10]
+DB 102,15,56,0,198
+ add r10,64
+ add ebx,DWORD PTR[16+rsp]
+ xor esi,eax
+DB 102,15,56,0,206
+ mov edi,ecx
+ rol ecx,5
+ paddd xmm0,xmm9
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ movdqa XMMWORD PTR[rsp],xmm0
+ add eax,DWORD PTR[20+rsp]
+ xor edi,ebp
+ psubd xmm0,xmm9
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ add ebp,DWORD PTR[24+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[128+r15]
+ xor esi,edx
+ mov edi,eax
+ rol eax,5
+ xor esi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,esi
+ add edx,DWORD PTR[28+rsp]
+ xor edi,ecx
+ mov esi,ebp
+ rol ebp,5
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD PTR[32+rsp]
+ xor esi,ebx
+DB 102,15,56,0,214
+ mov edi,edx
+ rol edx,5
+ paddd xmm1,xmm9
+ xor esi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[144+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,esi
+ movdqa XMMWORD PTR[16+rsp],xmm1
+ add ebx,DWORD PTR[36+rsp]
+ xor edi,eax
+ psubd xmm1,xmm9
+ mov esi,ecx
+ rol ecx,5
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[40+rsp]
+ xor esi,ebp
+ mov edi,ebx
+ rol ebx,5
+ xor esi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,esi
+ add ebp,DWORD PTR[44+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[160+r15]
+ xor edi,edx
+ mov esi,eax
+ rol eax,5
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[48+rsp]
+ xor esi,ecx
+DB 102,15,56,0,222
+ mov edi,ebp
+ rol ebp,5
+ paddd xmm2,xmm9
+ xor esi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,esi
+ movdqa XMMWORD PTR[32+rsp],xmm2
+ add ecx,DWORD PTR[52+rsp]
+ xor edi,ebx
+ psubd xmm2,xmm9
+ mov esi,edx
+ rol edx,5
+ xor edi,eax
+ cmp r8d,11
+ jb $L$aesenclast4
+ movups xmm14,XMMWORD PTR[176+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[192+r15]
+ aesenc xmm11,xmm14
+ je $L$aesenclast4
+ movups xmm14,XMMWORD PTR[208+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[224+r15]
+ aesenc xmm11,xmm14
+ aesenclast xmm11,xmm15
+ movups xmm14,XMMWORD PTR[16+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ add ebx,DWORD PTR[56+rsp]
+ xor esi,eax
+ mov edi,ecx
+ rol ecx,5
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ add eax,DWORD PTR[60+rsp]
+ xor edi,ebp
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ movups XMMWORD PTR[48+r12*1+r13],xmm11
+ lea r12,QWORD PTR[64+r12]
+ add eax,DWORD PTR[r9]
+ add esi,DWORD PTR[4+r9]
+ add ecx,DWORD PTR[8+r9]
+ add edx,DWORD PTR[12+r9]
+ mov DWORD PTR[r9],eax
+ add ebp,DWORD PTR[16+r9]
+ mov DWORD PTR[4+r9],esi
+ mov ebx,esi
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+ mov DWORD PTR[16+r9],ebp
+ jmp $L$oop_ssse3
+ add ebx,DWORD PTR[16+rsp]
+ xor esi,eax
+ mov edi,ecx
+ rol ecx,5
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ add eax,DWORD PTR[20+rsp]
+ xor edi,ebp
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ add ebp,DWORD PTR[24+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[128+r15]
+ xor esi,edx
+ mov edi,eax
+ rol eax,5
+ xor esi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,esi
+ add edx,DWORD PTR[28+rsp]
+ xor edi,ecx
+ mov esi,ebp
+ rol ebp,5
+ xor edi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,edi
+ add ecx,DWORD PTR[32+rsp]
+ xor esi,ebx
+ mov edi,edx
+ rol edx,5
+ xor esi,eax
+ aesenc xmm11,xmm15
+ movups xmm14,XMMWORD PTR[144+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,esi
+ add ebx,DWORD PTR[36+rsp]
+ xor edi,eax
+ mov esi,ecx
+ rol ecx,5
+ xor edi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,edi
+ add eax,DWORD PTR[40+rsp]
+ xor esi,ebp
+ mov edi,ebx
+ rol ebx,5
+ xor esi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,esi
+ add ebp,DWORD PTR[44+rsp]
+ aesenc xmm11,xmm14
+ movups xmm15,XMMWORD PTR[160+r15]
+ xor edi,edx
+ mov esi,eax
+ rol eax,5
+ xor edi,ecx
+ add ebp,eax
+ ror ebx,7
+ add ebp,edi
+ add edx,DWORD PTR[48+rsp]
+ xor esi,ecx
+ mov edi,ebp
+ rol ebp,5
+ xor esi,ebx
+ add edx,ebp
+ ror eax,7
+ add edx,esi
+ add ecx,DWORD PTR[52+rsp]
+ xor edi,ebx
+ mov esi,edx
+ rol edx,5
+ xor edi,eax
+ cmp r8d,11
+ jb $L$aesenclast5
+ movups xmm14,XMMWORD PTR[176+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[192+r15]
+ aesenc xmm11,xmm14
+ je $L$aesenclast5
+ movups xmm14,XMMWORD PTR[208+r15]
+ aesenc xmm11,xmm15
+ movups xmm15,XMMWORD PTR[224+r15]
+ aesenc xmm11,xmm14
+ aesenclast xmm11,xmm15
+ movups xmm14,XMMWORD PTR[16+r15]
+ add ecx,edx
+ ror ebp,7
+ add ecx,edi
+ add ebx,DWORD PTR[56+rsp]
+ xor esi,eax
+ mov edi,ecx
+ rol ecx,5
+ xor esi,ebp
+ add ebx,ecx
+ ror edx,7
+ add ebx,esi
+ add eax,DWORD PTR[60+rsp]
+ xor edi,ebp
+ mov esi,ebx
+ rol ebx,5
+ xor edi,edx
+ add eax,ebx
+ ror ecx,7
+ add eax,edi
+ movups XMMWORD PTR[48+r12*1+r13],xmm11
+ mov r8,QWORD PTR[88+rsp]
+ add eax,DWORD PTR[r9]
+ add esi,DWORD PTR[4+r9]
+ add ecx,DWORD PTR[8+r9]
+ mov DWORD PTR[r9],eax
+ add edx,DWORD PTR[12+r9]
+ mov DWORD PTR[4+r9],esi
+ add ebp,DWORD PTR[16+r9]
+ mov DWORD PTR[8+r9],ecx
+ mov DWORD PTR[12+r9],edx
+ mov DWORD PTR[16+r9],ebp
+ movups XMMWORD PTR[r8],xmm11
+ movaps xmm6,XMMWORD PTR[((96+0))+rsp]
+ movaps xmm7,XMMWORD PTR[((96+16))+rsp]
+ movaps xmm8,XMMWORD PTR[((96+32))+rsp]
+ movaps xmm9,XMMWORD PTR[((96+48))+rsp]
+ movaps xmm10,XMMWORD PTR[((96+64))+rsp]
+ movaps xmm11,XMMWORD PTR[((96+80))+rsp]
+ movaps xmm12,XMMWORD PTR[((96+96))+rsp]
+ movaps xmm13,XMMWORD PTR[((96+112))+rsp]
+ movaps xmm14,XMMWORD PTR[((96+128))+rsp]
+ movaps xmm15,XMMWORD PTR[((96+144))+rsp]
+ lea rsi,QWORD PTR[264+rsp]
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi]
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+aesni_cbc_sha1_enc_ssse3 ENDP
+ DD 05a827999h,05a827999h,05a827999h,05a827999h ; 05a827999h replicated in four 32-bit lanes (value of the SHA-1 constant K for rounds 0-19 in FIPS 180-4)
+ DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h ; 06ed9eba1h replicated in four lanes (SHA-1 K, rounds 20-39)
+ DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch ; 08f1bbcdch replicated in four lanes (SHA-1 K, rounds 40-59)
+ DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h ; 0ca62c1d6h replicated in four lanes (SHA-1 K, rounds 60-79)
+ DD 000010203h,004050607h,008090a0bh,00c0d0e0fh ; in memory: bytes 3,2,1,0, 7,6,5,4, ... i.e. a per-dword byte-reversal index pattern; NOTE(review): presumably the shuffle mask loaded from [64+r11] -- confirm
+DB 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115 ; ASCII "AESNI-CBC+SHA1 s"
+DB 116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52 ; ASCII "titch for x86_64"
+DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 ; ASCII ", CRYPTOGAMS by "
+DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 ; ASCII "<appro@openssl.o"
+DB 114,103,62,0 ; ASCII "rg>" followed by a NUL terminator
+EXTERN __imp_RtlVirtualUnwind:NEAR
+ssse3_handler PROC PRIVATE ; handler named in the .xdata record of this file; reads r8 and r9 as incoming structure pointers, rebuilds saved register values, then calls RtlVirtualUnwind and returns 1
+ push rsi ; save the eight general registers that this routine overwrites
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq ; save flags; the DD 0a548f3fch sequences below begin with cld
+ sub rsp,64 ; scratch area; [32+rsp]..[56+rsp] receive the stack arguments of the call below
+ mov rax,QWORD PTR[120+r8] ; rax = qword at offset 120 of the r8 structure (presumably CONTEXT.Rax -- confirm)
+ mov rbx,QWORD PTR[248+r8] ; rbx = qword at offset 248 (presumably CONTEXT.Rip, the faulting address -- confirm)
+ mov rsi,QWORD PTR[8+r9] ; rsi = base that the two 32-bit offsets below are added to (presumably DISPATCHER_CONTEXT.ImageBase -- confirm)
+ mov r11,QWORD PTR[56+r9] ; r11 -> two consecutive DWORD offsets (presumably HandlerData, i.e. the imagerel pair in .xdata -- confirm)
+ mov r10d,DWORD PTR[r11] ; first 32-bit offset, zero-extended into r10
+ lea r10,QWORD PTR[r10*1+rsi] ; r10 = base + first offset
+ cmp rbx,r10
+ jb $L$common_seh_tail ; rbx below the first bound: skip the frame reconstruction
+ mov rax,QWORD PTR[152+r8] ; rax = qword at offset 152 (presumably CONTEXT.Rsp -- confirm)
+ mov r10d,DWORD PTR[4+r11] ; second 32-bit offset
+ lea r10,QWORD PTR[r10*1+rsi] ; r10 = base + second offset
+ cmp rbx,r10
+ jae $L$common_seh_tail ; rbx at or above the second bound: skip the frame reconstruction
+ lea rsi,QWORD PTR[96+rax] ; copy source = rax+96; same offset at which the aesni_cbc_sha1_enc_ssse3 epilogue reloads xmm6..xmm15 from rsp
+ lea rdi,QWORD PTR[512+r8] ; copy destination = r8+512 (presumably CONTEXT.Xmm6 -- confirm)
+ mov ecx,20 ; 20 qwords = 160 bytes = ten 16-byte slots
+ DD 0a548f3fch ; bytes FC F3 48 A5 = cld; rep movsq
+ lea rax,QWORD PTR[264+rax] ; step over the 264-byte local frame, as the function epilogue does with lea rsi,[264+rsp]
+ mov r15,QWORD PTR[rax] ; load six saved qwords in the same layout the function epilogue reads them
+ mov r14,QWORD PTR[8+rax]
+ mov r13,QWORD PTR[16+rax]
+ mov r12,QWORD PTR[24+rax]
+ mov rbp,QWORD PTR[32+rax]
+ mov rbx,QWORD PTR[40+rax]
+ lea rax,QWORD PTR[48+rax] ; rax = address just above the six saved qwords
+ mov QWORD PTR[144+r8],rbx ; write the recovered values into the r8 structure (offsets presumably CONTEXT.Rbx, Rbp, R12..R15 -- confirm)
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+ mov rdi,QWORD PTR[8+rax] ; same [8+..]/[16+..] loads as the WIN64 epilogue; NOTE(review): the jb/jae above target $L$common_seh_tail, which is not defined in the visible text -- presumably it belongs here, confirm
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax ; store rax, rsi, rdi at offsets 152, 168, 176 (presumably CONTEXT.Rsp, Rsi, Rdi -- confirm)
+ mov QWORD PTR[168+r8],rsi
+ mov QWORD PTR[176+r8],rdi
+ mov rdi,QWORD PTR[40+r9] ; copy destination = pointer stored at offset 40 of r9 (presumably DISPATCHER_CONTEXT.ContextRecord -- confirm)
+ mov rsi,r8 ; copy source = the r8 structure just updated
+ mov ecx,154 ; 154 qwords = 1232 bytes
+ DD 0a548f3fch ; bytes FC F3 48 A5 = cld; rep movsq
+ mov rsi,r9 ; keep the r9 structure pointer in rsi while r8/r9 are reloaded as arguments
+ xor rcx,rcx ; first register argument = 0
+ mov rdx,QWORD PTR[8+rsi] ; second register argument = qword at offset 8
+ mov r8,QWORD PTR[rsi] ; third register argument = qword at offset 0
+ mov r9,QWORD PTR[16+rsi] ; fourth register argument = qword at offset 16
+ mov r10,QWORD PTR[40+rsi] ; value of the qword at offset 40
+ lea r11,QWORD PTR[56+rsi] ; address of the field at offset 56
+ lea r12,QWORD PTR[24+rsi] ; address of the field at offset 24
+ mov QWORD PTR[32+rsp],r10 ; stack arguments 5..8 placed above the 32 bytes at [rsp]
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx ; last stack argument = 0
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+ mov eax,1 ; return value 1 (presumably ExceptionContinueSearch -- confirm)
+ add rsp,64 ; release the scratch area
+ popfq ; restore flags and registers in reverse order of the pushes
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+ssse3_handler ENDP
+.text$ ENDS
+ DD imagerel $L$SEH_begin_aesni_cbc_sha1_enc_ssse3 ; image-relative address; presumably the start field of the function-table entry -- confirm
+ DD imagerel $L$SEH_end_aesni_cbc_sha1_enc_ssse3 ; image-relative address; presumably the end field of the same entry -- confirm
+ DD imagerel $L$SEH_info_aesni_cbc_sha1_enc_ssse3 ; image-relative address; presumably points at the unwind record emitted in .xdata -- confirm
+.pdata ENDS
+DB 9,0,0,0 ; NOTE(review): presumably an UNWIND_INFO header, version 1 with the exception-handler flag (1 + (1 shl 3) = 9) and no unwind codes -- confirm
+ DD imagerel ssse3_handler ; image-relative address of the handler routine defined in this file
+ DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3 ; two image-relative bounds; presumably the pair ssse3_handler reads through [r11] and [4+r11] -- confirm
+.xdata ENDS
diff --git a/crypto/libressl/crypto/aes/aesni-sha1-mingw64-x86_64.S b/crypto/libressl/crypto/aes/aesni-sha1-mingw64-x86_64.S
new file mode 100644
index 0000000..c7a2d5c
--- /dev/null
+++ b/crypto/libressl/crypto/aes/aesni-sha1-mingw64-x86_64.S
@@ -0,0 +1,1536 @@
+#include "x86_arch.h"
+.globl aesni_cbc_sha1_enc /* exported entry point; NOTE(review): its label line is not visible in this text -- confirm it exists in the real file */
+.def aesni_cbc_sha1_enc; .scl 2; .type 32; .endef /* COFF symbol record with storage class 2 and type 32 (presumably external function -- confirm) */
+.p2align 4 /* align the following code to a 16-byte boundary */
+ movl OPENSSL_ia32cap_P+0(%rip),%r10d /* load the first dword of OPENSSL_ia32cap_P; no instruction before the jmp below reads r10d */
+ movl OPENSSL_ia32cap_P+4(%rip),%r11d /* load the second dword of OPENSSL_ia32cap_P; no instruction before the jmp below reads r11d */
+ jmp aesni_cbc_sha1_enc_ssse3 /* unconditional tail jump to the SSSE3 implementation; argument registers and stack are left untouched */
+ retq /* placed directly after an unconditional jmp with no visible label, so not reached by fall-through */
+.def aesni_cbc_sha1_enc_ssse3; .scl 3; .type 32; .endef
+.p2align 4
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8
+ movq 48(%rsp),%r9
+ movq 56(%rsp),%r10
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -264(%rsp),%rsp
+ movaps %xmm6,96+0(%rsp)
+ movaps %xmm7,96+16(%rsp)
+ movaps %xmm8,96+32(%rsp)
+ movaps %xmm9,96+48(%rsp)
+ movaps %xmm10,96+64(%rsp)
+ movaps %xmm11,96+80(%rsp)
+ movaps %xmm12,96+96(%rsp)
+ movaps %xmm13,96+112(%rsp)
+ movaps %xmm14,96+128(%rsp)
+ movaps %xmm15,96+144(%rsp)
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+ movdqu (%r8),%xmm11
+ movq %r8,88(%rsp)
+ shlq $6,%r14
+ subq %r12,%r13
+ movl 240(%r15),%r8d
+ addq %r10,%r14
+ leaq K_XX_XX(%rip),%r11
+ movl 0(%r9),%eax
+ movl 4(%r9),%ebx
+ movl 8(%r9),%ecx
+ movl 12(%r9),%edx
+ movl %ebx,%esi
+ movl 16(%r9),%ebp
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ movups (%r15),%xmm13
+ movups 16(%r15),%xmm14
+ jmp .Loop_ssse3
+.p2align 4
+ movdqa %xmm1,%xmm4
+ addl 0(%rsp),%ebp
+ movups 0(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ roll $5,%eax
+ paddd %xmm3,%xmm9
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%ebp
+ pxor %xmm2,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pxor %xmm8,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm9,48(%rsp)
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm10,%xmm9
+ rorl $7,%ebp
+ addl %esi,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa 0(%r11),%xmm10
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm4
+ rorl $7,%edx
+ addl %edi,%ebx
+ movdqa %xmm2,%xmm5
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ roll $5,%ebx
+ paddd %xmm4,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrldq $4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm9
+ addl 20(%rsp),%ebp
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm9,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm10,0(%rsp)
+ xorl %edx,%edi
+ addl %eax,%ebp
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ roll $5,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm9
+ xorl %ecx,%esi
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ addl %ebp,%edx
+ movdqa %xmm8,%xmm10
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ movdqa 16(%r11),%xmm8
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ pxor %xmm10,%xmm5
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movdqa %xmm3,%xmm6
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ paddd %xmm5,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ psrldq $4,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm10,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm8,16(%rsp)
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm10
+ xorl %edx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa 16(%r11),%xmm9
+ xorl %ecx,%edi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %ebp,%edx
+ pxor %xmm8,%xmm6
+ rorl $7,%eax
+ addl %edi,%edx
+ movdqa %xmm4,%xmm7
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm6,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm5,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ pxor %xmm8,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm9,32(%rsp)
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrld $31,%xmm8
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ movdqa %xmm10,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ addl 60(%rsp),%ebp
+ cmpl $11,%r8d
+ jb .Laesenclast1
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast1
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa 16(%r11),%xmm10
+ xorl %edx,%edi
+ addl %eax,%ebp
+ pxor %xmm9,%xmm7
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movdqa %xmm7,%xmm9
+ addl 0(%rsp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm1,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ xorl %ecx,%esi
+ movups 16(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,0(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ addl %ebp,%edx
+ pxor %xmm9,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ psrld $30,%xmm9
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ roll $5,%ecx
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ por %xmm9,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ movdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 16(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 32(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast2
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast2
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ xorl %ebp,%edi
+ movdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ecx,%edi
+ movups 32(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,16(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp
+ andl %edx,%edi
+ pxor %xmm7,%xmm6
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ pxor %xmm9,%xmm6
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%edi
+ rorl $7,%eax
+ psrld $30,%xmm9
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ por %xmm9,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%edi
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ pxor %xmm0,%xmm7
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 48(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ pxor %xmm10,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ movl %ecx,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%edi
+ rorl $7,%ebx
+ psrld $30,%xmm10
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ por %xmm10,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%edi
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx
+ andl %eax,%edi
+ pxor %xmm1,%xmm0
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ rorl $7,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ pxor %xmm8,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ pslld $2,%xmm0
+ andl %edx,%edi
+ rorl $7,%ecx
+ psrld $30,%xmm8
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ por %xmm8,%xmm0
+ movl %ecx,%edi
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%edi
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ pxor %xmm2,%xmm1
+ andl %eax,%esi
+ rorl $7,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ pxor %xmm9,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %ebp,%edi
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ rorl $7,%edx
+ psrld $30,%xmm9
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ por %xmm9,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax
+ andl %ebp,%edi
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ cmpl $11,%r8d
+ jb .Laesenclast3
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast3
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp
+ andl %edx,%esi
+ andl %ecx,%edi
+ rorl $7,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%edi
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ pxor %xmm3,%xmm2
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ pxor %xmm10,%xmm2
+ roll $5,%ebp
+ movups 48(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,32(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+ aesenc %xmm14,%xmm11
+ movups 32(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%edi
+ rorl $7,%ebp
+ psrld $30,%xmm10
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm10,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+ aesenc %xmm15,%xmm11
+ movups 48(%r15),%xmm14
+ rorl $7,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax
+ andl %ebp,%esi
+ andl %edx,%edi
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 64(%r15),%xmm15
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ aesenc %xmm15,%xmm11
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ cmpq %r14,%r10
+ je .Ldone_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,0(%rsp)
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ psubd %xmm9,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm9,%xmm1
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movdqa %xmm1,16(%rsp)
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ psubd %xmm9,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ paddd %xmm9,%xmm2
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%rsp)
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ psubd %xmm9,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast4
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast4
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ leaq 64(%r12),%r12
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ addl 12(%r9),%edx
+ movl %eax,0(%r9)
+ addl 16(%r9),%ebp
+ movl %esi,4(%r9)
+ movl %esi,%ebx
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ jmp .Loop_ssse3
+.p2align 4
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ aesenc %xmm15,%xmm11
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ aesenc %xmm14,%xmm11
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast5
+ movups 176(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 192(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ je .Laesenclast5
+ movups 208(%r15),%xmm14
+ aesenc %xmm15,%xmm11
+ movups 224(%r15),%xmm15
+ aesenc %xmm14,%xmm11
+ aesenclast %xmm15,%xmm11
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ movq 88(%rsp),%r8
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ movl %eax,0(%r9)
+ addl 12(%r9),%edx
+ movl %esi,4(%r9)
+ addl 16(%r9),%ebp
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ movups %xmm11,(%r8)
+ movaps 96+0(%rsp),%xmm6
+ movaps 96+16(%rsp),%xmm7
+ movaps 96+32(%rsp),%xmm8
+ movaps 96+48(%rsp),%xmm9
+ movaps 96+64(%rsp),%xmm10
+ movaps 96+80(%rsp),%xmm11
+ movaps 96+96(%rsp),%xmm12
+ movaps 96+112(%rsp),%xmm13
+ movaps 96+128(%rsp),%xmm14
+ movaps 96+144(%rsp),%xmm15
+ leaq 264(%rsp),%rsi
+ movq 0(%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.p2align 6 /* 64-byte aligned constant table placed after the preceding routine; NOTE(review): the table's label line is not visible in this listing */
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 /* equals the SHA-1 constant K for rounds 0-19 (FIPS 180-4), replicated in all four dword lanes */
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 /* equals SHA-1 K for rounds 20-39 */
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc /* equals SHA-1 K for rounds 40-59 */
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 /* equals SHA-1 K for rounds 60-79 */
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f /* as a pshufb mask this reverses the bytes of each dword; presumably used for the big-endian message load - TODO confirm against the code that references it */
+.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 /* ASCII "AESNI-CBC+SHA1 stitch for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated */
+.p2align 6
+.def ssse3_handler; .scl 3; .type 32; .endef /* COFF symbol record for ssse3_handler, the exception handler named in the .xdata record of aesni_cbc_sha1_enc_ssse3. It rebuilds the non-volatile registers in the CONTEXT record, calls RtlVirtualUnwind and returns 1. */
+.p2align 4 /* NOTE(review): the "ssse3_handler:" and ".Lcommon_seh_tail:" label lines are not visible in this listing although both are referenced; confirm against the real file */
+ pushq %rsi /* save every register (and the flags) that the body below overwrites */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp /* 32-byte home area plus four stack arguments for the call below */
+ movq 120(%r8),%rax /* %r8 = CONTEXT record; offset 120 is CONTEXT.Rax in the Win64 layout */
+ movq 248(%r8),%rbx /* CONTEXT.Rip: address of the faulting instruction */
+ movq 8(%r9),%rsi /* %r9 = DISPATCHER_CONTEXT; offset 8 is ImageBase */
+ movq 56(%r9),%r11 /* DISPATCHER_CONTEXT.HandlerData: the two RVAs emitted after the handler RVA in .xdata */
+ movl 0(%r11),%r10d /* HandlerData[0] (.Lprologue_ssse3) */
+ leaq (%rsi,%r10,1),%r10 /* RVA -> absolute address */
+ cmpq %r10,%rbx
+ jb .Lcommon_seh_tail /* fault before the prologue label: no frame to undo; %rax still holds CONTEXT.Rax (presumably the function keeps its entry %rsp there - TODO confirm) */
+ movq 152(%r8),%rax /* CONTEXT.Rsp */
+ movl 4(%r11),%r10d /* HandlerData[1] (.Lepilogue_ssse3) */
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lcommon_seh_tail /* fault at or after the epilogue label: skip the restore below */
+ leaq 96(%rax),%rsi /* source: xmm save area at 96(%rsp) of the faulting frame, the same offsets the function epilogue restores %xmm6-%xmm15 from */
+ leaq 512(%r8),%rdi /* destination: CONTEXT.Xmm6 */
+ movl $20,%ecx /* 20 qwords = 160 bytes = ten 16-byte registers */
+.long 0xa548f3fc /* bytes fc f3 48 a5: cld; rep movsq */
+ leaq 264(%rax),%rax /* saved general-purpose registers, same 264(%rsp) offset the function epilogue uses */
+ movq 0(%rax),%r15
+ movq 8(%rax),%r14
+ movq 16(%rax),%r13
+ movq 24(%rax),%r12
+ movq 32(%rax),%rbp
+ movq 40(%rax),%rbx
+ leaq 48(%rax),%rax /* stack pointer value once the six saved registers are released */
+ movq %rbx,144(%r8) /* write the recovered values into CONTEXT.Rbx */
+ movq %rbp,160(%r8) /* CONTEXT.Rbp */
+ movq %r12,216(%r8) /* CONTEXT.R12 */
+ movq %r13,224(%r8) /* CONTEXT.R13 */
+ movq %r14,232(%r8) /* CONTEXT.R14 */
+ movq %r15,240(%r8) /* CONTEXT.R15 */
+ movq 8(%rax),%rdi /* presumably .Lcommon_seh_tail (label not visible): reload %rdi/%rsi from 8 and 16 above the reported stack pointer, as the function epilogue does */
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8) /* CONTEXT.Rsp */
+ movq %rsi,168(%r8) /* CONTEXT.Rsi */
+ movq %rdi,176(%r8) /* CONTEXT.Rdi */
+ movq 40(%r9),%rdi /* DISPATCHER_CONTEXT.ContextRecord: copy destination */
+ movq %r8,%rsi /* copy source: the CONTEXT just patched */
+ movl $154,%ecx /* 154 qwords = 1232 bytes */
+.long 0xa548f3fc /* cld; rep movsq */
+ movq %r9,%rsi /* %rsi = DISPATCHER_CONTEXT for the argument set-up below */
+ xorq %rcx,%rcx /* arg1 = 0 (UNW_FLAG_NHANDLER) */
+ movq 8(%rsi),%rdx /* arg2 = ImageBase */
+ movq 0(%rsi),%r8 /* arg3 = ControlPc */
+ movq 16(%rsi),%r9 /* arg4 = FunctionEntry */
+ movq 40(%rsi),%r10 /* ContextRecord */
+ leaq 56(%rsi),%r11 /* &HandlerData */
+ leaq 24(%rsi),%r12 /* &EstablisherFrame */
+ movq %r10,32(%rsp) /* arg5 */
+ movq %r11,40(%rsp) /* arg6 */
+ movq %r12,48(%rsp) /* arg7 */
+ movq %rcx,56(%rsp) /* arg8 = NULL (%rcx was zeroed above) */
+ call *__imp_RtlVirtualUnwind(%rip)
+ movl $1,%eax /* return value 1 (ExceptionContinueSearch) */
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+.section .pdata /* Win64 function table: one entry of three RVAs (function begin, function end, unwind info) */
+.p2align 2
+.rva .LSEH_begin_aesni_cbc_sha1_enc_ssse3
+.rva .LSEH_end_aesni_cbc_sha1_enc_ssse3
+.rva .LSEH_info_aesni_cbc_sha1_enc_ssse3
+.section .xdata /* unwind information referenced by the entry above; NOTE(review): the .LSEH_info label line is not visible in this listing */
+.p2align 3
+.byte 9,0,0,0 /* UNWIND_INFO header: 0x09 = version 1 with UNW_FLAG_EHANDLER; prologue size, unwind-code count and frame register are all zero */
+.rva ssse3_handler /* language-specific exception handler */
+.rva .Lprologue_ssse3,.Lepilogue_ssse3 /* handler data: the two RVAs ssse3_handler reads at offsets 0 and 4 of HandlerData */
diff --git a/crypto/libressl/crypto/aes/bsaes-elf-x86_64.S b/crypto/libressl/crypto/aes/bsaes-elf-x86_64.S
new file mode 100644
index 0000000..903e374
--- /dev/null
+++ b/crypto/libressl/crypto/aes/bsaes-elf-x86_64.S
@@ -0,0 +1,2502 @@
+#include "x86_arch.h"
+.type _bsaes_encrypt8,@function /* Internal helper: encrypts the eight 128-bit blocks held in %xmm15,%xmm0-%xmm6 in parallel. In: %rax = round-key material (16-byte aligned), %r10d = round counter. Clobbers %xmm7-%xmm14, %r11; advances %rax. */
+.align 64 /* NOTE(review): label lines (e.g. "_bsaes_encrypt8:", ".Lenc_loop:") are not visible in this listing although they are referenced; confirm positions against the real file */
+ leaq .LBS0(%rip),%r11 /* %r11 = base of the constant table; all masks below are addressed relative to it */
+ movdqa (%rax),%xmm8 /* first 16-byte round key */
+ leaq 16(%rax),%rax
+ movdqa 80(%r11),%xmm7 /* byte-shuffle mask for the initial pshufb pass */
+ pxor %xmm8,%xmm15 /* XOR the first round key into every block, then shuffle its bytes */
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255 /* pshufb %xmm7,%xmm15 (hand-encoded) */
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199 /* pshufb %xmm7,%xmm0 */
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207 /* pshufb %xmm7,%xmm1 */
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215 /* pshufb %xmm7,%xmm2 */
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223 /* pshufb %xmm7,%xmm3 */
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231 /* pshufb %xmm7,%xmm4 */
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239 /* pshufb %xmm7,%xmm5 */
+.byte 102,15,56,0,247 /* pshufb %xmm7,%xmm6 */
+ movdqa 0(%r11),%xmm7 /* presumably the _bsaes_encrypt8_bitslice entry point called by the CTR code (label not visible - TODO confirm). Mask for the shift-by-1 swap steps. */
+ movdqa 16(%r11),%xmm8 /* mask for the shift-by-2 swap steps */
+ movdqa %xmm5,%xmm9 /* swap step, two pairs interleaved: t=((a>>1)^b)&mask; b^=t; a^=t<<1 with (a,b)=(%xmm5,%xmm6) and (%xmm3,%xmm4) */
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9 /* same step for (a,b)=(%xmm1,%xmm2) and (%xmm15,%xmm0) */
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7 /* mask for the shift-by-4 swap steps further down */
+ movdqa %xmm4,%xmm9 /* shift-by-2 step (mask %xmm8) for (a,b)=(%xmm4,%xmm6) and (%xmm3,%xmm5) */
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9 /* shift-by-2 step for (a,b)=(%xmm0,%xmm2) and (%xmm15,%xmm1) */
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9 /* shift-by-4 step (mask %xmm7) for (a,b)=(%xmm2,%xmm6) and (%xmm1,%xmm5) */
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9 /* shift-by-4 step for (a,b)=(%xmm0,%xmm4) and (%xmm15,%xmm3) */
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d /* count the first round */
+ jmp .Lenc_sbox /* first pass skips the key-add/shuffle stage that follows */
+.align 16 /* presumably .Lenc_loop starts here (label not visible) */
+ pxor 0(%rax),%xmm15 /* XOR 128 bytes of round-key material, one 16-byte slice per register, each followed by a byte shuffle with %xmm7 */
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255 /* pshufb %xmm7,%xmm15 */
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199 /* pshufb %xmm7,%xmm0 */
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207 /* pshufb %xmm7,%xmm1 */
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215 /* pshufb %xmm7,%xmm2 */
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223 /* pshufb %xmm7,%xmm3 */
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231 /* pshufb %xmm7,%xmm4 */
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239 /* pshufb %xmm7,%xmm5 */
+ leaq 128(%rax),%rax /* advance to the next round's key material */
+.byte 102,15,56,0,247 /* pshufb %xmm7,%xmm6 */
+ pxor %xmm5,%xmm4 /* presumably .Lenc_sbox (label not visible): register-only pxor/pand/por network running down to the decl below; %xmm7-%xmm14 are temporaries */
+ pxor %xmm0,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm1,%xmm5
+ pxor %xmm15,%xmm4
+ pxor %xmm2,%xmm5
+ pxor %xmm6,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm3,%xmm2
+ pxor %xmm4,%xmm3
+ pxor %xmm0,%xmm2
+ pxor %xmm6,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm6,%xmm10
+ movdqa %xmm0,%xmm9
+ movdqa %xmm4,%xmm8
+ movdqa %xmm1,%xmm12
+ movdqa %xmm5,%xmm11
+ pxor %xmm3,%xmm10
+ pxor %xmm1,%xmm9
+ pxor %xmm2,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm3,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm15,%xmm11
+ movdqa %xmm10,%xmm14
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm2,%xmm11
+ pxor %xmm15,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm6,%xmm12
+ movdqa %xmm4,%xmm11
+ pxor %xmm0,%xmm12
+ pxor %xmm5,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm1,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm3,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm0,%xmm13
+ pand %xmm2,%xmm11
+ movdqa %xmm6,%xmm14
+ pand %xmm15,%xmm12
+ pand %xmm4,%xmm13
+ por %xmm5,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+ pxor %xmm9,%xmm10
+ pand %xmm10,%xmm12
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+ pand %xmm7,%xmm9
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+ pand %xmm14,%xmm13
+ pxor %xmm11,%xmm13
+ movdqa %xmm5,%xmm11
+ movdqa %xmm4,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm5,%xmm9
+ pxor %xmm4,%xmm5
+ pand %xmm14,%xmm4
+ pand %xmm13,%xmm5
+ pxor %xmm4,%xmm5
+ pxor %xmm9,%xmm4
+ pxor %xmm15,%xmm11
+ pxor %xmm2,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm2,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm2
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm2,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm2
+ pxor %xmm11,%xmm5
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm2
+ movdqa %xmm6,%xmm11
+ movdqa %xmm0,%xmm7
+ pxor %xmm3,%xmm11
+ pxor %xmm1,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm3,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm1,%xmm3
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm1
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm3
+ pxor %xmm11,%xmm7
+ pxor %xmm1,%xmm3
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm1
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm6,%xmm10
+ pxor %xmm0,%xmm6
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm6
+ pxor %xmm0,%xmm6
+ pxor %xmm10,%xmm0
+ pxor %xmm11,%xmm6
+ pxor %xmm11,%xmm3
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm1
+ pxor %xmm15,%xmm6
+ pxor %xmm5,%xmm0
+ pxor %xmm6,%xmm3
+ pxor %xmm15,%xmm5
+ pxor %xmm0,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ pxor %xmm2,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm4,%xmm3
+ pxor %xmm2,%xmm5
+ decl %r10d /* the flags set here feed both the jl below and the jnz after the SSE block; the SSE instructions in between leave EFLAGS untouched */
+ jl .Lenc_done /* counter dropped below zero: all rounds done */
+ pshufd $147,%xmm15,%xmm7 /* linear mixing stage (presumably MixColumns - TODO confirm): imm 0x93 rotates the four dwords by one position */
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm3,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm5,%xmm10
+ pxor %xmm9,%xmm3
+ pshufd $147,%xmm2,%xmm11
+ pxor %xmm10,%xmm5
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm2
+ pshufd $147,%xmm1,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm1
+ pxor %xmm14,%xmm4
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15 /* imm 0x4E swaps the two 64-bit halves */
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm2,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm5,%xmm11
+ pshufd $78,%xmm2,%xmm7
+ pxor %xmm1,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm3,%xmm10
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm1,%xmm5
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm12,%xmm8
+ pxor %xmm10,%xmm2
+ pxor %xmm14,%xmm6
+ pxor %xmm13,%xmm5
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm1
+ movdqa %xmm8,%xmm4
+ movdqa 48(%r11),%xmm7 /* byte-shuffle mask for an ordinary round */
+ jnz .Lenc_loop /* counter non-zero (flags from the decl above) */
+ movdqa 64(%r11),%xmm7 /* counter is zero: the last pass through the loop uses a different shuffle mask */
+ jmp .Lenc_loop
+.align 16 /* presumably .Lenc_done starts here (label not visible) */
+ movdqa 0(%r11),%xmm7 /* same three-level swap network as at entry, now applied to the register order produced by the rounds */
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm1,%xmm9 /* shift-by-1 step for (a,b)=(%xmm1,%xmm4) and (%xmm2,%xmm6) */
+ psrlq $1,%xmm1
+ movdqa %xmm2,%xmm10
+ psrlq $1,%xmm2
+ pxor %xmm4,%xmm1
+ pxor %xmm6,%xmm2
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm2
+ pxor %xmm1,%xmm4
+ psllq $1,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $1,%xmm2
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm2
+ movdqa %xmm3,%xmm9 /* shift-by-1 step for (a,b)=(%xmm3,%xmm5) and (%xmm15,%xmm0) */
+ psrlq $1,%xmm3
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm5,%xmm3
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm15
+ pxor %xmm3,%xmm5
+ psllq $1,%xmm3
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9 /* shift-by-2 step for (a,b)=(%xmm6,%xmm4) and (%xmm2,%xmm1) */
+ psrlq $2,%xmm6
+ movdqa %xmm2,%xmm10
+ psrlq $2,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm1,%xmm2
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm2
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm2,%xmm1
+ psllq $2,%xmm2
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm2
+ movdqa %xmm0,%xmm9 /* shift-by-2 step for (a,b)=(%xmm0,%xmm5) and (%xmm15,%xmm3) */
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm5,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm5
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm5,%xmm9 /* shift-by-4 step for (a,b)=(%xmm5,%xmm4) and (%xmm3,%xmm1) */
+ psrlq $4,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $4,%xmm3
+ pxor %xmm4,%xmm5
+ pxor %xmm1,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm4
+ psllq $4,%xmm5
+ pxor %xmm3,%xmm1
+ psllq $4,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9 /* shift-by-4 step for (a,b)=(%xmm0,%xmm6) and (%xmm15,%xmm2) */
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm2,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm2
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7 /* 16 bytes at the current key pointer: XORed into all eight results */
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq /* results stay in registers; the CTR caller in this file consumes them in the order %xmm15,%xmm0,%xmm3,%xmm5,%xmm2,%xmm6,%xmm1,%xmm4 */
+.size _bsaes_encrypt8,.-_bsaes_encrypt8
+.type _bsaes_decrypt8,@function /* Internal helper: decrypts the eight 128-bit blocks held in %xmm15,%xmm0-%xmm6 in parallel. In: %rax = round-key material (16-byte aligned), %r10d = round counter. Clobbers %xmm7-%xmm14, %r11; advances %rax. */
+.align 64 /* NOTE(review): label lines (e.g. "_bsaes_decrypt8:", ".Ldec_loop:") are not visible in this listing although they are referenced; confirm positions against the real file */
+ leaq .LBS0(%rip),%r11 /* %r11 = base of the constant table; this routine also reads masks at negative offsets from it */
+ movdqa (%rax),%xmm8 /* first 16 bytes of key material */
+ leaq 16(%rax),%rax
+ movdqa -48(%r11),%xmm7 /* byte-shuffle mask for the initial pshufb pass */
+ pxor %xmm8,%xmm15 /* XOR the key into every block, then shuffle its bytes */
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255 /* pshufb %xmm7,%xmm15 (hand-encoded) */
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199 /* pshufb %xmm7,%xmm0 */
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207 /* pshufb %xmm7,%xmm1 */
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215 /* pshufb %xmm7,%xmm2 */
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223 /* pshufb %xmm7,%xmm3 */
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231 /* pshufb %xmm7,%xmm4 */
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239 /* pshufb %xmm7,%xmm5 */
+.byte 102,15,56,0,247 /* pshufb %xmm7,%xmm6 */
+ movdqa 0(%r11),%xmm7 /* mask for the shift-by-1 swap steps */
+ movdqa 16(%r11),%xmm8 /* mask for the shift-by-2 swap steps */
+ movdqa %xmm5,%xmm9 /* swap step, two pairs interleaved: t=((a>>1)^b)&mask; b^=t; a^=t<<1 with (a,b)=(%xmm5,%xmm6) and (%xmm3,%xmm4) */
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9 /* same step for (a,b)=(%xmm1,%xmm2) and (%xmm15,%xmm0) */
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7 /* mask for the shift-by-4 swap steps further down */
+ movdqa %xmm4,%xmm9 /* shift-by-2 step (mask %xmm8) for (a,b)=(%xmm4,%xmm6) and (%xmm3,%xmm5) */
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9 /* shift-by-2 step for (a,b)=(%xmm0,%xmm2) and (%xmm15,%xmm1) */
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9 /* shift-by-4 step (mask %xmm7) for (a,b)=(%xmm2,%xmm6) and (%xmm1,%xmm5) */
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9 /* shift-by-4 step for (a,b)=(%xmm0,%xmm4) and (%xmm15,%xmm3) */
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d /* count the first round */
+ jmp .Ldec_sbox /* first pass skips the key-add/shuffle stage that follows */
+.align 16 /* presumably .Ldec_loop starts here (label not visible) */
+ pxor 0(%rax),%xmm15 /* XOR 128 bytes of round-key material, one 16-byte slice per register, each followed by a byte shuffle with %xmm7 */
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255 /* pshufb %xmm7,%xmm15 */
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199 /* pshufb %xmm7,%xmm0 */
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207 /* pshufb %xmm7,%xmm1 */
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215 /* pshufb %xmm7,%xmm2 */
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223 /* pshufb %xmm7,%xmm3 */
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231 /* pshufb %xmm7,%xmm4 */
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239 /* pshufb %xmm7,%xmm5 */
+ leaq 128(%rax),%rax /* advance to the next round's key material */
+.byte 102,15,56,0,247 /* pshufb %xmm7,%xmm6 */
+ pxor %xmm3,%xmm2 /* presumably .Ldec_sbox (label not visible): register-only pxor/pand/por network running down to the decl below; %xmm7-%xmm14 are temporaries */
+ pxor %xmm6,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm3,%xmm5
+ pxor %xmm5,%xmm6
+ pxor %xmm6,%xmm0
+ pxor %xmm0,%xmm15
+ pxor %xmm4,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm15,%xmm4
+ pxor %xmm2,%xmm0
+ movdqa %xmm2,%xmm10
+ movdqa %xmm6,%xmm9
+ movdqa %xmm0,%xmm8
+ movdqa %xmm3,%xmm12
+ movdqa %xmm4,%xmm11
+ pxor %xmm15,%xmm10
+ pxor %xmm3,%xmm9
+ pxor %xmm5,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm15,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm1,%xmm11
+ movdqa %xmm10,%xmm14
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm5,%xmm11
+ pxor %xmm1,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm2,%xmm12
+ movdqa %xmm0,%xmm11
+ pxor %xmm6,%xmm12
+ pxor %xmm4,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm3,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm15,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm6,%xmm13
+ pand %xmm5,%xmm11
+ movdqa %xmm2,%xmm14
+ pand %xmm1,%xmm12
+ pand %xmm0,%xmm13
+ por %xmm4,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+ pxor %xmm9,%xmm10
+ pand %xmm10,%xmm12
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+ pand %xmm7,%xmm9
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+ pand %xmm14,%xmm13
+ pxor %xmm11,%xmm13
+ movdqa %xmm4,%xmm11
+ movdqa %xmm0,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm4,%xmm9
+ pxor %xmm0,%xmm4
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm4
+ pxor %xmm0,%xmm4
+ pxor %xmm9,%xmm0
+ pxor %xmm1,%xmm11
+ pxor %xmm5,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm1,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm5,%xmm1
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm5
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm1
+ pxor %xmm11,%xmm7
+ pxor %xmm5,%xmm1
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm5
+ pxor %xmm11,%xmm4
+ pxor %xmm11,%xmm1
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm5
+ movdqa %xmm2,%xmm11
+ movdqa %xmm6,%xmm7
+ pxor %xmm15,%xmm11
+ pxor %xmm3,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm3,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm3
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm3,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm3
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm2,%xmm10
+ pxor %xmm6,%xmm2
+ pand %xmm14,%xmm6
+ pand %xmm13,%xmm2
+ pxor %xmm6,%xmm2
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm3
+ pxor %xmm6,%xmm0
+ pxor %xmm4,%xmm5
+ pxor %xmm0,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm6,%xmm4
+ pxor %xmm1,%xmm3
+ pxor %xmm15,%xmm6
+ pxor %xmm4,%xmm3
+ pxor %xmm5,%xmm2
+ pxor %xmm0,%xmm5
+ pxor %xmm3,%xmm2
+ pxor %xmm15,%xmm3
+ pxor %xmm2,%xmm6
+ decl %r10d /* the flags set here feed both the jl below and the jnz after the SSE block; the SSE instructions in between leave EFLAGS untouched */
+ jl .Ldec_done /* counter dropped below zero: all rounds done */
+ pshufd $78,%xmm15,%xmm7 /* linear mixing stage (presumably InvMixColumns - TODO confirm), part 1: combine each register with its half-swapped copy (imm 0x4E swaps the 64-bit halves) */
+ pshufd $78,%xmm2,%xmm13
+ pxor %xmm15,%xmm7
+ pshufd $78,%xmm4,%xmm14
+ pxor %xmm2,%xmm13
+ pshufd $78,%xmm0,%xmm8
+ pxor %xmm4,%xmm14
+ pshufd $78,%xmm5,%xmm9
+ pxor %xmm0,%xmm8
+ pshufd $78,%xmm3,%xmm10
+ pxor %xmm5,%xmm9
+ pxor %xmm13,%xmm15
+ pxor %xmm13,%xmm0
+ pshufd $78,%xmm1,%xmm11
+ pxor %xmm3,%xmm10
+ pxor %xmm7,%xmm5
+ pxor %xmm8,%xmm3
+ pshufd $78,%xmm6,%xmm12
+ pxor %xmm1,%xmm11
+ pxor %xmm14,%xmm0
+ pxor %xmm9,%xmm1
+ pxor %xmm6,%xmm12
+ pxor %xmm14,%xmm5
+ pxor %xmm13,%xmm3
+ pxor %xmm13,%xmm1
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm14,%xmm1
+ pxor %xmm14,%xmm6
+ pxor %xmm12,%xmm4
+ pshufd $147,%xmm15,%xmm7 /* part 2: imm 0x93 rotates the four dwords by one position */
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm5,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm3,%xmm10
+ pxor %xmm9,%xmm5
+ pshufd $147,%xmm1,%xmm11
+ pxor %xmm10,%xmm3
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm1
+ pshufd $147,%xmm2,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm2
+ pxor %xmm14,%xmm4
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm1,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm3,%xmm11
+ pshufd $78,%xmm1,%xmm7
+ pxor %xmm2,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm5,%xmm10
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm2,%xmm3
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm12,%xmm8
+ pxor %xmm1,%xmm10
+ pxor %xmm14,%xmm6
+ pxor %xmm3,%xmm13
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm2
+ movdqa %xmm13,%xmm5
+ movdqa %xmm8,%xmm4
+ movdqa %xmm2,%xmm1
+ movdqa %xmm10,%xmm2
+ movdqa -16(%r11),%xmm7 /* byte-shuffle mask for an ordinary round */
+ jnz .Ldec_loop /* counter non-zero (flags from the decl above) */
+ movdqa -32(%r11),%xmm7 /* counter is zero: the last pass through the loop uses a different shuffle mask */
+ jmp .Ldec_loop
+.align 16 /* presumably .Ldec_done starts here (label not visible) */
+ movdqa 0(%r11),%xmm7 /* same three-level swap network as at entry, now applied to the register order produced by the rounds */
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm2,%xmm9 /* shift-by-1 step for (a,b)=(%xmm2,%xmm4) and (%xmm1,%xmm6) */
+ psrlq $1,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $1,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm6,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm4
+ psllq $1,%xmm2
+ pxor %xmm1,%xmm6
+ psllq $1,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm5,%xmm9 /* shift-by-1 step for (a,b)=(%xmm5,%xmm3) and (%xmm15,%xmm0) */
+ psrlq $1,%xmm5
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm3,%xmm5
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm15
+ pxor %xmm5,%xmm3
+ psllq $1,%xmm5
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9 /* shift-by-2 step for (a,b)=(%xmm6,%xmm4) and (%xmm1,%xmm2) */
+ psrlq $2,%xmm6
+ movdqa %xmm1,%xmm10
+ psrlq $2,%xmm1
+ pxor %xmm4,%xmm6
+ pxor %xmm2,%xmm1
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm1
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm1,%xmm2
+ psllq $2,%xmm1
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9 /* shift-by-2 step for (a,b)=(%xmm0,%xmm3) and (%xmm15,%xmm5) */
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm3,%xmm0
+ pxor %xmm5,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm3
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm5
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm3,%xmm9 /* shift-by-4 step for (a,b)=(%xmm3,%xmm4) and (%xmm5,%xmm2) */
+ psrlq $4,%xmm3
+ movdqa %xmm5,%xmm10
+ psrlq $4,%xmm5
+ pxor %xmm4,%xmm3
+ pxor %xmm2,%xmm5
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $4,%xmm3
+ pxor %xmm5,%xmm2
+ psllq $4,%xmm5
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm5
+ movdqa %xmm0,%xmm9 /* shift-by-4 step for (a,b)=(%xmm0,%xmm6) and (%xmm15,%xmm1) */
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7 /* 16 bytes at the current key pointer: XORed into all eight results */
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq /* results stay in registers; the CBC caller in this file consumes them in the order %xmm15,%xmm0,%xmm5,%xmm3,%xmm1,%xmm6,%xmm2,%xmm4 */
+.size _bsaes_decrypt8,.-_bsaes_decrypt8
+.type _bsaes_key_convert,@function /* Internal helper: expands a 16-byte-per-round key schedule into the 128-byte-per-round form read by _bsaes_encrypt8/_bsaes_decrypt8. In: %rcx = source round keys, %rax = 16-byte aligned destination, %r10d = round count. */
+.align 16 /* Out: %rax = first byte past the written data, %xmm6 = last round key (not converted, not stored), %xmm7 = constant from 80(.Lmasks). NOTE(review): the "_bsaes_key_convert:" and ".Lkey_loop:" label lines are not visible in this listing. */
+ leaq .Lmasks(%rip),%r11
+ movdqu (%rcx),%xmm7 /* first round key (source may be unaligned: movdqu) */
+ leaq 16(%rcx),%rcx
+ movdqa 0(%r11),%xmm0 /* four bit-select masks; presumably one bit per byte (0x01,0x02,0x04,0x08) - TODO confirm against .Lmasks */
+ movdqa 16(%r11),%xmm1
+ movdqa 32(%r11),%xmm2
+ movdqa 48(%r11),%xmm3
+ movdqa 64(%r11),%xmm4 /* byte-shuffle mask applied to every round key in the loop */
+ pcmpeqd %xmm5,%xmm5 /* %xmm5 = all ones, used to invert selected slices */
+ movdqu (%rcx),%xmm6 /* second round key: first one to be converted */
+ movdqa %xmm7,(%rax) /* first round key is stored unchanged */
+ leaq 16(%rax),%rax
+ decl %r10d /* with the decrement at the loop bottom, the loop body runs (round count - 1) times */
+ jmp .Lkey_loop
+.align 16 /* presumably .Lkey_loop starts here (label not visible) */
+.byte 102,15,56,0,244 /* pshufb %xmm4,%xmm6 (hand-encoded) */
+ movdqa %xmm0,%xmm8 /* slice i = pcmpeqb(key & mask_i, mask_i): each byte becomes 0xFF where the selected bit is set, else 0x00 */
+ movdqa %xmm1,%xmm9
+ pand %xmm6,%xmm8
+ pand %xmm6,%xmm9
+ movdqa %xmm2,%xmm10
+ pcmpeqb %xmm0,%xmm8
+ psllq $4,%xmm0 /* move the mask up four bit positions for the second group of slices; undone by the psrlq $4 below */
+ movdqa %xmm3,%xmm11
+ pcmpeqb %xmm1,%xmm9
+ psllq $4,%xmm1
+ pand %xmm6,%xmm10
+ pand %xmm6,%xmm11
+ movdqa %xmm0,%xmm12 /* copies taken after the shift, so %xmm12-%xmm15 select the upper four bit positions */
+ pcmpeqb %xmm2,%xmm10
+ psllq $4,%xmm2
+ movdqa %xmm1,%xmm13
+ pcmpeqb %xmm3,%xmm11
+ psllq $4,%xmm3
+ movdqa %xmm2,%xmm14
+ movdqa %xmm3,%xmm15
+ pxor %xmm5,%xmm8 /* slices 0 and 1 are stored inverted */
+ pxor %xmm5,%xmm9
+ pand %xmm6,%xmm12
+ pand %xmm6,%xmm13
+ movdqa %xmm8,0(%rax)
+ pcmpeqb %xmm0,%xmm12
+ psrlq $4,%xmm0 /* restore the mask for the next iteration */
+ movdqa %xmm9,16(%rax)
+ pcmpeqb %xmm1,%xmm13
+ psrlq $4,%xmm1
+ leaq 16(%rcx),%rcx
+ pand %xmm6,%xmm14
+ pand %xmm6,%xmm15
+ movdqa %xmm10,32(%rax)
+ pcmpeqb %xmm2,%xmm14
+ psrlq $4,%xmm2
+ movdqa %xmm11,48(%rax)
+ pcmpeqb %xmm3,%xmm15
+ psrlq $4,%xmm3
+ movdqu (%rcx),%xmm6 /* fetch the next round key; after the final iteration this is the last round key, returned in %xmm6 */
+ pxor %xmm5,%xmm13 /* slices 5 and 6 are stored inverted */
+ pxor %xmm5,%xmm14
+ movdqa %xmm12,64(%rax)
+ movdqa %xmm13,80(%rax)
+ movdqa %xmm14,96(%rax)
+ movdqa %xmm15,112(%rax)
+ leaq 128(%rax),%rax /* 128 output bytes per converted round key */
+ decl %r10d
+ jnz .Lkey_loop
+ movdqa 80(%r11),%xmm7 /* constant handed back to the caller, which XORs it with a round key */
+ retq
+.size _bsaes_key_convert,.-_bsaes_key_convert
+.globl bsaes_cbc_encrypt /* Public entry. Args in %rdi (input), %rsi (output), %rdx (length in bytes), %rcx (key structure), %r8 (16-byte IV, updated on return), %r9d (mode flag). */
+.type bsaes_cbc_encrypt,@function /* Only the case %r9d == 0 with length >= 128 is handled here (eight-block CBC decryption); everything else is forwarded to asm_AES_cbc_encrypt. */
+.align 16 /* NOTE(review): label lines (e.g. "bsaes_cbc_encrypt:", ".Lcbc_dec_loop:") are not visible in this listing although they are referenced; confirm positions against the real file */
+ cmpl $0,%r9d
+ jne asm_AES_cbc_encrypt /* tail-jump with all argument registers untouched */
+ cmpq $128,%rdx
+ jb asm_AES_cbc_encrypt /* fewer than eight 16-byte blocks: also forwarded */
+ movq %rsp,%rax
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp /* 72-byte local area; 32(%rbp) holds the chaining value across helper calls */
+ movq %rsp,%rbp /* %rbp anchors the frame; the converted key schedule is carved out below it */
+ movl 240(%rcx),%eax /* round count, read from offset 240 of the key structure */
+ movq %rdi,%r12 /* %r12 = input pointer */
+ movq %rsi,%r13 /* %r13 = output pointer */
+ movq %rdx,%r14 /* %r14 = length */
+ movq %rcx,%r15 /* %r15 = key structure */
+ movq %r8,%rbx /* %rbx = IV pointer */
+ shrq $4,%r14 /* bytes -> 16-byte blocks */
+ movl %eax,%edx /* keep the round count in %edx for every helper call */
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp /* reserve rounds*128 - 96 bytes for the converted schedule */
+ movq %rsp,%rax /* conversion destination */
+ movq %r15,%rcx /* conversion source */
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor (%rsp),%xmm7 /* fold the constant returned in %xmm7 into the first stored round key */
+ movdqa %xmm6,(%rax) /* append the last round key, which the helper returns unconverted in %xmm6 */
+ movdqa %xmm7,(%rsp)
+ movdqu (%rbx),%xmm14 /* %xmm14 = current chaining value (IV) */
+ subq $8,%r14
+ movdqu 0(%r12),%xmm15 /* presumably .Lcbc_dec_loop (label not visible): load eight ciphertext blocks */
+ movdqu 16(%r12),%xmm0
+ movdqu 32(%r12),%xmm1
+ movdqu 48(%r12),%xmm2
+ movdqu 64(%r12),%xmm3
+ movdqu 80(%r12),%xmm4
+ movq %rsp,%rax /* key material for the helper */
+ movdqu 96(%r12),%xmm5
+ movl %edx,%r10d
+ movdqu 112(%r12),%xmm6
+ movdqa %xmm14,32(%rbp) /* park the chaining value: the helper uses %xmm14 as scratch */
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15 /* first result ^= chaining value */
+ movdqu 0(%r12),%xmm7 /* remaining results ^= preceding ciphertext block, reloaded from the input; helper output order is %xmm15,%xmm0,%xmm5,%xmm3,%xmm1,%xmm6,%xmm2,%xmm4 */
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm2
+ movdqu 112(%r12),%xmm14 /* last ciphertext block becomes the next chaining value */
+ pxor %xmm13,%xmm4
+ movdqu %xmm15,0(%r13)
+ leaq 128(%r12),%r12
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ subq $8,%r14
+ jnc .Lcbc_dec_loop /* no borrow: at least eight more blocks */
+ addq $8,%r14 /* undo the last subtraction: 0..7 blocks remain */
+ jz .Lcbc_dec_done
+ movdqu 0(%r12),%xmm15 /* tail: load blocks one at a time and branch out as soon as the count is matched; movdqu leaves the flags alone, so each cmpq serves both a jb and a je */
+ movq %rsp,%rax
+ movl %edx,%r10d
+ cmpq $2,%r14
+ jb .Lcbc_dec_one
+ movdqu 16(%r12),%xmm0
+ je .Lcbc_dec_two
+ movdqu 32(%r12),%xmm1
+ cmpq $4,%r14
+ jb .Lcbc_dec_three
+ movdqu 48(%r12),%xmm2
+ je .Lcbc_dec_four
+ movdqu 64(%r12),%xmm3
+ cmpq $6,%r14
+ jb .Lcbc_dec_five
+ movdqu 80(%r12),%xmm4
+ je .Lcbc_dec_six
+ movdqu 96(%r12),%xmm5 /* fall-through: seven blocks remain */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm14 /* seventh ciphertext block is the chaining value written back */
+ pxor %xmm12,%xmm2
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ jmp .Lcbc_dec_done
+.align 16 /* presumably .Lcbc_dec_six (label not visible) */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm14
+ pxor %xmm11,%xmm6
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ jmp .Lcbc_dec_done
+.align 16 /* presumably .Lcbc_dec_five (label not visible) */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm14
+ pxor %xmm10,%xmm1
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ jmp .Lcbc_dec_done
+.align 16 /* presumably .Lcbc_dec_four (label not visible) */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm14
+ pxor %xmm9,%xmm3
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ jmp .Lcbc_dec_done
+.align 16 /* presumably .Lcbc_dec_three (label not visible) */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm14
+ pxor %xmm8,%xmm5
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ jmp .Lcbc_dec_done
+.align 16 /* presumably .Lcbc_dec_two (label not visible) */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm14
+ pxor %xmm7,%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ jmp .Lcbc_dec_done
+.align 16 /* presumably .Lcbc_dec_one (label not visible): a single block goes through the non-bitsliced routine with the original key */
+ leaq (%r12),%rdi /* arg1 = input block */
+ leaq 32(%rbp),%rsi /* arg2 = scratch output in the local area */
+ leaq (%r15),%rdx /* arg3 = key structure */
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm14 /* chaining value ^ decrypted block; NOTE(review): relies on asm_AES_decrypt leaving %xmm14 and %xmm15 intact - confirm */
+ movdqu %xmm14,(%r13)
+ movdqa %xmm15,%xmm14 /* the ciphertext block loaded before the dispatch becomes the new chaining value */
+ movdqu %xmm14,(%rbx) /* presumably .Lcbc_dec_done (label not visible): write the chaining value back through the IV pointer */
+ leaq (%rsp),%rax /* wipe the stack from %rsp up to %rbp (the converted key schedule) */
+ pxor %xmm0,%xmm0
+ movdqa %xmm0,0(%rax) /* presumably .Lcbc_dec_bzero (label not visible): 32 bytes per iteration */
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lcbc_dec_bzero /* loop while %rax is still below %rbp */
+ leaq (%rbp),%rsp /* release the key schedule area */
+ movq 72(%rsp),%r15 /* restore the callee-saved registers pushed at entry (they sit above the 72-byte local area) */
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax /* saved %rbp, moved into place after %rsp is adjusted */
+ leaq 120(%rsp),%rsp /* 72 bytes of locals + six 8-byte saved registers */
+ movq %rax,%rbp
+ retq
+.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
+/*
+ * bsaes_ctr32_encrypt_blocks -- XOR a CTR keystream over whole 16-byte
+ * blocks, eight blocks per pass through the bit-sliced core.
+ *
+ * Arguments as consumed below:
+ *   %rdi  source pointer                 (kept in %r12)
+ *   %rsi  destination pointer            (kept in %r13)
+ *   %rdx  number of 16-byte blocks       (kept in %r14)
+ *   %rcx  key structure                  (kept in %r15); the 32-bit value at
+ *         offset 240 is handed to the helpers in %r10d (presumably the AES
+ *         round count -- confirm against the key structure definition)
+ *   %r8   16-byte counter block; only its last 32-bit word is advanced
+ *
+ * Fewer than eight blocks are handled one at a time with asm_AES_encrypt.
+ * All callee-saved registers that are used are restored before returning and
+ * the on-stack key schedule is zeroed.
+ *
+ * NOTE(review): a block count of zero is not rejected; the one-block path
+ * would still run once and then keep looping, so callers presumably never
+ * pass zero -- confirm.
+ */
+.globl bsaes_ctr32_encrypt_blocks
+.type bsaes_ctr32_encrypt_blocks,@function
+.align 16
+bsaes_ctr32_encrypt_blocks:
+ movq %rsp,%rax
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+/* 72 bytes of scratch; %rbp anchors the frame for the epilogue */
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movdqu (%r8),%xmm0
+ movl 240(%rcx),%eax
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+/* working copy of the counter block lives at 32(%rbp) */
+ movdqa %xmm0,32(%rbp)
+ cmpq $8,%rdx
+ jb .Lctr_enc_short
+ movl %eax,%ebx
+/* reserve (count * 128 - 96) bytes below the frame for the converted key */
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %ebx,%r10d
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7
+ movdqa %xmm7,(%rax)
+ movdqa (%rsp),%xmm8
+ leaq .LADD1(%rip),%r11
+ movdqa 32(%rbp),%xmm15
+ movdqa -32(%r11),%xmm7
+/* pshufb %xmm7,%xmm8 ; pshufb %xmm7,%xmm15 (emitted as raw bytes) */
+.byte 102,68,15,56,0,199
+.byte 102,68,15,56,0,255
+ movdqa %xmm8,(%rsp)
+ jmp .Lctr_enc_loop
+.align 16
+/* Main loop: build eight consecutive counter blocks and encrypt them. */
+.Lctr_enc_loop:
+ movdqa %xmm15,32(%rbp)
+ movdqa %xmm15,%xmm0
+ movdqa %xmm15,%xmm1
+ paddd 0(%r11),%xmm0
+ movdqa %xmm15,%xmm2
+ paddd 16(%r11),%xmm1
+ movdqa %xmm15,%xmm3
+ paddd 32(%r11),%xmm2
+ movdqa %xmm15,%xmm4
+ paddd 48(%r11),%xmm3
+ movdqa %xmm15,%xmm5
+ paddd 64(%r11),%xmm4
+ movdqa %xmm15,%xmm6
+ paddd 80(%r11),%xmm5
+ paddd 96(%r11),%xmm6
+ movdqa (%rsp),%xmm8
+ leaq 16(%rsp),%rax
+ movdqa -16(%r11),%xmm7
+/* XOR the first key block into all eight lanes, interleaved with pshufb */
+/* of each lane by %xmm7 (the .byte rows encode pshufb %xmm7,%xmmN). */
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+ leaq .LBS0(%rip),%r11
+.byte 102,15,56,0,247
+ movl %ebx,%r10d
+ call _bsaes_encrypt8_bitslice
+ subq $8,%r14
+ jc .Lctr_enc_loop_done
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ movdqu 32(%r12),%xmm9
+ movdqu 48(%r12),%xmm10
+ movdqu 64(%r12),%xmm11
+ movdqu 80(%r12),%xmm12
+ movdqu 96(%r12),%xmm13
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+/* keystream blocks come back in register order 15,0,3,5,2,6,1,4 */
+ pxor %xmm15,%xmm7
+ movdqa 32(%rbp),%xmm15
+ pxor %xmm8,%xmm0
+ movdqu %xmm7,0(%r13)
+ pxor %xmm9,%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor %xmm10,%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor %xmm11,%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor %xmm12,%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor %xmm13,%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor %xmm14,%xmm4
+ movdqu %xmm1,96(%r13)
+ leaq .LADD1(%rip),%r11
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ paddd 112(%r11),%xmm15
+/* ZF is still the result of the subq above: no instruction in between */
+/* writes the flags */
+ jnz .Lctr_enc_loop
+ jmp .Lctr_enc_done
+.align 16
+/* Final partial group: 1..7 blocks remain, keystream already computed. */
+.Lctr_enc_loop_done:
+ addq $8,%r14
+ movdqu 0(%r12),%xmm7
+ pxor %xmm7,%xmm15
+ movdqu %xmm15,0(%r13)
+ cmpq $2,%r14
+ jb .Lctr_enc_done
+ movdqu 16(%r12),%xmm8
+ pxor %xmm8,%xmm0
+ movdqu %xmm0,16(%r13)
+ je .Lctr_enc_done
+ movdqu 32(%r12),%xmm9
+ pxor %xmm9,%xmm3
+ movdqu %xmm3,32(%r13)
+ cmpq $4,%r14
+ jb .Lctr_enc_done
+ movdqu 48(%r12),%xmm10
+ pxor %xmm10,%xmm5
+ movdqu %xmm5,48(%r13)
+ je .Lctr_enc_done
+ movdqu 64(%r12),%xmm11
+ pxor %xmm11,%xmm2
+ movdqu %xmm2,64(%r13)
+ cmpq $6,%r14
+ jb .Lctr_enc_done
+ movdqu 80(%r12),%xmm12
+ pxor %xmm12,%xmm6
+ movdqu %xmm6,80(%r13)
+ je .Lctr_enc_done
+ movdqu 96(%r12),%xmm13
+ pxor %xmm13,%xmm1
+ movdqu %xmm1,96(%r13)
+ jmp .Lctr_enc_done
+.align 16
+/* Fewer than eight blocks in total: one asm_AES_encrypt call per block, */
+/* counter block at 32(%rbp), keystream written to 48(%rbp). */
+.Lctr_enc_short:
+ leaq 32(%rbp),%rdi
+ leaq 48(%rbp),%rsi
+ leaq (%r15),%rdx
+ call asm_AES_encrypt
+ movdqu (%r12),%xmm0
+ leaq 16(%r12),%r12
+/* bump the last 32-bit word of the counter as a big-endian integer */
+ movl 44(%rbp),%eax
+ bswapl %eax
+ pxor 48(%rbp),%xmm0
+ incl %eax
+ movdqu %xmm0,(%r13)
+ bswapl %eax
+ leaq 16(%r13),%r13
+/* %rsp equals %rbp on this path (no key schedule was allocated) */
+ movl %eax,44(%rsp)
+ decq %r14
+ jnz .Lctr_enc_short
+.Lctr_enc_done:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+/* zero everything between %rsp and %rbp, 32 bytes per step */
+.Lctr_enc_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lctr_enc_bzero
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+ retq
+.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
+/*
+ * bsaes_xts_encrypt -- XTS-mode encryption, eight 16-byte blocks per pass
+ * through _bsaes_encrypt8, with ciphertext stealing for a trailing partial
+ * block.
+ *
+ * Arguments as consumed below:
+ *   %rdi  source pointer                       (kept in %r12)
+ *   %rsi  destination pointer                  (kept in %r13)
+ *   %rdx  length in bytes                      (kept in %r14, copy in %rbx)
+ *   %rcx  data key structure                   (kept in %r15); the 32-bit
+ *         value at offset 240 is passed to the helpers in %r10d
+ *   %r8   second key, used once with asm_AES_encrypt on the block at %r9
+ *   %r9   16-byte block that is encrypted to form the initial tweak
+ *
+ * The tweak is advanced by the sequence pcmpgtd/pshufd $19/paddq/pand/pxor:
+ * each 64-bit half is doubled and the carries are folded back in using the
+ * constants at .Lxts_magic.  Per-block tweaks are parked at 0..112(%rsp).
+ *
+ * NOTE(review): lengths below 16 bytes are not rejected here; the stealing
+ * loop reads the previous output block, so callers presumably guarantee at
+ * least one full block -- confirm.
+ */
+.globl bsaes_xts_encrypt
+.type bsaes_xts_encrypt,@function
+.align 16
+bsaes_xts_encrypt:
+ movq %rsp,%rax
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+/* initial tweak: asm_AES_encrypt(%r9 -> 32(%rbp), key %r8) */
+ leaq (%r9),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r8),%rdx
+ call asm_AES_encrypt
+ movl 240(%r15),%eax
+ movq %r14,%rbx
+ movl %eax,%edx
+/* reserve (count * 128 - 96) bytes for the converted key schedule */
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7
+ movdqa %xmm7,(%rax)
+ andq $-16,%r14
+/* 128 more bytes below the key schedule hold the eight per-block tweaks */
+ subq $128,%rsp
+ movdqa 32(%rbp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ subq $128,%r14
+ jc .Lxts_enc_short
+ jmp .Lxts_enc_loop
+.align 16
+/* Main loop: eight blocks per iteration. */
+.Lxts_enc_loop:
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+/* %xmm12..%xmm14 are reused for input from here; the mask is reloaded */
+/* after the call */
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp)
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ pxor %xmm14,%xmm6
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+/* results come back in register order 15,0,3,5,2,6,1,4 */
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm1,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+/* advance from the eighth tweak to the first tweak of the next group */
+ movdqa 112(%rsp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ subq $128,%r14
+ jnc .Lxts_enc_loop
+/* Fewer than eight full blocks remain: undo the bias and dispatch on the */
+/* exact count while the tweaks are generated. */
+.Lxts_enc_short:
+ addq $128,%r14
+ jz .Lxts_enc_done
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je .Lxts_enc_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je .Lxts_enc_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je .Lxts_enc_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je .Lxts_enc_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je .Lxts_enc_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je .Lxts_enc_6
+/* seven blocks remain */
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp)
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm1,96(%r13)
+ leaq 112(%r13),%r13
+/* each tail leaves the next unused tweak in %xmm6 for the stealing step */
+ movdqa 112(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_6:
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+ movdqa 96(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_5:
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ movdqu %xmm2,64(%r13)
+ leaq 80(%r13),%r13
+ movdqa 80(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_4:
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ movdqu %xmm5,48(%r13)
+ leaq 64(%r13),%r13
+ movdqa 64(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_3:
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm3,32(%r13)
+ leaq 48(%r13),%r13
+ movdqa 48(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_2:
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+ movdqa 32(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+/* A single block goes through asm_AES_encrypt in place at 32(%rbp). */
+.Lxts_enc_1:
+ pxor %xmm15,%xmm7
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx
+ call asm_AES_encrypt
+ pxor 32(%rbp),%xmm15
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+ movdqa 16(%rsp),%xmm6
+/* All full blocks written; %ebx & 15 is the length of the partial tail. */
+.Lxts_enc_done:
+ andl $15,%ebx
+ jz .Lxts_enc_ret
+ movq %r13,%rdx
+/* Ciphertext stealing: swap tail input bytes with the head of the last */
+/* output block, one byte per iteration. */
+.Lxts_enc_steal:
+ movzbl (%r12),%eax
+ movzbl -16(%rdx),%ecx
+ leaq 1(%r12),%r12
+ movb %al,-16(%rdx)
+ movb %cl,0(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz .Lxts_enc_steal
+/* re-encrypt the patched last full block with the tweak in %xmm6 */
+ movdqu -16(%r13),%xmm15
+ leaq 32(%rbp),%rdi
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call asm_AES_encrypt
+ pxor 32(%rbp),%xmm6
+ movdqu %xmm6,-16(%r13)
+.Lxts_enc_ret:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+/* zero everything between %rsp and %rbp, 32 bytes per step */
+.Lxts_enc_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lxts_enc_bzero
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+ retq
+.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
+/*
+ * bsaes_xts_decrypt -- XTS-mode decryption, eight 16-byte blocks per pass
+ * through _bsaes_decrypt8, with ciphertext stealing for a trailing partial
+ * block.
+ *
+ * Arguments as consumed below:
+ *   %rdi  source pointer                       (kept in %r12)
+ *   %rsi  destination pointer                  (kept in %r13)
+ *   %rdx  length in bytes                      (kept in %r14, copy in %rbx)
+ *   %rcx  data key structure                   (kept in %r15); the 32-bit
+ *         value at offset 240 is passed to the helpers in %r10d
+ *   %r8   second key, used once with asm_AES_encrypt on the block at %r9
+ *   %r9   16-byte block that is encrypted to form the initial tweak
+ *
+ * When the length is not a multiple of 16, one extra full block is held
+ * back from the bulk path so the stealing step can process the last two
+ * blocks with their tweaks in swapped order.
+ *
+ * NOTE(review): lengths below 16 bytes are not rejected here; callers
+ * presumably guarantee at least one full block -- confirm.
+ */
+.globl bsaes_xts_decrypt
+.type bsaes_xts_decrypt,@function
+.align 16
+bsaes_xts_decrypt:
+ movq %rsp,%rax
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+/* the tweak is always produced with the encrypt primitive */
+ leaq (%r9),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r8),%rdx
+ call asm_AES_encrypt
+ movl 240(%r15),%eax
+ movq %r14,%rbx
+ movl %eax,%edx
+/* reserve (count * 128 - 96) bytes for the converted key schedule */
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+/* fix-up of first/last key blocks differs from the encrypt direction */
+ pxor (%rsp),%xmm7
+ movdqa %xmm6,(%rax)
+ movdqa %xmm7,(%rsp)
+/* %rax = 16 if a partial tail exists, else 0; hold that block back */
+ xorl %eax,%eax
+ andq $-16,%r14
+ testl $15,%ebx
+ setnz %al
+ shlq $4,%rax
+ subq %rax,%r14
+/* 128 more bytes below the key schedule hold the eight per-block tweaks */
+ subq $128,%rsp
+ movdqa 32(%rbp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ subq $128,%r14
+ jc .Lxts_dec_short
+ jmp .Lxts_dec_loop
+.align 16
+/* Main loop: eight blocks per iteration. */
+.Lxts_dec_loop:
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+/* %xmm12..%xmm14 are reused for input from here; the mask is reloaded */
+/* after the call */
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp)
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ pxor %xmm14,%xmm6
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+/* results come back in register order 15,0,5,3,1,6,2,4 */
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+/* advance from the eighth tweak to the first tweak of the next group */
+ movdqa 112(%rsp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ subq $128,%r14
+ jnc .Lxts_dec_loop
+/* Fewer than eight full blocks remain: undo the bias and dispatch on the */
+/* exact count while the tweaks are generated. */
+.Lxts_dec_short:
+ addq $128,%r14
+ jz .Lxts_dec_done
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je .Lxts_dec_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je .Lxts_dec_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je .Lxts_dec_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je .Lxts_dec_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je .Lxts_dec_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je .Lxts_dec_6
+/* seven blocks remain */
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp)
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ leaq 112(%r13),%r13
+/* each tail leaves the next unused tweak in %xmm6 for the stealing step */
+ movdqa 112(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_6:
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+ movdqa 96(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_5:
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ leaq 80(%r13),%r13
+ movdqa 80(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_4:
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ leaq 64(%r13),%r13
+ movdqa 64(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_3:
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ leaq 48(%r13),%r13
+ movdqa 48(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_2:
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+ movdqa 32(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+/* A single block goes through asm_AES_decrypt in place at 32(%rbp). */
+.Lxts_dec_1:
+ pxor %xmm15,%xmm7
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm15
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+ movdqa 16(%rsp),%xmm6
+/* Bulk blocks written; %ebx & 15 is the length of the partial tail. */
+.Lxts_dec_done:
+ andl $15,%ebx
+ jz .Lxts_dec_ret
+/* Keep the current tweak in %xmm5 and advance %xmm6 once more: the held */
+/* back full block is decrypted with the later tweak first. */
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ movdqa %xmm6,%xmm5
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ movdqu (%r12),%xmm15
+ pxor %xmm13,%xmm6
+ leaq 32(%rbp),%rdi
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm6
+ movq %r13,%rdx
+ movdqu %xmm6,(%r13)
+/* Ciphertext stealing: swap tail input bytes with the head of the block */
+/* just written, one byte per iteration. */
+.Lxts_dec_steal:
+ movzbl 16(%r12),%eax
+ movzbl (%rdx),%ecx
+ leaq 1(%r12),%r12
+ movb %al,(%rdx)
+ movb %cl,16(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz .Lxts_dec_steal
+/* decrypt the patched block with the earlier tweak saved in %xmm5 */
+ movdqu (%r13),%xmm15
+ leaq 32(%rbp),%rdi
+ pxor %xmm5,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm5
+ movdqu %xmm5,(%r13)
+.Lxts_dec_ret:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+/* zero everything between %rsp and %rbp, 32 bytes per step */
+.Lxts_dec_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lxts_dec_bzero
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+ retq
+.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
+/*
+ * _bsaes_const -- read-only constant table shared by the routines in this
+ * file.  The code addresses rows relative to a few anchor labels:
+ *   .LBS0       offsets -48 .. 80 (masks 0x55.., 0x33.., 0x0f.. at 0/16/32,
+ *               byte-shuffle patterns in the rows around them)
+ *   .LADD1      offsets -32 .. 112 (two shuffle patterns, then the counter
+ *               increments 1..8 applied with paddd)
+ *   .Lxts_magic the carry-folding mask used when doubling the XTS tweak
+ *
+ * NOTE(review): only the anchors referenced by code visible alongside this
+ * table are defined here; routines elsewhere in the file may need further
+ * labels on the rows after .Lxts_magic -- confirm against those users.
+ */
+.type _bsaes_const,@object
+.align 64
+_bsaes_const:
+.quad 0x0a0e0206070b0f03, 0x0004080c0d010509
+.quad 0x01040b0e0205080f, 0x0306090c00070a0d
+.quad 0x0504070602010003, 0x0f0e0d0c080b0a09
+.LBS0:
+.quad 0x5555555555555555, 0x5555555555555555
+.quad 0x3333333333333333, 0x3333333333333333
+.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+.quad 0x0504070600030201, 0x0f0e0d0c0a09080b
+.quad 0x0304090e00050a0f, 0x01060b0c0207080d
+.quad 0x0a0e02060f03070b, 0x0004080c05090d01
+.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
+.quad 0x0a0d02060c03070b, 0x0004080f05090e01
+.LADD1:
+.quad 0x0000000000000000, 0x0000000100000000
+.quad 0x0000000000000000, 0x0000000200000000
+.quad 0x0000000000000000, 0x0000000300000000
+.quad 0x0000000000000000, 0x0000000400000000
+.quad 0x0000000000000000, 0x0000000500000000
+.quad 0x0000000000000000, 0x0000000600000000
+.quad 0x0000000000000000, 0x0000000700000000
+.quad 0x0000000000000000, 0x0000000800000000
+.Lxts_magic:
+.long 0x87,0,1,0
+.quad 0x0101010101010101, 0x0101010101010101
+.quad 0x0202020202020202, 0x0202020202020202
+.quad 0x0404040404040404, 0x0404040404040404
+.quad 0x0808080808080808, 0x0808080808080808
+.quad 0x02060a0e03070b0f, 0x0004080c0105090d
+.quad 0x6363636363636363, 0x6363636363636363
+/* NUL-terminated ASCII/UTF-8 credit string embedded in the object */
+.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0
+.align 64
+.size _bsaes_const,.-_bsaes_const
+/* Mark the object as not needing an executable stack where supported. */
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/crypto/libressl/crypto/aes/bsaes-macosx-x86_64.S b/crypto/libressl/crypto/aes/bsaes-macosx-x86_64.S
new file mode 100644
index 0000000..5f780f0
--- /dev/null
+++ b/crypto/libressl/crypto/aes/bsaes-macosx-x86_64.S
@@ -0,0 +1,2499 @@
+#include "x86_arch.h"
+/*
+ * _bsaes_encrypt8 -- file-local core that transforms eight 16-byte blocks
+ * at once in bit-sliced form.
+ *
+ * Register contract as used below (not the C ABI):
+ *   in:   %xmm15, %xmm0 .. %xmm6   the eight blocks
+ *         %rax                     key material: 16 bytes, then 128 bytes
+ *                                  per loop pass, then a final 16 bytes
+ *         %r10d                    loop count (decremented once on entry
+ *                                  and once per pass)
+ *   out:  the eight result blocks, left in %xmm15, %xmm0 .. %xmm6; the
+ *         callers visible in the ELF flavour of this code read them in the
+ *         order 15,0,3,5,2,6,1,4
+ *   clobbers: %xmm7 .. %xmm14, %rax, %r10d, %r11, flags
+ *
+ * The long pxor/pand/por run after L$enc_sbox and the pshufd $147/$78 run
+ * after it are presumably the bit-sliced S-box and column mixing --
+ * confirm against the generator before relying on that description.
+ */
+.p2align 6
+_bsaes_encrypt8:
+ leaq L$BS0(%rip),%r11
+ movdqa (%rax),%xmm8
+ leaq 16(%rax),%rax
+ movdqa 80(%r11),%xmm7
+/* XOR the first 16 key bytes into every lane, interleaved with pshufb of */
+/* each lane by %xmm7 (the .byte rows encode pshufb %xmm7,%xmmN). */
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+.byte 102,15,56,0,247
+/* Secondary entry for callers that have already applied the first key */
+/* block and byte shuffle and loaded %r11 themselves (the ELF flavour's */
+/* CTR loop calls a symbol of this name right after doing exactly that). */
+_bsaes_encrypt8_bitslice:
+/* Three exchange layers between register pairs, with shifts of 1, 2 and 4 */
+/* bits under the masks at 0, 16 and 32(%r11). */
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm4,%xmm9
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d
+/* the first key block and shuffle were applied above, so skip the loop head */
+ jmp L$enc_sbox
+.p2align 4
+/* Loop head: XOR in the next 128 bytes of key material and shuffle each */
+/* lane by %xmm7. */
+L$enc_loop:
+ pxor 0(%rax),%xmm15
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239
+ leaq 128(%rax),%rax
+.byte 102,15,56,0,247
+L$enc_sbox:
+ pxor %xmm5,%xmm4
+ pxor %xmm0,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm1,%xmm5
+ pxor %xmm15,%xmm4
+ pxor %xmm2,%xmm5
+ pxor %xmm6,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm3,%xmm2
+ pxor %xmm4,%xmm3
+ pxor %xmm0,%xmm2
+ pxor %xmm6,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm6,%xmm10
+ movdqa %xmm0,%xmm9
+ movdqa %xmm4,%xmm8
+ movdqa %xmm1,%xmm12
+ movdqa %xmm5,%xmm11
+ pxor %xmm3,%xmm10
+ pxor %xmm1,%xmm9
+ pxor %xmm2,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm3,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm15,%xmm11
+ movdqa %xmm10,%xmm14
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm2,%xmm11
+ pxor %xmm15,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm6,%xmm12
+ movdqa %xmm4,%xmm11
+ pxor %xmm0,%xmm12
+ pxor %xmm5,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm1,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm3,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm0,%xmm13
+ pand %xmm2,%xmm11
+ movdqa %xmm6,%xmm14
+ pand %xmm15,%xmm12
+ pand %xmm4,%xmm13
+ por %xmm5,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+ pxor %xmm9,%xmm10
+ pand %xmm10,%xmm12
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+ pand %xmm7,%xmm9
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+ pand %xmm14,%xmm13
+ pxor %xmm11,%xmm13
+ movdqa %xmm5,%xmm11
+ movdqa %xmm4,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm5,%xmm9
+ pxor %xmm4,%xmm5
+ pand %xmm14,%xmm4
+ pand %xmm13,%xmm5
+ pxor %xmm4,%xmm5
+ pxor %xmm9,%xmm4
+ pxor %xmm15,%xmm11
+ pxor %xmm2,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm2,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm2
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm2,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm2
+ pxor %xmm11,%xmm5
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm2
+ movdqa %xmm6,%xmm11
+ movdqa %xmm0,%xmm7
+ pxor %xmm3,%xmm11
+ pxor %xmm1,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm3,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm1,%xmm3
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm1
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm3
+ pxor %xmm11,%xmm7
+ pxor %xmm1,%xmm3
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm1
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm6,%xmm10
+ pxor %xmm0,%xmm6
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm6
+ pxor %xmm0,%xmm6
+ pxor %xmm10,%xmm0
+ pxor %xmm11,%xmm6
+ pxor %xmm11,%xmm3
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm1
+ pxor %xmm15,%xmm6
+ pxor %xmm5,%xmm0
+ pxor %xmm6,%xmm3
+ pxor %xmm15,%xmm5
+ pxor %xmm0,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ pxor %xmm2,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm4,%xmm3
+ pxor %xmm2,%xmm5
+/* leave the loop once the count goes negative */
+ decl %r10d
+ jl L$enc_done
+ pshufd $147,%xmm15,%xmm7
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm3,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm5,%xmm10
+ pxor %xmm9,%xmm3
+ pshufd $147,%xmm2,%xmm11
+ pxor %xmm10,%xmm5
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm2
+ pshufd $147,%xmm1,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm1
+ pxor %xmm14,%xmm4
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm2,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm5,%xmm11
+ pshufd $78,%xmm2,%xmm7
+ pxor %xmm1,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm3,%xmm10
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm1,%xmm5
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm12,%xmm8
+ pxor %xmm10,%xmm2
+ pxor %xmm14,%xmm6
+ pxor %xmm13,%xmm5
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm1
+ movdqa %xmm8,%xmm4
+/* Pick the shuffle pattern for the next pass: 48(%r11) normally, */
+/* 64(%r11) when the count has just reached zero.  ZF is still the result */
+/* of the decl above; nothing in between writes the flags. */
+ movdqa 48(%r11),%xmm7
+ jnz L$enc_loop
+ movdqa 64(%r11),%xmm7
+ jmp L$enc_loop
+.p2align 4
+/* Exit: undo the bit exchange layers on the permuted register set, then */
+/* XOR in the final 16 key bytes. */
+L$enc_done:
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm2,%xmm10
+ psrlq $1,%xmm2
+ pxor %xmm4,%xmm1
+ pxor %xmm6,%xmm2
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm2
+ pxor %xmm1,%xmm4
+ psllq $1,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $1,%xmm2
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm2
+ movdqa %xmm3,%xmm9
+ psrlq $1,%xmm3
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm5,%xmm3
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm15
+ pxor %xmm3,%xmm5
+ psllq $1,%xmm3
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9
+ psrlq $2,%xmm6
+ movdqa %xmm2,%xmm10
+ psrlq $2,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm1,%xmm2
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm2
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm2,%xmm1
+ psllq $2,%xmm2
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm2
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm5,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm5
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm5,%xmm9
+ psrlq $4,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $4,%xmm3
+ pxor %xmm4,%xmm5
+ pxor %xmm1,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm4
+ psllq $4,%xmm5
+ pxor %xmm3,%xmm1
+ psllq $4,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm2,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm2
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq
+.p2align 6  /* Eight-block decrypt core, 64-byte aligned. The entry label is not visible in this view; callers below reach it as _bsaes_decrypt8 (TODO confirm). In: rax = key schedule, r10d = loop counter, state in xmm15,xmm0-xmm6. */
+ leaq L$BS0(%rip),%r11  /* r11 = base of the L$BS0 constant table (defined outside this view) */
+ movdqa (%rax),%xmm8  /* first 16 bytes of the key schedule passed in rax */
+ leaq 16(%rax),%rax  /* step past them */
+ movdqa -48(%r11),%xmm7  /* byte-shuffle mask stored 48 bytes before L$BS0 */
+ pxor %xmm8,%xmm15  /* xor that key into each state register, then shuffle each one by xmm7 */
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255  /* pshufb %xmm7,%xmm15 emitted as raw bytes */
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199  /* pshufb %xmm7,%xmm0 */
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207  /* pshufb %xmm7,%xmm1 */
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215  /* pshufb %xmm7,%xmm2 */
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223  /* pshufb %xmm7,%xmm3 */
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231  /* pshufb %xmm7,%xmm4 */
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239  /* pshufb %xmm7,%xmm5 */
+.byte 102,15,56,0,247  /* pshufb %xmm7,%xmm6 */
+ movdqa 0(%r11),%xmm7  /* mask used by the shift-by-1 exchange steps */
+ movdqa 16(%r11),%xmm8  /* mask used by the shift-by-2 exchange steps */
+ movdqa %xmm5,%xmm9  /* exchange step: t = ((a >> 1) ^ b) & mask; b ^= t; a ^= t << 1. Two register pairs are interleaved. Presumably the bitslice transposition - TODO confirm. */
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9  /* same shift-by-1 exchange for pairs (xmm1,xmm2) and (xmm15,xmm0) */
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7  /* mask for the shift-by-4 steps; the shift-by-2 steps below still use xmm8 */
+ movdqa %xmm4,%xmm9  /* shift-by-2 exchange, pairs (xmm4,xmm6) and (xmm3,xmm5) */
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9  /* shift-by-2 exchange, pairs (xmm0,xmm2) and (xmm15,xmm1) */
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9  /* shift-by-4 exchange, pairs (xmm2,xmm6) and (xmm1,xmm5) */
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9  /* shift-by-4 exchange, pairs (xmm0,xmm4) and (xmm15,xmm3) */
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d  /* count down the loop counter once before entering the loop */
+ jmp L$dec_sbox  /* enter the loop part-way through; the target label line is not visible in this view */
+.p2align 4  /* presumably L$dec_loop starts here - TODO confirm label placement */
+ pxor 0(%rax),%xmm15  /* xor 128 bytes of key material (eight 16-byte slots) into the state, shuffling each register by xmm7 */
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255  /* pshufb %xmm7,%xmm15 */
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199  /* pshufb %xmm7,%xmm0 */
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207  /* pshufb %xmm7,%xmm1 */
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215  /* pshufb %xmm7,%xmm2 */
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223  /* pshufb %xmm7,%xmm3 */
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231  /* pshufb %xmm7,%xmm4 */
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239  /* pshufb %xmm7,%xmm5 */
+ leaq 128(%rax),%rax  /* advance to the next 128-byte key slot */
+.byte 102,15,56,0,247  /* pshufb %xmm7,%xmm6 */
+ pxor %xmm3,%xmm2  /* start of a long pxor/pand/por network over the eight state registers; presumably the bit-sliced inverse S-box, with L$dec_sbox likely landing here - TODO confirm */
+ pxor %xmm6,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm3,%xmm5
+ pxor %xmm5,%xmm6
+ pxor %xmm6,%xmm0
+ pxor %xmm0,%xmm15
+ pxor %xmm4,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm15,%xmm4
+ pxor %xmm2,%xmm0
+ movdqa %xmm2,%xmm10  /* xmm7-xmm14 serve as scratch from here to the end of the network */
+ movdqa %xmm6,%xmm9
+ movdqa %xmm0,%xmm8
+ movdqa %xmm3,%xmm12
+ movdqa %xmm4,%xmm11
+ pxor %xmm15,%xmm10
+ pxor %xmm3,%xmm9
+ pxor %xmm5,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm15,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm1,%xmm11
+ movdqa %xmm10,%xmm14
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm5,%xmm11
+ pxor %xmm1,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm2,%xmm12
+ movdqa %xmm0,%xmm11
+ pxor %xmm6,%xmm12
+ pxor %xmm4,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm3,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm15,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm6,%xmm13
+ pand %xmm5,%xmm11
+ movdqa %xmm2,%xmm14
+ pand %xmm1,%xmm12
+ pand %xmm0,%xmm13
+ por %xmm4,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+ pxor %xmm9,%xmm10
+ pand %xmm10,%xmm12
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+ pand %xmm7,%xmm9
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+ pand %xmm14,%xmm13
+ pxor %xmm11,%xmm13
+ movdqa %xmm4,%xmm11
+ movdqa %xmm0,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm4,%xmm9
+ pxor %xmm0,%xmm4
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm4
+ pxor %xmm0,%xmm4
+ pxor %xmm9,%xmm0
+ pxor %xmm1,%xmm11
+ pxor %xmm5,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm1,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm5,%xmm1
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm5
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm1
+ pxor %xmm11,%xmm7
+ pxor %xmm5,%xmm1
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm5
+ pxor %xmm11,%xmm4
+ pxor %xmm11,%xmm1
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm5
+ movdqa %xmm2,%xmm11
+ movdqa %xmm6,%xmm7
+ pxor %xmm15,%xmm11
+ pxor %xmm3,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm3,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm3
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm3,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm3
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm2,%xmm10
+ pxor %xmm6,%xmm2
+ pand %xmm14,%xmm6
+ pand %xmm13,%xmm2
+ pxor %xmm6,%xmm2
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm3
+ pxor %xmm6,%xmm0
+ pxor %xmm4,%xmm5
+ pxor %xmm0,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm6,%xmm4
+ pxor %xmm1,%xmm3
+ pxor %xmm15,%xmm6
+ pxor %xmm4,%xmm3
+ pxor %xmm5,%xmm2
+ pxor %xmm0,%xmm5
+ pxor %xmm3,%xmm2
+ pxor %xmm15,%xmm3
+ pxor %xmm2,%xmm6
+ decl %r10d  /* loop counter; the flags set here drive both the jl below and the jnz further down */
+ jl L$dec_done  /* counter went negative: leave the loop (label line not visible in this view) */
+ pshufd $78,%xmm15,%xmm7  /* pshufd $78 swaps the two 64-bit halves; this xor network is presumably InvMixColumns - TODO confirm */
+ pshufd $78,%xmm2,%xmm13
+ pxor %xmm15,%xmm7
+ pshufd $78,%xmm4,%xmm14
+ pxor %xmm2,%xmm13
+ pshufd $78,%xmm0,%xmm8
+ pxor %xmm4,%xmm14
+ pshufd $78,%xmm5,%xmm9
+ pxor %xmm0,%xmm8
+ pshufd $78,%xmm3,%xmm10
+ pxor %xmm5,%xmm9
+ pxor %xmm13,%xmm15
+ pxor %xmm13,%xmm0
+ pshufd $78,%xmm1,%xmm11
+ pxor %xmm3,%xmm10
+ pxor %xmm7,%xmm5
+ pxor %xmm8,%xmm3
+ pshufd $78,%xmm6,%xmm12
+ pxor %xmm1,%xmm11
+ pxor %xmm14,%xmm0
+ pxor %xmm9,%xmm1
+ pxor %xmm6,%xmm12
+ pxor %xmm14,%xmm5
+ pxor %xmm13,%xmm3
+ pxor %xmm13,%xmm1
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm14,%xmm1
+ pxor %xmm14,%xmm6
+ pxor %xmm12,%xmm4
+ pshufd $147,%xmm15,%xmm7  /* pshufd $147 rotates the four dwords by one position */
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm5,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm3,%xmm10
+ pxor %xmm9,%xmm5
+ pshufd $147,%xmm1,%xmm11
+ pxor %xmm10,%xmm3
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm1
+ pshufd $147,%xmm2,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm2
+ pxor %xmm14,%xmm4
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm1,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm3,%xmm11
+ pshufd $78,%xmm1,%xmm7
+ pxor %xmm2,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm5,%xmm10
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm2,%xmm3
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm12,%xmm8
+ pxor %xmm1,%xmm10
+ pxor %xmm14,%xmm6
+ pxor %xmm3,%xmm13
+ movdqa %xmm7,%xmm3  /* move the results back into the register layout the loop head expects */
+ pxor %xmm9,%xmm2
+ movdqa %xmm13,%xmm5
+ movdqa %xmm8,%xmm4
+ movdqa %xmm2,%xmm1
+ movdqa %xmm10,%xmm2
+ movdqa -16(%r11),%xmm7  /* shuffle mask for the next iteration */
+ jnz L$dec_loop  /* the SSE instructions since the decl leave EFLAGS untouched, so this still tests the decl result */
+ movdqa -32(%r11),%xmm7  /* counter reached zero: the final iteration uses the mask at -32(%r11) instead */
+ jmp L$dec_loop
+.p2align 4  /* presumably L$dec_done - TODO confirm label placement */
+ movdqa 0(%r11),%xmm7  /* undo the bit interleave: the same exchange steps with shifts 1, 2 and 4 */
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm2,%xmm9  /* shift-by-1 exchange, pairs (xmm2,xmm4) and (xmm1,xmm6) */
+ psrlq $1,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $1,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm6,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm4
+ psllq $1,%xmm2
+ pxor %xmm1,%xmm6
+ psllq $1,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm5,%xmm9  /* shift-by-1 exchange, pairs (xmm5,xmm3) and (xmm15,xmm0) */
+ psrlq $1,%xmm5
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm3,%xmm5
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm15
+ pxor %xmm5,%xmm3
+ psllq $1,%xmm5
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7  /* mask for the shift-by-4 steps */
+ movdqa %xmm6,%xmm9  /* shift-by-2 exchange, pairs (xmm6,xmm4) and (xmm1,xmm2) */
+ psrlq $2,%xmm6
+ movdqa %xmm1,%xmm10
+ psrlq $2,%xmm1
+ pxor %xmm4,%xmm6
+ pxor %xmm2,%xmm1
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm1
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm1,%xmm2
+ psllq $2,%xmm1
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9  /* shift-by-2 exchange, pairs (xmm0,xmm3) and (xmm15,xmm5) */
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm3,%xmm0
+ pxor %xmm5,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm3
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm5
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm3,%xmm9  /* shift-by-4 exchange, pairs (xmm3,xmm4) and (xmm5,xmm2) */
+ psrlq $4,%xmm3
+ movdqa %xmm5,%xmm10
+ psrlq $4,%xmm5
+ pxor %xmm4,%xmm3
+ pxor %xmm2,%xmm5
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $4,%xmm3
+ pxor %xmm5,%xmm2
+ psllq $4,%xmm5
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm5
+ movdqa %xmm0,%xmm9  /* shift-by-4 exchange, pairs (xmm0,xmm6) and (xmm15,xmm1) */
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7  /* final 16-byte key at the current schedule position */
+ pxor %xmm7,%xmm5  /* xor it into all eight results */
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq  /* callers in this file read the eight results in the order xmm15,xmm0,xmm5,xmm3,xmm1,xmm6,xmm2,xmm4 */
+.p2align 4  /* Key-schedule conversion helper. The entry label is not visible in this view; callers reach it as _bsaes_key_convert (TODO confirm). In: rcx = source key, rax = destination buffer, r10d = count. Out: xmm6 = last 16 bytes loaded from the source, xmm7 = constant from L$masks+80, rax = one past the converted data. */
+ leaq L$masks(%rip),%r11  /* r11 = L$masks constant table (defined outside this view) */
+ movdqu (%rcx),%xmm7  /* first 16 bytes of the source key (unaligned load) */
+ leaq 16(%rcx),%rcx
+ movdqa 0(%r11),%xmm0  /* xmm0-xmm3: four bit-select masks; xmm4: a pshufb mask */
+ movdqa 16(%r11),%xmm1
+ movdqa 32(%r11),%xmm2
+ movdqa 48(%r11),%xmm3
+ movdqa 64(%r11),%xmm4
+ pcmpeqd %xmm5,%xmm5  /* xmm5 = all ones, used below to complement selected outputs */
+ movdqu (%rcx),%xmm6  /* next 16 bytes of the source key */
+ movdqa %xmm7,(%rax)  /* the first 16 bytes are stored unconverted */
+ leaq 16(%rax),%rax
+ decl %r10d  /* the first key is already handled, so one fewer iteration remains */
+ jmp L$key_loop  /* label line not visible in this view; presumably the next line */
+.p2align 4
+.byte 102,15,56,0,244  /* pshufb %xmm4,%xmm6 emitted as raw bytes */
+ movdqa %xmm0,%xmm8  /* for each mask: and with the key, then pcmpeqb against the mask gives 0xff in every byte where all mask bits are set */
+ movdqa %xmm1,%xmm9
+ pand %xmm6,%xmm8
+ pand %xmm6,%xmm9
+ movdqa %xmm2,%xmm10
+ pcmpeqb %xmm0,%xmm8
+ psllq $4,%xmm0  /* temporarily shift the masks left by 4 to select four further bit positions; undone by the psrlq below */
+ movdqa %xmm3,%xmm11
+ pcmpeqb %xmm1,%xmm9
+ psllq $4,%xmm1
+ pand %xmm6,%xmm10
+ pand %xmm6,%xmm11
+ movdqa %xmm0,%xmm12
+ pcmpeqb %xmm2,%xmm10
+ psllq $4,%xmm2
+ movdqa %xmm1,%xmm13
+ pcmpeqb %xmm3,%xmm11
+ psllq $4,%xmm3
+ movdqa %xmm2,%xmm14
+ movdqa %xmm3,%xmm15
+ pxor %xmm5,%xmm8  /* complement this output; xmm9, xmm13 and xmm14 are complemented too. Presumably this folds in the S-box constant 0x63 - TODO confirm. */
+ pxor %xmm5,%xmm9
+ pand %xmm6,%xmm12
+ pand %xmm6,%xmm13
+ movdqa %xmm8,0(%rax)
+ pcmpeqb %xmm0,%xmm12
+ psrlq $4,%xmm0  /* restore the mask for the next iteration */
+ movdqa %xmm9,16(%rax)
+ pcmpeqb %xmm1,%xmm13
+ psrlq $4,%xmm1
+ leaq 16(%rcx),%rcx  /* advance the source pointer */
+ pand %xmm6,%xmm14
+ pand %xmm6,%xmm15
+ movdqa %xmm10,32(%rax)
+ pcmpeqb %xmm2,%xmm14
+ psrlq $4,%xmm2
+ movdqa %xmm11,48(%rax)
+ pcmpeqb %xmm3,%xmm15
+ psrlq $4,%xmm3
+ movdqu (%rcx),%xmm6  /* load the next 16 source bytes; after the last iteration this value is left in xmm6 for the caller */
+ pxor %xmm5,%xmm13
+ pxor %xmm5,%xmm14
+ movdqa %xmm12,64(%rax)
+ movdqa %xmm13,80(%rax)
+ movdqa %xmm14,96(%rax)
+ movdqa %xmm15,112(%rax)
+ leaq 128(%rax),%rax  /* each source key expands to 128 bytes (eight 16-byte values) */
+ decl %r10d
+ jnz L$key_loop
+ movdqa 80(%r11),%xmm7  /* constant handed back to the caller, which combines it with a round key */
+ retq
+.globl _bsaes_cbc_encrypt  /* CBC entry point; the label line itself is not visible in this view. Register use below: rdi = in, rsi = out, rdx = byte length, rcx = key, r8 = IV, r9d = direction flag. */
+.p2align 4
+ cmpl $0,%r9d  /* r9d nonzero: hand the whole call to _asm_AES_cbc_encrypt. Presumably r9d is the encrypt flag; everything below only decrypts. */
+ jne _asm_AES_cbc_encrypt  /* tail jump; nothing has been pushed yet */
+ cmpq $128,%rdx  /* fewer than 128 bytes (eight blocks): also use the fallback */
+ jb _asm_AES_cbc_encrypt
+ movq %rsp,%rax
+ pushq %rbp  /* save the callee-saved registers used below */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp  /* 72-byte scratch frame; 32(%rbp) holds the current IV */
+ movq %rsp,%rbp  /* rbp = frame base, used by the epilogue to find the saved registers */
+ movl 240(%rcx),%eax  /* 32-bit field at offset 240 of the key structure; presumably the round count */
+ movq %rdi,%r12  /* r12 = input pointer */
+ movq %rsi,%r13  /* r13 = output pointer */
+ movq %rdx,%r14  /* r14 = length */
+ movq %rcx,%r15  /* r15 = key */
+ movq %r8,%rbx  /* rbx = IV pointer */
+ shrq $4,%r14  /* length in 16-byte blocks */
+ movl %eax,%edx  /* keep the count in edx for later calls */
+ shlq $7,%rax  /* stack space for the converted key schedule: count*128 - 96 bytes */
+ subq $96,%rax
+ subq %rax,%rsp
+ movq %rsp,%rax  /* rax = destination for the conversion */
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor (%rsp),%xmm7  /* combine the constant returned in xmm7 with the first stored key ... */
+ movdqa %xmm6,(%rax)  /* ... store the last key (returned in xmm6) at the end of the schedule ... */
+ movdqa %xmm7,(%rsp)  /* ... and write the adjusted first key back */
+ movdqu (%rbx),%xmm14  /* xmm14 = IV */
+ subq $8,%r14  /* at least eight blocks are present (checked on entry) */
+ movdqu 0(%r12),%xmm15  /* main loop body (presumably L$cbc_dec_loop; label not visible): load eight input blocks */
+ movdqu 16(%r12),%xmm0
+ movdqu 32(%r12),%xmm1
+ movdqu 48(%r12),%xmm2
+ movdqu 64(%r12),%xmm3
+ movdqu 80(%r12),%xmm4
+ movq %rsp,%rax  /* rax = converted key schedule */
+ movdqu 96(%r12),%xmm5
+ movl %edx,%r10d
+ movdqu 112(%r12),%xmm6
+ movdqa %xmm14,32(%rbp)  /* park the IV in the frame; the call below overwrites xmm14 */
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15  /* first block ^= IV */
+ movdqu 0(%r12),%xmm7  /* reload the input blocks: each later output is xored with the preceding input block */
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm2
+ movdqu 112(%r12),%xmm14  /* last input block becomes the IV for the next batch */
+ pxor %xmm13,%xmm4
+ movdqu %xmm15,0(%r13)  /* store the eight outputs; note the register order xmm15,0,5,3,1,6,2,4 */
+ leaq 128(%r12),%r12
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ subq $8,%r14
+ jnc L$cbc_dec_loop  /* no borrow: at least eight more blocks remain */
+ addq $8,%r14  /* r14 = leftover block count, 0..7 */
+ jz L$cbc_dec_done
+ movdqu 0(%r12),%xmm15  /* tail: load only the blocks that exist, dispatching on the leftover count */
+ movq %rsp,%rax
+ movl %edx,%r10d
+ cmpq $2,%r14
+ jb L$cbc_dec_one
+ movdqu 16(%r12),%xmm0
+ je L$cbc_dec_two  /* movdqu does not change flags, so this still tests the cmpq above */
+ movdqu 32(%r12),%xmm1
+ cmpq $4,%r14
+ jb L$cbc_dec_three
+ movdqu 48(%r12),%xmm2
+ je L$cbc_dec_four
+ movdqu 64(%r12),%xmm3
+ cmpq $6,%r14
+ jb L$cbc_dec_five
+ movdqu 80(%r12),%xmm4
+ je L$cbc_dec_six
+ movdqu 96(%r12),%xmm5  /* seven-block tail falls through to here */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8  /* the core processes all eight registers; only the first seven results are stored */
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm14  /* seventh input block = IV to write back */
+ pxor %xmm12,%xmm2
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ jmp L$cbc_dec_done
+.p2align 4  /* six-block tail (presumably L$cbc_dec_six) */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm14  /* sixth input block = IV to write back */
+ pxor %xmm11,%xmm6
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ jmp L$cbc_dec_done
+.p2align 4  /* five-block tail (presumably L$cbc_dec_five) */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm14  /* fifth input block = IV to write back */
+ pxor %xmm10,%xmm1
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ jmp L$cbc_dec_done
+.p2align 4  /* four-block tail (presumably L$cbc_dec_four) */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm14  /* fourth input block = IV to write back */
+ pxor %xmm9,%xmm3
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ jmp L$cbc_dec_done
+.p2align 4  /* three-block tail (presumably L$cbc_dec_three) */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm14  /* third input block = IV to write back */
+ pxor %xmm8,%xmm5
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ jmp L$cbc_dec_done
+.p2align 4  /* two-block tail (presumably L$cbc_dec_two) */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm14  /* second input block = IV to write back */
+ pxor %xmm7,%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ jmp L$cbc_dec_done
+.p2align 4  /* one-block tail (presumably L$cbc_dec_one): uses the scalar single-block routine instead of the eight-way core */
+ leaq (%r12),%rdi  /* arg1 = input block */
+ leaq 32(%rbp),%rsi  /* arg2 = scratch slot in the frame */
+ leaq (%r15),%rdx  /* arg3 = original (unconverted) key */
+ call _asm_AES_decrypt  /* NOTE(review): the code below relies on this call leaving xmm14 and xmm15 intact; not verifiable from this view */
+ pxor 32(%rbp),%xmm14  /* IV ^ decrypted block */
+ movdqu %xmm14,(%r13)
+ movdqa %xmm15,%xmm14  /* the input block just consumed becomes the IV */
+ movdqu %xmm14,(%rbx)  /* common exit (presumably L$cbc_dec_done): write the updated IV back through the IV pointer */
+ leaq (%rsp),%rax  /* wipe the stack from rsp up to the frame base, 32 bytes per pass, clearing the converted key schedule */
+ pxor %xmm0,%xmm0
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja L$cbc_dec_bzero  /* loop while rbp is still above the cursor (label not visible in this view) */
+ leaq (%rbp),%rsp  /* drop the key schedule area */
+ movq 72(%rsp),%r15  /* restore the registers pushed in the prologue; they sit just above the 72-byte frame */
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax  /* saved rbp, moved into place after rsp is adjusted */
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+ retq
+.globl _bsaes_ctr32_encrypt_blocks  /* CTR entry point with a 32-bit counter; the label line itself is not visible in this view. Register use below: rdi = in, rsi = out, rdx = number of 16-byte blocks, rcx = key, r8 = counter block. */
+.p2align 4
+ movq %rsp,%rax
+ pushq %rbp  /* save the callee-saved registers used below */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp  /* 72-byte scratch frame: 32(%rbp) = counter block, 48(%rbp) = keystream scratch */
+ movq %rsp,%rbp
+ movdqu (%r8),%xmm0  /* load the caller-supplied counter block */
+ movl 240(%rcx),%eax  /* 32-bit field at offset 240 of the key structure; presumably the round count */
+ movq %rdi,%r12  /* r12 = input pointer */
+ movq %rsi,%r13  /* r13 = output pointer */
+ movq %rdx,%r14  /* r14 = blocks remaining */
+ movq %rcx,%r15  /* r15 = key */
+ movdqa %xmm0,32(%rbp)
+ cmpq $8,%rdx
+ jb L$ctr_enc_short  /* fewer than eight blocks: one-at-a-time path, no key conversion */
+ movl %eax,%ebx  /* keep the count in ebx across calls */
+ shlq $7,%rax  /* stack space for the converted key schedule: count*128 - 96 bytes */
+ subq $96,%rax
+ subq %rax,%rsp
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %ebx,%r10d
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7  /* last key (xmm6) combined with the constant returned in xmm7 ... */
+ movdqa %xmm7,(%rax)  /* ... stored at the end of the schedule */
+ movdqa (%rsp),%xmm8  /* first key of the schedule */
+ leaq L$ADD1(%rip),%r11  /* r11 = L$ADD1 constant table (defined outside this view) */
+ movdqa 32(%rbp),%xmm15  /* counter block */
+ movdqa -32(%r11),%xmm7  /* shuffle mask 32 bytes before L$ADD1 */
+.byte 102,68,15,56,0,199  /* pshufb %xmm7,%xmm8 emitted as raw bytes */
+.byte 102,68,15,56,0,255  /* pshufb %xmm7,%xmm15: presumably puts the counter in a form paddd can increment - TODO confirm */
+ movdqa %xmm8,(%rsp)  /* keep the shuffled first key */
+ jmp L$ctr_enc_loop
+.p2align 4  /* presumably L$ctr_enc_loop (label not visible in this view) */
+ movdqa %xmm15,32(%rbp)  /* save the base counter for this batch */
+ movdqa %xmm15,%xmm0  /* build seven more counter blocks by adding successive L$ADD1 table entries */
+ movdqa %xmm15,%xmm1
+ paddd 0(%r11),%xmm0
+ movdqa %xmm15,%xmm2
+ paddd 16(%r11),%xmm1
+ movdqa %xmm15,%xmm3
+ paddd 32(%r11),%xmm2
+ movdqa %xmm15,%xmm4
+ paddd 48(%r11),%xmm3
+ movdqa %xmm15,%xmm5
+ paddd 64(%r11),%xmm4
+ movdqa %xmm15,%xmm6
+ paddd 80(%r11),%xmm5
+ paddd 96(%r11),%xmm6
+ movdqa (%rsp),%xmm8  /* first key */
+ leaq 16(%rsp),%rax  /* rax = rest of the schedule, as the encrypt core expects */
+ movdqa -16(%r11),%xmm7  /* shuffle mask 16 bytes before L$ADD1 */
+ pxor %xmm8,%xmm15  /* xor the first key into all eight counter blocks, then shuffle each by xmm7 */
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255  /* pshufb %xmm7,%xmm15 */
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199  /* pshufb %xmm7,%xmm0 */
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207  /* pshufb %xmm7,%xmm1 */
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215  /* pshufb %xmm7,%xmm2 */
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223  /* pshufb %xmm7,%xmm3 */
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231  /* pshufb %xmm7,%xmm4 */
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239  /* pshufb %xmm7,%xmm5 */
+ leaq L$BS0(%rip),%r11  /* the secondary entry point below needs r11 = L$BS0 */
+.byte 102,15,56,0,247  /* pshufb %xmm7,%xmm6 */
+ movl %ebx,%r10d
+ call _bsaes_encrypt8_bitslice  /* alternate entry of the encrypt core (defined outside this view); the initial key xor and shuffle were done above */
+ subq $8,%r14
+ jc L$ctr_enc_loop_done  /* fewer than eight blocks were left: handle a partial batch */
+ movdqu 0(%r12),%xmm7  /* full batch: load eight input blocks */
+ movdqu 16(%r12),%xmm8
+ movdqu 32(%r12),%xmm9
+ movdqu 48(%r12),%xmm10
+ movdqu 64(%r12),%xmm11
+ movdqu 80(%r12),%xmm12
+ movdqu 96(%r12),%xmm13
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ pxor %xmm15,%xmm7  /* xor with keystream; keystream registers come back in the order xmm15,0,3,5,2,6,1,4 */
+ movdqa 32(%rbp),%xmm15  /* reload the base counter */
+ pxor %xmm8,%xmm0
+ movdqu %xmm7,0(%r13)
+ pxor %xmm9,%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor %xmm10,%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor %xmm11,%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor %xmm12,%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor %xmm13,%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor %xmm14,%xmm4
+ movdqu %xmm1,96(%r13)
+ leaq L$ADD1(%rip),%r11  /* restore r11 for the next pass */
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ paddd 112(%r11),%xmm15  /* advance the counter for the next batch, using the table entry at offset 112 */
+ jnz L$ctr_enc_loop  /* flags are still those of the subq above: loop while blocks remain */
+ jmp L$ctr_enc_done
+.p2align 4  /* presumably L$ctr_enc_loop_done: partial final batch */
+ addq $8,%r14  /* r14 = number of blocks actually left, 1..7 */
+ movdqu 0(%r12),%xmm7  /* emit blocks one by one, stopping once r14 blocks are written */
+ pxor %xmm7,%xmm15
+ movdqu %xmm15,0(%r13)
+ cmpq $2,%r14
+ jb L$ctr_enc_done
+ movdqu 16(%r12),%xmm8
+ pxor %xmm8,%xmm0
+ movdqu %xmm0,16(%r13)
+ je L$ctr_enc_done  /* still testing the cmpq $2 above; the SSE instructions do not touch flags */
+ movdqu 32(%r12),%xmm9
+ pxor %xmm9,%xmm3
+ movdqu %xmm3,32(%r13)
+ cmpq $4,%r14
+ jb L$ctr_enc_done
+ movdqu 48(%r12),%xmm10
+ pxor %xmm10,%xmm5
+ movdqu %xmm5,48(%r13)
+ je L$ctr_enc_done
+ movdqu 64(%r12),%xmm11
+ pxor %xmm11,%xmm2
+ movdqu %xmm2,64(%r13)
+ cmpq $6,%r14
+ jb L$ctr_enc_done
+ movdqu 80(%r12),%xmm12
+ pxor %xmm12,%xmm6
+ movdqu %xmm6,80(%r13)
+ je L$ctr_enc_done
+ movdqu 96(%r12),%xmm13
+ pxor %xmm13,%xmm1
+ movdqu %xmm1,96(%r13)
+ jmp L$ctr_enc_done
+.p2align 4  /* presumably L$ctr_enc_short: fewer than eight blocks in total, one scalar call per block */
+ leaq 32(%rbp),%rdi  /* arg1 = counter block */
+ leaq 48(%rbp),%rsi  /* arg2 = keystream scratch */
+ leaq (%r15),%rdx  /* arg3 = original key */
+ call _asm_AES_encrypt
+ movdqu (%r12),%xmm0
+ leaq 16(%r12),%r12
+ movl 44(%rbp),%eax  /* last four bytes of the counter block */
+ bswapl %eax  /* byte-swap so it can be incremented as an integer (big-endian counter) */
+ pxor 48(%rbp),%xmm0  /* input ^ keystream */
+ incl %eax
+ movdqu %xmm0,(%r13)
+ bswapl %eax
+ leaq 16(%r13),%r13
+ movl %eax,44(%rsp)  /* rsp equals rbp on this path (no key schedule was allocated), so this updates the same slot read through 44(%rbp) */
+ decq %r14
+ jnz L$ctr_enc_short
+ leaq (%rsp),%rax  /* common exit (presumably L$ctr_enc_done): wipe the stack from rsp up to the frame base, 32 bytes per pass */
+ pxor %xmm0,%xmm0
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja L$ctr_enc_bzero  /* loop while rbp is still above the cursor (label not visible in this view) */
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15  /* restore the registers pushed in the prologue */
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax  /* saved rbp */
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+ retq
+.globl _bsaes_xts_encrypt  /* XTS encryption entry point; the label line itself is not visible in this view. Register use below: rdi = in, rsi = out, rdx = byte length, rcx = data key, r8 = tweak key, r9 = 16-byte tweak input. */
+.p2align 4
+ movq %rsp,%rax
+ pushq %rbp  /* save the callee-saved registers used below */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp  /* 72-byte scratch frame; 32(%rbp) holds the tweak and single-block scratch */
+ movq %rsp,%rbp
+ movq %rdi,%r12  /* r12 = input pointer */
+ movq %rsi,%r13  /* r13 = output pointer */
+ movq %rdx,%r14  /* r14 = byte length */
+ movq %rcx,%r15  /* r15 = data key */
+ leaq (%r9),%rdi  /* arg1 = tweak input block */
+ leaq 32(%rbp),%rsi  /* arg2 = where the initial tweak is written */
+ leaq (%r8),%rdx  /* arg3 = tweak key */
+ call _asm_AES_encrypt  /* initial tweak = encryption of the r9 block under the r8 key */
+ movl 240(%r15),%eax  /* 32-bit field at offset 240 of the data key; presumably the round count */
+ movq %r14,%rbx  /* keep the original length; its low four bits are tested at the end */
+ movl %eax,%edx
+ shlq $7,%rax  /* stack space for the converted key schedule: count*128 - 96 bytes */
+ subq $96,%rax
+ subq %rax,%rsp
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7  /* last key (xmm6) combined with the constant returned in xmm7 ... */
+ movdqa %xmm7,(%rax)  /* ... stored at the end of the schedule */
+ andq $-16,%r14  /* round the length down to whole 16-byte blocks */
+ subq $128,%rsp  /* eight 16-byte tweak slots below the key schedule; the schedule is now at 128(%rsp) */
+ movdqa 32(%rbp),%xmm6  /* xmm6 = current tweak */
+ pxor %xmm14,%xmm14
+ movdqa L$xts_magic(%rip),%xmm12  /* constant defined outside this view, used by every tweak update */
+ pcmpgtd %xmm6,%xmm14  /* xmm14 = all ones in each dword of the tweak whose top bit is set */
+ subq $128,%r14
+ jc L$xts_enc_short  /* fewer than eight whole blocks */
+ jmp L$xts_enc_loop
+.p2align 4  /* presumably L$xts_enc_loop (label not visible in this view) */
+ pshufd $19,%xmm14,%xmm13  /* tweak update step: paddq doubles both 64-bit halves, and the masked L$xts_magic term patches in the carries. Presumably multiplication by x in GF(2^128) - TODO confirm. */
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15  /* tweak 0 goes to the block register and to its stack slot */
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14  /* sign mask for the following step */
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13  /* tweak 1 */
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7  /* input loads are interleaved with the tweak computation */
+ pshufd $19,%xmm14,%xmm13  /* tweak 2 */
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15  /* block 0 ^= tweak 0 */
+ pshufd $19,%xmm14,%xmm13  /* tweak 3 */
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13  /* tweak 4 */
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13  /* tweak 5 */
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13  /* tweak 6 */
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12  /* xmm12 is reused for input here; L$xts_magic is reloaded after the call */
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp)  /* tweak 7 */
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax  /* rax = converted key schedule */
+ pxor %xmm14,%xmm6  /* block 7 ^= tweak 7, in place in xmm6 */
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15  /* xor each tweak into its result; results come back in the order xmm15,0,3,5,2,6,1,4 */
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm1,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ movdqa 112(%rsp),%xmm6  /* resume from tweak 7 and step it once for the next batch */
+ pxor %xmm14,%xmm14
+ movdqa L$xts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ subq $128,%r14
+ jnc L$xts_enc_loop  /* no borrow: at least eight more whole blocks remain */
+ addq $128,%r14  /* presumably L$xts_enc_short lands here: r14 = bytes left in whole blocks, 0..112 */
+ jz L$xts_enc_done
+ pshufd $19,%xmm14,%xmm13  /* tail: compute tweaks one by one, branching out as soon as the remaining byte count matches */
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je L$xts_enc_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je L$xts_enc_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je L$xts_enc_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je L$xts_enc_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je L$xts_enc_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je L$xts_enc_6
+ pxor %xmm11,%xmm3  /* seven-block tail falls through to here */
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp)  /* next unused tweak, reloaded after the call */
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8  /* the core processes all eight registers; only the first seven results are stored */
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm1,96(%r13)
+ leaq 112(%r13),%r13
+ movdqa 112(%rsp),%xmm6  /* xmm6 = next unused tweak, needed if a partial block follows */
+ jmp L$xts_enc_done
+.p2align 4  /* six-block tail (presumably L$xts_enc_6) */
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+ movdqa 96(%rsp),%xmm6  /* next unused tweak */
+ jmp L$xts_enc_done
+.p2align 4  /* five-block tail (presumably L$xts_enc_5) */
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ movdqu %xmm2,64(%r13)
+ leaq 80(%r13),%r13
+ movdqa 80(%rsp),%xmm6  /* next unused tweak */
+ jmp L$xts_enc_done
+.p2align 4  /* four-block tail (presumably L$xts_enc_4) */
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ movdqu %xmm5,48(%r13)
+ leaq 64(%r13),%r13
+ movdqa 64(%rsp),%xmm6  /* next unused tweak */
+ jmp L$xts_enc_done
+.p2align 4  /* three-block tail (presumably L$xts_enc_3) */
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm3,32(%r13)
+ leaq 48(%r13),%r13
+ movdqa 48(%rsp),%xmm6  /* next unused tweak */
+ jmp L$xts_enc_done
+.p2align 4  /* two-block tail (presumably L$xts_enc_2) */
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+ movdqa 32(%rsp),%xmm6  /* next unused tweak */
+ jmp L$xts_enc_done
+.p2align 4  /* one-block tail (presumably L$xts_enc_1): uses the scalar single-block routine */
+ pxor %xmm15,%xmm7  /* block ^ tweak (xmm15 still holds tweak 0) */
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rdi  /* encrypt in place in the frame scratch slot */
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx
+ call _asm_AES_encrypt  /* NOTE(review): the next instruction relies on xmm15 surviving this call; not verifiable from this view */
+ pxor 32(%rbp),%xmm15  /* result ^ tweak */
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+ movdqa 16(%rsp),%xmm6  /* next unused tweak */
+ andl $15,%ebx  /* presumably L$xts_enc_done: ebx = leftover bytes beyond the last whole block */
+ jz L$xts_enc_ret  /* length was a multiple of 16: nothing more to do */
+ movq %r13,%rdx  /* partial final block: swap bytes between the last full output block and the trailing partial block (ciphertext stealing) */
+ movzbl (%r12),%eax  /* next leftover input byte */
+ movzbl -16(%rdx),%ecx  /* matching byte of the last full output block */
+ leaq 1(%r12),%r12
+ movb %al,-16(%rdx)  /* the input byte replaces it ... */
+ movb %cl,0(%rdx)  /* ... and the displaced output byte becomes part of the short final block */
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz L$xts_enc_steal  /* label not visible in this view; presumably the movzbl pair above */
+ movdqu -16(%r13),%xmm15  /* re-encrypt the patched block with the next tweak (xmm6) */
+ leaq 32(%rbp),%rdi
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call _asm_AES_encrypt  /* NOTE(review): relies on xmm6 surviving this call */
+ pxor 32(%rbp),%xmm6
+ movdqu %xmm6,-16(%r13)
+ leaq (%rsp),%rax  /* presumably L$xts_enc_ret: wipe the stack from rsp up to the frame base, 32 bytes per pass */
+ pxor %xmm0,%xmm0
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja L$xts_enc_bzero  /* loop while rbp is still above the cursor (label not visible in this view) */
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15  /* restore the registers pushed in the prologue */
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax  /* saved rbp */
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+ retq
+.globl _bsaes_xts_decrypt
+.p2align 4
+ movq %rsp,%rax
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+ leaq (%r9),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r8),%rdx
+ call _asm_AES_encrypt
+ movl 240(%r15),%eax
+ movq %r14,%rbx
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor (%rsp),%xmm7
+ movdqa %xmm6,(%rax)
+ movdqa %xmm7,(%rsp)
+ xorl %eax,%eax
+ andq $-16,%r14
+ testl $15,%ebx
+ setnz %al
+ shlq $4,%rax
+ subq %rax,%r14
+ subq $128,%rsp
+ movdqa 32(%rbp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa L$xts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ subq $128,%r14
+ jc L$xts_dec_short
+ jmp L$xts_dec_loop
+.p2align 4
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp)
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ pxor %xmm14,%xmm6
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ movdqa 112(%rsp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa L$xts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ subq $128,%r14
+ jnc L$xts_dec_loop
+ addq $128,%r14
+ jz L$xts_dec_done
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je L$xts_dec_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je L$xts_dec_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je L$xts_dec_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je L$xts_dec_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je L$xts_dec_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je L$xts_dec_6
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp)
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ leaq 112(%r13),%r13
+ movdqa 112(%rsp),%xmm6
+ jmp L$xts_dec_done
+.p2align 4
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+ movdqa 96(%rsp),%xmm6
+ jmp L$xts_dec_done
+.p2align 4
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ leaq 80(%r13),%r13
+ movdqa 80(%rsp),%xmm6
+ jmp L$xts_dec_done
+.p2align 4
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ leaq 64(%r13),%r13
+ movdqa 64(%rsp),%xmm6
+ jmp L$xts_dec_done
+.p2align 4
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ leaq 48(%r13),%r13
+ movdqa 48(%rsp),%xmm6
+ jmp L$xts_dec_done
+.p2align 4
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+ movdqa 32(%rsp),%xmm6
+ jmp L$xts_dec_done
+.p2align 4
+ pxor %xmm15,%xmm7
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx
+ call _asm_AES_decrypt
+ pxor 32(%rbp),%xmm15
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+ movdqa 16(%rsp),%xmm6
+ andl $15,%ebx
+ jz L$xts_dec_ret
+ pxor %xmm14,%xmm14
+ movdqa L$xts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ movdqa %xmm6,%xmm5
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ movdqu (%r12),%xmm15
+ pxor %xmm13,%xmm6
+ leaq 32(%rbp),%rdi
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call _asm_AES_decrypt
+ pxor 32(%rbp),%xmm6
+ movq %r13,%rdx
+ movdqu %xmm6,(%r13)
+ movzbl 16(%r12),%eax
+ movzbl (%rdx),%ecx
+ leaq 1(%r12),%r12
+ movb %al,(%rdx)
+ movb %cl,16(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz L$xts_dec_steal
+ movdqu (%r13),%xmm15
+ leaq 32(%rbp),%rdi
+ pxor %xmm5,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call _asm_AES_decrypt
+ pxor 32(%rbp),%xmm5
+ movdqu %xmm5,(%r13)
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja L$xts_dec_bzero
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+ retq
+.p2align 6 /* constant tables follow, aligned to 64 bytes; NOTE(review): the labels naming each table are not visible in this listing */
+.quad 0x0a0e0206070b0f03, 0x0004080c0d010509 /* every byte is a distinct index 0x00..0x0f - presumably a pshufb shuffle mask; TODO confirm (same for the next two entries) */
+.quad 0x01040b0e0205080f, 0x0306090c00070a0d
+.quad 0x0504070602010003, 0x0f0e0d0c080b0a09
+.quad 0x5555555555555555, 0x5555555555555555 /* bit pattern 01010101 in every byte */
+.quad 0x3333333333333333, 0x3333333333333333 /* bit pattern 00110011 in every byte */
+.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f /* bit pattern 00001111 in every byte */
+.quad 0x0504070600030201, 0x0f0e0d0c0a09080b /* byte-index vectors again (five entries) - presumably further shuffle masks; TODO confirm */
+.quad 0x0304090e00050a0f, 0x01060b0c0207080d
+.quad 0x0a0e02060f03070b, 0x0004080c05090d01
+.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
+.quad 0x0a0d02060c03070b, 0x0004080f05090e01
+.quad 0x0000000000000000, 0x0000000100000000 /* eight vectors holding 1..8 in the top 32-bit lane, zero elsewhere - presumably the counter increments L$ADD1..; TODO confirm */
+.quad 0x0000000000000000, 0x0000000200000000
+.quad 0x0000000000000000, 0x0000000300000000
+.quad 0x0000000000000000, 0x0000000400000000
+.quad 0x0000000000000000, 0x0000000500000000
+.quad 0x0000000000000000, 0x0000000600000000
+.quad 0x0000000000000000, 0x0000000700000000
+.quad 0x0000000000000000, 0x0000000800000000
+.long 0x87,0,1,0 /* presumably L$xts_magic, the constant the XTS code above loads via L$xts_magic(%rip); TODO confirm */
+.quad 0x0101010101010101, 0x0101010101010101 /* single-bit byte masks: bit 0, then bit 1, bit 2, bit 3 on the next three lines */
+.quad 0x0202020202020202, 0x0202020202020202
+.quad 0x0404040404040404, 0x0404040404040404
+.quad 0x0808080808080808, 0x0808080808080808
+.quad 0x02060a0e03070b0f, 0x0004080c0105090d /* byte-index vector - presumably a shuffle mask; TODO confirm */
+.quad 0x6363636363636363, 0x6363636363636363 /* 0x63 replicated into every byte */
+.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0 /* NUL-terminated UTF-8 banner: "Bit-sliced AES for x86_64/SSSE3, Emilia Kasper, Peter Schwabe, Andy Polyakov" (bytes 195,164 encode a-umlaut) */
+.p2align 6
diff --git a/crypto/libressl/crypto/aes/bsaes-masm-x86_64.S b/crypto/libressl/crypto/aes/bsaes-masm-x86_64.S
new file mode 100644
index 0000000..6b1a97d
--- /dev/null
+++ b/crypto/libressl/crypto/aes/bsaes-masm-x86_64.S
@@ -0,0 +1,2803 @@
+; 1 "crypto/aes/bsaes-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/aes/bsaes-masm-x86_64.S.tmp" 2
+; 1 "./crypto/x86_arch.h" 1
+; 16 "./crypto/x86_arch.h"
+; 40 "./crypto/x86_arch.h"
+; 3 "crypto/aes/bsaes-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+EXTERN asm_AES_encrypt:NEAR
+EXTERN asm_AES_decrypt:NEAR
+_bsaes_encrypt8 PROC PRIVATE ; encrypts the eight 16-byte blocks held in xmm15,xmm0..xmm6. In: rax -> key material, r10d = round count. Scratch: r11, xmm7..xmm14
+ lea r11,QWORD PTR[$L$BS0] ; r11 -> constant table; all mask loads below are relative to $L$BS0
+ movdqa xmm8,XMMWORD PTR[rax] ; first 16 bytes of key material (aligned load)
+ lea rax,QWORD PTR[16+rax] ; rax now points at the per-round key data consumed in the loop
+ movdqa xmm7,XMMWORD PTR[80+r11] ; byte-shuffle mask used for the initial pshufb pass
+ pxor xmm15,xmm8 ; XOR the first key block into each of the eight inputs, interleaved with the shuffles
+ pxor xmm0,xmm8
+DB 102,68,15,56,0,255 ; pshufb xmm15,xmm7 (SSSE3 opcode emitted as raw bytes)
+ pxor xmm1,xmm8
+DB 102,15,56,0,199 ; pshufb xmm0,xmm7
+ pxor xmm2,xmm8
+DB 102,15,56,0,207 ; pshufb xmm1,xmm7
+ pxor xmm3,xmm8
+DB 102,15,56,0,215 ; pshufb xmm2,xmm7
+ pxor xmm4,xmm8
+DB 102,15,56,0,223 ; pshufb xmm3,xmm7
+ pxor xmm5,xmm8
+DB 102,15,56,0,231 ; pshufb xmm4,xmm7
+ pxor xmm6,xmm8
+DB 102,15,56,0,239 ; pshufb xmm5,xmm7
+DB 102,15,56,0,247 ; pshufb xmm6,xmm7
+ movdqa xmm7,XMMWORD PTR[r11] ; mask for the shift-by-1 exchange steps; NOTE(review): bsaes_ctr32_encrypt_blocks calls _bsaes_encrypt8_bitslice, presumably a label near here that is not visible in this listing
+ movdqa xmm8,XMMWORD PTR[16+r11] ; mask for the shift-by-2 exchange steps
+ movdqa xmm9,xmm5 ; exchange step (shift 1, mask xmm7) on pairs xmm5/xmm6 and xmm3/xmm4: t=((a>>1)^b)&m; b^=t; a^=t<<1
+ psrlq xmm5,1
+ movdqa xmm10,xmm3
+ psrlq xmm3,1
+ pxor xmm5,xmm6
+ pxor xmm3,xmm4
+ pand xmm5,xmm7
+ pand xmm3,xmm7
+ pxor xmm6,xmm5
+ psllq xmm5,1
+ pxor xmm4,xmm3
+ psllq xmm3,1
+ pxor xmm5,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm1 ; same shift-1 exchange on pairs xmm1/xmm2 and xmm15/xmm0
+ psrlq xmm1,1
+ movdqa xmm10,xmm15
+ psrlq xmm15,1
+ pxor xmm1,xmm2
+ pxor xmm15,xmm0
+ pand xmm1,xmm7
+ pand xmm15,xmm7
+ pxor xmm2,xmm1
+ psllq xmm1,1
+ pxor xmm0,xmm15
+ psllq xmm15,1
+ pxor xmm1,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[32+r11] ; mask for the shift-by-4 exchange steps
+ movdqa xmm9,xmm4 ; exchange step (shift 2, mask xmm8) on pairs xmm4/xmm6 and xmm3/xmm5
+ psrlq xmm4,2
+ movdqa xmm10,xmm3
+ psrlq xmm3,2
+ pxor xmm4,xmm6
+ pxor xmm3,xmm5
+ pand xmm4,xmm8
+ pand xmm3,xmm8
+ pxor xmm6,xmm4
+ psllq xmm4,2
+ pxor xmm5,xmm3
+ psllq xmm3,2
+ pxor xmm4,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm0 ; shift-2 exchange on pairs xmm0/xmm2 and xmm15/xmm1
+ psrlq xmm0,2
+ movdqa xmm10,xmm15
+ psrlq xmm15,2
+ pxor xmm0,xmm2
+ pxor xmm15,xmm1
+ pand xmm0,xmm8
+ pand xmm15,xmm8
+ pxor xmm2,xmm0
+ psllq xmm0,2
+ pxor xmm1,xmm15
+ psllq xmm15,2
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm9,xmm2 ; exchange step (shift 4, mask xmm7) on pairs xmm2/xmm6 and xmm1/xmm5
+ psrlq xmm2,4
+ movdqa xmm10,xmm1
+ psrlq xmm1,4
+ pxor xmm2,xmm6
+ pxor xmm1,xmm5
+ pand xmm2,xmm7
+ pand xmm1,xmm7
+ pxor xmm6,xmm2
+ psllq xmm2,4
+ pxor xmm5,xmm1
+ psllq xmm1,4
+ pxor xmm2,xmm9
+ pxor xmm1,xmm10
+ movdqa xmm9,xmm0 ; shift-4 exchange on pairs xmm0/xmm4 and xmm15/xmm3
+ psrlq xmm0,4
+ movdqa xmm10,xmm15
+ psrlq xmm15,4
+ pxor xmm0,xmm4
+ pxor xmm15,xmm3
+ pand xmm0,xmm7
+ pand xmm15,xmm7
+ pxor xmm4,xmm0
+ psllq xmm0,4
+ pxor xmm3,xmm15
+ psllq xmm15,4
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ dec r10d ; one round accounted for before entering the loop
+ jmp $L$enc_sbox ; first pass skips the key-XOR/shuffle stage below; NOTE(review): label definitions are not visible in this listing
+ pxor xmm15,XMMWORD PTR[rax] ; presumably $L$enc_loop: XOR 128 bytes of round-key data at [rax] into the eight state registers
+ pxor xmm0,XMMWORD PTR[16+rax]
+DB 102,68,15,56,0,255 ; pshufb xmm15,xmm7 (xmm7 = shuffle mask chosen at the bottom of the loop)
+ pxor xmm1,XMMWORD PTR[32+rax]
+DB 102,15,56,0,199 ; pshufb xmm0,xmm7
+ pxor xmm2,XMMWORD PTR[48+rax]
+DB 102,15,56,0,207 ; pshufb xmm1,xmm7
+ pxor xmm3,XMMWORD PTR[64+rax]
+DB 102,15,56,0,215 ; pshufb xmm2,xmm7
+ pxor xmm4,XMMWORD PTR[80+rax]
+DB 102,15,56,0,223 ; pshufb xmm3,xmm7
+ pxor xmm5,XMMWORD PTR[96+rax]
+DB 102,15,56,0,231 ; pshufb xmm4,xmm7
+ pxor xmm6,XMMWORD PTR[112+rax]
+DB 102,15,56,0,239 ; pshufb xmm5,xmm7
+ lea rax,QWORD PTR[128+rax] ; advance to the next round's 128 bytes of key data
+DB 102,15,56,0,247 ; pshufb xmm6,xmm7
+ pxor xmm4,xmm5 ; presumably $L$enc_sbox: start of a pxor/pand/por-only network over the eight registers (looks like the bit-sliced S-box; TODO confirm)
+ pxor xmm1,xmm0
+ pxor xmm2,xmm15
+ pxor xmm5,xmm1
+ pxor xmm4,xmm15
+ pxor xmm5,xmm2
+ pxor xmm2,xmm6
+ pxor xmm6,xmm4
+ pxor xmm2,xmm3
+ pxor xmm3,xmm4
+ pxor xmm2,xmm0
+ pxor xmm1,xmm6
+ pxor xmm0,xmm4
+ movdqa xmm10,xmm6
+ movdqa xmm9,xmm0
+ movdqa xmm8,xmm4
+ movdqa xmm12,xmm1
+ movdqa xmm11,xmm5
+ pxor xmm10,xmm3
+ pxor xmm9,xmm1
+ pxor xmm8,xmm2
+ movdqa xmm13,xmm10
+ pxor xmm12,xmm3
+ movdqa xmm7,xmm9
+ pxor xmm11,xmm15
+ movdqa xmm14,xmm10
+ por xmm9,xmm8
+ por xmm10,xmm11
+ pxor xmm14,xmm7
+ pand xmm13,xmm11
+ pxor xmm11,xmm8
+ pand xmm7,xmm8
+ pand xmm14,xmm11
+ movdqa xmm11,xmm2
+ pxor xmm11,xmm15
+ pand xmm12,xmm11
+ pxor xmm10,xmm12
+ pxor xmm9,xmm12
+ movdqa xmm12,xmm6
+ movdqa xmm11,xmm4
+ pxor xmm12,xmm0
+ pxor xmm11,xmm5
+ movdqa xmm8,xmm12
+ pand xmm12,xmm11
+ por xmm8,xmm11
+ pxor xmm7,xmm12
+ pxor xmm10,xmm14
+ pxor xmm9,xmm13
+ pxor xmm8,xmm14
+ movdqa xmm11,xmm1
+ pxor xmm7,xmm13
+ movdqa xmm12,xmm3
+ pxor xmm8,xmm13
+ movdqa xmm13,xmm0
+ pand xmm11,xmm2
+ movdqa xmm14,xmm6
+ pand xmm12,xmm15
+ pand xmm13,xmm4
+ por xmm14,xmm5
+ pxor xmm10,xmm11
+ pxor xmm9,xmm12
+ pxor xmm8,xmm13
+ pxor xmm7,xmm14
+ movdqa xmm11,xmm10
+ pand xmm10,xmm8
+ pxor xmm11,xmm9
+ movdqa xmm13,xmm7
+ movdqa xmm14,xmm11
+ pxor xmm13,xmm10
+ pand xmm14,xmm13
+ movdqa xmm12,xmm8
+ pxor xmm14,xmm9
+ pxor xmm12,xmm7
+ pxor xmm10,xmm9
+ pand xmm12,xmm10
+ movdqa xmm9,xmm13
+ pxor xmm12,xmm7
+ pxor xmm9,xmm12
+ pxor xmm8,xmm12
+ pand xmm9,xmm7
+ pxor xmm13,xmm9
+ pxor xmm8,xmm9
+ pand xmm13,xmm14
+ pxor xmm13,xmm11
+ movdqa xmm11,xmm5
+ movdqa xmm7,xmm4
+ movdqa xmm9,xmm14
+ pxor xmm9,xmm13
+ pand xmm9,xmm5
+ pxor xmm5,xmm4
+ pand xmm4,xmm14
+ pand xmm5,xmm13
+ pxor xmm5,xmm4
+ pxor xmm4,xmm9
+ pxor xmm11,xmm15
+ pxor xmm7,xmm2
+ pxor xmm14,xmm12
+ pxor xmm13,xmm8
+ movdqa xmm10,xmm14
+ movdqa xmm9,xmm12
+ pxor xmm10,xmm13
+ pxor xmm9,xmm8
+ pand xmm10,xmm11
+ pand xmm9,xmm15
+ pxor xmm11,xmm7
+ pxor xmm15,xmm2
+ pand xmm7,xmm14
+ pand xmm2,xmm12
+ pand xmm11,xmm13
+ pand xmm15,xmm8
+ pxor xmm7,xmm11
+ pxor xmm15,xmm2
+ pxor xmm11,xmm10
+ pxor xmm2,xmm9
+ pxor xmm5,xmm11
+ pxor xmm15,xmm11
+ pxor xmm4,xmm7
+ pxor xmm2,xmm7
+ movdqa xmm11,xmm6
+ movdqa xmm7,xmm0
+ pxor xmm11,xmm3
+ pxor xmm7,xmm1
+ movdqa xmm10,xmm14
+ movdqa xmm9,xmm12
+ pxor xmm10,xmm13
+ pxor xmm9,xmm8
+ pand xmm10,xmm11
+ pand xmm9,xmm3
+ pxor xmm11,xmm7
+ pxor xmm3,xmm1
+ pand xmm7,xmm14
+ pand xmm1,xmm12
+ pand xmm11,xmm13
+ pand xmm3,xmm8
+ pxor xmm7,xmm11
+ pxor xmm3,xmm1
+ pxor xmm11,xmm10
+ pxor xmm1,xmm9
+ pxor xmm14,xmm12
+ pxor xmm13,xmm8
+ movdqa xmm10,xmm14
+ pxor xmm10,xmm13
+ pand xmm10,xmm6
+ pxor xmm6,xmm0
+ pand xmm0,xmm14
+ pand xmm6,xmm13
+ pxor xmm6,xmm0
+ pxor xmm0,xmm10
+ pxor xmm6,xmm11
+ pxor xmm3,xmm11
+ pxor xmm0,xmm7
+ pxor xmm1,xmm7
+ pxor xmm6,xmm15
+ pxor xmm0,xmm5
+ pxor xmm3,xmm6
+ pxor xmm5,xmm15
+ pxor xmm15,xmm0
+ pxor xmm0,xmm4
+ pxor xmm4,xmm1
+ pxor xmm1,xmm2
+ pxor xmm2,xmm4
+ pxor xmm3,xmm4
+ pxor xmm5,xmm2
+ dec r10d ; count this round; the flags set here also drive the jnz further down
+ jl $L$enc_done ; counter went negative: leave the round loop
+ pshufd xmm7,xmm15,093h ; dword rotations (093h, then 04Eh) XORed together across registers - presumably MixColumns on the sliced state; TODO confirm
+ pshufd xmm8,xmm0,093h
+ pxor xmm15,xmm7
+ pshufd xmm9,xmm3,093h
+ pxor xmm0,xmm8
+ pshufd xmm10,xmm5,093h
+ pxor xmm3,xmm9
+ pshufd xmm11,xmm2,093h
+ pxor xmm5,xmm10
+ pshufd xmm12,xmm6,093h
+ pxor xmm2,xmm11
+ pshufd xmm13,xmm1,093h
+ pxor xmm6,xmm12
+ pshufd xmm14,xmm4,093h
+ pxor xmm1,xmm13
+ pxor xmm4,xmm14
+ pxor xmm8,xmm15
+ pxor xmm7,xmm4
+ pxor xmm8,xmm4
+ pshufd xmm15,xmm15,04Eh
+ pxor xmm9,xmm0
+ pshufd xmm0,xmm0,04Eh
+ pxor xmm12,xmm2
+ pxor xmm15,xmm7
+ pxor xmm13,xmm6
+ pxor xmm0,xmm8
+ pxor xmm11,xmm5
+ pshufd xmm7,xmm2,04Eh
+ pxor xmm14,xmm1
+ pshufd xmm8,xmm6,04Eh
+ pxor xmm10,xmm3
+ pshufd xmm2,xmm5,04Eh
+ pxor xmm10,xmm4
+ pshufd xmm6,xmm4,04Eh
+ pxor xmm11,xmm4
+ pshufd xmm5,xmm1,04Eh
+ pxor xmm7,xmm11
+ pshufd xmm1,xmm3,04Eh
+ pxor xmm8,xmm12
+ pxor xmm2,xmm10
+ pxor xmm6,xmm14
+ pxor xmm5,xmm13
+ movdqa xmm3,xmm7
+ pxor xmm1,xmm9
+ movdqa xmm4,xmm8
+ movdqa xmm7,XMMWORD PTR[48+r11] ; shuffle mask for the next pass when rounds remain
+ jnz $L$enc_loop ; ZF still comes from the dec r10d above (the SSE instructions in between leave flags alone)
+ movdqa xmm7,XMMWORD PTR[64+r11] ; counter hit zero: the final pass uses a different shuffle mask
+ jmp $L$enc_loop
+ movdqa xmm7,XMMWORD PTR[r11] ; presumably $L$enc_done: reload the shift-1 mask and undo the bit-slicing with the same exchange steps
+ movdqa xmm8,XMMWORD PTR[16+r11] ; shift-2 mask
+ movdqa xmm9,xmm1 ; shift-1 exchange on pairs xmm1/xmm4 and xmm2/xmm6
+ psrlq xmm1,1
+ movdqa xmm10,xmm2
+ psrlq xmm2,1
+ pxor xmm1,xmm4
+ pxor xmm2,xmm6
+ pand xmm1,xmm7
+ pand xmm2,xmm7
+ pxor xmm4,xmm1
+ psllq xmm1,1
+ pxor xmm6,xmm2
+ psllq xmm2,1
+ pxor xmm1,xmm9
+ pxor xmm2,xmm10
+ movdqa xmm9,xmm3 ; shift-1 exchange on pairs xmm3/xmm5 and xmm15/xmm0
+ psrlq xmm3,1
+ movdqa xmm10,xmm15
+ psrlq xmm15,1
+ pxor xmm3,xmm5
+ pxor xmm15,xmm0
+ pand xmm3,xmm7
+ pand xmm15,xmm7
+ pxor xmm5,xmm3
+ psllq xmm3,1
+ pxor xmm0,xmm15
+ psllq xmm15,1
+ pxor xmm3,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[32+r11] ; shift-4 mask
+ movdqa xmm9,xmm6 ; shift-2 exchange on pairs xmm6/xmm4 and xmm2/xmm1
+ psrlq xmm6,2
+ movdqa xmm10,xmm2
+ psrlq xmm2,2
+ pxor xmm6,xmm4
+ pxor xmm2,xmm1
+ pand xmm6,xmm8
+ pand xmm2,xmm8
+ pxor xmm4,xmm6
+ psllq xmm6,2
+ pxor xmm1,xmm2
+ psllq xmm2,2
+ pxor xmm6,xmm9
+ pxor xmm2,xmm10
+ movdqa xmm9,xmm0 ; shift-2 exchange on pairs xmm0/xmm5 and xmm15/xmm3
+ psrlq xmm0,2
+ movdqa xmm10,xmm15
+ psrlq xmm15,2
+ pxor xmm0,xmm5
+ pxor xmm15,xmm3
+ pand xmm0,xmm8
+ pand xmm15,xmm8
+ pxor xmm5,xmm0
+ psllq xmm0,2
+ pxor xmm3,xmm15
+ psllq xmm15,2
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm9,xmm5 ; shift-4 exchange on pairs xmm5/xmm4 and xmm3/xmm1
+ psrlq xmm5,4
+ movdqa xmm10,xmm3
+ psrlq xmm3,4
+ pxor xmm5,xmm4
+ pxor xmm3,xmm1
+ pand xmm5,xmm7
+ pand xmm3,xmm7
+ pxor xmm4,xmm5
+ psllq xmm5,4
+ pxor xmm1,xmm3
+ psllq xmm3,4
+ pxor xmm5,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm0 ; shift-4 exchange on pairs xmm0/xmm6 and xmm15/xmm2
+ psrlq xmm0,4
+ movdqa xmm10,xmm15
+ psrlq xmm15,4
+ pxor xmm0,xmm6
+ pxor xmm15,xmm2
+ pand xmm0,xmm7
+ pand xmm15,xmm7
+ pxor xmm6,xmm0
+ psllq xmm0,4
+ pxor xmm2,xmm15
+ psllq xmm15,4
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[rax] ; final 16-byte key block, XORed into all eight results
+ pxor xmm3,xmm7 ; results are left in xmm15,xmm0,xmm3,xmm5,xmm2,xmm6,xmm1,xmm4 (block order as consumed by bsaes_ctr32_encrypt_blocks)
+ pxor xmm5,xmm7
+ pxor xmm2,xmm7
+ pxor xmm6,xmm7
+ pxor xmm1,xmm7
+ pxor xmm4,xmm7
+ pxor xmm15,xmm7
+ pxor xmm0,xmm7
+ DB 0F3h,0C3h ;repret
+_bsaes_encrypt8 ENDP
+_bsaes_decrypt8 PROC PRIVATE ; decrypts the eight 16-byte blocks held in xmm15,xmm0..xmm6. In: rax -> key material, r10d = round count. Scratch: r11, xmm7..xmm14
+ lea r11,QWORD PTR[$L$BS0] ; r11 -> constant table; masks are read at both negative and positive offsets from $L$BS0
+ movdqa xmm8,XMMWORD PTR[rax] ; first 16 bytes of key material (aligned load)
+ lea rax,QWORD PTR[16+rax] ; rax now points at the per-round key data consumed in the loop
+ movdqa xmm7,XMMWORD PTR[((-48))+r11] ; byte-shuffle mask used for the initial pshufb pass
+ pxor xmm15,xmm8 ; XOR the first key block into each of the eight inputs, interleaved with the shuffles
+ pxor xmm0,xmm8
+DB 102,68,15,56,0,255 ; pshufb xmm15,xmm7 (SSSE3 opcode emitted as raw bytes)
+ pxor xmm1,xmm8
+DB 102,15,56,0,199 ; pshufb xmm0,xmm7
+ pxor xmm2,xmm8
+DB 102,15,56,0,207 ; pshufb xmm1,xmm7
+ pxor xmm3,xmm8
+DB 102,15,56,0,215 ; pshufb xmm2,xmm7
+ pxor xmm4,xmm8
+DB 102,15,56,0,223 ; pshufb xmm3,xmm7
+ pxor xmm5,xmm8
+DB 102,15,56,0,231 ; pshufb xmm4,xmm7
+ pxor xmm6,xmm8
+DB 102,15,56,0,239 ; pshufb xmm5,xmm7
+DB 102,15,56,0,247 ; pshufb xmm6,xmm7
+ movdqa xmm7,XMMWORD PTR[r11] ; mask for the shift-by-1 exchange steps
+ movdqa xmm8,XMMWORD PTR[16+r11] ; mask for the shift-by-2 exchange steps
+ movdqa xmm9,xmm5 ; exchange step (shift 1, mask xmm7) on pairs xmm5/xmm6 and xmm3/xmm4: t=((a>>1)^b)&m; b^=t; a^=t<<1
+ psrlq xmm5,1
+ movdqa xmm10,xmm3
+ psrlq xmm3,1
+ pxor xmm5,xmm6
+ pxor xmm3,xmm4
+ pand xmm5,xmm7
+ pand xmm3,xmm7
+ pxor xmm6,xmm5
+ psllq xmm5,1
+ pxor xmm4,xmm3
+ psllq xmm3,1
+ pxor xmm5,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm1 ; same shift-1 exchange on pairs xmm1/xmm2 and xmm15/xmm0
+ psrlq xmm1,1
+ movdqa xmm10,xmm15
+ psrlq xmm15,1
+ pxor xmm1,xmm2
+ pxor xmm15,xmm0
+ pand xmm1,xmm7
+ pand xmm15,xmm7
+ pxor xmm2,xmm1
+ psllq xmm1,1
+ pxor xmm0,xmm15
+ psllq xmm15,1
+ pxor xmm1,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[32+r11] ; mask for the shift-by-4 exchange steps
+ movdqa xmm9,xmm4 ; exchange step (shift 2, mask xmm8) on pairs xmm4/xmm6 and xmm3/xmm5
+ psrlq xmm4,2
+ movdqa xmm10,xmm3
+ psrlq xmm3,2
+ pxor xmm4,xmm6
+ pxor xmm3,xmm5
+ pand xmm4,xmm8
+ pand xmm3,xmm8
+ pxor xmm6,xmm4
+ psllq xmm4,2
+ pxor xmm5,xmm3
+ psllq xmm3,2
+ pxor xmm4,xmm9
+ pxor xmm3,xmm10
+ movdqa xmm9,xmm0 ; shift-2 exchange on pairs xmm0/xmm2 and xmm15/xmm1
+ psrlq xmm0,2
+ movdqa xmm10,xmm15
+ psrlq xmm15,2
+ pxor xmm0,xmm2
+ pxor xmm15,xmm1
+ pand xmm0,xmm8
+ pand xmm15,xmm8
+ pxor xmm2,xmm0
+ psllq xmm0,2
+ pxor xmm1,xmm15
+ psllq xmm15,2
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm9,xmm2 ; exchange step (shift 4, mask xmm7) on pairs xmm2/xmm6 and xmm1/xmm5
+ psrlq xmm2,4
+ movdqa xmm10,xmm1
+ psrlq xmm1,4
+ pxor xmm2,xmm6
+ pxor xmm1,xmm5
+ pand xmm2,xmm7
+ pand xmm1,xmm7
+ pxor xmm6,xmm2
+ psllq xmm2,4
+ pxor xmm5,xmm1
+ psllq xmm1,4
+ pxor xmm2,xmm9
+ pxor xmm1,xmm10
+ movdqa xmm9,xmm0 ; shift-4 exchange on pairs xmm0/xmm4 and xmm15/xmm3
+ psrlq xmm0,4
+ movdqa xmm10,xmm15
+ psrlq xmm15,4
+ pxor xmm0,xmm4
+ pxor xmm15,xmm3
+ pand xmm0,xmm7
+ pand xmm15,xmm7
+ pxor xmm4,xmm0
+ psllq xmm0,4
+ pxor xmm3,xmm15
+ psllq xmm15,4
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ dec r10d ; one round accounted for before entering the loop
+ jmp $L$dec_sbox ; first pass skips the key-XOR/shuffle stage below; NOTE(review): label definitions are not visible in this listing
+ pxor xmm15,XMMWORD PTR[rax] ; presumably $L$dec_loop: XOR 128 bytes of round-key data at [rax] into the eight state registers
+ pxor xmm0,XMMWORD PTR[16+rax]
+DB 102,68,15,56,0,255 ; pshufb xmm15,xmm7 (xmm7 = shuffle mask chosen at the bottom of the loop)
+ pxor xmm1,XMMWORD PTR[32+rax]
+DB 102,15,56,0,199 ; pshufb xmm0,xmm7
+ pxor xmm2,XMMWORD PTR[48+rax]
+DB 102,15,56,0,207 ; pshufb xmm1,xmm7
+ pxor xmm3,XMMWORD PTR[64+rax]
+DB 102,15,56,0,215 ; pshufb xmm2,xmm7
+ pxor xmm4,XMMWORD PTR[80+rax]
+DB 102,15,56,0,223 ; pshufb xmm3,xmm7
+ pxor xmm5,XMMWORD PTR[96+rax]
+DB 102,15,56,0,231 ; pshufb xmm4,xmm7
+ pxor xmm6,XMMWORD PTR[112+rax]
+DB 102,15,56,0,239 ; pshufb xmm5,xmm7
+ lea rax,QWORD PTR[128+rax] ; advance to the next round's 128 bytes of key data
+DB 102,15,56,0,247 ; pshufb xmm6,xmm7
+ pxor xmm2,xmm3 ; presumably $L$dec_sbox: start of a pxor/pand/por-only network over the eight registers (looks like the bit-sliced inverse S-box; TODO confirm)
+ pxor xmm3,xmm6
+ pxor xmm1,xmm6
+ pxor xmm5,xmm3
+ pxor xmm6,xmm5
+ pxor xmm0,xmm6
+ pxor xmm15,xmm0
+ pxor xmm1,xmm4
+ pxor xmm2,xmm15
+ pxor xmm4,xmm15
+ pxor xmm0,xmm2
+ movdqa xmm10,xmm2
+ movdqa xmm9,xmm6
+ movdqa xmm8,xmm0
+ movdqa xmm12,xmm3
+ movdqa xmm11,xmm4
+ pxor xmm10,xmm15
+ pxor xmm9,xmm3
+ pxor xmm8,xmm5
+ movdqa xmm13,xmm10
+ pxor xmm12,xmm15
+ movdqa xmm7,xmm9
+ pxor xmm11,xmm1
+ movdqa xmm14,xmm10
+ por xmm9,xmm8
+ por xmm10,xmm11
+ pxor xmm14,xmm7
+ pand xmm13,xmm11
+ pxor xmm11,xmm8
+ pand xmm7,xmm8
+ pand xmm14,xmm11
+ movdqa xmm11,xmm5
+ pxor xmm11,xmm1
+ pand xmm12,xmm11
+ pxor xmm10,xmm12
+ pxor xmm9,xmm12
+ movdqa xmm12,xmm2
+ movdqa xmm11,xmm0
+ pxor xmm12,xmm6
+ pxor xmm11,xmm4
+ movdqa xmm8,xmm12
+ pand xmm12,xmm11
+ por xmm8,xmm11
+ pxor xmm7,xmm12
+ pxor xmm10,xmm14
+ pxor xmm9,xmm13
+ pxor xmm8,xmm14
+ movdqa xmm11,xmm3
+ pxor xmm7,xmm13
+ movdqa xmm12,xmm15
+ pxor xmm8,xmm13
+ movdqa xmm13,xmm6
+ pand xmm11,xmm5
+ movdqa xmm14,xmm2
+ pand xmm12,xmm1
+ pand xmm13,xmm0
+ por xmm14,xmm4
+ pxor xmm10,xmm11
+ pxor xmm9,xmm12
+ pxor xmm8,xmm13
+ pxor xmm7,xmm14
+ movdqa xmm11,xmm10
+ pand xmm10,xmm8
+ pxor xmm11,xmm9
+ movdqa xmm13,xmm7
+ movdqa xmm14,xmm11
+ pxor xmm13,xmm10
+ pand xmm14,xmm13
+ movdqa xmm12,xmm8
+ pxor xmm14,xmm9
+ pxor xmm12,xmm7
+ pxor xmm10,xmm9
+ pand xmm12,xmm10
+ movdqa xmm9,xmm13
+ pxor xmm12,xmm7
+ pxor xmm9,xmm12
+ pxor xmm8,xmm12
+ pand xmm9,xmm7
+ pxor xmm13,xmm9
+ pxor xmm8,xmm9
+ pand xmm13,xmm14
+ pxor xmm13,xmm11
+ movdqa xmm11,xmm4
+ movdqa xmm7,xmm0
+ movdqa xmm9,xmm14
+ pxor xmm9,xmm13
+ pand xmm9,xmm4
+ pxor xmm4,xmm0
+ pand xmm0,xmm14
+ pand xmm4,xmm13
+ pxor xmm4,xmm0
+ pxor xmm0,xmm9
+ pxor xmm11,xmm1
+ pxor xmm7,xmm5
+ pxor xmm14,xmm12
+ pxor xmm13,xmm8
+ movdqa xmm10,xmm14
+ movdqa xmm9,xmm12
+ pxor xmm10,xmm13
+ pxor xmm9,xmm8
+ pand xmm10,xmm11
+ pand xmm9,xmm1
+ pxor xmm11,xmm7
+ pxor xmm1,xmm5
+ pand xmm7,xmm14
+ pand xmm5,xmm12
+ pand xmm11,xmm13
+ pand xmm1,xmm8
+ pxor xmm7,xmm11
+ pxor xmm1,xmm5
+ pxor xmm11,xmm10
+ pxor xmm5,xmm9
+ pxor xmm4,xmm11
+ pxor xmm1,xmm11
+ pxor xmm0,xmm7
+ pxor xmm5,xmm7
+ movdqa xmm11,xmm2
+ movdqa xmm7,xmm6
+ pxor xmm11,xmm15
+ pxor xmm7,xmm3
+ movdqa xmm10,xmm14
+ movdqa xmm9,xmm12
+ pxor xmm10,xmm13
+ pxor xmm9,xmm8
+ pand xmm10,xmm11
+ pand xmm9,xmm15
+ pxor xmm11,xmm7
+ pxor xmm15,xmm3
+ pand xmm7,xmm14
+ pand xmm3,xmm12
+ pand xmm11,xmm13
+ pand xmm15,xmm8
+ pxor xmm7,xmm11
+ pxor xmm15,xmm3
+ pxor xmm11,xmm10
+ pxor xmm3,xmm9
+ pxor xmm14,xmm12
+ pxor xmm13,xmm8
+ movdqa xmm10,xmm14
+ pxor xmm10,xmm13
+ pand xmm10,xmm2
+ pxor xmm2,xmm6
+ pand xmm6,xmm14
+ pand xmm2,xmm13
+ pxor xmm2,xmm6
+ pxor xmm6,xmm10
+ pxor xmm2,xmm11
+ pxor xmm15,xmm11
+ pxor xmm6,xmm7
+ pxor xmm3,xmm7
+ pxor xmm0,xmm6
+ pxor xmm5,xmm4
+ pxor xmm3,xmm0
+ pxor xmm1,xmm6
+ pxor xmm4,xmm6
+ pxor xmm3,xmm1
+ pxor xmm6,xmm15
+ pxor xmm3,xmm4
+ pxor xmm2,xmm5
+ pxor xmm5,xmm0
+ pxor xmm2,xmm3
+ pxor xmm3,xmm15
+ pxor xmm6,xmm2
+ dec r10d ; count this round; the flags set here also drive the jnz further down
+ jl $L$dec_done ; counter went negative: leave the round loop
+ pshufd xmm7,xmm15,04Eh ; dword rotations (04Eh first, then 093h and 04Eh again) XORed across registers - presumably InvMixColumns on the sliced state; TODO confirm
+ pshufd xmm13,xmm2,04Eh
+ pxor xmm7,xmm15
+ pshufd xmm14,xmm4,04Eh
+ pxor xmm13,xmm2
+ pshufd xmm8,xmm0,04Eh
+ pxor xmm14,xmm4
+ pshufd xmm9,xmm5,04Eh
+ pxor xmm8,xmm0
+ pshufd xmm10,xmm3,04Eh
+ pxor xmm9,xmm5
+ pxor xmm15,xmm13
+ pxor xmm0,xmm13
+ pshufd xmm11,xmm1,04Eh
+ pxor xmm10,xmm3
+ pxor xmm5,xmm7
+ pxor xmm3,xmm8
+ pshufd xmm12,xmm6,04Eh
+ pxor xmm11,xmm1
+ pxor xmm0,xmm14
+ pxor xmm1,xmm9
+ pxor xmm12,xmm6
+ pxor xmm5,xmm14
+ pxor xmm3,xmm13
+ pxor xmm1,xmm13
+ pxor xmm6,xmm10
+ pxor xmm2,xmm11
+ pxor xmm1,xmm14
+ pxor xmm6,xmm14
+ pxor xmm4,xmm12
+ pshufd xmm7,xmm15,093h
+ pshufd xmm8,xmm0,093h
+ pxor xmm15,xmm7
+ pshufd xmm9,xmm5,093h
+ pxor xmm0,xmm8
+ pshufd xmm10,xmm3,093h
+ pxor xmm5,xmm9
+ pshufd xmm11,xmm1,093h
+ pxor xmm3,xmm10
+ pshufd xmm12,xmm6,093h
+ pxor xmm1,xmm11
+ pshufd xmm13,xmm2,093h
+ pxor xmm6,xmm12
+ pshufd xmm14,xmm4,093h
+ pxor xmm2,xmm13
+ pxor xmm4,xmm14
+ pxor xmm8,xmm15
+ pxor xmm7,xmm4
+ pxor xmm8,xmm4
+ pshufd xmm15,xmm15,04Eh
+ pxor xmm9,xmm0
+ pshufd xmm0,xmm0,04Eh
+ pxor xmm12,xmm1
+ pxor xmm15,xmm7
+ pxor xmm13,xmm6
+ pxor xmm0,xmm8
+ pxor xmm11,xmm3
+ pshufd xmm7,xmm1,04Eh
+ pxor xmm14,xmm2
+ pshufd xmm8,xmm6,04Eh
+ pxor xmm10,xmm5
+ pshufd xmm1,xmm3,04Eh
+ pxor xmm10,xmm4
+ pshufd xmm6,xmm4,04Eh
+ pxor xmm11,xmm4
+ pshufd xmm3,xmm2,04Eh
+ pxor xmm7,xmm11
+ pshufd xmm2,xmm5,04Eh
+ pxor xmm8,xmm12
+ pxor xmm10,xmm1
+ pxor xmm6,xmm14
+ pxor xmm13,xmm3
+ movdqa xmm3,xmm7
+ pxor xmm2,xmm9
+ movdqa xmm5,xmm13
+ movdqa xmm4,xmm8
+ movdqa xmm1,xmm2
+ movdqa xmm2,xmm10
+ movdqa xmm7,XMMWORD PTR[((-16))+r11] ; shuffle mask for the next pass when rounds remain
+ jnz $L$dec_loop ; ZF still comes from the dec r10d above (the SSE instructions in between leave flags alone)
+ movdqa xmm7,XMMWORD PTR[((-32))+r11] ; counter hit zero: the final pass uses a different shuffle mask
+ jmp $L$dec_loop
+ movdqa xmm7,XMMWORD PTR[r11] ; presumably $L$dec_done: reload the shift-1 mask and undo the bit-slicing with the same exchange steps
+ movdqa xmm8,XMMWORD PTR[16+r11] ; shift-2 mask
+ movdqa xmm9,xmm2 ; shift-1 exchange on pairs xmm2/xmm4 and xmm1/xmm6
+ psrlq xmm2,1
+ movdqa xmm10,xmm1
+ psrlq xmm1,1
+ pxor xmm2,xmm4
+ pxor xmm1,xmm6
+ pand xmm2,xmm7
+ pand xmm1,xmm7
+ pxor xmm4,xmm2
+ psllq xmm2,1
+ pxor xmm6,xmm1
+ psllq xmm1,1
+ pxor xmm2,xmm9
+ pxor xmm1,xmm10
+ movdqa xmm9,xmm5 ; shift-1 exchange on pairs xmm5/xmm3 and xmm15/xmm0
+ psrlq xmm5,1
+ movdqa xmm10,xmm15
+ psrlq xmm15,1
+ pxor xmm5,xmm3
+ pxor xmm15,xmm0
+ pand xmm5,xmm7
+ pand xmm15,xmm7
+ pxor xmm3,xmm5
+ psllq xmm5,1
+ pxor xmm0,xmm15
+ psllq xmm15,1
+ pxor xmm5,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[32+r11] ; shift-4 mask
+ movdqa xmm9,xmm6 ; shift-2 exchange on pairs xmm6/xmm4 and xmm1/xmm2
+ psrlq xmm6,2
+ movdqa xmm10,xmm1
+ psrlq xmm1,2
+ pxor xmm6,xmm4
+ pxor xmm1,xmm2
+ pand xmm6,xmm8
+ pand xmm1,xmm8
+ pxor xmm4,xmm6
+ psllq xmm6,2
+ pxor xmm2,xmm1
+ psllq xmm1,2
+ pxor xmm6,xmm9
+ pxor xmm1,xmm10
+ movdqa xmm9,xmm0 ; shift-2 exchange on pairs xmm0/xmm3 and xmm15/xmm5
+ psrlq xmm0,2
+ movdqa xmm10,xmm15
+ psrlq xmm15,2
+ pxor xmm0,xmm3
+ pxor xmm15,xmm5
+ pand xmm0,xmm8
+ pand xmm15,xmm8
+ pxor xmm3,xmm0
+ psllq xmm0,2
+ pxor xmm5,xmm15
+ psllq xmm15,2
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm9,xmm3 ; shift-4 exchange on pairs xmm3/xmm4 and xmm5/xmm2
+ psrlq xmm3,4
+ movdqa xmm10,xmm5
+ psrlq xmm5,4
+ pxor xmm3,xmm4
+ pxor xmm5,xmm2
+ pand xmm3,xmm7
+ pand xmm5,xmm7
+ pxor xmm4,xmm3
+ psllq xmm3,4
+ pxor xmm2,xmm5
+ psllq xmm5,4
+ pxor xmm3,xmm9
+ pxor xmm5,xmm10
+ movdqa xmm9,xmm0 ; shift-4 exchange on pairs xmm0/xmm6 and xmm15/xmm1
+ psrlq xmm0,4
+ movdqa xmm10,xmm15
+ psrlq xmm15,4
+ pxor xmm0,xmm6
+ pxor xmm15,xmm1
+ pand xmm0,xmm7
+ pand xmm15,xmm7
+ pxor xmm6,xmm0
+ psllq xmm0,4
+ pxor xmm1,xmm15
+ psllq xmm15,4
+ pxor xmm0,xmm9
+ pxor xmm15,xmm10
+ movdqa xmm7,XMMWORD PTR[rax] ; final 16-byte key block, XORed into all eight results
+ pxor xmm5,xmm7 ; results are left in xmm15,xmm0,xmm5,xmm3,xmm1,xmm6,xmm2,xmm4 (block order as consumed by bsaes_cbc_encrypt)
+ pxor xmm3,xmm7
+ pxor xmm1,xmm7
+ pxor xmm6,xmm7
+ pxor xmm2,xmm7
+ pxor xmm4,xmm7
+ pxor xmm15,xmm7
+ pxor xmm0,xmm7
+ DB 0F3h,0C3h ;repret
+_bsaes_decrypt8 ENDP
+_bsaes_key_convert PROC PRIVATE ; expands a key schedule into per-bit mask planes. In: rcx -> source round keys, rax -> destination, r10d = round count
+ lea r11,QWORD PTR[$L$masks] ; r11 -> constant table holding the bit masks used below
+ movdqu xmm7,XMMWORD PTR[rcx] ; first round key (unaligned load); it is stored unchanged below
+ lea rcx,QWORD PTR[16+rcx]
+ movdqa xmm0,XMMWORD PTR[r11] ; xmm0..xmm3: four bit-select masks
+ movdqa xmm1,XMMWORD PTR[16+r11]
+ movdqa xmm2,XMMWORD PTR[32+r11]
+ movdqa xmm3,XMMWORD PTR[48+r11]
+ movdqa xmm4,XMMWORD PTR[64+r11] ; byte-shuffle mask applied to every key before slicing
+ pcmpeqd xmm5,xmm5 ; xmm5 = all ones, used to invert selected planes
+ movdqu xmm6,XMMWORD PTR[rcx] ; next round key to convert
+ movdqa XMMWORD PTR[rax],xmm7 ; destination[0..15] = first round key as-is
+ lea rax,QWORD PTR[16+rax]
+ dec r10d ; the loop body runs (round count - 1) times
+ jmp $L$key_loop ; NOTE(review): the $L$key_loop definition is not visible in this listing; presumably the next line
+DB 102,15,56,0,244 ; pshufb xmm6,xmm4 (SSSE3 opcode emitted as raw bytes)
+ movdqa xmm8,xmm0 ; for each mask: plane = (key AND mask) compared bytewise to mask, i.e. 0xFF in bytes where that key bit is set
+ movdqa xmm9,xmm1
+ pand xmm8,xmm6
+ pand xmm9,xmm6
+ movdqa xmm10,xmm2
+ pcmpeqb xmm8,xmm0
+ psllq xmm0,4 ; shift the masks left by 4 to select the next four bit positions (restored by psrlq below)
+ movdqa xmm11,xmm3
+ pcmpeqb xmm9,xmm1
+ psllq xmm1,4
+ pand xmm10,xmm6
+ pand xmm11,xmm6
+ movdqa xmm12,xmm0
+ pcmpeqb xmm10,xmm2
+ psllq xmm2,4
+ movdqa xmm13,xmm1
+ pcmpeqb xmm11,xmm3
+ psllq xmm3,4
+ movdqa xmm14,xmm2
+ movdqa xmm15,xmm3
+ pxor xmm8,xmm5 ; invert planes xmm8 and xmm9 (xmm13 and xmm14 are inverted further down); presumably folds a constant into the key - TODO confirm
+ pxor xmm9,xmm5
+ pand xmm12,xmm6
+ pand xmm13,xmm6
+ movdqa XMMWORD PTR[rax],xmm8 ; the eight planes are written to [rax] .. [112+rax]
+ pcmpeqb xmm12,xmm0
+ psrlq xmm0,4 ; restore mask to its original bit position for the next iteration
+ movdqa XMMWORD PTR[16+rax],xmm9
+ pcmpeqb xmm13,xmm1
+ psrlq xmm1,4
+ lea rcx,QWORD PTR[16+rcx] ; advance the source pointer to the following round key
+ pand xmm14,xmm6
+ pand xmm15,xmm6
+ movdqa XMMWORD PTR[32+rax],xmm10
+ pcmpeqb xmm14,xmm2
+ psrlq xmm2,4
+ movdqa XMMWORD PTR[48+rax],xmm11
+ pcmpeqb xmm15,xmm3
+ psrlq xmm3,4
+ movdqu xmm6,XMMWORD PTR[rcx] ; preload the following round key; after the final iteration this is returned in xmm6 unconverted
+ pxor xmm13,xmm5
+ pxor xmm14,xmm5
+ movdqa XMMWORD PTR[64+rax],xmm12
+ movdqa XMMWORD PTR[80+rax],xmm13
+ movdqa XMMWORD PTR[96+rax],xmm14
+ movdqa XMMWORD PTR[112+rax],xmm15
+ lea rax,QWORD PTR[128+rax] ; each converted round key occupies 128 bytes
+ dec r10d
+ jnz $L$key_loop
+ movdqa xmm7,XMMWORD PTR[80+r11] ; returned in xmm7 for the caller's fix-up; on return rax points just past the last written plane
+ DB 0F3h,0C3h ;repret
+_bsaes_key_convert ENDP
+EXTERN asm_AES_cbc_encrypt:NEAR
+PUBLIC bsaes_cbc_encrypt
+bsaes_cbc_encrypt PROC PUBLIC ; Win64 entry: rcx = in, rdx = out, r8 = length in bytes, r9 = key, 5th stack arg = IV pointer, 6th stack arg = enc flag. Only CBC decryption of >= 128 bytes is handled here
+ mov r11d,DWORD PTR[48+rsp] ; 6th argument (enc flag) as laid out by the Microsoft x64 calling convention
+ cmp r11d,0
+ jne asm_AES_cbc_encrypt ; non-zero flag: tail-jump to asm_AES_cbc_encrypt with the arguments untouched
+ cmp r8,128
+ jb asm_AES_cbc_encrypt ; fewer than 128 bytes (eight blocks): also delegated
+ mov rax,rsp
+ push rbp ; save callee-saved GPRs (restored from [72+rsp]..[112+rsp] in the epilogue)
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-72))+rsp]
+ mov r10,QWORD PTR[160+rsp] ; 5th argument (IV pointer): 160 = 72 + 6 pushes*8 + 40
+ lea rsp,QWORD PTR[((-160))+rsp]
+ movaps XMMWORD PTR[64+rsp],xmm6 ; save xmm6..xmm15, which are nonvolatile under the Microsoft x64 convention
+ movaps XMMWORD PTR[80+rsp],xmm7
+ movaps XMMWORD PTR[96+rsp],xmm8
+ movaps XMMWORD PTR[112+rsp],xmm9
+ movaps XMMWORD PTR[128+rsp],xmm10
+ movaps XMMWORD PTR[144+rsp],xmm11
+ movaps XMMWORD PTR[160+rsp],xmm12
+ movaps XMMWORD PTR[176+rsp],xmm13
+ movaps XMMWORD PTR[192+rsp],xmm14
+ movaps XMMWORD PTR[208+rsp],xmm15
+ mov rbp,rsp ; rbp = frame base; [32+rbp] is used as a 16-byte scratch slot for the IV
+ mov eax,DWORD PTR[240+r9] ; 32-bit field at offset 240 of the key - used as the round count (presumably AES_KEY.rounds; TODO confirm)
+ mov r12,rcx ; r12 = in
+ mov r13,rdx ; r13 = out
+ mov r14,r8 ; r14 = length
+ mov r15,r9 ; r15 = key
+ mov rbx,r10 ; rbx = IV pointer
+ shr r14,4 ; byte length -> number of 16-byte blocks
+ mov edx,eax ; edx keeps the round count for the whole function
+ shl rax,7 ; reserve rounds*128 - 96 bytes of stack for the converted key schedule
+ sub rax,96
+ sub rsp,rax
+ mov rax,rsp ; rax = destination for _bsaes_key_convert
+ mov rcx,r15 ; rcx = source key
+ mov r10d,edx
+ call _bsaes_key_convert
+ pxor xmm7,XMMWORD PTR[rsp] ; fix-up: XOR the constant returned in xmm7 into the first stored key block ...
+ movdqa XMMWORD PTR[rax],xmm6 ; ... store the unconverted key returned in xmm6 at the end of the schedule ...
+ movdqa XMMWORD PTR[rsp],xmm7 ; ... and write the adjusted first block back
+ movdqu xmm14,XMMWORD PTR[rbx] ; xmm14 = current IV
+ sub r14,8
+ movdqu xmm15,XMMWORD PTR[r12] ; presumably $L$cbc_dec_loop (label not visible in this listing): load eight ciphertext blocks
+ movdqu xmm0,XMMWORD PTR[16+r12]
+ movdqu xmm1,XMMWORD PTR[32+r12]
+ movdqu xmm2,XMMWORD PTR[48+r12]
+ movdqu xmm3,XMMWORD PTR[64+r12]
+ movdqu xmm4,XMMWORD PTR[80+r12]
+ mov rax,rsp ; rax -> converted key schedule
+ movdqu xmm5,XMMWORD PTR[96+r12]
+ mov r10d,edx ; r10d = round count
+ movdqu xmm6,XMMWORD PTR[112+r12]
+ movdqa XMMWORD PTR[32+rbp],xmm14 ; park the IV in the scratch slot; _bsaes_decrypt8 overwrites xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp] ; first plaintext = decrypted block XOR IV
+ movdqu xmm7,XMMWORD PTR[r12] ; reload ciphertext: block i is XORed with ciphertext block i-1
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm1,xmm10
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ pxor xmm6,xmm11
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm2,xmm12
+ movdqu xmm14,XMMWORD PTR[112+r12] ; last ciphertext block becomes the IV for the next batch
+ pxor xmm4,xmm13
+ movdqu XMMWORD PTR[r13],xmm15 ; store the eight plaintext blocks (output order xmm15,xmm0,xmm5,xmm3,xmm1,xmm6,xmm2,xmm4)
+ lea r12,QWORD PTR[128+r12]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ movdqu XMMWORD PTR[80+r13],xmm6
+ movdqu XMMWORD PTR[96+r13],xmm2
+ movdqu XMMWORD PTR[112+r13],xmm4
+ lea r13,QWORD PTR[128+r13]
+ sub r14,8
+ jnc $L$cbc_dec_loop ; no borrow: at least eight more blocks remain
+ add r14,8 ; r14 = leftover block count (0..7)
+ jz $L$cbc_dec_done
+ movdqu xmm15,XMMWORD PTR[r12] ; tail: load only the blocks that exist, then branch on the count
+ mov rax,rsp
+ mov r10d,edx
+ cmp r14,2
+ jb $L$cbc_dec_one
+ movdqu xmm0,XMMWORD PTR[16+r12]
+ je $L$cbc_dec_two
+ movdqu xmm1,XMMWORD PTR[32+r12]
+ cmp r14,4
+ jb $L$cbc_dec_three
+ movdqu xmm2,XMMWORD PTR[48+r12]
+ je $L$cbc_dec_four
+ movdqu xmm3,XMMWORD PTR[64+r12]
+ cmp r14,6
+ jb $L$cbc_dec_five
+ movdqu xmm4,XMMWORD PTR[80+r12]
+ je $L$cbc_dec_six
+ movdqu xmm5,XMMWORD PTR[96+r12] ; fall-through case: seven blocks remain
+ movdqa XMMWORD PTR[32+rbp],xmm14
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm1,xmm10
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ pxor xmm6,xmm11
+ movdqu xmm14,XMMWORD PTR[96+r12] ; last ciphertext block -> next IV
+ pxor xmm2,xmm12
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ movdqu XMMWORD PTR[80+r13],xmm6
+ movdqu XMMWORD PTR[96+r13],xmm2
+ jmp $L$cbc_dec_done
+ movdqa XMMWORD PTR[32+rbp],xmm14 ; presumably $L$cbc_dec_six: same pattern, six results stored
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm1,xmm10
+ movdqu xmm14,XMMWORD PTR[80+r12]
+ pxor xmm6,xmm11
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ movdqu XMMWORD PTR[80+r13],xmm6
+ jmp $L$cbc_dec_done
+ movdqa XMMWORD PTR[32+rbp],xmm14 ; presumably $L$cbc_dec_five: five results stored
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu xmm14,XMMWORD PTR[64+r12]
+ pxor xmm1,xmm10
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ jmp $L$cbc_dec_done
+ movdqa XMMWORD PTR[32+rbp],xmm14 ; presumably $L$cbc_dec_four: four results stored
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu xmm14,XMMWORD PTR[48+r12]
+ pxor xmm3,xmm9
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ jmp $L$cbc_dec_done
+ movdqa XMMWORD PTR[32+rbp],xmm14 ; presumably $L$cbc_dec_three: three results stored
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu xmm14,XMMWORD PTR[32+r12]
+ pxor xmm5,xmm8
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ jmp $L$cbc_dec_done
+ movdqa XMMWORD PTR[32+rbp],xmm14 ; presumably $L$cbc_dec_two: two results stored
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[32+rbp]
+ movdqu xmm7,XMMWORD PTR[r12]
+ movdqu xmm14,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm7
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ jmp $L$cbc_dec_done
+ lea rcx,QWORD PTR[r12] ; presumably $L$cbc_dec_one: single block goes through asm_AES_decrypt(in = r12, out = scratch slot, key = r15)
+ lea rdx,QWORD PTR[32+rbp]
+ lea r8,QWORD PTR[r15]
+ call asm_AES_decrypt
+ pxor xmm14,XMMWORD PTR[32+rbp] ; IV XOR decrypted block; NOTE(review): relies on asm_AES_decrypt leaving xmm14/xmm15 intact - confirm
+ movdqu XMMWORD PTR[r13],xmm14
+ movdqa xmm14,xmm15 ; the ciphertext block loaded earlier becomes the new IV
+ movdqu XMMWORD PTR[rbx],xmm14 ; presumably $L$cbc_dec_done: write the updated IV back through the IV pointer
+ lea rax,QWORD PTR[rsp]
+ pxor xmm0,xmm0
+ movdqa XMMWORD PTR[rax],xmm0 ; zero the stack from rsp up to rbp, 32 bytes per pass, wiping the converted key schedule
+ movdqa XMMWORD PTR[16+rax],xmm0
+ lea rax,QWORD PTR[32+rax]
+ cmp rbp,rax
+ ja $L$cbc_dec_bzero ; NOTE(review): label definition not visible in this listing; presumably the first movdqa of the wipe loop
+ lea rsp,QWORD PTR[rbp]
+ movaps xmm6,XMMWORD PTR[64+rbp] ; restore the nonvolatile xmm registers saved in the prologue
+ movaps xmm7,XMMWORD PTR[80+rbp]
+ movaps xmm8,XMMWORD PTR[96+rbp]
+ movaps xmm9,XMMWORD PTR[112+rbp]
+ movaps xmm10,XMMWORD PTR[128+rbp]
+ movaps xmm11,XMMWORD PTR[144+rbp]
+ movaps xmm12,XMMWORD PTR[160+rbp]
+ movaps xmm13,XMMWORD PTR[176+rbp]
+ movaps xmm14,XMMWORD PTR[192+rbp]
+ movaps xmm15,XMMWORD PTR[208+rbp]
+ lea rsp,QWORD PTR[160+rbp]
+ mov r15,QWORD PTR[72+rsp] ; restore the GPRs pushed in the prologue
+ mov r14,QWORD PTR[80+rsp]
+ mov r13,QWORD PTR[88+rsp]
+ mov r12,QWORD PTR[96+rsp]
+ mov rbx,QWORD PTR[104+rsp]
+ mov rax,QWORD PTR[112+rsp] ; saved rbp, moved into rbp after rsp is readjusted
+ lea rsp,QWORD PTR[120+rsp]
+ mov rbp,rax
+ DB 0F3h,0C3h ;repret
+bsaes_cbc_encrypt ENDP
+PUBLIC bsaes_ctr32_encrypt_blocks
+bsaes_ctr32_encrypt_blocks PROC PUBLIC ; CTR-style XOR of a keystream over 16-byte blocks: rcx=input, rdx=output, r8=number of blocks, r9=key, 5th (stack) arg=pointer to 16-byte counter block
+ mov rax,rsp
+ push rbp ; save the GPRs that the epilogue restores from 72..112(rsp)
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-72))+rsp] ; rsp = entry rsp - 120 (48 bytes of pushes + 72)
+ mov r10,QWORD PTR[160+rsp] ; r10 = qword at entry rsp + 40, i.e. the 5th argument
+ lea rsp,QWORD PTR[((-160))+rsp] ; 160 more bytes: save area for xmm6-xmm15
+ movaps XMMWORD PTR[64+rsp],xmm6 ; save xmm6-xmm15 at 64..208 of the frame
+ movaps XMMWORD PTR[80+rsp],xmm7
+ movaps XMMWORD PTR[96+rsp],xmm8
+ movaps XMMWORD PTR[112+rsp],xmm9
+ movaps XMMWORD PTR[128+rsp],xmm10
+ movaps XMMWORD PTR[144+rsp],xmm11
+ movaps XMMWORD PTR[160+rsp],xmm12
+ movaps XMMWORD PTR[176+rsp],xmm13
+ movaps XMMWORD PTR[192+rsp],xmm14
+ movaps XMMWORD PTR[208+rsp],xmm15
+ mov rbp,rsp ; rbp = frame base; everything below it is scratch that gets wiped on exit
+ movdqu xmm0,XMMWORD PTR[r10] ; load the caller's 16-byte counter block
+ mov eax,DWORD PTR[240+r9] ; dword at key+240 (presumably the round count - TODO confirm against the key structure)
+ mov r12,rcx ; r12 = input pointer
+ mov r13,rdx ; r13 = output pointer
+ mov r14,r8 ; r14 = remaining block count
+ mov r15,r9 ; r15 = key pointer
+ movdqa XMMWORD PTR[32+rbp],xmm0 ; working copy of the counter block at 32(rbp)
+ cmp r8,8
+ jb $L$ctr_enc_short ; fewer than 8 blocks: one-block-at-a-time path (label line not visible in this view)
+ mov ebx,eax ; ebx = copy of the value read from key+240
+ shl rax,7 ; stack allocation size = that value * 128 - 96 bytes
+ sub rax,96
+ sub rsp,rax
+ mov rax,rsp ; register arguments for _bsaes_key_convert (defined outside this view): rax=stack buffer,
+ mov rcx,r15 ; rcx=key,
+ mov r10d,ebx ; r10d=value from key+240
+ call _bsaes_key_convert
+ pxor xmm7,xmm6 ; combine two values left in xmm6/xmm7 by the helper and store at the rax it left behind
+ movdqa XMMWORD PTR[rax],xmm7
+ movdqa xmm8,XMMWORD PTR[rsp] ; first 16 bytes of the converted key
+ lea r11,QWORD PTR[$L$ADD1]
+ movdqa xmm15,XMMWORD PTR[32+rbp] ; xmm15 = counter block
+ movdqa xmm7,XMMWORD PTR[((-32))+r11] ; 16-byte constant located 32 bytes before $L$ADD1, used as a shuffle mask
+DB 102,68,15,56,0,199 ; encoded pshufb xmm8,xmm7
+DB 102,68,15,56,0,255 ; encoded pshufb xmm15,xmm7
+ movdqa XMMWORD PTR[rsp],xmm8 ; write the shuffled first key block back
+ jmp $L$ctr_enc_loop ; NOTE(review): the label definition lines are missing from this view
+ movdqa XMMWORD PTR[32+rbp],xmm15 ; presumably the top of $L$ctr_enc_loop: remember the counter for this batch
+ movdqa xmm0,xmm15 ; xmm0..xmm6 = counter + the constants at $L$ADD1+0..+96
+ movdqa xmm1,xmm15
+ paddd xmm0,XMMWORD PTR[r11]
+ movdqa xmm2,xmm15
+ paddd xmm1,XMMWORD PTR[16+r11]
+ movdqa xmm3,xmm15
+ paddd xmm2,XMMWORD PTR[32+r11]
+ movdqa xmm4,xmm15
+ paddd xmm3,XMMWORD PTR[48+r11]
+ movdqa xmm5,xmm15
+ paddd xmm4,XMMWORD PTR[64+r11]
+ movdqa xmm6,xmm15
+ paddd xmm5,XMMWORD PTR[80+r11]
+ paddd xmm6,XMMWORD PTR[96+r11]
+ movdqa xmm8,XMMWORD PTR[rsp] ; first key block
+ lea rax,QWORD PTR[16+rsp] ; rax = address of the next 16 bytes of the converted key
+ movdqa xmm7,XMMWORD PTR[((-16))+r11] ; shuffle mask located 16 bytes before $L$ADD1
+ pxor xmm15,xmm8 ; XOR the first key block into all eight counter blocks, interleaved with byte shuffles
+ pxor xmm0,xmm8
+DB 102,68,15,56,0,255 ; encoded pshufb xmm15,xmm7
+ pxor xmm1,xmm8
+DB 102,15,56,0,199 ; encoded pshufb xmm0,xmm7
+ pxor xmm2,xmm8
+DB 102,15,56,0,207 ; encoded pshufb xmm1,xmm7
+ pxor xmm3,xmm8
+DB 102,15,56,0,215 ; encoded pshufb xmm2,xmm7
+ pxor xmm4,xmm8
+DB 102,15,56,0,223 ; encoded pshufb xmm3,xmm7
+ pxor xmm5,xmm8
+DB 102,15,56,0,231 ; encoded pshufb xmm4,xmm7
+ pxor xmm6,xmm8
+DB 102,15,56,0,239 ; encoded pshufb xmm5,xmm7
+ lea r11,QWORD PTR[$L$BS0]
+DB 102,15,56,0,247 ; encoded pshufb xmm6,xmm7
+ mov r10d,ebx
+ call _bsaes_encrypt8_bitslice ; helper defined outside this view; produces the eight keystream blocks
+ sub r14,8
+ jc $L$ctr_enc_loop_done ; borrow: fewer than 8 blocks were left, handle the partial batch
+ movdqu xmm7,XMMWORD PTR[r12] ; load eight input blocks
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ movdqu xmm14,XMMWORD PTR[112+r12]
+ lea r12,QWORD PTR[128+r12]
+ pxor xmm7,xmm15 ; XOR keystream with input; keystream registers are consumed in the order xmm15,0,3,5,2,6,1,4
+ movdqa xmm15,XMMWORD PTR[32+rbp] ; reload the counter saved at the top of the loop
+ pxor xmm0,xmm8
+ movdqu XMMWORD PTR[r13],xmm7
+ pxor xmm3,xmm9
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,xmm10
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,xmm11
+ movdqu XMMWORD PTR[48+r13],xmm5
+ pxor xmm6,xmm12
+ movdqu XMMWORD PTR[64+r13],xmm2
+ pxor xmm1,xmm13
+ movdqu XMMWORD PTR[80+r13],xmm6
+ pxor xmm4,xmm14
+ movdqu XMMWORD PTR[96+r13],xmm1
+ lea r11,QWORD PTR[$L$ADD1]
+ movdqu XMMWORD PTR[112+r13],xmm4
+ lea r13,QWORD PTR[128+r13]
+ paddd xmm15,XMMWORD PTR[112+r11] ; advance the counter by the constant at $L$ADD1+112
+ jnz $L$ctr_enc_loop ; ZF still comes from "sub r14,8": none of the SSE/lea instructions in between touch flags
+ jmp $L$ctr_enc_done
+ add r14,8 ; presumably $L$ctr_enc_loop_done: undo the subtraction, r14 = 1..7 blocks left
+ movdqu xmm7,XMMWORD PTR[r12] ; block 1 of the partial batch
+ pxor xmm15,xmm7
+ movdqu XMMWORD PTR[r13],xmm15
+ cmp r14,2
+ jb $L$ctr_enc_done ; only one block was left
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ pxor xmm0,xmm8
+ movdqu XMMWORD PTR[16+r13],xmm0
+ je $L$ctr_enc_done ; flags still from "cmp r14,2": exactly two blocks
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ pxor xmm3,xmm9
+ movdqu XMMWORD PTR[32+r13],xmm3
+ cmp r14,4
+ jb $L$ctr_enc_done
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ pxor xmm5,xmm10
+ movdqu XMMWORD PTR[48+r13],xmm5
+ je $L$ctr_enc_done
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ pxor xmm2,xmm11
+ movdqu XMMWORD PTR[64+r13],xmm2
+ cmp r14,6
+ jb $L$ctr_enc_done
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ pxor xmm6,xmm12
+ movdqu XMMWORD PTR[80+r13],xmm6
+ je $L$ctr_enc_done
+ movdqu xmm13,XMMWORD PTR[96+r12] ; seventh and last possible block
+ pxor xmm1,xmm13
+ movdqu XMMWORD PTR[96+r13],xmm1
+ jmp $L$ctr_enc_done
+ lea rcx,QWORD PTR[32+rbp] ; presumably $L$ctr_enc_short: asm_AES_encrypt(in=counter at 32(rbp),
+ lea rdx,QWORD PTR[48+rbp] ; out=48(rbp),
+ lea r8,QWORD PTR[r15] ; key)
+ call asm_AES_encrypt
+ movdqu xmm0,XMMWORD PTR[r12] ; one input block
+ lea r12,QWORD PTR[16+r12]
+ mov eax,DWORD PTR[44+rbp] ; last dword of the counter block
+ bswap eax ; byte-swap so that inc operates on it as a big-endian value
+ pxor xmm0,XMMWORD PTR[48+rbp] ; input XOR encrypted counter
+ inc eax
+ movdqu XMMWORD PTR[r13],xmm0
+ bswap eax
+ lea r13,QWORD PTR[16+r13]
+ mov DWORD PTR[44+rsp],eax ; same slot as 44(rbp): on this path rsp was never lowered below rbp
+ dec r14
+ jnz $L$ctr_enc_short
+ lea rax,QWORD PTR[rsp] ; presumably $L$ctr_enc_done: wipe the scratch area between rsp and rbp
+ pxor xmm0,xmm0
+ movdqa XMMWORD PTR[rax],xmm0 ; presumably $L$ctr_enc_bzero: zero 32 bytes per iteration
+ movdqa XMMWORD PTR[16+rax],xmm0
+ lea rax,QWORD PTR[32+rax]
+ cmp rbp,rax
+ ja $L$ctr_enc_bzero ; continue while rax is still below the frame base
+ lea rsp,QWORD PTR[rbp] ; drop the scratch area
+ movaps xmm6,XMMWORD PTR[64+rbp] ; restore xmm6-xmm15
+ movaps xmm7,XMMWORD PTR[80+rbp]
+ movaps xmm8,XMMWORD PTR[96+rbp]
+ movaps xmm9,XMMWORD PTR[112+rbp]
+ movaps xmm10,XMMWORD PTR[128+rbp]
+ movaps xmm11,XMMWORD PTR[144+rbp]
+ movaps xmm12,XMMWORD PTR[160+rbp]
+ movaps xmm13,XMMWORD PTR[176+rbp]
+ movaps xmm14,XMMWORD PTR[192+rbp]
+ movaps xmm15,XMMWORD PTR[208+rbp]
+ lea rsp,QWORD PTR[160+rbp] ; rsp = entry rsp - 120
+ mov r15,QWORD PTR[72+rsp] ; reload the pushed registers by offset instead of popping
+ mov r14,QWORD PTR[80+rsp]
+ mov r13,QWORD PTR[88+rsp]
+ mov r12,QWORD PTR[96+rsp]
+ mov rbx,QWORD PTR[104+rsp]
+ mov rax,QWORD PTR[112+rsp] ; saved rbp, moved into rbp only after rsp is final
+ lea rsp,QWORD PTR[120+rsp] ; rsp = entry rsp
+ mov rbp,rax
+ DB 0F3h,0C3h ;repret
+bsaes_ctr32_encrypt_blocks ENDP
+PUBLIC bsaes_xts_encrypt
+bsaes_xts_encrypt PROC PUBLIC ; XTS-style encryption: rcx=input, rdx=output, r8=length in bytes, r9=data key, 5th stack arg=second key, 6th stack arg=16-byte block that is encrypted to form the first tweak
+ mov rax,rsp
+ push rbp ; save the GPRs that the epilogue restores from 72..112(rsp)
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-72))+rsp] ; rsp = entry rsp - 120
+ mov r10,QWORD PTR[160+rsp] ; qword at entry rsp + 40: 5th argument
+ mov r11,QWORD PTR[168+rsp] ; qword at entry rsp + 48: 6th argument
+ lea rsp,QWORD PTR[((-160))+rsp] ; save area for xmm6-xmm15
+ movaps XMMWORD PTR[64+rsp],xmm6
+ movaps XMMWORD PTR[80+rsp],xmm7
+ movaps XMMWORD PTR[96+rsp],xmm8
+ movaps XMMWORD PTR[112+rsp],xmm9
+ movaps XMMWORD PTR[128+rsp],xmm10
+ movaps XMMWORD PTR[144+rsp],xmm11
+ movaps XMMWORD PTR[160+rsp],xmm12
+ movaps XMMWORD PTR[176+rsp],xmm13
+ movaps XMMWORD PTR[192+rsp],xmm14
+ movaps XMMWORD PTR[208+rsp],xmm15
+ mov rbp,rsp ; rbp = frame base
+ mov r12,rcx ; r12 = input pointer
+ mov r13,rdx ; r13 = output pointer
+ mov r14,r8 ; r14 = byte length
+ mov r15,r9 ; r15 = data key pointer
+ lea rcx,QWORD PTR[r11] ; asm_AES_encrypt(in=6th arg,
+ lea rdx,QWORD PTR[32+rbp] ; out=32(rbp),
+ lea r8,QWORD PTR[r10] ; key=5th arg): result is the initial tweak (argument roles inferred from the other call sites - TODO confirm)
+ call asm_AES_encrypt
+ mov eax,DWORD PTR[240+r15] ; dword at key+240 (presumably the round count - TODO confirm)
+ mov rbx,r14 ; keep the original length; its low 4 bits decide whether stealing is needed
+ mov edx,eax ; edx = copy of the value from key+240, passed to the helpers in r10d
+ shl rax,7 ; stack allocation size = that value * 128 - 96 bytes
+ sub rax,96
+ sub rsp,rax
+ mov rax,rsp ; register arguments for _bsaes_key_convert (defined outside this view): rax=stack buffer,
+ mov rcx,r15 ; rcx=key,
+ mov r10d,edx ; r10d=value from key+240
+ call _bsaes_key_convert
+ pxor xmm7,xmm6 ; combine two values left in xmm6/xmm7 by the helper and store at the rax it left behind
+ movdqa XMMWORD PTR[rax],xmm7
+ and r14,-16 ; whole blocks only
+ sub rsp,080h ; 128 bytes at rsp hold the eight per-block tweaks
+ movdqa xmm6,XMMWORD PTR[32+rbp] ; xmm6 = current tweak
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic]
+ pcmpgtd xmm14,xmm6 ; xmm14 = per-dword sign mask of the tweak (0 > x)
+ sub r14,080h
+ jc $L$xts_enc_short ; fewer than 128 bytes of whole blocks
+ jmp $L$xts_enc_loop ; NOTE(review): the label definition lines are missing from this view
+ pshufd xmm13,xmm14,013h ; presumably top of $L$xts_enc_loop: route sign of dword 3 to dword 0 and sign of dword 1 to dword 2
+ pxor xmm14,xmm14
+ movdqa xmm15,xmm6 ; tweak 0 -> xmm15 and 0(rsp)
+ movdqa XMMWORD PTR[rsp],xmm6
+ paddq xmm6,xmm6 ; shift both 64-bit halves left by one bit
+ pand xmm13,xmm12 ; keep only the bits of the $L$xts_magic constant where a top bit was shifted out
+ pcmpgtd xmm14,xmm6 ; sign mask of the new value for the next step
+ pxor xmm6,xmm13 ; xmm6 = next tweak (presumably multiplication by x in GF(2^128) - TODO confirm)
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm0,xmm6 ; tweak 1 -> xmm0 and 16(rsp)
+ movdqa XMMWORD PTR[16+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm7,XMMWORD PTR[r12] ; input block 0
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm1,xmm6 ; tweak 2 -> xmm1 and 32(rsp)
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm8,XMMWORD PTR[16+r12] ; input block 1
+ pxor xmm15,xmm7 ; block 0 XOR tweak 0
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm2,xmm6 ; tweak 3 -> xmm2 and 48(rsp)
+ movdqa XMMWORD PTR[48+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm9,XMMWORD PTR[32+r12] ; input block 2
+ pxor xmm0,xmm8 ; block 1 XOR tweak 1
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm3,xmm6 ; tweak 4 -> xmm3 and 64(rsp)
+ movdqa XMMWORD PTR[64+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm10,XMMWORD PTR[48+r12] ; input block 3
+ pxor xmm1,xmm9 ; block 2 XOR tweak 2
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm4,xmm6 ; tweak 5 -> xmm4 and 80(rsp)
+ movdqa XMMWORD PTR[80+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm11,XMMWORD PTR[64+r12] ; input block 4
+ pxor xmm2,xmm10 ; block 3 XOR tweak 3
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm5,xmm6 ; tweak 6 -> xmm5 and 96(rsp)
+ movdqa XMMWORD PTR[96+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13 ; xmm6 = tweak 7
+ movdqu xmm12,XMMWORD PTR[80+r12] ; input block 5; this overwrites the magic constant, which is reloaded after the call
+ pxor xmm3,xmm11 ; block 4 XOR tweak 4
+ movdqu xmm13,XMMWORD PTR[96+r12] ; input block 6
+ pxor xmm4,xmm12 ; block 5 XOR tweak 5
+ movdqu xmm14,XMMWORD PTR[112+r12] ; input block 7
+ lea r12,QWORD PTR[128+r12]
+ movdqa XMMWORD PTR[112+rsp],xmm6 ; tweak 7 -> 112(rsp)
+ pxor xmm5,xmm13 ; block 6 XOR tweak 6
+ lea rax,QWORD PTR[128+rsp] ; rax = converted key, which sits just above the 128-byte tweak table
+ pxor xmm6,xmm14 ; block 7 XOR tweak 7
+ mov r10d,edx
+ call _bsaes_encrypt8 ; helper defined outside this view; encrypts the eight blocks held in registers
+ pxor xmm15,XMMWORD PTR[rsp] ; XOR each result with its tweak again; results are consumed in the order xmm15,0,3,5,2,6,1,4
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm5
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm2
+ pxor xmm1,XMMWORD PTR[96+rsp]
+ movdqu XMMWORD PTR[80+r13],xmm6
+ pxor xmm4,XMMWORD PTR[112+rsp]
+ movdqu XMMWORD PTR[96+r13],xmm1
+ movdqu XMMWORD PTR[112+r13],xmm4
+ lea r13,QWORD PTR[128+r13]
+ movdqa xmm6,XMMWORD PTR[112+rsp] ; last tweak used; derive the following one from it
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic] ; reload the constant clobbered above
+ pcmpgtd xmm14,xmm6
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13 ; xmm6 = first tweak of the next batch
+ sub r14,080h
+ jnc $L$xts_enc_loop ; at least another 128 bytes remain
+ add r14,080h ; presumably $L$xts_enc_short: r14 = 0..112 bytes of whole blocks left
+ jz $L$xts_enc_done
+ pshufd xmm13,xmm14,013h ; same tweak chain as above, with an exit after each block count
+ pxor xmm14,xmm14
+ movdqa xmm15,xmm6 ; tweak 0
+ movdqa XMMWORD PTR[rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm0,xmm6 ; tweak 1
+ movdqa XMMWORD PTR[16+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm7,XMMWORD PTR[r12]
+ cmp r14,16
+ je $L$xts_enc_1 ; exactly one block left
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm1,xmm6 ; tweak 2
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ cmp r14,32
+ je $L$xts_enc_2
+ pxor xmm15,xmm7
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm2,xmm6 ; tweak 3
+ movdqa XMMWORD PTR[48+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ cmp r14,48
+ je $L$xts_enc_3
+ pxor xmm0,xmm8
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm3,xmm6 ; tweak 4
+ movdqa XMMWORD PTR[64+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ cmp r14,64
+ je $L$xts_enc_4
+ pxor xmm1,xmm9
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm4,xmm6 ; tweak 5
+ movdqa XMMWORD PTR[80+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ cmp r14,80
+ je $L$xts_enc_5
+ pxor xmm2,xmm10
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm5,xmm6 ; tweak 6
+ movdqa XMMWORD PTR[96+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ cmp r14,96
+ je $L$xts_enc_6
+ pxor xmm3,xmm11 ; fall-through: seven blocks left
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ movdqa XMMWORD PTR[112+rsp],xmm6 ; tweak for whatever follows these seven blocks
+ lea r12,QWORD PTR[112+r12]
+ pxor xmm5,xmm13
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+ call _bsaes_encrypt8
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm5
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm2
+ pxor xmm1,XMMWORD PTR[96+rsp]
+ movdqu XMMWORD PTR[80+r13],xmm6
+ movdqu XMMWORD PTR[96+r13],xmm1
+ lea r13,QWORD PTR[112+r13]
+ movdqa xmm6,XMMWORD PTR[112+rsp] ; xmm6 = next unused tweak, needed if stealing follows
+ jmp $L$xts_enc_done
+ pxor xmm3,xmm11 ; presumably $L$xts_enc_6: six-block tail
+ lea r12,QWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+ call _bsaes_encrypt8
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm5
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm2
+ movdqu XMMWORD PTR[80+r13],xmm6
+ lea r13,QWORD PTR[96+r13]
+ movdqa xmm6,XMMWORD PTR[96+rsp] ; next unused tweak
+ jmp $L$xts_enc_done
+ pxor xmm2,xmm10 ; presumably $L$xts_enc_5: five-block tail
+ lea r12,QWORD PTR[80+r12]
+ pxor xmm3,xmm11
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+ call _bsaes_encrypt8
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ pxor xmm2,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm5
+ movdqu XMMWORD PTR[64+r13],xmm2
+ lea r13,QWORD PTR[80+r13]
+ movdqa xmm6,XMMWORD PTR[80+rsp] ; next unused tweak
+ jmp $L$xts_enc_done
+ pxor xmm1,xmm9 ; presumably $L$xts_enc_4: four-block tail
+ lea r12,QWORD PTR[64+r12]
+ pxor xmm2,xmm10
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+ call _bsaes_encrypt8
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm5,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm3
+ movdqu XMMWORD PTR[48+r13],xmm5
+ lea r13,QWORD PTR[64+r13]
+ movdqa xmm6,XMMWORD PTR[64+rsp] ; next unused tweak
+ jmp $L$xts_enc_done
+ pxor xmm0,xmm8 ; presumably $L$xts_enc_3: three-block tail
+ lea r12,QWORD PTR[48+r12]
+ pxor xmm1,xmm9
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+ call _bsaes_encrypt8
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm3,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm3
+ lea r13,QWORD PTR[48+r13]
+ movdqa xmm6,XMMWORD PTR[48+rsp] ; next unused tweak
+ jmp $L$xts_enc_done
+ pxor xmm15,xmm7 ; presumably $L$xts_enc_2: two-block tail
+ lea r12,QWORD PTR[32+r12]
+ pxor xmm0,xmm8
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+ call _bsaes_encrypt8
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ lea r13,QWORD PTR[32+r13]
+ movdqa xmm6,XMMWORD PTR[32+rsp] ; next unused tweak
+ jmp $L$xts_enc_done
+ pxor xmm7,xmm15 ; presumably $L$xts_enc_1: single block goes through asm_AES_encrypt instead of the 8-way helper
+ lea r12,QWORD PTR[16+r12]
+ movdqa XMMWORD PTR[32+rbp],xmm7 ; block XOR tweak, encrypted in place at 32(rbp)
+ lea rcx,QWORD PTR[32+rbp]
+ lea rdx,QWORD PTR[32+rbp]
+ lea r8,QWORD PTR[r15]
+ call asm_AES_encrypt
+ pxor xmm15,XMMWORD PTR[32+rbp] ; XOR with the tweak again
+ movdqu XMMWORD PTR[r13],xmm15
+ lea r13,QWORD PTR[16+r13]
+ movdqa xmm6,XMMWORD PTR[16+rsp] ; next unused tweak
+ and ebx,15 ; presumably $L$xts_enc_done: ebx = number of trailing bytes beyond the last whole block
+ jz $L$xts_enc_ret
+ mov rdx,r13
+ movzx eax,BYTE PTR[r12] ; presumably $L$xts_enc_steal: swap trailing input bytes into the previous output block
+ movzx ecx,BYTE PTR[((-16))+rdx] ; byte of the block written last
+ lea r12,QWORD PTR[1+r12]
+ mov BYTE PTR[((-16))+rdx],al ; input byte replaces it
+ mov BYTE PTR[rdx],cl ; displaced byte becomes part of the short final output
+ lea rdx,QWORD PTR[1+rdx]
+ sub ebx,1
+ jnz $L$xts_enc_steal
+ movdqu xmm15,XMMWORD PTR[((-16))+r13] ; re-encrypt the patched block with the tweak in xmm6
+ lea rcx,QWORD PTR[32+rbp]
+ pxor xmm15,xmm6
+ lea rdx,QWORD PTR[32+rbp]
+ movdqa XMMWORD PTR[32+rbp],xmm15
+ lea r8,QWORD PTR[r15]
+ call asm_AES_encrypt
+ pxor xmm6,XMMWORD PTR[32+rbp]
+ movdqu XMMWORD PTR[(-16)+r13],xmm6
+ lea rax,QWORD PTR[rsp] ; presumably $L$xts_enc_ret: wipe the scratch area between rsp and rbp
+ pxor xmm0,xmm0
+ movdqa XMMWORD PTR[rax],xmm0 ; presumably $L$xts_enc_bzero: zero 32 bytes per iteration
+ movdqa XMMWORD PTR[16+rax],xmm0
+ lea rax,QWORD PTR[32+rax]
+ cmp rbp,rax
+ ja $L$xts_enc_bzero
+ lea rsp,QWORD PTR[rbp]
+ movaps xmm6,XMMWORD PTR[64+rbp] ; restore xmm6-xmm15
+ movaps xmm7,XMMWORD PTR[80+rbp]
+ movaps xmm8,XMMWORD PTR[96+rbp]
+ movaps xmm9,XMMWORD PTR[112+rbp]
+ movaps xmm10,XMMWORD PTR[128+rbp]
+ movaps xmm11,XMMWORD PTR[144+rbp]
+ movaps xmm12,XMMWORD PTR[160+rbp]
+ movaps xmm13,XMMWORD PTR[176+rbp]
+ movaps xmm14,XMMWORD PTR[192+rbp]
+ movaps xmm15,XMMWORD PTR[208+rbp]
+ lea rsp,QWORD PTR[160+rbp] ; rsp = entry rsp - 120
+ mov r15,QWORD PTR[72+rsp] ; reload the pushed registers by offset instead of popping
+ mov r14,QWORD PTR[80+rsp]
+ mov r13,QWORD PTR[88+rsp]
+ mov r12,QWORD PTR[96+rsp]
+ mov rbx,QWORD PTR[104+rsp]
+ mov rax,QWORD PTR[112+rsp] ; saved rbp
+ lea rsp,QWORD PTR[120+rsp] ; rsp = entry rsp
+ mov rbp,rax
+ DB 0F3h,0C3h ;repret
+bsaes_xts_encrypt ENDP
+PUBLIC bsaes_xts_decrypt
+bsaes_xts_decrypt PROC PUBLIC ; XTS-style decryption: rcx=input, rdx=output, r8=length in bytes, r9=data key, 5th stack arg=second key, 6th stack arg=16-byte block that is encrypted to form the first tweak
+ mov rax,rsp
+ push rbp ; save the GPRs that the epilogue restores from 72..112(rsp)
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ lea rsp,QWORD PTR[((-72))+rsp] ; rsp = entry rsp - 120
+ mov r10,QWORD PTR[160+rsp] ; qword at entry rsp + 40: 5th argument
+ mov r11,QWORD PTR[168+rsp] ; qword at entry rsp + 48: 6th argument
+ lea rsp,QWORD PTR[((-160))+rsp] ; save area for xmm6-xmm15
+ movaps XMMWORD PTR[64+rsp],xmm6
+ movaps XMMWORD PTR[80+rsp],xmm7
+ movaps XMMWORD PTR[96+rsp],xmm8
+ movaps XMMWORD PTR[112+rsp],xmm9
+ movaps XMMWORD PTR[128+rsp],xmm10
+ movaps XMMWORD PTR[144+rsp],xmm11
+ movaps XMMWORD PTR[160+rsp],xmm12
+ movaps XMMWORD PTR[176+rsp],xmm13
+ movaps XMMWORD PTR[192+rsp],xmm14
+ movaps XMMWORD PTR[208+rsp],xmm15
+ mov rbp,rsp ; rbp = frame base
+ mov r12,rcx ; r12 = input pointer
+ mov r13,rdx ; r13 = output pointer
+ mov r14,r8 ; r14 = byte length
+ mov r15,r9 ; r15 = data key pointer
+ lea rcx,QWORD PTR[r11] ; the first tweak is produced with asm_AES_encrypt even when decrypting:
+ lea rdx,QWORD PTR[32+rbp] ; in=6th arg, out=32(rbp),
+ lea r8,QWORD PTR[r10] ; key=5th arg (argument roles inferred from the other call sites - TODO confirm)
+ call asm_AES_encrypt
+ mov eax,DWORD PTR[240+r15] ; dword at key+240 (presumably the round count - TODO confirm)
+ mov rbx,r14 ; keep the original length; its low 4 bits decide whether stealing is needed
+ mov edx,eax ; edx = copy of the value from key+240, passed to the helpers in r10d
+ shl rax,7 ; stack allocation size = that value * 128 - 96 bytes
+ sub rax,96
+ sub rsp,rax
+ mov rax,rsp ; register arguments for _bsaes_key_convert (defined outside this view): rax=stack buffer,
+ mov rcx,r15 ; rcx=key,
+ mov r10d,edx ; r10d=value from key+240
+ call _bsaes_key_convert
+ pxor xmm7,XMMWORD PTR[rsp] ; unlike the encrypt routine: xmm7 is XORed into the first key block at (rsp)
+ movdqa XMMWORD PTR[rax],xmm6 ; and xmm6 is stored unmodified at the rax left by the helper
+ movdqa XMMWORD PTR[rsp],xmm7
+ xor eax,eax
+ and r14,-16 ; whole blocks only
+ test ebx,15
+ setnz al ; al = 1 when the length is not a multiple of 16
+ shl rax,4
+ sub r14,rax ; in that case hold back one whole block for the stealing step at the end
+ sub rsp,080h ; 128 bytes at rsp hold the eight per-block tweaks
+ movdqa xmm6,XMMWORD PTR[32+rbp] ; xmm6 = current tweak
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic]
+ pcmpgtd xmm14,xmm6 ; xmm14 = per-dword sign mask of the tweak (0 > x)
+ sub r14,080h
+ jc $L$xts_dec_short ; fewer than 128 bytes to process in bulk
+ jmp $L$xts_dec_loop ; NOTE(review): the label definition lines are missing from this view
+ pshufd xmm13,xmm14,013h ; presumably top of $L$xts_dec_loop: route sign of dword 3 to dword 0 and sign of dword 1 to dword 2
+ pxor xmm14,xmm14
+ movdqa xmm15,xmm6 ; tweak 0 -> xmm15 and 0(rsp)
+ movdqa XMMWORD PTR[rsp],xmm6
+ paddq xmm6,xmm6 ; shift both 64-bit halves left by one bit
+ pand xmm13,xmm12 ; keep only the bits of the $L$xts_magic constant where a top bit was shifted out
+ pcmpgtd xmm14,xmm6 ; sign mask of the new value for the next step
+ pxor xmm6,xmm13 ; xmm6 = next tweak (presumably multiplication by x in GF(2^128) - TODO confirm)
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm0,xmm6 ; tweak 1 -> xmm0 and 16(rsp)
+ movdqa XMMWORD PTR[16+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm7,XMMWORD PTR[r12] ; input block 0
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm1,xmm6 ; tweak 2 -> xmm1 and 32(rsp)
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm8,XMMWORD PTR[16+r12] ; input block 1
+ pxor xmm15,xmm7 ; block 0 XOR tweak 0
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm2,xmm6 ; tweak 3 -> xmm2 and 48(rsp)
+ movdqa XMMWORD PTR[48+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm9,XMMWORD PTR[32+r12] ; input block 2
+ pxor xmm0,xmm8 ; block 1 XOR tweak 1
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm3,xmm6 ; tweak 4 -> xmm3 and 64(rsp)
+ movdqa XMMWORD PTR[64+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm10,XMMWORD PTR[48+r12] ; input block 3
+ pxor xmm1,xmm9 ; block 2 XOR tweak 2
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm4,xmm6 ; tweak 5 -> xmm4 and 80(rsp)
+ movdqa XMMWORD PTR[80+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm11,XMMWORD PTR[64+r12] ; input block 4
+ pxor xmm2,xmm10 ; block 3 XOR tweak 3
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm5,xmm6 ; tweak 6 -> xmm5 and 96(rsp)
+ movdqa XMMWORD PTR[96+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13 ; xmm6 = tweak 7
+ movdqu xmm12,XMMWORD PTR[80+r12] ; input block 5; this overwrites the magic constant, which is reloaded after the call
+ pxor xmm3,xmm11 ; block 4 XOR tweak 4
+ movdqu xmm13,XMMWORD PTR[96+r12] ; input block 6
+ pxor xmm4,xmm12 ; block 5 XOR tweak 5
+ movdqu xmm14,XMMWORD PTR[112+r12] ; input block 7
+ lea r12,QWORD PTR[128+r12]
+ movdqa XMMWORD PTR[112+rsp],xmm6 ; tweak 7 -> 112(rsp)
+ pxor xmm5,xmm13 ; block 6 XOR tweak 6
+ lea rax,QWORD PTR[128+rsp] ; rax = converted key, which sits just above the 128-byte tweak table
+ pxor xmm6,xmm14 ; block 7 XOR tweak 7
+ mov r10d,edx
+ call _bsaes_decrypt8 ; helper defined outside this view; decrypts the eight blocks held in registers
+ pxor xmm15,XMMWORD PTR[rsp] ; XOR each result with its tweak again; results are consumed in the order xmm15,0,5,3,1,6,2,4
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ pxor xmm1,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm3
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm1
+ pxor xmm2,XMMWORD PTR[96+rsp]
+ movdqu XMMWORD PTR[80+r13],xmm6
+ pxor xmm4,XMMWORD PTR[112+rsp]
+ movdqu XMMWORD PTR[96+r13],xmm2
+ movdqu XMMWORD PTR[112+r13],xmm4
+ lea r13,QWORD PTR[128+r13]
+ movdqa xmm6,XMMWORD PTR[112+rsp] ; last tweak used; derive the following one from it
+ pxor xmm14,xmm14
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic] ; reload the constant clobbered above
+ pcmpgtd xmm14,xmm6
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13 ; xmm6 = first tweak of the next batch
+ sub r14,080h
+ jnc $L$xts_dec_loop ; at least another 128 bytes remain
+ add r14,080h ; presumably $L$xts_dec_short: r14 = 0..112 bytes left for bulk processing
+ jz $L$xts_dec_done
+ pshufd xmm13,xmm14,013h ; same tweak chain as above, with an exit after each block count
+ pxor xmm14,xmm14
+ movdqa xmm15,xmm6 ; tweak 0
+ movdqa XMMWORD PTR[rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm0,xmm6 ; tweak 1
+ movdqa XMMWORD PTR[16+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm7,XMMWORD PTR[r12]
+ cmp r14,16
+ je $L$xts_dec_1 ; exactly one block left
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm1,xmm6 ; tweak 2
+ movdqa XMMWORD PTR[32+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm8,XMMWORD PTR[16+r12]
+ cmp r14,32
+ je $L$xts_dec_2
+ pxor xmm15,xmm7
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm2,xmm6 ; tweak 3
+ movdqa XMMWORD PTR[48+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm9,XMMWORD PTR[32+r12]
+ cmp r14,48
+ je $L$xts_dec_3
+ pxor xmm0,xmm8
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm3,xmm6 ; tweak 4
+ movdqa XMMWORD PTR[64+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm10,XMMWORD PTR[48+r12]
+ cmp r14,64
+ je $L$xts_dec_4
+ pxor xmm1,xmm9
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm4,xmm6 ; tweak 5
+ movdqa XMMWORD PTR[80+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm11,XMMWORD PTR[64+r12]
+ cmp r14,80
+ je $L$xts_dec_5
+ pxor xmm2,xmm10
+ pshufd xmm13,xmm14,013h
+ pxor xmm14,xmm14
+ movdqa xmm5,xmm6 ; tweak 6
+ movdqa XMMWORD PTR[96+rsp],xmm6
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ pcmpgtd xmm14,xmm6
+ pxor xmm6,xmm13
+ movdqu xmm12,XMMWORD PTR[80+r12]
+ cmp r14,96
+ je $L$xts_dec_6
+ pxor xmm3,xmm11 ; fall-through: seven blocks left
+ movdqu xmm13,XMMWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ movdqa XMMWORD PTR[112+rsp],xmm6 ; tweak for whatever follows these seven blocks
+ lea r12,QWORD PTR[112+r12]
+ pxor xmm5,xmm13
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ pxor xmm1,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm3
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm1
+ pxor xmm2,XMMWORD PTR[96+rsp]
+ movdqu XMMWORD PTR[80+r13],xmm6
+ movdqu XMMWORD PTR[96+r13],xmm2
+ lea r13,QWORD PTR[112+r13]
+ movdqa xmm6,XMMWORD PTR[112+rsp] ; xmm6 = next unused tweak, needed if stealing follows
+ jmp $L$xts_dec_done
+ pxor xmm3,xmm11 ; presumably $L$xts_dec_6: six-block tail
+ lea r12,QWORD PTR[96+r12]
+ pxor xmm4,xmm12
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ pxor xmm1,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm3
+ pxor xmm6,XMMWORD PTR[80+rsp]
+ movdqu XMMWORD PTR[64+r13],xmm1
+ movdqu XMMWORD PTR[80+r13],xmm6
+ lea r13,QWORD PTR[96+r13]
+ movdqa xmm6,XMMWORD PTR[96+rsp] ; next unused tweak
+ jmp $L$xts_dec_done
+ pxor xmm2,xmm10 ; presumably $L$xts_dec_5: five-block tail
+ lea r12,QWORD PTR[80+r12]
+ pxor xmm3,xmm11
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ pxor xmm1,XMMWORD PTR[64+rsp]
+ movdqu XMMWORD PTR[48+r13],xmm3
+ movdqu XMMWORD PTR[64+r13],xmm1
+ lea r13,QWORD PTR[80+r13]
+ movdqa xmm6,XMMWORD PTR[80+rsp] ; next unused tweak
+ jmp $L$xts_dec_done
+ pxor xmm1,xmm9 ; presumably $L$xts_dec_4: four-block tail
+ lea r12,QWORD PTR[64+r12]
+ pxor xmm2,xmm10
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ pxor xmm3,XMMWORD PTR[48+rsp]
+ movdqu XMMWORD PTR[32+r13],xmm5
+ movdqu XMMWORD PTR[48+r13],xmm3
+ lea r13,QWORD PTR[64+r13]
+ movdqa xmm6,XMMWORD PTR[64+rsp] ; next unused tweak
+ jmp $L$xts_dec_done
+ pxor xmm0,xmm8 ; presumably $L$xts_dec_3: three-block tail
+ lea r12,QWORD PTR[48+r12]
+ pxor xmm1,xmm9
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ pxor xmm5,XMMWORD PTR[32+rsp]
+ movdqu XMMWORD PTR[16+r13],xmm0
+ movdqu XMMWORD PTR[32+r13],xmm5
+ lea r13,QWORD PTR[48+r13]
+ movdqa xmm6,XMMWORD PTR[48+rsp] ; next unused tweak
+ jmp $L$xts_dec_done
+ pxor xmm15,xmm7 ; presumably $L$xts_dec_2: two-block tail
+ lea r12,QWORD PTR[32+r12]
+ pxor xmm0,xmm8
+ lea rax,QWORD PTR[128+rsp]
+ mov r10d,edx
+ call _bsaes_decrypt8
+ pxor xmm15,XMMWORD PTR[rsp]
+ pxor xmm0,XMMWORD PTR[16+rsp]
+ movdqu XMMWORD PTR[r13],xmm15
+ movdqu XMMWORD PTR[16+r13],xmm0
+ lea r13,QWORD PTR[32+r13]
+ movdqa xmm6,XMMWORD PTR[32+rsp] ; next unused tweak
+ jmp $L$xts_dec_done
+ pxor xmm7,xmm15 ; presumably $L$xts_dec_1: single block goes through asm_AES_decrypt instead of the 8-way helper
+ lea r12,QWORD PTR[16+r12]
+ movdqa XMMWORD PTR[32+rbp],xmm7 ; block XOR tweak, decrypted in place at 32(rbp)
+ lea rcx,QWORD PTR[32+rbp]
+ lea rdx,QWORD PTR[32+rbp]
+ lea r8,QWORD PTR[r15]
+ call asm_AES_decrypt
+ pxor xmm15,XMMWORD PTR[32+rbp] ; XOR with the tweak again
+ movdqu XMMWORD PTR[r13],xmm15
+ lea r13,QWORD PTR[16+r13]
+ movdqa xmm6,XMMWORD PTR[16+rsp] ; next unused tweak
+ and ebx,15 ; presumably $L$xts_dec_done: ebx = number of trailing bytes beyond the last whole block
+ jz $L$xts_dec_ret
+ pxor xmm14,xmm14 ; stealing: the held-back whole block is decrypted with the tweak AFTER the current one
+ movdqa xmm12,XMMWORD PTR[$L$xts_magic]
+ pcmpgtd xmm14,xmm6
+ pshufd xmm13,xmm14,013h
+ movdqa xmm5,xmm6 ; xmm5 = current tweak, kept for the final block
+ paddq xmm6,xmm6
+ pand xmm13,xmm12
+ movdqu xmm15,XMMWORD PTR[r12] ; the held-back whole input block
+ pxor xmm6,xmm13 ; xmm6 = following tweak
+ lea rcx,QWORD PTR[32+rbp]
+ pxor xmm15,xmm6
+ lea rdx,QWORD PTR[32+rbp]
+ movdqa XMMWORD PTR[32+rbp],xmm15
+ lea r8,QWORD PTR[r15]
+ call asm_AES_decrypt
+ pxor xmm6,XMMWORD PTR[32+rbp]
+ mov rdx,r13
+ movdqu XMMWORD PTR[r13],xmm6 ; provisional output block, patched by the loop below
+ movzx eax,BYTE PTR[16+r12] ; presumably $L$xts_dec_steal: trailing input byte
+ movzx ecx,BYTE PTR[rdx] ; byte of the provisional block
+ lea r12,QWORD PTR[1+r12]
+ mov BYTE PTR[rdx],al ; input byte replaces it
+ mov BYTE PTR[16+rdx],cl ; displaced byte becomes part of the short final output
+ lea rdx,QWORD PTR[1+rdx]
+ sub ebx,1
+ jnz $L$xts_dec_steal
+ movdqu xmm15,XMMWORD PTR[r13] ; decrypt the patched block with the tweak saved in xmm5
+ lea rcx,QWORD PTR[32+rbp]
+ pxor xmm15,xmm5
+ lea rdx,QWORD PTR[32+rbp]
+ movdqa XMMWORD PTR[32+rbp],xmm15
+ lea r8,QWORD PTR[r15]
+ call asm_AES_decrypt
+ pxor xmm5,XMMWORD PTR[32+rbp]
+ movdqu XMMWORD PTR[r13],xmm5
+ lea rax,QWORD PTR[rsp] ; presumably $L$xts_dec_ret: wipe the scratch area between rsp and rbp
+ pxor xmm0,xmm0
+ movdqa XMMWORD PTR[rax],xmm0 ; presumably $L$xts_dec_bzero: zero 32 bytes per iteration
+ movdqa XMMWORD PTR[16+rax],xmm0
+ lea rax,QWORD PTR[32+rax]
+ cmp rbp,rax
+ ja $L$xts_dec_bzero
+ lea rsp,QWORD PTR[rbp]
+ movaps xmm6,XMMWORD PTR[64+rbp] ; restore xmm6-xmm15
+ movaps xmm7,XMMWORD PTR[80+rbp]
+ movaps xmm8,XMMWORD PTR[96+rbp]
+ movaps xmm9,XMMWORD PTR[112+rbp]
+ movaps xmm10,XMMWORD PTR[128+rbp]
+ movaps xmm11,XMMWORD PTR[144+rbp]
+ movaps xmm12,XMMWORD PTR[160+rbp]
+ movaps xmm13,XMMWORD PTR[176+rbp]
+ movaps xmm14,XMMWORD PTR[192+rbp]
+ movaps xmm15,XMMWORD PTR[208+rbp]
+ lea rsp,QWORD PTR[160+rbp] ; rsp = entry rsp - 120
+ mov r15,QWORD PTR[72+rsp] ; reload the pushed registers by offset instead of popping
+ mov r14,QWORD PTR[80+rsp]
+ mov r13,QWORD PTR[88+rsp]
+ mov r12,QWORD PTR[96+rsp]
+ mov rbx,QWORD PTR[104+rsp]
+ mov rax,QWORD PTR[112+rsp] ; saved rbp
+ lea rsp,QWORD PTR[120+rsp] ; rsp = entry rsp
+ mov rbp,rax
+ DB 0F3h,0C3h ;repret
+bsaes_xts_decrypt ENDP
+ DQ 00a0e0206070b0f03h,00004080c0d010509h ; NOTE(review): the labels naming these 16-byte constants are not visible in this view; each of the first three entries is a permutation of byte indices 0..15 (presumably pshufb masks)
+ DQ 001040b0e0205080fh,00306090c00070a0dh
+ DQ 00504070602010003h,00f0e0d0c080b0a09h
+ DQ 05555555555555555h,05555555555555555h ; alternating-bit masks 0x55 / 0x33 / 0x0f (presumably $L$BS0 and the two entries after it - TODO confirm)
+ DQ 03333333333333333h,03333333333333333h
+ DQ 00f0f0f0f0f0f0f0fh,00f0f0f0f0f0f0f0fh
+ DQ 00504070600030201h,00f0e0d0c0a09080bh ; five more byte-index permutations
+ DQ 00304090e00050a0fh,001060b0c0207080dh
+ DQ 00a0e02060f03070bh,00004080c05090d01h
+ DQ 00706050403020100h,00c0d0e0f0b0a0908h ; identity on bytes 0..11, reverses bytes 12..15
+ DQ 00a0d02060c03070bh,00004080f05090e01h
+ DQ 00000000000000000h,00000000100000000h ; eight entries with 1..8 in the top dword (presumably $L$ADD1 onward, the counter increments read at $L$ADD1+0..+112 - TODO confirm)
+ DQ 00000000000000000h,00000000200000000h
+ DQ 00000000000000000h,00000000300000000h
+ DQ 00000000000000000h,00000000400000000h
+ DQ 00000000000000000h,00000000500000000h
+ DQ 00000000000000000h,00000000600000000h
+ DQ 00000000000000000h,00000000700000000h
+ DQ 00000000000000000h,00000000800000000h
+ DD 087h,0,1,0 ; presumably $L$xts_magic: 0x87 in dword 0 and 1 in dword 2, matching the pshufd/pand tweak update in the XTS routines
+ DQ 00101010101010101h,00101010101010101h ; single-bit byte patterns 0x01, 0x02, 0x04, 0x08
+ DQ 00202020202020202h,00202020202020202h
+ DQ 00404040404040404h,00404040404040404h
+ DQ 00808080808080808h,00808080808080808h
+ DQ 002060a0e03070b0fh,00004080c0105090dh ; another byte-index permutation
+ DQ 06363636363636363h,06363636363636363h ; 0x63 in every byte
+DB 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102 ; NUL-terminated ASCII/UTF-8 text: "Bit-sliced AES for x86_64/SSSE3, Emilia Kasper, Peter Schwabe, Andy Polyakov"
+DB 111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44
+DB 32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44 ; bytes 195,164 are the UTF-8 encoding of a-umlaut in the surname
+DB 32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32
+DB 65,110,100,121,32,80,111,108,121,97,107,111,118,0
+EXTERN __imp_RtlVirtualUnwind:NEAR
+se_handler PROC PRIVATE ; Win64 language-specific exception handler shared by the routines listed in the unwind tables below; r8 = context record, r9 = dispatcher context (structure offsets below follow the Win64 CONTEXT / DISPATCHER_CONTEXT layouts - TODO confirm against winnt.h)
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64 ; room for the four stack arguments of RtlVirtualUnwind
+ mov rax,QWORD PTR[120+r8] ; context->Rax
+ mov rbx,QWORD PTR[248+r8] ; context->Rip
+ mov rsi,QWORD PTR[8+r9] ; disp->ImageBase
+ mov r11,QWORD PTR[56+r9] ; disp->HandlerData: the two image-relative labels stored after the handler in the unwind data
+ mov r10d,DWORD PTR[r11] ; HandlerData[0]
+ lea r10,QWORD PTR[r10*1+rsi] ; convert to an absolute address
+ cmp rbx,r10
+ jb $L$in_prologue ; fault before the body label: no frame to undo (label line not visible in this view)
+ mov rax,QWORD PTR[152+r8] ; context->Rsp
+ mov r10d,DWORD PTR[4+r11] ; HandlerData[1]
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_prologue ; fault at or after the epilogue label
+ mov rax,QWORD PTR[160+r8] ; context->Rbp: the routines above keep their frame base in rbp
+ lea rsi,QWORD PTR[64+rax] ; source: xmm6-xmm15 save area at 64(frame)
+ lea rdi,QWORD PTR[512+r8] ; destination: context->Xmm6
+ mov ecx,20 ; 20 qwords = ten 16-byte registers
+ DD 0a548f3fch ; encoded "cld; rep movsq"
+ lea rax,QWORD PTR[160+rax] ; same offsets the epilogues use to reload the pushed registers
+ mov rbp,QWORD PTR[112+rax]
+ mov rbx,QWORD PTR[104+rax]
+ mov r12,QWORD PTR[96+rax]
+ mov r13,QWORD PTR[88+rax]
+ mov r14,QWORD PTR[80+rax]
+ mov r15,QWORD PTR[72+rax]
+ lea rax,QWORD PTR[120+rax] ; rax = stack pointer value at function entry
+ mov QWORD PTR[144+r8],rbx ; write the recovered registers into context->Rbx/Rbp/R12..R15
+ mov QWORD PTR[160+r8],rbp
+ mov QWORD PTR[216+r8],r12
+ mov QWORD PTR[224+r8],r13
+ mov QWORD PTR[232+r8],r14
+ mov QWORD PTR[240+r8],r15
+ mov QWORD PTR[152+r8],rax ; context->Rsp (presumably the $L$in_prologue target, where rax still holds the original value)
+ mov rdi,QWORD PTR[40+r9] ; disp->ContextRecord
+ mov rsi,r8
+ mov ecx,154 ; 154 qwords copied from the adjusted context into disp->ContextRecord
+ DD 0a548f3fch ; encoded "cld; rep movsq"
+ mov rsi,r9
+ xor rcx,rcx ; RtlVirtualUnwind arguments: rcx = 0 handler type,
+ mov rdx,QWORD PTR[8+rsi] ; rdx = disp->ImageBase,
+ mov r8,QWORD PTR[rsi] ; r8 = disp->ControlPc,
+ mov r9,QWORD PTR[16+rsi] ; r9 = disp->FunctionEntry,
+ mov r10,QWORD PTR[40+rsi] ; then on the stack: disp->ContextRecord,
+ lea r11,QWORD PTR[56+rsi] ; &disp->HandlerData,
+ lea r12,QWORD PTR[24+rsi] ; &disp->EstablisherFrame,
+ mov QWORD PTR[32+rsp],r10
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx ; and a NULL last argument
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+ mov eax,1 ; return value 1 (presumably ExceptionContinueSearch - TODO confirm)
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+se_handler ENDP
+.text$ ENDS
+ DD imagerel $L$cbc_dec_prologue ; one triple per routine: image-relative start label, end label, unwind-info label (NOTE(review): the .pdata SEGMENT line and these labels' definitions are not visible in this view)
+ DD imagerel $L$cbc_dec_epilogue
+ DD imagerel $L$cbc_dec_info
+ DD imagerel $L$ctr_enc_prologue
+ DD imagerel $L$ctr_enc_epilogue
+ DD imagerel $L$ctr_enc_info
+ DD imagerel $L$xts_enc_prologue
+ DD imagerel $L$xts_enc_epilogue
+ DD imagerel $L$xts_enc_info
+ DD imagerel $L$xts_dec_prologue
+ DD imagerel $L$xts_dec_epilogue
+ DD imagerel $L$xts_dec_info
+.pdata ENDS
+DB 9,0,0,0 ; unwind-info header bytes, followed by the handler and its two data words (presumably the $L$cbc_dec_info record - label not visible)
+ DD imagerel se_handler
+ DD imagerel $L$cbc_dec_body,imagerel $L$cbc_dec_epilogue ; read by se_handler as HandlerData[0] and HandlerData[1]
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$ctr_enc_body,imagerel $L$ctr_enc_epilogue
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue
+DB 9,0,0,0
+ DD imagerel se_handler
+ DD imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue
+.xdata ENDS
diff --git a/crypto/libressl/crypto/aes/bsaes-mingw64-x86_64.S b/crypto/libressl/crypto/aes/bsaes-mingw64-x86_64.S
new file mode 100644
index 0000000..f0b07cb
--- /dev/null
+++ b/crypto/libressl/crypto/aes/bsaes-mingw64-x86_64.S
@@ -0,0 +1,2725 @@
+#include "x86_arch.h"
+.def _bsaes_encrypt8; .scl 3; .type 32; .endef /* _bsaes_encrypt8: file-local (COFF storage class 3) helper that pushes the eight 128-bit values in xmm15,xmm0-xmm6 through r10d rounds driven by the key schedule at (%rax). NOTE(review): no label-definition lines (_bsaes_encrypt8:, .Lenc_loop:, ...) are visible in this view although branches reference them - confirm against the full file. */
+.p2align 6 /* in: rax = key schedule, r10d = round count, xmm15/xmm0-xmm6 = data; writes r11, rax, r10d and xmm7-xmm14 as scratch */
+ leaq .LBS0(%rip),%r11 /* r11 = base of the .LBS0 constant table (defined outside this view) */
+ movdqa (%rax),%xmm8 /* xmm8 = first 16 bytes of the key schedule */
+ leaq 16(%rax),%rax /* step past that first key block */
+ movdqa 80(%r11),%xmm7 /* byte-shuffle mask stored at .LBS0+80 */
+ pxor %xmm8,%xmm15 /* XOR the first key block into each of the eight data registers, interleaved with the byte shuffles */
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255 /* pshufb %xmm7,%xmm15 */
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199 /* pshufb %xmm7,%xmm0 */
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207 /* pshufb %xmm7,%xmm1 */
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215 /* pshufb %xmm7,%xmm2 */
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223 /* pshufb %xmm7,%xmm3 */
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231 /* pshufb %xmm7,%xmm4 */
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239 /* pshufb %xmm7,%xmm5 */
+.byte 102,15,56,0,247 /* pshufb %xmm7,%xmm6 */
+ movdqa 0(%r11),%xmm7 /* NOTE(review): presumably the _bsaes_encrypt8_bitslice entry point (its callers preload r11 and do their own key XOR) - label not visible. xmm7 = mask for the shift-by-1 exchanges */
+ movdqa 16(%r11),%xmm8 /* xmm8 = mask for the shift-by-2 exchanges */
+ movdqa %xmm5,%xmm9 /* exchange step on (xmm5,xmm6) and (xmm3,xmm4): t = ((a >> 1) ^ b) & mask; b ^= t; a ^= t << 1 */
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9 /* same shift-by-1 exchange on (xmm1,xmm2) and (xmm15,xmm0) */
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7 /* xmm7 = mask for the shift-by-4 exchanges */
+ movdqa %xmm4,%xmm9 /* shift-by-2 exchange on (xmm4,xmm6) and (xmm3,xmm5), mask in xmm8 */
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9 /* shift-by-2 exchange on (xmm0,xmm2) and (xmm15,xmm1) */
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9 /* shift-by-4 exchange on (xmm2,xmm6) and (xmm1,xmm5), mask in xmm7 */
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9 /* shift-by-4 exchange on (xmm0,xmm4) and (xmm15,xmm3) */
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d /* one round is accounted for before entering the loop */
+ jmp .Lenc_sbox /* skip the per-round key XOR on the first pass (target label not visible in this view) */
+.p2align 4 /* NOTE(review): .Lenc_loop presumably starts after this alignment - confirm */
+ pxor 0(%rax),%xmm15 /* XOR the next 128 bytes of key schedule into the eight registers, then shuffle bytes with the mask in xmm7 */
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255 /* pshufb %xmm7,%xmm15 */
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199 /* pshufb %xmm7,%xmm0 */
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207 /* pshufb %xmm7,%xmm1 */
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215 /* pshufb %xmm7,%xmm2 */
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223 /* pshufb %xmm7,%xmm3 */
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231 /* pshufb %xmm7,%xmm4 */
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239 /* pshufb %xmm7,%xmm5 */
+ leaq 128(%rax),%rax /* advance to the next 128-byte round-key group */
+.byte 102,15,56,0,247 /* pshufb %xmm7,%xmm6 */
+ pxor %xmm5,%xmm4 /* NOTE(review): from here to the next decl is a fixed XOR/AND/OR network, presumably the bit-sliced S-box (.Lenc_sbox presumably labels this line) - not verified here */
+ pxor %xmm0,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm1,%xmm5
+ pxor %xmm15,%xmm4
+ pxor %xmm2,%xmm5
+ pxor %xmm6,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm3,%xmm2
+ pxor %xmm4,%xmm3
+ pxor %xmm0,%xmm2
+ pxor %xmm6,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm6,%xmm10 /* xmm7-xmm14 serve as temporaries for the nonlinear part */
+ movdqa %xmm0,%xmm9
+ movdqa %xmm4,%xmm8
+ movdqa %xmm1,%xmm12
+ movdqa %xmm5,%xmm11
+ pxor %xmm3,%xmm10
+ pxor %xmm1,%xmm9
+ pxor %xmm2,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm3,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm15,%xmm11
+ movdqa %xmm10,%xmm14
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm2,%xmm11
+ pxor %xmm15,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm6,%xmm12
+ movdqa %xmm4,%xmm11
+ pxor %xmm0,%xmm12
+ pxor %xmm5,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm1,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm3,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm0,%xmm13
+ pand %xmm2,%xmm11
+ movdqa %xmm6,%xmm14
+ pand %xmm15,%xmm12
+ pand %xmm4,%xmm13
+ por %xmm5,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+ pxor %xmm9,%xmm10
+ pand %xmm10,%xmm12
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+ pand %xmm7,%xmm9
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+ pand %xmm14,%xmm13
+ pxor %xmm11,%xmm13
+ movdqa %xmm5,%xmm11
+ movdqa %xmm4,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm5,%xmm9
+ pxor %xmm4,%xmm5
+ pand %xmm14,%xmm4
+ pand %xmm13,%xmm5
+ pxor %xmm4,%xmm5
+ pxor %xmm9,%xmm4
+ pxor %xmm15,%xmm11
+ pxor %xmm2,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm2,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm2
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm2,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm2
+ pxor %xmm11,%xmm5
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm2
+ movdqa %xmm6,%xmm11
+ movdqa %xmm0,%xmm7
+ pxor %xmm3,%xmm11
+ pxor %xmm1,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm3,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm1,%xmm3
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm1
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm3
+ pxor %xmm11,%xmm7
+ pxor %xmm1,%xmm3
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm1
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm6,%xmm10
+ pxor %xmm0,%xmm6
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm6
+ pxor %xmm0,%xmm6
+ pxor %xmm10,%xmm0
+ pxor %xmm11,%xmm6
+ pxor %xmm11,%xmm3
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm1
+ pxor %xmm15,%xmm6
+ pxor %xmm5,%xmm0
+ pxor %xmm6,%xmm3
+ pxor %xmm15,%xmm5
+ pxor %xmm0,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ pxor %xmm2,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm4,%xmm3
+ pxor %xmm2,%xmm5
+ decl %r10d /* count the round; the flags set here also drive the jnz further down */
+ jl .Lenc_done /* counter went negative: leave the loop without the mixing step below */
+ pshufd $147,%xmm15,%xmm7 /* NOTE(review): presumably the bit-sliced MixColumns; pshufd $147 rotates the four dwords by one position, $78 swaps the two qword halves */
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm3,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm5,%xmm10
+ pxor %xmm9,%xmm3
+ pshufd $147,%xmm2,%xmm11
+ pxor %xmm10,%xmm5
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm2
+ pshufd $147,%xmm1,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm1
+ pxor %xmm14,%xmm4
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm2,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm5,%xmm11
+ pshufd $78,%xmm2,%xmm7
+ pxor %xmm1,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm3,%xmm10
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm1,%xmm5
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm12,%xmm8
+ pxor %xmm10,%xmm2
+ pxor %xmm14,%xmm6
+ pxor %xmm13,%xmm5
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm1
+ movdqa %xmm8,%xmm4
+ movdqa 48(%r11),%xmm7 /* default shuffle mask for the next round (.LBS0+48) */
+ jnz .Lenc_loop /* flags are still those of the decl above (SSE instructions leave EFLAGS alone): counter != 0 keeps the default mask */
+ movdqa 64(%r11),%xmm7 /* counter == 0: the final pass uses the mask at .LBS0+64 instead */
+ jmp .Lenc_loop
+.p2align 4 /* NOTE(review): .Lenc_done presumably starts after this alignment - confirm */
+ movdqa 0(%r11),%xmm7 /* reload the shift-by-1 and shift-by-2 masks; the exchange steps are now applied with a different register pairing */
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm1,%xmm9 /* shift-by-1 exchange on (xmm1,xmm4) and (xmm2,xmm6) */
+ psrlq $1,%xmm1
+ movdqa %xmm2,%xmm10
+ psrlq $1,%xmm2
+ pxor %xmm4,%xmm1
+ pxor %xmm6,%xmm2
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm2
+ pxor %xmm1,%xmm4
+ psllq $1,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $1,%xmm2
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm2
+ movdqa %xmm3,%xmm9 /* shift-by-1 exchange on (xmm3,xmm5) and (xmm15,xmm0) */
+ psrlq $1,%xmm3
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm5,%xmm3
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm15
+ pxor %xmm3,%xmm5
+ psllq $1,%xmm3
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7 /* shift-by-4 mask */
+ movdqa %xmm6,%xmm9 /* shift-by-2 exchange on (xmm6,xmm4) and (xmm2,xmm1) */
+ psrlq $2,%xmm6
+ movdqa %xmm2,%xmm10
+ psrlq $2,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm1,%xmm2
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm2
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm2,%xmm1
+ psllq $2,%xmm2
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm2
+ movdqa %xmm0,%xmm9 /* shift-by-2 exchange on (xmm0,xmm5) and (xmm15,xmm3) */
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm5,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm5
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm5,%xmm9 /* shift-by-4 exchange on (xmm5,xmm4) and (xmm3,xmm1) */
+ psrlq $4,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $4,%xmm3
+ pxor %xmm4,%xmm5
+ pxor %xmm1,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm4
+ psllq $4,%xmm5
+ pxor %xmm3,%xmm1
+ psllq $4,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9 /* shift-by-4 exchange on (xmm0,xmm6) and (xmm15,xmm2) */
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm2,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm2
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7 /* final 16-byte key block, XORed into all eight result registers */
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq /* results stay in xmm15,xmm0-xmm6; the CTR caller in this file consumes them in the order xmm15,xmm0,xmm3,xmm5,xmm2,xmm6,xmm1,xmm4 */
+.def _bsaes_decrypt8; .scl 3; .type 32; .endef /* _bsaes_decrypt8: file-local (COFF storage class 3) helper, structured like _bsaes_encrypt8 but with different constant offsets and a different gate network. NOTE(review): label-definition lines (_bsaes_decrypt8:, .Ldec_loop:, ...) are not visible in this view - confirm against the full file. */
+.p2align 6 /* in: rax = key schedule, r10d = round count, xmm15/xmm0-xmm6 = data; writes r11, rax, r10d and xmm7-xmm14 as scratch */
+ leaq .LBS0(%rip),%r11 /* r11 = base of the .LBS0 constant table (defined outside this view) */
+ movdqa (%rax),%xmm8 /* xmm8 = first 16 bytes of the key schedule */
+ leaq 16(%rax),%rax
+ movdqa -48(%r11),%xmm7 /* byte-shuffle mask stored 48 bytes before .LBS0 */
+ pxor %xmm8,%xmm15 /* XOR the first key block into each data register, interleaved with the byte shuffles */
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255 /* pshufb %xmm7,%xmm15 */
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199 /* pshufb %xmm7,%xmm0 */
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207 /* pshufb %xmm7,%xmm1 */
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215 /* pshufb %xmm7,%xmm2 */
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223 /* pshufb %xmm7,%xmm3 */
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231 /* pshufb %xmm7,%xmm4 */
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239 /* pshufb %xmm7,%xmm5 */
+.byte 102,15,56,0,247 /* pshufb %xmm7,%xmm6 */
+ movdqa 0(%r11),%xmm7 /* mask for the shift-by-1 exchanges */
+ movdqa 16(%r11),%xmm8 /* mask for the shift-by-2 exchanges */
+ movdqa %xmm5,%xmm9 /* exchange step on (xmm5,xmm6) and (xmm3,xmm4): t = ((a >> 1) ^ b) & mask; b ^= t; a ^= t << 1 */
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9 /* shift-by-1 exchange on (xmm1,xmm2) and (xmm15,xmm0) */
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7 /* mask for the shift-by-4 exchanges */
+ movdqa %xmm4,%xmm9 /* shift-by-2 exchange on (xmm4,xmm6) and (xmm3,xmm5) */
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9 /* shift-by-2 exchange on (xmm0,xmm2) and (xmm15,xmm1) */
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9 /* shift-by-4 exchange on (xmm2,xmm6) and (xmm1,xmm5) */
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9 /* shift-by-4 exchange on (xmm0,xmm4) and (xmm15,xmm3) */
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d /* one round is accounted for before entering the loop */
+ jmp .Ldec_sbox /* skip the per-round key XOR on the first pass (target label not visible in this view) */
+.p2align 4 /* NOTE(review): .Ldec_loop presumably starts after this alignment - confirm */
+ pxor 0(%rax),%xmm15 /* XOR the next 128 bytes of key schedule into the eight registers, then shuffle bytes with the mask in xmm7 */
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255 /* pshufb %xmm7,%xmm15 */
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199 /* pshufb %xmm7,%xmm0 */
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207 /* pshufb %xmm7,%xmm1 */
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215 /* pshufb %xmm7,%xmm2 */
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223 /* pshufb %xmm7,%xmm3 */
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231 /* pshufb %xmm7,%xmm4 */
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239 /* pshufb %xmm7,%xmm5 */
+ leaq 128(%rax),%rax /* advance to the next 128-byte round-key group */
+.byte 102,15,56,0,247 /* pshufb %xmm7,%xmm6 */
+ pxor %xmm3,%xmm2 /* NOTE(review): from here to the next decl is a fixed XOR/AND/OR network, presumably the bit-sliced inverse S-box (.Ldec_sbox presumably labels this line) - not verified here */
+ pxor %xmm6,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm3,%xmm5
+ pxor %xmm5,%xmm6
+ pxor %xmm6,%xmm0
+ pxor %xmm0,%xmm15
+ pxor %xmm4,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm15,%xmm4
+ pxor %xmm2,%xmm0
+ movdqa %xmm2,%xmm10 /* xmm7-xmm14 serve as temporaries for the nonlinear part */
+ movdqa %xmm6,%xmm9
+ movdqa %xmm0,%xmm8
+ movdqa %xmm3,%xmm12
+ movdqa %xmm4,%xmm11
+ pxor %xmm15,%xmm10
+ pxor %xmm3,%xmm9
+ pxor %xmm5,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm15,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm1,%xmm11
+ movdqa %xmm10,%xmm14
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm5,%xmm11
+ pxor %xmm1,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm2,%xmm12
+ movdqa %xmm0,%xmm11
+ pxor %xmm6,%xmm12
+ pxor %xmm4,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm3,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm15,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm6,%xmm13
+ pand %xmm5,%xmm11
+ movdqa %xmm2,%xmm14
+ pand %xmm1,%xmm12
+ pand %xmm0,%xmm13
+ por %xmm4,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+ pxor %xmm9,%xmm10
+ pand %xmm10,%xmm12
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+ pand %xmm7,%xmm9
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+ pand %xmm14,%xmm13
+ pxor %xmm11,%xmm13
+ movdqa %xmm4,%xmm11
+ movdqa %xmm0,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm4,%xmm9
+ pxor %xmm0,%xmm4
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm4
+ pxor %xmm0,%xmm4
+ pxor %xmm9,%xmm0
+ pxor %xmm1,%xmm11
+ pxor %xmm5,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm1,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm5,%xmm1
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm5
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm1
+ pxor %xmm11,%xmm7
+ pxor %xmm5,%xmm1
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm5
+ pxor %xmm11,%xmm4
+ pxor %xmm11,%xmm1
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm5
+ movdqa %xmm2,%xmm11
+ movdqa %xmm6,%xmm7
+ pxor %xmm15,%xmm11
+ pxor %xmm3,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm3,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm3
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm3,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm3
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm2,%xmm10
+ pxor %xmm6,%xmm2
+ pand %xmm14,%xmm6
+ pand %xmm13,%xmm2
+ pxor %xmm6,%xmm2
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm3
+ pxor %xmm6,%xmm0
+ pxor %xmm4,%xmm5
+ pxor %xmm0,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm6,%xmm4
+ pxor %xmm1,%xmm3
+ pxor %xmm15,%xmm6
+ pxor %xmm4,%xmm3
+ pxor %xmm5,%xmm2
+ pxor %xmm0,%xmm5
+ pxor %xmm3,%xmm2
+ pxor %xmm15,%xmm3
+ pxor %xmm2,%xmm6
+ decl %r10d /* count the round; the flags set here also drive the jnz further down */
+ jl .Ldec_done /* counter went negative: leave the loop without the mixing step below */
+ pshufd $78,%xmm15,%xmm7 /* NOTE(review): presumably the bit-sliced InvMixColumns: a qword-swap ($78) pre-mix followed by the same dword-rotate ($147) pattern used on the encrypt side */
+ pshufd $78,%xmm2,%xmm13
+ pxor %xmm15,%xmm7
+ pshufd $78,%xmm4,%xmm14
+ pxor %xmm2,%xmm13
+ pshufd $78,%xmm0,%xmm8
+ pxor %xmm4,%xmm14
+ pshufd $78,%xmm5,%xmm9
+ pxor %xmm0,%xmm8
+ pshufd $78,%xmm3,%xmm10
+ pxor %xmm5,%xmm9
+ pxor %xmm13,%xmm15
+ pxor %xmm13,%xmm0
+ pshufd $78,%xmm1,%xmm11
+ pxor %xmm3,%xmm10
+ pxor %xmm7,%xmm5
+ pxor %xmm8,%xmm3
+ pshufd $78,%xmm6,%xmm12
+ pxor %xmm1,%xmm11
+ pxor %xmm14,%xmm0
+ pxor %xmm9,%xmm1
+ pxor %xmm6,%xmm12
+ pxor %xmm14,%xmm5
+ pxor %xmm13,%xmm3
+ pxor %xmm13,%xmm1
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm14,%xmm1
+ pxor %xmm14,%xmm6
+ pxor %xmm12,%xmm4
+ pshufd $147,%xmm15,%xmm7 /* second stage: dword rotations by one position combined with qword swaps */
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm5,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm3,%xmm10
+ pxor %xmm9,%xmm5
+ pshufd $147,%xmm1,%xmm11
+ pxor %xmm10,%xmm3
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm1
+ pshufd $147,%xmm2,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm2
+ pxor %xmm14,%xmm4
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm1,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm3,%xmm11
+ pshufd $78,%xmm1,%xmm7
+ pxor %xmm2,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm5,%xmm10
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm2,%xmm3
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm12,%xmm8
+ pxor %xmm1,%xmm10
+ pxor %xmm14,%xmm6
+ pxor %xmm3,%xmm13
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm2
+ movdqa %xmm13,%xmm5
+ movdqa %xmm8,%xmm4
+ movdqa %xmm2,%xmm1
+ movdqa %xmm10,%xmm2
+ movdqa -16(%r11),%xmm7 /* default shuffle mask for the next round (.LBS0-16) */
+ jnz .Ldec_loop /* flags are still those of the decl above (SSE instructions leave EFLAGS alone): counter != 0 keeps the default mask */
+ movdqa -32(%r11),%xmm7 /* counter == 0: the final pass uses the mask at .LBS0-32 instead */
+ jmp .Ldec_loop
+.p2align 4 /* NOTE(review): .Ldec_done presumably starts after this alignment - confirm */
+ movdqa 0(%r11),%xmm7 /* reload the shift-by-1 and shift-by-2 masks; the exchange steps are now applied with a different register pairing */
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm2,%xmm9 /* shift-by-1 exchange on (xmm2,xmm4) and (xmm1,xmm6) */
+ psrlq $1,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $1,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm6,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm4
+ psllq $1,%xmm2
+ pxor %xmm1,%xmm6
+ psllq $1,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm5,%xmm9 /* shift-by-1 exchange on (xmm5,xmm3) and (xmm15,xmm0) */
+ psrlq $1,%xmm5
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm3,%xmm5
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm15
+ pxor %xmm5,%xmm3
+ psllq $1,%xmm5
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7 /* shift-by-4 mask */
+ movdqa %xmm6,%xmm9 /* shift-by-2 exchange on (xmm6,xmm4) and (xmm1,xmm2) */
+ psrlq $2,%xmm6
+ movdqa %xmm1,%xmm10
+ psrlq $2,%xmm1
+ pxor %xmm4,%xmm6
+ pxor %xmm2,%xmm1
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm1
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm1,%xmm2
+ psllq $2,%xmm1
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9 /* shift-by-2 exchange on (xmm0,xmm3) and (xmm15,xmm5) */
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm3,%xmm0
+ pxor %xmm5,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm3
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm5
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm3,%xmm9 /* shift-by-4 exchange on (xmm3,xmm4) and (xmm5,xmm2) */
+ psrlq $4,%xmm3
+ movdqa %xmm5,%xmm10
+ psrlq $4,%xmm5
+ pxor %xmm4,%xmm3
+ pxor %xmm2,%xmm5
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $4,%xmm3
+ pxor %xmm5,%xmm2
+ psllq $4,%xmm5
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm5
+ movdqa %xmm0,%xmm9 /* shift-by-4 exchange on (xmm0,xmm6) and (xmm15,xmm1) */
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7 /* final 16-byte key block, XORed into all eight result registers */
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ retq /* results stay in xmm15,xmm0-xmm6; the CBC caller in this file consumes them in the order xmm15,xmm0,xmm5,xmm3,xmm1,xmm6,xmm2,xmm4 */
+.def _bsaes_key_convert; .scl 3; .type 32; .endef /* _bsaes_key_convert: file-local helper that expands a byte-oriented key schedule at (%rcx) into the bit-sliced layout at (%rax): the first 16-byte round key is copied as-is, every following one becomes eight 16-byte mask vectors (128 bytes). NOTE(review): label-definition lines are not visible in this view - confirm against the full file. */
+.p2align 4 /* in: rcx = source key schedule, rax = destination, r10d = round count; out: rax = one past the last written vector, xmm6 = last round key loaded (not yet stored), xmm7 = constant from .Lmasks+80 */
+ leaq .Lmasks(%rip),%r11 /* r11 = base of the .Lmasks constant table (defined outside this view) */
+ movdqu (%rcx),%xmm7 /* round key 0 (unaligned source) */
+ leaq 16(%rcx),%rcx
+ movdqa 0(%r11),%xmm0 /* xmm0-xmm3: four bit-selection masks; they are shifted left by 4 inside the loop to cover the other four bit positions */
+ movdqa 16(%r11),%xmm1
+ movdqa 32(%r11),%xmm2
+ movdqa 48(%r11),%xmm3
+ movdqa 64(%r11),%xmm4 /* byte-shuffle mask applied to every round key before slicing */
+ pcmpeqd %xmm5,%xmm5 /* xmm5 = all ones, used below to invert selected vectors */
+ movdqu (%rcx),%xmm6 /* preload round key 1 */
+ movdqa %xmm7,(%rax) /* round key 0 is stored unmodified */
+ leaq 16(%rax),%rax
+ decl %r10d /* the loop below therefore runs (rounds - 1) times */
+ jmp .Lkey_loop
+.p2align 4 /* NOTE(review): .Lkey_loop presumably starts after this alignment - confirm */
+.byte 102,15,56,0,244 /* pshufb %xmm4,%xmm6 */
+ movdqa %xmm0,%xmm8 /* for each mask m: v = pcmpeqb(key & m, m), i.e. 0xff in every byte whose selected bit is set */
+ movdqa %xmm1,%xmm9
+ pand %xmm6,%xmm8
+ pand %xmm6,%xmm9
+ movdqa %xmm2,%xmm10
+ pcmpeqb %xmm0,%xmm8
+ psllq $4,%xmm0 /* move the mask to the matching bit in the high nibble */
+ movdqa %xmm3,%xmm11
+ pcmpeqb %xmm1,%xmm9
+ psllq $4,%xmm1
+ pand %xmm6,%xmm10
+ pand %xmm6,%xmm11
+ movdqa %xmm0,%xmm12
+ pcmpeqb %xmm2,%xmm10
+ psllq $4,%xmm2
+ movdqa %xmm1,%xmm13
+ pcmpeqb %xmm3,%xmm11
+ psllq $4,%xmm3
+ movdqa %xmm2,%xmm14
+ movdqa %xmm3,%xmm15
+ pxor %xmm5,%xmm8 /* vectors 0 and 1 are stored inverted */
+ pxor %xmm5,%xmm9
+ pand %xmm6,%xmm12
+ pand %xmm6,%xmm13
+ movdqa %xmm8,0(%rax)
+ pcmpeqb %xmm0,%xmm12
+ psrlq $4,%xmm0 /* restore the mask for the next iteration */
+ movdqa %xmm9,16(%rax)
+ pcmpeqb %xmm1,%xmm13
+ psrlq $4,%xmm1
+ leaq 16(%rcx),%rcx /* advance the source to the following round key */
+ pand %xmm6,%xmm14
+ pand %xmm6,%xmm15
+ movdqa %xmm10,32(%rax)
+ pcmpeqb %xmm2,%xmm14
+ psrlq $4,%xmm2
+ movdqa %xmm11,48(%rax)
+ pcmpeqb %xmm3,%xmm15
+ psrlq $4,%xmm3
+ movdqu (%rcx),%xmm6 /* load the next round key; after the last iteration this value is what the callers pick up in xmm6 */
+ pxor %xmm5,%xmm13 /* vectors 5 and 6 are stored inverted as well */
+ pxor %xmm5,%xmm14
+ movdqa %xmm12,64(%rax)
+ movdqa %xmm13,80(%rax)
+ movdqa %xmm14,96(%rax)
+ movdqa %xmm15,112(%rax)
+ leaq 128(%rax),%rax /* eight vectors written per round key */
+ decl %r10d
+ jnz .Lkey_loop
+ movdqa 80(%r11),%xmm7 /* constant at .Lmasks+80 handed back to the caller, which combines it with a round key */
+ retq
+.globl bsaes_cbc_encrypt /* bsaes_cbc_encrypt: exported entry (Win64 register usage: rcx = input, rdx = output, r8 = byte length, r9 = key structure; two more arguments on the stack). Only the flag == 0 case with length >= 128 is handled here, in batches of eight blocks through _bsaes_decrypt8; everything else is forwarded to asm_AES_cbc_encrypt. NOTE(review): label-definition lines are not visible in this view - confirm against the full file. */
+.def bsaes_cbc_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+ movl 48(%rsp),%r11d /* sixth argument (stack slot 48 at entry): direction flag */
+ cmpl $0,%r11d
+ jne asm_AES_cbc_encrypt /* non-zero flag: tail-jump to the generic routine with the arguments untouched */
+ cmpq $128,%r8
+ jb asm_AES_cbc_encrypt /* fewer than 128 bytes: also left to the generic routine */
+ movq %rsp,%rax
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq 160(%rsp),%r10 /* rsp is now 120 below its entry value, so this is entry 40(%rsp): the fifth argument, used below as the IV pointer */
+ leaq -160(%rsp),%rsp
+ movaps %xmm6,64(%rsp) /* save xmm6-xmm15, which are callee-saved under the Win64 convention */
+ movaps %xmm7,80(%rsp)
+ movaps %xmm8,96(%rsp)
+ movaps %xmm9,112(%rsp)
+ movaps %xmm10,128(%rsp)
+ movaps %xmm11,144(%rsp)
+ movaps %xmm12,160(%rsp)
+ movaps %xmm13,176(%rsp)
+ movaps %xmm14,192(%rsp)
+ movaps %xmm15,208(%rsp)
+ movq %rsp,%rbp /* rbp = fixed frame base; 32(%rbp) is used as a 16-byte scratch slot */
+ movl 240(%r9),%eax /* 32-bit value at offset 240 of the key structure, used as the round count */
+ movq %rcx,%r12 /* r12 = input pointer */
+ movq %rdx,%r13 /* r13 = output pointer */
+ movq %r8,%r14 /* r14 = length */
+ movq %r9,%r15 /* r15 = key structure */
+ movq %r10,%rbx /* rbx = IV pointer */
+ shrq $4,%r14 /* bytes -> 16-byte blocks */
+ movl %eax,%edx /* edx keeps the round count for every later helper call */
+ shlq $7,%rax /* stack area for the converted key: rounds * 128 - 96 bytes */
+ subq $96,%rax
+ subq %rax,%rsp
+ movq %rsp,%rax /* destination for the key conversion */
+ movq %r15,%rcx /* source key schedule */
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor (%rsp),%xmm7 /* combine the returned constant with the first stored round key and write it back at the start of the schedule */
+ movdqa %xmm6,(%rax) /* append the last round key (left in xmm6 by the helper) unmodified */
+ movdqa %xmm7,(%rsp)
+ movdqu (%rbx),%xmm14 /* xmm14 carries the chaining value (initially the caller's IV) */
+ subq $8,%r14 /* at least eight blocks are guaranteed by the length check above */
+ movdqu 0(%r12),%xmm15 /* NOTE(review): .Lcbc_dec_loop presumably starts here. Load eight input blocks */
+ movdqu 16(%r12),%xmm0
+ movdqu 32(%r12),%xmm1
+ movdqu 48(%r12),%xmm2
+ movdqu 64(%r12),%xmm3
+ movdqu 80(%r12),%xmm4
+ movq %rsp,%rax /* key schedule for the helper */
+ movdqu 96(%r12),%xmm5
+ movl %edx,%r10d /* round count for the helper */
+ movdqu 112(%r12),%xmm6
+ movdqa %xmm14,32(%rbp) /* park the chaining value; the helper overwrites xmm14 */
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15 /* first output block is XORed with the chaining value, each later one with the preceding input block */
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm2
+ movdqu 112(%r12),%xmm14 /* last input block of the batch becomes the next chaining value */
+ pxor %xmm13,%xmm4
+ movdqu %xmm15,0(%r13) /* store in the helper's output order: xmm15,xmm0,xmm5,xmm3,xmm1,xmm6,xmm2,xmm4 */
+ leaq 128(%r12),%r12
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ subq $8,%r14
+ jnc .Lcbc_dec_loop /* no borrow: at least eight more blocks remain */
+ addq $8,%r14 /* r14 = leftover block count, 0..7 */
+ jz .Lcbc_dec_done
+ movdqu 0(%r12),%xmm15 /* tail: load only as many blocks as remain, branching to the matching handler */
+ movq %rsp,%rax
+ movl %edx,%r10d
+ cmpq $2,%r14
+ jb .Lcbc_dec_one
+ movdqu 16(%r12),%xmm0
+ je .Lcbc_dec_two
+ movdqu 32(%r12),%xmm1
+ cmpq $4,%r14
+ jb .Lcbc_dec_three
+ movdqu 48(%r12),%xmm2
+ je .Lcbc_dec_four
+ movdqu 64(%r12),%xmm3
+ cmpq $6,%r14
+ jb .Lcbc_dec_five
+ movdqu 80(%r12),%xmm4
+ je .Lcbc_dec_six
+ movdqu 96(%r12),%xmm5 /* seven blocks left: falls straight through */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm14 /* seventh input block = new chaining value */
+ pxor %xmm12,%xmm2
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4 /* NOTE(review): presumably .Lcbc_dec_six */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm14 /* sixth input block = new chaining value */
+ pxor %xmm11,%xmm6
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4 /* NOTE(review): presumably .Lcbc_dec_five */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm14 /* fifth input block = new chaining value */
+ pxor %xmm10,%xmm1
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4 /* NOTE(review): presumably .Lcbc_dec_four */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm14 /* fourth input block = new chaining value */
+ pxor %xmm9,%xmm3
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4 /* NOTE(review): presumably .Lcbc_dec_three */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm14 /* third input block = new chaining value */
+ pxor %xmm8,%xmm5
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4 /* NOTE(review): presumably .Lcbc_dec_two */
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm14 /* second input block = new chaining value */
+ pxor %xmm7,%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ jmp .Lcbc_dec_done
+.p2align 4 /* NOTE(review): presumably .Lcbc_dec_one - a single block goes through the scalar asm_AES_decrypt instead of the eight-way helper */
+ leaq (%r12),%rcx /* arg1 = input block */
+ leaq 32(%rbp),%rdx /* arg2 = scratch slot receiving the result */
+ leaq (%r15),%r8 /* arg3 = original key structure */
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm14 /* result XOR chaining value */
+ movdqu %xmm14,(%r13)
+ movdqa %xmm15,%xmm14 /* the input block loaded earlier into xmm15 becomes the new chaining value */
+ movdqu %xmm14,(%rbx) /* NOTE(review): .Lcbc_dec_done presumably labels this line. Write the final chaining value back through the IV pointer */
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+ movdqa %xmm0,0(%rax) /* NOTE(review): presumably .Lcbc_dec_bzero - zero the stack from rsp up to rbp, 32 bytes per pass, wiping the converted key schedule */
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lcbc_dec_bzero
+ leaq (%rbp),%rsp
+ movaps 64(%rbp),%xmm6 /* restore the callee-saved xmm registers */
+ movaps 80(%rbp),%xmm7
+ movaps 96(%rbp),%xmm8
+ movaps 112(%rbp),%xmm9
+ movaps 128(%rbp),%xmm10
+ movaps 144(%rbp),%xmm11
+ movaps 160(%rbp),%xmm12
+ movaps 176(%rbp),%xmm13
+ movaps 192(%rbp),%xmm14
+ movaps 208(%rbp),%xmm15
+ leaq 160(%rbp),%rsp
+ movq 72(%rsp),%r15 /* reload the pushed general-purpose registers by offset rather than with pops */
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax /* saved rbp, routed through rax */
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+ retq
+.globl bsaes_ctr32_encrypt_blocks /* bsaes_ctr32_encrypt_blocks: exported entry (Win64 register usage: rcx = input, rdx = output, r8 = number of 16-byte blocks, r9 = key structure; fifth argument on the stack = pointer to the 16-byte counter block). Eight or more blocks go through the bit-sliced helper in batches of eight; fewer than eight are done one at a time with asm_AES_encrypt. NOTE(review): label-definition lines are not visible in this view - confirm against the full file. */
+.def bsaes_ctr32_encrypt_blocks; .scl 2; .type 32; .endef
+.p2align 4
+ movq %rsp,%rax
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq 160(%rsp),%r10 /* rsp is now 120 below its entry value, so this is entry 40(%rsp): the fifth argument (counter block pointer) */
+ leaq -160(%rsp),%rsp
+ movaps %xmm6,64(%rsp) /* save xmm6-xmm15, which are callee-saved under the Win64 convention */
+ movaps %xmm7,80(%rsp)
+ movaps %xmm8,96(%rsp)
+ movaps %xmm9,112(%rsp)
+ movaps %xmm10,128(%rsp)
+ movaps %xmm11,144(%rsp)
+ movaps %xmm12,160(%rsp)
+ movaps %xmm13,176(%rsp)
+ movaps %xmm14,192(%rsp)
+ movaps %xmm15,208(%rsp)
+ movq %rsp,%rbp /* rbp = fixed frame base; 32(%rbp) holds the counter block, 48(%rbp) a scratch block */
+ movdqu (%r10),%xmm0 /* initial counter block */
+ movl 240(%r9),%eax /* 32-bit value at offset 240 of the key structure, used as the round count */
+ movq %rcx,%r12 /* r12 = input pointer */
+ movq %rdx,%r13 /* r13 = output pointer */
+ movq %r8,%r14 /* r14 = block count */
+ movq %r9,%r15 /* r15 = key structure */
+ movdqa %xmm0,32(%rbp)
+ cmpq $8,%r8
+ jb .Lctr_enc_short /* fewer than eight blocks: taken before any key-schedule space is carved out, so rsp == rbp on that path */
+ movl %eax,%ebx /* ebx keeps the round count */
+ shlq $7,%rax /* stack area for the converted key: rounds * 128 - 96 bytes */
+ subq $96,%rax
+ subq %rax,%rsp
+ movq %rsp,%rax /* destination for the key conversion */
+ movq %r15,%rcx /* source key schedule */
+ movl %ebx,%r10d
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7 /* fold the returned constant into the last round key and append it to the schedule */
+ movdqa %xmm7,(%rax)
+ movdqa (%rsp),%xmm8 /* first round key */
+ leaq .LADD1(%rip),%r11 /* r11 = base of the .LADD1 constant table (defined outside this view) */
+ movdqa 32(%rbp),%xmm15 /* counter block */
+ movdqa -32(%r11),%xmm7 /* shuffle mask stored 32 bytes before .LADD1 */
+.byte 102,68,15,56,0,199 /* pshufb %xmm7,%xmm8 */
+.byte 102,68,15,56,0,255 /* pshufb %xmm7,%xmm15 */
+ movdqa %xmm8,(%rsp) /* keep the shuffled first round key in place of the original */
+ jmp .Lctr_enc_loop
+.p2align 4 /* NOTE(review): .Lctr_enc_loop presumably starts after this alignment - confirm */
+ movdqa %xmm15,32(%rbp) /* remember the counter of the first block in this batch */
+ movdqa %xmm15,%xmm0 /* xmm0-xmm6 = counter + constants at .LADD1+0 .. +96 (presumably the increments 1..7 - table not visible) */
+ movdqa %xmm15,%xmm1
+ paddd 0(%r11),%xmm0
+ movdqa %xmm15,%xmm2
+ paddd 16(%r11),%xmm1
+ movdqa %xmm15,%xmm3
+ paddd 32(%r11),%xmm2
+ movdqa %xmm15,%xmm4
+ paddd 48(%r11),%xmm3
+ movdqa %xmm15,%xmm5
+ paddd 64(%r11),%xmm4
+ movdqa %xmm15,%xmm6
+ paddd 80(%r11),%xmm5
+ paddd 96(%r11),%xmm6
+ movdqa (%rsp),%xmm8 /* the first-round key XOR and byte shuffle are done inline here, so the call below uses the helper's secondary entry point */
+ leaq 16(%rsp),%rax /* key schedule pointer positioned after the first round key */
+ movdqa -16(%r11),%xmm7 /* shuffle mask stored 16 bytes before .LADD1 */
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255 /* pshufb %xmm7,%xmm15 */
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199 /* pshufb %xmm7,%xmm0 */
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207 /* pshufb %xmm7,%xmm1 */
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215 /* pshufb %xmm7,%xmm2 */
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223 /* pshufb %xmm7,%xmm3 */
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231 /* pshufb %xmm7,%xmm4 */
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239 /* pshufb %xmm7,%xmm5 */
+ leaq .LBS0(%rip),%r11 /* the secondary entry point expects r11 = .LBS0 */
+.byte 102,15,56,0,247 /* pshufb %xmm7,%xmm6 */
+ movl %ebx,%r10d /* round count for the helper */
+ call _bsaes_encrypt8_bitslice
+ subq $8,%r14
+ jc .Lctr_enc_loop_done /* borrow: fewer than eight blocks were left, use the partial store path */
+ movdqu 0(%r12),%xmm7 /* load eight input blocks */
+ movdqu 16(%r12),%xmm8
+ movdqu 32(%r12),%xmm9
+ movdqu 48(%r12),%xmm10
+ movdqu 64(%r12),%xmm11
+ movdqu 80(%r12),%xmm12
+ movdqu 96(%r12),%xmm13
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ pxor %xmm15,%xmm7 /* XOR with the keystream; the helper's output order is xmm15,xmm0,xmm3,xmm5,xmm2,xmm6,xmm1,xmm4 */
+ movdqa 32(%rbp),%xmm15 /* reload the batch's starting counter */
+ pxor %xmm8,%xmm0
+ movdqu %xmm7,0(%r13)
+ pxor %xmm9,%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor %xmm10,%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor %xmm11,%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor %xmm12,%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor %xmm13,%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor %xmm14,%xmm4
+ movdqu %xmm1,96(%r13)
+ leaq .LADD1(%rip),%r11 /* r11 back to the counter-increment table */
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ paddd 112(%r11),%xmm15 /* advance the counter by the constant at .LADD1+112 (presumably 8 - table not visible) */
+ jnz .Lctr_enc_loop /* flags still come from the subq above (lea and SSE instructions leave EFLAGS alone): loop while blocks remain */
+ jmp .Lctr_enc_done
+.p2align 4 /* NOTE(review): .Lctr_enc_loop_done presumably starts after this alignment - confirm */
+ addq $8,%r14 /* r14 = number of blocks in the final partial batch, 1..7 */
+ movdqu 0(%r12),%xmm7
+ pxor %xmm7,%xmm15
+ movdqu %xmm15,0(%r13)
+ cmpq $2,%r14
+ jb .Lctr_enc_done
+ movdqu 16(%r12),%xmm8
+ pxor %xmm8,%xmm0
+ movdqu %xmm0,16(%r13)
+ je .Lctr_enc_done
+ movdqu 32(%r12),%xmm9
+ pxor %xmm9,%xmm3
+ movdqu %xmm3,32(%r13)
+ cmpq $4,%r14
+ jb .Lctr_enc_done
+ movdqu 48(%r12),%xmm10
+ pxor %xmm10,%xmm5
+ movdqu %xmm5,48(%r13)
+ je .Lctr_enc_done
+ movdqu 64(%r12),%xmm11
+ pxor %xmm11,%xmm2
+ movdqu %xmm2,64(%r13)
+ cmpq $6,%r14
+ jb .Lctr_enc_done
+ movdqu 80(%r12),%xmm12
+ pxor %xmm12,%xmm6
+ movdqu %xmm6,80(%r13)
+ je .Lctr_enc_done
+ movdqu 96(%r12),%xmm13
+ pxor %xmm13,%xmm1
+ movdqu %xmm1,96(%r13)
+ jmp .Lctr_enc_done
+.p2align 4 /* NOTE(review): .Lctr_enc_short presumably starts after this alignment - one block per iteration through the scalar asm_AES_encrypt */
+ leaq 32(%rbp),%rcx /* arg1 = counter block */
+ leaq 48(%rbp),%rdx /* arg2 = scratch slot receiving the keystream block */
+ leaq (%r15),%r8 /* arg3 = original key structure */
+ call asm_AES_encrypt
+ movdqu (%r12),%xmm0
+ leaq 16(%r12),%r12
+ movl 44(%rbp),%eax /* last 32 bits of the counter block, stored big-endian */
+ bswapl %eax
+ pxor 48(%rbp),%xmm0 /* input XOR keystream */
+ incl %eax /* 32-bit increment with wrap-around */
+ movdqu %xmm0,(%r13)
+ bswapl %eax
+ leaq 16(%r13),%r13
+ movl %eax,44(%rsp) /* NOTE(review): written via rsp while it was read via rbp; the two are equal on this path because it is entered before the key schedule is allocated - consider 44(%rbp) for clarity */
+ decq %r14
+ jnz .Lctr_enc_short
+ leaq (%rsp),%rax /* NOTE(review): .Lctr_enc_done presumably labels this line */
+ pxor %xmm0,%xmm0
+ movdqa %xmm0,0(%rax) /* NOTE(review): presumably .Lctr_enc_bzero - zero the stack from rsp up to rbp, 32 bytes per pass; on the short path rsp == rbp, so exactly one pass clears 0..31(%rbp) */
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lctr_enc_bzero
+ leaq (%rbp),%rsp
+ movaps 64(%rbp),%xmm6 /* restore the callee-saved xmm registers */
+ movaps 80(%rbp),%xmm7
+ movaps 96(%rbp),%xmm8
+ movaps 112(%rbp),%xmm9
+ movaps 128(%rbp),%xmm10
+ movaps 144(%rbp),%xmm11
+ movaps 160(%rbp),%xmm12
+ movaps 176(%rbp),%xmm13
+ movaps 192(%rbp),%xmm14
+ movaps 208(%rbp),%xmm15
+ leaq 160(%rbp),%rsp
+ movq 72(%rsp),%r15 /* reload the pushed general-purpose registers by offset rather than with pops */
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax /* saved rbp, routed through rax */
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+ retq
+.globl bsaes_xts_encrypt
+.def bsaes_xts_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+ movq %rsp,%rax
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq 160(%rsp),%r10
+ movq 168(%rsp),%r11
+ leaq -160(%rsp),%rsp
+ movaps %xmm6,64(%rsp)
+ movaps %xmm7,80(%rsp)
+ movaps %xmm8,96(%rsp)
+ movaps %xmm9,112(%rsp)
+ movaps %xmm10,128(%rsp)
+ movaps %xmm11,144(%rsp)
+ movaps %xmm12,160(%rsp)
+ movaps %xmm13,176(%rsp)
+ movaps %xmm14,192(%rsp)
+ movaps %xmm15,208(%rsp)
+ movq %rsp,%rbp
+ movq %rcx,%r12
+ movq %rdx,%r13
+ movq %r8,%r14
+ movq %r9,%r15
+ leaq (%r11),%rcx
+ leaq 32(%rbp),%rdx
+ leaq (%r10),%r8
+ call asm_AES_encrypt
+ movl 240(%r15),%eax
+ movq %r14,%rbx
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7
+ movdqa %xmm7,(%rax)
+ andq $-16,%r14
+ subq $128,%rsp
+ movdqa 32(%rbp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ subq $128,%r14
+ jc .Lxts_enc_short
+ jmp .Lxts_enc_loop
+.p2align 4
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp)
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ pxor %xmm14,%xmm6
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm1,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ movdqa 112(%rsp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ subq $128,%r14
+ jnc .Lxts_enc_loop
+ addq $128,%r14
+ jz .Lxts_enc_done
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je .Lxts_enc_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je .Lxts_enc_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je .Lxts_enc_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je .Lxts_enc_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je .Lxts_enc_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je .Lxts_enc_6
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp)
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm1,96(%r13)
+ leaq 112(%r13),%r13
+ movdqa 112(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.p2align 4
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+ movdqa 96(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.p2align 4
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ movdqu %xmm2,64(%r13)
+ leaq 80(%r13),%r13
+ movdqa 80(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.p2align 4
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ movdqu %xmm5,48(%r13)
+ leaq 64(%r13),%r13
+ movdqa 64(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.p2align 4
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm3,32(%r13)
+ leaq 48(%r13),%r13
+ movdqa 48(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.p2align 4
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_encrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+ movdqa 32(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.p2align 4
+ pxor %xmm15,%xmm7
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rcx
+ leaq 32(%rbp),%rdx
+ leaq (%r15),%r8
+ call asm_AES_encrypt
+ pxor 32(%rbp),%xmm15
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+ movdqa 16(%rsp),%xmm6
+ andl $15,%ebx
+ jz .Lxts_enc_ret
+ movq %r13,%rdx
+ movzbl (%r12),%eax
+ movzbl -16(%rdx),%ecx
+ leaq 1(%r12),%r12
+ movb %al,-16(%rdx)
+ movb %cl,0(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz .Lxts_enc_steal
+ movdqu -16(%r13),%xmm15
+ leaq 32(%rbp),%rcx
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rdx
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%r8
+ call asm_AES_encrypt
+ pxor 32(%rbp),%xmm6
+ movdqu %xmm6,-16(%r13)
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lxts_enc_bzero
+ leaq (%rbp),%rsp
+ movaps 64(%rbp),%xmm6
+ movaps 80(%rbp),%xmm7
+ movaps 96(%rbp),%xmm8
+ movaps 112(%rbp),%xmm9
+ movaps 128(%rbp),%xmm10
+ movaps 144(%rbp),%xmm11
+ movaps 160(%rbp),%xmm12
+ movaps 176(%rbp),%xmm13
+ movaps 192(%rbp),%xmm14
+ movaps 208(%rbp),%xmm15
+ leaq 160(%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+ retq
+/*
+ * bsaes_xts_decrypt: AES-XTS decryption on top of the 8-block bit-sliced
+ * core _bsaes_decrypt8 (Win64 calling convention).
+ *
+ * Arguments as consumed below:
+ *   %rcx  input pointer   (kept in %r12)
+ *   %rdx  output pointer  (kept in %r13)
+ *   %r8   length in bytes (kept in %r14; a copy stays in %rbx for the tail)
+ *   %r9   key schedule for the data blocks, round count at offset 240 (%r15)
+ *   5th/6th stack arguments: key schedule and 16-byte input of the single
+ *   asm_AES_encrypt call that leaves the initial tweak at 32(%rbp)
+ *   (presumably key2 and the IV of the C prototype -- confirm at the caller).
+ *
+ * Groups of eight blocks run through .Lxts_dec_loop, one to seven leftover
+ * blocks through .Lxts_dec_short, and a trailing partial block is handled by
+ * ciphertext stealing after .Lxts_dec_done.  All stack scratch is zeroed
+ * before returning.
+ */
+.globl bsaes_xts_decrypt
+.def bsaes_xts_decrypt; .scl 2; .type 32; .endef
+.p2align 4
+bsaes_xts_decrypt:
+ movq %rsp,%rax
+.Lxts_dec_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+/* 6 pushes + 72 bytes = 120, so 160/168(%rsp) are the 5th/6th arguments */
+ movq 160(%rsp),%r10
+ movq 168(%rsp),%r11
+ leaq -160(%rsp),%rsp
+/* %xmm6-%xmm15 are saved here and restored before returning */
+ movaps %xmm6,64(%rsp)
+ movaps %xmm7,80(%rsp)
+ movaps %xmm8,96(%rsp)
+ movaps %xmm9,112(%rsp)
+ movaps %xmm10,128(%rsp)
+ movaps %xmm11,144(%rsp)
+ movaps %xmm12,160(%rsp)
+ movaps %xmm13,176(%rsp)
+ movaps %xmm14,192(%rsp)
+ movaps %xmm15,208(%rsp)
+.Lxts_dec_body:
+ movq %rsp,%rbp
+ movq %rcx,%r12
+ movq %rdx,%r13
+ movq %r8,%r14
+ movq %r9,%r15
+/* initial tweak: asm_AES_encrypt(in = 6th arg, out = 32(%rbp), key = 5th arg) */
+ leaq (%r11),%rcx
+ leaq 32(%rbp),%rdx
+ leaq (%r10),%r8
+ call asm_AES_encrypt
+/* reserve rounds*128-96 bytes of stack and convert the key schedule into it */
+ movl 240(%r15),%eax
+ movq %r14,%rbx
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+/* fix up the converted schedule: slot at (%rsp) gets %xmm7 XOR itself, slot at (%rax) gets %xmm6 */
+ pxor (%rsp),%xmm7
+ movdqa %xmm6,(%rax)
+ movdqa %xmm7,(%rsp)
+/* %r14 = length rounded down to 16, minus 16 more when a partial tail exists */
+ xorl %eax,%eax
+ andq $-16,%r14
+ testl $15,%ebx
+ setnz %al
+ shlq $4,%rax
+ subq %rax,%r14
+/* 128 bytes of scratch at 0..112(%rsp) hold the eight per-block tweaks */
+ subq $128,%rsp
+ movdqa 32(%rbp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ subq $128,%r14
+ jc .Lxts_dec_short
+ jmp .Lxts_dec_loop
+.p2align 4
+/*
+ * Main loop: derive eight consecutive tweaks (each step doubles %xmm6 and
+ * folds the carry back in via .Lxts_magic), XOR them into eight input
+ * blocks, decrypt, XOR the tweaks again and store.
+ */
+.Lxts_dec_loop:
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp)
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ pxor %xmm14,%xmm6
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+/* results are consumed in register order 15,0,5,3,1,6,2,4 */
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+/* advance the tweak once more for the next group */
+ movdqa 112(%rsp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ subq $128,%r14
+ jnc .Lxts_dec_loop
+/* fewer than eight whole blocks left: dispatch on the remaining byte count */
+.Lxts_dec_short:
+ addq $128,%r14
+ jz .Lxts_dec_done
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je .Lxts_dec_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je .Lxts_dec_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je .Lxts_dec_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je .Lxts_dec_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je .Lxts_dec_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je .Lxts_dec_6
+/* seven blocks remain */
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp)
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ leaq 112(%r13),%r13
+ movdqa 112(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+.Lxts_dec_6:
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+ movdqa 96(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+.Lxts_dec_5:
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ leaq 80(%r13),%r13
+ movdqa 80(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+.Lxts_dec_4:
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ leaq 64(%r13),%r13
+ movdqa 64(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+.Lxts_dec_3:
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ leaq 48(%r13),%r13
+ movdqa 48(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+.Lxts_dec_2:
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+ call _bsaes_decrypt8
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+ movdqa 32(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.p2align 4
+/* a single block goes through the scalar asm_AES_decrypt via 32(%rbp) */
+.Lxts_dec_1:
+ pxor %xmm15,%xmm7
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rcx
+ leaq 32(%rbp),%rdx
+ leaq (%r15),%r8
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm15
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+ movdqa 16(%rsp),%xmm6
+/* %xmm6 is the next unused tweak; done unless length was not a multiple of 16 */
+.Lxts_dec_done:
+ andl $15,%ebx
+ jz .Lxts_dec_ret
+/*
+ * Ciphertext stealing: keep the current tweak in %xmm5, decrypt the last
+ * full block with the following tweak, swap in the tail bytes, then decrypt
+ * the recombined block with the tweak saved in %xmm5.
+ */
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ movdqa %xmm6,%xmm5
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ movdqu (%r12),%xmm15
+ pxor %xmm13,%xmm6
+ leaq 32(%rbp),%rcx
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rdx
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%r8
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm6
+ movq %r13,%rdx
+ movdqu %xmm6,(%r13)
+/* per tail byte: input[16+i] -> output[i], previous output[i] -> output[16+i] */
+.Lxts_dec_steal:
+ movzbl 16(%r12),%eax
+ movzbl (%rdx),%ecx
+ leaq 1(%r12),%r12
+ movb %al,(%rdx)
+ movb %cl,16(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz .Lxts_dec_steal
+ movdqu (%r13),%xmm15
+ leaq 32(%rbp),%rcx
+ pxor %xmm5,%xmm15
+ leaq 32(%rbp),%rdx
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%r8
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm5
+ movdqu %xmm5,(%r13)
+/* wipe everything between %rsp and %rbp (tweaks and converted key schedule) */
+.Lxts_dec_ret:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lxts_dec_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lxts_dec_bzero
+ leaq (%rbp),%rsp
+ movaps 64(%rbp),%xmm6
+ movaps 80(%rbp),%xmm7
+ movaps 96(%rbp),%xmm8
+ movaps 112(%rbp),%xmm9
+ movaps 128(%rbp),%xmm10
+ movaps 144(%rbp),%xmm11
+ movaps 160(%rbp),%xmm12
+ movaps 176(%rbp),%xmm13
+ movaps 192(%rbp),%xmm14
+ movaps 208(%rbp),%xmm15
+ leaq 160(%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lxts_dec_epilogue:
+ retq
+/*
+ * Constant pool of the bit-sliced AES code: byte-shuffle masks, bit-slice
+ * masks, counter increments and the XTS reduction constant, followed by
+ * the NUL-terminated attribution string.
+ * NOTE(review): within the XTS routines only .Lxts_magic is referenced; the
+ * remaining label names follow the upstream generator and should be checked
+ * against their uses in the rest of this file.
+ */
+.p2align 6
+_bsaes_const:
+.LM0ISR:
+.quad 0x0a0e0206070b0f03, 0x0004080c0d010509
+.LISRM0:
+.quad 0x01040b0e0205080f, 0x0306090c00070a0d
+.LISR:
+.quad 0x0504070602010003, 0x0f0e0d0c080b0a09
+.LBS0:
+.quad 0x5555555555555555, 0x5555555555555555
+.LBS1:
+.quad 0x3333333333333333, 0x3333333333333333
+.LBS2:
+.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+.LSR:
+.quad 0x0504070600030201, 0x0f0e0d0c0a09080b
+.LSRM0:
+.quad 0x0304090e00050a0f, 0x01060b0c0207080d
+.LM0SR:
+.quad 0x0a0e02060f03070b, 0x0004080c05090d01
+.LSWPUP:
+.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
+.LSWPUPM0SR:
+.quad 0x0a0d02060c03070b, 0x0004080f05090e01
+.LADD1:
+.quad 0x0000000000000000, 0x0000000100000000
+.LADD2:
+.quad 0x0000000000000000, 0x0000000200000000
+.LADD3:
+.quad 0x0000000000000000, 0x0000000300000000
+.LADD4:
+.quad 0x0000000000000000, 0x0000000400000000
+.LADD5:
+.quad 0x0000000000000000, 0x0000000500000000
+.LADD6:
+.quad 0x0000000000000000, 0x0000000600000000
+.LADD7:
+.quad 0x0000000000000000, 0x0000000700000000
+.LADD8:
+.quad 0x0000000000000000, 0x0000000800000000
+/* mask ANDed into the carry word when the XTS tweak is doubled */
+.Lxts_magic:
+.long 0x87,0,1,0
+.Lmasks:
+.quad 0x0101010101010101, 0x0101010101010101
+.quad 0x0202020202020202, 0x0202020202020202
+.quad 0x0404040404040404, 0x0404040404040404
+.quad 0x0808080808080808, 0x0808080808080808
+.LM0:
+.quad 0x02060a0e03070b0f, 0x0004080c0105090d
+.L63:
+.quad 0x6363636363636363, 0x6363636363636363
+/* "Bit-sliced AES for x86_64/SSSE3, Emilia Kasper, Peter Schwabe, Andy Polyakov" (UTF-8, NUL-terminated) */
+.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0
+.p2align 6
+/*
+ * se_handler: Win64 language-specific exception handler named by every
+ * .xdata entry of this file.  As used below, %r8 is the CONTEXT record and
+ * %r9 the DISPATCHER_CONTEXT; HandlerData[0] and [1] are the image-relative
+ * addresses of the guarded function's body and epilogue labels.
+ *
+ * When the faulting Rip lies between those two labels the handler rebuilds
+ * the caller's state from the frame addressed by context->Rbp (saved
+ * %xmm6-%xmm15 at +64, saved general registers at +160+72..112).  It then
+ * stores the recovered stack pointer, copies the context to
+ * disp->ContextRecord, calls RtlVirtualUnwind and returns 1
+ * (ExceptionContinueSearch).
+ */
+.def se_handler; .scl 3; .type 32; .endef
+.p2align 4
+se_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+/* context->Rax and context->Rip */
+ movq 120(%r8),%rax
+ movq 248(%r8),%rbx
+/* disp->ImageBase and disp->HandlerData */
+ movq 8(%r9),%rsi
+ movq 56(%r9),%r11
+/* Rip below the body label: still in the prologue, Rax holds the entry %rsp */
+ movl 0(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jb .Lin_prologue
+/* Rip at or past the epilogue label: context->Rsp is already the entry %rsp */
+ movq 152(%r8),%rax
+ movl 4(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lin_prologue
+/* inside the body: the frame is anchored at context->Rbp */
+ movq 160(%r8),%rax
+ leaq 64(%rax),%rsi
+ leaq 512(%r8),%rdi
+ movl $20,%ecx
+/* cld; rep movsq -- copy the ten saved xmm registers into context->Xmm6.. */
+.long 0xa548f3fc
+ leaq 160(%rax),%rax
+ movq 112(%rax),%rbp
+ movq 104(%rax),%rbx
+ movq 96(%rax),%r12
+ movq 88(%rax),%r13
+ movq 80(%rax),%r14
+ movq 72(%rax),%r15
+ leaq 120(%rax),%rax
+ movq %rbx,144(%r8)
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+ movq %r13,224(%r8)
+ movq %r14,232(%r8)
+ movq %r15,240(%r8)
+.Lin_prologue:
+/* context->Rsp = recovered stack pointer */
+ movq %rax,152(%r8)
+/* copy the whole CONTEXT (154 quadwords) to disp->ContextRecord */
+ movq 40(%r9),%rdi
+ movq %r8,%rsi
+ movl $154,%ecx
+.long 0xa548f3fc
+/* RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry, ContextRecord, */
+/*                  &HandlerData, &EstablisherFrame, NULL) */
+ movq %r9,%rsi
+ xorq %rcx,%rcx
+ movq 8(%rsi),%rdx
+ movq 0(%rsi),%r8
+ movq 16(%rsi),%r9
+ movq 40(%rsi),%r10
+ leaq 56(%rsi),%r11
+ leaq 24(%rsi),%r12
+ movq %r10,32(%rsp)
+ movq %r11,40(%rsp)
+ movq %r12,48(%rsp)
+ movq %rcx,56(%rsp)
+ call *__imp_RtlVirtualUnwind(%rip)
+ movl $1,%eax
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+/*
+ * Win64 unwind tables.  Each .pdata triple is (function start, function
+ * end, unwind info); each .xdata record names se_handler and passes the
+ * function's body/epilogue labels as HandlerData[0..1].
+ */
+.section .pdata
+.p2align 2
+.rva .Lcbc_dec_prologue
+.rva .Lcbc_dec_epilogue
+.rva .Lcbc_dec_info
+.rva .Lctr_enc_prologue
+.rva .Lctr_enc_epilogue
+.rva .Lctr_enc_info
+.rva .Lxts_enc_prologue
+.rva .Lxts_enc_epilogue
+.rva .Lxts_enc_info
+.rva .Lxts_dec_prologue
+.rva .Lxts_dec_epilogue
+.rva .Lxts_dec_info
+.section .xdata
+.p2align 3
+.Lcbc_dec_info:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lcbc_dec_body,.Lcbc_dec_epilogue
+.Lctr_enc_info:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lctr_enc_body,.Lctr_enc_epilogue
+.Lxts_enc_info:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lxts_enc_body,.Lxts_enc_epilogue
+.Lxts_dec_info:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lxts_dec_body,.Lxts_dec_epilogue
diff --git a/crypto/libressl/crypto/aes/vpaes-elf-x86_64.S b/crypto/libressl/crypto/aes/vpaes-elf-x86_64.S
new file mode 100644
index 0000000..1e1a6e8
--- /dev/null
+++ b/crypto/libressl/crypto/aes/vpaes-elf-x86_64.S
@@ -0,0 +1,832 @@
+#include "x86_arch.h"
+/*
+ * _vpaes_encrypt_core: encrypt the 16-byte block held in %xmm0, in place.
+ * Reads the key schedule through %rdx (round count at offset 240) and the
+ * constants that _vpaes_preheat leaves in %xmm9-%xmm15.
+ * Clobbers %xmm1-%xmm5, %r9, %r10, %r11 and %rax.
+ * The ".byte 102,15,56,0,N" sequences are hand-encoded SSSE3 pshufb.
+ */
+.type _vpaes_encrypt_core,@function
+.align 16
+_vpaes_encrypt_core:
+ movq %rdx,%r9
+ movq $16,%r11
+ movl 240(%rdx),%eax
+/* split the state into low/high nibbles and apply the .Lk_ipt input transform */
+ movdqa %xmm9,%xmm1
+ movdqa .Lk_ipt(%rip),%xmm2
+ pandn %xmm0,%xmm1
+ movdqu (%r9),%xmm5
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+.byte 102,15,56,0,208
+ movdqa .Lk_ipt+16(%rip),%xmm0
+.byte 102,15,56,0,193
+ pxor %xmm5,%xmm2
+ pxor %xmm2,%xmm0
+ addq $16,%r9
+ leaq .Lk_mc_backward(%rip),%r10
+ jmp .Lenc_entry
+.align 16
+/* middle of a round: table outputs combined with the round key in %xmm5 */
+.Lenc_loop:
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm5,%xmm4
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+ movdqa %xmm15,%xmm5
+.byte 102,15,56,0,234
+ movdqa -64(%r11,%r10,1),%xmm1
+ movdqa %xmm14,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm5,%xmm2
+ movdqa (%r11,%r10,1),%xmm4
+ movdqa %xmm0,%xmm3
+.byte 102,15,56,0,193
+ addq $16,%r9
+ pxor %xmm2,%xmm0
+.byte 102,15,56,0,220
+ addq $16,%r11
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,193
+ andq $48,%r11
+ pxor %xmm3,%xmm0
+/* the flags of this decrement are consumed by the jnz at the loop bottom */
+ subq $1,%rax
+/* top of a round: nibble split and lookups in the %xmm10/%xmm11 tables */
+.Lenc_entry:
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm5
+.byte 102,15,56,0,232
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm5,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224
+ pxor %xmm5,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+ movdqu (%r9),%xmm5
+.byte 102,15,56,0,220
+ pxor %xmm1,%xmm3
+ jnz .Lenc_loop
+/* last round: tables at -96/-80(%r10), then the permutation at 64(%r11,%r10) */
+ movdqa -96(%r10),%xmm4
+ movdqa -80(%r10),%xmm0
+.byte 102,15,56,0,226
+ pxor %xmm5,%xmm4
+.byte 102,15,56,0,195
+ movdqa 64(%r11,%r10,1),%xmm1
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,193
+ retq
+.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
+/*
+ * _vpaes_decrypt_core: decrypt the 16-byte block held in %xmm0, in place.
+ * Reads the key schedule through %rdx (round count at offset 240) and the
+ * constants that _vpaes_preheat leaves in %xmm9-%xmm11; the per-round
+ * tables are addressed relative to .Lk_dsbd.
+ * Clobbers %xmm1-%xmm5, %r9, %r10, %r11 and %rax.
+ * ".byte 102,15,56,0,N" is pshufb, ".byte 102,15,58,15,N,imm" is palignr.
+ */
+.type _vpaes_decrypt_core,@function
+.align 16
+_vpaes_decrypt_core:
+ movq %rdx,%r9
+ movl 240(%rdx),%eax
+ movdqa %xmm9,%xmm1
+ movdqa .Lk_dipt(%rip),%xmm2
+ pandn %xmm0,%xmm1
+/* %r11 = ((rounds << 4) ^ 48) & 48, later added to %r10 for the final permutation */
+ movq %rax,%r11
+ psrld $4,%xmm1
+ movdqu (%r9),%xmm5
+ shlq $4,%r11
+ pand %xmm9,%xmm0
+.byte 102,15,56,0,208
+ movdqa .Lk_dipt+16(%rip),%xmm0
+ xorq $48,%r11
+ leaq .Lk_dsbd(%rip),%r10
+.byte 102,15,56,0,193
+ andq $48,%r11
+ pxor %xmm5,%xmm2
+ movdqa .Lk_mc_forward+48(%rip),%xmm5
+ pxor %xmm2,%xmm0
+ addq $16,%r9
+ addq %r10,%r11
+ jmp .Ldec_entry
+.align 16
+/* four table pairs at -32, 0, 32 and 64(%r10), each followed by a shuffle with %xmm5 */
+.Ldec_loop:
+ movdqa -32(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa -16(%r10),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+ addq $16,%r9
+.byte 102,15,56,0,197
+ movdqa 0(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 16(%r10),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+/* the flags of this decrement are consumed by the jnz at the loop bottom */
+ subq $1,%rax
+.byte 102,15,56,0,197
+ movdqa 32(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 48(%r10),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,197
+ movdqa 64(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 80(%r10),%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+/* palignr $12,%xmm5,%xmm5: rotate the shuffle mask for the next round */
+.byte 102,15,58,15,237,12
+/* top of a round: nibble split and lookups in the %xmm10/%xmm11 tables */
+.Ldec_entry:
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220
+ pxor %xmm1,%xmm3
+ movdqu (%r9),%xmm0
+ jnz .Ldec_loop
+/* last round: tables at 96/112(%r10); -352(%r11) indexes into .Lk_sr */
+ movdqa 96(%r10),%xmm4
+.byte 102,15,56,0,226
+ pxor %xmm0,%xmm4
+ movdqa 112(%r10),%xmm0
+ movdqa -352(%r11),%xmm2
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,194
+ retq
+.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
+/*
+ * _vpaes_schedule_core: expand a user key into a vpaes key schedule.
+ * Inputs as consumed below:
+ *   %rdi  user key bytes
+ *   %esi  key size in bits (compared against 192 to pick the 128/192/256 path)
+ *   %rdx  where the first round key is written; advanced by the mangle steps
+ *   %rcx  zero for an encryption schedule, non-zero for a decryption schedule
+ *   %r8   byte offset into .Lk_sr used for the output permutation
+ * %xmm0-%xmm7 are cleared before returning so no key material stays in them.
+ */
+.type _vpaes_schedule_core,@function
+.align 16
+_vpaes_schedule_core:
+ call _vpaes_preheat
+ movdqa .Lk_rcon(%rip),%xmm8
+ movdqu (%rdi),%xmm0
+/* keep the raw key in %xmm3, the transformed key in %xmm0/%xmm7 */
+ movdqa %xmm0,%xmm3
+ leaq .Lk_ipt(%rip),%r11
+ call _vpaes_schedule_transform
+ movdqa %xmm0,%xmm7
+ leaq .Lk_sr(%rip),%r10
+ testq %rcx,%rcx
+ jnz .Lschedule_am_decrypting
+/* encrypting: store the transformed key as is */
+ movdqu %xmm0,(%rdx)
+ jmp .Lschedule_go
+.Lschedule_am_decrypting:
+/* decrypting: store the raw key permuted through .Lk_sr[%r8] */
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,217
+ movdqu %xmm3,(%rdx)
+ xorq $48,%r8
+.Lschedule_go:
+ cmpl $192,%esi
+ ja .Lschedule_256
+ je .Lschedule_192
+/* 128-bit key: ten rounds, one output per round */
+.Lschedule_128:
+ movl $10,%esi
+.Loop_schedule_128:
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last
+ call _vpaes_schedule_mangle
+ jmp .Loop_schedule_128
+.align 16
+/* 192-bit key: four iterations producing three outputs each (two on the last) */
+.Lschedule_192:
+ movdqu 8(%rdi),%xmm0
+ call _vpaes_schedule_transform
+ movdqa %xmm0,%xmm6
+ pxor %xmm4,%xmm4
+ movhlps %xmm4,%xmm6
+ movl $4,%esi
+.Loop_schedule_192:
+ call _vpaes_schedule_round
+.byte 102,15,58,15,198,8
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ jmp .Loop_schedule_192
+.align 16
+/* 256-bit key: seven iterations alternating a full round and a low round */
+.Lschedule_256:
+ movdqu 16(%rdi),%xmm0
+ call _vpaes_schedule_transform
+ movl $7,%esi
+.Loop_schedule_256:
+ call _vpaes_schedule_mangle
+ movdqa %xmm0,%xmm6
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last
+ call _vpaes_schedule_mangle
+/* low round on the other key half; %xmm7 is parked in %xmm5 meanwhile */
+ pshufd $255,%xmm0,%xmm0
+ movdqa %xmm7,%xmm5
+ movdqa %xmm6,%xmm7
+ call _vpaes_schedule_low_round
+ movdqa %xmm5,%xmm7
+ jmp .Loop_schedule_256
+.align 16
+/* final round key: .Lk_opt when encrypting, .Lk_deskew when decrypting */
+.Lschedule_mangle_last:
+ leaq .Lk_deskew(%rip),%r11
+ testq %rcx,%rcx
+ jnz .Lschedule_mangle_last_dec
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,193
+ leaq .Lk_opt(%rip),%r11
+ addq $32,%rdx
+.Lschedule_mangle_last_dec:
+ addq $-16,%rdx
+ pxor .Lk_s63(%rip),%xmm0
+ call _vpaes_schedule_transform
+ movdqu %xmm0,(%rdx)
+/* scrub key material from the scratch registers */
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+ retq
+.size _vpaes_schedule_core,.-_vpaes_schedule_core
+/*
+ * _vpaes_schedule_192_smear: helper of the 192-bit key schedule.
+ * XORs two dword shuffles (one of %xmm6, one of %xmm7) into %xmm6, returns
+ * the result in %xmm0 and leaves %xmm6 with its low quadword cleared.
+ * Clobbers %xmm1.
+ */
+.type _vpaes_schedule_192_smear,@function
+.align 16
+_vpaes_schedule_192_smear:
+ pshufd $128,%xmm6,%xmm0
+ pxor %xmm0,%xmm6
+ pshufd $254,%xmm7,%xmm0
+ pxor %xmm0,%xmm6
+ movdqa %xmm6,%xmm0
+ pxor %xmm1,%xmm1
+ movhlps %xmm1,%xmm6
+ retq
+.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
+/*
+ * _vpaes_schedule_round: one key-schedule round.
+ * Takes the previous round key in %xmm7, the word source in %xmm0 and the
+ * round-constant vector in %xmm8; returns the new round key in both %xmm0
+ * and %xmm7.  Uses the _vpaes_preheat constants in %xmm9-%xmm13 and
+ * clobbers %xmm1-%xmm4.
+ *
+ * _vpaes_schedule_low_round is a second entry point that skips the
+ * round-constant and rotate steps.
+ */
+.type _vpaes_schedule_round,@function
+.align 16
+_vpaes_schedule_round:
+/* palignr $15: pull the next round-constant byte out of %xmm8 and rotate %xmm8 */
+ pxor %xmm1,%xmm1
+.byte 102,65,15,58,15,200,15
+.byte 102,69,15,58,15,192,15
+ pxor %xmm1,%xmm7
+/* broadcast the top dword of %xmm0, then rotate it by one byte (palignr $1) */
+ pshufd $255,%xmm0,%xmm0
+.byte 102,15,58,15,192,1
+_vpaes_schedule_low_round:
+/* smear %xmm7: XOR it with itself shifted by 4 and then by 8 bytes */
+ movdqa %xmm7,%xmm1
+ pslldq $4,%xmm7
+ pxor %xmm1,%xmm7
+ movdqa %xmm7,%xmm1
+ pslldq $8,%xmm7
+ pxor %xmm1,%xmm7
+ pxor .Lk_s63(%rip),%xmm7
+/* nibble split and table lookups, same sequence as the cipher round top */
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220
+ pxor %xmm1,%xmm3
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195
+ pxor %xmm4,%xmm0
+/* fold in the smeared previous key */
+ pxor %xmm7,%xmm0
+ movdqa %xmm0,%xmm7
+ retq
+.size _vpaes_schedule_round,.-_vpaes_schedule_round
+/*
+ * _vpaes_schedule_transform: apply the two-table nibble transform addressed
+ * by %r11 to %xmm0.  The low nibbles index the table at (%r11), the high
+ * nibbles the table at 16(%r11), and the two lookups are XORed.
+ * Needs the nibble mask in %xmm9; clobbers %xmm1 and %xmm2.
+ */
+.type _vpaes_schedule_transform,@function
+.align 16
+_vpaes_schedule_transform:
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa (%r11),%xmm2
+.byte 102,15,56,0,208
+ movdqa 16(%r11),%xmm0
+.byte 102,15,56,0,193
+ pxor %xmm2,%xmm0
+ retq
+.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
+/*
+ * _vpaes_schedule_mangle: convert the round key in %xmm0 to its stored form
+ * and write it to the schedule.
+ *   %rcx == 0 (encrypting): %rdx advances by 16; the key is XORed with
+ *            .Lk_s63 and combined from three successive .Lk_mc_forward
+ *            shuffles.
+ *   %rcx != 0 (decrypting): %rdx moves back by 16; the key goes through the
+ *            four table pairs starting at .Lk_dksd.
+ * Both paths finish with the .Lk_sr[%r8] permutation, step %r8 back by 16
+ * (mod 64) and store to (%rdx).  %xmm0 is preserved; %xmm1-%xmm5 and %r11
+ * are clobbered.
+ */
+.type _vpaes_schedule_mangle,@function
+.align 16
+_vpaes_schedule_mangle:
+ movdqa %xmm0,%xmm4
+ movdqa .Lk_mc_forward(%rip),%xmm5
+ testq %rcx,%rcx
+ jnz .Lschedule_mangle_dec
+ addq $16,%rdx
+ pxor .Lk_s63(%rip),%xmm4
+.byte 102,15,56,0,229
+ movdqa %xmm4,%xmm3
+.byte 102,15,56,0,229
+ pxor %xmm4,%xmm3
+.byte 102,15,56,0,229
+ pxor %xmm4,%xmm3
+ jmp .Lschedule_mangle_both
+.align 16
+.Lschedule_mangle_dec:
+ leaq .Lk_dksd(%rip),%r11
+ movdqa %xmm9,%xmm1
+ pandn %xmm4,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm4
+ movdqa 0(%r11),%xmm2
+.byte 102,15,56,0,212
+ movdqa 16(%r11),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221
+ movdqa 32(%r11),%xmm2
+.byte 102,15,56,0,212
+ pxor %xmm3,%xmm2
+ movdqa 48(%r11),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221
+ movdqa 64(%r11),%xmm2
+.byte 102,15,56,0,212
+ pxor %xmm3,%xmm2
+ movdqa 80(%r11),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221
+ movdqa 96(%r11),%xmm2
+.byte 102,15,56,0,212
+ pxor %xmm3,%xmm2
+ movdqa 112(%r11),%xmm3
+.byte 102,15,56,0,217
+ pxor %xmm2,%xmm3
+ addq $-16,%rdx
+.Lschedule_mangle_both:
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,217
+ addq $-16,%r8
+ andq $48,%r8
+ movdqu %xmm3,(%rdx)
+ retq
+.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
+/*
+ * vpaes_set_encrypt_key: build an encryption key schedule.
+ *   %rdi  user key, %esi  key size in bits, %rdx  output key structure.
+ * Stores bits/32 + 5 in the round-count field at offset 240, runs
+ * _vpaes_schedule_core in encrypt mode (%rcx = 0, %r8 = 48) and returns 0.
+ */
+.globl vpaes_set_encrypt_key
+.type vpaes_set_encrypt_key,@function
+.align 16
+vpaes_set_encrypt_key:
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax
+ movl %eax,240(%rdx)
+ movl $0,%ecx
+ movl $48,%r8d
+ call _vpaes_schedule_core
+ xorl %eax,%eax
+ retq
+.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
+/*
+ * vpaes_set_decrypt_key: build a decryption key schedule.
+ *   %rdi  user key, %esi  key size in bits, %rdx  output key structure.
+ * Stores bits/32 + 5 in the round-count field at offset 240, then points
+ * %rdx at 16 + 16*rounds bytes into the structure because the decrypt
+ * schedule is written from the end backwards.  %r8 = ((bits >> 1) & 32) ^ 32
+ * selects the starting .Lk_sr entry.  Returns 0.
+ */
+.globl vpaes_set_decrypt_key
+.type vpaes_set_decrypt_key,@function
+.align 16
+vpaes_set_decrypt_key:
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax
+ movl %eax,240(%rdx)
+ shll $4,%eax
+ leaq 16(%rdx,%rax,1),%rdx
+ movl $1,%ecx
+ movl %esi,%r8d
+ shrl $1,%r8d
+ andl $32,%r8d
+ xorl $32,%r8d
+ call _vpaes_schedule_core
+ xorl %eax,%eax
+ retq
+.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
+/*
+ * vpaes_encrypt: encrypt one 16-byte block.
+ *   %rdi  input block, %rsi  output block, %rdx  key schedule (read by
+ *   _vpaes_encrypt_core).  Unaligned pointers are fine (movdqu).
+ */
+.globl vpaes_encrypt
+.type vpaes_encrypt,@function
+.align 16
+vpaes_encrypt:
+ movdqu (%rdi),%xmm0
+ call _vpaes_preheat
+ call _vpaes_encrypt_core
+ movdqu %xmm0,(%rsi)
+ retq
+.size vpaes_encrypt,.-vpaes_encrypt
+/*
+ * vpaes_decrypt: decrypt one 16-byte block.
+ *   %rdi  input block, %rsi  output block, %rdx  key schedule (read by
+ *   _vpaes_decrypt_core).  Unaligned pointers are fine (movdqu).
+ */
+.globl vpaes_decrypt
+.type vpaes_decrypt,@function
+.align 16
+vpaes_decrypt:
+ movdqu (%rdi),%xmm0
+ call _vpaes_preheat
+ call _vpaes_decrypt_core
+ movdqu %xmm0,(%rsi)
+ retq
+.size vpaes_decrypt,.-vpaes_decrypt
+/*
+ * vpaes_cbc_encrypt: CBC-mode encryption or decryption of whole blocks.
+ *   %rdi  input, %rsi  output, %rdx  length in bytes, %rcx  key schedule,
+ *   %r8   16-byte IV (updated on return), %r9d  non-zero to encrypt.
+ * Lengths below 16 return immediately without touching the IV.  Only whole
+ * 16-byte blocks are processed; a trailing partial block is ignored.
+ */
+.globl vpaes_cbc_encrypt
+.type vpaes_cbc_encrypt,@function
+.align 16
+vpaes_cbc_encrypt:
+/* after the swap %rcx is the length and %rdx the key, as the cores expect */
+ xchgq %rcx,%rdx
+ subq $16,%rcx
+ jc .Lcbc_abort
+ movdqu (%r8),%xmm6
+/* %rsi becomes output-minus-input so (%rsi,%rdi) addresses the output block */
+ subq %rdi,%rsi
+ call _vpaes_preheat
+ cmpl $0,%r9d
+ je .Lcbc_dec_loop
+ jmp .Lcbc_enc_loop
+.align 16
+.Lcbc_enc_loop:
+ movdqu (%rdi),%xmm0
+ pxor %xmm6,%xmm0
+ call _vpaes_encrypt_core
+ movdqa %xmm0,%xmm6
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi
+ subq $16,%rcx
+ jnc .Lcbc_enc_loop
+ jmp .Lcbc_done
+.align 16
+.Lcbc_dec_loop:
+ movdqu (%rdi),%xmm0
+/* keep the ciphertext block: it is the chaining value for the next block */
+ movdqa %xmm0,%xmm7
+ call _vpaes_decrypt_core
+ pxor %xmm6,%xmm0
+ movdqa %xmm7,%xmm6
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi
+ subq $16,%rcx
+ jnc .Lcbc_dec_loop
+.Lcbc_done:
+ movdqu %xmm6,(%r8)
+.Lcbc_abort:
+ retq
+.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
+/*
+ * _vpaes_preheat: load the constants the cipher and key-schedule cores
+ * expect in registers, all addressed relative to .Lk_s0F:
+ *   %xmm10/%xmm11 = the two tables right before .Lk_s0F
+ *   %xmm9         = .Lk_s0F (0x0F nibble mask)
+ *   %xmm13/%xmm12 = table pair at +48/+64
+ *   %xmm15/%xmm14 = table pair at +80/+96
+ * Clobbers %r10.
+ */
+.type _vpaes_preheat,@function
+.align 16
+_vpaes_preheat:
+ leaq .Lk_s0F(%rip),%r10
+ movdqa -32(%r10),%xmm10
+ movdqa -16(%r10),%xmm11
+ movdqa 0(%r10),%xmm9
+ movdqa 48(%r10),%xmm13
+ movdqa 64(%r10),%xmm12
+ movdqa 80(%r10),%xmm15
+ movdqa 96(%r10),%xmm14
+ retq
+.size _vpaes_preheat,.-_vpaes_preheat
+/*
+ * _vpaes_consts: lookup tables for the vector-permutation AES code.
+ * The code addresses several tables by fixed offsets from a neighbouring
+ * label (for example -32/-16/+48..+96 from .Lk_s0F, -96/-80/+64 from
+ * .Lk_mc_backward, -32..+112 from .Lk_dsbd, 0..112 from .Lk_dksd), so the
+ * order and size of the entries below must not change.
+ */
+.type _vpaes_consts,@object
+.align 64
+_vpaes_consts:
+.Lk_inv:
+.quad 0x0E05060F0D080180, 0x040703090A0B0C02
+.quad 0x01040A060F0B0780, 0x030D0E0C02050809
+.Lk_s0F:
+.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
+.Lk_ipt:
+.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
+.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
+.Lk_sb1:
+.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
+.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
+.Lk_sb2:
+.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
+.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
+.Lk_sbo:
+.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
+.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
+.Lk_mc_forward:
+.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
+.quad 0x080B0A0904070605, 0x000302010C0F0E0D
+.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
+.quad 0x000302010C0F0E0D, 0x080B0A0904070605
+.Lk_mc_backward:
+.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
+.quad 0x020100030E0D0C0F, 0x0A09080B06050407
+.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
+.quad 0x0A09080B06050407, 0x020100030E0D0C0F
+.Lk_sr:
+.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
+.quad 0x030E09040F0A0500, 0x0B06010C07020D08
+.quad 0x0F060D040B020900, 0x070E050C030A0108
+.quad 0x0B0E0104070A0D00, 0x0306090C0F020508
+.Lk_rcon:
+.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
+.Lk_s63:
+.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
+.Lk_opt:
+.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
+.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
+.Lk_deskew:
+.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
+.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
+.Lk_dksd:
+.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
+.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
+.Lk_dksb:
+.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
+.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
+.Lk_dkse:
+.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
+.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
+.Lk_dks9:
+.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
+.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
+.Lk_dipt:
+.quad 0x0F505B040B545F00, 0x154A411E114E451A
+.quad 0x86E383E660056500, 0x12771772F491F194
+.Lk_dsb9:
+.quad 0x851C03539A86D600, 0xCAD51F504F994CC9
+.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
+.Lk_dsbd:
+.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
+.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
+.Lk_dsbb:
+.quad 0xD022649296B44200, 0x602646F6B0F2D404
+.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
+.Lk_dsbe:
+.quad 0x46F2929626D4D000, 0x2242600464B4F6B0
+.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
+.Lk_dsbo:
+.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
+.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
+/* "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)", NUL-terminated */
+.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
+.align 64
+.size _vpaes_consts,.-_vpaes_consts
+/* mark the object as not needing an executable stack where the toolchain supports it */
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/crypto/libressl/crypto/aes/vpaes-macosx-x86_64.S b/crypto/libressl/crypto/aes/vpaes-macosx-x86_64.S
new file mode 100644
index 0000000..0a892a9
--- /dev/null
+++ b/crypto/libressl/crypto/aes/vpaes-macosx-x86_64.S
@@ -0,0 +1,829 @@
+#include "x86_arch.h"
+.p2align 4 /* Encryption core, presumably the target of "call _vpaes_encrypt_core" (label lines are not visible in this listing). In: %xmm0 = 16-byte block, %rdx = key structure, %xmm9-%xmm15 as set up by the preheat routine. Out: %xmm0. */
+ movq %rdx,%r9 /* %r9 walks the data at %rdx in 16-byte steps */
+ movq $16,%r11 /* %r11 = byte offset; the andq $48 below keeps it in {0,16,32,48} */
+ movl 240(%rdx),%eax /* loop count, read from offset 240 of the key structure */
+ movdqa %xmm9,%xmm1
+ movdqa L$k_ipt(%rip),%xmm2
+ pandn %xmm0,%xmm1 /* %xmm1 = %xmm0 & ~%xmm9 */
+ movdqu (%r9),%xmm5
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
+ movdqa L$k_ipt+16(%rip),%xmm0
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ pxor %xmm5,%xmm2
+ pxor %xmm2,%xmm0
+ addq $16,%r9
+ leaq L$k_mc_backward(%rip),%r10
+ jmp L$enc_entry
+.p2align 4
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm5,%xmm4
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+ movdqa %xmm15,%xmm5
+.byte 102,15,56,0,234 /* pshufb %xmm2,%xmm5 */
+ movdqa -64(%r11,%r10,1),%xmm1
+ movdqa %xmm14,%xmm2
+.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
+ pxor %xmm5,%xmm2
+ movdqa (%r11,%r10,1),%xmm4
+ movdqa %xmm0,%xmm3
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ addq $16,%r9
+ pxor %xmm2,%xmm0
+.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
+ addq $16,%r11
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ andq $48,%r11
+ pxor %xmm3,%xmm0
+ subq $1,%rax /* sets ZF for the jnz below; the SSE instructions in between do not modify flags */
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm5
+.byte 102,15,56,0,232 /* pshufb %xmm0,%xmm5 */
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm5,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
+ pxor %xmm5,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+ movdqu (%r9),%xmm5 /* next 16 bytes of key data, consumed by the following round */
+.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
+ pxor %xmm1,%xmm3
+ jnz L$enc_loop /* loops until the subq above brings %rax to zero */
+ movdqa -96(%r10),%xmm4
+ movdqa -80(%r10),%xmm0
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm5,%xmm4
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ movdqa 64(%r11,%r10,1),%xmm1
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ retq
+.p2align 4 /* Decryption core, presumably the target of "call _vpaes_decrypt_core" (label lines are not visible in this listing). In: %xmm0 = 16-byte block, %rdx = key structure, %xmm9-%xmm11 as set up by the preheat routine. Out: %xmm0. */
+ movq %rdx,%r9 /* %r9 walks the data at %rdx in 16-byte steps */
+ movl 240(%rdx),%eax /* loop count, read from offset 240 of the key structure */
+ movdqa %xmm9,%xmm1
+ movdqa L$k_dipt(%rip),%xmm2
+ pandn %xmm0,%xmm1 /* %xmm1 = %xmm0 & ~%xmm9 */
+ movq %rax,%r11
+ psrld $4,%xmm1
+ movdqu (%r9),%xmm5
+ shlq $4,%r11 /* with the xorq/andq/addq below: %r11 = L$k_dsbd + (((count*16) ^ 48) & 48) */
+ pand %xmm9,%xmm0
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
+ movdqa L$k_dipt+16(%rip),%xmm0
+ xorq $48,%r11
+ leaq L$k_dsbd(%rip),%r10
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ andq $48,%r11
+ pxor %xmm5,%xmm2
+ movdqa L$k_mc_forward+48(%rip),%xmm5
+ pxor %xmm2,%xmm0
+ addq $16,%r9
+ addq %r10,%r11
+ jmp L$dec_entry
+.p2align 4
+ movdqa -32(%r10),%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4
+ movdqa -16(%r10),%xmm0
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+ addq $16,%r9
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
+ movdqa 0(%r10),%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4
+ movdqa 16(%r10),%xmm0
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+ subq $1,%rax /* sets ZF for the jnz below; the SSE instructions in between do not modify flags */
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
+ movdqa 32(%r10),%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4
+ movdqa 48(%r10),%xmm0
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
+ movdqa 64(%r10),%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4
+ movdqa 80(%r10),%xmm0
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+.byte 102,15,58,15,237,12 /* palignr $12,%xmm5,%xmm5 */
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
+ pxor %xmm1,%xmm3
+ movdqu (%r9),%xmm0 /* next 16 bytes of key data */
+ jnz L$dec_loop /* loops until the subq above brings %rax to zero */
+ movdqa 96(%r10),%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4
+ movdqa 112(%r10),%xmm0
+ movdqa -352(%r11),%xmm2 /* table row addressed relative to the %r11 value computed at entry */
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,194 /* pshufb %xmm2,%xmm0 */
+ retq
+.p2align 4 /* Key-schedule core, presumably the target of "call _vpaes_schedule_core" (label lines are not visible in this listing). In: %rdi = input key bytes, %esi = key size in bits, %rdx = output pointer, %rcx = direction flag, %r8 = offset into L$k_sr. */
+ call _vpaes_preheat
+ movdqa L$k_rcon(%rip),%xmm8
+ movdqu (%rdi),%xmm0 /* first 16 bytes at %rdi */
+ movdqa %xmm0,%xmm3 /* untransformed copy, used on the decrypting path below */
+ leaq L$k_ipt(%rip),%r11
+ call _vpaes_schedule_transform
+ movdqa %xmm0,%xmm7
+ leaq L$k_sr(%rip),%r10
+ testq %rcx,%rcx /* %rcx is 0 when called from _vpaes_set_encrypt_key, 1 from _vpaes_set_decrypt_key */
+ jnz L$schedule_am_decrypting
+ movdqu %xmm0,(%rdx)
+ jmp L$schedule_go
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ movdqu %xmm3,(%rdx)
+ xorq $48,%r8
+ cmpl $192,%esi /* above 192 and exactly 192 branch away; anything else falls into the 10-iteration path */
+ ja L$schedule_256
+ je L$schedule_192
+ movl $10,%esi /* iteration counter for the decq/jz loop that follows */
+ call _vpaes_schedule_round
+ decq %rsi
+ jz L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+ jmp L$oop_schedule_128
+.p2align 4
+ movdqu 8(%rdi),%xmm0 /* input key bytes 8..23 */
+ call _vpaes_schedule_transform
+ movdqa %xmm0,%xmm6
+ pxor %xmm4,%xmm4
+ movhlps %xmm4,%xmm6 /* low 64 bits of %xmm6 = high 64 bits of the zeroed %xmm4, i.e. cleared */
+ movl $4,%esi /* iteration counter for the decq/jz loop that follows */
+ call _vpaes_schedule_round
+.byte 102,15,58,15,198,8 /* palignr $8,%xmm6,%xmm0 */
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_round
+ decq %rsi
+ jz L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ jmp L$oop_schedule_192
+.p2align 4
+ movdqu 16(%rdi),%xmm0 /* input key bytes 16..31 */
+ call _vpaes_schedule_transform
+ movl $7,%esi /* iteration counter for the decq/jz loop that follows */
+ call _vpaes_schedule_mangle
+ movdqa %xmm0,%xmm6
+ call _vpaes_schedule_round
+ decq %rsi
+ jz L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+ pshufd $255,%xmm0,%xmm0 /* broadcast dword 3 of %xmm0 to all four lanes */
+ movdqa %xmm7,%xmm5 /* save %xmm7 across the low-round call; restored two lines below */
+ movdqa %xmm6,%xmm7
+ call _vpaes_schedule_low_round
+ movdqa %xmm5,%xmm7
+ jmp L$oop_schedule_256
+.p2align 4
+ leaq L$k_deskew(%rip),%r11
+ testq %rcx,%rcx
+ jnz L$schedule_mangle_last_dec
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ leaq L$k_opt(%rip),%r11
+ addq $32,%rdx
+ addq $-16,%rdx
+ pxor L$k_s63(%rip),%xmm0
+ call _vpaes_schedule_transform
+ movdqu %xmm0,(%rdx) /* final 16-byte store to the output */
+ pxor %xmm0,%xmm0 /* %xmm0-%xmm7 are zeroed before returning, presumably so no key-derived data stays in registers */
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+ retq
+.p2align 4 /* Presumably the body of _vpaes_schedule_192_smear (label line not visible in this listing): mixes dwords of %xmm6 and %xmm7 into %xmm6, returns a copy in %xmm0; clobbers %xmm1. */
+ pshufd $128,%xmm6,%xmm0 /* selector 0x80: lanes = (d0,d0,d0,d2) of %xmm6 */
+ pxor %xmm0,%xmm6
+ pshufd $254,%xmm7,%xmm0 /* selector 0xFE: lanes = (d2,d3,d3,d3) of %xmm7 */
+ pxor %xmm0,%xmm6
+ movdqa %xmm6,%xmm0
+ pxor %xmm1,%xmm1
+ movhlps %xmm1,%xmm6 /* clear the low 64 bits of %xmm6 (copied from the zeroed %xmm1's high half) */
+ retq
+.p2align 4 /* Presumably the body of _vpaes_schedule_round; the _vpaes_schedule_low_round entry called elsewhere is presumably inside it (label lines not visible in this listing). Works on %xmm0/%xmm7/%xmm8, result left in both %xmm0 and %xmm7. */
+ pxor %xmm1,%xmm1
+.byte 102,65,15,58,15,200,15 /* palignr $15,%xmm8,%xmm1 */
+.byte 102,69,15,58,15,192,15 /* palignr $15,%xmm8,%xmm8 (rotates %xmm8 by 15 bytes) */
+ pxor %xmm1,%xmm7
+ pshufd $255,%xmm0,%xmm0 /* broadcast dword 3 of %xmm0 to all four lanes */
+.byte 102,15,58,15,192,1 /* palignr $1,%xmm0,%xmm0 (rotates %xmm0 by one byte) */
+ movdqa %xmm7,%xmm1
+ pslldq $4,%xmm7
+ pxor %xmm1,%xmm7
+ movdqa %xmm7,%xmm1
+ pslldq $8,%xmm7
+ pxor %xmm1,%xmm7
+ pxor L$k_s63(%rip),%xmm7
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1 /* %xmm1 = %xmm0 & ~%xmm9 */
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
+ pxor %xmm1,%xmm3
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+ pxor %xmm7,%xmm0
+ movdqa %xmm0,%xmm7
+ retq
+.p2align 4 /* Presumably the body of _vpaes_schedule_transform (label line not visible in this listing): nibble-wise table transform of %xmm0 using the two 16-byte rows at (%r11) and 16(%r11); clobbers %xmm1 and %xmm2. */
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1 /* %xmm1 = %xmm0 & ~%xmm9 */
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa (%r11),%xmm2
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2: first row indexed by the masked bytes */
+ movdqa 16(%r11),%xmm0
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0: second row indexed by the shifted bytes */
+ pxor %xmm2,%xmm0
+ retq
+.p2align 4 /* Presumably the body of _vpaes_schedule_mangle (label lines not visible in this listing): transforms the value in %xmm0, stores 16 bytes at the updated (%rdx) and steps %r8; %xmm0 itself is not written. */
+ movdqa %xmm0,%xmm4
+ movdqa L$k_mc_forward(%rip),%xmm5
+ testq %rcx,%rcx /* %rcx != 0 selects the decrypting variant */
+ jnz L$schedule_mangle_dec
+ addq $16,%rdx /* encrypting path: output pointer moves forward */
+ pxor L$k_s63(%rip),%xmm4
+.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
+ movdqa %xmm4,%xmm3
+.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
+ pxor %xmm4,%xmm3
+.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
+ pxor %xmm4,%xmm3
+ jmp L$schedule_mangle_both
+.p2align 4
+ leaq L$k_dksd(%rip),%r11 /* four table pairs are read below at offsets 0..112 from here */
+ movdqa %xmm9,%xmm1
+ pandn %xmm4,%xmm1 /* %xmm1 = %xmm4 & ~%xmm9 */
+ psrld $4,%xmm1
+ pand %xmm9,%xmm4
+ movdqa 0(%r11),%xmm2
+.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
+ movdqa 16(%r11),%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
+ movdqa 32(%r11),%xmm2
+.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
+ pxor %xmm3,%xmm2
+ movdqa 48(%r11),%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
+ movdqa 64(%r11),%xmm2
+.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
+ pxor %xmm3,%xmm2
+ movdqa 80(%r11),%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
+ movdqa 96(%r11),%xmm2
+.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
+ pxor %xmm3,%xmm2
+ movdqa 112(%r11),%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+ addq $-16,%rdx /* decrypting path: output pointer moves backward */
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ addq $-16,%r8
+ andq $48,%r8 /* %r8 steps down by 16 modulo 64 */
+ movdqu %xmm3,(%rdx)
+ retq
+.globl _vpaes_set_encrypt_key /* exported entry: %rdi = input key bytes, %esi = key size in bits, %rdx = key structure to fill; returns 0 in %eax */
+.p2align 4
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax /* %eax = bits/32 + 5 */
+ movl %eax,240(%rdx) /* stored at offset 240; the encrypt/decrypt cores read it back as their loop count */
+ movl $0,%ecx /* direction flag for the schedule core: 0 here */
+ movl $48,%r8d /* initial offset into L$k_sr used by the schedule core */
+ call _vpaes_schedule_core
+ xorl %eax,%eax /* return value 0 */
+ retq
+.globl _vpaes_set_decrypt_key /* exported entry: %rdi = input key bytes, %esi = key size in bits, %rdx = key structure to fill; returns 0 in %eax */
+.p2align 4
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax /* %eax = bits/32 + 5 */
+ movl %eax,240(%rdx) /* stored at offset 240; the encrypt/decrypt cores read it back as their loop count */
+ shll $4,%eax
+ leaq 16(%rdx,%rax,1),%rdx /* %rdx += 16 + 16*(bits/32 + 5): the schedule is written from the far end backward */
+ movl $1,%ecx /* direction flag for the schedule core: 1 here */
+ movl %esi,%r8d
+ shrl $1,%r8d
+ andl $32,%r8d
+ xorl $32,%r8d /* %r8d = ((bits >> 1) & 32) ^ 32: 32 for 128/256-bit keys, 0 for 192-bit */
+ call _vpaes_schedule_core
+ xorl %eax,%eax /* return value 0 */
+ retq
+.globl _vpaes_encrypt /* exported entry: processes one 16-byte block; %rdi = input, %rsi = output, %rdx = key structure (passed through to the core) */
+.p2align 4
+ movdqu (%rdi),%xmm0 /* unaligned load of the input block */
+ call _vpaes_preheat /* loads the constant registers %xmm9-%xmm15 */
+ call _vpaes_encrypt_core
+ movdqu %xmm0,(%rsi) /* unaligned store of the result */
+ retq
+.globl _vpaes_decrypt /* exported entry: processes one 16-byte block; %rdi = input, %rsi = output, %rdx = key structure (passed through to the core) */
+.p2align 4
+ movdqu (%rdi),%xmm0 /* unaligned load of the input block */
+ call _vpaes_preheat /* loads the constant registers %xmm9-%xmm15 */
+ call _vpaes_decrypt_core
+ movdqu %xmm0,(%rsi) /* unaligned store of the result */
+ retq
+.globl _vpaes_cbc_encrypt /* exported entry: %rdi = input, %rsi = output, %rdx = byte length, %rcx = key structure, %r8 = 16-byte chaining value (read and written back), %r9d = 0 selects the decrypt loop */
+.p2align 4
+ xchgq %rcx,%rdx /* after the swap: %rcx = length, %rdx = key structure (where the cores expect it) */
+ subq $16,%rcx
+ jc L$cbc_abort /* fewer than 16 bytes: nothing is processed */
+ movdqu (%r8),%xmm6 /* %xmm6 carries the chaining value between blocks */
+ subq %rdi,%rsi /* %rsi = output - input, so (%rsi,%rdi,1) addresses the output block */
+ call _vpaes_preheat
+ cmpl $0,%r9d
+ je L$cbc_dec_loop
+ jmp L$cbc_enc_loop
+.p2align 4
+ movdqu (%rdi),%xmm0
+ pxor %xmm6,%xmm0 /* XOR the input block with the previous output block before encrypting */
+ call _vpaes_encrypt_core
+ movdqa %xmm0,%xmm6
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi /* advance without touching flags */
+ subq $16,%rcx
+ jnc L$cbc_enc_loop /* continue while at least one full block remained before the subtraction */
+ jmp L$cbc_done
+.p2align 4
+ movdqu (%rdi),%xmm0
+ movdqa %xmm0,%xmm7 /* keep the input block: it becomes the next chaining value */
+ call _vpaes_decrypt_core
+ pxor %xmm6,%xmm0 /* XOR the core output with the previous input block */
+ movdqa %xmm7,%xmm6
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi /* advance without touching flags */
+ subq $16,%rcx
+ jnc L$cbc_dec_loop /* continue while at least one full block remained before the subtraction */
+ movdqu %xmm6,(%r8) /* write the final chaining value back to the caller's buffer */
+ retq
+.p2align 4 /* Presumably the body of _vpaes_preheat (label line not visible in this listing): loads seven 16-byte constants addressed relative to L$k_s0F into %xmm9-%xmm15 and leaves %r10 = L$k_s0F. */
+ leaq L$k_s0F(%rip),%r10
+ movdqa -32(%r10),%xmm10
+ movdqa -16(%r10),%xmm11
+ movdqa 0(%r10),%xmm9 /* the row at L$k_s0F itself; the cores use %xmm9 as a byte mask with pand/pandn */
+ movdqa 48(%r10),%xmm13
+ movdqa 64(%r10),%xmm12
+ movdqa 80(%r10),%xmm15
+ movdqa 96(%r10),%xmm14
+ retq
+.p2align 6 /* 64-byte-aligned constant tables referenced through the L$k_* symbols (their label lines are not visible in this listing); each .quad line is one 16-byte row */
+.quad 0x0E05060F0D080180, 0x040703090A0B0C02
+.quad 0x01040A060F0B0780, 0x030D0E0C02050809
+.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F /* presumably the L$k_s0F row (low-nibble mask) - confirm against the generator */
+.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
+.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
+.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
+.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
+.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
+.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
+.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
+.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
+.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
+.quad 0x080B0A0904070605, 0x000302010C0F0E0D
+.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
+.quad 0x000302010C0F0E0D, 0x080B0A0904070605
+.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
+.quad 0x020100030E0D0C0F, 0x0A09080B06050407
+.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
+.quad 0x0A09080B06050407, 0x020100030E0D0C0F
+.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
+.quad 0x030E09040F0A0500, 0x0B06010C07020D08
+.quad 0x0F060D040B020900, 0x070E050C030A0108
+.quad 0x0B0E0104070A0D00, 0x0306090C0F020508
+.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
+.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
+.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
+.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
+.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
+.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
+.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
+.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
+.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
+.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
+.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
+.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
+.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
+.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
+.quad 0x0F505B040B545F00, 0x154A411E114E451A
+.quad 0x86E383E660056500, 0x12771772F491F194
+.quad 0x851C03539A86D600, 0xCAD51F504F994CC9
+.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
+.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
+.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
+.quad 0xD022649296B44200, 0x602646F6B0F2D404
+.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
+.quad 0x46F2929626D4D000, 0x2242600464B4F6B0
+.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
+.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
+.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
+.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 /* NUL-terminated ASCII: "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" */
+.p2align 6
diff --git a/crypto/libressl/crypto/aes/vpaes-masm-x86_64.S b/crypto/libressl/crypto/aes/vpaes-masm-x86_64.S
new file mode 100644
index 0000000..e10d98d
--- /dev/null
+++ b/crypto/libressl/crypto/aes/vpaes-masm-x86_64.S
@@ -0,0 +1,1213 @@
+; 1 "crypto/aes/vpaes-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/aes/vpaes-masm-x86_64.S.tmp" 2
+; 1 "./crypto/x86_arch.h" 1
+; 16 "./crypto/x86_arch.h"
+; 40 "./crypto/x86_arch.h"
+; 3 "crypto/aes/vpaes-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+_vpaes_encrypt_core PROC PRIVATE ; encryption core. In: xmm0 = 16-byte block, rdx = key structure, xmm9-xmm15 as loaded by _vpaes_preheat. Out: xmm0. (Local label lines are not visible in this listing.)
+ mov r9,rdx ; r9 walks the data at rdx in 16-byte steps
+ mov r11,16 ; r11 = byte offset; the "and r11,030h" below keeps it in {0,16,32,48}
+ mov eax,DWORD PTR[240+rdx] ; loop count, read from offset 240 of the key structure
+ movdqa xmm1,xmm9
+ movdqa xmm2,XMMWORD PTR[$L$k_ipt]
+ pandn xmm1,xmm0 ; xmm1 = xmm0 AND NOT xmm9
+ movdqu xmm5,XMMWORD PTR[r9]
+ psrld xmm1,4
+ pand xmm0,xmm9
+DB 102,15,56,0,208 ; pshufb xmm2,xmm0
+ movdqa xmm0,XMMWORD PTR[(($L$k_ipt+16))]
+DB 102,15,56,0,193 ; pshufb xmm0,xmm1
+ pxor xmm2,xmm5
+ pxor xmm0,xmm2
+ add r9,16
+ lea r10,QWORD PTR[$L$k_mc_backward]
+ jmp $L$enc_entry
+ movdqa xmm4,xmm13
+DB 102,15,56,0,226 ; pshufb xmm4,xmm2
+ pxor xmm4,xmm5
+ movdqa xmm0,xmm12
+DB 102,15,56,0,195 ; pshufb xmm0,xmm3
+ pxor xmm0,xmm4
+ movdqa xmm5,xmm15
+DB 102,15,56,0,234 ; pshufb xmm5,xmm2
+ movdqa xmm1,XMMWORD PTR[((-64))+r10*1+r11]
+ movdqa xmm2,xmm14
+DB 102,15,56,0,211 ; pshufb xmm2,xmm3
+ pxor xmm2,xmm5
+ movdqa xmm4,XMMWORD PTR[r10*1+r11]
+ movdqa xmm3,xmm0
+DB 102,15,56,0,193 ; pshufb xmm0,xmm1
+ add r9,16
+ pxor xmm0,xmm2
+DB 102,15,56,0,220 ; pshufb xmm3,xmm4
+ add r11,16
+ pxor xmm3,xmm0
+DB 102,15,56,0,193 ; pshufb xmm0,xmm1
+ and r11,030h
+ pxor xmm0,xmm3
+ sub rax,1 ; sets ZF for the jnz below; the SSE instructions in between do not modify flags
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm0
+ psrld xmm1,4
+ pand xmm0,xmm9
+ movdqa xmm5,xmm11
+DB 102,15,56,0,232 ; pshufb xmm5,xmm0
+ pxor xmm0,xmm1
+ movdqa xmm3,xmm10
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ pxor xmm3,xmm5
+ movdqa xmm4,xmm10
+DB 102,15,56,0,224 ; pshufb xmm4,xmm0
+ pxor xmm4,xmm5
+ movdqa xmm2,xmm10
+DB 102,15,56,0,211 ; pshufb xmm2,xmm3
+ pxor xmm2,xmm0
+ movdqa xmm3,xmm10
+ movdqu xmm5,XMMWORD PTR[r9] ; next 16 bytes of key data, consumed by the following round
+DB 102,15,56,0,220 ; pshufb xmm3,xmm4
+ pxor xmm3,xmm1
+ jnz $L$enc_loop ; loops until the sub above brings rax to zero
+ movdqa xmm4,XMMWORD PTR[((-96))+r10]
+ movdqa xmm0,XMMWORD PTR[((-80))+r10]
+DB 102,15,56,0,226 ; pshufb xmm4,xmm2
+ pxor xmm4,xmm5
+DB 102,15,56,0,195 ; pshufb xmm0,xmm3
+ movdqa xmm1,XMMWORD PTR[64+r10*1+r11]
+ pxor xmm0,xmm4
+DB 102,15,56,0,193 ; pshufb xmm0,xmm1
+ DB 0F3h,0C3h ;repret
+_vpaes_encrypt_core ENDP
+_vpaes_decrypt_core PROC PRIVATE ; decryption core. In: xmm0 = 16-byte block, rdx = key structure, xmm9-xmm11 as loaded by _vpaes_preheat. Out: xmm0. (Local label lines are not visible in this listing.)
+ mov r9,rdx ; r9 walks the data at rdx in 16-byte steps
+ mov eax,DWORD PTR[240+rdx] ; loop count, read from offset 240 of the key structure
+ movdqa xmm1,xmm9
+ movdqa xmm2,XMMWORD PTR[$L$k_dipt]
+ pandn xmm1,xmm0 ; xmm1 = xmm0 AND NOT xmm9
+ mov r11,rax
+ psrld xmm1,4
+ movdqu xmm5,XMMWORD PTR[r9]
+ shl r11,4 ; with the xor/and/add below: r11 = $L$k_dsbd + (((count*16) XOR 48) AND 48)
+ pand xmm0,xmm9
+DB 102,15,56,0,208 ; pshufb xmm2,xmm0
+ movdqa xmm0,XMMWORD PTR[(($L$k_dipt+16))]
+ xor r11,030h
+ lea r10,QWORD PTR[$L$k_dsbd]
+DB 102,15,56,0,193 ; pshufb xmm0,xmm1
+ and r11,030h
+ pxor xmm2,xmm5
+ movdqa xmm5,XMMWORD PTR[(($L$k_mc_forward+48))]
+ pxor xmm0,xmm2
+ add r9,16
+ add r11,r10
+ jmp $L$dec_entry
+ movdqa xmm4,XMMWORD PTR[((-32))+r10]
+DB 102,15,56,0,226 ; pshufb xmm4,xmm2
+ pxor xmm4,xmm0
+ movdqa xmm0,XMMWORD PTR[((-16))+r10]
+DB 102,15,56,0,195 ; pshufb xmm0,xmm3
+ pxor xmm0,xmm4
+ add r9,16
+DB 102,15,56,0,197 ; pshufb xmm0,xmm5
+ movdqa xmm4,XMMWORD PTR[r10]
+DB 102,15,56,0,226 ; pshufb xmm4,xmm2
+ pxor xmm4,xmm0
+ movdqa xmm0,XMMWORD PTR[16+r10]
+DB 102,15,56,0,195 ; pshufb xmm0,xmm3
+ pxor xmm0,xmm4
+ sub rax,1 ; sets ZF for the jnz below; the SSE instructions in between do not modify flags
+DB 102,15,56,0,197 ; pshufb xmm0,xmm5
+ movdqa xmm4,XMMWORD PTR[32+r10]
+DB 102,15,56,0,226 ; pshufb xmm4,xmm2
+ pxor xmm4,xmm0
+ movdqa xmm0,XMMWORD PTR[48+r10]
+DB 102,15,56,0,195 ; pshufb xmm0,xmm3
+ pxor xmm0,xmm4
+DB 102,15,56,0,197 ; pshufb xmm0,xmm5
+ movdqa xmm4,XMMWORD PTR[64+r10]
+DB 102,15,56,0,226 ; pshufb xmm4,xmm2
+ pxor xmm4,xmm0
+ movdqa xmm0,XMMWORD PTR[80+r10]
+DB 102,15,56,0,195 ; pshufb xmm0,xmm3
+ pxor xmm0,xmm4
+DB 102,15,58,15,237,12 ; palignr xmm5,xmm5,12
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm0
+ psrld xmm1,4
+ pand xmm0,xmm9
+ movdqa xmm2,xmm11
+DB 102,15,56,0,208 ; pshufb xmm2,xmm0
+ pxor xmm0,xmm1
+ movdqa xmm3,xmm10
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ pxor xmm3,xmm2
+ movdqa xmm4,xmm10
+DB 102,15,56,0,224 ; pshufb xmm4,xmm0
+ pxor xmm4,xmm2
+ movdqa xmm2,xmm10
+DB 102,15,56,0,211 ; pshufb xmm2,xmm3
+ pxor xmm2,xmm0
+ movdqa xmm3,xmm10
+DB 102,15,56,0,220 ; pshufb xmm3,xmm4
+ pxor xmm3,xmm1
+ movdqu xmm0,XMMWORD PTR[r9] ; next 16 bytes of key data
+ jnz $L$dec_loop ; loops until the sub above brings rax to zero
+ movdqa xmm4,XMMWORD PTR[96+r10]
+DB 102,15,56,0,226 ; pshufb xmm4,xmm2
+ pxor xmm4,xmm0
+ movdqa xmm0,XMMWORD PTR[112+r10]
+ movdqa xmm2,XMMWORD PTR[((-352))+r11] ; table row addressed relative to the r11 value computed at entry
+DB 102,15,56,0,195 ; pshufb xmm0,xmm3
+ pxor xmm0,xmm4
+DB 102,15,56,0,194 ; pshufb xmm0,xmm2
+ DB 0F3h,0C3h ;repret
+_vpaes_decrypt_core ENDP
+_vpaes_schedule_core PROC PRIVATE ; key-schedule core. In: rdi = input key bytes, esi = key size in bits, rdx = output pointer, rcx = direction flag, r8 = offset into $L$k_sr. (Local label lines are not visible in this listing.)
+ call _vpaes_preheat
+ movdqa xmm8,XMMWORD PTR[$L$k_rcon]
+ movdqu xmm0,XMMWORD PTR[rdi] ; first 16 bytes at rdi
+ movdqa xmm3,xmm0 ; untransformed copy, used on the decrypting path below
+ lea r11,QWORD PTR[$L$k_ipt]
+ call _vpaes_schedule_transform
+ movdqa xmm7,xmm0
+ lea r10,QWORD PTR[$L$k_sr]
+ test rcx,rcx ; rcx is 0 when called from vpaes_set_encrypt_key, 1 from vpaes_set_decrypt_key
+ jnz $L$schedule_am_decrypting
+ movdqu XMMWORD PTR[rdx],xmm0
+ jmp $L$schedule_go
+ movdqa xmm1,XMMWORD PTR[r10*1+r8]
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ movdqu XMMWORD PTR[rdx],xmm3
+ xor r8,030h
+ cmp esi,192 ; above 192 and exactly 192 branch away; anything else falls into the 10-iteration path
+ ja $L$schedule_256
+ je $L$schedule_192
+ mov esi,10 ; iteration counter for the dec/jz loop that follows
+ call _vpaes_schedule_round
+ dec rsi
+ jz $L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+ jmp $L$oop_schedule_128
+ movdqu xmm0,XMMWORD PTR[8+rdi] ; input key bytes 8..23
+ call _vpaes_schedule_transform
+ movdqa xmm6,xmm0
+ pxor xmm4,xmm4
+ movhlps xmm6,xmm4 ; low 64 bits of xmm6 = high 64 bits of the zeroed xmm4, i.e. cleared
+ mov esi,4 ; iteration counter for the dec/jz loop that follows
+ call _vpaes_schedule_round
+DB 102,15,58,15,198,8 ; palignr xmm0,xmm6,8
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_round
+ dec rsi
+ jz $L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ jmp $L$oop_schedule_192
+ movdqu xmm0,XMMWORD PTR[16+rdi] ; input key bytes 16..31
+ call _vpaes_schedule_transform
+ mov esi,7 ; iteration counter for the dec/jz loop that follows
+ call _vpaes_schedule_mangle
+ movdqa xmm6,xmm0
+ call _vpaes_schedule_round
+ dec rsi
+ jz $L$schedule_mangle_last
+ call _vpaes_schedule_mangle
+ pshufd xmm0,xmm0,0FFh ; broadcast dword 3 of xmm0 to all four lanes
+ movdqa xmm5,xmm7 ; save xmm7 across the low-round call; restored two lines below
+ movdqa xmm7,xmm6
+ call _vpaes_schedule_low_round
+ movdqa xmm7,xmm5
+ jmp $L$oop_schedule_256
+ lea r11,QWORD PTR[$L$k_deskew]
+ test rcx,rcx
+ jnz $L$schedule_mangle_last_dec
+ movdqa xmm1,XMMWORD PTR[r10*1+r8]
+DB 102,15,56,0,193 ; pshufb xmm0,xmm1
+ lea r11,QWORD PTR[$L$k_opt]
+ add rdx,32
+ add rdx,-16
+ pxor xmm0,XMMWORD PTR[$L$k_s63]
+ call _vpaes_schedule_transform
+ movdqu XMMWORD PTR[rdx],xmm0 ; final 16-byte store to the output
+ pxor xmm0,xmm0 ; xmm0-xmm7 are zeroed before returning, presumably so no key-derived data stays in registers
+ pxor xmm1,xmm1
+ pxor xmm2,xmm2
+ pxor xmm3,xmm3
+ pxor xmm4,xmm4
+ pxor xmm5,xmm5
+ pxor xmm6,xmm6
+ pxor xmm7,xmm7
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_core ENDP
+_vpaes_schedule_192_smear PROC PRIVATE ; mixes dwords of xmm6 and xmm7 into xmm6 and returns a copy in xmm0; clobbers xmm1
+ pshufd xmm0,xmm6,080h ; lanes = (d0,d0,d0,d2) of xmm6
+ pxor xmm6,xmm0
+ pshufd xmm0,xmm7,0FEh ; lanes = (d2,d3,d3,d3) of xmm7
+ pxor xmm6,xmm0
+ movdqa xmm0,xmm6
+ pxor xmm1,xmm1
+ movhlps xmm6,xmm1 ; clear the low 64 bits of xmm6 (copied from the zeroed xmm1's high half)
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_192_smear ENDP
+_vpaes_schedule_round PROC PRIVATE ; one key-schedule round on xmm0/xmm7/xmm8; the result is left in both xmm0 and xmm7. The _vpaes_schedule_low_round entry called elsewhere is presumably inside this body (its label is not visible in this listing).
+ pxor xmm1,xmm1
+DB 102,65,15,58,15,200,15 ; palignr xmm1,xmm8,15
+DB 102,69,15,58,15,192,15 ; palignr xmm8,xmm8,15 (rotates xmm8 by 15 bytes)
+ pxor xmm7,xmm1
+ pshufd xmm0,xmm0,0FFh ; broadcast dword 3 of xmm0 to all four lanes
+DB 102,15,58,15,192,1 ; palignr xmm0,xmm0,1 (rotates xmm0 by one byte)
+ movdqa xmm1,xmm7
+ pslldq xmm7,4
+ pxor xmm7,xmm1
+ movdqa xmm1,xmm7
+ pslldq xmm7,8
+ pxor xmm7,xmm1
+ pxor xmm7,XMMWORD PTR[$L$k_s63]
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm0 ; xmm1 = xmm0 AND NOT xmm9
+ psrld xmm1,4
+ pand xmm0,xmm9
+ movdqa xmm2,xmm11
+DB 102,15,56,0,208 ; pshufb xmm2,xmm0
+ pxor xmm0,xmm1
+ movdqa xmm3,xmm10
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ pxor xmm3,xmm2
+ movdqa xmm4,xmm10
+DB 102,15,56,0,224 ; pshufb xmm4,xmm0
+ pxor xmm4,xmm2
+ movdqa xmm2,xmm10
+DB 102,15,56,0,211 ; pshufb xmm2,xmm3
+ pxor xmm2,xmm0
+ movdqa xmm3,xmm10
+DB 102,15,56,0,220 ; pshufb xmm3,xmm4
+ pxor xmm3,xmm1
+ movdqa xmm4,xmm13
+DB 102,15,56,0,226 ; pshufb xmm4,xmm2
+ movdqa xmm0,xmm12
+DB 102,15,56,0,195 ; pshufb xmm0,xmm3
+ pxor xmm0,xmm4
+ pxor xmm0,xmm7
+ movdqa xmm7,xmm0
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_round ENDP
+_vpaes_schedule_transform PROC PRIVATE ; nibble-wise table transform of xmm0 using the two 16-byte rows at [r11] and [16+r11]; clobbers xmm1 and xmm2
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm0 ; xmm1 = xmm0 AND NOT xmm9
+ psrld xmm1,4
+ pand xmm0,xmm9
+ movdqa xmm2,XMMWORD PTR[r11]
+DB 102,15,56,0,208 ; pshufb xmm2,xmm0: first row indexed by the masked bytes
+ movdqa xmm0,XMMWORD PTR[16+r11]
+DB 102,15,56,0,193 ; pshufb xmm0,xmm1: second row indexed by the shifted bytes
+ pxor xmm0,xmm2
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_transform ENDP
+_vpaes_schedule_mangle PROC PRIVATE ; transforms the value in xmm0, stores 16 bytes at the updated [rdx] and steps r8; xmm0 itself is not written. (Local label lines are not visible in this listing.)
+ movdqa xmm4,xmm0
+ movdqa xmm5,XMMWORD PTR[$L$k_mc_forward]
+ test rcx,rcx ; rcx != 0 selects the decrypting variant
+ jnz $L$schedule_mangle_dec
+ add rdx,16 ; encrypting path: output pointer moves forward
+ pxor xmm4,XMMWORD PTR[$L$k_s63]
+DB 102,15,56,0,229 ; pshufb xmm4,xmm5
+ movdqa xmm3,xmm4
+DB 102,15,56,0,229 ; pshufb xmm4,xmm5
+ pxor xmm3,xmm4
+DB 102,15,56,0,229 ; pshufb xmm4,xmm5
+ pxor xmm3,xmm4
+ jmp $L$schedule_mangle_both
+ lea r11,QWORD PTR[$L$k_dksd] ; four table pairs are read below at offsets 0..112 from here
+ movdqa xmm1,xmm9
+ pandn xmm1,xmm4 ; xmm1 = xmm4 AND NOT xmm9
+ psrld xmm1,4
+ pand xmm4,xmm9
+ movdqa xmm2,XMMWORD PTR[r11]
+DB 102,15,56,0,212 ; pshufb xmm2,xmm4
+ movdqa xmm3,XMMWORD PTR[16+r11]
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ pxor xmm3,xmm2
+DB 102,15,56,0,221 ; pshufb xmm3,xmm5
+ movdqa xmm2,XMMWORD PTR[32+r11]
+DB 102,15,56,0,212 ; pshufb xmm2,xmm4
+ pxor xmm2,xmm3
+ movdqa xmm3,XMMWORD PTR[48+r11]
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ pxor xmm3,xmm2
+DB 102,15,56,0,221 ; pshufb xmm3,xmm5
+ movdqa xmm2,XMMWORD PTR[64+r11]
+DB 102,15,56,0,212 ; pshufb xmm2,xmm4
+ pxor xmm2,xmm3
+ movdqa xmm3,XMMWORD PTR[80+r11]
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ pxor xmm3,xmm2
+DB 102,15,56,0,221 ; pshufb xmm3,xmm5
+ movdqa xmm2,XMMWORD PTR[96+r11]
+DB 102,15,56,0,212 ; pshufb xmm2,xmm4
+ pxor xmm2,xmm3
+ movdqa xmm3,XMMWORD PTR[112+r11]
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ pxor xmm3,xmm2
+ add rdx,-16 ; decrypting path: output pointer moves backward
+ movdqa xmm1,XMMWORD PTR[r10*1+r8]
+DB 102,15,56,0,217 ; pshufb xmm3,xmm1
+ add r8,-16
+ and r8,030h ; r8 steps down by 16 modulo 64
+ movdqu XMMWORD PTR[rdx],xmm3
+ DB 0F3h,0C3h ;repret
+_vpaes_schedule_mangle ENDP
+PUBLIC vpaes_set_encrypt_key
+vpaes_set_encrypt_key PROC PUBLIC ; Win64 entry: rcx = input key bytes, edx = key size in bits, r8 = key structure to fill; returns 0 in eax
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx ; move the Win64 argument registers (rcx,rdx,r8) into rdi,rsi,rdx, which the shared body reads
+ mov rsi,rdx
+ mov rdx,r8
+ lea rsp,QWORD PTR[((-184))+rsp] ; 184-byte frame; xmm6-xmm15 (callee-saved on Win64) are saved at [16+rsp]..[160+rsp]
+ movaps XMMWORD PTR[16+rsp],xmm6
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+ mov eax,esi
+ shr eax,5
+ add eax,5 ; eax = bits/32 + 5
+ mov DWORD PTR[240+rdx],eax ; stored at offset 240; the encrypt/decrypt cores read it back as their loop count
+ mov ecx,0 ; direction flag for _vpaes_schedule_core: 0 here
+ mov r8d,030h ; initial offset into $L$k_sr used by the schedule core
+ call _vpaes_schedule_core
+ movaps xmm6,XMMWORD PTR[16+rsp] ; restore the saved xmm registers
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+ xor eax,eax ; return value 0
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+vpaes_set_encrypt_key ENDP
+PUBLIC vpaes_set_decrypt_key
+vpaes_set_decrypt_key PROC PUBLIC ; Win64 entry: rcx = input key bytes, edx = key size in bits, r8 = key structure to fill; returns 0 in eax
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx ; move the Win64 argument registers (rcx,rdx,r8) into rdi,rsi,rdx, which the shared body reads
+ mov rsi,rdx
+ mov rdx,r8
+ lea rsp,QWORD PTR[((-184))+rsp] ; 184-byte frame; xmm6-xmm15 (callee-saved on Win64) are saved at [16+rsp]..[160+rsp]
+ movaps XMMWORD PTR[16+rsp],xmm6
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+ mov eax,esi
+ shr eax,5
+ add eax,5 ; eax = bits/32 + 5
+ mov DWORD PTR[240+rdx],eax ; stored at offset 240; the encrypt/decrypt cores read it back as their loop count
+ shl eax,4
+ lea rdx,QWORD PTR[16+rax*1+rdx] ; rdx += 16 + 16*(bits/32 + 5): the schedule is written from the far end backward
+ mov ecx,1 ; direction flag for _vpaes_schedule_core: 1 here
+ mov r8d,esi
+ shr r8d,1
+ and r8d,32
+ xor r8d,32 ; r8d = ((bits >> 1) AND 32) XOR 32: 32 for 128/256-bit keys, 0 for 192-bit
+ call _vpaes_schedule_core
+ movaps xmm6,XMMWORD PTR[16+rsp] ; restore the saved xmm registers
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+ xor eax,eax ; return value 0
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+vpaes_set_decrypt_key ENDP
+PUBLIC vpaes_encrypt
+vpaes_encrypt PROC PUBLIC ; Win64 entry: processes one 16-byte block; rcx = input, rdx = output, r8 = key structure
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx ; move the Win64 argument registers (rcx,rdx,r8) into rdi,rsi,rdx, which the shared body reads
+ mov rsi,rdx
+ mov rdx,r8
+ lea rsp,QWORD PTR[((-184))+rsp] ; 184-byte frame; xmm6-xmm15 (callee-saved on Win64) are saved at [16+rsp]..[160+rsp]
+ movaps XMMWORD PTR[16+rsp],xmm6
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+ movdqu xmm0,XMMWORD PTR[rdi] ; unaligned load of the input block
+ call _vpaes_preheat ; loads the constant registers xmm9-xmm15
+ call _vpaes_encrypt_core
+ movdqu XMMWORD PTR[rsi],xmm0 ; unaligned store of the result
+ movaps xmm6,XMMWORD PTR[16+rsp] ; restore the saved xmm registers
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+vpaes_encrypt ENDP
+PUBLIC vpaes_decrypt
+vpaes_decrypt PROC PUBLIC ; Win64 entry: processes one 16-byte block; rcx = input, rdx = output, r8 = key structure
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx ; move the Win64 argument registers (rcx,rdx,r8) into rdi,rsi,rdx, which the shared body reads
+ mov rsi,rdx
+ mov rdx,r8
+ lea rsp,QWORD PTR[((-184))+rsp] ; 184-byte frame; xmm6-xmm15 (callee-saved on Win64) are saved at [16+rsp]..[160+rsp]
+ movaps XMMWORD PTR[16+rsp],xmm6
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+ movdqu xmm0,XMMWORD PTR[rdi] ; unaligned load of the input block
+ call _vpaes_preheat ; loads the constant registers xmm9-xmm15
+ call _vpaes_decrypt_core
+ movdqu XMMWORD PTR[rsi],xmm0 ; unaligned store of the result
+ movaps xmm6,XMMWORD PTR[16+rsp] ; restore the saved xmm registers
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+vpaes_decrypt ENDP
+PUBLIC vpaes_cbc_encrypt
+vpaes_cbc_encrypt PROC PUBLIC ; Win64 entry: rcx = input, rdx = output, r8 = byte length, r9 = key structure, [40+rsp] = 16-byte chaining value (read and written back), [48+rsp] = 0 selects the decrypt loop. (Local label lines are not visible in this listing.)
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx ; move the four register arguments into rdi,rsi,rdx,rcx, which the shared body reads
+ mov rsi,rdx
+ mov rdx,r8
+ mov rcx,r9
+ mov r8,QWORD PTR[40+rsp] ; fifth and sixth arguments come from the caller's stack
+ mov r9,QWORD PTR[48+rsp]
+ xchg rdx,rcx ; after the swap: rcx = length, rdx = key structure (where the cores expect it)
+ sub rcx,16
+ jc $L$cbc_abort ; fewer than 16 bytes: nothing is processed
+ lea rsp,QWORD PTR[((-184))+rsp] ; 184-byte frame; xmm6-xmm15 (callee-saved on Win64) are saved at [16+rsp]..[160+rsp]
+ movaps XMMWORD PTR[16+rsp],xmm6
+ movaps XMMWORD PTR[32+rsp],xmm7
+ movaps XMMWORD PTR[48+rsp],xmm8
+ movaps XMMWORD PTR[64+rsp],xmm9
+ movaps XMMWORD PTR[80+rsp],xmm10
+ movaps XMMWORD PTR[96+rsp],xmm11
+ movaps XMMWORD PTR[112+rsp],xmm12
+ movaps XMMWORD PTR[128+rsp],xmm13
+ movaps XMMWORD PTR[144+rsp],xmm14
+ movaps XMMWORD PTR[160+rsp],xmm15
+ movdqu xmm6,XMMWORD PTR[r8] ; xmm6 carries the chaining value between blocks
+ sub rsi,rdi ; rsi = output - input, so [rdi*1+rsi] addresses the output block
+ call _vpaes_preheat
+ cmp r9d,0
+ je $L$cbc_dec_loop
+ jmp $L$cbc_enc_loop
+ movdqu xmm0,XMMWORD PTR[rdi]
+ pxor xmm0,xmm6 ; XOR the input block with the previous output block before encrypting
+ call _vpaes_encrypt_core
+ movdqa xmm6,xmm0
+ movdqu XMMWORD PTR[rdi*1+rsi],xmm0
+ lea rdi,QWORD PTR[16+rdi] ; advance without touching flags
+ sub rcx,16
+ jnc $L$cbc_enc_loop ; continue while at least one full block remained before the subtraction
+ jmp $L$cbc_done
+ movdqu xmm0,XMMWORD PTR[rdi]
+ movdqa xmm7,xmm0 ; keep the input block: it becomes the next chaining value
+ call _vpaes_decrypt_core
+ pxor xmm0,xmm6 ; XOR the core output with the previous input block
+ movdqa xmm6,xmm7
+ movdqu XMMWORD PTR[rdi*1+rsi],xmm0
+ lea rdi,QWORD PTR[16+rdi] ; advance without touching flags
+ sub rcx,16
+ jnc $L$cbc_dec_loop ; continue while at least one full block remained before the subtraction
+ movdqu XMMWORD PTR[r8],xmm6 ; write the final chaining value back to the caller's buffer
+ movaps xmm6,XMMWORD PTR[16+rsp] ; restore the saved xmm registers
+ movaps xmm7,XMMWORD PTR[32+rsp]
+ movaps xmm8,XMMWORD PTR[48+rsp]
+ movaps xmm9,XMMWORD PTR[64+rsp]
+ movaps xmm10,XMMWORD PTR[80+rsp]
+ movaps xmm11,XMMWORD PTR[96+rsp]
+ movaps xmm12,XMMWORD PTR[112+rsp]
+ movaps xmm13,XMMWORD PTR[128+rsp]
+ movaps xmm14,XMMWORD PTR[144+rsp]
+ movaps xmm15,XMMWORD PTR[160+rsp]
+ lea rsp,QWORD PTR[184+rsp]
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+vpaes_cbc_encrypt ENDP
+_vpaes_preheat PROC PRIVATE ; loads seven 16-byte constants addressed relative to $L$k_s0F into xmm9-xmm15 and leaves r10 = $L$k_s0F
+ lea r10,QWORD PTR[$L$k_s0F]
+ movdqa xmm10,XMMWORD PTR[((-32))+r10]
+ movdqa xmm11,XMMWORD PTR[((-16))+r10]
+ movdqa xmm9,XMMWORD PTR[r10] ; the row at $L$k_s0F itself; the cores use xmm9 as a byte mask with pand/pandn
+ movdqa xmm13,XMMWORD PTR[48+r10]
+ movdqa xmm12,XMMWORD PTR[64+r10]
+ movdqa xmm15,XMMWORD PTR[80+r10]
+ movdqa xmm14,XMMWORD PTR[96+r10]
+ DB 0F3h,0C3h ;repret
+_vpaes_preheat ENDP
+ DQ 00E05060F0D080180h,0040703090A0B0C02h ; start of the constant tables referenced through the $L$k_* symbols (label lines are not visible in this listing); each DQ line is one 16-byte row
+ DQ 001040A060F0B0780h,0030D0E0C02050809h
+ DQ 00F0F0F0F0F0F0F0Fh,00F0F0F0F0F0F0F0Fh ; presumably the $L$k_s0F row (low-nibble mask) - confirm against the generator
+ DQ 0C2B2E8985A2A7000h,0CABAE09052227808h
+ DQ 04C01307D317C4D00h,0CD80B1FCB0FDCC81h
+ DQ 0B19BE18FCB503E00h,0A5DF7A6E142AF544h
+ DQ 03618D415FAE22300h,03BF7CCC10D2ED9EFh
+ DQ 0E27A93C60B712400h,05EB7E955BC982FCDh
+ DQ 069EB88400AE12900h,0C2A163C8AB82234Ah
+ DQ 0D0D26D176FBDC700h,015AABF7AC502A878h
+ DQ 0CFE474A55FBB6A00h,08E1E90D1412B35FAh
+ DQ 00407060500030201h,00C0F0E0D080B0A09h
+ DQ 0080B0A0904070605h,0000302010C0F0E0Dh
+ DQ 00C0F0E0D080B0A09h,00407060500030201h
+ DQ 0000302010C0F0E0Dh,0080B0A0904070605h
+ DQ 00605040702010003h,00E0D0C0F0A09080Bh
+ DQ 0020100030E0D0C0Fh,00A09080B06050407h
+ DQ 00E0D0C0F0A09080Bh,00605040702010003h
+ DQ 00A09080B06050407h,0020100030E0D0C0Fh
+ DQ 00706050403020100h,00F0E0D0C0B0A0908h
+ DQ 0030E09040F0A0500h,00B06010C07020D08h
+ DQ 00F060D040B020900h,0070E050C030A0108h
+ DQ 00B0E0104070A0D00h,00306090C0F020508h
+ DQ 01F8391B9AF9DEEB6h,0702A98084D7C7D81h
+ DQ 05B5B5B5B5B5B5B5Bh,05B5B5B5B5B5B5B5Bh
+ DQ 0FF9F4929D6B66000h,0F7974121DEBE6808h
+ DQ 001EDBD5150BCEC00h,0E10D5DB1B05C0CE0h
+ DQ 007E4A34047A4E300h,01DFEB95A5DBEF91Ah
+ DQ 05F36B5DC83EA6900h,02841C2ABF49D1E77h
+ DQ 0FEB91A5DA3E44700h,00740E3A45A1DBEF9h
+ DQ 041C277F4B5368300h,05FDC69EAAB289D1Eh
+ DQ 09A4FCA1F8550D500h,003D653861CC94C99h
+ DQ 0115BEDA7B6FC4A00h,0D993256F7E3482C8h
+ DQ 0D5031CCA1FC9D600h,053859A4C994F5086h
+ DQ 0A23196054FDC7BE8h,0CD5EF96A20B31487h
+ DQ 0B6116FC87ED9A700h,04AED933482255BFCh
+ DQ 04576516227143300h,08BB89FACE9DAFDCEh
+ DQ 00F505B040B545F00h,0154A411E114E451Ah
+ DQ 086E383E660056500h,012771772F491F194h
+ DQ 0851C03539A86D600h,0CAD51F504F994CC9h
+ DQ 0C03B1789ECD74900h,0725E2C9EB2FBA565h
+ DQ 07D57CCDFE6B1A200h,0F56E9B13882A4439h
+ DQ 03CE2FAF724C6CB00h,02931180D15DEEFD3h
+ DQ 0D022649296B44200h,0602646F6B0F2D404h
+ DQ 0C19498A6CD596700h,0F3FF0C3E3255AA6Bh
+ DQ 046F2929626D4D000h,02242600464B4F6B0h
+ DQ 00C55A6CDFFAAC100h,09467F36B98593E32h
+ DQ 01387EA537EF94000h,0C7AA6DB9D4943E2Dh
+ DQ 012D7560F93441D00h,0CA4B8159D8C58E9Ch
+DB 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 ; this and the next four DB lines are the NUL-terminated ASCII string "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
+DB 111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54
+DB 52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97
+DB 109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32
+DB 85,110,105,118,101,114,115,105,116,121,41,0
+EXTERN __imp_RtlVirtualUnwind:NEAR
+se_handler PROC PRIVATE ; exception handler named by the unwind records after the code segment. NOTE(review): the structure offsets annotated below follow the Win64 CONTEXT / DISPATCHER_CONTEXT layouts as commonly documented - confirm against winnt.h. (Local label lines are not visible in this listing.)
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64 ; room for the stack arguments of the RtlVirtualUnwind call below
+ mov rax,QWORD PTR[120+r8] ; presumably context->Rax
+ mov rbx,QWORD PTR[248+r8] ; presumably context->Rip
+ mov rsi,QWORD PTR[8+r9] ; presumably disp->ImageBase
+ mov r11,QWORD PTR[56+r9] ; presumably disp->HandlerData (the two RVAs stored after the handler RVA)
+ mov r10d,DWORD PTR[r11] ; first RVA: start of the function body
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$in_prologue ; faulting address before the body: skip the register restore
+ mov rax,QWORD PTR[152+r8] ; presumably context->Rsp
+ mov r10d,DWORD PTR[4+r11] ; second RVA: start of the epilogue
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_prologue ; faulting address at or past the epilogue: skip the register restore
+ lea rsi,QWORD PTR[16+rax] ; source: the xmm6-xmm15 save area of the faulting frame
+ lea rdi,QWORD PTR[512+r8] ; destination: presumably &context->Xmm6
+ mov ecx,20 ; 20 qwords = 160 bytes = ten xmm registers
+ DD 0a548f3fch ; bytes FC F3 48 A5 = cld; rep movsq
+ lea rax,QWORD PTR[184+rax] ; undo the 184-byte frame
+ mov rdi,QWORD PTR[8+rax] ; saved rdi/rsi from the WIN64 prologue slots
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax ; presumably context->Rsp
+ mov QWORD PTR[168+r8],rsi ; presumably context->Rsi
+ mov QWORD PTR[176+r8],rdi ; presumably context->Rdi
+ mov rdi,QWORD PTR[40+r9] ; presumably disp->ContextRecord
+ mov rsi,r8
+ mov ecx,154 ; 154 qwords = 1232 bytes, presumably sizeof(CONTEXT)
+ DD 0a548f3fch ; bytes FC F3 48 A5 = cld; rep movsq
+ mov rsi,r9
+ xor rcx,rcx ; first argument 0
+ mov rdx,QWORD PTR[8+rsi]
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10 ; stack arguments 5..8 for the call
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+ mov eax,1 ; return 1 (presumably ExceptionContinueSearch)
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+se_handler ENDP
+.text$ ENDS ; end of the code segment; what follows is exception/unwind metadata (the SEGMENT and label lines of these sections are not visible in this listing)
+ DD imagerel $L$SEH_begin_vpaes_set_encrypt_key ; function-table entries: begin RVA, end RVA, unwind-info RVA, one triple per public function
+ DD imagerel $L$SEH_end_vpaes_set_encrypt_key
+ DD imagerel $L$SEH_info_vpaes_set_encrypt_key
+ DD imagerel $L$SEH_begin_vpaes_set_decrypt_key
+ DD imagerel $L$SEH_end_vpaes_set_decrypt_key
+ DD imagerel $L$SEH_info_vpaes_set_decrypt_key
+ DD imagerel $L$SEH_begin_vpaes_encrypt
+ DD imagerel $L$SEH_end_vpaes_encrypt
+ DD imagerel $L$SEH_info_vpaes_encrypt
+ DD imagerel $L$SEH_begin_vpaes_decrypt
+ DD imagerel $L$SEH_end_vpaes_decrypt
+ DD imagerel $L$SEH_info_vpaes_decrypt
+ DD imagerel $L$SEH_begin_vpaes_cbc_encrypt
+ DD imagerel $L$SEH_end_vpaes_cbc_encrypt
+ DD imagerel $L$SEH_info_vpaes_cbc_encrypt
+.pdata ENDS
+DB 9,0,0,0 ; unwind-info header: version 1 with the exception-handler flag (9 = 1 | 1<<3), no prolog bytes, no unwind codes, no frame register
+ DD imagerel se_handler ; handler RVA
+ DD imagerel $L$enc_key_body,imagerel $L$enc_key_epilogue ; handler data: the body/epilogue RVAs that se_handler reads at [r11] and [4+r11]
+DB 9,0,0,0 ; same header layout as the first record
+ DD imagerel se_handler
+ DD imagerel $L$dec_key_body,imagerel $L$dec_key_epilogue
+DB 9,0,0,0 ; same header layout as the first record
+ DD imagerel se_handler
+ DD imagerel $L$enc_body,imagerel $L$enc_epilogue
+DB 9,0,0,0 ; same header layout as the first record
+ DD imagerel se_handler
+ DD imagerel $L$dec_body,imagerel $L$dec_epilogue
+DB 9,0,0,0 ; same header layout as the first record
+ DD imagerel se_handler
+ DD imagerel $L$cbc_body,imagerel $L$cbc_epilogue
+.xdata ENDS
diff --git a/crypto/libressl/crypto/aes/vpaes-mingw64-x86_64.S b/crypto/libressl/crypto/aes/vpaes-mingw64-x86_64.S
new file mode 100644
index 0000000..d6cb860
--- /dev/null
+++ b/crypto/libressl/crypto/aes/vpaes-mingw64-x86_64.S
@@ -0,0 +1,1125 @@
+#include "x86_arch.h"
+.def _vpaes_encrypt_core; .scl 3; .type 32; .endef /* file-local function symbol (COFF storage class 3, type 32).
+ * Transforms the 16-byte block held in %xmm0 using the key schedule at %rdx; the result is left in %xmm0.
+ * Relies on %xmm9-%xmm15 as loaded by _vpaes_preheat (the callers in this file invoke it first).
+ * Writes %rax, %r9, %r10, %r11 and %xmm1-%xmm5.
+ * NOTE(review): this rendering contains no label-definition lines, yet the code branches to
+ * .Lenc_entry and .Lenc_loop; the label positions cannot be read off the text shown here. */
+.p2align 4
+ movq %rdx,%r9 /* %r9 walks the key schedule in 16-byte steps */
+ movq $16,%r11
+ movl 240(%rdx),%eax /* loop counter, stored at offset 240 by vpaes_set_*_key */
+ movdqa %xmm9,%xmm1
+ movdqa .Lk_ipt(%rip),%xmm2
+ pandn %xmm0,%xmm1 /* with psrld below: high nibble of every input byte (%xmm9 is the 0x0F mask row) */
+ movdqu (%r9),%xmm5
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0 /* low nibble of every input byte */
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
+ movdqa .Lk_ipt+16(%rip),%xmm0
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ pxor %xmm5,%xmm2
+ pxor %xmm2,%xmm0
+ addq $16,%r9
+ leaq .Lk_mc_backward(%rip),%r10
+ jmp .Lenc_entry
+.p2align 4
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm5,%xmm4
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+ movdqa %xmm15,%xmm5
+.byte 102,15,56,0,234 /* pshufb %xmm2,%xmm5 */
+ movdqa -64(%r11,%r10,1),%xmm1
+ movdqa %xmm14,%xmm2
+.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
+ pxor %xmm5,%xmm2
+ movdqa (%r11,%r10,1),%xmm4
+ movdqa %xmm0,%xmm3
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ addq $16,%r9
+ pxor %xmm2,%xmm0
+.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
+ addq $16,%r11
+ pxor %xmm0,%xmm3
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ andq $48,%r11 /* keep the table offset in {0,16,32,48} */
+ pxor %xmm3,%xmm0
+ subq $1,%rax /* only SSE instructions follow until the jnz, so it tests this result */
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm5
+.byte 102,15,56,0,232 /* pshufb %xmm0,%xmm5 */
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm5,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
+ pxor %xmm5,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+ movdqu (%r9),%xmm5 /* next 16 bytes of the key schedule */
+.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
+ pxor %xmm1,%xmm3
+ jnz .Lenc_loop
+ movdqa -96(%r10),%xmm4
+ movdqa -80(%r10),%xmm0
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm5,%xmm4
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ movdqa 64(%r11,%r10,1),%xmm1
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ retq
+.def _vpaes_decrypt_core; .scl 3; .type 32; .endef /* file-local function symbol (COFF storage class 3, type 32).
+ * Inverse-direction counterpart of _vpaes_encrypt_core: block in %xmm0, key schedule at %rdx, result in %xmm0.
+ * Uses %xmm9-%xmm11 from _vpaes_preheat and the table rows around .Lk_dsbd.
+ * Writes %rax, %r9, %r10, %r11 and %xmm1-%xmm5.
+ * NOTE(review): the .Ldec_entry/.Ldec_loop label lines are not present in this rendering. */
+.p2align 4
+ movq %rdx,%r9 /* %r9 walks the key schedule in 16-byte steps */
+ movl 240(%rdx),%eax /* loop counter, stored at offset 240 by vpaes_set_*_key */
+ movdqa %xmm9,%xmm1
+ movdqa .Lk_dipt(%rip),%xmm2
+ pandn %xmm0,%xmm1
+ movq %rax,%r11
+ psrld $4,%xmm1
+ movdqu (%r9),%xmm5
+ shlq $4,%r11 /* with the xor/and below: %r11 = ((count*16) ^ 48) & 48 */
+ pand %xmm9,%xmm0
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
+ movdqa .Lk_dipt+16(%rip),%xmm0
+ xorq $48,%r11
+ leaq .Lk_dsbd(%rip),%r10
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ andq $48,%r11
+ pxor %xmm5,%xmm2
+ movdqa .Lk_mc_forward+48(%rip),%xmm5
+ pxor %xmm2,%xmm0
+ addq $16,%r9
+ addq %r10,%r11 /* %r11 becomes an absolute address; used only by the -352(%r11) load at the end */
+ jmp .Ldec_entry
+.p2align 4
+ movdqa -32(%r10),%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4
+ movdqa -16(%r10),%xmm0
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+ addq $16,%r9
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
+ movdqa 0(%r10),%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4
+ movdqa 16(%r10),%xmm0
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+ subq $1,%rax /* only SSE instructions follow until the jnz, so it tests this result */
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
+ movdqa 32(%r10),%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4
+ movdqa 48(%r10),%xmm0
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
+ movdqa 64(%r10),%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4
+ movdqa 80(%r10),%xmm0
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+.byte 102,15,58,15,237,12 /* palignr $12,%xmm5,%xmm5 */
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
+ pxor %xmm1,%xmm3
+ movdqu (%r9),%xmm0 /* next 16 bytes of the key schedule */
+ jnz .Ldec_loop
+ movdqa 96(%r10),%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ pxor %xmm0,%xmm4
+ movdqa 112(%r10),%xmm0
+ movdqa -352(%r11),%xmm2
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,194 /* pshufb %xmm2,%xmm0 */
+ retq
+.def _vpaes_schedule_core; .scl 3; .type 32; .endef /* file-local function symbol (COFF storage class 3, type 32).
+ * Key-schedule driver shared by vpaes_set_encrypt_key and vpaes_set_decrypt_key.
+ * In:  %rdi = user key bytes, %esi = key size in bits, %rdx = where round keys are written,
+ *      %rcx = 0 for the encrypt direction / non-zero for decrypt, %r8 = byte offset into the .Lk_sr rows.
+ * Zeroes %xmm0-%xmm7 before returning.
+ * NOTE(review): the .Lschedule_* and .Loop_schedule_* label lines are not present in this rendering, so
+ * the start of each branch target below is not marked. */
+.p2align 4
+ call _vpaes_preheat
+ movdqa .Lk_rcon(%rip),%xmm8
+ movdqu (%rdi),%xmm0 /* first 16 bytes of the user key */
+ movdqa %xmm0,%xmm3
+ leaq .Lk_ipt(%rip),%r11
+ call _vpaes_schedule_transform
+ movdqa %xmm0,%xmm7
+ leaq .Lk_sr(%rip),%r10
+ testq %rcx,%rcx
+ jnz .Lschedule_am_decrypting
+ movdqu %xmm0,(%rdx) /* encrypt direction: store the transformed key as the first round key */
+ jmp .Lschedule_go
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ movdqu %xmm3,(%rdx) /* decrypt direction: store the shuffled raw key instead */
+ xorq $48,%r8
+ cmpl $192,%esi /* dispatch on key size: above 192, equal to 192, otherwise fall through */
+ ja .Lschedule_256
+ je .Lschedule_192
+ movl $10,%esi /* %esi is reused as the iteration counter */
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last
+ call _vpaes_schedule_mangle
+ jmp .Loop_schedule_128
+.p2align 4
+ movdqu 8(%rdi),%xmm0 /* user key bytes 8..23 */
+ call _vpaes_schedule_transform
+ movdqa %xmm0,%xmm6
+ pxor %xmm4,%xmm4
+ movhlps %xmm4,%xmm6 /* low half of %xmm6 := high half of %xmm4 (zero) */
+ movl $4,%esi
+ call _vpaes_schedule_round
+.byte 102,15,58,15,198,8 /* palignr $8,%xmm6,%xmm0 */
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last
+ call _vpaes_schedule_mangle
+ call _vpaes_schedule_192_smear
+ jmp .Loop_schedule_192
+.p2align 4
+ movdqu 16(%rdi),%xmm0 /* user key bytes 16..31 */
+ call _vpaes_schedule_transform
+ movl $7,%esi
+ call _vpaes_schedule_mangle
+ movdqa %xmm0,%xmm6
+ call _vpaes_schedule_round
+ decq %rsi
+ jz .Lschedule_mangle_last
+ call _vpaes_schedule_mangle
+ pshufd $255,%xmm0,%xmm0 /* broadcast the top dword */
+ movdqa %xmm7,%xmm5 /* park %xmm7 while the low round runs on %xmm6 */
+ movdqa %xmm6,%xmm7
+ call _vpaes_schedule_low_round
+ movdqa %xmm5,%xmm7
+ jmp .Loop_schedule_256
+.p2align 4
+ leaq .Lk_deskew(%rip),%r11
+ testq %rcx,%rcx
+ jnz .Lschedule_mangle_last_dec
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ leaq .Lk_opt(%rip),%r11
+ addq $32,%rdx
+ addq $-16,%rdx
+ pxor .Lk_s63(%rip),%xmm0
+ call _vpaes_schedule_transform
+ movdqu %xmm0,(%rdx) /* final round key */
+ pxor %xmm0,%xmm0 /* wipe the registers that held key material */
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+ retq
+.def _vpaes_schedule_192_smear; .scl 3; .type 32; .endef /* file-local helper called only from the 192-bit path of _vpaes_schedule_core.
+ * Mixes dwords of %xmm6 and %xmm7 into %xmm6, returns the mix in %xmm0, then clears the low half of %xmm6.
+ * Writes %xmm0, %xmm1, %xmm6. */
+.p2align 4
+ pshufd $128,%xmm6,%xmm0 /* selector 0x80: dwords (0,0,0,2) of %xmm6, lowest first */
+ pxor %xmm0,%xmm6
+ pshufd $254,%xmm7,%xmm0 /* selector 0xFE: dwords (2,3,3,3) of %xmm7, lowest first */
+ pxor %xmm0,%xmm6
+ movdqa %xmm6,%xmm0
+ pxor %xmm1,%xmm1
+ movhlps %xmm1,%xmm6 /* low half of %xmm6 := 0 */
+ retq
+.def _vpaes_schedule_round; .scl 3; .type 32; .endef /* file-local function symbol (COFF storage class 3, type 32).
+ * One key-schedule step: consumes a byte of the %xmm8 constant row, rotates %xmm0, and folds the result into
+ * %xmm7; on return %xmm0 and %xmm7 both hold the new value.  Writes %xmm0-%xmm4, %xmm7, %xmm8.
+ * NOTE(review): _vpaes_schedule_core also calls _vpaes_schedule_low_round, which has no .def line of its own
+ * in this file; presumably it is a label inside this routine that this rendering dropped - confirm upstream. */
+.p2align 4
+ pxor %xmm1,%xmm1
+.byte 102,65,15,58,15,200,15 /* palignr $15,%xmm8,%xmm1 */
+.byte 102,69,15,58,15,192,15 /* palignr $15,%xmm8,%xmm8 */
+ pxor %xmm1,%xmm7
+ pshufd $255,%xmm0,%xmm0 /* broadcast the top dword */
+.byte 102,15,58,15,192,1 /* palignr $1,%xmm0,%xmm0 */
+ movdqa %xmm7,%xmm1
+ pslldq $4,%xmm7
+ pxor %xmm1,%xmm7
+ movdqa %xmm7,%xmm1
+ pslldq $8,%xmm7
+ pxor %xmm1,%xmm7
+ pxor .Lk_s63(%rip),%xmm7
+ movdqa %xmm9,%xmm1 /* nibble split, same sequence as in the encrypt/decrypt cores */
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1
+ pand %xmm9,%xmm0
+ movdqa %xmm11,%xmm2
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
+ pxor %xmm1,%xmm0
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+ movdqa %xmm10,%xmm4
+.byte 102,15,56,0,224 /* pshufb %xmm0,%xmm4 */
+ pxor %xmm2,%xmm4
+ movdqa %xmm10,%xmm2
+.byte 102,15,56,0,211 /* pshufb %xmm3,%xmm2 */
+ pxor %xmm0,%xmm2
+ movdqa %xmm10,%xmm3
+.byte 102,15,56,0,220 /* pshufb %xmm4,%xmm3 */
+ pxor %xmm1,%xmm3
+ movdqa %xmm13,%xmm4
+.byte 102,15,56,0,226 /* pshufb %xmm2,%xmm4 */
+ movdqa %xmm12,%xmm0
+.byte 102,15,56,0,195 /* pshufb %xmm3,%xmm0 */
+ pxor %xmm4,%xmm0
+ pxor %xmm7,%xmm0
+ movdqa %xmm0,%xmm7
+ retq
+.def _vpaes_schedule_transform; .scl 3; .type 32; .endef /* file-local function symbol (COFF storage class 3, type 32).
+ * Per-byte table transform of %xmm0: the low nibbles index the 16-byte row at (%r11), the high nibbles index
+ * the row at 16(%r11), and the two lookups are XORed.  In: %xmm0, %r11 = table pair, %xmm9 = 0x0F mask.
+ * Out: %xmm0.  Writes %xmm1 and %xmm2. */
+.p2align 4
+ movdqa %xmm9,%xmm1
+ pandn %xmm0,%xmm1
+ psrld $4,%xmm1 /* %xmm1 = high nibbles */
+ pand %xmm9,%xmm0 /* %xmm0 = low nibbles */
+ movdqa (%r11),%xmm2
+.byte 102,15,56,0,208 /* pshufb %xmm0,%xmm2 */
+ movdqa 16(%r11),%xmm0
+.byte 102,15,56,0,193 /* pshufb %xmm1,%xmm0 */
+ pxor %xmm2,%xmm0
+ retq
+.def _vpaes_schedule_mangle; .scl 3; .type 32; .endef /* file-local function symbol (COFF storage class 3, type 32).
+ * Converts the round key in %xmm0 to its stored form and writes it through %rdx.  %rdx is advanced by 16
+ * when %rcx is zero (encrypt direction) and moved back by 16 otherwise.  %r8 (offset into the rows at %r10)
+ * is stepped back by 16 modulo 64 on every call.  %xmm0 is preserved; writes %xmm1-%xmm5, and %r11 on the
+ * decrypt path.
+ * NOTE(review): the .Lschedule_mangle_dec/.Lschedule_mangle_both label lines are not present in this rendering. */
+.p2align 4
+ movdqa %xmm0,%xmm4 /* work on a copy */
+ movdqa .Lk_mc_forward(%rip),%xmm5
+ testq %rcx,%rcx
+ jnz .Lschedule_mangle_dec
+ addq $16,%rdx
+ pxor .Lk_s63(%rip),%xmm4
+.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
+ movdqa %xmm4,%xmm3
+.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
+ pxor %xmm4,%xmm3
+.byte 102,15,56,0,229 /* pshufb %xmm5,%xmm4 */
+ pxor %xmm4,%xmm3
+ jmp .Lschedule_mangle_both
+.p2align 4
+ leaq .Lk_dksd(%rip),%r11 /* four table pairs are applied below, at offsets 0/16, 32/48, 64/80, 96/112 */
+ movdqa %xmm9,%xmm1
+ pandn %xmm4,%xmm1
+ psrld $4,%xmm1 /* %xmm1 = high nibbles */
+ pand %xmm9,%xmm4 /* %xmm4 = low nibbles */
+ movdqa 0(%r11),%xmm2
+.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
+ movdqa 16(%r11),%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
+ movdqa 32(%r11),%xmm2
+.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
+ pxor %xmm3,%xmm2
+ movdqa 48(%r11),%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
+ movdqa 64(%r11),%xmm2
+.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
+ pxor %xmm3,%xmm2
+ movdqa 80(%r11),%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
+ movdqa 96(%r11),%xmm2
+.byte 102,15,56,0,212 /* pshufb %xmm4,%xmm2 */
+ pxor %xmm3,%xmm2
+ movdqa 112(%r11),%xmm3
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ pxor %xmm2,%xmm3
+ addq $-16,%rdx
+ movdqa (%r8,%r10,1),%xmm1
+.byte 102,15,56,0,217 /* pshufb %xmm1,%xmm3 */
+ addq $-16,%r8
+ andq $48,%r8 /* keep the row offset in {0,16,32,48} */
+ movdqu %xmm3,(%rdx) /* store the mangled round key */
+ retq
+.globl vpaes_set_encrypt_key /* Win64 entry point: builds the encrypt-direction key schedule.
+ * Arguments arrive in %rcx, %rdx, %r8 and are moved to %rdi (user key), %rsi (key size in bits) and
+ * %rdx (key schedule) for _vpaes_schedule_core.  Always returns 0 in %eax.
+ * NOTE(review): the function label and the .LSEH_begin/.Lenc_key_body/.Lenc_key_epilogue/.LSEH_end labels
+ * referenced by the .pdata/.xdata tables are not present in this rendering. */
+.def vpaes_set_encrypt_key; .scl 2; .type 32; .endef
+.p2align 4
+ movq %rdi,8(%rsp) /* park %rdi/%rsi in the 8- and 16-byte slots above the return address */
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ leaq -184(%rsp),%rsp /* 184-byte frame; %xmm6-%xmm15 are saved at 16..175 */
+ movaps %xmm6,16(%rsp)
+ movaps %xmm7,32(%rsp)
+ movaps %xmm8,48(%rsp)
+ movaps %xmm9,64(%rsp)
+ movaps %xmm10,80(%rsp)
+ movaps %xmm11,96(%rsp)
+ movaps %xmm12,112(%rsp)
+ movaps %xmm13,128(%rsp)
+ movaps %xmm14,144(%rsp)
+ movaps %xmm15,160(%rsp)
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax /* bits/32 + 5: 9, 11 or 13 for 128/192/256-bit keys */
+ movl %eax,240(%rdx) /* stored at offset 240; the encrypt/decrypt cores read it as their loop counter */
+ movl $0,%ecx /* direction flag: 0 = encrypt */
+ movl $48,%r8d
+ call _vpaes_schedule_core
+ movaps 16(%rsp),%xmm6
+ movaps 32(%rsp),%xmm7
+ movaps 48(%rsp),%xmm8
+ movaps 64(%rsp),%xmm9
+ movaps 80(%rsp),%xmm10
+ movaps 96(%rsp),%xmm11
+ movaps 112(%rsp),%xmm12
+ movaps 128(%rsp),%xmm13
+ movaps 144(%rsp),%xmm14
+ movaps 160(%rsp),%xmm15
+ leaq 184(%rsp),%rsp
+ xorl %eax,%eax /* return value 0 */
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.globl vpaes_set_decrypt_key /* Win64 entry point: builds the decrypt-direction key schedule.
+ * Arguments arrive in %rcx, %rdx, %r8 and are moved to %rdi (user key), %rsi (key size in bits) and
+ * %rdx (key schedule).  Unlike the encrypt variant, %rdx is first pointed past the last round-key slot
+ * (_vpaes_schedule_mangle steps it backwards when %rcx is non-zero).  Always returns 0 in %eax.
+ * NOTE(review): the function label and the SEH body/epilogue labels are not present in this rendering. */
+.def vpaes_set_decrypt_key; .scl 2; .type 32; .endef
+.p2align 4
+ movq %rdi,8(%rsp) /* park %rdi/%rsi in the 8- and 16-byte slots above the return address */
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ leaq -184(%rsp),%rsp /* 184-byte frame; %xmm6-%xmm15 are saved at 16..175 */
+ movaps %xmm6,16(%rsp)
+ movaps %xmm7,32(%rsp)
+ movaps %xmm8,48(%rsp)
+ movaps %xmm9,64(%rsp)
+ movaps %xmm10,80(%rsp)
+ movaps %xmm11,96(%rsp)
+ movaps %xmm12,112(%rsp)
+ movaps %xmm13,128(%rsp)
+ movaps %xmm14,144(%rsp)
+ movaps %xmm15,160(%rsp)
+ movl %esi,%eax
+ shrl $5,%eax
+ addl $5,%eax /* bits/32 + 5: 9, 11 or 13 for 128/192/256-bit keys */
+ movl %eax,240(%rdx) /* stored at offset 240 of the key structure */
+ shll $4,%eax
+ leaq 16(%rdx,%rax,1),%rdx /* %rdx = key + 16 + 16*count */
+ movl $1,%ecx /* direction flag: non-zero = decrypt */
+ movl %esi,%r8d
+ shrl $1,%r8d
+ andl $32,%r8d
+ xorl $32,%r8d /* %r8d = ((bits>>1) & 32) ^ 32: 32 for 128/256-bit keys, 0 for 192-bit */
+ call _vpaes_schedule_core
+ movaps 16(%rsp),%xmm6
+ movaps 32(%rsp),%xmm7
+ movaps 48(%rsp),%xmm8
+ movaps 64(%rsp),%xmm9
+ movaps 80(%rsp),%xmm10
+ movaps 96(%rsp),%xmm11
+ movaps 112(%rsp),%xmm12
+ movaps 128(%rsp),%xmm13
+ movaps 144(%rsp),%xmm14
+ movaps 160(%rsp),%xmm15
+ leaq 184(%rsp),%rsp
+ xorl %eax,%eax /* return value 0 */
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.globl vpaes_encrypt /* Win64 entry point: runs _vpaes_encrypt_core on a single 16-byte block.
+ * Arguments arrive in %rcx (input), %rdx (output), %r8 (key schedule) and are moved to %rdi, %rsi, %rdx.
+ * Input and output are accessed with unaligned loads/stores.  %eax is not set to a return value.
+ * NOTE(review): the function label and the SEH body/epilogue labels are not present in this rendering. */
+.def vpaes_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+ movq %rdi,8(%rsp) /* park %rdi/%rsi in the 8- and 16-byte slots above the return address */
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ leaq -184(%rsp),%rsp /* 184-byte frame; %xmm6-%xmm15 are saved at 16..175 */
+ movaps %xmm6,16(%rsp)
+ movaps %xmm7,32(%rsp)
+ movaps %xmm8,48(%rsp)
+ movaps %xmm9,64(%rsp)
+ movaps %xmm10,80(%rsp)
+ movaps %xmm11,96(%rsp)
+ movaps %xmm12,112(%rsp)
+ movaps %xmm13,128(%rsp)
+ movaps %xmm14,144(%rsp)
+ movaps %xmm15,160(%rsp)
+ movdqu (%rdi),%xmm0 /* load the input block */
+ call _vpaes_preheat /* load the constant registers the core expects */
+ call _vpaes_encrypt_core
+ movdqu %xmm0,(%rsi) /* store the result */
+ movaps 16(%rsp),%xmm6
+ movaps 32(%rsp),%xmm7
+ movaps 48(%rsp),%xmm8
+ movaps 64(%rsp),%xmm9
+ movaps 80(%rsp),%xmm10
+ movaps 96(%rsp),%xmm11
+ movaps 112(%rsp),%xmm12
+ movaps 128(%rsp),%xmm13
+ movaps 144(%rsp),%xmm14
+ movaps 160(%rsp),%xmm15
+ leaq 184(%rsp),%rsp
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.globl vpaes_decrypt /* Win64 entry point: runs _vpaes_decrypt_core on a single 16-byte block.
+ * Arguments arrive in %rcx (input), %rdx (output), %r8 (key schedule) and are moved to %rdi, %rsi, %rdx.
+ * Input and output are accessed with unaligned loads/stores.  %eax is not set to a return value.
+ * NOTE(review): the function label and the SEH body/epilogue labels are not present in this rendering. */
+.def vpaes_decrypt; .scl 2; .type 32; .endef
+.p2align 4
+ movq %rdi,8(%rsp) /* park %rdi/%rsi in the 8- and 16-byte slots above the return address */
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ leaq -184(%rsp),%rsp /* 184-byte frame; %xmm6-%xmm15 are saved at 16..175 */
+ movaps %xmm6,16(%rsp)
+ movaps %xmm7,32(%rsp)
+ movaps %xmm8,48(%rsp)
+ movaps %xmm9,64(%rsp)
+ movaps %xmm10,80(%rsp)
+ movaps %xmm11,96(%rsp)
+ movaps %xmm12,112(%rsp)
+ movaps %xmm13,128(%rsp)
+ movaps %xmm14,144(%rsp)
+ movaps %xmm15,160(%rsp)
+ movdqu (%rdi),%xmm0 /* load the input block */
+ call _vpaes_preheat /* load the constant registers the core expects */
+ call _vpaes_decrypt_core
+ movdqu %xmm0,(%rsi) /* store the result */
+ movaps 16(%rsp),%xmm6
+ movaps 32(%rsp),%xmm7
+ movaps 48(%rsp),%xmm8
+ movaps 64(%rsp),%xmm9
+ movaps 80(%rsp),%xmm10
+ movaps 96(%rsp),%xmm11
+ movaps 112(%rsp),%xmm12
+ movaps 128(%rsp),%xmm13
+ movaps 144(%rsp),%xmm14
+ movaps 160(%rsp),%xmm15
+ leaq 184(%rsp),%rsp
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.globl vpaes_cbc_encrypt /* Win64 entry point: CBC encryption or decryption over whole 16-byte blocks.
+ * Arguments: %rcx = input, %rdx = output, %r8 = length in bytes, %r9 = key schedule, 40(%rsp) = 16-byte IV
+ * (read, then overwritten with the final chaining value), 48(%rsp) = direction (0 selects decryption).
+ * A length below 16 branches to .Lcbc_abort before any data is touched; a trailing partial block is not
+ * processed, because both loops stop as soon as subtracting 16 from the remaining length borrows.
+ * NOTE(review): the labels .Lcbc_body, .Lcbc_enc_loop, .Lcbc_dec_loop, .Lcbc_done, .Lcbc_epilogue and
+ * .Lcbc_abort are referenced but their definition lines are not present in this rendering. */
+.def vpaes_cbc_encrypt; .scl 2; .type 32; .endef
+.p2align 4
+ movq %rdi,8(%rsp) /* park %rdi/%rsi in the 8- and 16-byte slots above the return address */
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ movq 40(%rsp),%r8 /* fifth argument: IV pointer */
+ movq 48(%rsp),%r9 /* sixth argument: direction flag */
+ xchgq %rcx,%rdx /* now %rcx = length, %rdx = key schedule (where the cores expect it) */
+ subq $16,%rcx
+ jc .Lcbc_abort
+ leaq -184(%rsp),%rsp /* 184-byte frame; %xmm6-%xmm15 are saved at 16..175 */
+ movaps %xmm6,16(%rsp)
+ movaps %xmm7,32(%rsp)
+ movaps %xmm8,48(%rsp)
+ movaps %xmm9,64(%rsp)
+ movaps %xmm10,80(%rsp)
+ movaps %xmm11,96(%rsp)
+ movaps %xmm12,112(%rsp)
+ movaps %xmm13,128(%rsp)
+ movaps %xmm14,144(%rsp)
+ movaps %xmm15,160(%rsp)
+ movdqu (%r8),%xmm6 /* %xmm6 carries the chaining value */
+ subq %rdi,%rsi /* %rsi = out - in, so (%rsi,%rdi,1) addresses the output while only %rdi advances */
+ call _vpaes_preheat
+ cmpl $0,%r9d
+ je .Lcbc_dec_loop
+ jmp .Lcbc_enc_loop
+.p2align 4
+ movdqu (%rdi),%xmm0
+ pxor %xmm6,%xmm0 /* encrypt path: XOR the input block with the chaining value first */
+ call _vpaes_encrypt_core
+ movdqa %xmm0,%xmm6 /* the output block becomes the next chaining value */
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi
+ subq $16,%rcx
+ jnc .Lcbc_enc_loop
+ jmp .Lcbc_done
+.p2align 4
+ movdqu (%rdi),%xmm0
+ movdqa %xmm0,%xmm7 /* decrypt path: keep the input block for the next iteration */
+ call _vpaes_decrypt_core
+ pxor %xmm6,%xmm0
+ movdqa %xmm7,%xmm6
+ movdqu %xmm0,(%rsi,%rdi,1)
+ leaq 16(%rdi),%rdi
+ subq $16,%rcx
+ jnc .Lcbc_dec_loop
+ movdqu %xmm6,(%r8) /* write the final chaining value back to the IV buffer */
+ movaps 16(%rsp),%xmm6
+ movaps 32(%rsp),%xmm7
+ movaps 48(%rsp),%xmm8
+ movaps 64(%rsp),%xmm9
+ movaps 80(%rsp),%xmm10
+ movaps 96(%rsp),%xmm11
+ movaps 112(%rsp),%xmm12
+ movaps 128(%rsp),%xmm13
+ movaps 144(%rsp),%xmm14
+ movaps 160(%rsp),%xmm15
+ leaq 184(%rsp),%rsp
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.def _vpaes_preheat; .scl 3; .type 32; .endef /* file-local function symbol (COFF storage class 3, type 32).
+ * Loads the constant rows that the cores and schedule helpers expect in %xmm9-%xmm15, addressed relative
+ * to .Lk_s0F.  Leaves %r10 pointing at .Lk_s0F. */
+.p2align 4
+ leaq .Lk_s0F(%rip),%r10
+ movdqa -32(%r10),%xmm10 /* the two rows stored immediately before .Lk_s0F */
+ movdqa -16(%r10),%xmm11
+ movdqa 0(%r10),%xmm9 /* the .Lk_s0F row itself; used throughout as the nibble mask */
+ movdqa 48(%r10),%xmm13 /* offsets 16 and 32 are skipped here (the cores load .Lk_ipt directly) */
+ movdqa 64(%r10),%xmm12
+ movdqa 80(%r10),%xmm15
+ movdqa 96(%r10),%xmm14
+ retq
+.p2align 6 /* 64-byte aligned constant pool; each .quad line below is one 16-byte row.
+ * NOTE(review): the labels that name these rows (.Lk_s0F, .Lk_ipt, .Lk_mc_forward, .Lk_mc_backward, .Lk_sr,
+ * .Lk_rcon, .Lk_s63, .Lk_opt, .Lk_deskew, .Lk_dksd, .Lk_dipt, .Lk_dsbd, ...) are referenced by the code but
+ * their definition lines are not present in this rendering, so row-to-label mapping is mostly not shown. */
+.quad 0x0E05060F0D080180, 0x040703090A0B0C02
+.quad 0x01040A060F0B0780, 0x030D0E0C02050809
+.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F /* presumably the .Lk_s0F row: _vpaes_preheat loads it into %xmm9, used as a 0x0F mask */
+.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
+.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
+.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
+.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
+.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
+.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
+.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
+.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
+.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
+.quad 0x080B0A0904070605, 0x000302010C0F0E0D
+.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
+.quad 0x000302010C0F0E0D, 0x080B0A0904070605
+.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
+.quad 0x020100030E0D0C0F, 0x0A09080B06050407
+.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
+.quad 0x0A09080B06050407, 0x020100030E0D0C0F
+.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
+.quad 0x030E09040F0A0500, 0x0B06010C07020D08
+.quad 0x0F060D040B020900, 0x070E050C030A0108
+.quad 0x0B0E0104070A0D00, 0x0306090C0F020508
+.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
+.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
+.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
+.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
+.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
+.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
+.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
+.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
+.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
+.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
+.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
+.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
+.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
+.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
+.quad 0x0F505B040B545F00, 0x154A411E114E451A
+.quad 0x86E383E660056500, 0x12771772F491F194
+.quad 0x851C03539A86D600, 0xCAD51F504F994CC9
+.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
+.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
+.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
+.quad 0xD022649296B44200, 0x602646F6B0F2D404
+.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
+.quad 0x46F2929626D4D000, 0x2242600464B4F6B0
+.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
+.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
+.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
+.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 /* NUL-terminated ASCII: "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" */
+.p2align 6
+.def se_handler; .scl 3; .type 32; .endef /* file-local Win64 language-specific exception handler, referenced from
+ * every .xdata entry in this file.  It uses %r8 and %r9 as the context-record and dispatcher-context pointers.
+ * It compares the faulting instruction pointer with the two RVAs stored as handler data, patches the
+ * context accordingly, copies it to the dispatcher's context record, calls RtlVirtualUnwind and returns 1.
+ * The structure offsets named below are per the Win64 CONTEXT / DISPATCHER_CONTEXT layouts - confirm
+ * against the SDK headers.
+ * NOTE(review): the .Lin_prologue label targeted by both branches is not present in this rendering. */
+.p2align 4
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+ movq 120(%r8),%rax /* context Rax */
+ movq 248(%r8),%rbx /* context Rip */
+ movq 8(%r9),%rsi /* dispatcher ImageBase */
+ movq 56(%r9),%r11 /* dispatcher HandlerData: the two RVAs from .xdata */
+ movl 0(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10 /* absolute address of the first RVA */
+ cmpq %r10,%rbx
+ jb .Lin_prologue /* fault before the body label */
+ movq 152(%r8),%rax /* context Rsp */
+ movl 4(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10 /* absolute address of the second RVA */
+ cmpq %r10,%rbx
+ jae .Lin_prologue /* fault at or after the epilogue label */
+ leaq 16(%rax),%rsi /* frame offset 16: where the wrappers save %xmm6-%xmm15 */
+ leaq 512(%r8),%rdi /* context Xmm6 */
+ movl $20,%ecx /* 20 quadwords = ten 16-byte registers */
+.long 0xa548f3fc /* bytes FC F3 48 A5: cld; rep movsq */
+ leaq 184(%rax),%rax /* undo the wrappers' 184-byte frame */
+ movq 8(%rax),%rdi /* the slots where the wrappers parked %rdi and %rsi */
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8) /* write Rsp, Rsi, Rdi back into the context */
+ movq %rsi,168(%r8)
+ movq %rdi,176(%r8)
+ movq 40(%r9),%rdi /* dispatcher ContextRecord is the copy destination */
+ movq %r8,%rsi
+ movl $154,%ecx /* 154 quadwords copied */
+.long 0xa548f3fc /* cld; rep movsq */
+ movq %r9,%rsi
+ xorq %rcx,%rcx /* first RtlVirtualUnwind argument: 0 */
+ movq 8(%rsi),%rdx
+ movq 0(%rsi),%r8
+ movq 16(%rsi),%r9
+ movq 40(%rsi),%r10
+ leaq 56(%rsi),%r11
+ leaq 24(%rsi),%r12
+ movq %r10,32(%rsp) /* stack-passed arguments five to eight */
+ movq %r11,40(%rsp)
+ movq %r12,48(%rsp)
+ movq %rcx,56(%rsp)
+ call *__imp_RtlVirtualUnwind(%rip)
+ movl $1,%eax /* return value 1 */
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+.section .pdata /* function table: one (begin RVA, end RVA, unwind-info RVA) triple per public routine */
+.p2align 2
+.rva .LSEH_begin_vpaes_set_encrypt_key
+.rva .LSEH_end_vpaes_set_encrypt_key
+.rva .LSEH_info_vpaes_set_encrypt_key
+.rva .LSEH_begin_vpaes_set_decrypt_key
+.rva .LSEH_end_vpaes_set_decrypt_key
+.rva .LSEH_info_vpaes_set_decrypt_key
+.rva .LSEH_begin_vpaes_encrypt
+.rva .LSEH_end_vpaes_encrypt
+.rva .LSEH_info_vpaes_encrypt
+.rva .LSEH_begin_vpaes_decrypt
+.rva .LSEH_end_vpaes_decrypt
+.rva .LSEH_info_vpaes_decrypt
+.rva .LSEH_begin_vpaes_cbc_encrypt
+.rva .LSEH_end_vpaes_cbc_encrypt
+.rva .LSEH_info_vpaes_cbc_encrypt
+.section .xdata /* unwind info, one 16-byte record per routine; NOTE(review): the .LSEH_info_* labels that should name the records are not present in this rendering */
+.p2align 3
+.byte 9,0,0,0 /* header: byte 9 = version 1 plus the exception-handler flag; no prolog codes follow */
+.rva se_handler /* language-specific handler */
+.rva .Lenc_key_body,.Lenc_key_epilogue /* handler data: the two RVAs se_handler reads at offsets 0 and 4 */
+.byte 9,0,0,0
+.rva se_handler
+.rva .Ldec_key_body,.Ldec_key_epilogue
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lenc_body,.Lenc_epilogue
+.byte 9,0,0,0
+.rva se_handler
+.rva .Ldec_body,.Ldec_epilogue
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lcbc_body,.Lcbc_epilogue
diff --git a/crypto/libressl/crypto/bf/Makefile b/crypto/libressl/crypto/bf/Makefile
new file mode 100644
index 0000000..dac4aba
--- /dev/null
+++ b/crypto/libressl/crypto/bf/Makefile
@@ -0,0 +1,14 @@
+# Builds the Blowfish objects in place; variables such as CC and CFLAGS are expected from the included file.
+# NOTE(review): the include path below ends at a directory - the file name appears to have been lost
+# in this rendering and must be restored from the repository before this Makefile can be used.
+include ../../
+obj = bf_skey.o bf_enc.o bf_ecb.o bf_cfb64.o bf_ofb64.o
+all: $(obj)
+dep: all
+%.o: %.c
+ $(CC) $(CFLAGS) -c $<
+# Removal of build products lives in its own target so that compiling an object does not delete it again.
+clean:
+ rm -f *.o *.a
diff --git a/crypto/libressl/crypto/bf/bf_cfb64.c b/crypto/libressl/crypto/bf/bf_cfb64.c
new file mode 100644
index 0000000..6cc0bb9
--- /dev/null
+++ b/crypto/libressl/crypto/bf/bf_cfb64.c
@@ -0,0 +1,121 @@
+/* $OpenBSD: bf_cfb64.c,v 1.5 2014/10/28 07:35:58 jsg Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young ("
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson ("
+ *
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+#include <openssl/blowfish.h>
+#include "bf_locl.h"
+/* Blowfish in 64-bit CFB mode, processed one byte at a time.
+ *
+ * in, out   `length` bytes are read from `in` and written to `out`.
+ * schedule  key schedule passed to BF_encrypt(); both directions use
+ *           BF_encrypt(), never BF_decrypt().
+ * ivec      8-byte feedback register, updated in place.
+ * num       in/out: position within the current 8-byte block.  It is
+ *           read on entry, advanced modulo 8 per byte, and written back
+ *           on return so a stream can be continued across calls.
+ * encrypt   non-zero to encrypt, zero to decrypt.
+ *
+ * NOTE(review): a negative `length` is not rejected (`while (l--)` keeps
+ * running), and the caller-supplied *num is used as an index into ivec
+ * without a range check.
+ * n2l()/l2n() come from bf_locl.h, which is not shown here; presumably
+ * they read/write one 32-bit word and advance the pointer, which is why
+ * iv is reset to ivec after each pair.
+ */
+void BF_cfb64_encrypt(const unsigned char *in, unsigned char *out, long length,
+ const BF_KEY *schedule, unsigned char *ivec, int *num, int encrypt)
+ {
+ BF_LONG v0,v1,t;
+ int n= *num;
+ long l=length;
+ BF_LONG ti[2];
+ unsigned char *iv,c,cc;
+ iv=(unsigned char *)ivec;
+ if (encrypt)
+ {
+ while (l--)
+ {
+ if (n == 0) /* block boundary: run the feedback register through the cipher */
+ {
+ n2l(iv,v0); ti[0]=v0;
+ n2l(iv,v1); ti[1]=v1;
+ BF_encrypt((BF_LONG *)ti,schedule);
+ iv=(unsigned char *)ivec;
+ t=ti[0]; l2n(t,iv);
+ t=ti[1]; l2n(t,iv);
+ iv=(unsigned char *)ivec;
+ }
+ c= *(in++)^iv[n]; /* output byte = input byte XOR register byte */
+ *(out++)=c;
+ iv[n]=c; /* the output byte is fed back into the register */
+ n=(n+1)&0x07;
+ }
+ }
+ else
+ {
+ while (l--)
+ {
+ if (n == 0) /* block boundary: same register update as when encrypting */
+ {
+ n2l(iv,v0); ti[0]=v0;
+ n2l(iv,v1); ti[1]=v1;
+ BF_encrypt((BF_LONG *)ti,schedule);
+ iv=(unsigned char *)ivec;
+ t=ti[0]; l2n(t,iv);
+ t=ti[1]; l2n(t,iv);
+ iv=(unsigned char *)ivec;
+ }
+ cc= *(in++);
+ c=iv[n];
+ iv[n]=cc; /* the input byte is fed back into the register */
+ *(out++)=c^cc;
+ n=(n+1)&0x07;
+ }
+ }
+ v0=v1=ti[0]=ti[1]=t=c=cc=0; /* zero the temporaries before returning */
+ *num=n;
+ }
diff --git a/crypto/libressl/crypto/bf/bf_ecb.c b/crypto/libressl/crypto/bf/bf_ecb.c
new file mode 100644
index 0000000..305bd78
--- /dev/null
+++ b/crypto/libressl/crypto/bf/bf_ecb.c
@@ -0,0 +1,94 @@
+/* $OpenBSD: bf_ecb.c,v 1.6 2014/07/09 11:10:50 bcook Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young ("
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson ("
+ *
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+#include <openssl/blowfish.h>
+#include "bf_locl.h"
+#include <openssl/opensslv.h>
+/* Blowfish as implemented from 'Blowfish: Springer-Verlag paper'
+ */
+/* Returns a static string naming the table-access variant selected at
+ * compile time: "blowfish(ptr)" when BF_PTR is defined, "blowfish(ptr2)"
+ * when BF_PTR2 is defined, and "blowfish(idx)" otherwise.  The caller
+ * must not modify or free the returned string.
+ */
+const char *BF_options(void)
+ {
+#ifdef BF_PTR
+ return("blowfish(ptr)");
+#elif defined(BF_PTR2)
+ return("blowfish(ptr2)");
+#else
+ return("blowfish(idx)");
+#endif
+ }
+void BF_ecb_encrypt(const unsigned char *in, unsigned char *out,
+ const BF_KEY *key, int encrypt)
+ { /* Processes exactly one block: two 32-bit words are read from `in`,
+ * passed through BF_encrypt() (encrypt non-zero) or BF_decrypt()
+ * (encrypt zero), and written to `out`.
+ * n2l()/l2n() come from bf_locl.h, which is not shown here; presumably
+ * they convert between bytes and a 32-bit word and advance the pointer
+ * (each is applied twice in a row to the same pointer). */
+ BF_LONG l,d[2];
+ n2l(in,l); d[0]=l;
+ n2l(in,l); d[1]=l;
+ if (encrypt)
+ BF_encrypt(d,key);
+ else
+ BF_decrypt(d,key);
+ l=d[0]; l2n(l,out);
+ l=d[1]; l2n(l,out);
+ l=d[0]=d[1]=0; /* zero the temporaries before returning */
+ }
diff --git a/crypto/libressl/crypto/bf/bf_enc.c b/crypto/libressl/crypto/bf/bf_enc.c
new file mode 100644
index 0000000..2cf1c86
--- /dev/null
+++ b/crypto/libressl/crypto/bf/bf_enc.c
@@ -0,0 +1,306 @@
+/* $OpenBSD: bf_enc.c,v 1.6 2014/10/28 07:35:58 jsg Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young ("
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson ("
+ *
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+#include <openssl/blowfish.h>
+#include "bf_locl.h"
+/* Blowfish as implemented from 'Blowfish: Springer-Verlag paper'
+ */
+#if (BF_ROUNDS != 16) && (BF_ROUNDS != 20)
+#error If you set BF_ROUNDS to some value other than 16 or 20, you will have \
+to modify the code.
+#endif
+/* Encrypts one 64-bit block in place.
+ *
+ * data  two words: data[0] and data[1] are read as the input halves and
+ *       overwritten with the result, each masked to 32 bits.
+ * key   key schedule; the default variant uses key->P and key->S, the
+ *       BF_PTR2 variant addresses the whole structure as one BF_LONG array.
+ *
+ * The rounds are unrolled with the roles of l and r alternating; after
+ * the last round the halves are stored swapped (data[1]=l, data[0]=r).
+ * Rounds 17..20 are compiled in only when BF_ROUNDS is 20.
+ * BF_ENC comes from bf_locl.h, which is not shown here.
+ */
+void BF_encrypt(BF_LONG *data, const BF_KEY *key)
+ {
+#ifndef BF_PTR2
+ BF_LONG l,r;
+ const BF_LONG *p,*s;
+ p=key->P;
+ s= &(key->S[0]);
+ l=data[0];
+ r=data[1];
+ l^=p[0]; /* first subkey is applied before round 1 */
+ BF_ENC(r,l,s,p[ 1]);
+ BF_ENC(l,r,s,p[ 2]);
+ BF_ENC(r,l,s,p[ 3]);
+ BF_ENC(l,r,s,p[ 4]);
+ BF_ENC(r,l,s,p[ 5]);
+ BF_ENC(l,r,s,p[ 6]);
+ BF_ENC(r,l,s,p[ 7]);
+ BF_ENC(l,r,s,p[ 8]);
+ BF_ENC(r,l,s,p[ 9]);
+ BF_ENC(l,r,s,p[10]);
+ BF_ENC(r,l,s,p[11]);
+ BF_ENC(l,r,s,p[12]);
+ BF_ENC(r,l,s,p[13]);
+ BF_ENC(l,r,s,p[14]);
+ BF_ENC(r,l,s,p[15]);
+ BF_ENC(l,r,s,p[16]);
+#if BF_ROUNDS == 20
+ BF_ENC(r,l,s,p[17]);
+ BF_ENC(l,r,s,p[18]);
+ BF_ENC(r,l,s,p[19]);
+ BF_ENC(l,r,s,p[20]);
+#endif
+ r^=p[BF_ROUNDS+1]; /* last subkey is applied after the final round */
+ data[1]=l&0xffffffffL;
+ data[0]=r&0xffffffffL;
+#else
+ BF_LONG l,r,t,*k;
+ l=data[0];
+ r=data[1];
+ k=(BF_LONG*)key;
+ l^=k[0];
+ BF_ENC(r,l,k, 1);
+ BF_ENC(l,r,k, 2);
+ BF_ENC(r,l,k, 3);
+ BF_ENC(l,r,k, 4);
+ BF_ENC(r,l,k, 5);
+ BF_ENC(l,r,k, 6);
+ BF_ENC(r,l,k, 7);
+ BF_ENC(l,r,k, 8);
+ BF_ENC(r,l,k, 9);
+ BF_ENC(l,r,k,10);
+ BF_ENC(r,l,k,11);
+ BF_ENC(l,r,k,12);
+ BF_ENC(r,l,k,13);
+ BF_ENC(l,r,k,14);
+ BF_ENC(r,l,k,15);
+ BF_ENC(l,r,k,16);
+#if BF_ROUNDS == 20
+ BF_ENC(r,l,k,17);
+ BF_ENC(l,r,k,18);
+ BF_ENC(r,l,k,19);
+ BF_ENC(l,r,k,20);
+#endif
+ r^=k[BF_ROUNDS+1];
+ data[1]=l&0xffffffffL;
+ data[0]=r&0xffffffffL;
+#endif
+ }
+/* Decrypts one 64-bit block in place.
+ *
+ * data  two words: data[0] and data[1] are read as the input halves and
+ *       overwritten with the result, each masked to 32 bits.
+ * key   key schedule; the default variant uses key->P and key->S, the
+ *       BF_PTR2 variant addresses the whole structure as one BF_LONG array.
+ *
+ * Same unrolled structure as BF_encrypt() with the subkeys applied in
+ * the opposite order: P[BF_ROUNDS+1] first, P[0] last.  The four extra
+ * rounds (subkeys 20..17) are compiled in only when BF_ROUNDS is 20.
+ * BF_ENC comes from bf_locl.h, which is not shown here.
+ */
+void BF_decrypt(BF_LONG *data, const BF_KEY *key)
+ {
+#ifndef BF_PTR2
+ BF_LONG l,r;
+ const BF_LONG *p,*s;
+ p=key->P;
+ s= &(key->S[0]);
+ l=data[0];
+ r=data[1];
+ l^=p[BF_ROUNDS+1]; /* subkeys are consumed from the top down */
+#if BF_ROUNDS == 20
+ BF_ENC(r,l,s,p[20]);
+ BF_ENC(l,r,s,p[19]);
+ BF_ENC(r,l,s,p[18]);
+ BF_ENC(l,r,s,p[17]);
+#endif
+ BF_ENC(r,l,s,p[16]);
+ BF_ENC(l,r,s,p[15]);
+ BF_ENC(r,l,s,p[14]);
+ BF_ENC(l,r,s,p[13]);
+ BF_ENC(r,l,s,p[12]);
+ BF_ENC(l,r,s,p[11]);
+ BF_ENC(r,l,s,p[10]);
+ BF_ENC(l,r,s,p[ 9]);
+ BF_ENC(r,l,s,p[ 8]);
+ BF_ENC(l,r,s,p[ 7]);
+ BF_ENC(r,l,s,p[ 6]);
+ BF_ENC(l,r,s,p[ 5]);
+ BF_ENC(r,l,s,p[ 4]);
+ BF_ENC(l,r,s,p[ 3]);
+ BF_ENC(r,l,s,p[ 2]);
+ BF_ENC(l,r,s,p[ 1]);
+ r^=p[0];
+ data[1]=l&0xffffffffL;
+ data[0]=r&0xffffffffL;
+#else
+ BF_LONG l,r,t,*k;
+ l=data[0];
+ r=data[1];
+ k=(BF_LONG *)key;
+ l^=k[BF_ROUNDS+1];
+#if BF_ROUNDS == 20
+ BF_ENC(r,l,k,20);
+ BF_ENC(l,r,k,19);
+ BF_ENC(r,l,k,18);
+ BF_ENC(l,r,k,17);
+#endif
+ BF_ENC(r,l,k,16);
+ BF_ENC(l,r,k,15);
+ BF_ENC(r,l,k,14);
+ BF_ENC(l,r,k,13);
+ BF_ENC(r,l,k,12);
+ BF_ENC(l,r,k,11);
+ BF_ENC(r,l,k,10);
+ BF_ENC(l,r,k, 9);
+ BF_ENC(r,l,k, 8);
+ BF_ENC(l,r,k, 7);
+ BF_ENC(r,l,k, 6);
+ BF_ENC(l,r,k, 5);
+ BF_ENC(r,l,k, 4);
+ BF_ENC(l,r,k, 3);
+ BF_ENC(r,l,k, 2);
+ BF_ENC(l,r,k, 1);
+ r^=k[0];
+ data[1]=l&0xffffffffL;
+ data[0]=r&0xffffffffL;
+#endif
+ }
+void BF_cbc_encrypt(const unsigned char *in, unsigned char *out, long length,
+ const BF_KEY *schedule, unsigned char *ivec, int encrypt)
+ { /* Blowfish in CBC mode over `length` bytes from `in` to `out`.
+ * ivec is the 8-byte chaining value: read on entry and overwritten
+ * with the last chaining value on return.  encrypt non-zero selects
+ * encryption, zero decryption.  Whole 8-byte blocks are handled in the
+ * for loops; a trailing partial block of l+8 bytes gets its own branch.
+ * n2l/l2n/n2ln/l2nn come from bf_locl.h, which is not shown here;
+ * n2l presumably advances its pointer by 4, since ivec is rewound by
+ * 8 after two uses. */
+ BF_LONG tin0,tin1;
+ BF_LONG tout0,tout1,xor0,xor1;
+ long l=length;
+ BF_LONG tin[2];
+ if (encrypt)
+ {
+ n2l(ivec,tout0);
+ n2l(ivec,tout1);
+ ivec-=8;
+ for (l-=8; l>=0; l-=8) /* on exit l is in -8..-1; -8 means no leftover bytes */
+ {
+ n2l(in,tin0);
+ n2l(in,tin1);
+ tin0^=tout0; /* XOR the input block with the previous output block (or the IV) */
+ tin1^=tout1;
+ tin[0]=tin0;
+ tin[1]=tin1;
+ BF_encrypt(tin,schedule);
+ tout0=tin[0];
+ tout1=tin[1];
+ l2n(tout0,out);
+ l2n(tout1,out);
+ }
+ if (l != -8) /* trailing partial block of l+8 bytes */
+ {
+ n2ln(in,tin0,tin1,l+8); /* presumably loads only l+8 bytes; padding behaviour is defined by the macro - confirm */
+ tin0^=tout0;
+ tin1^=tout1;
+ tin[0]=tin0;
+ tin[1]=tin1;
+ BF_encrypt(tin,schedule);
+ tout0=tin[0];
+ tout1=tin[1];
+ l2n(tout0,out); /* a full 8-byte block is written even for a partial input block */
+ l2n(tout1,out);
+ }
+ l2n(tout0,ivec); /* last output block becomes the new chaining value */
+ l2n(tout1,ivec);
+ }
+ else
+ {
+ n2l(ivec,xor0);
+ n2l(ivec,xor1);
+ ivec-=8;
+ for (l-=8; l>=0; l-=8)
+ {
+ n2l(in,tin0);
+ n2l(in,tin1);
+ tin[0]=tin0;
+ tin[1]=tin1;
+ BF_decrypt(tin,schedule);
+ tout0=tin[0]^xor0;
+ tout1=tin[1]^xor1;
+ l2n(tout0,out);
+ l2n(tout1,out);
+ xor0=tin0; /* the input block just consumed chains into the next one */
+ xor1=tin1;
+ }
+ if (l != -8) /* trailing partial block */
+ {
+ n2l(in,tin0); /* two full words are still read from the input here */
+ n2l(in,tin1);
+ tin[0]=tin0;
+ tin[1]=tin1;
+ BF_decrypt(tin,schedule);
+ tout0=tin[0]^xor0;
+ tout1=tin[1]^xor1;
+ l2nn(tout0,tout1,out,l+8); /* presumably stores only l+8 output bytes - confirm against bf_locl.h */
+ xor0=tin0;
+ xor1=tin1;
+ }
+ l2n(xor0,ivec);
+ l2n(xor1,ivec);
+ }
+ tin0=tin1=tout0=tout1=xor0=xor1=0; /* zero the temporaries before returning */
+ tin[0]=tin[1]=0;
+ }
diff --git a/crypto/libressl/crypto/bf/bf_locl.h b/crypto/libressl/crypto/bf/bf_locl.h
new file mode 100644
index 0000000..0b66362
--- /dev/null
+++ b/crypto/libressl/crypto/bf/bf_locl.h
@@ -0,0 +1,219 @@
+/* $OpenBSD: bf_locl.h,v 1.3 2014/06/12 15:49:28 deraadt Exp $ */
+/* Copyright (C) 1995-1997 Eric Young (
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young ("
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson ("
+ *
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+#include <openssl/opensslconf.h> /* BF_PTR, BF_PTR2 */
+#undef c2l /* c2l(c,l): read 4 bytes at c into l, least significant byte first; c advances by 4 */
+#define c2l(c,l) (l =((unsigned long)(*((c)++))) , \
+ l|=((unsigned long)(*((c)++)))<< 8L, \
+ l|=((unsigned long)(*((c)++)))<<16L, \
+ l|=((unsigned long)(*((c)++)))<<24L)
+/* c2ln(c,l1,l2,n): read the first n (0..8) bytes at c, least significant
+ * byte first, into l1 (bytes 0-3) and l2 (bytes 4-7); bytes beyond n are
+ * taken as zero. The switch relies on case fallthrough.
+ * NOTE - c is not incremented as per c2l: it is advanced by n and then
+ * pre-decremented once per byte read, so it ends where it started.
+ * n is expanded twice, so it must have no side effects. */
+#undef c2ln
+#define c2ln(c,l1,l2,n) { \
+ c+=n; \
+ l1=l2=0; \
+ switch (n) { \
+ case 8: l2 =((unsigned long)(*(--(c))))<<24L; \
+ case 7: l2|=((unsigned long)(*(--(c))))<<16L; \
+ case 6: l2|=((unsigned long)(*(--(c))))<< 8L; \
+ case 5: l2|=((unsigned long)(*(--(c)))); \
+ case 4: l1 =((unsigned long)(*(--(c))))<<24L; \
+ case 3: l1|=((unsigned long)(*(--(c))))<<16L; \
+ case 2: l1|=((unsigned long)(*(--(c))))<< 8L; \
+ case 1: l1|=((unsigned long)(*(--(c)))); \
+ } \
+ }
+#undef l2c /* l2c(l,c): store l at c as 4 bytes, least significant byte first; c advances by 4 */
+#define l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
+ *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \
+ *((c)++)=(unsigned char)(((l)>>16L)&0xff), \
+ *((c)++)=(unsigned char)(((l)>>24L)&0xff))
+/* l2cn(l1,l2,c,n): store only the first n (0..8) bytes of the
+ * least-significant-byte-first encoding of l1 followed by l2 at c.
+ * The switch relies on case fallthrough.
+ * NOTE - c is not incremented as per l2c: it is advanced by n and then
+ * pre-decremented once per byte written, so it ends where it started.
+ * n is expanded twice, so it must have no side effects. */
+#undef l2cn
+#define l2cn(l1,l2,c,n) { \
+ c+=n; \
+ switch (n) { \
+ case 8: *(--(c))=(unsigned char)(((l2)>>24L)&0xff); \
+ case 7: *(--(c))=(unsigned char)(((l2)>>16L)&0xff); \
+ case 6: *(--(c))=(unsigned char)(((l2)>> 8L)&0xff); \
+ case 5: *(--(c))=(unsigned char)(((l2) )&0xff); \
+ case 4: *(--(c))=(unsigned char)(((l1)>>24L)&0xff); \
+ case 3: *(--(c))=(unsigned char)(((l1)>>16L)&0xff); \
+ case 2: *(--(c))=(unsigned char)(((l1)>> 8L)&0xff); \
+ case 1: *(--(c))=(unsigned char)(((l1) )&0xff); \
+ } \
+ }
+/* n2ln(c,l1,l2,n): read the first n (0..8) bytes at c, most significant
+ * byte first, into l1 (bytes 0-3) and l2 (bytes 4-7); bytes beyond n are
+ * taken as zero. The switch relies on case fallthrough.
+ * NOTE - c is not incremented as per n2l: it is advanced by n and then
+ * pre-decremented once per byte read, so it ends where it started.
+ * n is expanded twice, so it must have no side effects. */
+#define n2ln(c,l1,l2,n) { \
+ c+=n; \
+ l1=l2=0; \
+ switch (n) { \
+ case 8: l2 =((unsigned long)(*(--(c)))) ; \
+ case 7: l2|=((unsigned long)(*(--(c))))<< 8; \
+ case 6: l2|=((unsigned long)(*(--(c))))<<16; \
+ case 5: l2|=((unsigned long)(*(--(c))))<<24; \
+ case 4: l1 =((unsigned long)(*(--(c)))) ; \
+ case 3: l1|=((unsigned long)(*(--(c))))<< 8; \
+ case 2: l1|=((unsigned long)(*(--(c))))<<16; \
+ case 1: l1|=((unsigned long)(*(--(c))))<<24; \
+ } \
+ }
+/* l2nn(l1,l2,c,n): store only the first n (0..8) bytes of the
+ * most-significant-byte-first encoding of l1 followed by l2 at c.
+ * The switch relies on case fallthrough.
+ * NOTE - c is not incremented as per l2n: it is advanced by n and then
+ * pre-decremented once per byte written, so it ends where it started.
+ * n is expanded twice, so it must have no side effects. */
+#define l2nn(l1,l2,c,n) { \
+ c+=n; \
+ switch (n) { \
+ case 8: *(--(c))=(unsigned char)(((l2) )&0xff); \
+ case 7: *(--(c))=(unsigned char)(((l2)>> 8)&0xff); \
+ case 6: *(--(c))=(unsigned char)(((l2)>>16)&0xff); \
+ case 5: *(--(c))=(unsigned char)(((l2)>>24)&0xff); \
+ case 4: *(--(c))=(unsigned char)(((l1) )&0xff); \
+ case 3: *(--(c))=(unsigned char)(((l1)>> 8)&0xff); \
+ case 2: *(--(c))=(unsigned char)(((l1)>>16)&0xff); \
+ case 1: *(--(c))=(unsigned char)(((l1)>>24)&0xff); \
+ } \
+ }
+#undef n2l /* n2l(c,l): read 4 bytes at c into l, most significant byte first; c advances by 4 */
+#define n2l(c,l) (l =((unsigned long)(*((c)++)))<<24L, \
+ l|=((unsigned long)(*((c)++)))<<16L, \
+ l|=((unsigned long)(*((c)++)))<< 8L, \
+ l|=((unsigned long)(*((c)++))))
+#undef l2n /* l2n(l,c): store l at c as 4 bytes, most significant byte first; c advances by 4 */
+#define l2n(l,c) (*((c)++)=(unsigned char)(((l)>>24L)&0xff), \
+ *((c)++)=(unsigned char)(((l)>>16L)&0xff), \
+ *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \
+ *((c)++)=(unsigned char)(((l) )&0xff))
+/* This is actually a big endian algorithm, the most significant byte
+ * is used to lookup array 0.
+ *
+ * BF_ENC performs one Feistel round: the half-block LL is XORed with a
+ * subkey and with ((T0[a] + T1[b]) ^ T2[c]) + T3[d], where a..d are the
+ * bytes of R from most to least significant and T0..T3 are four
+ * consecutive 256-entry lookup tables. Exactly one of the three variants
+ * below is compiled, chosen by BF_PTR2 / BF_PTR. Every variant expands
+ * its arguments more than once, so they must be side-effect free.
+ */
+#if defined(BF_PTR2)
+/*
+ * This is basically a special Intel version. Point is that Intel
+ * doesn't have many registers, but offers a rich choice of addressing
+ * modes. So we spare some registers by directly traversing BF_KEY
+ * structure and hiring the most decorated addressing mode. The code
+ * generated by EGCS is *perfectly* competitive with assembler
+ * implementation!
+ *
+ * In this variant KEY is indexed as one flat array: the subkey is
+ * KEY[Pi] and the lookup tables start at KEY[BF_ROUNDS+2]. A temporary
+ * named t must be in scope at the point of use.
+ */
+#define BF_ENC(LL,R,KEY,Pi) (\
+ LL^=KEY[Pi], \
+ t= KEY[BF_ROUNDS+2 + 0 + ((R>>24)&0xFF)], \
+ t+= KEY[BF_ROUNDS+2 + 256 + ((R>>16)&0xFF)], \
+ t^= KEY[BF_ROUNDS+2 + 512 + ((R>>8 )&0xFF)], \
+ t+= KEY[BF_ROUNDS+2 + 768 + ((R )&0xFF)], \
+ LL^=t \
+ )
+#elif defined(BF_PTR)
+#ifndef BF_LONG_LOG2
+#define BF_LONG_LOG2 2 /* default to BF_LONG being 32 bits */
+#endif
+#define BF_M (0xFF<<BF_LONG_LOG2)
+#define BF_0 (24-BF_LONG_LOG2)
+#define BF_1 (16-BF_LONG_LOG2)
+#define BF_2 ( 8-BF_LONG_LOG2)
+#define BF_3 BF_LONG_LOG2 /* left shift */
+/*
+ * This is normally very good on RISC platforms where normally you
+ * have to explicitly "multiply" array index by sizeof(BF_LONG)
+ * in order to calculate the effective address. This implementation
+ * excuses CPU from this extra work. Power[PC] users should have most
+ * fun as (R>>BF_i)&BF_M gets folded into a single instruction, namely
+ * rlwinm. So let'em double-check if their compiler does it.
+ *
+ * Here S is the table array and P is the subkey value itself; each byte
+ * of R is turned directly into a byte offset (index << BF_LONG_LOG2)
+ * that is added to the table's base address.
+ */
+#define BF_ENC(LL,R,S,P) ( \
+ LL^=P, \
+ LL^= (((*(BF_LONG *)((unsigned char *)&(S[ 0])+((R>>BF_0)&BF_M))+ \
+ *(BF_LONG *)((unsigned char *)&(S[256])+((R>>BF_1)&BF_M)))^ \
+ *(BF_LONG *)((unsigned char *)&(S[512])+((R>>BF_2)&BF_M)))+ \
+ *(BF_LONG *)((unsigned char *)&(S[768])+((R<<BF_3)&BF_M))) \
+ )
+#else
+/*
+ * This is a *generic* version. Seem to perform best on platforms that
+ * offer explicit support for extraction of 8-bit nibbles preferably
+ * complemented with "multiplying" of array index by sizeof(BF_LONG).
+ * For the moment of this writing the list comprises Alpha CPU featuring
+ * extbl and s[48]addq instructions.
+ *
+ * Here S is the table array and P is the subkey value itself; the
+ * round-function result is masked to 32 bits before being XORed in.
+ */
+#define BF_ENC(LL,R,S,P) ( \
+ LL^=P, \
+ LL^=((( S[ ((int)(R>>24)&0xff)] + \
+ S[0x0100+((int)(R>>16)&0xff)])^ \
+ S[0x0200+((int)(R>> 8)&0xff)])+ \
+ S[0x0300+((int)(R )&0xff)])&0xffffffffL \
+ )
+#endif
diff --git a/crypto/libressl/crypto/bf/bf_ofb64.c b/crypto/libressl/crypto/bf/bf_ofb64.c
new file mode 100644
index 0000000..9e33162
--- /dev/null
+++ b/crypto/libressl/crypto/bf/bf_ofb64.c
@@ -0,0 +1,110 @@
+/* $OpenBSD: bf_ofb64.c,v 1.5 2014/10/28 07:35:58 jsg Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young ("
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson ("
+ *
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+#include <openssl/blowfish.h>
+#include "bf_locl.h"
+/* The input and output are encrypted as though 64bit ofb mode is being
+ * used, one byte at a time: each output byte is the input byte XORed
+ * with a keystream byte, so the same call both encrypts and decrypts.
+ * ivec holds the current 8-byte keystream block and *num is the index
+ * (expected 0..7) of the next unused byte in it; both are updated so a
+ * later call continues the stream where this one stopped.
+ */
+void BF_ofb64_encrypt(const unsigned char *in, unsigned char *out, long length,
+ const BF_KEY *schedule, unsigned char *ivec, int *num)
+ {
+ BF_LONG v0,v1,t;
+ int n= *num;
+ long l=length;
+ unsigned char d[8]; /* current keystream block as bytes */
+ char *dp;
+ BF_LONG ti[2]; /* feedback register, re-encrypted for each new block */
+ unsigned char *iv;
+ int save=0; /* counts keystream blocks generated in this call */
+ iv=(unsigned char *)ivec;
+ n2l(iv,v0);
+ n2l(iv,v1);
+ ti[0]=v0;
+ ti[1]=v1;
+ dp=(char *)d;
+ l2n(v0,dp); /* d[] starts out as a copy of ivec */
+ l2n(v1,dp);
+ while (l--)
+ {
+ if (n == 0) /* previous block used up: produce the next one */
+ {
+ BF_encrypt((BF_LONG *)ti,schedule);
+ dp=(char *)d;
+ t=ti[0]; l2n(t,dp);
+ t=ti[1]; l2n(t,dp);
+ save++;
+ }
+ *(out++)= *(in++)^d[n];
+ n=(n+1)&0x07; /* wrap the byte index at the 8-byte block size */
+ }
+ if (save) /* ivec only changes if a new block was generated */
+ {
+ v0=ti[0];
+ v1=ti[1];
+ iv=(unsigned char *)ivec;
+ l2n(v0,iv);
+ l2n(v1,iv);
+ }
+ t=v0=v1=ti[0]=ti[1]=0; /* clear the word-sized locals; d[] is not cleared here */
+ *num=n;
+ }
diff --git a/crypto/libressl/crypto/bf/bf_pi.h b/crypto/libressl/crypto/bf/bf_pi.h
new file mode 100644
index 0000000..ce4843a
--- /dev/null
+++ b/crypto/libressl/crypto/bf/bf_pi.h
@@ -0,0 +1,328 @@
+/* $OpenBSD: bf_pi.h,v 1.4 2016/12/21 15:49:29 jsing Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young ("
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson ("
+ *
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+static const BF_KEY bf_init= { /* initial key state; BF_set_key() copies it with memcpy() before mixing in the user key */
+ { /* first member: 18 words (presumably BF_KEY.P -- confirm against blowfish.h) */
+ 0x243f6a88L, 0x85a308d3L, 0x13198a2eL, 0x03707344L,
+ 0xa4093822L, 0x299f31d0L, 0x082efa98L, 0xec4e6c89L,
+ 0x452821e6L, 0x38d01377L, 0xbe5466cfL, 0x34e90c6cL,
+ 0xc0ac29b7L, 0xc97c50ddL, 0x3f84d5b5L, 0xb5470917L,
+ 0x9216d5d9L, 0x8979fb1b
+ },{ /* second member: 1024 words, i.e. four 256-entry tables back to back (presumably BF_KEY.S); table 0 starts here */
+ 0xd1310ba6L, 0x98dfb5acL, 0x2ffd72dbL, 0xd01adfb7L,
+ 0xb8e1afedL, 0x6a267e96L, 0xba7c9045L, 0xf12c7f99L,
+ 0x24a19947L, 0xb3916cf7L, 0x0801f2e2L, 0x858efc16L,
+ 0x636920d8L, 0x71574e69L, 0xa458fea3L, 0xf4933d7eL,
+ 0x0d95748fL, 0x728eb658L, 0x718bcd58L, 0x82154aeeL,
+ 0x7b54a41dL, 0xc25a59b5L, 0x9c30d539L, 0x2af26013L,
+ 0xc5d1b023L, 0x286085f0L, 0xca417918L, 0xb8db38efL,
+ 0x8e79dcb0L, 0x603a180eL, 0x6c9e0e8bL, 0xb01e8a3eL,
+ 0xd71577c1L, 0xbd314b27L, 0x78af2fdaL, 0x55605c60L,
+ 0xe65525f3L, 0xaa55ab94L, 0x57489862L, 0x63e81440L,
+ 0x55ca396aL, 0x2aab10b6L, 0xb4cc5c34L, 0x1141e8ceL,
+ 0xa15486afL, 0x7c72e993L, 0xb3ee1411L, 0x636fbc2aL,
+ 0x2ba9c55dL, 0x741831f6L, 0xce5c3e16L, 0x9b87931eL,
+ 0xafd6ba33L, 0x6c24cf5cL, 0x7a325381L, 0x28958677L,
+ 0x3b8f4898L, 0x6b4bb9afL, 0xc4bfe81bL, 0x66282193L,
+ 0x61d809ccL, 0xfb21a991L, 0x487cac60L, 0x5dec8032L,
+ 0xef845d5dL, 0xe98575b1L, 0xdc262302L, 0xeb651b88L,
+ 0x23893e81L, 0xd396acc5L, 0x0f6d6ff3L, 0x83f44239L,
+ 0x2e0b4482L, 0xa4842004L, 0x69c8f04aL, 0x9e1f9b5eL,
+ 0x21c66842L, 0xf6e96c9aL, 0x670c9c61L, 0xabd388f0L,
+ 0x6a51a0d2L, 0xd8542f68L, 0x960fa728L, 0xab5133a3L,
+ 0x6eef0b6cL, 0x137a3be4L, 0xba3bf050L, 0x7efb2a98L,
+ 0xa1f1651dL, 0x39af0176L, 0x66ca593eL, 0x82430e88L,
+ 0x8cee8619L, 0x456f9fb4L, 0x7d84a5c3L, 0x3b8b5ebeL,
+ 0xe06f75d8L, 0x85c12073L, 0x401a449fL, 0x56c16aa6L,
+ 0x4ed3aa62L, 0x363f7706L, 0x1bfedf72L, 0x429b023dL,
+ 0x37d0d724L, 0xd00a1248L, 0xdb0fead3L, 0x49f1c09bL,
+ 0x075372c9L, 0x80991b7bL, 0x25d479d8L, 0xf6e8def7L,
+ 0xe3fe501aL, 0xb6794c3bL, 0x976ce0bdL, 0x04c006baL,
+ 0xc1a94fb6L, 0x409f60c4L, 0x5e5c9ec2L, 0x196a2463L,
+ 0x68fb6fafL, 0x3e6c53b5L, 0x1339b2ebL, 0x3b52ec6fL,
+ 0x6dfc511fL, 0x9b30952cL, 0xcc814544L, 0xaf5ebd09L,
+ 0xbee3d004L, 0xde334afdL, 0x660f2807L, 0x192e4bb3L,
+ 0xc0cba857L, 0x45c8740fL, 0xd20b5f39L, 0xb9d3fbdbL,
+ 0x5579c0bdL, 0x1a60320aL, 0xd6a100c6L, 0x402c7279L,
+ 0x679f25feL, 0xfb1fa3ccL, 0x8ea5e9f8L, 0xdb3222f8L,
+ 0x3c7516dfL, 0xfd616b15L, 0x2f501ec8L, 0xad0552abL,
+ 0x323db5faL, 0xfd238760L, 0x53317b48L, 0x3e00df82L,
+ 0x9e5c57bbL, 0xca6f8ca0L, 0x1a87562eL, 0xdf1769dbL,
+ 0xd542a8f6L, 0x287effc3L, 0xac6732c6L, 0x8c4f5573L,
+ 0x695b27b0L, 0xbbca58c8L, 0xe1ffa35dL, 0xb8f011a0L,
+ 0x10fa3d98L, 0xfd2183b8L, 0x4afcb56cL, 0x2dd1d35bL,
+ 0x9a53e479L, 0xb6f84565L, 0xd28e49bcL, 0x4bfb9790L,
+ 0xe1ddf2daL, 0xa4cb7e33L, 0x62fb1341L, 0xcee4c6e8L,
+ 0xef20cadaL, 0x36774c01L, 0xd07e9efeL, 0x2bf11fb4L,
+ 0x95dbda4dL, 0xae909198L, 0xeaad8e71L, 0x6b93d5a0L,
+ 0xd08ed1d0L, 0xafc725e0L, 0x8e3c5b2fL, 0x8e7594b7L,
+ 0x8ff6e2fbL, 0xf2122b64L, 0x8888b812L, 0x900df01cL,
+ 0x4fad5ea0L, 0x688fc31cL, 0xd1cff191L, 0xb3a8c1adL,
+ 0x2f2f2218L, 0xbe0e1777L, 0xea752dfeL, 0x8b021fa1L,
+ 0xe5a0cc0fL, 0xb56f74e8L, 0x18acf3d6L, 0xce89e299L,
+ 0xb4a84fe0L, 0xfd13e0b7L, 0x7cc43b81L, 0xd2ada8d9L,
+ 0x165fa266L, 0x80957705L, 0x93cc7314L, 0x211a1477L,
+ 0xe6ad2065L, 0x77b5fa86L, 0xc75442f5L, 0xfb9d35cfL,
+ 0xebcdaf0cL, 0x7b3e89a0L, 0xd6411bd3L, 0xae1e7e49L,
+ 0x00250e2dL, 0x2071b35eL, 0x226800bbL, 0x57b8e0afL,
+ 0x2464369bL, 0xf009b91eL, 0x5563911dL, 0x59dfa6aaL,
+ 0x78c14389L, 0xd95a537fL, 0x207d5ba2L, 0x02e5b9c5L,
+ 0x83260376L, 0x6295cfa9L, 0x11c81968L, 0x4e734a41L,
+ 0xb3472dcaL, 0x7b14a94aL, 0x1b510052L, 0x9a532915L,
+ 0xd60f573fL, 0xbc9bc6e4L, 0x2b60a476L, 0x81e67400L,
+ 0x08ba6fb5L, 0x571be91fL, 0xf296ec6bL, 0x2a0dd915L,
+ 0xb6636521L, 0xe7b9f9b6L, 0xff34052eL, 0xc5855664L,
+ 0x53b02d5dL, 0xa99f8fa1L, 0x08ba4799L, 0x6e85076aL,
+ 0x4b7a70e9L, 0xb5b32944L, 0xdb75092eL, 0xc4192623L, /* table 1 starts here (entry 0x100) */
+ 0xad6ea6b0L, 0x49a7df7dL, 0x9cee60b8L, 0x8fedb266L,
+ 0xecaa8c71L, 0x699a17ffL, 0x5664526cL, 0xc2b19ee1L,
+ 0x193602a5L, 0x75094c29L, 0xa0591340L, 0xe4183a3eL,
+ 0x3f54989aL, 0x5b429d65L, 0x6b8fe4d6L, 0x99f73fd6L,
+ 0xa1d29c07L, 0xefe830f5L, 0x4d2d38e6L, 0xf0255dc1L,
+ 0x4cdd2086L, 0x8470eb26L, 0x6382e9c6L, 0x021ecc5eL,
+ 0x09686b3fL, 0x3ebaefc9L, 0x3c971814L, 0x6b6a70a1L,
+ 0x687f3584L, 0x52a0e286L, 0xb79c5305L, 0xaa500737L,
+ 0x3e07841cL, 0x7fdeae5cL, 0x8e7d44ecL, 0x5716f2b8L,
+ 0xb03ada37L, 0xf0500c0dL, 0xf01c1f04L, 0x0200b3ffL,
+ 0xae0cf51aL, 0x3cb574b2L, 0x25837a58L, 0xdc0921bdL,
+ 0xd19113f9L, 0x7ca92ff6L, 0x94324773L, 0x22f54701L,
+ 0x3ae5e581L, 0x37c2dadcL, 0xc8b57634L, 0x9af3dda7L,
+ 0xa9446146L, 0x0fd0030eL, 0xecc8c73eL, 0xa4751e41L,
+ 0xe238cd99L, 0x3bea0e2fL, 0x3280bba1L, 0x183eb331L,
+ 0x4e548b38L, 0x4f6db908L, 0x6f420d03L, 0xf60a04bfL,
+ 0x2cb81290L, 0x24977c79L, 0x5679b072L, 0xbcaf89afL,
+ 0xde9a771fL, 0xd9930810L, 0xb38bae12L, 0xdccf3f2eL,
+ 0x5512721fL, 0x2e6b7124L, 0x501adde6L, 0x9f84cd87L,
+ 0x7a584718L, 0x7408da17L, 0xbc9f9abcL, 0xe94b7d8cL,
+ 0xec7aec3aL, 0xdb851dfaL, 0x63094366L, 0xc464c3d2L,
+ 0xef1c1847L, 0x3215d908L, 0xdd433b37L, 0x24c2ba16L,
+ 0x12a14d43L, 0x2a65c451L, 0x50940002L, 0x133ae4ddL,
+ 0x71dff89eL, 0x10314e55L, 0x81ac77d6L, 0x5f11199bL,
+ 0x043556f1L, 0xd7a3c76bL, 0x3c11183bL, 0x5924a509L,
+ 0xf28fe6edL, 0x97f1fbfaL, 0x9ebabf2cL, 0x1e153c6eL,
+ 0x86e34570L, 0xeae96fb1L, 0x860e5e0aL, 0x5a3e2ab3L,
+ 0x771fe71cL, 0x4e3d06faL, 0x2965dcb9L, 0x99e71d0fL,
+ 0x803e89d6L, 0x5266c825L, 0x2e4cc978L, 0x9c10b36aL,
+ 0xc6150ebaL, 0x94e2ea78L, 0xa5fc3c53L, 0x1e0a2df4L,
+ 0xf2f74ea7L, 0x361d2b3dL, 0x1939260fL, 0x19c27960L,
+ 0x5223a708L, 0xf71312b6L, 0xebadfe6eL, 0xeac31f66L,
+ 0xe3bc4595L, 0xa67bc883L, 0xb17f37d1L, 0x018cff28L,
+ 0xc332ddefL, 0xbe6c5aa5L, 0x65582185L, 0x68ab9802L,
+ 0xeecea50fL, 0xdb2f953bL, 0x2aef7dadL, 0x5b6e2f84L,
+ 0x1521b628L, 0x29076170L, 0xecdd4775L, 0x619f1510L,
+ 0x13cca830L, 0xeb61bd96L, 0x0334fe1eL, 0xaa0363cfL,
+ 0xb5735c90L, 0x4c70a239L, 0xd59e9e0bL, 0xcbaade14L,
+ 0xeecc86bcL, 0x60622ca7L, 0x9cab5cabL, 0xb2f3846eL,
+ 0x648b1eafL, 0x19bdf0caL, 0xa02369b9L, 0x655abb50L,
+ 0x40685a32L, 0x3c2ab4b3L, 0x319ee9d5L, 0xc021b8f7L,
+ 0x9b540b19L, 0x875fa099L, 0x95f7997eL, 0x623d7da8L,
+ 0xf837889aL, 0x97e32d77L, 0x11ed935fL, 0x16681281L,
+ 0x0e358829L, 0xc7e61fd6L, 0x96dedfa1L, 0x7858ba99L,
+ 0x57f584a5L, 0x1b227263L, 0x9b83c3ffL, 0x1ac24696L,
+ 0xcdb30aebL, 0x532e3054L, 0x8fd948e4L, 0x6dbc3128L,
+ 0x58ebf2efL, 0x34c6ffeaL, 0xfe28ed61L, 0xee7c3c73L,
+ 0x5d4a14d9L, 0xe864b7e3L, 0x42105d14L, 0x203e13e0L,
+ 0x45eee2b6L, 0xa3aaabeaL, 0xdb6c4f15L, 0xfacb4fd0L,
+ 0xc742f442L, 0xef6abbb5L, 0x654f3b1dL, 0x41cd2105L,
+ 0xd81e799eL, 0x86854dc7L, 0xe44b476aL, 0x3d816250L,
+ 0xcf62a1f2L, 0x5b8d2646L, 0xfc8883a0L, 0xc1c7b6a3L,
+ 0x7f1524c3L, 0x69cb7492L, 0x47848a0bL, 0x5692b285L,
+ 0x095bbf00L, 0xad19489dL, 0x1462b174L, 0x23820e00L,
+ 0x58428d2aL, 0x0c55f5eaL, 0x1dadf43eL, 0x233f7061L,
+ 0x3372f092L, 0x8d937e41L, 0xd65fecf1L, 0x6c223bdbL,
+ 0x7cde3759L, 0xcbee7460L, 0x4085f2a7L, 0xce77326eL,
+ 0xa6078084L, 0x19f8509eL, 0xe8efd855L, 0x61d99735L,
+ 0xa969a7aaL, 0xc50c06c2L, 0x5a04abfcL, 0x800bcadcL,
+ 0x9e447a2eL, 0xc3453484L, 0xfdd56705L, 0x0e1e9ec9L,
+ 0xdb73dbd3L, 0x105588cdL, 0x675fda79L, 0xe3674340L,
+ 0xc5c43465L, 0x713e38d8L, 0x3d28f89eL, 0xf16dff20L,
+ 0x153e21e7L, 0x8fb03d4aL, 0xe6e39f2bL, 0xdb83adf7L,
+ 0xe93d5a68L, 0x948140f7L, 0xf64c261cL, 0x94692934L, /* table 2 starts here (entry 0x200) */
+ 0x411520f7L, 0x7602d4f7L, 0xbcf46b2eL, 0xd4a20068L,
+ 0xd4082471L, 0x3320f46aL, 0x43b7d4b7L, 0x500061afL,
+ 0x1e39f62eL, 0x97244546L, 0x14214f74L, 0xbf8b8840L,
+ 0x4d95fc1dL, 0x96b591afL, 0x70f4ddd3L, 0x66a02f45L,
+ 0xbfbc09ecL, 0x03bd9785L, 0x7fac6dd0L, 0x31cb8504L,
+ 0x96eb27b3L, 0x55fd3941L, 0xda2547e6L, 0xabca0a9aL,
+ 0x28507825L, 0x530429f4L, 0x0a2c86daL, 0xe9b66dfbL,
+ 0x68dc1462L, 0xd7486900L, 0x680ec0a4L, 0x27a18deeL,
+ 0x4f3ffea2L, 0xe887ad8cL, 0xb58ce006L, 0x7af4d6b6L,
+ 0xaace1e7cL, 0xd3375fecL, 0xce78a399L, 0x406b2a42L,
+ 0x20fe9e35L, 0xd9f385b9L, 0xee39d7abL, 0x3b124e8bL,
+ 0x1dc9faf7L, 0x4b6d1856L, 0x26a36631L, 0xeae397b2L,
+ 0x3a6efa74L, 0xdd5b4332L, 0x6841e7f7L, 0xca7820fbL,
+ 0xfb0af54eL, 0xd8feb397L, 0x454056acL, 0xba489527L,
+ 0x55533a3aL, 0x20838d87L, 0xfe6ba9b7L, 0xd096954bL,
+ 0x55a867bcL, 0xa1159a58L, 0xcca92963L, 0x99e1db33L,
+ 0xa62a4a56L, 0x3f3125f9L, 0x5ef47e1cL, 0x9029317cL,
+ 0xfdf8e802L, 0x04272f70L, 0x80bb155cL, 0x05282ce3L,
+ 0x95c11548L, 0xe4c66d22L, 0x48c1133fL, 0xc70f86dcL,
+ 0x07f9c9eeL, 0x41041f0fL, 0x404779a4L, 0x5d886e17L,
+ 0x325f51ebL, 0xd59bc0d1L, 0xf2bcc18fL, 0x41113564L,
+ 0x257b7834L, 0x602a9c60L, 0xdff8e8a3L, 0x1f636c1bL,
+ 0x0e12b4c2L, 0x02e1329eL, 0xaf664fd1L, 0xcad18115L,
+ 0x6b2395e0L, 0x333e92e1L, 0x3b240b62L, 0xeebeb922L,
+ 0x85b2a20eL, 0xe6ba0d99L, 0xde720c8cL, 0x2da2f728L,
+ 0xd0127845L, 0x95b794fdL, 0x647d0862L, 0xe7ccf5f0L,
+ 0x5449a36fL, 0x877d48faL, 0xc39dfd27L, 0xf33e8d1eL,
+ 0x0a476341L, 0x992eff74L, 0x3a6f6eabL, 0xf4f8fd37L,
+ 0xa812dc60L, 0xa1ebddf8L, 0x991be14cL, 0xdb6e6b0dL,
+ 0xc67b5510L, 0x6d672c37L, 0x2765d43bL, 0xdcd0e804L,
+ 0xf1290dc7L, 0xcc00ffa3L, 0xb5390f92L, 0x690fed0bL,
+ 0x667b9ffbL, 0xcedb7d9cL, 0xa091cf0bL, 0xd9155ea3L,
+ 0xbb132f88L, 0x515bad24L, 0x7b9479bfL, 0x763bd6ebL,
+ 0x37392eb3L, 0xcc115979L, 0x8026e297L, 0xf42e312dL,
+ 0x6842ada7L, 0xc66a2b3bL, 0x12754cccL, 0x782ef11cL,
+ 0x6a124237L, 0xb79251e7L, 0x06a1bbe6L, 0x4bfb6350L,
+ 0x1a6b1018L, 0x11caedfaL, 0x3d25bdd8L, 0xe2e1c3c9L,
+ 0x44421659L, 0x0a121386L, 0xd90cec6eL, 0xd5abea2aL,
+ 0x64af674eL, 0xda86a85fL, 0xbebfe988L, 0x64e4c3feL,
+ 0x9dbc8057L, 0xf0f7c086L, 0x60787bf8L, 0x6003604dL,
+ 0xd1fd8346L, 0xf6381fb0L, 0x7745ae04L, 0xd736fcccL,
+ 0x83426b33L, 0xf01eab71L, 0xb0804187L, 0x3c005e5fL,
+ 0x77a057beL, 0xbde8ae24L, 0x55464299L, 0xbf582e61L,
+ 0x4e58f48fL, 0xf2ddfda2L, 0xf474ef38L, 0x8789bdc2L,
+ 0x5366f9c3L, 0xc8b38e74L, 0xb475f255L, 0x46fcd9b9L,
+ 0x7aeb2661L, 0x8b1ddf84L, 0x846a0e79L, 0x915f95e2L,
+ 0x466e598eL, 0x20b45770L, 0x8cd55591L, 0xc902de4cL,
+ 0xb90bace1L, 0xbb8205d0L, 0x11a86248L, 0x7574a99eL,
+ 0xb77f19b6L, 0xe0a9dc09L, 0x662d09a1L, 0xc4324633L,
+ 0xe85a1f02L, 0x09f0be8cL, 0x4a99a025L, 0x1d6efe10L,
+ 0x1ab93d1dL, 0x0ba5a4dfL, 0xa186f20fL, 0x2868f169L,
+ 0xdcb7da83L, 0x573906feL, 0xa1e2ce9bL, 0x4fcd7f52L,
+ 0x50115e01L, 0xa70683faL, 0xa002b5c4L, 0x0de6d027L,
+ 0x9af88c27L, 0x773f8641L, 0xc3604c06L, 0x61a806b5L,
+ 0xf0177a28L, 0xc0f586e0L, 0x006058aaL, 0x30dc7d62L,
+ 0x11e69ed7L, 0x2338ea63L, 0x53c2dd94L, 0xc2c21634L,
+ 0xbbcbee56L, 0x90bcb6deL, 0xebfc7da1L, 0xce591d76L,
+ 0x6f05e409L, 0x4b7c0188L, 0x39720a3dL, 0x7c927c24L,
+ 0x86e3725fL, 0x724d9db9L, 0x1ac15bb4L, 0xd39eb8fcL,
+ 0xed545578L, 0x08fca5b5L, 0xd83d7cd3L, 0x4dad0fc4L,
+ 0x1e50ef5eL, 0xb161e6f8L, 0xa28514d9L, 0x6c51133cL,
+ 0x6fd5c7e7L, 0x56e14ec4L, 0x362abfceL, 0xddc6c837L,
+ 0xd79a3234L, 0x92638212L, 0x670efa8eL, 0x406000e0L,
+ 0x3a39ce37L, 0xd3faf5cfL, 0xabc27737L, 0x5ac52d1bL, /* table 3 starts here (entry 0x300) */
+ 0x5cb0679eL, 0x4fa33742L, 0xd3822740L, 0x99bc9bbeL,
+ 0xd5118e9dL, 0xbf0f7315L, 0xd62d1c7eL, 0xc700c47bL,
+ 0xb78c1b6bL, 0x21a19045L, 0xb26eb1beL, 0x6a366eb4L,
+ 0x5748ab2fL, 0xbc946e79L, 0xc6a376d2L, 0x6549c2c8L,
+ 0x530ff8eeL, 0x468dde7dL, 0xd5730a1dL, 0x4cd04dc6L,
+ 0x2939bbdbL, 0xa9ba4650L, 0xac9526e8L, 0xbe5ee304L,
+ 0xa1fad5f0L, 0x6a2d519aL, 0x63ef8ce2L, 0x9a86ee22L,
+ 0xc089c2b8L, 0x43242ef6L, 0xa51e03aaL, 0x9cf2d0a4L,
+ 0x83c061baL, 0x9be96a4dL, 0x8fe51550L, 0xba645bd6L,
+ 0x2826a2f9L, 0xa73a3ae1L, 0x4ba99586L, 0xef5562e9L,
+ 0xc72fefd3L, 0xf752f7daL, 0x3f046f69L, 0x77fa0a59L,
+ 0x80e4a915L, 0x87b08601L, 0x9b09e6adL, 0x3b3ee593L,
+ 0xe990fd5aL, 0x9e34d797L, 0x2cf0b7d9L, 0x022b8b51L,
+ 0x96d5ac3aL, 0x017da67dL, 0xd1cf3ed6L, 0x7c7d2d28L,
+ 0x1f9f25cfL, 0xadf2b89bL, 0x5ad6b472L, 0x5a88f54cL,
+ 0xe029ac71L, 0xe019a5e6L, 0x47b0acfdL, 0xed93fa9bL,
+ 0xe8d3c48dL, 0x283b57ccL, 0xf8d56629L, 0x79132e28L,
+ 0x785f0191L, 0xed756055L, 0xf7960e44L, 0xe3d35e8cL,
+ 0x15056dd4L, 0x88f46dbaL, 0x03a16125L, 0x0564f0bdL,
+ 0xc3eb9e15L, 0x3c9057a2L, 0x97271aecL, 0xa93a072aL,
+ 0x1b3f6d9bL, 0x1e6321f5L, 0xf59c66fbL, 0x26dcf319L,
+ 0x7533d928L, 0xb155fdf5L, 0x03563482L, 0x8aba3cbbL,
+ 0x28517711L, 0xc20ad9f8L, 0xabcc5167L, 0xccad925fL,
+ 0x4de81751L, 0x3830dc8eL, 0x379d5862L, 0x9320f991L,
+ 0xea7a90c2L, 0xfb3e7bceL, 0x5121ce64L, 0x774fbe32L,
+ 0xa8b6e37eL, 0xc3293d46L, 0x48de5369L, 0x6413e680L,
+ 0xa2ae0810L, 0xdd6db224L, 0x69852dfdL, 0x09072166L,
+ 0xb39a460aL, 0x6445c0ddL, 0x586cdecfL, 0x1c20c8aeL,
+ 0x5bbef7ddL, 0x1b588d40L, 0xccd2017fL, 0x6bb4e3bbL,
+ 0xdda26a7eL, 0x3a59ff45L, 0x3e350a44L, 0xbcb4cdd5L,
+ 0x72eacea8L, 0xfa6484bbL, 0x8d6612aeL, 0xbf3c6f47L,
+ 0xd29be463L, 0x542f5d9eL, 0xaec2771bL, 0xf64e6370L,
+ 0x740e0d8dL, 0xe75b1357L, 0xf8721671L, 0xaf537d5dL,
+ 0x4040cb08L, 0x4eb4e2ccL, 0x34d2466aL, 0x0115af84L,
+ 0xe1b00428L, 0x95983a1dL, 0x06b89fb4L, 0xce6ea048L,
+ 0x6f3f3b82L, 0x3520ab82L, 0x011a1d4bL, 0x277227f8L,
+ 0x611560b1L, 0xe7933fdcL, 0xbb3a792bL, 0x344525bdL,
+ 0xa08839e1L, 0x51ce794bL, 0x2f32c9b7L, 0xa01fbac9L,
+ 0xe01cc87eL, 0xbcc7d1f6L, 0xcf0111c3L, 0xa1e8aac7L,
+ 0x1a908749L, 0xd44fbd9aL, 0xd0dadecbL, 0xd50ada38L,
+ 0x0339c32aL, 0xc6913667L, 0x8df9317cL, 0xe0b12b4fL,
+ 0xf79e59b7L, 0x43f5bb3aL, 0xf2d519ffL, 0x27d9459cL,
+ 0xbf97222cL, 0x15e6fc2aL, 0x0f91fc71L, 0x9b941525L,
+ 0xfae59361L, 0xceb69cebL, 0xc2a86459L, 0x12baa8d1L,
+ 0xb6c1075eL, 0xe3056a0cL, 0x10d25065L, 0xcb03a442L,
+ 0xe0ec6e0eL, 0x1698db3bL, 0x4c98a0beL, 0x3278e964L,
+ 0x9f1f9532L, 0xe0d392dfL, 0xd3a0342bL, 0x8971f21eL,
+ 0x1b0a7441L, 0x4ba3348cL, 0xc5be7120L, 0xc37632d8L,
+ 0xdf359f8dL, 0x9b992f2eL, 0xe60b6f47L, 0x0fe3f11dL,
+ 0xe54cda54L, 0x1edad891L, 0xce6279cfL, 0xcd3e7e6fL,
+ 0x1618b166L, 0xfd2c1d05L, 0x848fd2c5L, 0xf6fb2299L,
+ 0xf523f357L, 0xa6327623L, 0x93a83531L, 0x56cccd02L,
+ 0xacf08162L, 0x5a75ebb5L, 0x6e163697L, 0x88d273ccL,
+ 0xde966292L, 0x81b949d0L, 0x4c50901bL, 0x71c65614L,
+ 0xe6c6c7bdL, 0x327a140aL, 0x45e1d006L, 0xc3f27b9aL,
+ 0xc9aa53fdL, 0x62a80f00L, 0xbb25bfe2L, 0x35bdd2f6L,
+ 0x71126905L, 0xb2040222L, 0xb6cbcf7cL, 0xcd769c2bL,
+ 0x53113ec0L, 0x1640e3d3L, 0x38abbd60L, 0x2547adf0L,
+ 0xba38209cL, 0xf746ce76L, 0x77afa1c5L, 0x20756060L,
+ 0x85cbfe4eL, 0x8ae88dd8L, 0x7aaaf9b0L, 0x4cf9aa7eL,
+ 0x1948c25cL, 0x02fb8a8cL, 0x01c36ae4L, 0xd6ebe1f9L,
+ 0x90d4f869L, 0xa65cdea0L, 0x3f09252dL, 0xc208e69fL,
+ 0xb74e6132L, 0xce77e25bL, 0x578fdfe3L, 0x3ac372e6L,
+ }
+ };
diff --git a/crypto/libressl/crypto/bf/bf_skey.c b/crypto/libressl/crypto/bf/bf_skey.c
new file mode 100644
index 0000000..8191d17
--- /dev/null
+++ b/crypto/libressl/crypto/bf/bf_skey.c
@@ -0,0 +1,117 @@
+/* $OpenBSD: bf_skey.c,v 1.12 2014/06/12 15:49:28 deraadt Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young ("
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson ("
+ *
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+#include <stdio.h>
+#include <string.h>
+#include <openssl/crypto.h>
+#include <openssl/blowfish.h>
+#include "bf_locl.h"
+#include "bf_pi.h"
+void BF_set_key(BF_KEY *key, int len, const unsigned char *data)
+ { /*
+ * Build the Blowfish key schedule in *key from the len key bytes at
+ * data. Steps, in order:
+ * 1. copy the constant initial state bf_init into *key;
+ * 2. XOR each of the BF_ROUNDS+2 words of key->P with a 32-bit
+ * big-endian word taken from the key bytes, cycling back to the
+ * start of the key whenever its end is reached;
+ * 3. repeatedly encrypt a block that starts as all zeroes with the
+ * evolving schedule, storing each result pair first over key->P
+ * and then over all 4*256 words of key->S.
+ * Keys longer than (BF_ROUNDS+2)*4 bytes are silently truncated.
+ * NOTE(review): the first byte is read before any bounds check, so
+ * with len <= 0 data[0] is still dereferenced; callers presumably
+ * pass len >= 1 -- confirm.
+ */
+ int i;
+ BF_LONG *p,ri,in[2];
+ const unsigned char *d,*end;
+ memcpy(key,&bf_init,sizeof(BF_KEY));
+ p=key->P;
+ if (len > ((BF_ROUNDS+2)*4)) len=(BF_ROUNDS+2)*4; /* only this many key bytes can be used */
+ d=data;
+ end= &(data[len]);
+ for (i=0; i<(BF_ROUNDS+2); i++)
+ {
+ ri= *(d++); /* assemble one big-endian word from 4 key bytes ... */
+ if (d >= end) d=data; /* ... wrapping around at the end of the key */
+ ri<<=8;
+ ri|= *(d++);
+ if (d >= end) d=data;
+ ri<<=8;
+ ri|= *(d++);
+ if (d >= end) d=data;
+ ri<<=8;
+ ri|= *(d++);
+ if (d >= end) d=data;
+ p[i]^=ri;
+ }
+ in[0]=0L; /* the chained block starts as all zeroes */
+ in[1]=0L;
+ for (i=0; i<(BF_ROUNDS+2); i+=2) /* replace P two words at a time */
+ {
+ BF_encrypt(in,key);
+ p[i ]=in[0];
+ p[i+1]=in[1];
+ }
+ p=key->S;
+ for (i=0; i<4*256; i+=2) /* then replace S; in[] carries over from the P pass */
+ {
+ BF_encrypt(in,key);
+ p[i ]=in[0];
+ p[i+1]=in[1];
+ }
+ }
diff --git a/crypto/libressl/crypto/chacha/Makefile b/crypto/libressl/crypto/chacha/Makefile
index 9f7aa3a..2eb56cb 100644
--- a/crypto/libressl/crypto/chacha/Makefile
+++ b/crypto/libressl/crypto/chacha/Makefile
@@ -1,4 +1,4 @@
+include ../../
obj = chacha.o
diff --git a/crypto/libressl/crypto/compat/Makefile b/crypto/libressl/crypto/compat/Makefile
index 7fd5ea1..00e3a67 100644
--- a/crypto/libressl/crypto/compat/Makefile
+++ b/crypto/libressl/crypto/compat/Makefile
@@ -1,4 +1,4 @@
+include ../../
obj = arc4random.o explicit_bzero.o timingsafe_bcmp.o timingsafe_memcmp.o
diff --git a/crypto/libressl/crypto/compat/arc4random.c b/crypto/libressl/crypto/compat/arc4random.c
index ac41fb3..67a47f6 100644
--- a/crypto/libressl/crypto/compat/arc4random.c
+++ b/crypto/libressl/crypto/compat/arc4random.c
@@ -84,6 +84,9 @@ _rs_init(u_char *buf, size_t n)
if (rs == NULL) {
if (_rs_allocate(&rs, &rsx) == -1)
+#if defined(__FE310__)
+ rs->rs_blocks = (RSBLKS - 1);
chacha_keysetup(&rsx->rs_chacha, buf, KEYSZ * 8, 0);
@@ -109,9 +112,6 @@ _rs_stir(void)
memset(rsx->rs_buf, 0, sizeof(rsx->rs_buf));
rs->rs_count = 1600000;
-#if defined(__FE310__)
- rs->rs_blocks = RSBLKS;
static inline void
@@ -145,7 +145,7 @@ _rs_rekey(u_char *dat, size_t datlen)
#if defined(__FE310__)
if (dat || (rs->rs_blocks == 0)) {
- rs->rs_blocks = RSBLKS;
+ rs->rs_blocks = (RSBLKS - 1);
} else {
rs->rs_have = sizeof(rsx->rs_buf);
diff --git a/crypto/libressl/crypto/curve25519/Makefile b/crypto/libressl/crypto/curve25519/Makefile
index 47696b1..459383d 100644
--- a/crypto/libressl/crypto/curve25519/Makefile
+++ b/crypto/libressl/crypto/curve25519/Makefile
@@ -1,4 +1,4 @@
+include ../../
obj = curve25519.o curve25519-generic.o
diff --git a/crypto/libressl/crypto/modes/Makefile b/crypto/libressl/crypto/modes/Makefile
new file mode 100644
index 0000000..aeba042
--- /dev/null
+++ b/crypto/libressl/crypto/modes/Makefile
@@ -0,0 +1,14 @@
+include ../../
+obj = cbc128.o ccm128.o cfb128.o ctr128.o cts128.o gcm128.o ofb128.o xts128.o
+all: $(obj)
+dep: all
+%.o: %.c
+ $(CC) $(CFLAGS) -c $<
+ rm -f *.o *.a
diff --git a/crypto/libressl/crypto/modes/cbc128.c b/crypto/libressl/crypto/modes/cbc128.c
new file mode 100644
index 0000000..7502a48
--- /dev/null
+++ b/crypto/libressl/crypto/modes/cbc128.c
@@ -0,0 +1,202 @@
+/* $OpenBSD: cbc128.c,v 1.4 2015/02/10 09:46:30 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. ("
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ *
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit ("
+ *
+ * ====================================================================
+ *
+ */
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
+#include <string.h>
+#ifndef MODES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block) /* CBC-encrypt len bytes from in to out with the block callback; ivec is the chaining value and is updated on return. */
+ size_t n;
+ const unsigned char *iv = ivec;
+ ((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0) { /* NOTE(review): the head of this condition is not visible in this listing; the visible part tests for a pointer that is not size_t-aligned */
+ while (len>=16) {
+ for(n=0; n<16; ++n) /* byte-wise XOR of plaintext with the previous block */
+ out[n] = in[n] ^ iv[n];
+ (*block)(out, out, key);
+ iv = out; /* the ciphertext just produced chains into the next block */
+ len -= 16;
+ in += 16;
+ out += 16;
+ }
+ } else {
+ while (len>=16) {
+ for(n=0; n<16; n+=sizeof(size_t)) /* same XOR, one size_t word at a time */
+ *(size_t*)(out+n) =
+ *(size_t*)(in+n) ^ *(size_t*)(iv+n);
+ (*block)(out, out, key);
+ iv = out;
+ len -= 16;
+ in += 16;
+ out += 16;
+ }
+ }
+ while (len) { /* a trailing partial block, if any */
+ for(n=0; n<16 && n<len; ++n)
+ out[n] = in[n] ^ iv[n];
+ for(; n<16; ++n) /* missing plaintext bytes are treated as zero, so a full 16 bytes are written to out */
+ out[n] = iv[n];
+ (*block)(out, out, key);
+ iv = out;
+ if (len<=16) break;
+ len -= 16;
+ in += 16;
+ out += 16;
+ }
+ memcpy(ivec,iv,16); /* hand the last chaining block back to the caller */
+void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block) /* CBC-decrypt len bytes from in to out with the block callback; ivec is the chaining value and is updated on return. */
+ size_t n;
+ union { size_t t[16/sizeof(size_t)]; unsigned char c[16]; } tmp; /* scratch block used when the output may overwrite the input */
+ if (in != out) { /* distinct buffers: the previous ciphertext block can be read straight from in */
+ const unsigned char *iv = ivec;
+ ((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0) { /* NOTE(review): the head of this condition is not visible in this listing; the visible part tests for a pointer that is not size_t-aligned */
+ while (len>=16) {
+ (*block)(in, out, key);
+ for(n=0; n<16; ++n)
+ out[n] ^= iv[n];
+ iv = in; /* this ciphertext block chains into the next one */
+ len -= 16;
+ in += 16;
+ out += 16;
+ }
+ } else if (16%sizeof(size_t) == 0) { /* always true */
+ while (len>=16) {
+ size_t *out_t=(size_t *)out, *iv_t=(size_t *)iv;
+ (*block)(in, out, key);
+ for(n=0; n<16/sizeof(size_t); n++) /* word-wise XOR */
+ out_t[n] ^= iv_t[n];
+ iv = in;
+ len -= 16;
+ in += 16;
+ out += 16;
+ }
+ }
+ memcpy(ivec,iv,16); /* save the chaining value for the tail loop and the caller */
+ } else { /* in-place: each ciphertext unit is saved into ivec before out overwrites it */
+ ((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0) { /* NOTE(review): head of this condition is not visible either */
+ unsigned char c;
+ while (len>=16) {
+ (*block)(in, tmp.c, key);
+ for(n=0; n<16; ++n) {
+ c = in[n]; /* keep the ciphertext byte; out[n] aliases in[n] here */
+ out[n] = tmp.c[n] ^ ivec[n];
+ ivec[n] = c;
+ }
+ len -= 16;
+ in += 16;
+ out += 16;
+ }
+ } else if (16%sizeof(size_t) == 0) { /* always true */
+ while (len>=16) {
+ size_t c, *out_t=(size_t *)out, *ivec_t=(size_t *)ivec;
+ const size_t *in_t=(const size_t *)in;
+ (*block)(in, tmp.c, key);
+ for(n=0; n<16/sizeof(size_t); n++) {
+ c = in_t[n];
+ out_t[n] = tmp.t[n] ^ ivec_t[n];
+ ivec_t[n] = c;
+ }
+ len -= 16;
+ in += 16;
+ out += 16;
+ }
+ }
+ }
+ while (len) { /* a trailing partial block, if any: only len bytes are written to out */
+ unsigned char c;
+ (*block)(in, tmp.c, key);
+ for(n=0; n<16 && n<len; ++n) {
+ c = in[n];
+ out[n] = tmp.c[n] ^ ivec[n];
+ ivec[n] = c;
+ }
+ if (len<=16) {
+ for (; n<16; ++n) /* NOTE(review): reads in[n] for n >= len; presumably the caller supplies a full final ciphertext block -- confirm */
+ ivec[n] = in[n];
+ break;
+ }
+ len -= 16;
+ in += 16;
+ out += 16;
+ }
diff --git a/crypto/libressl/crypto/modes/ccm128.c b/crypto/libressl/crypto/modes/ccm128.c
new file mode 100644
index 0000000..ffeb4e4
--- /dev/null
+++ b/crypto/libressl/crypto/modes/ccm128.c
@@ -0,0 +1,441 @@
+/* $OpenBSD: ccm128.c,v 1.5 2019/05/08 14:18:25 tb Exp $ */
+/* ====================================================================
+ * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. ("
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ *
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit ("
+ *
+ * ====================================================================
+ */
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
+#include <string.h>
+#ifndef MODES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+/* First you setup M and L parameters and pass the key schedule.
+ * This is called once per session setup... */
+void CRYPTO_ccm128_init(CCM128_CONTEXT *ctx,
+ unsigned int M,unsigned int L,void *key,block128_f block) /* Reset ctx and record the M and L parameters, the key pointer and the block callback. */
+ memset(ctx->nonce.c,0,sizeof(ctx->nonce.c));
+ ctx->nonce.c[0] = ((u8)(L-1)&7) | (u8)(((M-2)/2)&7)<<3; /* flags byte: L-1 in bits 0..2, (M-2)/2 in bits 3..5 */
+ ctx->blocks = 0;
+ ctx->block = block;
+ ctx->key = key; /* the pointer is stored, not the key material */
+/* !!! Following interfaces are to be called *once* per packet !!! */
+/* Then you setup per-message nonce and pass the length of the message */
+int CRYPTO_ccm128_setiv(CCM128_CONTEXT *ctx,
+ const unsigned char *nonce,size_t nlen,size_t mlen) /* Store the per-message nonce and message length in ctx->nonce; returns 0, or -1 if the nonce is too short. */
+ unsigned int L = ctx->nonce.c[0]&7; /* the L parameter */ /* i.e. the low three flag bits, which CRYPTO_ccm128_init stores as L-1 */
+ if (nlen<(14-L)) return -1; /* nonce is too short */
+ if (sizeof(mlen)==8 && L>=3) { /* upper four bytes of a 64-bit mlen go into nonce bytes 8..11 */
+ ctx->nonce.c[8] = (u8)(mlen>>(56%(sizeof(mlen)*8)));
+ ctx->nonce.c[9] = (u8)(mlen>>(48%(sizeof(mlen)*8)));
+ ctx->nonce.c[10] = (u8)(mlen>>(40%(sizeof(mlen)*8)));
+ ctx->nonce.c[11] = (u8)(mlen>>(32%(sizeof(mlen)*8)));
+ }
+ else
+ ctx->nonce.u[1] = 0;
+ ctx->nonce.c[12] = (u8)(mlen>>24); /* low 32 bits of mlen, most significant byte first */
+ ctx->nonce.c[13] = (u8)(mlen>>16);
+ ctx->nonce.c[14] = (u8)(mlen>>8);
+ ctx->nonce.c[15] = (u8)mlen;
+ ctx->nonce.c[0] &= ~0x40; /* clear Adata flag */
+ memcpy(&ctx->nonce.c[1],nonce,14-L); /* 14-L nonce bytes follow the flags byte */
+ return 0;
+/* Then you pass additional authentication data, this is optional */
+void CRYPTO_ccm128_aad(CCM128_CONTEXT *ctx,
+ const unsigned char *aad,size_t alen) /* Mix alen bytes of additional authenticated data into ctx->cmac; does nothing when alen is 0. */
+{ unsigned int i;
+ block128_f block = ctx->block;
+ if (alen==0) return;
+ ctx->nonce.c[0] |= 0x40; /* set Adata flag */
+ (*block)(ctx->nonce.c,ctx->cmac.c,ctx->key), /* comma operator: the statement continues on the next line */
+ ctx->blocks++;
+ if (alen<(0x10000-0x100)) { /* short form: two-byte length, data starts at offset 2 */
+ ctx->cmac.c[0] ^= (u8)(alen>>8);
+ ctx->cmac.c[1] ^= (u8)alen;
+ i=2;
+ }
+ else if (sizeof(alen)==8 && alen>=(size_t)1<<(32%(sizeof(alen)*8))) { /* long form: 0xFF 0xFF marker plus eight-byte length */
+ ctx->cmac.c[0] ^= 0xFF;
+ ctx->cmac.c[1] ^= 0xFF;
+ ctx->cmac.c[2] ^= (u8)(alen>>(56%(sizeof(alen)*8)));
+ ctx->cmac.c[3] ^= (u8)(alen>>(48%(sizeof(alen)*8)));
+ ctx->cmac.c[4] ^= (u8)(alen>>(40%(sizeof(alen)*8)));
+ ctx->cmac.c[5] ^= (u8)(alen>>(32%(sizeof(alen)*8)));
+ ctx->cmac.c[6] ^= (u8)(alen>>24);
+ ctx->cmac.c[7] ^= (u8)(alen>>16);
+ ctx->cmac.c[8] ^= (u8)(alen>>8);
+ ctx->cmac.c[9] ^= (u8)alen;
+ i=10;
+ }
+ else { /* medium form: 0xFF 0xFE marker plus four-byte length */
+ ctx->cmac.c[0] ^= 0xFF;
+ ctx->cmac.c[1] ^= 0xFE;
+ ctx->cmac.c[2] ^= (u8)(alen>>24);
+ ctx->cmac.c[3] ^= (u8)(alen>>16);
+ ctx->cmac.c[4] ^= (u8)(alen>>8);
+ ctx->cmac.c[5] ^= (u8)alen;
+ i=6;
+ }
+ do { /* XOR the data into the rest of the block, encrypt, and repeat until alen is consumed */
+ for(;i<16 && alen;++i,++aad,--alen)
+ ctx->cmac.c[i] ^= *aad;
+ (*block)(ctx->cmac.c,ctx->cmac.c,ctx->key),
+ ctx->blocks++;
+ i=0;
+ } while (alen);
+/* Finally you encrypt or decrypt the message */
+/* counter part of nonce may not be larger than L*8 bits,
+ * L is not larger than 8, therefore 64-bit counter... */
+static void ctr64_inc(unsigned char *counter) { /* Add one to bytes 8..15 of counter, treated as a big-endian integer. */
+ unsigned int n=8;
+ u8 c;
+ counter += 8; /* skip to the last eight bytes */
+ do {
+ --n;
+ c = counter[n];
+ ++c;
+ counter[n] = c;
+ if (c) return; /* no wrap to zero, so no carry into the next byte */
+ } while (n);
+int CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx,
+ const unsigned char *inp, unsigned char *out,
+ size_t len) /* Encrypt len bytes from inp to out and fold the plaintext into ctx->cmac; returns 0, -1 on length mismatch, -2 when the block limit is exceeded. */
+ size_t n;
+ unsigned int i,L;
+ unsigned char flags0 = ctx->nonce.c[0]; /* saved so the flags byte can be restored before returning 0 */
+ block128_f block = ctx->block;
+ void * key = ctx->key;
+ union { u64 u[2]; u8 c[16]; } scratch;
+ if (!(flags0&0x40)) /* Adata flag clear: the first block has not been encrypted into cmac yet */
+ (*block)(ctx->nonce.c,ctx->cmac.c,key),
+ ctx->blocks++;
+ ctx->nonce.c[0] = L = flags0&7;
+ for (n=0,i=15-L;i<15;++i) { /* read the stored length back out of the nonce block, zeroing it as we go */
+ n |= ctx->nonce.c[i];
+ ctx->nonce.c[i]=0;
+ n <<= 8;
+ }
+ n |= ctx->nonce.c[15]; /* reconstructed length */
+ ctx->nonce.c[15]=1; /* counter field now holds 1 */
+ if (n!=len) return -1; /* length mismatch */
+ ctx->blocks += ((len+15)>>3)|1; /* grow the running block count checked on the next line */
+ if (ctx->blocks > (U64(1)<<61)) return -2; /* too much data */
+ while (len>=16) {
+ union { u64 u[2]; u8 c[16]; } temp;
+ memcpy (temp.c,inp,16);
+ ctx->cmac.u[0] ^= temp.u[0];
+ ctx->cmac.u[1] ^= temp.u[1];
+ ctx->cmac.u[0] ^= ((u64*)inp)[0]; /* NOTE(review): this cast-based XOR repeats the memcpy-based one above; presumably the two are alternative preprocessor branches whose directives are not visible in this listing -- confirm */
+ ctx->cmac.u[1] ^= ((u64*)inp)[1];
+ (*block)(ctx->cmac.c,ctx->cmac.c,key);
+ (*block)(ctx->nonce.c,scratch.c,key); /* keystream block for the current counter value */
+ ctr64_inc(ctx->nonce.c);
+ temp.u[0] ^= scratch.u[0];
+ temp.u[1] ^= scratch.u[1];
+ memcpy(out,temp.c,16);
+ ((u64*)out)[0] = scratch.u[0]^((u64*)inp)[0]; /* NOTE(review): second form of the same store as the memcpy above; see the note on the cmac XOR */
+ ((u64*)out)[1] = scratch.u[1]^((u64*)inp)[1];
+ inp += 16;
+ out += 16;
+ len -= 16;
+ }
+ if (len) { /* trailing partial block */
+ for (i=0; i<len; ++i) ctx->cmac.c[i] ^= inp[i];
+ (*block)(ctx->cmac.c,ctx->cmac.c,key);
+ (*block)(ctx->nonce.c,scratch.c,key);
+ for (i=0; i<len; ++i) out[i] = scratch.c[i]^inp[i];
+ }
+ for (i=15-L;i<16;++i) /* zero the counter field */
+ ctx->nonce.c[i]=0;
+ (*block)(ctx->nonce.c,scratch.c,key);
+ ctx->cmac.u[0] ^= scratch.u[0]; /* mask cmac with the encrypted zero-counter block */
+ ctx->cmac.u[1] ^= scratch.u[1];
+ ctx->nonce.c[0] = flags0;
+ return 0;
+int CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx,
+ const unsigned char *inp, unsigned char *out,
+ size_t len) /* Decrypt len bytes from inp to out and fold the recovered plaintext into ctx->cmac; returns 0, or -1 on length mismatch. */
+ size_t n;
+ unsigned int i,L;
+ unsigned char flags0 = ctx->nonce.c[0]; /* saved so the flags byte can be restored before returning 0 */
+ block128_f block = ctx->block;
+ void * key = ctx->key;
+ union { u64 u[2]; u8 c[16]; } scratch;
+ if (!(flags0&0x40)) /* Adata flag clear: the first block has not been encrypted into cmac yet */
+ (*block)(ctx->nonce.c,ctx->cmac.c,key);
+ ctx->nonce.c[0] = L = flags0&7;
+ for (n=0,i=15-L;i<15;++i) { /* read the stored length back out of the nonce block, zeroing it as we go */
+ n |= ctx->nonce.c[i];
+ ctx->nonce.c[i]=0;
+ n <<= 8;
+ }
+ n |= ctx->nonce.c[15]; /* reconstructed length */
+ ctx->nonce.c[15]=1; /* counter field now holds 1 */
+ if (n!=len) return -1;
+ while (len>=16) {
+ union { u64 u[2]; u8 c[16]; } temp;
+ (*block)(ctx->nonce.c,scratch.c,key); /* keystream block for the current counter value */
+ ctr64_inc(ctx->nonce.c);
+ memcpy (temp.c,inp,16);
+ ctx->cmac.u[0] ^= (scratch.u[0] ^= temp.u[0]); /* scratch becomes the plaintext, which is also XORed into cmac */
+ ctx->cmac.u[1] ^= (scratch.u[1] ^= temp.u[1]);
+ memcpy (out,scratch.c,16);
+ ctx->cmac.u[0] ^= (((u64*)out)[0] = scratch.u[0]^((u64*)inp)[0]); /* NOTE(review): cast-based form following the memcpy-based one above; presumably alternative preprocessor branches whose directives are not visible in this listing -- confirm */
+ ctx->cmac.u[1] ^= (((u64*)out)[1] = scratch.u[1]^((u64*)inp)[1]);
+ (*block)(ctx->cmac.c,ctx->cmac.c,key);
+ inp += 16;
+ out += 16;
+ len -= 16;
+ }
+ if (len) { /* trailing partial block */
+ (*block)(ctx->nonce.c,scratch.c,key);
+ for (i=0; i<len; ++i)
+ ctx->cmac.c[i] ^= (out[i] = scratch.c[i]^inp[i]);
+ (*block)(ctx->cmac.c,ctx->cmac.c,key);
+ }
+ for (i=15-L;i<16;++i) /* zero the counter field */
+ ctx->nonce.c[i]=0;
+ (*block)(ctx->nonce.c,scratch.c,key);
+ ctx->cmac.u[0] ^= scratch.u[0]; /* mask cmac with the encrypted zero-counter block */
+ ctx->cmac.u[1] ^= scratch.u[1];
+ ctx->nonce.c[0] = flags0;
+ return 0;
+static void ctr64_add (unsigned char *counter,size_t inc) /* Add inc to bytes 8..15 of counter, treated as a big-endian integer. */
+{ size_t n=8, val=0;
+ counter += 8; /* skip to the last eight bytes */
+ do {
+ --n;
+ val += counter[n] + (inc&0xff); /* byte sum plus the carry left in val */
+ counter[n] = (unsigned char)val;
+ val >>= 8; /* carry bit */
+ inc >>= 8;
+ } while(n && (inc || val)); /* stop early once nothing is left to add or carry */
+int CRYPTO_ccm128_encrypt_ccm64(CCM128_CONTEXT *ctx,
+ const unsigned char *inp, unsigned char *out,
+ size_t len,ccm128_f stream) /* Like CRYPTO_ccm128_encrypt, but whole 16-byte blocks are handed to the stream callback; returns 0, -1 on length mismatch, -2 when the block limit is exceeded. */
+ size_t n;
+ unsigned int i,L;
+ unsigned char flags0 = ctx->nonce.c[0]; /* saved so the flags byte can be restored before returning 0 */
+ block128_f block = ctx->block;
+ void * key = ctx->key;
+ union { u64 u[2]; u8 c[16]; } scratch;
+ if (!(flags0&0x40)) /* Adata flag clear: the first block has not been encrypted into cmac yet */
+ (*block)(ctx->nonce.c,ctx->cmac.c,key),
+ ctx->blocks++;
+ ctx->nonce.c[0] = L = flags0&7;
+ for (n=0,i=15-L;i<15;++i) { /* read the stored length back out of the nonce block, zeroing it as we go */
+ n |= ctx->nonce.c[i];
+ ctx->nonce.c[i]=0;
+ n <<= 8;
+ }
+ n |= ctx->nonce.c[15]; /* reconstructed length */
+ ctx->nonce.c[15]=1; /* counter field now holds 1 */
+ if (n!=len) return -1; /* length mismatch */
+ ctx->blocks += ((len+15)>>3)|1; /* grow the running block count checked on the next line */
+ if (ctx->blocks > (U64(1)<<61)) return -2; /* too much data */
+ if ((n=len/16)) { /* n whole blocks go through the stream callback in one call */
+ (*stream)(inp,out,n,key,ctx->nonce.c,ctx->cmac.c);
+ n *= 16;
+ inp += n;
+ out += n;
+ len -= n;
+ if (len) ctr64_add(ctx->nonce.c,n/16); /* counter is advanced here only when a partial block still needs it */
+ }
+ if (len) { /* trailing partial block */
+ for (i=0; i<len; ++i) ctx->cmac.c[i] ^= inp[i];
+ (*block)(ctx->cmac.c,ctx->cmac.c,key);
+ (*block)(ctx->nonce.c,scratch.c,key);
+ for (i=0; i<len; ++i) out[i] = scratch.c[i]^inp[i];
+ }
+ for (i=15-L;i<16;++i) /* zero the counter field */
+ ctx->nonce.c[i]=0;
+ (*block)(ctx->nonce.c,scratch.c,key);
+ ctx->cmac.u[0] ^= scratch.u[0]; /* mask cmac with the encrypted zero-counter block */
+ ctx->cmac.u[1] ^= scratch.u[1];
+ ctx->nonce.c[0] = flags0;
+ return 0;
+int CRYPTO_ccm128_decrypt_ccm64(CCM128_CONTEXT *ctx,
+ const unsigned char *inp, unsigned char *out,
+ size_t len,ccm128_f stream) /* Like CRYPTO_ccm128_decrypt, but whole 16-byte blocks are handed to the stream callback; returns 0, or -1 on length mismatch. */
+ size_t n;
+ unsigned int i,L;
+ unsigned char flags0 = ctx->nonce.c[0]; /* saved so the flags byte can be restored before returning 0 */
+ block128_f block = ctx->block;
+ void * key = ctx->key;
+ union { u64 u[2]; u8 c[16]; } scratch;
+ if (!(flags0&0x40)) /* Adata flag clear: the first block has not been encrypted into cmac yet */
+ (*block)(ctx->nonce.c,ctx->cmac.c,key);
+ ctx->nonce.c[0] = L = flags0&7;
+ for (n=0,i=15-L;i<15;++i) { /* read the stored length back out of the nonce block, zeroing it as we go */
+ n |= ctx->nonce.c[i];
+ ctx->nonce.c[i]=0;
+ n <<= 8;
+ }
+ n |= ctx->nonce.c[15]; /* reconstructed length */
+ ctx->nonce.c[15]=1; /* counter field now holds 1 */
+ if (n!=len) return -1;
+ if ((n=len/16)) { /* n whole blocks go through the stream callback in one call */
+ (*stream)(inp,out,n,key,ctx->nonce.c,ctx->cmac.c);
+ n *= 16;
+ inp += n;
+ out += n;
+ len -= n;
+ if (len) ctr64_add(ctx->nonce.c,n/16); /* counter is advanced here only when a partial block still needs it */
+ }
+ if (len) { /* trailing partial block */
+ (*block)(ctx->nonce.c,scratch.c,key);
+ for (i=0; i<len; ++i)
+ ctx->cmac.c[i] ^= (out[i] = scratch.c[i]^inp[i]);
+ (*block)(ctx->cmac.c,ctx->cmac.c,key);
+ }
+ for (i=15-L;i<16;++i) /* zero the counter field */
+ ctx->nonce.c[i]=0;
+ (*block)(ctx->nonce.c,scratch.c,key);
+ ctx->cmac.u[0] ^= scratch.u[0]; /* mask cmac with the encrypted zero-counter block */
+ ctx->cmac.u[1] ^= scratch.u[1];
+ ctx->nonce.c[0] = flags0;
+ return 0;
+size_t CRYPTO_ccm128_tag(CCM128_CONTEXT *ctx,unsigned char *tag,size_t len) /* Copy the M-byte tag from ctx->cmac into tag; returns M, or 0 when len is not exactly M. */
+{ unsigned int M = (ctx->nonce.c[0]>>3)&7; /* the M parameter */
+ M *= 2; M += 2; /* undo the (M-2)/2 encoding held in the flags byte */
+ if (len != M) return 0;
+ memcpy(tag,ctx->cmac.c,M);
+ return M;
diff --git a/crypto/libressl/crypto/modes/cfb128.c b/crypto/libressl/crypto/modes/cfb128.c
new file mode 100644
index 0000000..88bfbc4
--- /dev/null
+++ b/crypto/libressl/crypto/modes/cfb128.c
@@ -0,0 +1,234 @@
+/* $OpenBSD: cfb128.c,v 1.4 2015/02/10 09:46:30 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. ("
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ *
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit ("
+ *
+ * ====================================================================
+ *
+ */
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
+#include <string.h>
+#ifndef MODES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+/* The input and output encrypted as though 128bit cfb mode is being
+ * used. The extra state information to record how much of the
+ * 128bit block we have used is contained in *num;
+ */
+void CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], int *num,
+ int enc, block128_f block) /* CFB-process len bytes from in to out: encrypt when enc is non-zero, decrypt otherwise; ivec and *num carry the state between calls. */
+ unsigned int n;
+ size_t l = 0;
+ n = *num; /* offset into the current 16-byte block */
+ if (enc) {
+ if (16%sizeof(size_t) == 0) do { /* always true actually */
+ while (n && len) { /* finish a block left partly used by an earlier call */
+ *(out++) = ivec[n] ^= *(in++);
+ --len;
+ n = (n+1) % 16;
+ }
+ if (((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0)
+ break; /* a pointer is not size_t-aligned: use the byte-wise loop below */
+ while (len>=16) {
+ (*block)(ivec, ivec, key);
+ for (; n<16; n+=sizeof(size_t)) { /* ciphertext is written to out and kept in ivec as the next feedback */
+ *(size_t*)(out+n) =
+ *(size_t*)(ivec+n) ^= *(size_t*)(in+n);
+ }
+ len -= 16;
+ out += 16;
+ in += 16;
+ n = 0;
+ }
+ if (len) { /* trailing partial block */
+ (*block)(ivec, ivec, key);
+ while (len--) {
+ out[n] = ivec[n] ^= in[n];
+ ++n;
+ }
+ }
+ *num = n;
+ return;
+ } while (0);
+ /* the rest would be commonly eliminated by x86* compiler */
+ while (l<len) { /* byte-at-a-time fallback */
+ if (n == 0) {
+ (*block)(ivec, ivec, key);
+ }
+ out[l] = ivec[n] ^= in[l];
+ ++l;
+ n = (n+1) % 16;
+ }
+ *num = n;
+ } else {
+ if (16%sizeof(size_t) == 0) do { /* always true actually */
+ while (n && len) { /* finish a block left partly used by an earlier call */
+ unsigned char c;
+ *(out++) = ivec[n] ^ (c = *(in++)); ivec[n] = c; /* the ciphertext byte is kept in ivec as feedback */
+ --len;
+ n = (n+1) % 16;
+ }
+ if (((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0)
+ break; /* a pointer is not size_t-aligned: use the byte-wise loop below */
+ while (len>=16) {
+ (*block)(ivec, ivec, key);
+ for (; n<16; n+=sizeof(size_t)) {
+ size_t t = *(size_t*)(in+n); /* read the ciphertext word before out is written */
+ *(size_t*)(out+n) = *(size_t*)(ivec+n) ^ t;
+ *(size_t*)(ivec+n) = t;
+ }
+ len -= 16;
+ out += 16;
+ in += 16;
+ n = 0;
+ }
+ if (len) { /* trailing partial block */
+ (*block)(ivec, ivec, key);
+ while (len--) {
+ unsigned char c;
+ out[n] = ivec[n] ^ (c = in[n]); ivec[n] = c;
+ ++n;
+ }
+ }
+ *num = n;
+ return;
+ } while (0);
+ /* the rest would be commonly eliminated by x86* compiler */
+ while (l<len) { /* byte-at-a-time fallback */
+ unsigned char c;
+ if (n == 0) {
+ (*block)(ivec, ivec, key);
+ }
+ out[l] = ivec[n] ^ (c = in[l]); ivec[n] = c;
+ ++l;
+ n = (n+1) % 16;
+ }
+ *num=n;
+ }
+/* This expects a single block of size nbits for both in and out. Note that
+ it corrupts any extra bits in the last byte of out */
+static void cfbr_encrypt_block(const unsigned char *in,unsigned char *out,
+ int nbits,const void *key,
+ unsigned char ivec[16],int enc,
+ block128_f block) /* Process one nbits-wide CFB unit (1..128 bits) and shift ivec left by nbits; does nothing for nbits outside that range. */
+ int n,rem,num;
+ unsigned char ovec[16*2 + 1]; /* +1 because we dereference (but don't use) one byte off the end */
+ if (nbits<=0 || nbits>128) return;
+ /* fill in the first half of the new IV with the current IV */
+ memcpy(ovec,ivec,16);
+ /* construct the new IV */
+ (*block)(ivec,ivec,key);
+ num = (nbits+7)/8; /* number of bytes touched, rounded up */
+ if (enc) /* encrypt the input */
+ for(n=0 ; n < num ; ++n)
+ out[n] = (ovec[16+n] = in[n] ^ ivec[n]); /* the ciphertext goes into the second half of ovec */
+ else /* decrypt the input */
+ for(n=0 ; n < num ; ++n)
+ out[n] = (ovec[16+n] = in[n]) ^ ivec[n];
+ /* shift ovec left... */
+ rem = nbits%8;
+ num = nbits/8;
+ if(rem==0)
+ memcpy(ivec,ovec+num,16); /* whole-byte shift */
+ else
+ for(n=0 ; n < 16 ; ++n) /* bit-granular shift across byte boundaries */
+ ivec[n] = ovec[n+num]<<rem | ovec[n+num+1]>>(8-rem);
+ /* it is not necessary to cleanse ovec, since the IV is not secret */
+/* N.B. This expects the input to be packed, MS bit first */
+void CRYPTO_cfb128_1_encrypt(const unsigned char *in, unsigned char *out,
+ size_t bits, const void *key,
+ unsigned char ivec[16], int *num,
+ int enc, block128_f block) /* 1-bit CFB over a packed bit string: each of the bits input bits goes through cfbr_encrypt_block separately. num is not referenced in the visible body. */
+ size_t n;
+ unsigned char c[1],d[1];
+ for(n=0 ; n<bits ; ++n)
+ {
+ c[0]=(in[n/8]&(1 << (7-n%8))) ? 0x80 : 0; /* move input bit n into the top bit of c[0] */
+ cfbr_encrypt_block(c,d,1,key,ivec,enc,block);
+ out[n/8]=(out[n/8]&~(1 << (unsigned int)(7-n%8))) | /* clear bit n of out, then place the result bit there */
+ ((d[0]&0x80) >> (unsigned int)(n%8));
+ }
+void CRYPTO_cfb128_8_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const void *key,
+ unsigned char ivec[16], int *num,
+ int enc, block128_f block) /* 8-bit CFB: each of the length input bytes goes through cfbr_encrypt_block separately. num is not referenced in the visible body. */
+ size_t n;
+ for(n=0 ; n<length ; ++n)
+ cfbr_encrypt_block(&in[n],&out[n],8,key,ivec,enc,block);
diff --git a/crypto/libressl/crypto/modes/ctr128.c b/crypto/libressl/crypto/modes/ctr128.c
new file mode 100644
index 0000000..3f14e4e
--- /dev/null
+++ b/crypto/libressl/crypto/modes/ctr128.c
@@ -0,0 +1,251 @@
+/* $OpenBSD: ctr128.c,v 1.7 2017/08/13 17:46:24 bcook Exp $ */
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. ("
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ *
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit ("
+ *
+ * ====================================================================
+ *
+ */
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
+#include <string.h>
+#ifndef MODES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#include <assert.h>
+/* NOTE: the IV/counter CTR mode is big-endian. The code itself
+ * is endian-neutral. */
+/* increment counter (128-bit int) by 1 */
+static void ctr128_inc(unsigned char *counter) { /* walks the 16 bytes from the last to the first, carrying while a byte wraps to zero */
+ u32 n=16;
+ u8 c;
+ do {
+ --n;
+ c = counter[n];
+ ++c;
+ counter[n] = c;
+ if (c) return; /* no wrap to zero, so no carry into the next byte */
+ } while (n);
+static void
+ctr128_inc_aligned(unsigned char *counter) /* Increment the 16-byte counter; used by CRYPTO_ctr128_encrypt on its aligned path. */
+ ctr128_inc(counter); /* NOTE(review): a byte-wise call and a word-wise loop both appear here; presumably alternative preprocessor branches whose directives are not visible in this listing -- confirm */
+ size_t *data, c, n;
+ data = (size_t *)counter;
+ n = 16 / sizeof(size_t);
+ do { /* word-wise increment from the last size_t word to the first */
+ --n;
+ c = data[n];
+ ++c;
+ data[n] = c;
+ if (c)
+ return;
+ } while (n);
+/* The input encrypted as though 128bit counter mode is being
+ * used. The extra state information to record how much of the
+ * 128bit block we have used is contained in *num, and the
+ * encrypted counter is kept in ecount_buf. Both *num and
+ * ecount_buf must be initialised with zeros before the first
+ * call to CRYPTO_ctr128_encrypt().
+ *
+ * This algorithm assumes that the counter is in the x lower bits
+ * of the IV (ivec), and that the application has full control over
+ * overflow and the rest of the IV. This implementation takes NO
+ * responsibility for checking that the counter doesn't overflow
+ * into the rest of the IV when incremented.
+ */
+void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], unsigned char ecount_buf[16],
+ unsigned int *num, block128_f block) /* XOR len bytes from in with the keystream block(ivec) into out, incrementing ivec once per block; *num and ecount_buf carry the state between calls. */
+ unsigned int n;
+ size_t l=0;
+ assert(*num < 16);
+ n = *num; /* offset into the current keystream block */
+ if (16%sizeof(size_t) == 0) do { /* always true actually */
+ while (n && len) { /* use up keystream left in ecount_buf by an earlier call */
+ *(out++) = *(in++) ^ ecount_buf[n];
+ --len;
+ n = (n+1) % 16;
+ }
+ if (((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0)
+ break; /* a pointer is not size_t-aligned: use the byte-wise loop below */
+ while (len>=16) {
+ (*block)(ivec, ecount_buf, key);
+ ctr128_inc_aligned(ivec);
+ for (; n<16; n+=sizeof(size_t)) /* word-wise XOR with the keystream */
+ *(size_t *)(out+n) =
+ *(size_t *)(in+n) ^ *(size_t *)(ecount_buf+n);
+ len -= 16;
+ out += 16;
+ in += 16;
+ n = 0;
+ }
+ if (len) { /* trailing partial block; the unused keystream stays in ecount_buf */
+ (*block)(ivec, ecount_buf, key);
+ ctr128_inc_aligned(ivec);
+ while (len--) {
+ out[n] = in[n] ^ ecount_buf[n];
+ ++n;
+ }
+ }
+ *num = n;
+ return;
+ } while(0);
+ /* the rest would be commonly eliminated by x86* compiler */
+ while (l<len) { /* byte-at-a-time fallback */
+ if (n==0) {
+ (*block)(ivec, ecount_buf, key);
+ ctr128_inc(ivec);
+ }
+ out[l] = in[l] ^ ecount_buf[n];
+ ++l;
+ n = (n+1) % 16;
+ }
+ *num=n;
+/* increment upper 96 bits of 128-bit counter by 1 */
+static void ctr96_inc(unsigned char *counter) { /* walks bytes 11 down to 0, carrying while a byte wraps to zero; bytes 12..15 are untouched */
+ u32 n=12;
+ u8 c;
+ do {
+ --n;
+ c = counter[n];
+ ++c;
+ counter[n] = c;
+ if (c) return; /* no wrap to zero, so no carry into the next byte */
+ } while (n);
+void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], unsigned char ecount_buf[16],
+ unsigned int *num, ctr128_f func) /* CTR processing where whole blocks go through func, which works on a 32-bit counter; this routine updates ivec and carries counter overflow into its upper 96 bits. */
+ unsigned int n,ctr32;
+ assert(*num < 16);
+ n = *num; /* offset into the current keystream block */
+ while (n && len) { /* use up keystream left in ecount_buf by an earlier call */
+ *(out++) = *(in++) ^ ecount_buf[n];
+ --len;
+ n = (n+1) % 16;
+ }
+ ctr32 = GETU32(ivec+12); /* the counter word in the last four bytes of ivec */
+ while (len>=16) {
+ size_t blocks = len/16;
+ /*
+ * 1<<28 is just a not-so-small yet not-so-large number...
+ * Below condition is practically never met, but it has to
+ * be checked for code correctness.
+ */
+ if (sizeof(size_t)>sizeof(unsigned int) && blocks>(1U<<28))
+ blocks = (1U<<28);
+ /*
+ * As (*func) operates on 32-bit counter, caller
+ * has to handle overflow. 'if' below detects the
+ * overflow, which is then handled by limiting the
+ * amount of blocks to the exact overflow point...
+ */
+ ctr32 += (u32)blocks;
+ if (ctr32 < blocks) { /* the 32-bit sum wrapped */
+ blocks -= ctr32; /* process only the blocks up to the wrap point in this pass */
+ ctr32 = 0;
+ }
+ (*func)(in,out,blocks,key,ivec);
+ /* (*func) does not update ivec, caller does: */
+ PUTU32(ivec+12,ctr32);
+ /* ... overflow was detected, propagate carry. */
+ if (ctr32 == 0) ctr96_inc(ivec);
+ blocks *= 16; /* from here on blocks is a byte count */
+ len -= blocks;
+ out += blocks;
+ in += blocks;
+ }
+ if (len) { /* trailing partial block */
+ memset(ecount_buf,0,16);
+ (*func)(ecount_buf,ecount_buf,1,key,ivec); /* zero input: presumably leaves one raw keystream block in ecount_buf -- confirm against the ctr128_f contract */
+ ++ctr32;
+ PUTU32(ivec+12,ctr32);
+ if (ctr32 == 0) ctr96_inc(ivec);
+ while (len--) {
+ out[n] = in[n] ^ ecount_buf[n];
+ ++n;
+ }
+ }
+ *num=n;
diff --git a/crypto/libressl/crypto/modes/cts128.c b/crypto/libressl/crypto/modes/cts128.c
new file mode 100644
index 0000000..b2f7174
--- /dev/null
+++ b/crypto/libressl/crypto/modes/cts128.c
@@ -0,0 +1,267 @@
+/* $OpenBSD: cts128.c,v 1.5 2015/07/19 18:27:26 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Rights for redistribution and usage in source and binary
+ * forms are granted according to the OpenSSL license.
+ */
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
+#include <string.h>
+#ifndef MODES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+/*
+ * Trouble with Ciphertext Stealing, CTS, mode is that there is no
+ * common official specification, but couple of cipher/application
+ * specific ones: RFC2040 and RFC3962. Then there is 'Proposal to
+ * Extend CBC Mode By "Ciphertext Stealing"' at NIST site, which
+ * deviates from mentioned RFCs. Most notably it allows input to be
+ * of block length and it doesn't flip the order of the last two
+ * blocks. CTS is being discussed even in ECB context, but it's not
+ * adopted for any known application. This implementation provides
+ * two interfaces: one compliant with above mentioned RFCs and one
+ * compliant with the NIST proposal, both extending CBC mode.
+ */
+/*
+ * CBC encryption with ciphertext stealing, built on a single-block cipher.
+ * The last two output blocks are swapped: the final full cipher block is
+ * written at out-16 and the truncated previous one becomes the tail.
+ *
+ * Returns 0 without touching anything when len <= 16, otherwise the number
+ * of bytes processed (the original len).  ivec is updated in place and ends
+ * up holding the last cipher block produced.
+ */
+size_t CRYPTO_cts128_encrypt_block(const unsigned char *in, unsigned char *out,
+			size_t len, const void *key,
+			unsigned char ivec[16], block128_f block)
+{	size_t residue, n;
+
+	if (len <= 16) return 0;
+
+	/* a length that is a multiple of 16 still "steals" one whole block */
+	if ((residue=len%16) == 0) residue = 16;
+
+	len -= residue;
+
+	CRYPTO_cbc128_encrypt(in,out,len,key,ivec,block);
+
+	in  += len;
+	out += len;
+
+	/* chain the trailing input bytes into the previous cipher block */
+	for (n=0; n<residue; ++n)
+		ivec[n] ^= in[n];
+	(*block)(ivec,ivec,key);
+	memcpy(out,out-16,residue);
+	memcpy(out-16,ivec,16);
+
+	return len+residue;
+}
+/*
+ * Ciphertext-stealing CBC encryption without the block swap, built on a
+ * single-block cipher.  Unlike CRYPTO_cts128_encrypt_block() a length of
+ * exactly one block (or any multiple of 16) is accepted and handled as
+ * plain CBC.
+ *
+ * Returns 0 when len < 16, otherwise the number of bytes processed (the
+ * original len).  ivec is updated in place.
+ */
+size_t CRYPTO_nistcts128_encrypt_block(const unsigned char *in, unsigned char *out,
+			size_t len, const void *key,
+			unsigned char ivec[16], block128_f block)
+{	size_t residue, n;
+
+	if (len < 16) return 0;
+
+	residue=len%16;
+
+	len -= residue;
+
+	CRYPTO_cbc128_encrypt(in,out,len,key,ivec,block);
+
+	if (residue==0)	return len;
+
+	in  += len;
+	out += len;
+
+	for (n=0; n<residue; ++n)
+		ivec[n] ^= in[n];
+	(*block)(ivec,ivec,key);
+	/* final block overlaps the last `16-residue` bytes already written */
+	memcpy(out-16+residue,ivec,16);
+
+	return len+residue;
+}
+/*
+ * Same output layout as CRYPTO_cts128_encrypt_block(), but driven by a
+ * bulk CBC routine: (*cbc)(in, out, len, key, ivec, 1) is used for
+ * encryption.
+ *
+ * Returns 0 when len <= 16, otherwise the number of bytes processed (the
+ * original len).
+ */
+size_t CRYPTO_cts128_encrypt(const unsigned char *in, unsigned char *out,
+			size_t len, const void *key,
+			unsigned char ivec[16], cbc128_f cbc)
+{	size_t residue;
+	union { size_t align; unsigned char c[16]; } tmp;
+
+	if (len <= 16) return 0;
+
+	if ((residue=len%16) == 0) residue = 16;
+
+	len -= residue;
+
+	(*cbc)(in,out,len,key,ivec,1);
+
+	in  += len;
+	out += len;
+
+	/* zero-pad the trailing input bytes to a full block */
+	memset(tmp.c,0,sizeof(tmp));
+	memcpy(tmp.c,in,residue);
+	memcpy(out,out-16,residue);
+	(*cbc)(tmp.c,out-16,16,key,ivec,1);
+	return len+residue;
+}
+/*
+ * Same output layout as CRYPTO_nistcts128_encrypt_block(), but driven by a
+ * bulk CBC routine: (*cbc)(in, out, len, key, ivec, 1) is used for
+ * encryption.
+ *
+ * Returns 0 when len < 16, otherwise the number of bytes processed (the
+ * original len).  A length that is a multiple of 16 is plain CBC.
+ */
+size_t CRYPTO_nistcts128_encrypt(const unsigned char *in, unsigned char *out,
+			size_t len, const void *key,
+			unsigned char ivec[16], cbc128_f cbc)
+{	size_t residue;
+	union { size_t align; unsigned char c[16]; } tmp;
+
+	if (len < 16) return 0;
+
+	residue=len%16;
+
+	len -= residue;
+
+	(*cbc)(in,out,len,key,ivec,1);
+
+	if (residue==0)	return len;
+
+	in  += len;
+	out += len;
+
+	/* zero-pad the trailing input bytes to a full block */
+	memset(tmp.c,0,sizeof(tmp));
+	memcpy(tmp.c,in,residue);
+	(*cbc)(tmp.c,out-16+residue,16,key,ivec,1);
+	return len+residue;
+}
+/*
+ * Inverse of CRYPTO_cts128_encrypt_block(), built on a single-block
+ * decryption primitive.
+ *
+ * Returns 0 when len <= 16.  On success ivec holds the first of the two
+ * final input blocks.
+ *
+ * NOTE(review): `residue` is increased by 16 in the last loop before it is
+ * used in the return expression, so the value returned on success is the
+ * original len + 16, not len.  Behaviour is left as found; callers should
+ * only rely on the result being non-zero.
+ */
+size_t CRYPTO_cts128_decrypt_block(const unsigned char *in, unsigned char *out,
+			size_t len, const void *key,
+			unsigned char ivec[16], block128_f block)
+{	size_t residue, n;
+	union { size_t align; unsigned char c[32]; } tmp;
+
+	if (len<=16) return 0;
+
+	if ((residue=len%16) == 0) residue = 16;
+
+	/* everything before the last two (possibly partial) blocks is CBC */
+	len -= 16+residue;
+
+	if (len) {
+		CRYPTO_cbc128_decrypt(in,out,len,key,ivec,block);
+		in  += len;
+		out += len;
+	}
+
+	(*block)(in,tmp.c+16,key);
+
+	/* rebuild the stolen block: tail bytes from input, rest from tmp+16 */
+	memcpy(tmp.c,tmp.c+16,16);
+	memcpy(tmp.c,in+16,residue);
+	(*block)(tmp.c,tmp.c,key);
+
+	for(n=0; n<16; ++n) {
+		unsigned char c = in[n];	/* saved first: in and out may alias */
+		out[n] = tmp.c[n] ^ ivec[n];
+		ivec[n] = c;
+	}
+	for(residue+=16; n<residue; ++n)
+		out[n] = tmp.c[n] ^ in[n];
+
+	return 16+len+residue;
+}
+/*
+ * Inverse of CRYPTO_nistcts128_encrypt_block(), built on a single-block
+ * decryption primitive.
+ *
+ * Returns 0 when len < 16.  When len is a multiple of 16 the input is
+ * decrypted as plain CBC and len is returned.
+ *
+ * NOTE(review): in the ciphertext-stealing path `residue` is increased by
+ * 16 in the last loop before it is used in the return expression, so the
+ * value returned is the original len + 16.  Behaviour is left as found.
+ */
+size_t CRYPTO_nistcts128_decrypt_block(const unsigned char *in, unsigned char *out,
+			size_t len, const void *key,
+			unsigned char ivec[16], block128_f block)
+{	size_t residue, n;
+	union { size_t align; unsigned char c[32]; } tmp;
+
+	if (len<16) return 0;
+
+	residue=len%16;
+
+	if (residue==0) {
+		CRYPTO_cbc128_decrypt(in,out,len,key,ivec,block);
+		return len;
+	}
+
+	len -= 16+residue;
+
+	if (len) {
+		CRYPTO_cbc128_decrypt(in,out,len,key,ivec,block);
+		in  += len;
+		out += len;
+	}
+
+	/* the full final cipher block starts `residue` bytes in */
+	(*block)(in+residue,tmp.c+16,key);
+
+	memcpy(tmp.c,tmp.c+16,16);
+	memcpy(tmp.c,in,residue);
+	(*block)(tmp.c,tmp.c,key);
+
+	for(n=0; n<16; ++n) {
+		unsigned char c = in[n];	/* saved first: in and out may alias */
+		out[n] = tmp.c[n] ^ ivec[n];
+		ivec[n] = in[n+residue];
+		tmp.c[n] = c;
+	}
+	for(residue+=16; n<residue; ++n)
+		out[n] = tmp.c[n] ^ tmp.c[n-16];
+
+	return 16+len+residue;
+}
+/*
+ * Inverse of CRYPTO_cts128_encrypt(), driven by a bulk CBC routine:
+ * (*cbc)(in, out, len, key, ivec, 0) is used for decryption.
+ *
+ * Returns 0 when len <= 16, otherwise the number of bytes processed (the
+ * original len).
+ */
+size_t CRYPTO_cts128_decrypt(const unsigned char *in, unsigned char *out,
+			size_t len, const void *key,
+			unsigned char ivec[16], cbc128_f cbc)
+{	size_t residue;
+	union { size_t align; unsigned char c[32]; } tmp;
+
+	if (len<=16) return 0;
+
+	if ((residue=len%16) == 0) residue = 16;
+
+	len -= 16+residue;
+
+	if (len) {
+		(*cbc)(in,out,len,key,ivec,0);
+		in  += len;
+		out += len;
+	}
+
+	memset(tmp.c,0,sizeof(tmp));
+	/* this places in[16] at &tmp.c[16] and decrypted block at &tmp.c[0] */
+	(*cbc)(in,tmp.c,16,key,tmp.c+16,0);
+
+	memcpy(tmp.c,in+16,residue);
+	(*cbc)(tmp.c,tmp.c,32,key,ivec,0);
+	memcpy(out,tmp.c,16+residue);
+	return 16+len+residue;
+}
+/*
+ * Inverse of CRYPTO_nistcts128_encrypt(), driven by a bulk CBC routine:
+ * (*cbc)(in, out, len, key, ivec, 0) is used for decryption.
+ *
+ * Returns 0 when len < 16, otherwise the number of bytes processed (the
+ * original len).  A length that is a multiple of 16 is plain CBC.
+ */
+size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out,
+			size_t len, const void *key,
+			unsigned char ivec[16], cbc128_f cbc)
+{	size_t residue;
+	union { size_t align; unsigned char c[32]; } tmp;
+
+	if (len<16) return 0;
+
+	residue=len%16;
+
+	if (residue==0) {
+		(*cbc)(in,out,len,key,ivec,0);
+		return len;
+	}
+
+	len -= 16+residue;
+
+	if (len) {
+		(*cbc)(in,out,len,key,ivec,0);
+		in  += len;
+		out += len;
+	}
+
+	memset(tmp.c,0,sizeof(tmp));
+	/* this places in[16] at &tmp.c[16] and decrypted block at &tmp.c[0] */
+	(*cbc)(in+residue,tmp.c,16,key,tmp.c+16,0);
+
+	memcpy(tmp.c,in,residue);
+	(*cbc)(tmp.c,tmp.c,32,key,ivec,0);
+	memcpy(out,tmp.c,16+residue);
+	return 16+len+residue;
+}
diff --git a/crypto/libressl/crypto/modes/gcm128.c b/crypto/libressl/crypto/modes/gcm128.c
new file mode 100644
index 0000000..d6c1bbe
--- /dev/null
+++ b/crypto/libressl/crypto/modes/gcm128.c
@@ -0,0 +1,1566 @@
+/* $OpenBSD: gcm128.c,v 1.22 2018/01/24 23:03:37 kettenis Exp $ */
+/* ====================================================================
+ * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. ("
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ *
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit ("
+ *
+ * ====================================================================
+ */
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
+#include <string.h>
+#ifndef MODES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#if defined(BSWAP4) && defined(__STRICT_ALIGNMENT)
+/* redefine, because alignment is ensured */
+#undef GETU32
+#define GETU32(p) BSWAP4(*(const u32 *)(p))
+#undef PUTU32
+#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
+#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
+#define REDUCE1BIT(V) \
+ do { \
+ if (sizeof(size_t)==8) { \
+ u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
+ V.lo = (V.hi<<63)|(V.lo>>1); \
+ V.hi = (V.hi>>1 )^T; \
+ } else { \
+ u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
+ V.lo = (V.hi<<63)|(V.lo>>1); \
+ V.hi = (V.hi>>1 )^((u64)T<<32); \
+ } \
+ } while(0)
+/*
+ * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
+ * never be set to 8. 8 is effectively reserved for testing purposes.
+ * TABLE_BITS>1 are lookup-table-driven implementations referred to as
+ * "Shoup's" in GCM specification. In other words OpenSSL does not cover
+ * whole spectrum of possible table driven implementations. Why? In
+ * non-"Shoup's" case memory access pattern is segmented in such manner,
+ * that it's trivial to see that cache timing information can reveal
+ * fair portion of intermediate hash value. Given that ciphertext is
+ * always available to attacker, it's possible for him to attempt to
+ * deduce secret parameter H and if successful, tamper with messages
+ * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
+ * not as trivial, but there is no reason to believe that it's resistant
+ * to cache-timing attack. And the thing about "8-bit" implementation is
+ * that it consumes 16 (sixteen) times more memory, 4KB per individual
+ * key + 1KB shared. Well, on pros side it should be twice as fast as
+ * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
+ * was observed to run ~75% faster, closer to 100% for commercial
+ * compilers... Yet "4-bit" procedure is preferred, because it's
+ * believed to provide better security-performance balance and adequate
+ * all-round performance. "All-round" refers to things like:
+ *
+ * - shorter setup time effectively improves overall timing for
+ * handling short messages;
+ * - larger table allocation can become unbearable because of VM
+ * subsystem penalties (for example on Windows large enough free
+ * results in VM working set trimming, meaning that consequent
+ * malloc would immediately incur working set expansion);
+ * - larger table has larger cache footprint, which can affect
+ * performance of other code paths (not necessarily even from same
+ * thread in Hyper-Threading world);
+ *
+ * Value of 1 is not appropriate for performance reasons.
+ */
+#if TABLE_BITS==8
+/*
+ * Build the 256-entry multiplication table used by gcm_gmult_8bit().
+ *
+ * H is the hash key in host byte order (H[0] high half, H[1] low half).
+ * Entry 128 is H itself; entries 64, 32, ... 1 are obtained by repeatedly
+ * applying REDUCE1BIT() to it, and every remaining entry is the XOR of the
+ * power-of-two entry below it with a lower entry.
+ */
+static void gcm_init_8bit(u128 Htable[256], u64 H[2])
+{
+	int  i, j;
+	u128 V;
+
+	Htable[0].hi = 0;
+	Htable[0].lo = 0;
+	V.hi = H[0];
+	V.lo = H[1];
+
+	for (Htable[128]=V, i=64; i>0; i>>=1) {
+		/* without this step every power-of-two entry would equal H */
+		REDUCE1BIT(V);
+		Htable[i] = V;
+	}
+
+	for (i=2; i<256; i<<=1) {
+		u128 *Hi = Htable+i, H0 = *Hi;
+		for (j=1; j<i; ++j) {
+			Hi[j].hi = H0.hi^Htable[j].hi;
+			Hi[j].lo = H0.lo^Htable[j].lo;
+		}
+	}
+}
+/*
+ * Multiply Xi by H in place, consuming Xi one byte at a time (from byte 15
+ * down to byte 0) with the 256-entry Htable built by gcm_init_8bit().
+ *
+ * rem_8bit[] supplies the reduction term for the byte shifted out of Z.lo
+ * at each step.  It must hold exactly 256 entries: a missing row silently
+ * shifts every later entry and zero-fills the end of the table.
+ */
+static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
+{
+	u128 Z = { 0, 0};
+	const u8 *xi = (const u8 *)Xi+15;
+	size_t rem, n = *xi;
+	static const size_t rem_8bit[256] = {
+		PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
+		PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
+		PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
+		PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
+		PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
+		PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
+		PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
+		PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
+		PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
+		PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
+		PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
+		PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
+		PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
+		PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
+		PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
+		PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
+		PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
+		PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
+		PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
+		PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
+		PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
+		PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
+		PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
+		PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
+		PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
+		PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
+		PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
+		PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
+		PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
+		PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
+		PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
+		PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
+		PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
+		PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
+		PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
+		PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
+		PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
+		PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
+		PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
+		PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
+		PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
+		PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
+		PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
+		PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
+		PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
+		PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
+		PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
+		PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
+		PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
+		PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
+		PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
+		PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
+		PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
+		PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
+		PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
+		PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
+		PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
+		PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
+		PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
+		PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
+		PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
+		PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
+		PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
+		PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
+
+	while (1) {
+		Z.hi ^= Htable[n].hi;
+		Z.lo ^= Htable[n].lo;
+
+		if ((u8 *)Xi==xi)	break;
+
+		n = *(--xi);
+
+		rem  = (size_t)Z.lo&0xff;
+		Z.lo = (Z.hi<<56)|(Z.lo>>8);
+		Z.hi = (Z.hi>>8);
+		/* PACK() left-aligns in size_t; widen to 64 bits when needed */
+#if SIZE_MAX == 0xffffffffffffffff
+		Z.hi ^= rem_8bit[rem];
+#else
+		Z.hi ^= (u64)rem_8bit[rem]<<32;
+#endif
+	}
+
+	/* store Z back into Xi in big-endian byte order */
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP8
+	Xi[0] = BSWAP8(Z.hi);
+	Xi[1] = BSWAP8(Z.lo);
+#else
+	u8 *p = (u8 *)Xi;
+	u32 v;
+	v = (u32)(Z.hi>>32);	PUTU32(p,v);
+	v = (u32)(Z.hi);	PUTU32(p+4,v);
+	v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
+	v = (u32)(Z.lo);	PUTU32(p+12,v);
+#endif
+#else /* BIG_ENDIAN */
+	Xi[0] = Z.hi;
+	Xi[1] = Z.lo;
+#endif
+}
+#define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
+#elif TABLE_BITS==4
+/*
+ * Build the 16-entry multiplication table used by the 4-bit GHASH code.
+ *
+ * H is the hash key in host byte order (H[0] high half, H[1] low half).
+ * Entry 8 is H itself; entries 4, 2 and 1 are obtained by applying
+ * REDUCE1BIT() once more each, and the remaining entries are XOR
+ * combinations of those.  The OPENSSL_SMALL_FOOTPRINT build computes the
+ * same table with loops; the default build uses the unrolled form.
+ */
+static void gcm_init_4bit(u128 Htable[16], u64 H[2])
+{
+	u128 V;
+#if defined(OPENSSL_SMALL_FOOTPRINT)
+	int  i;
+#endif
+
+	Htable[0].hi = 0;
+	Htable[0].lo = 0;
+	V.hi = H[0];
+	V.lo = H[1];
+
+#if defined(OPENSSL_SMALL_FOOTPRINT)
+	for (Htable[8]=V, i=4; i>0; i>>=1) {
+		REDUCE1BIT(V);
+		Htable[i] = V;
+	}
+
+	for (i=2; i<16; i<<=1) {
+		u128 *Hi = Htable+i;
+		int   j;
+		for (V=*Hi, j=1; j<i; ++j) {
+			Hi[j].hi = V.hi^Htable[j].hi;
+			Hi[j].lo = V.lo^Htable[j].lo;
+		}
+	}
+#else
+	Htable[8] = V;
+	REDUCE1BIT(V);
+	Htable[4] = V;
+	REDUCE1BIT(V);
+	Htable[2] = V;
+	REDUCE1BIT(V);
+	Htable[1] = V;
+	Htable[3].hi  = V.hi^Htable[2].hi, Htable[3].lo  = V.lo^Htable[2].lo;
+	V=Htable[4];
+	Htable[5].hi  = V.hi^Htable[1].hi, Htable[5].lo  = V.lo^Htable[1].lo;
+	Htable[6].hi  = V.hi^Htable[2].hi, Htable[6].lo  = V.lo^Htable[2].lo;
+	Htable[7].hi  = V.hi^Htable[3].hi, Htable[7].lo  = V.lo^Htable[3].lo;
+	V=Htable[8];
+	Htable[9].hi  = V.hi^Htable[1].hi, Htable[9].lo  = V.lo^Htable[1].lo;
+	Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
+	Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
+	Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
+	Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
+	Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
+	Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
+#endif
+#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
+	/*
+	 * ARM assembler expects specific dword order in Htable.
+	 */
+	{
+		int j;
+#if BYTE_ORDER == LITTLE_ENDIAN
+		for (j=0;j<16;++j) {
+			V = Htable[j];
+			Htable[j].hi = V.lo;
+			Htable[j].lo = V.hi;
+		}
+#else /* BIG_ENDIAN */
+		for (j=0;j<16;++j) {
+			V = Htable[j];
+			Htable[j].hi = V.lo<<32|V.lo>>32;
+			Htable[j].lo = V.hi<<32|V.hi>>32;
+		}
+#endif
+	}
+#endif
+}
+#ifndef GHASH_ASM
+static const size_t rem_4bit[16] = { /* reduction constants, indexed by the 4 bits shifted out of Z.lo; PACK() puts each 16-bit value in the top bits of a size_t */
+ PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
+ PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
+ PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
+ PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
+/*
+ * Multiply Xi by H in place, consuming Xi one nibble at a time (low nibble
+ * then high nibble of each byte, from byte 15 down to byte 0) with the
+ * 16-entry Htable built by gcm_init_4bit().  rem_4bit[] supplies the
+ * reduction term for the four bits shifted out of Z.lo at each step.
+ */
+static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
+{
+	u128 Z;
+	int cnt = 15;
+	size_t rem, nlo, nhi;
+
+	nlo  = ((const u8 *)Xi)[15];
+	nhi  = nlo>>4;
+	nlo &= 0xf;
+
+	Z.hi = Htable[nlo].hi;
+	Z.lo = Htable[nlo].lo;
+
+	while (1) {
+		rem  = (size_t)Z.lo&0xf;
+		Z.lo = (Z.hi<<60)|(Z.lo>>4);
+		Z.hi = (Z.hi>>4);
+		/* exactly one of the two forms applies, chosen by size_t width */
+#if SIZE_MAX == 0xffffffffffffffff
+		Z.hi ^= rem_4bit[rem];
+#else
+		Z.hi ^= (u64)rem_4bit[rem]<<32;
+#endif
+		Z.hi ^= Htable[nhi].hi;
+		Z.lo ^= Htable[nhi].lo;
+
+		if (--cnt<0)		break;
+
+		nlo  = ((const u8 *)Xi)[cnt];
+		nhi  = nlo>>4;
+		nlo &= 0xf;
+
+		rem  = (size_t)Z.lo&0xf;
+		Z.lo = (Z.hi<<60)|(Z.lo>>4);
+		Z.hi = (Z.hi>>4);
+#if SIZE_MAX == 0xffffffffffffffff
+		Z.hi ^= rem_4bit[rem];
+#else
+		Z.hi ^= (u64)rem_4bit[rem]<<32;
+#endif
+		Z.hi ^= Htable[nlo].hi;
+		Z.lo ^= Htable[nlo].lo;
+	}
+
+	/* store Z back into Xi in big-endian byte order */
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP8
+	Xi[0] = BSWAP8(Z.hi);
+	Xi[1] = BSWAP8(Z.lo);
+#else
+	u8 *p = (u8 *)Xi;
+	u32 v;
+	v = (u32)(Z.hi>>32);	PUTU32(p,v);
+	v = (u32)(Z.hi);	PUTU32(p+4,v);
+	v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
+	v = (u32)(Z.lo);	PUTU32(p+12,v);
+#endif
+#else /* BIG_ENDIAN */
+	Xi[0] = Z.hi;
+	Xi[1] = Z.lo;
+#endif
+}
+/*
+ * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
+ * details... Compiler-generated code doesn't seem to give any
+ * performance improvement, at least not on x86[_64]. It's here
+ * mostly as reference and a placeholder for possible future
+ * non-trivial optimization[s]...
+ */
+static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
+ const u8 *inp,size_t len) /*
+ * Fold len bytes from inp into the running hash Xi, 16 bytes per pass:
+ * each input byte is XORed with the matching byte of Xi and the result is
+ * multiplied by H through Htable, after which Z is written back to Xi.
+ * The outer loop ends when `len -= 16` reaches zero, so len has to be a
+ * non-zero multiple of 16.
+ *
+ * NOTE(review): the function braces and the #else/#endif lines that pair
+ * with `#if 1`, `#if SIZE_MAX ...` and `#ifdef BSWAP8` are not present in
+ * this text, so the nibble-wise loop and the Hshr4/Hshl4/rem_8bit variant
+ * below appear back to back instead of as alternatives, and the first
+ * `do {` has no visible closing line of its own.  Presumably they were
+ * lost in extraction; confirm against the original file before relying on
+ * this listing.
+ */
+ u128 Z;
+ int cnt;
+ size_t rem, nlo, nhi;
+#if 1
+ do {
+ cnt = 15;
+ nlo = ((const u8 *)Xi)[15];
+ nlo ^= inp[15];
+ nhi = nlo>>4;
+ nlo &= 0xf;
+ Z.hi = Htable[nlo].hi;
+ Z.lo = Htable[nlo].lo;
+ while (1) {
+ rem = (size_t)Z.lo&0xf;
+ Z.lo = (Z.hi<<60)|(Z.lo>>4);
+ Z.hi = (Z.hi>>4);
+#if SIZE_MAX == 0xffffffffffffffff
+ Z.hi ^= rem_4bit[rem];
+ Z.hi ^= (u64)rem_4bit[rem]<<32;
+ Z.hi ^= Htable[nhi].hi;
+ Z.lo ^= Htable[nhi].lo;
+ if (--cnt<0) break;
+ nlo = ((const u8 *)Xi)[cnt];
+ nlo ^= inp[cnt];
+ nhi = nlo>>4;
+ nlo &= 0xf;
+ rem = (size_t)Z.lo&0xf;
+ Z.lo = (Z.hi<<60)|(Z.lo>>4);
+ Z.hi = (Z.hi>>4);
+#if SIZE_MAX == 0xffffffffffffffff
+ Z.hi ^= rem_4bit[rem];
+ Z.hi ^= (u64)rem_4bit[rem]<<32;
+ Z.hi ^= Htable[nlo].hi;
+ Z.lo ^= Htable[nlo].lo;
+ }
+ /*
+ * Extra 256+16 bytes per-key plus 512 bytes shared tables
+ * [should] give ~50% improvement... One could have PACK()-ed
+ * the rem_8bit even here, but the priority is to minimize
+ * cache footprint...
+ */
+ u128 Hshr4[16]; /* Htable shifted right by 4 bits */
+ u8 Hshl4[16]; /* Htable shifted left by 4 bits */
+ static const unsigned short rem_8bit[256] = {
+ 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
+ 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
+ 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
+ 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
+ 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
+ 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
+ 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
+ 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
+ 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
+ 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
+ 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
+ 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
+ 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
+ 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
+ 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
+ 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
+ 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
+ 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
+ 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
+ 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
+ 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
+ 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
+ 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
+ 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
+ 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
+ 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
+ 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
+ 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
+ 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
+ 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
+ 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
+ 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
+ /*
+ * This pre-processing phase slows down procedure by approximately
+ * same time as it makes each loop spin faster. In other words
+ * single block performance is approximately same as straightforward
+ * "4-bit" implementation, and then it goes only faster...
+ */
+ for (cnt=0; cnt<16; ++cnt) {
+ Z.hi = Htable[cnt].hi;
+ Z.lo = Htable[cnt].lo;
+ Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
+ Hshr4[cnt].hi = (Z.hi>>4);
+ Hshl4[cnt] = (u8)(Z.lo<<4);
+ }
+ do {
+ for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
+ nlo = ((const u8 *)Xi)[cnt];
+ nlo ^= inp[cnt];
+ nhi = nlo>>4;
+ nlo &= 0xf;
+ Z.hi ^= Htable[nlo].hi;
+ Z.lo ^= Htable[nlo].lo;
+ rem = (size_t)Z.lo&0xff;
+ Z.lo = (Z.hi<<56)|(Z.lo>>8);
+ Z.hi = (Z.hi>>8);
+ Z.hi ^= Hshr4[nhi].hi;
+ Z.lo ^= Hshr4[nhi].lo;
+ Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
+ }
+ nlo = ((const u8 *)Xi)[0];
+ nlo ^= inp[0];
+ nhi = nlo>>4;
+ nlo &= 0xf;
+ Z.hi ^= Htable[nlo].hi;
+ Z.lo ^= Htable[nlo].lo;
+ rem = (size_t)Z.lo&0xf;
+ Z.lo = (Z.hi<<60)|(Z.lo>>4);
+ Z.hi = (Z.hi>>4);
+ Z.hi ^= Htable[nhi].hi;
+ Z.lo ^= Htable[nhi].lo;
+ Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
+#ifdef BSWAP8
+ Xi[0] = BSWAP8(Z.hi);
+ Xi[1] = BSWAP8(Z.lo);
+ u8 *p = (u8 *)Xi;
+ u32 v;
+ v = (u32)(Z.hi>>32); PUTU32(p,v);
+ v = (u32)(Z.hi); PUTU32(p+4,v);
+ v = (u32)(Z.lo>>32); PUTU32(p+8,v);
+ v = (u32)(Z.lo); PUTU32(p+12,v);
+#else /* BIG_ENDIAN */
+ Xi[0] = Z.hi;
+ Xi[1] = Z.lo;
+ } while (inp+=16, len-=16);
+void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
+void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+#define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
+#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
+#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
+/* GHASH_CHUNK is a "stride parameter" meant to mitigate the cache
+ * thrashing effect. In other words, the idea is to hash data while it's
+ * still in L1 cache after the encryption pass... */
+#define GHASH_CHUNK (3*1024)
+#else /* TABLE_BITS */
+/*
+ * Table-free multiplication of Xi by H, one bit at a time.
+ *
+ * Xi is read one `long` at a time in big-endian order; for every bit, from
+ * the most significant down, V is XORed into Z when the bit is set and V
+ * is then advanced with REDUCE1BIT().  H is the hash key in host byte
+ * order.  The product is written back to Xi in big-endian byte order.
+ */
+static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
+{
+	u128 V,Z = { 0,0 };
+	long X;
+	int  i,j;
+	const long *xi = (const long *)Xi;
+
+	V.hi = H[0];	/* H is in host byte order, no byte swapping */
+	V.lo = H[1];
+
+	for (j=0; j<16/sizeof(long); ++j) {
+#if BYTE_ORDER == LITTLE_ENDIAN
+#if SIZE_MAX == 0xffffffffffffffff
+#ifdef BSWAP8
+		X = (long)(BSWAP8(xi[j]));
+#else
+		const u8 *p = (const u8 *)(xi+j);
+		X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
+#endif
+#else
+		const u8 *p = (const u8 *)(xi+j);
+		X = (long)GETU32(p);
+#endif
+#else /* BIG_ENDIAN */
+		X = xi[j];
+#endif
+
+		for (i=0; i<8*sizeof(long); ++i, X<<=1) {
+			/* all-ones when the top bit of X is set, else zero */
+			u64 M = (u64)(X>>(8*sizeof(long)-1));
+			Z.hi ^= V.hi&M;
+			Z.lo ^= V.lo&M;
+
+			/* without this step V would stay equal to H for every bit */
+			REDUCE1BIT(V);
+		}
+	}
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#ifdef BSWAP8
+	Xi[0] = BSWAP8(Z.hi);
+	Xi[1] = BSWAP8(Z.lo);
+#else
+	u8 *p = (u8 *)Xi;
+	u32 v;
+	v = (u32)(Z.hi>>32);	PUTU32(p,v);
+	v = (u32)(Z.hi);	PUTU32(p+4,v);
+	v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
+	v = (u32)(Z.lo);	PUTU32(p+12,v);
+#endif
+#else /* BIG_ENDIAN */
+	Xi[0] = Z.hi;
+	Xi[1] = Z.lo;
+#endif
+}
+#define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
+#if defined(GHASH_ASM) && \
+ (defined(__i386) || defined(__i386__) || \
+ defined(__x86_64) || defined(__x86_64__) || \
+ defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
+#include "x86_arch.h"
+#if TABLE_BITS==4 && defined(GHASH_ASM)
+# if (defined(__i386) || defined(__i386__) || \
+ defined(__x86_64) || defined(__x86_64__) || \
+ defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
+# define GHASH_ASM_X86_OR_64
+# define GCM_FUNCREF_4BIT
+void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
+void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
+void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
+# define GHASH_ASM_X86
+void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
+void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
+void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+# endif
+# elif defined(__arm__) || defined(__arm)
+# include "arm_arch.h"
+# if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+# define GHASH_ASM_ARM
+# define GCM_FUNCREF_4BIT
+void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
+void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+# endif
+# endif
+# undef GCM_MUL
+# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
+# ifdef GHASH
+# undef GHASH
+# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
+# endif
+void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) /*
+ * Prepare a GCM context for the given block cipher and key schedule.
+ *
+ * The context is zeroed, block and key are stored in it, and the hash key
+ * H is produced by running (*block) over the all-zero ctx->H.c in place.
+ * H is then converted to host byte order and used to build ctx->Htable;
+ * depending on TABLE_BITS, the GHASH_ASM_* macros and CPU capability bits,
+ * ctx->gmult and ctx->ghash are pointed at the matching routines.
+ *
+ * NOTE(review): several lines appear to be missing from this text: the
+ * function braces, the #else/#endif partners of the conditionals, and the
+ * `if (...) {` that the "check FXSR and PCLMULQDQ bits" comment and the
+ * stray closing brace after `return;` belong to.  As listed, the clmul
+ * branch would be taken unconditionally.  Confirm against the original
+ * file.
+ */
+ memset(ctx,0,sizeof(*ctx));
+ ctx->block = block;
+ ctx->key = key;
+ (*block)(ctx->H.c,ctx->H.c,key);
+ /* H is stored in host byte order */
+#ifdef BSWAP8
+ ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
+ ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
+ u8 *p = ctx->H.c;
+ u64 hi,lo;
+ hi = (u64)GETU32(p) <<32|GETU32(p+4);
+ lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
+ ctx->H.u[0] = hi;
+ ctx->H.u[1] = lo;
+#if TABLE_BITS==8
+ gcm_init_8bit(ctx->Htable,ctx->H.u);
+#elif TABLE_BITS==4
+# if defined(GHASH_ASM_X86_OR_64)
+# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
+ /* check FXSR and PCLMULQDQ bits */
+ gcm_init_clmul(ctx->Htable,ctx->H.u);
+ ctx->gmult = gcm_gmult_clmul;
+ ctx->ghash = gcm_ghash_clmul;
+ return;
+ }
+# endif
+ gcm_init_4bit(ctx->Htable,ctx->H.u);
+# if defined(GHASH_ASM_X86) /* x86 only */
+# if defined(OPENSSL_IA32_SSE2)
+ if (OPENSSL_cpu_caps() & CPUCAP_MASK_SSE) { /* check SSE bit */
+# else
+ if (OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) { /* check MMX bit */
+# endif
+ ctx->gmult = gcm_gmult_4bit_mmx;
+ ctx->ghash = gcm_ghash_4bit_mmx;
+ } else {
+ ctx->gmult = gcm_gmult_4bit_x86;
+ ctx->ghash = gcm_ghash_4bit_x86;
+ }
+# else
+ ctx->gmult = gcm_gmult_4bit;
+ ctx->ghash = gcm_ghash_4bit;
+# endif
+# elif defined(GHASH_ASM_ARM)
+ if (OPENSSL_armcap_P & ARMV7_NEON) {
+ ctx->gmult = gcm_gmult_neon;
+ ctx->ghash = gcm_ghash_neon;
+ } else {
+ gcm_init_4bit(ctx->Htable,ctx->H.u);
+ ctx->gmult = gcm_gmult_4bit;
+ ctx->ghash = gcm_ghash_4bit;
+ }
+# else
+ gcm_init_4bit(ctx->Htable,ctx->H.u);
+# endif
+void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len) /*
+ * Start a new message: reset the running state and derive the initial
+ * counter block Yi from an IV of len bytes.
+ *
+ * Yi, Xi, both length counters, ares and mres are cleared.  A 12-byte IV
+ * is copied into Yi with the last byte set to 1.  Any other length is
+ * hashed: the IV is XORed into Yi sixteen bytes at a time (a shorter tail
+ * is XORed as is) with GCM_MUL after each block, the IV length in bits is
+ * XORed into bytes 8..15, and GCM_MUL is applied once more.  Finally Yi is
+ * run through (*ctx->block) into EK0 and the 32-bit counter held in the
+ * last word of Yi is incremented.
+ *
+ * NOTE(review): the function braces and the #else/#endif lines that pair
+ * with `#ifdef BSWAP8` / `#ifdef BSWAP4` are not present in this text, so
+ * the byte-order alternatives below appear back to back.  Confirm against
+ * the original file.
+ */
+ unsigned int ctr;
+ void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
+ ctx->Yi.u[0] = 0;
+ ctx->Yi.u[1] = 0;
+ ctx->Xi.u[0] = 0;
+ ctx->Xi.u[1] = 0;
+ ctx->len.u[0] = 0; /* AAD length */
+ ctx->len.u[1] = 0; /* message length */
+ ctx->ares = 0;
+ ctx->mres = 0;
+ if (len==12) {
+ memcpy(ctx->Yi.c,iv,12);
+ ctx->Yi.c[15]=1;
+ ctr=1;
+ }
+ else {
+ size_t i;
+ u64 len0 = len;
+ while (len>=16) {
+ for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
+ GCM_MUL(ctx,Yi);
+ iv += 16;
+ len -= 16;
+ }
+ if (len) {
+ for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
+ GCM_MUL(ctx,Yi);
+ }
+ len0 <<= 3;
+#ifdef BSWAP8
+ ctx->Yi.u[1] ^= BSWAP8(len0);
+ ctx->Yi.c[8] ^= (u8)(len0>>56);
+ ctx->Yi.c[9] ^= (u8)(len0>>48);
+ ctx->Yi.c[10] ^= (u8)(len0>>40);
+ ctx->Yi.c[11] ^= (u8)(len0>>32);
+ ctx->Yi.c[12] ^= (u8)(len0>>24);
+ ctx->Yi.c[13] ^= (u8)(len0>>16);
+ ctx->Yi.c[14] ^= (u8)(len0>>8);
+ ctx->Yi.c[15] ^= (u8)(len0);
+#else /* BIG_ENDIAN */
+ ctx->Yi.u[1] ^= len0;
+ GCM_MUL(ctx,Yi);
+#ifdef BSWAP4
+ ctr = BSWAP4(ctx->Yi.d[3]);
+ ctr = GETU32(ctx->Yi.c+12);
+#else /* BIG_ENDIAN */
+ ctr = ctx->Yi.d[3];
+ }
+ (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
+ ++ctr;
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+ PUTU32(ctx->Yi.c+12,ctr);
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len) /*
+ * Feed len bytes of additional authenticated data into the hash Xi.  May
+ * be called repeatedly; ctx->ares carries the number of bytes already
+ * XORed into a not yet multiplied block between calls.
+ *
+ * Returns  0 on success,
+ *         -2 when message data has already been processed
+ *            (ctx->len.u[1] is non-zero),
+ *         -1 when the accumulated AAD length would exceed 2^61 bytes or
+ *            the 64-bit length counter would wrap.
+ *
+ * NOTE(review): the function braces and the #else/#endif belonging to
+ * `#ifdef GHASH` are not present in this text, so the GHASH() bulk path
+ * and the per-block GCM_MUL loop appear back to back.  Confirm against
+ * the original file.
+ */
+ size_t i;
+ unsigned int n;
+ u64 alen = ctx->len.u[0];
+ void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
+# ifdef GHASH
+ void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
+ const u8 *inp,size_t len) = ctx->ghash;
+# endif
+ if (ctx->len.u[1]) return -2;
+ alen += len;
+ if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
+ return -1;
+ ctx->len.u[0] = alen;
+ n = ctx->ares;
+ if (n) {
+ while (n && len) {
+ ctx->Xi.c[n] ^= *(aad++);
+ --len;
+ n = (n+1)%16;
+ }
+ if (n==0) GCM_MUL(ctx,Xi);
+ else {
+ ctx->ares = n;
+ return 0;
+ }
+ }
+#ifdef GHASH
+ if ((i = (len&(size_t)-16))) {
+ GHASH(ctx,aad,i);
+ aad += i;
+ len -= i;
+ }
+ while (len>=16) {
+ for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
+ GCM_MUL(ctx,Xi);
+ aad += 16;
+ len -= 16;
+ }
+ if (len) {
+ n = (unsigned int)len;
+ for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
+ }
+ ctx->ares = n;
+ return 0;
+int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
+ const unsigned char *in, unsigned char *out,
+ size_t len) /*
+ * Encrypt len bytes from in to out in counter mode and fold the resulting
+ * ciphertext into the hash Xi.  May be called repeatedly; ctx->mres is the
+ * offset into the current keystream block EKi carried between calls.
+ *
+ * Returns 0 on success, or -1 when the accumulated message length would
+ * exceed 2^36 - 32 bytes or the 64-bit length counter would wrap.
+ *
+ * On the first call after AAD was supplied with a partial final block
+ * (ctx->ares non-zero) the pending AAD block is multiplied in first.
+ * The word-at-a-time paths are used only when both in and out are aligned
+ * to sizeof(size_t); otherwise control breaks out to the byte-wise loop at
+ * the end.
+ *
+ * NOTE(review): the function braces and the #else/#endif lines that pair
+ * with `#ifdef BSWAP4` and `#if defined(GHASH) && defined(GHASH_CHUNK)`
+ * are not present in this text, so alternative counter stores and
+ * alternative bulk loops appear back to back, and the GHASH() call that
+ * would follow the GHASH_CHUNK inner loop is not visible.  Confirm against
+ * the original file.
+ */
+ unsigned int n, ctr;
+ size_t i;
+ u64 mlen = ctx->len.u[1];
+ block128_f block = ctx->block;
+ void *key = ctx->key;
+ void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
+# ifdef GHASH
+ void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
+ const u8 *inp,size_t len) = ctx->ghash;
+# endif
+ mlen += len;
+ if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
+ return -1;
+ ctx->len.u[1] = mlen;
+ if (ctx->ares) {
+ /* First call to encrypt finalizes GHASH(AAD) */
+ GCM_MUL(ctx,Xi);
+ ctx->ares = 0;
+ }
+#ifdef BSWAP4
+ ctr = BSWAP4(ctx->Yi.d[3]);
+ ctr = GETU32(ctx->Yi.c+12);
+#else /* BIG_ENDIAN */
+ ctr = ctx->Yi.d[3];
+ n = ctx->mres;
+ if (16%sizeof(size_t) == 0) do { /* always true actually */
+ if (n) {
+ while (n && len) {
+ ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
+ --len;
+ n = (n+1)%16;
+ }
+ if (n==0) GCM_MUL(ctx,Xi);
+ else {
+ ctx->mres = n;
+ return 0;
+ }
+ }
+ if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
+ break;
+#if defined(GHASH) && defined(GHASH_CHUNK)
+ while (len>=GHASH_CHUNK) {
+ size_t j=GHASH_CHUNK;
+ while (j) {
+ size_t *out_t=(size_t *)out;
+ const size_t *in_t=(const size_t *)in;
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+ PUTU32(ctx->Yi.c+12,ctr);
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+ for (i=0; i<16/sizeof(size_t); ++i)
+ out_t[i] = in_t[i] ^ ctx->EKi.t[i];
+ out += 16;
+ in += 16;
+ j -= 16;
+ }
+ len -= GHASH_CHUNK;
+ }
+ if ((i = (len&(size_t)-16))) {
+ size_t j=i;
+ while (len>=16) {
+ size_t *out_t=(size_t *)out;
+ const size_t *in_t=(const size_t *)in;
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+ PUTU32(ctx->Yi.c+12,ctr);
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+ for (i=0; i<16/sizeof(size_t); ++i)
+ out_t[i] = in_t[i] ^ ctx->EKi.t[i];
+ out += 16;
+ in += 16;
+ len -= 16;
+ }
+ GHASH(ctx,out-j,j);
+ }
+ while (len>=16) {
+ size_t *out_t=(size_t *)out;
+ const size_t *in_t=(const size_t *)in;
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+ PUTU32(ctx->Yi.c+12,ctr);
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+ for (i=0; i<16/sizeof(size_t); ++i)
+ ctx->Xi.t[i] ^=
+ out_t[i] = in_t[i]^ctx->EKi.t[i];
+ GCM_MUL(ctx,Xi);
+ out += 16;
+ in += 16;
+ len -= 16;
+ }
+ if (len) {
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+ PUTU32(ctx->Yi.c+12,ctr);
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+ while (len--) {
+ ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
+ ++n;
+ }
+ }
+ ctx->mres = n;
+ return 0;
+ } while(0);
+ for (i=0;i<len;++i) {
+ if (n==0) {
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+ PUTU32(ctx->Yi.c+12,ctr);
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+ }
+ ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
+ n = (n+1)%16;
+ if (n==0)
+ GCM_MUL(ctx,Xi);
+ }
+ ctx->mres = n;
+ return 0;
+int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, /* GCM decrypt: out = in XOR keystream, and the ciphertext read from in is folded into the hash state ctx->Xi */
+ const unsigned char *in, unsigned char *out,
+ size_t len) /* returns 0 on success, -1 when the running message length is rejected below */
+ unsigned int n, ctr; /* n: byte offset inside the current 16-byte block; ctr: working copy of the 32-bit block counter */
+ size_t i;
+ u64 mlen = ctx->len.u[1]; /* message bytes accepted by earlier calls */
+ block128_f block = ctx->block;
+ void *key = ctx->key;
+ void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; /* presumably consumed by the GCM_MUL macro -- confirm */
+# ifdef GHASH
+ void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
+ const u8 *inp,size_t len) = ctx->ghash; /* presumably consumed by the GHASH macro -- confirm */
+# endif
+ mlen += len;
+ if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len)) /* total above 2^36-32 bytes, or the 64-bit sum wrapped */
+ return -1;
+ ctx->len.u[1] = mlen;
+ if (ctx->ares) {
+ /* First call to decrypt finalizes GHASH(AAD) */
+ GCM_MUL(ctx,Xi);
+ ctx->ares = 0;
+ }
+#ifdef BSWAP4
+ ctr = BSWAP4(ctx->Yi.d[3]); /* ctr comes from the last 32-bit word (bytes 12..15) of the counter block Yi */
+ ctr = GETU32(ctx->Yi.c+12);
+#else /* BIG_ENDIAN */
+ ctr = ctx->Yi.d[3];
+ n = ctx->mres; /* offset into the keystream block EKi left over from the previous call */
+ if (16%sizeof(size_t) == 0) do { /* always true actually */
+ if (n) { /* drain the partially used keystream block first */
+ while (n && len) {
+ u8 c = *(in++); /* keep the ciphertext byte: in and out may be the same buffer position */
+ *(out++) = c^ctx->EKi.c[n];
+ ctx->Xi.c[n] ^= c;
+ --len;
+ n = (n+1)%16;
+ }
+ if (n==0) GCM_MUL (ctx,Xi); /* the block was completed */
+ else {
+ ctx->mres = n; /* input ran out mid-block: keep the offset for the next call */
+ return 0;
+ }
+ }
+ if (((size_t)in|(size_t)out)%sizeof(size_t) != 0) /* the word-wise code below needs both pointers size_t-aligned */
+ break; /* leaves the do/while(0) and continues with the byte-wise loop */
+#if defined(GHASH) && defined(GHASH_CHUNK)
+ while (len>=GHASH_CHUNK) { /* bulk path: GHASH_CHUNK bytes per outer iteration */
+ size_t j=GHASH_CHUNK; /* NOTE(review): no GHASH call over this chunk is visible in this view -- confirm against upstream */
+ while (j) {
+ size_t *out_t=(size_t *)out;
+ const size_t *in_t=(const size_t *)in;
+ (*block)(ctx->Yi.c,ctx->EKi.c,key); /* fill EKi from the counter block Yi; EKi is used as keystream below */
+ ++ctr;
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr); /* write the advanced counter back into Yi */
+ PUTU32(ctx->Yi.c+12,ctr);
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+ for (i=0; i<16/sizeof(size_t); ++i)
+ out_t[i] = in_t[i]^ctx->EKi.t[i];
+ out += 16;
+ in += 16;
+ j -= 16;
+ }
+ len -= GHASH_CHUNK;
+ }
+ if ((i = (len&(size_t)-16))) { /* i = remaining length rounded down to whole 16-byte blocks */
+ GHASH(ctx,in,i); /* hash the ciphertext before it is decrypted */
+ while (len>=16) {
+ size_t *out_t=(size_t *)out;
+ const size_t *in_t=(const size_t *)in;
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+ PUTU32(ctx->Yi.c+12,ctr);
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+ for (i=0; i<16/sizeof(size_t); ++i)
+ out_t[i] = in_t[i]^ctx->EKi.t[i];
+ out += 16;
+ in += 16;
+ len -= 16;
+ }
+ }
+ while (len>=16) { /* per-block variant: XOR each ciphertext word into Xi, then one GCM_MUL per block */
+ size_t *out_t=(size_t *)out;
+ const size_t *in_t=(const size_t *)in;
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+ PUTU32(ctx->Yi.c+12,ctr);
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+ for (i=0; i<16/sizeof(size_t); ++i) {
+ size_t c = in_t[i]; /* fixed: read a whole word through in_t (was the single byte in[i]) */
+ out_t[i] = c^ctx->EKi.t[i]; /* fixed: store a whole word through out_t (was the single byte out[i]) */
+ ctx->Xi.t[i] ^= c;
+ }
+ GCM_MUL(ctx,Xi);
+ out += 16;
+ in += 16;
+ len -= 16;
+ }
+ if (len) { /* trailing partial block, fewer than 16 bytes */
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+ PUTU32(ctx->Yi.c+12,ctr);
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+ while (len--) {
+ u8 c = in[n];
+ ctx->Xi.c[n] ^= c; /* no GCM_MUL here: the block stays pending in Xi */
+ out[n] = c^ctx->EKi.c[n];
+ ++n;
+ }
+ }
+ ctx->mres = n; /* non-zero when the last block is still incomplete */
+ return 0;
+ } while(0);
+ for (i=0;i<len;++i) { /* byte-at-a-time path, reached through the break above */
+ u8 c;
+ if (n==0) { /* at a block boundary: produce fresh keystream and advance the counter */
+ (*block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+ PUTU32(ctx->Yi.c+12,ctr);
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+ }
+ c = in[i];
+ out[i] = c^ctx->EKi.c[n];
+ ctx->Xi.c[n] ^= c;
+ n = (n+1)%16;
+ if (n==0)
+ GCM_MUL(ctx,Xi); /* 16 ciphertext bytes absorbed */
+ }
+ ctx->mres = n;
+ return 0;
+int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, /* GCM encrypt where whole blocks are produced by the caller-supplied stream routine */
+ const unsigned char *in, unsigned char *out,
+ size_t len, ctr128_f stream) /* returns 0 on success, -1 when the running message length is rejected below */
+ unsigned int n, ctr; /* n: byte offset inside the current 16-byte block; ctr: working copy of the 32-bit block counter */
+ size_t i;
+ u64 mlen = ctx->len.u[1]; /* message bytes accepted by earlier calls */
+ void *key = ctx->key;
+ void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; /* presumably consumed by the GCM_MUL macro -- confirm */
+# ifdef GHASH
+ void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
+ const u8 *inp,size_t len) = ctx->ghash; /* presumably consumed by the GHASH macro -- confirm */
+# endif
+ mlen += len;
+ if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len)) /* total above 2^36-32 bytes, or the 64-bit sum wrapped */
+ return -1;
+ ctx->len.u[1] = mlen;
+ if (ctx->ares) {
+ /* First call to encrypt finalizes GHASH(AAD) */
+ GCM_MUL(ctx,Xi);
+ ctx->ares = 0;
+ }
+#ifdef BSWAP4
+ ctr = BSWAP4(ctx->Yi.d[3]); /* ctr comes from the last 32-bit word (bytes 12..15) of the counter block Yi */
+ ctr = GETU32(ctx->Yi.c+12);
+#else /* BIG_ENDIAN */
+ ctr = ctx->Yi.d[3];
+ n = ctx->mres; /* offset into the keystream block EKi left over from the previous call */
+ if (n) { /* drain the partially used keystream block first */
+ while (n && len) {
+ ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n]; /* emit one ciphertext byte and XOR it into Xi */
+ --len;
+ n = (n+1)%16;
+ }
+ if (n==0) GCM_MUL(ctx,Xi); /* the block was completed */
+ else {
+ ctx->mres = n; /* input ran out mid-block: keep the offset for the next call */
+ return 0;
+ }
+ }
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ while (len>=GHASH_CHUNK) { /* bulk path: GHASH_CHUNK bytes per iteration */
+ (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c); /* third argument is a count of 16-byte blocks */
+ ctr += GHASH_CHUNK/16; /* advance the local counter by the number of blocks handed to stream */
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr); /* write the advanced counter back into Yi */
+ PUTU32(ctx->Yi.c+12,ctr);
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr; /* NOTE(review): no GHASH call over this chunk is visible in this view -- confirm against upstream */
+ out += GHASH_CHUNK;
+ in += GHASH_CHUNK;
+ len -= GHASH_CHUNK;
+ }
+ if ((i = (len&(size_t)-16))) { /* i = remaining length rounded down to whole 16-byte blocks */
+ size_t j=i/16; /* the same span expressed in blocks */
+ (*stream)(in,out,j,key,ctx->Yi.c);
+ ctr += (unsigned int)j;
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+ PUTU32(ctx->Yi.c+12,ctr);
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+ in += i;
+ len -= i;
+#if defined(GHASH)
+ GHASH(ctx,out,i); /* hash the ciphertext that stream just produced */
+ out += i;
+ while (j--) { /* per-block variant: XOR 16 ciphertext bytes into Xi, then GCM_MUL */
+ for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
+ GCM_MUL(ctx,Xi);
+ out += 16;
+ }
+ }
+ if (len) { /* trailing partial block: falls back to the single-block cipher ctx->block */
+ (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+ PUTU32(ctx->Yi.c+12,ctr);
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+ while (len--) {
+ ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n]; /* no GCM_MUL here: the block stays pending in Xi */
+ ++n;
+ }
+ }
+ ctx->mres = n; /* non-zero when the last block is still incomplete */
+ return 0;
+int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, /* GCM decrypt where whole blocks are produced by the caller-supplied stream routine */
+ const unsigned char *in, unsigned char *out,
+ size_t len,ctr128_f stream) /* returns 0 on success, -1 when the running message length is rejected below */
+ unsigned int n, ctr; /* n: byte offset inside the current 16-byte block; ctr: working copy of the 32-bit block counter */
+ size_t i;
+ u64 mlen = ctx->len.u[1]; /* message bytes accepted by earlier calls */
+ void *key = ctx->key;
+ void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; /* presumably consumed by the GCM_MUL macro -- confirm */
+# ifdef GHASH
+ void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
+ const u8 *inp,size_t len) = ctx->ghash; /* presumably consumed by the GHASH macro -- confirm */
+# endif
+ mlen += len;
+ if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len)) /* total above 2^36-32 bytes, or the 64-bit sum wrapped */
+ return -1;
+ ctx->len.u[1] = mlen;
+ if (ctx->ares) {
+ /* First call to decrypt finalizes GHASH(AAD) */
+ GCM_MUL(ctx,Xi);
+ ctx->ares = 0;
+ }
+#ifdef BSWAP4
+ ctr = BSWAP4(ctx->Yi.d[3]); /* ctr comes from the last 32-bit word (bytes 12..15) of the counter block Yi */
+ ctr = GETU32(ctx->Yi.c+12);
+#else /* BIG_ENDIAN */
+ ctr = ctx->Yi.d[3];
+ n = ctx->mres; /* offset into the keystream block EKi left over from the previous call */
+ if (n) { /* drain the partially used keystream block first */
+ while (n && len) {
+ u8 c = *(in++); /* keep the ciphertext byte: it is needed for both the output and the hash */
+ *(out++) = c^ctx->EKi.c[n];
+ ctx->Xi.c[n] ^= c;
+ --len;
+ n = (n+1)%16;
+ }
+ if (n==0) GCM_MUL (ctx,Xi); /* the block was completed */
+ else {
+ ctx->mres = n; /* input ran out mid-block: keep the offset for the next call */
+ return 0;
+ }
+ }
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+ while (len>=GHASH_CHUNK) { /* bulk path: GHASH_CHUNK bytes per iteration; NOTE(review): no GHASH call over this chunk is visible in this view -- confirm against upstream */
+ (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c); /* third argument is a count of 16-byte blocks */
+ ctr += GHASH_CHUNK/16; /* advance the local counter by the number of blocks handed to stream */
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr); /* write the advanced counter back into Yi */
+ PUTU32(ctx->Yi.c+12,ctr);
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+ out += GHASH_CHUNK;
+ in += GHASH_CHUNK;
+ len -= GHASH_CHUNK;
+ }
+ if ((i = (len&(size_t)-16))) { /* i = remaining length rounded down to whole 16-byte blocks */
+ size_t j=i/16; /* the same span expressed in blocks */
+#if defined(GHASH)
+ GHASH(ctx,in,i); /* hash the ciphertext before it is decrypted */
+ while (j--) { /* per-block variant: XOR 16 ciphertext bytes into Xi, then GCM_MUL */
+ size_t k;
+ for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
+ GCM_MUL(ctx,Xi);
+ in += 16;
+ }
+ j = i/16; /* the loop above counted j down; restore the block count */
+ in -= i; /* rewind the pointer advanced by the per-block hashing loop */
+ (*stream)(in,out,j,key,ctx->Yi.c);
+ ctr += (unsigned int)j;
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+ PUTU32(ctx->Yi.c+12,ctr);
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+ out += i;
+ in += i;
+ len -= i;
+ }
+ if (len) { /* trailing partial block: falls back to the single-block cipher ctx->block */
+ (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
+ ++ctr;
+#ifdef BSWAP4
+ ctx->Yi.d[3] = BSWAP4(ctr);
+ PUTU32(ctx->Yi.c+12,ctr);
+#else /* BIG_ENDIAN */
+ ctx->Yi.d[3] = ctr;
+ while (len--) {
+ u8 c = in[n];
+ ctx->Xi.c[n] ^= c; /* no GCM_MUL here: the block stays pending in Xi */
+ out[n] = c^ctx->EKi.c[n];
+ ++n;
+ }
+ }
+ ctx->mres = n; /* non-zero when the last block is still incomplete */
+ return 0;
+int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag, /* completes the hash in ctx->Xi and, when tag is given, compares it with the first len bytes of Xi */
+ size_t len) /* returns 0 on match, non-zero on mismatch, -1 when tag is NULL or len exceeds sizeof(ctx->Xi) */
+ u64 alen = ctx->len.u[0]<<3; /* AAD length converted from bytes to bits */
+ u64 clen = ctx->len.u[1]<<3; /* message length converted from bytes to bits */
+ void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult; /* presumably consumed by the GCM_MUL macro -- confirm */
+ if (ctx->mres || ctx->ares) /* a partial block is still pending in Xi */
+ GCM_MUL(ctx,Xi);
+#ifdef BSWAP8
+ alen = BSWAP8(alen); /* byte-swap both lengths before they are XORed into Xi */
+ clen = BSWAP8(clen);
+ {
+ u8 *p = ctx->len.c; /* no-BSWAP8 variant: store the lengths in ctx->len, then re-read them as four 32-bit pieces via GETU32 */
+ ctx->len.u[0] = alen;
+ ctx->len.u[1] = clen;
+ alen = (u64)GETU32(p) <<32|GETU32(p+4);
+ clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
+ }
+ ctx->Xi.u[0] ^= alen; /* absorb the length block */
+ ctx->Xi.u[1] ^= clen;
+ GCM_MUL(ctx,Xi);
+ ctx->Xi.u[0] ^= ctx->EK0.u[0]; /* mask the hash with EK0; Xi now holds the tag */
+ ctx->Xi.u[1] ^= ctx->EK0.u[1];
+ if (tag && len<=sizeof(ctx->Xi)) {
+ size_t i;
+ unsigned char diff = 0; /* OR of all byte differences: every byte is always visited, unlike memcmp which may stop at the first mismatch */
+ for (i=0; i<len; ++i) diff |= ctx->Xi.c[i]^tag[i];
+ return diff; /* 0 only when all len bytes match; the sign carries no ordering information */
+ }
+ else
+ return -1;
+void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) /* finalizes the context and copies the computed tag out of ctx->Xi */
+ CRYPTO_gcm128_finish(ctx, NULL, 0); /* called for its side effect on ctx->Xi; with a NULL tag it returns -1, which is ignored here */
+ memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c)); /* copies min(len, sizeof(ctx->Xi.c)) bytes; a longer request is silently truncated */
+#if 0 /* the allocation helpers below are compiled out */
+GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block) /* heap-allocates a context and initializes it with key and block */
+ GCM128_CONTEXT *ret;
+ if ((ret = malloc(sizeof(GCM128_CONTEXT)))) /* initialize only when the allocation succeeded */
+ CRYPTO_gcm128_init(ret,key,block);
+ return ret; /* NULL when malloc failed */
+void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) /* releases a context obtained from CRYPTO_gcm128_new */
+ freezero(ctx, sizeof(*ctx)); /* presumably wipes sizeof(*ctx) bytes before freeing -- confirm against the freezero in use */
diff --git a/crypto/libressl/crypto/modes/ghash-elf-armv4.S b/crypto/libressl/crypto/modes/ghash-elf-armv4.S
new file mode 100644
index 0000000..af42593
--- /dev/null
+++ b/crypto/libressl/crypto/modes/ghash-elf-armv4.S
@@ -0,0 +1,412 @@
+#include "arm_arch.h"
+.syntax unified
+.code 32
+.type rem_4bit,%object @ table of sixteen 16-bit constants
+.align 5
+.short 0x0000,0x1C20,0x3840,0x2460 @ entries 0..3; the 4-bit routines index this table with the low nibble ("rem") they shift out
+.short 0x7080,0x6CA0,0x48C0,0x54E0 @ entries 4..7
+.short 0xE100,0xFD20,0xD940,0xC560 @ entries 8..11
+.short 0x9180,0x8DA0,0xA9C0,0xB5E0 @ entries 12..15
+.size rem_4bit,.-rem_4bit
+.type rem_4bit_get,%function @ helper reached with a plain branch from gcm_gmult_4bit; leaves the table address in r2
+ sub r2,pc,#8 @ r2 = address of this instruction (pc reads 8 bytes ahead in ARM state)
+ sub r2,r2,#32 @ &rem_4bit
+ b .Lrem_4bit_got @ continues at .Lrem_4bit_got instead of returning through lr
+ nop
+.size rem_4bit_get,.-rem_4bit_get
+ gcm_ghash_4bit
+.type gcm_ghash_4bit,%function @ presumably r0=Xi, r1=Htable, r2=inp, r3=len, matching the C gcm_ghash_p prototype -- confirm
+ sub r12,pc,#8 @ r12 = address of this instruction
+ add r3,r2,r3 @ r3 to point at the end
+ stmdb sp!,{r3-r11,lr} @ save r3/end too
+ sub r12,r12,#48 @ &rem_4bit
+ ldmia r12,{r4-r11} @ copy rem_4bit ...
+ stmdb sp!,{r4-r11} @ ... to stack
+ ldrb r12,[r2,#15] @ last byte of the current 16-byte input block
+ ldrb r14,[r0,#15] @ last byte of Xi
+ eor r12,r12,r14 @ r12 = inp[15]^Xi[15]
+ and r14,r12,#0xf0 @ r14 = high nibble, already scaled as a 16-byte table offset
+ and r12,r12,#0x0f @ r12 = low nibble
+ mov r3,#14 @ r3 is reused as the byte index, counting down from 14
+ add r7,r1,r12,lsl#4
+ ldmia r7,{r4-r7} @ load Htbl[nlo]
+ add r11,r1,r14
+ ldrb r12,[r2,#14]
+ and r14,r4,#0xf @ rem
+ ldmia r11,{r8-r11} @ load Htbl[nhi]
+ add r14,r14,r14 @ rem*2: byte offset into the table of 16-bit entries
+ eor r4,r8,r4,lsr#4 @ shift the 128-bit accumulator r7:r6:r5:r4 right by 4 and add the table entry
+ ldrh r8,[sp,r14] @ rem_4bit[rem]
+ eor r4,r4,r5,lsl#28
+ ldrb r14,[r0,#14]
+ eor r5,r9,r5,lsr#4
+ eor r5,r5,r6,lsl#28
+ eor r6,r10,r6,lsr#4
+ eor r6,r6,r7,lsl#28
+ eor r7,r11,r7,lsr#4
+ eor r12,r12,r14
+ and r14,r12,#0xf0
+ and r12,r12,#0x0f
+ eor r7,r7,r8,lsl#16
+ add r11,r1,r12,lsl#4
+ and r12,r4,#0xf @ rem
+ subs r3,r3,#1 @ sets the flags tested by the pl-conditional instructions and the bpl below
+ add r12,r12,r12
+ ldmia r11,{r8-r11} @ load Htbl[nlo]
+ eor r4,r8,r4,lsr#4
+ eor r4,r4,r5,lsl#28
+ eor r5,r9,r5,lsr#4
+ eor r5,r5,r6,lsl#28
+ ldrh r8,[sp,r12] @ rem_4bit[rem]
+ eor r6,r10,r6,lsr#4
+ ldrbpl r12,[r2,r3] @ next input byte, only while the index is still non-negative
+ eor r6,r6,r7,lsl#28
+ eor r7,r11,r7,lsr#4
+ add r11,r1,r14
+ and r14,r4,#0xf @ rem
+ eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
+ add r14,r14,r14
+ ldmia r11,{r8-r11} @ load Htbl[nhi]
+ eor r4,r8,r4,lsr#4
+ ldrbpl r8,[r0,r3] @ matching Xi byte
+ eor r4,r4,r5,lsl#28
+ eor r5,r9,r5,lsr#4
+ ldrh r9,[sp,r14]
+ eor r5,r5,r6,lsl#28
+ eor r6,r10,r6,lsr#4
+ eor r6,r6,r7,lsl#28
+ eorpl r12,r12,r8
+ eor r7,r11,r7,lsr#4
+ andpl r14,r12,#0xf0
+ andpl r12,r12,#0x0f
+ eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
+ bpl .Linner
+ ldr r3,[sp,#32] @ re-load r3/end
+ add r2,r2,#16 @ advance to the next 16-byte input block
+ mov r14,r4
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r4,r4
+ str r4,[r0,#12]
+#elif defined(__ARMEB__)
+ str r4,[r0,#12]
+ mov r9,r4,lsr#8
+ strb r4,[r0,#12+3] @ byte-wise store, most significant byte at the lowest address
+ mov r10,r4,lsr#16
+ strb r9,[r0,#12+2]
+ mov r11,r4,lsr#24
+ strb r10,[r0,#12+1]
+ strb r11,[r0,#12]
+ cmp r2,r3 @ reached the end pointer? tested by the ldrbne/bne below
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r5,r5
+ str r5,[r0,#8]
+#elif defined(__ARMEB__)
+ str r5,[r0,#8]
+ mov r9,r5,lsr#8
+ strb r5,[r0,#8+3]
+ mov r10,r5,lsr#16
+ strb r9,[r0,#8+2]
+ mov r11,r5,lsr#24
+ strb r10,[r0,#8+1]
+ strb r11,[r0,#8]
+ ldrbne r12,[r2,#15] @ preload the last byte of the next block when more input remains
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r6,r6
+ str r6,[r0,#4]
+#elif defined(__ARMEB__)
+ str r6,[r0,#4]
+ mov r9,r6,lsr#8
+ strb r6,[r0,#4+3]
+ mov r10,r6,lsr#16
+ strb r9,[r0,#4+2]
+ mov r11,r6,lsr#24
+ strb r10,[r0,#4+1]
+ strb r11,[r0,#4]
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r7,r7
+ str r7,[r0,#0]
+#elif defined(__ARMEB__)
+ str r7,[r0,#0]
+ mov r9,r7,lsr#8
+ strb r7,[r0,#0+3]
+ mov r10,r7,lsr#16
+ strb r9,[r0,#0+2]
+ mov r11,r7,lsr#24
+ strb r10,[r0,#0+1]
+ strb r11,[r0,#0]
+ bne .Louter
+ add sp,sp,#36 @ drop the 32-byte rem_4bit copy plus the saved r3
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r11,pc}
+ ldmia sp!,{r4-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+.size gcm_ghash_4bit,.-gcm_ghash_4bit
+ gcm_gmult_4bit
+.type gcm_gmult_4bit,%function @ presumably r0=Xi, r1=Htable, matching the C gcm_gmult_p prototype -- confirm
+ stmdb sp!,{r4-r11,lr}
+ ldrb r12,[r0,#15] @ last byte of Xi
+ b rem_4bit_get @ rem_4bit_get sets r2 = &rem_4bit and branches to .Lrem_4bit_got
+ and r14,r12,#0xf0 @ r14 = high nibble, already scaled as a 16-byte table offset
+ and r12,r12,#0x0f @ r12 = low nibble
+ mov r3,#14 @ byte index, counting down from 14
+ add r7,r1,r12,lsl#4
+ ldmia r7,{r4-r7} @ load Htbl[nlo]
+ ldrb r12,[r0,#14]
+ add r11,r1,r14
+ and r14,r4,#0xf @ rem
+ ldmia r11,{r8-r11} @ load Htbl[nhi]
+ add r14,r14,r14 @ rem*2: byte offset into the table of 16-bit entries
+ eor r4,r8,r4,lsr#4 @ shift the 128-bit accumulator r7:r6:r5:r4 right by 4 and add the table entry
+ ldrh r8,[r2,r14] @ rem_4bit[rem]
+ eor r4,r4,r5,lsl#28
+ eor r5,r9,r5,lsr#4
+ eor r5,r5,r6,lsl#28
+ eor r6,r10,r6,lsr#4
+ eor r6,r6,r7,lsl#28
+ eor r7,r11,r7,lsr#4
+ and r14,r12,#0xf0
+ eor r7,r7,r8,lsl#16
+ and r12,r12,#0x0f
+ add r11,r1,r12,lsl#4
+ and r12,r4,#0xf @ rem
+ subs r3,r3,#1 @ sets the flags tested by the pl-conditional instructions and the bpl below
+ add r12,r12,r12
+ ldmia r11,{r8-r11} @ load Htbl[nlo]
+ eor r4,r8,r4,lsr#4
+ eor r4,r4,r5,lsl#28
+ eor r5,r9,r5,lsr#4
+ eor r5,r5,r6,lsl#28
+ ldrh r8,[r2,r12] @ rem_4bit[rem]
+ eor r6,r10,r6,lsr#4
+ ldrbpl r12,[r0,r3] @ next Xi byte, only while the index is still non-negative
+ eor r6,r6,r7,lsl#28
+ eor r7,r11,r7,lsr#4
+ add r11,r1,r14
+ and r14,r4,#0xf @ rem
+ eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
+ add r14,r14,r14
+ ldmia r11,{r8-r11} @ load Htbl[nhi]
+ eor r4,r8,r4,lsr#4
+ eor r4,r4,r5,lsl#28
+ eor r5,r9,r5,lsr#4
+ ldrh r8,[r2,r14] @ rem_4bit[rem]
+ eor r5,r5,r6,lsl#28
+ eor r6,r10,r6,lsr#4
+ eor r6,r6,r7,lsl#28
+ eor r7,r11,r7,lsr#4
+ andpl r14,r12,#0xf0
+ andpl r12,r12,#0x0f
+ eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
+ bpl .Loop
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r4,r4
+ str r4,[r0,#12]
+#elif defined(__ARMEB__)
+ str r4,[r0,#12]
+ mov r9,r4,lsr#8
+ strb r4,[r0,#12+3] @ byte-wise store, most significant byte at the lowest address
+ mov r10,r4,lsr#16
+ strb r9,[r0,#12+2]
+ mov r11,r4,lsr#24
+ strb r10,[r0,#12+1]
+ strb r11,[r0,#12]
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r5,r5
+ str r5,[r0,#8]
+#elif defined(__ARMEB__)
+ str r5,[r0,#8]
+ mov r9,r5,lsr#8
+ strb r5,[r0,#8+3]
+ mov r10,r5,lsr#16
+ strb r9,[r0,#8+2]
+ mov r11,r5,lsr#24
+ strb r10,[r0,#8+1]
+ strb r11,[r0,#8]
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r6,r6
+ str r6,[r0,#4]
+#elif defined(__ARMEB__)
+ str r6,[r0,#4]
+ mov r9,r6,lsr#8
+ strb r6,[r0,#4+3]
+ mov r10,r6,lsr#16
+ strb r9,[r0,#4+2]
+ mov r11,r6,lsr#24
+ strb r10,[r0,#4+1]
+ strb r11,[r0,#4]
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r7,r7
+ str r7,[r0,#0]
+#elif defined(__ARMEB__)
+ str r7,[r0,#0]
+ mov r9,r7,lsr#8
+ strb r7,[r0,#0+3]
+ mov r10,r7,lsr#16
+ strb r9,[r0,#0+2]
+ mov r11,r7,lsr#24
+ strb r10,[r0,#0+1]
+ strb r11,[r0,#0]
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r11,pc}
+ ldmia sp!,{r4-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+.size gcm_gmult_4bit,.-gcm_gmult_4bit
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+.fpu neon
+ gcm_gmult_neon
+.type gcm_gmult_neon,%function @ sets up registers for one block, then joins the loop at .Linner_neon
+.align 4
+ sub r1,#16 @ point at H in GCM128_CTX
+ vld1.64 d29,[r0,:64]!@ load Xi
+ vmov.i32 d5,#0xe1 @ our irreducible polynomial
+ vld1.64 d28,[r0,:64]!
+ vshr.u64 d5,#32
+ vldmia r1,{d0-d1} @ load H
+ veor q12,q12 @ zero q12
+#ifdef __ARMEL__
+ vrev64.8 q14,q14
+ veor q13,q13 @ zero q13
+ veor q11,q11 @ zero q11
+ mov r1,#16 @ r1 = 16, the per-byte loop counter
+ veor q10,q10 @ zero q10
+ mov r3,#16 @ presumably makes the later "subs r3,#16" finish after this single block -- confirm
+ veor d2,d2
+ vdup.8 d4,d28[0] @ broadcast lowest byte
+ b .Linner_neon
+.size gcm_gmult_neon,.-gcm_gmult_neon
+ gcm_ghash_neon
+.type gcm_ghash_neon,%function @ presumably r0=Xi, r2=inp, r3=len, matching the C gcm_ghash_p prototype -- confirm
+.align 4
+ vld1.64 d21,[r0,:64]! @ load Xi
+ vmov.i32 d5,#0xe1 @ our irreducible polynomial
+ vld1.64 d20,[r0,:64]!
+ vshr.u64 d5,#32
+ vldmia r0,{d0-d1} @ load H
+ veor q12,q12 @ zero q12
+ nop
+#ifdef __ARMEL__
+ vrev64.8 q10,q10
+ vld1.64 d29,[r2]! @ load inp
+ veor q13,q13
+ vld1.64 d28,[r2]!
+ veor q11,q11
+ mov r1,#16 @ r1 = 16, the per-byte loop counter
+#ifdef __ARMEL__
+ vrev64.8 q14,q14
+ veor d2,d2
+ veor q14,q10 @ inp^=Xi
+ veor q10,q10
+ vdup.8 d4,d28[0] @ broadcast lowest byte
+ subs r1,r1,#1 @ one input byte consumed per pass; the flags feed "bne .Linner_neon"
+ vmull.p8 q9,d1,d4 @ H.lo·Xi[i]
+ vmull.p8 q8,d0,d4 @ H.hi·Xi[i]
+ vext.8 q14,q12,#1 @ IN>>=8
+ veor q10,q13 @ modulo-scheduled part
+ vshl.i64 d22,#48
+ vdup.8 d4,d28[0] @ broadcast lowest byte
+ veor d3,d18,d20
+ veor d21,d22
+ vuzp.8 q9,q8
+ vsli.8 d2,d3,#1 @ compose the "carry" byte
+ vext.8 q10,q12,#1 @ Z>>=8
+ vmull.p8 q11,d2,d5 @ "carry"·0xe1
+ vshr.u8 d2,d3,#7 @ save Z's bottom bit
+ vext.8 q13,q9,q12,#1 @ Qlo>>=8
+ veor q10,q8
+ bne .Linner_neon
+ veor q10,q13 @ modulo-scheduled artefact
+ vshl.i64 d22,#48
+ veor d21,d22
+ @ finalization, normalize Z:Zo
+ vand d2,d5 @ suffices to mask the bit
+ vshr.u64 d3,d20,#63
+ vshl.i64 q10,#1
+ subs r3,#16 @ 16 bytes of input done; the flags feed "bne .Louter_neon"
+ vorr q10,q1 @ Z=Z:Zo<<1
+ bne .Louter_neon
+#ifdef __ARMEL__
+ vrev64.8 q10,q10
+ sub r0,#16 @ the two post-incremented loads moved r0 past Xi; step back
+ vst1.64 d21,[r0,:64]! @ write out Xi
+ vst1.64 d20,[r0,:64]
+ .word 0xe12fff1e @ bx lr, emitted as a raw word
+.size gcm_ghash_neon,.-gcm_ghash_neon
+.asciz "GHASH for ARMv4/NEON, CRYPTOGAMS by <>"
+.align 2
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
diff --git a/crypto/libressl/crypto/modes/ghash-elf-x86_64.S b/crypto/libressl/crypto/modes/ghash-elf-x86_64.S
new file mode 100644
index 0000000..5f31626
--- /dev/null
+++ b/crypto/libressl/crypto/modes/ghash-elf-x86_64.S
@@ -0,0 +1,1030 @@
+#include "x86_arch.h"
+.globl gcm_gmult_4bit /* presumably rdi=Xi, rsi=Htable, matching the C gcm_gmult_p prototype -- confirm */
+.type gcm_gmult_4bit,@function
+.align 16
+ pushq %rbx
+ pushq %rbp /* rbp and r12 are saved but not written anywhere in the visible body */
+ pushq %r12
+ movzbq 15(%rdi),%r8 /* last byte of the 16-byte block at rdi */
+ leaq .Lrem_4bit(%rip),%r11 /* r11 = table of 64-bit constants, indexed below by the nibble shifted out */
+ xorq %rax,%rax
+ xorq %rbx,%rbx
+ movb %r8b,%al
+ movb %r8b,%bl
+ shlb $4,%al /* al = low nibble * 16: byte offset of a 16-byte table entry */
+ movq $14,%rcx /* byte index, counting down from 14 */
+ movq 8(%rsi,%rax,1),%r8 /* r9:r8 = table entry selected by the low nibble */
+ movq (%rsi,%rax,1),%r9
+ andb $240,%bl /* bl = high nibble, already scaled as a table offset */
+ movq %r8,%rdx
+ jmp .Loop1
+.align 16
+ shrq $4,%r8 /* shift the 128-bit accumulator r9:r8 right by 4 */
+ andq $15,%rdx /* rdx = the four bits being shifted out */
+ movq %r9,%r10
+ movb (%rdi,%rcx,1),%al /* fetch the next byte of the block */
+ shrq $4,%r9
+ xorq 8(%rsi,%rbx,1),%r8
+ shlq $60,%r10 /* carry the low four bits of r9 into the top of r8 */
+ xorq (%rsi,%rbx,1),%r9
+ movb %al,%bl
+ xorq (%r11,%rdx,8),%r9 /* fold in the constant for the bits shifted out */
+ movq %r8,%rdx
+ shlb $4,%al
+ xorq %r10,%r8
+ decq %rcx
+ js .Lbreak1 /* index went below zero: all 16 bytes consumed */
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rax,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ xorq %r10,%r8
+ jmp .Loop1
+.align 16
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rax,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ xorq %r10,%r8
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rbx,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rbx,1),%r9
+ xorq %r10,%r8
+ xorq (%r11,%rdx,8),%r9
+ bswapq %r8 /* byte-reverse both halves before storing the result */
+ bswapq %r9
+ movq %r8,8(%rdi)
+ movq %r9,(%rdi)
+ movq 16(%rsp),%rbx /* reload rbx from the first push slot */
+ leaq 24(%rsp),%rsp /* release the three push slots */
+ retq
+.size gcm_gmult_4bit,.-gcm_gmult_4bit
+.globl gcm_ghash_4bit /* presumably rdi=Xi, rsi=Htable, rdx=inp, rcx=len, matching the C gcm_ghash_p prototype -- confirm */
+.type gcm_ghash_4bit,@function
+.align 16
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $280,%rsp /* scratch space for the tables built below */
+ movq %rdx,%r14 /* r14 = input cursor, read 16 bytes at a time in the loop */
+ movq %rcx,%r15 /* r15 = byte count, turned into an end pointer further down */
+ subq $-128,%rsi /* bias rsi by +128 so entries are addressed with the -128 displacements below */
+ leaq 16+128(%rsp),%rbp /* rbp = middle of the on-stack table; both halves are reached as rbp-128.. and rbp.. */
+ xorl %edx,%edx
+ movq 0+0-128(%rsi),%r8
+ movq 0+8-128(%rsi),%rax
+ movb %al,%dl
+ shrq $4,%rax /* each 128-bit source entry is shifted right by 4 ... */
+ movq %r8,%r10
+ shrq $4,%r8
+ movq 16+0-128(%rsi),%r9
+ shlb $4,%dl /* ... and its low nibble, moved to the high nibble, goes to the byte table at (%rsp) */
+ movq 16+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,0(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,0(%rbp) /* first half of shifted entry 0 */
+ movq 32+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,0-128(%rbp) /* second half of shifted entry 0 */
+ movq 32+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,1(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,8(%rbp)
+ movq 48+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,8-128(%rbp)
+ movq 48+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,2(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,16(%rbp)
+ movq 64+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,16-128(%rbp)
+ movq 64+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,3(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,24(%rbp)
+ movq 80+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,24-128(%rbp)
+ movq 80+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,4(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,32(%rbp)
+ movq 96+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,32-128(%rbp)
+ movq 96+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,5(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,40(%rbp)
+ movq 112+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,40-128(%rbp)
+ movq 112+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,6(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,48(%rbp)
+ movq 128+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,48-128(%rbp)
+ movq 128+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,7(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,56(%rbp)
+ movq 144+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,56-128(%rbp)
+ movq 144+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,8(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,64(%rbp)
+ movq 160+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,64-128(%rbp)
+ movq 160+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,9(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,72(%rbp)
+ movq 176+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,72-128(%rbp)
+ movq 176+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,10(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,80(%rbp)
+ movq 192+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,80-128(%rbp)
+ movq 192+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,11(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,88(%rbp)
+ movq 208+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,88-128(%rbp)
+ movq 208+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,12(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,96(%rbp)
+ movq 224+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,96-128(%rbp)
+ movq 224+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,13(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,104(%rbp)
+ movq 240+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,104-128(%rbp)
+ movq 240+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,14(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,112(%rbp)
+ shlb $4,%dl
+ movq %rax,112-128(%rbp)
+ shlq $60,%r10
+ movb %dl,15(%rsp)
+ orq %r10,%rbx
+ movq %r9,120(%rbp)
+ movq %rbx,120-128(%rbp)
+ addq $-128,%rsi /* undo the +128 bias on rsi */
+ movq 8(%rdi),%r8 /* r9:r8 = current 16-byte state at rdi */
+ movq 0(%rdi),%r9
+ addq %r14,%r15 /* r15 = end-of-input pointer */
+ leaq .Lrem_8bit(%rip),%r11 /* r11 = table of 16-bit constants read with movzwq below */
+ jmp .Louter_loop
+.align 16
+ xorq (%r14),%r9 /* fold the next 16 input bytes into the state */
+ movq 8(%r14),%rdx
+ leaq 16(%r14),%r14 /* advance the input cursor */
+ xorq %r8,%rdx
+ movq %r9,(%rdi) /* park state^input at rdi; it is re-read 32 bits at a time below */
+ movq %rdx,8(%rdi)
+ shrq $32,%rdx
+ xorq %rax,%rax
+ roll $8,%edx /* rotate the next byte of the 32-bit word into dl */
+ movb %dl,%al
+ movzbl %dl,%ebx
+ shlb $4,%al /* al = low nibble * 16: offset into the table at rsi */
+ shrl $4,%ebx /* ebx = high nibble: index into the on-stack tables */
+ roll $8,%edx
+ movq 8(%rsi,%rax,1),%r8
+ movq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ xorq %r8,%r12
+ movq %r9,%r10
+ shrq $8,%r8 /* the accumulator r9:r8 moves right by one byte per step */
+ movzbq %r12b,%r12 /* r12 = the byte being shifted out, used to index the .Lrem_8bit table */
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 8(%rdi),%edx /* fetch the next 32-bit word of the parked block */
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 4(%rdi),%edx /* fetch the next 32-bit word of the parked block */
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 0(%rdi),%edx /* fetch the last 32-bit word of the parked block */
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ andl $240,%ecx /* final byte: keep the high nibble scaled as an offset into the table at rsi */
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl -4(%rdi),%edx /* NOTE(review): edx is not read again before the next block overwrites rdx -- looks like a dead load, confirm */
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ movzwq (%r11,%r12,2),%r12
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ shlq $48,%r12
+ xorq %r10,%r8
+ xorq %r12,%r9
+ movzbq %r8b,%r13
+ shrq $4,%r8 /* the last step shifts by 4 bits only */
+ movq %r9,%r10
+ shlb $4,%r13b
+ shrq $4,%r9
+ xorq 8(%rsi,%rcx,1),%r8
+ movzwq (%r11,%r13,2),%r13
+ shlq $60,%r10
+ xorq (%rsi,%rcx,1),%r9
+ xorq %r10,%r8
+ shlq $48,%r13
+ bswapq %r8 /* byte-reverse both halves of the result */
+ xorq %r13,%r9
+ bswapq %r9
+ cmpq %r15,%r14 /* more input before the end pointer? */
+ jb .Louter_loop
+ movq %r8,8(%rdi) /* store the final state */
+ movq %r9,(%rdi)
+ leaq 280(%rsp),%rsi /* rsi = address of the six saved registers above the scratch area */
+ movq 0(%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp /* release the scratch area and the push slots together */
+ retq
+.size gcm_ghash_4bit,.-gcm_ghash_4bit
+.globl gcm_init_clmul /* reads 16 bytes at rsi and writes two 16-byte values at rdi and rdi+16 */
+.type gcm_init_clmul,@function
+.align 16
+ movdqu (%rsi),%xmm2
+ pshufd $78,%xmm2,%xmm2 /* swap the two 64-bit halves */
+ pshufd $255,%xmm2,%xmm4 /* broadcast the top dword; its sign bit is tested below */
+ movdqa %xmm2,%xmm3
+ psllq $1,%xmm2 /* 128-bit left shift by one, built from two 64-bit shifts ... */
+ pxor %xmm5,%xmm5
+ psrlq $63,%xmm3
+ pcmpgtd %xmm4,%xmm5 /* all-ones mask when the broadcast dword is negative, i.e. the top bit was set */
+ pslldq $8,%xmm3
+ por %xmm3,%xmm2 /* ... plus the bit carried from the low half into the high half */
+ pand .L0x1c2_polynomial(%rip),%xmm5
+ pxor %xmm5,%xmm2 /* conditionally fold in the .L0x1c2_polynomial constant */
+ movdqa %xmm2,%xmm0 /* xmm0 = xmm2, so the multiplication below squares the value */
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3 /* xmm3 = lo^hi of the first operand */
+ pxor %xmm2,%xmm4 /* xmm4 = lo^hi of the second operand */
+.byte 102,15,58,68,194,0 /* pclmulqdq $0x00,%xmm2,%xmm0 : lo*lo */
+.byte 102,15,58,68,202,17 /* pclmulqdq $0x11,%xmm2,%xmm1 : hi*hi */
+.byte 102,15,58,68,220,0 /* pclmulqdq $0x00,%xmm4,%xmm3 : (lo^hi)*(lo^hi) */
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3 /* xmm3 = middle term */
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1 /* xmm1:xmm0 = 256-bit product */
+ pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm3 /* reduction of the 256-bit product starts here: first phase */
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4 /* second phase of the reduction */
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ movdqu %xmm2,(%rdi) /* first output: the shifted and conditionally reduced input */
+ movdqu %xmm0,16(%rdi) /* second output: its reduced square */
+ retq
+.size gcm_init_clmul,.-gcm_init_clmul
+.globl gcm_gmult_clmul /* multiplies the 16-byte block at rdi by the 16-byte value at rsi, in place */
+.type gcm_gmult_clmul,@function
+.align 16
+ movdqu (%rdi),%xmm0
+ movdqa .Lbswap_mask(%rip),%xmm5
+ movdqu (%rsi),%xmm2
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 : permute the block with .Lbswap_mask */
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3 /* xmm3 = lo^hi of the first operand */
+ pxor %xmm2,%xmm4 /* xmm4 = lo^hi of the second operand */
+.byte 102,15,58,68,194,0 /* pclmulqdq $0x00,%xmm2,%xmm0 : lo*lo */
+.byte 102,15,58,68,202,17 /* pclmulqdq $0x11,%xmm2,%xmm1 : hi*hi */
+.byte 102,15,58,68,220,0 /* pclmulqdq $0x00,%xmm4,%xmm3 : (lo^hi)*(lo^hi) */
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3 /* xmm3 = middle term */
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1 /* xmm1:xmm0 = 256-bit product */
+ pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm3 /* reduction of the 256-bit product starts here: first phase */
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4 /* second phase of the reduction */
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 : permute back with the same mask */
+ movdqu %xmm0,(%rdi) /* store the product over the input block */
+ retq
+.size gcm_gmult_clmul,.-gcm_gmult_clmul
+.globl gcm_ghash_clmul  /* gcm_ghash_clmul: folds the input at (%rdx), counted in bytes by %rcx, into the 16-byte state at (%rdi) using the values at (%rsi) and 16(%rsi) */
+.type gcm_ghash_clmul,@function  /* NOTE(review): the function label and the .Lmod_loop/.Leven_tail/.Lodd_tail/.Ldone labels targeted below are not visible in this view */
+.align 16
+ movdqa .Lbswap_mask(%rip),%xmm5  /* xmm5 = pshufb control applied to the state and to every input block */
+ movdqu (%rdi),%xmm0  /* xmm0 = running state */
+ movdqu (%rsi),%xmm2  /* xmm2 = first multiplier */
+.byte 102,15,56,0,197  /* pshufb %xmm5,%xmm0 */
+ subq $16,%rcx
+ jz .Lodd_tail  /* taken when the count was exactly 16 */
+ movdqu 16(%rsi),%xmm8  /* xmm8 = second multiplier */
+ movdqu (%rdx),%xmm3  /* first input block */
+ movdqu 16(%rdx),%xmm6  /* second input block */
+.byte 102,15,56,0,221  /* pshufb %xmm5,%xmm3 */
+.byte 102,15,56,0,245  /* pshufb %xmm5,%xmm6 */
+ pxor %xmm3,%xmm0  /* fold the first block into the state */
+ movdqa %xmm6,%xmm7
+ pshufd $78,%xmm6,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm6,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,242,0  /* pclmulqdq $0x00,%xmm2,%xmm6 */
+.byte 102,15,58,68,250,17  /* pclmulqdq $0x11,%xmm2,%xmm7 */
+.byte 102,15,58,68,220,0  /* pclmulqdq $0x00,%xmm4,%xmm3 */
+ pxor %xmm6,%xmm3
+ pxor %xmm7,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm7  /* xmm7:xmm6 = unreduced product of the second block and xmm2 */
+ pxor %xmm4,%xmm6
+ movdqa %xmm0,%xmm1  /* prepare the xor-ed halves for state x xmm8 */
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm8,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm8,%xmm4
+ leaq 32(%rdx),%rdx  /* advance the input pointer by two blocks */
+ subq $32,%rcx
+ jbe .Leven_tail
+.byte 102,65,15,58,68,192,0  /* pclmulqdq $0x00,%xmm8,%xmm0 */
+.byte 102,65,15,58,68,200,17  /* pclmulqdq $0x11,%xmm8,%xmm1 */
+.byte 102,15,58,68,220,0  /* pclmulqdq $0x00,%xmm4,%xmm3 */
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqu (%rdx),%xmm3  /* load the next pair of input blocks */
+ pxor %xmm6,%xmm0  /* add the previously computed product xmm7:xmm6 into xmm1:xmm0 */
+ pxor %xmm7,%xmm1
+ movdqu 16(%rdx),%xmm6
+.byte 102,15,56,0,221  /* pshufb %xmm5,%xmm3 */
+.byte 102,15,56,0,245  /* pshufb %xmm5,%xmm6 */
+ movdqa %xmm6,%xmm7
+ pshufd $78,%xmm6,%xmm9
+ pshufd $78,%xmm2,%xmm10
+ pxor %xmm6,%xmm9
+ pxor %xmm2,%xmm10
+ pxor %xmm3,%xmm1  /* xor the new first block into the high half before the folding below */
+ movdqa %xmm0,%xmm3  /* shift-and-xor folding, interleaved with the multiplies for the next block */
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+.byte 102,15,58,68,242,0  /* pclmulqdq $0x00,%xmm2,%xmm6 */
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+.byte 102,15,58,68,250,17  /* pclmulqdq $0x11,%xmm2,%xmm7 */
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,69,15,58,68,202,0  /* pclmulqdq $0x00,%xmm10,%xmm9 */
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm8,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm8,%xmm4
+ pxor %xmm6,%xmm9
+ pxor %xmm7,%xmm9
+ movdqa %xmm9,%xmm10
+ psrldq $8,%xmm9
+ pslldq $8,%xmm10
+ pxor %xmm9,%xmm7
+ pxor %xmm10,%xmm6
+ leaq 32(%rdx),%rdx  /* advance the input pointer by two blocks */
+ subq $32,%rcx
+ ja .Lmod_loop  /* repeat while more than 32 bytes remained before the subtraction */
+.byte 102,65,15,58,68,192,0  /* pclmulqdq $0x00,%xmm8,%xmm0 */
+.byte 102,65,15,58,68,200,17  /* pclmulqdq $0x11,%xmm8,%xmm1 */
+.byte 102,15,58,68,220,0  /* pclmulqdq $0x00,%xmm4,%xmm3 */
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm6,%xmm0  /* add the pending product xmm7:xmm6 */
+ pxor %xmm7,%xmm1
+ movdqa %xmm0,%xmm3  /* shift-and-xor folding of xmm1:xmm0 down to 128 bits */
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ testq %rcx,%rcx
+ jnz .Ldone  /* non-zero remainder: skip the single-block path that follows */
+ movdqu (%rdx),%xmm3  /* single remaining block */
+.byte 102,15,56,0,221  /* pshufb %xmm5,%xmm3 */
+ pxor %xmm3,%xmm0  /* fold the block into the state */
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0  /* pclmulqdq $0x00,%xmm2,%xmm0 */
+.byte 102,15,58,68,202,17  /* pclmulqdq $0x11,%xmm2,%xmm1 */
+.byte 102,15,58,68,220,0  /* pclmulqdq $0x00,%xmm4,%xmm3 */
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm3  /* shift-and-xor folding of xmm1:xmm0 down to 128 bits */
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,197  /* pshufb %xmm5,%xmm0 : shuffle back before storing */
+ movdqu %xmm0,(%rdi)  /* write the updated state */
+ retq
+.size gcm_ghash_clmul,.-gcm_ghash_clmul
+.align 64  /* constant data used by the routines of this file; NOTE(review): the label lines naming each table are not visible in this view */
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0  /* presumably .Lbswap_mask: as a pshufb control this reverses the order of 16 bytes - confirm */
+.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2  /* presumably .L0x1c2_polynomial: low byte 0x01, top byte 0xc2 - confirm */
+.align 64
+.type .Lrem_4bit,@object
+.long 0,0,0,471859200,0,943718400,0,610271232  /* 32 .long values in total = 16 eight-byte entries, each with a zero low dword */
+.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
+.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
+.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
+.type .Lrem_8bit,@object
+.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E  /* 32 rows of 8 = 256 sixteen-bit entries */
+.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
+.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
+.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
+.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
+.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
+.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
+.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
+.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
+.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
+.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
+.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
+.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
+.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
+.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
+.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
+.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
+.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
+.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
+.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
+.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
+.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
+.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
+.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
+.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
+.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
+.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
+.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
+.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
+.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
+.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
+.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
+.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0  /* ASCII "GHASH for x86_64, CRYPTOGAMS by " followed by appro@openssl.org in angle brackets and a NUL terminator */
+.align 64
+#if defined(HAVE_GNU_STACK)  /* NOTE(review): no matching #endif is visible in this view - confirm it exists in the real file */
+.section .note.GNU-stack,"",%progbits
diff --git a/crypto/libressl/crypto/modes/ghash-macosx-x86_64.S b/crypto/libressl/crypto/modes/ghash-macosx-x86_64.S
new file mode 100644
index 0000000..e6840a7
--- /dev/null
+++ b/crypto/libressl/crypto/modes/ghash-macosx-x86_64.S
@@ -0,0 +1,1027 @@
+#include "x86_arch.h"
+.globl _gcm_gmult_4bit  /* _gcm_gmult_4bit: updates the 16-byte value at (%rdi) in place, 4 bits at a time, using 16-byte table entries at (%rsi); NOTE(review): the function label and the L$oop1/L$break1 labels are not visible in this view */
+.p2align 4
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ movzbq 15(%rdi),%r8  /* start with the last byte of the 16-byte value */
+ leaq L$rem_4bit(%rip),%r11  /* r11 = table of 8-byte entries indexed by the 4 bits shifted out */
+ xorq %rax,%rax
+ xorq %rbx,%rbx
+ movb %r8b,%al
+ movb %r8b,%bl
+ shlb $4,%al  /* al = low nibble * 16 = byte offset of a 16-byte table entry */
+ movq $14,%rcx  /* rcx = index of the next byte to process, counting down */
+ movq 8(%rsi,%rax,1),%r8  /* r9:r8 = table entry selected by the low nibble */
+ movq (%rsi,%rax,1),%r9
+ andb $240,%bl  /* bl = high nibble * 16 */
+ movq %r8,%rdx
+ jmp L$oop1
+.p2align 4
+ shrq $4,%r8  /* shift r9:r8 right by 4 bits; r10 carries r9's low 4 bits into the top of r8 */
+ andq $15,%rdx  /* rdx = the 4 bits about to be shifted out of r8 */
+ movq %r9,%r10
+ movb (%rdi,%rcx,1),%al  /* fetch the next byte */
+ shrq $4,%r9
+ xorq 8(%rsi,%rbx,1),%r8  /* xor in the table entry selected by the high nibble */
+ shlq $60,%r10
+ xorq (%rsi,%rbx,1),%r9
+ movb %al,%bl
+ xorq (%r11,%rdx,8),%r9  /* xor in the L$rem_4bit entry for the shifted-out bits */
+ movq %r8,%rdx
+ shlb $4,%al
+ xorq %r10,%r8
+ decq %rcx
+ js L$break1  /* leave once the byte index has gone below zero */
+ shrq $4,%r8  /* same shift-and-xor step, now for the low nibble of the fetched byte */
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rax,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ xorq %r10,%r8
+ jmp L$oop1
+.p2align 4
+ shrq $4,%r8  /* final two steps: low nibble (offset in rax), then high nibble (offset in rbx) */
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rax,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ xorq %r10,%r8
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rbx,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rbx,1),%r9
+ xorq %r10,%r8
+ xorq (%r11,%rdx,8),%r9
+ bswapq %r8  /* byte-reverse both halves before storing */
+ bswapq %r9
+ movq %r8,8(%rdi)
+ movq %r9,(%rdi)
+ movq 16(%rsp),%rbx  /* reload only rbx; rbp and r12 are not written anywhere in this routine */
+ leaq 24(%rsp),%rsp  /* drop the three pushed slots */
+ retq
+.globl _gcm_ghash_4bit  /* _gcm_ghash_4bit: folds the input at (%rdx), %rcx bytes, into the 16-byte value at (%rdi) using 16-byte table entries at (%rsi); NOTE(review): the function label and the L$outer_loop label are not visible in this view */
+.p2align 4
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $280,%rsp  /* scratch area for the pre-shifted copy of the table built below */
+ movq %rdx,%r14  /* r14 = input pointer */
+ movq %rcx,%r15  /* r15 = input length, turned into an end pointer further down */
+ subq $-128,%rsi  /* rsi += 128, undone after the table copy */
+ leaq 16+128(%rsp),%rbp  /* rbp = rsp+144; qwords are stored at rbp-128.. and rbp+0.. */
+ xorl %edx,%edx
+ movq 0+0-128(%rsi),%r8  /* entry 0: qword at offset 0 */
+ movq 0+8-128(%rsi),%rax  /* entry 0: qword at offset 8 */
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq 16+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq 16+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,0(%rsp)  /* byte i at rsp+i = low nibble of entry i's offset-8 qword, shifted left by 4 */
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,0(%rbp)  /* qword i at rbp+8*i = entry i's offset-0 qword >> 4 */
+ movq 32+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,0-128(%rbp)  /* qword i at rbp-128+8*i = entry i's offset-8 qword >> 4, with 4 bits carried in from the other half */
+ movq 32+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,1(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,8(%rbp)
+ movq 48+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,8-128(%rbp)
+ movq 48+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,2(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,16(%rbp)
+ movq 64+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,16-128(%rbp)
+ movq 64+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,3(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,24(%rbp)
+ movq 80+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,24-128(%rbp)
+ movq 80+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,4(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,32(%rbp)
+ movq 96+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,32-128(%rbp)
+ movq 96+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,5(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,40(%rbp)
+ movq 112+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,40-128(%rbp)
+ movq 112+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,6(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,48(%rbp)
+ movq 128+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,48-128(%rbp)
+ movq 128+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,7(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,56(%rbp)
+ movq 144+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,56-128(%rbp)
+ movq 144+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,8(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,64(%rbp)
+ movq 160+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,64-128(%rbp)
+ movq 160+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,9(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,72(%rbp)
+ movq 176+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,72-128(%rbp)
+ movq 176+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,10(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,80(%rbp)
+ movq 192+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,80-128(%rbp)
+ movq 192+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,11(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,88(%rbp)
+ movq 208+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,88-128(%rbp)
+ movq 208+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,12(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,96(%rbp)
+ movq 224+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,96-128(%rbp)
+ movq 224+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,13(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,104(%rbp)
+ movq 240+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,104-128(%rbp)
+ movq 240+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,14(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,112(%rbp)
+ shlb $4,%dl
+ movq %rax,112-128(%rbp)
+ shlq $60,%r10
+ movb %dl,15(%rsp)
+ orq %r10,%rbx
+ movq %r9,120(%rbp)
+ movq %rbx,120-128(%rbp)
+ addq $-128,%rsi  /* undo the +128 applied to rsi above */
+ movq 8(%rdi),%r8  /* r9:r8 = current 16-byte value */
+ movq 0(%rdi),%r9
+ addq %r14,%r15  /* r15 = end-of-input pointer */
+ leaq L$rem_8bit(%rip),%r11  /* r11 = table of 16-bit entries, read below with scale 2 */
+ jmp L$outer_loop
+.p2align 4
+ xorq (%r14),%r9  /* xor the next 16 input bytes into the running value */
+ movq 8(%r14),%rdx
+ leaq 16(%r14),%r14  /* advance the input pointer by one block */
+ xorq %r8,%rdx
+ movq %r9,(%rdi)  /* spill the xor-ed value so later steps can reload it 32 bits at a time */
+ movq %rdx,8(%rdi)
+ shrq $32,%rdx  /* begin with the top 32 bits of the second qword */
+ xorq %rax,%rax
+ roll $8,%edx  /* each roll by 8 brings the next byte into dl */
+ movb %dl,%al
+ movzbl %dl,%ebx
+ shlb $4,%al  /* al = low nibble * 16 (offset into the table at rsi) */
+ shrl $4,%ebx  /* ebx = high nibble (index into the stack copy) */
+ roll $8,%edx
+ movq 8(%rsi,%rax,1),%r8
+ movq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ xorq %r8,%r12
+ movq %r9,%r10
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 8(%rdi),%edx  /* reload the next 32 bits of the spilled value */
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 4(%rdi),%edx  /* reload the next 32 bits of the spilled value */
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 0(%rdi),%edx  /* reload the last 32 bits of the spilled value */
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ andl $240,%ecx  /* last byte: keep the high nibble * 16 as a direct offset into the table at rsi */
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl -4(%rdi),%edx  /* NOTE(review): reads 4 bytes below the address in rdi; edx is not read again before it is overwritten or the routine returns - looks like a generator leftover, confirm */
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ movzwq (%r11,%r12,2),%r12
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ shlq $48,%r12
+ xorq %r10,%r8
+ xorq %r12,%r9
+ movzbq %r8b,%r13
+ shrq $4,%r8  /* final step shifts by 4 bits only */
+ movq %r9,%r10
+ shlb $4,%r13b
+ shrq $4,%r9
+ xorq 8(%rsi,%rcx,1),%r8
+ movzwq (%r11,%r13,2),%r13
+ shlq $60,%r10
+ xorq (%rsi,%rcx,1),%r9
+ xorq %r10,%r8
+ shlq $48,%r13
+ bswapq %r8  /* byte-reverse both halves of the result */
+ xorq %r13,%r9
+ bswapq %r9
+ cmpq %r15,%r14
+ jb L$outer_loop  /* continue while the input pointer is below the end pointer */
+ movq %r8,8(%rdi)  /* store the final 16-byte value */
+ movq %r9,(%rdi)
+ leaq 280(%rsp),%rsi  /* rsi = address of the saved-register area above the scratch space */
+ movq 0(%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp  /* release the scratch area and the six saved slots */
+ retq
+.globl _gcm_init_clmul  /* _gcm_init_clmul: reads 16 bytes at (%rsi) and writes two 16-byte values at (%rdi) and 16(%rdi); NOTE(review): the function label line is not visible in this view */
+.p2align 4
+ movdqu (%rsi),%xmm2  /* unaligned load of the 16-byte input */
+ pshufd $78,%xmm2,%xmm2  /* imm 78 swaps the two 64-bit halves */
+ pshufd $255,%xmm2,%xmm4  /* broadcast the top 32-bit lane; its sign bit is the bit the shift below drops */
+ movdqa %xmm2,%xmm3
+ psllq $1,%xmm2  /* start of a 128-bit left shift by one bit */
+ pxor %xmm5,%xmm5
+ psrlq $63,%xmm3  /* isolate the bit carried out of each 64-bit half */
+ pcmpgtd %xmm4,%xmm5  /* xmm5 = all-ones when 0 > lane, i.e. when the dropped top bit was set */
+ pslldq $8,%xmm3  /* move the low half's carry into the high half */
+ por %xmm3,%xmm2  /* xmm2 = swapped input << 1 (128-bit) */
+ pand L$0x1c2_polynomial(%rip),%xmm5  /* keep the constant only when the mask is set */
+ pxor %xmm5,%xmm2  /* conditional xor with the constant */
+ movdqa %xmm2,%xmm0  /* xmm0 = copy of xmm2, so the product below is xmm2 times itself */
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3  /* low qword of xmm3 = lo(xmm0) ^ hi(xmm0) */
+ pxor %xmm2,%xmm4  /* low qword of xmm4 = lo(xmm2) ^ hi(xmm2) */
+.byte 102,15,58,68,194,0  /* pclmulqdq $0x00,%xmm2,%xmm0 : low qword x low qword */
+.byte 102,15,58,68,202,17  /* pclmulqdq $0x11,%xmm2,%xmm1 : high qword x high qword */
+.byte 102,15,58,68,220,0  /* pclmulqdq $0x00,%xmm4,%xmm3 : product of the xor-ed halves */
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3  /* xmm3 = middle term (Karatsuba: three multiplies instead of four) */
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1  /* upper half of the middle term goes into the high 128 bits (xmm1) */
+ pxor %xmm4,%xmm0  /* lower half of the middle term goes into the low 128 bits (xmm0) */
+ movdqa %xmm0,%xmm3  /* shift-and-xor folding of xmm1:xmm0 down to 128 bits starts here; presumably the GHASH polynomial reduction - confirm against the perlasm source */
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4  /* second half of the folding: right shifts by 5, 1 and 1 */
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0  /* xmm0 = final 128-bit result */
+ movdqu %xmm2,(%rdi)  /* store the adjusted input at offset 0 */
+ movdqu %xmm0,16(%rdi)  /* store its square at offset 16 */
+ retq
+.globl _gcm_gmult_clmul  /* _gcm_gmult_clmul: multiplies the 16-byte value at (%rdi) by the 16-byte value at (%rsi) and stores the result back at (%rdi); NOTE(review): the function label line is not visible in this view */
+.p2align 4
+ movdqu (%rdi),%xmm0  /* xmm0 = value to be updated */
+ movdqa L$bswap_mask(%rip),%xmm5  /* xmm5 = pshufb control used before and after the multiply */
+ movdqu (%rsi),%xmm2  /* xmm2 = multiplier */
+.byte 102,15,56,0,197  /* pshufb %xmm5,%xmm0 */
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3  /* low qword of xmm3 = lo(xmm0) ^ hi(xmm0) */
+ pxor %xmm2,%xmm4  /* low qword of xmm4 = lo(xmm2) ^ hi(xmm2) */
+.byte 102,15,58,68,194,0  /* pclmulqdq $0x00,%xmm2,%xmm0 : low qword x low qword */
+.byte 102,15,58,68,202,17  /* pclmulqdq $0x11,%xmm2,%xmm1 : high qword x high qword */
+.byte 102,15,58,68,220,0  /* pclmulqdq $0x00,%xmm4,%xmm3 : product of the xor-ed halves */
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3  /* xmm3 = middle term (Karatsuba) */
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1  /* xmm1 = high 128 bits of the 256-bit product */
+ pxor %xmm4,%xmm0  /* xmm0 = low 128 bits of the 256-bit product */
+ movdqa %xmm0,%xmm3  /* shift-and-xor folding of xmm1:xmm0 down to 128 bits; presumably the GHASH polynomial reduction - confirm against the perlasm source */
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4  /* second half of the folding: right shifts by 5, 1 and 1 */
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,197  /* pshufb %xmm5,%xmm0 : shuffle back before storing */
+ movdqu %xmm0,(%rdi)  /* write the result over the input value */
+ retq
+.globl _gcm_ghash_clmul  /* _gcm_ghash_clmul: folds the input at (%rdx), counted in bytes by %rcx, into the 16-byte state at (%rdi) using the values at (%rsi) and 16(%rsi); NOTE(review): the function label and the L$mod_loop/L$even_tail/L$odd_tail/L$done labels are not visible in this view */
+.p2align 4
+ movdqa L$bswap_mask(%rip),%xmm5  /* xmm5 = pshufb control applied to the state and to every input block */
+ movdqu (%rdi),%xmm0  /* xmm0 = running state */
+ movdqu (%rsi),%xmm2  /* xmm2 = first multiplier */
+.byte 102,15,56,0,197  /* pshufb %xmm5,%xmm0 */
+ subq $16,%rcx
+ jz L$odd_tail  /* taken when the count was exactly 16 */
+ movdqu 16(%rsi),%xmm8  /* xmm8 = second multiplier */
+ movdqu (%rdx),%xmm3  /* first input block */
+ movdqu 16(%rdx),%xmm6  /* second input block */
+.byte 102,15,56,0,221  /* pshufb %xmm5,%xmm3 */
+.byte 102,15,56,0,245  /* pshufb %xmm5,%xmm6 */
+ pxor %xmm3,%xmm0  /* fold the first block into the state */
+ movdqa %xmm6,%xmm7
+ pshufd $78,%xmm6,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm6,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,242,0  /* pclmulqdq $0x00,%xmm2,%xmm6 */
+.byte 102,15,58,68,250,17  /* pclmulqdq $0x11,%xmm2,%xmm7 */
+.byte 102,15,58,68,220,0  /* pclmulqdq $0x00,%xmm4,%xmm3 */
+ pxor %xmm6,%xmm3
+ pxor %xmm7,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm7  /* xmm7:xmm6 = unreduced product of the second block and xmm2 */
+ pxor %xmm4,%xmm6
+ movdqa %xmm0,%xmm1  /* prepare the xor-ed halves for state x xmm8 */
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm8,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm8,%xmm4
+ leaq 32(%rdx),%rdx  /* advance the input pointer by two blocks */
+ subq $32,%rcx
+ jbe L$even_tail
+.byte 102,65,15,58,68,192,0  /* pclmulqdq $0x00,%xmm8,%xmm0 */
+.byte 102,65,15,58,68,200,17  /* pclmulqdq $0x11,%xmm8,%xmm1 */
+.byte 102,15,58,68,220,0  /* pclmulqdq $0x00,%xmm4,%xmm3 */
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqu (%rdx),%xmm3  /* load the next pair of input blocks */
+ pxor %xmm6,%xmm0  /* add the previously computed product xmm7:xmm6 into xmm1:xmm0 */
+ pxor %xmm7,%xmm1
+ movdqu 16(%rdx),%xmm6
+.byte 102,15,56,0,221  /* pshufb %xmm5,%xmm3 */
+.byte 102,15,56,0,245  /* pshufb %xmm5,%xmm6 */
+ movdqa %xmm6,%xmm7
+ pshufd $78,%xmm6,%xmm9
+ pshufd $78,%xmm2,%xmm10
+ pxor %xmm6,%xmm9
+ pxor %xmm2,%xmm10
+ pxor %xmm3,%xmm1  /* xor the new first block into the high half before the folding below */
+ movdqa %xmm0,%xmm3  /* shift-and-xor folding, interleaved with the multiplies for the next block */
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+.byte 102,15,58,68,242,0  /* pclmulqdq $0x00,%xmm2,%xmm6 */
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+.byte 102,15,58,68,250,17  /* pclmulqdq $0x11,%xmm2,%xmm7 */
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,69,15,58,68,202,0  /* pclmulqdq $0x00,%xmm10,%xmm9 */
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm8,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm8,%xmm4
+ pxor %xmm6,%xmm9
+ pxor %xmm7,%xmm9
+ movdqa %xmm9,%xmm10
+ psrldq $8,%xmm9
+ pslldq $8,%xmm10
+ pxor %xmm9,%xmm7
+ pxor %xmm10,%xmm6
+ leaq 32(%rdx),%rdx  /* advance the input pointer by two blocks */
+ subq $32,%rcx
+ ja L$mod_loop  /* repeat while more than 32 bytes remained before the subtraction */
+.byte 102,65,15,58,68,192,0  /* pclmulqdq $0x00,%xmm8,%xmm0 */
+.byte 102,65,15,58,68,200,17  /* pclmulqdq $0x11,%xmm8,%xmm1 */
+.byte 102,15,58,68,220,0  /* pclmulqdq $0x00,%xmm4,%xmm3 */
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm6,%xmm0  /* add the pending product xmm7:xmm6 */
+ pxor %xmm7,%xmm1
+ movdqa %xmm0,%xmm3  /* shift-and-xor folding of xmm1:xmm0 down to 128 bits */
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ testq %rcx,%rcx
+ jnz L$done  /* non-zero remainder: skip the single-block path that follows */
+ movdqu (%rdx),%xmm3  /* single remaining block */
+.byte 102,15,56,0,221  /* pshufb %xmm5,%xmm3 */
+ pxor %xmm3,%xmm0  /* fold the block into the state */
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0  /* pclmulqdq $0x00,%xmm2,%xmm0 */
+.byte 102,15,58,68,202,17  /* pclmulqdq $0x11,%xmm2,%xmm1 */
+.byte 102,15,58,68,220,0  /* pclmulqdq $0x00,%xmm4,%xmm3 */
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm3  /* shift-and-xor folding of xmm1:xmm0 down to 128 bits */
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,197  /* pshufb %xmm5,%xmm0 : shuffle back before storing */
+ movdqu %xmm0,(%rdi)  /* write the updated state */
+ retq
+.p2align 6  /* constant data used by the routines of this file; NOTE(review): the label lines naming each table are not visible in this view */
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0  /* presumably L$bswap_mask: as a pshufb control this reverses the order of 16 bytes - confirm */
+.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2  /* presumably L$0x1c2_polynomial: low byte 0x01, top byte 0xc2 - confirm */
+.p2align 6
+.long 0,0,0,471859200,0,943718400,0,610271232  /* presumably L$rem_4bit: 32 .long values = 16 eight-byte entries, each with a zero low dword - confirm */
+.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
+.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
+.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
+.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E  /* presumably L$rem_8bit: 32 rows of 8 = 256 sixteen-bit entries - confirm */
+.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
+.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
+.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
+.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
+.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
+.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
+.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
+.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
+.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
+.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
+.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
+.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
+.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
+.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
+.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
+.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
+.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
+.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
+.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
+.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
+.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
+.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
+.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
+.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
+.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
+.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
+.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
+.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
+.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
+.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
+.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
+.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0  /* ASCII "GHASH for x86_64, CRYPTOGAMS by " followed by appro@openssl.org in angle brackets and a NUL terminator */
+.p2align 6
diff --git a/crypto/libressl/crypto/modes/ghash-masm-x86_64.S b/crypto/libressl/crypto/modes/ghash-masm-x86_64.S
new file mode 100644
index 0000000..ffdc1b5
--- /dev/null
+++ b/crypto/libressl/crypto/modes/ghash-masm-x86_64.S
@@ -0,0 +1,1256 @@
+; 1 "crypto/modes/ghash-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/modes/ghash-masm-x86_64.S.tmp" 2
+; 1 "./crypto/x86_arch.h" 1
+; 16 "./crypto/x86_arch.h"
+; 40 "./crypto/x86_arch.h"
+; 3 "crypto/modes/ghash-masm-x86_64.S.tmp" 2
+.text$ SEGMENT ALIGN(64) 'CODE'
+PUBLIC gcm_gmult_4bit
+gcm_gmult_4bit PROC PUBLIC ; Updates the 16-byte block at arg1 (rcx) in place using 4-bit lookups into the table at arg2 (rdx); presumably the GHASH multiply Xi = Xi*H -- confirm against the C prototype
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi ; rdi/rsi are stashed at [8+rsp]/[16+rsp] on entry and reloaded in the epilogue
+ mov rax,rsp
+ mov rdi,rcx ; rdi = arg1 (pointer to the 16-byte block)
+ mov rsi,rdx ; rsi = arg2 (table of 16-byte entries)
+ push rbx
+ push rbp
+ push r12
+ movzx r8,BYTE PTR[15+rdi] ; start with the last byte of the block
+ lea r11,QWORD PTR[$L$rem_4bit] ; r11 = base of the qword table indexed by the 4 bits shifted out
+ xor rax,rax
+ xor rbx,rbx
+ mov al,r8b
+ mov bl,r8b
+ shl al,4 ; al = (low nibble) * 16 = byte offset of a 16-byte table entry
+ mov rcx,14 ; rcx = index of the next block byte; counts down and the loop exits when it goes negative
+ mov r8,QWORD PTR[8+rax*1+rsi] ; r9:r8 = table entry for the low nibble (r9 = first qword, r8 = second)
+ mov r9,QWORD PTR[rax*1+rsi]
+ and bl,0f0h ; bl = (high nibble) * 16
+ mov rdx,r8
+ jmp $L$oop1 ; NOTE(review): no definition of $L$oop1 (or $L$break1) is visible in this listing -- confirm the label lines were not dropped
+ shr r8,4 ; per-step: shift the 128-bit accumulator r9:r8 right by 4 bits
+ and rdx,0fh ; rdx = the 4 bits that fall off the bottom (copied from r8 before the shift)
+ mov r10,r9
+ mov al,BYTE PTR[rcx*1+rdi] ; fetch the block byte at index rcx for the following steps
+ shr r9,4
+ xor r8,QWORD PTR[8+rbx*1+rsi] ; fold in the table entry selected by the pending high nibble
+ shl r10,60 ; low 4 bits of r9, moved to the top of r8
+ xor r9,QWORD PTR[rbx*1+rsi]
+ mov bl,al
+ xor r9,QWORD PTR[rdx*8+r11] ; fold in the remainder-table qword for the shifted-out bits
+ mov rdx,r8
+ shl al,4
+ xor r8,r10
+ dec rcx
+ js $L$break1 ; index went below 0: all bytes have been fetched
+ shr r8,4
+ and rdx,0fh
+ mov r10,r9
+ shr r9,4
+ xor r8,QWORD PTR[8+rax*1+rsi] ; fold in the entry for the low nibble of the byte just fetched
+ shl r10,60
+ xor r9,QWORD PTR[rax*1+rsi]
+ and bl,0f0h
+ xor r9,QWORD PTR[rdx*8+r11]
+ mov rdx,r8
+ xor r8,r10
+ jmp $L$oop1
+ shr r8,4 ; code reached after the loop exit: two remaining nibble steps for the last byte fetched
+ and rdx,0fh
+ mov r10,r9
+ shr r9,4
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ shl r10,60
+ xor r9,QWORD PTR[rax*1+rsi]
+ and bl,0f0h
+ xor r9,QWORD PTR[rdx*8+r11]
+ mov rdx,r8
+ xor r8,r10
+ shr r8,4
+ and rdx,0fh
+ mov r10,r9
+ shr r9,4
+ xor r8,QWORD PTR[8+rbx*1+rsi]
+ shl r10,60
+ xor r9,QWORD PTR[rbx*1+rsi]
+ xor r8,r10
+ xor r9,QWORD PTR[rdx*8+r11]
+ bswap r8 ; byte-reverse both halves before writing the result back
+ bswap r9
+ mov QWORD PTR[8+rdi],r8 ; store the result over the input block
+ mov QWORD PTR[rdi],r9
+ mov rbx,QWORD PTR[16+rsp] ; reload rbx from its push slot (rbp and r12 are not written by this routine)
+ lea rsp,QWORD PTR[24+rsp] ; drop the three pushed slots
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+gcm_gmult_4bit ENDP
+PUBLIC gcm_ghash_4bit
+gcm_ghash_4bit PROC PUBLIC ; Folds arg4 (r9) bytes of input at arg3 (r8), 16 bytes per iteration, into the 16-byte block at arg1 (rcx) using the table at arg2 (rdx); presumably the GHASH update -- confirm against the C prototype
+ mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
+ mov QWORD PTR[16+rsp],rsi
+ mov rax,rsp
+ mov rdi,rcx ; rdi = arg1 (16-byte block, updated in place)
+ mov rsi,rdx ; rsi = arg2 (table of 16-byte entries)
+ mov rdx,r8 ; rdx = arg3 (input pointer)
+ mov rcx,r9 ; rcx = arg4 (input length in bytes)
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp,280 ; scratch: 16 bytes at [rsp], then two 128-byte arrays at [rsp+16] and [rsp+144]
+ mov r14,rdx ; r14 = running input pointer
+ mov r15,rcx ; r15 = length, converted to an end pointer below
+ sub rsi,-128 ; bias the table pointer by +128; the loads below use matching -128 displacements
+ lea rbp,QWORD PTR[((16+128))+rsp] ; rbp = rsp+144, addressed as [rbp-128+8*i] and [rbp+8*i]
+ xor edx,edx
+ mov r8,QWORD PTR[((0+0-128))+rsi] ; entry 0, first qword
+ mov rax,QWORD PTR[((0+8-128))+rsi] ; entry 0, second qword
+ mov dl,al
+ shr rax,4 ; each table entry is shifted right by 4 bits as a 128-bit value
+ mov r10,r8
+ shr r8,4
+ mov r9,QWORD PTR[((16+0-128))+rsi] ; loads for entry i+1 are interleaved with the stores for entry i
+ shl dl,4 ; dl = (4 bits shifted out of the entry) << 4
+ mov rbx,QWORD PTR[((16+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[rsp],dl ; [rsp+i] = shifted-out bits of entry i
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[rbp],r8 ; [rbp+8*i] = first qword of (entry i >> 4)
+ mov r8,QWORD PTR[((32+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((0-128))+rbp],rax ; [rbp-128+8*i] = second qword of (entry i >> 4)
+ mov rax,QWORD PTR[((32+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[1+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[8+rbp],r9
+ mov r9,QWORD PTR[((48+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((8-128))+rbp],rbx
+ mov rbx,QWORD PTR[((48+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[2+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[16+rbp],r8
+ mov r8,QWORD PTR[((64+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((16-128))+rbp],rax
+ mov rax,QWORD PTR[((64+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[3+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[24+rbp],r9
+ mov r9,QWORD PTR[((80+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((24-128))+rbp],rbx
+ mov rbx,QWORD PTR[((80+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[4+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[32+rbp],r8
+ mov r8,QWORD PTR[((96+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((32-128))+rbp],rax
+ mov rax,QWORD PTR[((96+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[5+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[40+rbp],r9
+ mov r9,QWORD PTR[((112+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((40-128))+rbp],rbx
+ mov rbx,QWORD PTR[((112+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[6+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[48+rbp],r8
+ mov r8,QWORD PTR[((128+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((48-128))+rbp],rax
+ mov rax,QWORD PTR[((128+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[7+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[56+rbp],r9
+ mov r9,QWORD PTR[((144+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((56-128))+rbp],rbx
+ mov rbx,QWORD PTR[((144+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[8+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[64+rbp],r8
+ mov r8,QWORD PTR[((160+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((64-128))+rbp],rax
+ mov rax,QWORD PTR[((160+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[9+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[72+rbp],r9
+ mov r9,QWORD PTR[((176+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((72-128))+rbp],rbx
+ mov rbx,QWORD PTR[((176+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[10+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[80+rbp],r8
+ mov r8,QWORD PTR[((192+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((80-128))+rbp],rax
+ mov rax,QWORD PTR[((192+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[11+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[88+rbp],r9
+ mov r9,QWORD PTR[((208+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((88-128))+rbp],rbx
+ mov rbx,QWORD PTR[((208+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[12+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[96+rbp],r8
+ mov r8,QWORD PTR[((224+0-128))+rsi]
+ shl dl,4
+ mov QWORD PTR[((96-128))+rbp],rax
+ mov rax,QWORD PTR[((224+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[13+rsp],dl
+ or rbx,r10
+ mov dl,al
+ shr rax,4
+ mov r10,r8
+ shr r8,4
+ mov QWORD PTR[104+rbp],r9
+ mov r9,QWORD PTR[((240+0-128))+rsi] ; entry 15 (last), first qword
+ shl dl,4
+ mov QWORD PTR[((104-128))+rbp],rbx
+ mov rbx,QWORD PTR[((240+8-128))+rsi]
+ shl r10,60
+ mov BYTE PTR[14+rsp],dl
+ or rax,r10
+ mov dl,bl
+ shr rbx,4
+ mov r10,r9
+ shr r9,4
+ mov QWORD PTR[112+rbp],r8
+ shl dl,4
+ mov QWORD PTR[((112-128))+rbp],rax
+ shl r10,60
+ mov BYTE PTR[15+rsp],dl
+ or rbx,r10
+ mov QWORD PTR[120+rbp],r9
+ mov QWORD PTR[((120-128))+rbp],rbx
+ add rsi,-128 ; undo the +128 bias: rsi is the original table pointer again
+ mov r8,QWORD PTR[8+rdi] ; r9:r8 = current 16-byte block from arg1
+ mov r9,QWORD PTR[rdi]
+ add r15,r14 ; r15 = input pointer + length = end of input
+ lea r11,QWORD PTR[$L$rem_8bit] ; r11 = base of the 16-bit table indexed by a byte value
+ jmp $L$outer_loop ; NOTE(review): no definition of $L$outer_loop is visible in this listing -- confirm the label line was not dropped
+ xor r9,QWORD PTR[r14] ; per-block step: XOR the next 16 input bytes into the current block
+ mov rdx,QWORD PTR[8+r14]
+ lea r14,QWORD PTR[16+r14] ; advance the input pointer by one block
+ xor rdx,r8
+ mov QWORD PTR[rdi],r9 ; write the XORed block to arg1; it is re-read below as dwords
+ mov QWORD PTR[8+rdi],rdx
+ shr rdx,32 ; edx = the dword at block offset 12
+ xor rax,rax
+ rol edx,8 ; rotate so that dl holds the next byte to consume
+ mov al,dl
+ movzx ebx,dl
+ shl al,4 ; al = (low nibble) * 16: offset into the original table at rsi
+ shr ebx,4 ; ebx = high nibble: index into the shifted copies on the stack
+ rol edx,8
+ mov r8,QWORD PTR[8+rax*1+rsi]
+ mov r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ xor r12,r8
+ mov r10,r9
+ shr r8,8 ; the accumulator r9:r8 is shifted right 8 bits per consumed byte
+ movzx r12,r12b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11] ; 16-bit table value for the byte index built above
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48 ; moved into the top 16 bits before being XORed into r9
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ mov edx,DWORD PTR[8+rdi] ; load the dword at block offset 8
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ mov edx,DWORD PTR[4+rdi] ; load the dword at block offset 4
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ mov edx,DWORD PTR[rdi] ; load the dword at block offset 0
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ shr ecx,4
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r12,WORD PTR[r12*2+r11]
+ movzx ebx,dl
+ shl al,4
+ movzx r13,BYTE PTR[rcx*1+rsp]
+ shr ebx,4
+ shl r12,48
+ xor r13,r8
+ mov r10,r9
+ xor r9,r12
+ shr r8,8
+ movzx r13,r13b
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rcx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rcx*8+rbp]
+ rol edx,8
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ mov al,dl
+ xor r8,r10
+ movzx r13,WORD PTR[r13*2+r11]
+ movzx ecx,dl
+ shl al,4
+ movzx r12,BYTE PTR[rbx*1+rsp]
+ and ecx,240 ; final byte: keep the high nibble in place (nibble*16) because it indexes the original table at rsi below
+ shl r13,48
+ xor r12,r8
+ mov r10,r9
+ xor r9,r13
+ shr r8,8
+ movzx r12,r12b
+ mov edx,DWORD PTR[((-4))+rdi] ; NOTE(review): reads 4 bytes located before the arg1 block; edx is not read again in the rest of this iteration -- looks like a dead load, confirm
+ shr r9,8
+ xor r8,QWORD PTR[((-128))+rbx*8+rbp]
+ shl r10,56
+ xor r9,QWORD PTR[rbx*8+rbp]
+ movzx r12,WORD PTR[r12*2+r11]
+ xor r8,QWORD PTR[8+rax*1+rsi]
+ xor r9,QWORD PTR[rax*1+rsi]
+ shl r12,48
+ xor r8,r10
+ xor r9,r12
+ movzx r13,r8b
+ shr r8,4 ; last step shifts by 4 bits only
+ mov r10,r9
+ shl r13b,4
+ shr r9,4
+ xor r8,QWORD PTR[8+rcx*1+rsi]
+ movzx r13,WORD PTR[r13*2+r11]
+ shl r10,60
+ xor r9,QWORD PTR[rcx*1+rsi]
+ xor r8,r10
+ shl r13,48
+ bswap r8 ; byte-reverse both halves of the result
+ xor r9,r13
+ bswap r9
+ cmp r14,r15 ; the only length test visible: compare at the bottom, so the body runs before any check
+ jb $L$outer_loop ; input remains: process the next 16 bytes
+ mov QWORD PTR[8+rdi],r8 ; store the final block to arg1
+ mov QWORD PTR[rdi],r9
+ lea rsi,QWORD PTR[280+rsp] ; rsi -> the six pushed registers above the 280-byte scratch area
+ mov r15,QWORD PTR[rsi]
+ mov r14,QWORD PTR[8+rsi]
+ mov r13,QWORD PTR[16+rsi]
+ mov r12,QWORD PTR[24+rsi]
+ mov rbp,QWORD PTR[32+rsi]
+ mov rbx,QWORD PTR[40+rsi]
+ lea rsp,QWORD PTR[48+rsi] ; release the scratch area and the six push slots
+ mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
+ mov rsi,QWORD PTR[16+rsp]
+ DB 0F3h,0C3h ;repret
+gcm_ghash_4bit ENDP
+PUBLIC gcm_init_clmul
+gcm_init_clmul PROC PUBLIC ; Reads 16 bytes at arg2 (rdx), derives two 16-byte values and stores them at arg1 (rcx) and arg1+16; uses only xmm0-xmm5 and no stack
+ movdqu xmm2,XMMWORD PTR[rdx] ; load the 16-byte input value
+ pshufd xmm2,xmm2,78 ; swap the two 64-bit halves
+ pshufd xmm4,xmm2,255 ; broadcast the top dword to all four lanes
+ movdqa xmm3,xmm2
+ psllq xmm2,1 ; 128-bit left shift by 1: shift each qword ...
+ pxor xmm5,xmm5
+ psrlq xmm3,63 ; ... isolate the bit carried out of each qword ...
+ pcmpgtd xmm5,xmm4 ; xmm5 = all-ones if the top bit of the original value was set (0 > signed dword), else zero
+ pslldq xmm3,8 ; ... move the low qword's carry into the high qword ...
+ por xmm2,xmm3 ; ... and merge
+ pand xmm5,XMMWORD PTR[$L$0x1c2_polynomial] ; select the constant only when that top bit was set
+ pxor xmm2,xmm5 ; xmm2 = adjusted value (stored at [rcx] below)
+ movdqa xmm0,xmm2 ; both multiplicands are xmm2, i.e. the product computed below is its square
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pshufd xmm4,xmm2,78
+ pxor xmm3,xmm0 ; xmm3 = lo ^ hi of the first operand (for the middle product)
+ pxor xmm4,xmm2 ; xmm4 = lo ^ hi of the second operand
+DB 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,0x00 (low qwords)
+DB 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,0x11 (high qwords)
+DB 102,15,58,68,220,0 ; pclmulqdq xmm3,xmm4,0x00 (middle term)
+ pxor xmm3,xmm0
+ pxor xmm3,xmm1
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3 ; xmm1:xmm0 now holds the 256-bit carry-less product
+ pxor xmm0,xmm4
+ movdqa xmm3,xmm0 ; shift/XOR sequence that folds xmm1:xmm0 down to 128 bits -- presumably reduction modulo the GHASH polynomial, confirm
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,5
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm4,xmm0
+ pslldq xmm0,8
+ psrldq xmm4,8
+ pxor xmm0,xmm3
+ pxor xmm1,xmm4
+ movdqa xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+ pxor xmm4,xmm1
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+ movdqu XMMWORD PTR[rcx],xmm2 ; output slot 0: the adjusted input value
+ movdqu XMMWORD PTR[16+rcx],xmm0 ; output slot 1: its reduced square
+ DB 0F3h,0C3h ;repret
+gcm_init_clmul ENDP
+PUBLIC gcm_gmult_clmul
+gcm_gmult_clmul PROC PUBLIC ; Multiplies the 16-byte block at arg1 (rcx) by the 16-byte value at arg2 (rdx) with PCLMULQDQ and stores the reduced result back at arg1; uses only xmm0-xmm5
+ movdqu xmm0,XMMWORD PTR[rcx] ; xmm0 = block to update
+ movdqa xmm5,XMMWORD PTR[$L$bswap_mask] ; xmm5 = shuffle control used by the pshufb instructions below
+ movdqu xmm2,XMMWORD PTR[rdx] ; xmm2 = multiplier
+DB 102,15,56,0,197 ; pshufb xmm0,xmm5
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pshufd xmm4,xmm2,78
+ pxor xmm3,xmm0 ; lo ^ hi of each operand, for the middle product
+ pxor xmm4,xmm2
+DB 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,0x00 (low qwords)
+DB 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,0x11 (high qwords)
+DB 102,15,58,68,220,0 ; pclmulqdq xmm3,xmm4,0x00 (middle term)
+ pxor xmm3,xmm0
+ pxor xmm3,xmm1
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3 ; xmm1:xmm0 = 256-bit carry-less product
+ pxor xmm0,xmm4
+ movdqa xmm3,xmm0 ; shift/XOR sequence folding xmm1:xmm0 to 128 bits -- presumably reduction modulo the GHASH polynomial, confirm
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,5
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm4,xmm0
+ pslldq xmm0,8
+ psrldq xmm4,8
+ pxor xmm0,xmm3
+ pxor xmm1,xmm4
+ movdqa xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+ pxor xmm4,xmm1
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+DB 102,15,56,0,197 ; pshufb xmm0,xmm5 (undo the byte shuffle applied on load)
+ movdqu XMMWORD PTR[rcx],xmm0 ; write the result over the input block
+ DB 0F3h,0C3h ;repret
+gcm_gmult_clmul ENDP
+PUBLIC gcm_ghash_clmul
+gcm_ghash_clmul PROC PUBLIC ; PCLMULQDQ variant: folds r9 bytes of input at r8 into the 16-byte block at rcx, using the 16-byte values at [rdx] and [rdx+16]; processes two input blocks per main-loop pass
+DB 048h,083h,0ech,058h ; sub rsp,058h
+DB 00fh,029h,034h,024h ; movaps [rsp],xmm6
+DB 00fh,029h,07ch,024h,010h ; movaps [rsp+010h],xmm7
+DB 044h,00fh,029h,044h,024h,020h ; movaps [rsp+020h],xmm8
+DB 044h,00fh,029h,04ch,024h,030h ; movaps [rsp+030h],xmm9
+DB 044h,00fh,029h,054h,024h,040h ; movaps [rsp+040h],xmm10
+ movdqa xmm5,XMMWORD PTR[$L$bswap_mask] ; xmm5 = shuffle control for every pshufb below
+ movdqu xmm0,XMMWORD PTR[rcx] ; xmm0 = current 16-byte block
+ movdqu xmm2,XMMWORD PTR[rdx] ; xmm2 = first table value
+DB 102,15,56,0,197 ; pshufb xmm0,xmm5
+ sub r9,010h
+ jz $L$odd_tail ; exactly one block of input. NOTE(review): none of the $L$ branch targets used by this routine are defined in this listing -- confirm the label lines were not dropped
+ movdqu xmm8,XMMWORD PTR[16+rdx] ; xmm8 = second table value
+ movdqu xmm3,XMMWORD PTR[r8] ; first two input blocks
+ movdqu xmm6,XMMWORD PTR[16+r8]
+DB 102,15,56,0,221 ; pshufb xmm3,xmm5
+DB 102,15,56,0,245 ; pshufb xmm6,xmm5
+ pxor xmm0,xmm3 ; fold the first input block into the running value
+ movdqa xmm7,xmm6
+ pshufd xmm3,xmm6,78
+ pshufd xmm4,xmm2,78
+ pxor xmm3,xmm6
+ pxor xmm4,xmm2
+DB 102,15,58,68,242,0 ; pclmulqdq xmm6,xmm2,0x00
+DB 102,15,58,68,250,17 ; pclmulqdq xmm7,xmm2,0x11
+DB 102,15,58,68,220,0 ; pclmulqdq xmm3,xmm4,0x00
+ pxor xmm3,xmm6
+ pxor xmm3,xmm7
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm7,xmm3 ; xmm7:xmm6 = (second input block) x xmm2, not yet reduced
+ pxor xmm6,xmm4
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pshufd xmm4,xmm8,78
+ pxor xmm3,xmm0
+ pxor xmm4,xmm8
+ lea r8,QWORD PTR[32+r8] ; consumed two input blocks
+ sub r9,020h
+ jbe $L$even_tail ; no further full pair of blocks remains
+DB 102,65,15,58,68,192,0 ; pclmulqdq xmm0,xmm8,0x00
+DB 102,65,15,58,68,200,17 ; pclmulqdq xmm1,xmm8,0x11
+DB 102,15,58,68,220,0 ; pclmulqdq xmm3,xmm4,0x00
+ pxor xmm3,xmm0
+ pxor xmm3,xmm1
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3
+ pxor xmm0,xmm4
+ movdqu xmm3,XMMWORD PTR[r8] ; load the next pair of input blocks
+ pxor xmm0,xmm6 ; combine with the product held in xmm7:xmm6
+ pxor xmm1,xmm7
+ movdqu xmm6,XMMWORD PTR[16+r8]
+DB 102,15,56,0,221 ; pshufb xmm3,xmm5
+DB 102,15,56,0,245 ; pshufb xmm6,xmm5
+ movdqa xmm7,xmm6
+ pshufd xmm9,xmm6,78
+ pshufd xmm10,xmm2,78
+ pxor xmm9,xmm6
+ pxor xmm10,xmm2
+ pxor xmm1,xmm3
+ movdqa xmm3,xmm0 ; folding of xmm1:xmm0 to 128 bits, interleaved with the multiplies for the next block
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,5
+ pxor xmm0,xmm3
+DB 102,15,58,68,242,0 ; pclmulqdq xmm6,xmm2,0x00
+ psllq xmm0,57
+ movdqa xmm4,xmm0
+ pslldq xmm0,8
+ psrldq xmm4,8
+ pxor xmm0,xmm3
+ pxor xmm1,xmm4
+DB 102,15,58,68,250,17 ; pclmulqdq xmm7,xmm2,0x11
+ movdqa xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+ pxor xmm4,xmm1
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+DB 102,69,15,58,68,202,0 ; pclmulqdq xmm9,xmm10,0x00
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pshufd xmm4,xmm8,78
+ pxor xmm3,xmm0
+ pxor xmm4,xmm8
+ pxor xmm9,xmm6
+ pxor xmm9,xmm7
+ movdqa xmm10,xmm9
+ psrldq xmm9,8
+ pslldq xmm10,8
+ pxor xmm7,xmm9
+ pxor xmm6,xmm10
+ lea r8,QWORD PTR[32+r8]
+ sub r9,020h
+ ja $L$mod_loop ; another full pair of blocks remains
+DB 102,65,15,58,68,192,0 ; pclmulqdq xmm0,xmm8,0x00
+DB 102,65,15,58,68,200,17 ; pclmulqdq xmm1,xmm8,0x11
+DB 102,15,58,68,220,0 ; pclmulqdq xmm3,xmm4,0x00
+ pxor xmm3,xmm0
+ pxor xmm3,xmm1
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3
+ pxor xmm0,xmm4
+ pxor xmm0,xmm6 ; combine with the pending product in xmm7:xmm6
+ pxor xmm1,xmm7
+ movdqa xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,5
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm4,xmm0
+ pslldq xmm0,8
+ psrldq xmm4,8
+ pxor xmm0,xmm3
+ pxor xmm1,xmm4
+ movdqa xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+ pxor xmm4,xmm1
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+ test r9,r9
+ jnz $L$done ; r9 != 0 here means no single trailing block is left to process
+ movdqu xmm3,XMMWORD PTR[r8] ; single remaining input block
+DB 102,15,56,0,221 ; pshufb xmm3,xmm5
+ pxor xmm0,xmm3
+ movdqa xmm1,xmm0
+ pshufd xmm3,xmm0,78
+ pshufd xmm4,xmm2,78
+ pxor xmm3,xmm0
+ pxor xmm4,xmm2
+DB 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,0x00
+DB 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,0x11
+DB 102,15,58,68,220,0 ; pclmulqdq xmm3,xmm4,0x00
+ pxor xmm3,xmm0
+ pxor xmm3,xmm1
+ movdqa xmm4,xmm3
+ psrldq xmm3,8
+ pslldq xmm4,8
+ pxor xmm1,xmm3
+ pxor xmm0,xmm4
+ movdqa xmm3,xmm0
+ psllq xmm0,1
+ pxor xmm0,xmm3
+ psllq xmm0,5
+ pxor xmm0,xmm3
+ psllq xmm0,57
+ movdqa xmm4,xmm0
+ pslldq xmm0,8
+ psrldq xmm4,8
+ pxor xmm0,xmm3
+ pxor xmm1,xmm4
+ movdqa xmm4,xmm0
+ psrlq xmm0,5
+ pxor xmm0,xmm4
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+ pxor xmm4,xmm1
+ psrlq xmm0,1
+ pxor xmm0,xmm4
+DB 102,15,56,0,197 ; pshufb xmm0,xmm5 (undo the byte shuffle applied on load)
+ movdqu XMMWORD PTR[rcx],xmm0 ; store the updated block
+ movaps xmm6,XMMWORD PTR[rsp] ; restore xmm6-xmm10 saved by the DB-encoded prologue
+ movaps xmm7,XMMWORD PTR[16+rsp]
+ movaps xmm8,XMMWORD PTR[32+rsp]
+ movaps xmm9,XMMWORD PTR[48+rsp]
+ movaps xmm10,XMMWORD PTR[64+rsp]
+ add rsp,058h
+ DB 0F3h,0C3h ;repret
+gcm_ghash_clmul ENDP
+DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 ; 16-byte shuffle control listing indices 15..0 (byte reversal); presumably the $L$bswap_mask operand -- label not visible here, confirm
+DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0c2h ; 16-byte constant with low byte 1 and top byte 0C2h; presumably $L$0x1c2_polynomial -- confirm
+ DD 0,0,0,471859200,0,943718400,0,610271232 ; 16 qwords over four lines, low dword always 0; presumably $L$rem_4bit (indexed as [idx*8] by the 4-bit routines) -- confirm
+ DD 0,1887436800,0,1822425088,0,1220542464,0,1423966208
+ DD 0,3774873600,0,4246732800,0,3644850176,0,3311403008
+ DD 0,2441084928,0,2376073216,0,2847932416,0,3051356160
+ DW 00000h,001C2h,00384h,00246h,00708h,006CAh,0048Ch,0054Eh ; 256 words over 32 lines; presumably $L$rem_8bit (indexed as [idx*2] by gcm_ghash_4bit) -- confirm
+ DW 00E10h,00FD2h,00D94h,00C56h,00918h,008DAh,00A9Ch,00B5Eh
+ DW 01C20h,01DE2h,01FA4h,01E66h,01B28h,01AEAh,018ACh,0196Eh
+ DW 01230h,013F2h,011B4h,01076h,01538h,014FAh,016BCh,0177Eh
+ DW 03840h,03982h,03BC4h,03A06h,03F48h,03E8Ah,03CCCh,03D0Eh
+ DW 03650h,03792h,035D4h,03416h,03158h,0309Ah,032DCh,0331Eh
+ DW 02460h,025A2h,027E4h,02626h,02368h,022AAh,020ECh,0212Eh
+ DW 02A70h,02BB2h,029F4h,02836h,02D78h,02CBAh,02EFCh,02F3Eh
+ DW 07080h,07142h,07304h,072C6h,07788h,0764Ah,0740Ch,075CEh
+ DW 07E90h,07F52h,07D14h,07CD6h,07998h,0785Ah,07A1Ch,07BDEh
+ DW 06CA0h,06D62h,06F24h,06EE6h,06BA8h,06A6Ah,0682Ch,069EEh
+ DW 062B0h,06372h,06134h,060F6h,065B8h,0647Ah,0663Ch,067FEh
+ DW 048C0h,04902h,04B44h,04A86h,04FC8h,04E0Ah,04C4Ch,04D8Eh
+ DW 046D0h,04712h,04554h,04496h,041D8h,0401Ah,0425Ch,0439Eh
+ DW 054E0h,05522h,05764h,056A6h,053E8h,0522Ah,0506Ch,051AEh
+ DW 05AF0h,05B32h,05974h,058B6h,05DF8h,05C3Ah,05E7Ch,05FBEh
+ DW 0E100h,0E0C2h,0E284h,0E346h,0E608h,0E7CAh,0E58Ch,0E44Eh
+ DW 0EF10h,0EED2h,0EC94h,0ED56h,0E818h,0E9DAh,0EB9Ch,0EA5Eh
+ DW 0FD20h,0FCE2h,0FEA4h,0FF66h,0FA28h,0FBEAh,0F9ACh,0F86Eh
+ DW 0F330h,0F2F2h,0F0B4h,0F176h,0F438h,0F5FAh,0F7BCh,0F67Eh
+ DW 0D940h,0D882h,0DAC4h,0DB06h,0DE48h,0DF8Ah,0DDCCh,0DC0Eh
+ DW 0D750h,0D692h,0D4D4h,0D516h,0D058h,0D19Ah,0D3DCh,0D21Eh
+ DW 0C560h,0C4A2h,0C6E4h,0C726h,0C268h,0C3AAh,0C1ECh,0C02Eh
+ DW 0CB70h,0CAB2h,0C8F4h,0C936h,0CC78h,0CDBAh,0CFFCh,0CE3Eh
+ DW 09180h,09042h,09204h,093C6h,09688h,0974Ah,0950Ch,094CEh
+ DW 09F90h,09E52h,09C14h,09DD6h,09898h,0995Ah,09B1Ch,09ADEh
+ DW 08DA0h,08C62h,08E24h,08FE6h,08AA8h,08B6Ah,0892Ch,088EEh
+ DW 083B0h,08272h,08034h,081F6h,084B8h,0857Ah,0873Ch,086FEh
+ DW 0A9C0h,0A802h,0AA44h,0AB86h,0AEC8h,0AF0Ah,0AD4Ch,0AC8Eh
+ DW 0A7D0h,0A612h,0A454h,0A596h,0A0D8h,0A11Ah,0A35Ch,0A29Eh
+ DW 0B5E0h,0B422h,0B664h,0B7A6h,0B2E8h,0B32Ah,0B16Ch,0B0AEh
+ DW 0BBF0h,0BA32h,0B874h,0B9B6h,0BCF8h,0BD3Ah,0BF7Ch,0BEBEh
+DB 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52 ; NUL-terminated ASCII over these four DB lines: "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
+DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
+DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
+DB 114,103,62,0
+EXTERN __imp_RtlVirtualUnwind:NEAR
+se_handler PROC PRIVATE ; Handler named by the `DD imagerel se_handler` unwind records in this file; patches saved registers into the record at r8, copies it to [40+r9], calls RtlVirtualUnwind and returns 1. Offsets into r8/r9 presumably follow the Win64 CONTEXT / DISPATCHER_CONTEXT layouts -- confirm against winnt.h
+ push rsi
+ push rdi
+ push rbx
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ pushfq
+ sub rsp,64 ; room for the stack-passed arguments written at [32..56+rsp] below
+ mov rax,QWORD PTR[120+r8] ; presumably CONTEXT.Rax
+ mov rbx,QWORD PTR[248+r8] ; presumably CONTEXT.Rip (compared against the two labels below)
+ mov rsi,QWORD PTR[8+r9] ; presumably DISPATCHER_CONTEXT.ImageBase (added to the image-relative labels)
+ mov r11,QWORD PTR[56+r9] ; presumably DISPATCHER_CONTEXT.HandlerData: the pair of imagerel labels stored after the handler reference
+ mov r10d,DWORD PTR[r11] ; first label of the pair
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jb $L$in_prologue ; address below the first label: skip the register fix-up. NOTE(review): $L$in_prologue is not defined in this listing -- confirm the label line was not dropped
+ mov rax,QWORD PTR[152+r8] ; presumably CONTEXT.Rsp
+ mov r10d,DWORD PTR[4+r11] ; second label of the pair
+ lea r10,QWORD PTR[r10*1+rsi]
+ cmp rbx,r10
+ jae $L$in_prologue ; address at or past the second label: skip the register fix-up
+ lea rax,QWORD PTR[24+rax] ; step over three 8-byte push slots. NOTE(review): this matches gcm_gmult_4bit (3 pushes), but gcm_ghash_4bit pushes 6 registers and subtracts 280 yet its record names this same handler -- looks inconsistent, confirm
+ mov rbx,QWORD PTR[((-8))+rax] ; reload the values pushed by the prologue (rbx, rbp, r12)
+ mov rbp,QWORD PTR[((-16))+rax]
+ mov r12,QWORD PTR[((-24))+rax]
+ mov QWORD PTR[144+r8],rbx ; presumably CONTEXT.Rbx
+ mov QWORD PTR[160+r8],rbp ; presumably CONTEXT.Rbp
+ mov QWORD PTR[216+r8],r12 ; presumably CONTEXT.R12
+ mov rdi,QWORD PTR[8+rax] ; rdi/rsi as stashed at [8+rsp]/[16+rsp] by the WIN64 prologues
+ mov rsi,QWORD PTR[16+rax]
+ mov QWORD PTR[152+r8],rax ; presumably CONTEXT.Rsp
+ mov QWORD PTR[168+r8],rsi ; presumably CONTEXT.Rsi
+ mov QWORD PTR[176+r8],rdi ; presumably CONTEXT.Rdi
+ mov rdi,QWORD PTR[40+r9] ; copy destination; presumably DISPATCHER_CONTEXT.ContextRecord
+ mov rsi,r8 ; copy source: the record patched above
+ mov ecx,154 ; qword count for the copy (154*8 = 1232 bytes)
+ DD 0a548f3fch ; bytes FC F3 48 A5 = cld; rep movsq
+ mov rsi,r9
+ xor rcx,rcx ; first RtlVirtualUnwind argument = 0
+ mov rdx,QWORD PTR[8+rsi] ; remaining arguments are taken from fields of the record at r9
+ mov r8,QWORD PTR[rsi]
+ mov r9,QWORD PTR[16+rsi]
+ mov r10,QWORD PTR[40+rsi]
+ lea r11,QWORD PTR[56+rsi]
+ lea r12,QWORD PTR[24+rsi]
+ mov QWORD PTR[32+rsp],r10 ; stack-passed arguments 5..8
+ mov QWORD PTR[40+rsp],r11
+ mov QWORD PTR[48+rsp],r12
+ mov QWORD PTR[56+rsp],rcx
+ call QWORD PTR[__imp_RtlVirtualUnwind]
+ mov eax,1 ; return value 1; presumably ExceptionContinueSearch -- confirm
+ add rsp,64
+ popfq
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ DB 0F3h,0C3h ;repret
+se_handler ENDP
+.text$ ENDS
+ DD imagerel $L$SEH_begin_gcm_gmult_4bit ; three (begin, end, info) triples, one per covered routine; presumably RUNTIME_FUNCTION entries. NOTE(review): the $L$SEH_* labels and the .pdata SEGMENT opener are not visible in this listing -- confirm they were not dropped
+ DD imagerel $L$SEH_end_gcm_gmult_4bit
+ DD imagerel $L$SEH_info_gcm_gmult_4bit
+ DD imagerel $L$SEH_begin_gcm_ghash_4bit
+ DD imagerel $L$SEH_end_gcm_ghash_4bit
+ DD imagerel $L$SEH_info_gcm_ghash_4bit
+ DD imagerel $L$SEH_begin_gcm_ghash_clmul
+ DD imagerel $L$SEH_end_gcm_ghash_clmul
+ DD imagerel $L$SEH_info_gcm_ghash_clmul
+.pdata ENDS
+DB 9,0,0,0 ; unwind header followed by a handler reference and a label pair; presumably the info record for gcm_gmult_4bit (9 = version 1 with the exception-handler flag) -- confirm against the x64 UNWIND_INFO layout
+ DD imagerel se_handler
+ DD imagerel $L$gmult_prologue,imagerel $L$gmult_epilogue ; the label pair se_handler reads as its handler data
+DB 9,0,0,0 ; same record shape; presumably the info record for gcm_ghash_4bit
+ DD imagerel se_handler
+ DD imagerel $L$ghash_prologue,imagerel $L$ghash_epilogue
+DB 001h,01fh,00bh,000h ; presumably the info record for gcm_ghash_clmul: version 1, prologue size 01fh, 11 unwind-code slots -- confirm
+DB 01fh,0a8h,004h,000h ; presumably: save xmm10 at [rsp+040h], matching the DB-encoded prologue of gcm_ghash_clmul
+DB 019h,098h,003h,000h ; presumably: save xmm9 at [rsp+030h]
+DB 013h,088h,002h,000h ; presumably: save xmm8 at [rsp+020h]
+DB 00dh,078h,001h,000h ; presumably: save xmm7 at [rsp+010h]
+DB 008h,068h,000h,000h ; presumably: save xmm6 at [rsp]
+DB 004h,0a2h,000h,000h ; presumably: small stack allocation of 058h bytes (sub rsp,058h)
+.xdata ENDS
diff --git a/crypto/libressl/crypto/modes/ghash-mingw64-x86_64.S b/crypto/libressl/crypto/modes/ghash-mingw64-x86_64.S
new file mode 100644
index 0000000..cd0823b
--- /dev/null
+++ b/crypto/libressl/crypto/modes/ghash-mingw64-x86_64.S
@@ -0,0 +1,1175 @@
+#include "x86_arch.h"
+.globl gcm_gmult_4bit /* AT&T-syntax build of the 4-bit table routine: updates the 16-byte block at arg1 (%rcx) in place using the table at arg2 (%rdx) */
+.def gcm_gmult_4bit; .scl 2; .type 32; .endef
+.p2align 4 /* NOTE(review): no `gcm_gmult_4bit:` label (nor .Loop1 / .Lbreak1) is visible in this listing -- confirm the label lines were not dropped */
+ movq %rdi,8(%rsp) /* stash %rdi/%rsi at 8(%rsp)/16(%rsp); both are reloaded before retq */
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi /* %rdi = arg1 (16-byte block) */
+ movq %rdx,%rsi /* %rsi = arg2 (table of 16-byte entries) */
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ movzbq 15(%rdi),%r8 /* start with the last byte of the block */
+ leaq .Lrem_4bit(%rip),%r11 /* %r11 = qword table indexed by the 4 bits shifted out */
+ xorq %rax,%rax
+ xorq %rbx,%rbx
+ movb %r8b,%al
+ movb %r8b,%bl
+ shlb $4,%al /* %al = (low nibble) * 16 = byte offset of a table entry */
+ movq $14,%rcx /* index of the next block byte; counts down, loop exits when negative */
+ movq 8(%rsi,%rax,1),%r8 /* %r9:%r8 = table entry for the low nibble */
+ movq (%rsi,%rax,1),%r9
+ andb $240,%bl /* %bl = (high nibble) * 16 */
+ movq %r8,%rdx
+ jmp .Loop1
+.p2align 4
+ shrq $4,%r8 /* per-step: shift the 128-bit accumulator %r9:%r8 right by 4 */
+ andq $15,%rdx /* the 4 bits that fall off the bottom */
+ movq %r9,%r10
+ movb (%rdi,%rcx,1),%al /* fetch the block byte at index %rcx */
+ shrq $4,%r9
+ xorq 8(%rsi,%rbx,1),%r8 /* fold in the entry for the pending high nibble */
+ shlq $60,%r10
+ xorq (%rsi,%rbx,1),%r9
+ movb %al,%bl
+ xorq (%r11,%rdx,8),%r9 /* fold in the table qword for the shifted-out bits */
+ movq %r8,%rdx
+ shlb $4,%al
+ xorq %r10,%r8
+ decq %rcx
+ js .Lbreak1 /* index went below 0: all bytes fetched */
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rax,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ xorq %r10,%r8
+ jmp .Loop1
+.p2align 4
+ shrq $4,%r8 /* after the loop exit: two remaining nibble steps for the last byte fetched */
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rax,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ xorq %r10,%r8
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rbx,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rbx,1),%r9
+ xorq %r10,%r8
+ xorq (%r11,%rdx,8),%r9
+ bswapq %r8 /* byte-reverse both halves before writing back */
+ bswapq %r9
+ movq %r8,8(%rdi) /* store the result over the input block */
+ movq %r9,(%rdi)
+ movq 16(%rsp),%rbx /* reload %rbx from its push slot (%rbp and %r12 are not written here) */
+ leaq 24(%rsp),%rsp /* drop the three pushed slots */
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.globl gcm_ghash_4bit
+.def gcm_ghash_4bit; .scl 2; .type 32; .endef
+.p2align 4
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+ movq %r9,%rcx
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $280,%rsp
+ movq %rdx,%r14
+ movq %rcx,%r15
+ subq $-128,%rsi
+ leaq 16+128(%rsp),%rbp
+ xorl %edx,%edx
+ movq 0+0-128(%rsi),%r8
+ movq 0+8-128(%rsi),%rax
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq 16+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq 16+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,0(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,0(%rbp)
+ movq 32+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,0-128(%rbp)
+ movq 32+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,1(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,8(%rbp)
+ movq 48+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,8-128(%rbp)
+ movq 48+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,2(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,16(%rbp)
+ movq 64+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,16-128(%rbp)
+ movq 64+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,3(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,24(%rbp)
+ movq 80+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,24-128(%rbp)
+ movq 80+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,4(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,32(%rbp)
+ movq 96+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,32-128(%rbp)
+ movq 96+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,5(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,40(%rbp)
+ movq 112+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,40-128(%rbp)
+ movq 112+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,6(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,48(%rbp)
+ movq 128+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,48-128(%rbp)
+ movq 128+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,7(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,56(%rbp)
+ movq 144+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,56-128(%rbp)
+ movq 144+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,8(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,64(%rbp)
+ movq 160+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,64-128(%rbp)
+ movq 160+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,9(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,72(%rbp)
+ movq 176+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,72-128(%rbp)
+ movq 176+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,10(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,80(%rbp)
+ movq 192+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,80-128(%rbp)
+ movq 192+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,11(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,88(%rbp)
+ movq 208+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,88-128(%rbp)
+ movq 208+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,12(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,96(%rbp)
+ movq 224+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,96-128(%rbp)
+ movq 224+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,13(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,104(%rbp)
+ movq 240+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,104-128(%rbp)
+ movq 240+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,14(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,112(%rbp)
+ shlb $4,%dl
+ movq %rax,112-128(%rbp)
+ shlq $60,%r10
+ movb %dl,15(%rsp)
+ orq %r10,%rbx
+ movq %r9,120(%rbp)
+ movq %rbx,120-128(%rbp)
+ addq $-128,%rsi
+ movq 8(%rdi),%r8
+ movq 0(%rdi),%r9
+ addq %r14,%r15
+ leaq .Lrem_8bit(%rip),%r11
+ jmp .Louter_loop
+.p2align 4
+ xorq (%r14),%r9
+ movq 8(%r14),%rdx
+ leaq 16(%r14),%r14
+ xorq %r8,%rdx
+ movq %r9,(%rdi)
+ movq %rdx,8(%rdi)
+ shrq $32,%rdx
+ xorq %rax,%rax
+ roll $8,%edx
+ movb %dl,%al
+ movzbl %dl,%ebx
+ shlb $4,%al
+ shrl $4,%ebx
+ roll $8,%edx
+ movq 8(%rsi,%rax,1),%r8
+ movq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ xorq %r8,%r12
+ movq %r9,%r10
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 8(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 4(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 0(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ andl $240,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl -4(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ movzwq (%r11,%r12,2),%r12
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ shlq $48,%r12
+ xorq %r10,%r8
+ xorq %r12,%r9
+ movzbq %r8b,%r13
+ shrq $4,%r8
+ movq %r9,%r10
+ shlb $4,%r13b
+ shrq $4,%r9
+ xorq 8(%rsi,%rcx,1),%r8
+ movzwq (%r11,%r13,2),%r13
+ shlq $60,%r10
+ xorq (%rsi,%rcx,1),%r9
+ xorq %r10,%r8
+ shlq $48,%r13
+ bswapq %r8
+ xorq %r13,%r9
+ bswapq %r9
+ cmpq %r15,%r14
+ jb .Louter_loop
+ movq %r8,8(%rdi)
+ movq %r9,(%rdi)
+ leaq 280(%rsp),%rsi
+ movq 0(%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ retq
+.globl gcm_init_clmul /* Setup routine: reads 16 bytes at (%rdx) and stores two 16-byte values at (%rcx) and 16(%rcx). */
+.def gcm_init_clmul; .scl 2; .type 32; .endef
+.p2align 4
+ movdqu (%rdx),%xmm2 /* unaligned load of the 16-byte input */
+ pshufd $78,%xmm2,%xmm2 /* swap the two 64-bit halves */
+ pshufd $255,%xmm2,%xmm4 /* broadcast the top dword, which carries the top bit */
+ movdqa %xmm2,%xmm3
+ psllq $1,%xmm2 /* shift each qword left by one bit */
+ pxor %xmm5,%xmm5
+ psrlq $63,%xmm3 /* isolate the bit shifted out of each qword */
+ pcmpgtd %xmm4,%xmm5 /* xmm5 = all-ones when the top dword is negative (top bit set), else zero */
+ pslldq $8,%xmm3 /* move the low qword's carry bit into the high qword */
+ por %xmm3,%xmm2 /* xmm2 = input shifted left by one as a 128-bit value */
+ pand .L0x1c2_polynomial(%rip),%xmm5 /* keep the constant only when the top bit was set */
+ pxor %xmm5,%xmm2 /* conditional xor with that constant */
+ movdqa %xmm2,%xmm0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3 /* xmm3 = low half ^ high half of the first operand */
+ pxor %xmm2,%xmm4 /* xmm4 = low half ^ high half of the second operand */
+.byte 102,15,58,68,194,0 /* pclmulqdq $0x00,%xmm2,%xmm0: low x low */
+.byte 102,15,58,68,202,17 /* pclmulqdq $0x11,%xmm2,%xmm1: high x high */
+.byte 102,15,58,68,220,0 /* pclmulqdq $0x00,%xmm4,%xmm3: (lo^hi) x (lo^hi) */
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3 /* middle term = cross product ^ low product ^ high product */
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1 /* fold the upper half of the middle term into the high product */
+ pxor %xmm4,%xmm0 /* fold the lower half of the middle term into the low product */
+ movdqa %xmm0,%xmm3 /* from here: shift-and-xor sequence that merges xmm1 into xmm0; presumably the reduction modulo the GHASH polynomial */
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0 /* xmm0 = final 128-bit result */
+ movdqu %xmm2,(%rcx) /* first output slot: the adjusted input */
+ movdqu %xmm0,16(%rcx) /* second output slot: the adjusted input multiplied by itself */
+ retq
+.globl gcm_gmult_clmul /* Multiplies the 16-byte value at (%rcx) by the 16-byte value at (%rdx) and writes the result back to (%rcx). */
+.def gcm_gmult_clmul; .scl 2; .type 32; .endef
+.p2align 4
+ movdqu (%rcx),%xmm0 /* value that is updated in place */
+ movdqa .Lbswap_mask(%rip),%xmm5 /* shuffle control used by the pshufb instructions below */
+ movdqu (%rdx),%xmm2 /* multiplier, read only */
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3 /* xmm3 = low half ^ high half of xmm0 */
+ pxor %xmm2,%xmm4 /* xmm4 = low half ^ high half of xmm2 */
+.byte 102,15,58,68,194,0 /* pclmulqdq $0x00,%xmm2,%xmm0: low x low */
+.byte 102,15,58,68,202,17 /* pclmulqdq $0x11,%xmm2,%xmm1: high x high */
+.byte 102,15,58,68,220,0 /* pclmulqdq $0x00,%xmm4,%xmm3: (lo^hi) x (lo^hi) */
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3 /* middle term = cross product ^ low product ^ high product */
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1 /* upper half of the middle term goes into the high product */
+ pxor %xmm4,%xmm0 /* lower half of the middle term goes into the low product */
+ movdqa %xmm0,%xmm3 /* from here: shift-and-xor sequence that merges xmm1 into xmm0; presumably the reduction modulo the GHASH polynomial */
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0: undo the byte shuffle applied on entry */
+ movdqu %xmm0,(%rcx) /* store the product back over the input */
+ retq
+.globl gcm_ghash_clmul /* Folds %r9 bytes of input at (%r8) into the 16-byte value at (%rcx), using the two 16-byte table entries at (%rdx) and 16(%rdx). */
+.def gcm_ghash_clmul; .scl 2; .type 32; .endef
+.p2align 4
+.byte 0x48,0x83,0xec,0x58 /* subq $0x58,%rsp: 88-byte save area, released by the addq before retq */
+.byte 0x0f,0x29,0x34,0x24 /* movaps %xmm6,(%rsp) */
+.byte 0x0f,0x29,0x7c,0x24,0x10 /* movaps %xmm7,0x10(%rsp) */
+.byte 0x44,0x0f,0x29,0x44,0x24,0x20 /* movaps %xmm8,0x20(%rsp) */
+.byte 0x44,0x0f,0x29,0x4c,0x24,0x30 /* movaps %xmm9,0x30(%rsp) */
+.byte 0x44,0x0f,0x29,0x54,0x24,0x40 /* movaps %xmm10,0x40(%rsp) */
+ movdqa .Lbswap_mask(%rip),%xmm5 /* shuffle control for every pshufb below */
+ movdqu (%rcx),%xmm0 /* running value */
+ movdqu (%rdx),%xmm2 /* first table entry */
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0 */
+ subq $16,%r9
+ jz .Lodd_tail /* exactly one 16-byte block: go straight to the single-block path */
+ movdqu 16(%rdx),%xmm8 /* second table entry */
+ movdqu (%r8),%xmm3 /* first input block of the pair */
+ movdqu 16(%r8),%xmm6 /* second input block of the pair */
+.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
+.byte 102,15,56,0,245 /* pshufb %xmm5,%xmm6 */
+ pxor %xmm3,%xmm0 /* xor the first block into the running value */
+ movdqa %xmm6,%xmm7
+ pshufd $78,%xmm6,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm6,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,242,0 /* pclmulqdq $0x00,%xmm2,%xmm6: second block x first table entry, low x low */
+.byte 102,15,58,68,250,17 /* pclmulqdq $0x11,%xmm2,%xmm7: high x high */
+.byte 102,15,58,68,220,0 /* pclmulqdq $0x00,%xmm4,%xmm3: (lo^hi) x (lo^hi) */
+ pxor %xmm6,%xmm3
+ pxor %xmm7,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm7 /* xmm7:xmm6 now holds the unreduced product for the second block */
+ pxor %xmm4,%xmm6
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm8,%xmm4
+ pxor %xmm0,%xmm3 /* operands prepared for multiplying the running value by the second table entry */
+ pxor %xmm8,%xmm4
+ leaq 32(%r8),%r8 /* advance past the two blocks just loaded */
+ subq $32,%r9
+ jbe .Leven_tail /* no further full pair available: finish outside the loop */
+.byte 102,65,15,58,68,192,0 /* pclmulqdq $0x00,%xmm8,%xmm0; presumably the .Lmod_loop target (label line not visible in this listing) */
+.byte 102,65,15,58,68,200,17 /* pclmulqdq $0x11,%xmm8,%xmm1 */
+.byte 102,15,58,68,220,0 /* pclmulqdq $0x00,%xmm4,%xmm3 */
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqu (%r8),%xmm3 /* load the next pair of input blocks */
+ pxor %xmm6,%xmm0 /* add the pending product of the previous second block */
+ pxor %xmm7,%xmm1
+ movdqu 16(%r8),%xmm6
+.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
+.byte 102,15,56,0,245 /* pshufb %xmm5,%xmm6 */
+ movdqa %xmm6,%xmm7
+ pshufd $78,%xmm6,%xmm9
+ pshufd $78,%xmm2,%xmm10
+ pxor %xmm6,%xmm9
+ pxor %xmm2,%xmm10
+ pxor %xmm3,%xmm1 /* xor the next first block into the high half before the fold below */
+ movdqa %xmm0,%xmm3 /* fold xmm1:xmm0 down to xmm0, interleaved with the multiplies for the next second block */
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+.byte 102,15,58,68,242,0 /* pclmulqdq $0x00,%xmm2,%xmm6 */
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+.byte 102,15,58,68,250,17 /* pclmulqdq $0x11,%xmm2,%xmm7 */
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,69,15,58,68,202,0 /* pclmulqdq $0x00,%xmm10,%xmm9 */
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm8,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm8,%xmm4
+ pxor %xmm6,%xmm9
+ pxor %xmm7,%xmm9
+ movdqa %xmm9,%xmm10
+ psrldq $8,%xmm9
+ pslldq $8,%xmm10
+ pxor %xmm9,%xmm7
+ pxor %xmm10,%xmm6
+ leaq 32(%r8),%r8
+ subq $32,%r9
+ ja .Lmod_loop /* repeat while more than 32 bytes remained before the subtraction */
+.byte 102,65,15,58,68,192,0 /* pclmulqdq $0x00,%xmm8,%xmm0; presumably the .Leven_tail target (label line not visible in this listing) */
+.byte 102,65,15,58,68,200,17 /* pclmulqdq $0x11,%xmm8,%xmm1 */
+.byte 102,15,58,68,220,0 /* pclmulqdq $0x00,%xmm4,%xmm3 */
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm6,%xmm0 /* add the pending product of the last second block */
+ pxor %xmm7,%xmm1
+ movdqa %xmm0,%xmm3 /* final fold of xmm1:xmm0 for the paired path */
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ testq %r9,%r9
+ jnz .Ldone /* non-zero remainder counter: no trailing single block to process */
+ movdqu (%r8),%xmm3 /* trailing single block; presumably the .Lodd_tail target (label line not visible in this listing) */
+.byte 102,15,56,0,221 /* pshufb %xmm5,%xmm3 */
+ pxor %xmm3,%xmm0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0 /* pclmulqdq $0x00,%xmm2,%xmm0 */
+.byte 102,15,58,68,202,17 /* pclmulqdq $0x11,%xmm2,%xmm1 */
+.byte 102,15,58,68,220,0 /* pclmulqdq $0x00,%xmm4,%xmm3 */
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,197 /* pshufb %xmm5,%xmm0; presumably the .Ldone target (label line not visible in this listing) */
+ movdqu %xmm0,(%rcx) /* write the updated value back */
+ movaps (%rsp),%xmm6 /* restore the xmm registers saved by the byte-encoded prologue */
+ movaps 16(%rsp),%xmm7
+ movaps 32(%rsp),%xmm8
+ movaps 48(%rsp),%xmm9
+ movaps 64(%rsp),%xmm10
+ addq $88,%rsp /* release the 88-byte save area */
+ retq
+.p2align 6
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 /* 16-byte shuffle control that reverses byte order; presumably .Lbswap_mask (label line not visible in this listing) */
+.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 /* presumably .L0x1c2_polynomial (label line not visible in this listing) */
+.p2align 6
+.long 0,0,0,471859200,0,943718400,0,610271232 /* 16 pairs of (0, value) over four lines; NOTE(review): looks like the 4-bit remainder table of the table-driven routines — confirm */
+.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
+.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
+.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
+.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E /* 256 16-bit entries over 32 lines; NOTE(review): looks like the 8-bit remainder table read via (%r11,reg,2) — confirm */
+.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
+.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
+.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
+.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
+.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
+.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
+.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
+.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
+.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
+.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
+.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
+.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
+.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
+.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
+.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
+.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
+.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
+.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
+.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
+.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
+.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
+.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
+.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
+.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
+.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
+.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
+.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
+.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
+.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
+.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
+.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
+.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 /* NUL-terminated ASCII: "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
+.p2align 6
+.def se_handler; .scl 3; .type 32; .endef /* Exception handler referenced from .xdata. Field names in the comments below follow the Win64 CONTEXT / DISPATCHER_CONTEXT layouts with %r8 = context and %r9 = dispatcher — TODO confirm offsets against winnt.h */
+.p2align 4
+ pushq %rsi /* save every register this handler clobbers, plus flags */
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp /* stack area holding the arguments stored at 32..56(%rsp) for the call below */
+ movq 120(%r8),%rax /* context->Rax */
+ movq 248(%r8),%rbx /* context->Rip */
+ movq 8(%r9),%rsi /* disp->ImageBase */
+ movq 56(%r9),%r11 /* disp->HandlerData: the two RVAs emitted after ".rva se_handler" in .xdata */
+ movl 0(%r11),%r10d /* HandlerData[0]: prologue label RVA */
+ leaq (%rsi,%r10,1),%r10 /* convert the RVA to an absolute address */
+ cmpq %r10,%rbx
+ jb .Lin_prologue /* faulting address is before that label: skip the register fix-up */
+ movq 152(%r8),%rax /* context->Rsp */
+ movl 4(%r11),%r10d /* HandlerData[1]: epilogue label RVA */
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lin_prologue /* faulting address is at or past the epilogue label: skip the fix-up as well */
+ leaq 24(%rax),%rax /* step over three saved qwords on the faulting frame */
+ movq -8(%rax),%rbx
+ movq -16(%rax),%rbp
+ movq -24(%rax),%r12
+ movq %rbx,144(%r8) /* context->Rbx */
+ movq %rbp,160(%r8) /* context->Rbp */
+ movq %r12,216(%r8) /* context->R12 */
+ movq 8(%rax),%rdi /* presumably the .Lin_prologue target (label line not visible in this listing) */
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8) /* context->Rsp */
+ movq %rsi,168(%r8) /* context->Rsi */
+ movq %rdi,176(%r8) /* context->Rdi */
+ movq 40(%r9),%rdi /* copy destination: disp->ContextRecord */
+ movq %r8,%rsi /* copy source: the context adjusted above */
+ movl $154,%ecx /* copy length: 154 qwords */
+.long 0xa548f3fc /* bytes fc f3 48 a5: cld; rep movsq */
+ movq %r9,%rsi
+ xorq %rcx,%rcx /* 1st argument = 0 */
+ movq 8(%rsi),%rdx /* 2nd argument: disp->ImageBase */
+ movq 0(%rsi),%r8 /* 3rd argument: disp->ControlPc */
+ movq 16(%rsi),%r9 /* 4th argument: disp->FunctionEntry */
+ movq 40(%rsi),%r10 /* disp->ContextRecord */
+ leaq 56(%rsi),%r11 /* &disp->HandlerData */
+ leaq 24(%rsi),%r12 /* &disp->EstablisherFrame */
+ movq %r10,32(%rsp) /* 5th argument */
+ movq %r11,40(%rsp) /* 6th argument */
+ movq %r12,48(%rsp) /* 7th argument */
+ movq %rcx,56(%rsp) /* 8th argument = 0 */
+ call *__imp_RtlVirtualUnwind(%rip)
+ movl $1,%eax /* return value 1; presumably ExceptionContinueSearch — confirm */
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
+.section .pdata /* three function-table entries, each a (begin RVA, end RVA, unwind-info RVA) triple */
+.p2align 2
+.rva .LSEH_begin_gcm_gmult_4bit
+.rva .LSEH_end_gcm_gmult_4bit
+.rva .LSEH_info_gcm_gmult_4bit
+.rva .LSEH_begin_gcm_ghash_4bit
+.rva .LSEH_end_gcm_ghash_4bit
+.rva .LSEH_info_gcm_ghash_4bit
+.rva .LSEH_begin_gcm_ghash_clmul
+.rva .LSEH_end_gcm_ghash_clmul
+.rva .LSEH_info_gcm_ghash_clmul
+.section .xdata
+.p2align 3
+.byte 9,0,0,0 /* unwind info: version 1 with the exception-handler flag (1 | 1<<3), no prologue codes */
+.rva se_handler
+.rva .Lgmult_prologue,.Lgmult_epilogue /* handler data: the two RVAs se_handler compares the faulting address against */
+.byte 9,0,0,0 /* same header for the second routine */
+.rva se_handler
+.rva .Lghash_prologue,.Lghash_epilogue /* handler data for the second routine */
+.byte 0x01,0x1f,0x0b,0x00 /* unwind info: version 1, no handler, prologue size 0x1f, 11 unwind-code slots */
+.byte 0x1f,0xa8,0x04,0x00 /* prologue offset 0x1f: xmm10 saved at rsp+0x40 */
+.byte 0x19,0x98,0x03,0x00 /* prologue offset 0x19: xmm9 saved at rsp+0x30 */
+.byte 0x13,0x88,0x02,0x00 /* prologue offset 0x13: xmm8 saved at rsp+0x20 */
+.byte 0x0d,0x78,0x01,0x00 /* prologue offset 0x0d: xmm7 saved at rsp+0x10 */
+.byte 0x08,0x68,0x00,0x00 /* prologue offset 0x08: xmm6 saved at rsp+0 */
+.byte 0x04,0xa2,0x00,0x00 /* prologue offset 0x04: small stack allocation of 10*8+8 = 88 bytes; trailing zeros pad the slot count */
diff --git a/crypto/libressl/crypto/modes/modes_lcl.h b/crypto/libressl/crypto/modes/modes_lcl.h
new file mode 100644
index 0000000..bfea189
--- /dev/null
+++ b/crypto/libressl/crypto/modes/modes_lcl.h
@@ -0,0 +1,113 @@
+/* $OpenBSD: modes_lcl.h,v 1.10 2016/12/21 15:49:29 jsing Exp $ */
+/* ====================================================================
+ * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use is governed by OpenSSL license.
+ * ====================================================================
+ */
+#include <machine/endian.h>
+#include <openssl/opensslconf.h>
+#include <openssl/modes.h>
+#if defined(_LP64) /* 64-bit integer typedefs; NOTE(review): the #else/#endif separating the two typedef sets is not visible in this listing — confirm against the real header */
+typedef long i64;
+typedef unsigned long u64;
+#define U64(C) C##UL /* appends the suffix that makes a literal a u64 constant */
+typedef long long i64;
+typedef unsigned long long u64;
+#define U64(C) C##ULL
+typedef unsigned int u32;
+typedef unsigned char u8;
+#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) /* inline-asm byte-swap helpers, only when assembler use is allowed */
+#if defined(__GNUC__) && __GNUC__>=2
+# if defined(__x86_64) || defined(__x86_64__) /* x86-64: BSWAP8 uses bswapq, BSWAP4 uses bswapl */
+# define BSWAP8(x) ({ u64 ret=(x); \
+ asm ("bswapq %0" \
+ : "+r"(ret)); ret; })
+# define BSWAP4(x) ({ u32 ret=(x); \
+ asm ("bswapl %0" \
+ : "+r"(ret)); ret; })
+# elif (defined(__i386) || defined(__i386__)) /* 32-bit x86: BSWAP8 swaps each 32-bit half and exchanges them */
+# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \
+ asm ("bswapl %0; bswapl %1" \
+ : "+r"(hi),"+r"(lo)); \
+ (u64)hi<<32|lo; })
+# define BSWAP4(x) ({ u32 ret=(x); \
+ asm ("bswapl %0" \
+ : "+r"(ret)); ret; })
+# elif (defined(__arm__) || defined(__arm)) && !defined(__STRICT_ALIGNMENT)
+# if (__ARM_ARCH >= 6) /* the rev instruction is only used from architecture level 6 upwards */
+# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \
+ asm ("rev %0,%0; rev %1,%1" \
+ : "+r"(hi),"+r"(lo)); \
+ (u64)hi<<32|lo; })
+# define BSWAP4(x) ({ u32 ret; \
+ asm ("rev %0,%1" \
+ : "=r"(ret) : "r"((u32)(x))); \
+ ret; })
+# endif
+# endif
+#if defined(BSWAP4) && !defined(__STRICT_ALIGNMENT) /* NOTE(review): the #endif lines closing the blocks above and the #else between the two GETU32/PUTU32 pairs are not visible in this listing — confirm */
+#define GETU32(p) BSWAP4(*(const u32 *)(p)) /* word load followed by a byte swap */
+#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
+#define GETU32(p) ((u32)(p)[0]<<24|(u32)(p)[1]<<16|(u32)(p)[2]<<8|(u32)(p)[3]) /* byte-wise big-endian load, no alignment requirement */
+#define PUTU32(p,v) ((p)[0]=(u8)((v)>>24),(p)[1]=(u8)((v)>>16),(p)[2]=(u8)((v)>>8),(p)[3]=(u8)(v)) /* byte-wise big-endian store */
+/* GCM definitions */
+typedef struct { u64 hi,lo; } u128;
+#ifdef TABLE_BITS
+#undef TABLE_BITS
+ * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
+ * never be set to 8 [or 1]. For further information see gcm128.c.
+ */
+#define TABLE_BITS 4
+struct gcm128_context { /* state for the GCM mode routines; NOTE(review): closing "};" lines of these structs are not visible in this listing */
+ /* Following 6 names follow names in GCM specification */
+ union { u64 u[2]; u32 d[4]; u8 c[16]; size_t t[16/sizeof(size_t)]; } /* one 16-byte block viewable as u64, u32, bytes or size_t words */
+ Yi,EKi,EK0,len,Xi,H;
+ /* Relative position of Xi, H and pre-computed Htable is used
+ * in some assembler modules, i.e. don't change the order! */
+#if TABLE_BITS==8
+ u128 Htable[256];
+ u128 Htable[16]; /* NOTE(review): the #else/#endif selecting between the two Htable sizes is not visible in this listing — confirm */
+ void (*gmult)(u64 Xi[2],const u128 Htable[16]); /* multiply hook taking Xi and the table */
+ void (*ghash)(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); /* bulk hook that also takes an input buffer and its length */
+ unsigned int mres, ares;
+ block128_f block; /* block cipher callback */
+ void *key; /* opaque key passed to the callback */
+struct xts128_context {
+ void *key1, *key2; /* CRYPTO_xts128_encrypt uses key1/block1 on data blocks and key2/block2 on the IV to form the tweak */
+ block128_f block1,block2;
+struct ccm128_context {
+ union { u64 u[2]; u8 c[16]; } nonce, cmac; /* two 16-byte blocks */
+ u64 blocks;
+ block128_f block;
+ void *key;
diff --git a/crypto/libressl/crypto/modes/ofb128.c b/crypto/libressl/crypto/modes/ofb128.c
new file mode 100644
index 0000000..c6ca67a
--- /dev/null
+++ b/crypto/libressl/crypto/modes/ofb128.c
@@ -0,0 +1,119 @@
+/* $OpenBSD: ofb128.c,v 1.4 2015/02/10 09:46:30 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ *
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * ====================================================================
+ *
+ */
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
+#include <string.h>
+#ifndef MODES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+/* The input and output encrypted as though 128bit ofb mode is being
+ * used. The extra state information to record how much of the
+ * 128bit block we have used is contained in *num;
+ */
+void CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out, /* XORs len bytes of in with the keystream held in ivec and writes them to out; NOTE(review): the function's braces are not visible in this listing */
+ size_t len, const void *key,
+ unsigned char ivec[16], int *num, /* ivec: current keystream block, re-encrypted in place; *num: offset already consumed within it */
+ block128_f block)
+ unsigned int n;
+ size_t l=0;
+ n = *num; /* resume where the previous call stopped */
+ if (16%sizeof(size_t) == 0) do { /* always true actually */
+ while (n && len) { /* first use up the remainder of a partially consumed keystream block */
+ *(out++) = *(in++) ^ ivec[n];
+ --len;
+ n = (n+1) % 16;
+ }
+ if (((size_t)in|(size_t)out|(size_t)ivec)%sizeof(size_t) != 0) /* any pointer not size_t-aligned: leave the word-wise path */
+ break; /* exits the do/while(0) and continues at the byte-wise loop below */
+ while (len>=16) { /* whole blocks: make a fresh keystream block, then XOR one size_t at a time */
+ (*block)(ivec, ivec, key);
+ for (; n<16; n+=sizeof(size_t))
+ *(size_t*)(out+n) =
+ *(size_t*)(in+n) ^ *(size_t*)(ivec+n);
+ len -= 16;
+ out += 16;
+ in += 16;
+ n = 0;
+ }
+ if (len) { /* trailing partial block: n ends as the number of keystream bytes consumed */
+ (*block)(ivec, ivec, key);
+ while (len--) {
+ out[n] = in[n] ^ ivec[n];
+ ++n;
+ }
+ }
+ *num = n; /* persist the offset for the next call */
+ return;
+ } while(0);
+ /* the rest would be commonly eliminated by x86* compiler */
+ while (l<len) { /* byte-at-a-time fallback */
+ if (n==0) { /* keystream block exhausted: generate the next one */
+ (*block)(ivec, ivec, key);
+ }
+ out[l] = in[l] ^ ivec[n];
+ ++l;
+ n = (n+1) % 16;
+ }
+ *num=n;
diff --git a/crypto/libressl/crypto/modes/xts128.c b/crypto/libressl/crypto/modes/xts128.c
new file mode 100644
index 0000000..e40505e
--- /dev/null
+++ b/crypto/libressl/crypto/modes/xts128.c
@@ -0,0 +1,185 @@
+/* $OpenBSD: xts128.c,v 1.7 2017/08/13 17:46:24 bcook Exp $ */
+/* ====================================================================
+ * Copyright (c) 2011 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ *
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * ====================================================================
+ */
+#include <machine/endian.h>
+#include <openssl/crypto.h>
+#include "modes_lcl.h"
+#include <string.h>
+#ifndef MODES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16], /* processes len bytes of inp into out with a per-block tweak; returns -1 when len<16, otherwise 0. NOTE(review): braces and several preprocessor lines are not visible in this listing */
+ const unsigned char *inp, unsigned char *out,
+ size_t len, int enc) /* enc non-zero selects the encrypt-side handling of a trailing partial block */
+ union { u64 u[2]; u32 d[4]; u8 c[16]; } tweak, scratch;
+ unsigned int i;
+ if (len<16) return -1; /* at least one full block is required */
+ memcpy(tweak.c, iv, 16);
+ (*ctx->block2)(tweak.c,tweak.c,ctx->key2); /* initial tweak = iv passed through block2 with key2 */
+ if (!enc && (len%16)) len-=16; /* decrypting with a partial tail: hold back the last full block for the tail code below */
+ while (len>=16) {
+ memcpy(scratch.c,inp,16); /* NOTE(review): this memcpy form and the u64-cast form two lines below look like alternative arms of a dropped alignment conditional — confirm */
+ scratch.u[0] ^= tweak.u[0];
+ scratch.u[1] ^= tweak.u[1];
+ scratch.u[0] = ((u64*)inp)[0]^tweak.u[0];
+ scratch.u[1] = ((u64*)inp)[1]^tweak.u[1];
+ (*ctx->block1)(scratch.c,scratch.c,ctx->key1); /* block operation on the tweaked input */
+ scratch.u[0] ^= tweak.u[0]; /* NOTE(review): same pattern for the output — memcpy form here, u64-cast form below */
+ scratch.u[1] ^= tweak.u[1];
+ memcpy(out,scratch.c,16);
+ ((u64*)out)[0] = scratch.u[0]^=tweak.u[0];
+ ((u64*)out)[1] = scratch.u[1]^=tweak.u[1];
+ inp += 16;
+ out += 16;
+ len -= 16;
+ if (len==0) return 0; /* input was a whole number of blocks: done */
+ unsigned int carry,res; /* tweak update: shift the 128-bit tweak left by one bit ... */
+ res = 0x87&(((int)tweak.d[3])>>31); /* ... 0x87 when the top bit of tweak.d[3] is set, else 0 */
+ carry = (unsigned int)(tweak.u[0]>>63); /* bit carried from the low qword into the high qword */
+ tweak.u[0] = (tweak.u[0]<<1)^res;
+ tweak.u[1] = (tweak.u[1]<<1)|carry;
+#else /* BIG_ENDIAN */
+ size_t c;
+ for (c=0,i=0;i<16;++i) { /* same shift done byte by byte, independent of host byte order */
+ /*+ substitutes for |, because c is 1 bit */
+ c += ((size_t)tweak.c[i])<<1;
+ tweak.c[i] = (u8)c;
+ c = c>>8;
+ }
+ tweak.c[0] ^= (u8)(0x87&(0-c)); /* xor 0x87 into byte 0 when a bit was shifted out of byte 15 */
+ }
+ if (enc) { /* encrypt side of the partial tail (ciphertext stealing) */
+ for (i=0;i<len;++i) { /* tail output takes the leading bytes of the last block result; tail input replaces them in scratch */
+ u8 c = inp[i];
+ out[i] = scratch.c[i];
+ scratch.c[i] = c;
+ }
+ scratch.u[0] ^= tweak.u[0];
+ scratch.u[1] ^= tweak.u[1];
+ (*ctx->block1)(scratch.c,scratch.c,ctx->key1);
+ scratch.u[0] ^= tweak.u[0];
+ scratch.u[1] ^= tweak.u[1];
+ memcpy(out-16,scratch.c,16); /* overwrite the previously written full block */
+ }
+ else { /* decrypt side of the partial tail */
+ union { u64 u[2]; u8 c[16]; } tweak1; /* tweak advanced by one more step; applied first, to the held-back full block */
+ unsigned int carry,res; /* NOTE(review): the word-wise and byte-wise updates below look like the two arms of the same dropped endianness conditional — confirm */
+ res = 0x87&(((int)tweak.d[3])>>31);
+ carry = (unsigned int)(tweak.u[0]>>63);
+ tweak1.u[0] = (tweak.u[0]<<1)^res;
+ tweak1.u[1] = (tweak.u[1]<<1)|carry;
+ size_t c;
+ for (c=0,i=0;i<16;++i) {
+ /*+ substitutes for |, because c is 1 bit */
+ c += ((size_t)tweak.c[i])<<1;
+ tweak1.c[i] = (u8)c;
+ c = c>>8;
+ }
+ tweak1.c[0] ^= (u8)(0x87&(0-c));
+ memcpy(scratch.c,inp,16); /* NOTE(review): memcpy form and u64-cast form again both visible */
+ scratch.u[0] ^= tweak1.u[0];
+ scratch.u[1] ^= tweak1.u[1];
+ scratch.u[0] = ((u64*)inp)[0]^tweak1.u[0];
+ scratch.u[1] = ((u64*)inp)[1]^tweak1.u[1];
+ (*ctx->block1)(scratch.c,scratch.c,ctx->key1);
+ scratch.u[0] ^= tweak1.u[0];
+ scratch.u[1] ^= tweak1.u[1];
+ for (i=0;i<len;++i) { /* swap the tail bytes: emit the partial output, pull the partial input into scratch */
+ u8 c = inp[16+i];
+ out[16+i] = scratch.c[i];
+ scratch.c[i] = c;
+ }
+ scratch.u[0] ^= tweak.u[0]; /* second pass uses the earlier tweak */
+ scratch.u[1] ^= tweak.u[1];
+ (*ctx->block1)(scratch.c,scratch.c,ctx->key1);
+ scratch.u[0] ^= tweak.u[0]; /* NOTE(review): memcpy form here, u64-cast form below */
+ scratch.u[1] ^= tweak.u[1];
+ memcpy (out,scratch.c,16);
+ ((u64*)out)[0] = scratch.u[0]^tweak.u[0];
+ ((u64*)out)[1] = scratch.u[1]^tweak.u[1];
+ }
+ return 0;
diff --git a/crypto/libressl/crypto/poly1305/Makefile b/crypto/libressl/crypto/poly1305/Makefile
index 50eb22a..94ceaf6 100644
--- a/crypto/libressl/crypto/poly1305/Makefile
+++ b/crypto/libressl/crypto/poly1305/Makefile
@@ -1,4 +1,4 @@
+include ../../
obj = poly1305.o
diff --git a/crypto/libressl/crypto/sha/Makefile b/crypto/libressl/crypto/sha/Makefile
index 51fbfcf..6eb0c20 100644
--- a/crypto/libressl/crypto/sha/Makefile
+++ b/crypto/libressl/crypto/sha/Makefile
@@ -1,4 +1,4 @@
+include ../../
obj = sha1dgst.o sha1_one.o sha256.o sha512.o
diff --git a/crypto/libressl/crypto/sha/sha1_one.c b/crypto/libressl/crypto/sha/sha1_one.c
index 8c8a0e9..57e5220 100644
--- a/crypto/libressl/crypto/sha/sha1_one.c
+++ b/crypto/libressl/crypto/sha/sha1_one.c
@@ -61,7 +61,7 @@
#include <openssl/opensslconf.h>
-//#include <openssl/crypto.h>
+#include <openssl/crypto.h>
#include <openssl/sha.h>
diff --git a/crypto/libressl/crypto/sha/sha1dgst.c b/crypto/libressl/crypto/sha/sha1dgst.c
index b741f3b..0c3df49 100644
--- a/crypto/libressl/crypto/sha/sha1dgst.c
+++ b/crypto/libressl/crypto/sha/sha1dgst.c
@@ -58,7 +58,7 @@
#include <openssl/opensslconf.h>
-//#include <openssl/crypto.h>
+#include <openssl/crypto.h>
#if !defined(OPENSSL_NO_SHA1) && !defined(OPENSSL_NO_SHA)
diff --git a/crypto/libressl/crypto/sha/sha256.c b/crypto/libressl/crypto/sha/sha256.c
index 86e5070..9c05d3b 100644
--- a/crypto/libressl/crypto/sha/sha256.c
+++ b/crypto/libressl/crypto/sha/sha256.c
@@ -14,7 +14,7 @@
#include <stdlib.h>
#include <string.h>
-//#include <openssl/crypto.h>
+#include <openssl/crypto.h>
#include <openssl/sha.h>
#include <openssl/opensslv.h>
diff --git a/crypto/libressl/crypto/sha/sha512.c b/crypto/libressl/crypto/sha/sha512.c
index 2d23614..6b95cfa 100644
--- a/crypto/libressl/crypto/sha/sha512.c
+++ b/crypto/libressl/crypto/sha/sha512.c
@@ -49,7 +49,7 @@
* <>
-//#include <openssl/crypto.h>
+#include <openssl/crypto.h>
#include <openssl/opensslv.h>
#include <openssl/sha.h>
diff --git a/crypto/libressl/include/openssl/aes.h b/crypto/libressl/include/openssl/aes.h
new file mode 100644
index 0000000..c904485
--- /dev/null
+++ b/crypto/libressl/include/openssl/aes.h
@@ -0,0 +1,126 @@
+/* $OpenBSD: aes.h,v 1.14 2014/07/09 09:10:07 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ *
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * ====================================================================
+ *
+ */
+#ifndef HEADER_AES_H /* include guard */
+#define HEADER_AES_H
+#include <openssl/opensslconf.h>
+#error AES is disabled. /* NOTE(review): shown unconditionally here; the surrounding #ifdef/#endif appears to have been dropped from this listing — confirm */
+#include <stddef.h>
+#define AES_ENCRYPT 1 /* presumably the values accepted by the "enc" parameters declared below — confirm */
+#define AES_DECRYPT 0
+/* Because array size can't be a const in C, the following two are macros.
+ Both sizes are in bytes. */
+#define AES_MAXNR 14 /* sizes rd_key below: 4*(AES_MAXNR+1) words */
+#define AES_BLOCK_SIZE 16 /* length of the ivec and ecount_buf arrays of AES_ctr128_encrypt */
+#ifdef __cplusplus
+extern "C" {
+/* This should be a hidden type, but EVP requires that the size be known */
+struct aes_key_st {
+ unsigned int rd_key[4 *(AES_MAXNR + 1)]; /* 60 words of key material */
+ int rounds;
+typedef struct aes_key_st AES_KEY; /* NOTE(review): the struct's closing "};" is not visible in this listing */
+const char *AES_options(void);
+int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key);
+int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key);
+void AES_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+void AES_decrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+void AES_ecb_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key, const int enc);
+void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const AES_KEY *key, unsigned char *ivec, const int enc);
+void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const AES_KEY *key, unsigned char *ivec, int *num,
+ const int enc);
+void AES_cfb1_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const AES_KEY *key, unsigned char *ivec, int *num,
+ const int enc);
+void AES_cfb8_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const AES_KEY *key, unsigned char *ivec, int *num,
+ const int enc);
+void AES_ofb128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const AES_KEY *key, unsigned char *ivec, int *num);
+void AES_ctr128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const AES_KEY *key, unsigned char ivec[AES_BLOCK_SIZE],
+ unsigned char ecount_buf[AES_BLOCK_SIZE], unsigned int *num);
+/* NB: the IV is _two_ blocks long */
+void AES_ige_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const AES_KEY *key, unsigned char *ivec, const int enc);
+int AES_wrap_key(AES_KEY *key, const unsigned char *iv, unsigned char *out,
+ const unsigned char *in, unsigned int inlen);
+int AES_unwrap_key(AES_KEY *key, const unsigned char *iv, unsigned char *out,
+ const unsigned char *in, unsigned int inlen);
+#ifdef __cplusplus
+#endif /* !HEADER_AES_H */
diff --git a/crypto/libressl/include/openssl/blowfish.h b/crypto/libressl/include/openssl/blowfish.h
new file mode 100644
index 0000000..4d2db80
--- /dev/null
+++ b/crypto/libressl/include/openssl/blowfish.h
@@ -0,0 +1,112 @@
+/* $OpenBSD: blowfish.h,v 1.14 2014/07/10 09:01:04 miod Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young ("
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson ("
+ *
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+#include <openssl/opensslconf.h>
+#ifdef __cplusplus
+extern "C" {
+#error BF is disabled.
+#define BF_ENCRYPT 1 /* presumably the value passed as `enc` to BF_*_encrypt to encrypt -- TODO confirm */
+#define BF_DECRYPT 0 /* presumably the value passed as `enc` to BF_*_encrypt to decrypt -- TODO confirm */
+/*
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ * ! BF_LONG has to be at least 32 bits wide. If it's wider, then !
+ * ! BF_LONG_LOG2 has to be defined along. !
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ */
+#define BF_LONG unsigned int /* word type used for BF_KEY.S and for the data argument of
+ * BF_encrypt/BF_decrypt; the banner above requires it to be at least 32 bits */
+#define BF_ROUNDS 16 /* not referenced by any other line visible in this hunk */
+#define BF_BLOCK 8 /* presumably the cipher block size in bytes -- TODO confirm */
+typedef struct bf_key_st
+ {
+ BF_LONG S[4*256]; /* 1024 BF_LONG words; presumably four 256-entry S-boxes -- TODO
+ * confirm. NOTE(review): Blowfish key schedules normally also hold a P-array
+ * of BF_ROUNDS+2 subkeys; no such member is visible here -- check that a
+ * line was not lost from this struct. */
+ } BF_KEY;
+void BF_set_key(BF_KEY *key, int len, const unsigned char *data); /* *key is the only
+ * writable argument; presumably len is the byte length of data -- TODO confirm. */
+void BF_encrypt(BF_LONG *data,const BF_KEY *key); /* data is the only writable argument
+ * and there is no separate output pointer, so the result is returned through
+ * data. Works on BF_LONG words, not bytes; the word count is not visible
+ * here (presumably two, i.e. one BF_BLOCK) -- TODO confirm. */
+void BF_decrypt(BF_LONG *data,const BF_KEY *key); /* Same signature as BF_encrypt. */
+void BF_ecb_encrypt(const unsigned char *in, unsigned char *out,
+ const BF_KEY *key, int enc); /* Byte-oriented, no length argument; presumably one
+ * BF_BLOCK-byte block with enc = BF_ENCRYPT or BF_DECRYPT -- TODO confirm. */
+void BF_cbc_encrypt(const unsigned char *in, unsigned char *out, long length,
+ const BF_KEY *schedule, unsigned char *ivec, int enc); /* length is a signed long
+ * here (the AES mode functions use size_t); ivec is writable. */
+void BF_cfb64_encrypt(const unsigned char *in, unsigned char *out, long length,
+ const BF_KEY *schedule, unsigned char *ivec, int *num, int enc); /* ivec and *num
+ * are writable; presumably *num tracks the offset inside the current block
+ * between calls -- TODO confirm. */
+void BF_ofb64_encrypt(const unsigned char *in, unsigned char *out, long length,
+ const BF_KEY *schedule, unsigned char *ivec, int *num); /* Takes no enc argument;
+ * ivec and *num are writable. */
+const char *BF_options(void); /* Returns a read-only string; its contents are not
+ * determinable from this header. */
+#ifdef __cplusplus
diff --git a/crypto/libressl/include/openssl/crypto.h b/crypto/libressl/include/openssl/crypto.h
new file mode 100644
index 0000000..46cc836
--- /dev/null
+++ b/crypto/libressl/include/openssl/crypto.h
@@ -0,0 +1,12 @@
+#include <unistd.h>
+#include <openssl/opensslconf.h>
+static inline void
+OpenSSLDie(const char *file, int line, const char *assertion) /* NOTE(review): no braces
+ * are visible around the body in this view and a plain prototype of the same
+ * function follows below; lines appear to be missing -- check the real file. */
+ _exit(1); /* the only visible statement: terminate the process with status 1;
+ * file, line and assertion are not used in what is shown */
+/* die if we have to */
+void OpenSSLDie(const char *file, int line, const char *assertion);
+#define OPENSSL_assert(e) (void)((e) ? 0 : (OpenSSLDie(__FILE__, __LINE__, #e),1)) /* evaluates e once; when it is false, calls OpenSSLDie with the current file, line and the stringified expression. No NDEBUG guard is visible here. */
diff --git a/crypto/libressl/include/openssl/modes.h b/crypto/libressl/include/openssl/modes.h
new file mode 100644
index 0000000..67ec751
--- /dev/null
+++ b/crypto/libressl/include/openssl/modes.h
@@ -0,0 +1,144 @@
+/* $OpenBSD: modes.h,v 1.3 2018/07/24 10:47:19 bcook Exp $ */
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Rights for redistribution and usage in source and binary
+ * forms are granted according to the OpenSSL license.
+ */
+#include <stddef.h>
+#ifdef __cplusplus
+extern "C" {
+typedef void (*block128_f)(const unsigned char in[16],
+ unsigned char out[16],
+ const void *key); /* Callback type: 16-byte in, 16-byte out, opaque read-only key.
+ * Used as the `block` argument of the CRYPTO_* functions declared below. */
+typedef void (*cbc128_f)(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], int enc); /* Callback type used as the `cbc` argument of
+ * the CTS functions below; ivec is writable. enc presumably selects the
+ * direction -- TODO confirm. */
+typedef void (*ctr128_f)(const unsigned char *in, unsigned char *out,
+ size_t blocks, const void *key,
+ const unsigned char ivec[16]); /* Callback type used as the `ctr`/`stream` argument
+ * of the *_ctr32 functions below. The size parameter is named blocks, not
+ * len (presumably a count of 16-byte blocks -- TODO confirm); ivec is const. */
+typedef void (*ccm128_f)(const unsigned char *in, unsigned char *out,
+ size_t blocks, const void *key,
+ const unsigned char ivec[16],unsigned char cmac[16]); /* Same as ctr128_f plus a
+ * writable 16-byte cmac; used as the `stream` argument of the *_ccm64
+ * functions below. */
+void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block); /* Cipher-agnostic: the caller supplies
+ * an opaque key and a block128_f callback (presumably key is handed to block
+ * unchanged -- TODO confirm). ivec is a writable 16-byte buffer. CBC has
+ * separate encrypt and decrypt entry points instead of an enc flag. */
+void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block); /* Same parameter list as
+ * CRYPTO_cbc128_encrypt. */
+void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], unsigned char ecount_buf[16],
+ unsigned int *num, block128_f block); /* ivec, ecount_buf and *num are all writable;
+ * num is unsigned int * here, int * in the OFB/CFB functions below. */
+void CRYPTO_ctr128_encrypt_ctr32(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], unsigned char ecount_buf[16],
+ unsigned int *num, ctr128_f ctr); /* Same as CRYPTO_ctr128_encrypt except that the
+ * callback is a ctr128_f rather than a block128_f. */
+void CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], int *num,
+ block128_f block); /* No enc argument; ivec and *num are writable. */
+void CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], int *num,
+ int enc, block128_f block); /* ivec and *num are writable; enc presumably selects
+ * the direction -- TODO confirm. */
+void CRYPTO_cfb128_8_encrypt(const unsigned char *in, unsigned char *out,
+ size_t length, const void *key,
+ unsigned char ivec[16], int *num,
+ int enc, block128_f block); /* Same shape as CRYPTO_cfb128_encrypt; the size
+ * parameter is named length here. */
+void CRYPTO_cfb128_1_encrypt(const unsigned char *in, unsigned char *out,
+ size_t bits, const void *key,
+ unsigned char ivec[16], int *num,
+ int enc, block128_f block); /* Same shape as CRYPTO_cfb128_encrypt, but the size
+ * parameter is named bits -- presumably a bit count, not bytes; TODO confirm. */
+size_t CRYPTO_cts128_encrypt_block(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block); /* The *_block variants take a
+ * block128_f callback. All CTS functions return size_t; what the value
+ * means is not visible here (presumably bytes processed, 0 on bad len) --
+ * TODO confirm. ivec is writable. */
+size_t CRYPTO_cts128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], cbc128_f cbc); /* Same as the *_block variant but driven by
+ * a cbc128_f callback. */
+size_t CRYPTO_cts128_decrypt_block(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block); /* Decrypt counterpart, block128_f form. */
+size_t CRYPTO_cts128_decrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], cbc128_f cbc); /* Decrypt counterpart, cbc128_f form. */
+size_t CRYPTO_nistcts128_encrypt_block(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block); /* The nistcts128 family mirrors the
+ * cts128 signatures one for one; how its output differs is not visible in
+ * this header. */
+size_t CRYPTO_nistcts128_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], cbc128_f cbc); /* cbc128_f form of the above. */
+size_t CRYPTO_nistcts128_decrypt_block(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], block128_f block); /* Decrypt counterpart, block128_f form. */
+size_t CRYPTO_nistcts128_decrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], cbc128_f cbc); /* Decrypt counterpart, cbc128_f form. */
+typedef struct gcm128_context GCM128_CONTEXT; /* only the struct tag is declared in the
+ * lines shown here; the members are not visible in this header */
+GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block); /* Returns a context
+ * pointer; presumably allocated by the callee and to be handed back to
+ * CRYPTO_gcm128_release -- TODO confirm. */
+void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block); /* Same key
+ * and block arguments as CRYPTO_gcm128_new, but on a caller-supplied ctx. */
+void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
+ size_t len); /* iv is read-only with an explicit length; returns nothing. */
+int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
+ size_t len); /* Returns int; the meaning of the value is not visible here. */
+int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
+ const unsigned char *in, unsigned char *out,
+ size_t len); /* Key and callback come from ctx; returns int (meaning not visible
+ * here). */
+int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
+ const unsigned char *in, unsigned char *out,
+ size_t len); /* Same parameter list as CRYPTO_gcm128_encrypt. */
+int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
+ const unsigned char *in, unsigned char *out,
+ size_t len, ctr128_f stream); /* As CRYPTO_gcm128_encrypt plus a ctr128_f callback. */
+int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
+ const unsigned char *in, unsigned char *out,
+ size_t len, ctr128_f stream); /* As CRYPTO_gcm128_decrypt plus a ctr128_f callback. */
+int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
+ size_t len); /* tag is read-only here, so it is an input; presumably it is compared
+ * with the computed tag and the int reports the outcome -- TODO confirm. */
+void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len); /* tag is
+ * writable here, i.e. an output buffer of len bytes. */
+void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx); /* Presumably the counterpart of
+ * CRYPTO_gcm128_new -- TODO confirm. */
+typedef struct ccm128_context CCM128_CONTEXT; /* only the struct tag is declared in the
+ * lines shown here; no new/release pair is declared for it in this header */
+void CRYPTO_ccm128_init(CCM128_CONTEXT *ctx,
+ unsigned int M, unsigned int L, void *key,block128_f block); /* Works on a
+ * caller-supplied ctx. The meaning of M and L is not stated here
+ * (presumably the CCM tag-length and length-field-size parameters) --
+ * TODO confirm. */
+int CRYPTO_ccm128_setiv(CCM128_CONTEXT *ctx,
+ const unsigned char *nonce, size_t nlen, size_t mlen); /* Takes the nonce with its
+ * length plus a second length mlen (presumably the total message length --
+ * TODO confirm); returns int. */
+void CRYPTO_ccm128_aad(CCM128_CONTEXT *ctx,
+ const unsigned char *aad, size_t alen); /* Returns void, whereas CRYPTO_gcm128_aad
+ * returns int. */
+int CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx,
+ const unsigned char *inp, unsigned char *out, size_t len); /* Returns int; the
+ * meaning of the value is not visible here. */
+int CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx,
+ const unsigned char *inp, unsigned char *out, size_t len); /* Same parameter list as
+ * CRYPTO_ccm128_encrypt. */
+int CRYPTO_ccm128_encrypt_ccm64(CCM128_CONTEXT *ctx,
+ const unsigned char *inp, unsigned char *out, size_t len,
+ ccm128_f stream); /* As CRYPTO_ccm128_encrypt plus a ccm128_f callback. */
+int CRYPTO_ccm128_decrypt_ccm64(CCM128_CONTEXT *ctx,
+ const unsigned char *inp, unsigned char *out, size_t len,
+ ccm128_f stream); /* As CRYPTO_ccm128_decrypt plus a ccm128_f callback. */
+size_t CRYPTO_ccm128_tag(CCM128_CONTEXT *ctx, unsigned char *tag, size_t len); /* tag is
+ * a writable output buffer; returns size_t (presumably the number of tag
+ * bytes written -- TODO confirm). */
+typedef struct xts128_context XTS128_CONTEXT; /* only the struct tag is declared in the
+ * lines shown here */
+int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
+ const unsigned char *inp, unsigned char *out, size_t len, int enc); /* ctx and the
+ * 16-byte iv are both read-only, so out is the only buffer written. One
+ * entry point for both directions, presumably chosen by enc; the int
+ * return's meaning is not visible here -- TODO confirm. */
+#ifdef __cplusplus
diff --git a/crypto/libressl/ b/crypto/libressl/
index e004457..fab0710 100644
--- a/crypto/libressl/
+++ b/crypto/libressl/
@@ -1,4 +1,4 @@
CFLAGS += -I../../include/compat -I../../include
# exclude from include/compat/string.h
diff --git a/crypto/libressl/ b/crypto/libressl/
index 8e99414..1eef355 100644
--- a/crypto/libressl/
+++ b/crypto/libressl/
@@ -4,5 +4,5 @@ obj_dep_ = curve25519/curve25519.o curve25519/curve25519-generic.o \
compat/arc4random.o compat/explicit_bzero.o compat/timingsafe_memcmp.o compat/timingsafe_bcmp.o
obj_dep = $(addprefix crypto/,$(obj_dep_))
-subdirs_ = curve25519 chacha poly1305 aead sha compat
+subdirs_ = curve25519 chacha poly1305 aead sha aes bf modes compat
subdirs = $(addprefix crypto/,$(subdirs_))