From 378d4ce7552df580e3ddd89c2faa9f8c5086d646 Mon Sep 17 00:00:00 2001
From: Uros Majstorovic <majstor@majstor.org>
Date: Wed, 2 Feb 2022 06:25:38 +0100
Subject: renamed crypto -> ext

---
 ext/libressl/crypto/sha/Makefile                |   14 +
 ext/libressl/crypto/sha/sha1-elf-armv4.S        |  455 ++++
 ext/libressl/crypto/sha/sha1-elf-x86_64.S       | 2491 ++++++++++++++++++++
 ext/libressl/crypto/sha/sha1-macosx-x86_64.S    | 2488 ++++++++++++++++++++
 ext/libressl/crypto/sha/sha1-masm-x86_64.S      | 2746 +++++++++++++++++++++++
 ext/libressl/crypto/sha/sha1-mingw64-x86_64.S   | 2664 ++++++++++++++++++++++
 ext/libressl/crypto/sha/sha1_one.c              |   81 +
 ext/libressl/crypto/sha/sha1dgst.c              |   72 +
 ext/libressl/crypto/sha/sha256-elf-armv4.S      | 1520 +++++++++++++
 ext/libressl/crypto/sha/sha256-elf-x86_64.S     | 1782 +++++++++++++++
 ext/libressl/crypto/sha/sha256-macosx-x86_64.S  | 1779 +++++++++++++++
 ext/libressl/crypto/sha/sha256-masm-x86_64.S    | 1864 +++++++++++++++
 ext/libressl/crypto/sha/sha256-mingw64-x86_64.S | 1790 +++++++++++++++
 ext/libressl/crypto/sha/sha256.c                |  284 +++
 ext/libressl/crypto/sha/sha512-elf-armv4.S      | 1786 +++++++++++++++
 ext/libressl/crypto/sha/sha512-elf-x86_64.S     | 1806 +++++++++++++++
 ext/libressl/crypto/sha/sha512-macosx-x86_64.S  | 1803 +++++++++++++++
 ext/libressl/crypto/sha/sha512-masm-x86_64.S    | 1888 ++++++++++++++++
 ext/libressl/crypto/sha/sha512-mingw64-x86_64.S | 1814 +++++++++++++++
 ext/libressl/crypto/sha/sha512.c                |  547 +++++
 ext/libressl/crypto/sha/sha_locl.h              |  419 ++++
 21 files changed, 30093 insertions(+)
 create mode 100644 ext/libressl/crypto/sha/Makefile
 create mode 100644 ext/libressl/crypto/sha/sha1-elf-armv4.S
 create mode 100644 ext/libressl/crypto/sha/sha1-elf-x86_64.S
 create mode 100644 ext/libressl/crypto/sha/sha1-macosx-x86_64.S
 create mode 100644 ext/libressl/crypto/sha/sha1-masm-x86_64.S
 create mode 100644 ext/libressl/crypto/sha/sha1-mingw64-x86_64.S
 create mode 100644 ext/libressl/crypto/sha/sha1_one.c
 create mode 100644 ext/libressl/crypto/sha/sha1dgst.c
 create mode 100644 ext/libressl/crypto/sha/sha256-elf-armv4.S
 create mode 100644 ext/libressl/crypto/sha/sha256-elf-x86_64.S
 create mode 100644 ext/libressl/crypto/sha/sha256-macosx-x86_64.S
 create mode 100644 ext/libressl/crypto/sha/sha256-masm-x86_64.S
 create mode 100644 ext/libressl/crypto/sha/sha256-mingw64-x86_64.S
 create mode 100644 ext/libressl/crypto/sha/sha256.c
 create mode 100644 ext/libressl/crypto/sha/sha512-elf-armv4.S
 create mode 100644 ext/libressl/crypto/sha/sha512-elf-x86_64.S
 create mode 100644 ext/libressl/crypto/sha/sha512-macosx-x86_64.S
 create mode 100644 ext/libressl/crypto/sha/sha512-masm-x86_64.S
 create mode 100644 ext/libressl/crypto/sha/sha512-mingw64-x86_64.S
 create mode 100644 ext/libressl/crypto/sha/sha512.c
 create mode 100644 ext/libressl/crypto/sha/sha_locl.h

diff --git a/ext/libressl/crypto/sha/Makefile b/ext/libressl/crypto/sha/Makefile
new file mode 100644
index 0000000..6eb0c20
--- /dev/null
+++ b/ext/libressl/crypto/sha/Makefile
@@ -0,0 +1,14 @@
+include ../../ssl_common.mk
+CFLAGS += -D__BEGIN_HIDDEN_DECLS= -D__END_HIDDEN_DECLS=
+
+obj = sha1dgst.o sha1_one.o sha256.o sha512.o
+
+
+all: $(obj)
+dep: all
+
+%.o: %.c
+	$(CC) $(CFLAGS) -c $<
+
+clean:
+	rm -f *.o *.a
diff --git a/ext/libressl/crypto/sha/sha1-elf-armv4.S b/ext/libressl/crypto/sha/sha1-elf-armv4.S
new file mode 100644
index 0000000..5aeaf7c
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha1-elf-armv4.S
@@ -0,0 +1,455 @@
+#include "arm_arch.h"
+
+.text
+
+.global	sha1_block_data_order
+.type	sha1_block_data_order,%function
+
+.align	2
+sha1_block_data_order:
+	stmdb	sp!,{r4-r12,lr}
+	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1
+	ldmia	r0,{r3,r4,r5,r6,r7}
+.Lloop:
+	ldr	r8,.LK_00_19
+	mov	r14,sp
+	sub	sp,sp,#15*4
+	mov	r5,r5,ror#30
+	mov	r6,r6,ror#30
+	mov	r7,r7,ror#30		@ [6]
+.L_00_15:
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+	ldrb	r10,[r1,#2]
+	ldrb	r9,[r1,#3]
+	ldrb	r11,[r1,#1]
+	add	r7,r8,r7,ror#2			@ E+=K_00_19
+	ldrb	r12,[r1],#4
+	orr	r9,r9,r10,lsl#8
+	eor	r10,r5,r6			@ F_xx_xx
+	orr	r9,r9,r11,lsl#16
+	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
+	orr	r9,r9,r12,lsl#24
+#else
+	ldr	r9,[r1],#4			@ handles unaligned
+	add	r7,r8,r7,ror#2			@ E+=K_00_19
+	eor	r10,r5,r6			@ F_xx_xx
+	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
+#ifdef __ARMEL__
+	rev	r9,r9				@ byte swap
+#endif
+#endif
+	and	r10,r4,r10,ror#2
+	add	r7,r7,r9			@ E+=X[i]
+	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
+	str	r9,[r14,#-4]!
+	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+	ldrb	r10,[r1,#2]
+	ldrb	r9,[r1,#3]
+	ldrb	r11,[r1,#1]
+	add	r6,r8,r6,ror#2			@ E+=K_00_19
+	ldrb	r12,[r1],#4
+	orr	r9,r9,r10,lsl#8
+	eor	r10,r4,r5			@ F_xx_xx
+	orr	r9,r9,r11,lsl#16
+	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
+	orr	r9,r9,r12,lsl#24
+#else
+	ldr	r9,[r1],#4			@ handles unaligned
+	add	r6,r8,r6,ror#2			@ E+=K_00_19
+	eor	r10,r4,r5			@ F_xx_xx
+	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
+#ifdef __ARMEL__
+	rev	r9,r9				@ byte swap
+#endif
+#endif
+	and	r10,r3,r10,ror#2
+	add	r6,r6,r9			@ E+=X[i]
+	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
+	str	r9,[r14,#-4]!
+	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+	ldrb	r10,[r1,#2]
+	ldrb	r9,[r1,#3]
+	ldrb	r11,[r1,#1]
+	add	r5,r8,r5,ror#2			@ E+=K_00_19
+	ldrb	r12,[r1],#4
+	orr	r9,r9,r10,lsl#8
+	eor	r10,r3,r4			@ F_xx_xx
+	orr	r9,r9,r11,lsl#16
+	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
+	orr	r9,r9,r12,lsl#24
+#else
+	ldr	r9,[r1],#4			@ handles unaligned
+	add	r5,r8,r5,ror#2			@ E+=K_00_19
+	eor	r10,r3,r4			@ F_xx_xx
+	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
+#ifdef __ARMEL__
+	rev	r9,r9				@ byte swap
+#endif
+#endif
+	and	r10,r7,r10,ror#2
+	add	r5,r5,r9			@ E+=X[i]
+	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
+	str	r9,[r14,#-4]!
+	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+	ldrb	r10,[r1,#2]
+	ldrb	r9,[r1,#3]
+	ldrb	r11,[r1,#1]
+	add	r4,r8,r4,ror#2			@ E+=K_00_19
+	ldrb	r12,[r1],#4
+	orr	r9,r9,r10,lsl#8
+	eor	r10,r7,r3			@ F_xx_xx
+	orr	r9,r9,r11,lsl#16
+	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
+	orr	r9,r9,r12,lsl#24
+#else
+	ldr	r9,[r1],#4			@ handles unaligned
+	add	r4,r8,r4,ror#2			@ E+=K_00_19
+	eor	r10,r7,r3			@ F_xx_xx
+	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
+#ifdef __ARMEL__
+	rev	r9,r9				@ byte swap
+#endif
+#endif
+	and	r10,r6,r10,ror#2
+	add	r4,r4,r9			@ E+=X[i]
+	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
+	str	r9,[r14,#-4]!
+	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+	ldrb	r10,[r1,#2]
+	ldrb	r9,[r1,#3]
+	ldrb	r11,[r1,#1]
+	add	r3,r8,r3,ror#2			@ E+=K_00_19
+	ldrb	r12,[r1],#4
+	orr	r9,r9,r10,lsl#8
+	eor	r10,r6,r7			@ F_xx_xx
+	orr	r9,r9,r11,lsl#16
+	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
+	orr	r9,r9,r12,lsl#24
+#else
+	ldr	r9,[r1],#4			@ handles unaligned
+	add	r3,r8,r3,ror#2			@ E+=K_00_19
+	eor	r10,r6,r7			@ F_xx_xx
+	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
+#ifdef __ARMEL__
+	rev	r9,r9				@ byte swap
+#endif
+#endif
+	and	r10,r5,r10,ror#2
+	add	r3,r3,r9			@ E+=X[i]
+	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
+	str	r9,[r14,#-4]!
+	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
+	teq	r14,sp
+	bne	.L_00_15		@ [((11+4)*5+2)*3]
+	sub	sp,sp,#25*4
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+	ldrb	r10,[r1,#2]
+	ldrb	r9,[r1,#3]
+	ldrb	r11,[r1,#1]
+	add	r7,r8,r7,ror#2			@ E+=K_00_19
+	ldrb	r12,[r1],#4
+	orr	r9,r9,r10,lsl#8
+	eor	r10,r5,r6			@ F_xx_xx
+	orr	r9,r9,r11,lsl#16
+	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
+	orr	r9,r9,r12,lsl#24
+#else
+	ldr	r9,[r1],#4			@ handles unaligned
+	add	r7,r8,r7,ror#2			@ E+=K_00_19
+	eor	r10,r5,r6			@ F_xx_xx
+	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
+#ifdef __ARMEL__
+	rev	r9,r9				@ byte swap
+#endif
+#endif
+	and	r10,r4,r10,ror#2
+	add	r7,r7,r9			@ E+=X[i]
+	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
+	str	r9,[r14,#-4]!
+	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r4,r5			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r3,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r6,r6,r9			@ E+=X[i]
+	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
+	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r3,r4			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r7,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r5,r5,r9			@ E+=X[i]
+	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
+	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r7,r3			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r6,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r4,r4,r9			@ E+=X[i]
+	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
+	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r6,r7			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r5,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r3,r3,r9			@ E+=X[i]
+	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
+	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
+
+	ldr	r8,.LK_20_39		@ [+15+16*4]
+	cmn	sp,#0			@ [+3], clear carry to denote 20_39
+.L_20_39_or_60_79:
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r5,r6			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	eor r10,r4,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r7,r7,r9			@ E+=X[i]
+	add	r7,r7,r10			@ E+=F_20_39(B,C,D)
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r4,r5			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	eor r10,r3,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r6,r6,r9			@ E+=X[i]
+	add	r6,r6,r10			@ E+=F_20_39(B,C,D)
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r3,r4			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	eor r10,r7,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r5,r5,r9			@ E+=X[i]
+	add	r5,r5,r10			@ E+=F_20_39(B,C,D)
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r7,r3			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	eor r10,r6,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r4,r4,r9			@ E+=X[i]
+	add	r4,r4,r10			@ E+=F_20_39(B,C,D)
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r6,r7			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	eor r10,r5,r10,ror#2					@ F_xx_xx
+						@ F_xx_xx
+	add	r3,r3,r9			@ E+=X[i]
+	add	r3,r3,r10			@ E+=F_20_39(B,C,D)
+	teq	r14,sp			@ preserve carry
+	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
+	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes
+
+	ldr	r8,.LK_40_59
+	sub	sp,sp,#20*4		@ [+2]
+.L_40_59:
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r5,r6			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r4,r10,ror#2					@ F_xx_xx
+	and r11,r5,r6					@ F_xx_xx
+	add	r7,r7,r9			@ E+=X[i]
+	add	r7,r7,r10			@ E+=F_40_59(B,C,D)
+	add	r7,r7,r11,ror#2
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r4,r5			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r3,r10,ror#2					@ F_xx_xx
+	and r11,r4,r5					@ F_xx_xx
+	add	r6,r6,r9			@ E+=X[i]
+	add	r6,r6,r10			@ E+=F_40_59(B,C,D)
+	add	r6,r6,r11,ror#2
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r3,r4			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r7,r10,ror#2					@ F_xx_xx
+	and r11,r3,r4					@ F_xx_xx
+	add	r5,r5,r9			@ E+=X[i]
+	add	r5,r5,r10			@ E+=F_40_59(B,C,D)
+	add	r5,r5,r11,ror#2
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r7,r3			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r6,r10,ror#2					@ F_xx_xx
+	and r11,r7,r3					@ F_xx_xx
+	add	r4,r4,r9			@ E+=X[i]
+	add	r4,r4,r10			@ E+=F_40_59(B,C,D)
+	add	r4,r4,r11,ror#2
+	ldr	r9,[r14,#15*4]
+	ldr	r10,[r14,#13*4]
+	ldr	r11,[r14,#7*4]
+	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
+	ldr	r12,[r14,#2*4]
+	eor	r9,r9,r10
+	eor	r11,r11,r12			@ 1 cycle stall
+	eor	r10,r6,r7			@ F_xx_xx
+	mov	r9,r9,ror#31
+	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
+	eor	r9,r9,r11,ror#31
+	str	r9,[r14,#-4]!
+	and r10,r5,r10,ror#2					@ F_xx_xx
+	and r11,r6,r7					@ F_xx_xx
+	add	r3,r3,r9			@ E+=X[i]
+	add	r3,r3,r10			@ E+=F_40_59(B,C,D)
+	add	r3,r3,r11,ror#2
+	teq	r14,sp
+	bne	.L_40_59		@ [+((12+5)*5+2)*4]
+
+	ldr	r8,.LK_60_79
+	sub	sp,sp,#20*4
+	cmp	sp,#0			@ set carry to denote 60_79
+	b	.L_20_39_or_60_79	@ [+4], spare 300 bytes
+.L_done:
+	add	sp,sp,#80*4		@ "deallocate" stack frame
+	ldmia	r0,{r8,r9,r10,r11,r12}
+	add	r3,r8,r3
+	add	r4,r9,r4
+	add	r5,r10,r5,ror#2
+	add	r6,r11,r6,ror#2
+	add	r7,r12,r7,ror#2
+	stmia	r0,{r3,r4,r5,r6,r7}
+	teq	r1,r2
+	bne	.Lloop			@ [+18], total 1307
+
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r12,pc}
+#else
+	ldmia	sp!,{r4-r12,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+.align	2
+.LK_00_19:	.word	0x5a827999
+.LK_20_39:	.word	0x6ed9eba1
+.LK_40_59:	.word	0x8f1bbcdc
+.LK_60_79:	.word	0xca62c1d6
+.size	sha1_block_data_order,.-sha1_block_data_order
+.asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
+.align	2
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/sha/sha1-elf-x86_64.S b/ext/libressl/crypto/sha/sha1-elf-x86_64.S
new file mode 100644
index 0000000..5a37019
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha1-elf-x86_64.S
@@ -0,0 +1,2491 @@
+#include "x86_arch.h"
+.text	
+
+.hidden	OPENSSL_ia32cap_P
+
+.globl	sha1_block_data_order
+.type	sha1_block_data_order,@function
+.align	16
+sha1_block_data_order:
+	movl	OPENSSL_ia32cap_P+0(%rip),%r9d
+	movl	OPENSSL_ia32cap_P+4(%rip),%r8d
+	testl	$IA32CAP_MASK1_SSSE3,%r8d
+	jz	.Lialu
+	jmp	_ssse3_shortcut
+
+.align	16
+.Lialu:
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	movq	%rsp,%r11
+	movq	%rdi,%r8
+	subq	$72,%rsp
+	movq	%rsi,%r9
+	andq	$-64,%rsp
+	movq	%rdx,%r10
+	movq	%r11,64(%rsp)
+.Lprologue:
+
+	movl	0(%r8),%esi
+	movl	4(%r8),%edi
+	movl	8(%r8),%r11d
+	movl	12(%r8),%r12d
+	movl	16(%r8),%r13d
+	jmp	.Lloop
+
+.align	16
+.Lloop:
+	movl	0(%r9),%edx
+	bswapl	%edx
+	movl	%edx,0(%rsp)
+	movl	%r11d,%eax
+	movl	4(%r9),%ebp
+	movl	%esi,%ecx
+	xorl	%r12d,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%r13,1),%r13d
+	andl	%edi,%eax
+	movl	%ebp,4(%rsp)
+	addl	%ecx,%r13d
+	xorl	%r12d,%eax
+	roll	$30,%edi
+	addl	%eax,%r13d
+	movl	%edi,%eax
+	movl	8(%r9),%edx
+	movl	%r13d,%ecx
+	xorl	%r11d,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%r12,1),%r12d
+	andl	%esi,%eax
+	movl	%edx,8(%rsp)
+	addl	%ecx,%r12d
+	xorl	%r11d,%eax
+	roll	$30,%esi
+	addl	%eax,%r12d
+	movl	%esi,%eax
+	movl	12(%r9),%ebp
+	movl	%r12d,%ecx
+	xorl	%edi,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%r11,1),%r11d
+	andl	%r13d,%eax
+	movl	%ebp,12(%rsp)
+	addl	%ecx,%r11d
+	xorl	%edi,%eax
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	movl	%r13d,%eax
+	movl	16(%r9),%edx
+	movl	%r11d,%ecx
+	xorl	%esi,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%rdi,1),%edi
+	andl	%r12d,%eax
+	movl	%edx,16(%rsp)
+	addl	%ecx,%edi
+	xorl	%esi,%eax
+	roll	$30,%r12d
+	addl	%eax,%edi
+	movl	%r12d,%eax
+	movl	20(%r9),%ebp
+	movl	%edi,%ecx
+	xorl	%r13d,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%rsi,1),%esi
+	andl	%r11d,%eax
+	movl	%ebp,20(%rsp)
+	addl	%ecx,%esi
+	xorl	%r13d,%eax
+	roll	$30,%r11d
+	addl	%eax,%esi
+	movl	%r11d,%eax
+	movl	24(%r9),%edx
+	movl	%esi,%ecx
+	xorl	%r12d,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%r13,1),%r13d
+	andl	%edi,%eax
+	movl	%edx,24(%rsp)
+	addl	%ecx,%r13d
+	xorl	%r12d,%eax
+	roll	$30,%edi
+	addl	%eax,%r13d
+	movl	%edi,%eax
+	movl	28(%r9),%ebp
+	movl	%r13d,%ecx
+	xorl	%r11d,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%r12,1),%r12d
+	andl	%esi,%eax
+	movl	%ebp,28(%rsp)
+	addl	%ecx,%r12d
+	xorl	%r11d,%eax
+	roll	$30,%esi
+	addl	%eax,%r12d
+	movl	%esi,%eax
+	movl	32(%r9),%edx
+	movl	%r12d,%ecx
+	xorl	%edi,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%r11,1),%r11d
+	andl	%r13d,%eax
+	movl	%edx,32(%rsp)
+	addl	%ecx,%r11d
+	xorl	%edi,%eax
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	movl	%r13d,%eax
+	movl	36(%r9),%ebp
+	movl	%r11d,%ecx
+	xorl	%esi,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%rdi,1),%edi
+	andl	%r12d,%eax
+	movl	%ebp,36(%rsp)
+	addl	%ecx,%edi
+	xorl	%esi,%eax
+	roll	$30,%r12d
+	addl	%eax,%edi
+	movl	%r12d,%eax
+	movl	40(%r9),%edx
+	movl	%edi,%ecx
+	xorl	%r13d,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%rsi,1),%esi
+	andl	%r11d,%eax
+	movl	%edx,40(%rsp)
+	addl	%ecx,%esi
+	xorl	%r13d,%eax
+	roll	$30,%r11d
+	addl	%eax,%esi
+	movl	%r11d,%eax
+	movl	44(%r9),%ebp
+	movl	%esi,%ecx
+	xorl	%r12d,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%r13,1),%r13d
+	andl	%edi,%eax
+	movl	%ebp,44(%rsp)
+	addl	%ecx,%r13d
+	xorl	%r12d,%eax
+	roll	$30,%edi
+	addl	%eax,%r13d
+	movl	%edi,%eax
+	movl	48(%r9),%edx
+	movl	%r13d,%ecx
+	xorl	%r11d,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%r12,1),%r12d
+	andl	%esi,%eax
+	movl	%edx,48(%rsp)
+	addl	%ecx,%r12d
+	xorl	%r11d,%eax
+	roll	$30,%esi
+	addl	%eax,%r12d
+	movl	%esi,%eax
+	movl	52(%r9),%ebp
+	movl	%r12d,%ecx
+	xorl	%edi,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%r11,1),%r11d
+	andl	%r13d,%eax
+	movl	%ebp,52(%rsp)
+	addl	%ecx,%r11d
+	xorl	%edi,%eax
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	movl	%r13d,%eax
+	movl	56(%r9),%edx
+	movl	%r11d,%ecx
+	xorl	%esi,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%rdi,1),%edi
+	andl	%r12d,%eax
+	movl	%edx,56(%rsp)
+	addl	%ecx,%edi
+	xorl	%esi,%eax
+	roll	$30,%r12d
+	addl	%eax,%edi
+	movl	%r12d,%eax
+	movl	60(%r9),%ebp
+	movl	%edi,%ecx
+	xorl	%r13d,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%rsi,1),%esi
+	andl	%r11d,%eax
+	movl	%ebp,60(%rsp)
+	addl	%ecx,%esi
+	xorl	%r13d,%eax
+	roll	$30,%r11d
+	addl	%eax,%esi
+	movl	0(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	8(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	32(%rsp),%edx
+	andl	%edi,%eax
+	leal	1518500249(%rbp,%r13,1),%r13d
+	xorl	52(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$1,%edx
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	movl	%edx,0(%rsp)
+	addl	%eax,%r13d
+	movl	4(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	12(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	36(%rsp),%ebp
+	andl	%esi,%eax
+	leal	1518500249(%rdx,%r12,1),%r12d
+	xorl	56(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$1,%ebp
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	movl	%ebp,4(%rsp)
+	addl	%eax,%r12d
+	movl	8(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	40(%rsp),%edx
+	andl	%r13d,%eax
+	leal	1518500249(%rbp,%r11,1),%r11d
+	xorl	60(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$1,%edx
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	movl	%edx,8(%rsp)
+	addl	%eax,%r11d
+	movl	12(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	20(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	44(%rsp),%ebp
+	andl	%r12d,%eax
+	leal	1518500249(%rdx,%rdi,1),%edi
+	xorl	0(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$1,%ebp
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	movl	%ebp,12(%rsp)
+	addl	%eax,%edi
+	movl	16(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	24(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	48(%rsp),%edx
+	andl	%r11d,%eax
+	leal	1518500249(%rbp,%rsi,1),%esi
+	xorl	4(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$1,%edx
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	movl	%edx,16(%rsp)
+	addl	%eax,%esi
+	movl	20(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	28(%rsp),%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r13,1),%r13d
+	xorl	52(%rsp),%ebp
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	8(%rsp),%ebp
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	movl	%ebp,20(%rsp)
+	movl	24(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	32(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r12,1),%r12d
+	xorl	56(%rsp),%edx
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	12(%rsp),%edx
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%edx
+	movl	%edx,24(%rsp)
+	movl	28(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	36(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r11,1),%r11d
+	xorl	60(%rsp),%ebp
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	16(%rsp),%ebp
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	movl	%ebp,28(%rsp)
+	movl	32(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	40(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%rdi,1),%edi
+	xorl	0(%rsp),%edx
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	20(%rsp),%edx
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	movl	%edx,32(%rsp)
+	movl	36(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	44(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%rsi,1),%esi
+	xorl	4(%rsp),%ebp
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	24(%rsp),%ebp
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	movl	%ebp,36(%rsp)
+	movl	40(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	48(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r13,1),%r13d
+	xorl	8(%rsp),%edx
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	28(%rsp),%edx
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	movl	%edx,40(%rsp)
+	movl	44(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	52(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r12,1),%r12d
+	xorl	12(%rsp),%ebp
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	32(%rsp),%ebp
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	movl	%ebp,44(%rsp)
+	movl	48(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	56(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r11,1),%r11d
+	xorl	16(%rsp),%edx
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	36(%rsp),%edx
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	movl	%edx,48(%rsp)
+	movl	52(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	60(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%rdi,1),%edi
+	xorl	20(%rsp),%ebp
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	40(%rsp),%ebp
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	movl	%ebp,52(%rsp)
+	movl	56(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	0(%rsp),%edx
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%rsi,1),%esi
+	xorl	24(%rsp),%edx
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	44(%rsp),%edx
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%edx
+	movl	%edx,56(%rsp)
+	movl	60(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r13,1),%r13d
+	xorl	28(%rsp),%ebp
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	48(%rsp),%ebp
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	movl	%ebp,60(%rsp)
+	movl	0(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	8(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r12,1),%r12d
+	xorl	32(%rsp),%edx
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	52(%rsp),%edx
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%edx
+	movl	%edx,0(%rsp)
+	movl	4(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	12(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r11,1),%r11d
+	xorl	36(%rsp),%ebp
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	56(%rsp),%ebp
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	movl	%ebp,4(%rsp)
+	movl	8(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%rdi,1),%edi
+	xorl	40(%rsp),%edx
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	60(%rsp),%edx
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	movl	%edx,8(%rsp)
+	movl	12(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	20(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%rsi,1),%esi
+	xorl	44(%rsp),%ebp
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	0(%rsp),%ebp
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	movl	%ebp,12(%rsp)
+	movl	16(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	24(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r13,1),%r13d
+	xorl	48(%rsp),%edx
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	4(%rsp),%edx
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	movl	%edx,16(%rsp)
+	movl	20(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	28(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r12,1),%r12d
+	xorl	52(%rsp),%ebp
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	8(%rsp),%ebp
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	movl	%ebp,20(%rsp)
+	movl	24(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	32(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r11,1),%r11d
+	xorl	56(%rsp),%edx
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	12(%rsp),%edx
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	movl	%edx,24(%rsp)
+	movl	28(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	36(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%rdi,1),%edi
+	xorl	60(%rsp),%ebp
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	16(%rsp),%ebp
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	movl	%ebp,28(%rsp)
+	movl	32(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	40(%rsp),%edx
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%rsi,1),%esi
+	xorl	0(%rsp),%edx
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	20(%rsp),%edx
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%edx
+	movl	%edx,32(%rsp)
+	movl	36(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%r11d,%ebx
+	xorl	44(%rsp),%ebp
+	andl	%r12d,%eax
+	movl	%esi,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%r12d,%ebx
+	leal	-1894007588(%rdx,%r13,1),%r13d
+	roll	$5,%ecx
+	xorl	24(%rsp),%ebp
+	addl	%eax,%r13d
+	andl	%edi,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r13d
+	roll	$30,%edi
+	movl	%ebp,36(%rsp)
+	addl	%ecx,%r13d
+	movl	40(%rsp),%edx
+	movl	%edi,%eax
+	movl	%edi,%ebx
+	xorl	48(%rsp),%edx
+	andl	%r11d,%eax
+	movl	%r13d,%ecx
+	xorl	8(%rsp),%edx
+	xorl	%r11d,%ebx
+	leal	-1894007588(%rbp,%r12,1),%r12d
+	roll	$5,%ecx
+	xorl	28(%rsp),%edx
+	addl	%eax,%r12d
+	andl	%esi,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r12d
+	roll	$30,%esi
+	movl	%edx,40(%rsp)
+	addl	%ecx,%r12d
+	movl	44(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%esi,%ebx
+	xorl	52(%rsp),%ebp
+	andl	%edi,%eax
+	movl	%r12d,%ecx
+	xorl	12(%rsp),%ebp
+	xorl	%edi,%ebx
+	leal	-1894007588(%rdx,%r11,1),%r11d
+	roll	$5,%ecx
+	xorl	32(%rsp),%ebp
+	addl	%eax,%r11d
+	andl	%r13d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r11d
+	roll	$30,%r13d
+	movl	%ebp,44(%rsp)
+	addl	%ecx,%r11d
+	movl	48(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r13d,%ebx
+	xorl	56(%rsp),%edx
+	andl	%esi,%eax
+	movl	%r11d,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%esi,%ebx
+	leal	-1894007588(%rbp,%rdi,1),%edi
+	roll	$5,%ecx
+	xorl	36(%rsp),%edx
+	addl	%eax,%edi
+	andl	%r12d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%edi
+	roll	$30,%r12d
+	movl	%edx,48(%rsp)
+	addl	%ecx,%edi
+	movl	52(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%r12d,%ebx
+	xorl	60(%rsp),%ebp
+	andl	%r13d,%eax
+	movl	%edi,%ecx
+	xorl	20(%rsp),%ebp
+	xorl	%r13d,%ebx
+	leal	-1894007588(%rdx,%rsi,1),%esi
+	roll	$5,%ecx
+	xorl	40(%rsp),%ebp
+	addl	%eax,%esi
+	andl	%r11d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%esi
+	roll	$30,%r11d
+	movl	%ebp,52(%rsp)
+	addl	%ecx,%esi
+	movl	56(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%r11d,%ebx
+	xorl	0(%rsp),%edx
+	andl	%r12d,%eax
+	movl	%esi,%ecx
+	xorl	24(%rsp),%edx
+	xorl	%r12d,%ebx
+	leal	-1894007588(%rbp,%r13,1),%r13d
+	roll	$5,%ecx
+	xorl	44(%rsp),%edx
+	addl	%eax,%r13d
+	andl	%edi,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r13d
+	roll	$30,%edi
+	movl	%edx,56(%rsp)
+	addl	%ecx,%r13d
+	movl	60(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%edi,%ebx
+	xorl	4(%rsp),%ebp
+	andl	%r11d,%eax
+	movl	%r13d,%ecx
+	xorl	28(%rsp),%ebp
+	xorl	%r11d,%ebx
+	leal	-1894007588(%rdx,%r12,1),%r12d
+	roll	$5,%ecx
+	xorl	48(%rsp),%ebp
+	addl	%eax,%r12d
+	andl	%esi,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r12d
+	roll	$30,%esi
+	movl	%ebp,60(%rsp)
+	addl	%ecx,%r12d
+	movl	0(%rsp),%edx
+	movl	%esi,%eax
+	movl	%esi,%ebx
+	xorl	8(%rsp),%edx
+	andl	%edi,%eax
+	movl	%r12d,%ecx
+	xorl	32(%rsp),%edx
+	xorl	%edi,%ebx
+	leal	-1894007588(%rbp,%r11,1),%r11d
+	roll	$5,%ecx
+	xorl	52(%rsp),%edx
+	addl	%eax,%r11d
+	andl	%r13d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r11d
+	roll	$30,%r13d
+	movl	%edx,0(%rsp)
+	addl	%ecx,%r11d
+	movl	4(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r13d,%ebx
+	xorl	12(%rsp),%ebp
+	andl	%esi,%eax
+	movl	%r11d,%ecx
+	xorl	36(%rsp),%ebp
+	xorl	%esi,%ebx
+	leal	-1894007588(%rdx,%rdi,1),%edi
+	roll	$5,%ecx
+	xorl	56(%rsp),%ebp
+	addl	%eax,%edi
+	andl	%r12d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%edi
+	roll	$30,%r12d
+	movl	%ebp,4(%rsp)
+	addl	%ecx,%edi
+	movl	8(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%r12d,%ebx
+	xorl	16(%rsp),%edx
+	andl	%r13d,%eax
+	movl	%edi,%ecx
+	xorl	40(%rsp),%edx
+	xorl	%r13d,%ebx
+	leal	-1894007588(%rbp,%rsi,1),%esi
+	roll	$5,%ecx
+	xorl	60(%rsp),%edx
+	addl	%eax,%esi
+	andl	%r11d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%esi
+	roll	$30,%r11d
+	movl	%edx,8(%rsp)
+	addl	%ecx,%esi
+	movl	12(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%r11d,%ebx
+	xorl	20(%rsp),%ebp
+	andl	%r12d,%eax
+	movl	%esi,%ecx
+	xorl	44(%rsp),%ebp
+	xorl	%r12d,%ebx
+	leal	-1894007588(%rdx,%r13,1),%r13d
+	roll	$5,%ecx
+	xorl	0(%rsp),%ebp
+	addl	%eax,%r13d
+	andl	%edi,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r13d
+	roll	$30,%edi
+	movl	%ebp,12(%rsp)
+	addl	%ecx,%r13d
+	movl	16(%rsp),%edx
+	movl	%edi,%eax
+	movl	%edi,%ebx
+	xorl	24(%rsp),%edx
+	andl	%r11d,%eax
+	movl	%r13d,%ecx
+	xorl	48(%rsp),%edx
+	xorl	%r11d,%ebx
+	leal	-1894007588(%rbp,%r12,1),%r12d
+	roll	$5,%ecx
+	xorl	4(%rsp),%edx
+	addl	%eax,%r12d
+	andl	%esi,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r12d
+	roll	$30,%esi
+	movl	%edx,16(%rsp)
+	addl	%ecx,%r12d
+	movl	20(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%esi,%ebx
+	xorl	28(%rsp),%ebp
+	andl	%edi,%eax
+	movl	%r12d,%ecx
+	xorl	52(%rsp),%ebp
+	xorl	%edi,%ebx
+	leal	-1894007588(%rdx,%r11,1),%r11d
+	roll	$5,%ecx
+	xorl	8(%rsp),%ebp
+	addl	%eax,%r11d
+	andl	%r13d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r11d
+	roll	$30,%r13d
+	movl	%ebp,20(%rsp)
+	addl	%ecx,%r11d
+	movl	24(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r13d,%ebx
+	xorl	32(%rsp),%edx
+	andl	%esi,%eax
+	movl	%r11d,%ecx
+	xorl	56(%rsp),%edx
+	xorl	%esi,%ebx
+	leal	-1894007588(%rbp,%rdi,1),%edi
+	roll	$5,%ecx
+	xorl	12(%rsp),%edx
+	addl	%eax,%edi
+	andl	%r12d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%edi
+	roll	$30,%r12d
+	movl	%edx,24(%rsp)
+	addl	%ecx,%edi
+	movl	28(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%r12d,%ebx
+	xorl	36(%rsp),%ebp
+	andl	%r13d,%eax
+	movl	%edi,%ecx
+	xorl	60(%rsp),%ebp
+	xorl	%r13d,%ebx
+	leal	-1894007588(%rdx,%rsi,1),%esi
+	roll	$5,%ecx
+	xorl	16(%rsp),%ebp
+	addl	%eax,%esi
+	andl	%r11d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%esi
+	roll	$30,%r11d
+	movl	%ebp,28(%rsp)
+	addl	%ecx,%esi
+	movl	32(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%r11d,%ebx
+	xorl	40(%rsp),%edx
+	andl	%r12d,%eax
+	movl	%esi,%ecx
+	xorl	0(%rsp),%edx
+	xorl	%r12d,%ebx
+	leal	-1894007588(%rbp,%r13,1),%r13d
+	roll	$5,%ecx
+	xorl	20(%rsp),%edx
+	addl	%eax,%r13d
+	andl	%edi,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r13d
+	roll	$30,%edi
+	movl	%edx,32(%rsp)
+	addl	%ecx,%r13d
+	movl	36(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%edi,%ebx
+	xorl	44(%rsp),%ebp
+	andl	%r11d,%eax
+	movl	%r13d,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%r11d,%ebx
+	leal	-1894007588(%rdx,%r12,1),%r12d
+	roll	$5,%ecx
+	xorl	24(%rsp),%ebp
+	addl	%eax,%r12d
+	andl	%esi,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r12d
+	roll	$30,%esi
+	movl	%ebp,36(%rsp)
+	addl	%ecx,%r12d
+	movl	40(%rsp),%edx
+	movl	%esi,%eax
+	movl	%esi,%ebx
+	xorl	48(%rsp),%edx
+	andl	%edi,%eax
+	movl	%r12d,%ecx
+	xorl	8(%rsp),%edx
+	xorl	%edi,%ebx
+	leal	-1894007588(%rbp,%r11,1),%r11d
+	roll	$5,%ecx
+	xorl	28(%rsp),%edx
+	addl	%eax,%r11d
+	andl	%r13d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r11d
+	roll	$30,%r13d
+	movl	%edx,40(%rsp)
+	addl	%ecx,%r11d
+	movl	44(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r13d,%ebx
+	xorl	52(%rsp),%ebp
+	andl	%esi,%eax
+	movl	%r11d,%ecx
+	xorl	12(%rsp),%ebp
+	xorl	%esi,%ebx
+	leal	-1894007588(%rdx,%rdi,1),%edi
+	roll	$5,%ecx
+	xorl	32(%rsp),%ebp
+	addl	%eax,%edi
+	andl	%r12d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%edi
+	roll	$30,%r12d
+	movl	%ebp,44(%rsp)
+	addl	%ecx,%edi
+	movl	48(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%r12d,%ebx
+	xorl	56(%rsp),%edx
+	andl	%r13d,%eax
+	movl	%edi,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%r13d,%ebx
+	leal	-1894007588(%rbp,%rsi,1),%esi
+	roll	$5,%ecx
+	xorl	36(%rsp),%edx
+	addl	%eax,%esi
+	andl	%r11d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%esi
+	roll	$30,%r11d
+	movl	%edx,48(%rsp)
+	addl	%ecx,%esi
+	movl	52(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	60(%rsp),%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r13,1),%r13d
+	xorl	20(%rsp),%ebp
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	40(%rsp),%ebp
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	movl	%ebp,52(%rsp)
+	movl	56(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	0(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r12,1),%r12d
+	xorl	24(%rsp),%edx
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	44(%rsp),%edx
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%edx
+	movl	%edx,56(%rsp)
+	movl	60(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r11,1),%r11d
+	xorl	28(%rsp),%ebp
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	48(%rsp),%ebp
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	movl	%ebp,60(%rsp)
+	movl	0(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	8(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%rdi,1),%edi
+	xorl	32(%rsp),%edx
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	52(%rsp),%edx
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	movl	%edx,0(%rsp)
+	movl	4(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	12(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%rsi,1),%esi
+	xorl	36(%rsp),%ebp
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	56(%rsp),%ebp
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	movl	%ebp,4(%rsp)
+	movl	8(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r13,1),%r13d
+	xorl	40(%rsp),%edx
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	60(%rsp),%edx
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	movl	%edx,8(%rsp)
+	movl	12(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	20(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r12,1),%r12d
+	xorl	44(%rsp),%ebp
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	0(%rsp),%ebp
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	movl	%ebp,12(%rsp)
+	movl	16(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	24(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r11,1),%r11d
+	xorl	48(%rsp),%edx
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	4(%rsp),%edx
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	movl	%edx,16(%rsp)
+	movl	20(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	28(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%rdi,1),%edi
+	xorl	52(%rsp),%ebp
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	8(%rsp),%ebp
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	movl	%ebp,20(%rsp)
+	movl	24(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	32(%rsp),%edx
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%rsi,1),%esi
+	xorl	56(%rsp),%edx
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	12(%rsp),%edx
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%edx
+	movl	%edx,24(%rsp)
+	movl	28(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	36(%rsp),%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r13,1),%r13d
+	xorl	60(%rsp),%ebp
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	16(%rsp),%ebp
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	movl	%ebp,28(%rsp)
+	movl	32(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	40(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r12,1),%r12d
+	xorl	0(%rsp),%edx
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	20(%rsp),%edx
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%edx
+	movl	%edx,32(%rsp)
+	movl	36(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	44(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r11,1),%r11d
+	xorl	4(%rsp),%ebp
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	24(%rsp),%ebp
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	movl	%ebp,36(%rsp)
+	movl	40(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	48(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%rdi,1),%edi
+	xorl	8(%rsp),%edx
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	28(%rsp),%edx
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	movl	%edx,40(%rsp)
+	movl	44(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	52(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%rsi,1),%esi
+	xorl	12(%rsp),%ebp
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	32(%rsp),%ebp
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	movl	%ebp,44(%rsp)
+	movl	48(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	56(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r13,1),%r13d
+	xorl	16(%rsp),%edx
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	36(%rsp),%edx
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	movl	%edx,48(%rsp)
+	movl	52(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	60(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r12,1),%r12d
+	xorl	20(%rsp),%ebp
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	40(%rsp),%ebp
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	movl	56(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	0(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r11,1),%r11d
+	xorl	24(%rsp),%edx
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	44(%rsp),%edx
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	movl	60(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%rdi,1),%edi
+	xorl	28(%rsp),%ebp
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	48(%rsp),%ebp
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	%r11d,%eax
+	leal	-899497514(%rbp,%rsi,1),%esi
+	roll	$5,%ecx
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	addl	0(%r8),%esi
+	addl	4(%r8),%edi
+	addl	8(%r8),%r11d
+	addl	12(%r8),%r12d
+	addl	16(%r8),%r13d
+	movl	%esi,0(%r8)
+	movl	%edi,4(%r8)
+	movl	%r11d,8(%r8)
+	movl	%r12d,12(%r8)
+	movl	%r13d,16(%r8)
+
+	subq	$1,%r10
+	leaq	64(%r9),%r9
+	jnz	.Lloop
+
+	movq	64(%rsp),%rsi
+	movq	(%rsi),%r13
+	movq	8(%rsi),%r12
+	movq	16(%rsi),%rbp
+	movq	24(%rsi),%rbx
+	leaq	32(%rsi),%rsp
+.Lepilogue:
+	retq
+.size	sha1_block_data_order,.-sha1_block_data_order
+.type	sha1_block_data_order_ssse3,@function
+.align	16
+sha1_block_data_order_ssse3:
+_ssse3_shortcut:
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	leaq	-64(%rsp),%rsp
+	movq	%rdi,%r8
+	movq	%rsi,%r9
+	movq	%rdx,%r10
+
+	shlq	$6,%r10
+	addq	%r9,%r10
+	leaq	K_XX_XX(%rip),%r11
+
+	movl	0(%r8),%eax
+	movl	4(%r8),%ebx
+	movl	8(%r8),%ecx
+	movl	12(%r8),%edx
+	movl	%ebx,%esi
+	movl	16(%r8),%ebp
+
+	movdqa	64(%r11),%xmm6
+	movdqa	0(%r11),%xmm9
+	movdqu	0(%r9),%xmm0
+	movdqu	16(%r9),%xmm1
+	movdqu	32(%r9),%xmm2
+	movdqu	48(%r9),%xmm3
+.byte	102,15,56,0,198
+	addq	$64,%r9
+.byte	102,15,56,0,206
+.byte	102,15,56,0,214
+.byte	102,15,56,0,222
+	paddd	%xmm9,%xmm0
+	paddd	%xmm9,%xmm1
+	paddd	%xmm9,%xmm2
+	movdqa	%xmm0,0(%rsp)
+	psubd	%xmm9,%xmm0
+	movdqa	%xmm1,16(%rsp)
+	psubd	%xmm9,%xmm1
+	movdqa	%xmm2,32(%rsp)
+	psubd	%xmm9,%xmm2
+	jmp	.Loop_ssse3
+.align	16
+.Loop_ssse3:
+	movdqa	%xmm1,%xmm4
+	addl	0(%rsp),%ebp
+	xorl	%edx,%ecx
+	movdqa	%xmm3,%xmm8
+.byte	102,15,58,15,224,8
+	movl	%eax,%edi
+	roll	$5,%eax
+	paddd	%xmm3,%xmm9
+	andl	%ecx,%esi
+	xorl	%edx,%ecx
+	psrldq	$4,%xmm8
+	xorl	%edx,%esi
+	addl	%eax,%ebp
+	pxor	%xmm0,%xmm4
+	rorl	$2,%ebx
+	addl	%esi,%ebp
+	pxor	%xmm2,%xmm8
+	addl	4(%rsp),%edx
+	xorl	%ecx,%ebx
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	pxor	%xmm8,%xmm4
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	movdqa	%xmm9,48(%rsp)
+	xorl	%ecx,%edi
+	addl	%ebp,%edx
+	movdqa	%xmm4,%xmm10
+	movdqa	%xmm4,%xmm8
+	rorl	$7,%eax
+	addl	%edi,%edx
+	addl	8(%rsp),%ecx
+	xorl	%ebx,%eax
+	pslldq	$12,%xmm10
+	paddd	%xmm4,%xmm4
+	movl	%edx,%edi
+	roll	$5,%edx
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	psrld	$31,%xmm8
+	xorl	%ebx,%esi
+	addl	%edx,%ecx
+	movdqa	%xmm10,%xmm9
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	psrld	$30,%xmm10
+	por	%xmm8,%xmm4
+	addl	12(%rsp),%ebx
+	xorl	%eax,%ebp
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	pslld	$2,%xmm9
+	pxor	%xmm10,%xmm4
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	movdqa	0(%r11),%xmm10
+	xorl	%eax,%edi
+	addl	%ecx,%ebx
+	pxor	%xmm9,%xmm4
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	movdqa	%xmm2,%xmm5
+	addl	16(%rsp),%eax
+	xorl	%ebp,%edx
+	movdqa	%xmm4,%xmm9
+.byte	102,15,58,15,233,8
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	paddd	%xmm4,%xmm10
+	andl	%edx,%esi
+	xorl	%ebp,%edx
+	psrldq	$4,%xmm9
+	xorl	%ebp,%esi
+	addl	%ebx,%eax
+	pxor	%xmm1,%xmm5
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	pxor	%xmm3,%xmm9
+	addl	20(%rsp),%ebp
+	xorl	%edx,%ecx
+	movl	%eax,%esi
+	roll	$5,%eax
+	pxor	%xmm9,%xmm5
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	movdqa	%xmm10,0(%rsp)
+	xorl	%edx,%edi
+	addl	%eax,%ebp
+	movdqa	%xmm5,%xmm8
+	movdqa	%xmm5,%xmm9
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	addl	24(%rsp),%edx
+	xorl	%ecx,%ebx
+	pslldq	$12,%xmm8
+	paddd	%xmm5,%xmm5
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	psrld	$31,%xmm9
+	xorl	%ecx,%esi
+	addl	%ebp,%edx
+	movdqa	%xmm8,%xmm10
+	rorl	$7,%eax
+	addl	%esi,%edx
+	psrld	$30,%xmm8
+	por	%xmm9,%xmm5
+	addl	28(%rsp),%ecx
+	xorl	%ebx,%eax
+	movl	%edx,%esi
+	roll	$5,%edx
+	pslld	$2,%xmm10
+	pxor	%xmm8,%xmm5
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	movdqa	16(%r11),%xmm8
+	xorl	%ebx,%edi
+	addl	%edx,%ecx
+	pxor	%xmm10,%xmm5
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	movdqa	%xmm3,%xmm6
+	addl	32(%rsp),%ebx
+	xorl	%eax,%ebp
+	movdqa	%xmm5,%xmm10
+.byte	102,15,58,15,242,8
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	paddd	%xmm5,%xmm8
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	psrldq	$4,%xmm10
+	xorl	%eax,%esi
+	addl	%ecx,%ebx
+	pxor	%xmm2,%xmm6
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	pxor	%xmm4,%xmm10
+	addl	36(%rsp),%eax
+	xorl	%ebp,%edx
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	pxor	%xmm10,%xmm6
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	movdqa	%xmm8,16(%rsp)
+	xorl	%ebp,%edi
+	addl	%ebx,%eax
+	movdqa	%xmm6,%xmm9
+	movdqa	%xmm6,%xmm10
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	40(%rsp),%ebp
+	xorl	%edx,%ecx
+	pslldq	$12,%xmm9
+	paddd	%xmm6,%xmm6
+	movl	%eax,%edi
+	roll	$5,%eax
+	andl	%ecx,%esi
+	xorl	%edx,%ecx
+	psrld	$31,%xmm10
+	xorl	%edx,%esi
+	addl	%eax,%ebp
+	movdqa	%xmm9,%xmm8
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	psrld	$30,%xmm9
+	por	%xmm10,%xmm6
+	addl	44(%rsp),%edx
+	xorl	%ecx,%ebx
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	pslld	$2,%xmm8
+	pxor	%xmm9,%xmm6
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	movdqa	16(%r11),%xmm9
+	xorl	%ecx,%edi
+	addl	%ebp,%edx
+	pxor	%xmm8,%xmm6
+	rorl	$7,%eax
+	addl	%edi,%edx
+	movdqa	%xmm4,%xmm7
+	addl	48(%rsp),%ecx
+	xorl	%ebx,%eax
+	movdqa	%xmm6,%xmm8
+.byte	102,15,58,15,251,8
+	movl	%edx,%edi
+	roll	$5,%edx
+	paddd	%xmm6,%xmm9
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	psrldq	$4,%xmm8
+	xorl	%ebx,%esi
+	addl	%edx,%ecx
+	pxor	%xmm3,%xmm7
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	pxor	%xmm5,%xmm8
+	addl	52(%rsp),%ebx
+	xorl	%eax,%ebp
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	pxor	%xmm8,%xmm7
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	movdqa	%xmm9,32(%rsp)
+	xorl	%eax,%edi
+	addl	%ecx,%ebx
+	movdqa	%xmm7,%xmm10
+	movdqa	%xmm7,%xmm8
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	addl	56(%rsp),%eax
+	xorl	%ebp,%edx
+	pslldq	$12,%xmm10
+	paddd	%xmm7,%xmm7
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	andl	%edx,%esi
+	xorl	%ebp,%edx
+	psrld	$31,%xmm8
+	xorl	%ebp,%esi
+	addl	%ebx,%eax
+	movdqa	%xmm10,%xmm9
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	psrld	$30,%xmm10
+	por	%xmm8,%xmm7
+	addl	60(%rsp),%ebp
+	xorl	%edx,%ecx
+	movl	%eax,%esi
+	roll	$5,%eax
+	pslld	$2,%xmm9
+	pxor	%xmm10,%xmm7
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	movdqa	16(%r11),%xmm10
+	xorl	%edx,%edi
+	addl	%eax,%ebp
+	pxor	%xmm9,%xmm7
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	movdqa	%xmm7,%xmm9
+	addl	0(%rsp),%edx
+	pxor	%xmm4,%xmm0
+.byte	102,68,15,58,15,206,8
+	xorl	%ecx,%ebx
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	pxor	%xmm1,%xmm0
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	movdqa	%xmm10,%xmm8
+	paddd	%xmm7,%xmm10
+	xorl	%ecx,%esi
+	addl	%ebp,%edx
+	pxor	%xmm9,%xmm0
+	rorl	$7,%eax
+	addl	%esi,%edx
+	addl	4(%rsp),%ecx
+	xorl	%ebx,%eax
+	movdqa	%xmm0,%xmm9
+	movdqa	%xmm10,48(%rsp)
+	movl	%edx,%esi
+	roll	$5,%edx
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	pslld	$2,%xmm0
+	xorl	%ebx,%edi
+	addl	%edx,%ecx
+	psrld	$30,%xmm9
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	addl	8(%rsp),%ebx
+	xorl	%eax,%ebp
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	por	%xmm9,%xmm0
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	movdqa	%xmm0,%xmm10
+	xorl	%eax,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	addl	12(%rsp),%eax
+	xorl	%ebp,%edx
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	xorl	%ebp,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	16(%rsp),%ebp
+	pxor	%xmm5,%xmm1
+.byte	102,68,15,58,15,215,8
+	xorl	%edx,%esi
+	movl	%eax,%edi
+	roll	$5,%eax
+	pxor	%xmm2,%xmm1
+	xorl	%ecx,%esi
+	addl	%eax,%ebp
+	movdqa	%xmm8,%xmm9
+	paddd	%xmm0,%xmm8
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	pxor	%xmm10,%xmm1
+	addl	20(%rsp),%edx
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	movdqa	%xmm1,%xmm10
+	movdqa	%xmm8,0(%rsp)
+	xorl	%ebx,%edi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%edi,%edx
+	pslld	$2,%xmm1
+	addl	24(%rsp),%ecx
+	xorl	%ebx,%esi
+	psrld	$30,%xmm10
+	movl	%edx,%edi
+	roll	$5,%edx
+	xorl	%eax,%esi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	por	%xmm10,%xmm1
+	addl	28(%rsp),%ebx
+	xorl	%eax,%edi
+	movdqa	%xmm1,%xmm8
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	xorl	%ebp,%edi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	addl	32(%rsp),%eax
+	pxor	%xmm6,%xmm2
+.byte	102,68,15,58,15,192,8
+	xorl	%ebp,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	pxor	%xmm3,%xmm2
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	movdqa	32(%r11),%xmm10
+	paddd	%xmm1,%xmm9
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	pxor	%xmm8,%xmm2
+	addl	36(%rsp),%ebp
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	movdqa	%xmm2,%xmm8
+	movdqa	%xmm9,16(%rsp)
+	xorl	%ecx,%edi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	pslld	$2,%xmm2
+	addl	40(%rsp),%edx
+	xorl	%ecx,%esi
+	psrld	$30,%xmm8
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	xorl	%ebx,%esi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%esi,%edx
+	por	%xmm8,%xmm2
+	addl	44(%rsp),%ecx
+	xorl	%ebx,%edi
+	movdqa	%xmm2,%xmm9
+	movl	%edx,%esi
+	roll	$5,%edx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	addl	48(%rsp),%ebx
+	pxor	%xmm7,%xmm3
+.byte	102,68,15,58,15,201,8
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	pxor	%xmm4,%xmm3
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	movdqa	%xmm10,%xmm8
+	paddd	%xmm2,%xmm10
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	pxor	%xmm9,%xmm3
+	addl	52(%rsp),%eax
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	movdqa	%xmm3,%xmm9
+	movdqa	%xmm10,32(%rsp)
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	pslld	$2,%xmm3
+	addl	56(%rsp),%ebp
+	xorl	%edx,%esi
+	psrld	$30,%xmm9
+	movl	%eax,%edi
+	roll	$5,%eax
+	xorl	%ecx,%esi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	por	%xmm9,%xmm3
+	addl	60(%rsp),%edx
+	xorl	%ecx,%edi
+	movdqa	%xmm3,%xmm10
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	xorl	%ebx,%edi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%edi,%edx
+	addl	0(%rsp),%ecx
+	pxor	%xmm0,%xmm4
+.byte	102,68,15,58,15,210,8
+	xorl	%ebx,%esi
+	movl	%edx,%edi
+	roll	$5,%edx
+	pxor	%xmm5,%xmm4
+	xorl	%eax,%esi
+	addl	%edx,%ecx
+	movdqa	%xmm8,%xmm9
+	paddd	%xmm3,%xmm8
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	pxor	%xmm10,%xmm4
+	addl	4(%rsp),%ebx
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	movdqa	%xmm4,%xmm10
+	movdqa	%xmm8,48(%rsp)
+	xorl	%ebp,%edi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	pslld	$2,%xmm4
+	addl	8(%rsp),%eax
+	xorl	%ebp,%esi
+	psrld	$30,%xmm10
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	por	%xmm10,%xmm4
+	addl	12(%rsp),%ebp
+	xorl	%edx,%edi
+	movdqa	%xmm4,%xmm8
+	movl	%eax,%esi
+	roll	$5,%eax
+	xorl	%ecx,%edi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	addl	16(%rsp),%edx
+	pxor	%xmm1,%xmm5
+.byte	102,68,15,58,15,195,8
+	xorl	%ecx,%esi
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	pxor	%xmm6,%xmm5
+	xorl	%ebx,%esi
+	addl	%ebp,%edx
+	movdqa	%xmm9,%xmm10
+	paddd	%xmm4,%xmm9
+	rorl	$7,%eax
+	addl	%esi,%edx
+	pxor	%xmm8,%xmm5
+	addl	20(%rsp),%ecx
+	xorl	%ebx,%edi
+	movl	%edx,%esi
+	roll	$5,%edx
+	movdqa	%xmm5,%xmm8
+	movdqa	%xmm9,0(%rsp)
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	pslld	$2,%xmm5
+	addl	24(%rsp),%ebx
+	xorl	%eax,%esi
+	psrld	$30,%xmm8
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	por	%xmm8,%xmm5
+	addl	28(%rsp),%eax
+	xorl	%ebp,%edi
+	movdqa	%xmm5,%xmm9
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	movl	%ecx,%edi
+	pxor	%xmm2,%xmm6
+.byte	102,68,15,58,15,204,8
+	xorl	%edx,%ecx
+	addl	32(%rsp),%ebp
+	andl	%edx,%edi
+	pxor	%xmm7,%xmm6
+	andl	%ecx,%esi
+	rorl	$7,%ebx
+	movdqa	%xmm10,%xmm8
+	paddd	%xmm5,%xmm10
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	pxor	%xmm9,%xmm6
+	roll	$5,%eax
+	addl	%esi,%ebp
+	xorl	%edx,%ecx
+	addl	%eax,%ebp
+	movdqa	%xmm6,%xmm9
+	movdqa	%xmm10,16(%rsp)
+	movl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	36(%rsp),%edx
+	andl	%ecx,%esi
+	pslld	$2,%xmm6
+	andl	%ebx,%edi
+	rorl	$7,%eax
+	psrld	$30,%xmm9
+	addl	%esi,%edx
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	addl	%edi,%edx
+	xorl	%ecx,%ebx
+	addl	%ebp,%edx
+	por	%xmm9,%xmm6
+	movl	%eax,%edi
+	xorl	%ebx,%eax
+	movdqa	%xmm6,%xmm10
+	addl	40(%rsp),%ecx
+	andl	%ebx,%edi
+	andl	%eax,%esi
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%ebx,%eax
+	addl	%edx,%ecx
+	movl	%ebp,%esi
+	xorl	%eax,%ebp
+	addl	44(%rsp),%ebx
+	andl	%eax,%esi
+	andl	%ebp,%edi
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%eax,%ebp
+	addl	%ecx,%ebx
+	movl	%edx,%edi
+	pxor	%xmm3,%xmm7
+.byte	102,68,15,58,15,213,8
+	xorl	%ebp,%edx
+	addl	48(%rsp),%eax
+	andl	%ebp,%edi
+	pxor	%xmm0,%xmm7
+	andl	%edx,%esi
+	rorl	$7,%ecx
+	movdqa	48(%r11),%xmm9
+	paddd	%xmm6,%xmm8
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	pxor	%xmm10,%xmm7
+	roll	$5,%ebx
+	addl	%esi,%eax
+	xorl	%ebp,%edx
+	addl	%ebx,%eax
+	movdqa	%xmm7,%xmm10
+	movdqa	%xmm8,32(%rsp)
+	movl	%ecx,%esi
+	xorl	%edx,%ecx
+	addl	52(%rsp),%ebp
+	andl	%edx,%esi
+	pslld	$2,%xmm7
+	andl	%ecx,%edi
+	rorl	$7,%ebx
+	psrld	$30,%xmm10
+	addl	%esi,%ebp
+	movl	%eax,%esi
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%edx,%ecx
+	addl	%eax,%ebp
+	por	%xmm10,%xmm7
+	movl	%ebx,%edi
+	xorl	%ecx,%ebx
+	movdqa	%xmm7,%xmm8
+	addl	56(%rsp),%edx
+	andl	%ecx,%edi
+	andl	%ebx,%esi
+	rorl	$7,%eax
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	addl	%esi,%edx
+	xorl	%ecx,%ebx
+	addl	%ebp,%edx
+	movl	%eax,%esi
+	xorl	%ebx,%eax
+	addl	60(%rsp),%ecx
+	andl	%ebx,%esi
+	andl	%eax,%edi
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	movl	%edx,%esi
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%ebx,%eax
+	addl	%edx,%ecx
+	movl	%ebp,%edi
+	pxor	%xmm4,%xmm0
+.byte	102,68,15,58,15,198,8
+	xorl	%eax,%ebp
+	addl	0(%rsp),%ebx
+	andl	%eax,%edi
+	pxor	%xmm1,%xmm0
+	andl	%ebp,%esi
+	rorl	$7,%edx
+	movdqa	%xmm9,%xmm10
+	paddd	%xmm7,%xmm9
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	pxor	%xmm8,%xmm0
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%eax,%ebp
+	addl	%ecx,%ebx
+	movdqa	%xmm0,%xmm8
+	movdqa	%xmm9,48(%rsp)
+	movl	%edx,%esi
+	xorl	%ebp,%edx
+	addl	4(%rsp),%eax
+	andl	%ebp,%esi
+	pslld	$2,%xmm0
+	andl	%edx,%edi
+	rorl	$7,%ecx
+	psrld	$30,%xmm8
+	addl	%esi,%eax
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%ebp,%edx
+	addl	%ebx,%eax
+	por	%xmm8,%xmm0
+	movl	%ecx,%edi
+	xorl	%edx,%ecx
+	movdqa	%xmm0,%xmm9
+	addl	8(%rsp),%ebp
+	andl	%edx,%edi
+	andl	%ecx,%esi
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	roll	$5,%eax
+	addl	%esi,%ebp
+	xorl	%edx,%ecx
+	addl	%eax,%ebp
+	movl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	12(%rsp),%edx
+	andl	%ecx,%esi
+	andl	%ebx,%edi
+	rorl	$7,%eax
+	addl	%esi,%edx
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	addl	%edi,%edx
+	xorl	%ecx,%ebx
+	addl	%ebp,%edx
+	movl	%eax,%edi
+	pxor	%xmm5,%xmm1
+.byte	102,68,15,58,15,207,8
+	xorl	%ebx,%eax
+	addl	16(%rsp),%ecx
+	andl	%ebx,%edi
+	pxor	%xmm2,%xmm1
+	andl	%eax,%esi
+	rorl	$7,%ebp
+	movdqa	%xmm10,%xmm8
+	paddd	%xmm0,%xmm10
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	pxor	%xmm9,%xmm1
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%ebx,%eax
+	addl	%edx,%ecx
+	movdqa	%xmm1,%xmm9
+	movdqa	%xmm10,0(%rsp)
+	movl	%ebp,%esi
+	xorl	%eax,%ebp
+	addl	20(%rsp),%ebx
+	andl	%eax,%esi
+	pslld	$2,%xmm1
+	andl	%ebp,%edi
+	rorl	$7,%edx
+	psrld	$30,%xmm9
+	addl	%esi,%ebx
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%eax,%ebp
+	addl	%ecx,%ebx
+	por	%xmm9,%xmm1
+	movl	%edx,%edi
+	xorl	%ebp,%edx
+	movdqa	%xmm1,%xmm10
+	addl	24(%rsp),%eax
+	andl	%ebp,%edi
+	andl	%edx,%esi
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	addl	%esi,%eax
+	xorl	%ebp,%edx
+	addl	%ebx,%eax
+	movl	%ecx,%esi
+	xorl	%edx,%ecx
+	addl	28(%rsp),%ebp
+	andl	%edx,%esi
+	andl	%ecx,%edi
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	movl	%eax,%esi
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%edx,%ecx
+	addl	%eax,%ebp
+	movl	%ebx,%edi
+	pxor	%xmm6,%xmm2
+.byte	102,68,15,58,15,208,8
+	xorl	%ecx,%ebx
+	addl	32(%rsp),%edx
+	andl	%ecx,%edi
+	pxor	%xmm3,%xmm2
+	andl	%ebx,%esi
+	rorl	$7,%eax
+	movdqa	%xmm8,%xmm9
+	paddd	%xmm1,%xmm8
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	pxor	%xmm10,%xmm2
+	roll	$5,%ebp
+	addl	%esi,%edx
+	xorl	%ecx,%ebx
+	addl	%ebp,%edx
+	movdqa	%xmm2,%xmm10
+	movdqa	%xmm8,16(%rsp)
+	movl	%eax,%esi
+	xorl	%ebx,%eax
+	addl	36(%rsp),%ecx
+	andl	%ebx,%esi
+	pslld	$2,%xmm2
+	andl	%eax,%edi
+	rorl	$7,%ebp
+	psrld	$30,%xmm10
+	addl	%esi,%ecx
+	movl	%edx,%esi
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%ebx,%eax
+	addl	%edx,%ecx
+	por	%xmm10,%xmm2
+	movl	%ebp,%edi
+	xorl	%eax,%ebp
+	movdqa	%xmm2,%xmm8
+	addl	40(%rsp),%ebx
+	andl	%eax,%edi
+	andl	%ebp,%esi
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%eax,%ebp
+	addl	%ecx,%ebx
+	movl	%edx,%esi
+	xorl	%ebp,%edx
+	addl	44(%rsp),%eax
+	andl	%ebp,%esi
+	andl	%edx,%edi
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%ebp,%edx
+	addl	%ebx,%eax
+	addl	48(%rsp),%ebp
+	pxor	%xmm7,%xmm3
+.byte	102,68,15,58,15,193,8
+	xorl	%edx,%esi
+	movl	%eax,%edi
+	roll	$5,%eax
+	pxor	%xmm4,%xmm3
+	xorl	%ecx,%esi
+	addl	%eax,%ebp
+	movdqa	%xmm9,%xmm10
+	paddd	%xmm2,%xmm9
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	pxor	%xmm8,%xmm3
+	addl	52(%rsp),%edx
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	movdqa	%xmm3,%xmm8
+	movdqa	%xmm9,32(%rsp)
+	xorl	%ebx,%edi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%edi,%edx
+	pslld	$2,%xmm3
+	addl	56(%rsp),%ecx
+	xorl	%ebx,%esi
+	psrld	$30,%xmm8
+	movl	%edx,%edi
+	roll	$5,%edx
+	xorl	%eax,%esi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	por	%xmm8,%xmm3
+	addl	60(%rsp),%ebx
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	xorl	%ebp,%edi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	addl	0(%rsp),%eax
+	paddd	%xmm3,%xmm10
+	xorl	%ebp,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	xorl	%edx,%esi
+	movdqa	%xmm10,48(%rsp)
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	addl	4(%rsp),%ebp
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	xorl	%ecx,%edi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	addl	8(%rsp),%edx
+	xorl	%ecx,%esi
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	xorl	%ebx,%esi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%esi,%edx
+	addl	12(%rsp),%ecx
+	xorl	%ebx,%edi
+	movl	%edx,%esi
+	roll	$5,%edx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	cmpq	%r10,%r9
+	je	.Ldone_ssse3
+	movdqa	64(%r11),%xmm6
+	movdqa	0(%r11),%xmm9
+	movdqu	0(%r9),%xmm0
+	movdqu	16(%r9),%xmm1
+	movdqu	32(%r9),%xmm2
+	movdqu	48(%r9),%xmm3
+.byte	102,15,56,0,198
+	addq	$64,%r9
+	addl	16(%rsp),%ebx
+	xorl	%eax,%esi
+.byte	102,15,56,0,206
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	paddd	%xmm9,%xmm0
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	movdqa	%xmm0,0(%rsp)
+	addl	20(%rsp),%eax
+	xorl	%ebp,%edi
+	psubd	%xmm9,%xmm0
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	24(%rsp),%ebp
+	xorl	%edx,%esi
+	movl	%eax,%edi
+	roll	$5,%eax
+	xorl	%ecx,%esi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	addl	28(%rsp),%edx
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	xorl	%ebx,%edi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%edi,%edx
+	addl	32(%rsp),%ecx
+	xorl	%ebx,%esi
+.byte	102,15,56,0,214
+	movl	%edx,%edi
+	roll	$5,%edx
+	paddd	%xmm9,%xmm1
+	xorl	%eax,%esi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	movdqa	%xmm1,16(%rsp)
+	addl	36(%rsp),%ebx
+	xorl	%eax,%edi
+	psubd	%xmm9,%xmm1
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	xorl	%ebp,%edi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	addl	40(%rsp),%eax
+	xorl	%ebp,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	addl	44(%rsp),%ebp
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	xorl	%ecx,%edi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	addl	48(%rsp),%edx
+	xorl	%ecx,%esi
+.byte	102,15,56,0,222
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	paddd	%xmm9,%xmm2
+	xorl	%ebx,%esi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%esi,%edx
+	movdqa	%xmm2,32(%rsp)
+	addl	52(%rsp),%ecx
+	xorl	%ebx,%edi
+	psubd	%xmm9,%xmm2
+	movl	%edx,%esi
+	roll	$5,%edx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	addl	56(%rsp),%ebx
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	addl	60(%rsp),%eax
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	0(%r8),%eax
+	addl	4(%r8),%esi
+	addl	8(%r8),%ecx
+	addl	12(%r8),%edx
+	movl	%eax,0(%r8)
+	addl	16(%r8),%ebp
+	movl	%esi,4(%r8)
+	movl	%esi,%ebx
+	movl	%ecx,8(%r8)
+	movl	%edx,12(%r8)
+	movl	%ebp,16(%r8)
+	jmp	.Loop_ssse3
+
+.align	16
+.Ldone_ssse3:
+	addl	16(%rsp),%ebx
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	addl	20(%rsp),%eax
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	24(%rsp),%ebp
+	xorl	%edx,%esi
+	movl	%eax,%edi
+	roll	$5,%eax
+	xorl	%ecx,%esi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	addl	28(%rsp),%edx
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	xorl	%ebx,%edi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%edi,%edx
+	addl	32(%rsp),%ecx
+	xorl	%ebx,%esi
+	movl	%edx,%edi
+	roll	$5,%edx
+	xorl	%eax,%esi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	addl	36(%rsp),%ebx
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	xorl	%ebp,%edi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	addl	40(%rsp),%eax
+	xorl	%ebp,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	addl	44(%rsp),%ebp
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	xorl	%ecx,%edi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	addl	48(%rsp),%edx
+	xorl	%ecx,%esi
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	xorl	%ebx,%esi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%esi,%edx
+	addl	52(%rsp),%ecx
+	xorl	%ebx,%edi
+	movl	%edx,%esi
+	roll	$5,%edx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	addl	56(%rsp),%ebx
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	addl	60(%rsp),%eax
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	0(%r8),%eax
+	addl	4(%r8),%esi
+	addl	8(%r8),%ecx
+	movl	%eax,0(%r8)
+	addl	12(%r8),%edx
+	movl	%esi,4(%r8)
+	addl	16(%r8),%ebp
+	movl	%ecx,8(%r8)
+	movl	%edx,12(%r8)
+	movl	%ebp,16(%r8)
+	leaq	64(%rsp),%rsi
+	movq	0(%rsi),%r12
+	movq	8(%rsi),%rbp
+	movq	16(%rsi),%rbx
+	leaq	24(%rsi),%rsp
+.Lepilogue_ssse3:
+	retq
+.size	sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
+.align	64
+K_XX_XX:
+.long	0x5a827999,0x5a827999,0x5a827999,0x5a827999	
+.long	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1	
+.long	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc	
+.long	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6	
+.long	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f	
+.byte	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align	64
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/sha/sha1-macosx-x86_64.S b/ext/libressl/crypto/sha/sha1-macosx-x86_64.S
new file mode 100644
index 0000000..04a8aff
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha1-macosx-x86_64.S
@@ -0,0 +1,2488 @@
+#include "x86_arch.h"
+.text	
+
+.private_extern	_OPENSSL_ia32cap_P
+
+.globl	_sha1_block_data_order
+
+.p2align	4
+_sha1_block_data_order:
+	movl	_OPENSSL_ia32cap_P+0(%rip),%r9d
+	movl	_OPENSSL_ia32cap_P+4(%rip),%r8d
+	testl	$IA32CAP_MASK1_SSSE3,%r8d
+	jz	L$ialu
+	jmp	_ssse3_shortcut
+
+.p2align	4
+L$ialu:
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	movq	%rsp,%r11
+	movq	%rdi,%r8
+	subq	$72,%rsp
+	movq	%rsi,%r9
+	andq	$-64,%rsp
+	movq	%rdx,%r10
+	movq	%r11,64(%rsp)
+L$prologue:
+
+	movl	0(%r8),%esi
+	movl	4(%r8),%edi
+	movl	8(%r8),%r11d
+	movl	12(%r8),%r12d
+	movl	16(%r8),%r13d
+	jmp	L$loop
+
+.p2align	4
+L$loop:
+	movl	0(%r9),%edx
+	bswapl	%edx
+	movl	%edx,0(%rsp)
+	movl	%r11d,%eax
+	movl	4(%r9),%ebp
+	movl	%esi,%ecx
+	xorl	%r12d,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%r13,1),%r13d
+	andl	%edi,%eax
+	movl	%ebp,4(%rsp)
+	addl	%ecx,%r13d
+	xorl	%r12d,%eax
+	roll	$30,%edi
+	addl	%eax,%r13d
+	movl	%edi,%eax
+	movl	8(%r9),%edx
+	movl	%r13d,%ecx
+	xorl	%r11d,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%r12,1),%r12d
+	andl	%esi,%eax
+	movl	%edx,8(%rsp)
+	addl	%ecx,%r12d
+	xorl	%r11d,%eax
+	roll	$30,%esi
+	addl	%eax,%r12d
+	movl	%esi,%eax
+	movl	12(%r9),%ebp
+	movl	%r12d,%ecx
+	xorl	%edi,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%r11,1),%r11d
+	andl	%r13d,%eax
+	movl	%ebp,12(%rsp)
+	addl	%ecx,%r11d
+	xorl	%edi,%eax
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	movl	%r13d,%eax
+	movl	16(%r9),%edx
+	movl	%r11d,%ecx
+	xorl	%esi,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%rdi,1),%edi
+	andl	%r12d,%eax
+	movl	%edx,16(%rsp)
+	addl	%ecx,%edi
+	xorl	%esi,%eax
+	roll	$30,%r12d
+	addl	%eax,%edi
+	movl	%r12d,%eax
+	movl	20(%r9),%ebp
+	movl	%edi,%ecx
+	xorl	%r13d,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%rsi,1),%esi
+	andl	%r11d,%eax
+	movl	%ebp,20(%rsp)
+	addl	%ecx,%esi
+	xorl	%r13d,%eax
+	roll	$30,%r11d
+	addl	%eax,%esi
+	movl	%r11d,%eax
+	movl	24(%r9),%edx
+	movl	%esi,%ecx
+	xorl	%r12d,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%r13,1),%r13d
+	andl	%edi,%eax
+	movl	%edx,24(%rsp)
+	addl	%ecx,%r13d
+	xorl	%r12d,%eax
+	roll	$30,%edi
+	addl	%eax,%r13d
+	movl	%edi,%eax
+	movl	28(%r9),%ebp
+	movl	%r13d,%ecx
+	xorl	%r11d,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%r12,1),%r12d
+	andl	%esi,%eax
+	movl	%ebp,28(%rsp)
+	addl	%ecx,%r12d
+	xorl	%r11d,%eax
+	roll	$30,%esi
+	addl	%eax,%r12d
+	movl	%esi,%eax
+	movl	32(%r9),%edx
+	movl	%r12d,%ecx
+	xorl	%edi,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%r11,1),%r11d
+	andl	%r13d,%eax
+	movl	%edx,32(%rsp)
+	addl	%ecx,%r11d
+	xorl	%edi,%eax
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	movl	%r13d,%eax
+	movl	36(%r9),%ebp
+	movl	%r11d,%ecx
+	xorl	%esi,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%rdi,1),%edi
+	andl	%r12d,%eax
+	movl	%ebp,36(%rsp)
+	addl	%ecx,%edi
+	xorl	%esi,%eax
+	roll	$30,%r12d
+	addl	%eax,%edi
+	movl	%r12d,%eax
+	movl	40(%r9),%edx
+	movl	%edi,%ecx
+	xorl	%r13d,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%rsi,1),%esi
+	andl	%r11d,%eax
+	movl	%edx,40(%rsp)
+	addl	%ecx,%esi
+	xorl	%r13d,%eax
+	roll	$30,%r11d
+	addl	%eax,%esi
+	movl	%r11d,%eax
+	movl	44(%r9),%ebp
+	movl	%esi,%ecx
+	xorl	%r12d,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%r13,1),%r13d
+	andl	%edi,%eax
+	movl	%ebp,44(%rsp)
+	addl	%ecx,%r13d
+	xorl	%r12d,%eax
+	roll	$30,%edi
+	addl	%eax,%r13d
+	movl	%edi,%eax
+	movl	48(%r9),%edx
+	movl	%r13d,%ecx
+	xorl	%r11d,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%r12,1),%r12d
+	andl	%esi,%eax
+	movl	%edx,48(%rsp)
+	addl	%ecx,%r12d
+	xorl	%r11d,%eax
+	roll	$30,%esi
+	addl	%eax,%r12d
+	movl	%esi,%eax
+	movl	52(%r9),%ebp
+	movl	%r12d,%ecx
+	xorl	%edi,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%r11,1),%r11d
+	andl	%r13d,%eax
+	movl	%ebp,52(%rsp)
+	addl	%ecx,%r11d
+	xorl	%edi,%eax
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	movl	%r13d,%eax
+	movl	56(%r9),%edx
+	movl	%r11d,%ecx
+	xorl	%esi,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%rdi,1),%edi
+	andl	%r12d,%eax
+	movl	%edx,56(%rsp)
+	addl	%ecx,%edi
+	xorl	%esi,%eax
+	roll	$30,%r12d
+	addl	%eax,%edi
+	movl	%r12d,%eax
+	movl	60(%r9),%ebp
+	movl	%edi,%ecx
+	xorl	%r13d,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%rsi,1),%esi
+	andl	%r11d,%eax
+	movl	%ebp,60(%rsp)
+	addl	%ecx,%esi
+	xorl	%r13d,%eax
+	roll	$30,%r11d
+	addl	%eax,%esi
+	movl	0(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	8(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	32(%rsp),%edx
+	andl	%edi,%eax
+	leal	1518500249(%rbp,%r13,1),%r13d
+	xorl	52(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$1,%edx
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	movl	%edx,0(%rsp)
+	addl	%eax,%r13d
+	movl	4(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	12(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	36(%rsp),%ebp
+	andl	%esi,%eax
+	leal	1518500249(%rdx,%r12,1),%r12d
+	xorl	56(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$1,%ebp
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	movl	%ebp,4(%rsp)
+	addl	%eax,%r12d
+	movl	8(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	40(%rsp),%edx
+	andl	%r13d,%eax
+	leal	1518500249(%rbp,%r11,1),%r11d
+	xorl	60(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$1,%edx
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	movl	%edx,8(%rsp)
+	addl	%eax,%r11d
+	movl	12(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	20(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	44(%rsp),%ebp
+	andl	%r12d,%eax
+	leal	1518500249(%rdx,%rdi,1),%edi
+	xorl	0(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$1,%ebp
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	movl	%ebp,12(%rsp)
+	addl	%eax,%edi
+	movl	16(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	24(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	48(%rsp),%edx
+	andl	%r11d,%eax
+	leal	1518500249(%rbp,%rsi,1),%esi
+	xorl	4(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$1,%edx
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	movl	%edx,16(%rsp)
+	addl	%eax,%esi
+	movl	20(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	28(%rsp),%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r13,1),%r13d
+	xorl	52(%rsp),%ebp
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	8(%rsp),%ebp
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	movl	%ebp,20(%rsp)
+	movl	24(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	32(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r12,1),%r12d
+	xorl	56(%rsp),%edx
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	12(%rsp),%edx
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%edx
+	movl	%edx,24(%rsp)
+	movl	28(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	36(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r11,1),%r11d
+	xorl	60(%rsp),%ebp
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	16(%rsp),%ebp
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	movl	%ebp,28(%rsp)
+	movl	32(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	40(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%rdi,1),%edi
+	xorl	0(%rsp),%edx
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	20(%rsp),%edx
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	movl	%edx,32(%rsp)
+	movl	36(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	44(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%rsi,1),%esi
+	xorl	4(%rsp),%ebp
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	24(%rsp),%ebp
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	movl	%ebp,36(%rsp)
+	movl	40(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	48(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r13,1),%r13d
+	xorl	8(%rsp),%edx
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	28(%rsp),%edx
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	movl	%edx,40(%rsp)
+	movl	44(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	52(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r12,1),%r12d
+	xorl	12(%rsp),%ebp
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	32(%rsp),%ebp
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	movl	%ebp,44(%rsp)
+	movl	48(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	56(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r11,1),%r11d
+	xorl	16(%rsp),%edx
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	36(%rsp),%edx
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	movl	%edx,48(%rsp)
+	movl	52(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	60(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%rdi,1),%edi
+	xorl	20(%rsp),%ebp
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	40(%rsp),%ebp
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	movl	%ebp,52(%rsp)
+	movl	56(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	0(%rsp),%edx
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%rsi,1),%esi
+	xorl	24(%rsp),%edx
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	44(%rsp),%edx
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%edx
+	movl	%edx,56(%rsp)
+	movl	60(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r13,1),%r13d
+	xorl	28(%rsp),%ebp
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	48(%rsp),%ebp
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	movl	%ebp,60(%rsp)
+	movl	0(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	8(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r12,1),%r12d
+	xorl	32(%rsp),%edx
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	52(%rsp),%edx
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%edx
+	movl	%edx,0(%rsp)
+	movl	4(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	12(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r11,1),%r11d
+	xorl	36(%rsp),%ebp
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	56(%rsp),%ebp
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	movl	%ebp,4(%rsp)
+	movl	8(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%rdi,1),%edi
+	xorl	40(%rsp),%edx
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	60(%rsp),%edx
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	movl	%edx,8(%rsp)
+	movl	12(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	20(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%rsi,1),%esi
+	xorl	44(%rsp),%ebp
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	0(%rsp),%ebp
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	movl	%ebp,12(%rsp)
+	movl	16(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	24(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r13,1),%r13d
+	xorl	48(%rsp),%edx
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	4(%rsp),%edx
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	movl	%edx,16(%rsp)
+	movl	20(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	28(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r12,1),%r12d
+	xorl	52(%rsp),%ebp
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	8(%rsp),%ebp
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	movl	%ebp,20(%rsp)
+	movl	24(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	32(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r11,1),%r11d
+	xorl	56(%rsp),%edx
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	12(%rsp),%edx
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	movl	%edx,24(%rsp)
+	movl	28(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	36(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%rdi,1),%edi
+	xorl	60(%rsp),%ebp
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	16(%rsp),%ebp
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	movl	%ebp,28(%rsp)
+	movl	32(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	40(%rsp),%edx
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%rsi,1),%esi
+	xorl	0(%rsp),%edx
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	20(%rsp),%edx
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%edx
+	movl	%edx,32(%rsp)
+	movl	36(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%r11d,%ebx
+	xorl	44(%rsp),%ebp
+	andl	%r12d,%eax
+	movl	%esi,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%r12d,%ebx
+	leal	-1894007588(%rdx,%r13,1),%r13d
+	roll	$5,%ecx
+	xorl	24(%rsp),%ebp
+	addl	%eax,%r13d
+	andl	%edi,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r13d
+	roll	$30,%edi
+	movl	%ebp,36(%rsp)
+	addl	%ecx,%r13d
+	movl	40(%rsp),%edx
+	movl	%edi,%eax
+	movl	%edi,%ebx
+	xorl	48(%rsp),%edx
+	andl	%r11d,%eax
+	movl	%r13d,%ecx
+	xorl	8(%rsp),%edx
+	xorl	%r11d,%ebx
+	leal	-1894007588(%rbp,%r12,1),%r12d
+	roll	$5,%ecx
+	xorl	28(%rsp),%edx
+	addl	%eax,%r12d
+	andl	%esi,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r12d
+	roll	$30,%esi
+	movl	%edx,40(%rsp)
+	addl	%ecx,%r12d
+	movl	44(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%esi,%ebx
+	xorl	52(%rsp),%ebp
+	andl	%edi,%eax
+	movl	%r12d,%ecx
+	xorl	12(%rsp),%ebp
+	xorl	%edi,%ebx
+	leal	-1894007588(%rdx,%r11,1),%r11d
+	roll	$5,%ecx
+	xorl	32(%rsp),%ebp
+	addl	%eax,%r11d
+	andl	%r13d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r11d
+	roll	$30,%r13d
+	movl	%ebp,44(%rsp)
+	addl	%ecx,%r11d
+	movl	48(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r13d,%ebx
+	xorl	56(%rsp),%edx
+	andl	%esi,%eax
+	movl	%r11d,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%esi,%ebx
+	leal	-1894007588(%rbp,%rdi,1),%edi
+	roll	$5,%ecx
+	xorl	36(%rsp),%edx
+	addl	%eax,%edi
+	andl	%r12d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%edi
+	roll	$30,%r12d
+	movl	%edx,48(%rsp)
+	addl	%ecx,%edi
+	movl	52(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%r12d,%ebx
+	xorl	60(%rsp),%ebp
+	andl	%r13d,%eax
+	movl	%edi,%ecx
+	xorl	20(%rsp),%ebp
+	xorl	%r13d,%ebx
+	leal	-1894007588(%rdx,%rsi,1),%esi
+	roll	$5,%ecx
+	xorl	40(%rsp),%ebp
+	addl	%eax,%esi
+	andl	%r11d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%esi
+	roll	$30,%r11d
+	movl	%ebp,52(%rsp)
+	addl	%ecx,%esi
+	movl	56(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%r11d,%ebx
+	xorl	0(%rsp),%edx
+	andl	%r12d,%eax
+	movl	%esi,%ecx
+	xorl	24(%rsp),%edx
+	xorl	%r12d,%ebx
+	leal	-1894007588(%rbp,%r13,1),%r13d
+	roll	$5,%ecx
+	xorl	44(%rsp),%edx
+	addl	%eax,%r13d
+	andl	%edi,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r13d
+	roll	$30,%edi
+	movl	%edx,56(%rsp)
+	addl	%ecx,%r13d
+	movl	60(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%edi,%ebx
+	xorl	4(%rsp),%ebp
+	andl	%r11d,%eax
+	movl	%r13d,%ecx
+	xorl	28(%rsp),%ebp
+	xorl	%r11d,%ebx
+	leal	-1894007588(%rdx,%r12,1),%r12d
+	roll	$5,%ecx
+	xorl	48(%rsp),%ebp
+	addl	%eax,%r12d
+	andl	%esi,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r12d
+	roll	$30,%esi
+	movl	%ebp,60(%rsp)
+	addl	%ecx,%r12d
+	movl	0(%rsp),%edx
+	movl	%esi,%eax
+	movl	%esi,%ebx
+	xorl	8(%rsp),%edx
+	andl	%edi,%eax
+	movl	%r12d,%ecx
+	xorl	32(%rsp),%edx
+	xorl	%edi,%ebx
+	leal	-1894007588(%rbp,%r11,1),%r11d
+	roll	$5,%ecx
+	xorl	52(%rsp),%edx
+	addl	%eax,%r11d
+	andl	%r13d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r11d
+	roll	$30,%r13d
+	movl	%edx,0(%rsp)
+	addl	%ecx,%r11d
+	movl	4(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r13d,%ebx
+	xorl	12(%rsp),%ebp
+	andl	%esi,%eax
+	movl	%r11d,%ecx
+	xorl	36(%rsp),%ebp
+	xorl	%esi,%ebx
+	leal	-1894007588(%rdx,%rdi,1),%edi
+	roll	$5,%ecx
+	xorl	56(%rsp),%ebp
+	addl	%eax,%edi
+	andl	%r12d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%edi
+	roll	$30,%r12d
+	movl	%ebp,4(%rsp)
+	addl	%ecx,%edi
+	movl	8(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%r12d,%ebx
+	xorl	16(%rsp),%edx
+	andl	%r13d,%eax
+	movl	%edi,%ecx
+	xorl	40(%rsp),%edx
+	xorl	%r13d,%ebx
+	leal	-1894007588(%rbp,%rsi,1),%esi
+	roll	$5,%ecx
+	xorl	60(%rsp),%edx
+	addl	%eax,%esi
+	andl	%r11d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%esi
+	roll	$30,%r11d
+	movl	%edx,8(%rsp)
+	addl	%ecx,%esi
+	movl	12(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%r11d,%ebx
+	xorl	20(%rsp),%ebp
+	andl	%r12d,%eax
+	movl	%esi,%ecx
+	xorl	44(%rsp),%ebp
+	xorl	%r12d,%ebx
+	leal	-1894007588(%rdx,%r13,1),%r13d
+	roll	$5,%ecx
+	xorl	0(%rsp),%ebp
+	addl	%eax,%r13d
+	andl	%edi,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r13d
+	roll	$30,%edi
+	movl	%ebp,12(%rsp)
+	addl	%ecx,%r13d
+	movl	16(%rsp),%edx
+	movl	%edi,%eax
+	movl	%edi,%ebx
+	xorl	24(%rsp),%edx
+	andl	%r11d,%eax
+	movl	%r13d,%ecx
+	xorl	48(%rsp),%edx
+	xorl	%r11d,%ebx
+	leal	-1894007588(%rbp,%r12,1),%r12d
+	roll	$5,%ecx
+	xorl	4(%rsp),%edx
+	addl	%eax,%r12d
+	andl	%esi,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r12d
+	roll	$30,%esi
+	movl	%edx,16(%rsp)
+	addl	%ecx,%r12d
+	movl	20(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%esi,%ebx
+	xorl	28(%rsp),%ebp
+	andl	%edi,%eax
+	movl	%r12d,%ecx
+	xorl	52(%rsp),%ebp
+	xorl	%edi,%ebx
+	leal	-1894007588(%rdx,%r11,1),%r11d
+	roll	$5,%ecx
+	xorl	8(%rsp),%ebp
+	addl	%eax,%r11d
+	andl	%r13d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r11d
+	roll	$30,%r13d
+	movl	%ebp,20(%rsp)
+	addl	%ecx,%r11d
+	movl	24(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r13d,%ebx
+	xorl	32(%rsp),%edx
+	andl	%esi,%eax
+	movl	%r11d,%ecx
+	xorl	56(%rsp),%edx
+	xorl	%esi,%ebx
+	leal	-1894007588(%rbp,%rdi,1),%edi
+	roll	$5,%ecx
+	xorl	12(%rsp),%edx
+	addl	%eax,%edi
+	andl	%r12d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%edi
+	roll	$30,%r12d
+	movl	%edx,24(%rsp)
+	addl	%ecx,%edi
+	movl	28(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%r12d,%ebx
+	xorl	36(%rsp),%ebp
+	andl	%r13d,%eax
+	movl	%edi,%ecx
+	xorl	60(%rsp),%ebp
+	xorl	%r13d,%ebx
+	leal	-1894007588(%rdx,%rsi,1),%esi
+	roll	$5,%ecx
+	xorl	16(%rsp),%ebp
+	addl	%eax,%esi
+	andl	%r11d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%esi
+	roll	$30,%r11d
+	movl	%ebp,28(%rsp)
+	addl	%ecx,%esi
+	movl	32(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%r11d,%ebx
+	xorl	40(%rsp),%edx
+	andl	%r12d,%eax
+	movl	%esi,%ecx
+	xorl	0(%rsp),%edx
+	xorl	%r12d,%ebx
+	leal	-1894007588(%rbp,%r13,1),%r13d
+	roll	$5,%ecx
+	xorl	20(%rsp),%edx
+	addl	%eax,%r13d
+	andl	%edi,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r13d
+	roll	$30,%edi
+	movl	%edx,32(%rsp)
+	addl	%ecx,%r13d
+	movl	36(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%edi,%ebx
+	xorl	44(%rsp),%ebp
+	andl	%r11d,%eax
+	movl	%r13d,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%r11d,%ebx
+	leal	-1894007588(%rdx,%r12,1),%r12d
+	roll	$5,%ecx
+	xorl	24(%rsp),%ebp
+	addl	%eax,%r12d
+	andl	%esi,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r12d
+	roll	$30,%esi
+	movl	%ebp,36(%rsp)
+	addl	%ecx,%r12d
+	movl	40(%rsp),%edx
+	movl	%esi,%eax
+	movl	%esi,%ebx
+	xorl	48(%rsp),%edx
+	andl	%edi,%eax
+	movl	%r12d,%ecx
+	xorl	8(%rsp),%edx
+	xorl	%edi,%ebx
+	leal	-1894007588(%rbp,%r11,1),%r11d
+	roll	$5,%ecx
+	xorl	28(%rsp),%edx
+	addl	%eax,%r11d
+	andl	%r13d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r11d
+	roll	$30,%r13d
+	movl	%edx,40(%rsp)
+	addl	%ecx,%r11d
+	movl	44(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r13d,%ebx
+	xorl	52(%rsp),%ebp
+	andl	%esi,%eax
+	movl	%r11d,%ecx
+	xorl	12(%rsp),%ebp
+	xorl	%esi,%ebx
+	leal	-1894007588(%rdx,%rdi,1),%edi
+	roll	$5,%ecx
+	xorl	32(%rsp),%ebp
+	addl	%eax,%edi
+	andl	%r12d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%edi
+	roll	$30,%r12d
+	movl	%ebp,44(%rsp)
+	addl	%ecx,%edi
+	movl	48(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%r12d,%ebx
+	xorl	56(%rsp),%edx
+	andl	%r13d,%eax
+	movl	%edi,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%r13d,%ebx
+	leal	-1894007588(%rbp,%rsi,1),%esi
+	roll	$5,%ecx
+	xorl	36(%rsp),%edx
+	addl	%eax,%esi
+	andl	%r11d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%esi
+	roll	$30,%r11d
+	movl	%edx,48(%rsp)
+	addl	%ecx,%esi
+	movl	52(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	60(%rsp),%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r13,1),%r13d
+	xorl	20(%rsp),%ebp
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	40(%rsp),%ebp
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	movl	%ebp,52(%rsp)
+	movl	56(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	0(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r12,1),%r12d
+	xorl	24(%rsp),%edx
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	44(%rsp),%edx
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%edx
+	movl	%edx,56(%rsp)
+	movl	60(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r11,1),%r11d
+	xorl	28(%rsp),%ebp
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	48(%rsp),%ebp
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	movl	%ebp,60(%rsp)
+	movl	0(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	8(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%rdi,1),%edi
+	xorl	32(%rsp),%edx
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	52(%rsp),%edx
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	movl	%edx,0(%rsp)
+	movl	4(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	12(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%rsi,1),%esi
+	xorl	36(%rsp),%ebp
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	56(%rsp),%ebp
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	movl	%ebp,4(%rsp)
+	movl	8(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r13,1),%r13d
+	xorl	40(%rsp),%edx
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	60(%rsp),%edx
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	movl	%edx,8(%rsp)
+	movl	12(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	20(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r12,1),%r12d
+	xorl	44(%rsp),%ebp
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	0(%rsp),%ebp
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	movl	%ebp,12(%rsp)
+	movl	16(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	24(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r11,1),%r11d
+	xorl	48(%rsp),%edx
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	4(%rsp),%edx
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	movl	%edx,16(%rsp)
+	movl	20(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	28(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%rdi,1),%edi
+	xorl	52(%rsp),%ebp
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	8(%rsp),%ebp
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	movl	%ebp,20(%rsp)
+	movl	24(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	32(%rsp),%edx
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%rsi,1),%esi
+	xorl	56(%rsp),%edx
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	12(%rsp),%edx
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%edx
+	movl	%edx,24(%rsp)
+	movl	28(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	36(%rsp),%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r13,1),%r13d
+	xorl	60(%rsp),%ebp
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	16(%rsp),%ebp
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	movl	%ebp,28(%rsp)
+	movl	32(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	40(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r12,1),%r12d
+	xorl	0(%rsp),%edx
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	20(%rsp),%edx
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%edx
+	movl	%edx,32(%rsp)
+	movl	36(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	44(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r11,1),%r11d
+	xorl	4(%rsp),%ebp
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	24(%rsp),%ebp
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	movl	%ebp,36(%rsp)
+	movl	40(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	48(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%rdi,1),%edi
+	xorl	8(%rsp),%edx
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	28(%rsp),%edx
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	movl	%edx,40(%rsp)
+	movl	44(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	52(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%rsi,1),%esi
+	xorl	12(%rsp),%ebp
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	32(%rsp),%ebp
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	movl	%ebp,44(%rsp)
+	movl	48(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	56(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r13,1),%r13d
+	xorl	16(%rsp),%edx
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	36(%rsp),%edx
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	movl	%edx,48(%rsp)
+	movl	52(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	60(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r12,1),%r12d
+	xorl	20(%rsp),%ebp
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	40(%rsp),%ebp
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	movl	56(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	0(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r11,1),%r11d
+	xorl	24(%rsp),%edx
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	44(%rsp),%edx
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	movl	60(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%rdi,1),%edi
+	xorl	28(%rsp),%ebp
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	48(%rsp),%ebp
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	%r11d,%eax
+	leal	-899497514(%rbp,%rsi,1),%esi
+	roll	$5,%ecx
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	addl	0(%r8),%esi
+	addl	4(%r8),%edi
+	addl	8(%r8),%r11d
+	addl	12(%r8),%r12d
+	addl	16(%r8),%r13d
+	movl	%esi,0(%r8)
+	movl	%edi,4(%r8)
+	movl	%r11d,8(%r8)
+	movl	%r12d,12(%r8)
+	movl	%r13d,16(%r8)
+
+	subq	$1,%r10
+	leaq	64(%r9),%r9
+	jnz	L$loop
+
+	movq	64(%rsp),%rsi
+	movq	(%rsi),%r13
+	movq	8(%rsi),%r12
+	movq	16(%rsi),%rbp
+	movq	24(%rsi),%rbx
+	leaq	32(%rsi),%rsp
+L$epilogue:
+	retq
+
+
+.p2align	4
+sha1_block_data_order_ssse3:
+_ssse3_shortcut:
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	leaq	-64(%rsp),%rsp
+	movq	%rdi,%r8
+	movq	%rsi,%r9
+	movq	%rdx,%r10
+
+	shlq	$6,%r10
+	addq	%r9,%r10
+	leaq	K_XX_XX(%rip),%r11
+
+	movl	0(%r8),%eax
+	movl	4(%r8),%ebx
+	movl	8(%r8),%ecx
+	movl	12(%r8),%edx
+	movl	%ebx,%esi
+	movl	16(%r8),%ebp
+
+	movdqa	64(%r11),%xmm6
+	movdqa	0(%r11),%xmm9
+	movdqu	0(%r9),%xmm0
+	movdqu	16(%r9),%xmm1
+	movdqu	32(%r9),%xmm2
+	movdqu	48(%r9),%xmm3
+.byte	102,15,56,0,198
+	addq	$64,%r9
+.byte	102,15,56,0,206
+.byte	102,15,56,0,214
+.byte	102,15,56,0,222
+	paddd	%xmm9,%xmm0
+	paddd	%xmm9,%xmm1
+	paddd	%xmm9,%xmm2
+	movdqa	%xmm0,0(%rsp)
+	psubd	%xmm9,%xmm0
+	movdqa	%xmm1,16(%rsp)
+	psubd	%xmm9,%xmm1
+	movdqa	%xmm2,32(%rsp)
+	psubd	%xmm9,%xmm2
+	jmp	L$oop_ssse3
+.p2align	4
+L$oop_ssse3:
+	movdqa	%xmm1,%xmm4
+	addl	0(%rsp),%ebp
+	xorl	%edx,%ecx
+	movdqa	%xmm3,%xmm8
+.byte	102,15,58,15,224,8
+	movl	%eax,%edi
+	roll	$5,%eax
+	paddd	%xmm3,%xmm9
+	andl	%ecx,%esi
+	xorl	%edx,%ecx
+	psrldq	$4,%xmm8
+	xorl	%edx,%esi
+	addl	%eax,%ebp
+	pxor	%xmm0,%xmm4
+	rorl	$2,%ebx
+	addl	%esi,%ebp
+	pxor	%xmm2,%xmm8
+	addl	4(%rsp),%edx
+	xorl	%ecx,%ebx
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	pxor	%xmm8,%xmm4
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	movdqa	%xmm9,48(%rsp)
+	xorl	%ecx,%edi
+	addl	%ebp,%edx
+	movdqa	%xmm4,%xmm10
+	movdqa	%xmm4,%xmm8
+	rorl	$7,%eax
+	addl	%edi,%edx
+	addl	8(%rsp),%ecx
+	xorl	%ebx,%eax
+	pslldq	$12,%xmm10
+	paddd	%xmm4,%xmm4
+	movl	%edx,%edi
+	roll	$5,%edx
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	psrld	$31,%xmm8
+	xorl	%ebx,%esi
+	addl	%edx,%ecx
+	movdqa	%xmm10,%xmm9
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	psrld	$30,%xmm10
+	por	%xmm8,%xmm4
+	addl	12(%rsp),%ebx
+	xorl	%eax,%ebp
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	pslld	$2,%xmm9
+	pxor	%xmm10,%xmm4
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	movdqa	0(%r11),%xmm10
+	xorl	%eax,%edi
+	addl	%ecx,%ebx
+	pxor	%xmm9,%xmm4
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	movdqa	%xmm2,%xmm5
+	addl	16(%rsp),%eax
+	xorl	%ebp,%edx
+	movdqa	%xmm4,%xmm9
+.byte	102,15,58,15,233,8
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	paddd	%xmm4,%xmm10
+	andl	%edx,%esi
+	xorl	%ebp,%edx
+	psrldq	$4,%xmm9
+	xorl	%ebp,%esi
+	addl	%ebx,%eax
+	pxor	%xmm1,%xmm5
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	pxor	%xmm3,%xmm9
+	addl	20(%rsp),%ebp
+	xorl	%edx,%ecx
+	movl	%eax,%esi
+	roll	$5,%eax
+	pxor	%xmm9,%xmm5
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	movdqa	%xmm10,0(%rsp)
+	xorl	%edx,%edi
+	addl	%eax,%ebp
+	movdqa	%xmm5,%xmm8
+	movdqa	%xmm5,%xmm9
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	addl	24(%rsp),%edx
+	xorl	%ecx,%ebx
+	pslldq	$12,%xmm8
+	paddd	%xmm5,%xmm5
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	psrld	$31,%xmm9
+	xorl	%ecx,%esi
+	addl	%ebp,%edx
+	movdqa	%xmm8,%xmm10
+	rorl	$7,%eax
+	addl	%esi,%edx
+	psrld	$30,%xmm8
+	por	%xmm9,%xmm5
+	addl	28(%rsp),%ecx
+	xorl	%ebx,%eax
+	movl	%edx,%esi
+	roll	$5,%edx
+	pslld	$2,%xmm10
+	pxor	%xmm8,%xmm5
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	movdqa	16(%r11),%xmm8
+	xorl	%ebx,%edi
+	addl	%edx,%ecx
+	pxor	%xmm10,%xmm5
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	movdqa	%xmm3,%xmm6
+	addl	32(%rsp),%ebx
+	xorl	%eax,%ebp
+	movdqa	%xmm5,%xmm10
+.byte	102,15,58,15,242,8
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	paddd	%xmm5,%xmm8
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	psrldq	$4,%xmm10
+	xorl	%eax,%esi
+	addl	%ecx,%ebx
+	pxor	%xmm2,%xmm6
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	pxor	%xmm4,%xmm10
+	addl	36(%rsp),%eax
+	xorl	%ebp,%edx
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	pxor	%xmm10,%xmm6
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	movdqa	%xmm8,16(%rsp)
+	xorl	%ebp,%edi
+	addl	%ebx,%eax
+	movdqa	%xmm6,%xmm9
+	movdqa	%xmm6,%xmm10
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	40(%rsp),%ebp
+	xorl	%edx,%ecx
+	pslldq	$12,%xmm9
+	paddd	%xmm6,%xmm6
+	movl	%eax,%edi
+	roll	$5,%eax
+	andl	%ecx,%esi
+	xorl	%edx,%ecx
+	psrld	$31,%xmm10
+	xorl	%edx,%esi
+	addl	%eax,%ebp
+	movdqa	%xmm9,%xmm8
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	psrld	$30,%xmm9
+	por	%xmm10,%xmm6
+	addl	44(%rsp),%edx
+	xorl	%ecx,%ebx
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	pslld	$2,%xmm8
+	pxor	%xmm9,%xmm6
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	movdqa	16(%r11),%xmm9
+	xorl	%ecx,%edi
+	addl	%ebp,%edx
+	pxor	%xmm8,%xmm6
+	rorl	$7,%eax
+	addl	%edi,%edx
+	movdqa	%xmm4,%xmm7
+	addl	48(%rsp),%ecx
+	xorl	%ebx,%eax
+	movdqa	%xmm6,%xmm8
+.byte	102,15,58,15,251,8
+	movl	%edx,%edi
+	roll	$5,%edx
+	paddd	%xmm6,%xmm9
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	psrldq	$4,%xmm8
+	xorl	%ebx,%esi
+	addl	%edx,%ecx
+	pxor	%xmm3,%xmm7
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	pxor	%xmm5,%xmm8
+	addl	52(%rsp),%ebx
+	xorl	%eax,%ebp
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	pxor	%xmm8,%xmm7
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	movdqa	%xmm9,32(%rsp)
+	xorl	%eax,%edi
+	addl	%ecx,%ebx
+	movdqa	%xmm7,%xmm10
+	movdqa	%xmm7,%xmm8
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	addl	56(%rsp),%eax
+	xorl	%ebp,%edx
+	pslldq	$12,%xmm10
+	paddd	%xmm7,%xmm7
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	andl	%edx,%esi
+	xorl	%ebp,%edx
+	psrld	$31,%xmm8
+	xorl	%ebp,%esi
+	addl	%ebx,%eax
+	movdqa	%xmm10,%xmm9
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	psrld	$30,%xmm10
+	por	%xmm8,%xmm7
+	addl	60(%rsp),%ebp
+	xorl	%edx,%ecx
+	movl	%eax,%esi
+	roll	$5,%eax
+	pslld	$2,%xmm9
+	pxor	%xmm10,%xmm7
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	movdqa	16(%r11),%xmm10
+	xorl	%edx,%edi
+	addl	%eax,%ebp
+	pxor	%xmm9,%xmm7
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	movdqa	%xmm7,%xmm9
+	addl	0(%rsp),%edx
+	pxor	%xmm4,%xmm0
+.byte	102,68,15,58,15,206,8
+	xorl	%ecx,%ebx
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	pxor	%xmm1,%xmm0
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	movdqa	%xmm10,%xmm8
+	paddd	%xmm7,%xmm10
+	xorl	%ecx,%esi
+	addl	%ebp,%edx
+	pxor	%xmm9,%xmm0
+	rorl	$7,%eax
+	addl	%esi,%edx
+	addl	4(%rsp),%ecx
+	xorl	%ebx,%eax
+	movdqa	%xmm0,%xmm9
+	movdqa	%xmm10,48(%rsp)
+	movl	%edx,%esi
+	roll	$5,%edx
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	pslld	$2,%xmm0
+	xorl	%ebx,%edi
+	addl	%edx,%ecx
+	psrld	$30,%xmm9
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	addl	8(%rsp),%ebx
+	xorl	%eax,%ebp
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	por	%xmm9,%xmm0
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	movdqa	%xmm0,%xmm10
+	xorl	%eax,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	addl	12(%rsp),%eax
+	xorl	%ebp,%edx
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	xorl	%ebp,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	16(%rsp),%ebp
+	pxor	%xmm5,%xmm1
+.byte	102,68,15,58,15,215,8
+	xorl	%edx,%esi
+	movl	%eax,%edi
+	roll	$5,%eax
+	pxor	%xmm2,%xmm1
+	xorl	%ecx,%esi
+	addl	%eax,%ebp
+	movdqa	%xmm8,%xmm9
+	paddd	%xmm0,%xmm8
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	pxor	%xmm10,%xmm1
+	addl	20(%rsp),%edx
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	movdqa	%xmm1,%xmm10
+	movdqa	%xmm8,0(%rsp)
+	xorl	%ebx,%edi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%edi,%edx
+	pslld	$2,%xmm1
+	addl	24(%rsp),%ecx
+	xorl	%ebx,%esi
+	psrld	$30,%xmm10
+	movl	%edx,%edi
+	roll	$5,%edx
+	xorl	%eax,%esi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	por	%xmm10,%xmm1
+	addl	28(%rsp),%ebx
+	xorl	%eax,%edi
+	movdqa	%xmm1,%xmm8
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	xorl	%ebp,%edi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	addl	32(%rsp),%eax
+	pxor	%xmm6,%xmm2
+.byte	102,68,15,58,15,192,8
+	xorl	%ebp,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	pxor	%xmm3,%xmm2
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	movdqa	32(%r11),%xmm10
+	paddd	%xmm1,%xmm9
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	pxor	%xmm8,%xmm2
+	addl	36(%rsp),%ebp
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	movdqa	%xmm2,%xmm8
+	movdqa	%xmm9,16(%rsp)
+	xorl	%ecx,%edi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	pslld	$2,%xmm2
+	addl	40(%rsp),%edx
+	xorl	%ecx,%esi
+	psrld	$30,%xmm8
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	xorl	%ebx,%esi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%esi,%edx
+	por	%xmm8,%xmm2
+	addl	44(%rsp),%ecx
+	xorl	%ebx,%edi
+	movdqa	%xmm2,%xmm9
+	movl	%edx,%esi
+	roll	$5,%edx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	addl	48(%rsp),%ebx
+	pxor	%xmm7,%xmm3
+.byte	102,68,15,58,15,201,8
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	pxor	%xmm4,%xmm3
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	movdqa	%xmm10,%xmm8
+	paddd	%xmm2,%xmm10
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	pxor	%xmm9,%xmm3
+	addl	52(%rsp),%eax
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	movdqa	%xmm3,%xmm9
+	movdqa	%xmm10,32(%rsp)
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	pslld	$2,%xmm3
+	addl	56(%rsp),%ebp
+	xorl	%edx,%esi
+	psrld	$30,%xmm9
+	movl	%eax,%edi
+	roll	$5,%eax
+	xorl	%ecx,%esi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	por	%xmm9,%xmm3
+	addl	60(%rsp),%edx
+	xorl	%ecx,%edi
+	movdqa	%xmm3,%xmm10
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	xorl	%ebx,%edi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%edi,%edx
+	addl	0(%rsp),%ecx
+	pxor	%xmm0,%xmm4
+.byte	102,68,15,58,15,210,8
+	xorl	%ebx,%esi
+	movl	%edx,%edi
+	roll	$5,%edx
+	pxor	%xmm5,%xmm4
+	xorl	%eax,%esi
+	addl	%edx,%ecx
+	movdqa	%xmm8,%xmm9
+	paddd	%xmm3,%xmm8
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	pxor	%xmm10,%xmm4
+	addl	4(%rsp),%ebx
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	movdqa	%xmm4,%xmm10
+	movdqa	%xmm8,48(%rsp)
+	xorl	%ebp,%edi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	pslld	$2,%xmm4
+	addl	8(%rsp),%eax
+	xorl	%ebp,%esi
+	psrld	$30,%xmm10
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	por	%xmm10,%xmm4
+	addl	12(%rsp),%ebp
+	xorl	%edx,%edi
+	movdqa	%xmm4,%xmm8
+	movl	%eax,%esi
+	roll	$5,%eax
+	xorl	%ecx,%edi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	addl	16(%rsp),%edx
+	pxor	%xmm1,%xmm5
+.byte	102,68,15,58,15,195,8
+	xorl	%ecx,%esi
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	pxor	%xmm6,%xmm5
+	xorl	%ebx,%esi
+	addl	%ebp,%edx
+	movdqa	%xmm9,%xmm10
+	paddd	%xmm4,%xmm9
+	rorl	$7,%eax
+	addl	%esi,%edx
+	pxor	%xmm8,%xmm5
+	addl	20(%rsp),%ecx
+	xorl	%ebx,%edi
+	movl	%edx,%esi
+	roll	$5,%edx
+	movdqa	%xmm5,%xmm8
+	movdqa	%xmm9,0(%rsp)
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	pslld	$2,%xmm5
+	addl	24(%rsp),%ebx
+	xorl	%eax,%esi
+	psrld	$30,%xmm8
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	por	%xmm8,%xmm5
+	addl	28(%rsp),%eax
+	xorl	%ebp,%edi
+	movdqa	%xmm5,%xmm9
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	movl	%ecx,%edi
+	pxor	%xmm2,%xmm6
+.byte	102,68,15,58,15,204,8
+	xorl	%edx,%ecx
+	addl	32(%rsp),%ebp
+	andl	%edx,%edi
+	pxor	%xmm7,%xmm6
+	andl	%ecx,%esi
+	rorl	$7,%ebx
+	movdqa	%xmm10,%xmm8
+	paddd	%xmm5,%xmm10
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	pxor	%xmm9,%xmm6
+	roll	$5,%eax
+	addl	%esi,%ebp
+	xorl	%edx,%ecx
+	addl	%eax,%ebp
+	movdqa	%xmm6,%xmm9
+	movdqa	%xmm10,16(%rsp)
+	movl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	36(%rsp),%edx
+	andl	%ecx,%esi
+	pslld	$2,%xmm6
+	andl	%ebx,%edi
+	rorl	$7,%eax
+	psrld	$30,%xmm9
+	addl	%esi,%edx
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	addl	%edi,%edx
+	xorl	%ecx,%ebx
+	addl	%ebp,%edx
+	por	%xmm9,%xmm6
+	movl	%eax,%edi
+	xorl	%ebx,%eax
+	movdqa	%xmm6,%xmm10
+	addl	40(%rsp),%ecx
+	andl	%ebx,%edi
+	andl	%eax,%esi
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%ebx,%eax
+	addl	%edx,%ecx
+	movl	%ebp,%esi
+	xorl	%eax,%ebp
+	addl	44(%rsp),%ebx
+	andl	%eax,%esi
+	andl	%ebp,%edi
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%eax,%ebp
+	addl	%ecx,%ebx
+	movl	%edx,%edi
+	pxor	%xmm3,%xmm7
+.byte	102,68,15,58,15,213,8
+	xorl	%ebp,%edx
+	addl	48(%rsp),%eax
+	andl	%ebp,%edi
+	pxor	%xmm0,%xmm7
+	andl	%edx,%esi
+	rorl	$7,%ecx
+	movdqa	48(%r11),%xmm9
+	paddd	%xmm6,%xmm8
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	pxor	%xmm10,%xmm7
+	roll	$5,%ebx
+	addl	%esi,%eax
+	xorl	%ebp,%edx
+	addl	%ebx,%eax
+	movdqa	%xmm7,%xmm10
+	movdqa	%xmm8,32(%rsp)
+	movl	%ecx,%esi
+	xorl	%edx,%ecx
+	addl	52(%rsp),%ebp
+	andl	%edx,%esi
+	pslld	$2,%xmm7
+	andl	%ecx,%edi
+	rorl	$7,%ebx
+	psrld	$30,%xmm10
+	addl	%esi,%ebp
+	movl	%eax,%esi
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%edx,%ecx
+	addl	%eax,%ebp
+	por	%xmm10,%xmm7
+	movl	%ebx,%edi
+	xorl	%ecx,%ebx
+	movdqa	%xmm7,%xmm8
+	addl	56(%rsp),%edx
+	andl	%ecx,%edi
+	andl	%ebx,%esi
+	rorl	$7,%eax
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	addl	%esi,%edx
+	xorl	%ecx,%ebx
+	addl	%ebp,%edx
+	movl	%eax,%esi
+	xorl	%ebx,%eax
+	addl	60(%rsp),%ecx
+	andl	%ebx,%esi
+	andl	%eax,%edi
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	movl	%edx,%esi
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%ebx,%eax
+	addl	%edx,%ecx
+	movl	%ebp,%edi
+	pxor	%xmm4,%xmm0
+.byte	102,68,15,58,15,198,8
+	xorl	%eax,%ebp
+	addl	0(%rsp),%ebx
+	andl	%eax,%edi
+	pxor	%xmm1,%xmm0
+	andl	%ebp,%esi
+	rorl	$7,%edx
+	movdqa	%xmm9,%xmm10
+	paddd	%xmm7,%xmm9
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	pxor	%xmm8,%xmm0
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%eax,%ebp
+	addl	%ecx,%ebx
+	movdqa	%xmm0,%xmm8
+	movdqa	%xmm9,48(%rsp)
+	movl	%edx,%esi
+	xorl	%ebp,%edx
+	addl	4(%rsp),%eax
+	andl	%ebp,%esi
+	pslld	$2,%xmm0
+	andl	%edx,%edi
+	rorl	$7,%ecx
+	psrld	$30,%xmm8
+	addl	%esi,%eax
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%ebp,%edx
+	addl	%ebx,%eax
+	por	%xmm8,%xmm0
+	movl	%ecx,%edi
+	xorl	%edx,%ecx
+	movdqa	%xmm0,%xmm9
+	addl	8(%rsp),%ebp
+	andl	%edx,%edi
+	andl	%ecx,%esi
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	roll	$5,%eax
+	addl	%esi,%ebp
+	xorl	%edx,%ecx
+	addl	%eax,%ebp
+	movl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	12(%rsp),%edx
+	andl	%ecx,%esi
+	andl	%ebx,%edi
+	rorl	$7,%eax
+	addl	%esi,%edx
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	addl	%edi,%edx
+	xorl	%ecx,%ebx
+	addl	%ebp,%edx
+	movl	%eax,%edi
+	pxor	%xmm5,%xmm1
+.byte	102,68,15,58,15,207,8
+	xorl	%ebx,%eax
+	addl	16(%rsp),%ecx
+	andl	%ebx,%edi
+	pxor	%xmm2,%xmm1
+	andl	%eax,%esi
+	rorl	$7,%ebp
+	movdqa	%xmm10,%xmm8
+	paddd	%xmm0,%xmm10
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	pxor	%xmm9,%xmm1
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%ebx,%eax
+	addl	%edx,%ecx
+	movdqa	%xmm1,%xmm9
+	movdqa	%xmm10,0(%rsp)
+	movl	%ebp,%esi
+	xorl	%eax,%ebp
+	addl	20(%rsp),%ebx
+	andl	%eax,%esi
+	pslld	$2,%xmm1
+	andl	%ebp,%edi
+	rorl	$7,%edx
+	psrld	$30,%xmm9
+	addl	%esi,%ebx
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%eax,%ebp
+	addl	%ecx,%ebx
+	por	%xmm9,%xmm1
+	movl	%edx,%edi
+	xorl	%ebp,%edx
+	movdqa	%xmm1,%xmm10
+	addl	24(%rsp),%eax
+	andl	%ebp,%edi
+	andl	%edx,%esi
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	addl	%esi,%eax
+	xorl	%ebp,%edx
+	addl	%ebx,%eax
+	movl	%ecx,%esi
+	xorl	%edx,%ecx
+	addl	28(%rsp),%ebp
+	andl	%edx,%esi
+	andl	%ecx,%edi
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	movl	%eax,%esi
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%edx,%ecx
+	addl	%eax,%ebp
+	movl	%ebx,%edi
+	pxor	%xmm6,%xmm2
+.byte	102,68,15,58,15,208,8
+	xorl	%ecx,%ebx
+	addl	32(%rsp),%edx
+	andl	%ecx,%edi
+	pxor	%xmm3,%xmm2
+	andl	%ebx,%esi
+	rorl	$7,%eax
+	movdqa	%xmm8,%xmm9
+	paddd	%xmm1,%xmm8
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	pxor	%xmm10,%xmm2
+	roll	$5,%ebp
+	addl	%esi,%edx
+	xorl	%ecx,%ebx
+	addl	%ebp,%edx
+	movdqa	%xmm2,%xmm10
+	movdqa	%xmm8,16(%rsp)
+	movl	%eax,%esi
+	xorl	%ebx,%eax
+	addl	36(%rsp),%ecx
+	andl	%ebx,%esi
+	pslld	$2,%xmm2
+	andl	%eax,%edi
+	rorl	$7,%ebp
+	psrld	$30,%xmm10
+	addl	%esi,%ecx
+	movl	%edx,%esi
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%ebx,%eax
+	addl	%edx,%ecx
+	por	%xmm10,%xmm2
+	movl	%ebp,%edi
+	xorl	%eax,%ebp
+	movdqa	%xmm2,%xmm8
+	addl	40(%rsp),%ebx
+	andl	%eax,%edi
+	andl	%ebp,%esi
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%eax,%ebp
+	addl	%ecx,%ebx
+	movl	%edx,%esi
+	xorl	%ebp,%edx
+	addl	44(%rsp),%eax
+	andl	%ebp,%esi
+	andl	%edx,%edi
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%ebp,%edx
+	addl	%ebx,%eax
+	addl	48(%rsp),%ebp
+	pxor	%xmm7,%xmm3
+.byte	102,68,15,58,15,193,8
+	xorl	%edx,%esi
+	movl	%eax,%edi
+	roll	$5,%eax
+	pxor	%xmm4,%xmm3
+	xorl	%ecx,%esi
+	addl	%eax,%ebp
+	movdqa	%xmm9,%xmm10
+	paddd	%xmm2,%xmm9
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	pxor	%xmm8,%xmm3
+	addl	52(%rsp),%edx
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	movdqa	%xmm3,%xmm8
+	movdqa	%xmm9,32(%rsp)
+	xorl	%ebx,%edi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%edi,%edx
+	pslld	$2,%xmm3
+	addl	56(%rsp),%ecx
+	xorl	%ebx,%esi
+	psrld	$30,%xmm8
+	movl	%edx,%edi
+	roll	$5,%edx
+	xorl	%eax,%esi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	por	%xmm8,%xmm3
+	addl	60(%rsp),%ebx
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	xorl	%ebp,%edi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	addl	0(%rsp),%eax
+	paddd	%xmm3,%xmm10
+	xorl	%ebp,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	xorl	%edx,%esi
+	movdqa	%xmm10,48(%rsp)
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	addl	4(%rsp),%ebp
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	xorl	%ecx,%edi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	addl	8(%rsp),%edx
+	xorl	%ecx,%esi
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	xorl	%ebx,%esi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%esi,%edx
+	addl	12(%rsp),%ecx
+	xorl	%ebx,%edi
+	movl	%edx,%esi
+	roll	$5,%edx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	cmpq	%r10,%r9
+	je	L$done_ssse3
+	movdqa	64(%r11),%xmm6
+	movdqa	0(%r11),%xmm9
+	movdqu	0(%r9),%xmm0
+	movdqu	16(%r9),%xmm1
+	movdqu	32(%r9),%xmm2
+	movdqu	48(%r9),%xmm3
+.byte	102,15,56,0,198
+	addq	$64,%r9
+	addl	16(%rsp),%ebx
+	xorl	%eax,%esi
+.byte	102,15,56,0,206
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	paddd	%xmm9,%xmm0
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	movdqa	%xmm0,0(%rsp)
+	addl	20(%rsp),%eax
+	xorl	%ebp,%edi
+	psubd	%xmm9,%xmm0
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	24(%rsp),%ebp
+	xorl	%edx,%esi
+	movl	%eax,%edi
+	roll	$5,%eax
+	xorl	%ecx,%esi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	addl	28(%rsp),%edx
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	xorl	%ebx,%edi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%edi,%edx
+	addl	32(%rsp),%ecx
+	xorl	%ebx,%esi
+.byte	102,15,56,0,214
+	movl	%edx,%edi
+	roll	$5,%edx
+	paddd	%xmm9,%xmm1
+	xorl	%eax,%esi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	movdqa	%xmm1,16(%rsp)
+	addl	36(%rsp),%ebx
+	xorl	%eax,%edi
+	psubd	%xmm9,%xmm1
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	xorl	%ebp,%edi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	addl	40(%rsp),%eax
+	xorl	%ebp,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	addl	44(%rsp),%ebp
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	xorl	%ecx,%edi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	addl	48(%rsp),%edx
+	xorl	%ecx,%esi
+.byte	102,15,56,0,222
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	paddd	%xmm9,%xmm2
+	xorl	%ebx,%esi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%esi,%edx
+	movdqa	%xmm2,32(%rsp)
+	addl	52(%rsp),%ecx
+	xorl	%ebx,%edi
+	psubd	%xmm9,%xmm2
+	movl	%edx,%esi
+	roll	$5,%edx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	addl	56(%rsp),%ebx
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	addl	60(%rsp),%eax
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	0(%r8),%eax
+	addl	4(%r8),%esi
+	addl	8(%r8),%ecx
+	addl	12(%r8),%edx
+	movl	%eax,0(%r8)
+	addl	16(%r8),%ebp
+	movl	%esi,4(%r8)
+	movl	%esi,%ebx
+	movl	%ecx,8(%r8)
+	movl	%edx,12(%r8)
+	movl	%ebp,16(%r8)
+	jmp	L$oop_ssse3
+
+.p2align	4
+L$done_ssse3:
+	addl	16(%rsp),%ebx
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	addl	20(%rsp),%eax
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	24(%rsp),%ebp
+	xorl	%edx,%esi
+	movl	%eax,%edi
+	roll	$5,%eax
+	xorl	%ecx,%esi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	addl	28(%rsp),%edx
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	xorl	%ebx,%edi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%edi,%edx
+	addl	32(%rsp),%ecx
+	xorl	%ebx,%esi
+	movl	%edx,%edi
+	roll	$5,%edx
+	xorl	%eax,%esi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	addl	36(%rsp),%ebx
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	xorl	%ebp,%edi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	addl	40(%rsp),%eax
+	xorl	%ebp,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	addl	44(%rsp),%ebp
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	xorl	%ecx,%edi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	addl	48(%rsp),%edx
+	xorl	%ecx,%esi
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	xorl	%ebx,%esi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%esi,%edx
+	addl	52(%rsp),%ecx
+	xorl	%ebx,%edi
+	movl	%edx,%esi
+	roll	$5,%edx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	addl	56(%rsp),%ebx
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	addl	60(%rsp),%eax
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	0(%r8),%eax
+	addl	4(%r8),%esi
+	addl	8(%r8),%ecx
+	movl	%eax,0(%r8)
+	addl	12(%r8),%edx
+	movl	%esi,4(%r8)
+	addl	16(%r8),%ebp
+	movl	%ecx,8(%r8)
+	movl	%edx,12(%r8)
+	movl	%ebp,16(%r8)
+	leaq	64(%rsp),%rsi
+	movq	0(%rsi),%r12
+	movq	8(%rsi),%rbp
+	movq	16(%rsi),%rbx
+	leaq	24(%rsi),%rsp
+L$epilogue_ssse3:
+	retq
+
+.p2align	6
+K_XX_XX:
+.long	0x5a827999,0x5a827999,0x5a827999,0x5a827999	
+.long	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1	
+.long	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc	
+.long	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6	
+.long	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f	
+.byte	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align	6
diff --git a/ext/libressl/crypto/sha/sha1-masm-x86_64.S b/ext/libressl/crypto/sha/sha1-masm-x86_64.S
new file mode 100644
index 0000000..36d8732
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha1-masm-x86_64.S
@@ -0,0 +1,2746 @@
+; 1 "crypto/sha/sha1-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/sha/sha1-masm-x86_64.S.tmp" 2
+OPTION	DOTNAME
+
+; 1 "./crypto/x86_arch.h" 1
+
+
+; 16 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+; 40 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 "crypto/sha/sha1-masm-x86_64.S.tmp" 2
+.text$	SEGMENT ALIGN(64) 'CODE'
+EXTERN	OPENSSL_ia32cap_P:NEAR
+
+
+PUBLIC	sha1_block_data_order
+
+ALIGN	16
+sha1_block_data_order	PROC PUBLIC
+	mov	QWORD PTR[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD PTR[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_sha1_block_data_order::
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+	mov	r9d,DWORD PTR[((OPENSSL_ia32cap_P+0))]
+	mov	r8d,DWORD PTR[((OPENSSL_ia32cap_P+4))]
+	test	r8d,(1 SHL 9)
+	jz	$L$ialu
+	jmp	_ssse3_shortcut
+
+ALIGN	16
+$L$ialu::
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	mov	r11,rsp
+	mov	r8,rdi
+	sub	rsp,72
+	mov	r9,rsi
+	and	rsp,-64
+	mov	r10,rdx
+	mov	QWORD PTR[64+rsp],r11
+$L$prologue::
+
+	mov	esi,DWORD PTR[r8]
+	mov	edi,DWORD PTR[4+r8]
+	mov	r11d,DWORD PTR[8+r8]
+	mov	r12d,DWORD PTR[12+r8]
+	mov	r13d,DWORD PTR[16+r8]
+	jmp	$L$loop
+
+ALIGN	16
+$L$loop::
+	mov	edx,DWORD PTR[r9]
+	bswap	edx
+	mov	DWORD PTR[rsp],edx
+	mov	eax,r11d
+	mov	ebp,DWORD PTR[4+r9]
+	mov	ecx,esi
+	xor	eax,r12d
+	bswap	ebp
+	rol	ecx,5
+	lea	r13d,DWORD PTR[1518500249+r13*1+rdx]
+	and	eax,edi
+	mov	DWORD PTR[4+rsp],ebp
+	add	r13d,ecx
+	xor	eax,r12d
+	rol	edi,30
+	add	r13d,eax
+	mov	eax,edi
+	mov	edx,DWORD PTR[8+r9]
+	mov	ecx,r13d
+	xor	eax,r11d
+	bswap	edx
+	rol	ecx,5
+	lea	r12d,DWORD PTR[1518500249+r12*1+rbp]
+	and	eax,esi
+	mov	DWORD PTR[8+rsp],edx
+	add	r12d,ecx
+	xor	eax,r11d
+	rol	esi,30
+	add	r12d,eax
+	mov	eax,esi
+	mov	ebp,DWORD PTR[12+r9]
+	mov	ecx,r12d
+	xor	eax,edi
+	bswap	ebp
+	rol	ecx,5
+	lea	r11d,DWORD PTR[1518500249+r11*1+rdx]
+	and	eax,r13d
+	mov	DWORD PTR[12+rsp],ebp
+	add	r11d,ecx
+	xor	eax,edi
+	rol	r13d,30
+	add	r11d,eax
+	mov	eax,r13d
+	mov	edx,DWORD PTR[16+r9]
+	mov	ecx,r11d
+	xor	eax,esi
+	bswap	edx
+	rol	ecx,5
+	lea	edi,DWORD PTR[1518500249+rdi*1+rbp]
+	and	eax,r12d
+	mov	DWORD PTR[16+rsp],edx
+	add	edi,ecx
+	xor	eax,esi
+	rol	r12d,30
+	add	edi,eax
+	mov	eax,r12d
+	mov	ebp,DWORD PTR[20+r9]
+	mov	ecx,edi
+	xor	eax,r13d
+	bswap	ebp
+	rol	ecx,5
+	lea	esi,DWORD PTR[1518500249+rsi*1+rdx]
+	and	eax,r11d
+	mov	DWORD PTR[20+rsp],ebp
+	add	esi,ecx
+	xor	eax,r13d
+	rol	r11d,30
+	add	esi,eax
+	mov	eax,r11d
+	mov	edx,DWORD PTR[24+r9]
+	mov	ecx,esi
+	xor	eax,r12d
+	bswap	edx
+	rol	ecx,5
+	lea	r13d,DWORD PTR[1518500249+r13*1+rbp]
+	and	eax,edi
+	mov	DWORD PTR[24+rsp],edx
+	add	r13d,ecx
+	xor	eax,r12d
+	rol	edi,30
+	add	r13d,eax
+	mov	eax,edi
+	mov	ebp,DWORD PTR[28+r9]
+	mov	ecx,r13d
+	xor	eax,r11d
+	bswap	ebp
+	rol	ecx,5
+	lea	r12d,DWORD PTR[1518500249+r12*1+rdx]
+	and	eax,esi
+	mov	DWORD PTR[28+rsp],ebp
+	add	r12d,ecx
+	xor	eax,r11d
+	rol	esi,30
+	add	r12d,eax
+	mov	eax,esi
+	mov	edx,DWORD PTR[32+r9]
+	mov	ecx,r12d
+	xor	eax,edi
+	bswap	edx
+	rol	ecx,5
+	lea	r11d,DWORD PTR[1518500249+r11*1+rbp]
+	and	eax,r13d
+	mov	DWORD PTR[32+rsp],edx
+	add	r11d,ecx
+	xor	eax,edi
+	rol	r13d,30
+	add	r11d,eax
+	mov	eax,r13d
+	mov	ebp,DWORD PTR[36+r9]
+	mov	ecx,r11d
+	xor	eax,esi
+	bswap	ebp
+	rol	ecx,5
+	lea	edi,DWORD PTR[1518500249+rdi*1+rdx]
+	and	eax,r12d
+	mov	DWORD PTR[36+rsp],ebp
+	add	edi,ecx
+	xor	eax,esi
+	rol	r12d,30
+	add	edi,eax
+	mov	eax,r12d
+	mov	edx,DWORD PTR[40+r9]
+	mov	ecx,edi
+	xor	eax,r13d
+	bswap	edx
+	rol	ecx,5
+	lea	esi,DWORD PTR[1518500249+rsi*1+rbp]
+	and	eax,r11d
+	mov	DWORD PTR[40+rsp],edx
+	add	esi,ecx
+	xor	eax,r13d
+	rol	r11d,30
+	add	esi,eax
+	mov	eax,r11d
+	mov	ebp,DWORD PTR[44+r9]
+	mov	ecx,esi
+	xor	eax,r12d
+	bswap	ebp
+	rol	ecx,5
+	lea	r13d,DWORD PTR[1518500249+r13*1+rdx]
+	and	eax,edi
+	mov	DWORD PTR[44+rsp],ebp
+	add	r13d,ecx
+	xor	eax,r12d
+	rol	edi,30
+	add	r13d,eax
+	mov	eax,edi
+	mov	edx,DWORD PTR[48+r9]
+	mov	ecx,r13d
+	xor	eax,r11d
+	bswap	edx
+	rol	ecx,5
+	lea	r12d,DWORD PTR[1518500249+r12*1+rbp]
+	and	eax,esi
+	mov	DWORD PTR[48+rsp],edx
+	add	r12d,ecx
+	xor	eax,r11d
+	rol	esi,30
+	add	r12d,eax
+	mov	eax,esi
+	mov	ebp,DWORD PTR[52+r9]
+	mov	ecx,r12d
+	xor	eax,edi
+	bswap	ebp
+	rol	ecx,5
+	lea	r11d,DWORD PTR[1518500249+r11*1+rdx]
+	and	eax,r13d
+	mov	DWORD PTR[52+rsp],ebp
+	add	r11d,ecx
+	xor	eax,edi
+	rol	r13d,30
+	add	r11d,eax
+	mov	eax,r13d
+	mov	edx,DWORD PTR[56+r9]
+	mov	ecx,r11d
+	xor	eax,esi
+	bswap	edx
+	rol	ecx,5
+	lea	edi,DWORD PTR[1518500249+rdi*1+rbp]
+	and	eax,r12d
+	mov	DWORD PTR[56+rsp],edx
+	add	edi,ecx
+	xor	eax,esi
+	rol	r12d,30
+	add	edi,eax
+	mov	eax,r12d
+	mov	ebp,DWORD PTR[60+r9]
+	mov	ecx,edi
+	xor	eax,r13d
+	bswap	ebp
+	rol	ecx,5
+	lea	esi,DWORD PTR[1518500249+rsi*1+rdx]
+	and	eax,r11d
+	mov	DWORD PTR[60+rsp],ebp
+	add	esi,ecx
+	xor	eax,r13d
+	rol	r11d,30
+	add	esi,eax
+	mov	edx,DWORD PTR[rsp]
+	mov	eax,r11d
+	mov	ecx,esi
+	xor	edx,DWORD PTR[8+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	edx,DWORD PTR[32+rsp]
+	and	eax,edi
+	lea	r13d,DWORD PTR[1518500249+r13*1+rbp]
+	xor	edx,DWORD PTR[52+rsp]
+	xor	eax,r12d
+	rol	edx,1
+	add	r13d,ecx
+	rol	edi,30
+	mov	DWORD PTR[rsp],edx
+	add	r13d,eax
+	mov	ebp,DWORD PTR[4+rsp]
+	mov	eax,edi
+	mov	ecx,r13d
+	xor	ebp,DWORD PTR[12+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	ebp,DWORD PTR[36+rsp]
+	and	eax,esi
+	lea	r12d,DWORD PTR[1518500249+r12*1+rdx]
+	xor	ebp,DWORD PTR[56+rsp]
+	xor	eax,r11d
+	rol	ebp,1
+	add	r12d,ecx
+	rol	esi,30
+	mov	DWORD PTR[4+rsp],ebp
+	add	r12d,eax
+	mov	edx,DWORD PTR[8+rsp]
+	mov	eax,esi
+	mov	ecx,r12d
+	xor	edx,DWORD PTR[16+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	edx,DWORD PTR[40+rsp]
+	and	eax,r13d
+	lea	r11d,DWORD PTR[1518500249+r11*1+rbp]
+	xor	edx,DWORD PTR[60+rsp]
+	xor	eax,edi
+	rol	edx,1
+	add	r11d,ecx
+	rol	r13d,30
+	mov	DWORD PTR[8+rsp],edx
+	add	r11d,eax
+	mov	ebp,DWORD PTR[12+rsp]
+	mov	eax,r13d
+	mov	ecx,r11d
+	xor	ebp,DWORD PTR[20+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	ebp,DWORD PTR[44+rsp]
+	and	eax,r12d
+	lea	edi,DWORD PTR[1518500249+rdi*1+rdx]
+	xor	ebp,DWORD PTR[rsp]
+	xor	eax,esi
+	rol	ebp,1
+	add	edi,ecx
+	rol	r12d,30
+	mov	DWORD PTR[12+rsp],ebp
+	add	edi,eax
+	mov	edx,DWORD PTR[16+rsp]
+	mov	eax,r12d
+	mov	ecx,edi
+	xor	edx,DWORD PTR[24+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	xor	edx,DWORD PTR[48+rsp]
+	and	eax,r11d
+	lea	esi,DWORD PTR[1518500249+rsi*1+rbp]
+	xor	edx,DWORD PTR[4+rsp]
+	xor	eax,r13d
+	rol	edx,1
+	add	esi,ecx
+	rol	r11d,30
+	mov	DWORD PTR[16+rsp],edx
+	add	esi,eax
+	mov	ebp,DWORD PTR[20+rsp]
+	mov	eax,r11d
+	mov	ecx,esi
+	xor	ebp,DWORD PTR[28+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	lea	r13d,DWORD PTR[1859775393+r13*1+rdx]
+	xor	ebp,DWORD PTR[52+rsp]
+	xor	eax,r12d
+	add	r13d,ecx
+	xor	ebp,DWORD PTR[8+rsp]
+	rol	edi,30
+	add	r13d,eax
+	rol	ebp,1
+	mov	DWORD PTR[20+rsp],ebp
+	mov	edx,DWORD PTR[24+rsp]
+	mov	eax,edi
+	mov	ecx,r13d
+	xor	edx,DWORD PTR[32+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	lea	r12d,DWORD PTR[1859775393+r12*1+rbp]
+	xor	edx,DWORD PTR[56+rsp]
+	xor	eax,r11d
+	add	r12d,ecx
+	xor	edx,DWORD PTR[12+rsp]
+	rol	esi,30
+	add	r12d,eax
+	rol	edx,1
+	mov	DWORD PTR[24+rsp],edx
+	mov	ebp,DWORD PTR[28+rsp]
+	mov	eax,esi
+	mov	ecx,r12d
+	xor	ebp,DWORD PTR[36+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	lea	r11d,DWORD PTR[1859775393+r11*1+rdx]
+	xor	ebp,DWORD PTR[60+rsp]
+	xor	eax,edi
+	add	r11d,ecx
+	xor	ebp,DWORD PTR[16+rsp]
+	rol	r13d,30
+	add	r11d,eax
+	rol	ebp,1
+	mov	DWORD PTR[28+rsp],ebp
+	mov	edx,DWORD PTR[32+rsp]
+	mov	eax,r13d
+	mov	ecx,r11d
+	xor	edx,DWORD PTR[40+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	lea	edi,DWORD PTR[1859775393+rdi*1+rbp]
+	xor	edx,DWORD PTR[rsp]
+	xor	eax,esi
+	add	edi,ecx
+	xor	edx,DWORD PTR[20+rsp]
+	rol	r12d,30
+	add	edi,eax
+	rol	edx,1
+	mov	DWORD PTR[32+rsp],edx
+	mov	ebp,DWORD PTR[36+rsp]
+	mov	eax,r12d
+	mov	ecx,edi
+	xor	ebp,DWORD PTR[44+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	lea	esi,DWORD PTR[1859775393+rsi*1+rdx]
+	xor	ebp,DWORD PTR[4+rsp]
+	xor	eax,r13d
+	add	esi,ecx
+	xor	ebp,DWORD PTR[24+rsp]
+	rol	r11d,30
+	add	esi,eax
+	rol	ebp,1
+	mov	DWORD PTR[36+rsp],ebp
+	mov	edx,DWORD PTR[40+rsp]
+	mov	eax,r11d
+	mov	ecx,esi
+	xor	edx,DWORD PTR[48+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	lea	r13d,DWORD PTR[1859775393+r13*1+rbp]
+	xor	edx,DWORD PTR[8+rsp]
+	xor	eax,r12d
+	add	r13d,ecx
+	xor	edx,DWORD PTR[28+rsp]
+	rol	edi,30
+	add	r13d,eax
+	rol	edx,1
+	mov	DWORD PTR[40+rsp],edx
+	mov	ebp,DWORD PTR[44+rsp]
+	mov	eax,edi
+	mov	ecx,r13d
+	xor	ebp,DWORD PTR[52+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	lea	r12d,DWORD PTR[1859775393+r12*1+rdx]
+	xor	ebp,DWORD PTR[12+rsp]
+	xor	eax,r11d
+	add	r12d,ecx
+	xor	ebp,DWORD PTR[32+rsp]
+	rol	esi,30
+	add	r12d,eax
+	rol	ebp,1
+	mov	DWORD PTR[44+rsp],ebp
+	mov	edx,DWORD PTR[48+rsp]
+	mov	eax,esi
+	mov	ecx,r12d
+	xor	edx,DWORD PTR[56+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	lea	r11d,DWORD PTR[1859775393+r11*1+rbp]
+	xor	edx,DWORD PTR[16+rsp]
+	xor	eax,edi
+	add	r11d,ecx
+	xor	edx,DWORD PTR[36+rsp]
+	rol	r13d,30
+	add	r11d,eax
+	rol	edx,1
+	mov	DWORD PTR[48+rsp],edx
+	mov	ebp,DWORD PTR[52+rsp]
+	mov	eax,r13d
+	mov	ecx,r11d
+	xor	ebp,DWORD PTR[60+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	lea	edi,DWORD PTR[1859775393+rdi*1+rdx]
+	xor	ebp,DWORD PTR[20+rsp]
+	xor	eax,esi
+	add	edi,ecx
+	xor	ebp,DWORD PTR[40+rsp]
+	rol	r12d,30
+	add	edi,eax
+	rol	ebp,1
+	mov	DWORD PTR[52+rsp],ebp
+	mov	edx,DWORD PTR[56+rsp]
+	mov	eax,r12d
+	mov	ecx,edi
+	xor	edx,DWORD PTR[rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	lea	esi,DWORD PTR[1859775393+rsi*1+rbp]
+	xor	edx,DWORD PTR[24+rsp]
+	xor	eax,r13d
+	add	esi,ecx
+	xor	edx,DWORD PTR[44+rsp]
+	rol	r11d,30
+	add	esi,eax
+	rol	edx,1
+	mov	DWORD PTR[56+rsp],edx
+	mov	ebp,DWORD PTR[60+rsp]
+	mov	eax,r11d
+	mov	ecx,esi
+	xor	ebp,DWORD PTR[4+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	lea	r13d,DWORD PTR[1859775393+r13*1+rdx]
+	xor	ebp,DWORD PTR[28+rsp]
+	xor	eax,r12d
+	add	r13d,ecx
+	xor	ebp,DWORD PTR[48+rsp]
+	rol	edi,30
+	add	r13d,eax
+	rol	ebp,1
+	mov	DWORD PTR[60+rsp],ebp
+	mov	edx,DWORD PTR[rsp]
+	mov	eax,edi
+	mov	ecx,r13d
+	xor	edx,DWORD PTR[8+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	lea	r12d,DWORD PTR[1859775393+r12*1+rbp]
+	xor	edx,DWORD PTR[32+rsp]
+	xor	eax,r11d
+	add	r12d,ecx
+	xor	edx,DWORD PTR[52+rsp]
+	rol	esi,30
+	add	r12d,eax
+	rol	edx,1
+	mov	DWORD PTR[rsp],edx
+	mov	ebp,DWORD PTR[4+rsp]
+	mov	eax,esi
+	mov	ecx,r12d
+	xor	ebp,DWORD PTR[12+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	lea	r11d,DWORD PTR[1859775393+r11*1+rdx]
+	xor	ebp,DWORD PTR[36+rsp]
+	xor	eax,edi
+	add	r11d,ecx
+	xor	ebp,DWORD PTR[56+rsp]
+	rol	r13d,30
+	add	r11d,eax
+	rol	ebp,1
+	mov	DWORD PTR[4+rsp],ebp
+	mov	edx,DWORD PTR[8+rsp]
+	mov	eax,r13d
+	mov	ecx,r11d
+	xor	edx,DWORD PTR[16+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	lea	edi,DWORD PTR[1859775393+rdi*1+rbp]
+	xor	edx,DWORD PTR[40+rsp]
+	xor	eax,esi
+	add	edi,ecx
+	xor	edx,DWORD PTR[60+rsp]
+	rol	r12d,30
+	add	edi,eax
+	rol	edx,1
+	mov	DWORD PTR[8+rsp],edx
+	mov	ebp,DWORD PTR[12+rsp]
+	mov	eax,r12d
+	mov	ecx,edi
+	xor	ebp,DWORD PTR[20+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	lea	esi,DWORD PTR[1859775393+rsi*1+rdx]
+	xor	ebp,DWORD PTR[44+rsp]
+	xor	eax,r13d
+	add	esi,ecx
+	xor	ebp,DWORD PTR[rsp]
+	rol	r11d,30
+	add	esi,eax
+	rol	ebp,1
+	mov	DWORD PTR[12+rsp],ebp
+	mov	edx,DWORD PTR[16+rsp]
+	mov	eax,r11d
+	mov	ecx,esi
+	xor	edx,DWORD PTR[24+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	lea	r13d,DWORD PTR[1859775393+r13*1+rbp]
+	xor	edx,DWORD PTR[48+rsp]
+	xor	eax,r12d
+	add	r13d,ecx
+	xor	edx,DWORD PTR[4+rsp]
+	rol	edi,30
+	add	r13d,eax
+	rol	edx,1
+	mov	DWORD PTR[16+rsp],edx
+	mov	ebp,DWORD PTR[20+rsp]
+	mov	eax,edi
+	mov	ecx,r13d
+	xor	ebp,DWORD PTR[28+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	lea	r12d,DWORD PTR[1859775393+r12*1+rdx]
+	xor	ebp,DWORD PTR[52+rsp]
+	xor	eax,r11d
+	add	r12d,ecx
+	xor	ebp,DWORD PTR[8+rsp]
+	rol	esi,30
+	add	r12d,eax
+	rol	ebp,1
+	mov	DWORD PTR[20+rsp],ebp
+	mov	edx,DWORD PTR[24+rsp]
+	mov	eax,esi
+	mov	ecx,r12d
+	xor	edx,DWORD PTR[32+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	lea	r11d,DWORD PTR[1859775393+r11*1+rbp]
+	xor	edx,DWORD PTR[56+rsp]
+	xor	eax,edi
+	add	r11d,ecx
+	xor	edx,DWORD PTR[12+rsp]
+	rol	r13d,30
+	add	r11d,eax
+	rol	edx,1
+	mov	DWORD PTR[24+rsp],edx
+	mov	ebp,DWORD PTR[28+rsp]
+	mov	eax,r13d
+	mov	ecx,r11d
+	xor	ebp,DWORD PTR[36+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	lea	edi,DWORD PTR[1859775393+rdi*1+rdx]
+	xor	ebp,DWORD PTR[60+rsp]
+	xor	eax,esi
+	add	edi,ecx
+	xor	ebp,DWORD PTR[16+rsp]
+	rol	r12d,30
+	add	edi,eax
+	rol	ebp,1
+	mov	DWORD PTR[28+rsp],ebp
+	mov	edx,DWORD PTR[32+rsp]
+	mov	eax,r12d
+	mov	ecx,edi
+	xor	edx,DWORD PTR[40+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	lea	esi,DWORD PTR[1859775393+rsi*1+rbp]
+	xor	edx,DWORD PTR[rsp]
+	xor	eax,r13d
+	add	esi,ecx
+	xor	edx,DWORD PTR[20+rsp]
+	rol	r11d,30
+	add	esi,eax
+	rol	edx,1
+	mov	DWORD PTR[32+rsp],edx
+	mov	ebp,DWORD PTR[36+rsp]
+	mov	eax,r11d
+	mov	ebx,r11d
+	xor	ebp,DWORD PTR[44+rsp]
+	and	eax,r12d
+	mov	ecx,esi
+	xor	ebp,DWORD PTR[4+rsp]
+	xor	ebx,r12d
+	lea	r13d,DWORD PTR[((-1894007588))+r13*1+rdx]
+	rol	ecx,5
+	xor	ebp,DWORD PTR[24+rsp]
+	add	r13d,eax
+	and	ebx,edi
+	rol	ebp,1
+	add	r13d,ebx
+	rol	edi,30
+	mov	DWORD PTR[36+rsp],ebp
+	add	r13d,ecx
+	mov	edx,DWORD PTR[40+rsp]
+	mov	eax,edi
+	mov	ebx,edi
+	xor	edx,DWORD PTR[48+rsp]
+	and	eax,r11d
+	mov	ecx,r13d
+	xor	edx,DWORD PTR[8+rsp]
+	xor	ebx,r11d
+	lea	r12d,DWORD PTR[((-1894007588))+r12*1+rbp]
+	rol	ecx,5
+	xor	edx,DWORD PTR[28+rsp]
+	add	r12d,eax
+	and	ebx,esi
+	rol	edx,1
+	add	r12d,ebx
+	rol	esi,30
+	mov	DWORD PTR[40+rsp],edx
+	add	r12d,ecx
+	mov	ebp,DWORD PTR[44+rsp]
+	mov	eax,esi
+	mov	ebx,esi
+	xor	ebp,DWORD PTR[52+rsp]
+	and	eax,edi
+	mov	ecx,r12d
+	xor	ebp,DWORD PTR[12+rsp]
+	xor	ebx,edi
+	lea	r11d,DWORD PTR[((-1894007588))+r11*1+rdx]
+	rol	ecx,5
+	xor	ebp,DWORD PTR[32+rsp]
+	add	r11d,eax
+	and	ebx,r13d
+	rol	ebp,1
+	add	r11d,ebx
+	rol	r13d,30
+	mov	DWORD PTR[44+rsp],ebp
+	add	r11d,ecx
+	mov	edx,DWORD PTR[48+rsp]
+	mov	eax,r13d
+	mov	ebx,r13d
+	xor	edx,DWORD PTR[56+rsp]
+	and	eax,esi
+	mov	ecx,r11d
+	xor	edx,DWORD PTR[16+rsp]
+	xor	ebx,esi
+	lea	edi,DWORD PTR[((-1894007588))+rdi*1+rbp]
+	rol	ecx,5
+	xor	edx,DWORD PTR[36+rsp]
+	add	edi,eax
+	and	ebx,r12d
+	rol	edx,1
+	add	edi,ebx
+	rol	r12d,30
+	mov	DWORD PTR[48+rsp],edx
+	add	edi,ecx
+	mov	ebp,DWORD PTR[52+rsp]
+	mov	eax,r12d
+	mov	ebx,r12d
+	xor	ebp,DWORD PTR[60+rsp]
+	and	eax,r13d
+	mov	ecx,edi
+	xor	ebp,DWORD PTR[20+rsp]
+	xor	ebx,r13d
+	lea	esi,DWORD PTR[((-1894007588))+rsi*1+rdx]
+	rol	ecx,5
+	xor	ebp,DWORD PTR[40+rsp]
+	add	esi,eax
+	and	ebx,r11d
+	rol	ebp,1
+	add	esi,ebx
+	rol	r11d,30
+	mov	DWORD PTR[52+rsp],ebp
+	add	esi,ecx
+	mov	edx,DWORD PTR[56+rsp]
+	mov	eax,r11d
+	mov	ebx,r11d
+	xor	edx,DWORD PTR[rsp]
+	and	eax,r12d
+	mov	ecx,esi
+	xor	edx,DWORD PTR[24+rsp]
+	xor	ebx,r12d
+	lea	r13d,DWORD PTR[((-1894007588))+r13*1+rbp]
+	rol	ecx,5
+	xor	edx,DWORD PTR[44+rsp]
+	add	r13d,eax
+	and	ebx,edi
+	rol	edx,1
+	add	r13d,ebx
+	rol	edi,30
+	mov	DWORD PTR[56+rsp],edx
+	add	r13d,ecx
+	mov	ebp,DWORD PTR[60+rsp]
+	mov	eax,edi
+	mov	ebx,edi
+	xor	ebp,DWORD PTR[4+rsp]
+	and	eax,r11d
+	mov	ecx,r13d
+	xor	ebp,DWORD PTR[28+rsp]
+	xor	ebx,r11d
+	lea	r12d,DWORD PTR[((-1894007588))+r12*1+rdx]
+	rol	ecx,5
+	xor	ebp,DWORD PTR[48+rsp]
+	add	r12d,eax
+	and	ebx,esi
+	rol	ebp,1
+	add	r12d,ebx
+	rol	esi,30
+	mov	DWORD PTR[60+rsp],ebp
+	add	r12d,ecx
+	mov	edx,DWORD PTR[rsp]
+	mov	eax,esi
+	mov	ebx,esi
+	xor	edx,DWORD PTR[8+rsp]
+	and	eax,edi
+	mov	ecx,r12d
+	xor	edx,DWORD PTR[32+rsp]
+	xor	ebx,edi
+	lea	r11d,DWORD PTR[((-1894007588))+r11*1+rbp]
+	rol	ecx,5
+	xor	edx,DWORD PTR[52+rsp]
+	add	r11d,eax
+	and	ebx,r13d
+	rol	edx,1
+	add	r11d,ebx
+	rol	r13d,30
+	mov	DWORD PTR[rsp],edx
+	add	r11d,ecx
+	mov	ebp,DWORD PTR[4+rsp]
+	mov	eax,r13d
+	mov	ebx,r13d
+	xor	ebp,DWORD PTR[12+rsp]
+	and	eax,esi
+	mov	ecx,r11d
+	xor	ebp,DWORD PTR[36+rsp]
+	xor	ebx,esi
+	lea	edi,DWORD PTR[((-1894007588))+rdi*1+rdx]
+	rol	ecx,5
+	xor	ebp,DWORD PTR[56+rsp]
+	add	edi,eax
+	and	ebx,r12d
+	rol	ebp,1
+	add	edi,ebx
+	rol	r12d,30
+	mov	DWORD PTR[4+rsp],ebp
+	add	edi,ecx
+	mov	edx,DWORD PTR[8+rsp]
+	mov	eax,r12d
+	mov	ebx,r12d
+	xor	edx,DWORD PTR[16+rsp]
+	and	eax,r13d
+	mov	ecx,edi
+	xor	edx,DWORD PTR[40+rsp]
+	xor	ebx,r13d
+	lea	esi,DWORD PTR[((-1894007588))+rsi*1+rbp]
+	rol	ecx,5
+	xor	edx,DWORD PTR[60+rsp]
+	add	esi,eax
+	and	ebx,r11d
+	rol	edx,1
+	add	esi,ebx
+	rol	r11d,30
+	mov	DWORD PTR[8+rsp],edx
+	add	esi,ecx
+	mov	ebp,DWORD PTR[12+rsp]
+	mov	eax,r11d
+	mov	ebx,r11d
+	xor	ebp,DWORD PTR[20+rsp]
+	and	eax,r12d
+	mov	ecx,esi
+	xor	ebp,DWORD PTR[44+rsp]
+	xor	ebx,r12d
+	lea	r13d,DWORD PTR[((-1894007588))+r13*1+rdx]
+	rol	ecx,5
+	xor	ebp,DWORD PTR[rsp]
+	add	r13d,eax
+	and	ebx,edi
+	rol	ebp,1
+	add	r13d,ebx
+	rol	edi,30
+	mov	DWORD PTR[12+rsp],ebp
+	add	r13d,ecx
+	mov	edx,DWORD PTR[16+rsp]
+	mov	eax,edi
+	mov	ebx,edi
+	xor	edx,DWORD PTR[24+rsp]
+	and	eax,r11d
+	mov	ecx,r13d
+	xor	edx,DWORD PTR[48+rsp]
+	xor	ebx,r11d
+	lea	r12d,DWORD PTR[((-1894007588))+r12*1+rbp]
+	rol	ecx,5
+	xor	edx,DWORD PTR[4+rsp]
+	add	r12d,eax
+	and	ebx,esi
+	rol	edx,1
+	add	r12d,ebx
+	rol	esi,30
+	mov	DWORD PTR[16+rsp],edx
+	add	r12d,ecx
+	mov	ebp,DWORD PTR[20+rsp]
+	mov	eax,esi
+	mov	ebx,esi
+	xor	ebp,DWORD PTR[28+rsp]
+	and	eax,edi
+	mov	ecx,r12d
+	xor	ebp,DWORD PTR[52+rsp]
+	xor	ebx,edi
+	lea	r11d,DWORD PTR[((-1894007588))+r11*1+rdx]
+	rol	ecx,5
+	xor	ebp,DWORD PTR[8+rsp]
+	add	r11d,eax
+	and	ebx,r13d
+	rol	ebp,1
+	add	r11d,ebx
+	rol	r13d,30
+	mov	DWORD PTR[20+rsp],ebp
+	add	r11d,ecx
+	mov	edx,DWORD PTR[24+rsp]
+	mov	eax,r13d
+	mov	ebx,r13d
+	xor	edx,DWORD PTR[32+rsp]
+	and	eax,esi
+	mov	ecx,r11d
+	xor	edx,DWORD PTR[56+rsp]
+	xor	ebx,esi
+	lea	edi,DWORD PTR[((-1894007588))+rdi*1+rbp]
+	rol	ecx,5
+	xor	edx,DWORD PTR[12+rsp]
+	add	edi,eax
+	and	ebx,r12d
+	rol	edx,1
+	add	edi,ebx
+	rol	r12d,30
+	mov	DWORD PTR[24+rsp],edx
+	add	edi,ecx
+	mov	ebp,DWORD PTR[28+rsp]
+	mov	eax,r12d
+	mov	ebx,r12d
+	xor	ebp,DWORD PTR[36+rsp]
+	and	eax,r13d
+	mov	ecx,edi
+	xor	ebp,DWORD PTR[60+rsp]
+	xor	ebx,r13d
+	lea	esi,DWORD PTR[((-1894007588))+rsi*1+rdx]
+	rol	ecx,5
+	xor	ebp,DWORD PTR[16+rsp]
+	add	esi,eax
+	and	ebx,r11d
+	rol	ebp,1
+	add	esi,ebx
+	rol	r11d,30
+	mov	DWORD PTR[28+rsp],ebp
+	add	esi,ecx
+	mov	edx,DWORD PTR[32+rsp]
+	mov	eax,r11d
+	mov	ebx,r11d
+	xor	edx,DWORD PTR[40+rsp]
+	and	eax,r12d
+	mov	ecx,esi
+	xor	edx,DWORD PTR[rsp]
+	xor	ebx,r12d
+	lea	r13d,DWORD PTR[((-1894007588))+r13*1+rbp]
+	rol	ecx,5
+	xor	edx,DWORD PTR[20+rsp]
+	add	r13d,eax
+	and	ebx,edi
+	rol	edx,1
+	add	r13d,ebx
+	rol	edi,30
+	mov	DWORD PTR[32+rsp],edx
+	add	r13d,ecx
+	mov	ebp,DWORD PTR[36+rsp]
+	mov	eax,edi
+	mov	ebx,edi
+	xor	ebp,DWORD PTR[44+rsp]
+	and	eax,r11d
+	mov	ecx,r13d
+	xor	ebp,DWORD PTR[4+rsp]
+	xor	ebx,r11d
+	lea	r12d,DWORD PTR[((-1894007588))+r12*1+rdx]
+	rol	ecx,5
+	xor	ebp,DWORD PTR[24+rsp]
+	add	r12d,eax
+	and	ebx,esi
+	rol	ebp,1
+	add	r12d,ebx
+	rol	esi,30
+	mov	DWORD PTR[36+rsp],ebp
+	add	r12d,ecx
+	mov	edx,DWORD PTR[40+rsp]
+	mov	eax,esi
+	mov	ebx,esi
+	xor	edx,DWORD PTR[48+rsp]
+	and	eax,edi
+	mov	ecx,r12d
+	xor	edx,DWORD PTR[8+rsp]
+	xor	ebx,edi
+	lea	r11d,DWORD PTR[((-1894007588))+r11*1+rbp]
+	rol	ecx,5
+	xor	edx,DWORD PTR[28+rsp]
+	add	r11d,eax
+	and	ebx,r13d
+	rol	edx,1
+	add	r11d,ebx
+	rol	r13d,30
+	mov	DWORD PTR[40+rsp],edx
+	add	r11d,ecx
+	mov	ebp,DWORD PTR[44+rsp]
+	mov	eax,r13d
+	mov	ebx,r13d
+	xor	ebp,DWORD PTR[52+rsp]
+	and	eax,esi
+	mov	ecx,r11d
+	xor	ebp,DWORD PTR[12+rsp]
+	xor	ebx,esi
+	lea	edi,DWORD PTR[((-1894007588))+rdi*1+rdx]
+	rol	ecx,5
+	xor	ebp,DWORD PTR[32+rsp]
+	add	edi,eax
+	and	ebx,r12d
+	rol	ebp,1
+	add	edi,ebx
+	rol	r12d,30
+	mov	DWORD PTR[44+rsp],ebp
+	add	edi,ecx
+	mov	edx,DWORD PTR[48+rsp]
+	mov	eax,r12d
+	mov	ebx,r12d
+	xor	edx,DWORD PTR[56+rsp]
+	and	eax,r13d
+	mov	ecx,edi
+	xor	edx,DWORD PTR[16+rsp]
+	xor	ebx,r13d
+	lea	esi,DWORD PTR[((-1894007588))+rsi*1+rbp]
+	rol	ecx,5
+	xor	edx,DWORD PTR[36+rsp]
+	add	esi,eax
+	and	ebx,r11d
+	rol	edx,1
+	add	esi,ebx
+	rol	r11d,30
+	mov	DWORD PTR[48+rsp],edx
+	add	esi,ecx
+	mov	ebp,DWORD PTR[52+rsp]
+	mov	eax,r11d
+	mov	ecx,esi
+	xor	ebp,DWORD PTR[60+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	lea	r13d,DWORD PTR[((-899497514))+r13*1+rdx]
+	xor	ebp,DWORD PTR[20+rsp]
+	xor	eax,r12d
+	add	r13d,ecx
+	xor	ebp,DWORD PTR[40+rsp]
+	rol	edi,30
+	add	r13d,eax
+	rol	ebp,1
+	mov	DWORD PTR[52+rsp],ebp
+	mov	edx,DWORD PTR[56+rsp]
+	mov	eax,edi
+	mov	ecx,r13d
+	xor	edx,DWORD PTR[rsp]
+	xor	eax,esi
+	rol	ecx,5
+	lea	r12d,DWORD PTR[((-899497514))+r12*1+rbp]
+	xor	edx,DWORD PTR[24+rsp]
+	xor	eax,r11d
+	add	r12d,ecx
+	xor	edx,DWORD PTR[44+rsp]
+	rol	esi,30
+	add	r12d,eax
+	rol	edx,1
+	mov	DWORD PTR[56+rsp],edx
+	mov	ebp,DWORD PTR[60+rsp]
+	mov	eax,esi
+	mov	ecx,r12d
+	xor	ebp,DWORD PTR[4+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	lea	r11d,DWORD PTR[((-899497514))+r11*1+rdx]
+	xor	ebp,DWORD PTR[28+rsp]
+	xor	eax,edi
+	add	r11d,ecx
+	xor	ebp,DWORD PTR[48+rsp]
+	rol	r13d,30
+	add	r11d,eax
+	rol	ebp,1
+	mov	DWORD PTR[60+rsp],ebp
+	mov	edx,DWORD PTR[rsp]
+	mov	eax,r13d
+	mov	ecx,r11d
+	xor	edx,DWORD PTR[8+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	lea	edi,DWORD PTR[((-899497514))+rdi*1+rbp]
+	xor	edx,DWORD PTR[32+rsp]
+	xor	eax,esi
+	add	edi,ecx
+	xor	edx,DWORD PTR[52+rsp]
+	rol	r12d,30
+	add	edi,eax
+	rol	edx,1
+	mov	DWORD PTR[rsp],edx
+	mov	ebp,DWORD PTR[4+rsp]
+	mov	eax,r12d
+	mov	ecx,edi
+	xor	ebp,DWORD PTR[12+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	lea	esi,DWORD PTR[((-899497514))+rsi*1+rdx]
+	xor	ebp,DWORD PTR[36+rsp]
+	xor	eax,r13d
+	add	esi,ecx
+	xor	ebp,DWORD PTR[56+rsp]
+	rol	r11d,30
+	add	esi,eax
+	rol	ebp,1
+	mov	DWORD PTR[4+rsp],ebp
+	mov	edx,DWORD PTR[8+rsp]
+	mov	eax,r11d
+	mov	ecx,esi
+	xor	edx,DWORD PTR[16+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	lea	r13d,DWORD PTR[((-899497514))+r13*1+rbp]
+	xor	edx,DWORD PTR[40+rsp]
+	xor	eax,r12d
+	add	r13d,ecx
+	xor	edx,DWORD PTR[60+rsp]
+	rol	edi,30
+	add	r13d,eax
+	rol	edx,1
+	mov	DWORD PTR[8+rsp],edx
+	mov	ebp,DWORD PTR[12+rsp]
+	mov	eax,edi
+	mov	ecx,r13d
+	xor	ebp,DWORD PTR[20+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	lea	r12d,DWORD PTR[((-899497514))+r12*1+rdx]
+	xor	ebp,DWORD PTR[44+rsp]
+	xor	eax,r11d
+	add	r12d,ecx
+	xor	ebp,DWORD PTR[rsp]
+	rol	esi,30
+	add	r12d,eax
+	rol	ebp,1
+	mov	DWORD PTR[12+rsp],ebp
+	mov	edx,DWORD PTR[16+rsp]
+	mov	eax,esi
+	mov	ecx,r12d
+	xor	edx,DWORD PTR[24+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	lea	r11d,DWORD PTR[((-899497514))+r11*1+rbp]
+	xor	edx,DWORD PTR[48+rsp]
+	xor	eax,edi
+	add	r11d,ecx
+	xor	edx,DWORD PTR[4+rsp]
+	rol	r13d,30
+	add	r11d,eax
+	rol	edx,1
+	mov	DWORD PTR[16+rsp],edx
+	mov	ebp,DWORD PTR[20+rsp]
+	mov	eax,r13d
+	mov	ecx,r11d
+	xor	ebp,DWORD PTR[28+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	lea	edi,DWORD PTR[((-899497514))+rdi*1+rdx]
+	xor	ebp,DWORD PTR[52+rsp]
+	xor	eax,esi
+	add	edi,ecx
+	xor	ebp,DWORD PTR[8+rsp]
+	rol	r12d,30
+	add	edi,eax
+	rol	ebp,1
+	mov	DWORD PTR[20+rsp],ebp
+	mov	edx,DWORD PTR[24+rsp]
+	mov	eax,r12d
+	mov	ecx,edi
+	xor	edx,DWORD PTR[32+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	lea	esi,DWORD PTR[((-899497514))+rsi*1+rbp]
+	xor	edx,DWORD PTR[56+rsp]
+	xor	eax,r13d
+	add	esi,ecx
+	xor	edx,DWORD PTR[12+rsp]
+	rol	r11d,30
+	add	esi,eax
+	rol	edx,1
+	mov	DWORD PTR[24+rsp],edx
+	mov	ebp,DWORD PTR[28+rsp]
+	mov	eax,r11d
+	mov	ecx,esi
+	xor	ebp,DWORD PTR[36+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	lea	r13d,DWORD PTR[((-899497514))+r13*1+rdx]
+	xor	ebp,DWORD PTR[60+rsp]
+	xor	eax,r12d
+	add	r13d,ecx
+	xor	ebp,DWORD PTR[16+rsp]
+	rol	edi,30
+	add	r13d,eax
+	rol	ebp,1
+	mov	DWORD PTR[28+rsp],ebp
+	mov	edx,DWORD PTR[32+rsp]
+	mov	eax,edi
+	mov	ecx,r13d
+	xor	edx,DWORD PTR[40+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	lea	r12d,DWORD PTR[((-899497514))+r12*1+rbp]
+	xor	edx,DWORD PTR[rsp]
+	xor	eax,r11d
+	add	r12d,ecx
+	xor	edx,DWORD PTR[20+rsp]
+	rol	esi,30
+	add	r12d,eax
+	rol	edx,1
+	mov	DWORD PTR[32+rsp],edx
+	mov	ebp,DWORD PTR[36+rsp]
+	mov	eax,esi
+	mov	ecx,r12d
+	xor	ebp,DWORD PTR[44+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	lea	r11d,DWORD PTR[((-899497514))+r11*1+rdx]
+	xor	ebp,DWORD PTR[4+rsp]
+	xor	eax,edi
+	add	r11d,ecx
+	xor	ebp,DWORD PTR[24+rsp]
+	rol	r13d,30
+	add	r11d,eax
+	rol	ebp,1
+	mov	DWORD PTR[36+rsp],ebp
+	mov	edx,DWORD PTR[40+rsp]
+	mov	eax,r13d
+	mov	ecx,r11d
+	xor	edx,DWORD PTR[48+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	lea	edi,DWORD PTR[((-899497514))+rdi*1+rbp]
+	xor	edx,DWORD PTR[8+rsp]
+	xor	eax,esi
+	add	edi,ecx
+	xor	edx,DWORD PTR[28+rsp]
+	rol	r12d,30
+	add	edi,eax
+	rol	edx,1
+	mov	DWORD PTR[40+rsp],edx
+	mov	ebp,DWORD PTR[44+rsp]
+	mov	eax,r12d
+	mov	ecx,edi
+	xor	ebp,DWORD PTR[52+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	lea	esi,DWORD PTR[((-899497514))+rsi*1+rdx]
+	xor	ebp,DWORD PTR[12+rsp]
+	xor	eax,r13d
+	add	esi,ecx
+	xor	ebp,DWORD PTR[32+rsp]
+	rol	r11d,30
+	add	esi,eax
+	rol	ebp,1
+	mov	DWORD PTR[44+rsp],ebp
+	mov	edx,DWORD PTR[48+rsp]
+	mov	eax,r11d
+	mov	ecx,esi
+	xor	edx,DWORD PTR[56+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	lea	r13d,DWORD PTR[((-899497514))+r13*1+rbp]
+	xor	edx,DWORD PTR[16+rsp]
+	xor	eax,r12d
+	add	r13d,ecx
+	xor	edx,DWORD PTR[36+rsp]
+	rol	edi,30
+	add	r13d,eax
+	rol	edx,1
+	mov	DWORD PTR[48+rsp],edx
+	mov	ebp,DWORD PTR[52+rsp]
+	mov	eax,edi
+	mov	ecx,r13d
+	xor	ebp,DWORD PTR[60+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	lea	r12d,DWORD PTR[((-899497514))+r12*1+rdx]
+	xor	ebp,DWORD PTR[20+rsp]
+	xor	eax,r11d
+	add	r12d,ecx
+	xor	ebp,DWORD PTR[40+rsp]
+	rol	esi,30
+	add	r12d,eax
+	rol	ebp,1
+	mov	edx,DWORD PTR[56+rsp]
+	mov	eax,esi
+	mov	ecx,r12d
+	xor	edx,DWORD PTR[rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	lea	r11d,DWORD PTR[((-899497514))+r11*1+rbp]
+	xor	edx,DWORD PTR[24+rsp]
+	xor	eax,edi
+	add	r11d,ecx
+	xor	edx,DWORD PTR[44+rsp]
+	rol	r13d,30
+	add	r11d,eax
+	rol	edx,1
+	mov	ebp,DWORD PTR[60+rsp]
+	mov	eax,r13d
+	mov	ecx,r11d
+	xor	ebp,DWORD PTR[4+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	lea	edi,DWORD PTR[((-899497514))+rdi*1+rdx]
+	xor	ebp,DWORD PTR[28+rsp]
+	xor	eax,esi
+	add	edi,ecx
+	xor	ebp,DWORD PTR[48+rsp]
+	rol	r12d,30
+	add	edi,eax
+	rol	ebp,1
+	mov	eax,r12d
+	mov	ecx,edi
+	xor	eax,r11d
+	lea	esi,DWORD PTR[((-899497514))+rsi*1+rbp]
+	rol	ecx,5
+	xor	eax,r13d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,eax
+	add	esi,DWORD PTR[r8]
+	add	edi,DWORD PTR[4+r8]
+	add	r11d,DWORD PTR[8+r8]
+	add	r12d,DWORD PTR[12+r8]
+	add	r13d,DWORD PTR[16+r8]
+	mov	DWORD PTR[r8],esi
+	mov	DWORD PTR[4+r8],edi
+	mov	DWORD PTR[8+r8],r11d
+	mov	DWORD PTR[12+r8],r12d
+	mov	DWORD PTR[16+r8],r13d
+
+	sub	r10,1
+	lea	r9,QWORD PTR[64+r9]
+	jnz	$L$loop
+
+	mov	rsi,QWORD PTR[64+rsp]
+	mov	r13,QWORD PTR[rsi]
+	mov	r12,QWORD PTR[8+rsi]
+	mov	rbp,QWORD PTR[16+rsi]
+	mov	rbx,QWORD PTR[24+rsi]
+	lea	rsp,QWORD PTR[32+rsi]
+$L$epilogue::
+	mov	rdi,QWORD PTR[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD PTR[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_sha1_block_data_order::
+sha1_block_data_order	ENDP
+
+ALIGN	16
+sha1_block_data_order_ssse3	PROC PRIVATE
+	mov	QWORD PTR[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD PTR[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_sha1_block_data_order_ssse3::
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+_ssse3_shortcut::
+	push	rbx
+	push	rbp
+	push	r12
+	lea	rsp,QWORD PTR[((-144))+rsp]
+	movaps	XMMWORD PTR[(64+0)+rsp],xmm6
+	movaps	XMMWORD PTR[(64+16)+rsp],xmm7
+	movaps	XMMWORD PTR[(64+32)+rsp],xmm8
+	movaps	XMMWORD PTR[(64+48)+rsp],xmm9
+	movaps	XMMWORD PTR[(64+64)+rsp],xmm10
+$L$prologue_ssse3::
+	mov	r8,rdi
+	mov	r9,rsi
+	mov	r10,rdx
+
+	shl	r10,6
+	add	r10,r9
+	lea	r11,QWORD PTR[K_XX_XX]
+
+	mov	eax,DWORD PTR[r8]
+	mov	ebx,DWORD PTR[4+r8]
+	mov	ecx,DWORD PTR[8+r8]
+	mov	edx,DWORD PTR[12+r8]
+	mov	esi,ebx
+	mov	ebp,DWORD PTR[16+r8]
+
+	movdqa	xmm6,XMMWORD PTR[64+r11]
+	movdqa	xmm9,XMMWORD PTR[r11]
+	movdqu	xmm0,XMMWORD PTR[r9]
+	movdqu	xmm1,XMMWORD PTR[16+r9]
+	movdqu	xmm2,XMMWORD PTR[32+r9]
+	movdqu	xmm3,XMMWORD PTR[48+r9]
+DB	102,15,56,0,198
+	add	r9,64
+DB	102,15,56,0,206
+DB	102,15,56,0,214
+DB	102,15,56,0,222
+	paddd	xmm0,xmm9
+	paddd	xmm1,xmm9
+	paddd	xmm2,xmm9
+	movdqa	XMMWORD PTR[rsp],xmm0
+	psubd	xmm0,xmm9
+	movdqa	XMMWORD PTR[16+rsp],xmm1
+	psubd	xmm1,xmm9
+	movdqa	XMMWORD PTR[32+rsp],xmm2
+	psubd	xmm2,xmm9
+	jmp	$L$oop_ssse3
+ALIGN	16
+$L$oop_ssse3::
+	movdqa	xmm4,xmm1
+	add	ebp,DWORD PTR[rsp]
+	xor	ecx,edx
+	movdqa	xmm8,xmm3
+DB	102,15,58,15,224,8
+	mov	edi,eax
+	rol	eax,5
+	paddd	xmm9,xmm3
+	and	esi,ecx
+	xor	ecx,edx
+	psrldq	xmm8,4
+	xor	esi,edx
+	add	ebp,eax
+	pxor	xmm4,xmm0
+	ror	ebx,2
+	add	ebp,esi
+	pxor	xmm8,xmm2
+	add	edx,DWORD PTR[4+rsp]
+	xor	ebx,ecx
+	mov	esi,ebp
+	rol	ebp,5
+	pxor	xmm4,xmm8
+	and	edi,ebx
+	xor	ebx,ecx
+	movdqa	XMMWORD PTR[48+rsp],xmm9
+	xor	edi,ecx
+	add	edx,ebp
+	movdqa	xmm10,xmm4
+	movdqa	xmm8,xmm4
+	ror	eax,7
+	add	edx,edi
+	add	ecx,DWORD PTR[8+rsp]
+	xor	eax,ebx
+	pslldq	xmm10,12
+	paddd	xmm4,xmm4
+	mov	edi,edx
+	rol	edx,5
+	and	esi,eax
+	xor	eax,ebx
+	psrld	xmm8,31
+	xor	esi,ebx
+	add	ecx,edx
+	movdqa	xmm9,xmm10
+	ror	ebp,7
+	add	ecx,esi
+	psrld	xmm10,30
+	por	xmm4,xmm8
+	add	ebx,DWORD PTR[12+rsp]
+	xor	ebp,eax
+	mov	esi,ecx
+	rol	ecx,5
+	pslld	xmm9,2
+	pxor	xmm4,xmm10
+	and	edi,ebp
+	xor	ebp,eax
+	movdqa	xmm10,XMMWORD PTR[r11]
+	xor	edi,eax
+	add	ebx,ecx
+	pxor	xmm4,xmm9
+	ror	edx,7
+	add	ebx,edi
+	movdqa	xmm5,xmm2
+	add	eax,DWORD PTR[16+rsp]
+	xor	edx,ebp
+	movdqa	xmm9,xmm4
+DB	102,15,58,15,233,8
+	mov	edi,ebx
+	rol	ebx,5
+	paddd	xmm10,xmm4
+	and	esi,edx
+	xor	edx,ebp
+	psrldq	xmm9,4
+	xor	esi,ebp
+	add	eax,ebx
+	pxor	xmm5,xmm1
+	ror	ecx,7
+	add	eax,esi
+	pxor	xmm9,xmm3
+	add	ebp,DWORD PTR[20+rsp]
+	xor	ecx,edx
+	mov	esi,eax
+	rol	eax,5
+	pxor	xmm5,xmm9
+	and	edi,ecx
+	xor	ecx,edx
+	movdqa	XMMWORD PTR[rsp],xmm10
+	xor	edi,edx
+	add	ebp,eax
+	movdqa	xmm8,xmm5
+	movdqa	xmm9,xmm5
+	ror	ebx,7
+	add	ebp,edi
+	add	edx,DWORD PTR[24+rsp]
+	xor	ebx,ecx
+	pslldq	xmm8,12
+	paddd	xmm5,xmm5
+	mov	edi,ebp
+	rol	ebp,5
+	and	esi,ebx
+	xor	ebx,ecx
+	psrld	xmm9,31
+	xor	esi,ecx
+	add	edx,ebp
+	movdqa	xmm10,xmm8
+	ror	eax,7
+	add	edx,esi
+	psrld	xmm8,30
+	por	xmm5,xmm9
+	add	ecx,DWORD PTR[28+rsp]
+	xor	eax,ebx
+	mov	esi,edx
+	rol	edx,5
+	pslld	xmm10,2
+	pxor	xmm5,xmm8
+	and	edi,eax
+	xor	eax,ebx
+	movdqa	xmm8,XMMWORD PTR[16+r11]
+	xor	edi,ebx
+	add	ecx,edx
+	pxor	xmm5,xmm10
+	ror	ebp,7
+	add	ecx,edi
+	movdqa	xmm6,xmm3
+	add	ebx,DWORD PTR[32+rsp]
+	xor	ebp,eax
+	movdqa	xmm10,xmm5
+DB	102,15,58,15,242,8
+	mov	edi,ecx
+	rol	ecx,5
+	paddd	xmm8,xmm5
+	and	esi,ebp
+	xor	ebp,eax
+	psrldq	xmm10,4
+	xor	esi,eax
+	add	ebx,ecx
+	pxor	xmm6,xmm2
+	ror	edx,7
+	add	ebx,esi
+	pxor	xmm10,xmm4
+	add	eax,DWORD PTR[36+rsp]
+	xor	edx,ebp
+	mov	esi,ebx
+	rol	ebx,5
+	pxor	xmm6,xmm10
+	and	edi,edx
+	xor	edx,ebp
+	movdqa	XMMWORD PTR[16+rsp],xmm8
+	xor	edi,ebp
+	add	eax,ebx
+	movdqa	xmm9,xmm6
+	movdqa	xmm10,xmm6
+	ror	ecx,7
+	add	eax,edi
+	add	ebp,DWORD PTR[40+rsp]
+	xor	ecx,edx
+	pslldq	xmm9,12
+	paddd	xmm6,xmm6
+	mov	edi,eax
+	rol	eax,5
+	and	esi,ecx
+	xor	ecx,edx
+	psrld	xmm10,31
+	xor	esi,edx
+	add	ebp,eax
+	movdqa	xmm8,xmm9
+	ror	ebx,7
+	add	ebp,esi
+	psrld	xmm9,30
+	por	xmm6,xmm10
+	add	edx,DWORD PTR[44+rsp]
+	xor	ebx,ecx
+	mov	esi,ebp
+	rol	ebp,5
+	pslld	xmm8,2
+	pxor	xmm6,xmm9
+	and	edi,ebx
+	xor	ebx,ecx
+	movdqa	xmm9,XMMWORD PTR[16+r11]
+	xor	edi,ecx
+	add	edx,ebp
+	pxor	xmm6,xmm8
+	ror	eax,7
+	add	edx,edi
+	movdqa	xmm7,xmm4
+	add	ecx,DWORD PTR[48+rsp]
+	xor	eax,ebx
+	movdqa	xmm8,xmm6
+DB	102,15,58,15,251,8
+	mov	edi,edx
+	rol	edx,5
+	paddd	xmm9,xmm6
+	and	esi,eax
+	xor	eax,ebx
+	psrldq	xmm8,4
+	xor	esi,ebx
+	add	ecx,edx
+	pxor	xmm7,xmm3
+	ror	ebp,7
+	add	ecx,esi
+	pxor	xmm8,xmm5
+	add	ebx,DWORD PTR[52+rsp]
+	xor	ebp,eax
+	mov	esi,ecx
+	rol	ecx,5
+	pxor	xmm7,xmm8
+	and	edi,ebp
+	xor	ebp,eax
+	movdqa	XMMWORD PTR[32+rsp],xmm9
+	xor	edi,eax
+	add	ebx,ecx
+	movdqa	xmm10,xmm7
+	movdqa	xmm8,xmm7
+	ror	edx,7
+	add	ebx,edi
+	add	eax,DWORD PTR[56+rsp]
+	xor	edx,ebp
+	pslldq	xmm10,12
+	paddd	xmm7,xmm7
+	mov	edi,ebx
+	rol	ebx,5
+	and	esi,edx
+	xor	edx,ebp
+	psrld	xmm8,31
+	xor	esi,ebp
+	add	eax,ebx
+	movdqa	xmm9,xmm10
+	ror	ecx,7
+	add	eax,esi
+	psrld	xmm10,30
+	por	xmm7,xmm8
+	add	ebp,DWORD PTR[60+rsp]
+	xor	ecx,edx
+	mov	esi,eax
+	rol	eax,5
+	pslld	xmm9,2
+	pxor	xmm7,xmm10
+	and	edi,ecx
+	xor	ecx,edx
+	movdqa	xmm10,XMMWORD PTR[16+r11]
+	xor	edi,edx
+	add	ebp,eax
+	pxor	xmm7,xmm9
+	ror	ebx,7
+	add	ebp,edi
+	movdqa	xmm9,xmm7
+	add	edx,DWORD PTR[rsp]
+	pxor	xmm0,xmm4
+DB	102,68,15,58,15,206,8
+	xor	ebx,ecx
+	mov	edi,ebp
+	rol	ebp,5
+	pxor	xmm0,xmm1
+	and	esi,ebx
+	xor	ebx,ecx
+	movdqa	xmm8,xmm10
+	paddd	xmm10,xmm7
+	xor	esi,ecx
+	add	edx,ebp
+	pxor	xmm0,xmm9
+	ror	eax,7
+	add	edx,esi
+	add	ecx,DWORD PTR[4+rsp]
+	xor	eax,ebx
+	movdqa	xmm9,xmm0
+	movdqa	XMMWORD PTR[48+rsp],xmm10
+	mov	esi,edx
+	rol	edx,5
+	and	edi,eax
+	xor	eax,ebx
+	pslld	xmm0,2
+	xor	edi,ebx
+	add	ecx,edx
+	psrld	xmm9,30
+	ror	ebp,7
+	add	ecx,edi
+	add	ebx,DWORD PTR[8+rsp]
+	xor	ebp,eax
+	mov	edi,ecx
+	rol	ecx,5
+	por	xmm0,xmm9
+	and	esi,ebp
+	xor	ebp,eax
+	movdqa	xmm10,xmm0
+	xor	esi,eax
+	add	ebx,ecx
+	ror	edx,7
+	add	ebx,esi
+	add	eax,DWORD PTR[12+rsp]
+	xor	edx,ebp
+	mov	esi,ebx
+	rol	ebx,5
+	and	edi,edx
+	xor	edx,ebp
+	xor	edi,ebp
+	add	eax,ebx
+	ror	ecx,7
+	add	eax,edi
+	add	ebp,DWORD PTR[16+rsp]
+	pxor	xmm1,xmm5
+DB	102,68,15,58,15,215,8
+	xor	esi,edx
+	mov	edi,eax
+	rol	eax,5
+	pxor	xmm1,xmm2
+	xor	esi,ecx
+	add	ebp,eax
+	movdqa	xmm9,xmm8
+	paddd	xmm8,xmm0
+	ror	ebx,7
+	add	ebp,esi
+	pxor	xmm1,xmm10
+	add	edx,DWORD PTR[20+rsp]
+	xor	edi,ecx
+	mov	esi,ebp
+	rol	ebp,5
+	movdqa	xmm10,xmm1
+	movdqa	XMMWORD PTR[rsp],xmm8
+	xor	edi,ebx
+	add	edx,ebp
+	ror	eax,7
+	add	edx,edi
+	pslld	xmm1,2
+	add	ecx,DWORD PTR[24+rsp]
+	xor	esi,ebx
+	psrld	xmm10,30
+	mov	edi,edx
+	rol	edx,5
+	xor	esi,eax
+	add	ecx,edx
+	ror	ebp,7
+	add	ecx,esi
+	por	xmm1,xmm10
+	add	ebx,DWORD PTR[28+rsp]
+	xor	edi,eax
+	movdqa	xmm8,xmm1
+	mov	esi,ecx
+	rol	ecx,5
+	xor	edi,ebp
+	add	ebx,ecx
+	ror	edx,7
+	add	ebx,edi
+	add	eax,DWORD PTR[32+rsp]
+	pxor	xmm2,xmm6
+DB	102,68,15,58,15,192,8
+	xor	esi,ebp
+	mov	edi,ebx
+	rol	ebx,5
+	pxor	xmm2,xmm3
+	xor	esi,edx
+	add	eax,ebx
+	movdqa	xmm10,XMMWORD PTR[32+r11]
+	paddd	xmm9,xmm1
+	ror	ecx,7
+	add	eax,esi
+	pxor	xmm2,xmm8
+	add	ebp,DWORD PTR[36+rsp]
+	xor	edi,edx
+	mov	esi,eax
+	rol	eax,5
+	movdqa	xmm8,xmm2
+	movdqa	XMMWORD PTR[16+rsp],xmm9
+	xor	edi,ecx
+	add	ebp,eax
+	ror	ebx,7
+	add	ebp,edi
+	pslld	xmm2,2
+	add	edx,DWORD PTR[40+rsp]
+	xor	esi,ecx
+	psrld	xmm8,30
+	mov	edi,ebp
+	rol	ebp,5
+	xor	esi,ebx
+	add	edx,ebp
+	ror	eax,7
+	add	edx,esi
+	por	xmm2,xmm8
+	add	ecx,DWORD PTR[44+rsp]
+	xor	edi,ebx
+	movdqa	xmm9,xmm2
+	mov	esi,edx
+	rol	edx,5
+	xor	edi,eax
+	add	ecx,edx
+	ror	ebp,7
+	add	ecx,edi
+	add	ebx,DWORD PTR[48+rsp]
+	pxor	xmm3,xmm7
+DB	102,68,15,58,15,201,8
+	xor	esi,eax
+	mov	edi,ecx
+	rol	ecx,5
+	pxor	xmm3,xmm4
+	xor	esi,ebp
+	add	ebx,ecx
+	movdqa	xmm8,xmm10
+	paddd	xmm10,xmm2
+	ror	edx,7
+	add	ebx,esi
+	pxor	xmm3,xmm9
+	add	eax,DWORD PTR[52+rsp]
+	xor	edi,ebp
+	mov	esi,ebx
+	rol	ebx,5
+	movdqa	xmm9,xmm3
+	movdqa	XMMWORD PTR[32+rsp],xmm10
+	xor	edi,edx
+	add	eax,ebx
+	ror	ecx,7
+	add	eax,edi
+	pslld	xmm3,2
+	add	ebp,DWORD PTR[56+rsp]
+	xor	esi,edx
+	psrld	xmm9,30
+	mov	edi,eax
+	rol	eax,5
+	xor	esi,ecx
+	add	ebp,eax
+	ror	ebx,7
+	add	ebp,esi
+	por	xmm3,xmm9
+	add	edx,DWORD PTR[60+rsp]
+	xor	edi,ecx
+	movdqa	xmm10,xmm3
+	mov	esi,ebp
+	rol	ebp,5
+	xor	edi,ebx
+	add	edx,ebp
+	ror	eax,7
+	add	edx,edi
+	add	ecx,DWORD PTR[rsp]
+	pxor	xmm4,xmm0
+DB	102,68,15,58,15,210,8
+	xor	esi,ebx
+	mov	edi,edx
+	rol	edx,5
+	pxor	xmm4,xmm5
+	xor	esi,eax
+	add	ecx,edx
+	movdqa	xmm9,xmm8
+	paddd	xmm8,xmm3
+	ror	ebp,7
+	add	ecx,esi
+	pxor	xmm4,xmm10
+	add	ebx,DWORD PTR[4+rsp]
+	xor	edi,eax
+	mov	esi,ecx
+	rol	ecx,5
+	movdqa	xmm10,xmm4
+	movdqa	XMMWORD PTR[48+rsp],xmm8
+	xor	edi,ebp
+	add	ebx,ecx
+	ror	edx,7
+	add	ebx,edi
+	pslld	xmm4,2
+	add	eax,DWORD PTR[8+rsp]
+	xor	esi,ebp
+	psrld	xmm10,30
+	mov	edi,ebx
+	rol	ebx,5
+	xor	esi,edx
+	add	eax,ebx
+	ror	ecx,7
+	add	eax,esi
+	por	xmm4,xmm10
+	add	ebp,DWORD PTR[12+rsp]
+	xor	edi,edx
+	movdqa	xmm8,xmm4
+	mov	esi,eax
+	rol	eax,5
+	xor	edi,ecx
+	add	ebp,eax
+	ror	ebx,7
+	add	ebp,edi
+	add	edx,DWORD PTR[16+rsp]
+	pxor	xmm5,xmm1
+DB	102,68,15,58,15,195,8
+	xor	esi,ecx
+	mov	edi,ebp
+	rol	ebp,5
+	pxor	xmm5,xmm6
+	xor	esi,ebx
+	add	edx,ebp
+	movdqa	xmm10,xmm9
+	paddd	xmm9,xmm4
+	ror	eax,7
+	add	edx,esi
+	pxor	xmm5,xmm8
+	add	ecx,DWORD PTR[20+rsp]
+	xor	edi,ebx
+	mov	esi,edx
+	rol	edx,5
+	movdqa	xmm8,xmm5
+	movdqa	XMMWORD PTR[rsp],xmm9
+	xor	edi,eax
+	add	ecx,edx
+	ror	ebp,7
+	add	ecx,edi
+	pslld	xmm5,2
+	add	ebx,DWORD PTR[24+rsp]
+	xor	esi,eax
+	psrld	xmm8,30
+	mov	edi,ecx
+	rol	ecx,5
+	xor	esi,ebp
+	add	ebx,ecx
+	ror	edx,7
+	add	ebx,esi
+	por	xmm5,xmm8
+	add	eax,DWORD PTR[28+rsp]
+	xor	edi,ebp
+	movdqa	xmm9,xmm5
+	mov	esi,ebx
+	rol	ebx,5
+	xor	edi,edx
+	add	eax,ebx
+	ror	ecx,7
+	add	eax,edi
+	mov	edi,ecx
+	pxor	xmm6,xmm2
+DB	102,68,15,58,15,204,8
+	xor	ecx,edx
+	add	ebp,DWORD PTR[32+rsp]
+	and	edi,edx
+	pxor	xmm6,xmm7
+	and	esi,ecx
+	ror	ebx,7
+	movdqa	xmm8,xmm10
+	paddd	xmm10,xmm5
+	add	ebp,edi
+	mov	edi,eax
+	pxor	xmm6,xmm9
+	rol	eax,5
+	add	ebp,esi
+	xor	ecx,edx
+	add	ebp,eax
+	movdqa	xmm9,xmm6
+	movdqa	XMMWORD PTR[16+rsp],xmm10
+	mov	esi,ebx
+	xor	ebx,ecx
+	add	edx,DWORD PTR[36+rsp]
+	and	esi,ecx
+	pslld	xmm6,2
+	and	edi,ebx
+	ror	eax,7
+	psrld	xmm9,30
+	add	edx,esi
+	mov	esi,ebp
+	rol	ebp,5
+	add	edx,edi
+	xor	ebx,ecx
+	add	edx,ebp
+	por	xmm6,xmm9
+	mov	edi,eax
+	xor	eax,ebx
+	movdqa	xmm10,xmm6
+	add	ecx,DWORD PTR[40+rsp]
+	and	edi,ebx
+	and	esi,eax
+	ror	ebp,7
+	add	ecx,edi
+	mov	edi,edx
+	rol	edx,5
+	add	ecx,esi
+	xor	eax,ebx
+	add	ecx,edx
+	mov	esi,ebp
+	xor	ebp,eax
+	add	ebx,DWORD PTR[44+rsp]
+	and	esi,eax
+	and	edi,ebp
+	ror	edx,7
+	add	ebx,esi
+	mov	esi,ecx
+	rol	ecx,5
+	add	ebx,edi
+	xor	ebp,eax
+	add	ebx,ecx
+	mov	edi,edx
+	pxor	xmm7,xmm3
+DB	102,68,15,58,15,213,8
+	xor	edx,ebp
+	add	eax,DWORD PTR[48+rsp]
+	and	edi,ebp
+	pxor	xmm7,xmm0
+	and	esi,edx
+	ror	ecx,7
+	movdqa	xmm9,XMMWORD PTR[48+r11]
+	paddd	xmm8,xmm6
+	add	eax,edi
+	mov	edi,ebx
+	pxor	xmm7,xmm10
+	rol	ebx,5
+	add	eax,esi
+	xor	edx,ebp
+	add	eax,ebx
+	movdqa	xmm10,xmm7
+	movdqa	XMMWORD PTR[32+rsp],xmm8
+	mov	esi,ecx
+	xor	ecx,edx
+	add	ebp,DWORD PTR[52+rsp]
+	and	esi,edx
+	pslld	xmm7,2
+	and	edi,ecx
+	ror	ebx,7
+	psrld	xmm10,30
+	add	ebp,esi
+	mov	esi,eax
+	rol	eax,5
+	add	ebp,edi
+	xor	ecx,edx
+	add	ebp,eax
+	por	xmm7,xmm10
+	mov	edi,ebx
+	xor	ebx,ecx
+	movdqa	xmm8,xmm7
+	add	edx,DWORD PTR[56+rsp]
+	and	edi,ecx
+	and	esi,ebx
+	ror	eax,7
+	add	edx,edi
+	mov	edi,ebp
+	rol	ebp,5
+	add	edx,esi
+	xor	ebx,ecx
+	add	edx,ebp
+	mov	esi,eax
+	xor	eax,ebx
+	add	ecx,DWORD PTR[60+rsp]
+	and	esi,ebx
+	and	edi,eax
+	ror	ebp,7
+	add	ecx,esi
+	mov	esi,edx
+	rol	edx,5
+	add	ecx,edi
+	xor	eax,ebx
+	add	ecx,edx
+	mov	edi,ebp
+	pxor	xmm0,xmm4
+DB	102,68,15,58,15,198,8
+	xor	ebp,eax
+	add	ebx,DWORD PTR[rsp]
+	and	edi,eax
+	pxor	xmm0,xmm1
+	and	esi,ebp
+	ror	edx,7
+	movdqa	xmm10,xmm9
+	paddd	xmm9,xmm7
+	add	ebx,edi
+	mov	edi,ecx
+	pxor	xmm0,xmm8
+	rol	ecx,5
+	add	ebx,esi
+	xor	ebp,eax
+	add	ebx,ecx
+	movdqa	xmm8,xmm0
+	movdqa	XMMWORD PTR[48+rsp],xmm9
+	mov	esi,edx
+	xor	edx,ebp
+	add	eax,DWORD PTR[4+rsp]
+	and	esi,ebp
+	pslld	xmm0,2
+	and	edi,edx
+	ror	ecx,7
+	psrld	xmm8,30
+	add	eax,esi
+	mov	esi,ebx
+	rol	ebx,5
+	add	eax,edi
+	xor	edx,ebp
+	add	eax,ebx
+	por	xmm0,xmm8
+	mov	edi,ecx
+	xor	ecx,edx
+	movdqa	xmm9,xmm0
+	add	ebp,DWORD PTR[8+rsp]
+	and	edi,edx
+	and	esi,ecx
+	ror	ebx,7
+	add	ebp,edi
+	mov	edi,eax
+	rol	eax,5
+	add	ebp,esi
+	xor	ecx,edx
+	add	ebp,eax
+	mov	esi,ebx
+	xor	ebx,ecx
+	add	edx,DWORD PTR[12+rsp]
+	and	esi,ecx
+	and	edi,ebx
+	ror	eax,7
+	add	edx,esi
+	mov	esi,ebp
+	rol	ebp,5
+	add	edx,edi
+	xor	ebx,ecx
+	add	edx,ebp
+	mov	edi,eax
+	pxor	xmm1,xmm5
+DB	102,68,15,58,15,207,8
+	xor	eax,ebx
+	add	ecx,DWORD PTR[16+rsp]
+	and	edi,ebx
+	pxor	xmm1,xmm2
+	and	esi,eax
+	ror	ebp,7
+	movdqa	xmm8,xmm10
+	paddd	xmm10,xmm0
+	add	ecx,edi
+	mov	edi,edx
+	pxor	xmm1,xmm9
+	rol	edx,5
+	add	ecx,esi
+	xor	eax,ebx
+	add	ecx,edx
+	movdqa	xmm9,xmm1
+	movdqa	XMMWORD PTR[rsp],xmm10
+	mov	esi,ebp
+	xor	ebp,eax
+	add	ebx,DWORD PTR[20+rsp]
+	and	esi,eax
+	pslld	xmm1,2
+	and	edi,ebp
+	ror	edx,7
+	psrld	xmm9,30
+	add	ebx,esi
+	mov	esi,ecx
+	rol	ecx,5
+	add	ebx,edi
+	xor	ebp,eax
+	add	ebx,ecx
+	por	xmm1,xmm9
+	mov	edi,edx
+	xor	edx,ebp
+	movdqa	xmm10,xmm1
+	add	eax,DWORD PTR[24+rsp]
+	and	edi,ebp
+	and	esi,edx
+	ror	ecx,7
+	add	eax,edi
+	mov	edi,ebx
+	rol	ebx,5
+	add	eax,esi
+	xor	edx,ebp
+	add	eax,ebx
+	mov	esi,ecx
+	xor	ecx,edx
+	add	ebp,DWORD PTR[28+rsp]
+	and	esi,edx
+	and	edi,ecx
+	ror	ebx,7
+	add	ebp,esi
+	mov	esi,eax
+	rol	eax,5
+	add	ebp,edi
+	xor	ecx,edx
+	add	ebp,eax
+	mov	edi,ebx
+	pxor	xmm2,xmm6
+DB	102,68,15,58,15,208,8
+	xor	ebx,ecx
+	add	edx,DWORD PTR[32+rsp]
+	and	edi,ecx
+	pxor	xmm2,xmm3
+	and	esi,ebx
+	ror	eax,7
+	movdqa	xmm9,xmm8
+	paddd	xmm8,xmm1
+	add	edx,edi
+	mov	edi,ebp
+	pxor	xmm2,xmm10
+	rol	ebp,5
+	add	edx,esi
+	xor	ebx,ecx
+	add	edx,ebp
+	movdqa	xmm10,xmm2
+	movdqa	XMMWORD PTR[16+rsp],xmm8
+	mov	esi,eax
+	xor	eax,ebx
+	add	ecx,DWORD PTR[36+rsp]
+	and	esi,ebx
+	pslld	xmm2,2
+	and	edi,eax
+	ror	ebp,7
+	psrld	xmm10,30
+	add	ecx,esi
+	mov	esi,edx
+	rol	edx,5
+	add	ecx,edi
+	xor	eax,ebx
+	add	ecx,edx
+	por	xmm2,xmm10
+	mov	edi,ebp
+	xor	ebp,eax
+	movdqa	xmm8,xmm2
+	add	ebx,DWORD PTR[40+rsp]
+	and	edi,eax
+	and	esi,ebp
+	ror	edx,7
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ecx,5
+	add	ebx,esi
+	xor	ebp,eax
+	add	ebx,ecx
+	mov	esi,edx
+	xor	edx,ebp
+	add	eax,DWORD PTR[44+rsp]
+	and	esi,ebp
+	and	edi,edx
+	ror	ecx,7
+	add	eax,esi
+	mov	esi,ebx
+	rol	ebx,5
+	add	eax,edi
+	xor	edx,ebp
+	add	eax,ebx
+	add	ebp,DWORD PTR[48+rsp]
+	pxor	xmm3,xmm7
+DB	102,68,15,58,15,193,8
+	xor	esi,edx
+	mov	edi,eax
+	rol	eax,5
+	pxor	xmm3,xmm4
+	xor	esi,ecx
+	add	ebp,eax
+	movdqa	xmm10,xmm9
+	paddd	xmm9,xmm2
+	ror	ebx,7
+	add	ebp,esi
+	pxor	xmm3,xmm8
+	add	edx,DWORD PTR[52+rsp]
+	xor	edi,ecx
+	mov	esi,ebp
+	rol	ebp,5
+	movdqa	xmm8,xmm3
+	movdqa	XMMWORD PTR[32+rsp],xmm9
+	xor	edi,ebx
+	add	edx,ebp
+	ror	eax,7
+	add	edx,edi
+	pslld	xmm3,2
+	add	ecx,DWORD PTR[56+rsp]
+	xor	esi,ebx
+	psrld	xmm8,30
+	mov	edi,edx
+	rol	edx,5
+	xor	esi,eax
+	add	ecx,edx
+	ror	ebp,7
+	add	ecx,esi
+	por	xmm3,xmm8
+	add	ebx,DWORD PTR[60+rsp]
+	xor	edi,eax
+	mov	esi,ecx
+	rol	ecx,5
+	xor	edi,ebp
+	add	ebx,ecx
+	ror	edx,7
+	add	ebx,edi
+	add	eax,DWORD PTR[rsp]
+	paddd	xmm10,xmm3
+	xor	esi,ebp
+	mov	edi,ebx
+	rol	ebx,5
+	xor	esi,edx
+	movdqa	XMMWORD PTR[48+rsp],xmm10
+	add	eax,ebx
+	ror	ecx,7
+	add	eax,esi
+	add	ebp,DWORD PTR[4+rsp]
+	xor	edi,edx
+	mov	esi,eax
+	rol	eax,5
+	xor	edi,ecx
+	add	ebp,eax
+	ror	ebx,7
+	add	ebp,edi
+	add	edx,DWORD PTR[8+rsp]
+	xor	esi,ecx
+	mov	edi,ebp
+	rol	ebp,5
+	xor	esi,ebx
+	add	edx,ebp
+	ror	eax,7
+	add	edx,esi
+	add	ecx,DWORD PTR[12+rsp]
+	xor	edi,ebx
+	mov	esi,edx
+	rol	edx,5
+	xor	edi,eax
+	add	ecx,edx
+	ror	ebp,7
+	add	ecx,edi
+	cmp	r9,r10
+	je	$L$done_ssse3
+	movdqa	xmm6,XMMWORD PTR[64+r11]
+	movdqa	xmm9,XMMWORD PTR[r11]
+	movdqu	xmm0,XMMWORD PTR[r9]
+	movdqu	xmm1,XMMWORD PTR[16+r9]
+	movdqu	xmm2,XMMWORD PTR[32+r9]
+	movdqu	xmm3,XMMWORD PTR[48+r9]
+DB	102,15,56,0,198
+	add	r9,64
+	add	ebx,DWORD PTR[16+rsp]
+	xor	esi,eax
+DB	102,15,56,0,206
+	mov	edi,ecx
+	rol	ecx,5
+	paddd	xmm0,xmm9
+	xor	esi,ebp
+	add	ebx,ecx
+	ror	edx,7
+	add	ebx,esi
+	movdqa	XMMWORD PTR[rsp],xmm0
+	add	eax,DWORD PTR[20+rsp]
+	xor	edi,ebp
+	psubd	xmm0,xmm9
+	mov	esi,ebx
+	rol	ebx,5
+	xor	edi,edx
+	add	eax,ebx
+	ror	ecx,7
+	add	eax,edi
+	add	ebp,DWORD PTR[24+rsp]
+	xor	esi,edx
+	mov	edi,eax
+	rol	eax,5
+	xor	esi,ecx
+	add	ebp,eax
+	ror	ebx,7
+	add	ebp,esi
+	add	edx,DWORD PTR[28+rsp]
+	xor	edi,ecx
+	mov	esi,ebp
+	rol	ebp,5
+	xor	edi,ebx
+	add	edx,ebp
+	ror	eax,7
+	add	edx,edi
+	add	ecx,DWORD PTR[32+rsp]
+	xor	esi,ebx
+DB	102,15,56,0,214
+	mov	edi,edx
+	rol	edx,5
+	paddd	xmm1,xmm9
+	xor	esi,eax
+	add	ecx,edx
+	ror	ebp,7
+	add	ecx,esi
+	movdqa	XMMWORD PTR[16+rsp],xmm1
+	add	ebx,DWORD PTR[36+rsp]
+	xor	edi,eax
+	psubd	xmm1,xmm9
+	mov	esi,ecx
+	rol	ecx,5
+	xor	edi,ebp
+	add	ebx,ecx
+	ror	edx,7
+	add	ebx,edi
+	add	eax,DWORD PTR[40+rsp]
+	xor	esi,ebp
+	mov	edi,ebx
+	rol	ebx,5
+	xor	esi,edx
+	add	eax,ebx
+	ror	ecx,7
+	add	eax,esi
+	add	ebp,DWORD PTR[44+rsp]
+	xor	edi,edx
+	mov	esi,eax
+	rol	eax,5
+	xor	edi,ecx
+	add	ebp,eax
+	ror	ebx,7
+	add	ebp,edi
+	add	edx,DWORD PTR[48+rsp]
+	xor	esi,ecx
+DB	102,15,56,0,222
+	mov	edi,ebp
+	rol	ebp,5
+	paddd	xmm2,xmm9
+	xor	esi,ebx
+	add	edx,ebp
+	ror	eax,7
+	add	edx,esi
+	movdqa	XMMWORD PTR[32+rsp],xmm2
+	add	ecx,DWORD PTR[52+rsp]
+	xor	edi,ebx
+	psubd	xmm2,xmm9
+	mov	esi,edx
+	rol	edx,5
+	xor	edi,eax
+	add	ecx,edx
+	ror	ebp,7
+	add	ecx,edi
+	add	ebx,DWORD PTR[56+rsp]
+	xor	esi,eax
+	mov	edi,ecx
+	rol	ecx,5
+	xor	esi,ebp
+	add	ebx,ecx
+	ror	edx,7
+	add	ebx,esi
+	add	eax,DWORD PTR[60+rsp]
+	xor	edi,ebp
+	mov	esi,ebx
+	rol	ebx,5
+	xor	edi,edx
+	add	eax,ebx
+	ror	ecx,7
+	add	eax,edi
+	add	eax,DWORD PTR[r8]
+	add	esi,DWORD PTR[4+r8]
+	add	ecx,DWORD PTR[8+r8]
+	add	edx,DWORD PTR[12+r8]
+	mov	DWORD PTR[r8],eax
+	add	ebp,DWORD PTR[16+r8]
+	mov	DWORD PTR[4+r8],esi
+	mov	ebx,esi
+	mov	DWORD PTR[8+r8],ecx
+	mov	DWORD PTR[12+r8],edx
+	mov	DWORD PTR[16+r8],ebp
+	jmp	$L$oop_ssse3
+
+ALIGN	16
+$L$done_ssse3::
+	add	ebx,DWORD PTR[16+rsp]
+	xor	esi,eax
+	mov	edi,ecx
+	rol	ecx,5
+	xor	esi,ebp
+	add	ebx,ecx
+	ror	edx,7
+	add	ebx,esi
+	add	eax,DWORD PTR[20+rsp]
+	xor	edi,ebp
+	mov	esi,ebx
+	rol	ebx,5
+	xor	edi,edx
+	add	eax,ebx
+	ror	ecx,7
+	add	eax,edi
+	add	ebp,DWORD PTR[24+rsp]
+	xor	esi,edx
+	mov	edi,eax
+	rol	eax,5
+	xor	esi,ecx
+	add	ebp,eax
+	ror	ebx,7
+	add	ebp,esi
+	add	edx,DWORD PTR[28+rsp]
+	xor	edi,ecx
+	mov	esi,ebp
+	rol	ebp,5
+	xor	edi,ebx
+	add	edx,ebp
+	ror	eax,7
+	add	edx,edi
+	add	ecx,DWORD PTR[32+rsp]
+	xor	esi,ebx
+	mov	edi,edx
+	rol	edx,5
+	xor	esi,eax
+	add	ecx,edx
+	ror	ebp,7
+	add	ecx,esi
+	add	ebx,DWORD PTR[36+rsp]
+	xor	edi,eax
+	mov	esi,ecx
+	rol	ecx,5
+	xor	edi,ebp
+	add	ebx,ecx
+	ror	edx,7
+	add	ebx,edi
+	add	eax,DWORD PTR[40+rsp]
+	xor	esi,ebp
+	mov	edi,ebx
+	rol	ebx,5
+	xor	esi,edx
+	add	eax,ebx
+	ror	ecx,7
+	add	eax,esi
+	add	ebp,DWORD PTR[44+rsp]
+	xor	edi,edx
+	mov	esi,eax
+	rol	eax,5
+	xor	edi,ecx
+	add	ebp,eax
+	ror	ebx,7
+	add	ebp,edi
+	add	edx,DWORD PTR[48+rsp]
+	xor	esi,ecx
+	mov	edi,ebp
+	rol	ebp,5
+	xor	esi,ebx
+	add	edx,ebp
+	ror	eax,7
+	add	edx,esi
+	add	ecx,DWORD PTR[52+rsp]
+	xor	edi,ebx
+	mov	esi,edx
+	rol	edx,5
+	xor	edi,eax
+	add	ecx,edx
+	ror	ebp,7
+	add	ecx,edi
+	add	ebx,DWORD PTR[56+rsp]
+	xor	esi,eax
+	mov	edi,ecx
+	rol	ecx,5
+	xor	esi,ebp
+	add	ebx,ecx
+	ror	edx,7
+	add	ebx,esi
+	add	eax,DWORD PTR[60+rsp]
+	xor	edi,ebp
+	mov	esi,ebx
+	rol	ebx,5
+	xor	edi,edx
+	add	eax,ebx
+	ror	ecx,7
+	add	eax,edi
+	add	eax,DWORD PTR[r8]
+	add	esi,DWORD PTR[4+r8]
+	add	ecx,DWORD PTR[8+r8]
+	mov	DWORD PTR[r8],eax
+	add	edx,DWORD PTR[12+r8]
+	mov	DWORD PTR[4+r8],esi
+	add	ebp,DWORD PTR[16+r8]
+	mov	DWORD PTR[8+r8],ecx
+	mov	DWORD PTR[12+r8],edx
+	mov	DWORD PTR[16+r8],ebp
+	movaps	xmm6,XMMWORD PTR[((64+0))+rsp]
+	movaps	xmm7,XMMWORD PTR[((64+16))+rsp]
+	movaps	xmm8,XMMWORD PTR[((64+32))+rsp]
+	movaps	xmm9,XMMWORD PTR[((64+48))+rsp]
+	movaps	xmm10,XMMWORD PTR[((64+64))+rsp]
+	lea	rsi,QWORD PTR[144+rsp]
+	mov	r12,QWORD PTR[rsi]
+	mov	rbp,QWORD PTR[8+rsi]
+	mov	rbx,QWORD PTR[16+rsi]
+	lea	rsp,QWORD PTR[24+rsi]
+$L$epilogue_ssse3::
+	mov	rdi,QWORD PTR[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD PTR[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_sha1_block_data_order_ssse3::
+sha1_block_data_order_ssse3	ENDP
+ALIGN	64
+K_XX_XX::
+	DD	05a827999h,05a827999h,05a827999h,05a827999h	
+	DD	06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h	
+	DD	08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch	
+	DD	0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h	
+	DD	000010203h,004050607h,008090a0bh,00c0d0e0fh	
+DB	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
+DB	102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44
+DB	32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60
+DB	97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114
+DB	103,62,0
+ALIGN	64
+EXTERN	__imp_RtlVirtualUnwind:NEAR
+
+ALIGN	16
+se_handler	PROC PRIVATE
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64
+
+	mov	rax,QWORD PTR[120+r8]
+	mov	rbx,QWORD PTR[248+r8]
+
+	lea	r10,QWORD PTR[$L$prologue]
+	cmp	rbx,r10
+	jb	$L$common_seh_tail
+
+	mov	rax,QWORD PTR[152+r8]
+
+	lea	r10,QWORD PTR[$L$epilogue]
+	cmp	rbx,r10
+	jae	$L$common_seh_tail
+
+	mov	rax,QWORD PTR[64+rax]
+	lea	rax,QWORD PTR[32+rax]
+
+	mov	rbx,QWORD PTR[((-8))+rax]
+	mov	rbp,QWORD PTR[((-16))+rax]
+	mov	r12,QWORD PTR[((-24))+rax]
+	mov	r13,QWORD PTR[((-32))+rax]
+	mov	QWORD PTR[144+r8],rbx
+	mov	QWORD PTR[160+r8],rbp
+	mov	QWORD PTR[216+r8],r12
+	mov	QWORD PTR[224+r8],r13
+
+	jmp	$L$common_seh_tail
+se_handler	ENDP
+
+
+ALIGN	16
+ssse3_handler	PROC PRIVATE
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64
+
+	mov	rax,QWORD PTR[120+r8]
+	mov	rbx,QWORD PTR[248+r8]
+
+	mov	rsi,QWORD PTR[8+r9]
+	mov	r11,QWORD PTR[56+r9]
+
+	mov	r10d,DWORD PTR[r11]
+	lea	r10,QWORD PTR[r10*1+rsi]
+	cmp	rbx,r10
+	jb	$L$common_seh_tail
+
+	mov	rax,QWORD PTR[152+r8]
+
+	mov	r10d,DWORD PTR[4+r11]
+	lea	r10,QWORD PTR[r10*1+rsi]
+	cmp	rbx,r10
+	jae	$L$common_seh_tail
+
+	lea	rsi,QWORD PTR[64+rax]
+	lea	rdi,QWORD PTR[512+r8]
+	mov	ecx,10
+	DD	0a548f3fch		
+	lea	rax,QWORD PTR[168+rax]
+
+	mov	rbx,QWORD PTR[((-8))+rax]
+	mov	rbp,QWORD PTR[((-16))+rax]
+	mov	r12,QWORD PTR[((-24))+rax]
+	mov	QWORD PTR[144+r8],rbx
+	mov	QWORD PTR[160+r8],rbp
+	mov	QWORD PTR[216+r8],r12
+
+$L$common_seh_tail::
+	mov	rdi,QWORD PTR[8+rax]
+	mov	rsi,QWORD PTR[16+rax]
+	mov	QWORD PTR[152+r8],rax
+	mov	QWORD PTR[168+r8],rsi
+	mov	QWORD PTR[176+r8],rdi
+
+	mov	rdi,QWORD PTR[40+r9]
+	mov	rsi,r8
+	mov	ecx,154
+	DD	0a548f3fch		
+
+	mov	rsi,r9
+	xor	rcx,rcx
+	mov	rdx,QWORD PTR[8+rsi]
+	mov	r8,QWORD PTR[rsi]
+	mov	r9,QWORD PTR[16+rsi]
+	mov	r10,QWORD PTR[40+rsi]
+	lea	r11,QWORD PTR[56+rsi]
+	lea	r12,QWORD PTR[24+rsi]
+	mov	QWORD PTR[32+rsp],r10
+	mov	QWORD PTR[40+rsp],r11
+	mov	QWORD PTR[48+rsp],r12
+	mov	QWORD PTR[56+rsp],rcx
+	call	QWORD PTR[__imp_RtlVirtualUnwind]
+
+	mov	eax,1
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+ssse3_handler	ENDP
+
+.text$	ENDS
+.pdata	SEGMENT READONLY ALIGN(4)
+ALIGN	4
+	DD	imagerel $L$SEH_begin_sha1_block_data_order
+	DD	imagerel $L$SEH_end_sha1_block_data_order
+	DD	imagerel $L$SEH_info_sha1_block_data_order
+	DD	imagerel $L$SEH_begin_sha1_block_data_order_ssse3
+	DD	imagerel $L$SEH_end_sha1_block_data_order_ssse3
+	DD	imagerel $L$SEH_info_sha1_block_data_order_ssse3
+.pdata	ENDS
+.xdata	SEGMENT READONLY ALIGN(8)
+ALIGN	8
+$L$SEH_info_sha1_block_data_order::
+DB	9,0,0,0
+	DD	imagerel se_handler
+$L$SEH_info_sha1_block_data_order_ssse3::
+DB	9,0,0,0
+	DD	imagerel ssse3_handler
+	DD	imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3	
+
+.xdata	ENDS
+END
+
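As a reading aid for the unrolled rounds above: sha1_block_data_order computes the standard SHA-1 compression function, and the round constants it embeds inline (1518500249, 1859775393, -1894007588, -899497514) and in the K_XX_XX table are 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc and 0xca62c1d6. Below is a minimal portable C sketch of that compression step, under the usual FIPS 180 definitions; the name sha1_compress_ref is illustrative only and is not part of LibreSSL. The assembly computes the first-stage Ch function as ((c ^ d) & b) ^ d and Maj as (b & c) | ((b ^ c) & d)-style two-term forms, which are bitwise equivalent to the expressions used here, and it keeps only a 16-word schedule in the 64 bytes below rsp (indexing w modulo 16) instead of the full 80-word array used in this sketch.

    #include <stdint.h>

    static uint32_t rotl32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

    /* Reference SHA-1 compression of one 64-byte block (illustrative sketch). */
    static void sha1_compress_ref(uint32_t h[5], const uint8_t block[64])
    {
        uint32_t w[80], a, b, c, d, e, f, k, t;
        int i;

        /* Message words are big-endian, hence the bswap instructions above. */
        for (i = 0; i < 16; i++)
            w[i] = (uint32_t)block[4*i] << 24 | (uint32_t)block[4*i+1] << 16 |
                   (uint32_t)block[4*i+2] << 8 | (uint32_t)block[4*i+3];

        /* Schedule expansion: xor of four earlier words, rotated left by one. */
        for (i = 16; i < 80; i++)
            w[i] = rotl32(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1);

        a = h[0]; b = h[1]; c = h[2]; d = h[3]; e = h[4];

        for (i = 0; i < 80; i++) {
            if (i < 20)      { f = (b & c) | (~b & d);          k = 0x5a827999; } /* Ch     */
            else if (i < 40) { f = b ^ c ^ d;                   k = 0x6ed9eba1; } /* Parity */
            else if (i < 60) { f = (b & c) | (b & d) | (c & d); k = 0x8f1bbcdc; } /* Maj    */
            else             { f = b ^ c ^ d;                   k = 0xca62c1d6; } /* Parity */

            t = rotl32(a, 5) + f + e + k + w[i];
            e = d; d = c; c = rotl32(b, 30); b = a; a = t;
        }

        h[0] += a; h[1] += b; h[2] += c; h[3] += d; h[4] += e;
    }

The rol reg,5 / rol reg,30 pairs and the lea-with-constant additions in the assembly correspond directly to the rotl32(a, 5) and rotl32(b, 30) updates and the "+ k + w[i]" term in this loop.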
diff --git a/ext/libressl/crypto/sha/sha1-mingw64-x86_64.S b/ext/libressl/crypto/sha/sha1-mingw64-x86_64.S
new file mode 100644
index 0000000..3ce9fc9
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha1-mingw64-x86_64.S
@@ -0,0 +1,2664 @@
+#include "x86_arch.h"
+.text	
+
+
+
+.globl	sha1_block_data_order
+.def	sha1_block_data_order;	.scl 2;	.type 32;	.endef
+.p2align	4
+sha1_block_data_order:
+	movq	%rdi,8(%rsp)
+	movq	%rsi,16(%rsp)
+	movq	%rsp,%rax
+.LSEH_begin_sha1_block_data_order:
+	movq	%rcx,%rdi
+	movq	%rdx,%rsi
+	movq	%r8,%rdx
+
+	movl	OPENSSL_ia32cap_P+0(%rip),%r9d
+	movl	OPENSSL_ia32cap_P+4(%rip),%r8d
+	testl	$IA32CAP_MASK1_SSSE3,%r8d
+	jz	.Lialu
+	jmp	_ssse3_shortcut
+
+.p2align	4
+.Lialu:
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	movq	%rsp,%r11
+	movq	%rdi,%r8
+	subq	$72,%rsp
+	movq	%rsi,%r9
+	andq	$-64,%rsp
+	movq	%rdx,%r10
+	movq	%r11,64(%rsp)
+.Lprologue:
+
+	movl	0(%r8),%esi
+	movl	4(%r8),%edi
+	movl	8(%r8),%r11d
+	movl	12(%r8),%r12d
+	movl	16(%r8),%r13d
+	jmp	.Lloop
+
+.p2align	4
+.Lloop:
+	movl	0(%r9),%edx
+	bswapl	%edx
+	movl	%edx,0(%rsp)
+	movl	%r11d,%eax
+	movl	4(%r9),%ebp
+	movl	%esi,%ecx
+	xorl	%r12d,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%r13,1),%r13d
+	andl	%edi,%eax
+	movl	%ebp,4(%rsp)
+	addl	%ecx,%r13d
+	xorl	%r12d,%eax
+	roll	$30,%edi
+	addl	%eax,%r13d
+	movl	%edi,%eax
+	movl	8(%r9),%edx
+	movl	%r13d,%ecx
+	xorl	%r11d,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%r12,1),%r12d
+	andl	%esi,%eax
+	movl	%edx,8(%rsp)
+	addl	%ecx,%r12d
+	xorl	%r11d,%eax
+	roll	$30,%esi
+	addl	%eax,%r12d
+	movl	%esi,%eax
+	movl	12(%r9),%ebp
+	movl	%r12d,%ecx
+	xorl	%edi,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%r11,1),%r11d
+	andl	%r13d,%eax
+	movl	%ebp,12(%rsp)
+	addl	%ecx,%r11d
+	xorl	%edi,%eax
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	movl	%r13d,%eax
+	movl	16(%r9),%edx
+	movl	%r11d,%ecx
+	xorl	%esi,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%rdi,1),%edi
+	andl	%r12d,%eax
+	movl	%edx,16(%rsp)
+	addl	%ecx,%edi
+	xorl	%esi,%eax
+	roll	$30,%r12d
+	addl	%eax,%edi
+	movl	%r12d,%eax
+	movl	20(%r9),%ebp
+	movl	%edi,%ecx
+	xorl	%r13d,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%rsi,1),%esi
+	andl	%r11d,%eax
+	movl	%ebp,20(%rsp)
+	addl	%ecx,%esi
+	xorl	%r13d,%eax
+	roll	$30,%r11d
+	addl	%eax,%esi
+	movl	%r11d,%eax
+	movl	24(%r9),%edx
+	movl	%esi,%ecx
+	xorl	%r12d,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%r13,1),%r13d
+	andl	%edi,%eax
+	movl	%edx,24(%rsp)
+	addl	%ecx,%r13d
+	xorl	%r12d,%eax
+	roll	$30,%edi
+	addl	%eax,%r13d
+	movl	%edi,%eax
+	movl	28(%r9),%ebp
+	movl	%r13d,%ecx
+	xorl	%r11d,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%r12,1),%r12d
+	andl	%esi,%eax
+	movl	%ebp,28(%rsp)
+	addl	%ecx,%r12d
+	xorl	%r11d,%eax
+	roll	$30,%esi
+	addl	%eax,%r12d
+	movl	%esi,%eax
+	movl	32(%r9),%edx
+	movl	%r12d,%ecx
+	xorl	%edi,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%r11,1),%r11d
+	andl	%r13d,%eax
+	movl	%edx,32(%rsp)
+	addl	%ecx,%r11d
+	xorl	%edi,%eax
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	movl	%r13d,%eax
+	movl	36(%r9),%ebp
+	movl	%r11d,%ecx
+	xorl	%esi,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%rdi,1),%edi
+	andl	%r12d,%eax
+	movl	%ebp,36(%rsp)
+	addl	%ecx,%edi
+	xorl	%esi,%eax
+	roll	$30,%r12d
+	addl	%eax,%edi
+	movl	%r12d,%eax
+	movl	40(%r9),%edx
+	movl	%edi,%ecx
+	xorl	%r13d,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%rsi,1),%esi
+	andl	%r11d,%eax
+	movl	%edx,40(%rsp)
+	addl	%ecx,%esi
+	xorl	%r13d,%eax
+	roll	$30,%r11d
+	addl	%eax,%esi
+	movl	%r11d,%eax
+	movl	44(%r9),%ebp
+	movl	%esi,%ecx
+	xorl	%r12d,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%r13,1),%r13d
+	andl	%edi,%eax
+	movl	%ebp,44(%rsp)
+	addl	%ecx,%r13d
+	xorl	%r12d,%eax
+	roll	$30,%edi
+	addl	%eax,%r13d
+	movl	%edi,%eax
+	movl	48(%r9),%edx
+	movl	%r13d,%ecx
+	xorl	%r11d,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%r12,1),%r12d
+	andl	%esi,%eax
+	movl	%edx,48(%rsp)
+	addl	%ecx,%r12d
+	xorl	%r11d,%eax
+	roll	$30,%esi
+	addl	%eax,%r12d
+	movl	%esi,%eax
+	movl	52(%r9),%ebp
+	movl	%r12d,%ecx
+	xorl	%edi,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%r11,1),%r11d
+	andl	%r13d,%eax
+	movl	%ebp,52(%rsp)
+	addl	%ecx,%r11d
+	xorl	%edi,%eax
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	movl	%r13d,%eax
+	movl	56(%r9),%edx
+	movl	%r11d,%ecx
+	xorl	%esi,%eax
+	bswapl	%edx
+	roll	$5,%ecx
+	leal	1518500249(%rbp,%rdi,1),%edi
+	andl	%r12d,%eax
+	movl	%edx,56(%rsp)
+	addl	%ecx,%edi
+	xorl	%esi,%eax
+	roll	$30,%r12d
+	addl	%eax,%edi
+	movl	%r12d,%eax
+	movl	60(%r9),%ebp
+	movl	%edi,%ecx
+	xorl	%r13d,%eax
+	bswapl	%ebp
+	roll	$5,%ecx
+	leal	1518500249(%rdx,%rsi,1),%esi
+	andl	%r11d,%eax
+	movl	%ebp,60(%rsp)
+	addl	%ecx,%esi
+	xorl	%r13d,%eax
+	roll	$30,%r11d
+	addl	%eax,%esi
+	movl	0(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	8(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	xorl	32(%rsp),%edx
+	andl	%edi,%eax
+	leal	1518500249(%rbp,%r13,1),%r13d
+	xorl	52(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$1,%edx
+	addl	%ecx,%r13d
+	roll	$30,%edi
+	movl	%edx,0(%rsp)
+	addl	%eax,%r13d
+	movl	4(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	12(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	xorl	36(%rsp),%ebp
+	andl	%esi,%eax
+	leal	1518500249(%rdx,%r12,1),%r12d
+	xorl	56(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$1,%ebp
+	addl	%ecx,%r12d
+	roll	$30,%esi
+	movl	%ebp,4(%rsp)
+	addl	%eax,%r12d
+	movl	8(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	xorl	40(%rsp),%edx
+	andl	%r13d,%eax
+	leal	1518500249(%rbp,%r11,1),%r11d
+	xorl	60(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$1,%edx
+	addl	%ecx,%r11d
+	roll	$30,%r13d
+	movl	%edx,8(%rsp)
+	addl	%eax,%r11d
+	movl	12(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	20(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	xorl	44(%rsp),%ebp
+	andl	%r12d,%eax
+	leal	1518500249(%rdx,%rdi,1),%edi
+	xorl	0(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$1,%ebp
+	addl	%ecx,%edi
+	roll	$30,%r12d
+	movl	%ebp,12(%rsp)
+	addl	%eax,%edi
+	movl	16(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	24(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	xorl	48(%rsp),%edx
+	andl	%r11d,%eax
+	leal	1518500249(%rbp,%rsi,1),%esi
+	xorl	4(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$1,%edx
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	movl	%edx,16(%rsp)
+	addl	%eax,%esi
+	movl	20(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	28(%rsp),%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r13,1),%r13d
+	xorl	52(%rsp),%ebp
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	8(%rsp),%ebp
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	movl	%ebp,20(%rsp)
+	movl	24(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	32(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r12,1),%r12d
+	xorl	56(%rsp),%edx
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	12(%rsp),%edx
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%edx
+	movl	%edx,24(%rsp)
+	movl	28(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	36(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r11,1),%r11d
+	xorl	60(%rsp),%ebp
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	16(%rsp),%ebp
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	movl	%ebp,28(%rsp)
+	movl	32(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	40(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%rdi,1),%edi
+	xorl	0(%rsp),%edx
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	20(%rsp),%edx
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	movl	%edx,32(%rsp)
+	movl	36(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	44(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%rsi,1),%esi
+	xorl	4(%rsp),%ebp
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	24(%rsp),%ebp
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	movl	%ebp,36(%rsp)
+	movl	40(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	48(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r13,1),%r13d
+	xorl	8(%rsp),%edx
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	28(%rsp),%edx
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	movl	%edx,40(%rsp)
+	movl	44(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	52(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r12,1),%r12d
+	xorl	12(%rsp),%ebp
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	32(%rsp),%ebp
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	movl	%ebp,44(%rsp)
+	movl	48(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	56(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r11,1),%r11d
+	xorl	16(%rsp),%edx
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	36(%rsp),%edx
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	movl	%edx,48(%rsp)
+	movl	52(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	60(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%rdi,1),%edi
+	xorl	20(%rsp),%ebp
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	40(%rsp),%ebp
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	movl	%ebp,52(%rsp)
+	movl	56(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	0(%rsp),%edx
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%rsi,1),%esi
+	xorl	24(%rsp),%edx
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	44(%rsp),%edx
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%edx
+	movl	%edx,56(%rsp)
+	movl	60(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r13,1),%r13d
+	xorl	28(%rsp),%ebp
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	48(%rsp),%ebp
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	movl	%ebp,60(%rsp)
+	movl	0(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	8(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r12,1),%r12d
+	xorl	32(%rsp),%edx
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	52(%rsp),%edx
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%edx
+	movl	%edx,0(%rsp)
+	movl	4(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	12(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r11,1),%r11d
+	xorl	36(%rsp),%ebp
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	56(%rsp),%ebp
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	movl	%ebp,4(%rsp)
+	movl	8(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%rdi,1),%edi
+	xorl	40(%rsp),%edx
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	60(%rsp),%edx
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	movl	%edx,8(%rsp)
+	movl	12(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	20(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%rsi,1),%esi
+	xorl	44(%rsp),%ebp
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	0(%rsp),%ebp
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	movl	%ebp,12(%rsp)
+	movl	16(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	24(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r13,1),%r13d
+	xorl	48(%rsp),%edx
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	4(%rsp),%edx
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	movl	%edx,16(%rsp)
+	movl	20(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	28(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%r12,1),%r12d
+	xorl	52(%rsp),%ebp
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	8(%rsp),%ebp
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	movl	%ebp,20(%rsp)
+	movl	24(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	32(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%r11,1),%r11d
+	xorl	56(%rsp),%edx
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	12(%rsp),%edx
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	movl	%edx,24(%rsp)
+	movl	28(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	36(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rdx,%rdi,1),%edi
+	xorl	60(%rsp),%ebp
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	16(%rsp),%ebp
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	movl	%ebp,28(%rsp)
+	movl	32(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	40(%rsp),%edx
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	1859775393(%rbp,%rsi,1),%esi
+	xorl	0(%rsp),%edx
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	20(%rsp),%edx
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%edx
+	movl	%edx,32(%rsp)
+	movl	36(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%r11d,%ebx
+	xorl	44(%rsp),%ebp
+	andl	%r12d,%eax
+	movl	%esi,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%r12d,%ebx
+	leal	-1894007588(%rdx,%r13,1),%r13d
+	roll	$5,%ecx
+	xorl	24(%rsp),%ebp
+	addl	%eax,%r13d
+	andl	%edi,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r13d
+	roll	$30,%edi
+	movl	%ebp,36(%rsp)
+	addl	%ecx,%r13d
+	movl	40(%rsp),%edx
+	movl	%edi,%eax
+	movl	%edi,%ebx
+	xorl	48(%rsp),%edx
+	andl	%r11d,%eax
+	movl	%r13d,%ecx
+	xorl	8(%rsp),%edx
+	xorl	%r11d,%ebx
+	leal	-1894007588(%rbp,%r12,1),%r12d
+	roll	$5,%ecx
+	xorl	28(%rsp),%edx
+	addl	%eax,%r12d
+	andl	%esi,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r12d
+	roll	$30,%esi
+	movl	%edx,40(%rsp)
+	addl	%ecx,%r12d
+	movl	44(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%esi,%ebx
+	xorl	52(%rsp),%ebp
+	andl	%edi,%eax
+	movl	%r12d,%ecx
+	xorl	12(%rsp),%ebp
+	xorl	%edi,%ebx
+	leal	-1894007588(%rdx,%r11,1),%r11d
+	roll	$5,%ecx
+	xorl	32(%rsp),%ebp
+	addl	%eax,%r11d
+	andl	%r13d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r11d
+	roll	$30,%r13d
+	movl	%ebp,44(%rsp)
+	addl	%ecx,%r11d
+	movl	48(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r13d,%ebx
+	xorl	56(%rsp),%edx
+	andl	%esi,%eax
+	movl	%r11d,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%esi,%ebx
+	leal	-1894007588(%rbp,%rdi,1),%edi
+	roll	$5,%ecx
+	xorl	36(%rsp),%edx
+	addl	%eax,%edi
+	andl	%r12d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%edi
+	roll	$30,%r12d
+	movl	%edx,48(%rsp)
+	addl	%ecx,%edi
+	movl	52(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%r12d,%ebx
+	xorl	60(%rsp),%ebp
+	andl	%r13d,%eax
+	movl	%edi,%ecx
+	xorl	20(%rsp),%ebp
+	xorl	%r13d,%ebx
+	leal	-1894007588(%rdx,%rsi,1),%esi
+	roll	$5,%ecx
+	xorl	40(%rsp),%ebp
+	addl	%eax,%esi
+	andl	%r11d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%esi
+	roll	$30,%r11d
+	movl	%ebp,52(%rsp)
+	addl	%ecx,%esi
+	movl	56(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%r11d,%ebx
+	xorl	0(%rsp),%edx
+	andl	%r12d,%eax
+	movl	%esi,%ecx
+	xorl	24(%rsp),%edx
+	xorl	%r12d,%ebx
+	leal	-1894007588(%rbp,%r13,1),%r13d
+	roll	$5,%ecx
+	xorl	44(%rsp),%edx
+	addl	%eax,%r13d
+	andl	%edi,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r13d
+	roll	$30,%edi
+	movl	%edx,56(%rsp)
+	addl	%ecx,%r13d
+	movl	60(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%edi,%ebx
+	xorl	4(%rsp),%ebp
+	andl	%r11d,%eax
+	movl	%r13d,%ecx
+	xorl	28(%rsp),%ebp
+	xorl	%r11d,%ebx
+	leal	-1894007588(%rdx,%r12,1),%r12d
+	roll	$5,%ecx
+	xorl	48(%rsp),%ebp
+	addl	%eax,%r12d
+	andl	%esi,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r12d
+	roll	$30,%esi
+	movl	%ebp,60(%rsp)
+	addl	%ecx,%r12d
+	movl	0(%rsp),%edx
+	movl	%esi,%eax
+	movl	%esi,%ebx
+	xorl	8(%rsp),%edx
+	andl	%edi,%eax
+	movl	%r12d,%ecx
+	xorl	32(%rsp),%edx
+	xorl	%edi,%ebx
+	leal	-1894007588(%rbp,%r11,1),%r11d
+	roll	$5,%ecx
+	xorl	52(%rsp),%edx
+	addl	%eax,%r11d
+	andl	%r13d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r11d
+	roll	$30,%r13d
+	movl	%edx,0(%rsp)
+	addl	%ecx,%r11d
+	movl	4(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r13d,%ebx
+	xorl	12(%rsp),%ebp
+	andl	%esi,%eax
+	movl	%r11d,%ecx
+	xorl	36(%rsp),%ebp
+	xorl	%esi,%ebx
+	leal	-1894007588(%rdx,%rdi,1),%edi
+	roll	$5,%ecx
+	xorl	56(%rsp),%ebp
+	addl	%eax,%edi
+	andl	%r12d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%edi
+	roll	$30,%r12d
+	movl	%ebp,4(%rsp)
+	addl	%ecx,%edi
+	movl	8(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%r12d,%ebx
+	xorl	16(%rsp),%edx
+	andl	%r13d,%eax
+	movl	%edi,%ecx
+	xorl	40(%rsp),%edx
+	xorl	%r13d,%ebx
+	leal	-1894007588(%rbp,%rsi,1),%esi
+	roll	$5,%ecx
+	xorl	60(%rsp),%edx
+	addl	%eax,%esi
+	andl	%r11d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%esi
+	roll	$30,%r11d
+	movl	%edx,8(%rsp)
+	addl	%ecx,%esi
+	movl	12(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%r11d,%ebx
+	xorl	20(%rsp),%ebp
+	andl	%r12d,%eax
+	movl	%esi,%ecx
+	xorl	44(%rsp),%ebp
+	xorl	%r12d,%ebx
+	leal	-1894007588(%rdx,%r13,1),%r13d
+	roll	$5,%ecx
+	xorl	0(%rsp),%ebp
+	addl	%eax,%r13d
+	andl	%edi,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r13d
+	roll	$30,%edi
+	movl	%ebp,12(%rsp)
+	addl	%ecx,%r13d
+	movl	16(%rsp),%edx
+	movl	%edi,%eax
+	movl	%edi,%ebx
+	xorl	24(%rsp),%edx
+	andl	%r11d,%eax
+	movl	%r13d,%ecx
+	xorl	48(%rsp),%edx
+	xorl	%r11d,%ebx
+	leal	-1894007588(%rbp,%r12,1),%r12d
+	roll	$5,%ecx
+	xorl	4(%rsp),%edx
+	addl	%eax,%r12d
+	andl	%esi,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r12d
+	roll	$30,%esi
+	movl	%edx,16(%rsp)
+	addl	%ecx,%r12d
+	movl	20(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%esi,%ebx
+	xorl	28(%rsp),%ebp
+	andl	%edi,%eax
+	movl	%r12d,%ecx
+	xorl	52(%rsp),%ebp
+	xorl	%edi,%ebx
+	leal	-1894007588(%rdx,%r11,1),%r11d
+	roll	$5,%ecx
+	xorl	8(%rsp),%ebp
+	addl	%eax,%r11d
+	andl	%r13d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r11d
+	roll	$30,%r13d
+	movl	%ebp,20(%rsp)
+	addl	%ecx,%r11d
+	movl	24(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r13d,%ebx
+	xorl	32(%rsp),%edx
+	andl	%esi,%eax
+	movl	%r11d,%ecx
+	xorl	56(%rsp),%edx
+	xorl	%esi,%ebx
+	leal	-1894007588(%rbp,%rdi,1),%edi
+	roll	$5,%ecx
+	xorl	12(%rsp),%edx
+	addl	%eax,%edi
+	andl	%r12d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%edi
+	roll	$30,%r12d
+	movl	%edx,24(%rsp)
+	addl	%ecx,%edi
+	movl	28(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%r12d,%ebx
+	xorl	36(%rsp),%ebp
+	andl	%r13d,%eax
+	movl	%edi,%ecx
+	xorl	60(%rsp),%ebp
+	xorl	%r13d,%ebx
+	leal	-1894007588(%rdx,%rsi,1),%esi
+	roll	$5,%ecx
+	xorl	16(%rsp),%ebp
+	addl	%eax,%esi
+	andl	%r11d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%esi
+	roll	$30,%r11d
+	movl	%ebp,28(%rsp)
+	addl	%ecx,%esi
+	movl	32(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%r11d,%ebx
+	xorl	40(%rsp),%edx
+	andl	%r12d,%eax
+	movl	%esi,%ecx
+	xorl	0(%rsp),%edx
+	xorl	%r12d,%ebx
+	leal	-1894007588(%rbp,%r13,1),%r13d
+	roll	$5,%ecx
+	xorl	20(%rsp),%edx
+	addl	%eax,%r13d
+	andl	%edi,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r13d
+	roll	$30,%edi
+	movl	%edx,32(%rsp)
+	addl	%ecx,%r13d
+	movl	36(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%edi,%ebx
+	xorl	44(%rsp),%ebp
+	andl	%r11d,%eax
+	movl	%r13d,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%r11d,%ebx
+	leal	-1894007588(%rdx,%r12,1),%r12d
+	roll	$5,%ecx
+	xorl	24(%rsp),%ebp
+	addl	%eax,%r12d
+	andl	%esi,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%r12d
+	roll	$30,%esi
+	movl	%ebp,36(%rsp)
+	addl	%ecx,%r12d
+	movl	40(%rsp),%edx
+	movl	%esi,%eax
+	movl	%esi,%ebx
+	xorl	48(%rsp),%edx
+	andl	%edi,%eax
+	movl	%r12d,%ecx
+	xorl	8(%rsp),%edx
+	xorl	%edi,%ebx
+	leal	-1894007588(%rbp,%r11,1),%r11d
+	roll	$5,%ecx
+	xorl	28(%rsp),%edx
+	addl	%eax,%r11d
+	andl	%r13d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%r11d
+	roll	$30,%r13d
+	movl	%edx,40(%rsp)
+	addl	%ecx,%r11d
+	movl	44(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r13d,%ebx
+	xorl	52(%rsp),%ebp
+	andl	%esi,%eax
+	movl	%r11d,%ecx
+	xorl	12(%rsp),%ebp
+	xorl	%esi,%ebx
+	leal	-1894007588(%rdx,%rdi,1),%edi
+	roll	$5,%ecx
+	xorl	32(%rsp),%ebp
+	addl	%eax,%edi
+	andl	%r12d,%ebx
+	roll	$1,%ebp
+	addl	%ebx,%edi
+	roll	$30,%r12d
+	movl	%ebp,44(%rsp)
+	addl	%ecx,%edi
+	movl	48(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%r12d,%ebx
+	xorl	56(%rsp),%edx
+	andl	%r13d,%eax
+	movl	%edi,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%r13d,%ebx
+	leal	-1894007588(%rbp,%rsi,1),%esi
+	roll	$5,%ecx
+	xorl	36(%rsp),%edx
+	addl	%eax,%esi
+	andl	%r11d,%ebx
+	roll	$1,%edx
+	addl	%ebx,%esi
+	roll	$30,%r11d
+	movl	%edx,48(%rsp)
+	addl	%ecx,%esi
+	movl	52(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	60(%rsp),%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r13,1),%r13d
+	xorl	20(%rsp),%ebp
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	40(%rsp),%ebp
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	movl	%ebp,52(%rsp)
+	movl	56(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	0(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r12,1),%r12d
+	xorl	24(%rsp),%edx
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	44(%rsp),%edx
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%edx
+	movl	%edx,56(%rsp)
+	movl	60(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r11,1),%r11d
+	xorl	28(%rsp),%ebp
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	48(%rsp),%ebp
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	movl	%ebp,60(%rsp)
+	movl	0(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	8(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%rdi,1),%edi
+	xorl	32(%rsp),%edx
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	52(%rsp),%edx
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	movl	%edx,0(%rsp)
+	movl	4(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	12(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%rsi,1),%esi
+	xorl	36(%rsp),%ebp
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	56(%rsp),%ebp
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	movl	%ebp,4(%rsp)
+	movl	8(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	16(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r13,1),%r13d
+	xorl	40(%rsp),%edx
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	60(%rsp),%edx
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	movl	%edx,8(%rsp)
+	movl	12(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	20(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r12,1),%r12d
+	xorl	44(%rsp),%ebp
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	0(%rsp),%ebp
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	movl	%ebp,12(%rsp)
+	movl	16(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	24(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r11,1),%r11d
+	xorl	48(%rsp),%edx
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	4(%rsp),%edx
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	movl	%edx,16(%rsp)
+	movl	20(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	28(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%rdi,1),%edi
+	xorl	52(%rsp),%ebp
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	8(%rsp),%ebp
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	movl	%ebp,20(%rsp)
+	movl	24(%rsp),%edx
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	32(%rsp),%edx
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%rsi,1),%esi
+	xorl	56(%rsp),%edx
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	12(%rsp),%edx
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%edx
+	movl	%edx,24(%rsp)
+	movl	28(%rsp),%ebp
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	36(%rsp),%ebp
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r13,1),%r13d
+	xorl	60(%rsp),%ebp
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	16(%rsp),%ebp
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%ebp
+	movl	%ebp,28(%rsp)
+	movl	32(%rsp),%edx
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	40(%rsp),%edx
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r12,1),%r12d
+	xorl	0(%rsp),%edx
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	20(%rsp),%edx
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%edx
+	movl	%edx,32(%rsp)
+	movl	36(%rsp),%ebp
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	44(%rsp),%ebp
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r11,1),%r11d
+	xorl	4(%rsp),%ebp
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	24(%rsp),%ebp
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%ebp
+	movl	%ebp,36(%rsp)
+	movl	40(%rsp),%edx
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	48(%rsp),%edx
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%rdi,1),%edi
+	xorl	8(%rsp),%edx
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	28(%rsp),%edx
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%edx
+	movl	%edx,40(%rsp)
+	movl	44(%rsp),%ebp
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	52(%rsp),%ebp
+	xorl	%r11d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%rsi,1),%esi
+	xorl	12(%rsp),%ebp
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	xorl	32(%rsp),%ebp
+	roll	$30,%r11d
+	addl	%eax,%esi
+	roll	$1,%ebp
+	movl	%ebp,44(%rsp)
+	movl	48(%rsp),%edx
+	movl	%r11d,%eax
+	movl	%esi,%ecx
+	xorl	56(%rsp),%edx
+	xorl	%edi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r13,1),%r13d
+	xorl	16(%rsp),%edx
+	xorl	%r12d,%eax
+	addl	%ecx,%r13d
+	xorl	36(%rsp),%edx
+	roll	$30,%edi
+	addl	%eax,%r13d
+	roll	$1,%edx
+	movl	%edx,48(%rsp)
+	movl	52(%rsp),%ebp
+	movl	%edi,%eax
+	movl	%r13d,%ecx
+	xorl	60(%rsp),%ebp
+	xorl	%esi,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%r12,1),%r12d
+	xorl	20(%rsp),%ebp
+	xorl	%r11d,%eax
+	addl	%ecx,%r12d
+	xorl	40(%rsp),%ebp
+	roll	$30,%esi
+	addl	%eax,%r12d
+	roll	$1,%ebp
+	movl	56(%rsp),%edx
+	movl	%esi,%eax
+	movl	%r12d,%ecx
+	xorl	0(%rsp),%edx
+	xorl	%r13d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rbp,%r11,1),%r11d
+	xorl	24(%rsp),%edx
+	xorl	%edi,%eax
+	addl	%ecx,%r11d
+	xorl	44(%rsp),%edx
+	roll	$30,%r13d
+	addl	%eax,%r11d
+	roll	$1,%edx
+	movl	60(%rsp),%ebp
+	movl	%r13d,%eax
+	movl	%r11d,%ecx
+	xorl	4(%rsp),%ebp
+	xorl	%r12d,%eax
+	roll	$5,%ecx
+	leal	-899497514(%rdx,%rdi,1),%edi
+	xorl	28(%rsp),%ebp
+	xorl	%esi,%eax
+	addl	%ecx,%edi
+	xorl	48(%rsp),%ebp
+	roll	$30,%r12d
+	addl	%eax,%edi
+	roll	$1,%ebp
+	movl	%r12d,%eax
+	movl	%edi,%ecx
+	xorl	%r11d,%eax
+	leal	-899497514(%rbp,%rsi,1),%esi
+	roll	$5,%ecx
+	xorl	%r13d,%eax
+	addl	%ecx,%esi
+	roll	$30,%r11d
+	addl	%eax,%esi
+	addl	0(%r8),%esi
+	addl	4(%r8),%edi
+	addl	8(%r8),%r11d
+	addl	12(%r8),%r12d
+	addl	16(%r8),%r13d
+	movl	%esi,0(%r8)
+	movl	%edi,4(%r8)
+	movl	%r11d,8(%r8)
+	movl	%r12d,12(%r8)
+	movl	%r13d,16(%r8)
+
+	subq	$1,%r10
+	leaq	64(%r9),%r9
+	jnz	.Lloop
+
+	movq	64(%rsp),%rsi
+	movq	(%rsi),%r13
+	movq	8(%rsi),%r12
+	movq	16(%rsi),%rbp
+	movq	24(%rsi),%rbx
+	leaq	32(%rsi),%rsp
+.Lepilogue:
+	movq	8(%rsp),%rdi
+	movq	16(%rsp),%rsi
+	retq
+.LSEH_end_sha1_block_data_order:
+.def	sha1_block_data_order_ssse3;	.scl 3;	.type 32;	.endef
+.p2align	4
+sha1_block_data_order_ssse3:
+	movq	%rdi,8(%rsp)
+	movq	%rsi,16(%rsp)
+	movq	%rsp,%rax
+.LSEH_begin_sha1_block_data_order_ssse3:
+	movq	%rcx,%rdi
+	movq	%rdx,%rsi
+	movq	%r8,%rdx
+
+_ssse3_shortcut:
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	leaq	-144(%rsp),%rsp
+	movaps	%xmm6,64+0(%rsp)
+	movaps	%xmm7,64+16(%rsp)
+	movaps	%xmm8,64+32(%rsp)
+	movaps	%xmm9,64+48(%rsp)
+	movaps	%xmm10,64+64(%rsp)
+.Lprologue_ssse3:
+	movq	%rdi,%r8
+	movq	%rsi,%r9
+	movq	%rdx,%r10
+
+	shlq	$6,%r10
+	addq	%r9,%r10
+	leaq	K_XX_XX(%rip),%r11
+
+	movl	0(%r8),%eax
+	movl	4(%r8),%ebx
+	movl	8(%r8),%ecx
+	movl	12(%r8),%edx
+	movl	%ebx,%esi
+	movl	16(%r8),%ebp
+
+	movdqa	64(%r11),%xmm6
+	movdqa	0(%r11),%xmm9
+	movdqu	0(%r9),%xmm0
+	movdqu	16(%r9),%xmm1
+	movdqu	32(%r9),%xmm2
+	movdqu	48(%r9),%xmm3
+.byte	102,15,56,0,198
+	addq	$64,%r9
+.byte	102,15,56,0,206
+.byte	102,15,56,0,214
+.byte	102,15,56,0,222
+	paddd	%xmm9,%xmm0
+	paddd	%xmm9,%xmm1
+	paddd	%xmm9,%xmm2
+	movdqa	%xmm0,0(%rsp)
+	psubd	%xmm9,%xmm0
+	movdqa	%xmm1,16(%rsp)
+	psubd	%xmm9,%xmm1
+	movdqa	%xmm2,32(%rsp)
+	psubd	%xmm9,%xmm2
+	jmp	.Loop_ssse3
+.p2align	4
+.Loop_ssse3:
+	movdqa	%xmm1,%xmm4
+	addl	0(%rsp),%ebp
+	xorl	%edx,%ecx
+	movdqa	%xmm3,%xmm8
+.byte	102,15,58,15,224,8
+	movl	%eax,%edi
+	roll	$5,%eax
+	paddd	%xmm3,%xmm9
+	andl	%ecx,%esi
+	xorl	%edx,%ecx
+	psrldq	$4,%xmm8
+	xorl	%edx,%esi
+	addl	%eax,%ebp
+	pxor	%xmm0,%xmm4
+	rorl	$2,%ebx
+	addl	%esi,%ebp
+	pxor	%xmm2,%xmm8
+	addl	4(%rsp),%edx
+	xorl	%ecx,%ebx
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	pxor	%xmm8,%xmm4
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	movdqa	%xmm9,48(%rsp)
+	xorl	%ecx,%edi
+	addl	%ebp,%edx
+	movdqa	%xmm4,%xmm10
+	movdqa	%xmm4,%xmm8
+	rorl	$7,%eax
+	addl	%edi,%edx
+	addl	8(%rsp),%ecx
+	xorl	%ebx,%eax
+	pslldq	$12,%xmm10
+	paddd	%xmm4,%xmm4
+	movl	%edx,%edi
+	roll	$5,%edx
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	psrld	$31,%xmm8
+	xorl	%ebx,%esi
+	addl	%edx,%ecx
+	movdqa	%xmm10,%xmm9
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	psrld	$30,%xmm10
+	por	%xmm8,%xmm4
+	addl	12(%rsp),%ebx
+	xorl	%eax,%ebp
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	pslld	$2,%xmm9
+	pxor	%xmm10,%xmm4
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	movdqa	0(%r11),%xmm10
+	xorl	%eax,%edi
+	addl	%ecx,%ebx
+	pxor	%xmm9,%xmm4
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	movdqa	%xmm2,%xmm5
+	addl	16(%rsp),%eax
+	xorl	%ebp,%edx
+	movdqa	%xmm4,%xmm9
+.byte	102,15,58,15,233,8
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	paddd	%xmm4,%xmm10
+	andl	%edx,%esi
+	xorl	%ebp,%edx
+	psrldq	$4,%xmm9
+	xorl	%ebp,%esi
+	addl	%ebx,%eax
+	pxor	%xmm1,%xmm5
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	pxor	%xmm3,%xmm9
+	addl	20(%rsp),%ebp
+	xorl	%edx,%ecx
+	movl	%eax,%esi
+	roll	$5,%eax
+	pxor	%xmm9,%xmm5
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	movdqa	%xmm10,0(%rsp)
+	xorl	%edx,%edi
+	addl	%eax,%ebp
+	movdqa	%xmm5,%xmm8
+	movdqa	%xmm5,%xmm9
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	addl	24(%rsp),%edx
+	xorl	%ecx,%ebx
+	pslldq	$12,%xmm8
+	paddd	%xmm5,%xmm5
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	psrld	$31,%xmm9
+	xorl	%ecx,%esi
+	addl	%ebp,%edx
+	movdqa	%xmm8,%xmm10
+	rorl	$7,%eax
+	addl	%esi,%edx
+	psrld	$30,%xmm8
+	por	%xmm9,%xmm5
+	addl	28(%rsp),%ecx
+	xorl	%ebx,%eax
+	movl	%edx,%esi
+	roll	$5,%edx
+	pslld	$2,%xmm10
+	pxor	%xmm8,%xmm5
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	movdqa	16(%r11),%xmm8
+	xorl	%ebx,%edi
+	addl	%edx,%ecx
+	pxor	%xmm10,%xmm5
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	movdqa	%xmm3,%xmm6
+	addl	32(%rsp),%ebx
+	xorl	%eax,%ebp
+	movdqa	%xmm5,%xmm10
+.byte	102,15,58,15,242,8
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	paddd	%xmm5,%xmm8
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	psrldq	$4,%xmm10
+	xorl	%eax,%esi
+	addl	%ecx,%ebx
+	pxor	%xmm2,%xmm6
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	pxor	%xmm4,%xmm10
+	addl	36(%rsp),%eax
+	xorl	%ebp,%edx
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	pxor	%xmm10,%xmm6
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	movdqa	%xmm8,16(%rsp)
+	xorl	%ebp,%edi
+	addl	%ebx,%eax
+	movdqa	%xmm6,%xmm9
+	movdqa	%xmm6,%xmm10
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	40(%rsp),%ebp
+	xorl	%edx,%ecx
+	pslldq	$12,%xmm9
+	paddd	%xmm6,%xmm6
+	movl	%eax,%edi
+	roll	$5,%eax
+	andl	%ecx,%esi
+	xorl	%edx,%ecx
+	psrld	$31,%xmm10
+	xorl	%edx,%esi
+	addl	%eax,%ebp
+	movdqa	%xmm9,%xmm8
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	psrld	$30,%xmm9
+	por	%xmm10,%xmm6
+	addl	44(%rsp),%edx
+	xorl	%ecx,%ebx
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	pslld	$2,%xmm8
+	pxor	%xmm9,%xmm6
+	andl	%ebx,%edi
+	xorl	%ecx,%ebx
+	movdqa	16(%r11),%xmm9
+	xorl	%ecx,%edi
+	addl	%ebp,%edx
+	pxor	%xmm8,%xmm6
+	rorl	$7,%eax
+	addl	%edi,%edx
+	movdqa	%xmm4,%xmm7
+	addl	48(%rsp),%ecx
+	xorl	%ebx,%eax
+	movdqa	%xmm6,%xmm8
+.byte	102,15,58,15,251,8
+	movl	%edx,%edi
+	roll	$5,%edx
+	paddd	%xmm6,%xmm9
+	andl	%eax,%esi
+	xorl	%ebx,%eax
+	psrldq	$4,%xmm8
+	xorl	%ebx,%esi
+	addl	%edx,%ecx
+	pxor	%xmm3,%xmm7
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	pxor	%xmm5,%xmm8
+	addl	52(%rsp),%ebx
+	xorl	%eax,%ebp
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	pxor	%xmm8,%xmm7
+	andl	%ebp,%edi
+	xorl	%eax,%ebp
+	movdqa	%xmm9,32(%rsp)
+	xorl	%eax,%edi
+	addl	%ecx,%ebx
+	movdqa	%xmm7,%xmm10
+	movdqa	%xmm7,%xmm8
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	addl	56(%rsp),%eax
+	xorl	%ebp,%edx
+	pslldq	$12,%xmm10
+	paddd	%xmm7,%xmm7
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	andl	%edx,%esi
+	xorl	%ebp,%edx
+	psrld	$31,%xmm8
+	xorl	%ebp,%esi
+	addl	%ebx,%eax
+	movdqa	%xmm10,%xmm9
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	psrld	$30,%xmm10
+	por	%xmm8,%xmm7
+	addl	60(%rsp),%ebp
+	xorl	%edx,%ecx
+	movl	%eax,%esi
+	roll	$5,%eax
+	pslld	$2,%xmm9
+	pxor	%xmm10,%xmm7
+	andl	%ecx,%edi
+	xorl	%edx,%ecx
+	movdqa	16(%r11),%xmm10
+	xorl	%edx,%edi
+	addl	%eax,%ebp
+	pxor	%xmm9,%xmm7
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	movdqa	%xmm7,%xmm9
+	addl	0(%rsp),%edx
+	pxor	%xmm4,%xmm0
+.byte	102,68,15,58,15,206,8
+	xorl	%ecx,%ebx
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	pxor	%xmm1,%xmm0
+	andl	%ebx,%esi
+	xorl	%ecx,%ebx
+	movdqa	%xmm10,%xmm8
+	paddd	%xmm7,%xmm10
+	xorl	%ecx,%esi
+	addl	%ebp,%edx
+	pxor	%xmm9,%xmm0
+	rorl	$7,%eax
+	addl	%esi,%edx
+	addl	4(%rsp),%ecx
+	xorl	%ebx,%eax
+	movdqa	%xmm0,%xmm9
+	movdqa	%xmm10,48(%rsp)
+	movl	%edx,%esi
+	roll	$5,%edx
+	andl	%eax,%edi
+	xorl	%ebx,%eax
+	pslld	$2,%xmm0
+	xorl	%ebx,%edi
+	addl	%edx,%ecx
+	psrld	$30,%xmm9
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	addl	8(%rsp),%ebx
+	xorl	%eax,%ebp
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	por	%xmm9,%xmm0
+	andl	%ebp,%esi
+	xorl	%eax,%ebp
+	movdqa	%xmm0,%xmm10
+	xorl	%eax,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	addl	12(%rsp),%eax
+	xorl	%ebp,%edx
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	andl	%edx,%edi
+	xorl	%ebp,%edx
+	xorl	%ebp,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	16(%rsp),%ebp
+	pxor	%xmm5,%xmm1
+.byte	102,68,15,58,15,215,8
+	xorl	%edx,%esi
+	movl	%eax,%edi
+	roll	$5,%eax
+	pxor	%xmm2,%xmm1
+	xorl	%ecx,%esi
+	addl	%eax,%ebp
+	movdqa	%xmm8,%xmm9
+	paddd	%xmm0,%xmm8
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	pxor	%xmm10,%xmm1
+	addl	20(%rsp),%edx
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	movdqa	%xmm1,%xmm10
+	movdqa	%xmm8,0(%rsp)
+	xorl	%ebx,%edi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%edi,%edx
+	pslld	$2,%xmm1
+	addl	24(%rsp),%ecx
+	xorl	%ebx,%esi
+	psrld	$30,%xmm10
+	movl	%edx,%edi
+	roll	$5,%edx
+	xorl	%eax,%esi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	por	%xmm10,%xmm1
+	addl	28(%rsp),%ebx
+	xorl	%eax,%edi
+	movdqa	%xmm1,%xmm8
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	xorl	%ebp,%edi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	addl	32(%rsp),%eax
+	pxor	%xmm6,%xmm2
+.byte	102,68,15,58,15,192,8
+	xorl	%ebp,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	pxor	%xmm3,%xmm2
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	movdqa	32(%r11),%xmm10
+	paddd	%xmm1,%xmm9
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	pxor	%xmm8,%xmm2
+	addl	36(%rsp),%ebp
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	movdqa	%xmm2,%xmm8
+	movdqa	%xmm9,16(%rsp)
+	xorl	%ecx,%edi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	pslld	$2,%xmm2
+	addl	40(%rsp),%edx
+	xorl	%ecx,%esi
+	psrld	$30,%xmm8
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	xorl	%ebx,%esi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%esi,%edx
+	por	%xmm8,%xmm2
+	addl	44(%rsp),%ecx
+	xorl	%ebx,%edi
+	movdqa	%xmm2,%xmm9
+	movl	%edx,%esi
+	roll	$5,%edx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	addl	48(%rsp),%ebx
+	pxor	%xmm7,%xmm3
+.byte	102,68,15,58,15,201,8
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	pxor	%xmm4,%xmm3
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	movdqa	%xmm10,%xmm8
+	paddd	%xmm2,%xmm10
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	pxor	%xmm9,%xmm3
+	addl	52(%rsp),%eax
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	movdqa	%xmm3,%xmm9
+	movdqa	%xmm10,32(%rsp)
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	pslld	$2,%xmm3
+	addl	56(%rsp),%ebp
+	xorl	%edx,%esi
+	psrld	$30,%xmm9
+	movl	%eax,%edi
+	roll	$5,%eax
+	xorl	%ecx,%esi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	por	%xmm9,%xmm3
+	addl	60(%rsp),%edx
+	xorl	%ecx,%edi
+	movdqa	%xmm3,%xmm10
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	xorl	%ebx,%edi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%edi,%edx
+	addl	0(%rsp),%ecx
+	pxor	%xmm0,%xmm4
+.byte	102,68,15,58,15,210,8
+	xorl	%ebx,%esi
+	movl	%edx,%edi
+	roll	$5,%edx
+	pxor	%xmm5,%xmm4
+	xorl	%eax,%esi
+	addl	%edx,%ecx
+	movdqa	%xmm8,%xmm9
+	paddd	%xmm3,%xmm8
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	pxor	%xmm10,%xmm4
+	addl	4(%rsp),%ebx
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	movdqa	%xmm4,%xmm10
+	movdqa	%xmm8,48(%rsp)
+	xorl	%ebp,%edi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	pslld	$2,%xmm4
+	addl	8(%rsp),%eax
+	xorl	%ebp,%esi
+	psrld	$30,%xmm10
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	por	%xmm10,%xmm4
+	addl	12(%rsp),%ebp
+	xorl	%edx,%edi
+	movdqa	%xmm4,%xmm8
+	movl	%eax,%esi
+	roll	$5,%eax
+	xorl	%ecx,%edi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	addl	16(%rsp),%edx
+	pxor	%xmm1,%xmm5
+.byte	102,68,15,58,15,195,8
+	xorl	%ecx,%esi
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	pxor	%xmm6,%xmm5
+	xorl	%ebx,%esi
+	addl	%ebp,%edx
+	movdqa	%xmm9,%xmm10
+	paddd	%xmm4,%xmm9
+	rorl	$7,%eax
+	addl	%esi,%edx
+	pxor	%xmm8,%xmm5
+	addl	20(%rsp),%ecx
+	xorl	%ebx,%edi
+	movl	%edx,%esi
+	roll	$5,%edx
+	movdqa	%xmm5,%xmm8
+	movdqa	%xmm9,0(%rsp)
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	pslld	$2,%xmm5
+	addl	24(%rsp),%ebx
+	xorl	%eax,%esi
+	psrld	$30,%xmm8
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	por	%xmm8,%xmm5
+	addl	28(%rsp),%eax
+	xorl	%ebp,%edi
+	movdqa	%xmm5,%xmm9
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	movl	%ecx,%edi
+	pxor	%xmm2,%xmm6
+.byte	102,68,15,58,15,204,8
+	xorl	%edx,%ecx
+	addl	32(%rsp),%ebp
+	andl	%edx,%edi
+	pxor	%xmm7,%xmm6
+	andl	%ecx,%esi
+	rorl	$7,%ebx
+	movdqa	%xmm10,%xmm8
+	paddd	%xmm5,%xmm10
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	pxor	%xmm9,%xmm6
+	roll	$5,%eax
+	addl	%esi,%ebp
+	xorl	%edx,%ecx
+	addl	%eax,%ebp
+	movdqa	%xmm6,%xmm9
+	movdqa	%xmm10,16(%rsp)
+	movl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	36(%rsp),%edx
+	andl	%ecx,%esi
+	pslld	$2,%xmm6
+	andl	%ebx,%edi
+	rorl	$7,%eax
+	psrld	$30,%xmm9
+	addl	%esi,%edx
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	addl	%edi,%edx
+	xorl	%ecx,%ebx
+	addl	%ebp,%edx
+	por	%xmm9,%xmm6
+	movl	%eax,%edi
+	xorl	%ebx,%eax
+	movdqa	%xmm6,%xmm10
+	addl	40(%rsp),%ecx
+	andl	%ebx,%edi
+	andl	%eax,%esi
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%ebx,%eax
+	addl	%edx,%ecx
+	movl	%ebp,%esi
+	xorl	%eax,%ebp
+	addl	44(%rsp),%ebx
+	andl	%eax,%esi
+	andl	%ebp,%edi
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%eax,%ebp
+	addl	%ecx,%ebx
+	movl	%edx,%edi
+	pxor	%xmm3,%xmm7
+.byte	102,68,15,58,15,213,8
+	xorl	%ebp,%edx
+	addl	48(%rsp),%eax
+	andl	%ebp,%edi
+	pxor	%xmm0,%xmm7
+	andl	%edx,%esi
+	rorl	$7,%ecx
+	movdqa	48(%r11),%xmm9
+	paddd	%xmm6,%xmm8
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	pxor	%xmm10,%xmm7
+	roll	$5,%ebx
+	addl	%esi,%eax
+	xorl	%ebp,%edx
+	addl	%ebx,%eax
+	movdqa	%xmm7,%xmm10
+	movdqa	%xmm8,32(%rsp)
+	movl	%ecx,%esi
+	xorl	%edx,%ecx
+	addl	52(%rsp),%ebp
+	andl	%edx,%esi
+	pslld	$2,%xmm7
+	andl	%ecx,%edi
+	rorl	$7,%ebx
+	psrld	$30,%xmm10
+	addl	%esi,%ebp
+	movl	%eax,%esi
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%edx,%ecx
+	addl	%eax,%ebp
+	por	%xmm10,%xmm7
+	movl	%ebx,%edi
+	xorl	%ecx,%ebx
+	movdqa	%xmm7,%xmm8
+	addl	56(%rsp),%edx
+	andl	%ecx,%edi
+	andl	%ebx,%esi
+	rorl	$7,%eax
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	addl	%esi,%edx
+	xorl	%ecx,%ebx
+	addl	%ebp,%edx
+	movl	%eax,%esi
+	xorl	%ebx,%eax
+	addl	60(%rsp),%ecx
+	andl	%ebx,%esi
+	andl	%eax,%edi
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	movl	%edx,%esi
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%ebx,%eax
+	addl	%edx,%ecx
+	movl	%ebp,%edi
+	pxor	%xmm4,%xmm0
+.byte	102,68,15,58,15,198,8
+	xorl	%eax,%ebp
+	addl	0(%rsp),%ebx
+	andl	%eax,%edi
+	pxor	%xmm1,%xmm0
+	andl	%ebp,%esi
+	rorl	$7,%edx
+	movdqa	%xmm9,%xmm10
+	paddd	%xmm7,%xmm9
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	pxor	%xmm8,%xmm0
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%eax,%ebp
+	addl	%ecx,%ebx
+	movdqa	%xmm0,%xmm8
+	movdqa	%xmm9,48(%rsp)
+	movl	%edx,%esi
+	xorl	%ebp,%edx
+	addl	4(%rsp),%eax
+	andl	%ebp,%esi
+	pslld	$2,%xmm0
+	andl	%edx,%edi
+	rorl	$7,%ecx
+	psrld	$30,%xmm8
+	addl	%esi,%eax
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%ebp,%edx
+	addl	%ebx,%eax
+	por	%xmm8,%xmm0
+	movl	%ecx,%edi
+	xorl	%edx,%ecx
+	movdqa	%xmm0,%xmm9
+	addl	8(%rsp),%ebp
+	andl	%edx,%edi
+	andl	%ecx,%esi
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	movl	%eax,%edi
+	roll	$5,%eax
+	addl	%esi,%ebp
+	xorl	%edx,%ecx
+	addl	%eax,%ebp
+	movl	%ebx,%esi
+	xorl	%ecx,%ebx
+	addl	12(%rsp),%edx
+	andl	%ecx,%esi
+	andl	%ebx,%edi
+	rorl	$7,%eax
+	addl	%esi,%edx
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	addl	%edi,%edx
+	xorl	%ecx,%ebx
+	addl	%ebp,%edx
+	movl	%eax,%edi
+	pxor	%xmm5,%xmm1
+.byte	102,68,15,58,15,207,8
+	xorl	%ebx,%eax
+	addl	16(%rsp),%ecx
+	andl	%ebx,%edi
+	pxor	%xmm2,%xmm1
+	andl	%eax,%esi
+	rorl	$7,%ebp
+	movdqa	%xmm10,%xmm8
+	paddd	%xmm0,%xmm10
+	addl	%edi,%ecx
+	movl	%edx,%edi
+	pxor	%xmm9,%xmm1
+	roll	$5,%edx
+	addl	%esi,%ecx
+	xorl	%ebx,%eax
+	addl	%edx,%ecx
+	movdqa	%xmm1,%xmm9
+	movdqa	%xmm10,0(%rsp)
+	movl	%ebp,%esi
+	xorl	%eax,%ebp
+	addl	20(%rsp),%ebx
+	andl	%eax,%esi
+	pslld	$2,%xmm1
+	andl	%ebp,%edi
+	rorl	$7,%edx
+	psrld	$30,%xmm9
+	addl	%esi,%ebx
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	addl	%edi,%ebx
+	xorl	%eax,%ebp
+	addl	%ecx,%ebx
+	por	%xmm9,%xmm1
+	movl	%edx,%edi
+	xorl	%ebp,%edx
+	movdqa	%xmm1,%xmm10
+	addl	24(%rsp),%eax
+	andl	%ebp,%edi
+	andl	%edx,%esi
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	addl	%esi,%eax
+	xorl	%ebp,%edx
+	addl	%ebx,%eax
+	movl	%ecx,%esi
+	xorl	%edx,%ecx
+	addl	28(%rsp),%ebp
+	andl	%edx,%esi
+	andl	%ecx,%edi
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	movl	%eax,%esi
+	roll	$5,%eax
+	addl	%edi,%ebp
+	xorl	%edx,%ecx
+	addl	%eax,%ebp
+	movl	%ebx,%edi
+	pxor	%xmm6,%xmm2
+.byte	102,68,15,58,15,208,8
+	xorl	%ecx,%ebx
+	addl	32(%rsp),%edx
+	andl	%ecx,%edi
+	pxor	%xmm3,%xmm2
+	andl	%ebx,%esi
+	rorl	$7,%eax
+	movdqa	%xmm8,%xmm9
+	paddd	%xmm1,%xmm8
+	addl	%edi,%edx
+	movl	%ebp,%edi
+	pxor	%xmm10,%xmm2
+	roll	$5,%ebp
+	addl	%esi,%edx
+	xorl	%ecx,%ebx
+	addl	%ebp,%edx
+	movdqa	%xmm2,%xmm10
+	movdqa	%xmm8,16(%rsp)
+	movl	%eax,%esi
+	xorl	%ebx,%eax
+	addl	36(%rsp),%ecx
+	andl	%ebx,%esi
+	pslld	$2,%xmm2
+	andl	%eax,%edi
+	rorl	$7,%ebp
+	psrld	$30,%xmm10
+	addl	%esi,%ecx
+	movl	%edx,%esi
+	roll	$5,%edx
+	addl	%edi,%ecx
+	xorl	%ebx,%eax
+	addl	%edx,%ecx
+	por	%xmm10,%xmm2
+	movl	%ebp,%edi
+	xorl	%eax,%ebp
+	movdqa	%xmm2,%xmm8
+	addl	40(%rsp),%ebx
+	andl	%eax,%edi
+	andl	%ebp,%esi
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	addl	%esi,%ebx
+	xorl	%eax,%ebp
+	addl	%ecx,%ebx
+	movl	%edx,%esi
+	xorl	%ebp,%edx
+	addl	44(%rsp),%eax
+	andl	%ebp,%esi
+	andl	%edx,%edi
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	addl	%edi,%eax
+	xorl	%ebp,%edx
+	addl	%ebx,%eax
+	addl	48(%rsp),%ebp
+	pxor	%xmm7,%xmm3
+.byte	102,68,15,58,15,193,8
+	xorl	%edx,%esi
+	movl	%eax,%edi
+	roll	$5,%eax
+	pxor	%xmm4,%xmm3
+	xorl	%ecx,%esi
+	addl	%eax,%ebp
+	movdqa	%xmm9,%xmm10
+	paddd	%xmm2,%xmm9
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	pxor	%xmm8,%xmm3
+	addl	52(%rsp),%edx
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	movdqa	%xmm3,%xmm8
+	movdqa	%xmm9,32(%rsp)
+	xorl	%ebx,%edi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%edi,%edx
+	pslld	$2,%xmm3
+	addl	56(%rsp),%ecx
+	xorl	%ebx,%esi
+	psrld	$30,%xmm8
+	movl	%edx,%edi
+	roll	$5,%edx
+	xorl	%eax,%esi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	por	%xmm8,%xmm3
+	addl	60(%rsp),%ebx
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	xorl	%ebp,%edi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	addl	0(%rsp),%eax
+	paddd	%xmm3,%xmm10
+	xorl	%ebp,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	xorl	%edx,%esi
+	movdqa	%xmm10,48(%rsp)
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	addl	4(%rsp),%ebp
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	xorl	%ecx,%edi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	addl	8(%rsp),%edx
+	xorl	%ecx,%esi
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	xorl	%ebx,%esi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%esi,%edx
+	addl	12(%rsp),%ecx
+	xorl	%ebx,%edi
+	movl	%edx,%esi
+	roll	$5,%edx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	cmpq	%r10,%r9
+	je	.Ldone_ssse3
+	movdqa	64(%r11),%xmm6
+	movdqa	0(%r11),%xmm9
+	movdqu	0(%r9),%xmm0
+	movdqu	16(%r9),%xmm1
+	movdqu	32(%r9),%xmm2
+	movdqu	48(%r9),%xmm3
+.byte	102,15,56,0,198
+	addq	$64,%r9
+	addl	16(%rsp),%ebx
+	xorl	%eax,%esi
+.byte	102,15,56,0,206
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	paddd	%xmm9,%xmm0
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	movdqa	%xmm0,0(%rsp)
+	addl	20(%rsp),%eax
+	xorl	%ebp,%edi
+	psubd	%xmm9,%xmm0
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	24(%rsp),%ebp
+	xorl	%edx,%esi
+	movl	%eax,%edi
+	roll	$5,%eax
+	xorl	%ecx,%esi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	addl	28(%rsp),%edx
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	xorl	%ebx,%edi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%edi,%edx
+	addl	32(%rsp),%ecx
+	xorl	%ebx,%esi
+.byte	102,15,56,0,214
+	movl	%edx,%edi
+	roll	$5,%edx
+	paddd	%xmm9,%xmm1
+	xorl	%eax,%esi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	movdqa	%xmm1,16(%rsp)
+	addl	36(%rsp),%ebx
+	xorl	%eax,%edi
+	psubd	%xmm9,%xmm1
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	xorl	%ebp,%edi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	addl	40(%rsp),%eax
+	xorl	%ebp,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	addl	44(%rsp),%ebp
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	xorl	%ecx,%edi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	addl	48(%rsp),%edx
+	xorl	%ecx,%esi
+.byte	102,15,56,0,222
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	paddd	%xmm9,%xmm2
+	xorl	%ebx,%esi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%esi,%edx
+	movdqa	%xmm2,32(%rsp)
+	addl	52(%rsp),%ecx
+	xorl	%ebx,%edi
+	psubd	%xmm9,%xmm2
+	movl	%edx,%esi
+	roll	$5,%edx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	addl	56(%rsp),%ebx
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	addl	60(%rsp),%eax
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	0(%r8),%eax
+	addl	4(%r8),%esi
+	addl	8(%r8),%ecx
+	addl	12(%r8),%edx
+	movl	%eax,0(%r8)
+	addl	16(%r8),%ebp
+	movl	%esi,4(%r8)
+	movl	%esi,%ebx
+	movl	%ecx,8(%r8)
+	movl	%edx,12(%r8)
+	movl	%ebp,16(%r8)
+	jmp	.Loop_ssse3
+
+.p2align	4
+.Ldone_ssse3:
+	addl	16(%rsp),%ebx
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	addl	20(%rsp),%eax
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	24(%rsp),%ebp
+	xorl	%edx,%esi
+	movl	%eax,%edi
+	roll	$5,%eax
+	xorl	%ecx,%esi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%esi,%ebp
+	addl	28(%rsp),%edx
+	xorl	%ecx,%edi
+	movl	%ebp,%esi
+	roll	$5,%ebp
+	xorl	%ebx,%edi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%edi,%edx
+	addl	32(%rsp),%ecx
+	xorl	%ebx,%esi
+	movl	%edx,%edi
+	roll	$5,%edx
+	xorl	%eax,%esi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%esi,%ecx
+	addl	36(%rsp),%ebx
+	xorl	%eax,%edi
+	movl	%ecx,%esi
+	roll	$5,%ecx
+	xorl	%ebp,%edi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%edi,%ebx
+	addl	40(%rsp),%eax
+	xorl	%ebp,%esi
+	movl	%ebx,%edi
+	roll	$5,%ebx
+	xorl	%edx,%esi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%esi,%eax
+	addl	44(%rsp),%ebp
+	xorl	%edx,%edi
+	movl	%eax,%esi
+	roll	$5,%eax
+	xorl	%ecx,%edi
+	addl	%eax,%ebp
+	rorl	$7,%ebx
+	addl	%edi,%ebp
+	addl	48(%rsp),%edx
+	xorl	%ecx,%esi
+	movl	%ebp,%edi
+	roll	$5,%ebp
+	xorl	%ebx,%esi
+	addl	%ebp,%edx
+	rorl	$7,%eax
+	addl	%esi,%edx
+	addl	52(%rsp),%ecx
+	xorl	%ebx,%edi
+	movl	%edx,%esi
+	roll	$5,%edx
+	xorl	%eax,%edi
+	addl	%edx,%ecx
+	rorl	$7,%ebp
+	addl	%edi,%ecx
+	addl	56(%rsp),%ebx
+	xorl	%eax,%esi
+	movl	%ecx,%edi
+	roll	$5,%ecx
+	xorl	%ebp,%esi
+	addl	%ecx,%ebx
+	rorl	$7,%edx
+	addl	%esi,%ebx
+	addl	60(%rsp),%eax
+	xorl	%ebp,%edi
+	movl	%ebx,%esi
+	roll	$5,%ebx
+	xorl	%edx,%edi
+	addl	%ebx,%eax
+	rorl	$7,%ecx
+	addl	%edi,%eax
+	addl	0(%r8),%eax
+	addl	4(%r8),%esi
+	addl	8(%r8),%ecx
+	movl	%eax,0(%r8)
+	addl	12(%r8),%edx
+	movl	%esi,4(%r8)
+	addl	16(%r8),%ebp
+	movl	%ecx,8(%r8)
+	movl	%edx,12(%r8)
+	movl	%ebp,16(%r8)
+	movaps	64+0(%rsp),%xmm6
+	movaps	64+16(%rsp),%xmm7
+	movaps	64+32(%rsp),%xmm8
+	movaps	64+48(%rsp),%xmm9
+	movaps	64+64(%rsp),%xmm10
+	leaq	144(%rsp),%rsi
+	movq	0(%rsi),%r12
+	movq	8(%rsi),%rbp
+	movq	16(%rsi),%rbx
+	leaq	24(%rsi),%rsp
+.Lepilogue_ssse3:
+	movq	8(%rsp),%rdi
+	movq	16(%rsp),%rsi
+	retq
+.LSEH_end_sha1_block_data_order_ssse3:
+.p2align	6
+K_XX_XX:
+.long	0x5a827999,0x5a827999,0x5a827999,0x5a827999	
+.long	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1	
+.long	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc	
+.long	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6	
+.long	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f	
+.byte	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align	6
+
+.def	se_handler;	.scl 3;	.type 32;	.endef
+.p2align	4
+se_handler:
+	pushq	%rsi
+	pushq	%rdi
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	pushfq
+	subq	$64,%rsp
+
+	movq	120(%r8),%rax
+	movq	248(%r8),%rbx
+
+	leaq	.Lprologue(%rip),%r10
+	cmpq	%r10,%rbx
+	jb	.Lcommon_seh_tail
+
+	movq	152(%r8),%rax
+
+	leaq	.Lepilogue(%rip),%r10
+	cmpq	%r10,%rbx
+	jae	.Lcommon_seh_tail
+
+	movq	64(%rax),%rax
+	leaq	32(%rax),%rax
+
+	movq	-8(%rax),%rbx
+	movq	-16(%rax),%rbp
+	movq	-24(%rax),%r12
+	movq	-32(%rax),%r13
+	movq	%rbx,144(%r8)
+	movq	%rbp,160(%r8)
+	movq	%r12,216(%r8)
+	movq	%r13,224(%r8)
+
+	jmp	.Lcommon_seh_tail
+
+
+.def	ssse3_handler;	.scl 3;	.type 32;	.endef
+.p2align	4
+ssse3_handler:
+	pushq	%rsi
+	pushq	%rdi
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	pushfq
+	subq	$64,%rsp
+
+	movq	120(%r8),%rax
+	movq	248(%r8),%rbx
+
+	movq	8(%r9),%rsi
+	movq	56(%r9),%r11
+
+	movl	0(%r11),%r10d
+	leaq	(%rsi,%r10,1),%r10
+	cmpq	%r10,%rbx
+	jb	.Lcommon_seh_tail
+
+	movq	152(%r8),%rax
+
+	movl	4(%r11),%r10d
+	leaq	(%rsi,%r10,1),%r10
+	cmpq	%r10,%rbx
+	jae	.Lcommon_seh_tail
+
+	leaq	64(%rax),%rsi
+	leaq	512(%r8),%rdi
+	movl	$10,%ecx
+.long	0xa548f3fc		
+	leaq	168(%rax),%rax
+
+	movq	-8(%rax),%rbx
+	movq	-16(%rax),%rbp
+	movq	-24(%rax),%r12
+	movq	%rbx,144(%r8)
+	movq	%rbp,160(%r8)
+	movq	%r12,216(%r8)
+
+.Lcommon_seh_tail:
+	movq	8(%rax),%rdi
+	movq	16(%rax),%rsi
+	movq	%rax,152(%r8)
+	movq	%rsi,168(%r8)
+	movq	%rdi,176(%r8)
+
+	movq	40(%r9),%rdi
+	movq	%r8,%rsi
+	movl	$154,%ecx
+.long	0xa548f3fc		
+
+	movq	%r9,%rsi
+	xorq	%rcx,%rcx
+	movq	8(%rsi),%rdx
+	movq	0(%rsi),%r8
+	movq	16(%rsi),%r9
+	movq	40(%rsi),%r10
+	leaq	56(%rsi),%r11
+	leaq	24(%rsi),%r12
+	movq	%r10,32(%rsp)
+	movq	%r11,40(%rsp)
+	movq	%r12,48(%rsp)
+	movq	%rcx,56(%rsp)
+	call	*__imp_RtlVirtualUnwind(%rip)
+
+	movl	$1,%eax
+	addq	$64,%rsp
+	popfq
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbp
+	popq	%rbx
+	popq	%rdi
+	popq	%rsi
+	retq
+
+
+.section	.pdata
+.p2align	2
+.rva	.LSEH_begin_sha1_block_data_order
+.rva	.LSEH_end_sha1_block_data_order
+.rva	.LSEH_info_sha1_block_data_order
+.rva	.LSEH_begin_sha1_block_data_order_ssse3
+.rva	.LSEH_end_sha1_block_data_order_ssse3
+.rva	.LSEH_info_sha1_block_data_order_ssse3
+.section	.xdata
+.p2align	3
+.LSEH_info_sha1_block_data_order:
+.byte	9,0,0,0
+.rva	se_handler
+.LSEH_info_sha1_block_data_order_ssse3:
+.byte	9,0,0,0
+.rva	ssse3_handler
+.rva	.Lprologue_ssse3,.Lepilogue_ssse3	
diff --git a/ext/libressl/crypto/sha/sha1_one.c b/ext/libressl/crypto/sha/sha1_one.c
new file mode 100644
index 0000000..57e5220
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha1_one.c
@@ -0,0 +1,81 @@
+/* $OpenBSD: sha1_one.c,v 1.12 2015/09/10 15:56:26 jsing Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to.  The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *    "This product includes cryptographic software written by
+ *     Eric Young (eay@cryptsoft.com)"
+ *    The word 'cryptographic' can be left out if the rouines from the library
+ *    being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ *    the apps directory (application code) you must include an acknowledgement:
+ *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed.  i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <openssl/opensslconf.h>
+
+#include <openssl/crypto.h>
+#include <openssl/sha.h>
+
+#ifndef OPENSSL_NO_SHA1
+unsigned char *SHA1(const unsigned char *d, size_t n, unsigned char *md)
+	{
+	SHA_CTX c;
+	static unsigned char m[SHA_DIGEST_LENGTH];
+
+	if (md == NULL) md=m;
+	if (!SHA1_Init(&c))
+		return NULL;
+	SHA1_Update(&c,d,n);
+	SHA1_Final(md,&c);
+	explicit_bzero(&c,sizeof(c));
+	return(md);
+	}
+#endif
diff --git a/ext/libressl/crypto/sha/sha1dgst.c b/ext/libressl/crypto/sha/sha1dgst.c
new file mode 100644
index 0000000..0c3df49
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha1dgst.c
@@ -0,0 +1,72 @@
+/* $OpenBSD: sha1dgst.c,v 1.14 2015/09/13 21:09:56 doug Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to.  The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *    "This product includes cryptographic software written by
+ *     Eric Young (eay@cryptsoft.com)"
+ *    The word 'cryptographic' can be left out if the rouines from the library
+ *    being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ *    the apps directory (application code) you must include an acknowledgement:
+ *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed.  i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <openssl/opensslconf.h>
+
+#include <openssl/crypto.h>
+
+#if !defined(OPENSSL_NO_SHA1) && !defined(OPENSSL_NO_SHA)
+
+#include <openssl/opensslv.h>
+
+/* The implementation is in ../md32_common.h */
+
+#include "sha_locl.h"
+
+#endif
+
diff --git a/ext/libressl/crypto/sha/sha256-elf-armv4.S b/ext/libressl/crypto/sha/sha256-elf-armv4.S
new file mode 100644
index 0000000..9b155c7
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha256-elf-armv4.S
@@ -0,0 +1,1520 @@
+#include "arm_arch.h"
+
+.text
+.code	32
+
+.type	K256,%object
+.align	5
+K256:
+.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.size	K256,.-K256
+
+.global	sha256_block_data_order
+.type	sha256_block_data_order,%function
+sha256_block_data_order:
+	sub	r3,pc,#8		@ sha256_block_data_order
+	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
+	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
+	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
+	sub	r14,r3,#256		@ K256
+	sub	sp,sp,#16*4		@ alloca(X[16])
+.Loop:
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+	ldr	r3,[r1],#4
+#else
+	ldrb	r3,[r1,#3]			@ 0
+	ldrb	r12,[r1,#2]
+	ldrb	r2,[r1,#1]
+	ldrb	r0,[r1],#4
+	orr	r3,r3,r12,lsl#8
+	orr	r3,r3,r2,lsl#16
+	orr	r3,r3,r0,lsl#24
+#endif
+	mov	r0,r8,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r8,ror#11
+	eor	r2,r9,r10
+#if 0>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 0==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r8,ror#25	@ Sigma1(e)
+	and	r2,r2,r8
+	str	r3,[sp,#0*4]
+	add	r3,r3,r0
+	eor	r2,r2,r10			@ Ch(e,f,g)
+	add	r3,r3,r11
+	mov	r11,r4,ror#2
+	add	r3,r3,r2
+	eor	r11,r11,r4,ror#13
+	add	r3,r3,r12
+	eor	r11,r11,r4,ror#22		@ Sigma0(a)
+#if 0>=15
+	ldr	r1,[sp,#2*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r4,r5
+	and	r2,r4,r5
+	and	r0,r0,r6
+	add	r11,r11,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r7,r7,r3
+	add	r11,r11,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+	ldr	r3,[r1],#4
+#else
+	ldrb	r3,[r1,#3]			@ 1
+	ldrb	r12,[r1,#2]
+	ldrb	r2,[r1,#1]
+	ldrb	r0,[r1],#4
+	orr	r3,r3,r12,lsl#8
+	orr	r3,r3,r2,lsl#16
+	orr	r3,r3,r0,lsl#24
+#endif
+	mov	r0,r7,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r7,ror#11
+	eor	r2,r8,r9
+#if 1>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 1==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r7,ror#25	@ Sigma1(e)
+	and	r2,r2,r7
+	str	r3,[sp,#1*4]
+	add	r3,r3,r0
+	eor	r2,r2,r9			@ Ch(e,f,g)
+	add	r3,r3,r10
+	mov	r10,r11,ror#2
+	add	r3,r3,r2
+	eor	r10,r10,r11,ror#13
+	add	r3,r3,r12
+	eor	r10,r10,r11,ror#22		@ Sigma0(a)
+#if 1>=15
+	ldr	r1,[sp,#3*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r11,r4
+	and	r2,r11,r4
+	and	r0,r0,r5
+	add	r10,r10,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r6,r6,r3
+	add	r10,r10,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+	ldr	r3,[r1],#4
+#else
+	ldrb	r3,[r1,#3]			@ 2
+	ldrb	r12,[r1,#2]
+	ldrb	r2,[r1,#1]
+	ldrb	r0,[r1],#4
+	orr	r3,r3,r12,lsl#8
+	orr	r3,r3,r2,lsl#16
+	orr	r3,r3,r0,lsl#24
+#endif
+	mov	r0,r6,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r6,ror#11
+	eor	r2,r7,r8
+#if 2>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 2==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r6,ror#25	@ Sigma1(e)
+	and	r2,r2,r6
+	str	r3,[sp,#2*4]
+	add	r3,r3,r0
+	eor	r2,r2,r8			@ Ch(e,f,g)
+	add	r3,r3,r9
+	mov	r9,r10,ror#2
+	add	r3,r3,r2
+	eor	r9,r9,r10,ror#13
+	add	r3,r3,r12
+	eor	r9,r9,r10,ror#22		@ Sigma0(a)
+#if 2>=15
+	ldr	r1,[sp,#4*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r10,r11
+	and	r2,r10,r11
+	and	r0,r0,r4
+	add	r9,r9,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r5,r5,r3
+	add	r9,r9,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+	ldr	r3,[r1],#4
+#else
+	ldrb	r3,[r1,#3]			@ 3
+	ldrb	r12,[r1,#2]
+	ldrb	r2,[r1,#1]
+	ldrb	r0,[r1],#4
+	orr	r3,r3,r12,lsl#8
+	orr	r3,r3,r2,lsl#16
+	orr	r3,r3,r0,lsl#24
+#endif
+	mov	r0,r5,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r5,ror#11
+	eor	r2,r6,r7
+#if 3>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 3==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r5,ror#25	@ Sigma1(e)
+	and	r2,r2,r5
+	str	r3,[sp,#3*4]
+	add	r3,r3,r0
+	eor	r2,r2,r7			@ Ch(e,f,g)
+	add	r3,r3,r8
+	mov	r8,r9,ror#2
+	add	r3,r3,r2
+	eor	r8,r8,r9,ror#13
+	add	r3,r3,r12
+	eor	r8,r8,r9,ror#22		@ Sigma0(a)
+#if 3>=15
+	ldr	r1,[sp,#5*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r9,r10
+	and	r2,r9,r10
+	and	r0,r0,r11
+	add	r8,r8,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r4,r4,r3
+	add	r8,r8,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+	ldr	r3,[r1],#4
+#else
+	ldrb	r3,[r1,#3]			@ 4
+	ldrb	r12,[r1,#2]
+	ldrb	r2,[r1,#1]
+	ldrb	r0,[r1],#4
+	orr	r3,r3,r12,lsl#8
+	orr	r3,r3,r2,lsl#16
+	orr	r3,r3,r0,lsl#24
+#endif
+	mov	r0,r4,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r4,ror#11
+	eor	r2,r5,r6
+#if 4>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 4==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r4,ror#25	@ Sigma1(e)
+	and	r2,r2,r4
+	str	r3,[sp,#4*4]
+	add	r3,r3,r0
+	eor	r2,r2,r6			@ Ch(e,f,g)
+	add	r3,r3,r7
+	mov	r7,r8,ror#2
+	add	r3,r3,r2
+	eor	r7,r7,r8,ror#13
+	add	r3,r3,r12
+	eor	r7,r7,r8,ror#22		@ Sigma0(a)
+#if 4>=15
+	ldr	r1,[sp,#6*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r8,r9
+	and	r2,r8,r9
+	and	r0,r0,r10
+	add	r7,r7,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r11,r11,r3
+	add	r7,r7,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+	ldr	r3,[r1],#4
+#else
+	ldrb	r3,[r1,#3]			@ 5
+	ldrb	r12,[r1,#2]
+	ldrb	r2,[r1,#1]
+	ldrb	r0,[r1],#4
+	orr	r3,r3,r12,lsl#8
+	orr	r3,r3,r2,lsl#16
+	orr	r3,r3,r0,lsl#24
+#endif
+	mov	r0,r11,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r11,ror#11
+	eor	r2,r4,r5
+#if 5>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 5==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r11,ror#25	@ Sigma1(e)
+	and	r2,r2,r11
+	str	r3,[sp,#5*4]
+	add	r3,r3,r0
+	eor	r2,r2,r5			@ Ch(e,f,g)
+	add	r3,r3,r6
+	mov	r6,r7,ror#2
+	add	r3,r3,r2
+	eor	r6,r6,r7,ror#13
+	add	r3,r3,r12
+	eor	r6,r6,r7,ror#22		@ Sigma0(a)
+#if 5>=15
+	ldr	r1,[sp,#7*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r7,r8
+	and	r2,r7,r8
+	and	r0,r0,r9
+	add	r6,r6,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r10,r10,r3
+	add	r6,r6,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+	ldr	r3,[r1],#4
+#else
+	ldrb	r3,[r1,#3]			@ 6
+	ldrb	r12,[r1,#2]
+	ldrb	r2,[r1,#1]
+	ldrb	r0,[r1],#4
+	orr	r3,r3,r12,lsl#8
+	orr	r3,r3,r2,lsl#16
+	orr	r3,r3,r0,lsl#24
+#endif
+	mov	r0,r10,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r10,ror#11
+	eor	r2,r11,r4
+#if 6>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 6==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r10,ror#25	@ Sigma1(e)
+	and	r2,r2,r10
+	str	r3,[sp,#6*4]
+	add	r3,r3,r0
+	eor	r2,r2,r4			@ Ch(e,f,g)
+	add	r3,r3,r5
+	mov	r5,r6,ror#2
+	add	r3,r3,r2
+	eor	r5,r5,r6,ror#13
+	add	r3,r3,r12
+	eor	r5,r5,r6,ror#22		@ Sigma0(a)
+#if 6>=15
+	ldr	r1,[sp,#8*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r6,r7
+	and	r2,r6,r7
+	and	r0,r0,r8
+	add	r5,r5,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r9,r9,r3
+	add	r5,r5,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+	ldr	r3,[r1],#4
+#else
+	ldrb	r3,[r1,#3]			@ 7
+	ldrb	r12,[r1,#2]
+	ldrb	r2,[r1,#1]
+	ldrb	r0,[r1],#4
+	orr	r3,r3,r12,lsl#8
+	orr	r3,r3,r2,lsl#16
+	orr	r3,r3,r0,lsl#24
+#endif
+	mov	r0,r9,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r9,ror#11
+	eor	r2,r10,r11
+#if 7>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 7==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r9,ror#25	@ Sigma1(e)
+	and	r2,r2,r9
+	str	r3,[sp,#7*4]
+	add	r3,r3,r0
+	eor	r2,r2,r11			@ Ch(e,f,g)
+	add	r3,r3,r4
+	mov	r4,r5,ror#2
+	add	r3,r3,r2
+	eor	r4,r4,r5,ror#13
+	add	r3,r3,r12
+	eor	r4,r4,r5,ror#22		@ Sigma0(a)
+#if 7>=15
+	ldr	r1,[sp,#9*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r5,r6
+	and	r2,r5,r6
+	and	r0,r0,r7
+	add	r4,r4,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r8,r8,r3
+	add	r4,r4,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+	ldr	r3,[r1],#4
+#else
+	ldrb	r3,[r1,#3]			@ 8
+	ldrb	r12,[r1,#2]
+	ldrb	r2,[r1,#1]
+	ldrb	r0,[r1],#4
+	orr	r3,r3,r12,lsl#8
+	orr	r3,r3,r2,lsl#16
+	orr	r3,r3,r0,lsl#24
+#endif
+	mov	r0,r8,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r8,ror#11
+	eor	r2,r9,r10
+#if 8>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 8==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r8,ror#25	@ Sigma1(e)
+	and	r2,r2,r8
+	str	r3,[sp,#8*4]
+	add	r3,r3,r0
+	eor	r2,r2,r10			@ Ch(e,f,g)
+	add	r3,r3,r11
+	mov	r11,r4,ror#2
+	add	r3,r3,r2
+	eor	r11,r11,r4,ror#13
+	add	r3,r3,r12
+	eor	r11,r11,r4,ror#22		@ Sigma0(a)
+#if 8>=15
+	ldr	r1,[sp,#10*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r4,r5
+	and	r2,r4,r5
+	and	r0,r0,r6
+	add	r11,r11,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r7,r7,r3
+	add	r11,r11,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+	ldr	r3,[r1],#4
+#else
+	ldrb	r3,[r1,#3]			@ 9
+	ldrb	r12,[r1,#2]
+	ldrb	r2,[r1,#1]
+	ldrb	r0,[r1],#4
+	orr	r3,r3,r12,lsl#8
+	orr	r3,r3,r2,lsl#16
+	orr	r3,r3,r0,lsl#24
+#endif
+	mov	r0,r7,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r7,ror#11
+	eor	r2,r8,r9
+#if 9>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 9==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r7,ror#25	@ Sigma1(e)
+	and	r2,r2,r7
+	str	r3,[sp,#9*4]
+	add	r3,r3,r0
+	eor	r2,r2,r9			@ Ch(e,f,g)
+	add	r3,r3,r10
+	mov	r10,r11,ror#2
+	add	r3,r3,r2
+	eor	r10,r10,r11,ror#13
+	add	r3,r3,r12
+	eor	r10,r10,r11,ror#22		@ Sigma0(a)
+#if 9>=15
+	ldr	r1,[sp,#11*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r11,r4
+	and	r2,r11,r4
+	and	r0,r0,r5
+	add	r10,r10,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r6,r6,r3
+	add	r10,r10,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+	ldr	r3,[r1],#4
+#else
+	ldrb	r3,[r1,#3]			@ 10
+	ldrb	r12,[r1,#2]
+	ldrb	r2,[r1,#1]
+	ldrb	r0,[r1],#4
+	orr	r3,r3,r12,lsl#8
+	orr	r3,r3,r2,lsl#16
+	orr	r3,r3,r0,lsl#24
+#endif
+	mov	r0,r6,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r6,ror#11
+	eor	r2,r7,r8
+#if 10>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 10==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r6,ror#25	@ Sigma1(e)
+	and	r2,r2,r6
+	str	r3,[sp,#10*4]
+	add	r3,r3,r0
+	eor	r2,r2,r8			@ Ch(e,f,g)
+	add	r3,r3,r9
+	mov	r9,r10,ror#2
+	add	r3,r3,r2
+	eor	r9,r9,r10,ror#13
+	add	r3,r3,r12
+	eor	r9,r9,r10,ror#22		@ Sigma0(a)
+#if 10>=15
+	ldr	r1,[sp,#12*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r10,r11
+	and	r2,r10,r11
+	and	r0,r0,r4
+	add	r9,r9,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r5,r5,r3
+	add	r9,r9,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+	ldr	r3,[r1],#4
+#else
+	ldrb	r3,[r1,#3]			@ 11
+	ldrb	r12,[r1,#2]
+	ldrb	r2,[r1,#1]
+	ldrb	r0,[r1],#4
+	orr	r3,r3,r12,lsl#8
+	orr	r3,r3,r2,lsl#16
+	orr	r3,r3,r0,lsl#24
+#endif
+	mov	r0,r5,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r5,ror#11
+	eor	r2,r6,r7
+#if 11>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 11==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r5,ror#25	@ Sigma1(e)
+	and	r2,r2,r5
+	str	r3,[sp,#11*4]
+	add	r3,r3,r0
+	eor	r2,r2,r7			@ Ch(e,f,g)
+	add	r3,r3,r8
+	mov	r8,r9,ror#2
+	add	r3,r3,r2
+	eor	r8,r8,r9,ror#13
+	add	r3,r3,r12
+	eor	r8,r8,r9,ror#22		@ Sigma0(a)
+#if 11>=15
+	ldr	r1,[sp,#13*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r9,r10
+	and	r2,r9,r10
+	and	r0,r0,r11
+	add	r8,r8,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r4,r4,r3
+	add	r8,r8,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+	ldr	r3,[r1],#4
+#else
+	ldrb	r3,[r1,#3]			@ 12
+	ldrb	r12,[r1,#2]
+	ldrb	r2,[r1,#1]
+	ldrb	r0,[r1],#4
+	orr	r3,r3,r12,lsl#8
+	orr	r3,r3,r2,lsl#16
+	orr	r3,r3,r0,lsl#24
+#endif
+	mov	r0,r4,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r4,ror#11
+	eor	r2,r5,r6
+#if 12>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 12==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r4,ror#25	@ Sigma1(e)
+	and	r2,r2,r4
+	str	r3,[sp,#12*4]
+	add	r3,r3,r0
+	eor	r2,r2,r6			@ Ch(e,f,g)
+	add	r3,r3,r7
+	mov	r7,r8,ror#2
+	add	r3,r3,r2
+	eor	r7,r7,r8,ror#13
+	add	r3,r3,r12
+	eor	r7,r7,r8,ror#22		@ Sigma0(a)
+#if 12>=15
+	ldr	r1,[sp,#14*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r8,r9
+	and	r2,r8,r9
+	and	r0,r0,r10
+	add	r7,r7,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r11,r11,r3
+	add	r7,r7,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+	ldr	r3,[r1],#4
+#else
+	ldrb	r3,[r1,#3]			@ 13
+	ldrb	r12,[r1,#2]
+	ldrb	r2,[r1,#1]
+	ldrb	r0,[r1],#4
+	orr	r3,r3,r12,lsl#8
+	orr	r3,r3,r2,lsl#16
+	orr	r3,r3,r0,lsl#24
+#endif
+	mov	r0,r11,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r11,ror#11
+	eor	r2,r4,r5
+#if 13>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 13==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r11,ror#25	@ Sigma1(e)
+	and	r2,r2,r11
+	str	r3,[sp,#13*4]
+	add	r3,r3,r0
+	eor	r2,r2,r5			@ Ch(e,f,g)
+	add	r3,r3,r6
+	mov	r6,r7,ror#2
+	add	r3,r3,r2
+	eor	r6,r6,r7,ror#13
+	add	r3,r3,r12
+	eor	r6,r6,r7,ror#22		@ Sigma0(a)
+#if 13>=15
+	ldr	r1,[sp,#15*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r7,r8
+	and	r2,r7,r8
+	and	r0,r0,r9
+	add	r6,r6,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r10,r10,r3
+	add	r6,r6,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+	ldr	r3,[r1],#4
+#else
+	ldrb	r3,[r1,#3]			@ 14
+	ldrb	r12,[r1,#2]
+	ldrb	r2,[r1,#1]
+	ldrb	r0,[r1],#4
+	orr	r3,r3,r12,lsl#8
+	orr	r3,r3,r2,lsl#16
+	orr	r3,r3,r0,lsl#24
+#endif
+	mov	r0,r10,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r10,ror#11
+	eor	r2,r11,r4
+#if 14>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 14==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r10,ror#25	@ Sigma1(e)
+	and	r2,r2,r10
+	str	r3,[sp,#14*4]
+	add	r3,r3,r0
+	eor	r2,r2,r4			@ Ch(e,f,g)
+	add	r3,r3,r5
+	mov	r5,r6,ror#2
+	add	r3,r3,r2
+	eor	r5,r5,r6,ror#13
+	add	r3,r3,r12
+	eor	r5,r5,r6,ror#22		@ Sigma0(a)
+#if 14>=15
+	ldr	r1,[sp,#0*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r6,r7
+	and	r2,r6,r7
+	and	r0,r0,r8
+	add	r5,r5,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r9,r9,r3
+	add	r5,r5,r0
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+	ldr	r3,[r1],#4
+#else
+	ldrb	r3,[r1,#3]			@ 15
+	ldrb	r12,[r1,#2]
+	ldrb	r2,[r1,#1]
+	ldrb	r0,[r1],#4
+	orr	r3,r3,r12,lsl#8
+	orr	r3,r3,r2,lsl#16
+	orr	r3,r3,r0,lsl#24
+#endif
+	mov	r0,r9,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r9,ror#11
+	eor	r2,r10,r11
+#if 15>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 15==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r9,ror#25	@ Sigma1(e)
+	and	r2,r2,r9
+	str	r3,[sp,#15*4]
+	add	r3,r3,r0
+	eor	r2,r2,r11			@ Ch(e,f,g)
+	add	r3,r3,r4
+	mov	r4,r5,ror#2
+	add	r3,r3,r2
+	eor	r4,r4,r5,ror#13
+	add	r3,r3,r12
+	eor	r4,r4,r5,ror#22		@ Sigma0(a)
+#if 15>=15
+	ldr	r1,[sp,#1*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r5,r6
+	and	r2,r5,r6
+	and	r0,r0,r7
+	add	r4,r4,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r8,r8,r3
+	add	r4,r4,r0
+.Lrounds_16_xx:
+	@ ldr	r1,[sp,#1*4]		@ 16
+	ldr	r12,[sp,#14*4]
+	mov	r0,r1,ror#7
+	ldr	r3,[sp,#0*4]
+	eor	r0,r0,r1,ror#18
+	ldr	r2,[sp,#9*4]
+	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1])
+	mov	r1,r12,ror#17
+	add	r3,r3,r0
+	eor	r1,r1,r12,ror#19
+	add	r3,r3,r2
+	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14])
+	@ add	r3,r3,r1
+	mov	r0,r8,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r8,ror#11
+	eor	r2,r9,r10
+#if 16>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 16==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r8,ror#25	@ Sigma1(e)
+	and	r2,r2,r8
+	str	r3,[sp,#0*4]
+	add	r3,r3,r0
+	eor	r2,r2,r10			@ Ch(e,f,g)
+	add	r3,r3,r11
+	mov	r11,r4,ror#2
+	add	r3,r3,r2
+	eor	r11,r11,r4,ror#13
+	add	r3,r3,r12
+	eor	r11,r11,r4,ror#22		@ Sigma0(a)
+#if 16>=15
+	ldr	r1,[sp,#2*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r4,r5
+	and	r2,r4,r5
+	and	r0,r0,r6
+	add	r11,r11,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r7,r7,r3
+	add	r11,r11,r0
+	@ ldr	r1,[sp,#2*4]		@ 17
+	ldr	r12,[sp,#15*4]
+	mov	r0,r1,ror#7
+	ldr	r3,[sp,#1*4]
+	eor	r0,r0,r1,ror#18
+	ldr	r2,[sp,#10*4]
+	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1])
+	mov	r1,r12,ror#17
+	add	r3,r3,r0
+	eor	r1,r1,r12,ror#19
+	add	r3,r3,r2
+	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14])
+	@ add	r3,r3,r1
+	mov	r0,r7,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r7,ror#11
+	eor	r2,r8,r9
+#if 17>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 17==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r7,ror#25	@ Sigma1(e)
+	and	r2,r2,r7
+	str	r3,[sp,#1*4]
+	add	r3,r3,r0
+	eor	r2,r2,r9			@ Ch(e,f,g)
+	add	r3,r3,r10
+	mov	r10,r11,ror#2
+	add	r3,r3,r2
+	eor	r10,r10,r11,ror#13
+	add	r3,r3,r12
+	eor	r10,r10,r11,ror#22		@ Sigma0(a)
+#if 17>=15
+	ldr	r1,[sp,#3*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r11,r4
+	and	r2,r11,r4
+	and	r0,r0,r5
+	add	r10,r10,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r6,r6,r3
+	add	r10,r10,r0
+	@ ldr	r1,[sp,#3*4]		@ 18
+	ldr	r12,[sp,#0*4]
+	mov	r0,r1,ror#7
+	ldr	r3,[sp,#2*4]
+	eor	r0,r0,r1,ror#18
+	ldr	r2,[sp,#11*4]
+	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1])
+	mov	r1,r12,ror#17
+	add	r3,r3,r0
+	eor	r1,r1,r12,ror#19
+	add	r3,r3,r2
+	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14])
+	@ add	r3,r3,r1
+	mov	r0,r6,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r6,ror#11
+	eor	r2,r7,r8
+#if 18>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 18==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r6,ror#25	@ Sigma1(e)
+	and	r2,r2,r6
+	str	r3,[sp,#2*4]
+	add	r3,r3,r0
+	eor	r2,r2,r8			@ Ch(e,f,g)
+	add	r3,r3,r9
+	mov	r9,r10,ror#2
+	add	r3,r3,r2
+	eor	r9,r9,r10,ror#13
+	add	r3,r3,r12
+	eor	r9,r9,r10,ror#22		@ Sigma0(a)
+#if 18>=15
+	ldr	r1,[sp,#4*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r10,r11
+	and	r2,r10,r11
+	and	r0,r0,r4
+	add	r9,r9,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r5,r5,r3
+	add	r9,r9,r0
+	@ ldr	r1,[sp,#4*4]		@ 19
+	ldr	r12,[sp,#1*4]
+	mov	r0,r1,ror#7
+	ldr	r3,[sp,#3*4]
+	eor	r0,r0,r1,ror#18
+	ldr	r2,[sp,#12*4]
+	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1])
+	mov	r1,r12,ror#17
+	add	r3,r3,r0
+	eor	r1,r1,r12,ror#19
+	add	r3,r3,r2
+	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14])
+	@ add	r3,r3,r1
+	mov	r0,r5,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r5,ror#11
+	eor	r2,r6,r7
+#if 19>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 19==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r5,ror#25	@ Sigma1(e)
+	and	r2,r2,r5
+	str	r3,[sp,#3*4]
+	add	r3,r3,r0
+	eor	r2,r2,r7			@ Ch(e,f,g)
+	add	r3,r3,r8
+	mov	r8,r9,ror#2
+	add	r3,r3,r2
+	eor	r8,r8,r9,ror#13
+	add	r3,r3,r12
+	eor	r8,r8,r9,ror#22		@ Sigma0(a)
+#if 19>=15
+	ldr	r1,[sp,#5*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r9,r10
+	and	r2,r9,r10
+	and	r0,r0,r11
+	add	r8,r8,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r4,r4,r3
+	add	r8,r8,r0
+	@ ldr	r1,[sp,#5*4]		@ 20
+	ldr	r12,[sp,#2*4]
+	mov	r0,r1,ror#7
+	ldr	r3,[sp,#4*4]
+	eor	r0,r0,r1,ror#18
+	ldr	r2,[sp,#13*4]
+	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1])
+	mov	r1,r12,ror#17
+	add	r3,r3,r0
+	eor	r1,r1,r12,ror#19
+	add	r3,r3,r2
+	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14])
+	@ add	r3,r3,r1
+	mov	r0,r4,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r4,ror#11
+	eor	r2,r5,r6
+#if 20>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 20==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r4,ror#25	@ Sigma1(e)
+	and	r2,r2,r4
+	str	r3,[sp,#4*4]
+	add	r3,r3,r0
+	eor	r2,r2,r6			@ Ch(e,f,g)
+	add	r3,r3,r7
+	mov	r7,r8,ror#2
+	add	r3,r3,r2
+	eor	r7,r7,r8,ror#13
+	add	r3,r3,r12
+	eor	r7,r7,r8,ror#22		@ Sigma0(a)
+#if 20>=15
+	ldr	r1,[sp,#6*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r8,r9
+	and	r2,r8,r9
+	and	r0,r0,r10
+	add	r7,r7,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r11,r11,r3
+	add	r7,r7,r0
+	@ ldr	r1,[sp,#6*4]		@ 21
+	ldr	r12,[sp,#3*4]
+	mov	r0,r1,ror#7
+	ldr	r3,[sp,#5*4]
+	eor	r0,r0,r1,ror#18
+	ldr	r2,[sp,#14*4]
+	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1])
+	mov	r1,r12,ror#17
+	add	r3,r3,r0
+	eor	r1,r1,r12,ror#19
+	add	r3,r3,r2
+	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14])
+	@ add	r3,r3,r1
+	mov	r0,r11,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r11,ror#11
+	eor	r2,r4,r5
+#if 21>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 21==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r11,ror#25	@ Sigma1(e)
+	and	r2,r2,r11
+	str	r3,[sp,#5*4]
+	add	r3,r3,r0
+	eor	r2,r2,r5			@ Ch(e,f,g)
+	add	r3,r3,r6
+	mov	r6,r7,ror#2
+	add	r3,r3,r2
+	eor	r6,r6,r7,ror#13
+	add	r3,r3,r12
+	eor	r6,r6,r7,ror#22		@ Sigma0(a)
+#if 21>=15
+	ldr	r1,[sp,#7*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r7,r8
+	and	r2,r7,r8
+	and	r0,r0,r9
+	add	r6,r6,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r10,r10,r3
+	add	r6,r6,r0
+	@ ldr	r1,[sp,#7*4]		@ 22
+	ldr	r12,[sp,#4*4]
+	mov	r0,r1,ror#7
+	ldr	r3,[sp,#6*4]
+	eor	r0,r0,r1,ror#18
+	ldr	r2,[sp,#15*4]
+	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1])
+	mov	r1,r12,ror#17
+	add	r3,r3,r0
+	eor	r1,r1,r12,ror#19
+	add	r3,r3,r2
+	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14])
+	@ add	r3,r3,r1
+	mov	r0,r10,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r10,ror#11
+	eor	r2,r11,r4
+#if 22>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 22==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r10,ror#25	@ Sigma1(e)
+	and	r2,r2,r10
+	str	r3,[sp,#6*4]
+	add	r3,r3,r0
+	eor	r2,r2,r4			@ Ch(e,f,g)
+	add	r3,r3,r5
+	mov	r5,r6,ror#2
+	add	r3,r3,r2
+	eor	r5,r5,r6,ror#13
+	add	r3,r3,r12
+	eor	r5,r5,r6,ror#22		@ Sigma0(a)
+#if 22>=15
+	ldr	r1,[sp,#8*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r6,r7
+	and	r2,r6,r7
+	and	r0,r0,r8
+	add	r5,r5,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r9,r9,r3
+	add	r5,r5,r0
+	@ ldr	r1,[sp,#8*4]		@ 23
+	ldr	r12,[sp,#5*4]
+	mov	r0,r1,ror#7
+	ldr	r3,[sp,#7*4]
+	eor	r0,r0,r1,ror#18
+	ldr	r2,[sp,#0*4]
+	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1])
+	mov	r1,r12,ror#17
+	add	r3,r3,r0
+	eor	r1,r1,r12,ror#19
+	add	r3,r3,r2
+	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14])
+	@ add	r3,r3,r1
+	mov	r0,r9,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r9,ror#11
+	eor	r2,r10,r11
+#if 23>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 23==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r9,ror#25	@ Sigma1(e)
+	and	r2,r2,r9
+	str	r3,[sp,#7*4]
+	add	r3,r3,r0
+	eor	r2,r2,r11			@ Ch(e,f,g)
+	add	r3,r3,r4
+	mov	r4,r5,ror#2
+	add	r3,r3,r2
+	eor	r4,r4,r5,ror#13
+	add	r3,r3,r12
+	eor	r4,r4,r5,ror#22		@ Sigma0(a)
+#if 23>=15
+	ldr	r1,[sp,#9*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r5,r6
+	and	r2,r5,r6
+	and	r0,r0,r7
+	add	r4,r4,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r8,r8,r3
+	add	r4,r4,r0
+	@ ldr	r1,[sp,#9*4]		@ 24
+	ldr	r12,[sp,#6*4]
+	mov	r0,r1,ror#7
+	ldr	r3,[sp,#8*4]
+	eor	r0,r0,r1,ror#18
+	ldr	r2,[sp,#1*4]
+	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1])
+	mov	r1,r12,ror#17
+	add	r3,r3,r0
+	eor	r1,r1,r12,ror#19
+	add	r3,r3,r2
+	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14])
+	@ add	r3,r3,r1
+	mov	r0,r8,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r8,ror#11
+	eor	r2,r9,r10
+#if 24>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 24==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r8,ror#25	@ Sigma1(e)
+	and	r2,r2,r8
+	str	r3,[sp,#8*4]
+	add	r3,r3,r0
+	eor	r2,r2,r10			@ Ch(e,f,g)
+	add	r3,r3,r11
+	mov	r11,r4,ror#2
+	add	r3,r3,r2
+	eor	r11,r11,r4,ror#13
+	add	r3,r3,r12
+	eor	r11,r11,r4,ror#22		@ Sigma0(a)
+#if 24>=15
+	ldr	r1,[sp,#10*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r4,r5
+	and	r2,r4,r5
+	and	r0,r0,r6
+	add	r11,r11,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r7,r7,r3
+	add	r11,r11,r0
+	@ ldr	r1,[sp,#10*4]		@ 25
+	ldr	r12,[sp,#7*4]
+	mov	r0,r1,ror#7
+	ldr	r3,[sp,#9*4]
+	eor	r0,r0,r1,ror#18
+	ldr	r2,[sp,#2*4]
+	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1])
+	mov	r1,r12,ror#17
+	add	r3,r3,r0
+	eor	r1,r1,r12,ror#19
+	add	r3,r3,r2
+	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14])
+	@ add	r3,r3,r1
+	mov	r0,r7,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r7,ror#11
+	eor	r2,r8,r9
+#if 25>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 25==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r7,ror#25	@ Sigma1(e)
+	and	r2,r2,r7
+	str	r3,[sp,#9*4]
+	add	r3,r3,r0
+	eor	r2,r2,r9			@ Ch(e,f,g)
+	add	r3,r3,r10
+	mov	r10,r11,ror#2
+	add	r3,r3,r2
+	eor	r10,r10,r11,ror#13
+	add	r3,r3,r12
+	eor	r10,r10,r11,ror#22		@ Sigma0(a)
+#if 25>=15
+	ldr	r1,[sp,#11*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r11,r4
+	and	r2,r11,r4
+	and	r0,r0,r5
+	add	r10,r10,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r6,r6,r3
+	add	r10,r10,r0
+	@ ldr	r1,[sp,#11*4]		@ 26
+	ldr	r12,[sp,#8*4]
+	mov	r0,r1,ror#7
+	ldr	r3,[sp,#10*4]
+	eor	r0,r0,r1,ror#18
+	ldr	r2,[sp,#3*4]
+	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1])
+	mov	r1,r12,ror#17
+	add	r3,r3,r0
+	eor	r1,r1,r12,ror#19
+	add	r3,r3,r2
+	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14])
+	@ add	r3,r3,r1
+	mov	r0,r6,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r6,ror#11
+	eor	r2,r7,r8
+#if 26>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 26==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r6,ror#25	@ Sigma1(e)
+	and	r2,r2,r6
+	str	r3,[sp,#10*4]
+	add	r3,r3,r0
+	eor	r2,r2,r8			@ Ch(e,f,g)
+	add	r3,r3,r9
+	mov	r9,r10,ror#2
+	add	r3,r3,r2
+	eor	r9,r9,r10,ror#13
+	add	r3,r3,r12
+	eor	r9,r9,r10,ror#22		@ Sigma0(a)
+#if 26>=15
+	ldr	r1,[sp,#12*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r10,r11
+	and	r2,r10,r11
+	and	r0,r0,r4
+	add	r9,r9,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r5,r5,r3
+	add	r9,r9,r0
+	@ ldr	r1,[sp,#12*4]		@ 27
+	ldr	r12,[sp,#9*4]
+	mov	r0,r1,ror#7
+	ldr	r3,[sp,#11*4]
+	eor	r0,r0,r1,ror#18
+	ldr	r2,[sp,#4*4]
+	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1])
+	mov	r1,r12,ror#17
+	add	r3,r3,r0
+	eor	r1,r1,r12,ror#19
+	add	r3,r3,r2
+	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14])
+	@ add	r3,r3,r1
+	mov	r0,r5,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r5,ror#11
+	eor	r2,r6,r7
+#if 27>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 27==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r5,ror#25	@ Sigma1(e)
+	and	r2,r2,r5
+	str	r3,[sp,#11*4]
+	add	r3,r3,r0
+	eor	r2,r2,r7			@ Ch(e,f,g)
+	add	r3,r3,r8
+	mov	r8,r9,ror#2
+	add	r3,r3,r2
+	eor	r8,r8,r9,ror#13
+	add	r3,r3,r12
+	eor	r8,r8,r9,ror#22		@ Sigma0(a)
+#if 27>=15
+	ldr	r1,[sp,#13*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r9,r10
+	and	r2,r9,r10
+	and	r0,r0,r11
+	add	r8,r8,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r4,r4,r3
+	add	r8,r8,r0
+	@ ldr	r1,[sp,#13*4]		@ 28
+	ldr	r12,[sp,#10*4]
+	mov	r0,r1,ror#7
+	ldr	r3,[sp,#12*4]
+	eor	r0,r0,r1,ror#18
+	ldr	r2,[sp,#5*4]
+	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1])
+	mov	r1,r12,ror#17
+	add	r3,r3,r0
+	eor	r1,r1,r12,ror#19
+	add	r3,r3,r2
+	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14])
+	@ add	r3,r3,r1
+	mov	r0,r4,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r4,ror#11
+	eor	r2,r5,r6
+#if 28>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 28==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r4,ror#25	@ Sigma1(e)
+	and	r2,r2,r4
+	str	r3,[sp,#12*4]
+	add	r3,r3,r0
+	eor	r2,r2,r6			@ Ch(e,f,g)
+	add	r3,r3,r7
+	mov	r7,r8,ror#2
+	add	r3,r3,r2
+	eor	r7,r7,r8,ror#13
+	add	r3,r3,r12
+	eor	r7,r7,r8,ror#22		@ Sigma0(a)
+#if 28>=15
+	ldr	r1,[sp,#14*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r8,r9
+	and	r2,r8,r9
+	and	r0,r0,r10
+	add	r7,r7,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r11,r11,r3
+	add	r7,r7,r0
+	@ ldr	r1,[sp,#14*4]		@ 29
+	ldr	r12,[sp,#11*4]
+	mov	r0,r1,ror#7
+	ldr	r3,[sp,#13*4]
+	eor	r0,r0,r1,ror#18
+	ldr	r2,[sp,#6*4]
+	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1])
+	mov	r1,r12,ror#17
+	add	r3,r3,r0
+	eor	r1,r1,r12,ror#19
+	add	r3,r3,r2
+	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14])
+	@ add	r3,r3,r1
+	mov	r0,r11,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r11,ror#11
+	eor	r2,r4,r5
+#if 29>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 29==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r11,ror#25	@ Sigma1(e)
+	and	r2,r2,r11
+	str	r3,[sp,#13*4]
+	add	r3,r3,r0
+	eor	r2,r2,r5			@ Ch(e,f,g)
+	add	r3,r3,r6
+	mov	r6,r7,ror#2
+	add	r3,r3,r2
+	eor	r6,r6,r7,ror#13
+	add	r3,r3,r12
+	eor	r6,r6,r7,ror#22		@ Sigma0(a)
+#if 29>=15
+	ldr	r1,[sp,#15*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r7,r8
+	and	r2,r7,r8
+	and	r0,r0,r9
+	add	r6,r6,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r10,r10,r3
+	add	r6,r6,r0
+	@ ldr	r1,[sp,#15*4]		@ 30
+	ldr	r12,[sp,#12*4]
+	mov	r0,r1,ror#7
+	ldr	r3,[sp,#14*4]
+	eor	r0,r0,r1,ror#18
+	ldr	r2,[sp,#7*4]
+	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1])
+	mov	r1,r12,ror#17
+	add	r3,r3,r0
+	eor	r1,r1,r12,ror#19
+	add	r3,r3,r2
+	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14])
+	@ add	r3,r3,r1
+	mov	r0,r10,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r10,ror#11
+	eor	r2,r11,r4
+#if 30>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 30==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r10,ror#25	@ Sigma1(e)
+	and	r2,r2,r10
+	str	r3,[sp,#14*4]
+	add	r3,r3,r0
+	eor	r2,r2,r4			@ Ch(e,f,g)
+	add	r3,r3,r5
+	mov	r5,r6,ror#2
+	add	r3,r3,r2
+	eor	r5,r5,r6,ror#13
+	add	r3,r3,r12
+	eor	r5,r5,r6,ror#22		@ Sigma0(a)
+#if 30>=15
+	ldr	r1,[sp,#0*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r6,r7
+	and	r2,r6,r7
+	and	r0,r0,r8
+	add	r5,r5,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r9,r9,r3
+	add	r5,r5,r0
+	@ ldr	r1,[sp,#0*4]		@ 31
+	ldr	r12,[sp,#13*4]
+	mov	r0,r1,ror#7
+	ldr	r3,[sp,#15*4]
+	eor	r0,r0,r1,ror#18
+	ldr	r2,[sp,#8*4]
+	eor	r0,r0,r1,lsr#3	@ sigma0(X[i+1])
+	mov	r1,r12,ror#17
+	add	r3,r3,r0
+	eor	r1,r1,r12,ror#19
+	add	r3,r3,r2
+	eor	r1,r1,r12,lsr#10	@ sigma1(X[i+14])
+	@ add	r3,r3,r1
+	mov	r0,r9,ror#6
+	ldr	r12,[r14],#4			@ *K256++
+	eor	r0,r0,r9,ror#11
+	eor	r2,r10,r11
+#if 31>=16
+	add	r3,r3,r1			@ from BODY_16_xx
+#elif __ARM_ARCH__>=7 && defined(__ARMEL__) && !defined(__STRICT_ALIGNMENT)
+	rev	r3,r3
+#endif
+#if 31==15
+	str	r1,[sp,#17*4]			@ leave room for r1
+#endif
+	eor	r0,r0,r9,ror#25	@ Sigma1(e)
+	and	r2,r2,r9
+	str	r3,[sp,#15*4]
+	add	r3,r3,r0
+	eor	r2,r2,r11			@ Ch(e,f,g)
+	add	r3,r3,r4
+	mov	r4,r5,ror#2
+	add	r3,r3,r2
+	eor	r4,r4,r5,ror#13
+	add	r3,r3,r12
+	eor	r4,r4,r5,ror#22		@ Sigma0(a)
+#if 31>=15
+	ldr	r1,[sp,#1*4]		@ from BODY_16_xx
+#endif
+	orr	r0,r5,r6
+	and	r2,r5,r6
+	and	r0,r0,r7
+	add	r4,r4,r3
+	orr	r0,r0,r2			@ Maj(a,b,c)
+	add	r8,r8,r3
+	add	r4,r4,r0
+	and	r12,r12,#0xff
+	cmp	r12,#0xf2
+	bne	.Lrounds_16_xx
+
+	ldr	r3,[sp,#16*4]		@ pull ctx
+	ldr	r0,[r3,#0]
+	ldr	r2,[r3,#4]
+	ldr	r12,[r3,#8]
+	add	r4,r4,r0
+	ldr	r0,[r3,#12]
+	add	r5,r5,r2
+	ldr	r2,[r3,#16]
+	add	r6,r6,r12
+	ldr	r12,[r3,#20]
+	add	r7,r7,r0
+	ldr	r0,[r3,#24]
+	add	r8,r8,r2
+	ldr	r2,[r3,#28]
+	add	r9,r9,r12
+	ldr	r1,[sp,#17*4]		@ pull inp
+	ldr	r12,[sp,#18*4]		@ pull inp+len
+	add	r10,r10,r0
+	add	r11,r11,r2
+	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
+	cmp	r1,r12
+	sub	r14,r14,#256	@ rewind Ktbl
+	bne	.Loop
+
+	add	sp,sp,#19*4	@ destroy frame
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r11,pc}
+#else
+	ldmia	sp!,{r4-r11,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+.size   sha256_block_data_order,.-sha256_block_data_order
+.asciz  "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
+.align	2
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/sha/sha256-elf-x86_64.S b/ext/libressl/crypto/sha/sha256-elf-x86_64.S
new file mode 100644
index 0000000..9eea6a7
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha256-elf-x86_64.S
@@ -0,0 +1,1782 @@
+#include "x86_arch.h"
+.text	
+
+.globl	sha256_block_data_order
+.type	sha256_block_data_order,@function
+.align	16
+sha256_block_data_order:
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	movq	%rsp,%r11
+	shlq	$4,%rdx
+	subq	$64+32,%rsp
+	leaq	(%rsi,%rdx,4),%rdx
+	andq	$-64,%rsp
+	movq	%rdi,64+0(%rsp)
+	movq	%rsi,64+8(%rsp)
+	movq	%rdx,64+16(%rsp)
+	movq	%r11,64+24(%rsp)
+.Lprologue:
+
+	leaq	K256(%rip),%rbp
+
+	movl	0(%rdi),%eax
+	movl	4(%rdi),%ebx
+	movl	8(%rdi),%ecx
+	movl	12(%rdi),%edx
+	movl	16(%rdi),%r8d
+	movl	20(%rdi),%r9d
+	movl	24(%rdi),%r10d
+	movl	28(%rdi),%r11d
+	jmp	.Lloop
+
+.align	16
+.Lloop:
+	xorq	%rdi,%rdi
+	movl	0(%rsi),%r12d
+	movl	%r8d,%r13d
+	movl	%eax,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+	movl	%r12d,0(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%eax,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r8d,%r15d
+	movl	%ebx,%r11d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	xorl	%ecx,%r11d
+	xorl	%eax,%r14d
+	addl	%r15d,%r12d
+	movl	%ebx,%r15d
+
+	rorl	$6,%r13d
+	andl	%eax,%r11d
+	andl	%ecx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r11d
+
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r11d
+
+	movl	4(%rsi),%r12d
+	movl	%edx,%r13d
+	movl	%r11d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r8d,%r15d
+	movl	%r12d,4(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r11d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%edx,%r15d
+	movl	%eax,%r10d
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	xorl	%ebx,%r10d
+	xorl	%r11d,%r14d
+	addl	%r15d,%r12d
+	movl	%eax,%r15d
+
+	rorl	$6,%r13d
+	andl	%r11d,%r10d
+	andl	%ebx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r10d
+
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r10d
+
+	movl	8(%rsi),%r12d
+	movl	%ecx,%r13d
+	movl	%r10d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+	movl	%r12d,8(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r10d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ecx,%r15d
+	movl	%r11d,%r9d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	xorl	%eax,%r9d
+	xorl	%r10d,%r14d
+	addl	%r15d,%r12d
+	movl	%r11d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r10d,%r9d
+	andl	%eax,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r9d
+
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r9d
+
+	movl	12(%rsi),%r12d
+	movl	%ebx,%r13d
+	movl	%r9d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ecx,%r15d
+	movl	%r12d,12(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%r9d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ebx,%r15d
+	movl	%r10d,%r8d
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	xorl	%r11d,%r8d
+	xorl	%r9d,%r14d
+	addl	%r15d,%r12d
+	movl	%r10d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r9d,%r8d
+	andl	%r11d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r8d
+
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r8d
+
+	movl	16(%rsi),%r12d
+	movl	%eax,%r13d
+	movl	%r8d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+	movl	%r12d,16(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%r8d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%eax,%r15d
+	movl	%r9d,%edx
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	xorl	%r10d,%edx
+	xorl	%r8d,%r14d
+	addl	%r15d,%r12d
+	movl	%r9d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r8d,%edx
+	andl	%r10d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%edx
+
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%edx
+
+	movl	20(%rsi),%r12d
+	movl	%r11d,%r13d
+	movl	%edx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%eax,%r15d
+	movl	%r12d,20(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%edx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r11d,%r15d
+	movl	%r8d,%ecx
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	xorl	%r9d,%ecx
+	xorl	%edx,%r14d
+	addl	%r15d,%r12d
+	movl	%r8d,%r15d
+
+	rorl	$6,%r13d
+	andl	%edx,%ecx
+	andl	%r9d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ecx
+
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ecx
+
+	movl	24(%rsi),%r12d
+	movl	%r10d,%r13d
+	movl	%ecx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+	movl	%r12d,24(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%ecx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r10d,%r15d
+	movl	%edx,%ebx
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	xorl	%r8d,%ebx
+	xorl	%ecx,%r14d
+	addl	%r15d,%r12d
+	movl	%edx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ecx,%ebx
+	andl	%r8d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ebx
+
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ebx
+
+	movl	28(%rsi),%r12d
+	movl	%r9d,%r13d
+	movl	%ebx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r10d,%r15d
+	movl	%r12d,28(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%ebx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r9d,%r15d
+	movl	%ecx,%eax
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	xorl	%edx,%eax
+	xorl	%ebx,%r14d
+	addl	%r15d,%r12d
+	movl	%ecx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ebx,%eax
+	andl	%edx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%eax
+
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%eax
+
+	movl	32(%rsi),%r12d
+	movl	%r8d,%r13d
+	movl	%eax,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+	movl	%r12d,32(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%eax,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r8d,%r15d
+	movl	%ebx,%r11d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	xorl	%ecx,%r11d
+	xorl	%eax,%r14d
+	addl	%r15d,%r12d
+	movl	%ebx,%r15d
+
+	rorl	$6,%r13d
+	andl	%eax,%r11d
+	andl	%ecx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r11d
+
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r11d
+
+	movl	36(%rsi),%r12d
+	movl	%edx,%r13d
+	movl	%r11d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r8d,%r15d
+	movl	%r12d,36(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r11d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%edx,%r15d
+	movl	%eax,%r10d
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	xorl	%ebx,%r10d
+	xorl	%r11d,%r14d
+	addl	%r15d,%r12d
+	movl	%eax,%r15d
+
+	rorl	$6,%r13d
+	andl	%r11d,%r10d
+	andl	%ebx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r10d
+
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r10d
+
+	movl	40(%rsi),%r12d
+	movl	%ecx,%r13d
+	movl	%r10d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+	movl	%r12d,40(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r10d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ecx,%r15d
+	movl	%r11d,%r9d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	xorl	%eax,%r9d
+	xorl	%r10d,%r14d
+	addl	%r15d,%r12d
+	movl	%r11d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r10d,%r9d
+	andl	%eax,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r9d
+
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r9d
+
+	movl	44(%rsi),%r12d
+	movl	%ebx,%r13d
+	movl	%r9d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ecx,%r15d
+	movl	%r12d,44(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%r9d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ebx,%r15d
+	movl	%r10d,%r8d
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	xorl	%r11d,%r8d
+	xorl	%r9d,%r14d
+	addl	%r15d,%r12d
+	movl	%r10d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r9d,%r8d
+	andl	%r11d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r8d
+
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r8d
+
+	movl	48(%rsi),%r12d
+	movl	%eax,%r13d
+	movl	%r8d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+	movl	%r12d,48(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%r8d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%eax,%r15d
+	movl	%r9d,%edx
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	xorl	%r10d,%edx
+	xorl	%r8d,%r14d
+	addl	%r15d,%r12d
+	movl	%r9d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r8d,%edx
+	andl	%r10d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%edx
+
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%edx
+
+	movl	52(%rsi),%r12d
+	movl	%r11d,%r13d
+	movl	%edx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%eax,%r15d
+	movl	%r12d,52(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%edx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r11d,%r15d
+	movl	%r8d,%ecx
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	xorl	%r9d,%ecx
+	xorl	%edx,%r14d
+	addl	%r15d,%r12d
+	movl	%r8d,%r15d
+
+	rorl	$6,%r13d
+	andl	%edx,%ecx
+	andl	%r9d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ecx
+
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ecx
+
+	movl	56(%rsi),%r12d
+	movl	%r10d,%r13d
+	movl	%ecx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+	movl	%r12d,56(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%ecx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r10d,%r15d
+	movl	%edx,%ebx
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	xorl	%r8d,%ebx
+	xorl	%ecx,%r14d
+	addl	%r15d,%r12d
+	movl	%edx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ecx,%ebx
+	andl	%r8d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ebx
+
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ebx
+
+	movl	60(%rsi),%r12d
+	movl	%r9d,%r13d
+	movl	%ebx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r10d,%r15d
+	movl	%r12d,60(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%ebx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r9d,%r15d
+	movl	%ecx,%eax
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	xorl	%edx,%eax
+	xorl	%ebx,%r14d
+	addl	%r15d,%r12d
+	movl	%ecx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ebx,%eax
+	andl	%edx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%eax
+
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%eax
+
+	jmp	.Lrounds_16_xx
+.align	16
+.Lrounds_16_xx:
+	movl	4(%rsp),%r13d
+	movl	56(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	36(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	0(%rsp),%r12d
+	movl	%r8d,%r13d
+	addl	%r14d,%r12d
+	movl	%eax,%r14d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+	movl	%r12d,0(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%eax,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r8d,%r15d
+	movl	%ebx,%r11d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	xorl	%ecx,%r11d
+	xorl	%eax,%r14d
+	addl	%r15d,%r12d
+	movl	%ebx,%r15d
+
+	rorl	$6,%r13d
+	andl	%eax,%r11d
+	andl	%ecx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r11d
+
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r11d
+
+	movl	8(%rsp),%r13d
+	movl	60(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	40(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	4(%rsp),%r12d
+	movl	%edx,%r13d
+	addl	%r14d,%r12d
+	movl	%r11d,%r14d
+	rorl	$14,%r13d
+	movl	%r8d,%r15d
+	movl	%r12d,4(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r11d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%edx,%r15d
+	movl	%eax,%r10d
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	xorl	%ebx,%r10d
+	xorl	%r11d,%r14d
+	addl	%r15d,%r12d
+	movl	%eax,%r15d
+
+	rorl	$6,%r13d
+	andl	%r11d,%r10d
+	andl	%ebx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r10d
+
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r10d
+
+	movl	12(%rsp),%r13d
+	movl	0(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	44(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	8(%rsp),%r12d
+	movl	%ecx,%r13d
+	addl	%r14d,%r12d
+	movl	%r10d,%r14d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+	movl	%r12d,8(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r10d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ecx,%r15d
+	movl	%r11d,%r9d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	xorl	%eax,%r9d
+	xorl	%r10d,%r14d
+	addl	%r15d,%r12d
+	movl	%r11d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r10d,%r9d
+	andl	%eax,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r9d
+
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r9d
+
+	movl	16(%rsp),%r13d
+	movl	4(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	48(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	12(%rsp),%r12d
+	movl	%ebx,%r13d
+	addl	%r14d,%r12d
+	movl	%r9d,%r14d
+	rorl	$14,%r13d
+	movl	%ecx,%r15d
+	movl	%r12d,12(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%r9d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ebx,%r15d
+	movl	%r10d,%r8d
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	xorl	%r11d,%r8d
+	xorl	%r9d,%r14d
+	addl	%r15d,%r12d
+	movl	%r10d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r9d,%r8d
+	andl	%r11d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r8d
+
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r8d
+
+	movl	20(%rsp),%r13d
+	movl	8(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	52(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	16(%rsp),%r12d
+	movl	%eax,%r13d
+	addl	%r14d,%r12d
+	movl	%r8d,%r14d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+	movl	%r12d,16(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%r8d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%eax,%r15d
+	movl	%r9d,%edx
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	xorl	%r10d,%edx
+	xorl	%r8d,%r14d
+	addl	%r15d,%r12d
+	movl	%r9d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r8d,%edx
+	andl	%r10d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%edx
+
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%edx
+
+	movl	24(%rsp),%r13d
+	movl	12(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	56(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	20(%rsp),%r12d
+	movl	%r11d,%r13d
+	addl	%r14d,%r12d
+	movl	%edx,%r14d
+	rorl	$14,%r13d
+	movl	%eax,%r15d
+	movl	%r12d,20(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%edx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r11d,%r15d
+	movl	%r8d,%ecx
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	xorl	%r9d,%ecx
+	xorl	%edx,%r14d
+	addl	%r15d,%r12d
+	movl	%r8d,%r15d
+
+	rorl	$6,%r13d
+	andl	%edx,%ecx
+	andl	%r9d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ecx
+
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ecx
+
+	movl	28(%rsp),%r13d
+	movl	16(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	60(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	24(%rsp),%r12d
+	movl	%r10d,%r13d
+	addl	%r14d,%r12d
+	movl	%ecx,%r14d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+	movl	%r12d,24(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%ecx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r10d,%r15d
+	movl	%edx,%ebx
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	xorl	%r8d,%ebx
+	xorl	%ecx,%r14d
+	addl	%r15d,%r12d
+	movl	%edx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ecx,%ebx
+	andl	%r8d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ebx
+
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ebx
+
+	movl	32(%rsp),%r13d
+	movl	20(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	0(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	28(%rsp),%r12d
+	movl	%r9d,%r13d
+	addl	%r14d,%r12d
+	movl	%ebx,%r14d
+	rorl	$14,%r13d
+	movl	%r10d,%r15d
+	movl	%r12d,28(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%ebx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r9d,%r15d
+	movl	%ecx,%eax
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	xorl	%edx,%eax
+	xorl	%ebx,%r14d
+	addl	%r15d,%r12d
+	movl	%ecx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ebx,%eax
+	andl	%edx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%eax
+
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%eax
+
+	movl	36(%rsp),%r13d
+	movl	24(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	4(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	32(%rsp),%r12d
+	movl	%r8d,%r13d
+	addl	%r14d,%r12d
+	movl	%eax,%r14d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+	movl	%r12d,32(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%eax,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r8d,%r15d
+	movl	%ebx,%r11d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	xorl	%ecx,%r11d
+	xorl	%eax,%r14d
+	addl	%r15d,%r12d
+	movl	%ebx,%r15d
+
+	rorl	$6,%r13d
+	andl	%eax,%r11d
+	andl	%ecx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r11d
+
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r11d
+
+	movl	40(%rsp),%r13d
+	movl	28(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	8(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	36(%rsp),%r12d
+	movl	%edx,%r13d
+	addl	%r14d,%r12d
+	movl	%r11d,%r14d
+	rorl	$14,%r13d
+	movl	%r8d,%r15d
+	movl	%r12d,36(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r11d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%edx,%r15d
+	movl	%eax,%r10d
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	xorl	%ebx,%r10d
+	xorl	%r11d,%r14d
+	addl	%r15d,%r12d
+	movl	%eax,%r15d
+
+	rorl	$6,%r13d
+	andl	%r11d,%r10d
+	andl	%ebx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r10d
+
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r10d
+
+	movl	44(%rsp),%r13d
+	movl	32(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	12(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	40(%rsp),%r12d
+	movl	%ecx,%r13d
+	addl	%r14d,%r12d
+	movl	%r10d,%r14d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+	movl	%r12d,40(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r10d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ecx,%r15d
+	movl	%r11d,%r9d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	xorl	%eax,%r9d
+	xorl	%r10d,%r14d
+	addl	%r15d,%r12d
+	movl	%r11d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r10d,%r9d
+	andl	%eax,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r9d
+
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r9d
+
+	movl	48(%rsp),%r13d
+	movl	36(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	16(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	44(%rsp),%r12d
+	movl	%ebx,%r13d
+	addl	%r14d,%r12d
+	movl	%r9d,%r14d
+	rorl	$14,%r13d
+	movl	%ecx,%r15d
+	movl	%r12d,44(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%r9d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ebx,%r15d
+	movl	%r10d,%r8d
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	xorl	%r11d,%r8d
+	xorl	%r9d,%r14d
+	addl	%r15d,%r12d
+	movl	%r10d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r9d,%r8d
+	andl	%r11d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r8d
+
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r8d
+
+	movl	52(%rsp),%r13d
+	movl	40(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	20(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	48(%rsp),%r12d
+	movl	%eax,%r13d
+	addl	%r14d,%r12d
+	movl	%r8d,%r14d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+	movl	%r12d,48(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%r8d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%eax,%r15d
+	movl	%r9d,%edx
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	xorl	%r10d,%edx
+	xorl	%r8d,%r14d
+	addl	%r15d,%r12d
+	movl	%r9d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r8d,%edx
+	andl	%r10d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%edx
+
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%edx
+
+	movl	56(%rsp),%r13d
+	movl	44(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	24(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	52(%rsp),%r12d
+	movl	%r11d,%r13d
+	addl	%r14d,%r12d
+	movl	%edx,%r14d
+	rorl	$14,%r13d
+	movl	%eax,%r15d
+	movl	%r12d,52(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%edx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r11d,%r15d
+	movl	%r8d,%ecx
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	xorl	%r9d,%ecx
+	xorl	%edx,%r14d
+	addl	%r15d,%r12d
+	movl	%r8d,%r15d
+
+	rorl	$6,%r13d
+	andl	%edx,%ecx
+	andl	%r9d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ecx
+
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ecx
+
+	movl	60(%rsp),%r13d
+	movl	48(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	28(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	56(%rsp),%r12d
+	movl	%r10d,%r13d
+	addl	%r14d,%r12d
+	movl	%ecx,%r14d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+	movl	%r12d,56(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%ecx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r10d,%r15d
+	movl	%edx,%ebx
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	xorl	%r8d,%ebx
+	xorl	%ecx,%r14d
+	addl	%r15d,%r12d
+	movl	%edx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ecx,%ebx
+	andl	%r8d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ebx
+
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ebx
+
+	movl	0(%rsp),%r13d
+	movl	52(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	32(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	60(%rsp),%r12d
+	movl	%r9d,%r13d
+	addl	%r14d,%r12d
+	movl	%ebx,%r14d
+	rorl	$14,%r13d
+	movl	%r10d,%r15d
+	movl	%r12d,60(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%ebx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r9d,%r15d
+	movl	%ecx,%eax
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	xorl	%edx,%eax
+	xorl	%ebx,%r14d
+	addl	%r15d,%r12d
+	movl	%ecx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ebx,%eax
+	andl	%edx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%eax
+
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%eax
+
+	cmpq	$64,%rdi
+	jb	.Lrounds_16_xx
+
+	movq	64+0(%rsp),%rdi
+	leaq	64(%rsi),%rsi
+
+	addl	0(%rdi),%eax
+	addl	4(%rdi),%ebx
+	addl	8(%rdi),%ecx
+	addl	12(%rdi),%edx
+	addl	16(%rdi),%r8d
+	addl	20(%rdi),%r9d
+	addl	24(%rdi),%r10d
+	addl	28(%rdi),%r11d
+
+	cmpq	64+16(%rsp),%rsi
+
+	movl	%eax,0(%rdi)
+	movl	%ebx,4(%rdi)
+	movl	%ecx,8(%rdi)
+	movl	%edx,12(%rdi)
+	movl	%r8d,16(%rdi)
+	movl	%r9d,20(%rdi)
+	movl	%r10d,24(%rdi)
+	movl	%r11d,28(%rdi)
+	jb	.Lloop
+
+	movq	64+24(%rsp),%rsi
+	movq	(%rsi),%r15
+	movq	8(%rsi),%r14
+	movq	16(%rsi),%r13
+	movq	24(%rsi),%r12
+	movq	32(%rsi),%rbp
+	movq	40(%rsi),%rbx
+	leaq	48(%rsi),%rsp
+.Lepilogue:
+	retq
+.size	sha256_block_data_order,.-sha256_block_data_order
+.align	64
+.type	K256,@object
+K256:
+.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/sha/sha256-macosx-x86_64.S b/ext/libressl/crypto/sha/sha256-macosx-x86_64.S
new file mode 100644
index 0000000..4b468b7
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha256-macosx-x86_64.S
@@ -0,0 +1,1779 @@
+#include "x86_arch.h"
+.text	
+
+.globl	_sha256_block_data_order
+
+.p2align	4
+_sha256_block_data_order:
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	movq	%rsp,%r11
+	shlq	$4,%rdx
+	subq	$64+32,%rsp
+	leaq	(%rsi,%rdx,4),%rdx
+	andq	$-64,%rsp
+	movq	%rdi,64+0(%rsp)
+	movq	%rsi,64+8(%rsp)
+	movq	%rdx,64+16(%rsp)
+	movq	%r11,64+24(%rsp)
+L$prologue:
+
+	leaq	K256(%rip),%rbp
+
+	movl	0(%rdi),%eax
+	movl	4(%rdi),%ebx
+	movl	8(%rdi),%ecx
+	movl	12(%rdi),%edx
+	movl	16(%rdi),%r8d
+	movl	20(%rdi),%r9d
+	movl	24(%rdi),%r10d
+	movl	28(%rdi),%r11d
+	jmp	L$loop
+
+.p2align	4
+L$loop:
+	xorq	%rdi,%rdi
+	movl	0(%rsi),%r12d
+	movl	%r8d,%r13d
+	movl	%eax,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+	movl	%r12d,0(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%eax,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r8d,%r15d
+	movl	%ebx,%r11d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	xorl	%ecx,%r11d
+	xorl	%eax,%r14d
+	addl	%r15d,%r12d
+	movl	%ebx,%r15d
+
+	rorl	$6,%r13d
+	andl	%eax,%r11d
+	andl	%ecx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r11d
+
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r11d
+
+	movl	4(%rsi),%r12d
+	movl	%edx,%r13d
+	movl	%r11d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r8d,%r15d
+	movl	%r12d,4(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r11d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%edx,%r15d
+	movl	%eax,%r10d
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	xorl	%ebx,%r10d
+	xorl	%r11d,%r14d
+	addl	%r15d,%r12d
+	movl	%eax,%r15d
+
+	rorl	$6,%r13d
+	andl	%r11d,%r10d
+	andl	%ebx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r10d
+
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r10d
+
+	movl	8(%rsi),%r12d
+	movl	%ecx,%r13d
+	movl	%r10d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+	movl	%r12d,8(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r10d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ecx,%r15d
+	movl	%r11d,%r9d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	xorl	%eax,%r9d
+	xorl	%r10d,%r14d
+	addl	%r15d,%r12d
+	movl	%r11d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r10d,%r9d
+	andl	%eax,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r9d
+
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r9d
+
+	movl	12(%rsi),%r12d
+	movl	%ebx,%r13d
+	movl	%r9d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ecx,%r15d
+	movl	%r12d,12(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%r9d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ebx,%r15d
+	movl	%r10d,%r8d
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	xorl	%r11d,%r8d
+	xorl	%r9d,%r14d
+	addl	%r15d,%r12d
+	movl	%r10d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r9d,%r8d
+	andl	%r11d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r8d
+
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r8d
+
+	movl	16(%rsi),%r12d
+	movl	%eax,%r13d
+	movl	%r8d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+	movl	%r12d,16(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%r8d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%eax,%r15d
+	movl	%r9d,%edx
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	xorl	%r10d,%edx
+	xorl	%r8d,%r14d
+	addl	%r15d,%r12d
+	movl	%r9d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r8d,%edx
+	andl	%r10d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%edx
+
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%edx
+
+	movl	20(%rsi),%r12d
+	movl	%r11d,%r13d
+	movl	%edx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%eax,%r15d
+	movl	%r12d,20(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%edx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r11d,%r15d
+	movl	%r8d,%ecx
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	xorl	%r9d,%ecx
+	xorl	%edx,%r14d
+	addl	%r15d,%r12d
+	movl	%r8d,%r15d
+
+	rorl	$6,%r13d
+	andl	%edx,%ecx
+	andl	%r9d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ecx
+
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ecx
+
+	movl	24(%rsi),%r12d
+	movl	%r10d,%r13d
+	movl	%ecx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+	movl	%r12d,24(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%ecx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r10d,%r15d
+	movl	%edx,%ebx
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	xorl	%r8d,%ebx
+	xorl	%ecx,%r14d
+	addl	%r15d,%r12d
+	movl	%edx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ecx,%ebx
+	andl	%r8d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ebx
+
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ebx
+
+	movl	28(%rsi),%r12d
+	movl	%r9d,%r13d
+	movl	%ebx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r10d,%r15d
+	movl	%r12d,28(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%ebx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r9d,%r15d
+	movl	%ecx,%eax
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	xorl	%edx,%eax
+	xorl	%ebx,%r14d
+	addl	%r15d,%r12d
+	movl	%ecx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ebx,%eax
+	andl	%edx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%eax
+
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%eax
+
+	movl	32(%rsi),%r12d
+	movl	%r8d,%r13d
+	movl	%eax,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+	movl	%r12d,32(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%eax,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r8d,%r15d
+	movl	%ebx,%r11d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	xorl	%ecx,%r11d
+	xorl	%eax,%r14d
+	addl	%r15d,%r12d
+	movl	%ebx,%r15d
+
+	rorl	$6,%r13d
+	andl	%eax,%r11d
+	andl	%ecx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r11d
+
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r11d
+
+	movl	36(%rsi),%r12d
+	movl	%edx,%r13d
+	movl	%r11d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r8d,%r15d
+	movl	%r12d,36(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r11d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%edx,%r15d
+	movl	%eax,%r10d
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	xorl	%ebx,%r10d
+	xorl	%r11d,%r14d
+	addl	%r15d,%r12d
+	movl	%eax,%r15d
+
+	rorl	$6,%r13d
+	andl	%r11d,%r10d
+	andl	%ebx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r10d
+
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r10d
+
+	movl	40(%rsi),%r12d
+	movl	%ecx,%r13d
+	movl	%r10d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+	movl	%r12d,40(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r10d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ecx,%r15d
+	movl	%r11d,%r9d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	xorl	%eax,%r9d
+	xorl	%r10d,%r14d
+	addl	%r15d,%r12d
+	movl	%r11d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r10d,%r9d
+	andl	%eax,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r9d
+
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r9d
+
+	movl	44(%rsi),%r12d
+	movl	%ebx,%r13d
+	movl	%r9d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ecx,%r15d
+	movl	%r12d,44(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%r9d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ebx,%r15d
+	movl	%r10d,%r8d
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	xorl	%r11d,%r8d
+	xorl	%r9d,%r14d
+	addl	%r15d,%r12d
+	movl	%r10d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r9d,%r8d
+	andl	%r11d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r8d
+
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r8d
+
+	movl	48(%rsi),%r12d
+	movl	%eax,%r13d
+	movl	%r8d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+	movl	%r12d,48(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%r8d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%eax,%r15d
+	movl	%r9d,%edx
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	xorl	%r10d,%edx
+	xorl	%r8d,%r14d
+	addl	%r15d,%r12d
+	movl	%r9d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r8d,%edx
+	andl	%r10d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%edx
+
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%edx
+
+	movl	52(%rsi),%r12d
+	movl	%r11d,%r13d
+	movl	%edx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%eax,%r15d
+	movl	%r12d,52(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%edx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r11d,%r15d
+	movl	%r8d,%ecx
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	xorl	%r9d,%ecx
+	xorl	%edx,%r14d
+	addl	%r15d,%r12d
+	movl	%r8d,%r15d
+
+	rorl	$6,%r13d
+	andl	%edx,%ecx
+	andl	%r9d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ecx
+
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ecx
+
+	movl	56(%rsi),%r12d
+	movl	%r10d,%r13d
+	movl	%ecx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+	movl	%r12d,56(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%ecx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r10d,%r15d
+	movl	%edx,%ebx
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	xorl	%r8d,%ebx
+	xorl	%ecx,%r14d
+	addl	%r15d,%r12d
+	movl	%edx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ecx,%ebx
+	andl	%r8d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ebx
+
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ebx
+
+	movl	60(%rsi),%r12d
+	movl	%r9d,%r13d
+	movl	%ebx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r10d,%r15d
+	movl	%r12d,60(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%ebx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r9d,%r15d
+	movl	%ecx,%eax
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	xorl	%edx,%eax
+	xorl	%ebx,%r14d
+	addl	%r15d,%r12d
+	movl	%ecx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ebx,%eax
+	andl	%edx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%eax
+
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%eax
+
+	jmp	L$rounds_16_xx
+.p2align	4
+L$rounds_16_xx:
+	movl	4(%rsp),%r13d
+	movl	56(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	36(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	0(%rsp),%r12d
+	movl	%r8d,%r13d
+	addl	%r14d,%r12d
+	movl	%eax,%r14d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+	movl	%r12d,0(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%eax,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r8d,%r15d
+	movl	%ebx,%r11d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	xorl	%ecx,%r11d
+	xorl	%eax,%r14d
+	addl	%r15d,%r12d
+	movl	%ebx,%r15d
+
+	rorl	$6,%r13d
+	andl	%eax,%r11d
+	andl	%ecx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r11d
+
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r11d
+
+	movl	8(%rsp),%r13d
+	movl	60(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	40(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	4(%rsp),%r12d
+	movl	%edx,%r13d
+	addl	%r14d,%r12d
+	movl	%r11d,%r14d
+	rorl	$14,%r13d
+	movl	%r8d,%r15d
+	movl	%r12d,4(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r11d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%edx,%r15d
+	movl	%eax,%r10d
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	xorl	%ebx,%r10d
+	xorl	%r11d,%r14d
+	addl	%r15d,%r12d
+	movl	%eax,%r15d
+
+	rorl	$6,%r13d
+	andl	%r11d,%r10d
+	andl	%ebx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r10d
+
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r10d
+
+	movl	12(%rsp),%r13d
+	movl	0(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	44(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	8(%rsp),%r12d
+	movl	%ecx,%r13d
+	addl	%r14d,%r12d
+	movl	%r10d,%r14d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+	movl	%r12d,8(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r10d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ecx,%r15d
+	movl	%r11d,%r9d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	xorl	%eax,%r9d
+	xorl	%r10d,%r14d
+	addl	%r15d,%r12d
+	movl	%r11d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r10d,%r9d
+	andl	%eax,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r9d
+
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r9d
+
+	movl	16(%rsp),%r13d
+	movl	4(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	48(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	12(%rsp),%r12d
+	movl	%ebx,%r13d
+	addl	%r14d,%r12d
+	movl	%r9d,%r14d
+	rorl	$14,%r13d
+	movl	%ecx,%r15d
+	movl	%r12d,12(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%r9d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ebx,%r15d
+	movl	%r10d,%r8d
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	xorl	%r11d,%r8d
+	xorl	%r9d,%r14d
+	addl	%r15d,%r12d
+	movl	%r10d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r9d,%r8d
+	andl	%r11d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r8d
+
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r8d
+
+	movl	20(%rsp),%r13d
+	movl	8(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	52(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	16(%rsp),%r12d
+	movl	%eax,%r13d
+	addl	%r14d,%r12d
+	movl	%r8d,%r14d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+	movl	%r12d,16(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%r8d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%eax,%r15d
+	movl	%r9d,%edx
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	xorl	%r10d,%edx
+	xorl	%r8d,%r14d
+	addl	%r15d,%r12d
+	movl	%r9d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r8d,%edx
+	andl	%r10d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%edx
+
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%edx
+
+	movl	24(%rsp),%r13d
+	movl	12(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	56(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	20(%rsp),%r12d
+	movl	%r11d,%r13d
+	addl	%r14d,%r12d
+	movl	%edx,%r14d
+	rorl	$14,%r13d
+	movl	%eax,%r15d
+	movl	%r12d,20(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%edx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r11d,%r15d
+	movl	%r8d,%ecx
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	xorl	%r9d,%ecx
+	xorl	%edx,%r14d
+	addl	%r15d,%r12d
+	movl	%r8d,%r15d
+
+	rorl	$6,%r13d
+	andl	%edx,%ecx
+	andl	%r9d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ecx
+
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ecx
+
+	movl	28(%rsp),%r13d
+	movl	16(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	60(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	24(%rsp),%r12d
+	movl	%r10d,%r13d
+	addl	%r14d,%r12d
+	movl	%ecx,%r14d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+	movl	%r12d,24(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%ecx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r10d,%r15d
+	movl	%edx,%ebx
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	xorl	%r8d,%ebx
+	xorl	%ecx,%r14d
+	addl	%r15d,%r12d
+	movl	%edx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ecx,%ebx
+	andl	%r8d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ebx
+
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ebx
+
+	movl	32(%rsp),%r13d
+	movl	20(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	0(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	28(%rsp),%r12d
+	movl	%r9d,%r13d
+	addl	%r14d,%r12d
+	movl	%ebx,%r14d
+	rorl	$14,%r13d
+	movl	%r10d,%r15d
+	movl	%r12d,28(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%ebx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r9d,%r15d
+	movl	%ecx,%eax
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	xorl	%edx,%eax
+	xorl	%ebx,%r14d
+	addl	%r15d,%r12d
+	movl	%ecx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ebx,%eax
+	andl	%edx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%eax
+
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%eax
+
+	movl	36(%rsp),%r13d
+	movl	24(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	4(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	32(%rsp),%r12d
+	movl	%r8d,%r13d
+	addl	%r14d,%r12d
+	movl	%eax,%r14d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+	movl	%r12d,32(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%eax,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r8d,%r15d
+	movl	%ebx,%r11d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	xorl	%ecx,%r11d
+	xorl	%eax,%r14d
+	addl	%r15d,%r12d
+	movl	%ebx,%r15d
+
+	rorl	$6,%r13d
+	andl	%eax,%r11d
+	andl	%ecx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r11d
+
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r11d
+
+	movl	40(%rsp),%r13d
+	movl	28(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	8(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	36(%rsp),%r12d
+	movl	%edx,%r13d
+	addl	%r14d,%r12d
+	movl	%r11d,%r14d
+	rorl	$14,%r13d
+	movl	%r8d,%r15d
+	movl	%r12d,36(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r11d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%edx,%r15d
+	movl	%eax,%r10d
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	xorl	%ebx,%r10d
+	xorl	%r11d,%r14d
+	addl	%r15d,%r12d
+	movl	%eax,%r15d
+
+	rorl	$6,%r13d
+	andl	%r11d,%r10d
+	andl	%ebx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r10d
+
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r10d
+
+	movl	44(%rsp),%r13d
+	movl	32(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	12(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	40(%rsp),%r12d
+	movl	%ecx,%r13d
+	addl	%r14d,%r12d
+	movl	%r10d,%r14d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+	movl	%r12d,40(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r10d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ecx,%r15d
+	movl	%r11d,%r9d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	xorl	%eax,%r9d
+	xorl	%r10d,%r14d
+	addl	%r15d,%r12d
+	movl	%r11d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r10d,%r9d
+	andl	%eax,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r9d
+
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r9d
+
+	movl	48(%rsp),%r13d
+	movl	36(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	16(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	44(%rsp),%r12d
+	movl	%ebx,%r13d
+	addl	%r14d,%r12d
+	movl	%r9d,%r14d
+	rorl	$14,%r13d
+	movl	%ecx,%r15d
+	movl	%r12d,44(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%r9d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ebx,%r15d
+	movl	%r10d,%r8d
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	xorl	%r11d,%r8d
+	xorl	%r9d,%r14d
+	addl	%r15d,%r12d
+	movl	%r10d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r9d,%r8d
+	andl	%r11d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r8d
+
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r8d
+
+	movl	52(%rsp),%r13d
+	movl	40(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	20(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	48(%rsp),%r12d
+	movl	%eax,%r13d
+	addl	%r14d,%r12d
+	movl	%r8d,%r14d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+	movl	%r12d,48(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%r8d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%eax,%r15d
+	movl	%r9d,%edx
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	xorl	%r10d,%edx
+	xorl	%r8d,%r14d
+	addl	%r15d,%r12d
+	movl	%r9d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r8d,%edx
+	andl	%r10d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%edx
+
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%edx
+
+	movl	56(%rsp),%r13d
+	movl	44(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	24(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	52(%rsp),%r12d
+	movl	%r11d,%r13d
+	addl	%r14d,%r12d
+	movl	%edx,%r14d
+	rorl	$14,%r13d
+	movl	%eax,%r15d
+	movl	%r12d,52(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%edx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r11d,%r15d
+	movl	%r8d,%ecx
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	xorl	%r9d,%ecx
+	xorl	%edx,%r14d
+	addl	%r15d,%r12d
+	movl	%r8d,%r15d
+
+	rorl	$6,%r13d
+	andl	%edx,%ecx
+	andl	%r9d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ecx
+
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ecx
+
+	movl	60(%rsp),%r13d
+	movl	48(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	28(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	56(%rsp),%r12d
+	movl	%r10d,%r13d
+	addl	%r14d,%r12d
+	movl	%ecx,%r14d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+	movl	%r12d,56(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%ecx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r10d,%r15d
+	movl	%edx,%ebx
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	xorl	%r8d,%ebx
+	xorl	%ecx,%r14d
+	addl	%r15d,%r12d
+	movl	%edx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ecx,%ebx
+	andl	%r8d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ebx
+
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ebx
+
+	movl	0(%rsp),%r13d
+	movl	52(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	32(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	60(%rsp),%r12d
+	movl	%r9d,%r13d
+	addl	%r14d,%r12d
+	movl	%ebx,%r14d
+	rorl	$14,%r13d
+	movl	%r10d,%r15d
+	movl	%r12d,60(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%ebx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r9d,%r15d
+	movl	%ecx,%eax
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	xorl	%edx,%eax
+	xorl	%ebx,%r14d
+	addl	%r15d,%r12d
+	movl	%ecx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ebx,%eax
+	andl	%edx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%eax
+
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%eax
+
+	cmpq	$64,%rdi
+	jb	L$rounds_16_xx
+
+	movq	64+0(%rsp),%rdi
+	leaq	64(%rsi),%rsi
+
+	addl	0(%rdi),%eax
+	addl	4(%rdi),%ebx
+	addl	8(%rdi),%ecx
+	addl	12(%rdi),%edx
+	addl	16(%rdi),%r8d
+	addl	20(%rdi),%r9d
+	addl	24(%rdi),%r10d
+	addl	28(%rdi),%r11d
+
+	cmpq	64+16(%rsp),%rsi
+
+	movl	%eax,0(%rdi)
+	movl	%ebx,4(%rdi)
+	movl	%ecx,8(%rdi)
+	movl	%edx,12(%rdi)
+	movl	%r8d,16(%rdi)
+	movl	%r9d,20(%rdi)
+	movl	%r10d,24(%rdi)
+	movl	%r11d,28(%rdi)
+	jb	L$loop
+
+	movq	64+24(%rsp),%rsi
+	movq	(%rsi),%r15
+	movq	8(%rsi),%r14
+	movq	16(%rsi),%r13
+	movq	24(%rsi),%r12
+	movq	32(%rsi),%rbp
+	movq	40(%rsi),%rbx
+	leaq	48(%rsi),%rsp
+L$epilogue:
+	retq
+
+.p2align	6
+
+K256:
+.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
diff --git a/ext/libressl/crypto/sha/sha256-masm-x86_64.S b/ext/libressl/crypto/sha/sha256-masm-x86_64.S
new file mode 100644
index 0000000..33c705d
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha256-masm-x86_64.S
@@ -0,0 +1,1864 @@
+; 1 "crypto/sha/sha256-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/sha/sha256-masm-x86_64.S.tmp" 2
+OPTION	DOTNAME
+
+; 1 "./crypto/x86_arch.h" 1
+
+
+; 16 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+; 40 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 "crypto/sha/sha256-masm-x86_64.S.tmp" 2
+.text$	SEGMENT ALIGN(64) 'CODE'
+
+PUBLIC	sha256_block_data_order
+
+ALIGN	16
+sha256_block_data_order	PROC PUBLIC
+	mov	QWORD PTR[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD PTR[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_sha256_block_data_order::
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+
+
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	mov	r11,rsp
+	shl	rdx,4
+	sub	rsp,16*4+4*8
+	lea	rdx,QWORD PTR[rdx*4+rsi]
+	and	rsp,-64
+	mov	QWORD PTR[((64+0))+rsp],rdi
+	mov	QWORD PTR[((64+8))+rsp],rsi
+	mov	QWORD PTR[((64+16))+rsp],rdx
+	mov	QWORD PTR[((64+24))+rsp],r11
+$L$prologue::
+
+	lea	rbp,QWORD PTR[K256]
+
+	mov	eax,DWORD PTR[rdi]
+	mov	ebx,DWORD PTR[4+rdi]
+	mov	ecx,DWORD PTR[8+rdi]
+	mov	edx,DWORD PTR[12+rdi]
+	mov	r8d,DWORD PTR[16+rdi]
+	mov	r9d,DWORD PTR[20+rdi]
+	mov	r10d,DWORD PTR[24+rdi]
+	mov	r11d,DWORD PTR[28+rdi]
+	jmp	$L$loop
+
+ALIGN	16
+$L$loop::
+	xor	rdi,rdi
+	mov	r12d,DWORD PTR[rsi]
+	mov	r13d,r8d
+	mov	r14d,eax
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r9d
+	mov	DWORD PTR[rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,r8d
+	xor	r15d,r10d
+
+	ror	r13d,5
+	add	r12d,r11d
+	xor	r14d,eax
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,r8d
+	mov	r11d,ebx
+
+	ror	r14d,11
+	xor	r13d,r8d
+	xor	r15d,r10d
+
+	xor	r11d,ecx
+	xor	r14d,eax
+	add	r12d,r15d
+	mov	r15d,ebx
+
+	ror	r13d,6
+	and	r11d,eax
+	and	r15d,ecx
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	r11d,r15d
+
+	add	edx,r12d
+	add	r11d,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r11d,r14d
+
+	mov	r12d,DWORD PTR[4+rsi]
+	mov	r13d,edx
+	mov	r14d,r11d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r8d
+	mov	DWORD PTR[4+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,edx
+	xor	r15d,r9d
+
+	ror	r13d,5
+	add	r12d,r10d
+	xor	r14d,r11d
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,edx
+	mov	r10d,eax
+
+	ror	r14d,11
+	xor	r13d,edx
+	xor	r15d,r9d
+
+	xor	r10d,ebx
+	xor	r14d,r11d
+	add	r12d,r15d
+	mov	r15d,eax
+
+	ror	r13d,6
+	and	r10d,r11d
+	and	r15d,ebx
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	r10d,r15d
+
+	add	ecx,r12d
+	add	r10d,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r10d,r14d
+
+	mov	r12d,DWORD PTR[8+rsi]
+	mov	r13d,ecx
+	mov	r14d,r10d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,edx
+	mov	DWORD PTR[8+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,ecx
+	xor	r15d,r8d
+
+	ror	r13d,5
+	add	r12d,r9d
+	xor	r14d,r10d
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,ecx
+	mov	r9d,r11d
+
+	ror	r14d,11
+	xor	r13d,ecx
+	xor	r15d,r8d
+
+	xor	r9d,eax
+	xor	r14d,r10d
+	add	r12d,r15d
+	mov	r15d,r11d
+
+	ror	r13d,6
+	and	r9d,r10d
+	and	r15d,eax
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	r9d,r15d
+
+	add	ebx,r12d
+	add	r9d,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r9d,r14d
+
+	mov	r12d,DWORD PTR[12+rsi]
+	mov	r13d,ebx
+	mov	r14d,r9d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,ecx
+	mov	DWORD PTR[12+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,ebx
+	xor	r15d,edx
+
+	ror	r13d,5
+	add	r12d,r8d
+	xor	r14d,r9d
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,ebx
+	mov	r8d,r10d
+
+	ror	r14d,11
+	xor	r13d,ebx
+	xor	r15d,edx
+
+	xor	r8d,r11d
+	xor	r14d,r9d
+	add	r12d,r15d
+	mov	r15d,r10d
+
+	ror	r13d,6
+	and	r8d,r9d
+	and	r15d,r11d
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	r8d,r15d
+
+	add	eax,r12d
+	add	r8d,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r8d,r14d
+
+	mov	r12d,DWORD PTR[16+rsi]
+	mov	r13d,eax
+	mov	r14d,r8d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,ebx
+	mov	DWORD PTR[16+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,eax
+	xor	r15d,ecx
+
+	ror	r13d,5
+	add	r12d,edx
+	xor	r14d,r8d
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,eax
+	mov	edx,r9d
+
+	ror	r14d,11
+	xor	r13d,eax
+	xor	r15d,ecx
+
+	xor	edx,r10d
+	xor	r14d,r8d
+	add	r12d,r15d
+	mov	r15d,r9d
+
+	ror	r13d,6
+	and	edx,r8d
+	and	r15d,r10d
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	edx,r15d
+
+	add	r11d,r12d
+	add	edx,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	edx,r14d
+
+	mov	r12d,DWORD PTR[20+rsi]
+	mov	r13d,r11d
+	mov	r14d,edx
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,eax
+	mov	DWORD PTR[20+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,r11d
+	xor	r15d,ebx
+
+	ror	r13d,5
+	add	r12d,ecx
+	xor	r14d,edx
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,r11d
+	mov	ecx,r8d
+
+	ror	r14d,11
+	xor	r13d,r11d
+	xor	r15d,ebx
+
+	xor	ecx,r9d
+	xor	r14d,edx
+	add	r12d,r15d
+	mov	r15d,r8d
+
+	ror	r13d,6
+	and	ecx,edx
+	and	r15d,r9d
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	ecx,r15d
+
+	add	r10d,r12d
+	add	ecx,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	ecx,r14d
+
+	mov	r12d,DWORD PTR[24+rsi]
+	mov	r13d,r10d
+	mov	r14d,ecx
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r11d
+	mov	DWORD PTR[24+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,r10d
+	xor	r15d,eax
+
+	ror	r13d,5
+	add	r12d,ebx
+	xor	r14d,ecx
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,r10d
+	mov	ebx,edx
+
+	ror	r14d,11
+	xor	r13d,r10d
+	xor	r15d,eax
+
+	xor	ebx,r8d
+	xor	r14d,ecx
+	add	r12d,r15d
+	mov	r15d,edx
+
+	ror	r13d,6
+	and	ebx,ecx
+	and	r15d,r8d
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	ebx,r15d
+
+	add	r9d,r12d
+	add	ebx,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	ebx,r14d
+
+	mov	r12d,DWORD PTR[28+rsi]
+	mov	r13d,r9d
+	mov	r14d,ebx
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r10d
+	mov	DWORD PTR[28+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,r9d
+	xor	r15d,r11d
+
+	ror	r13d,5
+	add	r12d,eax
+	xor	r14d,ebx
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,r9d
+	mov	eax,ecx
+
+	ror	r14d,11
+	xor	r13d,r9d
+	xor	r15d,r11d
+
+	xor	eax,edx
+	xor	r14d,ebx
+	add	r12d,r15d
+	mov	r15d,ecx
+
+	ror	r13d,6
+	and	eax,ebx
+	and	r15d,edx
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	eax,r15d
+
+	add	r8d,r12d
+	add	eax,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	eax,r14d
+
+	mov	r12d,DWORD PTR[32+rsi]
+	mov	r13d,r8d
+	mov	r14d,eax
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r9d
+	mov	DWORD PTR[32+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,r8d
+	xor	r15d,r10d
+
+	ror	r13d,5
+	add	r12d,r11d
+	xor	r14d,eax
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,r8d
+	mov	r11d,ebx
+
+	ror	r14d,11
+	xor	r13d,r8d
+	xor	r15d,r10d
+
+	xor	r11d,ecx
+	xor	r14d,eax
+	add	r12d,r15d
+	mov	r15d,ebx
+
+	ror	r13d,6
+	and	r11d,eax
+	and	r15d,ecx
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	r11d,r15d
+
+	add	edx,r12d
+	add	r11d,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r11d,r14d
+
+	mov	r12d,DWORD PTR[36+rsi]
+	mov	r13d,edx
+	mov	r14d,r11d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r8d
+	mov	DWORD PTR[36+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,edx
+	xor	r15d,r9d
+
+	ror	r13d,5
+	add	r12d,r10d
+	xor	r14d,r11d
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,edx
+	mov	r10d,eax
+
+	ror	r14d,11
+	xor	r13d,edx
+	xor	r15d,r9d
+
+	xor	r10d,ebx
+	xor	r14d,r11d
+	add	r12d,r15d
+	mov	r15d,eax
+
+	ror	r13d,6
+	and	r10d,r11d
+	and	r15d,ebx
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	r10d,r15d
+
+	add	ecx,r12d
+	add	r10d,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r10d,r14d
+
+	mov	r12d,DWORD PTR[40+rsi]
+	mov	r13d,ecx
+	mov	r14d,r10d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,edx
+	mov	DWORD PTR[40+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,ecx
+	xor	r15d,r8d
+
+	ror	r13d,5
+	add	r12d,r9d
+	xor	r14d,r10d
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,ecx
+	mov	r9d,r11d
+
+	ror	r14d,11
+	xor	r13d,ecx
+	xor	r15d,r8d
+
+	xor	r9d,eax
+	xor	r14d,r10d
+	add	r12d,r15d
+	mov	r15d,r11d
+
+	ror	r13d,6
+	and	r9d,r10d
+	and	r15d,eax
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	r9d,r15d
+
+	add	ebx,r12d
+	add	r9d,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r9d,r14d
+
+	mov	r12d,DWORD PTR[44+rsi]
+	mov	r13d,ebx
+	mov	r14d,r9d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,ecx
+	mov	DWORD PTR[44+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,ebx
+	xor	r15d,edx
+
+	ror	r13d,5
+	add	r12d,r8d
+	xor	r14d,r9d
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,ebx
+	mov	r8d,r10d
+
+	ror	r14d,11
+	xor	r13d,ebx
+	xor	r15d,edx
+
+	xor	r8d,r11d
+	xor	r14d,r9d
+	add	r12d,r15d
+	mov	r15d,r10d
+
+	ror	r13d,6
+	and	r8d,r9d
+	and	r15d,r11d
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	r8d,r15d
+
+	add	eax,r12d
+	add	r8d,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r8d,r14d
+
+	mov	r12d,DWORD PTR[48+rsi]
+	mov	r13d,eax
+	mov	r14d,r8d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,ebx
+	mov	DWORD PTR[48+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,eax
+	xor	r15d,ecx
+
+	ror	r13d,5
+	add	r12d,edx
+	xor	r14d,r8d
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,eax
+	mov	edx,r9d
+
+	ror	r14d,11
+	xor	r13d,eax
+	xor	r15d,ecx
+
+	xor	edx,r10d
+	xor	r14d,r8d
+	add	r12d,r15d
+	mov	r15d,r9d
+
+	ror	r13d,6
+	and	edx,r8d
+	and	r15d,r10d
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	edx,r15d
+
+	add	r11d,r12d
+	add	edx,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	edx,r14d
+
+	mov	r12d,DWORD PTR[52+rsi]
+	mov	r13d,r11d
+	mov	r14d,edx
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,eax
+	mov	DWORD PTR[52+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,r11d
+	xor	r15d,ebx
+
+	ror	r13d,5
+	add	r12d,ecx
+	xor	r14d,edx
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,r11d
+	mov	ecx,r8d
+
+	ror	r14d,11
+	xor	r13d,r11d
+	xor	r15d,ebx
+
+	xor	ecx,r9d
+	xor	r14d,edx
+	add	r12d,r15d
+	mov	r15d,r8d
+
+	ror	r13d,6
+	and	ecx,edx
+	and	r15d,r9d
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	ecx,r15d
+
+	add	r10d,r12d
+	add	ecx,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	ecx,r14d
+
+	mov	r12d,DWORD PTR[56+rsi]
+	mov	r13d,r10d
+	mov	r14d,ecx
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r11d
+	mov	DWORD PTR[56+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,r10d
+	xor	r15d,eax
+
+	ror	r13d,5
+	add	r12d,ebx
+	xor	r14d,ecx
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,r10d
+	mov	ebx,edx
+
+	ror	r14d,11
+	xor	r13d,r10d
+	xor	r15d,eax
+
+	xor	ebx,r8d
+	xor	r14d,ecx
+	add	r12d,r15d
+	mov	r15d,edx
+
+	ror	r13d,6
+	and	ebx,ecx
+	and	r15d,r8d
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	ebx,r15d
+
+	add	r9d,r12d
+	add	ebx,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	ebx,r14d
+
+	mov	r12d,DWORD PTR[60+rsi]
+	mov	r13d,r9d
+	mov	r14d,ebx
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r10d
+	mov	DWORD PTR[60+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,r9d
+	xor	r15d,r11d
+
+	ror	r13d,5
+	add	r12d,eax
+	xor	r14d,ebx
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,r9d
+	mov	eax,ecx
+
+	ror	r14d,11
+	xor	r13d,r9d
+	xor	r15d,r11d
+
+	xor	eax,edx
+	xor	r14d,ebx
+	add	r12d,r15d
+	mov	r15d,ecx
+
+	ror	r13d,6
+	and	eax,ebx
+	and	r15d,edx
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	eax,r15d
+
+	add	r8d,r12d
+	add	eax,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	eax,r14d
+
+	jmp	$L$rounds_16_xx
+ALIGN	16
+$L$rounds_16_xx::
+	mov	r13d,DWORD PTR[4+rsp]
+	mov	r14d,DWORD PTR[56+rsp]
+	mov	r12d,r13d
+	mov	r15d,r14d
+
+	ror	r12d,11
+	xor	r12d,r13d
+	shr	r13d,3
+
+	ror	r12d,7
+	xor	r13d,r12d
+	mov	r12d,DWORD PTR[36+rsp]
+
+	ror	r15d,2
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	add	r12d,r13d
+	xor	r14d,r15d
+
+	add	r12d,DWORD PTR[rsp]
+	mov	r13d,r8d
+	add	r12d,r14d
+	mov	r14d,eax
+	ror	r13d,14
+	mov	r15d,r9d
+	mov	DWORD PTR[rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,r8d
+	xor	r15d,r10d
+
+	ror	r13d,5
+	add	r12d,r11d
+	xor	r14d,eax
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,r8d
+	mov	r11d,ebx
+
+	ror	r14d,11
+	xor	r13d,r8d
+	xor	r15d,r10d
+
+	xor	r11d,ecx
+	xor	r14d,eax
+	add	r12d,r15d
+	mov	r15d,ebx
+
+	ror	r13d,6
+	and	r11d,eax
+	and	r15d,ecx
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	r11d,r15d
+
+	add	edx,r12d
+	add	r11d,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r11d,r14d
+
+	mov	r13d,DWORD PTR[8+rsp]
+	mov	r14d,DWORD PTR[60+rsp]
+	mov	r12d,r13d
+	mov	r15d,r14d
+
+	ror	r12d,11
+	xor	r12d,r13d
+	shr	r13d,3
+
+	ror	r12d,7
+	xor	r13d,r12d
+	mov	r12d,DWORD PTR[40+rsp]
+
+	ror	r15d,2
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	add	r12d,r13d
+	xor	r14d,r15d
+
+	add	r12d,DWORD PTR[4+rsp]
+	mov	r13d,edx
+	add	r12d,r14d
+	mov	r14d,r11d
+	ror	r13d,14
+	mov	r15d,r8d
+	mov	DWORD PTR[4+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,edx
+	xor	r15d,r9d
+
+	ror	r13d,5
+	add	r12d,r10d
+	xor	r14d,r11d
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,edx
+	mov	r10d,eax
+
+	ror	r14d,11
+	xor	r13d,edx
+	xor	r15d,r9d
+
+	xor	r10d,ebx
+	xor	r14d,r11d
+	add	r12d,r15d
+	mov	r15d,eax
+
+	ror	r13d,6
+	and	r10d,r11d
+	and	r15d,ebx
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	r10d,r15d
+
+	add	ecx,r12d
+	add	r10d,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r10d,r14d
+
+	mov	r13d,DWORD PTR[12+rsp]
+	mov	r14d,DWORD PTR[rsp]
+	mov	r12d,r13d
+	mov	r15d,r14d
+
+	ror	r12d,11
+	xor	r12d,r13d
+	shr	r13d,3
+
+	ror	r12d,7
+	xor	r13d,r12d
+	mov	r12d,DWORD PTR[44+rsp]
+
+	ror	r15d,2
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	add	r12d,r13d
+	xor	r14d,r15d
+
+	add	r12d,DWORD PTR[8+rsp]
+	mov	r13d,ecx
+	add	r12d,r14d
+	mov	r14d,r10d
+	ror	r13d,14
+	mov	r15d,edx
+	mov	DWORD PTR[8+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,ecx
+	xor	r15d,r8d
+
+	ror	r13d,5
+	add	r12d,r9d
+	xor	r14d,r10d
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,ecx
+	mov	r9d,r11d
+
+	ror	r14d,11
+	xor	r13d,ecx
+	xor	r15d,r8d
+
+	xor	r9d,eax
+	xor	r14d,r10d
+	add	r12d,r15d
+	mov	r15d,r11d
+
+	ror	r13d,6
+	and	r9d,r10d
+	and	r15d,eax
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	r9d,r15d
+
+	add	ebx,r12d
+	add	r9d,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r9d,r14d
+
+	mov	r13d,DWORD PTR[16+rsp]
+	mov	r14d,DWORD PTR[4+rsp]
+	mov	r12d,r13d
+	mov	r15d,r14d
+
+	ror	r12d,11
+	xor	r12d,r13d
+	shr	r13d,3
+
+	ror	r12d,7
+	xor	r13d,r12d
+	mov	r12d,DWORD PTR[48+rsp]
+
+	ror	r15d,2
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	add	r12d,r13d
+	xor	r14d,r15d
+
+	add	r12d,DWORD PTR[12+rsp]
+	mov	r13d,ebx
+	add	r12d,r14d
+	mov	r14d,r9d
+	ror	r13d,14
+	mov	r15d,ecx
+	mov	DWORD PTR[12+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,ebx
+	xor	r15d,edx
+
+	ror	r13d,5
+	add	r12d,r8d
+	xor	r14d,r9d
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,ebx
+	mov	r8d,r10d
+
+	ror	r14d,11
+	xor	r13d,ebx
+	xor	r15d,edx
+
+	xor	r8d,r11d
+	xor	r14d,r9d
+	add	r12d,r15d
+	mov	r15d,r10d
+
+	ror	r13d,6
+	and	r8d,r9d
+	and	r15d,r11d
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	r8d,r15d
+
+	add	eax,r12d
+	add	r8d,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r8d,r14d
+
+	mov	r13d,DWORD PTR[20+rsp]
+	mov	r14d,DWORD PTR[8+rsp]
+	mov	r12d,r13d
+	mov	r15d,r14d
+
+	ror	r12d,11
+	xor	r12d,r13d
+	shr	r13d,3
+
+	ror	r12d,7
+	xor	r13d,r12d
+	mov	r12d,DWORD PTR[52+rsp]
+
+	ror	r15d,2
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	add	r12d,r13d
+	xor	r14d,r15d
+
+	add	r12d,DWORD PTR[16+rsp]
+	mov	r13d,eax
+	add	r12d,r14d
+	mov	r14d,r8d
+	ror	r13d,14
+	mov	r15d,ebx
+	mov	DWORD PTR[16+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,eax
+	xor	r15d,ecx
+
+	ror	r13d,5
+	add	r12d,edx
+	xor	r14d,r8d
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,eax
+	mov	edx,r9d
+
+	ror	r14d,11
+	xor	r13d,eax
+	xor	r15d,ecx
+
+	xor	edx,r10d
+	xor	r14d,r8d
+	add	r12d,r15d
+	mov	r15d,r9d
+
+	ror	r13d,6
+	and	edx,r8d
+	and	r15d,r10d
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	edx,r15d
+
+	add	r11d,r12d
+	add	edx,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	edx,r14d
+
+	mov	r13d,DWORD PTR[24+rsp]
+	mov	r14d,DWORD PTR[12+rsp]
+	mov	r12d,r13d
+	mov	r15d,r14d
+
+	ror	r12d,11
+	xor	r12d,r13d
+	shr	r13d,3
+
+	ror	r12d,7
+	xor	r13d,r12d
+	mov	r12d,DWORD PTR[56+rsp]
+
+	ror	r15d,2
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	add	r12d,r13d
+	xor	r14d,r15d
+
+	add	r12d,DWORD PTR[20+rsp]
+	mov	r13d,r11d
+	add	r12d,r14d
+	mov	r14d,edx
+	ror	r13d,14
+	mov	r15d,eax
+	mov	DWORD PTR[20+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,r11d
+	xor	r15d,ebx
+
+	ror	r13d,5
+	add	r12d,ecx
+	xor	r14d,edx
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,r11d
+	mov	ecx,r8d
+
+	ror	r14d,11
+	xor	r13d,r11d
+	xor	r15d,ebx
+
+	xor	ecx,r9d
+	xor	r14d,edx
+	add	r12d,r15d
+	mov	r15d,r8d
+
+	ror	r13d,6
+	and	ecx,edx
+	and	r15d,r9d
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	ecx,r15d
+
+	add	r10d,r12d
+	add	ecx,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	ecx,r14d
+
+	mov	r13d,DWORD PTR[28+rsp]
+	mov	r14d,DWORD PTR[16+rsp]
+	mov	r12d,r13d
+	mov	r15d,r14d
+
+	ror	r12d,11
+	xor	r12d,r13d
+	shr	r13d,3
+
+	ror	r12d,7
+	xor	r13d,r12d
+	mov	r12d,DWORD PTR[60+rsp]
+
+	ror	r15d,2
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	add	r12d,r13d
+	xor	r14d,r15d
+
+	add	r12d,DWORD PTR[24+rsp]
+	mov	r13d,r10d
+	add	r12d,r14d
+	mov	r14d,ecx
+	ror	r13d,14
+	mov	r15d,r11d
+	mov	DWORD PTR[24+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,r10d
+	xor	r15d,eax
+
+	ror	r13d,5
+	add	r12d,ebx
+	xor	r14d,ecx
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,r10d
+	mov	ebx,edx
+
+	ror	r14d,11
+	xor	r13d,r10d
+	xor	r15d,eax
+
+	xor	ebx,r8d
+	xor	r14d,ecx
+	add	r12d,r15d
+	mov	r15d,edx
+
+	ror	r13d,6
+	and	ebx,ecx
+	and	r15d,r8d
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	ebx,r15d
+
+	add	r9d,r12d
+	add	ebx,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	ebx,r14d
+
+	mov	r13d,DWORD PTR[32+rsp]
+	mov	r14d,DWORD PTR[20+rsp]
+	mov	r12d,r13d
+	mov	r15d,r14d
+
+	ror	r12d,11
+	xor	r12d,r13d
+	shr	r13d,3
+
+	ror	r12d,7
+	xor	r13d,r12d
+	mov	r12d,DWORD PTR[rsp]
+
+	ror	r15d,2
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	add	r12d,r13d
+	xor	r14d,r15d
+
+	add	r12d,DWORD PTR[28+rsp]
+	mov	r13d,r9d
+	add	r12d,r14d
+	mov	r14d,ebx
+	ror	r13d,14
+	mov	r15d,r10d
+	mov	DWORD PTR[28+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,r9d
+	xor	r15d,r11d
+
+	ror	r13d,5
+	add	r12d,eax
+	xor	r14d,ebx
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,r9d
+	mov	eax,ecx
+
+	ror	r14d,11
+	xor	r13d,r9d
+	xor	r15d,r11d
+
+	xor	eax,edx
+	xor	r14d,ebx
+	add	r12d,r15d
+	mov	r15d,ecx
+
+	ror	r13d,6
+	and	eax,ebx
+	and	r15d,edx
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	eax,r15d
+
+	add	r8d,r12d
+	add	eax,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	eax,r14d
+
+	mov	r13d,DWORD PTR[36+rsp]
+	mov	r14d,DWORD PTR[24+rsp]
+	mov	r12d,r13d
+	mov	r15d,r14d
+
+	ror	r12d,11
+	xor	r12d,r13d
+	shr	r13d,3
+
+	ror	r12d,7
+	xor	r13d,r12d
+	mov	r12d,DWORD PTR[4+rsp]
+
+	ror	r15d,2
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	add	r12d,r13d
+	xor	r14d,r15d
+
+	add	r12d,DWORD PTR[32+rsp]
+	mov	r13d,r8d
+	add	r12d,r14d
+	mov	r14d,eax
+	ror	r13d,14
+	mov	r15d,r9d
+	mov	DWORD PTR[32+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,r8d
+	xor	r15d,r10d
+
+	ror	r13d,5
+	add	r12d,r11d
+	xor	r14d,eax
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,r8d
+	mov	r11d,ebx
+
+	ror	r14d,11
+	xor	r13d,r8d
+	xor	r15d,r10d
+
+	xor	r11d,ecx
+	xor	r14d,eax
+	add	r12d,r15d
+	mov	r15d,ebx
+
+	ror	r13d,6
+	and	r11d,eax
+	and	r15d,ecx
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	r11d,r15d
+
+	add	edx,r12d
+	add	r11d,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r11d,r14d
+
+	mov	r13d,DWORD PTR[40+rsp]
+	mov	r14d,DWORD PTR[28+rsp]
+	mov	r12d,r13d
+	mov	r15d,r14d
+
+	ror	r12d,11
+	xor	r12d,r13d
+	shr	r13d,3
+
+	ror	r12d,7
+	xor	r13d,r12d
+	mov	r12d,DWORD PTR[8+rsp]
+
+	ror	r15d,2
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	add	r12d,r13d
+	xor	r14d,r15d
+
+	add	r12d,DWORD PTR[36+rsp]
+	mov	r13d,edx
+	add	r12d,r14d
+	mov	r14d,r11d
+	ror	r13d,14
+	mov	r15d,r8d
+	mov	DWORD PTR[36+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,edx
+	xor	r15d,r9d
+
+	ror	r13d,5
+	add	r12d,r10d
+	xor	r14d,r11d
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,edx
+	mov	r10d,eax
+
+	ror	r14d,11
+	xor	r13d,edx
+	xor	r15d,r9d
+
+	xor	r10d,ebx
+	xor	r14d,r11d
+	add	r12d,r15d
+	mov	r15d,eax
+
+	ror	r13d,6
+	and	r10d,r11d
+	and	r15d,ebx
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	r10d,r15d
+
+	add	ecx,r12d
+	add	r10d,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r10d,r14d
+
+	mov	r13d,DWORD PTR[44+rsp]
+	mov	r14d,DWORD PTR[32+rsp]
+	mov	r12d,r13d
+	mov	r15d,r14d
+
+	ror	r12d,11
+	xor	r12d,r13d
+	shr	r13d,3
+
+	ror	r12d,7
+	xor	r13d,r12d
+	mov	r12d,DWORD PTR[12+rsp]
+
+	ror	r15d,2
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	add	r12d,r13d
+	xor	r14d,r15d
+
+	add	r12d,DWORD PTR[40+rsp]
+	mov	r13d,ecx
+	add	r12d,r14d
+	mov	r14d,r10d
+	ror	r13d,14
+	mov	r15d,edx
+	mov	DWORD PTR[40+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,ecx
+	xor	r15d,r8d
+
+	ror	r13d,5
+	add	r12d,r9d
+	xor	r14d,r10d
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,ecx
+	mov	r9d,r11d
+
+	ror	r14d,11
+	xor	r13d,ecx
+	xor	r15d,r8d
+
+	xor	r9d,eax
+	xor	r14d,r10d
+	add	r12d,r15d
+	mov	r15d,r11d
+
+	ror	r13d,6
+	and	r9d,r10d
+	and	r15d,eax
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	r9d,r15d
+
+	add	ebx,r12d
+	add	r9d,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r9d,r14d
+
+	mov	r13d,DWORD PTR[48+rsp]
+	mov	r14d,DWORD PTR[36+rsp]
+	mov	r12d,r13d
+	mov	r15d,r14d
+
+	ror	r12d,11
+	xor	r12d,r13d
+	shr	r13d,3
+
+	ror	r12d,7
+	xor	r13d,r12d
+	mov	r12d,DWORD PTR[16+rsp]
+
+	ror	r15d,2
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	add	r12d,r13d
+	xor	r14d,r15d
+
+	add	r12d,DWORD PTR[44+rsp]
+	mov	r13d,ebx
+	add	r12d,r14d
+	mov	r14d,r9d
+	ror	r13d,14
+	mov	r15d,ecx
+	mov	DWORD PTR[44+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,ebx
+	xor	r15d,edx
+
+	ror	r13d,5
+	add	r12d,r8d
+	xor	r14d,r9d
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,ebx
+	mov	r8d,r10d
+
+	ror	r14d,11
+	xor	r13d,ebx
+	xor	r15d,edx
+
+	xor	r8d,r11d
+	xor	r14d,r9d
+	add	r12d,r15d
+	mov	r15d,r10d
+
+	ror	r13d,6
+	and	r8d,r9d
+	and	r15d,r11d
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	r8d,r15d
+
+	add	eax,r12d
+	add	r8d,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r8d,r14d
+
+	mov	r13d,DWORD PTR[52+rsp]
+	mov	r14d,DWORD PTR[40+rsp]
+	mov	r12d,r13d
+	mov	r15d,r14d
+
+	ror	r12d,11
+	xor	r12d,r13d
+	shr	r13d,3
+
+	ror	r12d,7
+	xor	r13d,r12d
+	mov	r12d,DWORD PTR[20+rsp]
+
+	ror	r15d,2
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	add	r12d,r13d
+	xor	r14d,r15d
+
+	add	r12d,DWORD PTR[48+rsp]
+	mov	r13d,eax
+	add	r12d,r14d
+	mov	r14d,r8d
+	ror	r13d,14
+	mov	r15d,ebx
+	mov	DWORD PTR[48+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,eax
+	xor	r15d,ecx
+
+	ror	r13d,5
+	add	r12d,edx
+	xor	r14d,r8d
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,eax
+	mov	edx,r9d
+
+	ror	r14d,11
+	xor	r13d,eax
+	xor	r15d,ecx
+
+	xor	edx,r10d
+	xor	r14d,r8d
+	add	r12d,r15d
+	mov	r15d,r9d
+
+	ror	r13d,6
+	and	edx,r8d
+	and	r15d,r10d
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	edx,r15d
+
+	add	r11d,r12d
+	add	edx,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	edx,r14d
+
+	mov	r13d,DWORD PTR[56+rsp]
+	mov	r14d,DWORD PTR[44+rsp]
+	mov	r12d,r13d
+	mov	r15d,r14d
+
+	ror	r12d,11
+	xor	r12d,r13d
+	shr	r13d,3
+
+	ror	r12d,7
+	xor	r13d,r12d
+	mov	r12d,DWORD PTR[24+rsp]
+
+	ror	r15d,2
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	add	r12d,r13d
+	xor	r14d,r15d
+
+	add	r12d,DWORD PTR[52+rsp]
+	mov	r13d,r11d
+	add	r12d,r14d
+	mov	r14d,edx
+	ror	r13d,14
+	mov	r15d,eax
+	mov	DWORD PTR[52+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,r11d
+	xor	r15d,ebx
+
+	ror	r13d,5
+	add	r12d,ecx
+	xor	r14d,edx
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,r11d
+	mov	ecx,r8d
+
+	ror	r14d,11
+	xor	r13d,r11d
+	xor	r15d,ebx
+
+	xor	ecx,r9d
+	xor	r14d,edx
+	add	r12d,r15d
+	mov	r15d,r8d
+
+	ror	r13d,6
+	and	ecx,edx
+	and	r15d,r9d
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	ecx,r15d
+
+	add	r10d,r12d
+	add	ecx,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	ecx,r14d
+
+	mov	r13d,DWORD PTR[60+rsp]
+	mov	r14d,DWORD PTR[48+rsp]
+	mov	r12d,r13d
+	mov	r15d,r14d
+
+	ror	r12d,11
+	xor	r12d,r13d
+	shr	r13d,3
+
+	ror	r12d,7
+	xor	r13d,r12d
+	mov	r12d,DWORD PTR[28+rsp]
+
+	ror	r15d,2
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	add	r12d,r13d
+	xor	r14d,r15d
+
+	add	r12d,DWORD PTR[56+rsp]
+	mov	r13d,r10d
+	add	r12d,r14d
+	mov	r14d,ecx
+	ror	r13d,14
+	mov	r15d,r11d
+	mov	DWORD PTR[56+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,r10d
+	xor	r15d,eax
+
+	ror	r13d,5
+	add	r12d,ebx
+	xor	r14d,ecx
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,r10d
+	mov	ebx,edx
+
+	ror	r14d,11
+	xor	r13d,r10d
+	xor	r15d,eax
+
+	xor	ebx,r8d
+	xor	r14d,ecx
+	add	r12d,r15d
+	mov	r15d,edx
+
+	ror	r13d,6
+	and	ebx,ecx
+	and	r15d,r8d
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	ebx,r15d
+
+	add	r9d,r12d
+	add	ebx,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	ebx,r14d
+
+	mov	r13d,DWORD PTR[rsp]
+	mov	r14d,DWORD PTR[52+rsp]
+	mov	r12d,r13d
+	mov	r15d,r14d
+
+	ror	r12d,11
+	xor	r12d,r13d
+	shr	r13d,3
+
+	ror	r12d,7
+	xor	r13d,r12d
+	mov	r12d,DWORD PTR[32+rsp]
+
+	ror	r15d,2
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	add	r12d,r13d
+	xor	r14d,r15d
+
+	add	r12d,DWORD PTR[60+rsp]
+	mov	r13d,r9d
+	add	r12d,r14d
+	mov	r14d,ebx
+	ror	r13d,14
+	mov	r15d,r10d
+	mov	DWORD PTR[60+rsp],r12d
+
+	ror	r14d,9
+	xor	r13d,r9d
+	xor	r15d,r11d
+
+	ror	r13d,5
+	add	r12d,eax
+	xor	r14d,ebx
+
+	add	r12d,DWORD PTR[rdi*4+rbp]
+	and	r15d,r9d
+	mov	eax,ecx
+
+	ror	r14d,11
+	xor	r13d,r9d
+	xor	r15d,r11d
+
+	xor	eax,edx
+	xor	r14d,ebx
+	add	r12d,r15d
+	mov	r15d,ecx
+
+	ror	r13d,6
+	and	eax,ebx
+	and	r15d,edx
+
+	ror	r14d,2
+	add	r12d,r13d
+	add	eax,r15d
+
+	add	r8d,r12d
+	add	eax,r12d
+	lea	rdi,QWORD PTR[1+rdi]
+	add	eax,r14d
+
+	cmp	rdi,64
+	jb	$L$rounds_16_xx
+
+	mov	rdi,QWORD PTR[((64+0))+rsp]
+	lea	rsi,QWORD PTR[64+rsi]
+
+	add	eax,DWORD PTR[rdi]
+	add	ebx,DWORD PTR[4+rdi]
+	add	ecx,DWORD PTR[8+rdi]
+	add	edx,DWORD PTR[12+rdi]
+	add	r8d,DWORD PTR[16+rdi]
+	add	r9d,DWORD PTR[20+rdi]
+	add	r10d,DWORD PTR[24+rdi]
+	add	r11d,DWORD PTR[28+rdi]
+
+	cmp	rsi,QWORD PTR[((64+16))+rsp]
+
+	mov	DWORD PTR[rdi],eax
+	mov	DWORD PTR[4+rdi],ebx
+	mov	DWORD PTR[8+rdi],ecx
+	mov	DWORD PTR[12+rdi],edx
+	mov	DWORD PTR[16+rdi],r8d
+	mov	DWORD PTR[20+rdi],r9d
+	mov	DWORD PTR[24+rdi],r10d
+	mov	DWORD PTR[28+rdi],r11d
+	jb	$L$loop
+
+	mov	rsi,QWORD PTR[((64+24))+rsp]
+	mov	r15,QWORD PTR[rsi]
+	mov	r14,QWORD PTR[8+rsi]
+	mov	r13,QWORD PTR[16+rsi]
+	mov	r12,QWORD PTR[24+rsi]
+	mov	rbp,QWORD PTR[32+rsi]
+	mov	rbx,QWORD PTR[40+rsi]
+	lea	rsp,QWORD PTR[48+rsi]
+$L$epilogue::
+	mov	rdi,QWORD PTR[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD PTR[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_sha256_block_data_order::
+sha256_block_data_order	ENDP
+ALIGN	64
+
+K256::
+	DD	0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h
+	DD	03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h
+	DD	0d807aa98h,012835b01h,0243185beh,0550c7dc3h
+	DD	072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h
+	DD	0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch
+	DD	02de92c6fh,04a7484aah,05cb0a9dch,076f988dah
+	DD	0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h
+	DD	0c6e00bf3h,0d5a79147h,006ca6351h,014292967h
+	DD	027b70a85h,02e1b2138h,04d2c6dfch,053380d13h
+	DD	0650a7354h,0766a0abbh,081c2c92eh,092722c85h
+	DD	0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h
+	DD	0d192e819h,0d6990624h,0f40e3585h,0106aa070h
+	DD	019a4c116h,01e376c08h,02748774ch,034b0bcb5h
+	DD	0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h
+	DD	0748f82eeh,078a5636fh,084c87814h,08cc70208h
+	DD	090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h
+
+.text$	ENDS
+END
+
diff --git a/ext/libressl/crypto/sha/sha256-mingw64-x86_64.S b/ext/libressl/crypto/sha/sha256-mingw64-x86_64.S
new file mode 100644
index 0000000..3de981b
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha256-mingw64-x86_64.S
@@ -0,0 +1,1790 @@
+#include "x86_arch.h"
+.text	
+
+.globl	sha256_block_data_order
+.def	sha256_block_data_order;	.scl 2;	.type 32;	.endef
+.p2align	4
+sha256_block_data_order:
+	movq	%rdi,8(%rsp)
+	movq	%rsi,16(%rsp)
+	movq	%rsp,%rax
+.LSEH_begin_sha256_block_data_order:
+	movq	%rcx,%rdi
+	movq	%rdx,%rsi
+	movq	%r8,%rdx
+	movq	%r9,%rcx
+
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	movq	%rsp,%r11
+	shlq	$4,%rdx
+	subq	$64+32,%rsp
+	leaq	(%rsi,%rdx,4),%rdx
+	andq	$-64,%rsp
+	movq	%rdi,64+0(%rsp)
+	movq	%rsi,64+8(%rsp)
+	movq	%rdx,64+16(%rsp)
+	movq	%r11,64+24(%rsp)
+.Lprologue:
+
+	leaq	K256(%rip),%rbp
+
+	movl	0(%rdi),%eax
+	movl	4(%rdi),%ebx
+	movl	8(%rdi),%ecx
+	movl	12(%rdi),%edx
+	movl	16(%rdi),%r8d
+	movl	20(%rdi),%r9d
+	movl	24(%rdi),%r10d
+	movl	28(%rdi),%r11d
+	jmp	.Lloop
+
+.p2align	4
+.Lloop:
+	xorq	%rdi,%rdi
+	movl	0(%rsi),%r12d
+	movl	%r8d,%r13d
+	movl	%eax,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+	movl	%r12d,0(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%eax,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r8d,%r15d
+	movl	%ebx,%r11d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	xorl	%ecx,%r11d
+	xorl	%eax,%r14d
+	addl	%r15d,%r12d
+	movl	%ebx,%r15d
+
+	rorl	$6,%r13d
+	andl	%eax,%r11d
+	andl	%ecx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r11d
+
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r11d
+
+	movl	4(%rsi),%r12d
+	movl	%edx,%r13d
+	movl	%r11d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r8d,%r15d
+	movl	%r12d,4(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r11d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%edx,%r15d
+	movl	%eax,%r10d
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	xorl	%ebx,%r10d
+	xorl	%r11d,%r14d
+	addl	%r15d,%r12d
+	movl	%eax,%r15d
+
+	rorl	$6,%r13d
+	andl	%r11d,%r10d
+	andl	%ebx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r10d
+
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r10d
+
+	movl	8(%rsi),%r12d
+	movl	%ecx,%r13d
+	movl	%r10d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+	movl	%r12d,8(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r10d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ecx,%r15d
+	movl	%r11d,%r9d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	xorl	%eax,%r9d
+	xorl	%r10d,%r14d
+	addl	%r15d,%r12d
+	movl	%r11d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r10d,%r9d
+	andl	%eax,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r9d
+
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r9d
+
+	movl	12(%rsi),%r12d
+	movl	%ebx,%r13d
+	movl	%r9d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ecx,%r15d
+	movl	%r12d,12(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%r9d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ebx,%r15d
+	movl	%r10d,%r8d
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	xorl	%r11d,%r8d
+	xorl	%r9d,%r14d
+	addl	%r15d,%r12d
+	movl	%r10d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r9d,%r8d
+	andl	%r11d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r8d
+
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r8d
+
+	movl	16(%rsi),%r12d
+	movl	%eax,%r13d
+	movl	%r8d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+	movl	%r12d,16(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%r8d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%eax,%r15d
+	movl	%r9d,%edx
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	xorl	%r10d,%edx
+	xorl	%r8d,%r14d
+	addl	%r15d,%r12d
+	movl	%r9d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r8d,%edx
+	andl	%r10d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%edx
+
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%edx
+
+	movl	20(%rsi),%r12d
+	movl	%r11d,%r13d
+	movl	%edx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%eax,%r15d
+	movl	%r12d,20(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%edx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r11d,%r15d
+	movl	%r8d,%ecx
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	xorl	%r9d,%ecx
+	xorl	%edx,%r14d
+	addl	%r15d,%r12d
+	movl	%r8d,%r15d
+
+	rorl	$6,%r13d
+	andl	%edx,%ecx
+	andl	%r9d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ecx
+
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ecx
+
+	movl	24(%rsi),%r12d
+	movl	%r10d,%r13d
+	movl	%ecx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+	movl	%r12d,24(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%ecx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r10d,%r15d
+	movl	%edx,%ebx
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	xorl	%r8d,%ebx
+	xorl	%ecx,%r14d
+	addl	%r15d,%r12d
+	movl	%edx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ecx,%ebx
+	andl	%r8d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ebx
+
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ebx
+
+	movl	28(%rsi),%r12d
+	movl	%r9d,%r13d
+	movl	%ebx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r10d,%r15d
+	movl	%r12d,28(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%ebx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r9d,%r15d
+	movl	%ecx,%eax
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	xorl	%edx,%eax
+	xorl	%ebx,%r14d
+	addl	%r15d,%r12d
+	movl	%ecx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ebx,%eax
+	andl	%edx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%eax
+
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%eax
+
+	movl	32(%rsi),%r12d
+	movl	%r8d,%r13d
+	movl	%eax,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+	movl	%r12d,32(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%eax,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r8d,%r15d
+	movl	%ebx,%r11d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	xorl	%ecx,%r11d
+	xorl	%eax,%r14d
+	addl	%r15d,%r12d
+	movl	%ebx,%r15d
+
+	rorl	$6,%r13d
+	andl	%eax,%r11d
+	andl	%ecx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r11d
+
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r11d
+
+	movl	36(%rsi),%r12d
+	movl	%edx,%r13d
+	movl	%r11d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r8d,%r15d
+	movl	%r12d,36(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r11d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%edx,%r15d
+	movl	%eax,%r10d
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	xorl	%ebx,%r10d
+	xorl	%r11d,%r14d
+	addl	%r15d,%r12d
+	movl	%eax,%r15d
+
+	rorl	$6,%r13d
+	andl	%r11d,%r10d
+	andl	%ebx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r10d
+
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r10d
+
+	movl	40(%rsi),%r12d
+	movl	%ecx,%r13d
+	movl	%r10d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+	movl	%r12d,40(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r10d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ecx,%r15d
+	movl	%r11d,%r9d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	xorl	%eax,%r9d
+	xorl	%r10d,%r14d
+	addl	%r15d,%r12d
+	movl	%r11d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r10d,%r9d
+	andl	%eax,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r9d
+
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r9d
+
+	movl	44(%rsi),%r12d
+	movl	%ebx,%r13d
+	movl	%r9d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ecx,%r15d
+	movl	%r12d,44(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%r9d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ebx,%r15d
+	movl	%r10d,%r8d
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	xorl	%r11d,%r8d
+	xorl	%r9d,%r14d
+	addl	%r15d,%r12d
+	movl	%r10d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r9d,%r8d
+	andl	%r11d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r8d
+
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r8d
+
+	movl	48(%rsi),%r12d
+	movl	%eax,%r13d
+	movl	%r8d,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+	movl	%r12d,48(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%r8d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%eax,%r15d
+	movl	%r9d,%edx
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	xorl	%r10d,%edx
+	xorl	%r8d,%r14d
+	addl	%r15d,%r12d
+	movl	%r9d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r8d,%edx
+	andl	%r10d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%edx
+
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%edx
+
+	movl	52(%rsi),%r12d
+	movl	%r11d,%r13d
+	movl	%edx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%eax,%r15d
+	movl	%r12d,52(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%edx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r11d,%r15d
+	movl	%r8d,%ecx
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	xorl	%r9d,%ecx
+	xorl	%edx,%r14d
+	addl	%r15d,%r12d
+	movl	%r8d,%r15d
+
+	rorl	$6,%r13d
+	andl	%edx,%ecx
+	andl	%r9d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ecx
+
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ecx
+
+	movl	56(%rsi),%r12d
+	movl	%r10d,%r13d
+	movl	%ecx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+	movl	%r12d,56(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%ecx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r10d,%r15d
+	movl	%edx,%ebx
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	xorl	%r8d,%ebx
+	xorl	%ecx,%r14d
+	addl	%r15d,%r12d
+	movl	%edx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ecx,%ebx
+	andl	%r8d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ebx
+
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ebx
+
+	movl	60(%rsi),%r12d
+	movl	%r9d,%r13d
+	movl	%ebx,%r14d
+	bswapl	%r12d
+	rorl	$14,%r13d
+	movl	%r10d,%r15d
+	movl	%r12d,60(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%ebx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r9d,%r15d
+	movl	%ecx,%eax
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	xorl	%edx,%eax
+	xorl	%ebx,%r14d
+	addl	%r15d,%r12d
+	movl	%ecx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ebx,%eax
+	andl	%edx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%eax
+
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%eax
+
+	jmp	.Lrounds_16_xx
+.p2align	4
+.Lrounds_16_xx:
+	movl	4(%rsp),%r13d
+	movl	56(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	36(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	0(%rsp),%r12d
+	movl	%r8d,%r13d
+	addl	%r14d,%r12d
+	movl	%eax,%r14d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+	movl	%r12d,0(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%eax,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r8d,%r15d
+	movl	%ebx,%r11d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	xorl	%ecx,%r11d
+	xorl	%eax,%r14d
+	addl	%r15d,%r12d
+	movl	%ebx,%r15d
+
+	rorl	$6,%r13d
+	andl	%eax,%r11d
+	andl	%ecx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r11d
+
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r11d
+
+	movl	8(%rsp),%r13d
+	movl	60(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	40(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	4(%rsp),%r12d
+	movl	%edx,%r13d
+	addl	%r14d,%r12d
+	movl	%r11d,%r14d
+	rorl	$14,%r13d
+	movl	%r8d,%r15d
+	movl	%r12d,4(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r11d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%edx,%r15d
+	movl	%eax,%r10d
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	xorl	%ebx,%r10d
+	xorl	%r11d,%r14d
+	addl	%r15d,%r12d
+	movl	%eax,%r15d
+
+	rorl	$6,%r13d
+	andl	%r11d,%r10d
+	andl	%ebx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r10d
+
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r10d
+
+	movl	12(%rsp),%r13d
+	movl	0(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	44(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	8(%rsp),%r12d
+	movl	%ecx,%r13d
+	addl	%r14d,%r12d
+	movl	%r10d,%r14d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+	movl	%r12d,8(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r10d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ecx,%r15d
+	movl	%r11d,%r9d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	xorl	%eax,%r9d
+	xorl	%r10d,%r14d
+	addl	%r15d,%r12d
+	movl	%r11d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r10d,%r9d
+	andl	%eax,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r9d
+
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r9d
+
+	movl	16(%rsp),%r13d
+	movl	4(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	48(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	12(%rsp),%r12d
+	movl	%ebx,%r13d
+	addl	%r14d,%r12d
+	movl	%r9d,%r14d
+	rorl	$14,%r13d
+	movl	%ecx,%r15d
+	movl	%r12d,12(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%r9d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ebx,%r15d
+	movl	%r10d,%r8d
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	xorl	%r11d,%r8d
+	xorl	%r9d,%r14d
+	addl	%r15d,%r12d
+	movl	%r10d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r9d,%r8d
+	andl	%r11d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r8d
+
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r8d
+
+	movl	20(%rsp),%r13d
+	movl	8(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	52(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	16(%rsp),%r12d
+	movl	%eax,%r13d
+	addl	%r14d,%r12d
+	movl	%r8d,%r14d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+	movl	%r12d,16(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%r8d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%eax,%r15d
+	movl	%r9d,%edx
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	xorl	%r10d,%edx
+	xorl	%r8d,%r14d
+	addl	%r15d,%r12d
+	movl	%r9d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r8d,%edx
+	andl	%r10d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%edx
+
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%edx
+
+	movl	24(%rsp),%r13d
+	movl	12(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	56(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	20(%rsp),%r12d
+	movl	%r11d,%r13d
+	addl	%r14d,%r12d
+	movl	%edx,%r14d
+	rorl	$14,%r13d
+	movl	%eax,%r15d
+	movl	%r12d,20(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%edx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r11d,%r15d
+	movl	%r8d,%ecx
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	xorl	%r9d,%ecx
+	xorl	%edx,%r14d
+	addl	%r15d,%r12d
+	movl	%r8d,%r15d
+
+	rorl	$6,%r13d
+	andl	%edx,%ecx
+	andl	%r9d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ecx
+
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ecx
+
+	movl	28(%rsp),%r13d
+	movl	16(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	60(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	24(%rsp),%r12d
+	movl	%r10d,%r13d
+	addl	%r14d,%r12d
+	movl	%ecx,%r14d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+	movl	%r12d,24(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%ecx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r10d,%r15d
+	movl	%edx,%ebx
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	xorl	%r8d,%ebx
+	xorl	%ecx,%r14d
+	addl	%r15d,%r12d
+	movl	%edx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ecx,%ebx
+	andl	%r8d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ebx
+
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ebx
+
+	movl	32(%rsp),%r13d
+	movl	20(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	0(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	28(%rsp),%r12d
+	movl	%r9d,%r13d
+	addl	%r14d,%r12d
+	movl	%ebx,%r14d
+	rorl	$14,%r13d
+	movl	%r10d,%r15d
+	movl	%r12d,28(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%ebx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r9d,%r15d
+	movl	%ecx,%eax
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	xorl	%edx,%eax
+	xorl	%ebx,%r14d
+	addl	%r15d,%r12d
+	movl	%ecx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ebx,%eax
+	andl	%edx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%eax
+
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%eax
+
+	movl	36(%rsp),%r13d
+	movl	24(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	4(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	32(%rsp),%r12d
+	movl	%r8d,%r13d
+	addl	%r14d,%r12d
+	movl	%eax,%r14d
+	rorl	$14,%r13d
+	movl	%r9d,%r15d
+	movl	%r12d,32(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r11d,%r12d
+	xorl	%eax,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r8d,%r15d
+	movl	%ebx,%r11d
+
+	rorl	$11,%r14d
+	xorl	%r8d,%r13d
+	xorl	%r10d,%r15d
+
+	xorl	%ecx,%r11d
+	xorl	%eax,%r14d
+	addl	%r15d,%r12d
+	movl	%ebx,%r15d
+
+	rorl	$6,%r13d
+	andl	%eax,%r11d
+	andl	%ecx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r11d
+
+	addl	%r12d,%edx
+	addl	%r12d,%r11d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r11d
+
+	movl	40(%rsp),%r13d
+	movl	28(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	8(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	36(%rsp),%r12d
+	movl	%edx,%r13d
+	addl	%r14d,%r12d
+	movl	%r11d,%r14d
+	rorl	$14,%r13d
+	movl	%r8d,%r15d
+	movl	%r12d,36(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r10d,%r12d
+	xorl	%r11d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%edx,%r15d
+	movl	%eax,%r10d
+
+	rorl	$11,%r14d
+	xorl	%edx,%r13d
+	xorl	%r9d,%r15d
+
+	xorl	%ebx,%r10d
+	xorl	%r11d,%r14d
+	addl	%r15d,%r12d
+	movl	%eax,%r15d
+
+	rorl	$6,%r13d
+	andl	%r11d,%r10d
+	andl	%ebx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r10d
+
+	addl	%r12d,%ecx
+	addl	%r12d,%r10d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r10d
+
+	movl	44(%rsp),%r13d
+	movl	32(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	12(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	40(%rsp),%r12d
+	movl	%ecx,%r13d
+	addl	%r14d,%r12d
+	movl	%r10d,%r14d
+	rorl	$14,%r13d
+	movl	%edx,%r15d
+	movl	%r12d,40(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	rorl	$5,%r13d
+	addl	%r9d,%r12d
+	xorl	%r10d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ecx,%r15d
+	movl	%r11d,%r9d
+
+	rorl	$11,%r14d
+	xorl	%ecx,%r13d
+	xorl	%r8d,%r15d
+
+	xorl	%eax,%r9d
+	xorl	%r10d,%r14d
+	addl	%r15d,%r12d
+	movl	%r11d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r10d,%r9d
+	andl	%eax,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r9d
+
+	addl	%r12d,%ebx
+	addl	%r12d,%r9d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r9d
+
+	movl	48(%rsp),%r13d
+	movl	36(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	16(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	44(%rsp),%r12d
+	movl	%ebx,%r13d
+	addl	%r14d,%r12d
+	movl	%r9d,%r14d
+	rorl	$14,%r13d
+	movl	%ecx,%r15d
+	movl	%r12d,44(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	rorl	$5,%r13d
+	addl	%r8d,%r12d
+	xorl	%r9d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%ebx,%r15d
+	movl	%r10d,%r8d
+
+	rorl	$11,%r14d
+	xorl	%ebx,%r13d
+	xorl	%edx,%r15d
+
+	xorl	%r11d,%r8d
+	xorl	%r9d,%r14d
+	addl	%r15d,%r12d
+	movl	%r10d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r9d,%r8d
+	andl	%r11d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%r8d
+
+	addl	%r12d,%eax
+	addl	%r12d,%r8d
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%r8d
+
+	movl	52(%rsp),%r13d
+	movl	40(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	20(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	48(%rsp),%r12d
+	movl	%eax,%r13d
+	addl	%r14d,%r12d
+	movl	%r8d,%r14d
+	rorl	$14,%r13d
+	movl	%ebx,%r15d
+	movl	%r12d,48(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	rorl	$5,%r13d
+	addl	%edx,%r12d
+	xorl	%r8d,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%eax,%r15d
+	movl	%r9d,%edx
+
+	rorl	$11,%r14d
+	xorl	%eax,%r13d
+	xorl	%ecx,%r15d
+
+	xorl	%r10d,%edx
+	xorl	%r8d,%r14d
+	addl	%r15d,%r12d
+	movl	%r9d,%r15d
+
+	rorl	$6,%r13d
+	andl	%r8d,%edx
+	andl	%r10d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%edx
+
+	addl	%r12d,%r11d
+	addl	%r12d,%edx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%edx
+
+	movl	56(%rsp),%r13d
+	movl	44(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	24(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	52(%rsp),%r12d
+	movl	%r11d,%r13d
+	addl	%r14d,%r12d
+	movl	%edx,%r14d
+	rorl	$14,%r13d
+	movl	%eax,%r15d
+	movl	%r12d,52(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	rorl	$5,%r13d
+	addl	%ecx,%r12d
+	xorl	%edx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r11d,%r15d
+	movl	%r8d,%ecx
+
+	rorl	$11,%r14d
+	xorl	%r11d,%r13d
+	xorl	%ebx,%r15d
+
+	xorl	%r9d,%ecx
+	xorl	%edx,%r14d
+	addl	%r15d,%r12d
+	movl	%r8d,%r15d
+
+	rorl	$6,%r13d
+	andl	%edx,%ecx
+	andl	%r9d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ecx
+
+	addl	%r12d,%r10d
+	addl	%r12d,%ecx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ecx
+
+	movl	60(%rsp),%r13d
+	movl	48(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	28(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	56(%rsp),%r12d
+	movl	%r10d,%r13d
+	addl	%r14d,%r12d
+	movl	%ecx,%r14d
+	rorl	$14,%r13d
+	movl	%r11d,%r15d
+	movl	%r12d,56(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	rorl	$5,%r13d
+	addl	%ebx,%r12d
+	xorl	%ecx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r10d,%r15d
+	movl	%edx,%ebx
+
+	rorl	$11,%r14d
+	xorl	%r10d,%r13d
+	xorl	%eax,%r15d
+
+	xorl	%r8d,%ebx
+	xorl	%ecx,%r14d
+	addl	%r15d,%r12d
+	movl	%edx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ecx,%ebx
+	andl	%r8d,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%ebx
+
+	addl	%r12d,%r9d
+	addl	%r12d,%ebx
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%ebx
+
+	movl	0(%rsp),%r13d
+	movl	52(%rsp),%r14d
+	movl	%r13d,%r12d
+	movl	%r14d,%r15d
+
+	rorl	$11,%r12d
+	xorl	%r13d,%r12d
+	shrl	$3,%r13d
+
+	rorl	$7,%r12d
+	xorl	%r12d,%r13d
+	movl	32(%rsp),%r12d
+
+	rorl	$2,%r15d
+	xorl	%r14d,%r15d
+	shrl	$10,%r14d
+
+	rorl	$17,%r15d
+	addl	%r13d,%r12d
+	xorl	%r15d,%r14d
+
+	addl	60(%rsp),%r12d
+	movl	%r9d,%r13d
+	addl	%r14d,%r12d
+	movl	%ebx,%r14d
+	rorl	$14,%r13d
+	movl	%r10d,%r15d
+	movl	%r12d,60(%rsp)
+
+	rorl	$9,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	rorl	$5,%r13d
+	addl	%eax,%r12d
+	xorl	%ebx,%r14d
+
+	addl	(%rbp,%rdi,4),%r12d
+	andl	%r9d,%r15d
+	movl	%ecx,%eax
+
+	rorl	$11,%r14d
+	xorl	%r9d,%r13d
+	xorl	%r11d,%r15d
+
+	xorl	%edx,%eax
+	xorl	%ebx,%r14d
+	addl	%r15d,%r12d
+	movl	%ecx,%r15d
+
+	rorl	$6,%r13d
+	andl	%ebx,%eax
+	andl	%edx,%r15d
+
+	rorl	$2,%r14d
+	addl	%r13d,%r12d
+	addl	%r15d,%eax
+
+	addl	%r12d,%r8d
+	addl	%r12d,%eax
+	leaq	1(%rdi),%rdi
+	addl	%r14d,%eax
+
+	cmpq	$64,%rdi
+	jb	.Lrounds_16_xx
+
+	movq	64+0(%rsp),%rdi
+	leaq	64(%rsi),%rsi
+
+	addl	0(%rdi),%eax
+	addl	4(%rdi),%ebx
+	addl	8(%rdi),%ecx
+	addl	12(%rdi),%edx
+	addl	16(%rdi),%r8d
+	addl	20(%rdi),%r9d
+	addl	24(%rdi),%r10d
+	addl	28(%rdi),%r11d
+
+	cmpq	64+16(%rsp),%rsi
+
+	movl	%eax,0(%rdi)
+	movl	%ebx,4(%rdi)
+	movl	%ecx,8(%rdi)
+	movl	%edx,12(%rdi)
+	movl	%r8d,16(%rdi)
+	movl	%r9d,20(%rdi)
+	movl	%r10d,24(%rdi)
+	movl	%r11d,28(%rdi)
+	jb	.Lloop
+
+	movq	64+24(%rsp),%rsi
+	movq	(%rsi),%r15
+	movq	8(%rsi),%r14
+	movq	16(%rsi),%r13
+	movq	24(%rsi),%r12
+	movq	32(%rsi),%rbp
+	movq	40(%rsi),%rbx
+	leaq	48(%rsi),%rsp
+.Lepilogue:
+	movq	8(%rsp),%rdi
+	movq	16(%rsp),%rsi
+	retq
+.LSEH_end_sha256_block_data_order:
+.p2align	6
+
+K256:
+.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
diff --git a/ext/libressl/crypto/sha/sha256.c b/ext/libressl/crypto/sha/sha256.c
new file mode 100644
index 0000000..9c05d3b
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha256.c
@@ -0,0 +1,284 @@
+/* $OpenBSD: sha256.c,v 1.10 2019/01/21 23:20:31 jsg Exp $ */
+/* ====================================================================
+ * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
+ * according to the OpenSSL license [found in ../../LICENSE].
+ * ====================================================================
+ */
+
+#include <openssl/opensslconf.h>
+
+#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA256)
+
+#include <machine/endian.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <openssl/crypto.h>
+#include <openssl/sha.h>
+#include <openssl/opensslv.h>
+
+int SHA224_Init(SHA256_CTX *c)
+	{
+	memset (c,0,sizeof(*c));
+	c->h[0]=0xc1059ed8UL;	c->h[1]=0x367cd507UL;
+	c->h[2]=0x3070dd17UL;	c->h[3]=0xf70e5939UL;
+	c->h[4]=0xffc00b31UL;	c->h[5]=0x68581511UL;
+	c->h[6]=0x64f98fa7UL;	c->h[7]=0xbefa4fa4UL;
+	c->md_len=SHA224_DIGEST_LENGTH;
+	return 1;
+	}
+
+int SHA256_Init(SHA256_CTX *c)
+	{
+	memset (c,0,sizeof(*c));
+	c->h[0]=0x6a09e667UL;	c->h[1]=0xbb67ae85UL;
+	c->h[2]=0x3c6ef372UL;	c->h[3]=0xa54ff53aUL;
+	c->h[4]=0x510e527fUL;	c->h[5]=0x9b05688cUL;
+	c->h[6]=0x1f83d9abUL;	c->h[7]=0x5be0cd19UL;
+	c->md_len=SHA256_DIGEST_LENGTH;
+	return 1;
+	}
+
+unsigned char *SHA224(const unsigned char *d, size_t n, unsigned char *md)
+	{
+	SHA256_CTX c;
+	static unsigned char m[SHA224_DIGEST_LENGTH];
+
+	if (md == NULL) md=m;
+	SHA224_Init(&c);
+	SHA256_Update(&c,d,n);
+	SHA256_Final(md,&c);
+	explicit_bzero(&c,sizeof(c));
+	return(md);
+	}
+
+unsigned char *SHA256(const unsigned char *d, size_t n, unsigned char *md)
+	{
+	SHA256_CTX c;
+	static unsigned char m[SHA256_DIGEST_LENGTH];
+
+	if (md == NULL) md=m;
+	SHA256_Init(&c);
+	SHA256_Update(&c,d,n);
+	SHA256_Final(md,&c);
+	explicit_bzero(&c,sizeof(c));
+	return(md);
+	}
+
+int SHA224_Update(SHA256_CTX *c, const void *data, size_t len)
+{   return SHA256_Update (c,data,len);   }
+int SHA224_Final (unsigned char *md, SHA256_CTX *c)
+{   return SHA256_Final (md,c);   }
+
+#define	DATA_ORDER_IS_BIG_ENDIAN
+
+#define	HASH_LONG		SHA_LONG
+#define	HASH_CTX		SHA256_CTX
+#define	HASH_CBLOCK		SHA_CBLOCK
+/*
+ * Note that FIPS180-2 discusses "Truncation of the Hash Function Output."
+ * default: case below covers for it. It's not clear however if it's
+ * permitted to truncate to amount of bytes not divisible by 4. I bet not,
+ * but if it is, then default: case shall be extended. For reference.
+ * Idea behind separate cases for pre-defined lengths is to let the
+ * compiler decide if it's appropriate to unroll small loops.
+ */
+#define	HASH_MAKE_STRING(c,s)	do {	\
+	unsigned long ll;		\
+	unsigned int  nn;		\
+	switch ((c)->md_len)		\
+	{   case SHA224_DIGEST_LENGTH:	\
+		for (nn=0;nn<SHA224_DIGEST_LENGTH/4;nn++)	\
+		{   ll=(c)->h[nn]; HOST_l2c(ll,(s));   }	\
+		break;			\
+	    case SHA256_DIGEST_LENGTH:	\
+		for (nn=0;nn<SHA256_DIGEST_LENGTH/4;nn++)	\
+		{   ll=(c)->h[nn]; HOST_l2c(ll,(s));   }	\
+		break;			\
+	    default:			\
+		if ((c)->md_len > SHA256_DIGEST_LENGTH)	\
+		    return 0;				\
+		for (nn=0;nn<(c)->md_len/4;nn++)		\
+		{   ll=(c)->h[nn]; HOST_l2c(ll,(s));   }	\
+		break;			\
+	}				\
+	} while (0)
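+/*
+ * For reference: HOST_l2c comes from md32_common.h (included below) and,
+ * with DATA_ORDER_IS_BIG_ENDIAN in effect, is expected to emit the 32-bit
+ * word most significant byte first and advance the output pointer, so a
+ * state word such as 0x6a09e667 is serialized as the bytes 6a 09 e6 67.
+ */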
+
+#define	HASH_UPDATE		SHA256_Update
+#define	HASH_TRANSFORM		SHA256_Transform
+#define	HASH_FINAL		SHA256_Final
+#define	HASH_BLOCK_DATA_ORDER	sha256_block_data_order
+#ifndef SHA256_ASM
+static
+#endif
+void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num);
+
+#include "md32_common.h"
+
+#ifndef SHA256_ASM
+static const SHA_LONG K256[64] = {
+	0x428a2f98UL,0x71374491UL,0xb5c0fbcfUL,0xe9b5dba5UL,
+	0x3956c25bUL,0x59f111f1UL,0x923f82a4UL,0xab1c5ed5UL,
+	0xd807aa98UL,0x12835b01UL,0x243185beUL,0x550c7dc3UL,
+	0x72be5d74UL,0x80deb1feUL,0x9bdc06a7UL,0xc19bf174UL,
+	0xe49b69c1UL,0xefbe4786UL,0x0fc19dc6UL,0x240ca1ccUL,
+	0x2de92c6fUL,0x4a7484aaUL,0x5cb0a9dcUL,0x76f988daUL,
+	0x983e5152UL,0xa831c66dUL,0xb00327c8UL,0xbf597fc7UL,
+	0xc6e00bf3UL,0xd5a79147UL,0x06ca6351UL,0x14292967UL,
+	0x27b70a85UL,0x2e1b2138UL,0x4d2c6dfcUL,0x53380d13UL,
+	0x650a7354UL,0x766a0abbUL,0x81c2c92eUL,0x92722c85UL,
+	0xa2bfe8a1UL,0xa81a664bUL,0xc24b8b70UL,0xc76c51a3UL,
+	0xd192e819UL,0xd6990624UL,0xf40e3585UL,0x106aa070UL,
+	0x19a4c116UL,0x1e376c08UL,0x2748774cUL,0x34b0bcb5UL,
+	0x391c0cb3UL,0x4ed8aa4aUL,0x5b9cca4fUL,0x682e6ff3UL,
+	0x748f82eeUL,0x78a5636fUL,0x84c87814UL,0x8cc70208UL,
+	0x90befffaUL,0xa4506cebUL,0xbef9a3f7UL,0xc67178f2UL };
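+/*
+ * For reference: K256[i] holds the first 32 bits of the fractional part of
+ * the cube root of the i-th prime, e.g. cbrt(2) = 1.2599210499..., and
+ * 0.2599210499... * 2^32 truncates to 0x428a2f98, the first entry above.
+ */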
+
+/*
+ * FIPS specification refers to right rotations, while our ROTATE macro
+ * is left one. This is why you might notice that rotation coefficients
+ * differ from those observed in FIPS document by 32-N...
+ */
+#define Sigma0(x)	(ROTATE((x),30) ^ ROTATE((x),19) ^ ROTATE((x),10))
+#define Sigma1(x)	(ROTATE((x),26) ^ ROTATE((x),21) ^ ROTATE((x),7))
+#define sigma0(x)	(ROTATE((x),25) ^ ROTATE((x),14) ^ ((x)>>3))
+#define sigma1(x)	(ROTATE((x),15) ^ ROTATE((x),13) ^ ((x)>>10))
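+/*
+ * For reference: rotating a 32-bit word right by N equals rotating it left
+ * by 32-N, so the FIPS-180-2 amounts map as follows: Sigma0 {2,13,22} ->
+ * ROTATE {30,19,10}, Sigma1 {6,11,25} -> {26,21,7}, sigma0 {7,18} ->
+ * {25,14}, sigma1 {17,19} -> {15,13}; the plain shifts >>3 and >>10 are
+ * shifts in the specification as well and stay unchanged.
+ */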
+
+#define Ch(x,y,z)	(((x) & (y)) ^ ((~(x)) & (z)))
+#define Maj(x,y,z)	(((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+
+#ifdef OPENSSL_SMALL_FOOTPRINT
+
+static void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num)
+	{
+	unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1,T2;
+	SHA_LONG	X[16],l;
+	int i;
+	const unsigned char *data=in;
+
+			while (num--) {
+
+	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
+	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
+
+	for (i=0;i<16;i++)
+		{
+		HOST_c2l(data,l); T1 = X[i] = l;
+		T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
+		T2 = Sigma0(a) + Maj(a,b,c);
+		h = g;	g = f;	f = e;	e = d + T1;
+		d = c;	c = b;	b = a;	a = T1 + T2;
+		}
+
+	for (;i<64;i++)
+		{
+		s0 = X[(i+1)&0x0f];	s0 = sigma0(s0);
+		s1 = X[(i+14)&0x0f];	s1 = sigma1(s1);
+
+		T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
+		T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
+		T2 = Sigma0(a) + Maj(a,b,c);
+		h = g;	g = f;	f = e;	e = d + T1;
+		d = c;	c = b;	b = a;	a = T1 + T2;
+		}
+
+	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
+	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
+
+			}
+	}
+
+#else
+
+#define	ROUND_00_15(i,a,b,c,d,e,f,g,h)		do {	\
+	T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];	\
+	h = Sigma0(a) + Maj(a,b,c);			\
+	d += T1;	h += T1;		} while (0)
+
+#define	ROUND_16_63(i,a,b,c,d,e,f,g,h,X)	do {	\
+	s0 = X[(i+1)&0x0f];	s0 = sigma0(s0);	\
+	s1 = X[(i+14)&0x0f];	s1 = sigma1(s1);	\
+	T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f];	\
+	ROUND_00_15(i,a,b,c,d,e,f,g,h);		} while (0)
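+/*
+ * For reference: X[] is used as a 16-word circular buffer, so with indices
+ * reduced mod 16 the FIPS-180-2 schedule
+ *	W[i] = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16]
+ * becomes the expression above: i-16 == i, i-15 == i+1, i-7 == i+9 and
+ * i-2 == i+14 (all mod 16), hence the reads of X[(i+1)&0x0f],
+ * X[(i+9)&0x0f] and X[(i+14)&0x0f] and the in-place update of X[(i)&0x0f].
+ */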
+
+static void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num)
+	{
+	unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1;
+	SHA_LONG	X[16];
+	int i;
+	const unsigned char *data=in;
+
+			while (num--) {
+
+	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
+	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
+
+	if (BYTE_ORDER != LITTLE_ENDIAN &&
+	    sizeof(SHA_LONG)==4 && ((size_t)in%4)==0)
+		{
+		const SHA_LONG *W=(const SHA_LONG *)data;
+
+		T1 = X[0] = W[0];	ROUND_00_15(0,a,b,c,d,e,f,g,h);
+		T1 = X[1] = W[1];	ROUND_00_15(1,h,a,b,c,d,e,f,g);
+		T1 = X[2] = W[2];	ROUND_00_15(2,g,h,a,b,c,d,e,f);
+		T1 = X[3] = W[3];	ROUND_00_15(3,f,g,h,a,b,c,d,e);
+		T1 = X[4] = W[4];	ROUND_00_15(4,e,f,g,h,a,b,c,d);
+		T1 = X[5] = W[5];	ROUND_00_15(5,d,e,f,g,h,a,b,c);
+		T1 = X[6] = W[6];	ROUND_00_15(6,c,d,e,f,g,h,a,b);
+		T1 = X[7] = W[7];	ROUND_00_15(7,b,c,d,e,f,g,h,a);
+		T1 = X[8] = W[8];	ROUND_00_15(8,a,b,c,d,e,f,g,h);
+		T1 = X[9] = W[9];	ROUND_00_15(9,h,a,b,c,d,e,f,g);
+		T1 = X[10] = W[10];	ROUND_00_15(10,g,h,a,b,c,d,e,f);
+		T1 = X[11] = W[11];	ROUND_00_15(11,f,g,h,a,b,c,d,e);
+		T1 = X[12] = W[12];	ROUND_00_15(12,e,f,g,h,a,b,c,d);
+		T1 = X[13] = W[13];	ROUND_00_15(13,d,e,f,g,h,a,b,c);
+		T1 = X[14] = W[14];	ROUND_00_15(14,c,d,e,f,g,h,a,b);
+		T1 = X[15] = W[15];	ROUND_00_15(15,b,c,d,e,f,g,h,a);
+
+		data += SHA256_CBLOCK;
+		}
+	else
+		{
+		SHA_LONG l;
+
+		HOST_c2l(data,l); T1 = X[0] = l;  ROUND_00_15(0,a,b,c,d,e,f,g,h);
+		HOST_c2l(data,l); T1 = X[1] = l;  ROUND_00_15(1,h,a,b,c,d,e,f,g);
+		HOST_c2l(data,l); T1 = X[2] = l;  ROUND_00_15(2,g,h,a,b,c,d,e,f);
+		HOST_c2l(data,l); T1 = X[3] = l;  ROUND_00_15(3,f,g,h,a,b,c,d,e);
+		HOST_c2l(data,l); T1 = X[4] = l;  ROUND_00_15(4,e,f,g,h,a,b,c,d);
+		HOST_c2l(data,l); T1 = X[5] = l;  ROUND_00_15(5,d,e,f,g,h,a,b,c);
+		HOST_c2l(data,l); T1 = X[6] = l;  ROUND_00_15(6,c,d,e,f,g,h,a,b);
+		HOST_c2l(data,l); T1 = X[7] = l;  ROUND_00_15(7,b,c,d,e,f,g,h,a);
+		HOST_c2l(data,l); T1 = X[8] = l;  ROUND_00_15(8,a,b,c,d,e,f,g,h);
+		HOST_c2l(data,l); T1 = X[9] = l;  ROUND_00_15(9,h,a,b,c,d,e,f,g);
+		HOST_c2l(data,l); T1 = X[10] = l; ROUND_00_15(10,g,h,a,b,c,d,e,f);
+		HOST_c2l(data,l); T1 = X[11] = l; ROUND_00_15(11,f,g,h,a,b,c,d,e);
+		HOST_c2l(data,l); T1 = X[12] = l; ROUND_00_15(12,e,f,g,h,a,b,c,d);
+		HOST_c2l(data,l); T1 = X[13] = l; ROUND_00_15(13,d,e,f,g,h,a,b,c);
+		HOST_c2l(data,l); T1 = X[14] = l; ROUND_00_15(14,c,d,e,f,g,h,a,b);
+		HOST_c2l(data,l); T1 = X[15] = l; ROUND_00_15(15,b,c,d,e,f,g,h,a);
+		}
+
+	for (i=16;i<64;i+=8)
+		{
+		ROUND_16_63(i+0,a,b,c,d,e,f,g,h,X);
+		ROUND_16_63(i+1,h,a,b,c,d,e,f,g,X);
+		ROUND_16_63(i+2,g,h,a,b,c,d,e,f,X);
+		ROUND_16_63(i+3,f,g,h,a,b,c,d,e,X);
+		ROUND_16_63(i+4,e,f,g,h,a,b,c,d,X);
+		ROUND_16_63(i+5,d,e,f,g,h,a,b,c,X);
+		ROUND_16_63(i+6,c,d,e,f,g,h,a,b,X);
+		ROUND_16_63(i+7,b,c,d,e,f,g,h,a,X);
+		}
+
+	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
+	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
+
+			}
+	}
+
+#endif
+#endif /* SHA256_ASM */
+
+#endif /* OPENSSL_NO_SHA256 */
diff --git a/ext/libressl/crypto/sha/sha512-elf-armv4.S b/ext/libressl/crypto/sha/sha512-elf-armv4.S
new file mode 100644
index 0000000..8abf8d5
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha512-elf-armv4.S
@@ -0,0 +1,1786 @@
+#include "arm_arch.h"
+#ifdef __ARMEL__
+# define LO 0
+# define HI 4
+# define WORD64(hi0,lo0,hi1,lo1)	.word	lo0,hi0, lo1,hi1
+#else
+# define HI 0
+# define LO 4
+# define WORD64(hi0,lo0,hi1,lo1)	.word	hi0,lo0, hi1,lo1
+#endif
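+
+@ For reference: WORD64 stores every 64-bit K512 constant as two 32-bit
+@ .word halves so the integer-only code can fetch them with plain ldr.
+@ On little-endian (__ARMEL__) the low half is emitted first, making
+@ ldr rX,[r14,#LO] (offset 0) read K[i].lo and ldr rX,[r14,#HI] (offset 4)
+@ read K[i].hi; on big-endian the emission order and LO/HI swap together.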
+
+.text
+.code	32
+.type	K512,%object
+.align	5
+K512:
+WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
+WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
+WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
+WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
+WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
+WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
+WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
+WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
+WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
+WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
+WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
+WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
+WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
+WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
+WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
+WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
+WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
+WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
+WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
+WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
+WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
+WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
+WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
+WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
+WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
+WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
+WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
+WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
+WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
+WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
+WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
+WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
+WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
+WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
+WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
+WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
+WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
+WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
+WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
+WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
+.size	K512,.-K512
+.LOPENSSL_armcap:
+.word	OPENSSL_armcap_P-sha512_block_data_order
+.skip	32-4
+
+.global	sha512_block_data_order
+.type	sha512_block_data_order,%function
+sha512_block_data_order:
+	sub	r3,pc,#8		@ sha512_block_data_order
+	add	r2,r1,r2,lsl#7	@ len to point at the end of inp
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+	ldr	r12,.LOPENSSL_armcap
+	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
+	tst	r12,#1
+	bne	.LNEON
+#endif
+	stmdb	sp!,{r4-r12,lr}
+	sub	r14,r3,#672		@ K512
+	sub	sp,sp,#9*8
+
+	ldr	r7,[r0,#32+LO]
+	ldr	r8,[r0,#32+HI]
+	ldr	r9, [r0,#48+LO]
+	ldr	r10, [r0,#48+HI]
+	ldr	r11, [r0,#56+LO]
+	ldr	r12, [r0,#56+HI]
+.Loop:
+	str	r9, [sp,#48+0]
+	str	r10, [sp,#48+4]
+	str	r11, [sp,#56+0]
+	str	r12, [sp,#56+4]
+	ldr	r5,[r0,#0+LO]
+	ldr	r6,[r0,#0+HI]
+	ldr	r3,[r0,#8+LO]
+	ldr	r4,[r0,#8+HI]
+	ldr	r9, [r0,#16+LO]
+	ldr	r10, [r0,#16+HI]
+	ldr	r11, [r0,#24+LO]
+	ldr	r12, [r0,#24+HI]
+	str	r3,[sp,#8+0]
+	str	r4,[sp,#8+4]
+	str	r9, [sp,#16+0]
+	str	r10, [sp,#16+4]
+	str	r11, [sp,#24+0]
+	str	r12, [sp,#24+4]
+	ldr	r3,[r0,#40+LO]
+	ldr	r4,[r0,#40+HI]
+	str	r3,[sp,#40+0]
+	str	r4,[sp,#40+4]
+
+.L00_15:
+#if __ARM_ARCH__<7 || defined(__STRICT_ALIGNMENT)
+	ldrb	r3,[r1,#7]
+	ldrb	r9, [r1,#6]
+	ldrb	r10, [r1,#5]
+	ldrb	r11, [r1,#4]
+	ldrb	r4,[r1,#3]
+	ldrb	r12, [r1,#2]
+	orr	r3,r3,r9,lsl#8
+	ldrb	r9, [r1,#1]
+	orr	r3,r3,r10,lsl#16
+	ldrb	r10, [r1],#8
+	orr	r3,r3,r11,lsl#24
+	orr	r4,r4,r12,lsl#8
+	orr	r4,r4,r9,lsl#16
+	orr	r4,r4,r10,lsl#24
+#else
+	ldr	r3,[r1,#4]
+	ldr	r4,[r1],#8
+#ifdef __ARMEL__
+	rev	r3,r3
+	rev	r4,r4
+#endif
+#endif
+	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
+	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
+	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
+	mov	r9,r7,lsr#14
+	str	r3,[sp,#64+0]
+	mov	r10,r8,lsr#14
+	str	r4,[sp,#64+4]
+	eor	r9,r9,r8,lsl#18
+	ldr	r11,[sp,#56+0]	@ h.lo
+	eor	r10,r10,r7,lsl#18
+	ldr	r12,[sp,#56+4]	@ h.hi
+	eor	r9,r9,r7,lsr#18
+	eor	r10,r10,r8,lsr#18
+	eor	r9,r9,r8,lsl#14
+	eor	r10,r10,r7,lsl#14
+	eor	r9,r9,r8,lsr#9
+	eor	r10,r10,r7,lsr#9
+	eor	r9,r9,r7,lsl#23
+	eor	r10,r10,r8,lsl#23	@ Sigma1(e)
+	adds	r3,r3,r9
+	ldr	r9,[sp,#40+0]	@ f.lo
+	adc	r4,r4,r10		@ T += Sigma1(e)
+	ldr	r10,[sp,#40+4]	@ f.hi
+	adds	r3,r3,r11
+	ldr	r11,[sp,#48+0]	@ g.lo
+	adc	r4,r4,r12		@ T += h
+	ldr	r12,[sp,#48+4]	@ g.hi
+
+	eor	r9,r9,r11
+	str	r7,[sp,#32+0]
+	eor	r10,r10,r12
+	str	r8,[sp,#32+4]
+	and	r9,r9,r7
+	str	r5,[sp,#0+0]
+	and	r10,r10,r8
+	str	r6,[sp,#0+4]
+	eor	r9,r9,r11
+	ldr	r11,[r14,#LO]	@ K[i].lo
+	eor	r10,r10,r12		@ Ch(e,f,g)
+	ldr	r12,[r14,#HI]	@ K[i].hi
+
+	adds	r3,r3,r9
+	ldr	r7,[sp,#24+0]	@ d.lo
+	adc	r4,r4,r10		@ T += Ch(e,f,g)
+	ldr	r8,[sp,#24+4]	@ d.hi
+	adds	r3,r3,r11
+	and	r9,r11,#0xff
+	adc	r4,r4,r12		@ T += K[i]
+	adds	r7,r7,r3
+	ldr	r11,[sp,#8+0]	@ b.lo
+	adc	r8,r8,r4		@ d += T
+	teq	r9,#148
+
+	ldr	r12,[sp,#16+0]	@ c.lo
+	orreq	r14,r14,#1
+	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
+	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
+	mov	r9,r5,lsr#28
+	mov	r10,r6,lsr#28
+	eor	r9,r9,r6,lsl#4
+	eor	r10,r10,r5,lsl#4
+	eor	r9,r9,r6,lsr#2
+	eor	r10,r10,r5,lsr#2
+	eor	r9,r9,r5,lsl#30
+	eor	r10,r10,r6,lsl#30
+	eor	r9,r9,r6,lsr#7
+	eor	r10,r10,r5,lsr#7
+	eor	r9,r9,r5,lsl#25
+	eor	r10,r10,r6,lsl#25	@ Sigma0(a)
+	adds	r3,r3,r9
+	and	r9,r5,r11
+	adc	r4,r4,r10		@ T += Sigma0(a)
+
+	ldr	r10,[sp,#8+4]	@ b.hi
+	orr	r5,r5,r11
+	ldr	r11,[sp,#16+4]	@ c.hi
+	and	r5,r5,r12
+	and	r12,r6,r10
+	orr	r6,r6,r10
+	orr	r5,r5,r9		@ Maj(a,b,c).lo
+	and	r6,r6,r11
+	adds	r5,r5,r3
+	orr	r6,r6,r12		@ Maj(a,b,c).hi
+	sub	sp,sp,#8
+	adc	r6,r6,r4		@ h += T
+	tst	r14,#1
+	add	r14,r14,#8
+	tst	r14,#1
+	beq	.L00_15
+	ldr	r9,[sp,#184+0]
+	ldr	r10,[sp,#184+4]
+	bic	r14,r14,#1
+.L16_79:
+	@ sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
+	@ LO		lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
+	@ HI		hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
+	mov	r3,r9,lsr#1
+	ldr	r11,[sp,#80+0]
+	mov	r4,r10,lsr#1
+	ldr	r12,[sp,#80+4]
+	eor	r3,r3,r10,lsl#31
+	eor	r4,r4,r9,lsl#31
+	eor	r3,r3,r9,lsr#8
+	eor	r4,r4,r10,lsr#8
+	eor	r3,r3,r10,lsl#24
+	eor	r4,r4,r9,lsl#24
+	eor	r3,r3,r9,lsr#7
+	eor	r4,r4,r10,lsr#7
+	eor	r3,r3,r10,lsl#25
+
+	@ sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+	@ LO		lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
+	@ HI		hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
+	mov	r9,r11,lsr#19
+	mov	r10,r12,lsr#19
+	eor	r9,r9,r12,lsl#13
+	eor	r10,r10,r11,lsl#13
+	eor	r9,r9,r12,lsr#29
+	eor	r10,r10,r11,lsr#29
+	eor	r9,r9,r11,lsl#3
+	eor	r10,r10,r12,lsl#3
+	eor	r9,r9,r11,lsr#6
+	eor	r10,r10,r12,lsr#6
+	ldr	r11,[sp,#120+0]
+	eor	r9,r9,r12,lsl#26
+
+	ldr	r12,[sp,#120+4]
+	adds	r3,r3,r9
+	ldr	r9,[sp,#192+0]
+	adc	r4,r4,r10
+
+	ldr	r10,[sp,#192+4]
+	adds	r3,r3,r11
+	adc	r4,r4,r12
+	adds	r3,r3,r9
+	adc	r4,r4,r10
+	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
+	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
+	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
+	mov	r9,r7,lsr#14
+	str	r3,[sp,#64+0]
+	mov	r10,r8,lsr#14
+	str	r4,[sp,#64+4]
+	eor	r9,r9,r8,lsl#18
+	ldr	r11,[sp,#56+0]	@ h.lo
+	eor	r10,r10,r7,lsl#18
+	ldr	r12,[sp,#56+4]	@ h.hi
+	eor	r9,r9,r7,lsr#18
+	eor	r10,r10,r8,lsr#18
+	eor	r9,r9,r8,lsl#14
+	eor	r10,r10,r7,lsl#14
+	eor	r9,r9,r8,lsr#9
+	eor	r10,r10,r7,lsr#9
+	eor	r9,r9,r7,lsl#23
+	eor	r10,r10,r8,lsl#23	@ Sigma1(e)
+	adds	r3,r3,r9
+	ldr	r9,[sp,#40+0]	@ f.lo
+	adc	r4,r4,r10		@ T += Sigma1(e)
+	ldr	r10,[sp,#40+4]	@ f.hi
+	adds	r3,r3,r11
+	ldr	r11,[sp,#48+0]	@ g.lo
+	adc	r4,r4,r12		@ T += h
+	ldr	r12,[sp,#48+4]	@ g.hi
+
+	eor	r9,r9,r11
+	str	r7,[sp,#32+0]
+	eor	r10,r10,r12
+	str	r8,[sp,#32+4]
+	and	r9,r9,r7
+	str	r5,[sp,#0+0]
+	and	r10,r10,r8
+	str	r6,[sp,#0+4]
+	eor	r9,r9,r11
+	ldr	r11,[r14,#LO]	@ K[i].lo
+	eor	r10,r10,r12		@ Ch(e,f,g)
+	ldr	r12,[r14,#HI]	@ K[i].hi
+
+	adds	r3,r3,r9
+	ldr	r7,[sp,#24+0]	@ d.lo
+	adc	r4,r4,r10		@ T += Ch(e,f,g)
+	ldr	r8,[sp,#24+4]	@ d.hi
+	adds	r3,r3,r11
+	and	r9,r11,#0xff
+	adc	r4,r4,r12		@ T += K[i]
+	adds	r7,r7,r3
+	ldr	r11,[sp,#8+0]	@ b.lo
+	adc	r8,r8,r4		@ d += T
+	teq	r9,#23
+
+	ldr	r12,[sp,#16+0]	@ c.lo
+	orreq	r14,r14,#1
+	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
+	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
+	mov	r9,r5,lsr#28
+	mov	r10,r6,lsr#28
+	eor	r9,r9,r6,lsl#4
+	eor	r10,r10,r5,lsl#4
+	eor	r9,r9,r6,lsr#2
+	eor	r10,r10,r5,lsr#2
+	eor	r9,r9,r5,lsl#30
+	eor	r10,r10,r6,lsl#30
+	eor	r9,r9,r6,lsr#7
+	eor	r10,r10,r5,lsr#7
+	eor	r9,r9,r5,lsl#25
+	eor	r10,r10,r6,lsl#25	@ Sigma0(a)
+	adds	r3,r3,r9
+	and	r9,r5,r11
+	adc	r4,r4,r10		@ T += Sigma0(a)
+
+	ldr	r10,[sp,#8+4]	@ b.hi
+	orr	r5,r5,r11
+	ldr	r11,[sp,#16+4]	@ c.hi
+	and	r5,r5,r12
+	and	r12,r6,r10
+	orr	r6,r6,r10
+	orr	r5,r5,r9		@ Maj(a,b,c).lo
+	and	r6,r6,r11
+	adds	r5,r5,r3
+	orr	r6,r6,r12		@ Maj(a,b,c).hi
+	sub	sp,sp,#8
+	adc	r6,r6,r4		@ h += T
+	tst	r14,#1
+	add	r14,r14,#8
+	ldreq	r9,[sp,#184+0]
+	ldreq	r10,[sp,#184+4]
+	beq	.L16_79
+	bic	r14,r14,#1
+
+	ldr	r3,[sp,#8+0]
+	ldr	r4,[sp,#8+4]
+	ldr	r9, [r0,#0+LO]
+	ldr	r10, [r0,#0+HI]
+	ldr	r11, [r0,#8+LO]
+	ldr	r12, [r0,#8+HI]
+	adds	r9,r5,r9
+	str	r9, [r0,#0+LO]
+	adc	r10,r6,r10
+	str	r10, [r0,#0+HI]
+	adds	r11,r3,r11
+	str	r11, [r0,#8+LO]
+	adc	r12,r4,r12
+	str	r12, [r0,#8+HI]
+
+	ldr	r5,[sp,#16+0]
+	ldr	r6,[sp,#16+4]
+	ldr	r3,[sp,#24+0]
+	ldr	r4,[sp,#24+4]
+	ldr	r9, [r0,#16+LO]
+	ldr	r10, [r0,#16+HI]
+	ldr	r11, [r0,#24+LO]
+	ldr	r12, [r0,#24+HI]
+	adds	r9,r5,r9
+	str	r9, [r0,#16+LO]
+	adc	r10,r6,r10
+	str	r10, [r0,#16+HI]
+	adds	r11,r3,r11
+	str	r11, [r0,#24+LO]
+	adc	r12,r4,r12
+	str	r12, [r0,#24+HI]
+
+	ldr	r3,[sp,#40+0]
+	ldr	r4,[sp,#40+4]
+	ldr	r9, [r0,#32+LO]
+	ldr	r10, [r0,#32+HI]
+	ldr	r11, [r0,#40+LO]
+	ldr	r12, [r0,#40+HI]
+	adds	r7,r7,r9
+	str	r7,[r0,#32+LO]
+	adc	r8,r8,r10
+	str	r8,[r0,#32+HI]
+	adds	r11,r3,r11
+	str	r11, [r0,#40+LO]
+	adc	r12,r4,r12
+	str	r12, [r0,#40+HI]
+
+	ldr	r5,[sp,#48+0]
+	ldr	r6,[sp,#48+4]
+	ldr	r3,[sp,#56+0]
+	ldr	r4,[sp,#56+4]
+	ldr	r9, [r0,#48+LO]
+	ldr	r10, [r0,#48+HI]
+	ldr	r11, [r0,#56+LO]
+	ldr	r12, [r0,#56+HI]
+	adds	r9,r5,r9
+	str	r9, [r0,#48+LO]
+	adc	r10,r6,r10
+	str	r10, [r0,#48+HI]
+	adds	r11,r3,r11
+	str	r11, [r0,#56+LO]
+	adc	r12,r4,r12
+	str	r12, [r0,#56+HI]
+
+	add	sp,sp,#640
+	sub	r14,r14,#640
+
+	teq	r1,r2
+	bne	.Loop
+
+	add	sp,sp,#8*9		@ destroy frame
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r12,pc}
+#else
+	ldmia	sp!,{r4-r12,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+#if __ARM_ARCH__>=7 && !defined(__STRICT_ALIGNMENT)
+.fpu	neon
+
+.align	4
+.LNEON:
+	dmb				@ errata #451034 on early Cortex A8
+	vstmdb	sp!,{d8-d15}		@ ABI specification says so
+	sub	r3,r3,#672		@ K512
+	vldmia	r0,{d16-d23}		@ load context
+.Loop_neon:
+	vshr.u64	d24,d20,#14	@ 0
+#if 0<16
+	vld1.64		{d0},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d20,#18
+	vshr.u64	d26,d20,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d20,#50
+	vsli.64		d25,d20,#46
+	vsli.64		d26,d20,#23
+#if 0<16 && defined(__ARMEL__)
+	vrev64.8	d0,d0
+#endif
+	vadd.i64	d27,d28,d23
+	veor		d29,d21,d22
+	veor		d24,d25
+	vand		d29,d20
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d22			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d16,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d16,#34
+	vshr.u64	d26,d16,#39
+	vsli.64		d24,d16,#36
+	vsli.64		d25,d16,#30
+	vsli.64		d26,d16,#25
+	vadd.i64	d27,d0
+	vorr		d30,d16,d18
+	vand		d29,d16,d18
+	veor		d23,d24,d25
+	vand		d30,d17
+	veor		d23,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d23,d27
+	vadd.i64	d19,d27
+	vadd.i64	d23,d30
+	vshr.u64	d24,d19,#14	@ 1
+#if 1<16
+	vld1.64		{d1},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d19,#18
+	vshr.u64	d26,d19,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d19,#50
+	vsli.64		d25,d19,#46
+	vsli.64		d26,d19,#23
+#if 1<16 && defined(__ARMEL__)
+	vrev64.8	d1,d1
+#endif
+	vadd.i64	d27,d28,d22
+	veor		d29,d20,d21
+	veor		d24,d25
+	vand		d29,d19
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d21			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d23,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d23,#34
+	vshr.u64	d26,d23,#39
+	vsli.64		d24,d23,#36
+	vsli.64		d25,d23,#30
+	vsli.64		d26,d23,#25
+	vadd.i64	d27,d1
+	vorr		d30,d23,d17
+	vand		d29,d23,d17
+	veor		d22,d24,d25
+	vand		d30,d16
+	veor		d22,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d22,d27
+	vadd.i64	d18,d27
+	vadd.i64	d22,d30
+	vshr.u64	d24,d18,#14	@ 2
+#if 2<16
+	vld1.64		{d2},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d18,#18
+	vshr.u64	d26,d18,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d18,#50
+	vsli.64		d25,d18,#46
+	vsli.64		d26,d18,#23
+#if 2<16 && defined(__ARMEL__)
+	vrev64.8	d2,d2
+#endif
+	vadd.i64	d27,d28,d21
+	veor		d29,d19,d20
+	veor		d24,d25
+	vand		d29,d18
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d20			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d22,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d22,#34
+	vshr.u64	d26,d22,#39
+	vsli.64		d24,d22,#36
+	vsli.64		d25,d22,#30
+	vsli.64		d26,d22,#25
+	vadd.i64	d27,d2
+	vorr		d30,d22,d16
+	vand		d29,d22,d16
+	veor		d21,d24,d25
+	vand		d30,d23
+	veor		d21,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d21,d27
+	vadd.i64	d17,d27
+	vadd.i64	d21,d30
+	vshr.u64	d24,d17,#14	@ 3
+#if 3<16
+	vld1.64		{d3},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d17,#18
+	vshr.u64	d26,d17,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d17,#50
+	vsli.64		d25,d17,#46
+	vsli.64		d26,d17,#23
+#if 3<16 && defined(__ARMEL__)
+	vrev64.8	d3,d3
+#endif
+	vadd.i64	d27,d28,d20
+	veor		d29,d18,d19
+	veor		d24,d25
+	vand		d29,d17
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d19			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d21,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d21,#34
+	vshr.u64	d26,d21,#39
+	vsli.64		d24,d21,#36
+	vsli.64		d25,d21,#30
+	vsli.64		d26,d21,#25
+	vadd.i64	d27,d3
+	vorr		d30,d21,d23
+	vand		d29,d21,d23
+	veor		d20,d24,d25
+	vand		d30,d22
+	veor		d20,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d20,d27
+	vadd.i64	d16,d27
+	vadd.i64	d20,d30
+	vshr.u64	d24,d16,#14	@ 4
+#if 4<16
+	vld1.64		{d4},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d16,#18
+	vshr.u64	d26,d16,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d16,#50
+	vsli.64		d25,d16,#46
+	vsli.64		d26,d16,#23
+#if 4<16 && defined(__ARMEL__)
+	vrev64.8	d4,d4
+#endif
+	vadd.i64	d27,d28,d19
+	veor		d29,d17,d18
+	veor		d24,d25
+	vand		d29,d16
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d18			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d20,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d20,#34
+	vshr.u64	d26,d20,#39
+	vsli.64		d24,d20,#36
+	vsli.64		d25,d20,#30
+	vsli.64		d26,d20,#25
+	vadd.i64	d27,d4
+	vorr		d30,d20,d22
+	vand		d29,d20,d22
+	veor		d19,d24,d25
+	vand		d30,d21
+	veor		d19,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d19,d27
+	vadd.i64	d23,d27
+	vadd.i64	d19,d30
+	vshr.u64	d24,d23,#14	@ 5
+#if 5<16
+	vld1.64		{d5},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d23,#18
+	vshr.u64	d26,d23,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d23,#50
+	vsli.64		d25,d23,#46
+	vsli.64		d26,d23,#23
+#if 5<16 && defined(__ARMEL__)
+	vrev64.8	d5,d5
+#endif
+	vadd.i64	d27,d28,d18
+	veor		d29,d16,d17
+	veor		d24,d25
+	vand		d29,d23
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d17			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d19,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d19,#34
+	vshr.u64	d26,d19,#39
+	vsli.64		d24,d19,#36
+	vsli.64		d25,d19,#30
+	vsli.64		d26,d19,#25
+	vadd.i64	d27,d5
+	vorr		d30,d19,d21
+	vand		d29,d19,d21
+	veor		d18,d24,d25
+	vand		d30,d20
+	veor		d18,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d18,d27
+	vadd.i64	d22,d27
+	vadd.i64	d18,d30
+	vshr.u64	d24,d22,#14	@ 6
+#if 6<16
+	vld1.64		{d6},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d22,#18
+	vshr.u64	d26,d22,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d22,#50
+	vsli.64		d25,d22,#46
+	vsli.64		d26,d22,#23
+#if 6<16 && defined(__ARMEL__)
+	vrev64.8	d6,d6
+#endif
+	vadd.i64	d27,d28,d17
+	veor		d29,d23,d16
+	veor		d24,d25
+	vand		d29,d22
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d16			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d18,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d18,#34
+	vshr.u64	d26,d18,#39
+	vsli.64		d24,d18,#36
+	vsli.64		d25,d18,#30
+	vsli.64		d26,d18,#25
+	vadd.i64	d27,d6
+	vorr		d30,d18,d20
+	vand		d29,d18,d20
+	veor		d17,d24,d25
+	vand		d30,d19
+	veor		d17,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d17,d27
+	vadd.i64	d21,d27
+	vadd.i64	d17,d30
+	vshr.u64	d24,d21,#14	@ 7
+#if 7<16
+	vld1.64		{d7},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d21,#18
+	vshr.u64	d26,d21,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d21,#50
+	vsli.64		d25,d21,#46
+	vsli.64		d26,d21,#23
+#if 7<16 && defined(__ARMEL__)
+	vrev64.8	d7,d7
+#endif
+	vadd.i64	d27,d28,d16
+	veor		d29,d22,d23
+	veor		d24,d25
+	vand		d29,d21
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d23			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d17,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d17,#34
+	vshr.u64	d26,d17,#39
+	vsli.64		d24,d17,#36
+	vsli.64		d25,d17,#30
+	vsli.64		d26,d17,#25
+	vadd.i64	d27,d7
+	vorr		d30,d17,d19
+	vand		d29,d17,d19
+	veor		d16,d24,d25
+	vand		d30,d18
+	veor		d16,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d16,d27
+	vadd.i64	d20,d27
+	vadd.i64	d16,d30
+	vshr.u64	d24,d20,#14	@ 8
+#if 8<16
+	vld1.64		{d8},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d20,#18
+	vshr.u64	d26,d20,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d20,#50
+	vsli.64		d25,d20,#46
+	vsli.64		d26,d20,#23
+#if 8<16 && defined(__ARMEL__)
+	vrev64.8	d8,d8
+#endif
+	vadd.i64	d27,d28,d23
+	veor		d29,d21,d22
+	veor		d24,d25
+	vand		d29,d20
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d22			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d16,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d16,#34
+	vshr.u64	d26,d16,#39
+	vsli.64		d24,d16,#36
+	vsli.64		d25,d16,#30
+	vsli.64		d26,d16,#25
+	vadd.i64	d27,d8
+	vorr		d30,d16,d18
+	vand		d29,d16,d18
+	veor		d23,d24,d25
+	vand		d30,d17
+	veor		d23,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d23,d27
+	vadd.i64	d19,d27
+	vadd.i64	d23,d30
+	vshr.u64	d24,d19,#14	@ 9
+#if 9<16
+	vld1.64		{d9},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d19,#18
+	vshr.u64	d26,d19,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d19,#50
+	vsli.64		d25,d19,#46
+	vsli.64		d26,d19,#23
+#if 9<16 && defined(__ARMEL__)
+	vrev64.8	d9,d9
+#endif
+	vadd.i64	d27,d28,d22
+	veor		d29,d20,d21
+	veor		d24,d25
+	vand		d29,d19
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d21			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d23,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d23,#34
+	vshr.u64	d26,d23,#39
+	vsli.64		d24,d23,#36
+	vsli.64		d25,d23,#30
+	vsli.64		d26,d23,#25
+	vadd.i64	d27,d9
+	vorr		d30,d23,d17
+	vand		d29,d23,d17
+	veor		d22,d24,d25
+	vand		d30,d16
+	veor		d22,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d22,d27
+	vadd.i64	d18,d27
+	vadd.i64	d22,d30
+	vshr.u64	d24,d18,#14	@ 10
+#if 10<16
+	vld1.64		{d10},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d18,#18
+	vshr.u64	d26,d18,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d18,#50
+	vsli.64		d25,d18,#46
+	vsli.64		d26,d18,#23
+#if 10<16 && defined(__ARMEL__)
+	vrev64.8	d10,d10
+#endif
+	vadd.i64	d27,d28,d21
+	veor		d29,d19,d20
+	veor		d24,d25
+	vand		d29,d18
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d20			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d22,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d22,#34
+	vshr.u64	d26,d22,#39
+	vsli.64		d24,d22,#36
+	vsli.64		d25,d22,#30
+	vsli.64		d26,d22,#25
+	vadd.i64	d27,d10
+	vorr		d30,d22,d16
+	vand		d29,d22,d16
+	veor		d21,d24,d25
+	vand		d30,d23
+	veor		d21,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d21,d27
+	vadd.i64	d17,d27
+	vadd.i64	d21,d30
+	vshr.u64	d24,d17,#14	@ 11
+#if 11<16
+	vld1.64		{d11},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d17,#18
+	vshr.u64	d26,d17,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d17,#50
+	vsli.64		d25,d17,#46
+	vsli.64		d26,d17,#23
+#if 11<16 && defined(__ARMEL__)
+	vrev64.8	d11,d11
+#endif
+	vadd.i64	d27,d28,d20
+	veor		d29,d18,d19
+	veor		d24,d25
+	vand		d29,d17
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d19			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d21,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d21,#34
+	vshr.u64	d26,d21,#39
+	vsli.64		d24,d21,#36
+	vsli.64		d25,d21,#30
+	vsli.64		d26,d21,#25
+	vadd.i64	d27,d11
+	vorr		d30,d21,d23
+	vand		d29,d21,d23
+	veor		d20,d24,d25
+	vand		d30,d22
+	veor		d20,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d20,d27
+	vadd.i64	d16,d27
+	vadd.i64	d20,d30
+	vshr.u64	d24,d16,#14	@ 12
+#if 12<16
+	vld1.64		{d12},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d16,#18
+	vshr.u64	d26,d16,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d16,#50
+	vsli.64		d25,d16,#46
+	vsli.64		d26,d16,#23
+#if 12<16 && defined(__ARMEL__)
+	vrev64.8	d12,d12
+#endif
+	vadd.i64	d27,d28,d19
+	veor		d29,d17,d18
+	veor		d24,d25
+	vand		d29,d16
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d18			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d20,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d20,#34
+	vshr.u64	d26,d20,#39
+	vsli.64		d24,d20,#36
+	vsli.64		d25,d20,#30
+	vsli.64		d26,d20,#25
+	vadd.i64	d27,d12
+	vorr		d30,d20,d22
+	vand		d29,d20,d22
+	veor		d19,d24,d25
+	vand		d30,d21
+	veor		d19,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d19,d27
+	vadd.i64	d23,d27
+	vadd.i64	d19,d30
+	vshr.u64	d24,d23,#14	@ 13
+#if 13<16
+	vld1.64		{d13},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d23,#18
+	vshr.u64	d26,d23,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d23,#50
+	vsli.64		d25,d23,#46
+	vsli.64		d26,d23,#23
+#if 13<16 && defined(__ARMEL__)
+	vrev64.8	d13,d13
+#endif
+	vadd.i64	d27,d28,d18
+	veor		d29,d16,d17
+	veor		d24,d25
+	vand		d29,d23
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d17			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d19,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d19,#34
+	vshr.u64	d26,d19,#39
+	vsli.64		d24,d19,#36
+	vsli.64		d25,d19,#30
+	vsli.64		d26,d19,#25
+	vadd.i64	d27,d13
+	vorr		d30,d19,d21
+	vand		d29,d19,d21
+	veor		d18,d24,d25
+	vand		d30,d20
+	veor		d18,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d18,d27
+	vadd.i64	d22,d27
+	vadd.i64	d18,d30
+	vshr.u64	d24,d22,#14	@ 14
+#if 14<16
+	vld1.64		{d14},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d22,#18
+	vshr.u64	d26,d22,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d22,#50
+	vsli.64		d25,d22,#46
+	vsli.64		d26,d22,#23
+#if 14<16 && defined(__ARMEL__)
+	vrev64.8	d14,d14
+#endif
+	vadd.i64	d27,d28,d17
+	veor		d29,d23,d16
+	veor		d24,d25
+	vand		d29,d22
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d16			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d18,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d18,#34
+	vshr.u64	d26,d18,#39
+	vsli.64		d24,d18,#36
+	vsli.64		d25,d18,#30
+	vsli.64		d26,d18,#25
+	vadd.i64	d27,d14
+	vorr		d30,d18,d20
+	vand		d29,d18,d20
+	veor		d17,d24,d25
+	vand		d30,d19
+	veor		d17,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d17,d27
+	vadd.i64	d21,d27
+	vadd.i64	d17,d30
+	vshr.u64	d24,d21,#14	@ 15
+#if 15<16
+	vld1.64		{d15},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d21,#18
+	vshr.u64	d26,d21,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d21,#50
+	vsli.64		d25,d21,#46
+	vsli.64		d26,d21,#23
+#if 15<16 && defined(__ARMEL__)
+	vrev64.8	d15,d15
+#endif
+	vadd.i64	d27,d28,d16
+	veor		d29,d22,d23
+	veor		d24,d25
+	vand		d29,d21
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d23			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d17,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d17,#34
+	vshr.u64	d26,d17,#39
+	vsli.64		d24,d17,#36
+	vsli.64		d25,d17,#30
+	vsli.64		d26,d17,#25
+	vadd.i64	d27,d15
+	vorr		d30,d17,d19
+	vand		d29,d17,d19
+	veor		d16,d24,d25
+	vand		d30,d18
+	veor		d16,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d16,d27
+	vadd.i64	d20,d27
+	vadd.i64	d16,d30
+	mov		r12,#4
+.L16_79_neon:
+	subs		r12,#1
+	vshr.u64	q12,q7,#19
+	vshr.u64	q13,q7,#61
+	vshr.u64	q15,q7,#6
+	vsli.64		q12,q7,#45
+	vext.8		q14,q0,q1,#8	@ X[i+1]
+	vsli.64		q13,q7,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q0,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q4,q5,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d20,#14		@ from NEON_00_15
+	vadd.i64	q0,q14
+	vshr.u64	d25,d20,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d20,#41		@ from NEON_00_15
+	vadd.i64	q0,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d20,#50
+	vsli.64		d25,d20,#46
+	vsli.64		d26,d20,#23
+#if 16<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	vadd.i64	d27,d28,d23
+	veor		d29,d21,d22
+	veor		d24,d25
+	vand		d29,d20
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d22			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d16,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d16,#34
+	vshr.u64	d26,d16,#39
+	vsli.64		d24,d16,#36
+	vsli.64		d25,d16,#30
+	vsli.64		d26,d16,#25
+	vadd.i64	d27,d0
+	vorr		d30,d16,d18
+	vand		d29,d16,d18
+	veor		d23,d24,d25
+	vand		d30,d17
+	veor		d23,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d23,d27
+	vadd.i64	d19,d27
+	vadd.i64	d23,d30
+	vshr.u64	d24,d19,#14	@ 17
+#if 17<16
+	vld1.64		{d1},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d19,#18
+	vshr.u64	d26,d19,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d19,#50
+	vsli.64		d25,d19,#46
+	vsli.64		d26,d19,#23
+#if 17<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	vadd.i64	d27,d28,d22
+	veor		d29,d20,d21
+	veor		d24,d25
+	vand		d29,d19
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d21			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d23,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d23,#34
+	vshr.u64	d26,d23,#39
+	vsli.64		d24,d23,#36
+	vsli.64		d25,d23,#30
+	vsli.64		d26,d23,#25
+	vadd.i64	d27,d1
+	vorr		d30,d23,d17
+	vand		d29,d23,d17
+	veor		d22,d24,d25
+	vand		d30,d16
+	veor		d22,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d22,d27
+	vadd.i64	d18,d27
+	vadd.i64	d22,d30
+	vshr.u64	q12,q0,#19
+	vshr.u64	q13,q0,#61
+	vshr.u64	q15,q0,#6
+	vsli.64		q12,q0,#45
+	vext.8		q14,q1,q2,#8	@ X[i+1]
+	vsli.64		q13,q0,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q1,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q5,q6,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d18,#14		@ from NEON_00_15
+	vadd.i64	q1,q14
+	vshr.u64	d25,d18,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d18,#41		@ from NEON_00_15
+	vadd.i64	q1,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d18,#50
+	vsli.64		d25,d18,#46
+	vsli.64		d26,d18,#23
+#if 18<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	vadd.i64	d27,d28,d21
+	veor		d29,d19,d20
+	veor		d24,d25
+	vand		d29,d18
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d20			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d22,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d22,#34
+	vshr.u64	d26,d22,#39
+	vsli.64		d24,d22,#36
+	vsli.64		d25,d22,#30
+	vsli.64		d26,d22,#25
+	vadd.i64	d27,d2
+	vorr		d30,d22,d16
+	vand		d29,d22,d16
+	veor		d21,d24,d25
+	vand		d30,d23
+	veor		d21,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d21,d27
+	vadd.i64	d17,d27
+	vadd.i64	d21,d30
+	vshr.u64	d24,d17,#14	@ 19
+#if 19<16
+	vld1.64		{d3},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d17,#18
+	vshr.u64	d26,d17,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d17,#50
+	vsli.64		d25,d17,#46
+	vsli.64		d26,d17,#23
+#if 19<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	vadd.i64	d27,d28,d20
+	veor		d29,d18,d19
+	veor		d24,d25
+	vand		d29,d17
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d19			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d21,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d21,#34
+	vshr.u64	d26,d21,#39
+	vsli.64		d24,d21,#36
+	vsli.64		d25,d21,#30
+	vsli.64		d26,d21,#25
+	vadd.i64	d27,d3
+	vorr		d30,d21,d23
+	vand		d29,d21,d23
+	veor		d20,d24,d25
+	vand		d30,d22
+	veor		d20,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d20,d27
+	vadd.i64	d16,d27
+	vadd.i64	d20,d30
+	vshr.u64	q12,q1,#19
+	vshr.u64	q13,q1,#61
+	vshr.u64	q15,q1,#6
+	vsli.64		q12,q1,#45
+	vext.8		q14,q2,q3,#8	@ X[i+1]
+	vsli.64		q13,q1,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q2,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q6,q7,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d16,#14		@ from NEON_00_15
+	vadd.i64	q2,q14
+	vshr.u64	d25,d16,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d16,#41		@ from NEON_00_15
+	vadd.i64	q2,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d16,#50
+	vsli.64		d25,d16,#46
+	vsli.64		d26,d16,#23
+#if 20<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	vadd.i64	d27,d28,d19
+	veor		d29,d17,d18
+	veor		d24,d25
+	vand		d29,d16
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d18			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d20,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d20,#34
+	vshr.u64	d26,d20,#39
+	vsli.64		d24,d20,#36
+	vsli.64		d25,d20,#30
+	vsli.64		d26,d20,#25
+	vadd.i64	d27,d4
+	vorr		d30,d20,d22
+	vand		d29,d20,d22
+	veor		d19,d24,d25
+	vand		d30,d21
+	veor		d19,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d19,d27
+	vadd.i64	d23,d27
+	vadd.i64	d19,d30
+	vshr.u64	d24,d23,#14	@ 21
+#if 21<16
+	vld1.64		{d5},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d23,#18
+	vshr.u64	d26,d23,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d23,#50
+	vsli.64		d25,d23,#46
+	vsli.64		d26,d23,#23
+#if 21<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	vadd.i64	d27,d28,d18
+	veor		d29,d16,d17
+	veor		d24,d25
+	vand		d29,d23
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d17			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d19,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d19,#34
+	vshr.u64	d26,d19,#39
+	vsli.64		d24,d19,#36
+	vsli.64		d25,d19,#30
+	vsli.64		d26,d19,#25
+	vadd.i64	d27,d5
+	vorr		d30,d19,d21
+	vand		d29,d19,d21
+	veor		d18,d24,d25
+	vand		d30,d20
+	veor		d18,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d18,d27
+	vadd.i64	d22,d27
+	vadd.i64	d18,d30
+	vshr.u64	q12,q2,#19
+	vshr.u64	q13,q2,#61
+	vshr.u64	q15,q2,#6
+	vsli.64		q12,q2,#45
+	vext.8		q14,q3,q4,#8	@ X[i+1]
+	vsli.64		q13,q2,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q3,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q7,q0,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d22,#14		@ from NEON_00_15
+	vadd.i64	q3,q14
+	vshr.u64	d25,d22,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d22,#41		@ from NEON_00_15
+	vadd.i64	q3,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d22,#50
+	vsli.64		d25,d22,#46
+	vsli.64		d26,d22,#23
+#if 22<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	vadd.i64	d27,d28,d17
+	veor		d29,d23,d16
+	veor		d24,d25
+	vand		d29,d22
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d16			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d18,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d18,#34
+	vshr.u64	d26,d18,#39
+	vsli.64		d24,d18,#36
+	vsli.64		d25,d18,#30
+	vsli.64		d26,d18,#25
+	vadd.i64	d27,d6
+	vorr		d30,d18,d20
+	vand		d29,d18,d20
+	veor		d17,d24,d25
+	vand		d30,d19
+	veor		d17,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d17,d27
+	vadd.i64	d21,d27
+	vadd.i64	d17,d30
+	vshr.u64	d24,d21,#14	@ 23
+#if 23<16
+	vld1.64		{d7},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d21,#18
+	vshr.u64	d26,d21,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d21,#50
+	vsli.64		d25,d21,#46
+	vsli.64		d26,d21,#23
+#if 23<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	vadd.i64	d27,d28,d16
+	veor		d29,d22,d23
+	veor		d24,d25
+	vand		d29,d21
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d23			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d17,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d17,#34
+	vshr.u64	d26,d17,#39
+	vsli.64		d24,d17,#36
+	vsli.64		d25,d17,#30
+	vsli.64		d26,d17,#25
+	vadd.i64	d27,d7
+	vorr		d30,d17,d19
+	vand		d29,d17,d19
+	veor		d16,d24,d25
+	vand		d30,d18
+	veor		d16,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d16,d27
+	vadd.i64	d20,d27
+	vadd.i64	d16,d30
+	vshr.u64	q12,q3,#19
+	vshr.u64	q13,q3,#61
+	vshr.u64	q15,q3,#6
+	vsli.64		q12,q3,#45
+	vext.8		q14,q4,q5,#8	@ X[i+1]
+	vsli.64		q13,q3,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q4,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q0,q1,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d20,#14		@ from NEON_00_15
+	vadd.i64	q4,q14
+	vshr.u64	d25,d20,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d20,#41		@ from NEON_00_15
+	vadd.i64	q4,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d20,#50
+	vsli.64		d25,d20,#46
+	vsli.64		d26,d20,#23
+#if 24<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	vadd.i64	d27,d28,d23
+	veor		d29,d21,d22
+	veor		d24,d25
+	vand		d29,d20
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d22			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d16,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d16,#34
+	vshr.u64	d26,d16,#39
+	vsli.64		d24,d16,#36
+	vsli.64		d25,d16,#30
+	vsli.64		d26,d16,#25
+	vadd.i64	d27,d8
+	vorr		d30,d16,d18
+	vand		d29,d16,d18
+	veor		d23,d24,d25
+	vand		d30,d17
+	veor		d23,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d23,d27
+	vadd.i64	d19,d27
+	vadd.i64	d23,d30
+	vshr.u64	d24,d19,#14	@ 25
+#if 25<16
+	vld1.64		{d9},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d19,#18
+	vshr.u64	d26,d19,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d19,#50
+	vsli.64		d25,d19,#46
+	vsli.64		d26,d19,#23
+#if 25<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	vadd.i64	d27,d28,d22
+	veor		d29,d20,d21
+	veor		d24,d25
+	vand		d29,d19
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d21			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d23,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d23,#34
+	vshr.u64	d26,d23,#39
+	vsli.64		d24,d23,#36
+	vsli.64		d25,d23,#30
+	vsli.64		d26,d23,#25
+	vadd.i64	d27,d9
+	vorr		d30,d23,d17
+	vand		d29,d23,d17
+	veor		d22,d24,d25
+	vand		d30,d16
+	veor		d22,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d22,d27
+	vadd.i64	d18,d27
+	vadd.i64	d22,d30
+	vshr.u64	q12,q4,#19
+	vshr.u64	q13,q4,#61
+	vshr.u64	q15,q4,#6
+	vsli.64		q12,q4,#45
+	vext.8		q14,q5,q6,#8	@ X[i+1]
+	vsli.64		q13,q4,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q5,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q1,q2,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d18,#14		@ from NEON_00_15
+	vadd.i64	q5,q14
+	vshr.u64	d25,d18,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d18,#41		@ from NEON_00_15
+	vadd.i64	q5,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d18,#50
+	vsli.64		d25,d18,#46
+	vsli.64		d26,d18,#23
+#if 26<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	vadd.i64	d27,d28,d21
+	veor		d29,d19,d20
+	veor		d24,d25
+	vand		d29,d18
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d20			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d22,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d22,#34
+	vshr.u64	d26,d22,#39
+	vsli.64		d24,d22,#36
+	vsli.64		d25,d22,#30
+	vsli.64		d26,d22,#25
+	vadd.i64	d27,d10
+	vorr		d30,d22,d16
+	vand		d29,d22,d16
+	veor		d21,d24,d25
+	vand		d30,d23
+	veor		d21,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d21,d27
+	vadd.i64	d17,d27
+	vadd.i64	d21,d30
+	vshr.u64	d24,d17,#14	@ 27
+#if 27<16
+	vld1.64		{d11},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d17,#18
+	vshr.u64	d26,d17,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d17,#50
+	vsli.64		d25,d17,#46
+	vsli.64		d26,d17,#23
+#if 27<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	vadd.i64	d27,d28,d20
+	veor		d29,d18,d19
+	veor		d24,d25
+	vand		d29,d17
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d19			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d21,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d21,#34
+	vshr.u64	d26,d21,#39
+	vsli.64		d24,d21,#36
+	vsli.64		d25,d21,#30
+	vsli.64		d26,d21,#25
+	vadd.i64	d27,d11
+	vorr		d30,d21,d23
+	vand		d29,d21,d23
+	veor		d20,d24,d25
+	vand		d30,d22
+	veor		d20,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d20,d27
+	vadd.i64	d16,d27
+	vadd.i64	d20,d30
+	vshr.u64	q12,q5,#19
+	vshr.u64	q13,q5,#61
+	vshr.u64	q15,q5,#6
+	vsli.64		q12,q5,#45
+	vext.8		q14,q6,q7,#8	@ X[i+1]
+	vsli.64		q13,q5,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q6,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q2,q3,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d16,#14		@ from NEON_00_15
+	vadd.i64	q6,q14
+	vshr.u64	d25,d16,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d16,#41		@ from NEON_00_15
+	vadd.i64	q6,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d16,#50
+	vsli.64		d25,d16,#46
+	vsli.64		d26,d16,#23
+#if 28<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	vadd.i64	d27,d28,d19
+	veor		d29,d17,d18
+	veor		d24,d25
+	vand		d29,d16
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d18			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d20,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d20,#34
+	vshr.u64	d26,d20,#39
+	vsli.64		d24,d20,#36
+	vsli.64		d25,d20,#30
+	vsli.64		d26,d20,#25
+	vadd.i64	d27,d12
+	vorr		d30,d20,d22
+	vand		d29,d20,d22
+	veor		d19,d24,d25
+	vand		d30,d21
+	veor		d19,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d19,d27
+	vadd.i64	d23,d27
+	vadd.i64	d19,d30
+	vshr.u64	d24,d23,#14	@ 29
+#if 29<16
+	vld1.64		{d13},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d23,#18
+	vshr.u64	d26,d23,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d23,#50
+	vsli.64		d25,d23,#46
+	vsli.64		d26,d23,#23
+#if 29<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	vadd.i64	d27,d28,d18
+	veor		d29,d16,d17
+	veor		d24,d25
+	vand		d29,d23
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d17			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d19,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d19,#34
+	vshr.u64	d26,d19,#39
+	vsli.64		d24,d19,#36
+	vsli.64		d25,d19,#30
+	vsli.64		d26,d19,#25
+	vadd.i64	d27,d13
+	vorr		d30,d19,d21
+	vand		d29,d19,d21
+	veor		d18,d24,d25
+	vand		d30,d20
+	veor		d18,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d18,d27
+	vadd.i64	d22,d27
+	vadd.i64	d18,d30
+	vshr.u64	q12,q6,#19
+	vshr.u64	q13,q6,#61
+	vshr.u64	q15,q6,#6
+	vsli.64		q12,q6,#45
+	vext.8		q14,q7,q0,#8	@ X[i+1]
+	vsli.64		q13,q6,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q7,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q3,q4,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d22,#14		@ from NEON_00_15
+	vadd.i64	q7,q14
+	vshr.u64	d25,d22,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d22,#41		@ from NEON_00_15
+	vadd.i64	q7,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d22,#50
+	vsli.64		d25,d22,#46
+	vsli.64		d26,d22,#23
+#if 30<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	vadd.i64	d27,d28,d17
+	veor		d29,d23,d16
+	veor		d24,d25
+	vand		d29,d22
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d16			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d18,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d18,#34
+	vshr.u64	d26,d18,#39
+	vsli.64		d24,d18,#36
+	vsli.64		d25,d18,#30
+	vsli.64		d26,d18,#25
+	vadd.i64	d27,d14
+	vorr		d30,d18,d20
+	vand		d29,d18,d20
+	veor		d17,d24,d25
+	vand		d30,d19
+	veor		d17,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d17,d27
+	vadd.i64	d21,d27
+	vadd.i64	d17,d30
+	vshr.u64	d24,d21,#14	@ 31
+#if 31<16
+	vld1.64		{d15},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d21,#18
+	vshr.u64	d26,d21,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d21,#50
+	vsli.64		d25,d21,#46
+	vsli.64		d26,d21,#23
+#if 31<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	vadd.i64	d27,d28,d16
+	veor		d29,d22,d23
+	veor		d24,d25
+	vand		d29,d21
+	veor		d24,d26			@ Sigma1(e)
+	veor		d29,d23			@ Ch(e,f,g)
+	vadd.i64	d27,d24
+	vshr.u64	d24,d17,#28
+	vadd.i64	d27,d29
+	vshr.u64	d25,d17,#34
+	vshr.u64	d26,d17,#39
+	vsli.64		d24,d17,#36
+	vsli.64		d25,d17,#30
+	vsli.64		d26,d17,#25
+	vadd.i64	d27,d15
+	vorr		d30,d17,d19
+	vand		d29,d17,d19
+	veor		d16,d24,d25
+	vand		d30,d18
+	veor		d16,d26			@ Sigma0(a)
+	vorr		d30,d29		@ Maj(a,b,c)
+	vadd.i64	d16,d27
+	vadd.i64	d20,d27
+	vadd.i64	d16,d30
+	bne		.L16_79_neon
+
+	vldmia		r0,{d24-d31}	@ load context to temp
+	vadd.i64	q8,q12		@ vectorized accumulate
+	vadd.i64	q9,q13
+	vadd.i64	q10,q14
+	vadd.i64	q11,q15
+	vstmia		r0,{d16-d23}	@ save context
+	teq		r1,r2
+	sub		r3,#640	@ rewind K512
+	bne		.Loop_neon
+
+	vldmia	sp!,{d8-d15}		@ epilogue
+	.word	0xe12fff1e
+#endif
+.size	sha512_block_data_order,.-sha512_block_data_order
+.asciz	"SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
+.align	2
+.comm	OPENSSL_armcap_P,4,4
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/sha/sha512-elf-x86_64.S b/ext/libressl/crypto/sha/sha512-elf-x86_64.S
new file mode 100644
index 0000000..1173407
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha512-elf-x86_64.S
@@ -0,0 +1,1806 @@
+#include "x86_arch.h"
+.text	
+
+.globl	sha512_block_data_order
+.type	sha512_block_data_order,@function
+.align	16
+sha512_block_data_order:
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	movq	%rsp,%r11
+	shlq	$4,%rdx
+	subq	$128+32,%rsp
+	leaq	(%rsi,%rdx,8),%rdx
+	andq	$-64,%rsp
+	movq	%rdi,128+0(%rsp)
+	movq	%rsi,128+8(%rsp)
+	movq	%rdx,128+16(%rsp)
+	movq	%r11,128+24(%rsp)
+.Lprologue:
+
+	leaq	K512(%rip),%rbp
+
+	movq	0(%rdi),%rax
+	movq	8(%rdi),%rbx
+	movq	16(%rdi),%rcx
+	movq	24(%rdi),%rdx
+	movq	32(%rdi),%r8
+	movq	40(%rdi),%r9
+	movq	48(%rdi),%r10
+	movq	56(%rdi),%r11
+	jmp	.Lloop
+
+.align	16
+.Lloop:
+	xorq	%rdi,%rdi
+	movq	0(%rsi),%r12
+	movq	%r8,%r13
+	movq	%rax,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r9,%r15
+	movq	%r12,0(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%rax,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r8,%r15
+	movq	%rbx,%r11
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	xorq	%rcx,%r11
+	xorq	%rax,%r14
+	addq	%r15,%r12
+	movq	%rbx,%r15
+
+	rorq	$14,%r13
+	andq	%rax,%r11
+	andq	%rcx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r11
+
+	addq	%r12,%rdx
+	addq	%r12,%r11
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r11
+
+	movq	8(%rsi),%r12
+	movq	%rdx,%r13
+	movq	%r11,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r8,%r15
+	movq	%r12,8(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r11,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rdx,%r15
+	movq	%rax,%r10
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	xorq	%rbx,%r10
+	xorq	%r11,%r14
+	addq	%r15,%r12
+	movq	%rax,%r15
+
+	rorq	$14,%r13
+	andq	%r11,%r10
+	andq	%rbx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r10
+
+	addq	%r12,%rcx
+	addq	%r12,%r10
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r10
+
+	movq	16(%rsi),%r12
+	movq	%rcx,%r13
+	movq	%r10,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rdx,%r15
+	movq	%r12,16(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r10,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rcx,%r15
+	movq	%r11,%r9
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	xorq	%rax,%r9
+	xorq	%r10,%r14
+	addq	%r15,%r12
+	movq	%r11,%r15
+
+	rorq	$14,%r13
+	andq	%r10,%r9
+	andq	%rax,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r9
+
+	addq	%r12,%rbx
+	addq	%r12,%r9
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r9
+
+	movq	24(%rsi),%r12
+	movq	%rbx,%r13
+	movq	%r9,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rcx,%r15
+	movq	%r12,24(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%r9,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rbx,%r15
+	movq	%r10,%r8
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	xorq	%r11,%r8
+	xorq	%r9,%r14
+	addq	%r15,%r12
+	movq	%r10,%r15
+
+	rorq	$14,%r13
+	andq	%r9,%r8
+	andq	%r11,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r8
+
+	addq	%r12,%rax
+	addq	%r12,%r8
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r8
+
+	movq	32(%rsi),%r12
+	movq	%rax,%r13
+	movq	%r8,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rbx,%r15
+	movq	%r12,32(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%r8,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rax,%r15
+	movq	%r9,%rdx
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	xorq	%r10,%rdx
+	xorq	%r8,%r14
+	addq	%r15,%r12
+	movq	%r9,%r15
+
+	rorq	$14,%r13
+	andq	%r8,%rdx
+	andq	%r10,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rdx
+
+	addq	%r12,%r11
+	addq	%r12,%rdx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rdx
+
+	movq	40(%rsi),%r12
+	movq	%r11,%r13
+	movq	%rdx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rax,%r15
+	movq	%r12,40(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rdx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r11,%r15
+	movq	%r8,%rcx
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	xorq	%r9,%rcx
+	xorq	%rdx,%r14
+	addq	%r15,%r12
+	movq	%r8,%r15
+
+	rorq	$14,%r13
+	andq	%rdx,%rcx
+	andq	%r9,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rcx
+
+	addq	%r12,%r10
+	addq	%r12,%rcx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rcx
+
+	movq	48(%rsi),%r12
+	movq	%r10,%r13
+	movq	%rcx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r11,%r15
+	movq	%r12,48(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rcx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r10,%r15
+	movq	%rdx,%rbx
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	xorq	%r8,%rbx
+	xorq	%rcx,%r14
+	addq	%r15,%r12
+	movq	%rdx,%r15
+
+	rorq	$14,%r13
+	andq	%rcx,%rbx
+	andq	%r8,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rbx
+
+	addq	%r12,%r9
+	addq	%r12,%rbx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rbx
+
+	movq	56(%rsi),%r12
+	movq	%r9,%r13
+	movq	%rbx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r10,%r15
+	movq	%r12,56(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%rbx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r9,%r15
+	movq	%rcx,%rax
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	xorq	%rdx,%rax
+	xorq	%rbx,%r14
+	addq	%r15,%r12
+	movq	%rcx,%r15
+
+	rorq	$14,%r13
+	andq	%rbx,%rax
+	andq	%rdx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rax
+
+	addq	%r12,%r8
+	addq	%r12,%rax
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rax
+
+	movq	64(%rsi),%r12
+	movq	%r8,%r13
+	movq	%rax,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r9,%r15
+	movq	%r12,64(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%rax,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r8,%r15
+	movq	%rbx,%r11
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	xorq	%rcx,%r11
+	xorq	%rax,%r14
+	addq	%r15,%r12
+	movq	%rbx,%r15
+
+	rorq	$14,%r13
+	andq	%rax,%r11
+	andq	%rcx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r11
+
+	addq	%r12,%rdx
+	addq	%r12,%r11
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r11
+
+	movq	72(%rsi),%r12
+	movq	%rdx,%r13
+	movq	%r11,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r8,%r15
+	movq	%r12,72(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r11,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rdx,%r15
+	movq	%rax,%r10
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	xorq	%rbx,%r10
+	xorq	%r11,%r14
+	addq	%r15,%r12
+	movq	%rax,%r15
+
+	rorq	$14,%r13
+	andq	%r11,%r10
+	andq	%rbx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r10
+
+	addq	%r12,%rcx
+	addq	%r12,%r10
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r10
+
+	movq	80(%rsi),%r12
+	movq	%rcx,%r13
+	movq	%r10,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rdx,%r15
+	movq	%r12,80(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r10,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rcx,%r15
+	movq	%r11,%r9
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	xorq	%rax,%r9
+	xorq	%r10,%r14
+	addq	%r15,%r12
+	movq	%r11,%r15
+
+	rorq	$14,%r13
+	andq	%r10,%r9
+	andq	%rax,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r9
+
+	addq	%r12,%rbx
+	addq	%r12,%r9
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r9
+
+	movq	88(%rsi),%r12
+	movq	%rbx,%r13
+	movq	%r9,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rcx,%r15
+	movq	%r12,88(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%r9,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rbx,%r15
+	movq	%r10,%r8
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	xorq	%r11,%r8
+	xorq	%r9,%r14
+	addq	%r15,%r12
+	movq	%r10,%r15
+
+	rorq	$14,%r13
+	andq	%r9,%r8
+	andq	%r11,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r8
+
+	addq	%r12,%rax
+	addq	%r12,%r8
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r8
+
+	movq	96(%rsi),%r12
+	movq	%rax,%r13
+	movq	%r8,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rbx,%r15
+	movq	%r12,96(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%r8,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rax,%r15
+	movq	%r9,%rdx
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	xorq	%r10,%rdx
+	xorq	%r8,%r14
+	addq	%r15,%r12
+	movq	%r9,%r15
+
+	rorq	$14,%r13
+	andq	%r8,%rdx
+	andq	%r10,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rdx
+
+	addq	%r12,%r11
+	addq	%r12,%rdx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rdx
+
+	movq	104(%rsi),%r12
+	movq	%r11,%r13
+	movq	%rdx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rax,%r15
+	movq	%r12,104(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rdx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r11,%r15
+	movq	%r8,%rcx
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	xorq	%r9,%rcx
+	xorq	%rdx,%r14
+	addq	%r15,%r12
+	movq	%r8,%r15
+
+	rorq	$14,%r13
+	andq	%rdx,%rcx
+	andq	%r9,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rcx
+
+	addq	%r12,%r10
+	addq	%r12,%rcx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rcx
+
+	movq	112(%rsi),%r12
+	movq	%r10,%r13
+	movq	%rcx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r11,%r15
+	movq	%r12,112(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rcx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r10,%r15
+	movq	%rdx,%rbx
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	xorq	%r8,%rbx
+	xorq	%rcx,%r14
+	addq	%r15,%r12
+	movq	%rdx,%r15
+
+	rorq	$14,%r13
+	andq	%rcx,%rbx
+	andq	%r8,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rbx
+
+	addq	%r12,%r9
+	addq	%r12,%rbx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rbx
+
+	movq	120(%rsi),%r12
+	movq	%r9,%r13
+	movq	%rbx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r10,%r15
+	movq	%r12,120(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%rbx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r9,%r15
+	movq	%rcx,%rax
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	xorq	%rdx,%rax
+	xorq	%rbx,%r14
+	addq	%r15,%r12
+	movq	%rcx,%r15
+
+	rorq	$14,%r13
+	andq	%rbx,%rax
+	andq	%rdx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rax
+
+	addq	%r12,%r8
+	addq	%r12,%rax
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rax
+
+	jmp	.Lrounds_16_xx
+.align	16
+.Lrounds_16_xx:
+	movq	8(%rsp),%r13
+	movq	112(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	72(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	0(%rsp),%r12
+	movq	%r8,%r13
+	addq	%r14,%r12
+	movq	%rax,%r14
+	rorq	$23,%r13
+	movq	%r9,%r15
+	movq	%r12,0(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%rax,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r8,%r15
+	movq	%rbx,%r11
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	xorq	%rcx,%r11
+	xorq	%rax,%r14
+	addq	%r15,%r12
+	movq	%rbx,%r15
+
+	rorq	$14,%r13
+	andq	%rax,%r11
+	andq	%rcx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r11
+
+	addq	%r12,%rdx
+	addq	%r12,%r11
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r11
+
+	movq	16(%rsp),%r13
+	movq	120(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	80(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	8(%rsp),%r12
+	movq	%rdx,%r13
+	addq	%r14,%r12
+	movq	%r11,%r14
+	rorq	$23,%r13
+	movq	%r8,%r15
+	movq	%r12,8(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r11,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rdx,%r15
+	movq	%rax,%r10
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	xorq	%rbx,%r10
+	xorq	%r11,%r14
+	addq	%r15,%r12
+	movq	%rax,%r15
+
+	rorq	$14,%r13
+	andq	%r11,%r10
+	andq	%rbx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r10
+
+	addq	%r12,%rcx
+	addq	%r12,%r10
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r10
+
+	movq	24(%rsp),%r13
+	movq	0(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	88(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	16(%rsp),%r12
+	movq	%rcx,%r13
+	addq	%r14,%r12
+	movq	%r10,%r14
+	rorq	$23,%r13
+	movq	%rdx,%r15
+	movq	%r12,16(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r10,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rcx,%r15
+	movq	%r11,%r9
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	xorq	%rax,%r9
+	xorq	%r10,%r14
+	addq	%r15,%r12
+	movq	%r11,%r15
+
+	rorq	$14,%r13
+	andq	%r10,%r9
+	andq	%rax,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r9
+
+	addq	%r12,%rbx
+	addq	%r12,%r9
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r9
+
+	movq	32(%rsp),%r13
+	movq	8(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	96(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	24(%rsp),%r12
+	movq	%rbx,%r13
+	addq	%r14,%r12
+	movq	%r9,%r14
+	rorq	$23,%r13
+	movq	%rcx,%r15
+	movq	%r12,24(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%r9,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rbx,%r15
+	movq	%r10,%r8
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	xorq	%r11,%r8
+	xorq	%r9,%r14
+	addq	%r15,%r12
+	movq	%r10,%r15
+
+	rorq	$14,%r13
+	andq	%r9,%r8
+	andq	%r11,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r8
+
+	addq	%r12,%rax
+	addq	%r12,%r8
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r8
+
+	movq	40(%rsp),%r13
+	movq	16(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	104(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	32(%rsp),%r12
+	movq	%rax,%r13
+	addq	%r14,%r12
+	movq	%r8,%r14
+	rorq	$23,%r13
+	movq	%rbx,%r15
+	movq	%r12,32(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%r8,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rax,%r15
+	movq	%r9,%rdx
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	xorq	%r10,%rdx
+	xorq	%r8,%r14
+	addq	%r15,%r12
+	movq	%r9,%r15
+
+	rorq	$14,%r13
+	andq	%r8,%rdx
+	andq	%r10,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rdx
+
+	addq	%r12,%r11
+	addq	%r12,%rdx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rdx
+
+	movq	48(%rsp),%r13
+	movq	24(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	112(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	40(%rsp),%r12
+	movq	%r11,%r13
+	addq	%r14,%r12
+	movq	%rdx,%r14
+	rorq	$23,%r13
+	movq	%rax,%r15
+	movq	%r12,40(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rdx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r11,%r15
+	movq	%r8,%rcx
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	xorq	%r9,%rcx
+	xorq	%rdx,%r14
+	addq	%r15,%r12
+	movq	%r8,%r15
+
+	rorq	$14,%r13
+	andq	%rdx,%rcx
+	andq	%r9,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rcx
+
+	addq	%r12,%r10
+	addq	%r12,%rcx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rcx
+
+	movq	56(%rsp),%r13
+	movq	32(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	120(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	48(%rsp),%r12
+	movq	%r10,%r13
+	addq	%r14,%r12
+	movq	%rcx,%r14
+	rorq	$23,%r13
+	movq	%r11,%r15
+	movq	%r12,48(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rcx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r10,%r15
+	movq	%rdx,%rbx
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	xorq	%r8,%rbx
+	xorq	%rcx,%r14
+	addq	%r15,%r12
+	movq	%rdx,%r15
+
+	rorq	$14,%r13
+	andq	%rcx,%rbx
+	andq	%r8,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rbx
+
+	addq	%r12,%r9
+	addq	%r12,%rbx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rbx
+
+	movq	64(%rsp),%r13
+	movq	40(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	0(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	56(%rsp),%r12
+	movq	%r9,%r13
+	addq	%r14,%r12
+	movq	%rbx,%r14
+	rorq	$23,%r13
+	movq	%r10,%r15
+	movq	%r12,56(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%rbx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r9,%r15
+	movq	%rcx,%rax
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	xorq	%rdx,%rax
+	xorq	%rbx,%r14
+	addq	%r15,%r12
+	movq	%rcx,%r15
+
+	rorq	$14,%r13
+	andq	%rbx,%rax
+	andq	%rdx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rax
+
+	addq	%r12,%r8
+	addq	%r12,%rax
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rax
+
+	movq	72(%rsp),%r13
+	movq	48(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	8(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	64(%rsp),%r12
+	movq	%r8,%r13
+	addq	%r14,%r12
+	movq	%rax,%r14
+	rorq	$23,%r13
+	movq	%r9,%r15
+	movq	%r12,64(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%rax,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r8,%r15
+	movq	%rbx,%r11
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	xorq	%rcx,%r11
+	xorq	%rax,%r14
+	addq	%r15,%r12
+	movq	%rbx,%r15
+
+	rorq	$14,%r13
+	andq	%rax,%r11
+	andq	%rcx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r11
+
+	addq	%r12,%rdx
+	addq	%r12,%r11
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r11
+
+	movq	80(%rsp),%r13
+	movq	56(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	16(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	72(%rsp),%r12
+	movq	%rdx,%r13
+	addq	%r14,%r12
+	movq	%r11,%r14
+	rorq	$23,%r13
+	movq	%r8,%r15
+	movq	%r12,72(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r11,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rdx,%r15
+	movq	%rax,%r10
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	xorq	%rbx,%r10
+	xorq	%r11,%r14
+	addq	%r15,%r12
+	movq	%rax,%r15
+
+	rorq	$14,%r13
+	andq	%r11,%r10
+	andq	%rbx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r10
+
+	addq	%r12,%rcx
+	addq	%r12,%r10
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r10
+
+	movq	88(%rsp),%r13
+	movq	64(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	24(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	80(%rsp),%r12
+	movq	%rcx,%r13
+	addq	%r14,%r12
+	movq	%r10,%r14
+	rorq	$23,%r13
+	movq	%rdx,%r15
+	movq	%r12,80(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r10,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rcx,%r15
+	movq	%r11,%r9
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	xorq	%rax,%r9
+	xorq	%r10,%r14
+	addq	%r15,%r12
+	movq	%r11,%r15
+
+	rorq	$14,%r13
+	andq	%r10,%r9
+	andq	%rax,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r9
+
+	addq	%r12,%rbx
+	addq	%r12,%r9
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r9
+
+	movq	96(%rsp),%r13
+	movq	72(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	32(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	88(%rsp),%r12
+	movq	%rbx,%r13
+	addq	%r14,%r12
+	movq	%r9,%r14
+	rorq	$23,%r13
+	movq	%rcx,%r15
+	movq	%r12,88(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%r9,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rbx,%r15
+	movq	%r10,%r8
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	xorq	%r11,%r8
+	xorq	%r9,%r14
+	addq	%r15,%r12
+	movq	%r10,%r15
+
+	rorq	$14,%r13
+	andq	%r9,%r8
+	andq	%r11,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r8
+
+	addq	%r12,%rax
+	addq	%r12,%r8
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r8
+
+	movq	104(%rsp),%r13
+	movq	80(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	40(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	96(%rsp),%r12
+	movq	%rax,%r13
+	addq	%r14,%r12
+	movq	%r8,%r14
+	rorq	$23,%r13
+	movq	%rbx,%r15
+	movq	%r12,96(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%r8,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rax,%r15
+	movq	%r9,%rdx
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	xorq	%r10,%rdx
+	xorq	%r8,%r14
+	addq	%r15,%r12
+	movq	%r9,%r15
+
+	rorq	$14,%r13
+	andq	%r8,%rdx
+	andq	%r10,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rdx
+
+	addq	%r12,%r11
+	addq	%r12,%rdx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rdx
+
+	movq	112(%rsp),%r13
+	movq	88(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	48(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	104(%rsp),%r12
+	movq	%r11,%r13
+	addq	%r14,%r12
+	movq	%rdx,%r14
+	rorq	$23,%r13
+	movq	%rax,%r15
+	movq	%r12,104(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rdx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r11,%r15
+	movq	%r8,%rcx
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	xorq	%r9,%rcx
+	xorq	%rdx,%r14
+	addq	%r15,%r12
+	movq	%r8,%r15
+
+	rorq	$14,%r13
+	andq	%rdx,%rcx
+	andq	%r9,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rcx
+
+	addq	%r12,%r10
+	addq	%r12,%rcx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rcx
+
+	movq	120(%rsp),%r13
+	movq	96(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	56(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	112(%rsp),%r12
+	movq	%r10,%r13
+	addq	%r14,%r12
+	movq	%rcx,%r14
+	rorq	$23,%r13
+	movq	%r11,%r15
+	movq	%r12,112(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rcx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r10,%r15
+	movq	%rdx,%rbx
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	xorq	%r8,%rbx
+	xorq	%rcx,%r14
+	addq	%r15,%r12
+	movq	%rdx,%r15
+
+	rorq	$14,%r13
+	andq	%rcx,%rbx
+	andq	%r8,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rbx
+
+	addq	%r12,%r9
+	addq	%r12,%rbx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rbx
+
+	movq	0(%rsp),%r13
+	movq	104(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	64(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	120(%rsp),%r12
+	movq	%r9,%r13
+	addq	%r14,%r12
+	movq	%rbx,%r14
+	rorq	$23,%r13
+	movq	%r10,%r15
+	movq	%r12,120(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%rbx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r9,%r15
+	movq	%rcx,%rax
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	xorq	%rdx,%rax
+	xorq	%rbx,%r14
+	addq	%r15,%r12
+	movq	%rcx,%r15
+
+	rorq	$14,%r13
+	andq	%rbx,%rax
+	andq	%rdx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rax
+
+	addq	%r12,%r8
+	addq	%r12,%rax
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rax
+
+	cmpq	$80,%rdi
+	jb	.Lrounds_16_xx
+
+	movq	128+0(%rsp),%rdi
+	leaq	128(%rsi),%rsi
+
+	addq	0(%rdi),%rax
+	addq	8(%rdi),%rbx
+	addq	16(%rdi),%rcx
+	addq	24(%rdi),%rdx
+	addq	32(%rdi),%r8
+	addq	40(%rdi),%r9
+	addq	48(%rdi),%r10
+	addq	56(%rdi),%r11
+
+	cmpq	128+16(%rsp),%rsi
+
+	movq	%rax,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%rcx,16(%rdi)
+	movq	%rdx,24(%rdi)
+	movq	%r8,32(%rdi)
+	movq	%r9,40(%rdi)
+	movq	%r10,48(%rdi)
+	movq	%r11,56(%rdi)
+	jb	.Lloop
+
+	movq	128+24(%rsp),%rsi
+	movq	(%rsi),%r15
+	movq	8(%rsi),%r14
+	movq	16(%rsi),%r13
+	movq	24(%rsi),%r12
+	movq	32(%rsi),%rbp
+	movq	40(%rsi),%rbx
+	leaq	48(%rsi),%rsp
+.Lepilogue:
+	retq
+.size	sha512_block_data_order,.-sha512_block_data_order
+.align	64
+.type	K512,@object
+K512:
+.quad	0x428a2f98d728ae22,0x7137449123ef65cd
+.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad	0x3956c25bf348b538,0x59f111f1b605d019
+.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad	0xd807aa98a3030242,0x12835b0145706fbe
+.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad	0x9bdc06a725c71235,0xc19bf174cf692694
+.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad	0x983e5152ee66dfab,0xa831c66d2db43210
+.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad	0x06ca6351e003826f,0x142929670a0e6e70
+.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad	0x81c2c92e47edaee6,0x92722c851482353b
+.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad	0xd192e819d6ef5218,0xd69906245565a910
+.quad	0xf40e35855771202a,0x106aa07032bbd1b8
+.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad	0x90befffa23631e28,0xa4506cebde82bde9
+.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad	0xca273eceea26619c,0xd186b8c721c0c207
+.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
+.quad	0x113f9804bef90dae,0x1b710b35131c471b
+.quad	0x28db77f523047d84,0x32caab7b40c72493
+.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
+#if defined(HAVE_GNU_STACK)
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/ext/libressl/crypto/sha/sha512-macosx-x86_64.S b/ext/libressl/crypto/sha/sha512-macosx-x86_64.S
new file mode 100644
index 0000000..7581da4
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha512-macosx-x86_64.S
@@ -0,0 +1,1803 @@
+#include "x86_arch.h"
+.text	
+
+.globl	_sha512_block_data_order
+
+.p2align	4
+_sha512_block_data_order:
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	movq	%rsp,%r11
+	shlq	$4,%rdx
+	subq	$128+32,%rsp
+	leaq	(%rsi,%rdx,8),%rdx
+	andq	$-64,%rsp
+	movq	%rdi,128+0(%rsp)
+	movq	%rsi,128+8(%rsp)
+	movq	%rdx,128+16(%rsp)
+	movq	%r11,128+24(%rsp)
+L$prologue:
+
+	leaq	K512(%rip),%rbp
+
+	movq	0(%rdi),%rax
+	movq	8(%rdi),%rbx
+	movq	16(%rdi),%rcx
+	movq	24(%rdi),%rdx
+	movq	32(%rdi),%r8
+	movq	40(%rdi),%r9
+	movq	48(%rdi),%r10
+	movq	56(%rdi),%r11
+	jmp	L$loop
+
+.p2align	4
+L$loop:
+	xorq	%rdi,%rdi
+	movq	0(%rsi),%r12
+	movq	%r8,%r13
+	movq	%rax,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r9,%r15
+	movq	%r12,0(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%rax,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r8,%r15
+	movq	%rbx,%r11
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	xorq	%rcx,%r11
+	xorq	%rax,%r14
+	addq	%r15,%r12
+	movq	%rbx,%r15
+
+	rorq	$14,%r13
+	andq	%rax,%r11
+	andq	%rcx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r11
+
+	addq	%r12,%rdx
+	addq	%r12,%r11
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r11
+
+	movq	8(%rsi),%r12
+	movq	%rdx,%r13
+	movq	%r11,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r8,%r15
+	movq	%r12,8(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r11,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rdx,%r15
+	movq	%rax,%r10
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	xorq	%rbx,%r10
+	xorq	%r11,%r14
+	addq	%r15,%r12
+	movq	%rax,%r15
+
+	rorq	$14,%r13
+	andq	%r11,%r10
+	andq	%rbx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r10
+
+	addq	%r12,%rcx
+	addq	%r12,%r10
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r10
+
+	movq	16(%rsi),%r12
+	movq	%rcx,%r13
+	movq	%r10,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rdx,%r15
+	movq	%r12,16(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r10,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rcx,%r15
+	movq	%r11,%r9
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	xorq	%rax,%r9
+	xorq	%r10,%r14
+	addq	%r15,%r12
+	movq	%r11,%r15
+
+	rorq	$14,%r13
+	andq	%r10,%r9
+	andq	%rax,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r9
+
+	addq	%r12,%rbx
+	addq	%r12,%r9
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r9
+
+	movq	24(%rsi),%r12
+	movq	%rbx,%r13
+	movq	%r9,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rcx,%r15
+	movq	%r12,24(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%r9,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rbx,%r15
+	movq	%r10,%r8
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	xorq	%r11,%r8
+	xorq	%r9,%r14
+	addq	%r15,%r12
+	movq	%r10,%r15
+
+	rorq	$14,%r13
+	andq	%r9,%r8
+	andq	%r11,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r8
+
+	addq	%r12,%rax
+	addq	%r12,%r8
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r8
+
+	movq	32(%rsi),%r12
+	movq	%rax,%r13
+	movq	%r8,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rbx,%r15
+	movq	%r12,32(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%r8,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rax,%r15
+	movq	%r9,%rdx
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	xorq	%r10,%rdx
+	xorq	%r8,%r14
+	addq	%r15,%r12
+	movq	%r9,%r15
+
+	rorq	$14,%r13
+	andq	%r8,%rdx
+	andq	%r10,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rdx
+
+	addq	%r12,%r11
+	addq	%r12,%rdx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rdx
+
+	movq	40(%rsi),%r12
+	movq	%r11,%r13
+	movq	%rdx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rax,%r15
+	movq	%r12,40(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rdx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r11,%r15
+	movq	%r8,%rcx
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	xorq	%r9,%rcx
+	xorq	%rdx,%r14
+	addq	%r15,%r12
+	movq	%r8,%r15
+
+	rorq	$14,%r13
+	andq	%rdx,%rcx
+	andq	%r9,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rcx
+
+	addq	%r12,%r10
+	addq	%r12,%rcx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rcx
+
+	movq	48(%rsi),%r12
+	movq	%r10,%r13
+	movq	%rcx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r11,%r15
+	movq	%r12,48(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rcx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r10,%r15
+	movq	%rdx,%rbx
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	xorq	%r8,%rbx
+	xorq	%rcx,%r14
+	addq	%r15,%r12
+	movq	%rdx,%r15
+
+	rorq	$14,%r13
+	andq	%rcx,%rbx
+	andq	%r8,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rbx
+
+	addq	%r12,%r9
+	addq	%r12,%rbx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rbx
+
+	movq	56(%rsi),%r12
+	movq	%r9,%r13
+	movq	%rbx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r10,%r15
+	movq	%r12,56(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%rbx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r9,%r15
+	movq	%rcx,%rax
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	xorq	%rdx,%rax
+	xorq	%rbx,%r14
+	addq	%r15,%r12
+	movq	%rcx,%r15
+
+	rorq	$14,%r13
+	andq	%rbx,%rax
+	andq	%rdx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rax
+
+	addq	%r12,%r8
+	addq	%r12,%rax
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rax
+
+	movq	64(%rsi),%r12
+	movq	%r8,%r13
+	movq	%rax,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r9,%r15
+	movq	%r12,64(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%rax,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r8,%r15
+	movq	%rbx,%r11
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	xorq	%rcx,%r11
+	xorq	%rax,%r14
+	addq	%r15,%r12
+	movq	%rbx,%r15
+
+	rorq	$14,%r13
+	andq	%rax,%r11
+	andq	%rcx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r11
+
+	addq	%r12,%rdx
+	addq	%r12,%r11
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r11
+
+	movq	72(%rsi),%r12
+	movq	%rdx,%r13
+	movq	%r11,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r8,%r15
+	movq	%r12,72(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r11,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rdx,%r15
+	movq	%rax,%r10
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	xorq	%rbx,%r10
+	xorq	%r11,%r14
+	addq	%r15,%r12
+	movq	%rax,%r15
+
+	rorq	$14,%r13
+	andq	%r11,%r10
+	andq	%rbx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r10
+
+	addq	%r12,%rcx
+	addq	%r12,%r10
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r10
+
+	movq	80(%rsi),%r12
+	movq	%rcx,%r13
+	movq	%r10,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rdx,%r15
+	movq	%r12,80(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r10,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rcx,%r15
+	movq	%r11,%r9
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	xorq	%rax,%r9
+	xorq	%r10,%r14
+	addq	%r15,%r12
+	movq	%r11,%r15
+
+	rorq	$14,%r13
+	andq	%r10,%r9
+	andq	%rax,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r9
+
+	addq	%r12,%rbx
+	addq	%r12,%r9
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r9
+
+	movq	88(%rsi),%r12
+	movq	%rbx,%r13
+	movq	%r9,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rcx,%r15
+	movq	%r12,88(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%r9,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rbx,%r15
+	movq	%r10,%r8
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	xorq	%r11,%r8
+	xorq	%r9,%r14
+	addq	%r15,%r12
+	movq	%r10,%r15
+
+	rorq	$14,%r13
+	andq	%r9,%r8
+	andq	%r11,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r8
+
+	addq	%r12,%rax
+	addq	%r12,%r8
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r8
+
+	movq	96(%rsi),%r12
+	movq	%rax,%r13
+	movq	%r8,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rbx,%r15
+	movq	%r12,96(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%r8,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rax,%r15
+	movq	%r9,%rdx
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	xorq	%r10,%rdx
+	xorq	%r8,%r14
+	addq	%r15,%r12
+	movq	%r9,%r15
+
+	rorq	$14,%r13
+	andq	%r8,%rdx
+	andq	%r10,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rdx
+
+	addq	%r12,%r11
+	addq	%r12,%rdx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rdx
+
+	movq	104(%rsi),%r12
+	movq	%r11,%r13
+	movq	%rdx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rax,%r15
+	movq	%r12,104(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rdx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r11,%r15
+	movq	%r8,%rcx
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	xorq	%r9,%rcx
+	xorq	%rdx,%r14
+	addq	%r15,%r12
+	movq	%r8,%r15
+
+	rorq	$14,%r13
+	andq	%rdx,%rcx
+	andq	%r9,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rcx
+
+	addq	%r12,%r10
+	addq	%r12,%rcx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rcx
+
+	movq	112(%rsi),%r12
+	movq	%r10,%r13
+	movq	%rcx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r11,%r15
+	movq	%r12,112(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rcx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r10,%r15
+	movq	%rdx,%rbx
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	xorq	%r8,%rbx
+	xorq	%rcx,%r14
+	addq	%r15,%r12
+	movq	%rdx,%r15
+
+	rorq	$14,%r13
+	andq	%rcx,%rbx
+	andq	%r8,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rbx
+
+	addq	%r12,%r9
+	addq	%r12,%rbx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rbx
+
+	movq	120(%rsi),%r12
+	movq	%r9,%r13
+	movq	%rbx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r10,%r15
+	movq	%r12,120(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%rbx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r9,%r15
+	movq	%rcx,%rax
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	xorq	%rdx,%rax
+	xorq	%rbx,%r14
+	addq	%r15,%r12
+	movq	%rcx,%r15
+
+	rorq	$14,%r13
+	andq	%rbx,%rax
+	andq	%rdx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rax
+
+	addq	%r12,%r8
+	addq	%r12,%rax
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rax
+
+	jmp	L$rounds_16_xx
+.p2align	4
+L$rounds_16_xx:
+	movq	8(%rsp),%r13
+	movq	112(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	72(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	0(%rsp),%r12
+	movq	%r8,%r13
+	addq	%r14,%r12
+	movq	%rax,%r14
+	rorq	$23,%r13
+	movq	%r9,%r15
+	movq	%r12,0(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%rax,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r8,%r15
+	movq	%rbx,%r11
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	xorq	%rcx,%r11
+	xorq	%rax,%r14
+	addq	%r15,%r12
+	movq	%rbx,%r15
+
+	rorq	$14,%r13
+	andq	%rax,%r11
+	andq	%rcx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r11
+
+	addq	%r12,%rdx
+	addq	%r12,%r11
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r11
+
+	movq	16(%rsp),%r13
+	movq	120(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	80(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	8(%rsp),%r12
+	movq	%rdx,%r13
+	addq	%r14,%r12
+	movq	%r11,%r14
+	rorq	$23,%r13
+	movq	%r8,%r15
+	movq	%r12,8(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r11,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rdx,%r15
+	movq	%rax,%r10
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	xorq	%rbx,%r10
+	xorq	%r11,%r14
+	addq	%r15,%r12
+	movq	%rax,%r15
+
+	rorq	$14,%r13
+	andq	%r11,%r10
+	andq	%rbx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r10
+
+	addq	%r12,%rcx
+	addq	%r12,%r10
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r10
+
+	movq	24(%rsp),%r13
+	movq	0(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	88(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	16(%rsp),%r12
+	movq	%rcx,%r13
+	addq	%r14,%r12
+	movq	%r10,%r14
+	rorq	$23,%r13
+	movq	%rdx,%r15
+	movq	%r12,16(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r10,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rcx,%r15
+	movq	%r11,%r9
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	xorq	%rax,%r9
+	xorq	%r10,%r14
+	addq	%r15,%r12
+	movq	%r11,%r15
+
+	rorq	$14,%r13
+	andq	%r10,%r9
+	andq	%rax,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r9
+
+	addq	%r12,%rbx
+	addq	%r12,%r9
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r9
+
+	movq	32(%rsp),%r13
+	movq	8(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	96(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	24(%rsp),%r12
+	movq	%rbx,%r13
+	addq	%r14,%r12
+	movq	%r9,%r14
+	rorq	$23,%r13
+	movq	%rcx,%r15
+	movq	%r12,24(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%r9,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rbx,%r15
+	movq	%r10,%r8
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	xorq	%r11,%r8
+	xorq	%r9,%r14
+	addq	%r15,%r12
+	movq	%r10,%r15
+
+	rorq	$14,%r13
+	andq	%r9,%r8
+	andq	%r11,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r8
+
+	addq	%r12,%rax
+	addq	%r12,%r8
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r8
+
+	movq	40(%rsp),%r13
+	movq	16(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	104(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	32(%rsp),%r12
+	movq	%rax,%r13
+	addq	%r14,%r12
+	movq	%r8,%r14
+	rorq	$23,%r13
+	movq	%rbx,%r15
+	movq	%r12,32(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%r8,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rax,%r15
+	movq	%r9,%rdx
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	xorq	%r10,%rdx
+	xorq	%r8,%r14
+	addq	%r15,%r12
+	movq	%r9,%r15
+
+	rorq	$14,%r13
+	andq	%r8,%rdx
+	andq	%r10,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rdx
+
+	addq	%r12,%r11
+	addq	%r12,%rdx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rdx
+
+	movq	48(%rsp),%r13
+	movq	24(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	112(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	40(%rsp),%r12
+	movq	%r11,%r13
+	addq	%r14,%r12
+	movq	%rdx,%r14
+	rorq	$23,%r13
+	movq	%rax,%r15
+	movq	%r12,40(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rdx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r11,%r15
+	movq	%r8,%rcx
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	xorq	%r9,%rcx
+	xorq	%rdx,%r14
+	addq	%r15,%r12
+	movq	%r8,%r15
+
+	rorq	$14,%r13
+	andq	%rdx,%rcx
+	andq	%r9,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rcx
+
+	addq	%r12,%r10
+	addq	%r12,%rcx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rcx
+
+	movq	56(%rsp),%r13
+	movq	32(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	120(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	48(%rsp),%r12
+	movq	%r10,%r13
+	addq	%r14,%r12
+	movq	%rcx,%r14
+	rorq	$23,%r13
+	movq	%r11,%r15
+	movq	%r12,48(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rcx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r10,%r15
+	movq	%rdx,%rbx
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	xorq	%r8,%rbx
+	xorq	%rcx,%r14
+	addq	%r15,%r12
+	movq	%rdx,%r15
+
+	rorq	$14,%r13
+	andq	%rcx,%rbx
+	andq	%r8,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rbx
+
+	addq	%r12,%r9
+	addq	%r12,%rbx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rbx
+
+	movq	64(%rsp),%r13
+	movq	40(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	0(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	56(%rsp),%r12
+	movq	%r9,%r13
+	addq	%r14,%r12
+	movq	%rbx,%r14
+	rorq	$23,%r13
+	movq	%r10,%r15
+	movq	%r12,56(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%rbx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r9,%r15
+	movq	%rcx,%rax
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	xorq	%rdx,%rax
+	xorq	%rbx,%r14
+	addq	%r15,%r12
+	movq	%rcx,%r15
+
+	rorq	$14,%r13
+	andq	%rbx,%rax
+	andq	%rdx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rax
+
+	addq	%r12,%r8
+	addq	%r12,%rax
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rax
+
+	movq	72(%rsp),%r13
+	movq	48(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	8(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	64(%rsp),%r12
+	movq	%r8,%r13
+	addq	%r14,%r12
+	movq	%rax,%r14
+	rorq	$23,%r13
+	movq	%r9,%r15
+	movq	%r12,64(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%rax,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r8,%r15
+	movq	%rbx,%r11
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	xorq	%rcx,%r11
+	xorq	%rax,%r14
+	addq	%r15,%r12
+	movq	%rbx,%r15
+
+	rorq	$14,%r13
+	andq	%rax,%r11
+	andq	%rcx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r11
+
+	addq	%r12,%rdx
+	addq	%r12,%r11
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r11
+
+	movq	80(%rsp),%r13
+	movq	56(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	16(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	72(%rsp),%r12
+	movq	%rdx,%r13
+	addq	%r14,%r12
+	movq	%r11,%r14
+	rorq	$23,%r13
+	movq	%r8,%r15
+	movq	%r12,72(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r11,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rdx,%r15
+	movq	%rax,%r10
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	xorq	%rbx,%r10
+	xorq	%r11,%r14
+	addq	%r15,%r12
+	movq	%rax,%r15
+
+	rorq	$14,%r13
+	andq	%r11,%r10
+	andq	%rbx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r10
+
+	addq	%r12,%rcx
+	addq	%r12,%r10
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r10
+
+	movq	88(%rsp),%r13
+	movq	64(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	24(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	80(%rsp),%r12
+	movq	%rcx,%r13
+	addq	%r14,%r12
+	movq	%r10,%r14
+	rorq	$23,%r13
+	movq	%rdx,%r15
+	movq	%r12,80(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r10,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rcx,%r15
+	movq	%r11,%r9
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	xorq	%rax,%r9
+	xorq	%r10,%r14
+	addq	%r15,%r12
+	movq	%r11,%r15
+
+	rorq	$14,%r13
+	andq	%r10,%r9
+	andq	%rax,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r9
+
+	addq	%r12,%rbx
+	addq	%r12,%r9
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r9
+
+	movq	96(%rsp),%r13
+	movq	72(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	32(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	88(%rsp),%r12
+	movq	%rbx,%r13
+	addq	%r14,%r12
+	movq	%r9,%r14
+	rorq	$23,%r13
+	movq	%rcx,%r15
+	movq	%r12,88(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%r9,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rbx,%r15
+	movq	%r10,%r8
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	xorq	%r11,%r8
+	xorq	%r9,%r14
+	addq	%r15,%r12
+	movq	%r10,%r15
+
+	rorq	$14,%r13
+	andq	%r9,%r8
+	andq	%r11,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r8
+
+	addq	%r12,%rax
+	addq	%r12,%r8
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r8
+
+	movq	104(%rsp),%r13
+	movq	80(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	40(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	96(%rsp),%r12
+	movq	%rax,%r13
+	addq	%r14,%r12
+	movq	%r8,%r14
+	rorq	$23,%r13
+	movq	%rbx,%r15
+	movq	%r12,96(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%r8,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rax,%r15
+	movq	%r9,%rdx
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	xorq	%r10,%rdx
+	xorq	%r8,%r14
+	addq	%r15,%r12
+	movq	%r9,%r15
+
+	rorq	$14,%r13
+	andq	%r8,%rdx
+	andq	%r10,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rdx
+
+	addq	%r12,%r11
+	addq	%r12,%rdx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rdx
+
+	movq	112(%rsp),%r13
+	movq	88(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	48(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	104(%rsp),%r12
+	movq	%r11,%r13
+	addq	%r14,%r12
+	movq	%rdx,%r14
+	rorq	$23,%r13
+	movq	%rax,%r15
+	movq	%r12,104(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rdx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r11,%r15
+	movq	%r8,%rcx
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	xorq	%r9,%rcx
+	xorq	%rdx,%r14
+	addq	%r15,%r12
+	movq	%r8,%r15
+
+	rorq	$14,%r13
+	andq	%rdx,%rcx
+	andq	%r9,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rcx
+
+	addq	%r12,%r10
+	addq	%r12,%rcx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rcx
+
+	movq	120(%rsp),%r13
+	movq	96(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	56(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	112(%rsp),%r12
+	movq	%r10,%r13
+	addq	%r14,%r12
+	movq	%rcx,%r14
+	rorq	$23,%r13
+	movq	%r11,%r15
+	movq	%r12,112(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rcx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r10,%r15
+	movq	%rdx,%rbx
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	xorq	%r8,%rbx
+	xorq	%rcx,%r14
+	addq	%r15,%r12
+	movq	%rdx,%r15
+
+	rorq	$14,%r13
+	andq	%rcx,%rbx
+	andq	%r8,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rbx
+
+	addq	%r12,%r9
+	addq	%r12,%rbx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rbx
+
+	movq	0(%rsp),%r13
+	movq	104(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	64(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	120(%rsp),%r12
+	movq	%r9,%r13
+	addq	%r14,%r12
+	movq	%rbx,%r14
+	rorq	$23,%r13
+	movq	%r10,%r15
+	movq	%r12,120(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%rbx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r9,%r15
+	movq	%rcx,%rax
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	xorq	%rdx,%rax
+	xorq	%rbx,%r14
+	addq	%r15,%r12
+	movq	%rcx,%r15
+
+	rorq	$14,%r13
+	andq	%rbx,%rax
+	andq	%rdx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rax
+
+	addq	%r12,%r8
+	addq	%r12,%rax
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rax
+
+	cmpq	$80,%rdi
+	jb	L$rounds_16_xx
+
+	movq	128+0(%rsp),%rdi
+	leaq	128(%rsi),%rsi
+
+	addq	0(%rdi),%rax
+	addq	8(%rdi),%rbx
+	addq	16(%rdi),%rcx
+	addq	24(%rdi),%rdx
+	addq	32(%rdi),%r8
+	addq	40(%rdi),%r9
+	addq	48(%rdi),%r10
+	addq	56(%rdi),%r11
+
+	cmpq	128+16(%rsp),%rsi
+
+	movq	%rax,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%rcx,16(%rdi)
+	movq	%rdx,24(%rdi)
+	movq	%r8,32(%rdi)
+	movq	%r9,40(%rdi)
+	movq	%r10,48(%rdi)
+	movq	%r11,56(%rdi)
+	jb	L$loop
+
+	movq	128+24(%rsp),%rsi
+	movq	(%rsi),%r15
+	movq	8(%rsi),%r14
+	movq	16(%rsi),%r13
+	movq	24(%rsi),%r12
+	movq	32(%rsi),%rbp
+	movq	40(%rsi),%rbx
+	leaq	48(%rsi),%rsp
+L$epilogue:
+	retq
+
+.p2align	6
+
+K512:
+.quad	0x428a2f98d728ae22,0x7137449123ef65cd
+.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad	0x3956c25bf348b538,0x59f111f1b605d019
+.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad	0xd807aa98a3030242,0x12835b0145706fbe
+.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad	0x9bdc06a725c71235,0xc19bf174cf692694
+.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad	0x983e5152ee66dfab,0xa831c66d2db43210
+.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad	0x06ca6351e003826f,0x142929670a0e6e70
+.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad	0x81c2c92e47edaee6,0x92722c851482353b
+.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad	0xd192e819d6ef5218,0xd69906245565a910
+.quad	0xf40e35855771202a,0x106aa07032bbd1b8
+.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad	0x90befffa23631e28,0xa4506cebde82bde9
+.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad	0xca273eceea26619c,0xd186b8c721c0c207
+.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
+.quad	0x113f9804bef90dae,0x1b710b35131c471b
+.quad	0x28db77f523047d84,0x32caab7b40c72493
+.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
diff --git a/ext/libressl/crypto/sha/sha512-masm-x86_64.S b/ext/libressl/crypto/sha/sha512-masm-x86_64.S
new file mode 100644
index 0000000..4a2b9af
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha512-masm-x86_64.S
@@ -0,0 +1,1888 @@
+; 1 "crypto/sha/sha512-masm-x86_64.S.tmp"
+; 1 "<built-in>" 1
+; 1 "<built-in>" 3
+; 340 "<built-in>" 3
+; 1 "<command line>" 1
+; 1 "<built-in>" 2
+; 1 "crypto/sha/sha512-masm-x86_64.S.tmp" 2
+OPTION	DOTNAME
+
+; 1 "./crypto/x86_arch.h" 1
+
+
+; 16 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+; 40 "./crypto/x86_arch.h"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 3 "crypto/sha/sha512-masm-x86_64.S.tmp" 2
+.text$	SEGMENT ALIGN(64) 'CODE'
+
+PUBLIC	sha512_block_data_order
+
+ALIGN	16
+sha512_block_data_order	PROC PUBLIC
+	mov	QWORD PTR[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD PTR[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_sha512_block_data_order::
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+
+
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	mov	r11,rsp
+	shl	rdx,4
+	sub	rsp,16*8+4*8
+	lea	rdx,QWORD PTR[rdx*8+rsi]
+	and	rsp,-64
+	mov	QWORD PTR[((128+0))+rsp],rdi
+	mov	QWORD PTR[((128+8))+rsp],rsi
+	mov	QWORD PTR[((128+16))+rsp],rdx
+	mov	QWORD PTR[((128+24))+rsp],r11
+$L$prologue::
+
+	lea	rbp,QWORD PTR[K512]
+
+	mov	rax,QWORD PTR[rdi]
+	mov	rbx,QWORD PTR[8+rdi]
+	mov	rcx,QWORD PTR[16+rdi]
+	mov	rdx,QWORD PTR[24+rdi]
+	mov	r8,QWORD PTR[32+rdi]
+	mov	r9,QWORD PTR[40+rdi]
+	mov	r10,QWORD PTR[48+rdi]
+	mov	r11,QWORD PTR[56+rdi]
+	jmp	$L$loop
+
+ALIGN	16
+$L$loop::
+	xor	rdi,rdi
+	mov	r12,QWORD PTR[rsi]
+	mov	r13,r8
+	mov	r14,rax
+	bswap	r12
+	ror	r13,23
+	mov	r15,r9
+	mov	QWORD PTR[rsp],r12
+
+	ror	r14,5
+	xor	r13,r8
+	xor	r15,r10
+
+	ror	r13,4
+	add	r12,r11
+	xor	r14,rax
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,r8
+	mov	r11,rbx
+
+	ror	r14,6
+	xor	r13,r8
+	xor	r15,r10
+
+	xor	r11,rcx
+	xor	r14,rax
+	add	r12,r15
+	mov	r15,rbx
+
+	ror	r13,14
+	and	r11,rax
+	and	r15,rcx
+
+	ror	r14,28
+	add	r12,r13
+	add	r11,r15
+
+	add	rdx,r12
+	add	r11,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r11,r14
+
+	mov	r12,QWORD PTR[8+rsi]
+	mov	r13,rdx
+	mov	r14,r11
+	bswap	r12
+	ror	r13,23
+	mov	r15,r8
+	mov	QWORD PTR[8+rsp],r12
+
+	ror	r14,5
+	xor	r13,rdx
+	xor	r15,r9
+
+	ror	r13,4
+	add	r12,r10
+	xor	r14,r11
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,rdx
+	mov	r10,rax
+
+	ror	r14,6
+	xor	r13,rdx
+	xor	r15,r9
+
+	xor	r10,rbx
+	xor	r14,r11
+	add	r12,r15
+	mov	r15,rax
+
+	ror	r13,14
+	and	r10,r11
+	and	r15,rbx
+
+	ror	r14,28
+	add	r12,r13
+	add	r10,r15
+
+	add	rcx,r12
+	add	r10,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r10,r14
+
+	mov	r12,QWORD PTR[16+rsi]
+	mov	r13,rcx
+	mov	r14,r10
+	bswap	r12
+	ror	r13,23
+	mov	r15,rdx
+	mov	QWORD PTR[16+rsp],r12
+
+	ror	r14,5
+	xor	r13,rcx
+	xor	r15,r8
+
+	ror	r13,4
+	add	r12,r9
+	xor	r14,r10
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,rcx
+	mov	r9,r11
+
+	ror	r14,6
+	xor	r13,rcx
+	xor	r15,r8
+
+	xor	r9,rax
+	xor	r14,r10
+	add	r12,r15
+	mov	r15,r11
+
+	ror	r13,14
+	and	r9,r10
+	and	r15,rax
+
+	ror	r14,28
+	add	r12,r13
+	add	r9,r15
+
+	add	rbx,r12
+	add	r9,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r9,r14
+
+	mov	r12,QWORD PTR[24+rsi]
+	mov	r13,rbx
+	mov	r14,r9
+	bswap	r12
+	ror	r13,23
+	mov	r15,rcx
+	mov	QWORD PTR[24+rsp],r12
+
+	ror	r14,5
+	xor	r13,rbx
+	xor	r15,rdx
+
+	ror	r13,4
+	add	r12,r8
+	xor	r14,r9
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,rbx
+	mov	r8,r10
+
+	ror	r14,6
+	xor	r13,rbx
+	xor	r15,rdx
+
+	xor	r8,r11
+	xor	r14,r9
+	add	r12,r15
+	mov	r15,r10
+
+	ror	r13,14
+	and	r8,r9
+	and	r15,r11
+
+	ror	r14,28
+	add	r12,r13
+	add	r8,r15
+
+	add	rax,r12
+	add	r8,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r8,r14
+
+	mov	r12,QWORD PTR[32+rsi]
+	mov	r13,rax
+	mov	r14,r8
+	bswap	r12
+	ror	r13,23
+	mov	r15,rbx
+	mov	QWORD PTR[32+rsp],r12
+
+	ror	r14,5
+	xor	r13,rax
+	xor	r15,rcx
+
+	ror	r13,4
+	add	r12,rdx
+	xor	r14,r8
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,rax
+	mov	rdx,r9
+
+	ror	r14,6
+	xor	r13,rax
+	xor	r15,rcx
+
+	xor	rdx,r10
+	xor	r14,r8
+	add	r12,r15
+	mov	r15,r9
+
+	ror	r13,14
+	and	rdx,r8
+	and	r15,r10
+
+	ror	r14,28
+	add	r12,r13
+	add	rdx,r15
+
+	add	r11,r12
+	add	rdx,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	rdx,r14
+
+	mov	r12,QWORD PTR[40+rsi]
+	mov	r13,r11
+	mov	r14,rdx
+	bswap	r12
+	ror	r13,23
+	mov	r15,rax
+	mov	QWORD PTR[40+rsp],r12
+
+	ror	r14,5
+	xor	r13,r11
+	xor	r15,rbx
+
+	ror	r13,4
+	add	r12,rcx
+	xor	r14,rdx
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,r11
+	mov	rcx,r8
+
+	ror	r14,6
+	xor	r13,r11
+	xor	r15,rbx
+
+	xor	rcx,r9
+	xor	r14,rdx
+	add	r12,r15
+	mov	r15,r8
+
+	ror	r13,14
+	and	rcx,rdx
+	and	r15,r9
+
+	ror	r14,28
+	add	r12,r13
+	add	rcx,r15
+
+	add	r10,r12
+	add	rcx,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	rcx,r14
+
+	mov	r12,QWORD PTR[48+rsi]
+	mov	r13,r10
+	mov	r14,rcx
+	bswap	r12
+	ror	r13,23
+	mov	r15,r11
+	mov	QWORD PTR[48+rsp],r12
+
+	ror	r14,5
+	xor	r13,r10
+	xor	r15,rax
+
+	ror	r13,4
+	add	r12,rbx
+	xor	r14,rcx
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,r10
+	mov	rbx,rdx
+
+	ror	r14,6
+	xor	r13,r10
+	xor	r15,rax
+
+	xor	rbx,r8
+	xor	r14,rcx
+	add	r12,r15
+	mov	r15,rdx
+
+	ror	r13,14
+	and	rbx,rcx
+	and	r15,r8
+
+	ror	r14,28
+	add	r12,r13
+	add	rbx,r15
+
+	add	r9,r12
+	add	rbx,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	rbx,r14
+
+	mov	r12,QWORD PTR[56+rsi]
+	mov	r13,r9
+	mov	r14,rbx
+	bswap	r12
+	ror	r13,23
+	mov	r15,r10
+	mov	QWORD PTR[56+rsp],r12
+
+	ror	r14,5
+	xor	r13,r9
+	xor	r15,r11
+
+	ror	r13,4
+	add	r12,rax
+	xor	r14,rbx
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,r9
+	mov	rax,rcx
+
+	ror	r14,6
+	xor	r13,r9
+	xor	r15,r11
+
+	xor	rax,rdx
+	xor	r14,rbx
+	add	r12,r15
+	mov	r15,rcx
+
+	ror	r13,14
+	and	rax,rbx
+	and	r15,rdx
+
+	ror	r14,28
+	add	r12,r13
+	add	rax,r15
+
+	add	r8,r12
+	add	rax,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	rax,r14
+
+	mov	r12,QWORD PTR[64+rsi]
+	mov	r13,r8
+	mov	r14,rax
+	bswap	r12
+	ror	r13,23
+	mov	r15,r9
+	mov	QWORD PTR[64+rsp],r12
+
+	ror	r14,5
+	xor	r13,r8
+	xor	r15,r10
+
+	ror	r13,4
+	add	r12,r11
+	xor	r14,rax
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,r8
+	mov	r11,rbx
+
+	ror	r14,6
+	xor	r13,r8
+	xor	r15,r10
+
+	xor	r11,rcx
+	xor	r14,rax
+	add	r12,r15
+	mov	r15,rbx
+
+	ror	r13,14
+	and	r11,rax
+	and	r15,rcx
+
+	ror	r14,28
+	add	r12,r13
+	add	r11,r15
+
+	add	rdx,r12
+	add	r11,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r11,r14
+
+	mov	r12,QWORD PTR[72+rsi]
+	mov	r13,rdx
+	mov	r14,r11
+	bswap	r12
+	ror	r13,23
+	mov	r15,r8
+	mov	QWORD PTR[72+rsp],r12
+
+	ror	r14,5
+	xor	r13,rdx
+	xor	r15,r9
+
+	ror	r13,4
+	add	r12,r10
+	xor	r14,r11
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,rdx
+	mov	r10,rax
+
+	ror	r14,6
+	xor	r13,rdx
+	xor	r15,r9
+
+	xor	r10,rbx
+	xor	r14,r11
+	add	r12,r15
+	mov	r15,rax
+
+	ror	r13,14
+	and	r10,r11
+	and	r15,rbx
+
+	ror	r14,28
+	add	r12,r13
+	add	r10,r15
+
+	add	rcx,r12
+	add	r10,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r10,r14
+
+	mov	r12,QWORD PTR[80+rsi]
+	mov	r13,rcx
+	mov	r14,r10
+	bswap	r12
+	ror	r13,23
+	mov	r15,rdx
+	mov	QWORD PTR[80+rsp],r12
+
+	ror	r14,5
+	xor	r13,rcx
+	xor	r15,r8
+
+	ror	r13,4
+	add	r12,r9
+	xor	r14,r10
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,rcx
+	mov	r9,r11
+
+	ror	r14,6
+	xor	r13,rcx
+	xor	r15,r8
+
+	xor	r9,rax
+	xor	r14,r10
+	add	r12,r15
+	mov	r15,r11
+
+	ror	r13,14
+	and	r9,r10
+	and	r15,rax
+
+	ror	r14,28
+	add	r12,r13
+	add	r9,r15
+
+	add	rbx,r12
+	add	r9,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r9,r14
+
+	mov	r12,QWORD PTR[88+rsi]
+	mov	r13,rbx
+	mov	r14,r9
+	bswap	r12
+	ror	r13,23
+	mov	r15,rcx
+	mov	QWORD PTR[88+rsp],r12
+
+	ror	r14,5
+	xor	r13,rbx
+	xor	r15,rdx
+
+	ror	r13,4
+	add	r12,r8
+	xor	r14,r9
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,rbx
+	mov	r8,r10
+
+	ror	r14,6
+	xor	r13,rbx
+	xor	r15,rdx
+
+	xor	r8,r11
+	xor	r14,r9
+	add	r12,r15
+	mov	r15,r10
+
+	ror	r13,14
+	and	r8,r9
+	and	r15,r11
+
+	ror	r14,28
+	add	r12,r13
+	add	r8,r15
+
+	add	rax,r12
+	add	r8,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r8,r14
+
+	mov	r12,QWORD PTR[96+rsi]
+	mov	r13,rax
+	mov	r14,r8
+	bswap	r12
+	ror	r13,23
+	mov	r15,rbx
+	mov	QWORD PTR[96+rsp],r12
+
+	ror	r14,5
+	xor	r13,rax
+	xor	r15,rcx
+
+	ror	r13,4
+	add	r12,rdx
+	xor	r14,r8
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,rax
+	mov	rdx,r9
+
+	ror	r14,6
+	xor	r13,rax
+	xor	r15,rcx
+
+	xor	rdx,r10
+	xor	r14,r8
+	add	r12,r15
+	mov	r15,r9
+
+	ror	r13,14
+	and	rdx,r8
+	and	r15,r10
+
+	ror	r14,28
+	add	r12,r13
+	add	rdx,r15
+
+	add	r11,r12
+	add	rdx,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	rdx,r14
+
+	mov	r12,QWORD PTR[104+rsi]
+	mov	r13,r11
+	mov	r14,rdx
+	bswap	r12
+	ror	r13,23
+	mov	r15,rax
+	mov	QWORD PTR[104+rsp],r12
+
+	ror	r14,5
+	xor	r13,r11
+	xor	r15,rbx
+
+	ror	r13,4
+	add	r12,rcx
+	xor	r14,rdx
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,r11
+	mov	rcx,r8
+
+	ror	r14,6
+	xor	r13,r11
+	xor	r15,rbx
+
+	xor	rcx,r9
+	xor	r14,rdx
+	add	r12,r15
+	mov	r15,r8
+
+	ror	r13,14
+	and	rcx,rdx
+	and	r15,r9
+
+	ror	r14,28
+	add	r12,r13
+	add	rcx,r15
+
+	add	r10,r12
+	add	rcx,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	rcx,r14
+
+	mov	r12,QWORD PTR[112+rsi]
+	mov	r13,r10
+	mov	r14,rcx
+	bswap	r12
+	ror	r13,23
+	mov	r15,r11
+	mov	QWORD PTR[112+rsp],r12
+
+	ror	r14,5
+	xor	r13,r10
+	xor	r15,rax
+
+	ror	r13,4
+	add	r12,rbx
+	xor	r14,rcx
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,r10
+	mov	rbx,rdx
+
+	ror	r14,6
+	xor	r13,r10
+	xor	r15,rax
+
+	xor	rbx,r8
+	xor	r14,rcx
+	add	r12,r15
+	mov	r15,rdx
+
+	ror	r13,14
+	and	rbx,rcx
+	and	r15,r8
+
+	ror	r14,28
+	add	r12,r13
+	add	rbx,r15
+
+	add	r9,r12
+	add	rbx,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	rbx,r14
+
+	mov	r12,QWORD PTR[120+rsi]
+	mov	r13,r9
+	mov	r14,rbx
+	bswap	r12
+	ror	r13,23
+	mov	r15,r10
+	mov	QWORD PTR[120+rsp],r12
+
+	ror	r14,5
+	xor	r13,r9
+	xor	r15,r11
+
+	ror	r13,4
+	add	r12,rax
+	xor	r14,rbx
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,r9
+	mov	rax,rcx
+
+	ror	r14,6
+	xor	r13,r9
+	xor	r15,r11
+
+	xor	rax,rdx
+	xor	r14,rbx
+	add	r12,r15
+	mov	r15,rcx
+
+	ror	r13,14
+	and	rax,rbx
+	and	r15,rdx
+
+	ror	r14,28
+	add	r12,r13
+	add	rax,r15
+
+	add	r8,r12
+	add	rax,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	rax,r14
+
+	jmp	$L$rounds_16_xx
+ALIGN	16
+$L$rounds_16_xx::
+	mov	r13,QWORD PTR[8+rsp]
+	mov	r14,QWORD PTR[112+rsp]
+	mov	r12,r13
+	mov	r15,r14
+
+	ror	r12,7
+	xor	r12,r13
+	shr	r13,7
+
+	ror	r12,1
+	xor	r13,r12
+	mov	r12,QWORD PTR[72+rsp]
+
+	ror	r15,42
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	add	r12,r13
+	xor	r14,r15
+
+	add	r12,QWORD PTR[rsp]
+	mov	r13,r8
+	add	r12,r14
+	mov	r14,rax
+	ror	r13,23
+	mov	r15,r9
+	mov	QWORD PTR[rsp],r12
+
+	ror	r14,5
+	xor	r13,r8
+	xor	r15,r10
+
+	ror	r13,4
+	add	r12,r11
+	xor	r14,rax
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,r8
+	mov	r11,rbx
+
+	ror	r14,6
+	xor	r13,r8
+	xor	r15,r10
+
+	xor	r11,rcx
+	xor	r14,rax
+	add	r12,r15
+	mov	r15,rbx
+
+	ror	r13,14
+	and	r11,rax
+	and	r15,rcx
+
+	ror	r14,28
+	add	r12,r13
+	add	r11,r15
+
+	add	rdx,r12
+	add	r11,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r11,r14
+
+	mov	r13,QWORD PTR[16+rsp]
+	mov	r14,QWORD PTR[120+rsp]
+	mov	r12,r13
+	mov	r15,r14
+
+	ror	r12,7
+	xor	r12,r13
+	shr	r13,7
+
+	ror	r12,1
+	xor	r13,r12
+	mov	r12,QWORD PTR[80+rsp]
+
+	ror	r15,42
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	add	r12,r13
+	xor	r14,r15
+
+	add	r12,QWORD PTR[8+rsp]
+	mov	r13,rdx
+	add	r12,r14
+	mov	r14,r11
+	ror	r13,23
+	mov	r15,r8
+	mov	QWORD PTR[8+rsp],r12
+
+	ror	r14,5
+	xor	r13,rdx
+	xor	r15,r9
+
+	ror	r13,4
+	add	r12,r10
+	xor	r14,r11
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,rdx
+	mov	r10,rax
+
+	ror	r14,6
+	xor	r13,rdx
+	xor	r15,r9
+
+	xor	r10,rbx
+	xor	r14,r11
+	add	r12,r15
+	mov	r15,rax
+
+	ror	r13,14
+	and	r10,r11
+	and	r15,rbx
+
+	ror	r14,28
+	add	r12,r13
+	add	r10,r15
+
+	add	rcx,r12
+	add	r10,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r10,r14
+
+	mov	r13,QWORD PTR[24+rsp]
+	mov	r14,QWORD PTR[rsp]
+	mov	r12,r13
+	mov	r15,r14
+
+	ror	r12,7
+	xor	r12,r13
+	shr	r13,7
+
+	ror	r12,1
+	xor	r13,r12
+	mov	r12,QWORD PTR[88+rsp]
+
+	ror	r15,42
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	add	r12,r13
+	xor	r14,r15
+
+	add	r12,QWORD PTR[16+rsp]
+	mov	r13,rcx
+	add	r12,r14
+	mov	r14,r10
+	ror	r13,23
+	mov	r15,rdx
+	mov	QWORD PTR[16+rsp],r12
+
+	ror	r14,5
+	xor	r13,rcx
+	xor	r15,r8
+
+	ror	r13,4
+	add	r12,r9
+	xor	r14,r10
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,rcx
+	mov	r9,r11
+
+	ror	r14,6
+	xor	r13,rcx
+	xor	r15,r8
+
+	xor	r9,rax
+	xor	r14,r10
+	add	r12,r15
+	mov	r15,r11
+
+	ror	r13,14
+	and	r9,r10
+	and	r15,rax
+
+	ror	r14,28
+	add	r12,r13
+	add	r9,r15
+
+	add	rbx,r12
+	add	r9,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r9,r14
+
+	mov	r13,QWORD PTR[32+rsp]
+	mov	r14,QWORD PTR[8+rsp]
+	mov	r12,r13
+	mov	r15,r14
+
+	ror	r12,7
+	xor	r12,r13
+	shr	r13,7
+
+	ror	r12,1
+	xor	r13,r12
+	mov	r12,QWORD PTR[96+rsp]
+
+	ror	r15,42
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	add	r12,r13
+	xor	r14,r15
+
+	add	r12,QWORD PTR[24+rsp]
+	mov	r13,rbx
+	add	r12,r14
+	mov	r14,r9
+	ror	r13,23
+	mov	r15,rcx
+	mov	QWORD PTR[24+rsp],r12
+
+	ror	r14,5
+	xor	r13,rbx
+	xor	r15,rdx
+
+	ror	r13,4
+	add	r12,r8
+	xor	r14,r9
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,rbx
+	mov	r8,r10
+
+	ror	r14,6
+	xor	r13,rbx
+	xor	r15,rdx
+
+	xor	r8,r11
+	xor	r14,r9
+	add	r12,r15
+	mov	r15,r10
+
+	ror	r13,14
+	and	r8,r9
+	and	r15,r11
+
+	ror	r14,28
+	add	r12,r13
+	add	r8,r15
+
+	add	rax,r12
+	add	r8,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r8,r14
+
+	mov	r13,QWORD PTR[40+rsp]
+	mov	r14,QWORD PTR[16+rsp]
+	mov	r12,r13
+	mov	r15,r14
+
+	ror	r12,7
+	xor	r12,r13
+	shr	r13,7
+
+	ror	r12,1
+	xor	r13,r12
+	mov	r12,QWORD PTR[104+rsp]
+
+	ror	r15,42
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	add	r12,r13
+	xor	r14,r15
+
+	add	r12,QWORD PTR[32+rsp]
+	mov	r13,rax
+	add	r12,r14
+	mov	r14,r8
+	ror	r13,23
+	mov	r15,rbx
+	mov	QWORD PTR[32+rsp],r12
+
+	ror	r14,5
+	xor	r13,rax
+	xor	r15,rcx
+
+	ror	r13,4
+	add	r12,rdx
+	xor	r14,r8
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,rax
+	mov	rdx,r9
+
+	ror	r14,6
+	xor	r13,rax
+	xor	r15,rcx
+
+	xor	rdx,r10
+	xor	r14,r8
+	add	r12,r15
+	mov	r15,r9
+
+	ror	r13,14
+	and	rdx,r8
+	and	r15,r10
+
+	ror	r14,28
+	add	r12,r13
+	add	rdx,r15
+
+	add	r11,r12
+	add	rdx,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	rdx,r14
+
+	mov	r13,QWORD PTR[48+rsp]
+	mov	r14,QWORD PTR[24+rsp]
+	mov	r12,r13
+	mov	r15,r14
+
+	ror	r12,7
+	xor	r12,r13
+	shr	r13,7
+
+	ror	r12,1
+	xor	r13,r12
+	mov	r12,QWORD PTR[112+rsp]
+
+	ror	r15,42
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	add	r12,r13
+	xor	r14,r15
+
+	add	r12,QWORD PTR[40+rsp]
+	mov	r13,r11
+	add	r12,r14
+	mov	r14,rdx
+	ror	r13,23
+	mov	r15,rax
+	mov	QWORD PTR[40+rsp],r12
+
+	ror	r14,5
+	xor	r13,r11
+	xor	r15,rbx
+
+	ror	r13,4
+	add	r12,rcx
+	xor	r14,rdx
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,r11
+	mov	rcx,r8
+
+	ror	r14,6
+	xor	r13,r11
+	xor	r15,rbx
+
+	xor	rcx,r9
+	xor	r14,rdx
+	add	r12,r15
+	mov	r15,r8
+
+	ror	r13,14
+	and	rcx,rdx
+	and	r15,r9
+
+	ror	r14,28
+	add	r12,r13
+	add	rcx,r15
+
+	add	r10,r12
+	add	rcx,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	rcx,r14
+
+	mov	r13,QWORD PTR[56+rsp]
+	mov	r14,QWORD PTR[32+rsp]
+	mov	r12,r13
+	mov	r15,r14
+
+	ror	r12,7
+	xor	r12,r13
+	shr	r13,7
+
+	ror	r12,1
+	xor	r13,r12
+	mov	r12,QWORD PTR[120+rsp]
+
+	ror	r15,42
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	add	r12,r13
+	xor	r14,r15
+
+	add	r12,QWORD PTR[48+rsp]
+	mov	r13,r10
+	add	r12,r14
+	mov	r14,rcx
+	ror	r13,23
+	mov	r15,r11
+	mov	QWORD PTR[48+rsp],r12
+
+	ror	r14,5
+	xor	r13,r10
+	xor	r15,rax
+
+	ror	r13,4
+	add	r12,rbx
+	xor	r14,rcx
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,r10
+	mov	rbx,rdx
+
+	ror	r14,6
+	xor	r13,r10
+	xor	r15,rax
+
+	xor	rbx,r8
+	xor	r14,rcx
+	add	r12,r15
+	mov	r15,rdx
+
+	ror	r13,14
+	and	rbx,rcx
+	and	r15,r8
+
+	ror	r14,28
+	add	r12,r13
+	add	rbx,r15
+
+	add	r9,r12
+	add	rbx,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	rbx,r14
+
+	mov	r13,QWORD PTR[64+rsp]
+	mov	r14,QWORD PTR[40+rsp]
+	mov	r12,r13
+	mov	r15,r14
+
+	ror	r12,7
+	xor	r12,r13
+	shr	r13,7
+
+	ror	r12,1
+	xor	r13,r12
+	mov	r12,QWORD PTR[rsp]
+
+	ror	r15,42
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	add	r12,r13
+	xor	r14,r15
+
+	add	r12,QWORD PTR[56+rsp]
+	mov	r13,r9
+	add	r12,r14
+	mov	r14,rbx
+	ror	r13,23
+	mov	r15,r10
+	mov	QWORD PTR[56+rsp],r12
+
+	ror	r14,5
+	xor	r13,r9
+	xor	r15,r11
+
+	ror	r13,4
+	add	r12,rax
+	xor	r14,rbx
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,r9
+	mov	rax,rcx
+
+	ror	r14,6
+	xor	r13,r9
+	xor	r15,r11
+
+	xor	rax,rdx
+	xor	r14,rbx
+	add	r12,r15
+	mov	r15,rcx
+
+	ror	r13,14
+	and	rax,rbx
+	and	r15,rdx
+
+	ror	r14,28
+	add	r12,r13
+	add	rax,r15
+
+	add	r8,r12
+	add	rax,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	rax,r14
+
+	mov	r13,QWORD PTR[72+rsp]
+	mov	r14,QWORD PTR[48+rsp]
+	mov	r12,r13
+	mov	r15,r14
+
+	ror	r12,7
+	xor	r12,r13
+	shr	r13,7
+
+	ror	r12,1
+	xor	r13,r12
+	mov	r12,QWORD PTR[8+rsp]
+
+	ror	r15,42
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	add	r12,r13
+	xor	r14,r15
+
+	add	r12,QWORD PTR[64+rsp]
+	mov	r13,r8
+	add	r12,r14
+	mov	r14,rax
+	ror	r13,23
+	mov	r15,r9
+	mov	QWORD PTR[64+rsp],r12
+
+	ror	r14,5
+	xor	r13,r8
+	xor	r15,r10
+
+	ror	r13,4
+	add	r12,r11
+	xor	r14,rax
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,r8
+	mov	r11,rbx
+
+	ror	r14,6
+	xor	r13,r8
+	xor	r15,r10
+
+	xor	r11,rcx
+	xor	r14,rax
+	add	r12,r15
+	mov	r15,rbx
+
+	ror	r13,14
+	and	r11,rax
+	and	r15,rcx
+
+	ror	r14,28
+	add	r12,r13
+	add	r11,r15
+
+	add	rdx,r12
+	add	r11,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r11,r14
+
+	mov	r13,QWORD PTR[80+rsp]
+	mov	r14,QWORD PTR[56+rsp]
+	mov	r12,r13
+	mov	r15,r14
+
+	ror	r12,7
+	xor	r12,r13
+	shr	r13,7
+
+	ror	r12,1
+	xor	r13,r12
+	mov	r12,QWORD PTR[16+rsp]
+
+	ror	r15,42
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	add	r12,r13
+	xor	r14,r15
+
+	add	r12,QWORD PTR[72+rsp]
+	mov	r13,rdx
+	add	r12,r14
+	mov	r14,r11
+	ror	r13,23
+	mov	r15,r8
+	mov	QWORD PTR[72+rsp],r12
+
+	ror	r14,5
+	xor	r13,rdx
+	xor	r15,r9
+
+	ror	r13,4
+	add	r12,r10
+	xor	r14,r11
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,rdx
+	mov	r10,rax
+
+	ror	r14,6
+	xor	r13,rdx
+	xor	r15,r9
+
+	xor	r10,rbx
+	xor	r14,r11
+	add	r12,r15
+	mov	r15,rax
+
+	ror	r13,14
+	and	r10,r11
+	and	r15,rbx
+
+	ror	r14,28
+	add	r12,r13
+	add	r10,r15
+
+	add	rcx,r12
+	add	r10,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r10,r14
+
+	mov	r13,QWORD PTR[88+rsp]
+	mov	r14,QWORD PTR[64+rsp]
+	mov	r12,r13
+	mov	r15,r14
+
+	ror	r12,7
+	xor	r12,r13
+	shr	r13,7
+
+	ror	r12,1
+	xor	r13,r12
+	mov	r12,QWORD PTR[24+rsp]
+
+	ror	r15,42
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	add	r12,r13
+	xor	r14,r15
+
+	add	r12,QWORD PTR[80+rsp]
+	mov	r13,rcx
+	add	r12,r14
+	mov	r14,r10
+	ror	r13,23
+	mov	r15,rdx
+	mov	QWORD PTR[80+rsp],r12
+
+	ror	r14,5
+	xor	r13,rcx
+	xor	r15,r8
+
+	ror	r13,4
+	add	r12,r9
+	xor	r14,r10
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,rcx
+	mov	r9,r11
+
+	ror	r14,6
+	xor	r13,rcx
+	xor	r15,r8
+
+	xor	r9,rax
+	xor	r14,r10
+	add	r12,r15
+	mov	r15,r11
+
+	ror	r13,14
+	and	r9,r10
+	and	r15,rax
+
+	ror	r14,28
+	add	r12,r13
+	add	r9,r15
+
+	add	rbx,r12
+	add	r9,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r9,r14
+
+	mov	r13,QWORD PTR[96+rsp]
+	mov	r14,QWORD PTR[72+rsp]
+	mov	r12,r13
+	mov	r15,r14
+
+	ror	r12,7
+	xor	r12,r13
+	shr	r13,7
+
+	ror	r12,1
+	xor	r13,r12
+	mov	r12,QWORD PTR[32+rsp]
+
+	ror	r15,42
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	add	r12,r13
+	xor	r14,r15
+
+	add	r12,QWORD PTR[88+rsp]
+	mov	r13,rbx
+	add	r12,r14
+	mov	r14,r9
+	ror	r13,23
+	mov	r15,rcx
+	mov	QWORD PTR[88+rsp],r12
+
+	ror	r14,5
+	xor	r13,rbx
+	xor	r15,rdx
+
+	ror	r13,4
+	add	r12,r8
+	xor	r14,r9
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,rbx
+	mov	r8,r10
+
+	ror	r14,6
+	xor	r13,rbx
+	xor	r15,rdx
+
+	xor	r8,r11
+	xor	r14,r9
+	add	r12,r15
+	mov	r15,r10
+
+	ror	r13,14
+	and	r8,r9
+	and	r15,r11
+
+	ror	r14,28
+	add	r12,r13
+	add	r8,r15
+
+	add	rax,r12
+	add	r8,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	r8,r14
+
+	mov	r13,QWORD PTR[104+rsp]
+	mov	r14,QWORD PTR[80+rsp]
+	mov	r12,r13
+	mov	r15,r14
+
+	ror	r12,7
+	xor	r12,r13
+	shr	r13,7
+
+	ror	r12,1
+	xor	r13,r12
+	mov	r12,QWORD PTR[40+rsp]
+
+	ror	r15,42
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	add	r12,r13
+	xor	r14,r15
+
+	add	r12,QWORD PTR[96+rsp]
+	mov	r13,rax
+	add	r12,r14
+	mov	r14,r8
+	ror	r13,23
+	mov	r15,rbx
+	mov	QWORD PTR[96+rsp],r12
+
+	ror	r14,5
+	xor	r13,rax
+	xor	r15,rcx
+
+	ror	r13,4
+	add	r12,rdx
+	xor	r14,r8
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,rax
+	mov	rdx,r9
+
+	ror	r14,6
+	xor	r13,rax
+	xor	r15,rcx
+
+	xor	rdx,r10
+	xor	r14,r8
+	add	r12,r15
+	mov	r15,r9
+
+	ror	r13,14
+	and	rdx,r8
+	and	r15,r10
+
+	ror	r14,28
+	add	r12,r13
+	add	rdx,r15
+
+	add	r11,r12
+	add	rdx,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	rdx,r14
+
+	mov	r13,QWORD PTR[112+rsp]
+	mov	r14,QWORD PTR[88+rsp]
+	mov	r12,r13
+	mov	r15,r14
+
+	ror	r12,7
+	xor	r12,r13
+	shr	r13,7
+
+	ror	r12,1
+	xor	r13,r12
+	mov	r12,QWORD PTR[48+rsp]
+
+	ror	r15,42
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	add	r12,r13
+	xor	r14,r15
+
+	add	r12,QWORD PTR[104+rsp]
+	mov	r13,r11
+	add	r12,r14
+	mov	r14,rdx
+	ror	r13,23
+	mov	r15,rax
+	mov	QWORD PTR[104+rsp],r12
+
+	ror	r14,5
+	xor	r13,r11
+	xor	r15,rbx
+
+	ror	r13,4
+	add	r12,rcx
+	xor	r14,rdx
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,r11
+	mov	rcx,r8
+
+	ror	r14,6
+	xor	r13,r11
+	xor	r15,rbx
+
+	xor	rcx,r9
+	xor	r14,rdx
+	add	r12,r15
+	mov	r15,r8
+
+	ror	r13,14
+	and	rcx,rdx
+	and	r15,r9
+
+	ror	r14,28
+	add	r12,r13
+	add	rcx,r15
+
+	add	r10,r12
+	add	rcx,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	rcx,r14
+
+	mov	r13,QWORD PTR[120+rsp]
+	mov	r14,QWORD PTR[96+rsp]
+	mov	r12,r13
+	mov	r15,r14
+
+	ror	r12,7
+	xor	r12,r13
+	shr	r13,7
+
+	ror	r12,1
+	xor	r13,r12
+	mov	r12,QWORD PTR[56+rsp]
+
+	ror	r15,42
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	add	r12,r13
+	xor	r14,r15
+
+	add	r12,QWORD PTR[112+rsp]
+	mov	r13,r10
+	add	r12,r14
+	mov	r14,rcx
+	ror	r13,23
+	mov	r15,r11
+	mov	QWORD PTR[112+rsp],r12
+
+	ror	r14,5
+	xor	r13,r10
+	xor	r15,rax
+
+	ror	r13,4
+	add	r12,rbx
+	xor	r14,rcx
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,r10
+	mov	rbx,rdx
+
+	ror	r14,6
+	xor	r13,r10
+	xor	r15,rax
+
+	xor	rbx,r8
+	xor	r14,rcx
+	add	r12,r15
+	mov	r15,rdx
+
+	ror	r13,14
+	and	rbx,rcx
+	and	r15,r8
+
+	ror	r14,28
+	add	r12,r13
+	add	rbx,r15
+
+	add	r9,r12
+	add	rbx,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	rbx,r14
+
+	mov	r13,QWORD PTR[rsp]
+	mov	r14,QWORD PTR[104+rsp]
+	mov	r12,r13
+	mov	r15,r14
+
+	ror	r12,7
+	xor	r12,r13
+	shr	r13,7
+
+	ror	r12,1
+	xor	r13,r12
+	mov	r12,QWORD PTR[64+rsp]
+
+	ror	r15,42
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	add	r12,r13
+	xor	r14,r15
+
+	add	r12,QWORD PTR[120+rsp]
+	mov	r13,r9
+	add	r12,r14
+	mov	r14,rbx
+	ror	r13,23
+	mov	r15,r10
+	mov	QWORD PTR[120+rsp],r12
+
+	ror	r14,5
+	xor	r13,r9
+	xor	r15,r11
+
+	ror	r13,4
+	add	r12,rax
+	xor	r14,rbx
+
+	add	r12,QWORD PTR[rdi*8+rbp]
+	and	r15,r9
+	mov	rax,rcx
+
+	ror	r14,6
+	xor	r13,r9
+	xor	r15,r11
+
+	xor	rax,rdx
+	xor	r14,rbx
+	add	r12,r15
+	mov	r15,rcx
+
+	ror	r13,14
+	and	rax,rbx
+	and	r15,rdx
+
+	ror	r14,28
+	add	r12,r13
+	add	rax,r15
+
+	add	r8,r12
+	add	rax,r12
+	lea	rdi,QWORD PTR[1+rdi]
+	add	rax,r14
+
+	cmp	rdi,80
+	jb	$L$rounds_16_xx
+
+	mov	rdi,QWORD PTR[((128+0))+rsp]
+	lea	rsi,QWORD PTR[128+rsi]
+
+	add	rax,QWORD PTR[rdi]
+	add	rbx,QWORD PTR[8+rdi]
+	add	rcx,QWORD PTR[16+rdi]
+	add	rdx,QWORD PTR[24+rdi]
+	add	r8,QWORD PTR[32+rdi]
+	add	r9,QWORD PTR[40+rdi]
+	add	r10,QWORD PTR[48+rdi]
+	add	r11,QWORD PTR[56+rdi]
+
+	cmp	rsi,QWORD PTR[((128+16))+rsp]
+
+	mov	QWORD PTR[rdi],rax
+	mov	QWORD PTR[8+rdi],rbx
+	mov	QWORD PTR[16+rdi],rcx
+	mov	QWORD PTR[24+rdi],rdx
+	mov	QWORD PTR[32+rdi],r8
+	mov	QWORD PTR[40+rdi],r9
+	mov	QWORD PTR[48+rdi],r10
+	mov	QWORD PTR[56+rdi],r11
+	jb	$L$loop
+
+	mov	rsi,QWORD PTR[((128+24))+rsp]
+	mov	r15,QWORD PTR[rsi]
+	mov	r14,QWORD PTR[8+rsi]
+	mov	r13,QWORD PTR[16+rsi]
+	mov	r12,QWORD PTR[24+rsi]
+	mov	rbp,QWORD PTR[32+rsi]
+	mov	rbx,QWORD PTR[40+rsi]
+	lea	rsp,QWORD PTR[48+rsi]
+$L$epilogue::
+	mov	rdi,QWORD PTR[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD PTR[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_sha512_block_data_order::
+sha512_block_data_order	ENDP
+ALIGN	64
+
+K512::
+	DQ	0428a2f98d728ae22h,07137449123ef65cdh
+	DQ	0b5c0fbcfec4d3b2fh,0e9b5dba58189dbbch
+	DQ	03956c25bf348b538h,059f111f1b605d019h
+	DQ	0923f82a4af194f9bh,0ab1c5ed5da6d8118h
+	DQ	0d807aa98a3030242h,012835b0145706fbeh
+	DQ	0243185be4ee4b28ch,0550c7dc3d5ffb4e2h
+	DQ	072be5d74f27b896fh,080deb1fe3b1696b1h
+	DQ	09bdc06a725c71235h,0c19bf174cf692694h
+	DQ	0e49b69c19ef14ad2h,0efbe4786384f25e3h
+	DQ	00fc19dc68b8cd5b5h,0240ca1cc77ac9c65h
+	DQ	02de92c6f592b0275h,04a7484aa6ea6e483h
+	DQ	05cb0a9dcbd41fbd4h,076f988da831153b5h
+	DQ	0983e5152ee66dfabh,0a831c66d2db43210h
+	DQ	0b00327c898fb213fh,0bf597fc7beef0ee4h
+	DQ	0c6e00bf33da88fc2h,0d5a79147930aa725h
+	DQ	006ca6351e003826fh,0142929670a0e6e70h
+	DQ	027b70a8546d22ffch,02e1b21385c26c926h
+	DQ	04d2c6dfc5ac42aedh,053380d139d95b3dfh
+	DQ	0650a73548baf63deh,0766a0abb3c77b2a8h
+	DQ	081c2c92e47edaee6h,092722c851482353bh
+	DQ	0a2bfe8a14cf10364h,0a81a664bbc423001h
+	DQ	0c24b8b70d0f89791h,0c76c51a30654be30h
+	DQ	0d192e819d6ef5218h,0d69906245565a910h
+	DQ	0f40e35855771202ah,0106aa07032bbd1b8h
+	DQ	019a4c116b8d2d0c8h,01e376c085141ab53h
+	DQ	02748774cdf8eeb99h,034b0bcb5e19b48a8h
+	DQ	0391c0cb3c5c95a63h,04ed8aa4ae3418acbh
+	DQ	05b9cca4f7763e373h,0682e6ff3d6b2b8a3h
+	DQ	0748f82ee5defb2fch,078a5636f43172f60h
+	DQ	084c87814a1f0ab72h,08cc702081a6439ech
+	DQ	090befffa23631e28h,0a4506cebde82bde9h
+	DQ	0bef9a3f7b2c67915h,0c67178f2e372532bh
+	DQ	0ca273eceea26619ch,0d186b8c721c0c207h
+	DQ	0eada7dd6cde0eb1eh,0f57d4f7fee6ed178h
+	DQ	006f067aa72176fbah,00a637dc5a2c898a6h
+	DQ	0113f9804bef90daeh,01b710b35131c471bh
+	DQ	028db77f523047d84h,032caab7b40c72493h
+	DQ	03c9ebe0a15c9bebch,0431d67c49c100d4ch
+	DQ	04cc5d4becb3e42b6h,0597f299cfc657e2ah
+	DQ	05fcb6fab3ad6faech,06c44198c4a475817h
+
+.text$	ENDS
+END
+
diff --git a/ext/libressl/crypto/sha/sha512-mingw64-x86_64.S b/ext/libressl/crypto/sha/sha512-mingw64-x86_64.S
new file mode 100644
index 0000000..5153952
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha512-mingw64-x86_64.S
@@ -0,0 +1,1814 @@
+#include "x86_arch.h"
+.text	
+
+.globl	sha512_block_data_order
+.def	sha512_block_data_order;	.scl 2;	.type 32;	.endef
+.p2align	4
+sha512_block_data_order:
+	movq	%rdi,8(%rsp)
+	movq	%rsi,16(%rsp)
+	movq	%rsp,%rax
+.LSEH_begin_sha512_block_data_order:
+	movq	%rcx,%rdi
+	movq	%rdx,%rsi
+	movq	%r8,%rdx
+	movq	%r9,%rcx
+
+	pushq	%rbx
+	pushq	%rbp
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+	movq	%rsp,%r11
+	shlq	$4,%rdx
+	subq	$128+32,%rsp
+	leaq	(%rsi,%rdx,8),%rdx
+	andq	$-64,%rsp
+	movq	%rdi,128+0(%rsp)
+	movq	%rsi,128+8(%rsp)
+	movq	%rdx,128+16(%rsp)
+	movq	%r11,128+24(%rsp)
+.Lprologue:
+
+	leaq	K512(%rip),%rbp
+
+	movq	0(%rdi),%rax
+	movq	8(%rdi),%rbx
+	movq	16(%rdi),%rcx
+	movq	24(%rdi),%rdx
+	movq	32(%rdi),%r8
+	movq	40(%rdi),%r9
+	movq	48(%rdi),%r10
+	movq	56(%rdi),%r11
+	jmp	.Lloop
+
+.p2align	4
+.Lloop:
+	xorq	%rdi,%rdi
+	movq	0(%rsi),%r12
+	movq	%r8,%r13
+	movq	%rax,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r9,%r15
+	movq	%r12,0(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%rax,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r8,%r15
+	movq	%rbx,%r11
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	xorq	%rcx,%r11
+	xorq	%rax,%r14
+	addq	%r15,%r12
+	movq	%rbx,%r15
+
+	rorq	$14,%r13
+	andq	%rax,%r11
+	andq	%rcx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r11
+
+	addq	%r12,%rdx
+	addq	%r12,%r11
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r11
+
+	movq	8(%rsi),%r12
+	movq	%rdx,%r13
+	movq	%r11,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r8,%r15
+	movq	%r12,8(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r11,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rdx,%r15
+	movq	%rax,%r10
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	xorq	%rbx,%r10
+	xorq	%r11,%r14
+	addq	%r15,%r12
+	movq	%rax,%r15
+
+	rorq	$14,%r13
+	andq	%r11,%r10
+	andq	%rbx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r10
+
+	addq	%r12,%rcx
+	addq	%r12,%r10
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r10
+
+	movq	16(%rsi),%r12
+	movq	%rcx,%r13
+	movq	%r10,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rdx,%r15
+	movq	%r12,16(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r10,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rcx,%r15
+	movq	%r11,%r9
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	xorq	%rax,%r9
+	xorq	%r10,%r14
+	addq	%r15,%r12
+	movq	%r11,%r15
+
+	rorq	$14,%r13
+	andq	%r10,%r9
+	andq	%rax,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r9
+
+	addq	%r12,%rbx
+	addq	%r12,%r9
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r9
+
+	movq	24(%rsi),%r12
+	movq	%rbx,%r13
+	movq	%r9,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rcx,%r15
+	movq	%r12,24(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%r9,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rbx,%r15
+	movq	%r10,%r8
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	xorq	%r11,%r8
+	xorq	%r9,%r14
+	addq	%r15,%r12
+	movq	%r10,%r15
+
+	rorq	$14,%r13
+	andq	%r9,%r8
+	andq	%r11,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r8
+
+	addq	%r12,%rax
+	addq	%r12,%r8
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r8
+
+	movq	32(%rsi),%r12
+	movq	%rax,%r13
+	movq	%r8,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rbx,%r15
+	movq	%r12,32(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%r8,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rax,%r15
+	movq	%r9,%rdx
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	xorq	%r10,%rdx
+	xorq	%r8,%r14
+	addq	%r15,%r12
+	movq	%r9,%r15
+
+	rorq	$14,%r13
+	andq	%r8,%rdx
+	andq	%r10,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rdx
+
+	addq	%r12,%r11
+	addq	%r12,%rdx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rdx
+
+	movq	40(%rsi),%r12
+	movq	%r11,%r13
+	movq	%rdx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rax,%r15
+	movq	%r12,40(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rdx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r11,%r15
+	movq	%r8,%rcx
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	xorq	%r9,%rcx
+	xorq	%rdx,%r14
+	addq	%r15,%r12
+	movq	%r8,%r15
+
+	rorq	$14,%r13
+	andq	%rdx,%rcx
+	andq	%r9,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rcx
+
+	addq	%r12,%r10
+	addq	%r12,%rcx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rcx
+
+	movq	48(%rsi),%r12
+	movq	%r10,%r13
+	movq	%rcx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r11,%r15
+	movq	%r12,48(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rcx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r10,%r15
+	movq	%rdx,%rbx
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	xorq	%r8,%rbx
+	xorq	%rcx,%r14
+	addq	%r15,%r12
+	movq	%rdx,%r15
+
+	rorq	$14,%r13
+	andq	%rcx,%rbx
+	andq	%r8,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rbx
+
+	addq	%r12,%r9
+	addq	%r12,%rbx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rbx
+
+	movq	56(%rsi),%r12
+	movq	%r9,%r13
+	movq	%rbx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r10,%r15
+	movq	%r12,56(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%rbx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r9,%r15
+	movq	%rcx,%rax
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	xorq	%rdx,%rax
+	xorq	%rbx,%r14
+	addq	%r15,%r12
+	movq	%rcx,%r15
+
+	rorq	$14,%r13
+	andq	%rbx,%rax
+	andq	%rdx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rax
+
+	addq	%r12,%r8
+	addq	%r12,%rax
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rax
+
+	movq	64(%rsi),%r12
+	movq	%r8,%r13
+	movq	%rax,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r9,%r15
+	movq	%r12,64(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%rax,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r8,%r15
+	movq	%rbx,%r11
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	xorq	%rcx,%r11
+	xorq	%rax,%r14
+	addq	%r15,%r12
+	movq	%rbx,%r15
+
+	rorq	$14,%r13
+	andq	%rax,%r11
+	andq	%rcx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r11
+
+	addq	%r12,%rdx
+	addq	%r12,%r11
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r11
+
+	movq	72(%rsi),%r12
+	movq	%rdx,%r13
+	movq	%r11,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r8,%r15
+	movq	%r12,72(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r11,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rdx,%r15
+	movq	%rax,%r10
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	xorq	%rbx,%r10
+	xorq	%r11,%r14
+	addq	%r15,%r12
+	movq	%rax,%r15
+
+	rorq	$14,%r13
+	andq	%r11,%r10
+	andq	%rbx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r10
+
+	addq	%r12,%rcx
+	addq	%r12,%r10
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r10
+
+	movq	80(%rsi),%r12
+	movq	%rcx,%r13
+	movq	%r10,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rdx,%r15
+	movq	%r12,80(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r10,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rcx,%r15
+	movq	%r11,%r9
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	xorq	%rax,%r9
+	xorq	%r10,%r14
+	addq	%r15,%r12
+	movq	%r11,%r15
+
+	rorq	$14,%r13
+	andq	%r10,%r9
+	andq	%rax,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r9
+
+	addq	%r12,%rbx
+	addq	%r12,%r9
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r9
+
+	movq	88(%rsi),%r12
+	movq	%rbx,%r13
+	movq	%r9,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rcx,%r15
+	movq	%r12,88(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%r9,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rbx,%r15
+	movq	%r10,%r8
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	xorq	%r11,%r8
+	xorq	%r9,%r14
+	addq	%r15,%r12
+	movq	%r10,%r15
+
+	rorq	$14,%r13
+	andq	%r9,%r8
+	andq	%r11,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r8
+
+	addq	%r12,%rax
+	addq	%r12,%r8
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r8
+
+	movq	96(%rsi),%r12
+	movq	%rax,%r13
+	movq	%r8,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rbx,%r15
+	movq	%r12,96(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%r8,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rax,%r15
+	movq	%r9,%rdx
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	xorq	%r10,%rdx
+	xorq	%r8,%r14
+	addq	%r15,%r12
+	movq	%r9,%r15
+
+	rorq	$14,%r13
+	andq	%r8,%rdx
+	andq	%r10,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rdx
+
+	addq	%r12,%r11
+	addq	%r12,%rdx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rdx
+
+	movq	104(%rsi),%r12
+	movq	%r11,%r13
+	movq	%rdx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%rax,%r15
+	movq	%r12,104(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rdx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r11,%r15
+	movq	%r8,%rcx
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	xorq	%r9,%rcx
+	xorq	%rdx,%r14
+	addq	%r15,%r12
+	movq	%r8,%r15
+
+	rorq	$14,%r13
+	andq	%rdx,%rcx
+	andq	%r9,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rcx
+
+	addq	%r12,%r10
+	addq	%r12,%rcx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rcx
+
+	movq	112(%rsi),%r12
+	movq	%r10,%r13
+	movq	%rcx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r11,%r15
+	movq	%r12,112(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rcx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r10,%r15
+	movq	%rdx,%rbx
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	xorq	%r8,%rbx
+	xorq	%rcx,%r14
+	addq	%r15,%r12
+	movq	%rdx,%r15
+
+	rorq	$14,%r13
+	andq	%rcx,%rbx
+	andq	%r8,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rbx
+
+	addq	%r12,%r9
+	addq	%r12,%rbx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rbx
+
+	movq	120(%rsi),%r12
+	movq	%r9,%r13
+	movq	%rbx,%r14
+	bswapq	%r12
+	rorq	$23,%r13
+	movq	%r10,%r15
+	movq	%r12,120(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%rbx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r9,%r15
+	movq	%rcx,%rax
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	xorq	%rdx,%rax
+	xorq	%rbx,%r14
+	addq	%r15,%r12
+	movq	%rcx,%r15
+
+	rorq	$14,%r13
+	andq	%rbx,%rax
+	andq	%rdx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rax
+
+	addq	%r12,%r8
+	addq	%r12,%rax
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rax
+
+	jmp	.Lrounds_16_xx
+.p2align	4
+.Lrounds_16_xx:
+	movq	8(%rsp),%r13
+	movq	112(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	72(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	0(%rsp),%r12
+	movq	%r8,%r13
+	addq	%r14,%r12
+	movq	%rax,%r14
+	rorq	$23,%r13
+	movq	%r9,%r15
+	movq	%r12,0(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%rax,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r8,%r15
+	movq	%rbx,%r11
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	xorq	%rcx,%r11
+	xorq	%rax,%r14
+	addq	%r15,%r12
+	movq	%rbx,%r15
+
+	rorq	$14,%r13
+	andq	%rax,%r11
+	andq	%rcx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r11
+
+	addq	%r12,%rdx
+	addq	%r12,%r11
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r11
+
+	movq	16(%rsp),%r13
+	movq	120(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	80(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	8(%rsp),%r12
+	movq	%rdx,%r13
+	addq	%r14,%r12
+	movq	%r11,%r14
+	rorq	$23,%r13
+	movq	%r8,%r15
+	movq	%r12,8(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r11,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rdx,%r15
+	movq	%rax,%r10
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	xorq	%rbx,%r10
+	xorq	%r11,%r14
+	addq	%r15,%r12
+	movq	%rax,%r15
+
+	rorq	$14,%r13
+	andq	%r11,%r10
+	andq	%rbx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r10
+
+	addq	%r12,%rcx
+	addq	%r12,%r10
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r10
+
+	movq	24(%rsp),%r13
+	movq	0(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	88(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	16(%rsp),%r12
+	movq	%rcx,%r13
+	addq	%r14,%r12
+	movq	%r10,%r14
+	rorq	$23,%r13
+	movq	%rdx,%r15
+	movq	%r12,16(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r10,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rcx,%r15
+	movq	%r11,%r9
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	xorq	%rax,%r9
+	xorq	%r10,%r14
+	addq	%r15,%r12
+	movq	%r11,%r15
+
+	rorq	$14,%r13
+	andq	%r10,%r9
+	andq	%rax,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r9
+
+	addq	%r12,%rbx
+	addq	%r12,%r9
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r9
+
+	movq	32(%rsp),%r13
+	movq	8(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	96(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	24(%rsp),%r12
+	movq	%rbx,%r13
+	addq	%r14,%r12
+	movq	%r9,%r14
+	rorq	$23,%r13
+	movq	%rcx,%r15
+	movq	%r12,24(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%r9,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rbx,%r15
+	movq	%r10,%r8
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	xorq	%r11,%r8
+	xorq	%r9,%r14
+	addq	%r15,%r12
+	movq	%r10,%r15
+
+	rorq	$14,%r13
+	andq	%r9,%r8
+	andq	%r11,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r8
+
+	addq	%r12,%rax
+	addq	%r12,%r8
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r8
+
+	movq	40(%rsp),%r13
+	movq	16(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	104(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	32(%rsp),%r12
+	movq	%rax,%r13
+	addq	%r14,%r12
+	movq	%r8,%r14
+	rorq	$23,%r13
+	movq	%rbx,%r15
+	movq	%r12,32(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%r8,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rax,%r15
+	movq	%r9,%rdx
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	xorq	%r10,%rdx
+	xorq	%r8,%r14
+	addq	%r15,%r12
+	movq	%r9,%r15
+
+	rorq	$14,%r13
+	andq	%r8,%rdx
+	andq	%r10,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rdx
+
+	addq	%r12,%r11
+	addq	%r12,%rdx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rdx
+
+	movq	48(%rsp),%r13
+	movq	24(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	112(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	40(%rsp),%r12
+	movq	%r11,%r13
+	addq	%r14,%r12
+	movq	%rdx,%r14
+	rorq	$23,%r13
+	movq	%rax,%r15
+	movq	%r12,40(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rdx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r11,%r15
+	movq	%r8,%rcx
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	xorq	%r9,%rcx
+	xorq	%rdx,%r14
+	addq	%r15,%r12
+	movq	%r8,%r15
+
+	rorq	$14,%r13
+	andq	%rdx,%rcx
+	andq	%r9,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rcx
+
+	addq	%r12,%r10
+	addq	%r12,%rcx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rcx
+
+	movq	56(%rsp),%r13
+	movq	32(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	120(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	48(%rsp),%r12
+	movq	%r10,%r13
+	addq	%r14,%r12
+	movq	%rcx,%r14
+	rorq	$23,%r13
+	movq	%r11,%r15
+	movq	%r12,48(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rcx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r10,%r15
+	movq	%rdx,%rbx
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	xorq	%r8,%rbx
+	xorq	%rcx,%r14
+	addq	%r15,%r12
+	movq	%rdx,%r15
+
+	rorq	$14,%r13
+	andq	%rcx,%rbx
+	andq	%r8,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rbx
+
+	addq	%r12,%r9
+	addq	%r12,%rbx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rbx
+
+	movq	64(%rsp),%r13
+	movq	40(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	0(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	56(%rsp),%r12
+	movq	%r9,%r13
+	addq	%r14,%r12
+	movq	%rbx,%r14
+	rorq	$23,%r13
+	movq	%r10,%r15
+	movq	%r12,56(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%rbx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r9,%r15
+	movq	%rcx,%rax
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	xorq	%rdx,%rax
+	xorq	%rbx,%r14
+	addq	%r15,%r12
+	movq	%rcx,%r15
+
+	rorq	$14,%r13
+	andq	%rbx,%rax
+	andq	%rdx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rax
+
+	addq	%r12,%r8
+	addq	%r12,%rax
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rax
+
+	movq	72(%rsp),%r13
+	movq	48(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	8(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	64(%rsp),%r12
+	movq	%r8,%r13
+	addq	%r14,%r12
+	movq	%rax,%r14
+	rorq	$23,%r13
+	movq	%r9,%r15
+	movq	%r12,64(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	rorq	$4,%r13
+	addq	%r11,%r12
+	xorq	%rax,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r8,%r15
+	movq	%rbx,%r11
+
+	rorq	$6,%r14
+	xorq	%r8,%r13
+	xorq	%r10,%r15
+
+	xorq	%rcx,%r11
+	xorq	%rax,%r14
+	addq	%r15,%r12
+	movq	%rbx,%r15
+
+	rorq	$14,%r13
+	andq	%rax,%r11
+	andq	%rcx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r11
+
+	addq	%r12,%rdx
+	addq	%r12,%r11
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r11
+
+	movq	80(%rsp),%r13
+	movq	56(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	16(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	72(%rsp),%r12
+	movq	%rdx,%r13
+	addq	%r14,%r12
+	movq	%r11,%r14
+	rorq	$23,%r13
+	movq	%r8,%r15
+	movq	%r12,72(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	rorq	$4,%r13
+	addq	%r10,%r12
+	xorq	%r11,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rdx,%r15
+	movq	%rax,%r10
+
+	rorq	$6,%r14
+	xorq	%rdx,%r13
+	xorq	%r9,%r15
+
+	xorq	%rbx,%r10
+	xorq	%r11,%r14
+	addq	%r15,%r12
+	movq	%rax,%r15
+
+	rorq	$14,%r13
+	andq	%r11,%r10
+	andq	%rbx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r10
+
+	addq	%r12,%rcx
+	addq	%r12,%r10
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r10
+
+	movq	88(%rsp),%r13
+	movq	64(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	24(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	80(%rsp),%r12
+	movq	%rcx,%r13
+	addq	%r14,%r12
+	movq	%r10,%r14
+	rorq	$23,%r13
+	movq	%rdx,%r15
+	movq	%r12,80(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	rorq	$4,%r13
+	addq	%r9,%r12
+	xorq	%r10,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rcx,%r15
+	movq	%r11,%r9
+
+	rorq	$6,%r14
+	xorq	%rcx,%r13
+	xorq	%r8,%r15
+
+	xorq	%rax,%r9
+	xorq	%r10,%r14
+	addq	%r15,%r12
+	movq	%r11,%r15
+
+	rorq	$14,%r13
+	andq	%r10,%r9
+	andq	%rax,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r9
+
+	addq	%r12,%rbx
+	addq	%r12,%r9
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r9
+
+	movq	96(%rsp),%r13
+	movq	72(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	32(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	88(%rsp),%r12
+	movq	%rbx,%r13
+	addq	%r14,%r12
+	movq	%r9,%r14
+	rorq	$23,%r13
+	movq	%rcx,%r15
+	movq	%r12,88(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	rorq	$4,%r13
+	addq	%r8,%r12
+	xorq	%r9,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rbx,%r15
+	movq	%r10,%r8
+
+	rorq	$6,%r14
+	xorq	%rbx,%r13
+	xorq	%rdx,%r15
+
+	xorq	%r11,%r8
+	xorq	%r9,%r14
+	addq	%r15,%r12
+	movq	%r10,%r15
+
+	rorq	$14,%r13
+	andq	%r9,%r8
+	andq	%r11,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%r8
+
+	addq	%r12,%rax
+	addq	%r12,%r8
+	leaq	1(%rdi),%rdi
+	addq	%r14,%r8
+
+	movq	104(%rsp),%r13
+	movq	80(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	40(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	96(%rsp),%r12
+	movq	%rax,%r13
+	addq	%r14,%r12
+	movq	%r8,%r14
+	rorq	$23,%r13
+	movq	%rbx,%r15
+	movq	%r12,96(%rsp)
+
+	rorq	$5,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	rorq	$4,%r13
+	addq	%rdx,%r12
+	xorq	%r8,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%rax,%r15
+	movq	%r9,%rdx
+
+	rorq	$6,%r14
+	xorq	%rax,%r13
+	xorq	%rcx,%r15
+
+	xorq	%r10,%rdx
+	xorq	%r8,%r14
+	addq	%r15,%r12
+	movq	%r9,%r15
+
+	rorq	$14,%r13
+	andq	%r8,%rdx
+	andq	%r10,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rdx
+
+	addq	%r12,%r11
+	addq	%r12,%rdx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rdx
+
+	movq	112(%rsp),%r13
+	movq	88(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	48(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	104(%rsp),%r12
+	movq	%r11,%r13
+	addq	%r14,%r12
+	movq	%rdx,%r14
+	rorq	$23,%r13
+	movq	%rax,%r15
+	movq	%r12,104(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	rorq	$4,%r13
+	addq	%rcx,%r12
+	xorq	%rdx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r11,%r15
+	movq	%r8,%rcx
+
+	rorq	$6,%r14
+	xorq	%r11,%r13
+	xorq	%rbx,%r15
+
+	xorq	%r9,%rcx
+	xorq	%rdx,%r14
+	addq	%r15,%r12
+	movq	%r8,%r15
+
+	rorq	$14,%r13
+	andq	%rdx,%rcx
+	andq	%r9,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rcx
+
+	addq	%r12,%r10
+	addq	%r12,%rcx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rcx
+
+	movq	120(%rsp),%r13
+	movq	96(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	56(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	112(%rsp),%r12
+	movq	%r10,%r13
+	addq	%r14,%r12
+	movq	%rcx,%r14
+	rorq	$23,%r13
+	movq	%r11,%r15
+	movq	%r12,112(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	rorq	$4,%r13
+	addq	%rbx,%r12
+	xorq	%rcx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r10,%r15
+	movq	%rdx,%rbx
+
+	rorq	$6,%r14
+	xorq	%r10,%r13
+	xorq	%rax,%r15
+
+	xorq	%r8,%rbx
+	xorq	%rcx,%r14
+	addq	%r15,%r12
+	movq	%rdx,%r15
+
+	rorq	$14,%r13
+	andq	%rcx,%rbx
+	andq	%r8,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rbx
+
+	addq	%r12,%r9
+	addq	%r12,%rbx
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rbx
+
+	movq	0(%rsp),%r13
+	movq	104(%rsp),%r14
+	movq	%r13,%r12
+	movq	%r14,%r15
+
+	rorq	$7,%r12
+	xorq	%r13,%r12
+	shrq	$7,%r13
+
+	rorq	$1,%r12
+	xorq	%r12,%r13
+	movq	64(%rsp),%r12
+
+	rorq	$42,%r15
+	xorq	%r14,%r15
+	shrq	$6,%r14
+
+	rorq	$19,%r15
+	addq	%r13,%r12
+	xorq	%r15,%r14
+
+	addq	120(%rsp),%r12
+	movq	%r9,%r13
+	addq	%r14,%r12
+	movq	%rbx,%r14
+	rorq	$23,%r13
+	movq	%r10,%r15
+	movq	%r12,120(%rsp)
+
+	rorq	$5,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	rorq	$4,%r13
+	addq	%rax,%r12
+	xorq	%rbx,%r14
+
+	addq	(%rbp,%rdi,8),%r12
+	andq	%r9,%r15
+	movq	%rcx,%rax
+
+	rorq	$6,%r14
+	xorq	%r9,%r13
+	xorq	%r11,%r15
+
+	xorq	%rdx,%rax
+	xorq	%rbx,%r14
+	addq	%r15,%r12
+	movq	%rcx,%r15
+
+	rorq	$14,%r13
+	andq	%rbx,%rax
+	andq	%rdx,%r15
+
+	rorq	$28,%r14
+	addq	%r13,%r12
+	addq	%r15,%rax
+
+	addq	%r12,%r8
+	addq	%r12,%rax
+	leaq	1(%rdi),%rdi
+	addq	%r14,%rax
+
+	cmpq	$80,%rdi
+	jb	.Lrounds_16_xx
+
+	movq	128+0(%rsp),%rdi
+	leaq	128(%rsi),%rsi
+
+	addq	0(%rdi),%rax
+	addq	8(%rdi),%rbx
+	addq	16(%rdi),%rcx
+	addq	24(%rdi),%rdx
+	addq	32(%rdi),%r8
+	addq	40(%rdi),%r9
+	addq	48(%rdi),%r10
+	addq	56(%rdi),%r11
+
+	cmpq	128+16(%rsp),%rsi
+
+	movq	%rax,0(%rdi)
+	movq	%rbx,8(%rdi)
+	movq	%rcx,16(%rdi)
+	movq	%rdx,24(%rdi)
+	movq	%r8,32(%rdi)
+	movq	%r9,40(%rdi)
+	movq	%r10,48(%rdi)
+	movq	%r11,56(%rdi)
+	jb	.Lloop
+
+	movq	128+24(%rsp),%rsi
+	movq	(%rsi),%r15
+	movq	8(%rsi),%r14
+	movq	16(%rsi),%r13
+	movq	24(%rsi),%r12
+	movq	32(%rsi),%rbp
+	movq	40(%rsi),%rbx
+	leaq	48(%rsi),%rsp
+.Lepilogue:
+	movq	8(%rsp),%rdi
+	movq	16(%rsp),%rsi
+	retq
+.LSEH_end_sha512_block_data_order:
+.p2align	6
+
+K512:
+.quad	0x428a2f98d728ae22,0x7137449123ef65cd
+.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad	0x3956c25bf348b538,0x59f111f1b605d019
+.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad	0xd807aa98a3030242,0x12835b0145706fbe
+.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad	0x9bdc06a725c71235,0xc19bf174cf692694
+.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad	0x983e5152ee66dfab,0xa831c66d2db43210
+.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad	0x06ca6351e003826f,0x142929670a0e6e70
+.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad	0x81c2c92e47edaee6,0x92722c851482353b
+.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad	0xd192e819d6ef5218,0xd69906245565a910
+.quad	0xf40e35855771202a,0x106aa07032bbd1b8
+.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad	0x90befffa23631e28,0xa4506cebde82bde9
+.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad	0xca273eceea26619c,0xd186b8c721c0c207
+.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
+.quad	0x113f9804bef90dae,0x1b710b35131c471b
+.quad	0x28db77f523047d84,0x32caab7b40c72493
+.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
diff --git a/ext/libressl/crypto/sha/sha512.c b/ext/libressl/crypto/sha/sha512.c
new file mode 100644
index 0000000..6b95cfa
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha512.c
@@ -0,0 +1,547 @@
+/* $OpenBSD: sha512.c,v 1.15 2016/11/04 13:56:05 miod Exp $ */
+/* ====================================================================
+ * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
+ * according to the OpenSSL license [found in ../../LICENSE].
+ * ====================================================================
+ */
+
+#include <machine/endian.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <openssl/opensslconf.h>
+
+#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
+/*
+ * IMPLEMENTATION NOTES.
+ *
+ * As you might have noticed, 32-bit hash algorithms:
+ *
+ * - permit SHA_LONG to be wider than 32 bits (as is the case on CRAY);
+ * - optimized versions implement two transform functions: one operating
+ *   on [aligned] data in host byte order and one on data in input-stream
+ *   byte order;
+ * - share a common byte-order-neutral collector and padding function
+ *   implementation, ../md32_common.h;
+ *
+ * None of the above applies to this SHA-512 implementation. The reasons
+ * [in reverse order] are:
+ *
+ * - it's the only 64-bit hash algorithm at the moment of this writing,
+ *   so there is no need for a common collector/padding implementation [yet];
+ * - by supporting only one transform function [which operates on
+ *   *aligned* data in input-stream byte order, big-endian in this case]
+ *   we minimize the burden of maintenance in two ways: a) the
+ *   collector/padding function is simpler; b) there is only one transform
+ *   function to stare at;
+ * - SHA_LONG64 is required to be exactly 64 bits wide in order to be able
+ *   to apply a number of optimizations that mitigate the potential
+ *   performance penalties caused by the previous design decision;
+ *
+ * Caveat lector.
+ *
+ * The implementation relies on the fact that "long long" is 64 bits wide
+ * on both 32- and 64-bit platforms. If some compiler vendor comes up with
+ * a 128-bit long long, an adjustment to sha.h would be required.
+ * As this implementation relies on a 64-bit integer type, it is entirely
+ * unsuitable for platforms which don't provide one, most notably 16-bit
+ * platforms.
+ *					<appro@fy.chalmers.se>
+ */
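+/*
+ * Typical usage, as an illustrative sketch only ("buf" and "len" stand
+ * for caller-supplied data; the streaming and one-shot entry points are
+ * both defined further down in this file):
+ *
+ *	SHA512_CTX c;
+ *	unsigned char digest[SHA512_DIGEST_LENGTH];
+ *
+ *	SHA512_Init(&c);
+ *	SHA512_Update(&c, buf, len);	   update may be called repeatedly
+ *	SHA512_Final(digest, &c);
+ *
+ * or simply SHA512(buf, len, digest) for the one-shot case.
+ */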
+
+#include <openssl/crypto.h>
+#include <openssl/opensslv.h>
+#include <openssl/sha.h>
+
+#if !defined(__STRICT_ALIGNMENT) || defined(SHA512_ASM)
+#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
+#endif
+
+int SHA384_Init(SHA512_CTX *c)
+	{
+	c->h[0]=U64(0xcbbb9d5dc1059ed8);
+	c->h[1]=U64(0x629a292a367cd507);
+	c->h[2]=U64(0x9159015a3070dd17);
+	c->h[3]=U64(0x152fecd8f70e5939);
+	c->h[4]=U64(0x67332667ffc00b31);
+	c->h[5]=U64(0x8eb44a8768581511);
+	c->h[6]=U64(0xdb0c2e0d64f98fa7);
+	c->h[7]=U64(0x47b5481dbefa4fa4);
+
+        c->Nl=0;        c->Nh=0;
+        c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
+        return 1;
+	}
+
+int SHA512_Init(SHA512_CTX *c)
+	{
+	c->h[0]=U64(0x6a09e667f3bcc908);
+	c->h[1]=U64(0xbb67ae8584caa73b);
+	c->h[2]=U64(0x3c6ef372fe94f82b);
+	c->h[3]=U64(0xa54ff53a5f1d36f1);
+	c->h[4]=U64(0x510e527fade682d1);
+	c->h[5]=U64(0x9b05688c2b3e6c1f);
+	c->h[6]=U64(0x1f83d9abfb41bd6b);
+	c->h[7]=U64(0x5be0cd19137e2179);
+
+        c->Nl=0;        c->Nh=0;
+        c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
+        return 1;
+	}
+
+#ifndef SHA512_ASM
+static
+#endif
+void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
+
+int SHA512_Final (unsigned char *md, SHA512_CTX *c)
+	{
+	unsigned char *p=(unsigned char *)c->u.p;
+	size_t n=c->num;
+
+	p[n]=0x80;	/* There is always room for one */
+	n++;
+	if (n > (sizeof(c->u)-16))
+		memset (p+n,0,sizeof(c->u)-n), n=0,
+		sha512_block_data_order (c,p,1);
+
+	memset (p+n,0,sizeof(c->u)-16-n);
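+	/*
+	 * The last 16 bytes of the final block carry the total message
+	 * length in bits as a 128-bit big-endian integer (Nh:Nl).
+	 */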
+#if BYTE_ORDER == BIG_ENDIAN
+	c->u.d[SHA_LBLOCK-2] = c->Nh;
+	c->u.d[SHA_LBLOCK-1] = c->Nl;
+#else
+	p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
+	p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
+	p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
+	p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
+	p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
+	p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
+	p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
+	p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
+	p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
+	p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
+	p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
+	p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
+	p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
+	p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
+	p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
+	p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
+#endif
+
+	sha512_block_data_order (c,p,1);
+
+	if (md==0) return 0;
+
+	switch (c->md_len)
+		{
+		/* Let the compiler decide whether it's appropriate to unroll... */
+		case SHA384_DIGEST_LENGTH:
+			for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
+				{
+				SHA_LONG64 t = c->h[n];
+
+				*(md++)	= (unsigned char)(t>>56);
+				*(md++)	= (unsigned char)(t>>48);
+				*(md++)	= (unsigned char)(t>>40);
+				*(md++)	= (unsigned char)(t>>32);
+				*(md++)	= (unsigned char)(t>>24);
+				*(md++)	= (unsigned char)(t>>16);
+				*(md++)	= (unsigned char)(t>>8);
+				*(md++)	= (unsigned char)(t);
+				}
+			break;
+		case SHA512_DIGEST_LENGTH:
+			for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
+				{
+				SHA_LONG64 t = c->h[n];
+
+				*(md++)	= (unsigned char)(t>>56);
+				*(md++)	= (unsigned char)(t>>48);
+				*(md++)	= (unsigned char)(t>>40);
+				*(md++)	= (unsigned char)(t>>32);
+				*(md++)	= (unsigned char)(t>>24);
+				*(md++)	= (unsigned char)(t>>16);
+				*(md++)	= (unsigned char)(t>>8);
+				*(md++)	= (unsigned char)(t);
+				}
+			break;
+		/* ... as well as make sure md_len is not abused. */
+		default:	return 0;
+		}
+
+	return 1;
+	}
+
+int SHA384_Final (unsigned char *md,SHA512_CTX *c)
+{   return SHA512_Final (md,c);   }
+
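+/*
+ * SHA512_Update buffers partial input in c->u, hands complete blocks to
+ * sha512_block_data_order (via a bounce-buffer copy when the input is
+ * misaligned and the block function cannot handle that), and stashes any
+ * trailing bytes for the next call.
+ */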
+int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
+	{
+	SHA_LONG64	l;
+	unsigned char  *p=c->u.p;
+	const unsigned char *data=(const unsigned char *)_data;
+
+	if (len==0) return  1;
+
+	l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
+	if (l < c->Nl)		c->Nh++;
+	if (sizeof(len)>=8)	c->Nh+=(((SHA_LONG64)len)>>61);
+	c->Nl=l;
+
+	if (c->num != 0)
+		{
+		size_t n = sizeof(c->u) - c->num;
+
+		if (len < n)
+			{
+			memcpy (p+c->num,data,len), c->num += (unsigned int)len;
+			return 1;
+			}
+		else	{
+			memcpy (p+c->num,data,n), c->num = 0;
+			len-=n, data+=n;
+			sha512_block_data_order (c,p,1);
+			}
+		}
+
+	if (len >= sizeof(c->u))
+		{
+#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
+		if ((size_t)data%sizeof(c->u.d[0]) != 0)
+			while (len >= sizeof(c->u))
+				memcpy (p,data,sizeof(c->u)),
+				sha512_block_data_order (c,p,1),
+				len  -= sizeof(c->u),
+				data += sizeof(c->u);
+		else
+#endif
+			sha512_block_data_order (c,data,len/sizeof(c->u)),
+			data += len,
+			len  %= sizeof(c->u),
+			data -= len;
+		}
+
+	if (len != 0)	memcpy (p,data,len), c->num = (int)len;
+
+	return 1;
+	}
+
+int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
+{   return SHA512_Update (c,data,len);   }
+
+void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
+	{
+#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
+	if ((size_t)data%sizeof(c->u.d[0]) != 0)
+		memcpy(c->u.p,data,sizeof(c->u.p)),
+		data = c->u.p;
+#endif
+	sha512_block_data_order (c,data,1);
+	}
+
+unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
+	{
+	SHA512_CTX c;
+	static unsigned char m[SHA384_DIGEST_LENGTH];
+
+	if (md == NULL) md=m;
+	SHA384_Init(&c);
+	SHA512_Update(&c,d,n);
+	SHA512_Final(md,&c);
+	explicit_bzero(&c,sizeof(c));
+	return(md);
+	}
+
+unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
+	{
+	SHA512_CTX c;
+	static unsigned char m[SHA512_DIGEST_LENGTH];
+
+	if (md == NULL) md=m;
+	SHA512_Init(&c);
+	SHA512_Update(&c,d,n);
+	SHA512_Final(md,&c);
+	explicit_bzero(&c,sizeof(c));
+	return(md);
+	}
+
+#ifndef SHA512_ASM
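+/*
+ * Round constants: the first sixty-four bits of the fractional parts of
+ * the cube roots of the first eighty prime numbers (FIPS 180-4).
+ */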
+static const SHA_LONG64 K512[80] = {
+        U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
+        U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
+        U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
+        U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
+        U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
+        U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
+        U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
+        U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
+        U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
+        U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
+        U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
+        U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
+        U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
+        U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
+        U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
+        U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
+        U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
+        U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
+        U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
+        U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
+        U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
+        U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
+        U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
+        U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
+        U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
+        U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
+        U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
+        U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
+        U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
+        U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
+        U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
+        U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
+        U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
+        U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
+        U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
+        U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
+        U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
+        U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
+        U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
+        U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
+
+#if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
+# if defined(__x86_64) || defined(__x86_64__)
+#  define ROTR(a,n)	({ SHA_LONG64 ret;		\
+				asm ("rorq %1,%0"	\
+				: "=r"(ret)		\
+				: "J"(n),"0"(a)		\
+				: "cc"); ret;		})
+#   define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));	\
+				asm ("bswapq	%0"		\
+				: "=r"(ret)			\
+				: "0"(ret)); ret;		})
+# elif (defined(__i386) || defined(__i386__))
+#   define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
+			 unsigned int hi=p[0],lo=p[1];		\
+				asm ("bswapl %0; bswapl %1;"	\
+				: "=r"(lo),"=r"(hi)		\
+				: "0"(lo),"1"(hi));		\
+				((SHA_LONG64)hi)<<32|lo;	})
+# elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
+#  define ROTR(a,n)	({ SHA_LONG64 ret;		\
+				asm ("rotrdi %0,%1,%2"	\
+				: "=r"(ret)		\
+				: "r"(a),"K"(n)); ret;	})
+# endif
+#endif
+
+#ifndef PULL64
+#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
+#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
+#endif
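+/*
+ * The generic PULL64 above assembles a 64-bit big-endian word one byte
+ * at a time, so it imposes no alignment requirement on the input.
+ */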
+
+#ifndef ROTR
+#define ROTR(x,s)	(((x)>>s) | (x)<<(64-s))
+#endif
+
+#define Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+#define Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
+#define sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
+#define sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+
+#define Ch(x,y,z)	(((x) & (y)) ^ ((~(x)) & (z)))
+#define Maj(x,y,z)	(((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
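+/*
+ * These are the SHA-384/512 round functions of FIPS 180-4: Sigma0/Sigma1
+ * are the "big" sigma functions applied to the working variables a and e,
+ * sigma0/sigma1 the "small" sigma functions used by the message schedule,
+ * and Ch/Maj the choose and majority functions.
+ */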
+
+
+#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
+/*
+ * This code should give better results on a 32-bit CPU with fewer than
+ * ~24 registers, both size- and performance-wise...
+ */
+static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
+	{
+	const SHA_LONG64 *W=in;
+	SHA_LONG64	A,E,T;
+	SHA_LONG64	X[9+80],*F;
+	int i;
+
+			while (num--) {
+
+	F    = X+80;
+	A    = ctx->h[0];	F[1] = ctx->h[1];
+	F[2] = ctx->h[2];	F[3] = ctx->h[3];
+	E    = ctx->h[4];	F[5] = ctx->h[5];
+	F[6] = ctx->h[6];	F[7] = ctx->h[7];
+
+	for (i=0;i<16;i++,F--)
+		{
+		T = PULL64(W[i]);
+		F[0] = A;
+		F[4] = E;
+		F[8] = T;
+		T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
+		E    = F[3] + T;
+		A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
+		}
+
+	for (;i<80;i++,F--)
+		{
+		T    = sigma0(F[8+16-1]);
+		T   += sigma1(F[8+16-14]);
+		T   += F[8+16] + F[8+16-9];
+
+		F[0] = A;
+		F[4] = E;
+		F[8] = T;
+		T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
+		E    = F[3] + T;
+		A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
+		}
+
+	ctx->h[0] += A;		ctx->h[1] += F[1];
+	ctx->h[2] += F[2];	ctx->h[3] += F[3];
+	ctx->h[4] += E;		ctx->h[5] += F[5];
+	ctx->h[6] += F[6];	ctx->h[7] += F[7];
+
+			W+=SHA_LBLOCK;
+			}
+	}
+
+#elif defined(OPENSSL_SMALL_FOOTPRINT)
+
+static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
+	{
+	const SHA_LONG64 *W=in;
+	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1,T2;
+	SHA_LONG64	X[16];
+	int i;
+
+			while (num--) {
+
+	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
+	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
+
+	for (i=0;i<16;i++)
+		{
+#if BYTE_ORDER == BIG_ENDIAN
+		T1 = X[i] = W[i];
+#else
+		T1 = X[i] = PULL64(W[i]);
+#endif
+		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
+		T2 = Sigma0(a) + Maj(a,b,c);
+		h = g;	g = f;	f = e;	e = d + T1;
+		d = c;	c = b;	b = a;	a = T1 + T2;
+		}
+
+	for (;i<80;i++)
+		{
+		s0 = X[(i+1)&0x0f];	s0 = sigma0(s0);
+		s1 = X[(i+14)&0x0f];	s1 = sigma1(s1);
+
+		T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
+		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
+		T2 = Sigma0(a) + Maj(a,b,c);
+		h = g;	g = f;	f = e;	e = d + T1;
+		d = c;	c = b;	b = a;	a = T1 + T2;
+		}
+
+	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
+	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
+
+			W+=SHA_LBLOCK;
+			}
+	}
+
+#else
+
+#define	ROUND_00_15(i,a,b,c,d,e,f,g,h)		do {	\
+	T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];	\
+	h = Sigma0(a) + Maj(a,b,c);			\
+	d += T1;	h += T1;		} while (0)
+
+#define	ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)	do {	\
+	s0 = X[(j+1)&0x0f];	s0 = sigma0(s0);	\
+	s1 = X[(j+14)&0x0f];	s1 = sigma1(s1);	\
+	T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];	\
+	ROUND_00_15(i+j,a,b,c,d,e,f,g,h);		} while (0)
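+/*
+ * ROUND_00_15 expects T1 to hold the current schedule word on entry; it
+ * adds the round terms and folds T1 into d and the freshly computed h.
+ * The unrolled calls below never move the eight working variables:
+ * each round simply passes them to the macro shifted by one position.
+ */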
+
+static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
+	{
+	const SHA_LONG64 *W=in;
+	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1;
+	SHA_LONG64	X[16];
+	int i;
+
+			while (num--) {
+
+	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
+	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
+
+#if BYTE_ORDER == BIG_ENDIAN
+	T1 = X[0] = W[0];	ROUND_00_15(0,a,b,c,d,e,f,g,h);
+	T1 = X[1] = W[1];	ROUND_00_15(1,h,a,b,c,d,e,f,g);
+	T1 = X[2] = W[2];	ROUND_00_15(2,g,h,a,b,c,d,e,f);
+	T1 = X[3] = W[3];	ROUND_00_15(3,f,g,h,a,b,c,d,e);
+	T1 = X[4] = W[4];	ROUND_00_15(4,e,f,g,h,a,b,c,d);
+	T1 = X[5] = W[5];	ROUND_00_15(5,d,e,f,g,h,a,b,c);
+	T1 = X[6] = W[6];	ROUND_00_15(6,c,d,e,f,g,h,a,b);
+	T1 = X[7] = W[7];	ROUND_00_15(7,b,c,d,e,f,g,h,a);
+	T1 = X[8] = W[8];	ROUND_00_15(8,a,b,c,d,e,f,g,h);
+	T1 = X[9] = W[9];	ROUND_00_15(9,h,a,b,c,d,e,f,g);
+	T1 = X[10] = W[10];	ROUND_00_15(10,g,h,a,b,c,d,e,f);
+	T1 = X[11] = W[11];	ROUND_00_15(11,f,g,h,a,b,c,d,e);
+	T1 = X[12] = W[12];	ROUND_00_15(12,e,f,g,h,a,b,c,d);
+	T1 = X[13] = W[13];	ROUND_00_15(13,d,e,f,g,h,a,b,c);
+	T1 = X[14] = W[14];	ROUND_00_15(14,c,d,e,f,g,h,a,b);
+	T1 = X[15] = W[15];	ROUND_00_15(15,b,c,d,e,f,g,h,a);
+#else
+	T1 = X[0]  = PULL64(W[0]);	ROUND_00_15(0,a,b,c,d,e,f,g,h);
+	T1 = X[1]  = PULL64(W[1]);	ROUND_00_15(1,h,a,b,c,d,e,f,g);
+	T1 = X[2]  = PULL64(W[2]);	ROUND_00_15(2,g,h,a,b,c,d,e,f);
+	T1 = X[3]  = PULL64(W[3]);	ROUND_00_15(3,f,g,h,a,b,c,d,e);
+	T1 = X[4]  = PULL64(W[4]);	ROUND_00_15(4,e,f,g,h,a,b,c,d);
+	T1 = X[5]  = PULL64(W[5]);	ROUND_00_15(5,d,e,f,g,h,a,b,c);
+	T1 = X[6]  = PULL64(W[6]);	ROUND_00_15(6,c,d,e,f,g,h,a,b);
+	T1 = X[7]  = PULL64(W[7]);	ROUND_00_15(7,b,c,d,e,f,g,h,a);
+	T1 = X[8]  = PULL64(W[8]);	ROUND_00_15(8,a,b,c,d,e,f,g,h);
+	T1 = X[9]  = PULL64(W[9]);	ROUND_00_15(9,h,a,b,c,d,e,f,g);
+	T1 = X[10] = PULL64(W[10]);	ROUND_00_15(10,g,h,a,b,c,d,e,f);
+	T1 = X[11] = PULL64(W[11]);	ROUND_00_15(11,f,g,h,a,b,c,d,e);
+	T1 = X[12] = PULL64(W[12]);	ROUND_00_15(12,e,f,g,h,a,b,c,d);
+	T1 = X[13] = PULL64(W[13]);	ROUND_00_15(13,d,e,f,g,h,a,b,c);
+	T1 = X[14] = PULL64(W[14]);	ROUND_00_15(14,c,d,e,f,g,h,a,b);
+	T1 = X[15] = PULL64(W[15]);	ROUND_00_15(15,b,c,d,e,f,g,h,a);
+#endif
+
+	for (i=16;i<80;i+=16)
+		{
+		ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X);
+		ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X);
+		ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X);
+		ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X);
+		ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X);
+		ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X);
+		ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X);
+		ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X);
+		ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X);
+		ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X);
+		ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X);
+		ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X);
+		ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X);
+		ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X);
+		ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X);
+		ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X);
+		}
+
+	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
+	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
+
+			W+=SHA_LBLOCK;
+			}
+	}
+
+#endif
+
+#endif /* SHA512_ASM */
+
+#endif /* !OPENSSL_NO_SHA512 */
diff --git a/ext/libressl/crypto/sha/sha_locl.h b/ext/libressl/crypto/sha/sha_locl.h
new file mode 100644
index 0000000..46c9a39
--- /dev/null
+++ b/ext/libressl/crypto/sha/sha_locl.h
@@ -0,0 +1,419 @@
+/* $OpenBSD: sha_locl.h,v 1.23 2016/12/23 23:22:25 patrick Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ * 
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to.  The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ * 
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *    "This product includes cryptographic software written by
+ *     Eric Young (eay@cryptsoft.com)"
+ *    The word 'cryptographic' can be left out if the rouines from the library
+ *    being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from 
+ *    the apps directory (application code) you must include an acknowledgement:
+ *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ * 
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * 
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed.  i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <openssl/opensslconf.h>
+#include <openssl/sha.h>
+
+#define DATA_ORDER_IS_BIG_ENDIAN
+
+#define HASH_LONG               SHA_LONG
+#define HASH_CTX                SHA_CTX
+#define HASH_CBLOCK             SHA_CBLOCK
+#define HASH_MAKE_STRING(c,s)   do {	\
+	unsigned long ll;		\
+	ll=(c)->h0; HOST_l2c(ll,(s));	\
+	ll=(c)->h1; HOST_l2c(ll,(s));	\
+	ll=(c)->h2; HOST_l2c(ll,(s));	\
+	ll=(c)->h3; HOST_l2c(ll,(s));	\
+	ll=(c)->h4; HOST_l2c(ll,(s));	\
+	} while (0)
+
+# define HASH_UPDATE             	SHA1_Update
+# define HASH_TRANSFORM          	SHA1_Transform
+# define HASH_FINAL              	SHA1_Final
+# define HASH_INIT			SHA1_Init
+# define HASH_BLOCK_DATA_ORDER   	sha1_block_data_order
+# define Xupdate(a,ix,ia,ib,ic,id)	( (a)=(ia^ib^ic^id),	\
+					  ix=(a)=ROTATE((a),1)	\
+					)
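+/*
+ * Xupdate is the SHA-1 message expansion W[t] = ROTL1(W[t-3] ^ W[t-8] ^
+ * W[t-14] ^ W[t-16]); the ia..id arguments supply W[t-16], W[t-14],
+ * W[t-8] and W[t-3], and ix receives the newly expanded word.
+ */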
+
+__BEGIN_HIDDEN_DECLS
+
+#ifndef SHA1_ASM
+static
+#endif
+
+void sha1_block_data_order (SHA_CTX *c, const void *p,size_t num);
+
+__END_HIDDEN_DECLS
+
+#include "md32_common.h"
+
+#define INIT_DATA_h0 0x67452301UL
+#define INIT_DATA_h1 0xefcdab89UL
+#define INIT_DATA_h2 0x98badcfeUL
+#define INIT_DATA_h3 0x10325476UL
+#define INIT_DATA_h4 0xc3d2e1f0UL
+
+int SHA1_Init(SHA_CTX *c)
+	{
+	memset (c,0,sizeof(*c));
+	c->h0=INIT_DATA_h0;
+	c->h1=INIT_DATA_h1;
+	c->h2=INIT_DATA_h2;
+	c->h3=INIT_DATA_h3;
+	c->h4=INIT_DATA_h4;
+	return 1;
+	}
+
+#define K_00_19	0x5a827999UL
+#define K_20_39 0x6ed9eba1UL
+#define K_40_59 0x8f1bbcdcUL
+#define K_60_79 0xca62c1d6UL
+
+/* As pointed out by Wei Dai <weidai@eskimo.com>, F() below can be
+ * simplified to the code in F_00_19.  Wei attributes these optimisations
+ * to Peter Gutmann's SHS code, and he attributes them to Rich Schroeppel.
+ * #define F(x,y,z) (((x) & (y))  |  ((~(x)) & (z)))
+ * I've just become aware of another tweak to be made, again from Wei Dai,
+ * in F_40_59, (x&a)|(y&a) -> (x|y)&a
+ */
+#define	F_00_19(b,c,d)	((((c) ^ (d)) & (b)) ^ (d)) 
+#define	F_20_39(b,c,d)	((b) ^ (c) ^ (d))
+#define F_40_59(b,c,d)	(((b) & (c)) | (((b)|(c)) & (d))) 
+#define	F_60_79(b,c,d)	F_20_39(b,c,d)
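+/*
+ * A quick check of the first identity: when a bit of b is set,
+ * (((c ^ d) & b) ^ d) reduces to (c ^ d) ^ d = c, and when it is clear
+ * the result is d -- the same bitwise select as ((b & c) | (~b & d)).
+ */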
+
+#ifndef OPENSSL_SMALL_FOOTPRINT
+
+#define BODY_00_15(i,a,b,c,d,e,f,xi) \
+	(f)=xi+(e)+K_00_19+ROTATE((a),5)+F_00_19((b),(c),(d)); \
+	(b)=ROTATE((b),30);
+
+#define BODY_16_19(i,a,b,c,d,e,f,xi,xa,xb,xc,xd) \
+	Xupdate(f,xi,xa,xb,xc,xd); \
+	(f)+=(e)+K_00_19+ROTATE((a),5)+F_00_19((b),(c),(d)); \
+	(b)=ROTATE((b),30);
+
+#define BODY_20_31(i,a,b,c,d,e,f,xi,xa,xb,xc,xd) \
+	Xupdate(f,xi,xa,xb,xc,xd); \
+	(f)+=(e)+K_20_39+ROTATE((a),5)+F_20_39((b),(c),(d)); \
+	(b)=ROTATE((b),30);
+
+#define BODY_32_39(i,a,b,c,d,e,f,xa,xb,xc,xd) \
+	Xupdate(f,xa,xa,xb,xc,xd); \
+	(f)+=(e)+K_20_39+ROTATE((a),5)+F_20_39((b),(c),(d)); \
+	(b)=ROTATE((b),30);
+
+#define BODY_40_59(i,a,b,c,d,e,f,xa,xb,xc,xd) \
+	Xupdate(f,xa,xa,xb,xc,xd); \
+	(f)+=(e)+K_40_59+ROTATE((a),5)+F_40_59((b),(c),(d)); \
+	(b)=ROTATE((b),30);
+
+#define BODY_60_79(i,a,b,c,d,e,f,xa,xb,xc,xd) \
+	Xupdate(f,xa,xa,xb,xc,xd); \
+	(f)=xa+(e)+K_60_79+ROTATE((a),5)+F_60_79((b),(c),(d)); \
+	(b)=ROTATE((b),30);
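+/*
+ * Each BODY_* macro writes the round output into its "f" argument and
+ * rotates "b" in place; the unrolled calls below therefore never move
+ * the five working variables but simply shift the argument list by one
+ * position per round.
+ */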
+
+#ifdef X
+#undef X
+#endif
+#ifndef MD32_XARRAY
+  /*
+   * Originally X was an array. As it's automatic, it's natural to
+   * expect a RISC compiler to accommodate at least part of it in the
+   * register bank, isn't it? Unfortunately not all compilers "find"
+   * this expectation reasonable :-( In order to make such compilers
+   * generate better code I replace X[] with a bunch of X0, X1, etc.;
+   * see the function body below...
+   *					<appro@fy.chalmers.se>
+   */
+# define X(i)	XX##i
+#else
+  /*
+   * However! Some compilers (most notably HP C) get overwhelmed by
+   * that many local variables, so we have to keep a way to fall back
+   * to the original behavior.
+   */
+# define X(i)	XX[i]
+#endif
+
+#if !defined(SHA1_ASM)
+#include <machine/endian.h>
+static void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
+	{
+	const unsigned char *data=p;
+	unsigned MD32_REG_T A,B,C,D,E,T,l;
+#ifndef MD32_XARRAY
+	unsigned MD32_REG_T	XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
+				XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
+#else
+	SHA_LONG	XX[16];
+#endif
+
+	A=c->h0;
+	B=c->h1;
+	C=c->h2;
+	D=c->h3;
+	E=c->h4;
+
+	for (;;)
+			{
+
+	if (BYTE_ORDER != LITTLE_ENDIAN &&
+	    sizeof(SHA_LONG)==4 && ((size_t)p%4)==0)
+		{
+		const SHA_LONG *W=(const SHA_LONG *)data;
+
+		X( 0) = W[0];				X( 1) = W[ 1];
+		BODY_00_15( 0,A,B,C,D,E,T,X( 0));	X( 2) = W[ 2];
+		BODY_00_15( 1,T,A,B,C,D,E,X( 1));	X( 3) = W[ 3];
+		BODY_00_15( 2,E,T,A,B,C,D,X( 2));	X( 4) = W[ 4];
+		BODY_00_15( 3,D,E,T,A,B,C,X( 3));	X( 5) = W[ 5];
+		BODY_00_15( 4,C,D,E,T,A,B,X( 4));	X( 6) = W[ 6];
+		BODY_00_15( 5,B,C,D,E,T,A,X( 5));	X( 7) = W[ 7];
+		BODY_00_15( 6,A,B,C,D,E,T,X( 6));	X( 8) = W[ 8];
+		BODY_00_15( 7,T,A,B,C,D,E,X( 7));	X( 9) = W[ 9];
+		BODY_00_15( 8,E,T,A,B,C,D,X( 8));	X(10) = W[10];
+		BODY_00_15( 9,D,E,T,A,B,C,X( 9));	X(11) = W[11];
+		BODY_00_15(10,C,D,E,T,A,B,X(10));	X(12) = W[12];
+		BODY_00_15(11,B,C,D,E,T,A,X(11));	X(13) = W[13];
+		BODY_00_15(12,A,B,C,D,E,T,X(12));	X(14) = W[14];
+		BODY_00_15(13,T,A,B,C,D,E,X(13));	X(15) = W[15];
+		BODY_00_15(14,E,T,A,B,C,D,X(14));
+		BODY_00_15(15,D,E,T,A,B,C,X(15));
+
+		data += SHA_CBLOCK;
+		}
+	else
+		{
+		HOST_c2l(data,l); X( 0)=l;		HOST_c2l(data,l); X( 1)=l;
+		BODY_00_15( 0,A,B,C,D,E,T,X( 0));	HOST_c2l(data,l); X( 2)=l;
+		BODY_00_15( 1,T,A,B,C,D,E,X( 1));	HOST_c2l(data,l); X( 3)=l;
+		BODY_00_15( 2,E,T,A,B,C,D,X( 2));	HOST_c2l(data,l); X( 4)=l;
+		BODY_00_15( 3,D,E,T,A,B,C,X( 3));	HOST_c2l(data,l); X( 5)=l;
+		BODY_00_15( 4,C,D,E,T,A,B,X( 4));	HOST_c2l(data,l); X( 6)=l;
+		BODY_00_15( 5,B,C,D,E,T,A,X( 5));	HOST_c2l(data,l); X( 7)=l;
+		BODY_00_15( 6,A,B,C,D,E,T,X( 6));	HOST_c2l(data,l); X( 8)=l;
+		BODY_00_15( 7,T,A,B,C,D,E,X( 7));	HOST_c2l(data,l); X( 9)=l;
+		BODY_00_15( 8,E,T,A,B,C,D,X( 8));	HOST_c2l(data,l); X(10)=l;
+		BODY_00_15( 9,D,E,T,A,B,C,X( 9));	HOST_c2l(data,l); X(11)=l;
+		BODY_00_15(10,C,D,E,T,A,B,X(10));	HOST_c2l(data,l); X(12)=l;
+		BODY_00_15(11,B,C,D,E,T,A,X(11));	HOST_c2l(data,l); X(13)=l;
+		BODY_00_15(12,A,B,C,D,E,T,X(12));	HOST_c2l(data,l); X(14)=l;
+		BODY_00_15(13,T,A,B,C,D,E,X(13));	HOST_c2l(data,l); X(15)=l;
+		BODY_00_15(14,E,T,A,B,C,D,X(14));
+		BODY_00_15(15,D,E,T,A,B,C,X(15));
+		}
+
+	BODY_16_19(16,C,D,E,T,A,B,X( 0),X( 0),X( 2),X( 8),X(13));
+	BODY_16_19(17,B,C,D,E,T,A,X( 1),X( 1),X( 3),X( 9),X(14));
+	BODY_16_19(18,A,B,C,D,E,T,X( 2),X( 2),X( 4),X(10),X(15));
+	BODY_16_19(19,T,A,B,C,D,E,X( 3),X( 3),X( 5),X(11),X( 0));
+
+	BODY_20_31(20,E,T,A,B,C,D,X( 4),X( 4),X( 6),X(12),X( 1));
+	BODY_20_31(21,D,E,T,A,B,C,X( 5),X( 5),X( 7),X(13),X( 2));
+	BODY_20_31(22,C,D,E,T,A,B,X( 6),X( 6),X( 8),X(14),X( 3));
+	BODY_20_31(23,B,C,D,E,T,A,X( 7),X( 7),X( 9),X(15),X( 4));
+	BODY_20_31(24,A,B,C,D,E,T,X( 8),X( 8),X(10),X( 0),X( 5));
+	BODY_20_31(25,T,A,B,C,D,E,X( 9),X( 9),X(11),X( 1),X( 6));
+	BODY_20_31(26,E,T,A,B,C,D,X(10),X(10),X(12),X( 2),X( 7));
+	BODY_20_31(27,D,E,T,A,B,C,X(11),X(11),X(13),X( 3),X( 8));
+	BODY_20_31(28,C,D,E,T,A,B,X(12),X(12),X(14),X( 4),X( 9));
+	BODY_20_31(29,B,C,D,E,T,A,X(13),X(13),X(15),X( 5),X(10));
+	BODY_20_31(30,A,B,C,D,E,T,X(14),X(14),X( 0),X( 6),X(11));
+	BODY_20_31(31,T,A,B,C,D,E,X(15),X(15),X( 1),X( 7),X(12));
+
+	BODY_32_39(32,E,T,A,B,C,D,X( 0),X( 2),X( 8),X(13));
+	BODY_32_39(33,D,E,T,A,B,C,X( 1),X( 3),X( 9),X(14));
+	BODY_32_39(34,C,D,E,T,A,B,X( 2),X( 4),X(10),X(15));
+	BODY_32_39(35,B,C,D,E,T,A,X( 3),X( 5),X(11),X( 0));
+	BODY_32_39(36,A,B,C,D,E,T,X( 4),X( 6),X(12),X( 1));
+	BODY_32_39(37,T,A,B,C,D,E,X( 5),X( 7),X(13),X( 2));
+	BODY_32_39(38,E,T,A,B,C,D,X( 6),X( 8),X(14),X( 3));
+	BODY_32_39(39,D,E,T,A,B,C,X( 7),X( 9),X(15),X( 4));
+
+	BODY_40_59(40,C,D,E,T,A,B,X( 8),X(10),X( 0),X( 5));
+	BODY_40_59(41,B,C,D,E,T,A,X( 9),X(11),X( 1),X( 6));
+	BODY_40_59(42,A,B,C,D,E,T,X(10),X(12),X( 2),X( 7));
+	BODY_40_59(43,T,A,B,C,D,E,X(11),X(13),X( 3),X( 8));
+	BODY_40_59(44,E,T,A,B,C,D,X(12),X(14),X( 4),X( 9));
+	BODY_40_59(45,D,E,T,A,B,C,X(13),X(15),X( 5),X(10));
+	BODY_40_59(46,C,D,E,T,A,B,X(14),X( 0),X( 6),X(11));
+	BODY_40_59(47,B,C,D,E,T,A,X(15),X( 1),X( 7),X(12));
+	BODY_40_59(48,A,B,C,D,E,T,X( 0),X( 2),X( 8),X(13));
+	BODY_40_59(49,T,A,B,C,D,E,X( 1),X( 3),X( 9),X(14));
+	BODY_40_59(50,E,T,A,B,C,D,X( 2),X( 4),X(10),X(15));
+	BODY_40_59(51,D,E,T,A,B,C,X( 3),X( 5),X(11),X( 0));
+	BODY_40_59(52,C,D,E,T,A,B,X( 4),X( 6),X(12),X( 1));
+	BODY_40_59(53,B,C,D,E,T,A,X( 5),X( 7),X(13),X( 2));
+	BODY_40_59(54,A,B,C,D,E,T,X( 6),X( 8),X(14),X( 3));
+	BODY_40_59(55,T,A,B,C,D,E,X( 7),X( 9),X(15),X( 4));
+	BODY_40_59(56,E,T,A,B,C,D,X( 8),X(10),X( 0),X( 5));
+	BODY_40_59(57,D,E,T,A,B,C,X( 9),X(11),X( 1),X( 6));
+	BODY_40_59(58,C,D,E,T,A,B,X(10),X(12),X( 2),X( 7));
+	BODY_40_59(59,B,C,D,E,T,A,X(11),X(13),X( 3),X( 8));
+
+	BODY_60_79(60,A,B,C,D,E,T,X(12),X(14),X( 4),X( 9));
+	BODY_60_79(61,T,A,B,C,D,E,X(13),X(15),X( 5),X(10));
+	BODY_60_79(62,E,T,A,B,C,D,X(14),X( 0),X( 6),X(11));
+	BODY_60_79(63,D,E,T,A,B,C,X(15),X( 1),X( 7),X(12));
+	BODY_60_79(64,C,D,E,T,A,B,X( 0),X( 2),X( 8),X(13));
+	BODY_60_79(65,B,C,D,E,T,A,X( 1),X( 3),X( 9),X(14));
+	BODY_60_79(66,A,B,C,D,E,T,X( 2),X( 4),X(10),X(15));
+	BODY_60_79(67,T,A,B,C,D,E,X( 3),X( 5),X(11),X( 0));
+	BODY_60_79(68,E,T,A,B,C,D,X( 4),X( 6),X(12),X( 1));
+	BODY_60_79(69,D,E,T,A,B,C,X( 5),X( 7),X(13),X( 2));
+	BODY_60_79(70,C,D,E,T,A,B,X( 6),X( 8),X(14),X( 3));
+	BODY_60_79(71,B,C,D,E,T,A,X( 7),X( 9),X(15),X( 4));
+	BODY_60_79(72,A,B,C,D,E,T,X( 8),X(10),X( 0),X( 5));
+	BODY_60_79(73,T,A,B,C,D,E,X( 9),X(11),X( 1),X( 6));
+	BODY_60_79(74,E,T,A,B,C,D,X(10),X(12),X( 2),X( 7));
+	BODY_60_79(75,D,E,T,A,B,C,X(11),X(13),X( 3),X( 8));
+	BODY_60_79(76,C,D,E,T,A,B,X(12),X(14),X( 4),X( 9));
+	BODY_60_79(77,B,C,D,E,T,A,X(13),X(15),X( 5),X(10));
+	BODY_60_79(78,A,B,C,D,E,T,X(14),X( 0),X( 6),X(11));
+	BODY_60_79(79,T,A,B,C,D,E,X(15),X( 1),X( 7),X(12));
+	
+	c->h0=(c->h0+E)&0xffffffffL; 
+	c->h1=(c->h1+T)&0xffffffffL;
+	c->h2=(c->h2+A)&0xffffffffL;
+	c->h3=(c->h3+B)&0xffffffffL;
+	c->h4=(c->h4+C)&0xffffffffL;
+
+	if (--num == 0) break;
+
+	A=c->h0;
+	B=c->h1;
+	C=c->h2;
+	D=c->h3;
+	E=c->h4;
+
+			}
+	}
+#endif
+
+#else	/* OPENSSL_SMALL_FOOTPRINT */
+
+#define BODY_00_15(xi)		 do {	\
+	T=E+K_00_19+F_00_19(B,C,D);	\
+	E=D, D=C, C=ROTATE(B,30), B=A;	\
+	A=ROTATE(A,5)+T+xi;	    } while(0)
+
+#define BODY_16_19(xa,xb,xc,xd)	 do {	\
+	Xupdate(T,xa,xa,xb,xc,xd);	\
+	T+=E+K_00_19+F_00_19(B,C,D);	\
+	E=D, D=C, C=ROTATE(B,30), B=A;	\
+	A=ROTATE(A,5)+T;	    } while(0)
+
+#define BODY_20_39(xa,xb,xc,xd)	 do {	\
+	Xupdate(T,xa,xa,xb,xc,xd);	\
+	T+=E+K_20_39+F_20_39(B,C,D);	\
+	E=D, D=C, C=ROTATE(B,30), B=A;	\
+	A=ROTATE(A,5)+T;	    } while(0)
+
+#define BODY_40_59(xa,xb,xc,xd)	 do {	\
+	Xupdate(T,xa,xa,xb,xc,xd);	\
+	T+=E+K_40_59+F_40_59(B,C,D);	\
+	E=D, D=C, C=ROTATE(B,30), B=A;	\
+	A=ROTATE(A,5)+T;	    } while(0)
+
+#define BODY_60_79(xa,xb,xc,xd)	 do {	\
+	Xupdate(T,xa,xa,xb,xc,xd);	\
+	T=E+K_60_79+F_60_79(B,C,D);	\
+	E=D, D=C, C=ROTATE(B,30), B=A;	\
+	A=ROTATE(A,5)+T+xa;	    } while(0)
+
+#if !defined(SHA1_ASM)
+static void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
+	{
+	const unsigned char *data=p;
+	unsigned MD32_REG_T A,B,C,D,E,T,l;
+	int i;
+	SHA_LONG	X[16];
+
+	A=c->h0;
+	B=c->h1;
+	C=c->h2;
+	D=c->h3;
+	E=c->h4;
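+	/*
+	 * In the round loops below the counters are offset so that every
+	 * X[] subscript equals the round's message-schedule index reduced
+	 * mod 16, letting a 16-word circular buffer stand in for W[0..79].
+	 */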
+
+	for (;;)
+		{
+	for (i=0;i<16;i++)
+	{ HOST_c2l(data,l); X[i]=l; BODY_00_15(X[i]); }
+	for (i=0;i<4;i++)
+	{ BODY_16_19(X[i],       X[i+2],      X[i+8],     X[(i+13)&15]); }
+	for (;i<24;i++)
+	{ BODY_20_39(X[i&15],    X[(i+2)&15], X[(i+8)&15],X[(i+13)&15]); }
+	for (i=0;i<20;i++)
+	{ BODY_40_59(X[(i+8)&15],X[(i+10)&15],X[i&15],    X[(i+5)&15]);  }
+	for (i=4;i<24;i++)
+	{ BODY_60_79(X[(i+8)&15],X[(i+10)&15],X[i&15],    X[(i+5)&15]);  }
+
+	c->h0=(c->h0+A)&0xffffffffL; 
+	c->h1=(c->h1+B)&0xffffffffL;
+	c->h2=(c->h2+C)&0xffffffffL;
+	c->h3=(c->h3+D)&0xffffffffL;
+	c->h4=(c->h4+E)&0xffffffffL;
+
+	if (--num == 0) break;
+
+	A=c->h0;
+	B=c->h1;
+	C=c->h2;
+	D=c->h3;
+	E=c->h4;
+
+		}
+	}
+#endif
+
+#endif