diff options
author | polwex <polwex@sortug.com> | 2025-10-05 21:56:51 +0700 |
---|---|---|
committer | polwex <polwex@sortug.com> | 2025-10-05 21:56:51 +0700 |
commit | fcedfddf00b3f994e4f4e40332ac7fc192c63244 (patch) | |
tree | 51d38e62c7bdfcc5f9a5e9435fe820c93cfc9a3d /vere/ext/openssl/gen/windows-x86_64/crypto/sha |
claude is gud
Diffstat (limited to 'vere/ext/openssl/gen/windows-x86_64/crypto/sha')
6 files changed, 33498 insertions, 0 deletions
diff --git a/vere/ext/openssl/gen/windows-x86_64/crypto/sha/keccak1600-x86_64.asm b/vere/ext/openssl/gen/windows-x86_64/crypto/sha/keccak1600-x86_64.asm new file mode 100644 index 0000000..fdab35d --- /dev/null +++ b/vere/ext/openssl/gen/windows-x86_64/crypto/sha/keccak1600-x86_64.asm @@ -0,0 +1,527 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + + +ALIGN 32 +__KeccakF1600: + + mov rax,QWORD[60+rdi] + mov rbx,QWORD[68+rdi] + mov rcx,QWORD[76+rdi] + mov rdx,QWORD[84+rdi] + mov rbp,QWORD[92+rdi] + jmp NEAR $L$oop + +ALIGN 32 +$L$oop: + mov r8,QWORD[((-100))+rdi] + mov r9,QWORD[((-52))+rdi] + mov r10,QWORD[((-4))+rdi] + mov r11,QWORD[44+rdi] + + xor rcx,QWORD[((-84))+rdi] + xor rdx,QWORD[((-76))+rdi] + xor rax,r8 + xor rbx,QWORD[((-92))+rdi] + xor rcx,QWORD[((-44))+rdi] + xor rax,QWORD[((-60))+rdi] + mov r12,rbp + xor rbp,QWORD[((-68))+rdi] + + xor rcx,r10 + xor rax,QWORD[((-20))+rdi] + xor rdx,QWORD[((-36))+rdi] + xor rbx,r9 + xor rbp,QWORD[((-28))+rdi] + + xor rcx,QWORD[36+rdi] + xor rax,QWORD[20+rdi] + xor rdx,QWORD[4+rdi] + xor rbx,QWORD[((-12))+rdi] + xor rbp,QWORD[12+rdi] + + mov r13,rcx + rol rcx,1 + xor rcx,rax + xor rdx,r11 + + rol rax,1 + xor rax,rdx + xor rbx,QWORD[28+rdi] + + rol rdx,1 + xor rdx,rbx + xor rbp,QWORD[52+rdi] + + rol rbx,1 + xor rbx,rbp + + rol rbp,1 + xor rbp,r13 + xor r9,rcx + xor r10,rdx + rol r9,44 + xor r11,rbp + xor r12,rax + rol r10,43 + xor r8,rbx + mov r13,r9 + rol r11,21 + or r9,r10 + xor r9,r8 + rol r12,14 + + xor r9,QWORD[r15] + lea r15,[8+r15] + + mov r14,r12 + and r12,r11 + mov QWORD[((-100))+rsi],r9 + xor r12,r10 + not r10 + mov QWORD[((-84))+rsi],r12 + + or r10,r11 + mov r12,QWORD[76+rdi] + xor r10,r13 + mov QWORD[((-92))+rsi],r10 + + and r13,r8 + mov r9,QWORD[((-28))+rdi] + xor r13,r14 + mov r10,QWORD[((-20))+rdi] + mov QWORD[((-68))+rsi],r13 + + or r14,r8 + mov r8,QWORD[((-76))+rdi] + xor r14,r11 + mov r11,QWORD[28+rdi] + mov QWORD[((-76))+rsi],r14 + + + xor r8,rbp + xor r12,rdx + rol r8,28 + xor r11,rcx + xor r9,rax + rol r12,61 + rol r11,45 + xor r10,rbx + rol r9,20 + mov r13,r8 + or r8,r12 + rol r10,3 + + xor r8,r11 + mov QWORD[((-36))+rsi],r8 + + mov r14,r9 + and r9,r13 + mov r8,QWORD[((-92))+rdi] + xor r9,r12 + not r12 + mov QWORD[((-28))+rsi],r9 + + or r12,r11 + mov r9,QWORD[((-44))+rdi] + xor r12,r10 + mov QWORD[((-44))+rsi],r12 + + and r11,r10 + mov r12,QWORD[60+rdi] + xor r11,r14 + mov QWORD[((-52))+rsi],r11 + + or r14,r10 + mov r10,QWORD[4+rdi] + xor r14,r13 + mov r11,QWORD[52+rdi] + mov QWORD[((-60))+rsi],r14 + + + xor r10,rbp + xor r11,rax + rol r10,25 + xor r9,rdx + rol r11,8 + xor r12,rbx + rol r9,6 + xor r8,rcx + rol r12,18 + mov r13,r10 + and r10,r11 + rol r8,1 + + not r11 + xor r10,r9 + mov QWORD[((-12))+rsi],r10 + + mov r14,r12 + and r12,r11 + mov r10,QWORD[((-12))+rdi] + xor r12,r13 + mov QWORD[((-4))+rsi],r12 + + or r13,r9 + mov r12,QWORD[84+rdi] + xor r13,r8 + mov QWORD[((-20))+rsi],r13 + + and r9,r8 + xor r9,r14 + mov QWORD[12+rsi],r9 + + or r14,r8 + mov r9,QWORD[((-60))+rdi] + xor r14,r11 + mov r11,QWORD[36+rdi] + mov QWORD[4+rsi],r14 + + + mov r8,QWORD[((-68))+rdi] + + xor r10,rcx + xor r11,rdx + rol r10,10 + xor r9,rbx + rol r11,15 + xor r12,rbp + rol r9,36 + xor r8,rax + rol r12,56 + mov r13,r10 + or r10,r11 + rol r8,27 + + not r11 + xor r10,r9 + mov QWORD[28+rsi],r10 + + mov r14,r12 + or r12,r11 + xor r12,r13 + mov QWORD[36+rsi],r12 + + and r13,r9 + xor r13,r8 + mov QWORD[20+rsi],r13 + + or r9,r8 + xor r9,r14 + mov QWORD[52+rsi],r9 + + and r8,r14 + xor r8,r11 + mov QWORD[44+rsi],r8 + + + xor rdx,QWORD[((-84))+rdi] + xor rbp,QWORD[((-36))+rdi] + rol rdx,62 + xor rcx,QWORD[68+rdi] + rol rbp,55 + xor rax,QWORD[12+rdi] + rol rcx,2 + xor rbx,QWORD[20+rdi] + xchg rdi,rsi + rol rax,39 + rol rbx,41 + mov r13,rdx + and rdx,rbp + not rbp + xor rdx,rcx + mov QWORD[92+rdi],rdx + + mov r14,rax + and rax,rbp + xor rax,r13 + mov QWORD[60+rdi],rax + + or r13,rcx + xor r13,rbx + mov QWORD[84+rdi],r13 + + and rcx,rbx + xor rcx,r14 + mov QWORD[76+rdi],rcx + + or rbx,r14 + xor rbx,rbp + mov QWORD[68+rdi],rbx + + mov rbp,rdx + mov rdx,r13 + + test r15,255 + jnz NEAR $L$oop + + lea r15,[((-192))+r15] + DB 0F3h,0C3h ;repret + + + + +ALIGN 32 +KeccakF1600: + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + + lea rdi,[100+rdi] + sub rsp,200 + + + not QWORD[((-92))+rdi] + not QWORD[((-84))+rdi] + not QWORD[((-36))+rdi] + not QWORD[((-4))+rdi] + not QWORD[36+rdi] + not QWORD[60+rdi] + + lea r15,[iotas] + lea rsi,[100+rsp] + + call __KeccakF1600 + + not QWORD[((-92))+rdi] + not QWORD[((-84))+rdi] + not QWORD[((-36))+rdi] + not QWORD[((-4))+rdi] + not QWORD[36+rdi] + not QWORD[60+rdi] + lea rdi,[((-100))+rdi] + + add rsp,200 + + + pop r15 + + pop r14 + + pop r13 + + pop r12 + + pop rbp + + pop rbx + + DB 0F3h,0C3h ;repret + + +global SHA3_absorb + +ALIGN 32 +SHA3_absorb: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_SHA3_absorb: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + + lea rdi,[100+rdi] + sub rsp,232 + + + mov r9,rsi + lea rsi,[100+rsp] + + not QWORD[((-92))+rdi] + not QWORD[((-84))+rdi] + not QWORD[((-36))+rdi] + not QWORD[((-4))+rdi] + not QWORD[36+rdi] + not QWORD[60+rdi] + lea r15,[iotas] + + mov QWORD[((216-100))+rsi],rcx + +$L$oop_absorb: + cmp rdx,rcx + jc NEAR $L$done_absorb + + shr rcx,3 + lea r8,[((-100))+rdi] + +$L$block_absorb: + mov rax,QWORD[r9] + lea r9,[8+r9] + xor rax,QWORD[r8] + lea r8,[8+r8] + sub rdx,8 + mov QWORD[((-8))+r8],rax + sub rcx,1 + jnz NEAR $L$block_absorb + + mov QWORD[((200-100))+rsi],r9 + mov QWORD[((208-100))+rsi],rdx + call __KeccakF1600 + mov r9,QWORD[((200-100))+rsi] + mov rdx,QWORD[((208-100))+rsi] + mov rcx,QWORD[((216-100))+rsi] + jmp NEAR $L$oop_absorb + +ALIGN 32 +$L$done_absorb: + mov rax,rdx + + not QWORD[((-92))+rdi] + not QWORD[((-84))+rdi] + not QWORD[((-36))+rdi] + not QWORD[((-4))+rdi] + not QWORD[36+rdi] + not QWORD[60+rdi] + + add rsp,232 + + + pop r15 + + pop r14 + + pop r13 + + pop r12 + + pop rbp + + pop rbx + + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_SHA3_absorb: +global SHA3_squeeze + +ALIGN 32 +SHA3_squeeze: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_SHA3_squeeze: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + + push r12 + + push r13 + + push r14 + + + shr rcx,3 + mov r8,rdi + mov r12,rsi + mov r13,rdx + mov r14,rcx + jmp NEAR $L$oop_squeeze + +ALIGN 32 +$L$oop_squeeze: + cmp r13,8 + jb NEAR $L$tail_squeeze + + mov rax,QWORD[r8] + lea r8,[8+r8] + mov QWORD[r12],rax + lea r12,[8+r12] + sub r13,8 + jz NEAR $L$done_squeeze + + sub rcx,1 + jnz NEAR $L$oop_squeeze + + call KeccakF1600 + mov r8,rdi + mov rcx,r14 + jmp NEAR $L$oop_squeeze + +$L$tail_squeeze: + mov rsi,r8 + mov rdi,r12 + mov rcx,r13 +DB 0xf3,0xa4 + +$L$done_squeeze: + pop r14 + + pop r13 + + pop r12 + + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_SHA3_squeeze: +ALIGN 256 + DQ 0,0,0,0,0,0,0,0 + +iotas: + DQ 0x0000000000000001 + DQ 0x0000000000008082 + DQ 0x800000000000808a + DQ 0x8000000080008000 + DQ 0x000000000000808b + DQ 0x0000000080000001 + DQ 0x8000000080008081 + DQ 0x8000000000008009 + DQ 0x000000000000008a + DQ 0x0000000000000088 + DQ 0x0000000080008009 + DQ 0x000000008000000a + DQ 0x000000008000808b + DQ 0x800000000000008b + DQ 0x8000000000008089 + DQ 0x8000000000008003 + DQ 0x8000000000008002 + DQ 0x8000000000000080 + DQ 0x000000000000800a + DQ 0x800000008000000a + DQ 0x8000000080008081 + DQ 0x8000000000008080 + DQ 0x0000000080000001 + DQ 0x8000000080008008 + +DB 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111 +DB 114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102 +DB 111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84 +DB 79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64 +DB 111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/vere/ext/openssl/gen/windows-x86_64/crypto/sha/sha1-mb-x86_64.asm b/vere/ext/openssl/gen/windows-x86_64/crypto/sha/sha1-mb-x86_64.asm new file mode 100644 index 0000000..725bf4e --- /dev/null +++ b/vere/ext/openssl/gen/windows-x86_64/crypto/sha/sha1-mb-x86_64.asm @@ -0,0 +1,7574 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + +EXTERN OPENSSL_ia32cap_P + +global sha1_multi_block + +ALIGN 32 +sha1_multi_block: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_multi_block: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + + mov rcx,QWORD[((OPENSSL_ia32cap_P+4))] + bt rcx,61 + jc NEAR _shaext_shortcut + test ecx,268435456 + jnz NEAR _avx_shortcut + mov rax,rsp + + push rbx + + push rbp + + lea rsp,[((-168))+rsp] + movaps XMMWORD[rsp],xmm6 + movaps XMMWORD[16+rsp],xmm7 + movaps XMMWORD[32+rsp],xmm8 + movaps XMMWORD[48+rsp],xmm9 + movaps XMMWORD[(-120)+rax],xmm10 + movaps XMMWORD[(-104)+rax],xmm11 + movaps XMMWORD[(-88)+rax],xmm12 + movaps XMMWORD[(-72)+rax],xmm13 + movaps XMMWORD[(-56)+rax],xmm14 + movaps XMMWORD[(-40)+rax],xmm15 + sub rsp,288 + and rsp,-256 + mov QWORD[272+rsp],rax + +$L$body: + lea rbp,[K_XX_XX] + lea rbx,[256+rsp] + +$L$oop_grande: + mov DWORD[280+rsp],edx + xor edx,edx + mov r8,QWORD[rsi] + mov ecx,DWORD[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[rbx],ecx + cmovle r8,rbp + mov r9,QWORD[16+rsi] + mov ecx,DWORD[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[4+rbx],ecx + cmovle r9,rbp + mov r10,QWORD[32+rsi] + mov ecx,DWORD[40+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[8+rbx],ecx + cmovle r10,rbp + mov r11,QWORD[48+rsi] + mov ecx,DWORD[56+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[12+rbx],ecx + cmovle r11,rbp + test edx,edx + jz NEAR $L$done + + movdqu xmm10,XMMWORD[rdi] + lea rax,[128+rsp] + movdqu xmm11,XMMWORD[32+rdi] + movdqu xmm12,XMMWORD[64+rdi] + movdqu xmm13,XMMWORD[96+rdi] + movdqu xmm14,XMMWORD[128+rdi] + movdqa xmm5,XMMWORD[96+rbp] + movdqa xmm15,XMMWORD[((-32))+rbp] + jmp NEAR $L$oop + +ALIGN 32 +$L$oop: + movd xmm0,DWORD[r8] + lea r8,[64+r8] + movd xmm2,DWORD[r9] + lea r9,[64+r9] + movd xmm3,DWORD[r10] + lea r10,[64+r10] + movd xmm4,DWORD[r11] + lea r11,[64+r11] + punpckldq xmm0,xmm3 + movd xmm1,DWORD[((-60))+r8] + punpckldq xmm2,xmm4 + movd xmm9,DWORD[((-60))+r9] + punpckldq xmm0,xmm2 + movd xmm8,DWORD[((-60))+r10] +DB 102,15,56,0,197 + movd xmm7,DWORD[((-60))+r11] + punpckldq xmm1,xmm8 + movdqa xmm8,xmm10 + paddd xmm14,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm11 + movdqa xmm6,xmm11 + pslld xmm8,5 + pandn xmm7,xmm13 + pand xmm6,xmm12 + punpckldq xmm1,xmm9 + movdqa xmm9,xmm10 + + movdqa XMMWORD[(0-128)+rax],xmm0 + paddd xmm14,xmm0 + movd xmm2,DWORD[((-56))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm11 + + por xmm8,xmm9 + movd xmm9,DWORD[((-56))+r9] + pslld xmm7,30 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 +DB 102,15,56,0,205 + movd xmm8,DWORD[((-56))+r10] + por xmm11,xmm7 + movd xmm7,DWORD[((-56))+r11] + punpckldq xmm2,xmm8 + movdqa xmm8,xmm14 + paddd xmm13,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm10 + movdqa xmm6,xmm10 + pslld xmm8,5 + pandn xmm7,xmm12 + pand xmm6,xmm11 + punpckldq xmm2,xmm9 + movdqa xmm9,xmm14 + + movdqa XMMWORD[(16-128)+rax],xmm1 + paddd xmm13,xmm1 + movd xmm3,DWORD[((-52))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm10 + + por xmm8,xmm9 + movd xmm9,DWORD[((-52))+r9] + pslld xmm7,30 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 +DB 102,15,56,0,213 + movd xmm8,DWORD[((-52))+r10] + por xmm10,xmm7 + movd xmm7,DWORD[((-52))+r11] + punpckldq xmm3,xmm8 + movdqa xmm8,xmm13 + paddd xmm12,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm14 + movdqa xmm6,xmm14 + pslld xmm8,5 + pandn xmm7,xmm11 + pand xmm6,xmm10 + punpckldq xmm3,xmm9 + movdqa xmm9,xmm13 + + movdqa XMMWORD[(32-128)+rax],xmm2 + paddd xmm12,xmm2 + movd xmm4,DWORD[((-48))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm14 + + por xmm8,xmm9 + movd xmm9,DWORD[((-48))+r9] + pslld xmm7,30 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 +DB 102,15,56,0,221 + movd xmm8,DWORD[((-48))+r10] + por xmm14,xmm7 + movd xmm7,DWORD[((-48))+r11] + punpckldq xmm4,xmm8 + movdqa xmm8,xmm12 + paddd xmm11,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm13 + movdqa xmm6,xmm13 + pslld xmm8,5 + pandn xmm7,xmm10 + pand xmm6,xmm14 + punpckldq xmm4,xmm9 + movdqa xmm9,xmm12 + + movdqa XMMWORD[(48-128)+rax],xmm3 + paddd xmm11,xmm3 + movd xmm0,DWORD[((-44))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm13 + + por xmm8,xmm9 + movd xmm9,DWORD[((-44))+r9] + pslld xmm7,30 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 +DB 102,15,56,0,229 + movd xmm8,DWORD[((-44))+r10] + por xmm13,xmm7 + movd xmm7,DWORD[((-44))+r11] + punpckldq xmm0,xmm8 + movdqa xmm8,xmm11 + paddd xmm10,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm12 + movdqa xmm6,xmm12 + pslld xmm8,5 + pandn xmm7,xmm14 + pand xmm6,xmm13 + punpckldq xmm0,xmm9 + movdqa xmm9,xmm11 + + movdqa XMMWORD[(64-128)+rax],xmm4 + paddd xmm10,xmm4 + movd xmm1,DWORD[((-40))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm12 + + por xmm8,xmm9 + movd xmm9,DWORD[((-40))+r9] + pslld xmm7,30 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 +DB 102,15,56,0,197 + movd xmm8,DWORD[((-40))+r10] + por xmm12,xmm7 + movd xmm7,DWORD[((-40))+r11] + punpckldq xmm1,xmm8 + movdqa xmm8,xmm10 + paddd xmm14,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm11 + movdqa xmm6,xmm11 + pslld xmm8,5 + pandn xmm7,xmm13 + pand xmm6,xmm12 + punpckldq xmm1,xmm9 + movdqa xmm9,xmm10 + + movdqa XMMWORD[(80-128)+rax],xmm0 + paddd xmm14,xmm0 + movd xmm2,DWORD[((-36))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm11 + + por xmm8,xmm9 + movd xmm9,DWORD[((-36))+r9] + pslld xmm7,30 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 +DB 102,15,56,0,205 + movd xmm8,DWORD[((-36))+r10] + por xmm11,xmm7 + movd xmm7,DWORD[((-36))+r11] + punpckldq xmm2,xmm8 + movdqa xmm8,xmm14 + paddd xmm13,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm10 + movdqa xmm6,xmm10 + pslld xmm8,5 + pandn xmm7,xmm12 + pand xmm6,xmm11 + punpckldq xmm2,xmm9 + movdqa xmm9,xmm14 + + movdqa XMMWORD[(96-128)+rax],xmm1 + paddd xmm13,xmm1 + movd xmm3,DWORD[((-32))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm10 + + por xmm8,xmm9 + movd xmm9,DWORD[((-32))+r9] + pslld xmm7,30 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 +DB 102,15,56,0,213 + movd xmm8,DWORD[((-32))+r10] + por xmm10,xmm7 + movd xmm7,DWORD[((-32))+r11] + punpckldq xmm3,xmm8 + movdqa xmm8,xmm13 + paddd xmm12,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm14 + movdqa xmm6,xmm14 + pslld xmm8,5 + pandn xmm7,xmm11 + pand xmm6,xmm10 + punpckldq xmm3,xmm9 + movdqa xmm9,xmm13 + + movdqa XMMWORD[(112-128)+rax],xmm2 + paddd xmm12,xmm2 + movd xmm4,DWORD[((-28))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm14 + + por xmm8,xmm9 + movd xmm9,DWORD[((-28))+r9] + pslld xmm7,30 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 +DB 102,15,56,0,221 + movd xmm8,DWORD[((-28))+r10] + por xmm14,xmm7 + movd xmm7,DWORD[((-28))+r11] + punpckldq xmm4,xmm8 + movdqa xmm8,xmm12 + paddd xmm11,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm13 + movdqa xmm6,xmm13 + pslld xmm8,5 + pandn xmm7,xmm10 + pand xmm6,xmm14 + punpckldq xmm4,xmm9 + movdqa xmm9,xmm12 + + movdqa XMMWORD[(128-128)+rax],xmm3 + paddd xmm11,xmm3 + movd xmm0,DWORD[((-24))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm13 + + por xmm8,xmm9 + movd xmm9,DWORD[((-24))+r9] + pslld xmm7,30 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 +DB 102,15,56,0,229 + movd xmm8,DWORD[((-24))+r10] + por xmm13,xmm7 + movd xmm7,DWORD[((-24))+r11] + punpckldq xmm0,xmm8 + movdqa xmm8,xmm11 + paddd xmm10,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm12 + movdqa xmm6,xmm12 + pslld xmm8,5 + pandn xmm7,xmm14 + pand xmm6,xmm13 + punpckldq xmm0,xmm9 + movdqa xmm9,xmm11 + + movdqa XMMWORD[(144-128)+rax],xmm4 + paddd xmm10,xmm4 + movd xmm1,DWORD[((-20))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm12 + + por xmm8,xmm9 + movd xmm9,DWORD[((-20))+r9] + pslld xmm7,30 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 +DB 102,15,56,0,197 + movd xmm8,DWORD[((-20))+r10] + por xmm12,xmm7 + movd xmm7,DWORD[((-20))+r11] + punpckldq xmm1,xmm8 + movdqa xmm8,xmm10 + paddd xmm14,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm11 + movdqa xmm6,xmm11 + pslld xmm8,5 + pandn xmm7,xmm13 + pand xmm6,xmm12 + punpckldq xmm1,xmm9 + movdqa xmm9,xmm10 + + movdqa XMMWORD[(160-128)+rax],xmm0 + paddd xmm14,xmm0 + movd xmm2,DWORD[((-16))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm11 + + por xmm8,xmm9 + movd xmm9,DWORD[((-16))+r9] + pslld xmm7,30 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 +DB 102,15,56,0,205 + movd xmm8,DWORD[((-16))+r10] + por xmm11,xmm7 + movd xmm7,DWORD[((-16))+r11] + punpckldq xmm2,xmm8 + movdqa xmm8,xmm14 + paddd xmm13,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm10 + movdqa xmm6,xmm10 + pslld xmm8,5 + pandn xmm7,xmm12 + pand xmm6,xmm11 + punpckldq xmm2,xmm9 + movdqa xmm9,xmm14 + + movdqa XMMWORD[(176-128)+rax],xmm1 + paddd xmm13,xmm1 + movd xmm3,DWORD[((-12))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm10 + + por xmm8,xmm9 + movd xmm9,DWORD[((-12))+r9] + pslld xmm7,30 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 +DB 102,15,56,0,213 + movd xmm8,DWORD[((-12))+r10] + por xmm10,xmm7 + movd xmm7,DWORD[((-12))+r11] + punpckldq xmm3,xmm8 + movdqa xmm8,xmm13 + paddd xmm12,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm14 + movdqa xmm6,xmm14 + pslld xmm8,5 + pandn xmm7,xmm11 + pand xmm6,xmm10 + punpckldq xmm3,xmm9 + movdqa xmm9,xmm13 + + movdqa XMMWORD[(192-128)+rax],xmm2 + paddd xmm12,xmm2 + movd xmm4,DWORD[((-8))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm14 + + por xmm8,xmm9 + movd xmm9,DWORD[((-8))+r9] + pslld xmm7,30 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 +DB 102,15,56,0,221 + movd xmm8,DWORD[((-8))+r10] + por xmm14,xmm7 + movd xmm7,DWORD[((-8))+r11] + punpckldq xmm4,xmm8 + movdqa xmm8,xmm12 + paddd xmm11,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm13 + movdqa xmm6,xmm13 + pslld xmm8,5 + pandn xmm7,xmm10 + pand xmm6,xmm14 + punpckldq xmm4,xmm9 + movdqa xmm9,xmm12 + + movdqa XMMWORD[(208-128)+rax],xmm3 + paddd xmm11,xmm3 + movd xmm0,DWORD[((-4))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm13 + + por xmm8,xmm9 + movd xmm9,DWORD[((-4))+r9] + pslld xmm7,30 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 +DB 102,15,56,0,229 + movd xmm8,DWORD[((-4))+r10] + por xmm13,xmm7 + movdqa xmm1,XMMWORD[((0-128))+rax] + movd xmm7,DWORD[((-4))+r11] + punpckldq xmm0,xmm8 + movdqa xmm8,xmm11 + paddd xmm10,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm12 + movdqa xmm6,xmm12 + pslld xmm8,5 + prefetcht0 [63+r8] + pandn xmm7,xmm14 + pand xmm6,xmm13 + punpckldq xmm0,xmm9 + movdqa xmm9,xmm11 + + movdqa XMMWORD[(224-128)+rax],xmm4 + paddd xmm10,xmm4 + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm12 + prefetcht0 [63+r9] + + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm10,xmm6 + prefetcht0 [63+r10] + + psrld xmm12,2 + paddd xmm10,xmm8 +DB 102,15,56,0,197 + prefetcht0 [63+r11] + por xmm12,xmm7 + movdqa xmm2,XMMWORD[((16-128))+rax] + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD[((32-128))+rax] + + movdqa xmm8,xmm10 + pxor xmm1,XMMWORD[((128-128))+rax] + paddd xmm14,xmm15 + movdqa xmm7,xmm11 + pslld xmm8,5 + pxor xmm1,xmm3 + movdqa xmm6,xmm11 + pandn xmm7,xmm13 + movdqa xmm5,xmm1 + pand xmm6,xmm12 + movdqa xmm9,xmm10 + psrld xmm5,31 + paddd xmm1,xmm1 + + movdqa XMMWORD[(240-128)+rax],xmm0 + paddd xmm14,xmm0 + psrld xmm9,27 + pxor xmm6,xmm7 + + movdqa xmm7,xmm11 + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD[((48-128))+rax] + + movdqa xmm8,xmm14 + pxor xmm2,XMMWORD[((144-128))+rax] + paddd xmm13,xmm15 + movdqa xmm7,xmm10 + pslld xmm8,5 + pxor xmm2,xmm4 + movdqa xmm6,xmm10 + pandn xmm7,xmm12 + movdqa xmm5,xmm2 + pand xmm6,xmm11 + movdqa xmm9,xmm14 + psrld xmm5,31 + paddd xmm2,xmm2 + + movdqa XMMWORD[(0-128)+rax],xmm1 + paddd xmm13,xmm1 + psrld xmm9,27 + pxor xmm6,xmm7 + + movdqa xmm7,xmm10 + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD[((64-128))+rax] + + movdqa xmm8,xmm13 + pxor xmm3,XMMWORD[((160-128))+rax] + paddd xmm12,xmm15 + movdqa xmm7,xmm14 + pslld xmm8,5 + pxor xmm3,xmm0 + movdqa xmm6,xmm14 + pandn xmm7,xmm11 + movdqa xmm5,xmm3 + pand xmm6,xmm10 + movdqa xmm9,xmm13 + psrld xmm5,31 + paddd xmm3,xmm3 + + movdqa XMMWORD[(16-128)+rax],xmm2 + paddd xmm12,xmm2 + psrld xmm9,27 + pxor xmm6,xmm7 + + movdqa xmm7,xmm14 + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD[((80-128))+rax] + + movdqa xmm8,xmm12 + pxor xmm4,XMMWORD[((176-128))+rax] + paddd xmm11,xmm15 + movdqa xmm7,xmm13 + pslld xmm8,5 + pxor xmm4,xmm1 + movdqa xmm6,xmm13 + pandn xmm7,xmm10 + movdqa xmm5,xmm4 + pand xmm6,xmm14 + movdqa xmm9,xmm12 + psrld xmm5,31 + paddd xmm4,xmm4 + + movdqa XMMWORD[(32-128)+rax],xmm3 + paddd xmm11,xmm3 + psrld xmm9,27 + pxor xmm6,xmm7 + + movdqa xmm7,xmm13 + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD[((96-128))+rax] + + movdqa xmm8,xmm11 + pxor xmm0,XMMWORD[((192-128))+rax] + paddd xmm10,xmm15 + movdqa xmm7,xmm12 + pslld xmm8,5 + pxor xmm0,xmm2 + movdqa xmm6,xmm12 + pandn xmm7,xmm14 + movdqa xmm5,xmm0 + pand xmm6,xmm13 + movdqa xmm9,xmm11 + psrld xmm5,31 + paddd xmm0,xmm0 + + movdqa XMMWORD[(48-128)+rax],xmm4 + paddd xmm10,xmm4 + psrld xmm9,27 + pxor xmm6,xmm7 + + movdqa xmm7,xmm12 + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + movdqa xmm15,XMMWORD[rbp] + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD[((112-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD[((208-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD[(64-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD[((128-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD[((224-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD[(80-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD[((144-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD[((240-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD[(96-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD[((160-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD[((0-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD[(112-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD[((176-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD[((16-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD[(128-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD[((192-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD[((32-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD[(144-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD[((208-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD[((48-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD[(160-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD[((224-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD[((64-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD[(176-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD[((240-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD[((80-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD[(192-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD[((0-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD[((96-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD[(208-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD[((16-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD[((112-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD[(224-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD[((32-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD[((128-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD[(240-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD[((48-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD[((144-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD[(0-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD[((64-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD[((160-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD[(16-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD[((80-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD[((176-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD[(32-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD[((96-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD[((192-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD[(48-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD[((112-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD[((208-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD[(64-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD[((128-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD[((224-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD[(80-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD[((144-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD[((240-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD[(96-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD[((160-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD[((0-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD[(112-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + movdqa xmm15,XMMWORD[32+rbp] + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD[((176-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm7,xmm13 + pxor xmm1,XMMWORD[((16-128))+rax] + pxor xmm1,xmm3 + paddd xmm14,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm10 + pand xmm7,xmm12 + + movdqa xmm6,xmm13 + movdqa xmm5,xmm1 + psrld xmm9,27 + paddd xmm14,xmm7 + pxor xmm6,xmm12 + + movdqa XMMWORD[(128-128)+rax],xmm0 + paddd xmm14,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm11 + movdqa xmm7,xmm11 + + pslld xmm7,30 + paddd xmm1,xmm1 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD[((192-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm7,xmm12 + pxor xmm2,XMMWORD[((32-128))+rax] + pxor xmm2,xmm4 + paddd xmm13,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm14 + pand xmm7,xmm11 + + movdqa xmm6,xmm12 + movdqa xmm5,xmm2 + psrld xmm9,27 + paddd xmm13,xmm7 + pxor xmm6,xmm11 + + movdqa XMMWORD[(144-128)+rax],xmm1 + paddd xmm13,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm10 + movdqa xmm7,xmm10 + + pslld xmm7,30 + paddd xmm2,xmm2 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD[((208-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm7,xmm11 + pxor xmm3,XMMWORD[((48-128))+rax] + pxor xmm3,xmm0 + paddd xmm12,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm13 + pand xmm7,xmm10 + + movdqa xmm6,xmm11 + movdqa xmm5,xmm3 + psrld xmm9,27 + paddd xmm12,xmm7 + pxor xmm6,xmm10 + + movdqa XMMWORD[(160-128)+rax],xmm2 + paddd xmm12,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm14 + movdqa xmm7,xmm14 + + pslld xmm7,30 + paddd xmm3,xmm3 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD[((224-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm7,xmm10 + pxor xmm4,XMMWORD[((64-128))+rax] + pxor xmm4,xmm1 + paddd xmm11,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm12 + pand xmm7,xmm14 + + movdqa xmm6,xmm10 + movdqa xmm5,xmm4 + psrld xmm9,27 + paddd xmm11,xmm7 + pxor xmm6,xmm14 + + movdqa XMMWORD[(176-128)+rax],xmm3 + paddd xmm11,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm13 + movdqa xmm7,xmm13 + + pslld xmm7,30 + paddd xmm4,xmm4 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD[((240-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm7,xmm14 + pxor xmm0,XMMWORD[((80-128))+rax] + pxor xmm0,xmm2 + paddd xmm10,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm11 + pand xmm7,xmm13 + + movdqa xmm6,xmm14 + movdqa xmm5,xmm0 + psrld xmm9,27 + paddd xmm10,xmm7 + pxor xmm6,xmm13 + + movdqa XMMWORD[(192-128)+rax],xmm4 + paddd xmm10,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm12 + movdqa xmm7,xmm12 + + pslld xmm7,30 + paddd xmm0,xmm0 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD[((0-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm7,xmm13 + pxor xmm1,XMMWORD[((96-128))+rax] + pxor xmm1,xmm3 + paddd xmm14,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm10 + pand xmm7,xmm12 + + movdqa xmm6,xmm13 + movdqa xmm5,xmm1 + psrld xmm9,27 + paddd xmm14,xmm7 + pxor xmm6,xmm12 + + movdqa XMMWORD[(208-128)+rax],xmm0 + paddd xmm14,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm11 + movdqa xmm7,xmm11 + + pslld xmm7,30 + paddd xmm1,xmm1 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD[((16-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm7,xmm12 + pxor xmm2,XMMWORD[((112-128))+rax] + pxor xmm2,xmm4 + paddd xmm13,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm14 + pand xmm7,xmm11 + + movdqa xmm6,xmm12 + movdqa xmm5,xmm2 + psrld xmm9,27 + paddd xmm13,xmm7 + pxor xmm6,xmm11 + + movdqa XMMWORD[(224-128)+rax],xmm1 + paddd xmm13,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm10 + movdqa xmm7,xmm10 + + pslld xmm7,30 + paddd xmm2,xmm2 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD[((32-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm7,xmm11 + pxor xmm3,XMMWORD[((128-128))+rax] + pxor xmm3,xmm0 + paddd xmm12,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm13 + pand xmm7,xmm10 + + movdqa xmm6,xmm11 + movdqa xmm5,xmm3 + psrld xmm9,27 + paddd xmm12,xmm7 + pxor xmm6,xmm10 + + movdqa XMMWORD[(240-128)+rax],xmm2 + paddd xmm12,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm14 + movdqa xmm7,xmm14 + + pslld xmm7,30 + paddd xmm3,xmm3 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD[((48-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm7,xmm10 + pxor xmm4,XMMWORD[((144-128))+rax] + pxor xmm4,xmm1 + paddd xmm11,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm12 + pand xmm7,xmm14 + + movdqa xmm6,xmm10 + movdqa xmm5,xmm4 + psrld xmm9,27 + paddd xmm11,xmm7 + pxor xmm6,xmm14 + + movdqa XMMWORD[(0-128)+rax],xmm3 + paddd xmm11,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm13 + movdqa xmm7,xmm13 + + pslld xmm7,30 + paddd xmm4,xmm4 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD[((64-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm7,xmm14 + pxor xmm0,XMMWORD[((160-128))+rax] + pxor xmm0,xmm2 + paddd xmm10,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm11 + pand xmm7,xmm13 + + movdqa xmm6,xmm14 + movdqa xmm5,xmm0 + psrld xmm9,27 + paddd xmm10,xmm7 + pxor xmm6,xmm13 + + movdqa XMMWORD[(16-128)+rax],xmm4 + paddd xmm10,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm12 + movdqa xmm7,xmm12 + + pslld xmm7,30 + paddd xmm0,xmm0 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD[((80-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm7,xmm13 + pxor xmm1,XMMWORD[((176-128))+rax] + pxor xmm1,xmm3 + paddd xmm14,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm10 + pand xmm7,xmm12 + + movdqa xmm6,xmm13 + movdqa xmm5,xmm1 + psrld xmm9,27 + paddd xmm14,xmm7 + pxor xmm6,xmm12 + + movdqa XMMWORD[(32-128)+rax],xmm0 + paddd xmm14,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm11 + movdqa xmm7,xmm11 + + pslld xmm7,30 + paddd xmm1,xmm1 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD[((96-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm7,xmm12 + pxor xmm2,XMMWORD[((192-128))+rax] + pxor xmm2,xmm4 + paddd xmm13,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm14 + pand xmm7,xmm11 + + movdqa xmm6,xmm12 + movdqa xmm5,xmm2 + psrld xmm9,27 + paddd xmm13,xmm7 + pxor xmm6,xmm11 + + movdqa XMMWORD[(48-128)+rax],xmm1 + paddd xmm13,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm10 + movdqa xmm7,xmm10 + + pslld xmm7,30 + paddd xmm2,xmm2 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD[((112-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm7,xmm11 + pxor xmm3,XMMWORD[((208-128))+rax] + pxor xmm3,xmm0 + paddd xmm12,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm13 + pand xmm7,xmm10 + + movdqa xmm6,xmm11 + movdqa xmm5,xmm3 + psrld xmm9,27 + paddd xmm12,xmm7 + pxor xmm6,xmm10 + + movdqa XMMWORD[(64-128)+rax],xmm2 + paddd xmm12,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm14 + movdqa xmm7,xmm14 + + pslld xmm7,30 + paddd xmm3,xmm3 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD[((128-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm7,xmm10 + pxor xmm4,XMMWORD[((224-128))+rax] + pxor xmm4,xmm1 + paddd xmm11,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm12 + pand xmm7,xmm14 + + movdqa xmm6,xmm10 + movdqa xmm5,xmm4 + psrld xmm9,27 + paddd xmm11,xmm7 + pxor xmm6,xmm14 + + movdqa XMMWORD[(80-128)+rax],xmm3 + paddd xmm11,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm13 + movdqa xmm7,xmm13 + + pslld xmm7,30 + paddd xmm4,xmm4 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD[((144-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm7,xmm14 + pxor xmm0,XMMWORD[((240-128))+rax] + pxor xmm0,xmm2 + paddd xmm10,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm11 + pand xmm7,xmm13 + + movdqa xmm6,xmm14 + movdqa xmm5,xmm0 + psrld xmm9,27 + paddd xmm10,xmm7 + pxor xmm6,xmm13 + + movdqa XMMWORD[(96-128)+rax],xmm4 + paddd xmm10,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm12 + movdqa xmm7,xmm12 + + pslld xmm7,30 + paddd xmm0,xmm0 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD[((160-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm7,xmm13 + pxor xmm1,XMMWORD[((0-128))+rax] + pxor xmm1,xmm3 + paddd xmm14,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm10 + pand xmm7,xmm12 + + movdqa xmm6,xmm13 + movdqa xmm5,xmm1 + psrld xmm9,27 + paddd xmm14,xmm7 + pxor xmm6,xmm12 + + movdqa XMMWORD[(112-128)+rax],xmm0 + paddd xmm14,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm11 + movdqa xmm7,xmm11 + + pslld xmm7,30 + paddd xmm1,xmm1 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD[((176-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm7,xmm12 + pxor xmm2,XMMWORD[((16-128))+rax] + pxor xmm2,xmm4 + paddd xmm13,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm14 + pand xmm7,xmm11 + + movdqa xmm6,xmm12 + movdqa xmm5,xmm2 + psrld xmm9,27 + paddd xmm13,xmm7 + pxor xmm6,xmm11 + + movdqa XMMWORD[(128-128)+rax],xmm1 + paddd xmm13,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm10 + movdqa xmm7,xmm10 + + pslld xmm7,30 + paddd xmm2,xmm2 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD[((192-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm7,xmm11 + pxor xmm3,XMMWORD[((32-128))+rax] + pxor xmm3,xmm0 + paddd xmm12,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm13 + pand xmm7,xmm10 + + movdqa xmm6,xmm11 + movdqa xmm5,xmm3 + psrld xmm9,27 + paddd xmm12,xmm7 + pxor xmm6,xmm10 + + movdqa XMMWORD[(144-128)+rax],xmm2 + paddd xmm12,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm14 + movdqa xmm7,xmm14 + + pslld xmm7,30 + paddd xmm3,xmm3 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD[((208-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm7,xmm10 + pxor xmm4,XMMWORD[((48-128))+rax] + pxor xmm4,xmm1 + paddd xmm11,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm12 + pand xmm7,xmm14 + + movdqa xmm6,xmm10 + movdqa xmm5,xmm4 + psrld xmm9,27 + paddd xmm11,xmm7 + pxor xmm6,xmm14 + + movdqa XMMWORD[(160-128)+rax],xmm3 + paddd xmm11,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm13 + movdqa xmm7,xmm13 + + pslld xmm7,30 + paddd xmm4,xmm4 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD[((224-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm7,xmm14 + pxor xmm0,XMMWORD[((64-128))+rax] + pxor xmm0,xmm2 + paddd xmm10,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm11 + pand xmm7,xmm13 + + movdqa xmm6,xmm14 + movdqa xmm5,xmm0 + psrld xmm9,27 + paddd xmm10,xmm7 + pxor xmm6,xmm13 + + movdqa XMMWORD[(176-128)+rax],xmm4 + paddd xmm10,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm12 + movdqa xmm7,xmm12 + + pslld xmm7,30 + paddd xmm0,xmm0 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + movdqa xmm15,XMMWORD[64+rbp] + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD[((240-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD[((80-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD[(192-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD[((0-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD[((96-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD[(208-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD[((16-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD[((112-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD[(224-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD[((32-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD[((128-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD[(240-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD[((48-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD[((144-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD[(0-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD[((64-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD[((160-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD[(16-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD[((80-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD[((176-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD[(32-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD[((96-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD[((192-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD[(48-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD[((112-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD[((208-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD[(64-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD[((128-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD[((224-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD[(80-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD[((144-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD[((240-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD[(96-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD[((160-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD[((0-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD[(112-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD[((176-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD[((16-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD[((192-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD[((32-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD[((208-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD[((48-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD[((224-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD[((64-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD[((240-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD[((80-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD[((0-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD[((96-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD[((16-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD[((112-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + movdqa xmm8,xmm11 + paddd xmm10,xmm15 + movdqa xmm6,xmm14 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + paddd xmm10,xmm4 + psrld xmm9,27 + movdqa xmm7,xmm12 + pxor xmm6,xmm13 + + pslld xmm7,30 + por xmm8,xmm9 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm12,xmm7 + movdqa xmm0,XMMWORD[rbx] + mov ecx,1 + cmp ecx,DWORD[rbx] + pxor xmm8,xmm8 + cmovge r8,rbp + cmp ecx,DWORD[4+rbx] + movdqa xmm1,xmm0 + cmovge r9,rbp + cmp ecx,DWORD[8+rbx] + pcmpgtd xmm1,xmm8 + cmovge r10,rbp + cmp ecx,DWORD[12+rbx] + paddd xmm0,xmm1 + cmovge r11,rbp + + movdqu xmm6,XMMWORD[rdi] + pand xmm10,xmm1 + movdqu xmm7,XMMWORD[32+rdi] + pand xmm11,xmm1 + paddd xmm10,xmm6 + movdqu xmm8,XMMWORD[64+rdi] + pand xmm12,xmm1 + paddd xmm11,xmm7 + movdqu xmm9,XMMWORD[96+rdi] + pand xmm13,xmm1 + paddd xmm12,xmm8 + movdqu xmm5,XMMWORD[128+rdi] + pand xmm14,xmm1 + movdqu XMMWORD[rdi],xmm10 + paddd xmm13,xmm9 + movdqu XMMWORD[32+rdi],xmm11 + paddd xmm14,xmm5 + movdqu XMMWORD[64+rdi],xmm12 + movdqu XMMWORD[96+rdi],xmm13 + movdqu XMMWORD[128+rdi],xmm14 + + movdqa XMMWORD[rbx],xmm0 + movdqa xmm5,XMMWORD[96+rbp] + movdqa xmm15,XMMWORD[((-32))+rbp] + dec edx + jnz NEAR $L$oop + + mov edx,DWORD[280+rsp] + lea rdi,[16+rdi] + lea rsi,[64+rsi] + dec edx + jnz NEAR $L$oop_grande + +$L$done: + mov rax,QWORD[272+rsp] + + movaps xmm6,XMMWORD[((-184))+rax] + movaps xmm7,XMMWORD[((-168))+rax] + movaps xmm8,XMMWORD[((-152))+rax] + movaps xmm9,XMMWORD[((-136))+rax] + movaps xmm10,XMMWORD[((-120))+rax] + movaps xmm11,XMMWORD[((-104))+rax] + movaps xmm12,XMMWORD[((-88))+rax] + movaps xmm13,XMMWORD[((-72))+rax] + movaps xmm14,XMMWORD[((-56))+rax] + movaps xmm15,XMMWORD[((-40))+rax] + mov rbp,QWORD[((-16))+rax] + + mov rbx,QWORD[((-8))+rax] + + lea rsp,[rax] + +$L$epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha1_multi_block: + +ALIGN 32 +sha1_multi_block_shaext: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_multi_block_shaext: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_shaext_shortcut: + mov rax,rsp + + push rbx + + push rbp + + lea rsp,[((-168))+rsp] + movaps XMMWORD[rsp],xmm6 + movaps XMMWORD[16+rsp],xmm7 + movaps XMMWORD[32+rsp],xmm8 + movaps XMMWORD[48+rsp],xmm9 + movaps XMMWORD[(-120)+rax],xmm10 + movaps XMMWORD[(-104)+rax],xmm11 + movaps XMMWORD[(-88)+rax],xmm12 + movaps XMMWORD[(-72)+rax],xmm13 + movaps XMMWORD[(-56)+rax],xmm14 + movaps XMMWORD[(-40)+rax],xmm15 + sub rsp,288 + shl edx,1 + and rsp,-256 + lea rdi,[64+rdi] + mov QWORD[272+rsp],rax +$L$body_shaext: + lea rbx,[256+rsp] + movdqa xmm3,XMMWORD[((K_XX_XX+128))] + +$L$oop_grande_shaext: + mov DWORD[280+rsp],edx + xor edx,edx + mov r8,QWORD[rsi] + mov ecx,DWORD[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[rbx],ecx + cmovle r8,rsp + mov r9,QWORD[16+rsi] + mov ecx,DWORD[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[4+rbx],ecx + cmovle r9,rsp + test edx,edx + jz NEAR $L$done_shaext + + movq xmm0,QWORD[((0-64))+rdi] + movq xmm4,QWORD[((32-64))+rdi] + movq xmm5,QWORD[((64-64))+rdi] + movq xmm6,QWORD[((96-64))+rdi] + movq xmm7,QWORD[((128-64))+rdi] + + punpckldq xmm0,xmm4 + punpckldq xmm5,xmm6 + + movdqa xmm8,xmm0 + punpcklqdq xmm0,xmm5 + punpckhqdq xmm8,xmm5 + + pshufd xmm1,xmm7,63 + pshufd xmm9,xmm7,127 + pshufd xmm0,xmm0,27 + pshufd xmm8,xmm8,27 + jmp NEAR $L$oop_shaext + +ALIGN 32 +$L$oop_shaext: + movdqu xmm4,XMMWORD[r8] + movdqu xmm11,XMMWORD[r9] + movdqu xmm5,XMMWORD[16+r8] + movdqu xmm12,XMMWORD[16+r9] + movdqu xmm6,XMMWORD[32+r8] +DB 102,15,56,0,227 + movdqu xmm13,XMMWORD[32+r9] +DB 102,68,15,56,0,219 + movdqu xmm7,XMMWORD[48+r8] + lea r8,[64+r8] +DB 102,15,56,0,235 + movdqu xmm14,XMMWORD[48+r9] + lea r9,[64+r9] +DB 102,68,15,56,0,227 + + movdqa XMMWORD[80+rsp],xmm1 + paddd xmm1,xmm4 + movdqa XMMWORD[112+rsp],xmm9 + paddd xmm9,xmm11 + movdqa XMMWORD[64+rsp],xmm0 + movdqa xmm2,xmm0 + movdqa XMMWORD[96+rsp],xmm8 + movdqa xmm10,xmm8 +DB 15,58,204,193,0 +DB 15,56,200,213 +DB 69,15,58,204,193,0 +DB 69,15,56,200,212 +DB 102,15,56,0,243 + prefetcht0 [127+r8] +DB 15,56,201,229 +DB 102,68,15,56,0,235 + prefetcht0 [127+r9] +DB 69,15,56,201,220 + +DB 102,15,56,0,251 + movdqa xmm1,xmm0 +DB 102,68,15,56,0,243 + movdqa xmm9,xmm8 +DB 15,58,204,194,0 +DB 15,56,200,206 +DB 69,15,58,204,194,0 +DB 69,15,56,200,205 + pxor xmm4,xmm6 +DB 15,56,201,238 + pxor xmm11,xmm13 +DB 69,15,56,201,229 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,0 +DB 15,56,200,215 +DB 69,15,58,204,193,0 +DB 69,15,56,200,214 +DB 15,56,202,231 +DB 69,15,56,202,222 + pxor xmm5,xmm7 +DB 15,56,201,247 + pxor xmm12,xmm14 +DB 69,15,56,201,238 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,0 +DB 15,56,200,204 +DB 69,15,58,204,194,0 +DB 69,15,56,200,203 +DB 15,56,202,236 +DB 69,15,56,202,227 + pxor xmm6,xmm4 +DB 15,56,201,252 + pxor xmm13,xmm11 +DB 69,15,56,201,243 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,0 +DB 15,56,200,213 +DB 69,15,58,204,193,0 +DB 69,15,56,200,212 +DB 15,56,202,245 +DB 69,15,56,202,236 + pxor xmm7,xmm5 +DB 15,56,201,229 + pxor xmm14,xmm12 +DB 69,15,56,201,220 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,1 +DB 15,56,200,206 +DB 69,15,58,204,194,1 +DB 69,15,56,200,205 +DB 15,56,202,254 +DB 69,15,56,202,245 + pxor xmm4,xmm6 +DB 15,56,201,238 + pxor xmm11,xmm13 +DB 69,15,56,201,229 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,1 +DB 15,56,200,215 +DB 69,15,58,204,193,1 +DB 69,15,56,200,214 +DB 15,56,202,231 +DB 69,15,56,202,222 + pxor xmm5,xmm7 +DB 15,56,201,247 + pxor xmm12,xmm14 +DB 69,15,56,201,238 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,1 +DB 15,56,200,204 +DB 69,15,58,204,194,1 +DB 69,15,56,200,203 +DB 15,56,202,236 +DB 69,15,56,202,227 + pxor xmm6,xmm4 +DB 15,56,201,252 + pxor xmm13,xmm11 +DB 69,15,56,201,243 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,1 +DB 15,56,200,213 +DB 69,15,58,204,193,1 +DB 69,15,56,200,212 +DB 15,56,202,245 +DB 69,15,56,202,236 + pxor xmm7,xmm5 +DB 15,56,201,229 + pxor xmm14,xmm12 +DB 69,15,56,201,220 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,1 +DB 15,56,200,206 +DB 69,15,58,204,194,1 +DB 69,15,56,200,205 +DB 15,56,202,254 +DB 69,15,56,202,245 + pxor xmm4,xmm6 +DB 15,56,201,238 + pxor xmm11,xmm13 +DB 69,15,56,201,229 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,2 +DB 15,56,200,215 +DB 69,15,58,204,193,2 +DB 69,15,56,200,214 +DB 15,56,202,231 +DB 69,15,56,202,222 + pxor xmm5,xmm7 +DB 15,56,201,247 + pxor xmm12,xmm14 +DB 69,15,56,201,238 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,2 +DB 15,56,200,204 +DB 69,15,58,204,194,2 +DB 69,15,56,200,203 +DB 15,56,202,236 +DB 69,15,56,202,227 + pxor xmm6,xmm4 +DB 15,56,201,252 + pxor xmm13,xmm11 +DB 69,15,56,201,243 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,2 +DB 15,56,200,213 +DB 69,15,58,204,193,2 +DB 69,15,56,200,212 +DB 15,56,202,245 +DB 69,15,56,202,236 + pxor xmm7,xmm5 +DB 15,56,201,229 + pxor xmm14,xmm12 +DB 69,15,56,201,220 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,2 +DB 15,56,200,206 +DB 69,15,58,204,194,2 +DB 69,15,56,200,205 +DB 15,56,202,254 +DB 69,15,56,202,245 + pxor xmm4,xmm6 +DB 15,56,201,238 + pxor xmm11,xmm13 +DB 69,15,56,201,229 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,2 +DB 15,56,200,215 +DB 69,15,58,204,193,2 +DB 69,15,56,200,214 +DB 15,56,202,231 +DB 69,15,56,202,222 + pxor xmm5,xmm7 +DB 15,56,201,247 + pxor xmm12,xmm14 +DB 69,15,56,201,238 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,3 +DB 15,56,200,204 +DB 69,15,58,204,194,3 +DB 69,15,56,200,203 +DB 15,56,202,236 +DB 69,15,56,202,227 + pxor xmm6,xmm4 +DB 15,56,201,252 + pxor xmm13,xmm11 +DB 69,15,56,201,243 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,3 +DB 15,56,200,213 +DB 69,15,58,204,193,3 +DB 69,15,56,200,212 +DB 15,56,202,245 +DB 69,15,56,202,236 + pxor xmm7,xmm5 + pxor xmm14,xmm12 + + mov ecx,1 + pxor xmm4,xmm4 + cmp ecx,DWORD[rbx] + cmovge r8,rsp + + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,3 +DB 15,56,200,206 +DB 69,15,58,204,194,3 +DB 69,15,56,200,205 +DB 15,56,202,254 +DB 69,15,56,202,245 + + cmp ecx,DWORD[4+rbx] + cmovge r9,rsp + movq xmm6,QWORD[rbx] + + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,3 +DB 15,56,200,215 +DB 69,15,58,204,193,3 +DB 69,15,56,200,214 + + pshufd xmm11,xmm6,0x00 + pshufd xmm12,xmm6,0x55 + movdqa xmm7,xmm6 + pcmpgtd xmm11,xmm4 + pcmpgtd xmm12,xmm4 + + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,3 +DB 15,56,200,204 +DB 69,15,58,204,194,3 +DB 68,15,56,200,204 + + pcmpgtd xmm7,xmm4 + pand xmm0,xmm11 + pand xmm1,xmm11 + pand xmm8,xmm12 + pand xmm9,xmm12 + paddd xmm6,xmm7 + + paddd xmm0,XMMWORD[64+rsp] + paddd xmm1,XMMWORD[80+rsp] + paddd xmm8,XMMWORD[96+rsp] + paddd xmm9,XMMWORD[112+rsp] + + movq QWORD[rbx],xmm6 + dec edx + jnz NEAR $L$oop_shaext + + mov edx,DWORD[280+rsp] + + pshufd xmm0,xmm0,27 + pshufd xmm8,xmm8,27 + + movdqa xmm6,xmm0 + punpckldq xmm0,xmm8 + punpckhdq xmm6,xmm8 + punpckhdq xmm1,xmm9 + movq QWORD[(0-64)+rdi],xmm0 + psrldq xmm0,8 + movq QWORD[(64-64)+rdi],xmm6 + psrldq xmm6,8 + movq QWORD[(32-64)+rdi],xmm0 + psrldq xmm1,8 + movq QWORD[(96-64)+rdi],xmm6 + movq QWORD[(128-64)+rdi],xmm1 + + lea rdi,[8+rdi] + lea rsi,[32+rsi] + dec edx + jnz NEAR $L$oop_grande_shaext + +$L$done_shaext: + + movaps xmm6,XMMWORD[((-184))+rax] + movaps xmm7,XMMWORD[((-168))+rax] + movaps xmm8,XMMWORD[((-152))+rax] + movaps xmm9,XMMWORD[((-136))+rax] + movaps xmm10,XMMWORD[((-120))+rax] + movaps xmm11,XMMWORD[((-104))+rax] + movaps xmm12,XMMWORD[((-88))+rax] + movaps xmm13,XMMWORD[((-72))+rax] + movaps xmm14,XMMWORD[((-56))+rax] + movaps xmm15,XMMWORD[((-40))+rax] + mov rbp,QWORD[((-16))+rax] + + mov rbx,QWORD[((-8))+rax] + + lea rsp,[rax] + +$L$epilogue_shaext: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha1_multi_block_shaext: + +ALIGN 32 +sha1_multi_block_avx: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_multi_block_avx: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_avx_shortcut: + shr rcx,32 + cmp edx,2 + jb NEAR $L$avx + test ecx,32 + jnz NEAR _avx2_shortcut + jmp NEAR $L$avx +ALIGN 32 +$L$avx: + mov rax,rsp + + push rbx + + push rbp + + lea rsp,[((-168))+rsp] + movaps XMMWORD[rsp],xmm6 + movaps XMMWORD[16+rsp],xmm7 + movaps XMMWORD[32+rsp],xmm8 + movaps XMMWORD[48+rsp],xmm9 + movaps XMMWORD[(-120)+rax],xmm10 + movaps XMMWORD[(-104)+rax],xmm11 + movaps XMMWORD[(-88)+rax],xmm12 + movaps XMMWORD[(-72)+rax],xmm13 + movaps XMMWORD[(-56)+rax],xmm14 + movaps XMMWORD[(-40)+rax],xmm15 + sub rsp,288 + and rsp,-256 + mov QWORD[272+rsp],rax + +$L$body_avx: + lea rbp,[K_XX_XX] + lea rbx,[256+rsp] + + vzeroupper +$L$oop_grande_avx: + mov DWORD[280+rsp],edx + xor edx,edx + mov r8,QWORD[rsi] + mov ecx,DWORD[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[rbx],ecx + cmovle r8,rbp + mov r9,QWORD[16+rsi] + mov ecx,DWORD[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[4+rbx],ecx + cmovle r9,rbp + mov r10,QWORD[32+rsi] + mov ecx,DWORD[40+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[8+rbx],ecx + cmovle r10,rbp + mov r11,QWORD[48+rsi] + mov ecx,DWORD[56+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[12+rbx],ecx + cmovle r11,rbp + test edx,edx + jz NEAR $L$done_avx + + vmovdqu xmm10,XMMWORD[rdi] + lea rax,[128+rsp] + vmovdqu xmm11,XMMWORD[32+rdi] + vmovdqu xmm12,XMMWORD[64+rdi] + vmovdqu xmm13,XMMWORD[96+rdi] + vmovdqu xmm14,XMMWORD[128+rdi] + vmovdqu xmm5,XMMWORD[96+rbp] + jmp NEAR $L$oop_avx + +ALIGN 32 +$L$oop_avx: + vmovdqa xmm15,XMMWORD[((-32))+rbp] + vmovd xmm0,DWORD[r8] + lea r8,[64+r8] + vmovd xmm2,DWORD[r9] + lea r9,[64+r9] + vpinsrd xmm0,xmm0,DWORD[r10],1 + lea r10,[64+r10] + vpinsrd xmm2,xmm2,DWORD[r11],1 + lea r11,[64+r11] + vmovd xmm1,DWORD[((-60))+r8] + vpunpckldq xmm0,xmm0,xmm2 + vmovd xmm9,DWORD[((-60))+r9] + vpshufb xmm0,xmm0,xmm5 + vpinsrd xmm1,xmm1,DWORD[((-60))+r10],1 + vpinsrd xmm9,xmm9,DWORD[((-60))+r11],1 + vpaddd xmm14,xmm14,xmm15 + vpslld xmm8,xmm10,5 + vpandn xmm7,xmm11,xmm13 + vpand xmm6,xmm11,xmm12 + + vmovdqa XMMWORD[(0-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpunpckldq xmm1,xmm1,xmm9 + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm2,DWORD[((-56))+r8] + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD[((-56))+r9] + vpaddd xmm14,xmm14,xmm6 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpshufb xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpinsrd xmm2,xmm2,DWORD[((-56))+r10],1 + vpinsrd xmm9,xmm9,DWORD[((-56))+r11],1 + vpaddd xmm13,xmm13,xmm15 + vpslld xmm8,xmm14,5 + vpandn xmm7,xmm10,xmm12 + vpand xmm6,xmm10,xmm11 + + vmovdqa XMMWORD[(16-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpunpckldq xmm2,xmm2,xmm9 + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm3,DWORD[((-52))+r8] + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD[((-52))+r9] + vpaddd xmm13,xmm13,xmm6 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpshufb xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpinsrd xmm3,xmm3,DWORD[((-52))+r10],1 + vpinsrd xmm9,xmm9,DWORD[((-52))+r11],1 + vpaddd xmm12,xmm12,xmm15 + vpslld xmm8,xmm13,5 + vpandn xmm7,xmm14,xmm11 + vpand xmm6,xmm14,xmm10 + + vmovdqa XMMWORD[(32-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpunpckldq xmm3,xmm3,xmm9 + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm4,DWORD[((-48))+r8] + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD[((-48))+r9] + vpaddd xmm12,xmm12,xmm6 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpshufb xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpinsrd xmm4,xmm4,DWORD[((-48))+r10],1 + vpinsrd xmm9,xmm9,DWORD[((-48))+r11],1 + vpaddd xmm11,xmm11,xmm15 + vpslld xmm8,xmm12,5 + vpandn xmm7,xmm13,xmm10 + vpand xmm6,xmm13,xmm14 + + vmovdqa XMMWORD[(48-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpunpckldq xmm4,xmm4,xmm9 + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm0,DWORD[((-44))+r8] + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD[((-44))+r9] + vpaddd xmm11,xmm11,xmm6 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpshufb xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpinsrd xmm0,xmm0,DWORD[((-44))+r10],1 + vpinsrd xmm9,xmm9,DWORD[((-44))+r11],1 + vpaddd xmm10,xmm10,xmm15 + vpslld xmm8,xmm11,5 + vpandn xmm7,xmm12,xmm14 + vpand xmm6,xmm12,xmm13 + + vmovdqa XMMWORD[(64-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpunpckldq xmm0,xmm0,xmm9 + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm1,DWORD[((-40))+r8] + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD[((-40))+r9] + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpshufb xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpinsrd xmm1,xmm1,DWORD[((-40))+r10],1 + vpinsrd xmm9,xmm9,DWORD[((-40))+r11],1 + vpaddd xmm14,xmm14,xmm15 + vpslld xmm8,xmm10,5 + vpandn xmm7,xmm11,xmm13 + vpand xmm6,xmm11,xmm12 + + vmovdqa XMMWORD[(80-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpunpckldq xmm1,xmm1,xmm9 + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm2,DWORD[((-36))+r8] + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD[((-36))+r9] + vpaddd xmm14,xmm14,xmm6 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpshufb xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpinsrd xmm2,xmm2,DWORD[((-36))+r10],1 + vpinsrd xmm9,xmm9,DWORD[((-36))+r11],1 + vpaddd xmm13,xmm13,xmm15 + vpslld xmm8,xmm14,5 + vpandn xmm7,xmm10,xmm12 + vpand xmm6,xmm10,xmm11 + + vmovdqa XMMWORD[(96-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpunpckldq xmm2,xmm2,xmm9 + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm3,DWORD[((-32))+r8] + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD[((-32))+r9] + vpaddd xmm13,xmm13,xmm6 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpshufb xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpinsrd xmm3,xmm3,DWORD[((-32))+r10],1 + vpinsrd xmm9,xmm9,DWORD[((-32))+r11],1 + vpaddd xmm12,xmm12,xmm15 + vpslld xmm8,xmm13,5 + vpandn xmm7,xmm14,xmm11 + vpand xmm6,xmm14,xmm10 + + vmovdqa XMMWORD[(112-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpunpckldq xmm3,xmm3,xmm9 + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm4,DWORD[((-28))+r8] + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD[((-28))+r9] + vpaddd xmm12,xmm12,xmm6 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpshufb xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpinsrd xmm4,xmm4,DWORD[((-28))+r10],1 + vpinsrd xmm9,xmm9,DWORD[((-28))+r11],1 + vpaddd xmm11,xmm11,xmm15 + vpslld xmm8,xmm12,5 + vpandn xmm7,xmm13,xmm10 + vpand xmm6,xmm13,xmm14 + + vmovdqa XMMWORD[(128-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpunpckldq xmm4,xmm4,xmm9 + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm0,DWORD[((-24))+r8] + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD[((-24))+r9] + vpaddd xmm11,xmm11,xmm6 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpshufb xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpinsrd xmm0,xmm0,DWORD[((-24))+r10],1 + vpinsrd xmm9,xmm9,DWORD[((-24))+r11],1 + vpaddd xmm10,xmm10,xmm15 + vpslld xmm8,xmm11,5 + vpandn xmm7,xmm12,xmm14 + vpand xmm6,xmm12,xmm13 + + vmovdqa XMMWORD[(144-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpunpckldq xmm0,xmm0,xmm9 + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm1,DWORD[((-20))+r8] + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD[((-20))+r9] + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpshufb xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpinsrd xmm1,xmm1,DWORD[((-20))+r10],1 + vpinsrd xmm9,xmm9,DWORD[((-20))+r11],1 + vpaddd xmm14,xmm14,xmm15 + vpslld xmm8,xmm10,5 + vpandn xmm7,xmm11,xmm13 + vpand xmm6,xmm11,xmm12 + + vmovdqa XMMWORD[(160-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpunpckldq xmm1,xmm1,xmm9 + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm2,DWORD[((-16))+r8] + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD[((-16))+r9] + vpaddd xmm14,xmm14,xmm6 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpshufb xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpinsrd xmm2,xmm2,DWORD[((-16))+r10],1 + vpinsrd xmm9,xmm9,DWORD[((-16))+r11],1 + vpaddd xmm13,xmm13,xmm15 + vpslld xmm8,xmm14,5 + vpandn xmm7,xmm10,xmm12 + vpand xmm6,xmm10,xmm11 + + vmovdqa XMMWORD[(176-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpunpckldq xmm2,xmm2,xmm9 + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm3,DWORD[((-12))+r8] + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD[((-12))+r9] + vpaddd xmm13,xmm13,xmm6 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpshufb xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpinsrd xmm3,xmm3,DWORD[((-12))+r10],1 + vpinsrd xmm9,xmm9,DWORD[((-12))+r11],1 + vpaddd xmm12,xmm12,xmm15 + vpslld xmm8,xmm13,5 + vpandn xmm7,xmm14,xmm11 + vpand xmm6,xmm14,xmm10 + + vmovdqa XMMWORD[(192-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpunpckldq xmm3,xmm3,xmm9 + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm4,DWORD[((-8))+r8] + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD[((-8))+r9] + vpaddd xmm12,xmm12,xmm6 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpshufb xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpinsrd xmm4,xmm4,DWORD[((-8))+r10],1 + vpinsrd xmm9,xmm9,DWORD[((-8))+r11],1 + vpaddd xmm11,xmm11,xmm15 + vpslld xmm8,xmm12,5 + vpandn xmm7,xmm13,xmm10 + vpand xmm6,xmm13,xmm14 + + vmovdqa XMMWORD[(208-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpunpckldq xmm4,xmm4,xmm9 + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm0,DWORD[((-4))+r8] + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD[((-4))+r9] + vpaddd xmm11,xmm11,xmm6 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpshufb xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vmovdqa xmm1,XMMWORD[((0-128))+rax] + vpinsrd xmm0,xmm0,DWORD[((-4))+r10],1 + vpinsrd xmm9,xmm9,DWORD[((-4))+r11],1 + vpaddd xmm10,xmm10,xmm15 + prefetcht0 [63+r8] + vpslld xmm8,xmm11,5 + vpandn xmm7,xmm12,xmm14 + vpand xmm6,xmm12,xmm13 + + vmovdqa XMMWORD[(224-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpunpckldq xmm0,xmm0,xmm9 + vpsrld xmm9,xmm11,27 + prefetcht0 [63+r9] + vpxor xmm6,xmm6,xmm7 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + prefetcht0 [63+r10] + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + prefetcht0 [63+r11] + vpshufb xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vmovdqa xmm2,XMMWORD[((16-128))+rax] + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD[((32-128))+rax] + + vpaddd xmm14,xmm14,xmm15 + vpslld xmm8,xmm10,5 + vpandn xmm7,xmm11,xmm13 + + vpand xmm6,xmm11,xmm12 + + vmovdqa XMMWORD[(240-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpxor xmm1,xmm1,XMMWORD[((128-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm7 + vpxor xmm1,xmm1,xmm3 + + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD[((48-128))+rax] + + vpaddd xmm13,xmm13,xmm15 + vpslld xmm8,xmm14,5 + vpandn xmm7,xmm10,xmm12 + + vpand xmm6,xmm10,xmm11 + + vmovdqa XMMWORD[(0-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD[((144-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm7 + vpxor xmm2,xmm2,xmm4 + + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD[((64-128))+rax] + + vpaddd xmm12,xmm12,xmm15 + vpslld xmm8,xmm13,5 + vpandn xmm7,xmm14,xmm11 + + vpand xmm6,xmm14,xmm10 + + vmovdqa XMMWORD[(16-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD[((160-128))+rax] + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm7 + vpxor xmm3,xmm3,xmm0 + + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD[((80-128))+rax] + + vpaddd xmm11,xmm11,xmm15 + vpslld xmm8,xmm12,5 + vpandn xmm7,xmm13,xmm10 + + vpand xmm6,xmm13,xmm14 + + vmovdqa XMMWORD[(32-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD[((176-128))+rax] + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm7 + vpxor xmm4,xmm4,xmm1 + + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD[((96-128))+rax] + + vpaddd xmm10,xmm10,xmm15 + vpslld xmm8,xmm11,5 + vpandn xmm7,xmm12,xmm14 + + vpand xmm6,xmm12,xmm13 + + vmovdqa XMMWORD[(48-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm0,xmm0,XMMWORD[((192-128))+rax] + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm7 + vpxor xmm0,xmm0,xmm2 + + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm5,xmm0,31 + vpaddd xmm0,xmm0,xmm0 + + vpsrld xmm12,xmm12,2 + + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vmovdqa xmm15,XMMWORD[rbp] + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD[((112-128))+rax] + + vpslld xmm8,xmm10,5 + vpaddd xmm14,xmm14,xmm15 + vpxor xmm6,xmm13,xmm11 + vmovdqa XMMWORD[(64-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpxor xmm1,xmm1,XMMWORD[((208-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm12 + vpxor xmm1,xmm1,xmm3 + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD[((128-128))+rax] + + vpslld xmm8,xmm14,5 + vpaddd xmm13,xmm13,xmm15 + vpxor xmm6,xmm12,xmm10 + vmovdqa XMMWORD[(80-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD[((224-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm11 + vpxor xmm2,xmm2,xmm4 + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD[((144-128))+rax] + + vpslld xmm8,xmm13,5 + vpaddd xmm12,xmm12,xmm15 + vpxor xmm6,xmm11,xmm14 + vmovdqa XMMWORD[(96-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD[((240-128))+rax] + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm10 + vpxor xmm3,xmm3,xmm0 + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD[((160-128))+rax] + + vpslld xmm8,xmm12,5 + vpaddd xmm11,xmm11,xmm15 + vpxor xmm6,xmm10,xmm13 + vmovdqa XMMWORD[(112-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD[((0-128))+rax] + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm14 + vpxor xmm4,xmm4,xmm1 + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD[((176-128))+rax] + + vpslld xmm8,xmm11,5 + vpaddd xmm10,xmm10,xmm15 + vpxor xmm6,xmm14,xmm12 + vmovdqa XMMWORD[(128-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm0,xmm0,XMMWORD[((16-128))+rax] + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm13 + vpxor xmm0,xmm0,xmm2 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + vpsrld xmm5,xmm0,31 + vpaddd xmm0,xmm0,xmm0 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD[((192-128))+rax] + + vpslld xmm8,xmm10,5 + vpaddd xmm14,xmm14,xmm15 + vpxor xmm6,xmm13,xmm11 + vmovdqa XMMWORD[(144-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpxor xmm1,xmm1,XMMWORD[((32-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm12 + vpxor xmm1,xmm1,xmm3 + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD[((208-128))+rax] + + vpslld xmm8,xmm14,5 + vpaddd xmm13,xmm13,xmm15 + vpxor xmm6,xmm12,xmm10 + vmovdqa XMMWORD[(160-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD[((48-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm11 + vpxor xmm2,xmm2,xmm4 + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD[((224-128))+rax] + + vpslld xmm8,xmm13,5 + vpaddd xmm12,xmm12,xmm15 + vpxor xmm6,xmm11,xmm14 + vmovdqa XMMWORD[(176-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD[((64-128))+rax] + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm10 + vpxor xmm3,xmm3,xmm0 + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD[((240-128))+rax] + + vpslld xmm8,xmm12,5 + vpaddd xmm11,xmm11,xmm15 + vpxor xmm6,xmm10,xmm13 + vmovdqa XMMWORD[(192-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD[((80-128))+rax] + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm14 + vpxor xmm4,xmm4,xmm1 + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD[((0-128))+rax] + + vpslld xmm8,xmm11,5 + vpaddd xmm10,xmm10,xmm15 + vpxor xmm6,xmm14,xmm12 + vmovdqa XMMWORD[(208-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm0,xmm0,XMMWORD[((96-128))+rax] + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm13 + vpxor xmm0,xmm0,xmm2 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + vpsrld xmm5,xmm0,31 + vpaddd xmm0,xmm0,xmm0 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD[((16-128))+rax] + + vpslld xmm8,xmm10,5 + vpaddd xmm14,xmm14,xmm15 + vpxor xmm6,xmm13,xmm11 + vmovdqa XMMWORD[(224-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpxor xmm1,xmm1,XMMWORD[((112-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm12 + vpxor xmm1,xmm1,xmm3 + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD[((32-128))+rax] + + vpslld xmm8,xmm14,5 + vpaddd xmm13,xmm13,xmm15 + vpxor xmm6,xmm12,xmm10 + vmovdqa XMMWORD[(240-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD[((128-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm11 + vpxor xmm2,xmm2,xmm4 + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD[((48-128))+rax] + + vpslld xmm8,xmm13,5 + vpaddd xmm12,xmm12,xmm15 + vpxor xmm6,xmm11,xmm14 + vmovdqa XMMWORD[(0-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD[((144-128))+rax] + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm10 + vpxor xmm3,xmm3,xmm0 + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD[((64-128))+rax] + + vpslld xmm8,xmm12,5 + vpaddd xmm11,xmm11,xmm15 + vpxor xmm6,xmm10,xmm13 + vmovdqa XMMWORD[(16-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD[((160-128))+rax] + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm14 + vpxor xmm4,xmm4,xmm1 + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD[((80-128))+rax] + + vpslld xmm8,xmm11,5 + vpaddd xmm10,xmm10,xmm15 + vpxor xmm6,xmm14,xmm12 + vmovdqa XMMWORD[(32-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm0,xmm0,XMMWORD[((176-128))+rax] + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm13 + vpxor xmm0,xmm0,xmm2 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + vpsrld xmm5,xmm0,31 + vpaddd xmm0,xmm0,xmm0 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD[((96-128))+rax] + + vpslld xmm8,xmm10,5 + vpaddd xmm14,xmm14,xmm15 + vpxor xmm6,xmm13,xmm11 + vmovdqa XMMWORD[(48-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpxor xmm1,xmm1,XMMWORD[((192-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm12 + vpxor xmm1,xmm1,xmm3 + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD[((112-128))+rax] + + vpslld xmm8,xmm14,5 + vpaddd xmm13,xmm13,xmm15 + vpxor xmm6,xmm12,xmm10 + vmovdqa XMMWORD[(64-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD[((208-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm11 + vpxor xmm2,xmm2,xmm4 + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD[((128-128))+rax] + + vpslld xmm8,xmm13,5 + vpaddd xmm12,xmm12,xmm15 + vpxor xmm6,xmm11,xmm14 + vmovdqa XMMWORD[(80-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD[((224-128))+rax] + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm10 + vpxor xmm3,xmm3,xmm0 + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD[((144-128))+rax] + + vpslld xmm8,xmm12,5 + vpaddd xmm11,xmm11,xmm15 + vpxor xmm6,xmm10,xmm13 + vmovdqa XMMWORD[(96-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD[((240-128))+rax] + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm14 + vpxor xmm4,xmm4,xmm1 + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD[((160-128))+rax] + + vpslld xmm8,xmm11,5 + vpaddd xmm10,xmm10,xmm15 + vpxor xmm6,xmm14,xmm12 + vmovdqa XMMWORD[(112-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm0,xmm0,XMMWORD[((0-128))+rax] + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm13 + vpxor xmm0,xmm0,xmm2 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + vpsrld xmm5,xmm0,31 + vpaddd xmm0,xmm0,xmm0 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vmovdqa xmm15,XMMWORD[32+rbp] + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD[((176-128))+rax] + + vpaddd xmm14,xmm14,xmm15 + vpslld xmm8,xmm10,5 + vpand xmm7,xmm13,xmm12 + vpxor xmm1,xmm1,XMMWORD[((16-128))+rax] + + vpaddd xmm14,xmm14,xmm7 + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm13,xmm12 + vpxor xmm1,xmm1,xmm3 + + vmovdqu XMMWORD[(128-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm1,31 + vpand xmm6,xmm6,xmm11 + vpaddd xmm1,xmm1,xmm1 + + vpslld xmm7,xmm11,30 + vpaddd xmm14,xmm14,xmm6 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD[((192-128))+rax] + + vpaddd xmm13,xmm13,xmm15 + vpslld xmm8,xmm14,5 + vpand xmm7,xmm12,xmm11 + vpxor xmm2,xmm2,XMMWORD[((32-128))+rax] + + vpaddd xmm13,xmm13,xmm7 + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm12,xmm11 + vpxor xmm2,xmm2,xmm4 + + vmovdqu XMMWORD[(144-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm2,31 + vpand xmm6,xmm6,xmm10 + vpaddd xmm2,xmm2,xmm2 + + vpslld xmm7,xmm10,30 + vpaddd xmm13,xmm13,xmm6 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD[((208-128))+rax] + + vpaddd xmm12,xmm12,xmm15 + vpslld xmm8,xmm13,5 + vpand xmm7,xmm11,xmm10 + vpxor xmm3,xmm3,XMMWORD[((48-128))+rax] + + vpaddd xmm12,xmm12,xmm7 + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm11,xmm10 + vpxor xmm3,xmm3,xmm0 + + vmovdqu XMMWORD[(160-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm3,31 + vpand xmm6,xmm6,xmm14 + vpaddd xmm3,xmm3,xmm3 + + vpslld xmm7,xmm14,30 + vpaddd xmm12,xmm12,xmm6 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD[((224-128))+rax] + + vpaddd xmm11,xmm11,xmm15 + vpslld xmm8,xmm12,5 + vpand xmm7,xmm10,xmm14 + vpxor xmm4,xmm4,XMMWORD[((64-128))+rax] + + vpaddd xmm11,xmm11,xmm7 + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm10,xmm14 + vpxor xmm4,xmm4,xmm1 + + vmovdqu XMMWORD[(176-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm4,31 + vpand xmm6,xmm6,xmm13 + vpaddd xmm4,xmm4,xmm4 + + vpslld xmm7,xmm13,30 + vpaddd xmm11,xmm11,xmm6 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD[((240-128))+rax] + + vpaddd xmm10,xmm10,xmm15 + vpslld xmm8,xmm11,5 + vpand xmm7,xmm14,xmm13 + vpxor xmm0,xmm0,XMMWORD[((80-128))+rax] + + vpaddd xmm10,xmm10,xmm7 + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm14,xmm13 + vpxor xmm0,xmm0,xmm2 + + vmovdqu XMMWORD[(192-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm0,31 + vpand xmm6,xmm6,xmm12 + vpaddd xmm0,xmm0,xmm0 + + vpslld xmm7,xmm12,30 + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD[((0-128))+rax] + + vpaddd xmm14,xmm14,xmm15 + vpslld xmm8,xmm10,5 + vpand xmm7,xmm13,xmm12 + vpxor xmm1,xmm1,XMMWORD[((96-128))+rax] + + vpaddd xmm14,xmm14,xmm7 + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm13,xmm12 + vpxor xmm1,xmm1,xmm3 + + vmovdqu XMMWORD[(208-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm1,31 + vpand xmm6,xmm6,xmm11 + vpaddd xmm1,xmm1,xmm1 + + vpslld xmm7,xmm11,30 + vpaddd xmm14,xmm14,xmm6 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD[((16-128))+rax] + + vpaddd xmm13,xmm13,xmm15 + vpslld xmm8,xmm14,5 + vpand xmm7,xmm12,xmm11 + vpxor xmm2,xmm2,XMMWORD[((112-128))+rax] + + vpaddd xmm13,xmm13,xmm7 + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm12,xmm11 + vpxor xmm2,xmm2,xmm4 + + vmovdqu XMMWORD[(224-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm2,31 + vpand xmm6,xmm6,xmm10 + vpaddd xmm2,xmm2,xmm2 + + vpslld xmm7,xmm10,30 + vpaddd xmm13,xmm13,xmm6 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD[((32-128))+rax] + + vpaddd xmm12,xmm12,xmm15 + vpslld xmm8,xmm13,5 + vpand xmm7,xmm11,xmm10 + vpxor xmm3,xmm3,XMMWORD[((128-128))+rax] + + vpaddd xmm12,xmm12,xmm7 + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm11,xmm10 + vpxor xmm3,xmm3,xmm0 + + vmovdqu XMMWORD[(240-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm3,31 + vpand xmm6,xmm6,xmm14 + vpaddd xmm3,xmm3,xmm3 + + vpslld xmm7,xmm14,30 + vpaddd xmm12,xmm12,xmm6 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD[((48-128))+rax] + + vpaddd xmm11,xmm11,xmm15 + vpslld xmm8,xmm12,5 + vpand xmm7,xmm10,xmm14 + vpxor xmm4,xmm4,XMMWORD[((144-128))+rax] + + vpaddd xmm11,xmm11,xmm7 + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm10,xmm14 + vpxor xmm4,xmm4,xmm1 + + vmovdqu XMMWORD[(0-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm4,31 + vpand xmm6,xmm6,xmm13 + vpaddd xmm4,xmm4,xmm4 + + vpslld xmm7,xmm13,30 + vpaddd xmm11,xmm11,xmm6 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD[((64-128))+rax] + + vpaddd xmm10,xmm10,xmm15 + vpslld xmm8,xmm11,5 + vpand xmm7,xmm14,xmm13 + vpxor xmm0,xmm0,XMMWORD[((160-128))+rax] + + vpaddd xmm10,xmm10,xmm7 + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm14,xmm13 + vpxor xmm0,xmm0,xmm2 + + vmovdqu XMMWORD[(16-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm0,31 + vpand xmm6,xmm6,xmm12 + vpaddd xmm0,xmm0,xmm0 + + vpslld xmm7,xmm12,30 + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD[((80-128))+rax] + + vpaddd xmm14,xmm14,xmm15 + vpslld xmm8,xmm10,5 + vpand xmm7,xmm13,xmm12 + vpxor xmm1,xmm1,XMMWORD[((176-128))+rax] + + vpaddd xmm14,xmm14,xmm7 + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm13,xmm12 + vpxor xmm1,xmm1,xmm3 + + vmovdqu XMMWORD[(32-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm1,31 + vpand xmm6,xmm6,xmm11 + vpaddd xmm1,xmm1,xmm1 + + vpslld xmm7,xmm11,30 + vpaddd xmm14,xmm14,xmm6 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD[((96-128))+rax] + + vpaddd xmm13,xmm13,xmm15 + vpslld xmm8,xmm14,5 + vpand xmm7,xmm12,xmm11 + vpxor xmm2,xmm2,XMMWORD[((192-128))+rax] + + vpaddd xmm13,xmm13,xmm7 + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm12,xmm11 + vpxor xmm2,xmm2,xmm4 + + vmovdqu XMMWORD[(48-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm2,31 + vpand xmm6,xmm6,xmm10 + vpaddd xmm2,xmm2,xmm2 + + vpslld xmm7,xmm10,30 + vpaddd xmm13,xmm13,xmm6 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD[((112-128))+rax] + + vpaddd xmm12,xmm12,xmm15 + vpslld xmm8,xmm13,5 + vpand xmm7,xmm11,xmm10 + vpxor xmm3,xmm3,XMMWORD[((208-128))+rax] + + vpaddd xmm12,xmm12,xmm7 + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm11,xmm10 + vpxor xmm3,xmm3,xmm0 + + vmovdqu XMMWORD[(64-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm3,31 + vpand xmm6,xmm6,xmm14 + vpaddd xmm3,xmm3,xmm3 + + vpslld xmm7,xmm14,30 + vpaddd xmm12,xmm12,xmm6 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD[((128-128))+rax] + + vpaddd xmm11,xmm11,xmm15 + vpslld xmm8,xmm12,5 + vpand xmm7,xmm10,xmm14 + vpxor xmm4,xmm4,XMMWORD[((224-128))+rax] + + vpaddd xmm11,xmm11,xmm7 + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm10,xmm14 + vpxor xmm4,xmm4,xmm1 + + vmovdqu XMMWORD[(80-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm4,31 + vpand xmm6,xmm6,xmm13 + vpaddd xmm4,xmm4,xmm4 + + vpslld xmm7,xmm13,30 + vpaddd xmm11,xmm11,xmm6 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD[((144-128))+rax] + + vpaddd xmm10,xmm10,xmm15 + vpslld xmm8,xmm11,5 + vpand xmm7,xmm14,xmm13 + vpxor xmm0,xmm0,XMMWORD[((240-128))+rax] + + vpaddd xmm10,xmm10,xmm7 + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm14,xmm13 + vpxor xmm0,xmm0,xmm2 + + vmovdqu XMMWORD[(96-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm0,31 + vpand xmm6,xmm6,xmm12 + vpaddd xmm0,xmm0,xmm0 + + vpslld xmm7,xmm12,30 + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD[((160-128))+rax] + + vpaddd xmm14,xmm14,xmm15 + vpslld xmm8,xmm10,5 + vpand xmm7,xmm13,xmm12 + vpxor xmm1,xmm1,XMMWORD[((0-128))+rax] + + vpaddd xmm14,xmm14,xmm7 + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm13,xmm12 + vpxor xmm1,xmm1,xmm3 + + vmovdqu XMMWORD[(112-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm1,31 + vpand xmm6,xmm6,xmm11 + vpaddd xmm1,xmm1,xmm1 + + vpslld xmm7,xmm11,30 + vpaddd xmm14,xmm14,xmm6 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD[((176-128))+rax] + + vpaddd xmm13,xmm13,xmm15 + vpslld xmm8,xmm14,5 + vpand xmm7,xmm12,xmm11 + vpxor xmm2,xmm2,XMMWORD[((16-128))+rax] + + vpaddd xmm13,xmm13,xmm7 + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm12,xmm11 + vpxor xmm2,xmm2,xmm4 + + vmovdqu XMMWORD[(128-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm2,31 + vpand xmm6,xmm6,xmm10 + vpaddd xmm2,xmm2,xmm2 + + vpslld xmm7,xmm10,30 + vpaddd xmm13,xmm13,xmm6 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD[((192-128))+rax] + + vpaddd xmm12,xmm12,xmm15 + vpslld xmm8,xmm13,5 + vpand xmm7,xmm11,xmm10 + vpxor xmm3,xmm3,XMMWORD[((32-128))+rax] + + vpaddd xmm12,xmm12,xmm7 + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm11,xmm10 + vpxor xmm3,xmm3,xmm0 + + vmovdqu XMMWORD[(144-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm3,31 + vpand xmm6,xmm6,xmm14 + vpaddd xmm3,xmm3,xmm3 + + vpslld xmm7,xmm14,30 + vpaddd xmm12,xmm12,xmm6 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD[((208-128))+rax] + + vpaddd xmm11,xmm11,xmm15 + vpslld xmm8,xmm12,5 + vpand xmm7,xmm10,xmm14 + vpxor xmm4,xmm4,XMMWORD[((48-128))+rax] + + vpaddd xmm11,xmm11,xmm7 + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm10,xmm14 + vpxor xmm4,xmm4,xmm1 + + vmovdqu XMMWORD[(160-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm4,31 + vpand xmm6,xmm6,xmm13 + vpaddd xmm4,xmm4,xmm4 + + vpslld xmm7,xmm13,30 + vpaddd xmm11,xmm11,xmm6 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD[((224-128))+rax] + + vpaddd xmm10,xmm10,xmm15 + vpslld xmm8,xmm11,5 + vpand xmm7,xmm14,xmm13 + vpxor xmm0,xmm0,XMMWORD[((64-128))+rax] + + vpaddd xmm10,xmm10,xmm7 + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm14,xmm13 + vpxor xmm0,xmm0,xmm2 + + vmovdqu XMMWORD[(176-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm0,31 + vpand xmm6,xmm6,xmm12 + vpaddd xmm0,xmm0,xmm0 + + vpslld xmm7,xmm12,30 + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vmovdqa xmm15,XMMWORD[64+rbp] + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD[((240-128))+rax] + + vpslld xmm8,xmm10,5 + vpaddd xmm14,xmm14,xmm15 + vpxor xmm6,xmm13,xmm11 + vmovdqa XMMWORD[(192-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpxor xmm1,xmm1,XMMWORD[((80-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm12 + vpxor xmm1,xmm1,xmm3 + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD[((0-128))+rax] + + vpslld xmm8,xmm14,5 + vpaddd xmm13,xmm13,xmm15 + vpxor xmm6,xmm12,xmm10 + vmovdqa XMMWORD[(208-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD[((96-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm11 + vpxor xmm2,xmm2,xmm4 + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD[((16-128))+rax] + + vpslld xmm8,xmm13,5 + vpaddd xmm12,xmm12,xmm15 + vpxor xmm6,xmm11,xmm14 + vmovdqa XMMWORD[(224-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD[((112-128))+rax] + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm10 + vpxor xmm3,xmm3,xmm0 + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD[((32-128))+rax] + + vpslld xmm8,xmm12,5 + vpaddd xmm11,xmm11,xmm15 + vpxor xmm6,xmm10,xmm13 + vmovdqa XMMWORD[(240-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD[((128-128))+rax] + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm14 + vpxor xmm4,xmm4,xmm1 + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD[((48-128))+rax] + + vpslld xmm8,xmm11,5 + vpaddd xmm10,xmm10,xmm15 + vpxor xmm6,xmm14,xmm12 + vmovdqa XMMWORD[(0-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm0,xmm0,XMMWORD[((144-128))+rax] + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm13 + vpxor xmm0,xmm0,xmm2 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + vpsrld xmm5,xmm0,31 + vpaddd xmm0,xmm0,xmm0 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD[((64-128))+rax] + + vpslld xmm8,xmm10,5 + vpaddd xmm14,xmm14,xmm15 + vpxor xmm6,xmm13,xmm11 + vmovdqa XMMWORD[(16-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpxor xmm1,xmm1,XMMWORD[((160-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm12 + vpxor xmm1,xmm1,xmm3 + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD[((80-128))+rax] + + vpslld xmm8,xmm14,5 + vpaddd xmm13,xmm13,xmm15 + vpxor xmm6,xmm12,xmm10 + vmovdqa XMMWORD[(32-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD[((176-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm11 + vpxor xmm2,xmm2,xmm4 + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD[((96-128))+rax] + + vpslld xmm8,xmm13,5 + vpaddd xmm12,xmm12,xmm15 + vpxor xmm6,xmm11,xmm14 + vmovdqa XMMWORD[(48-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD[((192-128))+rax] + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm10 + vpxor xmm3,xmm3,xmm0 + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD[((112-128))+rax] + + vpslld xmm8,xmm12,5 + vpaddd xmm11,xmm11,xmm15 + vpxor xmm6,xmm10,xmm13 + vmovdqa XMMWORD[(64-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD[((208-128))+rax] + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm14 + vpxor xmm4,xmm4,xmm1 + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD[((128-128))+rax] + + vpslld xmm8,xmm11,5 + vpaddd xmm10,xmm10,xmm15 + vpxor xmm6,xmm14,xmm12 + vmovdqa XMMWORD[(80-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm0,xmm0,XMMWORD[((224-128))+rax] + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm13 + vpxor xmm0,xmm0,xmm2 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + vpsrld xmm5,xmm0,31 + vpaddd xmm0,xmm0,xmm0 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD[((144-128))+rax] + + vpslld xmm8,xmm10,5 + vpaddd xmm14,xmm14,xmm15 + vpxor xmm6,xmm13,xmm11 + vmovdqa XMMWORD[(96-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpxor xmm1,xmm1,XMMWORD[((240-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm12 + vpxor xmm1,xmm1,xmm3 + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD[((160-128))+rax] + + vpslld xmm8,xmm14,5 + vpaddd xmm13,xmm13,xmm15 + vpxor xmm6,xmm12,xmm10 + vmovdqa XMMWORD[(112-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD[((0-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm11 + vpxor xmm2,xmm2,xmm4 + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD[((176-128))+rax] + + vpslld xmm8,xmm13,5 + vpaddd xmm12,xmm12,xmm15 + vpxor xmm6,xmm11,xmm14 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD[((16-128))+rax] + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm10 + vpxor xmm3,xmm3,xmm0 + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD[((192-128))+rax] + + vpslld xmm8,xmm12,5 + vpaddd xmm11,xmm11,xmm15 + vpxor xmm6,xmm10,xmm13 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD[((32-128))+rax] + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm14 + vpxor xmm4,xmm4,xmm1 + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD[((208-128))+rax] + + vpslld xmm8,xmm11,5 + vpaddd xmm10,xmm10,xmm15 + vpxor xmm6,xmm14,xmm12 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm0,xmm0,XMMWORD[((48-128))+rax] + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm13 + vpxor xmm0,xmm0,xmm2 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + vpsrld xmm5,xmm0,31 + vpaddd xmm0,xmm0,xmm0 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD[((224-128))+rax] + + vpslld xmm8,xmm10,5 + vpaddd xmm14,xmm14,xmm15 + vpxor xmm6,xmm13,xmm11 + vpaddd xmm14,xmm14,xmm0 + vpxor xmm1,xmm1,XMMWORD[((64-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm12 + vpxor xmm1,xmm1,xmm3 + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD[((240-128))+rax] + + vpslld xmm8,xmm14,5 + vpaddd xmm13,xmm13,xmm15 + vpxor xmm6,xmm12,xmm10 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD[((80-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm11 + vpxor xmm2,xmm2,xmm4 + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD[((0-128))+rax] + + vpslld xmm8,xmm13,5 + vpaddd xmm12,xmm12,xmm15 + vpxor xmm6,xmm11,xmm14 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD[((96-128))+rax] + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm10 + vpxor xmm3,xmm3,xmm0 + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD[((16-128))+rax] + + vpslld xmm8,xmm12,5 + vpaddd xmm11,xmm11,xmm15 + vpxor xmm6,xmm10,xmm13 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD[((112-128))+rax] + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm14 + vpxor xmm4,xmm4,xmm1 + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpslld xmm8,xmm11,5 + vpaddd xmm10,xmm10,xmm15 + vpxor xmm6,xmm14,xmm12 + + vpsrld xmm9,xmm11,27 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm6,xmm6,xmm13 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm12,xmm12,xmm7 + mov ecx,1 + cmp ecx,DWORD[rbx] + cmovge r8,rbp + cmp ecx,DWORD[4+rbx] + cmovge r9,rbp + cmp ecx,DWORD[8+rbx] + cmovge r10,rbp + cmp ecx,DWORD[12+rbx] + cmovge r11,rbp + vmovdqu xmm6,XMMWORD[rbx] + vpxor xmm8,xmm8,xmm8 + vmovdqa xmm7,xmm6 + vpcmpgtd xmm7,xmm7,xmm8 + vpaddd xmm6,xmm6,xmm7 + + vpand xmm10,xmm10,xmm7 + vpand xmm11,xmm11,xmm7 + vpaddd xmm10,xmm10,XMMWORD[rdi] + vpand xmm12,xmm12,xmm7 + vpaddd xmm11,xmm11,XMMWORD[32+rdi] + vpand xmm13,xmm13,xmm7 + vpaddd xmm12,xmm12,XMMWORD[64+rdi] + vpand xmm14,xmm14,xmm7 + vpaddd xmm13,xmm13,XMMWORD[96+rdi] + vpaddd xmm14,xmm14,XMMWORD[128+rdi] + vmovdqu XMMWORD[rdi],xmm10 + vmovdqu XMMWORD[32+rdi],xmm11 + vmovdqu XMMWORD[64+rdi],xmm12 + vmovdqu XMMWORD[96+rdi],xmm13 + vmovdqu XMMWORD[128+rdi],xmm14 + + vmovdqu XMMWORD[rbx],xmm6 + vmovdqu xmm5,XMMWORD[96+rbp] + dec edx + jnz NEAR $L$oop_avx + + mov edx,DWORD[280+rsp] + lea rdi,[16+rdi] + lea rsi,[64+rsi] + dec edx + jnz NEAR $L$oop_grande_avx + +$L$done_avx: + mov rax,QWORD[272+rsp] + + vzeroupper + movaps xmm6,XMMWORD[((-184))+rax] + movaps xmm7,XMMWORD[((-168))+rax] + movaps xmm8,XMMWORD[((-152))+rax] + movaps xmm9,XMMWORD[((-136))+rax] + movaps xmm10,XMMWORD[((-120))+rax] + movaps xmm11,XMMWORD[((-104))+rax] + movaps xmm12,XMMWORD[((-88))+rax] + movaps xmm13,XMMWORD[((-72))+rax] + movaps xmm14,XMMWORD[((-56))+rax] + movaps xmm15,XMMWORD[((-40))+rax] + mov rbp,QWORD[((-16))+rax] + + mov rbx,QWORD[((-8))+rax] + + lea rsp,[rax] + +$L$epilogue_avx: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha1_multi_block_avx: + +ALIGN 32 +sha1_multi_block_avx2: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_multi_block_avx2: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_avx2_shortcut: + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + lea rsp,[((-168))+rsp] + movaps XMMWORD[rsp],xmm6 + movaps XMMWORD[16+rsp],xmm7 + movaps XMMWORD[32+rsp],xmm8 + movaps XMMWORD[48+rsp],xmm9 + movaps XMMWORD[64+rsp],xmm10 + movaps XMMWORD[80+rsp],xmm11 + movaps XMMWORD[(-120)+rax],xmm12 + movaps XMMWORD[(-104)+rax],xmm13 + movaps XMMWORD[(-88)+rax],xmm14 + movaps XMMWORD[(-72)+rax],xmm15 + sub rsp,576 + and rsp,-256 + mov QWORD[544+rsp],rax + +$L$body_avx2: + lea rbp,[K_XX_XX] + shr edx,1 + + vzeroupper +$L$oop_grande_avx2: + mov DWORD[552+rsp],edx + xor edx,edx + lea rbx,[512+rsp] + mov r12,QWORD[rsi] + mov ecx,DWORD[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[rbx],ecx + cmovle r12,rbp + mov r13,QWORD[16+rsi] + mov ecx,DWORD[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[4+rbx],ecx + cmovle r13,rbp + mov r14,QWORD[32+rsi] + mov ecx,DWORD[40+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[8+rbx],ecx + cmovle r14,rbp + mov r15,QWORD[48+rsi] + mov ecx,DWORD[56+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[12+rbx],ecx + cmovle r15,rbp + mov r8,QWORD[64+rsi] + mov ecx,DWORD[72+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[16+rbx],ecx + cmovle r8,rbp + mov r9,QWORD[80+rsi] + mov ecx,DWORD[88+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[20+rbx],ecx + cmovle r9,rbp + mov r10,QWORD[96+rsi] + mov ecx,DWORD[104+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[24+rbx],ecx + cmovle r10,rbp + mov r11,QWORD[112+rsi] + mov ecx,DWORD[120+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[28+rbx],ecx + cmovle r11,rbp + vmovdqu ymm0,YMMWORD[rdi] + lea rax,[128+rsp] + vmovdqu ymm1,YMMWORD[32+rdi] + lea rbx,[((256+128))+rsp] + vmovdqu ymm2,YMMWORD[64+rdi] + vmovdqu ymm3,YMMWORD[96+rdi] + vmovdqu ymm4,YMMWORD[128+rdi] + vmovdqu ymm9,YMMWORD[96+rbp] + jmp NEAR $L$oop_avx2 + +ALIGN 32 +$L$oop_avx2: + vmovdqa ymm15,YMMWORD[((-32))+rbp] + vmovd xmm10,DWORD[r12] + lea r12,[64+r12] + vmovd xmm12,DWORD[r8] + lea r8,[64+r8] + vmovd xmm7,DWORD[r13] + lea r13,[64+r13] + vmovd xmm6,DWORD[r9] + lea r9,[64+r9] + vpinsrd xmm10,xmm10,DWORD[r14],1 + lea r14,[64+r14] + vpinsrd xmm12,xmm12,DWORD[r10],1 + lea r10,[64+r10] + vpinsrd xmm7,xmm7,DWORD[r15],1 + lea r15,[64+r15] + vpunpckldq ymm10,ymm10,ymm7 + vpinsrd xmm6,xmm6,DWORD[r11],1 + lea r11,[64+r11] + vpunpckldq ymm12,ymm12,ymm6 + vmovd xmm11,DWORD[((-60))+r12] + vinserti128 ymm10,ymm10,xmm12,1 + vmovd xmm8,DWORD[((-60))+r8] + vpshufb ymm10,ymm10,ymm9 + vmovd xmm7,DWORD[((-60))+r13] + vmovd xmm6,DWORD[((-60))+r9] + vpinsrd xmm11,xmm11,DWORD[((-60))+r14],1 + vpinsrd xmm8,xmm8,DWORD[((-60))+r10],1 + vpinsrd xmm7,xmm7,DWORD[((-60))+r15],1 + vpunpckldq ymm11,ymm11,ymm7 + vpinsrd xmm6,xmm6,DWORD[((-60))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm4,ymm4,ymm15 + vpslld ymm7,ymm0,5 + vpandn ymm6,ymm1,ymm3 + vpand ymm5,ymm1,ymm2 + + vmovdqa YMMWORD[(0-128)+rax],ymm10 + vpaddd ymm4,ymm4,ymm10 + vinserti128 ymm11,ymm11,xmm8,1 + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm12,DWORD[((-56))+r12] + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD[((-56))+r8] + vpaddd ymm4,ymm4,ymm5 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpshufb ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vmovd xmm7,DWORD[((-56))+r13] + vmovd xmm6,DWORD[((-56))+r9] + vpinsrd xmm12,xmm12,DWORD[((-56))+r14],1 + vpinsrd xmm8,xmm8,DWORD[((-56))+r10],1 + vpinsrd xmm7,xmm7,DWORD[((-56))+r15],1 + vpunpckldq ymm12,ymm12,ymm7 + vpinsrd xmm6,xmm6,DWORD[((-56))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm3,ymm3,ymm15 + vpslld ymm7,ymm4,5 + vpandn ymm6,ymm0,ymm2 + vpand ymm5,ymm0,ymm1 + + vmovdqa YMMWORD[(32-128)+rax],ymm11 + vpaddd ymm3,ymm3,ymm11 + vinserti128 ymm12,ymm12,xmm8,1 + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm13,DWORD[((-52))+r12] + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD[((-52))+r8] + vpaddd ymm3,ymm3,ymm5 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpshufb ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vmovd xmm7,DWORD[((-52))+r13] + vmovd xmm6,DWORD[((-52))+r9] + vpinsrd xmm13,xmm13,DWORD[((-52))+r14],1 + vpinsrd xmm8,xmm8,DWORD[((-52))+r10],1 + vpinsrd xmm7,xmm7,DWORD[((-52))+r15],1 + vpunpckldq ymm13,ymm13,ymm7 + vpinsrd xmm6,xmm6,DWORD[((-52))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm2,ymm2,ymm15 + vpslld ymm7,ymm3,5 + vpandn ymm6,ymm4,ymm1 + vpand ymm5,ymm4,ymm0 + + vmovdqa YMMWORD[(64-128)+rax],ymm12 + vpaddd ymm2,ymm2,ymm12 + vinserti128 ymm13,ymm13,xmm8,1 + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm14,DWORD[((-48))+r12] + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD[((-48))+r8] + vpaddd ymm2,ymm2,ymm5 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpshufb ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vmovd xmm7,DWORD[((-48))+r13] + vmovd xmm6,DWORD[((-48))+r9] + vpinsrd xmm14,xmm14,DWORD[((-48))+r14],1 + vpinsrd xmm8,xmm8,DWORD[((-48))+r10],1 + vpinsrd xmm7,xmm7,DWORD[((-48))+r15],1 + vpunpckldq ymm14,ymm14,ymm7 + vpinsrd xmm6,xmm6,DWORD[((-48))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm1,ymm1,ymm15 + vpslld ymm7,ymm2,5 + vpandn ymm6,ymm3,ymm0 + vpand ymm5,ymm3,ymm4 + + vmovdqa YMMWORD[(96-128)+rax],ymm13 + vpaddd ymm1,ymm1,ymm13 + vinserti128 ymm14,ymm14,xmm8,1 + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm10,DWORD[((-44))+r12] + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD[((-44))+r8] + vpaddd ymm1,ymm1,ymm5 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpshufb ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vmovd xmm7,DWORD[((-44))+r13] + vmovd xmm6,DWORD[((-44))+r9] + vpinsrd xmm10,xmm10,DWORD[((-44))+r14],1 + vpinsrd xmm8,xmm8,DWORD[((-44))+r10],1 + vpinsrd xmm7,xmm7,DWORD[((-44))+r15],1 + vpunpckldq ymm10,ymm10,ymm7 + vpinsrd xmm6,xmm6,DWORD[((-44))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm0,ymm0,ymm15 + vpslld ymm7,ymm1,5 + vpandn ymm6,ymm2,ymm4 + vpand ymm5,ymm2,ymm3 + + vmovdqa YMMWORD[(128-128)+rax],ymm14 + vpaddd ymm0,ymm0,ymm14 + vinserti128 ymm10,ymm10,xmm8,1 + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm11,DWORD[((-40))+r12] + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD[((-40))+r8] + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpshufb ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vmovd xmm7,DWORD[((-40))+r13] + vmovd xmm6,DWORD[((-40))+r9] + vpinsrd xmm11,xmm11,DWORD[((-40))+r14],1 + vpinsrd xmm8,xmm8,DWORD[((-40))+r10],1 + vpinsrd xmm7,xmm7,DWORD[((-40))+r15],1 + vpunpckldq ymm11,ymm11,ymm7 + vpinsrd xmm6,xmm6,DWORD[((-40))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm4,ymm4,ymm15 + vpslld ymm7,ymm0,5 + vpandn ymm6,ymm1,ymm3 + vpand ymm5,ymm1,ymm2 + + vmovdqa YMMWORD[(160-128)+rax],ymm10 + vpaddd ymm4,ymm4,ymm10 + vinserti128 ymm11,ymm11,xmm8,1 + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm12,DWORD[((-36))+r12] + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD[((-36))+r8] + vpaddd ymm4,ymm4,ymm5 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpshufb ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vmovd xmm7,DWORD[((-36))+r13] + vmovd xmm6,DWORD[((-36))+r9] + vpinsrd xmm12,xmm12,DWORD[((-36))+r14],1 + vpinsrd xmm8,xmm8,DWORD[((-36))+r10],1 + vpinsrd xmm7,xmm7,DWORD[((-36))+r15],1 + vpunpckldq ymm12,ymm12,ymm7 + vpinsrd xmm6,xmm6,DWORD[((-36))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm3,ymm3,ymm15 + vpslld ymm7,ymm4,5 + vpandn ymm6,ymm0,ymm2 + vpand ymm5,ymm0,ymm1 + + vmovdqa YMMWORD[(192-128)+rax],ymm11 + vpaddd ymm3,ymm3,ymm11 + vinserti128 ymm12,ymm12,xmm8,1 + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm13,DWORD[((-32))+r12] + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD[((-32))+r8] + vpaddd ymm3,ymm3,ymm5 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpshufb ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vmovd xmm7,DWORD[((-32))+r13] + vmovd xmm6,DWORD[((-32))+r9] + vpinsrd xmm13,xmm13,DWORD[((-32))+r14],1 + vpinsrd xmm8,xmm8,DWORD[((-32))+r10],1 + vpinsrd xmm7,xmm7,DWORD[((-32))+r15],1 + vpunpckldq ymm13,ymm13,ymm7 + vpinsrd xmm6,xmm6,DWORD[((-32))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm2,ymm2,ymm15 + vpslld ymm7,ymm3,5 + vpandn ymm6,ymm4,ymm1 + vpand ymm5,ymm4,ymm0 + + vmovdqa YMMWORD[(224-128)+rax],ymm12 + vpaddd ymm2,ymm2,ymm12 + vinserti128 ymm13,ymm13,xmm8,1 + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm14,DWORD[((-28))+r12] + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD[((-28))+r8] + vpaddd ymm2,ymm2,ymm5 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpshufb ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vmovd xmm7,DWORD[((-28))+r13] + vmovd xmm6,DWORD[((-28))+r9] + vpinsrd xmm14,xmm14,DWORD[((-28))+r14],1 + vpinsrd xmm8,xmm8,DWORD[((-28))+r10],1 + vpinsrd xmm7,xmm7,DWORD[((-28))+r15],1 + vpunpckldq ymm14,ymm14,ymm7 + vpinsrd xmm6,xmm6,DWORD[((-28))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm1,ymm1,ymm15 + vpslld ymm7,ymm2,5 + vpandn ymm6,ymm3,ymm0 + vpand ymm5,ymm3,ymm4 + + vmovdqa YMMWORD[(256-256-128)+rbx],ymm13 + vpaddd ymm1,ymm1,ymm13 + vinserti128 ymm14,ymm14,xmm8,1 + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm10,DWORD[((-24))+r12] + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD[((-24))+r8] + vpaddd ymm1,ymm1,ymm5 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpshufb ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vmovd xmm7,DWORD[((-24))+r13] + vmovd xmm6,DWORD[((-24))+r9] + vpinsrd xmm10,xmm10,DWORD[((-24))+r14],1 + vpinsrd xmm8,xmm8,DWORD[((-24))+r10],1 + vpinsrd xmm7,xmm7,DWORD[((-24))+r15],1 + vpunpckldq ymm10,ymm10,ymm7 + vpinsrd xmm6,xmm6,DWORD[((-24))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm0,ymm0,ymm15 + vpslld ymm7,ymm1,5 + vpandn ymm6,ymm2,ymm4 + vpand ymm5,ymm2,ymm3 + + vmovdqa YMMWORD[(288-256-128)+rbx],ymm14 + vpaddd ymm0,ymm0,ymm14 + vinserti128 ymm10,ymm10,xmm8,1 + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm11,DWORD[((-20))+r12] + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD[((-20))+r8] + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpshufb ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vmovd xmm7,DWORD[((-20))+r13] + vmovd xmm6,DWORD[((-20))+r9] + vpinsrd xmm11,xmm11,DWORD[((-20))+r14],1 + vpinsrd xmm8,xmm8,DWORD[((-20))+r10],1 + vpinsrd xmm7,xmm7,DWORD[((-20))+r15],1 + vpunpckldq ymm11,ymm11,ymm7 + vpinsrd xmm6,xmm6,DWORD[((-20))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm4,ymm4,ymm15 + vpslld ymm7,ymm0,5 + vpandn ymm6,ymm1,ymm3 + vpand ymm5,ymm1,ymm2 + + vmovdqa YMMWORD[(320-256-128)+rbx],ymm10 + vpaddd ymm4,ymm4,ymm10 + vinserti128 ymm11,ymm11,xmm8,1 + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm12,DWORD[((-16))+r12] + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD[((-16))+r8] + vpaddd ymm4,ymm4,ymm5 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpshufb ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vmovd xmm7,DWORD[((-16))+r13] + vmovd xmm6,DWORD[((-16))+r9] + vpinsrd xmm12,xmm12,DWORD[((-16))+r14],1 + vpinsrd xmm8,xmm8,DWORD[((-16))+r10],1 + vpinsrd xmm7,xmm7,DWORD[((-16))+r15],1 + vpunpckldq ymm12,ymm12,ymm7 + vpinsrd xmm6,xmm6,DWORD[((-16))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm3,ymm3,ymm15 + vpslld ymm7,ymm4,5 + vpandn ymm6,ymm0,ymm2 + vpand ymm5,ymm0,ymm1 + + vmovdqa YMMWORD[(352-256-128)+rbx],ymm11 + vpaddd ymm3,ymm3,ymm11 + vinserti128 ymm12,ymm12,xmm8,1 + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm13,DWORD[((-12))+r12] + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD[((-12))+r8] + vpaddd ymm3,ymm3,ymm5 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpshufb ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vmovd xmm7,DWORD[((-12))+r13] + vmovd xmm6,DWORD[((-12))+r9] + vpinsrd xmm13,xmm13,DWORD[((-12))+r14],1 + vpinsrd xmm8,xmm8,DWORD[((-12))+r10],1 + vpinsrd xmm7,xmm7,DWORD[((-12))+r15],1 + vpunpckldq ymm13,ymm13,ymm7 + vpinsrd xmm6,xmm6,DWORD[((-12))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm2,ymm2,ymm15 + vpslld ymm7,ymm3,5 + vpandn ymm6,ymm4,ymm1 + vpand ymm5,ymm4,ymm0 + + vmovdqa YMMWORD[(384-256-128)+rbx],ymm12 + vpaddd ymm2,ymm2,ymm12 + vinserti128 ymm13,ymm13,xmm8,1 + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm14,DWORD[((-8))+r12] + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD[((-8))+r8] + vpaddd ymm2,ymm2,ymm5 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpshufb ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vmovd xmm7,DWORD[((-8))+r13] + vmovd xmm6,DWORD[((-8))+r9] + vpinsrd xmm14,xmm14,DWORD[((-8))+r14],1 + vpinsrd xmm8,xmm8,DWORD[((-8))+r10],1 + vpinsrd xmm7,xmm7,DWORD[((-8))+r15],1 + vpunpckldq ymm14,ymm14,ymm7 + vpinsrd xmm6,xmm6,DWORD[((-8))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm1,ymm1,ymm15 + vpslld ymm7,ymm2,5 + vpandn ymm6,ymm3,ymm0 + vpand ymm5,ymm3,ymm4 + + vmovdqa YMMWORD[(416-256-128)+rbx],ymm13 + vpaddd ymm1,ymm1,ymm13 + vinserti128 ymm14,ymm14,xmm8,1 + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm10,DWORD[((-4))+r12] + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD[((-4))+r8] + vpaddd ymm1,ymm1,ymm5 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpshufb ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vmovdqa ymm11,YMMWORD[((0-128))+rax] + vmovd xmm7,DWORD[((-4))+r13] + vmovd xmm6,DWORD[((-4))+r9] + vpinsrd xmm10,xmm10,DWORD[((-4))+r14],1 + vpinsrd xmm8,xmm8,DWORD[((-4))+r10],1 + vpinsrd xmm7,xmm7,DWORD[((-4))+r15],1 + vpunpckldq ymm10,ymm10,ymm7 + vpinsrd xmm6,xmm6,DWORD[((-4))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm0,ymm0,ymm15 + prefetcht0 [63+r12] + vpslld ymm7,ymm1,5 + vpandn ymm6,ymm2,ymm4 + vpand ymm5,ymm2,ymm3 + + vmovdqa YMMWORD[(448-256-128)+rbx],ymm14 + vpaddd ymm0,ymm0,ymm14 + vinserti128 ymm10,ymm10,xmm8,1 + vpsrld ymm8,ymm1,27 + prefetcht0 [63+r13] + vpxor ymm5,ymm5,ymm6 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + prefetcht0 [63+r14] + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + prefetcht0 [63+r15] + vpshufb ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vmovdqa ymm12,YMMWORD[((32-128))+rax] + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD[((64-128))+rax] + + vpaddd ymm4,ymm4,ymm15 + vpslld ymm7,ymm0,5 + vpandn ymm6,ymm1,ymm3 + prefetcht0 [63+r8] + vpand ymm5,ymm1,ymm2 + + vmovdqa YMMWORD[(480-256-128)+rbx],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD[((256-256-128))+rbx] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm6 + vpxor ymm11,ymm11,ymm13 + prefetcht0 [63+r9] + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm4,ymm4,ymm5 + prefetcht0 [63+r10] + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 + prefetcht0 [63+r11] + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD[((96-128))+rax] + + vpaddd ymm3,ymm3,ymm15 + vpslld ymm7,ymm4,5 + vpandn ymm6,ymm0,ymm2 + + vpand ymm5,ymm0,ymm1 + + vmovdqa YMMWORD[(0-128)+rax],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD[((288-256-128))+rbx] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm6 + vpxor ymm12,ymm12,ymm14 + + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD[((128-128))+rax] + + vpaddd ymm2,ymm2,ymm15 + vpslld ymm7,ymm3,5 + vpandn ymm6,ymm4,ymm1 + + vpand ymm5,ymm4,ymm0 + + vmovdqa YMMWORD[(32-128)+rax],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD[((320-256-128))+rbx] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm6 + vpxor ymm13,ymm13,ymm10 + + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD[((160-128))+rax] + + vpaddd ymm1,ymm1,ymm15 + vpslld ymm7,ymm2,5 + vpandn ymm6,ymm3,ymm0 + + vpand ymm5,ymm3,ymm4 + + vmovdqa YMMWORD[(64-128)+rax],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD[((352-256-128))+rbx] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm6 + vpxor ymm14,ymm14,ymm11 + + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD[((192-128))+rax] + + vpaddd ymm0,ymm0,ymm15 + vpslld ymm7,ymm1,5 + vpandn ymm6,ymm2,ymm4 + + vpand ymm5,ymm2,ymm3 + + vmovdqa YMMWORD[(96-128)+rax],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm10,ymm10,YMMWORD[((384-256-128))+rbx] + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm6 + vpxor ymm10,ymm10,ymm12 + + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm9,ymm10,31 + vpaddd ymm10,ymm10,ymm10 + + vpsrld ymm2,ymm2,2 + + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vmovdqa ymm15,YMMWORD[rbp] + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD[((224-128))+rax] + + vpslld ymm7,ymm0,5 + vpaddd ymm4,ymm4,ymm15 + vpxor ymm5,ymm3,ymm1 + vmovdqa YMMWORD[(128-128)+rax],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD[((416-256-128))+rbx] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm2 + vpxor ymm11,ymm11,ymm13 + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm4,ymm4,ymm5 + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD[((256-256-128))+rbx] + + vpslld ymm7,ymm4,5 + vpaddd ymm3,ymm3,ymm15 + vpxor ymm5,ymm2,ymm0 + vmovdqa YMMWORD[(160-128)+rax],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD[((448-256-128))+rbx] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm1 + vpxor ymm12,ymm12,ymm14 + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD[((288-256-128))+rbx] + + vpslld ymm7,ymm3,5 + vpaddd ymm2,ymm2,ymm15 + vpxor ymm5,ymm1,ymm4 + vmovdqa YMMWORD[(192-128)+rax],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD[((480-256-128))+rbx] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm0 + vpxor ymm13,ymm13,ymm10 + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD[((320-256-128))+rbx] + + vpslld ymm7,ymm2,5 + vpaddd ymm1,ymm1,ymm15 + vpxor ymm5,ymm0,ymm3 + vmovdqa YMMWORD[(224-128)+rax],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD[((0-128))+rax] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm4 + vpxor ymm14,ymm14,ymm11 + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD[((352-256-128))+rbx] + + vpslld ymm7,ymm1,5 + vpaddd ymm0,ymm0,ymm15 + vpxor ymm5,ymm4,ymm2 + vmovdqa YMMWORD[(256-256-128)+rbx],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm10,ymm10,YMMWORD[((32-128))+rax] + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm3 + vpxor ymm10,ymm10,ymm12 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + vpsrld ymm9,ymm10,31 + vpaddd ymm10,ymm10,ymm10 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD[((384-256-128))+rbx] + + vpslld ymm7,ymm0,5 + vpaddd ymm4,ymm4,ymm15 + vpxor ymm5,ymm3,ymm1 + vmovdqa YMMWORD[(288-256-128)+rbx],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD[((64-128))+rax] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm2 + vpxor ymm11,ymm11,ymm13 + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm4,ymm4,ymm5 + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD[((416-256-128))+rbx] + + vpslld ymm7,ymm4,5 + vpaddd ymm3,ymm3,ymm15 + vpxor ymm5,ymm2,ymm0 + vmovdqa YMMWORD[(320-256-128)+rbx],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD[((96-128))+rax] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm1 + vpxor ymm12,ymm12,ymm14 + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD[((448-256-128))+rbx] + + vpslld ymm7,ymm3,5 + vpaddd ymm2,ymm2,ymm15 + vpxor ymm5,ymm1,ymm4 + vmovdqa YMMWORD[(352-256-128)+rbx],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD[((128-128))+rax] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm0 + vpxor ymm13,ymm13,ymm10 + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD[((480-256-128))+rbx] + + vpslld ymm7,ymm2,5 + vpaddd ymm1,ymm1,ymm15 + vpxor ymm5,ymm0,ymm3 + vmovdqa YMMWORD[(384-256-128)+rbx],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD[((160-128))+rax] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm4 + vpxor ymm14,ymm14,ymm11 + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD[((0-128))+rax] + + vpslld ymm7,ymm1,5 + vpaddd ymm0,ymm0,ymm15 + vpxor ymm5,ymm4,ymm2 + vmovdqa YMMWORD[(416-256-128)+rbx],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm10,ymm10,YMMWORD[((192-128))+rax] + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm3 + vpxor ymm10,ymm10,ymm12 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + vpsrld ymm9,ymm10,31 + vpaddd ymm10,ymm10,ymm10 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD[((32-128))+rax] + + vpslld ymm7,ymm0,5 + vpaddd ymm4,ymm4,ymm15 + vpxor ymm5,ymm3,ymm1 + vmovdqa YMMWORD[(448-256-128)+rbx],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD[((224-128))+rax] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm2 + vpxor ymm11,ymm11,ymm13 + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm4,ymm4,ymm5 + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD[((64-128))+rax] + + vpslld ymm7,ymm4,5 + vpaddd ymm3,ymm3,ymm15 + vpxor ymm5,ymm2,ymm0 + vmovdqa YMMWORD[(480-256-128)+rbx],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD[((256-256-128))+rbx] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm1 + vpxor ymm12,ymm12,ymm14 + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD[((96-128))+rax] + + vpslld ymm7,ymm3,5 + vpaddd ymm2,ymm2,ymm15 + vpxor ymm5,ymm1,ymm4 + vmovdqa YMMWORD[(0-128)+rax],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD[((288-256-128))+rbx] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm0 + vpxor ymm13,ymm13,ymm10 + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD[((128-128))+rax] + + vpslld ymm7,ymm2,5 + vpaddd ymm1,ymm1,ymm15 + vpxor ymm5,ymm0,ymm3 + vmovdqa YMMWORD[(32-128)+rax],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD[((320-256-128))+rbx] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm4 + vpxor ymm14,ymm14,ymm11 + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD[((160-128))+rax] + + vpslld ymm7,ymm1,5 + vpaddd ymm0,ymm0,ymm15 + vpxor ymm5,ymm4,ymm2 + vmovdqa YMMWORD[(64-128)+rax],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm10,ymm10,YMMWORD[((352-256-128))+rbx] + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm3 + vpxor ymm10,ymm10,ymm12 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + vpsrld ymm9,ymm10,31 + vpaddd ymm10,ymm10,ymm10 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD[((192-128))+rax] + + vpslld ymm7,ymm0,5 + vpaddd ymm4,ymm4,ymm15 + vpxor ymm5,ymm3,ymm1 + vmovdqa YMMWORD[(96-128)+rax],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD[((384-256-128))+rbx] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm2 + vpxor ymm11,ymm11,ymm13 + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm4,ymm4,ymm5 + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD[((224-128))+rax] + + vpslld ymm7,ymm4,5 + vpaddd ymm3,ymm3,ymm15 + vpxor ymm5,ymm2,ymm0 + vmovdqa YMMWORD[(128-128)+rax],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD[((416-256-128))+rbx] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm1 + vpxor ymm12,ymm12,ymm14 + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD[((256-256-128))+rbx] + + vpslld ymm7,ymm3,5 + vpaddd ymm2,ymm2,ymm15 + vpxor ymm5,ymm1,ymm4 + vmovdqa YMMWORD[(160-128)+rax],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD[((448-256-128))+rbx] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm0 + vpxor ymm13,ymm13,ymm10 + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD[((288-256-128))+rbx] + + vpslld ymm7,ymm2,5 + vpaddd ymm1,ymm1,ymm15 + vpxor ymm5,ymm0,ymm3 + vmovdqa YMMWORD[(192-128)+rax],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD[((480-256-128))+rbx] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm4 + vpxor ymm14,ymm14,ymm11 + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD[((320-256-128))+rbx] + + vpslld ymm7,ymm1,5 + vpaddd ymm0,ymm0,ymm15 + vpxor ymm5,ymm4,ymm2 + vmovdqa YMMWORD[(224-128)+rax],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm10,ymm10,YMMWORD[((0-128))+rax] + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm3 + vpxor ymm10,ymm10,ymm12 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + vpsrld ymm9,ymm10,31 + vpaddd ymm10,ymm10,ymm10 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vmovdqa ymm15,YMMWORD[32+rbp] + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD[((352-256-128))+rbx] + + vpaddd ymm4,ymm4,ymm15 + vpslld ymm7,ymm0,5 + vpand ymm6,ymm3,ymm2 + vpxor ymm11,ymm11,YMMWORD[((32-128))+rax] + + vpaddd ymm4,ymm4,ymm6 + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm3,ymm2 + vpxor ymm11,ymm11,ymm13 + + vmovdqu YMMWORD[(256-256-128)+rbx],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm11,31 + vpand ymm5,ymm5,ymm1 + vpaddd ymm11,ymm11,ymm11 + + vpslld ymm6,ymm1,30 + vpaddd ymm4,ymm4,ymm5 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD[((384-256-128))+rbx] + + vpaddd ymm3,ymm3,ymm15 + vpslld ymm7,ymm4,5 + vpand ymm6,ymm2,ymm1 + vpxor ymm12,ymm12,YMMWORD[((64-128))+rax] + + vpaddd ymm3,ymm3,ymm6 + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm2,ymm1 + vpxor ymm12,ymm12,ymm14 + + vmovdqu YMMWORD[(288-256-128)+rbx],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm12,31 + vpand ymm5,ymm5,ymm0 + vpaddd ymm12,ymm12,ymm12 + + vpslld ymm6,ymm0,30 + vpaddd ymm3,ymm3,ymm5 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD[((416-256-128))+rbx] + + vpaddd ymm2,ymm2,ymm15 + vpslld ymm7,ymm3,5 + vpand ymm6,ymm1,ymm0 + vpxor ymm13,ymm13,YMMWORD[((96-128))+rax] + + vpaddd ymm2,ymm2,ymm6 + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm1,ymm0 + vpxor ymm13,ymm13,ymm10 + + vmovdqu YMMWORD[(320-256-128)+rbx],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm13,31 + vpand ymm5,ymm5,ymm4 + vpaddd ymm13,ymm13,ymm13 + + vpslld ymm6,ymm4,30 + vpaddd ymm2,ymm2,ymm5 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD[((448-256-128))+rbx] + + vpaddd ymm1,ymm1,ymm15 + vpslld ymm7,ymm2,5 + vpand ymm6,ymm0,ymm4 + vpxor ymm14,ymm14,YMMWORD[((128-128))+rax] + + vpaddd ymm1,ymm1,ymm6 + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm0,ymm4 + vpxor ymm14,ymm14,ymm11 + + vmovdqu YMMWORD[(352-256-128)+rbx],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm14,31 + vpand ymm5,ymm5,ymm3 + vpaddd ymm14,ymm14,ymm14 + + vpslld ymm6,ymm3,30 + vpaddd ymm1,ymm1,ymm5 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD[((480-256-128))+rbx] + + vpaddd ymm0,ymm0,ymm15 + vpslld ymm7,ymm1,5 + vpand ymm6,ymm4,ymm3 + vpxor ymm10,ymm10,YMMWORD[((160-128))+rax] + + vpaddd ymm0,ymm0,ymm6 + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm4,ymm3 + vpxor ymm10,ymm10,ymm12 + + vmovdqu YMMWORD[(384-256-128)+rbx],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm10,31 + vpand ymm5,ymm5,ymm2 + vpaddd ymm10,ymm10,ymm10 + + vpslld ymm6,ymm2,30 + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD[((0-128))+rax] + + vpaddd ymm4,ymm4,ymm15 + vpslld ymm7,ymm0,5 + vpand ymm6,ymm3,ymm2 + vpxor ymm11,ymm11,YMMWORD[((192-128))+rax] + + vpaddd ymm4,ymm4,ymm6 + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm3,ymm2 + vpxor ymm11,ymm11,ymm13 + + vmovdqu YMMWORD[(416-256-128)+rbx],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm11,31 + vpand ymm5,ymm5,ymm1 + vpaddd ymm11,ymm11,ymm11 + + vpslld ymm6,ymm1,30 + vpaddd ymm4,ymm4,ymm5 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD[((32-128))+rax] + + vpaddd ymm3,ymm3,ymm15 + vpslld ymm7,ymm4,5 + vpand ymm6,ymm2,ymm1 + vpxor ymm12,ymm12,YMMWORD[((224-128))+rax] + + vpaddd ymm3,ymm3,ymm6 + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm2,ymm1 + vpxor ymm12,ymm12,ymm14 + + vmovdqu YMMWORD[(448-256-128)+rbx],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm12,31 + vpand ymm5,ymm5,ymm0 + vpaddd ymm12,ymm12,ymm12 + + vpslld ymm6,ymm0,30 + vpaddd ymm3,ymm3,ymm5 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD[((64-128))+rax] + + vpaddd ymm2,ymm2,ymm15 + vpslld ymm7,ymm3,5 + vpand ymm6,ymm1,ymm0 + vpxor ymm13,ymm13,YMMWORD[((256-256-128))+rbx] + + vpaddd ymm2,ymm2,ymm6 + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm1,ymm0 + vpxor ymm13,ymm13,ymm10 + + vmovdqu YMMWORD[(480-256-128)+rbx],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm13,31 + vpand ymm5,ymm5,ymm4 + vpaddd ymm13,ymm13,ymm13 + + vpslld ymm6,ymm4,30 + vpaddd ymm2,ymm2,ymm5 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD[((96-128))+rax] + + vpaddd ymm1,ymm1,ymm15 + vpslld ymm7,ymm2,5 + vpand ymm6,ymm0,ymm4 + vpxor ymm14,ymm14,YMMWORD[((288-256-128))+rbx] + + vpaddd ymm1,ymm1,ymm6 + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm0,ymm4 + vpxor ymm14,ymm14,ymm11 + + vmovdqu YMMWORD[(0-128)+rax],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm14,31 + vpand ymm5,ymm5,ymm3 + vpaddd ymm14,ymm14,ymm14 + + vpslld ymm6,ymm3,30 + vpaddd ymm1,ymm1,ymm5 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD[((128-128))+rax] + + vpaddd ymm0,ymm0,ymm15 + vpslld ymm7,ymm1,5 + vpand ymm6,ymm4,ymm3 + vpxor ymm10,ymm10,YMMWORD[((320-256-128))+rbx] + + vpaddd ymm0,ymm0,ymm6 + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm4,ymm3 + vpxor ymm10,ymm10,ymm12 + + vmovdqu YMMWORD[(32-128)+rax],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm10,31 + vpand ymm5,ymm5,ymm2 + vpaddd ymm10,ymm10,ymm10 + + vpslld ymm6,ymm2,30 + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD[((160-128))+rax] + + vpaddd ymm4,ymm4,ymm15 + vpslld ymm7,ymm0,5 + vpand ymm6,ymm3,ymm2 + vpxor ymm11,ymm11,YMMWORD[((352-256-128))+rbx] + + vpaddd ymm4,ymm4,ymm6 + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm3,ymm2 + vpxor ymm11,ymm11,ymm13 + + vmovdqu YMMWORD[(64-128)+rax],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm11,31 + vpand ymm5,ymm5,ymm1 + vpaddd ymm11,ymm11,ymm11 + + vpslld ymm6,ymm1,30 + vpaddd ymm4,ymm4,ymm5 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD[((192-128))+rax] + + vpaddd ymm3,ymm3,ymm15 + vpslld ymm7,ymm4,5 + vpand ymm6,ymm2,ymm1 + vpxor ymm12,ymm12,YMMWORD[((384-256-128))+rbx] + + vpaddd ymm3,ymm3,ymm6 + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm2,ymm1 + vpxor ymm12,ymm12,ymm14 + + vmovdqu YMMWORD[(96-128)+rax],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm12,31 + vpand ymm5,ymm5,ymm0 + vpaddd ymm12,ymm12,ymm12 + + vpslld ymm6,ymm0,30 + vpaddd ymm3,ymm3,ymm5 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD[((224-128))+rax] + + vpaddd ymm2,ymm2,ymm15 + vpslld ymm7,ymm3,5 + vpand ymm6,ymm1,ymm0 + vpxor ymm13,ymm13,YMMWORD[((416-256-128))+rbx] + + vpaddd ymm2,ymm2,ymm6 + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm1,ymm0 + vpxor ymm13,ymm13,ymm10 + + vmovdqu YMMWORD[(128-128)+rax],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm13,31 + vpand ymm5,ymm5,ymm4 + vpaddd ymm13,ymm13,ymm13 + + vpslld ymm6,ymm4,30 + vpaddd ymm2,ymm2,ymm5 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD[((256-256-128))+rbx] + + vpaddd ymm1,ymm1,ymm15 + vpslld ymm7,ymm2,5 + vpand ymm6,ymm0,ymm4 + vpxor ymm14,ymm14,YMMWORD[((448-256-128))+rbx] + + vpaddd ymm1,ymm1,ymm6 + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm0,ymm4 + vpxor ymm14,ymm14,ymm11 + + vmovdqu YMMWORD[(160-128)+rax],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm14,31 + vpand ymm5,ymm5,ymm3 + vpaddd ymm14,ymm14,ymm14 + + vpslld ymm6,ymm3,30 + vpaddd ymm1,ymm1,ymm5 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD[((288-256-128))+rbx] + + vpaddd ymm0,ymm0,ymm15 + vpslld ymm7,ymm1,5 + vpand ymm6,ymm4,ymm3 + vpxor ymm10,ymm10,YMMWORD[((480-256-128))+rbx] + + vpaddd ymm0,ymm0,ymm6 + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm4,ymm3 + vpxor ymm10,ymm10,ymm12 + + vmovdqu YMMWORD[(192-128)+rax],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm10,31 + vpand ymm5,ymm5,ymm2 + vpaddd ymm10,ymm10,ymm10 + + vpslld ymm6,ymm2,30 + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD[((320-256-128))+rbx] + + vpaddd ymm4,ymm4,ymm15 + vpslld ymm7,ymm0,5 + vpand ymm6,ymm3,ymm2 + vpxor ymm11,ymm11,YMMWORD[((0-128))+rax] + + vpaddd ymm4,ymm4,ymm6 + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm3,ymm2 + vpxor ymm11,ymm11,ymm13 + + vmovdqu YMMWORD[(224-128)+rax],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm11,31 + vpand ymm5,ymm5,ymm1 + vpaddd ymm11,ymm11,ymm11 + + vpslld ymm6,ymm1,30 + vpaddd ymm4,ymm4,ymm5 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD[((352-256-128))+rbx] + + vpaddd ymm3,ymm3,ymm15 + vpslld ymm7,ymm4,5 + vpand ymm6,ymm2,ymm1 + vpxor ymm12,ymm12,YMMWORD[((32-128))+rax] + + vpaddd ymm3,ymm3,ymm6 + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm2,ymm1 + vpxor ymm12,ymm12,ymm14 + + vmovdqu YMMWORD[(256-256-128)+rbx],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm12,31 + vpand ymm5,ymm5,ymm0 + vpaddd ymm12,ymm12,ymm12 + + vpslld ymm6,ymm0,30 + vpaddd ymm3,ymm3,ymm5 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD[((384-256-128))+rbx] + + vpaddd ymm2,ymm2,ymm15 + vpslld ymm7,ymm3,5 + vpand ymm6,ymm1,ymm0 + vpxor ymm13,ymm13,YMMWORD[((64-128))+rax] + + vpaddd ymm2,ymm2,ymm6 + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm1,ymm0 + vpxor ymm13,ymm13,ymm10 + + vmovdqu YMMWORD[(288-256-128)+rbx],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm13,31 + vpand ymm5,ymm5,ymm4 + vpaddd ymm13,ymm13,ymm13 + + vpslld ymm6,ymm4,30 + vpaddd ymm2,ymm2,ymm5 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD[((416-256-128))+rbx] + + vpaddd ymm1,ymm1,ymm15 + vpslld ymm7,ymm2,5 + vpand ymm6,ymm0,ymm4 + vpxor ymm14,ymm14,YMMWORD[((96-128))+rax] + + vpaddd ymm1,ymm1,ymm6 + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm0,ymm4 + vpxor ymm14,ymm14,ymm11 + + vmovdqu YMMWORD[(320-256-128)+rbx],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm14,31 + vpand ymm5,ymm5,ymm3 + vpaddd ymm14,ymm14,ymm14 + + vpslld ymm6,ymm3,30 + vpaddd ymm1,ymm1,ymm5 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD[((448-256-128))+rbx] + + vpaddd ymm0,ymm0,ymm15 + vpslld ymm7,ymm1,5 + vpand ymm6,ymm4,ymm3 + vpxor ymm10,ymm10,YMMWORD[((128-128))+rax] + + vpaddd ymm0,ymm0,ymm6 + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm4,ymm3 + vpxor ymm10,ymm10,ymm12 + + vmovdqu YMMWORD[(352-256-128)+rbx],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm10,31 + vpand ymm5,ymm5,ymm2 + vpaddd ymm10,ymm10,ymm10 + + vpslld ymm6,ymm2,30 + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vmovdqa ymm15,YMMWORD[64+rbp] + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD[((480-256-128))+rbx] + + vpslld ymm7,ymm0,5 + vpaddd ymm4,ymm4,ymm15 + vpxor ymm5,ymm3,ymm1 + vmovdqa YMMWORD[(384-256-128)+rbx],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD[((160-128))+rax] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm2 + vpxor ymm11,ymm11,ymm13 + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm4,ymm4,ymm5 + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD[((0-128))+rax] + + vpslld ymm7,ymm4,5 + vpaddd ymm3,ymm3,ymm15 + vpxor ymm5,ymm2,ymm0 + vmovdqa YMMWORD[(416-256-128)+rbx],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD[((192-128))+rax] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm1 + vpxor ymm12,ymm12,ymm14 + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD[((32-128))+rax] + + vpslld ymm7,ymm3,5 + vpaddd ymm2,ymm2,ymm15 + vpxor ymm5,ymm1,ymm4 + vmovdqa YMMWORD[(448-256-128)+rbx],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD[((224-128))+rax] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm0 + vpxor ymm13,ymm13,ymm10 + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD[((64-128))+rax] + + vpslld ymm7,ymm2,5 + vpaddd ymm1,ymm1,ymm15 + vpxor ymm5,ymm0,ymm3 + vmovdqa YMMWORD[(480-256-128)+rbx],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD[((256-256-128))+rbx] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm4 + vpxor ymm14,ymm14,ymm11 + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD[((96-128))+rax] + + vpslld ymm7,ymm1,5 + vpaddd ymm0,ymm0,ymm15 + vpxor ymm5,ymm4,ymm2 + vmovdqa YMMWORD[(0-128)+rax],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm10,ymm10,YMMWORD[((288-256-128))+rbx] + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm3 + vpxor ymm10,ymm10,ymm12 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + vpsrld ymm9,ymm10,31 + vpaddd ymm10,ymm10,ymm10 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD[((128-128))+rax] + + vpslld ymm7,ymm0,5 + vpaddd ymm4,ymm4,ymm15 + vpxor ymm5,ymm3,ymm1 + vmovdqa YMMWORD[(32-128)+rax],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD[((320-256-128))+rbx] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm2 + vpxor ymm11,ymm11,ymm13 + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm4,ymm4,ymm5 + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD[((160-128))+rax] + + vpslld ymm7,ymm4,5 + vpaddd ymm3,ymm3,ymm15 + vpxor ymm5,ymm2,ymm0 + vmovdqa YMMWORD[(64-128)+rax],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD[((352-256-128))+rbx] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm1 + vpxor ymm12,ymm12,ymm14 + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD[((192-128))+rax] + + vpslld ymm7,ymm3,5 + vpaddd ymm2,ymm2,ymm15 + vpxor ymm5,ymm1,ymm4 + vmovdqa YMMWORD[(96-128)+rax],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD[((384-256-128))+rbx] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm0 + vpxor ymm13,ymm13,ymm10 + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD[((224-128))+rax] + + vpslld ymm7,ymm2,5 + vpaddd ymm1,ymm1,ymm15 + vpxor ymm5,ymm0,ymm3 + vmovdqa YMMWORD[(128-128)+rax],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD[((416-256-128))+rbx] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm4 + vpxor ymm14,ymm14,ymm11 + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD[((256-256-128))+rbx] + + vpslld ymm7,ymm1,5 + vpaddd ymm0,ymm0,ymm15 + vpxor ymm5,ymm4,ymm2 + vmovdqa YMMWORD[(160-128)+rax],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm10,ymm10,YMMWORD[((448-256-128))+rbx] + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm3 + vpxor ymm10,ymm10,ymm12 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + vpsrld ymm9,ymm10,31 + vpaddd ymm10,ymm10,ymm10 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD[((288-256-128))+rbx] + + vpslld ymm7,ymm0,5 + vpaddd ymm4,ymm4,ymm15 + vpxor ymm5,ymm3,ymm1 + vmovdqa YMMWORD[(192-128)+rax],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD[((480-256-128))+rbx] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm2 + vpxor ymm11,ymm11,ymm13 + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm4,ymm4,ymm5 + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD[((320-256-128))+rbx] + + vpslld ymm7,ymm4,5 + vpaddd ymm3,ymm3,ymm15 + vpxor ymm5,ymm2,ymm0 + vmovdqa YMMWORD[(224-128)+rax],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD[((0-128))+rax] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm1 + vpxor ymm12,ymm12,ymm14 + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD[((352-256-128))+rbx] + + vpslld ymm7,ymm3,5 + vpaddd ymm2,ymm2,ymm15 + vpxor ymm5,ymm1,ymm4 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD[((32-128))+rax] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm0 + vpxor ymm13,ymm13,ymm10 + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD[((384-256-128))+rbx] + + vpslld ymm7,ymm2,5 + vpaddd ymm1,ymm1,ymm15 + vpxor ymm5,ymm0,ymm3 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD[((64-128))+rax] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm4 + vpxor ymm14,ymm14,ymm11 + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD[((416-256-128))+rbx] + + vpslld ymm7,ymm1,5 + vpaddd ymm0,ymm0,ymm15 + vpxor ymm5,ymm4,ymm2 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm10,ymm10,YMMWORD[((96-128))+rax] + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm3 + vpxor ymm10,ymm10,ymm12 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + vpsrld ymm9,ymm10,31 + vpaddd ymm10,ymm10,ymm10 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD[((448-256-128))+rbx] + + vpslld ymm7,ymm0,5 + vpaddd ymm4,ymm4,ymm15 + vpxor ymm5,ymm3,ymm1 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD[((128-128))+rax] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm2 + vpxor ymm11,ymm11,ymm13 + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm4,ymm4,ymm5 + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD[((480-256-128))+rbx] + + vpslld ymm7,ymm4,5 + vpaddd ymm3,ymm3,ymm15 + vpxor ymm5,ymm2,ymm0 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD[((160-128))+rax] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm1 + vpxor ymm12,ymm12,ymm14 + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD[((0-128))+rax] + + vpslld ymm7,ymm3,5 + vpaddd ymm2,ymm2,ymm15 + vpxor ymm5,ymm1,ymm4 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD[((192-128))+rax] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm0 + vpxor ymm13,ymm13,ymm10 + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD[((32-128))+rax] + + vpslld ymm7,ymm2,5 + vpaddd ymm1,ymm1,ymm15 + vpxor ymm5,ymm0,ymm3 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD[((224-128))+rax] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm4 + vpxor ymm14,ymm14,ymm11 + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpslld ymm7,ymm1,5 + vpaddd ymm0,ymm0,ymm15 + vpxor ymm5,ymm4,ymm2 + + vpsrld ymm8,ymm1,27 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm5,ymm5,ymm3 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm2,ymm2,ymm6 + mov ecx,1 + lea rbx,[512+rsp] + cmp ecx,DWORD[rbx] + cmovge r12,rbp + cmp ecx,DWORD[4+rbx] + cmovge r13,rbp + cmp ecx,DWORD[8+rbx] + cmovge r14,rbp + cmp ecx,DWORD[12+rbx] + cmovge r15,rbp + cmp ecx,DWORD[16+rbx] + cmovge r8,rbp + cmp ecx,DWORD[20+rbx] + cmovge r9,rbp + cmp ecx,DWORD[24+rbx] + cmovge r10,rbp + cmp ecx,DWORD[28+rbx] + cmovge r11,rbp + vmovdqu ymm5,YMMWORD[rbx] + vpxor ymm7,ymm7,ymm7 + vmovdqa ymm6,ymm5 + vpcmpgtd ymm6,ymm6,ymm7 + vpaddd ymm5,ymm5,ymm6 + + vpand ymm0,ymm0,ymm6 + vpand ymm1,ymm1,ymm6 + vpaddd ymm0,ymm0,YMMWORD[rdi] + vpand ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,YMMWORD[32+rdi] + vpand ymm3,ymm3,ymm6 + vpaddd ymm2,ymm2,YMMWORD[64+rdi] + vpand ymm4,ymm4,ymm6 + vpaddd ymm3,ymm3,YMMWORD[96+rdi] + vpaddd ymm4,ymm4,YMMWORD[128+rdi] + vmovdqu YMMWORD[rdi],ymm0 + vmovdqu YMMWORD[32+rdi],ymm1 + vmovdqu YMMWORD[64+rdi],ymm2 + vmovdqu YMMWORD[96+rdi],ymm3 + vmovdqu YMMWORD[128+rdi],ymm4 + + vmovdqu YMMWORD[rbx],ymm5 + lea rbx,[((256+128))+rsp] + vmovdqu ymm9,YMMWORD[96+rbp] + dec edx + jnz NEAR $L$oop_avx2 + + + + + + + +$L$done_avx2: + mov rax,QWORD[544+rsp] + + vzeroupper + movaps xmm6,XMMWORD[((-216))+rax] + movaps xmm7,XMMWORD[((-200))+rax] + movaps xmm8,XMMWORD[((-184))+rax] + movaps xmm9,XMMWORD[((-168))+rax] + movaps xmm10,XMMWORD[((-152))+rax] + movaps xmm11,XMMWORD[((-136))+rax] + movaps xmm12,XMMWORD[((-120))+rax] + movaps xmm13,XMMWORD[((-104))+rax] + movaps xmm14,XMMWORD[((-88))+rax] + movaps xmm15,XMMWORD[((-72))+rax] + mov r15,QWORD[((-48))+rax] + + mov r14,QWORD[((-40))+rax] + + mov r13,QWORD[((-32))+rax] + + mov r12,QWORD[((-24))+rax] + + mov rbp,QWORD[((-16))+rax] + + mov rbx,QWORD[((-8))+rax] + + lea rsp,[rax] + +$L$epilogue_avx2: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha1_multi_block_avx2: + +ALIGN 256 + DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999 + DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +K_XX_XX: + DD 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 + DD 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 + DD 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc + DD 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc + DD 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 + DD 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 + DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +DB 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 +DB 83,72,65,49,32,109,117,108,116,105,45,98,108,111,99,107 +DB 32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120 +DB 56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77 +DB 83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110 +DB 115,115,108,46,111,114,103,62,0 +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$in_prologue + + mov rax,QWORD[272+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + + lea rsi,[((-24-160))+rax] + lea rdi,[512+r8] + mov ecx,20 + DD 0xa548f3fc + +$L$in_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +ALIGN 16 +avx2_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$in_prologue + + mov rax,QWORD[544+r8] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + + lea rsi,[((-56-160))+rax] + lea rdi,[512+r8] + mov ecx,20 + DD 0xa548f3fc + + jmp NEAR $L$in_prologue + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_sha1_multi_block wrt ..imagebase + DD $L$SEH_end_sha1_multi_block wrt ..imagebase + DD $L$SEH_info_sha1_multi_block wrt ..imagebase + DD $L$SEH_begin_sha1_multi_block_shaext wrt ..imagebase + DD $L$SEH_end_sha1_multi_block_shaext wrt ..imagebase + DD $L$SEH_info_sha1_multi_block_shaext wrt ..imagebase + DD $L$SEH_begin_sha1_multi_block_avx wrt ..imagebase + DD $L$SEH_end_sha1_multi_block_avx wrt ..imagebase + DD $L$SEH_info_sha1_multi_block_avx wrt ..imagebase + DD $L$SEH_begin_sha1_multi_block_avx2 wrt ..imagebase + DD $L$SEH_end_sha1_multi_block_avx2 wrt ..imagebase + DD $L$SEH_info_sha1_multi_block_avx2 wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_sha1_multi_block: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$body wrt ..imagebase,$L$epilogue wrt ..imagebase +$L$SEH_info_sha1_multi_block_shaext: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$body_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase +$L$SEH_info_sha1_multi_block_avx: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$body_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase +$L$SEH_info_sha1_multi_block_avx2: +DB 9,0,0,0 + DD avx2_handler wrt ..imagebase + DD $L$body_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase diff --git a/vere/ext/openssl/gen/windows-x86_64/crypto/sha/sha1-x86_64.asm b/vere/ext/openssl/gen/windows-x86_64/crypto/sha/sha1-x86_64.asm new file mode 100644 index 0000000..1b60a7c --- /dev/null +++ b/vere/ext/openssl/gen/windows-x86_64/crypto/sha/sha1-x86_64.asm @@ -0,0 +1,5766 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + +EXTERN OPENSSL_ia32cap_P + +global sha1_block_data_order + +ALIGN 16 +sha1_block_data_order: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + + mov r9d,DWORD[((OPENSSL_ia32cap_P+0))] + mov r8d,DWORD[((OPENSSL_ia32cap_P+4))] + mov r10d,DWORD[((OPENSSL_ia32cap_P+8))] + test r8d,512 + jz NEAR $L$ialu + test r10d,536870912 + jnz NEAR _shaext_shortcut + and r10d,296 + cmp r10d,296 + je NEAR _avx2_shortcut + and r8d,268435456 + and r9d,1073741824 + or r8d,r9d + cmp r8d,1342177280 + je NEAR _avx_shortcut + jmp NEAR _ssse3_shortcut + +ALIGN 16 +$L$ialu: + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + mov r8,rdi + sub rsp,72 + mov r9,rsi + and rsp,-64 + mov r10,rdx + mov QWORD[64+rsp],rax + +$L$prologue: + + mov esi,DWORD[r8] + mov edi,DWORD[4+r8] + mov r11d,DWORD[8+r8] + mov r12d,DWORD[12+r8] + mov r13d,DWORD[16+r8] + jmp NEAR $L$loop + +ALIGN 16 +$L$loop: + mov edx,DWORD[r9] + bswap edx + mov ebp,DWORD[4+r9] + mov eax,r12d + mov DWORD[rsp],edx + mov ecx,esi + bswap ebp + xor eax,r11d + rol ecx,5 + and eax,edi + lea r13d,[1518500249+r13*1+rdx] + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov r14d,DWORD[8+r9] + mov eax,r11d + mov DWORD[4+rsp],ebp + mov ecx,r13d + bswap r14d + xor eax,edi + rol ecx,5 + and eax,esi + lea r12d,[1518500249+r12*1+rbp] + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov edx,DWORD[12+r9] + mov eax,edi + mov DWORD[8+rsp],r14d + mov ecx,r12d + bswap edx + xor eax,esi + rol ecx,5 + and eax,r13d + lea r11d,[1518500249+r11*1+r14] + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov ebp,DWORD[16+r9] + mov eax,esi + mov DWORD[12+rsp],edx + mov ecx,r11d + bswap ebp + xor eax,r13d + rol ecx,5 + and eax,r12d + lea edi,[1518500249+rdi*1+rdx] + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov r14d,DWORD[20+r9] + mov eax,r13d + mov DWORD[16+rsp],ebp + mov ecx,edi + bswap r14d + xor eax,r12d + rol ecx,5 + and eax,r11d + lea esi,[1518500249+rsi*1+rbp] + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + mov edx,DWORD[24+r9] + mov eax,r12d + mov DWORD[20+rsp],r14d + mov ecx,esi + bswap edx + xor eax,r11d + rol ecx,5 + and eax,edi + lea r13d,[1518500249+r13*1+r14] + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov ebp,DWORD[28+r9] + mov eax,r11d + mov DWORD[24+rsp],edx + mov ecx,r13d + bswap ebp + xor eax,edi + rol ecx,5 + and eax,esi + lea r12d,[1518500249+r12*1+rdx] + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov r14d,DWORD[32+r9] + mov eax,edi + mov DWORD[28+rsp],ebp + mov ecx,r12d + bswap r14d + xor eax,esi + rol ecx,5 + and eax,r13d + lea r11d,[1518500249+r11*1+rbp] + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov edx,DWORD[36+r9] + mov eax,esi + mov DWORD[32+rsp],r14d + mov ecx,r11d + bswap edx + xor eax,r13d + rol ecx,5 + and eax,r12d + lea edi,[1518500249+rdi*1+r14] + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov ebp,DWORD[40+r9] + mov eax,r13d + mov DWORD[36+rsp],edx + mov ecx,edi + bswap ebp + xor eax,r12d + rol ecx,5 + and eax,r11d + lea esi,[1518500249+rsi*1+rdx] + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + mov r14d,DWORD[44+r9] + mov eax,r12d + mov DWORD[40+rsp],ebp + mov ecx,esi + bswap r14d + xor eax,r11d + rol ecx,5 + and eax,edi + lea r13d,[1518500249+r13*1+rbp] + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov edx,DWORD[48+r9] + mov eax,r11d + mov DWORD[44+rsp],r14d + mov ecx,r13d + bswap edx + xor eax,edi + rol ecx,5 + and eax,esi + lea r12d,[1518500249+r12*1+r14] + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov ebp,DWORD[52+r9] + mov eax,edi + mov DWORD[48+rsp],edx + mov ecx,r12d + bswap ebp + xor eax,esi + rol ecx,5 + and eax,r13d + lea r11d,[1518500249+r11*1+rdx] + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov r14d,DWORD[56+r9] + mov eax,esi + mov DWORD[52+rsp],ebp + mov ecx,r11d + bswap r14d + xor eax,r13d + rol ecx,5 + and eax,r12d + lea edi,[1518500249+rdi*1+rbp] + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov edx,DWORD[60+r9] + mov eax,r13d + mov DWORD[56+rsp],r14d + mov ecx,edi + bswap edx + xor eax,r12d + rol ecx,5 + and eax,r11d + lea esi,[1518500249+rsi*1+r14] + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + xor ebp,DWORD[rsp] + mov eax,r12d + mov DWORD[60+rsp],edx + mov ecx,esi + xor ebp,DWORD[8+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD[32+rsp] + and eax,edi + lea r13d,[1518500249+r13*1+rdx] + rol edi,30 + xor eax,r12d + add r13d,ecx + rol ebp,1 + add r13d,eax + xor r14d,DWORD[4+rsp] + mov eax,r11d + mov DWORD[rsp],ebp + mov ecx,r13d + xor r14d,DWORD[12+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD[36+rsp] + and eax,esi + lea r12d,[1518500249+r12*1+rbp] + rol esi,30 + xor eax,r11d + add r12d,ecx + rol r14d,1 + add r12d,eax + xor edx,DWORD[8+rsp] + mov eax,edi + mov DWORD[4+rsp],r14d + mov ecx,r12d + xor edx,DWORD[16+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD[40+rsp] + and eax,r13d + lea r11d,[1518500249+r11*1+r14] + rol r13d,30 + xor eax,edi + add r11d,ecx + rol edx,1 + add r11d,eax + xor ebp,DWORD[12+rsp] + mov eax,esi + mov DWORD[8+rsp],edx + mov ecx,r11d + xor ebp,DWORD[20+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD[44+rsp] + and eax,r12d + lea edi,[1518500249+rdi*1+rdx] + rol r12d,30 + xor eax,esi + add edi,ecx + rol ebp,1 + add edi,eax + xor r14d,DWORD[16+rsp] + mov eax,r13d + mov DWORD[12+rsp],ebp + mov ecx,edi + xor r14d,DWORD[24+rsp] + xor eax,r12d + rol ecx,5 + xor r14d,DWORD[48+rsp] + and eax,r11d + lea esi,[1518500249+rsi*1+rbp] + rol r11d,30 + xor eax,r13d + add esi,ecx + rol r14d,1 + add esi,eax + xor edx,DWORD[20+rsp] + mov eax,edi + mov DWORD[16+rsp],r14d + mov ecx,esi + xor edx,DWORD[28+rsp] + xor eax,r12d + rol ecx,5 + xor edx,DWORD[52+rsp] + lea r13d,[1859775393+r13*1+r14] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol edx,1 + xor ebp,DWORD[24+rsp] + mov eax,esi + mov DWORD[20+rsp],edx + mov ecx,r13d + xor ebp,DWORD[32+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD[56+rsp] + lea r12d,[1859775393+r12*1+rdx] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol ebp,1 + xor r14d,DWORD[28+rsp] + mov eax,r13d + mov DWORD[24+rsp],ebp + mov ecx,r12d + xor r14d,DWORD[36+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD[60+rsp] + lea r11d,[1859775393+r11*1+rbp] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol r14d,1 + xor edx,DWORD[32+rsp] + mov eax,r12d + mov DWORD[28+rsp],r14d + mov ecx,r11d + xor edx,DWORD[40+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD[rsp] + lea edi,[1859775393+rdi*1+r14] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol edx,1 + xor ebp,DWORD[36+rsp] + mov eax,r11d + mov DWORD[32+rsp],edx + mov ecx,edi + xor ebp,DWORD[44+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD[4+rsp] + lea esi,[1859775393+rsi*1+rdx] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol ebp,1 + xor r14d,DWORD[40+rsp] + mov eax,edi + mov DWORD[36+rsp],ebp + mov ecx,esi + xor r14d,DWORD[48+rsp] + xor eax,r12d + rol ecx,5 + xor r14d,DWORD[8+rsp] + lea r13d,[1859775393+r13*1+rbp] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol r14d,1 + xor edx,DWORD[44+rsp] + mov eax,esi + mov DWORD[40+rsp],r14d + mov ecx,r13d + xor edx,DWORD[52+rsp] + xor eax,r11d + rol ecx,5 + xor edx,DWORD[12+rsp] + lea r12d,[1859775393+r12*1+r14] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol edx,1 + xor ebp,DWORD[48+rsp] + mov eax,r13d + mov DWORD[44+rsp],edx + mov ecx,r12d + xor ebp,DWORD[56+rsp] + xor eax,edi + rol ecx,5 + xor ebp,DWORD[16+rsp] + lea r11d,[1859775393+r11*1+rdx] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol ebp,1 + xor r14d,DWORD[52+rsp] + mov eax,r12d + mov DWORD[48+rsp],ebp + mov ecx,r11d + xor r14d,DWORD[60+rsp] + xor eax,esi + rol ecx,5 + xor r14d,DWORD[20+rsp] + lea edi,[1859775393+rdi*1+rbp] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol r14d,1 + xor edx,DWORD[56+rsp] + mov eax,r11d + mov DWORD[52+rsp],r14d + mov ecx,edi + xor edx,DWORD[rsp] + xor eax,r13d + rol ecx,5 + xor edx,DWORD[24+rsp] + lea esi,[1859775393+rsi*1+r14] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol edx,1 + xor ebp,DWORD[60+rsp] + mov eax,edi + mov DWORD[56+rsp],edx + mov ecx,esi + xor ebp,DWORD[4+rsp] + xor eax,r12d + rol ecx,5 + xor ebp,DWORD[28+rsp] + lea r13d,[1859775393+r13*1+rdx] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol ebp,1 + xor r14d,DWORD[rsp] + mov eax,esi + mov DWORD[60+rsp],ebp + mov ecx,r13d + xor r14d,DWORD[8+rsp] + xor eax,r11d + rol ecx,5 + xor r14d,DWORD[32+rsp] + lea r12d,[1859775393+r12*1+rbp] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol r14d,1 + xor edx,DWORD[4+rsp] + mov eax,r13d + mov DWORD[rsp],r14d + mov ecx,r12d + xor edx,DWORD[12+rsp] + xor eax,edi + rol ecx,5 + xor edx,DWORD[36+rsp] + lea r11d,[1859775393+r11*1+r14] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol edx,1 + xor ebp,DWORD[8+rsp] + mov eax,r12d + mov DWORD[4+rsp],edx + mov ecx,r11d + xor ebp,DWORD[16+rsp] + xor eax,esi + rol ecx,5 + xor ebp,DWORD[40+rsp] + lea edi,[1859775393+rdi*1+rdx] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol ebp,1 + xor r14d,DWORD[12+rsp] + mov eax,r11d + mov DWORD[8+rsp],ebp + mov ecx,edi + xor r14d,DWORD[20+rsp] + xor eax,r13d + rol ecx,5 + xor r14d,DWORD[44+rsp] + lea esi,[1859775393+rsi*1+rbp] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol r14d,1 + xor edx,DWORD[16+rsp] + mov eax,edi + mov DWORD[12+rsp],r14d + mov ecx,esi + xor edx,DWORD[24+rsp] + xor eax,r12d + rol ecx,5 + xor edx,DWORD[48+rsp] + lea r13d,[1859775393+r13*1+r14] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol edx,1 + xor ebp,DWORD[20+rsp] + mov eax,esi + mov DWORD[16+rsp],edx + mov ecx,r13d + xor ebp,DWORD[28+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD[52+rsp] + lea r12d,[1859775393+r12*1+rdx] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol ebp,1 + xor r14d,DWORD[24+rsp] + mov eax,r13d + mov DWORD[20+rsp],ebp + mov ecx,r12d + xor r14d,DWORD[32+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD[56+rsp] + lea r11d,[1859775393+r11*1+rbp] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol r14d,1 + xor edx,DWORD[28+rsp] + mov eax,r12d + mov DWORD[24+rsp],r14d + mov ecx,r11d + xor edx,DWORD[36+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD[60+rsp] + lea edi,[1859775393+rdi*1+r14] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol edx,1 + xor ebp,DWORD[32+rsp] + mov eax,r11d + mov DWORD[28+rsp],edx + mov ecx,edi + xor ebp,DWORD[40+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD[rsp] + lea esi,[1859775393+rsi*1+rdx] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol ebp,1 + xor r14d,DWORD[36+rsp] + mov eax,r12d + mov DWORD[32+rsp],ebp + mov ebx,r12d + xor r14d,DWORD[44+rsp] + and eax,r11d + mov ecx,esi + xor r14d,DWORD[4+rsp] + lea r13d,[((-1894007588))+r13*1+rbp] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol r14d,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor edx,DWORD[40+rsp] + mov eax,r11d + mov DWORD[36+rsp],r14d + mov ebx,r11d + xor edx,DWORD[48+rsp] + and eax,edi + mov ecx,r13d + xor edx,DWORD[8+rsp] + lea r12d,[((-1894007588))+r12*1+r14] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol edx,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor ebp,DWORD[44+rsp] + mov eax,edi + mov DWORD[40+rsp],edx + mov ebx,edi + xor ebp,DWORD[52+rsp] + and eax,esi + mov ecx,r12d + xor ebp,DWORD[12+rsp] + lea r11d,[((-1894007588))+r11*1+rdx] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol ebp,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor r14d,DWORD[48+rsp] + mov eax,esi + mov DWORD[44+rsp],ebp + mov ebx,esi + xor r14d,DWORD[56+rsp] + and eax,r13d + mov ecx,r11d + xor r14d,DWORD[16+rsp] + lea edi,[((-1894007588))+rdi*1+rbp] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol r14d,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor edx,DWORD[52+rsp] + mov eax,r13d + mov DWORD[48+rsp],r14d + mov ebx,r13d + xor edx,DWORD[60+rsp] + and eax,r12d + mov ecx,edi + xor edx,DWORD[20+rsp] + lea esi,[((-1894007588))+rsi*1+r14] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol edx,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor ebp,DWORD[56+rsp] + mov eax,r12d + mov DWORD[52+rsp],edx + mov ebx,r12d + xor ebp,DWORD[rsp] + and eax,r11d + mov ecx,esi + xor ebp,DWORD[24+rsp] + lea r13d,[((-1894007588))+r13*1+rdx] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol ebp,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor r14d,DWORD[60+rsp] + mov eax,r11d + mov DWORD[56+rsp],ebp + mov ebx,r11d + xor r14d,DWORD[4+rsp] + and eax,edi + mov ecx,r13d + xor r14d,DWORD[28+rsp] + lea r12d,[((-1894007588))+r12*1+rbp] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol r14d,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor edx,DWORD[rsp] + mov eax,edi + mov DWORD[60+rsp],r14d + mov ebx,edi + xor edx,DWORD[8+rsp] + and eax,esi + mov ecx,r12d + xor edx,DWORD[32+rsp] + lea r11d,[((-1894007588))+r11*1+r14] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol edx,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor ebp,DWORD[4+rsp] + mov eax,esi + mov DWORD[rsp],edx + mov ebx,esi + xor ebp,DWORD[12+rsp] + and eax,r13d + mov ecx,r11d + xor ebp,DWORD[36+rsp] + lea edi,[((-1894007588))+rdi*1+rdx] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol ebp,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor r14d,DWORD[8+rsp] + mov eax,r13d + mov DWORD[4+rsp],ebp + mov ebx,r13d + xor r14d,DWORD[16+rsp] + and eax,r12d + mov ecx,edi + xor r14d,DWORD[40+rsp] + lea esi,[((-1894007588))+rsi*1+rbp] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol r14d,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor edx,DWORD[12+rsp] + mov eax,r12d + mov DWORD[8+rsp],r14d + mov ebx,r12d + xor edx,DWORD[20+rsp] + and eax,r11d + mov ecx,esi + xor edx,DWORD[44+rsp] + lea r13d,[((-1894007588))+r13*1+r14] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol edx,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor ebp,DWORD[16+rsp] + mov eax,r11d + mov DWORD[12+rsp],edx + mov ebx,r11d + xor ebp,DWORD[24+rsp] + and eax,edi + mov ecx,r13d + xor ebp,DWORD[48+rsp] + lea r12d,[((-1894007588))+r12*1+rdx] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol ebp,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor r14d,DWORD[20+rsp] + mov eax,edi + mov DWORD[16+rsp],ebp + mov ebx,edi + xor r14d,DWORD[28+rsp] + and eax,esi + mov ecx,r12d + xor r14d,DWORD[52+rsp] + lea r11d,[((-1894007588))+r11*1+rbp] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol r14d,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor edx,DWORD[24+rsp] + mov eax,esi + mov DWORD[20+rsp],r14d + mov ebx,esi + xor edx,DWORD[32+rsp] + and eax,r13d + mov ecx,r11d + xor edx,DWORD[56+rsp] + lea edi,[((-1894007588))+rdi*1+r14] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol edx,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor ebp,DWORD[28+rsp] + mov eax,r13d + mov DWORD[24+rsp],edx + mov ebx,r13d + xor ebp,DWORD[36+rsp] + and eax,r12d + mov ecx,edi + xor ebp,DWORD[60+rsp] + lea esi,[((-1894007588))+rsi*1+rdx] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol ebp,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor r14d,DWORD[32+rsp] + mov eax,r12d + mov DWORD[28+rsp],ebp + mov ebx,r12d + xor r14d,DWORD[40+rsp] + and eax,r11d + mov ecx,esi + xor r14d,DWORD[rsp] + lea r13d,[((-1894007588))+r13*1+rbp] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol r14d,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor edx,DWORD[36+rsp] + mov eax,r11d + mov DWORD[32+rsp],r14d + mov ebx,r11d + xor edx,DWORD[44+rsp] + and eax,edi + mov ecx,r13d + xor edx,DWORD[4+rsp] + lea r12d,[((-1894007588))+r12*1+r14] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol edx,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor ebp,DWORD[40+rsp] + mov eax,edi + mov DWORD[36+rsp],edx + mov ebx,edi + xor ebp,DWORD[48+rsp] + and eax,esi + mov ecx,r12d + xor ebp,DWORD[8+rsp] + lea r11d,[((-1894007588))+r11*1+rdx] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol ebp,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor r14d,DWORD[44+rsp] + mov eax,esi + mov DWORD[40+rsp],ebp + mov ebx,esi + xor r14d,DWORD[52+rsp] + and eax,r13d + mov ecx,r11d + xor r14d,DWORD[12+rsp] + lea edi,[((-1894007588))+rdi*1+rbp] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol r14d,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor edx,DWORD[48+rsp] + mov eax,r13d + mov DWORD[44+rsp],r14d + mov ebx,r13d + xor edx,DWORD[56+rsp] + and eax,r12d + mov ecx,edi + xor edx,DWORD[16+rsp] + lea esi,[((-1894007588))+rsi*1+r14] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol edx,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor ebp,DWORD[52+rsp] + mov eax,edi + mov DWORD[48+rsp],edx + mov ecx,esi + xor ebp,DWORD[60+rsp] + xor eax,r12d + rol ecx,5 + xor ebp,DWORD[20+rsp] + lea r13d,[((-899497514))+r13*1+rdx] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol ebp,1 + xor r14d,DWORD[56+rsp] + mov eax,esi + mov DWORD[52+rsp],ebp + mov ecx,r13d + xor r14d,DWORD[rsp] + xor eax,r11d + rol ecx,5 + xor r14d,DWORD[24+rsp] + lea r12d,[((-899497514))+r12*1+rbp] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol r14d,1 + xor edx,DWORD[60+rsp] + mov eax,r13d + mov DWORD[56+rsp],r14d + mov ecx,r12d + xor edx,DWORD[4+rsp] + xor eax,edi + rol ecx,5 + xor edx,DWORD[28+rsp] + lea r11d,[((-899497514))+r11*1+r14] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol edx,1 + xor ebp,DWORD[rsp] + mov eax,r12d + mov DWORD[60+rsp],edx + mov ecx,r11d + xor ebp,DWORD[8+rsp] + xor eax,esi + rol ecx,5 + xor ebp,DWORD[32+rsp] + lea edi,[((-899497514))+rdi*1+rdx] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol ebp,1 + xor r14d,DWORD[4+rsp] + mov eax,r11d + mov DWORD[rsp],ebp + mov ecx,edi + xor r14d,DWORD[12+rsp] + xor eax,r13d + rol ecx,5 + xor r14d,DWORD[36+rsp] + lea esi,[((-899497514))+rsi*1+rbp] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol r14d,1 + xor edx,DWORD[8+rsp] + mov eax,edi + mov DWORD[4+rsp],r14d + mov ecx,esi + xor edx,DWORD[16+rsp] + xor eax,r12d + rol ecx,5 + xor edx,DWORD[40+rsp] + lea r13d,[((-899497514))+r13*1+r14] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol edx,1 + xor ebp,DWORD[12+rsp] + mov eax,esi + mov DWORD[8+rsp],edx + mov ecx,r13d + xor ebp,DWORD[20+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD[44+rsp] + lea r12d,[((-899497514))+r12*1+rdx] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol ebp,1 + xor r14d,DWORD[16+rsp] + mov eax,r13d + mov DWORD[12+rsp],ebp + mov ecx,r12d + xor r14d,DWORD[24+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD[48+rsp] + lea r11d,[((-899497514))+r11*1+rbp] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol r14d,1 + xor edx,DWORD[20+rsp] + mov eax,r12d + mov DWORD[16+rsp],r14d + mov ecx,r11d + xor edx,DWORD[28+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD[52+rsp] + lea edi,[((-899497514))+rdi*1+r14] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol edx,1 + xor ebp,DWORD[24+rsp] + mov eax,r11d + mov DWORD[20+rsp],edx + mov ecx,edi + xor ebp,DWORD[32+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD[56+rsp] + lea esi,[((-899497514))+rsi*1+rdx] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol ebp,1 + xor r14d,DWORD[28+rsp] + mov eax,edi + mov DWORD[24+rsp],ebp + mov ecx,esi + xor r14d,DWORD[36+rsp] + xor eax,r12d + rol ecx,5 + xor r14d,DWORD[60+rsp] + lea r13d,[((-899497514))+r13*1+rbp] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol r14d,1 + xor edx,DWORD[32+rsp] + mov eax,esi + mov DWORD[28+rsp],r14d + mov ecx,r13d + xor edx,DWORD[40+rsp] + xor eax,r11d + rol ecx,5 + xor edx,DWORD[rsp] + lea r12d,[((-899497514))+r12*1+r14] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol edx,1 + xor ebp,DWORD[36+rsp] + mov eax,r13d + + mov ecx,r12d + xor ebp,DWORD[44+rsp] + xor eax,edi + rol ecx,5 + xor ebp,DWORD[4+rsp] + lea r11d,[((-899497514))+r11*1+rdx] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol ebp,1 + xor r14d,DWORD[40+rsp] + mov eax,r12d + + mov ecx,r11d + xor r14d,DWORD[48+rsp] + xor eax,esi + rol ecx,5 + xor r14d,DWORD[8+rsp] + lea edi,[((-899497514))+rdi*1+rbp] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol r14d,1 + xor edx,DWORD[44+rsp] + mov eax,r11d + + mov ecx,edi + xor edx,DWORD[52+rsp] + xor eax,r13d + rol ecx,5 + xor edx,DWORD[12+rsp] + lea esi,[((-899497514))+rsi*1+r14] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol edx,1 + xor ebp,DWORD[48+rsp] + mov eax,edi + + mov ecx,esi + xor ebp,DWORD[56+rsp] + xor eax,r12d + rol ecx,5 + xor ebp,DWORD[16+rsp] + lea r13d,[((-899497514))+r13*1+rdx] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol ebp,1 + xor r14d,DWORD[52+rsp] + mov eax,esi + + mov ecx,r13d + xor r14d,DWORD[60+rsp] + xor eax,r11d + rol ecx,5 + xor r14d,DWORD[20+rsp] + lea r12d,[((-899497514))+r12*1+rbp] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol r14d,1 + xor edx,DWORD[56+rsp] + mov eax,r13d + + mov ecx,r12d + xor edx,DWORD[rsp] + xor eax,edi + rol ecx,5 + xor edx,DWORD[24+rsp] + lea r11d,[((-899497514))+r11*1+r14] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol edx,1 + xor ebp,DWORD[60+rsp] + mov eax,r12d + + mov ecx,r11d + xor ebp,DWORD[4+rsp] + xor eax,esi + rol ecx,5 + xor ebp,DWORD[28+rsp] + lea edi,[((-899497514))+rdi*1+rdx] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol ebp,1 + mov eax,r11d + mov ecx,edi + xor eax,r13d + lea esi,[((-899497514))+rsi*1+rbp] + rol ecx,5 + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + add esi,DWORD[r8] + add edi,DWORD[4+r8] + add r11d,DWORD[8+r8] + add r12d,DWORD[12+r8] + add r13d,DWORD[16+r8] + mov DWORD[r8],esi + mov DWORD[4+r8],edi + mov DWORD[8+r8],r11d + mov DWORD[12+r8],r12d + mov DWORD[16+r8],r13d + + sub r10,1 + lea r9,[64+r9] + jnz NEAR $L$loop + + mov rsi,QWORD[64+rsp] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha1_block_data_order: + +ALIGN 32 +sha1_block_data_order_shaext: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_shaext: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_shaext_shortcut: + + lea rsp,[((-72))+rsp] + movaps XMMWORD[(-8-64)+rax],xmm6 + movaps XMMWORD[(-8-48)+rax],xmm7 + movaps XMMWORD[(-8-32)+rax],xmm8 + movaps XMMWORD[(-8-16)+rax],xmm9 +$L$prologue_shaext: + movdqu xmm0,XMMWORD[rdi] + movd xmm1,DWORD[16+rdi] + movdqa xmm3,XMMWORD[((K_XX_XX+160))] + + movdqu xmm4,XMMWORD[rsi] + pshufd xmm0,xmm0,27 + movdqu xmm5,XMMWORD[16+rsi] + pshufd xmm1,xmm1,27 + movdqu xmm6,XMMWORD[32+rsi] +DB 102,15,56,0,227 + movdqu xmm7,XMMWORD[48+rsi] +DB 102,15,56,0,235 +DB 102,15,56,0,243 + movdqa xmm9,xmm1 +DB 102,15,56,0,251 + jmp NEAR $L$oop_shaext + +ALIGN 16 +$L$oop_shaext: + dec rdx + lea r8,[64+rsi] + paddd xmm1,xmm4 + cmovne rsi,r8 + movdqa xmm8,xmm0 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,0 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + + movdqa xmm1,xmm0 +DB 15,58,204,194,0 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,0 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + + movdqa xmm1,xmm0 +DB 15,58,204,194,0 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,0 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + + movdqa xmm1,xmm0 +DB 15,58,204,194,1 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,1 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + + movdqa xmm1,xmm0 +DB 15,58,204,194,1 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,1 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + + movdqa xmm1,xmm0 +DB 15,58,204,194,1 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,2 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + + movdqa xmm1,xmm0 +DB 15,58,204,194,2 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,2 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + + movdqa xmm1,xmm0 +DB 15,58,204,194,2 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,2 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + + movdqa xmm1,xmm0 +DB 15,58,204,194,3 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 + movdqu xmm4,XMMWORD[rsi] + movdqa xmm2,xmm0 +DB 15,58,204,193,3 +DB 15,56,200,213 + movdqu xmm5,XMMWORD[16+rsi] +DB 102,15,56,0,227 + + movdqa xmm1,xmm0 +DB 15,58,204,194,3 +DB 15,56,200,206 + movdqu xmm6,XMMWORD[32+rsi] +DB 102,15,56,0,235 + + movdqa xmm2,xmm0 +DB 15,58,204,193,3 +DB 15,56,200,215 + movdqu xmm7,XMMWORD[48+rsi] +DB 102,15,56,0,243 + + movdqa xmm1,xmm0 +DB 15,58,204,194,3 +DB 65,15,56,200,201 +DB 102,15,56,0,251 + + paddd xmm0,xmm8 + movdqa xmm9,xmm1 + + jnz NEAR $L$oop_shaext + + pshufd xmm0,xmm0,27 + pshufd xmm1,xmm1,27 + movdqu XMMWORD[rdi],xmm0 + movd DWORD[16+rdi],xmm1 + movaps xmm6,XMMWORD[((-8-64))+rax] + movaps xmm7,XMMWORD[((-8-48))+rax] + movaps xmm8,XMMWORD[((-8-32))+rax] + movaps xmm9,XMMWORD[((-8-16))+rax] + mov rsp,rax +$L$epilogue_shaext: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha1_block_data_order_shaext: + +ALIGN 16 +sha1_block_data_order_ssse3: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_ssse3: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_ssse3_shortcut: + + mov r11,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + lea rsp,[((-160))+rsp] + movaps XMMWORD[(-40-96)+r11],xmm6 + movaps XMMWORD[(-40-80)+r11],xmm7 + movaps XMMWORD[(-40-64)+r11],xmm8 + movaps XMMWORD[(-40-48)+r11],xmm9 + movaps XMMWORD[(-40-32)+r11],xmm10 + movaps XMMWORD[(-40-16)+r11],xmm11 +$L$prologue_ssse3: + and rsp,-64 + mov r8,rdi + mov r9,rsi + mov r10,rdx + + shl r10,6 + add r10,r9 + lea r14,[((K_XX_XX+64))] + + mov eax,DWORD[r8] + mov ebx,DWORD[4+r8] + mov ecx,DWORD[8+r8] + mov edx,DWORD[12+r8] + mov esi,ebx + mov ebp,DWORD[16+r8] + mov edi,ecx + xor edi,edx + and esi,edi + + movdqa xmm6,XMMWORD[64+r14] + movdqa xmm9,XMMWORD[((-64))+r14] + movdqu xmm0,XMMWORD[r9] + movdqu xmm1,XMMWORD[16+r9] + movdqu xmm2,XMMWORD[32+r9] + movdqu xmm3,XMMWORD[48+r9] +DB 102,15,56,0,198 +DB 102,15,56,0,206 +DB 102,15,56,0,214 + add r9,64 + paddd xmm0,xmm9 +DB 102,15,56,0,222 + paddd xmm1,xmm9 + paddd xmm2,xmm9 + movdqa XMMWORD[rsp],xmm0 + psubd xmm0,xmm9 + movdqa XMMWORD[16+rsp],xmm1 + psubd xmm1,xmm9 + movdqa XMMWORD[32+rsp],xmm2 + psubd xmm2,xmm9 + jmp NEAR $L$oop_ssse3 +ALIGN 16 +$L$oop_ssse3: + ror ebx,2 + pshufd xmm4,xmm0,238 + xor esi,edx + movdqa xmm8,xmm3 + paddd xmm9,xmm3 + mov edi,eax + add ebp,DWORD[rsp] + punpcklqdq xmm4,xmm1 + xor ebx,ecx + rol eax,5 + add ebp,esi + psrldq xmm8,4 + and edi,ebx + xor ebx,ecx + pxor xmm4,xmm0 + add ebp,eax + ror eax,7 + pxor xmm8,xmm2 + xor edi,ecx + mov esi,ebp + add edx,DWORD[4+rsp] + pxor xmm4,xmm8 + xor eax,ebx + rol ebp,5 + movdqa XMMWORD[48+rsp],xmm9 + add edx,edi + and esi,eax + movdqa xmm10,xmm4 + xor eax,ebx + add edx,ebp + ror ebp,7 + movdqa xmm8,xmm4 + xor esi,ebx + pslldq xmm10,12 + paddd xmm4,xmm4 + mov edi,edx + add ecx,DWORD[8+rsp] + psrld xmm8,31 + xor ebp,eax + rol edx,5 + add ecx,esi + movdqa xmm9,xmm10 + and edi,ebp + xor ebp,eax + psrld xmm10,30 + add ecx,edx + ror edx,7 + por xmm4,xmm8 + xor edi,eax + mov esi,ecx + add ebx,DWORD[12+rsp] + pslld xmm9,2 + pxor xmm4,xmm10 + xor edx,ebp + movdqa xmm10,XMMWORD[((-64))+r14] + rol ecx,5 + add ebx,edi + and esi,edx + pxor xmm4,xmm9 + xor edx,ebp + add ebx,ecx + ror ecx,7 + pshufd xmm5,xmm1,238 + xor esi,ebp + movdqa xmm9,xmm4 + paddd xmm10,xmm4 + mov edi,ebx + add eax,DWORD[16+rsp] + punpcklqdq xmm5,xmm2 + xor ecx,edx + rol ebx,5 + add eax,esi + psrldq xmm9,4 + and edi,ecx + xor ecx,edx + pxor xmm5,xmm1 + add eax,ebx + ror ebx,7 + pxor xmm9,xmm3 + xor edi,edx + mov esi,eax + add ebp,DWORD[20+rsp] + pxor xmm5,xmm9 + xor ebx,ecx + rol eax,5 + movdqa XMMWORD[rsp],xmm10 + add ebp,edi + and esi,ebx + movdqa xmm8,xmm5 + xor ebx,ecx + add ebp,eax + ror eax,7 + movdqa xmm9,xmm5 + xor esi,ecx + pslldq xmm8,12 + paddd xmm5,xmm5 + mov edi,ebp + add edx,DWORD[24+rsp] + psrld xmm9,31 + xor eax,ebx + rol ebp,5 + add edx,esi + movdqa xmm10,xmm8 + and edi,eax + xor eax,ebx + psrld xmm8,30 + add edx,ebp + ror ebp,7 + por xmm5,xmm9 + xor edi,ebx + mov esi,edx + add ecx,DWORD[28+rsp] + pslld xmm10,2 + pxor xmm5,xmm8 + xor ebp,eax + movdqa xmm8,XMMWORD[((-32))+r14] + rol edx,5 + add ecx,edi + and esi,ebp + pxor xmm5,xmm10 + xor ebp,eax + add ecx,edx + ror edx,7 + pshufd xmm6,xmm2,238 + xor esi,eax + movdqa xmm10,xmm5 + paddd xmm8,xmm5 + mov edi,ecx + add ebx,DWORD[32+rsp] + punpcklqdq xmm6,xmm3 + xor edx,ebp + rol ecx,5 + add ebx,esi + psrldq xmm10,4 + and edi,edx + xor edx,ebp + pxor xmm6,xmm2 + add ebx,ecx + ror ecx,7 + pxor xmm10,xmm4 + xor edi,ebp + mov esi,ebx + add eax,DWORD[36+rsp] + pxor xmm6,xmm10 + xor ecx,edx + rol ebx,5 + movdqa XMMWORD[16+rsp],xmm8 + add eax,edi + and esi,ecx + movdqa xmm9,xmm6 + xor ecx,edx + add eax,ebx + ror ebx,7 + movdqa xmm10,xmm6 + xor esi,edx + pslldq xmm9,12 + paddd xmm6,xmm6 + mov edi,eax + add ebp,DWORD[40+rsp] + psrld xmm10,31 + xor ebx,ecx + rol eax,5 + add ebp,esi + movdqa xmm8,xmm9 + and edi,ebx + xor ebx,ecx + psrld xmm9,30 + add ebp,eax + ror eax,7 + por xmm6,xmm10 + xor edi,ecx + mov esi,ebp + add edx,DWORD[44+rsp] + pslld xmm8,2 + pxor xmm6,xmm9 + xor eax,ebx + movdqa xmm9,XMMWORD[((-32))+r14] + rol ebp,5 + add edx,edi + and esi,eax + pxor xmm6,xmm8 + xor eax,ebx + add edx,ebp + ror ebp,7 + pshufd xmm7,xmm3,238 + xor esi,ebx + movdqa xmm8,xmm6 + paddd xmm9,xmm6 + mov edi,edx + add ecx,DWORD[48+rsp] + punpcklqdq xmm7,xmm4 + xor ebp,eax + rol edx,5 + add ecx,esi + psrldq xmm8,4 + and edi,ebp + xor ebp,eax + pxor xmm7,xmm3 + add ecx,edx + ror edx,7 + pxor xmm8,xmm5 + xor edi,eax + mov esi,ecx + add ebx,DWORD[52+rsp] + pxor xmm7,xmm8 + xor edx,ebp + rol ecx,5 + movdqa XMMWORD[32+rsp],xmm9 + add ebx,edi + and esi,edx + movdqa xmm10,xmm7 + xor edx,ebp + add ebx,ecx + ror ecx,7 + movdqa xmm8,xmm7 + xor esi,ebp + pslldq xmm10,12 + paddd xmm7,xmm7 + mov edi,ebx + add eax,DWORD[56+rsp] + psrld xmm8,31 + xor ecx,edx + rol ebx,5 + add eax,esi + movdqa xmm9,xmm10 + and edi,ecx + xor ecx,edx + psrld xmm10,30 + add eax,ebx + ror ebx,7 + por xmm7,xmm8 + xor edi,edx + mov esi,eax + add ebp,DWORD[60+rsp] + pslld xmm9,2 + pxor xmm7,xmm10 + xor ebx,ecx + movdqa xmm10,XMMWORD[((-32))+r14] + rol eax,5 + add ebp,edi + and esi,ebx + pxor xmm7,xmm9 + pshufd xmm9,xmm6,238 + xor ebx,ecx + add ebp,eax + ror eax,7 + pxor xmm0,xmm4 + xor esi,ecx + mov edi,ebp + add edx,DWORD[rsp] + punpcklqdq xmm9,xmm7 + xor eax,ebx + rol ebp,5 + pxor xmm0,xmm1 + add edx,esi + and edi,eax + movdqa xmm8,xmm10 + xor eax,ebx + paddd xmm10,xmm7 + add edx,ebp + pxor xmm0,xmm9 + ror ebp,7 + xor edi,ebx + mov esi,edx + add ecx,DWORD[4+rsp] + movdqa xmm9,xmm0 + xor ebp,eax + rol edx,5 + movdqa XMMWORD[48+rsp],xmm10 + add ecx,edi + and esi,ebp + xor ebp,eax + pslld xmm0,2 + add ecx,edx + ror edx,7 + psrld xmm9,30 + xor esi,eax + mov edi,ecx + add ebx,DWORD[8+rsp] + por xmm0,xmm9 + xor edx,ebp + rol ecx,5 + pshufd xmm10,xmm7,238 + add ebx,esi + and edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD[12+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + add eax,ebx + pxor xmm1,xmm5 + add ebp,DWORD[16+rsp] + xor esi,ecx + punpcklqdq xmm10,xmm0 + mov edi,eax + rol eax,5 + pxor xmm1,xmm2 + add ebp,esi + xor edi,ecx + movdqa xmm9,xmm8 + ror ebx,7 + paddd xmm8,xmm0 + add ebp,eax + pxor xmm1,xmm10 + add edx,DWORD[20+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + movdqa xmm10,xmm1 + add edx,edi + xor esi,ebx + movdqa XMMWORD[rsp],xmm8 + ror eax,7 + add edx,ebp + add ecx,DWORD[24+rsp] + pslld xmm1,2 + xor esi,eax + mov edi,edx + psrld xmm10,30 + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + por xmm1,xmm10 + add ecx,edx + add ebx,DWORD[28+rsp] + pshufd xmm8,xmm0,238 + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + add ebx,ecx + pxor xmm2,xmm6 + add eax,DWORD[32+rsp] + xor esi,edx + punpcklqdq xmm8,xmm1 + mov edi,ebx + rol ebx,5 + pxor xmm2,xmm3 + add eax,esi + xor edi,edx + movdqa xmm10,XMMWORD[r14] + ror ecx,7 + paddd xmm9,xmm1 + add eax,ebx + pxor xmm2,xmm8 + add ebp,DWORD[36+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + movdqa xmm8,xmm2 + add ebp,edi + xor esi,ecx + movdqa XMMWORD[16+rsp],xmm9 + ror ebx,7 + add ebp,eax + add edx,DWORD[40+rsp] + pslld xmm2,2 + xor esi,ebx + mov edi,ebp + psrld xmm8,30 + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + por xmm2,xmm8 + add edx,ebp + add ecx,DWORD[44+rsp] + pshufd xmm9,xmm1,238 + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + add ecx,edx + pxor xmm3,xmm7 + add ebx,DWORD[48+rsp] + xor esi,ebp + punpcklqdq xmm9,xmm2 + mov edi,ecx + rol ecx,5 + pxor xmm3,xmm4 + add ebx,esi + xor edi,ebp + movdqa xmm8,xmm10 + ror edx,7 + paddd xmm10,xmm2 + add ebx,ecx + pxor xmm3,xmm9 + add eax,DWORD[52+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + movdqa xmm9,xmm3 + add eax,edi + xor esi,edx + movdqa XMMWORD[32+rsp],xmm10 + ror ecx,7 + add eax,ebx + add ebp,DWORD[56+rsp] + pslld xmm3,2 + xor esi,ecx + mov edi,eax + psrld xmm9,30 + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + por xmm3,xmm9 + add ebp,eax + add edx,DWORD[60+rsp] + pshufd xmm10,xmm2,238 + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + pxor xmm4,xmm0 + add ecx,DWORD[rsp] + xor esi,eax + punpcklqdq xmm10,xmm3 + mov edi,edx + rol edx,5 + pxor xmm4,xmm5 + add ecx,esi + xor edi,eax + movdqa xmm9,xmm8 + ror ebp,7 + paddd xmm8,xmm3 + add ecx,edx + pxor xmm4,xmm10 + add ebx,DWORD[4+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + movdqa xmm10,xmm4 + add ebx,edi + xor esi,ebp + movdqa XMMWORD[48+rsp],xmm8 + ror edx,7 + add ebx,ecx + add eax,DWORD[8+rsp] + pslld xmm4,2 + xor esi,edx + mov edi,ebx + psrld xmm10,30 + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + por xmm4,xmm10 + add eax,ebx + add ebp,DWORD[12+rsp] + pshufd xmm8,xmm3,238 + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + pxor xmm5,xmm1 + add edx,DWORD[16+rsp] + xor esi,ebx + punpcklqdq xmm8,xmm4 + mov edi,ebp + rol ebp,5 + pxor xmm5,xmm6 + add edx,esi + xor edi,ebx + movdqa xmm10,xmm9 + ror eax,7 + paddd xmm9,xmm4 + add edx,ebp + pxor xmm5,xmm8 + add ecx,DWORD[20+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + movdqa xmm8,xmm5 + add ecx,edi + xor esi,eax + movdqa XMMWORD[rsp],xmm9 + ror ebp,7 + add ecx,edx + add ebx,DWORD[24+rsp] + pslld xmm5,2 + xor esi,ebp + mov edi,ecx + psrld xmm8,30 + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + por xmm5,xmm8 + add ebx,ecx + add eax,DWORD[28+rsp] + pshufd xmm9,xmm4,238 + ror ecx,7 + mov esi,ebx + xor edi,edx + rol ebx,5 + add eax,edi + xor esi,ecx + xor ecx,edx + add eax,ebx + pxor xmm6,xmm2 + add ebp,DWORD[32+rsp] + and esi,ecx + xor ecx,edx + ror ebx,7 + punpcklqdq xmm9,xmm5 + mov edi,eax + xor esi,ecx + pxor xmm6,xmm7 + rol eax,5 + add ebp,esi + movdqa xmm8,xmm10 + xor edi,ebx + paddd xmm10,xmm5 + xor ebx,ecx + pxor xmm6,xmm9 + add ebp,eax + add edx,DWORD[36+rsp] + and edi,ebx + xor ebx,ecx + ror eax,7 + movdqa xmm9,xmm6 + mov esi,ebp + xor edi,ebx + movdqa XMMWORD[16+rsp],xmm10 + rol ebp,5 + add edx,edi + xor esi,eax + pslld xmm6,2 + xor eax,ebx + add edx,ebp + psrld xmm9,30 + add ecx,DWORD[40+rsp] + and esi,eax + xor eax,ebx + por xmm6,xmm9 + ror ebp,7 + mov edi,edx + xor esi,eax + rol edx,5 + pshufd xmm10,xmm5,238 + add ecx,esi + xor edi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD[44+rsp] + and edi,ebp + xor ebp,eax + ror edx,7 + mov esi,ecx + xor edi,ebp + rol ecx,5 + add ebx,edi + xor esi,edx + xor edx,ebp + add ebx,ecx + pxor xmm7,xmm3 + add eax,DWORD[48+rsp] + and esi,edx + xor edx,ebp + ror ecx,7 + punpcklqdq xmm10,xmm6 + mov edi,ebx + xor esi,edx + pxor xmm7,xmm0 + rol ebx,5 + add eax,esi + movdqa xmm9,XMMWORD[32+r14] + xor edi,ecx + paddd xmm8,xmm6 + xor ecx,edx + pxor xmm7,xmm10 + add eax,ebx + add ebp,DWORD[52+rsp] + and edi,ecx + xor ecx,edx + ror ebx,7 + movdqa xmm10,xmm7 + mov esi,eax + xor edi,ecx + movdqa XMMWORD[32+rsp],xmm8 + rol eax,5 + add ebp,edi + xor esi,ebx + pslld xmm7,2 + xor ebx,ecx + add ebp,eax + psrld xmm10,30 + add edx,DWORD[56+rsp] + and esi,ebx + xor ebx,ecx + por xmm7,xmm10 + ror eax,7 + mov edi,ebp + xor esi,ebx + rol ebp,5 + pshufd xmm8,xmm6,238 + add edx,esi + xor edi,eax + xor eax,ebx + add edx,ebp + add ecx,DWORD[60+rsp] + and edi,eax + xor eax,ebx + ror ebp,7 + mov esi,edx + xor edi,eax + rol edx,5 + add ecx,edi + xor esi,ebp + xor ebp,eax + add ecx,edx + pxor xmm0,xmm4 + add ebx,DWORD[rsp] + and esi,ebp + xor ebp,eax + ror edx,7 + punpcklqdq xmm8,xmm7 + mov edi,ecx + xor esi,ebp + pxor xmm0,xmm1 + rol ecx,5 + add ebx,esi + movdqa xmm10,xmm9 + xor edi,edx + paddd xmm9,xmm7 + xor edx,ebp + pxor xmm0,xmm8 + add ebx,ecx + add eax,DWORD[4+rsp] + and edi,edx + xor edx,ebp + ror ecx,7 + movdqa xmm8,xmm0 + mov esi,ebx + xor edi,edx + movdqa XMMWORD[48+rsp],xmm9 + rol ebx,5 + add eax,edi + xor esi,ecx + pslld xmm0,2 + xor ecx,edx + add eax,ebx + psrld xmm8,30 + add ebp,DWORD[8+rsp] + and esi,ecx + xor ecx,edx + por xmm0,xmm8 + ror ebx,7 + mov edi,eax + xor esi,ecx + rol eax,5 + pshufd xmm9,xmm7,238 + add ebp,esi + xor edi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD[12+rsp] + and edi,ebx + xor ebx,ecx + ror eax,7 + mov esi,ebp + xor edi,ebx + rol ebp,5 + add edx,edi + xor esi,eax + xor eax,ebx + add edx,ebp + pxor xmm1,xmm5 + add ecx,DWORD[16+rsp] + and esi,eax + xor eax,ebx + ror ebp,7 + punpcklqdq xmm9,xmm0 + mov edi,edx + xor esi,eax + pxor xmm1,xmm2 + rol edx,5 + add ecx,esi + movdqa xmm8,xmm10 + xor edi,ebp + paddd xmm10,xmm0 + xor ebp,eax + pxor xmm1,xmm9 + add ecx,edx + add ebx,DWORD[20+rsp] + and edi,ebp + xor ebp,eax + ror edx,7 + movdqa xmm9,xmm1 + mov esi,ecx + xor edi,ebp + movdqa XMMWORD[rsp],xmm10 + rol ecx,5 + add ebx,edi + xor esi,edx + pslld xmm1,2 + xor edx,ebp + add ebx,ecx + psrld xmm9,30 + add eax,DWORD[24+rsp] + and esi,edx + xor edx,ebp + por xmm1,xmm9 + ror ecx,7 + mov edi,ebx + xor esi,edx + rol ebx,5 + pshufd xmm10,xmm0,238 + add eax,esi + xor edi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD[28+rsp] + and edi,ecx + xor ecx,edx + ror ebx,7 + mov esi,eax + xor edi,ecx + rol eax,5 + add ebp,edi + xor esi,ebx + xor ebx,ecx + add ebp,eax + pxor xmm2,xmm6 + add edx,DWORD[32+rsp] + and esi,ebx + xor ebx,ecx + ror eax,7 + punpcklqdq xmm10,xmm1 + mov edi,ebp + xor esi,ebx + pxor xmm2,xmm3 + rol ebp,5 + add edx,esi + movdqa xmm9,xmm8 + xor edi,eax + paddd xmm8,xmm1 + xor eax,ebx + pxor xmm2,xmm10 + add edx,ebp + add ecx,DWORD[36+rsp] + and edi,eax + xor eax,ebx + ror ebp,7 + movdqa xmm10,xmm2 + mov esi,edx + xor edi,eax + movdqa XMMWORD[16+rsp],xmm8 + rol edx,5 + add ecx,edi + xor esi,ebp + pslld xmm2,2 + xor ebp,eax + add ecx,edx + psrld xmm10,30 + add ebx,DWORD[40+rsp] + and esi,ebp + xor ebp,eax + por xmm2,xmm10 + ror edx,7 + mov edi,ecx + xor esi,ebp + rol ecx,5 + pshufd xmm8,xmm1,238 + add ebx,esi + xor edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD[44+rsp] + and edi,edx + xor edx,ebp + ror ecx,7 + mov esi,ebx + xor edi,edx + rol ebx,5 + add eax,edi + xor esi,edx + add eax,ebx + pxor xmm3,xmm7 + add ebp,DWORD[48+rsp] + xor esi,ecx + punpcklqdq xmm8,xmm2 + mov edi,eax + rol eax,5 + pxor xmm3,xmm4 + add ebp,esi + xor edi,ecx + movdqa xmm10,xmm9 + ror ebx,7 + paddd xmm9,xmm2 + add ebp,eax + pxor xmm3,xmm8 + add edx,DWORD[52+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + movdqa xmm8,xmm3 + add edx,edi + xor esi,ebx + movdqa XMMWORD[32+rsp],xmm9 + ror eax,7 + add edx,ebp + add ecx,DWORD[56+rsp] + pslld xmm3,2 + xor esi,eax + mov edi,edx + psrld xmm8,30 + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + por xmm3,xmm8 + add ecx,edx + add ebx,DWORD[60+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD[rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + paddd xmm10,xmm3 + add eax,esi + xor edi,edx + movdqa XMMWORD[48+rsp],xmm10 + ror ecx,7 + add eax,ebx + add ebp,DWORD[4+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD[8+rsp] + xor esi,ebx + mov edi,ebp + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD[12+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + add ecx,edx + cmp r9,r10 + je NEAR $L$done_ssse3 + movdqa xmm6,XMMWORD[64+r14] + movdqa xmm9,XMMWORD[((-64))+r14] + movdqu xmm0,XMMWORD[r9] + movdqu xmm1,XMMWORD[16+r9] + movdqu xmm2,XMMWORD[32+r9] + movdqu xmm3,XMMWORD[48+r9] +DB 102,15,56,0,198 + add r9,64 + add ebx,DWORD[16+rsp] + xor esi,ebp + mov edi,ecx +DB 102,15,56,0,206 + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + paddd xmm0,xmm9 + add ebx,ecx + add eax,DWORD[20+rsp] + xor edi,edx + mov esi,ebx + movdqa XMMWORD[rsp],xmm0 + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + psubd xmm0,xmm9 + add eax,ebx + add ebp,DWORD[24+rsp] + xor esi,ecx + mov edi,eax + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD[28+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD[32+rsp] + xor esi,eax + mov edi,edx +DB 102,15,56,0,214 + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + paddd xmm1,xmm9 + add ecx,edx + add ebx,DWORD[36+rsp] + xor edi,ebp + mov esi,ecx + movdqa XMMWORD[16+rsp],xmm1 + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + psubd xmm1,xmm9 + add ebx,ecx + add eax,DWORD[40+rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD[44+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD[48+rsp] + xor esi,ebx + mov edi,ebp +DB 102,15,56,0,222 + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + paddd xmm2,xmm9 + add edx,ebp + add ecx,DWORD[52+rsp] + xor edi,eax + mov esi,edx + movdqa XMMWORD[32+rsp],xmm2 + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + psubd xmm2,xmm9 + add ecx,edx + add ebx,DWORD[56+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD[60+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + ror ecx,7 + add eax,ebx + add eax,DWORD[r8] + add esi,DWORD[4+r8] + add ecx,DWORD[8+r8] + add edx,DWORD[12+r8] + mov DWORD[r8],eax + add ebp,DWORD[16+r8] + mov DWORD[4+r8],esi + mov ebx,esi + mov DWORD[8+r8],ecx + mov edi,ecx + mov DWORD[12+r8],edx + xor edi,edx + mov DWORD[16+r8],ebp + and esi,edi + jmp NEAR $L$oop_ssse3 + +ALIGN 16 +$L$done_ssse3: + add ebx,DWORD[16+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD[20+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD[24+rsp] + xor esi,ecx + mov edi,eax + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD[28+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD[32+rsp] + xor esi,eax + mov edi,edx + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + add ecx,edx + add ebx,DWORD[36+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD[40+rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD[44+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD[48+rsp] + xor esi,ebx + mov edi,ebp + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD[52+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + add ecx,edx + add ebx,DWORD[56+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD[60+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + ror ecx,7 + add eax,ebx + add eax,DWORD[r8] + add esi,DWORD[4+r8] + add ecx,DWORD[8+r8] + mov DWORD[r8],eax + add edx,DWORD[12+r8] + mov DWORD[4+r8],esi + add ebp,DWORD[16+r8] + mov DWORD[8+r8],ecx + mov DWORD[12+r8],edx + mov DWORD[16+r8],ebp + movaps xmm6,XMMWORD[((-40-96))+r11] + movaps xmm7,XMMWORD[((-40-80))+r11] + movaps xmm8,XMMWORD[((-40-64))+r11] + movaps xmm9,XMMWORD[((-40-48))+r11] + movaps xmm10,XMMWORD[((-40-32))+r11] + movaps xmm11,XMMWORD[((-40-16))+r11] + mov r14,QWORD[((-40))+r11] + + mov r13,QWORD[((-32))+r11] + + mov r12,QWORD[((-24))+r11] + + mov rbp,QWORD[((-16))+r11] + + mov rbx,QWORD[((-8))+r11] + + lea rsp,[r11] + +$L$epilogue_ssse3: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha1_block_data_order_ssse3: + +ALIGN 16 +sha1_block_data_order_avx: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_avx: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_avx_shortcut: + + mov r11,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + lea rsp,[((-160))+rsp] + vzeroupper + vmovaps XMMWORD[(-40-96)+r11],xmm6 + vmovaps XMMWORD[(-40-80)+r11],xmm7 + vmovaps XMMWORD[(-40-64)+r11],xmm8 + vmovaps XMMWORD[(-40-48)+r11],xmm9 + vmovaps XMMWORD[(-40-32)+r11],xmm10 + vmovaps XMMWORD[(-40-16)+r11],xmm11 +$L$prologue_avx: + and rsp,-64 + mov r8,rdi + mov r9,rsi + mov r10,rdx + + shl r10,6 + add r10,r9 + lea r14,[((K_XX_XX+64))] + + mov eax,DWORD[r8] + mov ebx,DWORD[4+r8] + mov ecx,DWORD[8+r8] + mov edx,DWORD[12+r8] + mov esi,ebx + mov ebp,DWORD[16+r8] + mov edi,ecx + xor edi,edx + and esi,edi + + vmovdqa xmm6,XMMWORD[64+r14] + vmovdqa xmm11,XMMWORD[((-64))+r14] + vmovdqu xmm0,XMMWORD[r9] + vmovdqu xmm1,XMMWORD[16+r9] + vmovdqu xmm2,XMMWORD[32+r9] + vmovdqu xmm3,XMMWORD[48+r9] + vpshufb xmm0,xmm0,xmm6 + add r9,64 + vpshufb xmm1,xmm1,xmm6 + vpshufb xmm2,xmm2,xmm6 + vpshufb xmm3,xmm3,xmm6 + vpaddd xmm4,xmm0,xmm11 + vpaddd xmm5,xmm1,xmm11 + vpaddd xmm6,xmm2,xmm11 + vmovdqa XMMWORD[rsp],xmm4 + vmovdqa XMMWORD[16+rsp],xmm5 + vmovdqa XMMWORD[32+rsp],xmm6 + jmp NEAR $L$oop_avx +ALIGN 16 +$L$oop_avx: + shrd ebx,ebx,2 + xor esi,edx + vpalignr xmm4,xmm1,xmm0,8 + mov edi,eax + add ebp,DWORD[rsp] + vpaddd xmm9,xmm11,xmm3 + xor ebx,ecx + shld eax,eax,5 + vpsrldq xmm8,xmm3,4 + add ebp,esi + and edi,ebx + vpxor xmm4,xmm4,xmm0 + xor ebx,ecx + add ebp,eax + vpxor xmm8,xmm8,xmm2 + shrd eax,eax,7 + xor edi,ecx + mov esi,ebp + add edx,DWORD[4+rsp] + vpxor xmm4,xmm4,xmm8 + xor eax,ebx + shld ebp,ebp,5 + vmovdqa XMMWORD[48+rsp],xmm9 + add edx,edi + and esi,eax + vpsrld xmm8,xmm4,31 + xor eax,ebx + add edx,ebp + shrd ebp,ebp,7 + xor esi,ebx + vpslldq xmm10,xmm4,12 + vpaddd xmm4,xmm4,xmm4 + mov edi,edx + add ecx,DWORD[8+rsp] + xor ebp,eax + shld edx,edx,5 + vpsrld xmm9,xmm10,30 + vpor xmm4,xmm4,xmm8 + add ecx,esi + and edi,ebp + xor ebp,eax + add ecx,edx + vpslld xmm10,xmm10,2 + vpxor xmm4,xmm4,xmm9 + shrd edx,edx,7 + xor edi,eax + mov esi,ecx + add ebx,DWORD[12+rsp] + vpxor xmm4,xmm4,xmm10 + xor edx,ebp + shld ecx,ecx,5 + add ebx,edi + and esi,edx + xor edx,ebp + add ebx,ecx + shrd ecx,ecx,7 + xor esi,ebp + vpalignr xmm5,xmm2,xmm1,8 + mov edi,ebx + add eax,DWORD[16+rsp] + vpaddd xmm9,xmm11,xmm4 + xor ecx,edx + shld ebx,ebx,5 + vpsrldq xmm8,xmm4,4 + add eax,esi + and edi,ecx + vpxor xmm5,xmm5,xmm1 + xor ecx,edx + add eax,ebx + vpxor xmm8,xmm8,xmm3 + shrd ebx,ebx,7 + xor edi,edx + mov esi,eax + add ebp,DWORD[20+rsp] + vpxor xmm5,xmm5,xmm8 + xor ebx,ecx + shld eax,eax,5 + vmovdqa XMMWORD[rsp],xmm9 + add ebp,edi + and esi,ebx + vpsrld xmm8,xmm5,31 + xor ebx,ecx + add ebp,eax + shrd eax,eax,7 + xor esi,ecx + vpslldq xmm10,xmm5,12 + vpaddd xmm5,xmm5,xmm5 + mov edi,ebp + add edx,DWORD[24+rsp] + xor eax,ebx + shld ebp,ebp,5 + vpsrld xmm9,xmm10,30 + vpor xmm5,xmm5,xmm8 + add edx,esi + and edi,eax + xor eax,ebx + add edx,ebp + vpslld xmm10,xmm10,2 + vpxor xmm5,xmm5,xmm9 + shrd ebp,ebp,7 + xor edi,ebx + mov esi,edx + add ecx,DWORD[28+rsp] + vpxor xmm5,xmm5,xmm10 + xor ebp,eax + shld edx,edx,5 + vmovdqa xmm11,XMMWORD[((-32))+r14] + add ecx,edi + and esi,ebp + xor ebp,eax + add ecx,edx + shrd edx,edx,7 + xor esi,eax + vpalignr xmm6,xmm3,xmm2,8 + mov edi,ecx + add ebx,DWORD[32+rsp] + vpaddd xmm9,xmm11,xmm5 + xor edx,ebp + shld ecx,ecx,5 + vpsrldq xmm8,xmm5,4 + add ebx,esi + and edi,edx + vpxor xmm6,xmm6,xmm2 + xor edx,ebp + add ebx,ecx + vpxor xmm8,xmm8,xmm4 + shrd ecx,ecx,7 + xor edi,ebp + mov esi,ebx + add eax,DWORD[36+rsp] + vpxor xmm6,xmm6,xmm8 + xor ecx,edx + shld ebx,ebx,5 + vmovdqa XMMWORD[16+rsp],xmm9 + add eax,edi + and esi,ecx + vpsrld xmm8,xmm6,31 + xor ecx,edx + add eax,ebx + shrd ebx,ebx,7 + xor esi,edx + vpslldq xmm10,xmm6,12 + vpaddd xmm6,xmm6,xmm6 + mov edi,eax + add ebp,DWORD[40+rsp] + xor ebx,ecx + shld eax,eax,5 + vpsrld xmm9,xmm10,30 + vpor xmm6,xmm6,xmm8 + add ebp,esi + and edi,ebx + xor ebx,ecx + add ebp,eax + vpslld xmm10,xmm10,2 + vpxor xmm6,xmm6,xmm9 + shrd eax,eax,7 + xor edi,ecx + mov esi,ebp + add edx,DWORD[44+rsp] + vpxor xmm6,xmm6,xmm10 + xor eax,ebx + shld ebp,ebp,5 + add edx,edi + and esi,eax + xor eax,ebx + add edx,ebp + shrd ebp,ebp,7 + xor esi,ebx + vpalignr xmm7,xmm4,xmm3,8 + mov edi,edx + add ecx,DWORD[48+rsp] + vpaddd xmm9,xmm11,xmm6 + xor ebp,eax + shld edx,edx,5 + vpsrldq xmm8,xmm6,4 + add ecx,esi + and edi,ebp + vpxor xmm7,xmm7,xmm3 + xor ebp,eax + add ecx,edx + vpxor xmm8,xmm8,xmm5 + shrd edx,edx,7 + xor edi,eax + mov esi,ecx + add ebx,DWORD[52+rsp] + vpxor xmm7,xmm7,xmm8 + xor edx,ebp + shld ecx,ecx,5 + vmovdqa XMMWORD[32+rsp],xmm9 + add ebx,edi + and esi,edx + vpsrld xmm8,xmm7,31 + xor edx,ebp + add ebx,ecx + shrd ecx,ecx,7 + xor esi,ebp + vpslldq xmm10,xmm7,12 + vpaddd xmm7,xmm7,xmm7 + mov edi,ebx + add eax,DWORD[56+rsp] + xor ecx,edx + shld ebx,ebx,5 + vpsrld xmm9,xmm10,30 + vpor xmm7,xmm7,xmm8 + add eax,esi + and edi,ecx + xor ecx,edx + add eax,ebx + vpslld xmm10,xmm10,2 + vpxor xmm7,xmm7,xmm9 + shrd ebx,ebx,7 + xor edi,edx + mov esi,eax + add ebp,DWORD[60+rsp] + vpxor xmm7,xmm7,xmm10 + xor ebx,ecx + shld eax,eax,5 + add ebp,edi + and esi,ebx + xor ebx,ecx + add ebp,eax + vpalignr xmm8,xmm7,xmm6,8 + vpxor xmm0,xmm0,xmm4 + shrd eax,eax,7 + xor esi,ecx + mov edi,ebp + add edx,DWORD[rsp] + vpxor xmm0,xmm0,xmm1 + xor eax,ebx + shld ebp,ebp,5 + vpaddd xmm9,xmm11,xmm7 + add edx,esi + and edi,eax + vpxor xmm0,xmm0,xmm8 + xor eax,ebx + add edx,ebp + shrd ebp,ebp,7 + xor edi,ebx + vpsrld xmm8,xmm0,30 + vmovdqa XMMWORD[48+rsp],xmm9 + mov esi,edx + add ecx,DWORD[4+rsp] + xor ebp,eax + shld edx,edx,5 + vpslld xmm0,xmm0,2 + add ecx,edi + and esi,ebp + xor ebp,eax + add ecx,edx + shrd edx,edx,7 + xor esi,eax + mov edi,ecx + add ebx,DWORD[8+rsp] + vpor xmm0,xmm0,xmm8 + xor edx,ebp + shld ecx,ecx,5 + add ebx,esi + and edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD[12+rsp] + xor edi,ebp + mov esi,ebx + shld ebx,ebx,5 + add eax,edi + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + vpalignr xmm8,xmm0,xmm7,8 + vpxor xmm1,xmm1,xmm5 + add ebp,DWORD[16+rsp] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + vpxor xmm1,xmm1,xmm2 + add ebp,esi + xor edi,ecx + vpaddd xmm9,xmm11,xmm0 + shrd ebx,ebx,7 + add ebp,eax + vpxor xmm1,xmm1,xmm8 + add edx,DWORD[20+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + vpsrld xmm8,xmm1,30 + vmovdqa XMMWORD[rsp],xmm9 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + vpslld xmm1,xmm1,2 + add ecx,DWORD[24+rsp] + xor esi,eax + mov edi,edx + shld edx,edx,5 + add ecx,esi + xor edi,eax + shrd ebp,ebp,7 + add ecx,edx + vpor xmm1,xmm1,xmm8 + add ebx,DWORD[28+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + vpalignr xmm8,xmm1,xmm0,8 + vpxor xmm2,xmm2,xmm6 + add eax,DWORD[32+rsp] + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + vpxor xmm2,xmm2,xmm3 + add eax,esi + xor edi,edx + vpaddd xmm9,xmm11,xmm1 + vmovdqa xmm11,XMMWORD[r14] + shrd ecx,ecx,7 + add eax,ebx + vpxor xmm2,xmm2,xmm8 + add ebp,DWORD[36+rsp] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + vpsrld xmm8,xmm2,30 + vmovdqa XMMWORD[16+rsp],xmm9 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + vpslld xmm2,xmm2,2 + add edx,DWORD[40+rsp] + xor esi,ebx + mov edi,ebp + shld ebp,ebp,5 + add edx,esi + xor edi,ebx + shrd eax,eax,7 + add edx,ebp + vpor xmm2,xmm2,xmm8 + add ecx,DWORD[44+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + add ecx,edi + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + vpalignr xmm8,xmm2,xmm1,8 + vpxor xmm3,xmm3,xmm7 + add ebx,DWORD[48+rsp] + xor esi,ebp + mov edi,ecx + shld ecx,ecx,5 + vpxor xmm3,xmm3,xmm4 + add ebx,esi + xor edi,ebp + vpaddd xmm9,xmm11,xmm2 + shrd edx,edx,7 + add ebx,ecx + vpxor xmm3,xmm3,xmm8 + add eax,DWORD[52+rsp] + xor edi,edx + mov esi,ebx + shld ebx,ebx,5 + vpsrld xmm8,xmm3,30 + vmovdqa XMMWORD[32+rsp],xmm9 + add eax,edi + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + vpslld xmm3,xmm3,2 + add ebp,DWORD[56+rsp] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + add ebp,esi + xor edi,ecx + shrd ebx,ebx,7 + add ebp,eax + vpor xmm3,xmm3,xmm8 + add edx,DWORD[60+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + vpalignr xmm8,xmm3,xmm2,8 + vpxor xmm4,xmm4,xmm0 + add ecx,DWORD[rsp] + xor esi,eax + mov edi,edx + shld edx,edx,5 + vpxor xmm4,xmm4,xmm5 + add ecx,esi + xor edi,eax + vpaddd xmm9,xmm11,xmm3 + shrd ebp,ebp,7 + add ecx,edx + vpxor xmm4,xmm4,xmm8 + add ebx,DWORD[4+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + vpsrld xmm8,xmm4,30 + vmovdqa XMMWORD[48+rsp],xmm9 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + vpslld xmm4,xmm4,2 + add eax,DWORD[8+rsp] + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + add eax,esi + xor edi,edx + shrd ecx,ecx,7 + add eax,ebx + vpor xmm4,xmm4,xmm8 + add ebp,DWORD[12+rsp] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + vpalignr xmm8,xmm4,xmm3,8 + vpxor xmm5,xmm5,xmm1 + add edx,DWORD[16+rsp] + xor esi,ebx + mov edi,ebp + shld ebp,ebp,5 + vpxor xmm5,xmm5,xmm6 + add edx,esi + xor edi,ebx + vpaddd xmm9,xmm11,xmm4 + shrd eax,eax,7 + add edx,ebp + vpxor xmm5,xmm5,xmm8 + add ecx,DWORD[20+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + vpsrld xmm8,xmm5,30 + vmovdqa XMMWORD[rsp],xmm9 + add ecx,edi + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + vpslld xmm5,xmm5,2 + add ebx,DWORD[24+rsp] + xor esi,ebp + mov edi,ecx + shld ecx,ecx,5 + add ebx,esi + xor edi,ebp + shrd edx,edx,7 + add ebx,ecx + vpor xmm5,xmm5,xmm8 + add eax,DWORD[28+rsp] + shrd ecx,ecx,7 + mov esi,ebx + xor edi,edx + shld ebx,ebx,5 + add eax,edi + xor esi,ecx + xor ecx,edx + add eax,ebx + vpalignr xmm8,xmm5,xmm4,8 + vpxor xmm6,xmm6,xmm2 + add ebp,DWORD[32+rsp] + and esi,ecx + xor ecx,edx + shrd ebx,ebx,7 + vpxor xmm6,xmm6,xmm7 + mov edi,eax + xor esi,ecx + vpaddd xmm9,xmm11,xmm5 + shld eax,eax,5 + add ebp,esi + vpxor xmm6,xmm6,xmm8 + xor edi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD[36+rsp] + vpsrld xmm8,xmm6,30 + vmovdqa XMMWORD[16+rsp],xmm9 + and edi,ebx + xor ebx,ecx + shrd eax,eax,7 + mov esi,ebp + vpslld xmm6,xmm6,2 + xor edi,ebx + shld ebp,ebp,5 + add edx,edi + xor esi,eax + xor eax,ebx + add edx,ebp + add ecx,DWORD[40+rsp] + and esi,eax + vpor xmm6,xmm6,xmm8 + xor eax,ebx + shrd ebp,ebp,7 + mov edi,edx + xor esi,eax + shld edx,edx,5 + add ecx,esi + xor edi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD[44+rsp] + and edi,ebp + xor ebp,eax + shrd edx,edx,7 + mov esi,ecx + xor edi,ebp + shld ecx,ecx,5 + add ebx,edi + xor esi,edx + xor edx,ebp + add ebx,ecx + vpalignr xmm8,xmm6,xmm5,8 + vpxor xmm7,xmm7,xmm3 + add eax,DWORD[48+rsp] + and esi,edx + xor edx,ebp + shrd ecx,ecx,7 + vpxor xmm7,xmm7,xmm0 + mov edi,ebx + xor esi,edx + vpaddd xmm9,xmm11,xmm6 + vmovdqa xmm11,XMMWORD[32+r14] + shld ebx,ebx,5 + add eax,esi + vpxor xmm7,xmm7,xmm8 + xor edi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD[52+rsp] + vpsrld xmm8,xmm7,30 + vmovdqa XMMWORD[32+rsp],xmm9 + and edi,ecx + xor ecx,edx + shrd ebx,ebx,7 + mov esi,eax + vpslld xmm7,xmm7,2 + xor edi,ecx + shld eax,eax,5 + add ebp,edi + xor esi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD[56+rsp] + and esi,ebx + vpor xmm7,xmm7,xmm8 + xor ebx,ecx + shrd eax,eax,7 + mov edi,ebp + xor esi,ebx + shld ebp,ebp,5 + add edx,esi + xor edi,eax + xor eax,ebx + add edx,ebp + add ecx,DWORD[60+rsp] + and edi,eax + xor eax,ebx + shrd ebp,ebp,7 + mov esi,edx + xor edi,eax + shld edx,edx,5 + add ecx,edi + xor esi,ebp + xor ebp,eax + add ecx,edx + vpalignr xmm8,xmm7,xmm6,8 + vpxor xmm0,xmm0,xmm4 + add ebx,DWORD[rsp] + and esi,ebp + xor ebp,eax + shrd edx,edx,7 + vpxor xmm0,xmm0,xmm1 + mov edi,ecx + xor esi,ebp + vpaddd xmm9,xmm11,xmm7 + shld ecx,ecx,5 + add ebx,esi + vpxor xmm0,xmm0,xmm8 + xor edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD[4+rsp] + vpsrld xmm8,xmm0,30 + vmovdqa XMMWORD[48+rsp],xmm9 + and edi,edx + xor edx,ebp + shrd ecx,ecx,7 + mov esi,ebx + vpslld xmm0,xmm0,2 + xor edi,edx + shld ebx,ebx,5 + add eax,edi + xor esi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD[8+rsp] + and esi,ecx + vpor xmm0,xmm0,xmm8 + xor ecx,edx + shrd ebx,ebx,7 + mov edi,eax + xor esi,ecx + shld eax,eax,5 + add ebp,esi + xor edi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD[12+rsp] + and edi,ebx + xor ebx,ecx + shrd eax,eax,7 + mov esi,ebp + xor edi,ebx + shld ebp,ebp,5 + add edx,edi + xor esi,eax + xor eax,ebx + add edx,ebp + vpalignr xmm8,xmm0,xmm7,8 + vpxor xmm1,xmm1,xmm5 + add ecx,DWORD[16+rsp] + and esi,eax + xor eax,ebx + shrd ebp,ebp,7 + vpxor xmm1,xmm1,xmm2 + mov edi,edx + xor esi,eax + vpaddd xmm9,xmm11,xmm0 + shld edx,edx,5 + add ecx,esi + vpxor xmm1,xmm1,xmm8 + xor edi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD[20+rsp] + vpsrld xmm8,xmm1,30 + vmovdqa XMMWORD[rsp],xmm9 + and edi,ebp + xor ebp,eax + shrd edx,edx,7 + mov esi,ecx + vpslld xmm1,xmm1,2 + xor edi,ebp + shld ecx,ecx,5 + add ebx,edi + xor esi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD[24+rsp] + and esi,edx + vpor xmm1,xmm1,xmm8 + xor edx,ebp + shrd ecx,ecx,7 + mov edi,ebx + xor esi,edx + shld ebx,ebx,5 + add eax,esi + xor edi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD[28+rsp] + and edi,ecx + xor ecx,edx + shrd ebx,ebx,7 + mov esi,eax + xor edi,ecx + shld eax,eax,5 + add ebp,edi + xor esi,ebx + xor ebx,ecx + add ebp,eax + vpalignr xmm8,xmm1,xmm0,8 + vpxor xmm2,xmm2,xmm6 + add edx,DWORD[32+rsp] + and esi,ebx + xor ebx,ecx + shrd eax,eax,7 + vpxor xmm2,xmm2,xmm3 + mov edi,ebp + xor esi,ebx + vpaddd xmm9,xmm11,xmm1 + shld ebp,ebp,5 + add edx,esi + vpxor xmm2,xmm2,xmm8 + xor edi,eax + xor eax,ebx + add edx,ebp + add ecx,DWORD[36+rsp] + vpsrld xmm8,xmm2,30 + vmovdqa XMMWORD[16+rsp],xmm9 + and edi,eax + xor eax,ebx + shrd ebp,ebp,7 + mov esi,edx + vpslld xmm2,xmm2,2 + xor edi,eax + shld edx,edx,5 + add ecx,edi + xor esi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD[40+rsp] + and esi,ebp + vpor xmm2,xmm2,xmm8 + xor ebp,eax + shrd edx,edx,7 + mov edi,ecx + xor esi,ebp + shld ecx,ecx,5 + add ebx,esi + xor edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD[44+rsp] + and edi,edx + xor edx,ebp + shrd ecx,ecx,7 + mov esi,ebx + xor edi,edx + shld ebx,ebx,5 + add eax,edi + xor esi,edx + add eax,ebx + vpalignr xmm8,xmm2,xmm1,8 + vpxor xmm3,xmm3,xmm7 + add ebp,DWORD[48+rsp] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + vpxor xmm3,xmm3,xmm4 + add ebp,esi + xor edi,ecx + vpaddd xmm9,xmm11,xmm2 + shrd ebx,ebx,7 + add ebp,eax + vpxor xmm3,xmm3,xmm8 + add edx,DWORD[52+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + vpsrld xmm8,xmm3,30 + vmovdqa XMMWORD[32+rsp],xmm9 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + vpslld xmm3,xmm3,2 + add ecx,DWORD[56+rsp] + xor esi,eax + mov edi,edx + shld edx,edx,5 + add ecx,esi + xor edi,eax + shrd ebp,ebp,7 + add ecx,edx + vpor xmm3,xmm3,xmm8 + add ebx,DWORD[60+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD[rsp] + vpaddd xmm9,xmm11,xmm3 + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + add eax,esi + vmovdqa XMMWORD[48+rsp],xmm9 + xor edi,edx + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD[4+rsp] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD[8+rsp] + xor esi,ebx + mov edi,ebp + shld ebp,ebp,5 + add edx,esi + xor edi,ebx + shrd eax,eax,7 + add edx,ebp + add ecx,DWORD[12+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + add ecx,edi + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + cmp r9,r10 + je NEAR $L$done_avx + vmovdqa xmm6,XMMWORD[64+r14] + vmovdqa xmm11,XMMWORD[((-64))+r14] + vmovdqu xmm0,XMMWORD[r9] + vmovdqu xmm1,XMMWORD[16+r9] + vmovdqu xmm2,XMMWORD[32+r9] + vmovdqu xmm3,XMMWORD[48+r9] + vpshufb xmm0,xmm0,xmm6 + add r9,64 + add ebx,DWORD[16+rsp] + xor esi,ebp + vpshufb xmm1,xmm1,xmm6 + mov edi,ecx + shld ecx,ecx,5 + vpaddd xmm4,xmm0,xmm11 + add ebx,esi + xor edi,ebp + shrd edx,edx,7 + add ebx,ecx + vmovdqa XMMWORD[rsp],xmm4 + add eax,DWORD[20+rsp] + xor edi,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,edi + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD[24+rsp] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + add ebp,esi + xor edi,ecx + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD[28+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + add ecx,DWORD[32+rsp] + xor esi,eax + vpshufb xmm2,xmm2,xmm6 + mov edi,edx + shld edx,edx,5 + vpaddd xmm5,xmm1,xmm11 + add ecx,esi + xor edi,eax + shrd ebp,ebp,7 + add ecx,edx + vmovdqa XMMWORD[16+rsp],xmm5 + add ebx,DWORD[36+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD[40+rsp] + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + add eax,esi + xor edi,edx + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD[44+rsp] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD[48+rsp] + xor esi,ebx + vpshufb xmm3,xmm3,xmm6 + mov edi,ebp + shld ebp,ebp,5 + vpaddd xmm6,xmm2,xmm11 + add edx,esi + xor edi,ebx + shrd eax,eax,7 + add edx,ebp + vmovdqa XMMWORD[32+rsp],xmm6 + add ecx,DWORD[52+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + add ecx,edi + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + add ebx,DWORD[56+rsp] + xor esi,ebp + mov edi,ecx + shld ecx,ecx,5 + add ebx,esi + xor edi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD[60+rsp] + xor edi,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,edi + shrd ecx,ecx,7 + add eax,ebx + add eax,DWORD[r8] + add esi,DWORD[4+r8] + add ecx,DWORD[8+r8] + add edx,DWORD[12+r8] + mov DWORD[r8],eax + add ebp,DWORD[16+r8] + mov DWORD[4+r8],esi + mov ebx,esi + mov DWORD[8+r8],ecx + mov edi,ecx + mov DWORD[12+r8],edx + xor edi,edx + mov DWORD[16+r8],ebp + and esi,edi + jmp NEAR $L$oop_avx + +ALIGN 16 +$L$done_avx: + add ebx,DWORD[16+rsp] + xor esi,ebp + mov edi,ecx + shld ecx,ecx,5 + add ebx,esi + xor edi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD[20+rsp] + xor edi,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,edi + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD[24+rsp] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + add ebp,esi + xor edi,ecx + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD[28+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + add ecx,DWORD[32+rsp] + xor esi,eax + mov edi,edx + shld edx,edx,5 + add ecx,esi + xor edi,eax + shrd ebp,ebp,7 + add ecx,edx + add ebx,DWORD[36+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD[40+rsp] + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + add eax,esi + xor edi,edx + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD[44+rsp] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD[48+rsp] + xor esi,ebx + mov edi,ebp + shld ebp,ebp,5 + add edx,esi + xor edi,ebx + shrd eax,eax,7 + add edx,ebp + add ecx,DWORD[52+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + add ecx,edi + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + add ebx,DWORD[56+rsp] + xor esi,ebp + mov edi,ecx + shld ecx,ecx,5 + add ebx,esi + xor edi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD[60+rsp] + xor edi,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,edi + shrd ecx,ecx,7 + add eax,ebx + vzeroupper + + add eax,DWORD[r8] + add esi,DWORD[4+r8] + add ecx,DWORD[8+r8] + mov DWORD[r8],eax + add edx,DWORD[12+r8] + mov DWORD[4+r8],esi + add ebp,DWORD[16+r8] + mov DWORD[8+r8],ecx + mov DWORD[12+r8],edx + mov DWORD[16+r8],ebp + movaps xmm6,XMMWORD[((-40-96))+r11] + movaps xmm7,XMMWORD[((-40-80))+r11] + movaps xmm8,XMMWORD[((-40-64))+r11] + movaps xmm9,XMMWORD[((-40-48))+r11] + movaps xmm10,XMMWORD[((-40-32))+r11] + movaps xmm11,XMMWORD[((-40-16))+r11] + mov r14,QWORD[((-40))+r11] + + mov r13,QWORD[((-32))+r11] + + mov r12,QWORD[((-24))+r11] + + mov rbp,QWORD[((-16))+r11] + + mov rbx,QWORD[((-8))+r11] + + lea rsp,[r11] + +$L$epilogue_avx: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha1_block_data_order_avx: + +ALIGN 16 +sha1_block_data_order_avx2: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_avx2: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_avx2_shortcut: + + mov r11,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + vzeroupper + lea rsp,[((-96))+rsp] + vmovaps XMMWORD[(-40-96)+r11],xmm6 + vmovaps XMMWORD[(-40-80)+r11],xmm7 + vmovaps XMMWORD[(-40-64)+r11],xmm8 + vmovaps XMMWORD[(-40-48)+r11],xmm9 + vmovaps XMMWORD[(-40-32)+r11],xmm10 + vmovaps XMMWORD[(-40-16)+r11],xmm11 +$L$prologue_avx2: + mov r8,rdi + mov r9,rsi + mov r10,rdx + + lea rsp,[((-640))+rsp] + shl r10,6 + lea r13,[64+r9] + and rsp,-128 + add r10,r9 + lea r14,[((K_XX_XX+64))] + + mov eax,DWORD[r8] + cmp r13,r10 + cmovae r13,r9 + mov ebp,DWORD[4+r8] + mov ecx,DWORD[8+r8] + mov edx,DWORD[12+r8] + mov esi,DWORD[16+r8] + vmovdqu ymm6,YMMWORD[64+r14] + + vmovdqu xmm0,XMMWORD[r9] + vmovdqu xmm1,XMMWORD[16+r9] + vmovdqu xmm2,XMMWORD[32+r9] + vmovdqu xmm3,XMMWORD[48+r9] + lea r9,[64+r9] + vinserti128 ymm0,ymm0,XMMWORD[r13],1 + vinserti128 ymm1,ymm1,XMMWORD[16+r13],1 + vpshufb ymm0,ymm0,ymm6 + vinserti128 ymm2,ymm2,XMMWORD[32+r13],1 + vpshufb ymm1,ymm1,ymm6 + vinserti128 ymm3,ymm3,XMMWORD[48+r13],1 + vpshufb ymm2,ymm2,ymm6 + vmovdqu ymm11,YMMWORD[((-64))+r14] + vpshufb ymm3,ymm3,ymm6 + + vpaddd ymm4,ymm0,ymm11 + vpaddd ymm5,ymm1,ymm11 + vmovdqu YMMWORD[rsp],ymm4 + vpaddd ymm6,ymm2,ymm11 + vmovdqu YMMWORD[32+rsp],ymm5 + vpaddd ymm7,ymm3,ymm11 + vmovdqu YMMWORD[64+rsp],ymm6 + vmovdqu YMMWORD[96+rsp],ymm7 + vpalignr ymm4,ymm1,ymm0,8 + vpsrldq ymm8,ymm3,4 + vpxor ymm4,ymm4,ymm0 + vpxor ymm8,ymm8,ymm2 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm8,ymm4,31 + vpslldq ymm10,ymm4,12 + vpaddd ymm4,ymm4,ymm4 + vpsrld ymm9,ymm10,30 + vpor ymm4,ymm4,ymm8 + vpslld ymm10,ymm10,2 + vpxor ymm4,ymm4,ymm9 + vpxor ymm4,ymm4,ymm10 + vpaddd ymm9,ymm4,ymm11 + vmovdqu YMMWORD[128+rsp],ymm9 + vpalignr ymm5,ymm2,ymm1,8 + vpsrldq ymm8,ymm4,4 + vpxor ymm5,ymm5,ymm1 + vpxor ymm8,ymm8,ymm3 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm5,31 + vmovdqu ymm11,YMMWORD[((-32))+r14] + vpslldq ymm10,ymm5,12 + vpaddd ymm5,ymm5,ymm5 + vpsrld ymm9,ymm10,30 + vpor ymm5,ymm5,ymm8 + vpslld ymm10,ymm10,2 + vpxor ymm5,ymm5,ymm9 + vpxor ymm5,ymm5,ymm10 + vpaddd ymm9,ymm5,ymm11 + vmovdqu YMMWORD[160+rsp],ymm9 + vpalignr ymm6,ymm3,ymm2,8 + vpsrldq ymm8,ymm5,4 + vpxor ymm6,ymm6,ymm2 + vpxor ymm8,ymm8,ymm4 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm6,31 + vpslldq ymm10,ymm6,12 + vpaddd ymm6,ymm6,ymm6 + vpsrld ymm9,ymm10,30 + vpor ymm6,ymm6,ymm8 + vpslld ymm10,ymm10,2 + vpxor ymm6,ymm6,ymm9 + vpxor ymm6,ymm6,ymm10 + vpaddd ymm9,ymm6,ymm11 + vmovdqu YMMWORD[192+rsp],ymm9 + vpalignr ymm7,ymm4,ymm3,8 + vpsrldq ymm8,ymm6,4 + vpxor ymm7,ymm7,ymm3 + vpxor ymm8,ymm8,ymm5 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm7,31 + vpslldq ymm10,ymm7,12 + vpaddd ymm7,ymm7,ymm7 + vpsrld ymm9,ymm10,30 + vpor ymm7,ymm7,ymm8 + vpslld ymm10,ymm10,2 + vpxor ymm7,ymm7,ymm9 + vpxor ymm7,ymm7,ymm10 + vpaddd ymm9,ymm7,ymm11 + vmovdqu YMMWORD[224+rsp],ymm9 + lea r13,[128+rsp] + jmp NEAR $L$oop_avx2 +ALIGN 32 +$L$oop_avx2: + rorx ebx,ebp,2 + andn edi,ebp,edx + and ebp,ecx + xor ebp,edi + jmp NEAR $L$align32_1 +ALIGN 32 +$L$align32_1: + vpalignr ymm8,ymm7,ymm6,8 + vpxor ymm0,ymm0,ymm4 + add esi,DWORD[((-128))+r13] + andn edi,eax,ecx + vpxor ymm0,ymm0,ymm1 + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + vpxor ymm0,ymm0,ymm8 + and eax,ebx + add esi,r12d + xor eax,edi + vpsrld ymm8,ymm0,30 + vpslld ymm0,ymm0,2 + add edx,DWORD[((-124))+r13] + andn edi,esi,ebx + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + vpor ymm0,ymm0,ymm8 + add edx,r12d + xor esi,edi + add ecx,DWORD[((-120))+r13] + andn edi,edx,ebp + vpaddd ymm9,ymm0,ymm11 + add ecx,esi + rorx r12d,edx,27 + rorx esi,edx,2 + and edx,eax + vmovdqu YMMWORD[256+rsp],ymm9 + add ecx,r12d + xor edx,edi + add ebx,DWORD[((-116))+r13] + andn edi,ecx,eax + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + and ecx,esi + add ebx,r12d + xor ecx,edi + add ebp,DWORD[((-96))+r13] + andn edi,ebx,esi + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + and ebx,edx + add ebp,r12d + xor ebx,edi + vpalignr ymm8,ymm0,ymm7,8 + vpxor ymm1,ymm1,ymm5 + add eax,DWORD[((-92))+r13] + andn edi,ebp,edx + vpxor ymm1,ymm1,ymm2 + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + vpxor ymm1,ymm1,ymm8 + and ebp,ecx + add eax,r12d + xor ebp,edi + vpsrld ymm8,ymm1,30 + vpslld ymm1,ymm1,2 + add esi,DWORD[((-88))+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + vpor ymm1,ymm1,ymm8 + add esi,r12d + xor eax,edi + add edx,DWORD[((-84))+r13] + andn edi,esi,ebx + vpaddd ymm9,ymm1,ymm11 + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + vmovdqu YMMWORD[288+rsp],ymm9 + add edx,r12d + xor esi,edi + add ecx,DWORD[((-64))+r13] + andn edi,edx,ebp + add ecx,esi + rorx r12d,edx,27 + rorx esi,edx,2 + and edx,eax + add ecx,r12d + xor edx,edi + add ebx,DWORD[((-60))+r13] + andn edi,ecx,eax + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + and ecx,esi + add ebx,r12d + xor ecx,edi + vpalignr ymm8,ymm1,ymm0,8 + vpxor ymm2,ymm2,ymm6 + add ebp,DWORD[((-56))+r13] + andn edi,ebx,esi + vpxor ymm2,ymm2,ymm3 + vmovdqu ymm11,YMMWORD[r14] + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + vpxor ymm2,ymm2,ymm8 + and ebx,edx + add ebp,r12d + xor ebx,edi + vpsrld ymm8,ymm2,30 + vpslld ymm2,ymm2,2 + add eax,DWORD[((-52))+r13] + andn edi,ebp,edx + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + and ebp,ecx + vpor ymm2,ymm2,ymm8 + add eax,r12d + xor ebp,edi + add esi,DWORD[((-32))+r13] + andn edi,eax,ecx + vpaddd ymm9,ymm2,ymm11 + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + vmovdqu YMMWORD[320+rsp],ymm9 + add esi,r12d + xor eax,edi + add edx,DWORD[((-28))+r13] + andn edi,esi,ebx + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + add edx,r12d + xor esi,edi + add ecx,DWORD[((-24))+r13] + andn edi,edx,ebp + add ecx,esi + rorx r12d,edx,27 + rorx esi,edx,2 + and edx,eax + add ecx,r12d + xor edx,edi + vpalignr ymm8,ymm2,ymm1,8 + vpxor ymm3,ymm3,ymm7 + add ebx,DWORD[((-20))+r13] + andn edi,ecx,eax + vpxor ymm3,ymm3,ymm4 + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + vpxor ymm3,ymm3,ymm8 + and ecx,esi + add ebx,r12d + xor ecx,edi + vpsrld ymm8,ymm3,30 + vpslld ymm3,ymm3,2 + add ebp,DWORD[r13] + andn edi,ebx,esi + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + and ebx,edx + vpor ymm3,ymm3,ymm8 + add ebp,r12d + xor ebx,edi + add eax,DWORD[4+r13] + andn edi,ebp,edx + vpaddd ymm9,ymm3,ymm11 + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + and ebp,ecx + vmovdqu YMMWORD[352+rsp],ymm9 + add eax,r12d + xor ebp,edi + add esi,DWORD[8+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + add esi,r12d + xor eax,edi + add edx,DWORD[12+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + vpalignr ymm8,ymm3,ymm2,8 + vpxor ymm4,ymm4,ymm0 + add ecx,DWORD[32+r13] + lea ecx,[rsi*1+rcx] + vpxor ymm4,ymm4,ymm5 + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + vpxor ymm4,ymm4,ymm8 + add ecx,r12d + xor edx,ebp + add ebx,DWORD[36+r13] + vpsrld ymm8,ymm4,30 + vpslld ymm4,ymm4,2 + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + vpor ymm4,ymm4,ymm8 + add ebp,DWORD[40+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + vpaddd ymm9,ymm4,ymm11 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD[44+r13] + vmovdqu YMMWORD[384+rsp],ymm9 + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD[64+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + vpalignr ymm8,ymm4,ymm3,8 + vpxor ymm5,ymm5,ymm1 + add edx,DWORD[68+r13] + lea edx,[rax*1+rdx] + vpxor ymm5,ymm5,ymm6 + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + vpxor ymm5,ymm5,ymm8 + add edx,r12d + xor esi,ebx + add ecx,DWORD[72+r13] + vpsrld ymm8,ymm5,30 + vpslld ymm5,ymm5,2 + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + vpor ymm5,ymm5,ymm8 + add ebx,DWORD[76+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + vpaddd ymm9,ymm5,ymm11 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[96+r13] + vmovdqu YMMWORD[416+rsp],ymm9 + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD[100+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + vpalignr ymm8,ymm5,ymm4,8 + vpxor ymm6,ymm6,ymm2 + add esi,DWORD[104+r13] + lea esi,[rbp*1+rsi] + vpxor ymm6,ymm6,ymm7 + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + vpxor ymm6,ymm6,ymm8 + add esi,r12d + xor eax,ecx + add edx,DWORD[108+r13] + lea r13,[256+r13] + vpsrld ymm8,ymm6,30 + vpslld ymm6,ymm6,2 + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + vpor ymm6,ymm6,ymm8 + add ecx,DWORD[((-128))+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + vpaddd ymm9,ymm6,ymm11 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[((-124))+r13] + vmovdqu YMMWORD[448+rsp],ymm9 + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[((-120))+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + vpalignr ymm8,ymm6,ymm5,8 + vpxor ymm7,ymm7,ymm3 + add eax,DWORD[((-116))+r13] + lea eax,[rbx*1+rax] + vpxor ymm7,ymm7,ymm0 + vmovdqu ymm11,YMMWORD[32+r14] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + vpxor ymm7,ymm7,ymm8 + add eax,r12d + xor ebp,edx + add esi,DWORD[((-96))+r13] + vpsrld ymm8,ymm7,30 + vpslld ymm7,ymm7,2 + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + vpor ymm7,ymm7,ymm8 + add edx,DWORD[((-92))+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + vpaddd ymm9,ymm7,ymm11 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD[((-88))+r13] + vmovdqu YMMWORD[480+rsp],ymm9 + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[((-84))+r13] + mov edi,esi + xor edi,eax + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + and ecx,edi + jmp NEAR $L$align32_2 +ALIGN 32 +$L$align32_2: + vpalignr ymm8,ymm7,ymm6,8 + vpxor ymm0,ymm0,ymm4 + add ebp,DWORD[((-64))+r13] + xor ecx,esi + vpxor ymm0,ymm0,ymm1 + mov edi,edx + xor edi,esi + lea ebp,[rbp*1+rcx] + vpxor ymm0,ymm0,ymm8 + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + vpsrld ymm8,ymm0,30 + vpslld ymm0,ymm0,2 + add ebp,r12d + and ebx,edi + add eax,DWORD[((-60))+r13] + xor ebx,edx + mov edi,ecx + xor edi,edx + vpor ymm0,ymm0,ymm8 + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + vpaddd ymm9,ymm0,ymm11 + add eax,r12d + and ebp,edi + add esi,DWORD[((-56))+r13] + xor ebp,ecx + vmovdqu YMMWORD[512+rsp],ymm9 + mov edi,ebx + xor edi,ecx + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + add edx,DWORD[((-52))+r13] + xor eax,ebx + mov edi,ebp + xor edi,ebx + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + and esi,edi + add ecx,DWORD[((-32))+r13] + xor esi,ebp + mov edi,eax + xor edi,ebp + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + and edx,edi + vpalignr ymm8,ymm0,ymm7,8 + vpxor ymm1,ymm1,ymm5 + add ebx,DWORD[((-28))+r13] + xor edx,eax + vpxor ymm1,ymm1,ymm2 + mov edi,esi + xor edi,eax + lea ebx,[rdx*1+rbx] + vpxor ymm1,ymm1,ymm8 + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + vpsrld ymm8,ymm1,30 + vpslld ymm1,ymm1,2 + add ebx,r12d + and ecx,edi + add ebp,DWORD[((-24))+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + vpor ymm1,ymm1,ymm8 + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + vpaddd ymm9,ymm1,ymm11 + add ebp,r12d + and ebx,edi + add eax,DWORD[((-20))+r13] + xor ebx,edx + vmovdqu YMMWORD[544+rsp],ymm9 + mov edi,ecx + xor edi,edx + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + and ebp,edi + add esi,DWORD[r13] + xor ebp,ecx + mov edi,ebx + xor edi,ecx + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + add edx,DWORD[4+r13] + xor eax,ebx + mov edi,ebp + xor edi,ebx + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + and esi,edi + vpalignr ymm8,ymm1,ymm0,8 + vpxor ymm2,ymm2,ymm6 + add ecx,DWORD[8+r13] + xor esi,ebp + vpxor ymm2,ymm2,ymm3 + mov edi,eax + xor edi,ebp + lea ecx,[rsi*1+rcx] + vpxor ymm2,ymm2,ymm8 + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + vpsrld ymm8,ymm2,30 + vpslld ymm2,ymm2,2 + add ecx,r12d + and edx,edi + add ebx,DWORD[12+r13] + xor edx,eax + mov edi,esi + xor edi,eax + vpor ymm2,ymm2,ymm8 + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + vpaddd ymm9,ymm2,ymm11 + add ebx,r12d + and ecx,edi + add ebp,DWORD[32+r13] + xor ecx,esi + vmovdqu YMMWORD[576+rsp],ymm9 + mov edi,edx + xor edi,esi + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD[36+r13] + xor ebx,edx + mov edi,ecx + xor edi,edx + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + and ebp,edi + add esi,DWORD[40+r13] + xor ebp,ecx + mov edi,ebx + xor edi,ecx + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + vpalignr ymm8,ymm2,ymm1,8 + vpxor ymm3,ymm3,ymm7 + add edx,DWORD[44+r13] + xor eax,ebx + vpxor ymm3,ymm3,ymm4 + mov edi,ebp + xor edi,ebx + lea edx,[rax*1+rdx] + vpxor ymm3,ymm3,ymm8 + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + vpsrld ymm8,ymm3,30 + vpslld ymm3,ymm3,2 + add edx,r12d + and esi,edi + add ecx,DWORD[64+r13] + xor esi,ebp + mov edi,eax + xor edi,ebp + vpor ymm3,ymm3,ymm8 + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + vpaddd ymm9,ymm3,ymm11 + add ecx,r12d + and edx,edi + add ebx,DWORD[68+r13] + xor edx,eax + vmovdqu YMMWORD[608+rsp],ymm9 + mov edi,esi + xor edi,eax + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + and ecx,edi + add ebp,DWORD[72+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD[76+r13] + xor ebx,edx + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD[96+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD[100+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD[104+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[108+r13] + lea r13,[256+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[((-128))+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD[((-124))+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD[((-120))+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD[((-116))+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD[((-96))+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[((-92))+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[((-88))+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD[((-84))+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD[((-64))+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD[((-60))+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD[((-56))+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[((-52))+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[((-32))+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD[((-28))+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD[((-24))+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD[((-20))+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + add edx,r12d + lea r13,[128+r9] + lea rdi,[128+r9] + cmp r13,r10 + cmovae r13,r9 + + + add edx,DWORD[r8] + add esi,DWORD[4+r8] + add ebp,DWORD[8+r8] + mov DWORD[r8],edx + add ebx,DWORD[12+r8] + mov DWORD[4+r8],esi + mov eax,edx + add ecx,DWORD[16+r8] + mov r12d,ebp + mov DWORD[8+r8],ebp + mov edx,ebx + + mov DWORD[12+r8],ebx + mov ebp,esi + mov DWORD[16+r8],ecx + + mov esi,ecx + mov ecx,r12d + + + cmp r9,r10 + je NEAR $L$done_avx2 + vmovdqu ymm6,YMMWORD[64+r14] + cmp rdi,r10 + ja NEAR $L$ast_avx2 + + vmovdqu xmm0,XMMWORD[((-64))+rdi] + vmovdqu xmm1,XMMWORD[((-48))+rdi] + vmovdqu xmm2,XMMWORD[((-32))+rdi] + vmovdqu xmm3,XMMWORD[((-16))+rdi] + vinserti128 ymm0,ymm0,XMMWORD[r13],1 + vinserti128 ymm1,ymm1,XMMWORD[16+r13],1 + vinserti128 ymm2,ymm2,XMMWORD[32+r13],1 + vinserti128 ymm3,ymm3,XMMWORD[48+r13],1 + jmp NEAR $L$ast_avx2 + +ALIGN 32 +$L$ast_avx2: + lea r13,[((128+16))+rsp] + rorx ebx,ebp,2 + andn edi,ebp,edx + and ebp,ecx + xor ebp,edi + sub r9,-128 + add esi,DWORD[((-128))+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + add esi,r12d + xor eax,edi + add edx,DWORD[((-124))+r13] + andn edi,esi,ebx + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + add edx,r12d + xor esi,edi + add ecx,DWORD[((-120))+r13] + andn edi,edx,ebp + add ecx,esi + rorx r12d,edx,27 + rorx esi,edx,2 + and edx,eax + add ecx,r12d + xor edx,edi + add ebx,DWORD[((-116))+r13] + andn edi,ecx,eax + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + and ecx,esi + add ebx,r12d + xor ecx,edi + add ebp,DWORD[((-96))+r13] + andn edi,ebx,esi + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + and ebx,edx + add ebp,r12d + xor ebx,edi + add eax,DWORD[((-92))+r13] + andn edi,ebp,edx + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + and ebp,ecx + add eax,r12d + xor ebp,edi + add esi,DWORD[((-88))+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + add esi,r12d + xor eax,edi + add edx,DWORD[((-84))+r13] + andn edi,esi,ebx + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + add edx,r12d + xor esi,edi + add ecx,DWORD[((-64))+r13] + andn edi,edx,ebp + add ecx,esi + rorx r12d,edx,27 + rorx esi,edx,2 + and edx,eax + add ecx,r12d + xor edx,edi + add ebx,DWORD[((-60))+r13] + andn edi,ecx,eax + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + and ecx,esi + add ebx,r12d + xor ecx,edi + add ebp,DWORD[((-56))+r13] + andn edi,ebx,esi + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + and ebx,edx + add ebp,r12d + xor ebx,edi + add eax,DWORD[((-52))+r13] + andn edi,ebp,edx + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + and ebp,ecx + add eax,r12d + xor ebp,edi + add esi,DWORD[((-32))+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + add esi,r12d + xor eax,edi + add edx,DWORD[((-28))+r13] + andn edi,esi,ebx + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + add edx,r12d + xor esi,edi + add ecx,DWORD[((-24))+r13] + andn edi,edx,ebp + add ecx,esi + rorx r12d,edx,27 + rorx esi,edx,2 + and edx,eax + add ecx,r12d + xor edx,edi + add ebx,DWORD[((-20))+r13] + andn edi,ecx,eax + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + and ecx,esi + add ebx,r12d + xor ecx,edi + add ebp,DWORD[r13] + andn edi,ebx,esi + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + and ebx,edx + add ebp,r12d + xor ebx,edi + add eax,DWORD[4+r13] + andn edi,ebp,edx + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + and ebp,ecx + add eax,r12d + xor ebp,edi + add esi,DWORD[8+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + add esi,r12d + xor eax,edi + add edx,DWORD[12+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD[32+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[36+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[40+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD[44+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD[64+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + vmovdqu ymm11,YMMWORD[((-64))+r14] + vpshufb ymm0,ymm0,ymm6 + add edx,DWORD[68+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD[72+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[76+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[96+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD[100+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + vpshufb ymm1,ymm1,ymm6 + vpaddd ymm8,ymm0,ymm11 + add esi,DWORD[104+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD[108+r13] + lea r13,[256+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD[((-128))+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[((-124))+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[((-120))+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + vmovdqu YMMWORD[rsp],ymm8 + vpshufb ymm2,ymm2,ymm6 + vpaddd ymm9,ymm1,ymm11 + add eax,DWORD[((-116))+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD[((-96))+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD[((-92))+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD[((-88))+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[((-84))+r13] + mov edi,esi + xor edi,eax + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + and ecx,edi + vmovdqu YMMWORD[32+rsp],ymm9 + vpshufb ymm3,ymm3,ymm6 + vpaddd ymm6,ymm2,ymm11 + add ebp,DWORD[((-64))+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD[((-60))+r13] + xor ebx,edx + mov edi,ecx + xor edi,edx + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + and ebp,edi + add esi,DWORD[((-56))+r13] + xor ebp,ecx + mov edi,ebx + xor edi,ecx + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + add edx,DWORD[((-52))+r13] + xor eax,ebx + mov edi,ebp + xor edi,ebx + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + and esi,edi + add ecx,DWORD[((-32))+r13] + xor esi,ebp + mov edi,eax + xor edi,ebp + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + and edx,edi + jmp NEAR $L$align32_3 +ALIGN 32 +$L$align32_3: + vmovdqu YMMWORD[64+rsp],ymm6 + vpaddd ymm7,ymm3,ymm11 + add ebx,DWORD[((-28))+r13] + xor edx,eax + mov edi,esi + xor edi,eax + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + and ecx,edi + add ebp,DWORD[((-24))+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD[((-20))+r13] + xor ebx,edx + mov edi,ecx + xor edi,edx + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + and ebp,edi + add esi,DWORD[r13] + xor ebp,ecx + mov edi,ebx + xor edi,ecx + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + add edx,DWORD[4+r13] + xor eax,ebx + mov edi,ebp + xor edi,ebx + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + and esi,edi + vmovdqu YMMWORD[96+rsp],ymm7 + add ecx,DWORD[8+r13] + xor esi,ebp + mov edi,eax + xor edi,ebp + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + and edx,edi + add ebx,DWORD[12+r13] + xor edx,eax + mov edi,esi + xor edi,eax + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + and ecx,edi + add ebp,DWORD[32+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD[36+r13] + xor ebx,edx + mov edi,ecx + xor edi,edx + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + and ebp,edi + add esi,DWORD[40+r13] + xor ebp,ecx + mov edi,ebx + xor edi,ecx + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + vpalignr ymm4,ymm1,ymm0,8 + add edx,DWORD[44+r13] + xor eax,ebx + mov edi,ebp + xor edi,ebx + vpsrldq ymm8,ymm3,4 + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + vpxor ymm4,ymm4,ymm0 + vpxor ymm8,ymm8,ymm2 + xor esi,ebp + add edx,r12d + vpxor ymm4,ymm4,ymm8 + and esi,edi + add ecx,DWORD[64+r13] + xor esi,ebp + mov edi,eax + vpsrld ymm8,ymm4,31 + xor edi,ebp + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + vpslldq ymm10,ymm4,12 + vpaddd ymm4,ymm4,ymm4 + rorx esi,edx,2 + xor edx,eax + vpsrld ymm9,ymm10,30 + vpor ymm4,ymm4,ymm8 + add ecx,r12d + and edx,edi + vpslld ymm10,ymm10,2 + vpxor ymm4,ymm4,ymm9 + add ebx,DWORD[68+r13] + xor edx,eax + vpxor ymm4,ymm4,ymm10 + mov edi,esi + xor edi,eax + lea ebx,[rdx*1+rbx] + vpaddd ymm9,ymm4,ymm11 + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + vmovdqu YMMWORD[128+rsp],ymm9 + add ebx,r12d + and ecx,edi + add ebp,DWORD[72+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD[76+r13] + xor ebx,edx + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + vpalignr ymm5,ymm2,ymm1,8 + add esi,DWORD[96+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + vpsrldq ymm8,ymm4,4 + xor eax,ebx + add esi,r12d + xor eax,ecx + vpxor ymm5,ymm5,ymm1 + vpxor ymm8,ymm8,ymm3 + add edx,DWORD[100+r13] + lea edx,[rax*1+rdx] + vpxor ymm5,ymm5,ymm8 + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + vpsrld ymm8,ymm5,31 + vmovdqu ymm11,YMMWORD[((-32))+r14] + xor esi,ebx + add ecx,DWORD[104+r13] + lea ecx,[rsi*1+rcx] + vpslldq ymm10,ymm5,12 + vpaddd ymm5,ymm5,ymm5 + rorx r12d,edx,27 + rorx esi,edx,2 + vpsrld ymm9,ymm10,30 + vpor ymm5,ymm5,ymm8 + xor edx,eax + add ecx,r12d + vpslld ymm10,ymm10,2 + vpxor ymm5,ymm5,ymm9 + xor edx,ebp + add ebx,DWORD[108+r13] + lea r13,[256+r13] + vpxor ymm5,ymm5,ymm10 + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + vpaddd ymm9,ymm5,ymm11 + xor ecx,esi + add ebx,r12d + xor ecx,eax + vmovdqu YMMWORD[160+rsp],ymm9 + add ebp,DWORD[((-128))+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + vpalignr ymm6,ymm3,ymm2,8 + add eax,DWORD[((-124))+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + vpsrldq ymm8,ymm5,4 + xor ebp,ecx + add eax,r12d + xor ebp,edx + vpxor ymm6,ymm6,ymm2 + vpxor ymm8,ymm8,ymm4 + add esi,DWORD[((-120))+r13] + lea esi,[rbp*1+rsi] + vpxor ymm6,ymm6,ymm8 + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + vpsrld ymm8,ymm6,31 + xor eax,ecx + add edx,DWORD[((-116))+r13] + lea edx,[rax*1+rdx] + vpslldq ymm10,ymm6,12 + vpaddd ymm6,ymm6,ymm6 + rorx r12d,esi,27 + rorx eax,esi,2 + vpsrld ymm9,ymm10,30 + vpor ymm6,ymm6,ymm8 + xor esi,ebp + add edx,r12d + vpslld ymm10,ymm10,2 + vpxor ymm6,ymm6,ymm9 + xor esi,ebx + add ecx,DWORD[((-96))+r13] + vpxor ymm6,ymm6,ymm10 + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + vpaddd ymm9,ymm6,ymm11 + xor edx,eax + add ecx,r12d + xor edx,ebp + vmovdqu YMMWORD[192+rsp],ymm9 + add ebx,DWORD[((-92))+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + vpalignr ymm7,ymm4,ymm3,8 + add ebp,DWORD[((-88))+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + vpsrldq ymm8,ymm6,4 + xor ebx,edx + add ebp,r12d + xor ebx,esi + vpxor ymm7,ymm7,ymm3 + vpxor ymm8,ymm8,ymm5 + add eax,DWORD[((-84))+r13] + lea eax,[rbx*1+rax] + vpxor ymm7,ymm7,ymm8 + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + vpsrld ymm8,ymm7,31 + xor ebp,edx + add esi,DWORD[((-64))+r13] + lea esi,[rbp*1+rsi] + vpslldq ymm10,ymm7,12 + vpaddd ymm7,ymm7,ymm7 + rorx r12d,eax,27 + rorx ebp,eax,2 + vpsrld ymm9,ymm10,30 + vpor ymm7,ymm7,ymm8 + xor eax,ebx + add esi,r12d + vpslld ymm10,ymm10,2 + vpxor ymm7,ymm7,ymm9 + xor eax,ecx + add edx,DWORD[((-60))+r13] + vpxor ymm7,ymm7,ymm10 + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + vpaddd ymm9,ymm7,ymm11 + xor esi,ebp + add edx,r12d + xor esi,ebx + vmovdqu YMMWORD[224+rsp],ymm9 + add ecx,DWORD[((-56))+r13] + lea ecx,[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD[((-52))+r13] + lea ebx,[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD[((-32))+r13] + lea ebp,[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD[((-28))+r13] + lea eax,[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD[((-24))+r13] + lea esi,[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD[((-20))+r13] + lea edx,[rax*1+rdx] + rorx r12d,esi,27 + add edx,r12d + lea r13,[128+rsp] + + + add edx,DWORD[r8] + add esi,DWORD[4+r8] + add ebp,DWORD[8+r8] + mov DWORD[r8],edx + add ebx,DWORD[12+r8] + mov DWORD[4+r8],esi + mov eax,edx + add ecx,DWORD[16+r8] + mov r12d,ebp + mov DWORD[8+r8],ebp + mov edx,ebx + + mov DWORD[12+r8],ebx + mov ebp,esi + mov DWORD[16+r8],ecx + + mov esi,ecx + mov ecx,r12d + + + cmp r9,r10 + jbe NEAR $L$oop_avx2 + +$L$done_avx2: + vzeroupper + movaps xmm6,XMMWORD[((-40-96))+r11] + movaps xmm7,XMMWORD[((-40-80))+r11] + movaps xmm8,XMMWORD[((-40-64))+r11] + movaps xmm9,XMMWORD[((-40-48))+r11] + movaps xmm10,XMMWORD[((-40-32))+r11] + movaps xmm11,XMMWORD[((-40-16))+r11] + mov r14,QWORD[((-40))+r11] + + mov r13,QWORD[((-32))+r11] + + mov r12,QWORD[((-24))+r11] + + mov rbp,QWORD[((-16))+r11] + + mov rbx,QWORD[((-8))+r11] + + lea rsp,[r11] + +$L$epilogue_avx2: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha1_block_data_order_avx2: +ALIGN 64 +K_XX_XX: + DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999 + DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999 + DD 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 + DD 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 + DD 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc + DD 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc + DD 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 + DD 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 + DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +DB 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 +DB 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +DB 102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44 +DB 32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60 +DB 97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114 +DB 103,62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + lea r10,[$L$prologue] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[152+r8] + + lea r10,[$L$epilogue] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + mov rax,QWORD[64+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + + jmp NEAR $L$common_seh_tail + + +ALIGN 16 +shaext_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + lea r10,[$L$prologue_shaext] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + lea r10,[$L$epilogue_shaext] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + lea rsi,[((-8-64))+rax] + lea rdi,[512+r8] + mov ecx,8 + DD 0xa548f3fc + + jmp NEAR $L$common_seh_tail + + +ALIGN 16 +ssse3_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$common_seh_tail + + mov rax,QWORD[208+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$common_seh_tail + + lea rsi,[((-40-96))+rax] + lea rdi,[512+r8] + mov ecx,12 + DD 0xa548f3fc + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + +$L$common_seh_tail: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_sha1_block_data_order wrt ..imagebase + DD $L$SEH_end_sha1_block_data_order wrt ..imagebase + DD $L$SEH_info_sha1_block_data_order wrt ..imagebase + DD $L$SEH_begin_sha1_block_data_order_shaext wrt ..imagebase + DD $L$SEH_end_sha1_block_data_order_shaext wrt ..imagebase + DD $L$SEH_info_sha1_block_data_order_shaext wrt ..imagebase + DD $L$SEH_begin_sha1_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_end_sha1_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_info_sha1_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_begin_sha1_block_data_order_avx wrt ..imagebase + DD $L$SEH_end_sha1_block_data_order_avx wrt ..imagebase + DD $L$SEH_info_sha1_block_data_order_avx wrt ..imagebase + DD $L$SEH_begin_sha1_block_data_order_avx2 wrt ..imagebase + DD $L$SEH_end_sha1_block_data_order_avx2 wrt ..imagebase + DD $L$SEH_info_sha1_block_data_order_avx2 wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_sha1_block_data_order: +DB 9,0,0,0 + DD se_handler wrt ..imagebase +$L$SEH_info_sha1_block_data_order_shaext: +DB 9,0,0,0 + DD shaext_handler wrt ..imagebase +$L$SEH_info_sha1_block_data_order_ssse3: +DB 9,0,0,0 + DD ssse3_handler wrt ..imagebase + DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase +$L$SEH_info_sha1_block_data_order_avx: +DB 9,0,0,0 + DD ssse3_handler wrt ..imagebase + DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase +$L$SEH_info_sha1_block_data_order_avx2: +DB 9,0,0,0 + DD ssse3_handler wrt ..imagebase + DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase diff --git a/vere/ext/openssl/gen/windows-x86_64/crypto/sha/sha256-mb-x86_64.asm b/vere/ext/openssl/gen/windows-x86_64/crypto/sha/sha256-mb-x86_64.asm new file mode 100644 index 0000000..f075400 --- /dev/null +++ b/vere/ext/openssl/gen/windows-x86_64/crypto/sha/sha256-mb-x86_64.asm @@ -0,0 +1,8255 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + +EXTERN OPENSSL_ia32cap_P + +global sha256_multi_block + +ALIGN 32 +sha256_multi_block: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_multi_block: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + + mov rcx,QWORD[((OPENSSL_ia32cap_P+4))] + bt rcx,61 + jc NEAR _shaext_shortcut + test ecx,268435456 + jnz NEAR _avx_shortcut + mov rax,rsp + + push rbx + + push rbp + + lea rsp,[((-168))+rsp] + movaps XMMWORD[rsp],xmm6 + movaps XMMWORD[16+rsp],xmm7 + movaps XMMWORD[32+rsp],xmm8 + movaps XMMWORD[48+rsp],xmm9 + movaps XMMWORD[(-120)+rax],xmm10 + movaps XMMWORD[(-104)+rax],xmm11 + movaps XMMWORD[(-88)+rax],xmm12 + movaps XMMWORD[(-72)+rax],xmm13 + movaps XMMWORD[(-56)+rax],xmm14 + movaps XMMWORD[(-40)+rax],xmm15 + sub rsp,288 + and rsp,-256 + mov QWORD[272+rsp],rax + +$L$body: + lea rbp,[((K256+128))] + lea rbx,[256+rsp] + lea rdi,[128+rdi] + +$L$oop_grande: + mov DWORD[280+rsp],edx + xor edx,edx + mov r8,QWORD[rsi] + mov ecx,DWORD[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[rbx],ecx + cmovle r8,rbp + mov r9,QWORD[16+rsi] + mov ecx,DWORD[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[4+rbx],ecx + cmovle r9,rbp + mov r10,QWORD[32+rsi] + mov ecx,DWORD[40+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[8+rbx],ecx + cmovle r10,rbp + mov r11,QWORD[48+rsi] + mov ecx,DWORD[56+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[12+rbx],ecx + cmovle r11,rbp + test edx,edx + jz NEAR $L$done + + movdqu xmm8,XMMWORD[((0-128))+rdi] + lea rax,[128+rsp] + movdqu xmm9,XMMWORD[((32-128))+rdi] + movdqu xmm10,XMMWORD[((64-128))+rdi] + movdqu xmm11,XMMWORD[((96-128))+rdi] + movdqu xmm12,XMMWORD[((128-128))+rdi] + movdqu xmm13,XMMWORD[((160-128))+rdi] + movdqu xmm14,XMMWORD[((192-128))+rdi] + movdqu xmm15,XMMWORD[((224-128))+rdi] + movdqu xmm6,XMMWORD[$L$pbswap] + jmp NEAR $L$oop + +ALIGN 32 +$L$oop: + movdqa xmm4,xmm10 + pxor xmm4,xmm9 + movd xmm5,DWORD[r8] + movd xmm0,DWORD[r9] + movd xmm1,DWORD[r10] + movd xmm2,DWORD[r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm12 +DB 102,15,56,0,238 + movdqa xmm2,xmm12 + + psrld xmm7,6 + movdqa xmm1,xmm12 + pslld xmm2,7 + movdqa XMMWORD[(0-128)+rax],xmm5 + paddd xmm5,xmm15 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[((-128))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm12 + + pxor xmm7,xmm2 + movdqa xmm3,xmm12 + pslld xmm2,26-21 + pandn xmm0,xmm14 + pand xmm3,xmm13 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm8 + pxor xmm7,xmm2 + movdqa xmm2,xmm8 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm9 + movdqa xmm7,xmm8 + pslld xmm2,10 + pxor xmm3,xmm8 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm15,xmm9 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm15,xmm4 + paddd xmm11,xmm5 + pxor xmm7,xmm2 + + paddd xmm15,xmm5 + paddd xmm15,xmm7 + movd xmm5,DWORD[4+r8] + movd xmm0,DWORD[4+r9] + movd xmm1,DWORD[4+r10] + movd xmm2,DWORD[4+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm11 + + movdqa xmm2,xmm11 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm11 + pslld xmm2,7 + movdqa XMMWORD[(16-128)+rax],xmm5 + paddd xmm5,xmm14 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[((-96))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm11 + + pxor xmm7,xmm2 + movdqa xmm4,xmm11 + pslld xmm2,26-21 + pandn xmm0,xmm13 + pand xmm4,xmm12 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm15 + pxor xmm7,xmm2 + movdqa xmm2,xmm15 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm8 + movdqa xmm7,xmm15 + pslld xmm2,10 + pxor xmm4,xmm15 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm14,xmm8 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm14,xmm3 + paddd xmm10,xmm5 + pxor xmm7,xmm2 + + paddd xmm14,xmm5 + paddd xmm14,xmm7 + movd xmm5,DWORD[8+r8] + movd xmm0,DWORD[8+r9] + movd xmm1,DWORD[8+r10] + movd xmm2,DWORD[8+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm10 +DB 102,15,56,0,238 + movdqa xmm2,xmm10 + + psrld xmm7,6 + movdqa xmm1,xmm10 + pslld xmm2,7 + movdqa XMMWORD[(32-128)+rax],xmm5 + paddd xmm5,xmm13 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[((-64))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm10 + + pxor xmm7,xmm2 + movdqa xmm3,xmm10 + pslld xmm2,26-21 + pandn xmm0,xmm12 + pand xmm3,xmm11 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm14 + pxor xmm7,xmm2 + movdqa xmm2,xmm14 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm15 + movdqa xmm7,xmm14 + pslld xmm2,10 + pxor xmm3,xmm14 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm13,xmm15 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm13,xmm4 + paddd xmm9,xmm5 + pxor xmm7,xmm2 + + paddd xmm13,xmm5 + paddd xmm13,xmm7 + movd xmm5,DWORD[12+r8] + movd xmm0,DWORD[12+r9] + movd xmm1,DWORD[12+r10] + movd xmm2,DWORD[12+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm9 + + movdqa xmm2,xmm9 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm9 + pslld xmm2,7 + movdqa XMMWORD[(48-128)+rax],xmm5 + paddd xmm5,xmm12 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[((-32))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm9 + + pxor xmm7,xmm2 + movdqa xmm4,xmm9 + pslld xmm2,26-21 + pandn xmm0,xmm11 + pand xmm4,xmm10 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm13 + pxor xmm7,xmm2 + movdqa xmm2,xmm13 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm14 + movdqa xmm7,xmm13 + pslld xmm2,10 + pxor xmm4,xmm13 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm12,xmm14 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm12,xmm3 + paddd xmm8,xmm5 + pxor xmm7,xmm2 + + paddd xmm12,xmm5 + paddd xmm12,xmm7 + movd xmm5,DWORD[16+r8] + movd xmm0,DWORD[16+r9] + movd xmm1,DWORD[16+r10] + movd xmm2,DWORD[16+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm8 +DB 102,15,56,0,238 + movdqa xmm2,xmm8 + + psrld xmm7,6 + movdqa xmm1,xmm8 + pslld xmm2,7 + movdqa XMMWORD[(64-128)+rax],xmm5 + paddd xmm5,xmm11 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm8 + + pxor xmm7,xmm2 + movdqa xmm3,xmm8 + pslld xmm2,26-21 + pandn xmm0,xmm10 + pand xmm3,xmm9 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm12 + pxor xmm7,xmm2 + movdqa xmm2,xmm12 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm13 + movdqa xmm7,xmm12 + pslld xmm2,10 + pxor xmm3,xmm12 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm11,xmm13 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm11,xmm4 + paddd xmm15,xmm5 + pxor xmm7,xmm2 + + paddd xmm11,xmm5 + paddd xmm11,xmm7 + movd xmm5,DWORD[20+r8] + movd xmm0,DWORD[20+r9] + movd xmm1,DWORD[20+r10] + movd xmm2,DWORD[20+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm15 + + movdqa xmm2,xmm15 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm15 + pslld xmm2,7 + movdqa XMMWORD[(80-128)+rax],xmm5 + paddd xmm5,xmm10 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[32+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm15 + + pxor xmm7,xmm2 + movdqa xmm4,xmm15 + pslld xmm2,26-21 + pandn xmm0,xmm9 + pand xmm4,xmm8 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm11 + pxor xmm7,xmm2 + movdqa xmm2,xmm11 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm12 + movdqa xmm7,xmm11 + pslld xmm2,10 + pxor xmm4,xmm11 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm10,xmm12 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm10,xmm3 + paddd xmm14,xmm5 + pxor xmm7,xmm2 + + paddd xmm10,xmm5 + paddd xmm10,xmm7 + movd xmm5,DWORD[24+r8] + movd xmm0,DWORD[24+r9] + movd xmm1,DWORD[24+r10] + movd xmm2,DWORD[24+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm14 +DB 102,15,56,0,238 + movdqa xmm2,xmm14 + + psrld xmm7,6 + movdqa xmm1,xmm14 + pslld xmm2,7 + movdqa XMMWORD[(96-128)+rax],xmm5 + paddd xmm5,xmm9 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[64+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm14 + + pxor xmm7,xmm2 + movdqa xmm3,xmm14 + pslld xmm2,26-21 + pandn xmm0,xmm8 + pand xmm3,xmm15 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm10 + pxor xmm7,xmm2 + movdqa xmm2,xmm10 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm11 + movdqa xmm7,xmm10 + pslld xmm2,10 + pxor xmm3,xmm10 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm9,xmm11 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm9,xmm4 + paddd xmm13,xmm5 + pxor xmm7,xmm2 + + paddd xmm9,xmm5 + paddd xmm9,xmm7 + movd xmm5,DWORD[28+r8] + movd xmm0,DWORD[28+r9] + movd xmm1,DWORD[28+r10] + movd xmm2,DWORD[28+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm13 + + movdqa xmm2,xmm13 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm13 + pslld xmm2,7 + movdqa XMMWORD[(112-128)+rax],xmm5 + paddd xmm5,xmm8 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[96+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm13 + + pxor xmm7,xmm2 + movdqa xmm4,xmm13 + pslld xmm2,26-21 + pandn xmm0,xmm15 + pand xmm4,xmm14 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm9 + pxor xmm7,xmm2 + movdqa xmm2,xmm9 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm10 + movdqa xmm7,xmm9 + pslld xmm2,10 + pxor xmm4,xmm9 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm8,xmm10 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm8,xmm3 + paddd xmm12,xmm5 + pxor xmm7,xmm2 + + paddd xmm8,xmm5 + paddd xmm8,xmm7 + lea rbp,[256+rbp] + movd xmm5,DWORD[32+r8] + movd xmm0,DWORD[32+r9] + movd xmm1,DWORD[32+r10] + movd xmm2,DWORD[32+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm12 +DB 102,15,56,0,238 + movdqa xmm2,xmm12 + + psrld xmm7,6 + movdqa xmm1,xmm12 + pslld xmm2,7 + movdqa XMMWORD[(128-128)+rax],xmm5 + paddd xmm5,xmm15 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[((-128))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm12 + + pxor xmm7,xmm2 + movdqa xmm3,xmm12 + pslld xmm2,26-21 + pandn xmm0,xmm14 + pand xmm3,xmm13 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm8 + pxor xmm7,xmm2 + movdqa xmm2,xmm8 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm9 + movdqa xmm7,xmm8 + pslld xmm2,10 + pxor xmm3,xmm8 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm15,xmm9 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm15,xmm4 + paddd xmm11,xmm5 + pxor xmm7,xmm2 + + paddd xmm15,xmm5 + paddd xmm15,xmm7 + movd xmm5,DWORD[36+r8] + movd xmm0,DWORD[36+r9] + movd xmm1,DWORD[36+r10] + movd xmm2,DWORD[36+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm11 + + movdqa xmm2,xmm11 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm11 + pslld xmm2,7 + movdqa XMMWORD[(144-128)+rax],xmm5 + paddd xmm5,xmm14 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[((-96))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm11 + + pxor xmm7,xmm2 + movdqa xmm4,xmm11 + pslld xmm2,26-21 + pandn xmm0,xmm13 + pand xmm4,xmm12 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm15 + pxor xmm7,xmm2 + movdqa xmm2,xmm15 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm8 + movdqa xmm7,xmm15 + pslld xmm2,10 + pxor xmm4,xmm15 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm14,xmm8 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm14,xmm3 + paddd xmm10,xmm5 + pxor xmm7,xmm2 + + paddd xmm14,xmm5 + paddd xmm14,xmm7 + movd xmm5,DWORD[40+r8] + movd xmm0,DWORD[40+r9] + movd xmm1,DWORD[40+r10] + movd xmm2,DWORD[40+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm10 +DB 102,15,56,0,238 + movdqa xmm2,xmm10 + + psrld xmm7,6 + movdqa xmm1,xmm10 + pslld xmm2,7 + movdqa XMMWORD[(160-128)+rax],xmm5 + paddd xmm5,xmm13 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[((-64))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm10 + + pxor xmm7,xmm2 + movdqa xmm3,xmm10 + pslld xmm2,26-21 + pandn xmm0,xmm12 + pand xmm3,xmm11 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm14 + pxor xmm7,xmm2 + movdqa xmm2,xmm14 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm15 + movdqa xmm7,xmm14 + pslld xmm2,10 + pxor xmm3,xmm14 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm13,xmm15 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm13,xmm4 + paddd xmm9,xmm5 + pxor xmm7,xmm2 + + paddd xmm13,xmm5 + paddd xmm13,xmm7 + movd xmm5,DWORD[44+r8] + movd xmm0,DWORD[44+r9] + movd xmm1,DWORD[44+r10] + movd xmm2,DWORD[44+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm9 + + movdqa xmm2,xmm9 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm9 + pslld xmm2,7 + movdqa XMMWORD[(176-128)+rax],xmm5 + paddd xmm5,xmm12 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[((-32))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm9 + + pxor xmm7,xmm2 + movdqa xmm4,xmm9 + pslld xmm2,26-21 + pandn xmm0,xmm11 + pand xmm4,xmm10 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm13 + pxor xmm7,xmm2 + movdqa xmm2,xmm13 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm14 + movdqa xmm7,xmm13 + pslld xmm2,10 + pxor xmm4,xmm13 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm12,xmm14 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm12,xmm3 + paddd xmm8,xmm5 + pxor xmm7,xmm2 + + paddd xmm12,xmm5 + paddd xmm12,xmm7 + movd xmm5,DWORD[48+r8] + movd xmm0,DWORD[48+r9] + movd xmm1,DWORD[48+r10] + movd xmm2,DWORD[48+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm8 +DB 102,15,56,0,238 + movdqa xmm2,xmm8 + + psrld xmm7,6 + movdqa xmm1,xmm8 + pslld xmm2,7 + movdqa XMMWORD[(192-128)+rax],xmm5 + paddd xmm5,xmm11 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm8 + + pxor xmm7,xmm2 + movdqa xmm3,xmm8 + pslld xmm2,26-21 + pandn xmm0,xmm10 + pand xmm3,xmm9 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm12 + pxor xmm7,xmm2 + movdqa xmm2,xmm12 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm13 + movdqa xmm7,xmm12 + pslld xmm2,10 + pxor xmm3,xmm12 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm11,xmm13 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm11,xmm4 + paddd xmm15,xmm5 + pxor xmm7,xmm2 + + paddd xmm11,xmm5 + paddd xmm11,xmm7 + movd xmm5,DWORD[52+r8] + movd xmm0,DWORD[52+r9] + movd xmm1,DWORD[52+r10] + movd xmm2,DWORD[52+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm15 + + movdqa xmm2,xmm15 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm15 + pslld xmm2,7 + movdqa XMMWORD[(208-128)+rax],xmm5 + paddd xmm5,xmm10 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[32+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm15 + + pxor xmm7,xmm2 + movdqa xmm4,xmm15 + pslld xmm2,26-21 + pandn xmm0,xmm9 + pand xmm4,xmm8 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm11 + pxor xmm7,xmm2 + movdqa xmm2,xmm11 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm12 + movdqa xmm7,xmm11 + pslld xmm2,10 + pxor xmm4,xmm11 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm10,xmm12 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm10,xmm3 + paddd xmm14,xmm5 + pxor xmm7,xmm2 + + paddd xmm10,xmm5 + paddd xmm10,xmm7 + movd xmm5,DWORD[56+r8] + movd xmm0,DWORD[56+r9] + movd xmm1,DWORD[56+r10] + movd xmm2,DWORD[56+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm14 +DB 102,15,56,0,238 + movdqa xmm2,xmm14 + + psrld xmm7,6 + movdqa xmm1,xmm14 + pslld xmm2,7 + movdqa XMMWORD[(224-128)+rax],xmm5 + paddd xmm5,xmm9 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[64+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm14 + + pxor xmm7,xmm2 + movdqa xmm3,xmm14 + pslld xmm2,26-21 + pandn xmm0,xmm8 + pand xmm3,xmm15 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm10 + pxor xmm7,xmm2 + movdqa xmm2,xmm10 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm11 + movdqa xmm7,xmm10 + pslld xmm2,10 + pxor xmm3,xmm10 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm9,xmm11 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm9,xmm4 + paddd xmm13,xmm5 + pxor xmm7,xmm2 + + paddd xmm9,xmm5 + paddd xmm9,xmm7 + movd xmm5,DWORD[60+r8] + lea r8,[64+r8] + movd xmm0,DWORD[60+r9] + lea r9,[64+r9] + movd xmm1,DWORD[60+r10] + lea r10,[64+r10] + movd xmm2,DWORD[60+r11] + lea r11,[64+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm13 + + movdqa xmm2,xmm13 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm13 + pslld xmm2,7 + movdqa XMMWORD[(240-128)+rax],xmm5 + paddd xmm5,xmm8 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[96+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm13 + prefetcht0 [63+r8] + pxor xmm7,xmm2 + movdqa xmm4,xmm13 + pslld xmm2,26-21 + pandn xmm0,xmm15 + pand xmm4,xmm14 + pxor xmm7,xmm1 + + prefetcht0 [63+r9] + movdqa xmm1,xmm9 + pxor xmm7,xmm2 + movdqa xmm2,xmm9 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm10 + movdqa xmm7,xmm9 + pslld xmm2,10 + pxor xmm4,xmm9 + + prefetcht0 [63+r10] + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + prefetcht0 [63+r11] + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm8,xmm10 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm8,xmm3 + paddd xmm12,xmm5 + pxor xmm7,xmm2 + + paddd xmm8,xmm5 + paddd xmm8,xmm7 + lea rbp,[256+rbp] + movdqu xmm5,XMMWORD[((0-128))+rax] + mov ecx,3 + jmp NEAR $L$oop_16_xx +ALIGN 32 +$L$oop_16_xx: + movdqa xmm6,XMMWORD[((16-128))+rax] + paddd xmm5,XMMWORD[((144-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD[((224-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm12 + + movdqa xmm2,xmm12 + + psrld xmm7,6 + movdqa xmm1,xmm12 + pslld xmm2,7 + movdqa XMMWORD[(0-128)+rax],xmm5 + paddd xmm5,xmm15 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[((-128))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm12 + + pxor xmm7,xmm2 + movdqa xmm3,xmm12 + pslld xmm2,26-21 + pandn xmm0,xmm14 + pand xmm3,xmm13 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm8 + pxor xmm7,xmm2 + movdqa xmm2,xmm8 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm9 + movdqa xmm7,xmm8 + pslld xmm2,10 + pxor xmm3,xmm8 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm15,xmm9 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm15,xmm4 + paddd xmm11,xmm5 + pxor xmm7,xmm2 + + paddd xmm15,xmm5 + paddd xmm15,xmm7 + movdqa xmm5,XMMWORD[((32-128))+rax] + paddd xmm6,XMMWORD[((160-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD[((240-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm11 + + movdqa xmm2,xmm11 + + psrld xmm7,6 + movdqa xmm1,xmm11 + pslld xmm2,7 + movdqa XMMWORD[(16-128)+rax],xmm6 + paddd xmm6,xmm14 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD[((-96))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm11 + + pxor xmm7,xmm2 + movdqa xmm4,xmm11 + pslld xmm2,26-21 + pandn xmm0,xmm13 + pand xmm4,xmm12 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm15 + pxor xmm7,xmm2 + movdqa xmm2,xmm15 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm8 + movdqa xmm7,xmm15 + pslld xmm2,10 + pxor xmm4,xmm15 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm14,xmm8 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm14,xmm3 + paddd xmm10,xmm6 + pxor xmm7,xmm2 + + paddd xmm14,xmm6 + paddd xmm14,xmm7 + movdqa xmm6,XMMWORD[((48-128))+rax] + paddd xmm5,XMMWORD[((176-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD[((0-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm10 + + movdqa xmm2,xmm10 + + psrld xmm7,6 + movdqa xmm1,xmm10 + pslld xmm2,7 + movdqa XMMWORD[(32-128)+rax],xmm5 + paddd xmm5,xmm13 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[((-64))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm10 + + pxor xmm7,xmm2 + movdqa xmm3,xmm10 + pslld xmm2,26-21 + pandn xmm0,xmm12 + pand xmm3,xmm11 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm14 + pxor xmm7,xmm2 + movdqa xmm2,xmm14 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm15 + movdqa xmm7,xmm14 + pslld xmm2,10 + pxor xmm3,xmm14 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm13,xmm15 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm13,xmm4 + paddd xmm9,xmm5 + pxor xmm7,xmm2 + + paddd xmm13,xmm5 + paddd xmm13,xmm7 + movdqa xmm5,XMMWORD[((64-128))+rax] + paddd xmm6,XMMWORD[((192-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD[((16-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm9 + + movdqa xmm2,xmm9 + + psrld xmm7,6 + movdqa xmm1,xmm9 + pslld xmm2,7 + movdqa XMMWORD[(48-128)+rax],xmm6 + paddd xmm6,xmm12 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD[((-32))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm9 + + pxor xmm7,xmm2 + movdqa xmm4,xmm9 + pslld xmm2,26-21 + pandn xmm0,xmm11 + pand xmm4,xmm10 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm13 + pxor xmm7,xmm2 + movdqa xmm2,xmm13 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm14 + movdqa xmm7,xmm13 + pslld xmm2,10 + pxor xmm4,xmm13 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm12,xmm14 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm12,xmm3 + paddd xmm8,xmm6 + pxor xmm7,xmm2 + + paddd xmm12,xmm6 + paddd xmm12,xmm7 + movdqa xmm6,XMMWORD[((80-128))+rax] + paddd xmm5,XMMWORD[((208-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD[((32-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm8 + + movdqa xmm2,xmm8 + + psrld xmm7,6 + movdqa xmm1,xmm8 + pslld xmm2,7 + movdqa XMMWORD[(64-128)+rax],xmm5 + paddd xmm5,xmm11 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm8 + + pxor xmm7,xmm2 + movdqa xmm3,xmm8 + pslld xmm2,26-21 + pandn xmm0,xmm10 + pand xmm3,xmm9 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm12 + pxor xmm7,xmm2 + movdqa xmm2,xmm12 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm13 + movdqa xmm7,xmm12 + pslld xmm2,10 + pxor xmm3,xmm12 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm11,xmm13 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm11,xmm4 + paddd xmm15,xmm5 + pxor xmm7,xmm2 + + paddd xmm11,xmm5 + paddd xmm11,xmm7 + movdqa xmm5,XMMWORD[((96-128))+rax] + paddd xmm6,XMMWORD[((224-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD[((48-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm15 + + movdqa xmm2,xmm15 + + psrld xmm7,6 + movdqa xmm1,xmm15 + pslld xmm2,7 + movdqa XMMWORD[(80-128)+rax],xmm6 + paddd xmm6,xmm10 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD[32+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm15 + + pxor xmm7,xmm2 + movdqa xmm4,xmm15 + pslld xmm2,26-21 + pandn xmm0,xmm9 + pand xmm4,xmm8 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm11 + pxor xmm7,xmm2 + movdqa xmm2,xmm11 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm12 + movdqa xmm7,xmm11 + pslld xmm2,10 + pxor xmm4,xmm11 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm10,xmm12 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm10,xmm3 + paddd xmm14,xmm6 + pxor xmm7,xmm2 + + paddd xmm10,xmm6 + paddd xmm10,xmm7 + movdqa xmm6,XMMWORD[((112-128))+rax] + paddd xmm5,XMMWORD[((240-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD[((64-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm14 + + movdqa xmm2,xmm14 + + psrld xmm7,6 + movdqa xmm1,xmm14 + pslld xmm2,7 + movdqa XMMWORD[(96-128)+rax],xmm5 + paddd xmm5,xmm9 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[64+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm14 + + pxor xmm7,xmm2 + movdqa xmm3,xmm14 + pslld xmm2,26-21 + pandn xmm0,xmm8 + pand xmm3,xmm15 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm10 + pxor xmm7,xmm2 + movdqa xmm2,xmm10 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm11 + movdqa xmm7,xmm10 + pslld xmm2,10 + pxor xmm3,xmm10 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm9,xmm11 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm9,xmm4 + paddd xmm13,xmm5 + pxor xmm7,xmm2 + + paddd xmm9,xmm5 + paddd xmm9,xmm7 + movdqa xmm5,XMMWORD[((128-128))+rax] + paddd xmm6,XMMWORD[((0-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD[((80-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm13 + + movdqa xmm2,xmm13 + + psrld xmm7,6 + movdqa xmm1,xmm13 + pslld xmm2,7 + movdqa XMMWORD[(112-128)+rax],xmm6 + paddd xmm6,xmm8 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD[96+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm13 + + pxor xmm7,xmm2 + movdqa xmm4,xmm13 + pslld xmm2,26-21 + pandn xmm0,xmm15 + pand xmm4,xmm14 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm9 + pxor xmm7,xmm2 + movdqa xmm2,xmm9 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm10 + movdqa xmm7,xmm9 + pslld xmm2,10 + pxor xmm4,xmm9 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm8,xmm10 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm8,xmm3 + paddd xmm12,xmm6 + pxor xmm7,xmm2 + + paddd xmm8,xmm6 + paddd xmm8,xmm7 + lea rbp,[256+rbp] + movdqa xmm6,XMMWORD[((144-128))+rax] + paddd xmm5,XMMWORD[((16-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD[((96-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm12 + + movdqa xmm2,xmm12 + + psrld xmm7,6 + movdqa xmm1,xmm12 + pslld xmm2,7 + movdqa XMMWORD[(128-128)+rax],xmm5 + paddd xmm5,xmm15 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[((-128))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm12 + + pxor xmm7,xmm2 + movdqa xmm3,xmm12 + pslld xmm2,26-21 + pandn xmm0,xmm14 + pand xmm3,xmm13 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm8 + pxor xmm7,xmm2 + movdqa xmm2,xmm8 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm9 + movdqa xmm7,xmm8 + pslld xmm2,10 + pxor xmm3,xmm8 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm15,xmm9 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm15,xmm4 + paddd xmm11,xmm5 + pxor xmm7,xmm2 + + paddd xmm15,xmm5 + paddd xmm15,xmm7 + movdqa xmm5,XMMWORD[((160-128))+rax] + paddd xmm6,XMMWORD[((32-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD[((112-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm11 + + movdqa xmm2,xmm11 + + psrld xmm7,6 + movdqa xmm1,xmm11 + pslld xmm2,7 + movdqa XMMWORD[(144-128)+rax],xmm6 + paddd xmm6,xmm14 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD[((-96))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm11 + + pxor xmm7,xmm2 + movdqa xmm4,xmm11 + pslld xmm2,26-21 + pandn xmm0,xmm13 + pand xmm4,xmm12 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm15 + pxor xmm7,xmm2 + movdqa xmm2,xmm15 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm8 + movdqa xmm7,xmm15 + pslld xmm2,10 + pxor xmm4,xmm15 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm14,xmm8 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm14,xmm3 + paddd xmm10,xmm6 + pxor xmm7,xmm2 + + paddd xmm14,xmm6 + paddd xmm14,xmm7 + movdqa xmm6,XMMWORD[((176-128))+rax] + paddd xmm5,XMMWORD[((48-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD[((128-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm10 + + movdqa xmm2,xmm10 + + psrld xmm7,6 + movdqa xmm1,xmm10 + pslld xmm2,7 + movdqa XMMWORD[(160-128)+rax],xmm5 + paddd xmm5,xmm13 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[((-64))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm10 + + pxor xmm7,xmm2 + movdqa xmm3,xmm10 + pslld xmm2,26-21 + pandn xmm0,xmm12 + pand xmm3,xmm11 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm14 + pxor xmm7,xmm2 + movdqa xmm2,xmm14 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm15 + movdqa xmm7,xmm14 + pslld xmm2,10 + pxor xmm3,xmm14 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm13,xmm15 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm13,xmm4 + paddd xmm9,xmm5 + pxor xmm7,xmm2 + + paddd xmm13,xmm5 + paddd xmm13,xmm7 + movdqa xmm5,XMMWORD[((192-128))+rax] + paddd xmm6,XMMWORD[((64-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD[((144-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm9 + + movdqa xmm2,xmm9 + + psrld xmm7,6 + movdqa xmm1,xmm9 + pslld xmm2,7 + movdqa XMMWORD[(176-128)+rax],xmm6 + paddd xmm6,xmm12 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD[((-32))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm9 + + pxor xmm7,xmm2 + movdqa xmm4,xmm9 + pslld xmm2,26-21 + pandn xmm0,xmm11 + pand xmm4,xmm10 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm13 + pxor xmm7,xmm2 + movdqa xmm2,xmm13 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm14 + movdqa xmm7,xmm13 + pslld xmm2,10 + pxor xmm4,xmm13 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm12,xmm14 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm12,xmm3 + paddd xmm8,xmm6 + pxor xmm7,xmm2 + + paddd xmm12,xmm6 + paddd xmm12,xmm7 + movdqa xmm6,XMMWORD[((208-128))+rax] + paddd xmm5,XMMWORD[((80-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD[((160-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm8 + + movdqa xmm2,xmm8 + + psrld xmm7,6 + movdqa xmm1,xmm8 + pslld xmm2,7 + movdqa XMMWORD[(192-128)+rax],xmm5 + paddd xmm5,xmm11 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm8 + + pxor xmm7,xmm2 + movdqa xmm3,xmm8 + pslld xmm2,26-21 + pandn xmm0,xmm10 + pand xmm3,xmm9 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm12 + pxor xmm7,xmm2 + movdqa xmm2,xmm12 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm13 + movdqa xmm7,xmm12 + pslld xmm2,10 + pxor xmm3,xmm12 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm11,xmm13 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm11,xmm4 + paddd xmm15,xmm5 + pxor xmm7,xmm2 + + paddd xmm11,xmm5 + paddd xmm11,xmm7 + movdqa xmm5,XMMWORD[((224-128))+rax] + paddd xmm6,XMMWORD[((96-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD[((176-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm15 + + movdqa xmm2,xmm15 + + psrld xmm7,6 + movdqa xmm1,xmm15 + pslld xmm2,7 + movdqa XMMWORD[(208-128)+rax],xmm6 + paddd xmm6,xmm10 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD[32+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm15 + + pxor xmm7,xmm2 + movdqa xmm4,xmm15 + pslld xmm2,26-21 + pandn xmm0,xmm9 + pand xmm4,xmm8 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm11 + pxor xmm7,xmm2 + movdqa xmm2,xmm11 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm12 + movdqa xmm7,xmm11 + pslld xmm2,10 + pxor xmm4,xmm11 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm10,xmm12 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm10,xmm3 + paddd xmm14,xmm6 + pxor xmm7,xmm2 + + paddd xmm10,xmm6 + paddd xmm10,xmm7 + movdqa xmm6,XMMWORD[((240-128))+rax] + paddd xmm5,XMMWORD[((112-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD[((192-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm14 + + movdqa xmm2,xmm14 + + psrld xmm7,6 + movdqa xmm1,xmm14 + pslld xmm2,7 + movdqa XMMWORD[(224-128)+rax],xmm5 + paddd xmm5,xmm9 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD[64+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm14 + + pxor xmm7,xmm2 + movdqa xmm3,xmm14 + pslld xmm2,26-21 + pandn xmm0,xmm8 + pand xmm3,xmm15 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm10 + pxor xmm7,xmm2 + movdqa xmm2,xmm10 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm11 + movdqa xmm7,xmm10 + pslld xmm2,10 + pxor xmm3,xmm10 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm9,xmm11 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm9,xmm4 + paddd xmm13,xmm5 + pxor xmm7,xmm2 + + paddd xmm9,xmm5 + paddd xmm9,xmm7 + movdqa xmm5,XMMWORD[((0-128))+rax] + paddd xmm6,XMMWORD[((128-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD[((208-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm13 + + movdqa xmm2,xmm13 + + psrld xmm7,6 + movdqa xmm1,xmm13 + pslld xmm2,7 + movdqa XMMWORD[(240-128)+rax],xmm6 + paddd xmm6,xmm8 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD[96+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm13 + + pxor xmm7,xmm2 + movdqa xmm4,xmm13 + pslld xmm2,26-21 + pandn xmm0,xmm15 + pand xmm4,xmm14 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm9 + pxor xmm7,xmm2 + movdqa xmm2,xmm9 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm10 + movdqa xmm7,xmm9 + pslld xmm2,10 + pxor xmm4,xmm9 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm8,xmm10 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm8,xmm3 + paddd xmm12,xmm6 + pxor xmm7,xmm2 + + paddd xmm8,xmm6 + paddd xmm8,xmm7 + lea rbp,[256+rbp] + dec ecx + jnz NEAR $L$oop_16_xx + + mov ecx,1 + lea rbp,[((K256+128))] + + movdqa xmm7,XMMWORD[rbx] + cmp ecx,DWORD[rbx] + pxor xmm0,xmm0 + cmovge r8,rbp + cmp ecx,DWORD[4+rbx] + movdqa xmm6,xmm7 + cmovge r9,rbp + cmp ecx,DWORD[8+rbx] + pcmpgtd xmm6,xmm0 + cmovge r10,rbp + cmp ecx,DWORD[12+rbx] + paddd xmm7,xmm6 + cmovge r11,rbp + + movdqu xmm0,XMMWORD[((0-128))+rdi] + pand xmm8,xmm6 + movdqu xmm1,XMMWORD[((32-128))+rdi] + pand xmm9,xmm6 + movdqu xmm2,XMMWORD[((64-128))+rdi] + pand xmm10,xmm6 + movdqu xmm5,XMMWORD[((96-128))+rdi] + pand xmm11,xmm6 + paddd xmm8,xmm0 + movdqu xmm0,XMMWORD[((128-128))+rdi] + pand xmm12,xmm6 + paddd xmm9,xmm1 + movdqu xmm1,XMMWORD[((160-128))+rdi] + pand xmm13,xmm6 + paddd xmm10,xmm2 + movdqu xmm2,XMMWORD[((192-128))+rdi] + pand xmm14,xmm6 + paddd xmm11,xmm5 + movdqu xmm5,XMMWORD[((224-128))+rdi] + pand xmm15,xmm6 + paddd xmm12,xmm0 + paddd xmm13,xmm1 + movdqu XMMWORD[(0-128)+rdi],xmm8 + paddd xmm14,xmm2 + movdqu XMMWORD[(32-128)+rdi],xmm9 + paddd xmm15,xmm5 + movdqu XMMWORD[(64-128)+rdi],xmm10 + movdqu XMMWORD[(96-128)+rdi],xmm11 + movdqu XMMWORD[(128-128)+rdi],xmm12 + movdqu XMMWORD[(160-128)+rdi],xmm13 + movdqu XMMWORD[(192-128)+rdi],xmm14 + movdqu XMMWORD[(224-128)+rdi],xmm15 + + movdqa XMMWORD[rbx],xmm7 + movdqa xmm6,XMMWORD[$L$pbswap] + dec edx + jnz NEAR $L$oop + + mov edx,DWORD[280+rsp] + lea rdi,[16+rdi] + lea rsi,[64+rsi] + dec edx + jnz NEAR $L$oop_grande + +$L$done: + mov rax,QWORD[272+rsp] + + movaps xmm6,XMMWORD[((-184))+rax] + movaps xmm7,XMMWORD[((-168))+rax] + movaps xmm8,XMMWORD[((-152))+rax] + movaps xmm9,XMMWORD[((-136))+rax] + movaps xmm10,XMMWORD[((-120))+rax] + movaps xmm11,XMMWORD[((-104))+rax] + movaps xmm12,XMMWORD[((-88))+rax] + movaps xmm13,XMMWORD[((-72))+rax] + movaps xmm14,XMMWORD[((-56))+rax] + movaps xmm15,XMMWORD[((-40))+rax] + mov rbp,QWORD[((-16))+rax] + + mov rbx,QWORD[((-8))+rax] + + lea rsp,[rax] + +$L$epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha256_multi_block: + +ALIGN 32 +sha256_multi_block_shaext: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_multi_block_shaext: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_shaext_shortcut: + mov rax,rsp + + push rbx + + push rbp + + lea rsp,[((-168))+rsp] + movaps XMMWORD[rsp],xmm6 + movaps XMMWORD[16+rsp],xmm7 + movaps XMMWORD[32+rsp],xmm8 + movaps XMMWORD[48+rsp],xmm9 + movaps XMMWORD[(-120)+rax],xmm10 + movaps XMMWORD[(-104)+rax],xmm11 + movaps XMMWORD[(-88)+rax],xmm12 + movaps XMMWORD[(-72)+rax],xmm13 + movaps XMMWORD[(-56)+rax],xmm14 + movaps XMMWORD[(-40)+rax],xmm15 + sub rsp,288 + shl edx,1 + and rsp,-256 + lea rdi,[128+rdi] + mov QWORD[272+rsp],rax +$L$body_shaext: + lea rbx,[256+rsp] + lea rbp,[((K256_shaext+128))] + +$L$oop_grande_shaext: + mov DWORD[280+rsp],edx + xor edx,edx + mov r8,QWORD[rsi] + mov ecx,DWORD[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[rbx],ecx + cmovle r8,rsp + mov r9,QWORD[16+rsi] + mov ecx,DWORD[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[4+rbx],ecx + cmovle r9,rsp + test edx,edx + jz NEAR $L$done_shaext + + movq xmm12,QWORD[((0-128))+rdi] + movq xmm4,QWORD[((32-128))+rdi] + movq xmm13,QWORD[((64-128))+rdi] + movq xmm5,QWORD[((96-128))+rdi] + movq xmm8,QWORD[((128-128))+rdi] + movq xmm9,QWORD[((160-128))+rdi] + movq xmm10,QWORD[((192-128))+rdi] + movq xmm11,QWORD[((224-128))+rdi] + + punpckldq xmm12,xmm4 + punpckldq xmm13,xmm5 + punpckldq xmm8,xmm9 + punpckldq xmm10,xmm11 + movdqa xmm3,XMMWORD[((K256_shaext-16))] + + movdqa xmm14,xmm12 + movdqa xmm15,xmm13 + punpcklqdq xmm12,xmm8 + punpcklqdq xmm13,xmm10 + punpckhqdq xmm14,xmm8 + punpckhqdq xmm15,xmm10 + + pshufd xmm12,xmm12,27 + pshufd xmm13,xmm13,27 + pshufd xmm14,xmm14,27 + pshufd xmm15,xmm15,27 + jmp NEAR $L$oop_shaext + +ALIGN 32 +$L$oop_shaext: + movdqu xmm4,XMMWORD[r8] + movdqu xmm8,XMMWORD[r9] + movdqu xmm5,XMMWORD[16+r8] + movdqu xmm9,XMMWORD[16+r9] + movdqu xmm6,XMMWORD[32+r8] +DB 102,15,56,0,227 + movdqu xmm10,XMMWORD[32+r9] +DB 102,68,15,56,0,195 + movdqu xmm7,XMMWORD[48+r8] + lea r8,[64+r8] + movdqu xmm11,XMMWORD[48+r9] + lea r9,[64+r9] + + movdqa xmm0,XMMWORD[((0-128))+rbp] +DB 102,15,56,0,235 + paddd xmm0,xmm4 + pxor xmm4,xmm12 + movdqa xmm1,xmm0 + movdqa xmm2,XMMWORD[((0-128))+rbp] +DB 102,68,15,56,0,203 + paddd xmm2,xmm8 + movdqa XMMWORD[80+rsp],xmm13 +DB 69,15,56,203,236 + pxor xmm8,xmm14 + movdqa xmm0,xmm2 + movdqa XMMWORD[112+rsp],xmm15 +DB 69,15,56,203,254 + pshufd xmm0,xmm1,0x0e + pxor xmm4,xmm12 + movdqa XMMWORD[64+rsp],xmm12 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,0x0e + pxor xmm8,xmm14 + movdqa XMMWORD[96+rsp],xmm14 + movdqa xmm1,XMMWORD[((16-128))+rbp] + paddd xmm1,xmm5 +DB 102,15,56,0,243 +DB 69,15,56,203,247 + + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD[((16-128))+rbp] + paddd xmm2,xmm9 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + prefetcht0 [127+r8] +DB 102,15,56,0,251 +DB 102,68,15,56,0,211 + prefetcht0 [127+r9] +DB 69,15,56,203,254 + pshufd xmm0,xmm1,0x0e +DB 102,68,15,56,0,219 +DB 15,56,204,229 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,0x0e + movdqa xmm1,XMMWORD[((32-128))+rbp] + paddd xmm1,xmm6 +DB 69,15,56,203,247 + + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD[((32-128))+rbp] + paddd xmm2,xmm10 +DB 69,15,56,203,236 +DB 69,15,56,204,193 + movdqa xmm0,xmm2 + movdqa xmm3,xmm7 +DB 69,15,56,203,254 + pshufd xmm0,xmm1,0x0e +DB 102,15,58,15,222,4 + paddd xmm4,xmm3 + movdqa xmm3,xmm11 +DB 102,65,15,58,15,218,4 +DB 15,56,204,238 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,0x0e + movdqa xmm1,XMMWORD[((48-128))+rbp] + paddd xmm1,xmm7 +DB 69,15,56,203,247 +DB 69,15,56,204,202 + + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD[((48-128))+rbp] + paddd xmm8,xmm3 + paddd xmm2,xmm11 +DB 15,56,205,231 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm4 +DB 102,15,58,15,223,4 +DB 69,15,56,203,254 +DB 69,15,56,205,195 + pshufd xmm0,xmm1,0x0e + paddd xmm5,xmm3 + movdqa xmm3,xmm8 +DB 102,65,15,58,15,219,4 +DB 15,56,204,247 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,0x0e + movdqa xmm1,XMMWORD[((64-128))+rbp] + paddd xmm1,xmm4 +DB 69,15,56,203,247 +DB 69,15,56,204,211 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD[((64-128))+rbp] + paddd xmm9,xmm3 + paddd xmm2,xmm8 +DB 15,56,205,236 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm5 +DB 102,15,58,15,220,4 +DB 69,15,56,203,254 +DB 69,15,56,205,200 + pshufd xmm0,xmm1,0x0e + paddd xmm6,xmm3 + movdqa xmm3,xmm9 +DB 102,65,15,58,15,216,4 +DB 15,56,204,252 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,0x0e + movdqa xmm1,XMMWORD[((80-128))+rbp] + paddd xmm1,xmm5 +DB 69,15,56,203,247 +DB 69,15,56,204,216 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD[((80-128))+rbp] + paddd xmm10,xmm3 + paddd xmm2,xmm9 +DB 15,56,205,245 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm6 +DB 102,15,58,15,221,4 +DB 69,15,56,203,254 +DB 69,15,56,205,209 + pshufd xmm0,xmm1,0x0e + paddd xmm7,xmm3 + movdqa xmm3,xmm10 +DB 102,65,15,58,15,217,4 +DB 15,56,204,229 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,0x0e + movdqa xmm1,XMMWORD[((96-128))+rbp] + paddd xmm1,xmm6 +DB 69,15,56,203,247 +DB 69,15,56,204,193 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD[((96-128))+rbp] + paddd xmm11,xmm3 + paddd xmm2,xmm10 +DB 15,56,205,254 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm7 +DB 102,15,58,15,222,4 +DB 69,15,56,203,254 +DB 69,15,56,205,218 + pshufd xmm0,xmm1,0x0e + paddd xmm4,xmm3 + movdqa xmm3,xmm11 +DB 102,65,15,58,15,218,4 +DB 15,56,204,238 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,0x0e + movdqa xmm1,XMMWORD[((112-128))+rbp] + paddd xmm1,xmm7 +DB 69,15,56,203,247 +DB 69,15,56,204,202 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD[((112-128))+rbp] + paddd xmm8,xmm3 + paddd xmm2,xmm11 +DB 15,56,205,231 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm4 +DB 102,15,58,15,223,4 +DB 69,15,56,203,254 +DB 69,15,56,205,195 + pshufd xmm0,xmm1,0x0e + paddd xmm5,xmm3 + movdqa xmm3,xmm8 +DB 102,65,15,58,15,219,4 +DB 15,56,204,247 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,0x0e + movdqa xmm1,XMMWORD[((128-128))+rbp] + paddd xmm1,xmm4 +DB 69,15,56,203,247 +DB 69,15,56,204,211 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD[((128-128))+rbp] + paddd xmm9,xmm3 + paddd xmm2,xmm8 +DB 15,56,205,236 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm5 +DB 102,15,58,15,220,4 +DB 69,15,56,203,254 +DB 69,15,56,205,200 + pshufd xmm0,xmm1,0x0e + paddd xmm6,xmm3 + movdqa xmm3,xmm9 +DB 102,65,15,58,15,216,4 +DB 15,56,204,252 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,0x0e + movdqa xmm1,XMMWORD[((144-128))+rbp] + paddd xmm1,xmm5 +DB 69,15,56,203,247 +DB 69,15,56,204,216 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD[((144-128))+rbp] + paddd xmm10,xmm3 + paddd xmm2,xmm9 +DB 15,56,205,245 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm6 +DB 102,15,58,15,221,4 +DB 69,15,56,203,254 +DB 69,15,56,205,209 + pshufd xmm0,xmm1,0x0e + paddd xmm7,xmm3 + movdqa xmm3,xmm10 +DB 102,65,15,58,15,217,4 +DB 15,56,204,229 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,0x0e + movdqa xmm1,XMMWORD[((160-128))+rbp] + paddd xmm1,xmm6 +DB 69,15,56,203,247 +DB 69,15,56,204,193 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD[((160-128))+rbp] + paddd xmm11,xmm3 + paddd xmm2,xmm10 +DB 15,56,205,254 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm7 +DB 102,15,58,15,222,4 +DB 69,15,56,203,254 +DB 69,15,56,205,218 + pshufd xmm0,xmm1,0x0e + paddd xmm4,xmm3 + movdqa xmm3,xmm11 +DB 102,65,15,58,15,218,4 +DB 15,56,204,238 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,0x0e + movdqa xmm1,XMMWORD[((176-128))+rbp] + paddd xmm1,xmm7 +DB 69,15,56,203,247 +DB 69,15,56,204,202 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD[((176-128))+rbp] + paddd xmm8,xmm3 + paddd xmm2,xmm11 +DB 15,56,205,231 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm4 +DB 102,15,58,15,223,4 +DB 69,15,56,203,254 +DB 69,15,56,205,195 + pshufd xmm0,xmm1,0x0e + paddd xmm5,xmm3 + movdqa xmm3,xmm8 +DB 102,65,15,58,15,219,4 +DB 15,56,204,247 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,0x0e + movdqa xmm1,XMMWORD[((192-128))+rbp] + paddd xmm1,xmm4 +DB 69,15,56,203,247 +DB 69,15,56,204,211 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD[((192-128))+rbp] + paddd xmm9,xmm3 + paddd xmm2,xmm8 +DB 15,56,205,236 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm5 +DB 102,15,58,15,220,4 +DB 69,15,56,203,254 +DB 69,15,56,205,200 + pshufd xmm0,xmm1,0x0e + paddd xmm6,xmm3 + movdqa xmm3,xmm9 +DB 102,65,15,58,15,216,4 +DB 15,56,204,252 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,0x0e + movdqa xmm1,XMMWORD[((208-128))+rbp] + paddd xmm1,xmm5 +DB 69,15,56,203,247 +DB 69,15,56,204,216 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD[((208-128))+rbp] + paddd xmm10,xmm3 + paddd xmm2,xmm9 +DB 15,56,205,245 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm6 +DB 102,15,58,15,221,4 +DB 69,15,56,203,254 +DB 69,15,56,205,209 + pshufd xmm0,xmm1,0x0e + paddd xmm7,xmm3 + movdqa xmm3,xmm10 +DB 102,65,15,58,15,217,4 + nop +DB 69,15,56,203,229 + pshufd xmm0,xmm2,0x0e + movdqa xmm1,XMMWORD[((224-128))+rbp] + paddd xmm1,xmm6 +DB 69,15,56,203,247 + + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD[((224-128))+rbp] + paddd xmm11,xmm3 + paddd xmm2,xmm10 +DB 15,56,205,254 + nop +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + mov ecx,1 + pxor xmm6,xmm6 +DB 69,15,56,203,254 +DB 69,15,56,205,218 + pshufd xmm0,xmm1,0x0e + movdqa xmm1,XMMWORD[((240-128))+rbp] + paddd xmm1,xmm7 + movq xmm7,QWORD[rbx] + nop +DB 69,15,56,203,229 + pshufd xmm0,xmm2,0x0e + movdqa xmm2,XMMWORD[((240-128))+rbp] + paddd xmm2,xmm11 +DB 69,15,56,203,247 + + movdqa xmm0,xmm1 + cmp ecx,DWORD[rbx] + cmovge r8,rsp + cmp ecx,DWORD[4+rbx] + cmovge r9,rsp + pshufd xmm9,xmm7,0x00 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + pshufd xmm10,xmm7,0x55 + movdqa xmm11,xmm7 +DB 69,15,56,203,254 + pshufd xmm0,xmm1,0x0e + pcmpgtd xmm9,xmm6 + pcmpgtd xmm10,xmm6 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,0x0e + pcmpgtd xmm11,xmm6 + movdqa xmm3,XMMWORD[((K256_shaext-16))] +DB 69,15,56,203,247 + + pand xmm13,xmm9 + pand xmm15,xmm10 + pand xmm12,xmm9 + pand xmm14,xmm10 + paddd xmm11,xmm7 + + paddd xmm13,XMMWORD[80+rsp] + paddd xmm15,XMMWORD[112+rsp] + paddd xmm12,XMMWORD[64+rsp] + paddd xmm14,XMMWORD[96+rsp] + + movq QWORD[rbx],xmm11 + dec edx + jnz NEAR $L$oop_shaext + + mov edx,DWORD[280+rsp] + + pshufd xmm12,xmm12,27 + pshufd xmm13,xmm13,27 + pshufd xmm14,xmm14,27 + pshufd xmm15,xmm15,27 + + movdqa xmm5,xmm12 + movdqa xmm6,xmm13 + punpckldq xmm12,xmm14 + punpckhdq xmm5,xmm14 + punpckldq xmm13,xmm15 + punpckhdq xmm6,xmm15 + + movq QWORD[(0-128)+rdi],xmm12 + psrldq xmm12,8 + movq QWORD[(128-128)+rdi],xmm5 + psrldq xmm5,8 + movq QWORD[(32-128)+rdi],xmm12 + movq QWORD[(160-128)+rdi],xmm5 + + movq QWORD[(64-128)+rdi],xmm13 + psrldq xmm13,8 + movq QWORD[(192-128)+rdi],xmm6 + psrldq xmm6,8 + movq QWORD[(96-128)+rdi],xmm13 + movq QWORD[(224-128)+rdi],xmm6 + + lea rdi,[8+rdi] + lea rsi,[32+rsi] + dec edx + jnz NEAR $L$oop_grande_shaext + +$L$done_shaext: + + movaps xmm6,XMMWORD[((-184))+rax] + movaps xmm7,XMMWORD[((-168))+rax] + movaps xmm8,XMMWORD[((-152))+rax] + movaps xmm9,XMMWORD[((-136))+rax] + movaps xmm10,XMMWORD[((-120))+rax] + movaps xmm11,XMMWORD[((-104))+rax] + movaps xmm12,XMMWORD[((-88))+rax] + movaps xmm13,XMMWORD[((-72))+rax] + movaps xmm14,XMMWORD[((-56))+rax] + movaps xmm15,XMMWORD[((-40))+rax] + mov rbp,QWORD[((-16))+rax] + + mov rbx,QWORD[((-8))+rax] + + lea rsp,[rax] + +$L$epilogue_shaext: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha256_multi_block_shaext: + +ALIGN 32 +sha256_multi_block_avx: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_multi_block_avx: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_avx_shortcut: + shr rcx,32 + cmp edx,2 + jb NEAR $L$avx + test ecx,32 + jnz NEAR _avx2_shortcut + jmp NEAR $L$avx +ALIGN 32 +$L$avx: + mov rax,rsp + + push rbx + + push rbp + + lea rsp,[((-168))+rsp] + movaps XMMWORD[rsp],xmm6 + movaps XMMWORD[16+rsp],xmm7 + movaps XMMWORD[32+rsp],xmm8 + movaps XMMWORD[48+rsp],xmm9 + movaps XMMWORD[(-120)+rax],xmm10 + movaps XMMWORD[(-104)+rax],xmm11 + movaps XMMWORD[(-88)+rax],xmm12 + movaps XMMWORD[(-72)+rax],xmm13 + movaps XMMWORD[(-56)+rax],xmm14 + movaps XMMWORD[(-40)+rax],xmm15 + sub rsp,288 + and rsp,-256 + mov QWORD[272+rsp],rax + +$L$body_avx: + lea rbp,[((K256+128))] + lea rbx,[256+rsp] + lea rdi,[128+rdi] + +$L$oop_grande_avx: + mov DWORD[280+rsp],edx + xor edx,edx + mov r8,QWORD[rsi] + mov ecx,DWORD[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[rbx],ecx + cmovle r8,rbp + mov r9,QWORD[16+rsi] + mov ecx,DWORD[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[4+rbx],ecx + cmovle r9,rbp + mov r10,QWORD[32+rsi] + mov ecx,DWORD[40+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[8+rbx],ecx + cmovle r10,rbp + mov r11,QWORD[48+rsi] + mov ecx,DWORD[56+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[12+rbx],ecx + cmovle r11,rbp + test edx,edx + jz NEAR $L$done_avx + + vmovdqu xmm8,XMMWORD[((0-128))+rdi] + lea rax,[128+rsp] + vmovdqu xmm9,XMMWORD[((32-128))+rdi] + vmovdqu xmm10,XMMWORD[((64-128))+rdi] + vmovdqu xmm11,XMMWORD[((96-128))+rdi] + vmovdqu xmm12,XMMWORD[((128-128))+rdi] + vmovdqu xmm13,XMMWORD[((160-128))+rdi] + vmovdqu xmm14,XMMWORD[((192-128))+rdi] + vmovdqu xmm15,XMMWORD[((224-128))+rdi] + vmovdqu xmm6,XMMWORD[$L$pbswap] + jmp NEAR $L$oop_avx + +ALIGN 32 +$L$oop_avx: + vpxor xmm4,xmm10,xmm9 + vmovd xmm5,DWORD[r8] + vmovd xmm0,DWORD[r9] + vpinsrd xmm5,xmm5,DWORD[r10],1 + vpinsrd xmm0,xmm0,DWORD[r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm12,6 + vpslld xmm2,xmm12,26 + vmovdqu XMMWORD[(0-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm15 + + vpsrld xmm1,xmm12,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm12,21 + vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm12,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm12,7 + vpandn xmm0,xmm12,xmm14 + vpand xmm3,xmm12,xmm13 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm15,xmm8,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm8,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm9,xmm8 + + vpxor xmm15,xmm15,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm8,13 + + vpslld xmm2,xmm8,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm15,xmm1 + + vpsrld xmm1,xmm8,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm8,10 + vpxor xmm15,xmm9,xmm4 + vpaddd xmm11,xmm11,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm15,xmm15,xmm5 + vpaddd xmm15,xmm15,xmm7 + vmovd xmm5,DWORD[4+r8] + vmovd xmm0,DWORD[4+r9] + vpinsrd xmm5,xmm5,DWORD[4+r10],1 + vpinsrd xmm0,xmm0,DWORD[4+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm11,6 + vpslld xmm2,xmm11,26 + vmovdqu XMMWORD[(16-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm14 + + vpsrld xmm1,xmm11,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm11,21 + vpaddd xmm5,xmm5,XMMWORD[((-96))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm11,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm11,7 + vpandn xmm0,xmm11,xmm13 + vpand xmm4,xmm11,xmm12 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm14,xmm15,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm15,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm8,xmm15 + + vpxor xmm14,xmm14,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm15,13 + + vpslld xmm2,xmm15,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm14,xmm1 + + vpsrld xmm1,xmm15,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm15,10 + vpxor xmm14,xmm8,xmm3 + vpaddd xmm10,xmm10,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm14,xmm14,xmm5 + vpaddd xmm14,xmm14,xmm7 + vmovd xmm5,DWORD[8+r8] + vmovd xmm0,DWORD[8+r9] + vpinsrd xmm5,xmm5,DWORD[8+r10],1 + vpinsrd xmm0,xmm0,DWORD[8+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm10,6 + vpslld xmm2,xmm10,26 + vmovdqu XMMWORD[(32-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm13 + + vpsrld xmm1,xmm10,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm10,21 + vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm10,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm10,7 + vpandn xmm0,xmm10,xmm12 + vpand xmm3,xmm10,xmm11 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm13,xmm14,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm14,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm15,xmm14 + + vpxor xmm13,xmm13,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm14,13 + + vpslld xmm2,xmm14,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm13,xmm1 + + vpsrld xmm1,xmm14,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm14,10 + vpxor xmm13,xmm15,xmm4 + vpaddd xmm9,xmm9,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm13,xmm13,xmm5 + vpaddd xmm13,xmm13,xmm7 + vmovd xmm5,DWORD[12+r8] + vmovd xmm0,DWORD[12+r9] + vpinsrd xmm5,xmm5,DWORD[12+r10],1 + vpinsrd xmm0,xmm0,DWORD[12+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm9,6 + vpslld xmm2,xmm9,26 + vmovdqu XMMWORD[(48-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm12 + + vpsrld xmm1,xmm9,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm9,21 + vpaddd xmm5,xmm5,XMMWORD[((-32))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm9,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm9,7 + vpandn xmm0,xmm9,xmm11 + vpand xmm4,xmm9,xmm10 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm12,xmm13,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm13,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm14,xmm13 + + vpxor xmm12,xmm12,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm13,13 + + vpslld xmm2,xmm13,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm12,xmm1 + + vpsrld xmm1,xmm13,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm13,10 + vpxor xmm12,xmm14,xmm3 + vpaddd xmm8,xmm8,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm12,xmm12,xmm5 + vpaddd xmm12,xmm12,xmm7 + vmovd xmm5,DWORD[16+r8] + vmovd xmm0,DWORD[16+r9] + vpinsrd xmm5,xmm5,DWORD[16+r10],1 + vpinsrd xmm0,xmm0,DWORD[16+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm8,6 + vpslld xmm2,xmm8,26 + vmovdqu XMMWORD[(64-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm11 + + vpsrld xmm1,xmm8,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm8,21 + vpaddd xmm5,xmm5,XMMWORD[rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm8,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm8,7 + vpandn xmm0,xmm8,xmm10 + vpand xmm3,xmm8,xmm9 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm11,xmm12,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm12,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm13,xmm12 + + vpxor xmm11,xmm11,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm12,13 + + vpslld xmm2,xmm12,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm11,xmm1 + + vpsrld xmm1,xmm12,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm12,10 + vpxor xmm11,xmm13,xmm4 + vpaddd xmm15,xmm15,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm11,xmm11,xmm5 + vpaddd xmm11,xmm11,xmm7 + vmovd xmm5,DWORD[20+r8] + vmovd xmm0,DWORD[20+r9] + vpinsrd xmm5,xmm5,DWORD[20+r10],1 + vpinsrd xmm0,xmm0,DWORD[20+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm15,6 + vpslld xmm2,xmm15,26 + vmovdqu XMMWORD[(80-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm10 + + vpsrld xmm1,xmm15,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm15,21 + vpaddd xmm5,xmm5,XMMWORD[32+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm15,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm15,7 + vpandn xmm0,xmm15,xmm9 + vpand xmm4,xmm15,xmm8 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm10,xmm11,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm11,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm12,xmm11 + + vpxor xmm10,xmm10,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm11,13 + + vpslld xmm2,xmm11,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm10,xmm1 + + vpsrld xmm1,xmm11,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm11,10 + vpxor xmm10,xmm12,xmm3 + vpaddd xmm14,xmm14,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm10,xmm10,xmm5 + vpaddd xmm10,xmm10,xmm7 + vmovd xmm5,DWORD[24+r8] + vmovd xmm0,DWORD[24+r9] + vpinsrd xmm5,xmm5,DWORD[24+r10],1 + vpinsrd xmm0,xmm0,DWORD[24+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm14,6 + vpslld xmm2,xmm14,26 + vmovdqu XMMWORD[(96-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm9 + + vpsrld xmm1,xmm14,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm14,21 + vpaddd xmm5,xmm5,XMMWORD[64+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm14,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm14,7 + vpandn xmm0,xmm14,xmm8 + vpand xmm3,xmm14,xmm15 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm9,xmm10,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm10,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm11,xmm10 + + vpxor xmm9,xmm9,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm10,13 + + vpslld xmm2,xmm10,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm9,xmm1 + + vpsrld xmm1,xmm10,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm10,10 + vpxor xmm9,xmm11,xmm4 + vpaddd xmm13,xmm13,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm9,xmm9,xmm5 + vpaddd xmm9,xmm9,xmm7 + vmovd xmm5,DWORD[28+r8] + vmovd xmm0,DWORD[28+r9] + vpinsrd xmm5,xmm5,DWORD[28+r10],1 + vpinsrd xmm0,xmm0,DWORD[28+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm13,6 + vpslld xmm2,xmm13,26 + vmovdqu XMMWORD[(112-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm8 + + vpsrld xmm1,xmm13,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm13,21 + vpaddd xmm5,xmm5,XMMWORD[96+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm13,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm13,7 + vpandn xmm0,xmm13,xmm15 + vpand xmm4,xmm13,xmm14 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm8,xmm9,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm9,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm10,xmm9 + + vpxor xmm8,xmm8,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm9,13 + + vpslld xmm2,xmm9,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm8,xmm1 + + vpsrld xmm1,xmm9,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm9,10 + vpxor xmm8,xmm10,xmm3 + vpaddd xmm12,xmm12,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm8,xmm8,xmm5 + vpaddd xmm8,xmm8,xmm7 + add rbp,256 + vmovd xmm5,DWORD[32+r8] + vmovd xmm0,DWORD[32+r9] + vpinsrd xmm5,xmm5,DWORD[32+r10],1 + vpinsrd xmm0,xmm0,DWORD[32+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm12,6 + vpslld xmm2,xmm12,26 + vmovdqu XMMWORD[(128-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm15 + + vpsrld xmm1,xmm12,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm12,21 + vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm12,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm12,7 + vpandn xmm0,xmm12,xmm14 + vpand xmm3,xmm12,xmm13 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm15,xmm8,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm8,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm9,xmm8 + + vpxor xmm15,xmm15,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm8,13 + + vpslld xmm2,xmm8,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm15,xmm1 + + vpsrld xmm1,xmm8,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm8,10 + vpxor xmm15,xmm9,xmm4 + vpaddd xmm11,xmm11,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm15,xmm15,xmm5 + vpaddd xmm15,xmm15,xmm7 + vmovd xmm5,DWORD[36+r8] + vmovd xmm0,DWORD[36+r9] + vpinsrd xmm5,xmm5,DWORD[36+r10],1 + vpinsrd xmm0,xmm0,DWORD[36+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm11,6 + vpslld xmm2,xmm11,26 + vmovdqu XMMWORD[(144-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm14 + + vpsrld xmm1,xmm11,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm11,21 + vpaddd xmm5,xmm5,XMMWORD[((-96))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm11,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm11,7 + vpandn xmm0,xmm11,xmm13 + vpand xmm4,xmm11,xmm12 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm14,xmm15,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm15,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm8,xmm15 + + vpxor xmm14,xmm14,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm15,13 + + vpslld xmm2,xmm15,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm14,xmm1 + + vpsrld xmm1,xmm15,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm15,10 + vpxor xmm14,xmm8,xmm3 + vpaddd xmm10,xmm10,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm14,xmm14,xmm5 + vpaddd xmm14,xmm14,xmm7 + vmovd xmm5,DWORD[40+r8] + vmovd xmm0,DWORD[40+r9] + vpinsrd xmm5,xmm5,DWORD[40+r10],1 + vpinsrd xmm0,xmm0,DWORD[40+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm10,6 + vpslld xmm2,xmm10,26 + vmovdqu XMMWORD[(160-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm13 + + vpsrld xmm1,xmm10,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm10,21 + vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm10,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm10,7 + vpandn xmm0,xmm10,xmm12 + vpand xmm3,xmm10,xmm11 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm13,xmm14,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm14,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm15,xmm14 + + vpxor xmm13,xmm13,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm14,13 + + vpslld xmm2,xmm14,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm13,xmm1 + + vpsrld xmm1,xmm14,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm14,10 + vpxor xmm13,xmm15,xmm4 + vpaddd xmm9,xmm9,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm13,xmm13,xmm5 + vpaddd xmm13,xmm13,xmm7 + vmovd xmm5,DWORD[44+r8] + vmovd xmm0,DWORD[44+r9] + vpinsrd xmm5,xmm5,DWORD[44+r10],1 + vpinsrd xmm0,xmm0,DWORD[44+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm9,6 + vpslld xmm2,xmm9,26 + vmovdqu XMMWORD[(176-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm12 + + vpsrld xmm1,xmm9,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm9,21 + vpaddd xmm5,xmm5,XMMWORD[((-32))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm9,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm9,7 + vpandn xmm0,xmm9,xmm11 + vpand xmm4,xmm9,xmm10 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm12,xmm13,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm13,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm14,xmm13 + + vpxor xmm12,xmm12,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm13,13 + + vpslld xmm2,xmm13,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm12,xmm1 + + vpsrld xmm1,xmm13,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm13,10 + vpxor xmm12,xmm14,xmm3 + vpaddd xmm8,xmm8,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm12,xmm12,xmm5 + vpaddd xmm12,xmm12,xmm7 + vmovd xmm5,DWORD[48+r8] + vmovd xmm0,DWORD[48+r9] + vpinsrd xmm5,xmm5,DWORD[48+r10],1 + vpinsrd xmm0,xmm0,DWORD[48+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm8,6 + vpslld xmm2,xmm8,26 + vmovdqu XMMWORD[(192-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm11 + + vpsrld xmm1,xmm8,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm8,21 + vpaddd xmm5,xmm5,XMMWORD[rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm8,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm8,7 + vpandn xmm0,xmm8,xmm10 + vpand xmm3,xmm8,xmm9 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm11,xmm12,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm12,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm13,xmm12 + + vpxor xmm11,xmm11,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm12,13 + + vpslld xmm2,xmm12,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm11,xmm1 + + vpsrld xmm1,xmm12,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm12,10 + vpxor xmm11,xmm13,xmm4 + vpaddd xmm15,xmm15,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm11,xmm11,xmm5 + vpaddd xmm11,xmm11,xmm7 + vmovd xmm5,DWORD[52+r8] + vmovd xmm0,DWORD[52+r9] + vpinsrd xmm5,xmm5,DWORD[52+r10],1 + vpinsrd xmm0,xmm0,DWORD[52+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm15,6 + vpslld xmm2,xmm15,26 + vmovdqu XMMWORD[(208-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm10 + + vpsrld xmm1,xmm15,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm15,21 + vpaddd xmm5,xmm5,XMMWORD[32+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm15,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm15,7 + vpandn xmm0,xmm15,xmm9 + vpand xmm4,xmm15,xmm8 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm10,xmm11,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm11,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm12,xmm11 + + vpxor xmm10,xmm10,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm11,13 + + vpslld xmm2,xmm11,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm10,xmm1 + + vpsrld xmm1,xmm11,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm11,10 + vpxor xmm10,xmm12,xmm3 + vpaddd xmm14,xmm14,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm10,xmm10,xmm5 + vpaddd xmm10,xmm10,xmm7 + vmovd xmm5,DWORD[56+r8] + vmovd xmm0,DWORD[56+r9] + vpinsrd xmm5,xmm5,DWORD[56+r10],1 + vpinsrd xmm0,xmm0,DWORD[56+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm14,6 + vpslld xmm2,xmm14,26 + vmovdqu XMMWORD[(224-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm9 + + vpsrld xmm1,xmm14,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm14,21 + vpaddd xmm5,xmm5,XMMWORD[64+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm14,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm14,7 + vpandn xmm0,xmm14,xmm8 + vpand xmm3,xmm14,xmm15 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm9,xmm10,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm10,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm11,xmm10 + + vpxor xmm9,xmm9,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm10,13 + + vpslld xmm2,xmm10,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm9,xmm1 + + vpsrld xmm1,xmm10,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm10,10 + vpxor xmm9,xmm11,xmm4 + vpaddd xmm13,xmm13,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm9,xmm9,xmm5 + vpaddd xmm9,xmm9,xmm7 + vmovd xmm5,DWORD[60+r8] + lea r8,[64+r8] + vmovd xmm0,DWORD[60+r9] + lea r9,[64+r9] + vpinsrd xmm5,xmm5,DWORD[60+r10],1 + lea r10,[64+r10] + vpinsrd xmm0,xmm0,DWORD[60+r11],1 + lea r11,[64+r11] + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm13,6 + vpslld xmm2,xmm13,26 + vmovdqu XMMWORD[(240-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm8 + + vpsrld xmm1,xmm13,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm13,21 + vpaddd xmm5,xmm5,XMMWORD[96+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm13,25 + vpxor xmm7,xmm7,xmm2 + prefetcht0 [63+r8] + vpslld xmm2,xmm13,7 + vpandn xmm0,xmm13,xmm15 + vpand xmm4,xmm13,xmm14 + prefetcht0 [63+r9] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm8,xmm9,2 + vpxor xmm7,xmm7,xmm2 + prefetcht0 [63+r10] + vpslld xmm1,xmm9,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm10,xmm9 + prefetcht0 [63+r11] + vpxor xmm8,xmm8,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm9,13 + + vpslld xmm2,xmm9,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm8,xmm1 + + vpsrld xmm1,xmm9,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm9,10 + vpxor xmm8,xmm10,xmm3 + vpaddd xmm12,xmm12,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm8,xmm8,xmm5 + vpaddd xmm8,xmm8,xmm7 + add rbp,256 + vmovdqu xmm5,XMMWORD[((0-128))+rax] + mov ecx,3 + jmp NEAR $L$oop_16_xx_avx +ALIGN 32 +$L$oop_16_xx_avx: + vmovdqu xmm6,XMMWORD[((16-128))+rax] + vpaddd xmm5,xmm5,XMMWORD[((144-128))+rax] + + vpsrld xmm7,xmm6,3 + vpsrld xmm1,xmm6,7 + vpslld xmm2,xmm6,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm6,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm6,14 + vmovdqu xmm0,XMMWORD[((224-128))+rax] + vpsrld xmm3,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm5,xmm5,xmm7 + vpxor xmm7,xmm3,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm5,xmm5,xmm7 + vpsrld xmm7,xmm12,6 + vpslld xmm2,xmm12,26 + vmovdqu XMMWORD[(0-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm15 + + vpsrld xmm1,xmm12,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm12,21 + vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm12,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm12,7 + vpandn xmm0,xmm12,xmm14 + vpand xmm3,xmm12,xmm13 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm15,xmm8,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm8,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm9,xmm8 + + vpxor xmm15,xmm15,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm8,13 + + vpslld xmm2,xmm8,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm15,xmm1 + + vpsrld xmm1,xmm8,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm8,10 + vpxor xmm15,xmm9,xmm4 + vpaddd xmm11,xmm11,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm15,xmm15,xmm5 + vpaddd xmm15,xmm15,xmm7 + vmovdqu xmm5,XMMWORD[((32-128))+rax] + vpaddd xmm6,xmm6,XMMWORD[((160-128))+rax] + + vpsrld xmm7,xmm5,3 + vpsrld xmm1,xmm5,7 + vpslld xmm2,xmm5,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm5,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm5,14 + vmovdqu xmm0,XMMWORD[((240-128))+rax] + vpsrld xmm4,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm6,xmm6,xmm7 + vpxor xmm7,xmm4,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm6,xmm6,xmm7 + vpsrld xmm7,xmm11,6 + vpslld xmm2,xmm11,26 + vmovdqu XMMWORD[(16-128)+rax],xmm6 + vpaddd xmm6,xmm6,xmm14 + + vpsrld xmm1,xmm11,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm11,21 + vpaddd xmm6,xmm6,XMMWORD[((-96))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm11,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm11,7 + vpandn xmm0,xmm11,xmm13 + vpand xmm4,xmm11,xmm12 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm14,xmm15,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm15,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm8,xmm15 + + vpxor xmm14,xmm14,xmm1 + vpaddd xmm6,xmm6,xmm7 + + vpsrld xmm1,xmm15,13 + + vpslld xmm2,xmm15,19 + vpaddd xmm6,xmm6,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm14,xmm1 + + vpsrld xmm1,xmm15,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm15,10 + vpxor xmm14,xmm8,xmm3 + vpaddd xmm10,xmm10,xmm6 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm14,xmm14,xmm6 + vpaddd xmm14,xmm14,xmm7 + vmovdqu xmm6,XMMWORD[((48-128))+rax] + vpaddd xmm5,xmm5,XMMWORD[((176-128))+rax] + + vpsrld xmm7,xmm6,3 + vpsrld xmm1,xmm6,7 + vpslld xmm2,xmm6,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm6,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm6,14 + vmovdqu xmm0,XMMWORD[((0-128))+rax] + vpsrld xmm3,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm5,xmm5,xmm7 + vpxor xmm7,xmm3,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm5,xmm5,xmm7 + vpsrld xmm7,xmm10,6 + vpslld xmm2,xmm10,26 + vmovdqu XMMWORD[(32-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm13 + + vpsrld xmm1,xmm10,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm10,21 + vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm10,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm10,7 + vpandn xmm0,xmm10,xmm12 + vpand xmm3,xmm10,xmm11 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm13,xmm14,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm14,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm15,xmm14 + + vpxor xmm13,xmm13,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm14,13 + + vpslld xmm2,xmm14,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm13,xmm1 + + vpsrld xmm1,xmm14,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm14,10 + vpxor xmm13,xmm15,xmm4 + vpaddd xmm9,xmm9,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm13,xmm13,xmm5 + vpaddd xmm13,xmm13,xmm7 + vmovdqu xmm5,XMMWORD[((64-128))+rax] + vpaddd xmm6,xmm6,XMMWORD[((192-128))+rax] + + vpsrld xmm7,xmm5,3 + vpsrld xmm1,xmm5,7 + vpslld xmm2,xmm5,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm5,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm5,14 + vmovdqu xmm0,XMMWORD[((16-128))+rax] + vpsrld xmm4,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm6,xmm6,xmm7 + vpxor xmm7,xmm4,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm6,xmm6,xmm7 + vpsrld xmm7,xmm9,6 + vpslld xmm2,xmm9,26 + vmovdqu XMMWORD[(48-128)+rax],xmm6 + vpaddd xmm6,xmm6,xmm12 + + vpsrld xmm1,xmm9,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm9,21 + vpaddd xmm6,xmm6,XMMWORD[((-32))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm9,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm9,7 + vpandn xmm0,xmm9,xmm11 + vpand xmm4,xmm9,xmm10 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm12,xmm13,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm13,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm14,xmm13 + + vpxor xmm12,xmm12,xmm1 + vpaddd xmm6,xmm6,xmm7 + + vpsrld xmm1,xmm13,13 + + vpslld xmm2,xmm13,19 + vpaddd xmm6,xmm6,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm12,xmm1 + + vpsrld xmm1,xmm13,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm13,10 + vpxor xmm12,xmm14,xmm3 + vpaddd xmm8,xmm8,xmm6 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm12,xmm12,xmm6 + vpaddd xmm12,xmm12,xmm7 + vmovdqu xmm6,XMMWORD[((80-128))+rax] + vpaddd xmm5,xmm5,XMMWORD[((208-128))+rax] + + vpsrld xmm7,xmm6,3 + vpsrld xmm1,xmm6,7 + vpslld xmm2,xmm6,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm6,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm6,14 + vmovdqu xmm0,XMMWORD[((32-128))+rax] + vpsrld xmm3,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm5,xmm5,xmm7 + vpxor xmm7,xmm3,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm5,xmm5,xmm7 + vpsrld xmm7,xmm8,6 + vpslld xmm2,xmm8,26 + vmovdqu XMMWORD[(64-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm11 + + vpsrld xmm1,xmm8,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm8,21 + vpaddd xmm5,xmm5,XMMWORD[rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm8,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm8,7 + vpandn xmm0,xmm8,xmm10 + vpand xmm3,xmm8,xmm9 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm11,xmm12,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm12,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm13,xmm12 + + vpxor xmm11,xmm11,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm12,13 + + vpslld xmm2,xmm12,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm11,xmm1 + + vpsrld xmm1,xmm12,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm12,10 + vpxor xmm11,xmm13,xmm4 + vpaddd xmm15,xmm15,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm11,xmm11,xmm5 + vpaddd xmm11,xmm11,xmm7 + vmovdqu xmm5,XMMWORD[((96-128))+rax] + vpaddd xmm6,xmm6,XMMWORD[((224-128))+rax] + + vpsrld xmm7,xmm5,3 + vpsrld xmm1,xmm5,7 + vpslld xmm2,xmm5,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm5,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm5,14 + vmovdqu xmm0,XMMWORD[((48-128))+rax] + vpsrld xmm4,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm6,xmm6,xmm7 + vpxor xmm7,xmm4,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm6,xmm6,xmm7 + vpsrld xmm7,xmm15,6 + vpslld xmm2,xmm15,26 + vmovdqu XMMWORD[(80-128)+rax],xmm6 + vpaddd xmm6,xmm6,xmm10 + + vpsrld xmm1,xmm15,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm15,21 + vpaddd xmm6,xmm6,XMMWORD[32+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm15,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm15,7 + vpandn xmm0,xmm15,xmm9 + vpand xmm4,xmm15,xmm8 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm10,xmm11,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm11,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm12,xmm11 + + vpxor xmm10,xmm10,xmm1 + vpaddd xmm6,xmm6,xmm7 + + vpsrld xmm1,xmm11,13 + + vpslld xmm2,xmm11,19 + vpaddd xmm6,xmm6,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm10,xmm1 + + vpsrld xmm1,xmm11,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm11,10 + vpxor xmm10,xmm12,xmm3 + vpaddd xmm14,xmm14,xmm6 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm10,xmm10,xmm6 + vpaddd xmm10,xmm10,xmm7 + vmovdqu xmm6,XMMWORD[((112-128))+rax] + vpaddd xmm5,xmm5,XMMWORD[((240-128))+rax] + + vpsrld xmm7,xmm6,3 + vpsrld xmm1,xmm6,7 + vpslld xmm2,xmm6,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm6,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm6,14 + vmovdqu xmm0,XMMWORD[((64-128))+rax] + vpsrld xmm3,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm5,xmm5,xmm7 + vpxor xmm7,xmm3,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm5,xmm5,xmm7 + vpsrld xmm7,xmm14,6 + vpslld xmm2,xmm14,26 + vmovdqu XMMWORD[(96-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm9 + + vpsrld xmm1,xmm14,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm14,21 + vpaddd xmm5,xmm5,XMMWORD[64+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm14,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm14,7 + vpandn xmm0,xmm14,xmm8 + vpand xmm3,xmm14,xmm15 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm9,xmm10,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm10,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm11,xmm10 + + vpxor xmm9,xmm9,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm10,13 + + vpslld xmm2,xmm10,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm9,xmm1 + + vpsrld xmm1,xmm10,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm10,10 + vpxor xmm9,xmm11,xmm4 + vpaddd xmm13,xmm13,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm9,xmm9,xmm5 + vpaddd xmm9,xmm9,xmm7 + vmovdqu xmm5,XMMWORD[((128-128))+rax] + vpaddd xmm6,xmm6,XMMWORD[((0-128))+rax] + + vpsrld xmm7,xmm5,3 + vpsrld xmm1,xmm5,7 + vpslld xmm2,xmm5,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm5,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm5,14 + vmovdqu xmm0,XMMWORD[((80-128))+rax] + vpsrld xmm4,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm6,xmm6,xmm7 + vpxor xmm7,xmm4,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm6,xmm6,xmm7 + vpsrld xmm7,xmm13,6 + vpslld xmm2,xmm13,26 + vmovdqu XMMWORD[(112-128)+rax],xmm6 + vpaddd xmm6,xmm6,xmm8 + + vpsrld xmm1,xmm13,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm13,21 + vpaddd xmm6,xmm6,XMMWORD[96+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm13,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm13,7 + vpandn xmm0,xmm13,xmm15 + vpand xmm4,xmm13,xmm14 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm8,xmm9,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm9,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm10,xmm9 + + vpxor xmm8,xmm8,xmm1 + vpaddd xmm6,xmm6,xmm7 + + vpsrld xmm1,xmm9,13 + + vpslld xmm2,xmm9,19 + vpaddd xmm6,xmm6,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm8,xmm1 + + vpsrld xmm1,xmm9,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm9,10 + vpxor xmm8,xmm10,xmm3 + vpaddd xmm12,xmm12,xmm6 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm8,xmm8,xmm6 + vpaddd xmm8,xmm8,xmm7 + add rbp,256 + vmovdqu xmm6,XMMWORD[((144-128))+rax] + vpaddd xmm5,xmm5,XMMWORD[((16-128))+rax] + + vpsrld xmm7,xmm6,3 + vpsrld xmm1,xmm6,7 + vpslld xmm2,xmm6,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm6,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm6,14 + vmovdqu xmm0,XMMWORD[((96-128))+rax] + vpsrld xmm3,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm5,xmm5,xmm7 + vpxor xmm7,xmm3,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm5,xmm5,xmm7 + vpsrld xmm7,xmm12,6 + vpslld xmm2,xmm12,26 + vmovdqu XMMWORD[(128-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm15 + + vpsrld xmm1,xmm12,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm12,21 + vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm12,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm12,7 + vpandn xmm0,xmm12,xmm14 + vpand xmm3,xmm12,xmm13 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm15,xmm8,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm8,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm9,xmm8 + + vpxor xmm15,xmm15,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm8,13 + + vpslld xmm2,xmm8,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm15,xmm1 + + vpsrld xmm1,xmm8,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm8,10 + vpxor xmm15,xmm9,xmm4 + vpaddd xmm11,xmm11,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm15,xmm15,xmm5 + vpaddd xmm15,xmm15,xmm7 + vmovdqu xmm5,XMMWORD[((160-128))+rax] + vpaddd xmm6,xmm6,XMMWORD[((32-128))+rax] + + vpsrld xmm7,xmm5,3 + vpsrld xmm1,xmm5,7 + vpslld xmm2,xmm5,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm5,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm5,14 + vmovdqu xmm0,XMMWORD[((112-128))+rax] + vpsrld xmm4,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm6,xmm6,xmm7 + vpxor xmm7,xmm4,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm6,xmm6,xmm7 + vpsrld xmm7,xmm11,6 + vpslld xmm2,xmm11,26 + vmovdqu XMMWORD[(144-128)+rax],xmm6 + vpaddd xmm6,xmm6,xmm14 + + vpsrld xmm1,xmm11,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm11,21 + vpaddd xmm6,xmm6,XMMWORD[((-96))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm11,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm11,7 + vpandn xmm0,xmm11,xmm13 + vpand xmm4,xmm11,xmm12 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm14,xmm15,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm15,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm8,xmm15 + + vpxor xmm14,xmm14,xmm1 + vpaddd xmm6,xmm6,xmm7 + + vpsrld xmm1,xmm15,13 + + vpslld xmm2,xmm15,19 + vpaddd xmm6,xmm6,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm14,xmm1 + + vpsrld xmm1,xmm15,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm15,10 + vpxor xmm14,xmm8,xmm3 + vpaddd xmm10,xmm10,xmm6 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm14,xmm14,xmm6 + vpaddd xmm14,xmm14,xmm7 + vmovdqu xmm6,XMMWORD[((176-128))+rax] + vpaddd xmm5,xmm5,XMMWORD[((48-128))+rax] + + vpsrld xmm7,xmm6,3 + vpsrld xmm1,xmm6,7 + vpslld xmm2,xmm6,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm6,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm6,14 + vmovdqu xmm0,XMMWORD[((128-128))+rax] + vpsrld xmm3,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm5,xmm5,xmm7 + vpxor xmm7,xmm3,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm5,xmm5,xmm7 + vpsrld xmm7,xmm10,6 + vpslld xmm2,xmm10,26 + vmovdqu XMMWORD[(160-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm13 + + vpsrld xmm1,xmm10,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm10,21 + vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm10,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm10,7 + vpandn xmm0,xmm10,xmm12 + vpand xmm3,xmm10,xmm11 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm13,xmm14,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm14,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm15,xmm14 + + vpxor xmm13,xmm13,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm14,13 + + vpslld xmm2,xmm14,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm13,xmm1 + + vpsrld xmm1,xmm14,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm14,10 + vpxor xmm13,xmm15,xmm4 + vpaddd xmm9,xmm9,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm13,xmm13,xmm5 + vpaddd xmm13,xmm13,xmm7 + vmovdqu xmm5,XMMWORD[((192-128))+rax] + vpaddd xmm6,xmm6,XMMWORD[((64-128))+rax] + + vpsrld xmm7,xmm5,3 + vpsrld xmm1,xmm5,7 + vpslld xmm2,xmm5,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm5,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm5,14 + vmovdqu xmm0,XMMWORD[((144-128))+rax] + vpsrld xmm4,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm6,xmm6,xmm7 + vpxor xmm7,xmm4,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm6,xmm6,xmm7 + vpsrld xmm7,xmm9,6 + vpslld xmm2,xmm9,26 + vmovdqu XMMWORD[(176-128)+rax],xmm6 + vpaddd xmm6,xmm6,xmm12 + + vpsrld xmm1,xmm9,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm9,21 + vpaddd xmm6,xmm6,XMMWORD[((-32))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm9,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm9,7 + vpandn xmm0,xmm9,xmm11 + vpand xmm4,xmm9,xmm10 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm12,xmm13,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm13,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm14,xmm13 + + vpxor xmm12,xmm12,xmm1 + vpaddd xmm6,xmm6,xmm7 + + vpsrld xmm1,xmm13,13 + + vpslld xmm2,xmm13,19 + vpaddd xmm6,xmm6,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm12,xmm1 + + vpsrld xmm1,xmm13,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm13,10 + vpxor xmm12,xmm14,xmm3 + vpaddd xmm8,xmm8,xmm6 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm12,xmm12,xmm6 + vpaddd xmm12,xmm12,xmm7 + vmovdqu xmm6,XMMWORD[((208-128))+rax] + vpaddd xmm5,xmm5,XMMWORD[((80-128))+rax] + + vpsrld xmm7,xmm6,3 + vpsrld xmm1,xmm6,7 + vpslld xmm2,xmm6,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm6,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm6,14 + vmovdqu xmm0,XMMWORD[((160-128))+rax] + vpsrld xmm3,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm5,xmm5,xmm7 + vpxor xmm7,xmm3,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm5,xmm5,xmm7 + vpsrld xmm7,xmm8,6 + vpslld xmm2,xmm8,26 + vmovdqu XMMWORD[(192-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm11 + + vpsrld xmm1,xmm8,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm8,21 + vpaddd xmm5,xmm5,XMMWORD[rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm8,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm8,7 + vpandn xmm0,xmm8,xmm10 + vpand xmm3,xmm8,xmm9 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm11,xmm12,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm12,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm13,xmm12 + + vpxor xmm11,xmm11,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm12,13 + + vpslld xmm2,xmm12,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm11,xmm1 + + vpsrld xmm1,xmm12,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm12,10 + vpxor xmm11,xmm13,xmm4 + vpaddd xmm15,xmm15,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm11,xmm11,xmm5 + vpaddd xmm11,xmm11,xmm7 + vmovdqu xmm5,XMMWORD[((224-128))+rax] + vpaddd xmm6,xmm6,XMMWORD[((96-128))+rax] + + vpsrld xmm7,xmm5,3 + vpsrld xmm1,xmm5,7 + vpslld xmm2,xmm5,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm5,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm5,14 + vmovdqu xmm0,XMMWORD[((176-128))+rax] + vpsrld xmm4,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm6,xmm6,xmm7 + vpxor xmm7,xmm4,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm6,xmm6,xmm7 + vpsrld xmm7,xmm15,6 + vpslld xmm2,xmm15,26 + vmovdqu XMMWORD[(208-128)+rax],xmm6 + vpaddd xmm6,xmm6,xmm10 + + vpsrld xmm1,xmm15,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm15,21 + vpaddd xmm6,xmm6,XMMWORD[32+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm15,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm15,7 + vpandn xmm0,xmm15,xmm9 + vpand xmm4,xmm15,xmm8 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm10,xmm11,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm11,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm12,xmm11 + + vpxor xmm10,xmm10,xmm1 + vpaddd xmm6,xmm6,xmm7 + + vpsrld xmm1,xmm11,13 + + vpslld xmm2,xmm11,19 + vpaddd xmm6,xmm6,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm10,xmm1 + + vpsrld xmm1,xmm11,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm11,10 + vpxor xmm10,xmm12,xmm3 + vpaddd xmm14,xmm14,xmm6 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm10,xmm10,xmm6 + vpaddd xmm10,xmm10,xmm7 + vmovdqu xmm6,XMMWORD[((240-128))+rax] + vpaddd xmm5,xmm5,XMMWORD[((112-128))+rax] + + vpsrld xmm7,xmm6,3 + vpsrld xmm1,xmm6,7 + vpslld xmm2,xmm6,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm6,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm6,14 + vmovdqu xmm0,XMMWORD[((192-128))+rax] + vpsrld xmm3,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm5,xmm5,xmm7 + vpxor xmm7,xmm3,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm5,xmm5,xmm7 + vpsrld xmm7,xmm14,6 + vpslld xmm2,xmm14,26 + vmovdqu XMMWORD[(224-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm9 + + vpsrld xmm1,xmm14,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm14,21 + vpaddd xmm5,xmm5,XMMWORD[64+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm14,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm14,7 + vpandn xmm0,xmm14,xmm8 + vpand xmm3,xmm14,xmm15 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm9,xmm10,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm10,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm11,xmm10 + + vpxor xmm9,xmm9,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm10,13 + + vpslld xmm2,xmm10,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm9,xmm1 + + vpsrld xmm1,xmm10,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm10,10 + vpxor xmm9,xmm11,xmm4 + vpaddd xmm13,xmm13,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm9,xmm9,xmm5 + vpaddd xmm9,xmm9,xmm7 + vmovdqu xmm5,XMMWORD[((0-128))+rax] + vpaddd xmm6,xmm6,XMMWORD[((128-128))+rax] + + vpsrld xmm7,xmm5,3 + vpsrld xmm1,xmm5,7 + vpslld xmm2,xmm5,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm5,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm5,14 + vmovdqu xmm0,XMMWORD[((208-128))+rax] + vpsrld xmm4,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm6,xmm6,xmm7 + vpxor xmm7,xmm4,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm6,xmm6,xmm7 + vpsrld xmm7,xmm13,6 + vpslld xmm2,xmm13,26 + vmovdqu XMMWORD[(240-128)+rax],xmm6 + vpaddd xmm6,xmm6,xmm8 + + vpsrld xmm1,xmm13,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm13,21 + vpaddd xmm6,xmm6,XMMWORD[96+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm13,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm13,7 + vpandn xmm0,xmm13,xmm15 + vpand xmm4,xmm13,xmm14 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm8,xmm9,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm9,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm10,xmm9 + + vpxor xmm8,xmm8,xmm1 + vpaddd xmm6,xmm6,xmm7 + + vpsrld xmm1,xmm9,13 + + vpslld xmm2,xmm9,19 + vpaddd xmm6,xmm6,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm8,xmm1 + + vpsrld xmm1,xmm9,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm9,10 + vpxor xmm8,xmm10,xmm3 + vpaddd xmm12,xmm12,xmm6 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm8,xmm8,xmm6 + vpaddd xmm8,xmm8,xmm7 + add rbp,256 + dec ecx + jnz NEAR $L$oop_16_xx_avx + + mov ecx,1 + lea rbp,[((K256+128))] + cmp ecx,DWORD[rbx] + cmovge r8,rbp + cmp ecx,DWORD[4+rbx] + cmovge r9,rbp + cmp ecx,DWORD[8+rbx] + cmovge r10,rbp + cmp ecx,DWORD[12+rbx] + cmovge r11,rbp + vmovdqa xmm7,XMMWORD[rbx] + vpxor xmm0,xmm0,xmm0 + vmovdqa xmm6,xmm7 + vpcmpgtd xmm6,xmm6,xmm0 + vpaddd xmm7,xmm7,xmm6 + + vmovdqu xmm0,XMMWORD[((0-128))+rdi] + vpand xmm8,xmm8,xmm6 + vmovdqu xmm1,XMMWORD[((32-128))+rdi] + vpand xmm9,xmm9,xmm6 + vmovdqu xmm2,XMMWORD[((64-128))+rdi] + vpand xmm10,xmm10,xmm6 + vmovdqu xmm5,XMMWORD[((96-128))+rdi] + vpand xmm11,xmm11,xmm6 + vpaddd xmm8,xmm8,xmm0 + vmovdqu xmm0,XMMWORD[((128-128))+rdi] + vpand xmm12,xmm12,xmm6 + vpaddd xmm9,xmm9,xmm1 + vmovdqu xmm1,XMMWORD[((160-128))+rdi] + vpand xmm13,xmm13,xmm6 + vpaddd xmm10,xmm10,xmm2 + vmovdqu xmm2,XMMWORD[((192-128))+rdi] + vpand xmm14,xmm14,xmm6 + vpaddd xmm11,xmm11,xmm5 + vmovdqu xmm5,XMMWORD[((224-128))+rdi] + vpand xmm15,xmm15,xmm6 + vpaddd xmm12,xmm12,xmm0 + vpaddd xmm13,xmm13,xmm1 + vmovdqu XMMWORD[(0-128)+rdi],xmm8 + vpaddd xmm14,xmm14,xmm2 + vmovdqu XMMWORD[(32-128)+rdi],xmm9 + vpaddd xmm15,xmm15,xmm5 + vmovdqu XMMWORD[(64-128)+rdi],xmm10 + vmovdqu XMMWORD[(96-128)+rdi],xmm11 + vmovdqu XMMWORD[(128-128)+rdi],xmm12 + vmovdqu XMMWORD[(160-128)+rdi],xmm13 + vmovdqu XMMWORD[(192-128)+rdi],xmm14 + vmovdqu XMMWORD[(224-128)+rdi],xmm15 + + vmovdqu XMMWORD[rbx],xmm7 + vmovdqu xmm6,XMMWORD[$L$pbswap] + dec edx + jnz NEAR $L$oop_avx + + mov edx,DWORD[280+rsp] + lea rdi,[16+rdi] + lea rsi,[64+rsi] + dec edx + jnz NEAR $L$oop_grande_avx + +$L$done_avx: + mov rax,QWORD[272+rsp] + + vzeroupper + movaps xmm6,XMMWORD[((-184))+rax] + movaps xmm7,XMMWORD[((-168))+rax] + movaps xmm8,XMMWORD[((-152))+rax] + movaps xmm9,XMMWORD[((-136))+rax] + movaps xmm10,XMMWORD[((-120))+rax] + movaps xmm11,XMMWORD[((-104))+rax] + movaps xmm12,XMMWORD[((-88))+rax] + movaps xmm13,XMMWORD[((-72))+rax] + movaps xmm14,XMMWORD[((-56))+rax] + movaps xmm15,XMMWORD[((-40))+rax] + mov rbp,QWORD[((-16))+rax] + + mov rbx,QWORD[((-8))+rax] + + lea rsp,[rax] + +$L$epilogue_avx: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha256_multi_block_avx: + +ALIGN 32 +sha256_multi_block_avx2: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_multi_block_avx2: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +_avx2_shortcut: + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + lea rsp,[((-168))+rsp] + movaps XMMWORD[rsp],xmm6 + movaps XMMWORD[16+rsp],xmm7 + movaps XMMWORD[32+rsp],xmm8 + movaps XMMWORD[48+rsp],xmm9 + movaps XMMWORD[64+rsp],xmm10 + movaps XMMWORD[80+rsp],xmm11 + movaps XMMWORD[(-120)+rax],xmm12 + movaps XMMWORD[(-104)+rax],xmm13 + movaps XMMWORD[(-88)+rax],xmm14 + movaps XMMWORD[(-72)+rax],xmm15 + sub rsp,576 + and rsp,-256 + mov QWORD[544+rsp],rax + +$L$body_avx2: + lea rbp,[((K256+128))] + lea rdi,[128+rdi] + +$L$oop_grande_avx2: + mov DWORD[552+rsp],edx + xor edx,edx + lea rbx,[512+rsp] + mov r12,QWORD[rsi] + mov ecx,DWORD[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[rbx],ecx + cmovle r12,rbp + mov r13,QWORD[16+rsi] + mov ecx,DWORD[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[4+rbx],ecx + cmovle r13,rbp + mov r14,QWORD[32+rsi] + mov ecx,DWORD[40+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[8+rbx],ecx + cmovle r14,rbp + mov r15,QWORD[48+rsi] + mov ecx,DWORD[56+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[12+rbx],ecx + cmovle r15,rbp + mov r8,QWORD[64+rsi] + mov ecx,DWORD[72+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[16+rbx],ecx + cmovle r8,rbp + mov r9,QWORD[80+rsi] + mov ecx,DWORD[88+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[20+rbx],ecx + cmovle r9,rbp + mov r10,QWORD[96+rsi] + mov ecx,DWORD[104+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[24+rbx],ecx + cmovle r10,rbp + mov r11,QWORD[112+rsi] + mov ecx,DWORD[120+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD[28+rbx],ecx + cmovle r11,rbp + vmovdqu ymm8,YMMWORD[((0-128))+rdi] + lea rax,[128+rsp] + vmovdqu ymm9,YMMWORD[((32-128))+rdi] + lea rbx,[((256+128))+rsp] + vmovdqu ymm10,YMMWORD[((64-128))+rdi] + vmovdqu ymm11,YMMWORD[((96-128))+rdi] + vmovdqu ymm12,YMMWORD[((128-128))+rdi] + vmovdqu ymm13,YMMWORD[((160-128))+rdi] + vmovdqu ymm14,YMMWORD[((192-128))+rdi] + vmovdqu ymm15,YMMWORD[((224-128))+rdi] + vmovdqu ymm6,YMMWORD[$L$pbswap] + jmp NEAR $L$oop_avx2 + +ALIGN 32 +$L$oop_avx2: + vpxor ymm4,ymm10,ymm9 + vmovd xmm5,DWORD[r12] + vmovd xmm0,DWORD[r8] + vmovd xmm1,DWORD[r13] + vmovd xmm2,DWORD[r9] + vpinsrd xmm5,xmm5,DWORD[r14],1 + vpinsrd xmm0,xmm0,DWORD[r10],1 + vpinsrd xmm1,xmm1,DWORD[r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD[r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm12,6 + vpslld ymm2,ymm12,26 + vmovdqu YMMWORD[(0-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm15 + + vpsrld ymm1,ymm12,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm12,21 + vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm12,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm12,7 + vpandn ymm0,ymm12,ymm14 + vpand ymm3,ymm12,ymm13 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm15,ymm8,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm8,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm9,ymm8 + + vpxor ymm15,ymm15,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm8,13 + + vpslld ymm2,ymm8,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm15,ymm1 + + vpsrld ymm1,ymm8,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm8,10 + vpxor ymm15,ymm9,ymm4 + vpaddd ymm11,ymm11,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm15,ymm15,ymm5 + vpaddd ymm15,ymm15,ymm7 + vmovd xmm5,DWORD[4+r12] + vmovd xmm0,DWORD[4+r8] + vmovd xmm1,DWORD[4+r13] + vmovd xmm2,DWORD[4+r9] + vpinsrd xmm5,xmm5,DWORD[4+r14],1 + vpinsrd xmm0,xmm0,DWORD[4+r10],1 + vpinsrd xmm1,xmm1,DWORD[4+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD[4+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm11,6 + vpslld ymm2,ymm11,26 + vmovdqu YMMWORD[(32-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm14 + + vpsrld ymm1,ymm11,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm11,21 + vpaddd ymm5,ymm5,YMMWORD[((-96))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm11,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm11,7 + vpandn ymm0,ymm11,ymm13 + vpand ymm4,ymm11,ymm12 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm14,ymm15,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm15,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm8,ymm15 + + vpxor ymm14,ymm14,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm15,13 + + vpslld ymm2,ymm15,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm14,ymm1 + + vpsrld ymm1,ymm15,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm15,10 + vpxor ymm14,ymm8,ymm3 + vpaddd ymm10,ymm10,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm14,ymm14,ymm5 + vpaddd ymm14,ymm14,ymm7 + vmovd xmm5,DWORD[8+r12] + vmovd xmm0,DWORD[8+r8] + vmovd xmm1,DWORD[8+r13] + vmovd xmm2,DWORD[8+r9] + vpinsrd xmm5,xmm5,DWORD[8+r14],1 + vpinsrd xmm0,xmm0,DWORD[8+r10],1 + vpinsrd xmm1,xmm1,DWORD[8+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD[8+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm10,6 + vpslld ymm2,ymm10,26 + vmovdqu YMMWORD[(64-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm13 + + vpsrld ymm1,ymm10,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm10,21 + vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm10,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm10,7 + vpandn ymm0,ymm10,ymm12 + vpand ymm3,ymm10,ymm11 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm13,ymm14,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm14,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm15,ymm14 + + vpxor ymm13,ymm13,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm14,13 + + vpslld ymm2,ymm14,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm13,ymm1 + + vpsrld ymm1,ymm14,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm14,10 + vpxor ymm13,ymm15,ymm4 + vpaddd ymm9,ymm9,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm13,ymm13,ymm5 + vpaddd ymm13,ymm13,ymm7 + vmovd xmm5,DWORD[12+r12] + vmovd xmm0,DWORD[12+r8] + vmovd xmm1,DWORD[12+r13] + vmovd xmm2,DWORD[12+r9] + vpinsrd xmm5,xmm5,DWORD[12+r14],1 + vpinsrd xmm0,xmm0,DWORD[12+r10],1 + vpinsrd xmm1,xmm1,DWORD[12+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD[12+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm9,6 + vpslld ymm2,ymm9,26 + vmovdqu YMMWORD[(96-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm12 + + vpsrld ymm1,ymm9,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm9,21 + vpaddd ymm5,ymm5,YMMWORD[((-32))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm9,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm9,7 + vpandn ymm0,ymm9,ymm11 + vpand ymm4,ymm9,ymm10 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm12,ymm13,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm13,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm14,ymm13 + + vpxor ymm12,ymm12,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm13,13 + + vpslld ymm2,ymm13,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm12,ymm1 + + vpsrld ymm1,ymm13,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm13,10 + vpxor ymm12,ymm14,ymm3 + vpaddd ymm8,ymm8,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm12,ymm12,ymm5 + vpaddd ymm12,ymm12,ymm7 + vmovd xmm5,DWORD[16+r12] + vmovd xmm0,DWORD[16+r8] + vmovd xmm1,DWORD[16+r13] + vmovd xmm2,DWORD[16+r9] + vpinsrd xmm5,xmm5,DWORD[16+r14],1 + vpinsrd xmm0,xmm0,DWORD[16+r10],1 + vpinsrd xmm1,xmm1,DWORD[16+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD[16+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm8,6 + vpslld ymm2,ymm8,26 + vmovdqu YMMWORD[(128-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm11 + + vpsrld ymm1,ymm8,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm8,21 + vpaddd ymm5,ymm5,YMMWORD[rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm8,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm8,7 + vpandn ymm0,ymm8,ymm10 + vpand ymm3,ymm8,ymm9 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm11,ymm12,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm12,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm13,ymm12 + + vpxor ymm11,ymm11,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm12,13 + + vpslld ymm2,ymm12,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm11,ymm1 + + vpsrld ymm1,ymm12,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm12,10 + vpxor ymm11,ymm13,ymm4 + vpaddd ymm15,ymm15,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm11,ymm11,ymm5 + vpaddd ymm11,ymm11,ymm7 + vmovd xmm5,DWORD[20+r12] + vmovd xmm0,DWORD[20+r8] + vmovd xmm1,DWORD[20+r13] + vmovd xmm2,DWORD[20+r9] + vpinsrd xmm5,xmm5,DWORD[20+r14],1 + vpinsrd xmm0,xmm0,DWORD[20+r10],1 + vpinsrd xmm1,xmm1,DWORD[20+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD[20+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm15,6 + vpslld ymm2,ymm15,26 + vmovdqu YMMWORD[(160-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm10 + + vpsrld ymm1,ymm15,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm15,21 + vpaddd ymm5,ymm5,YMMWORD[32+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm15,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm15,7 + vpandn ymm0,ymm15,ymm9 + vpand ymm4,ymm15,ymm8 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm10,ymm11,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm11,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm12,ymm11 + + vpxor ymm10,ymm10,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm11,13 + + vpslld ymm2,ymm11,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm10,ymm1 + + vpsrld ymm1,ymm11,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm11,10 + vpxor ymm10,ymm12,ymm3 + vpaddd ymm14,ymm14,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm10,ymm10,ymm5 + vpaddd ymm10,ymm10,ymm7 + vmovd xmm5,DWORD[24+r12] + vmovd xmm0,DWORD[24+r8] + vmovd xmm1,DWORD[24+r13] + vmovd xmm2,DWORD[24+r9] + vpinsrd xmm5,xmm5,DWORD[24+r14],1 + vpinsrd xmm0,xmm0,DWORD[24+r10],1 + vpinsrd xmm1,xmm1,DWORD[24+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD[24+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm14,6 + vpslld ymm2,ymm14,26 + vmovdqu YMMWORD[(192-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm9 + + vpsrld ymm1,ymm14,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm14,21 + vpaddd ymm5,ymm5,YMMWORD[64+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm14,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm14,7 + vpandn ymm0,ymm14,ymm8 + vpand ymm3,ymm14,ymm15 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm9,ymm10,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm10,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm11,ymm10 + + vpxor ymm9,ymm9,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm10,13 + + vpslld ymm2,ymm10,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm9,ymm1 + + vpsrld ymm1,ymm10,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm10,10 + vpxor ymm9,ymm11,ymm4 + vpaddd ymm13,ymm13,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm9,ymm9,ymm5 + vpaddd ymm9,ymm9,ymm7 + vmovd xmm5,DWORD[28+r12] + vmovd xmm0,DWORD[28+r8] + vmovd xmm1,DWORD[28+r13] + vmovd xmm2,DWORD[28+r9] + vpinsrd xmm5,xmm5,DWORD[28+r14],1 + vpinsrd xmm0,xmm0,DWORD[28+r10],1 + vpinsrd xmm1,xmm1,DWORD[28+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD[28+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm13,6 + vpslld ymm2,ymm13,26 + vmovdqu YMMWORD[(224-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm8 + + vpsrld ymm1,ymm13,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm13,21 + vpaddd ymm5,ymm5,YMMWORD[96+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm13,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm13,7 + vpandn ymm0,ymm13,ymm15 + vpand ymm4,ymm13,ymm14 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm8,ymm9,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm9,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm10,ymm9 + + vpxor ymm8,ymm8,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm9,13 + + vpslld ymm2,ymm9,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm8,ymm1 + + vpsrld ymm1,ymm9,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm9,10 + vpxor ymm8,ymm10,ymm3 + vpaddd ymm12,ymm12,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm8,ymm8,ymm5 + vpaddd ymm8,ymm8,ymm7 + add rbp,256 + vmovd xmm5,DWORD[32+r12] + vmovd xmm0,DWORD[32+r8] + vmovd xmm1,DWORD[32+r13] + vmovd xmm2,DWORD[32+r9] + vpinsrd xmm5,xmm5,DWORD[32+r14],1 + vpinsrd xmm0,xmm0,DWORD[32+r10],1 + vpinsrd xmm1,xmm1,DWORD[32+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD[32+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm12,6 + vpslld ymm2,ymm12,26 + vmovdqu YMMWORD[(256-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm15 + + vpsrld ymm1,ymm12,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm12,21 + vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm12,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm12,7 + vpandn ymm0,ymm12,ymm14 + vpand ymm3,ymm12,ymm13 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm15,ymm8,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm8,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm9,ymm8 + + vpxor ymm15,ymm15,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm8,13 + + vpslld ymm2,ymm8,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm15,ymm1 + + vpsrld ymm1,ymm8,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm8,10 + vpxor ymm15,ymm9,ymm4 + vpaddd ymm11,ymm11,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm15,ymm15,ymm5 + vpaddd ymm15,ymm15,ymm7 + vmovd xmm5,DWORD[36+r12] + vmovd xmm0,DWORD[36+r8] + vmovd xmm1,DWORD[36+r13] + vmovd xmm2,DWORD[36+r9] + vpinsrd xmm5,xmm5,DWORD[36+r14],1 + vpinsrd xmm0,xmm0,DWORD[36+r10],1 + vpinsrd xmm1,xmm1,DWORD[36+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD[36+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm11,6 + vpslld ymm2,ymm11,26 + vmovdqu YMMWORD[(288-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm14 + + vpsrld ymm1,ymm11,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm11,21 + vpaddd ymm5,ymm5,YMMWORD[((-96))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm11,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm11,7 + vpandn ymm0,ymm11,ymm13 + vpand ymm4,ymm11,ymm12 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm14,ymm15,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm15,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm8,ymm15 + + vpxor ymm14,ymm14,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm15,13 + + vpslld ymm2,ymm15,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm14,ymm1 + + vpsrld ymm1,ymm15,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm15,10 + vpxor ymm14,ymm8,ymm3 + vpaddd ymm10,ymm10,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm14,ymm14,ymm5 + vpaddd ymm14,ymm14,ymm7 + vmovd xmm5,DWORD[40+r12] + vmovd xmm0,DWORD[40+r8] + vmovd xmm1,DWORD[40+r13] + vmovd xmm2,DWORD[40+r9] + vpinsrd xmm5,xmm5,DWORD[40+r14],1 + vpinsrd xmm0,xmm0,DWORD[40+r10],1 + vpinsrd xmm1,xmm1,DWORD[40+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD[40+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm10,6 + vpslld ymm2,ymm10,26 + vmovdqu YMMWORD[(320-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm13 + + vpsrld ymm1,ymm10,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm10,21 + vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm10,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm10,7 + vpandn ymm0,ymm10,ymm12 + vpand ymm3,ymm10,ymm11 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm13,ymm14,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm14,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm15,ymm14 + + vpxor ymm13,ymm13,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm14,13 + + vpslld ymm2,ymm14,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm13,ymm1 + + vpsrld ymm1,ymm14,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm14,10 + vpxor ymm13,ymm15,ymm4 + vpaddd ymm9,ymm9,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm13,ymm13,ymm5 + vpaddd ymm13,ymm13,ymm7 + vmovd xmm5,DWORD[44+r12] + vmovd xmm0,DWORD[44+r8] + vmovd xmm1,DWORD[44+r13] + vmovd xmm2,DWORD[44+r9] + vpinsrd xmm5,xmm5,DWORD[44+r14],1 + vpinsrd xmm0,xmm0,DWORD[44+r10],1 + vpinsrd xmm1,xmm1,DWORD[44+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD[44+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm9,6 + vpslld ymm2,ymm9,26 + vmovdqu YMMWORD[(352-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm12 + + vpsrld ymm1,ymm9,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm9,21 + vpaddd ymm5,ymm5,YMMWORD[((-32))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm9,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm9,7 + vpandn ymm0,ymm9,ymm11 + vpand ymm4,ymm9,ymm10 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm12,ymm13,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm13,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm14,ymm13 + + vpxor ymm12,ymm12,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm13,13 + + vpslld ymm2,ymm13,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm12,ymm1 + + vpsrld ymm1,ymm13,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm13,10 + vpxor ymm12,ymm14,ymm3 + vpaddd ymm8,ymm8,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm12,ymm12,ymm5 + vpaddd ymm12,ymm12,ymm7 + vmovd xmm5,DWORD[48+r12] + vmovd xmm0,DWORD[48+r8] + vmovd xmm1,DWORD[48+r13] + vmovd xmm2,DWORD[48+r9] + vpinsrd xmm5,xmm5,DWORD[48+r14],1 + vpinsrd xmm0,xmm0,DWORD[48+r10],1 + vpinsrd xmm1,xmm1,DWORD[48+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD[48+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm8,6 + vpslld ymm2,ymm8,26 + vmovdqu YMMWORD[(384-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm11 + + vpsrld ymm1,ymm8,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm8,21 + vpaddd ymm5,ymm5,YMMWORD[rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm8,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm8,7 + vpandn ymm0,ymm8,ymm10 + vpand ymm3,ymm8,ymm9 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm11,ymm12,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm12,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm13,ymm12 + + vpxor ymm11,ymm11,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm12,13 + + vpslld ymm2,ymm12,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm11,ymm1 + + vpsrld ymm1,ymm12,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm12,10 + vpxor ymm11,ymm13,ymm4 + vpaddd ymm15,ymm15,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm11,ymm11,ymm5 + vpaddd ymm11,ymm11,ymm7 + vmovd xmm5,DWORD[52+r12] + vmovd xmm0,DWORD[52+r8] + vmovd xmm1,DWORD[52+r13] + vmovd xmm2,DWORD[52+r9] + vpinsrd xmm5,xmm5,DWORD[52+r14],1 + vpinsrd xmm0,xmm0,DWORD[52+r10],1 + vpinsrd xmm1,xmm1,DWORD[52+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD[52+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm15,6 + vpslld ymm2,ymm15,26 + vmovdqu YMMWORD[(416-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm10 + + vpsrld ymm1,ymm15,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm15,21 + vpaddd ymm5,ymm5,YMMWORD[32+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm15,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm15,7 + vpandn ymm0,ymm15,ymm9 + vpand ymm4,ymm15,ymm8 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm10,ymm11,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm11,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm12,ymm11 + + vpxor ymm10,ymm10,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm11,13 + + vpslld ymm2,ymm11,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm10,ymm1 + + vpsrld ymm1,ymm11,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm11,10 + vpxor ymm10,ymm12,ymm3 + vpaddd ymm14,ymm14,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm10,ymm10,ymm5 + vpaddd ymm10,ymm10,ymm7 + vmovd xmm5,DWORD[56+r12] + vmovd xmm0,DWORD[56+r8] + vmovd xmm1,DWORD[56+r13] + vmovd xmm2,DWORD[56+r9] + vpinsrd xmm5,xmm5,DWORD[56+r14],1 + vpinsrd xmm0,xmm0,DWORD[56+r10],1 + vpinsrd xmm1,xmm1,DWORD[56+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD[56+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm14,6 + vpslld ymm2,ymm14,26 + vmovdqu YMMWORD[(448-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm9 + + vpsrld ymm1,ymm14,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm14,21 + vpaddd ymm5,ymm5,YMMWORD[64+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm14,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm14,7 + vpandn ymm0,ymm14,ymm8 + vpand ymm3,ymm14,ymm15 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm9,ymm10,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm10,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm11,ymm10 + + vpxor ymm9,ymm9,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm10,13 + + vpslld ymm2,ymm10,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm9,ymm1 + + vpsrld ymm1,ymm10,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm10,10 + vpxor ymm9,ymm11,ymm4 + vpaddd ymm13,ymm13,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm9,ymm9,ymm5 + vpaddd ymm9,ymm9,ymm7 + vmovd xmm5,DWORD[60+r12] + lea r12,[64+r12] + vmovd xmm0,DWORD[60+r8] + lea r8,[64+r8] + vmovd xmm1,DWORD[60+r13] + lea r13,[64+r13] + vmovd xmm2,DWORD[60+r9] + lea r9,[64+r9] + vpinsrd xmm5,xmm5,DWORD[60+r14],1 + lea r14,[64+r14] + vpinsrd xmm0,xmm0,DWORD[60+r10],1 + lea r10,[64+r10] + vpinsrd xmm1,xmm1,DWORD[60+r15],1 + lea r15,[64+r15] + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD[60+r11],1 + lea r11,[64+r11] + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm13,6 + vpslld ymm2,ymm13,26 + vmovdqu YMMWORD[(480-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm8 + + vpsrld ymm1,ymm13,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm13,21 + vpaddd ymm5,ymm5,YMMWORD[96+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm13,25 + vpxor ymm7,ymm7,ymm2 + prefetcht0 [63+r12] + vpslld ymm2,ymm13,7 + vpandn ymm0,ymm13,ymm15 + vpand ymm4,ymm13,ymm14 + prefetcht0 [63+r13] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm8,ymm9,2 + vpxor ymm7,ymm7,ymm2 + prefetcht0 [63+r14] + vpslld ymm1,ymm9,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm10,ymm9 + prefetcht0 [63+r15] + vpxor ymm8,ymm8,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm9,13 + prefetcht0 [63+r8] + vpslld ymm2,ymm9,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm3,ymm3,ymm4 + prefetcht0 [63+r9] + vpxor ymm7,ymm8,ymm1 + + vpsrld ymm1,ymm9,22 + vpxor ymm7,ymm7,ymm2 + prefetcht0 [63+r10] + vpslld ymm2,ymm9,10 + vpxor ymm8,ymm10,ymm3 + vpaddd ymm12,ymm12,ymm5 + prefetcht0 [63+r11] + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm8,ymm8,ymm5 + vpaddd ymm8,ymm8,ymm7 + add rbp,256 + vmovdqu ymm5,YMMWORD[((0-128))+rax] + mov ecx,3 + jmp NEAR $L$oop_16_xx_avx2 +ALIGN 32 +$L$oop_16_xx_avx2: + vmovdqu ymm6,YMMWORD[((32-128))+rax] + vpaddd ymm5,ymm5,YMMWORD[((288-256-128))+rbx] + + vpsrld ymm7,ymm6,3 + vpsrld ymm1,ymm6,7 + vpslld ymm2,ymm6,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm6,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm6,14 + vmovdqu ymm0,YMMWORD[((448-256-128))+rbx] + vpsrld ymm3,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm5,ymm5,ymm7 + vpxor ymm7,ymm3,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm5,ymm5,ymm7 + vpsrld ymm7,ymm12,6 + vpslld ymm2,ymm12,26 + vmovdqu YMMWORD[(0-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm15 + + vpsrld ymm1,ymm12,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm12,21 + vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm12,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm12,7 + vpandn ymm0,ymm12,ymm14 + vpand ymm3,ymm12,ymm13 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm15,ymm8,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm8,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm9,ymm8 + + vpxor ymm15,ymm15,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm8,13 + + vpslld ymm2,ymm8,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm15,ymm1 + + vpsrld ymm1,ymm8,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm8,10 + vpxor ymm15,ymm9,ymm4 + vpaddd ymm11,ymm11,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm15,ymm15,ymm5 + vpaddd ymm15,ymm15,ymm7 + vmovdqu ymm5,YMMWORD[((64-128))+rax] + vpaddd ymm6,ymm6,YMMWORD[((320-256-128))+rbx] + + vpsrld ymm7,ymm5,3 + vpsrld ymm1,ymm5,7 + vpslld ymm2,ymm5,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm5,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm5,14 + vmovdqu ymm0,YMMWORD[((480-256-128))+rbx] + vpsrld ymm4,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm6,ymm6,ymm7 + vpxor ymm7,ymm4,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm6,ymm6,ymm7 + vpsrld ymm7,ymm11,6 + vpslld ymm2,ymm11,26 + vmovdqu YMMWORD[(32-128)+rax],ymm6 + vpaddd ymm6,ymm6,ymm14 + + vpsrld ymm1,ymm11,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm11,21 + vpaddd ymm6,ymm6,YMMWORD[((-96))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm11,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm11,7 + vpandn ymm0,ymm11,ymm13 + vpand ymm4,ymm11,ymm12 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm14,ymm15,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm15,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm8,ymm15 + + vpxor ymm14,ymm14,ymm1 + vpaddd ymm6,ymm6,ymm7 + + vpsrld ymm1,ymm15,13 + + vpslld ymm2,ymm15,19 + vpaddd ymm6,ymm6,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm14,ymm1 + + vpsrld ymm1,ymm15,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm15,10 + vpxor ymm14,ymm8,ymm3 + vpaddd ymm10,ymm10,ymm6 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm14,ymm14,ymm6 + vpaddd ymm14,ymm14,ymm7 + vmovdqu ymm6,YMMWORD[((96-128))+rax] + vpaddd ymm5,ymm5,YMMWORD[((352-256-128))+rbx] + + vpsrld ymm7,ymm6,3 + vpsrld ymm1,ymm6,7 + vpslld ymm2,ymm6,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm6,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm6,14 + vmovdqu ymm0,YMMWORD[((0-128))+rax] + vpsrld ymm3,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm5,ymm5,ymm7 + vpxor ymm7,ymm3,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm5,ymm5,ymm7 + vpsrld ymm7,ymm10,6 + vpslld ymm2,ymm10,26 + vmovdqu YMMWORD[(64-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm13 + + vpsrld ymm1,ymm10,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm10,21 + vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm10,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm10,7 + vpandn ymm0,ymm10,ymm12 + vpand ymm3,ymm10,ymm11 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm13,ymm14,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm14,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm15,ymm14 + + vpxor ymm13,ymm13,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm14,13 + + vpslld ymm2,ymm14,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm13,ymm1 + + vpsrld ymm1,ymm14,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm14,10 + vpxor ymm13,ymm15,ymm4 + vpaddd ymm9,ymm9,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm13,ymm13,ymm5 + vpaddd ymm13,ymm13,ymm7 + vmovdqu ymm5,YMMWORD[((128-128))+rax] + vpaddd ymm6,ymm6,YMMWORD[((384-256-128))+rbx] + + vpsrld ymm7,ymm5,3 + vpsrld ymm1,ymm5,7 + vpslld ymm2,ymm5,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm5,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm5,14 + vmovdqu ymm0,YMMWORD[((32-128))+rax] + vpsrld ymm4,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm6,ymm6,ymm7 + vpxor ymm7,ymm4,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm6,ymm6,ymm7 + vpsrld ymm7,ymm9,6 + vpslld ymm2,ymm9,26 + vmovdqu YMMWORD[(96-128)+rax],ymm6 + vpaddd ymm6,ymm6,ymm12 + + vpsrld ymm1,ymm9,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm9,21 + vpaddd ymm6,ymm6,YMMWORD[((-32))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm9,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm9,7 + vpandn ymm0,ymm9,ymm11 + vpand ymm4,ymm9,ymm10 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm12,ymm13,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm13,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm14,ymm13 + + vpxor ymm12,ymm12,ymm1 + vpaddd ymm6,ymm6,ymm7 + + vpsrld ymm1,ymm13,13 + + vpslld ymm2,ymm13,19 + vpaddd ymm6,ymm6,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm12,ymm1 + + vpsrld ymm1,ymm13,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm13,10 + vpxor ymm12,ymm14,ymm3 + vpaddd ymm8,ymm8,ymm6 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm12,ymm12,ymm6 + vpaddd ymm12,ymm12,ymm7 + vmovdqu ymm6,YMMWORD[((160-128))+rax] + vpaddd ymm5,ymm5,YMMWORD[((416-256-128))+rbx] + + vpsrld ymm7,ymm6,3 + vpsrld ymm1,ymm6,7 + vpslld ymm2,ymm6,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm6,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm6,14 + vmovdqu ymm0,YMMWORD[((64-128))+rax] + vpsrld ymm3,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm5,ymm5,ymm7 + vpxor ymm7,ymm3,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm5,ymm5,ymm7 + vpsrld ymm7,ymm8,6 + vpslld ymm2,ymm8,26 + vmovdqu YMMWORD[(128-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm11 + + vpsrld ymm1,ymm8,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm8,21 + vpaddd ymm5,ymm5,YMMWORD[rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm8,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm8,7 + vpandn ymm0,ymm8,ymm10 + vpand ymm3,ymm8,ymm9 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm11,ymm12,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm12,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm13,ymm12 + + vpxor ymm11,ymm11,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm12,13 + + vpslld ymm2,ymm12,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm11,ymm1 + + vpsrld ymm1,ymm12,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm12,10 + vpxor ymm11,ymm13,ymm4 + vpaddd ymm15,ymm15,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm11,ymm11,ymm5 + vpaddd ymm11,ymm11,ymm7 + vmovdqu ymm5,YMMWORD[((192-128))+rax] + vpaddd ymm6,ymm6,YMMWORD[((448-256-128))+rbx] + + vpsrld ymm7,ymm5,3 + vpsrld ymm1,ymm5,7 + vpslld ymm2,ymm5,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm5,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm5,14 + vmovdqu ymm0,YMMWORD[((96-128))+rax] + vpsrld ymm4,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm6,ymm6,ymm7 + vpxor ymm7,ymm4,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm6,ymm6,ymm7 + vpsrld ymm7,ymm15,6 + vpslld ymm2,ymm15,26 + vmovdqu YMMWORD[(160-128)+rax],ymm6 + vpaddd ymm6,ymm6,ymm10 + + vpsrld ymm1,ymm15,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm15,21 + vpaddd ymm6,ymm6,YMMWORD[32+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm15,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm15,7 + vpandn ymm0,ymm15,ymm9 + vpand ymm4,ymm15,ymm8 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm10,ymm11,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm11,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm12,ymm11 + + vpxor ymm10,ymm10,ymm1 + vpaddd ymm6,ymm6,ymm7 + + vpsrld ymm1,ymm11,13 + + vpslld ymm2,ymm11,19 + vpaddd ymm6,ymm6,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm10,ymm1 + + vpsrld ymm1,ymm11,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm11,10 + vpxor ymm10,ymm12,ymm3 + vpaddd ymm14,ymm14,ymm6 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm10,ymm10,ymm6 + vpaddd ymm10,ymm10,ymm7 + vmovdqu ymm6,YMMWORD[((224-128))+rax] + vpaddd ymm5,ymm5,YMMWORD[((480-256-128))+rbx] + + vpsrld ymm7,ymm6,3 + vpsrld ymm1,ymm6,7 + vpslld ymm2,ymm6,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm6,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm6,14 + vmovdqu ymm0,YMMWORD[((128-128))+rax] + vpsrld ymm3,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm5,ymm5,ymm7 + vpxor ymm7,ymm3,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm5,ymm5,ymm7 + vpsrld ymm7,ymm14,6 + vpslld ymm2,ymm14,26 + vmovdqu YMMWORD[(192-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm9 + + vpsrld ymm1,ymm14,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm14,21 + vpaddd ymm5,ymm5,YMMWORD[64+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm14,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm14,7 + vpandn ymm0,ymm14,ymm8 + vpand ymm3,ymm14,ymm15 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm9,ymm10,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm10,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm11,ymm10 + + vpxor ymm9,ymm9,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm10,13 + + vpslld ymm2,ymm10,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm9,ymm1 + + vpsrld ymm1,ymm10,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm10,10 + vpxor ymm9,ymm11,ymm4 + vpaddd ymm13,ymm13,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm9,ymm9,ymm5 + vpaddd ymm9,ymm9,ymm7 + vmovdqu ymm5,YMMWORD[((256-256-128))+rbx] + vpaddd ymm6,ymm6,YMMWORD[((0-128))+rax] + + vpsrld ymm7,ymm5,3 + vpsrld ymm1,ymm5,7 + vpslld ymm2,ymm5,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm5,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm5,14 + vmovdqu ymm0,YMMWORD[((160-128))+rax] + vpsrld ymm4,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm6,ymm6,ymm7 + vpxor ymm7,ymm4,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm6,ymm6,ymm7 + vpsrld ymm7,ymm13,6 + vpslld ymm2,ymm13,26 + vmovdqu YMMWORD[(224-128)+rax],ymm6 + vpaddd ymm6,ymm6,ymm8 + + vpsrld ymm1,ymm13,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm13,21 + vpaddd ymm6,ymm6,YMMWORD[96+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm13,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm13,7 + vpandn ymm0,ymm13,ymm15 + vpand ymm4,ymm13,ymm14 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm8,ymm9,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm9,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm10,ymm9 + + vpxor ymm8,ymm8,ymm1 + vpaddd ymm6,ymm6,ymm7 + + vpsrld ymm1,ymm9,13 + + vpslld ymm2,ymm9,19 + vpaddd ymm6,ymm6,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm8,ymm1 + + vpsrld ymm1,ymm9,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm9,10 + vpxor ymm8,ymm10,ymm3 + vpaddd ymm12,ymm12,ymm6 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm8,ymm8,ymm6 + vpaddd ymm8,ymm8,ymm7 + add rbp,256 + vmovdqu ymm6,YMMWORD[((288-256-128))+rbx] + vpaddd ymm5,ymm5,YMMWORD[((32-128))+rax] + + vpsrld ymm7,ymm6,3 + vpsrld ymm1,ymm6,7 + vpslld ymm2,ymm6,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm6,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm6,14 + vmovdqu ymm0,YMMWORD[((192-128))+rax] + vpsrld ymm3,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm5,ymm5,ymm7 + vpxor ymm7,ymm3,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm5,ymm5,ymm7 + vpsrld ymm7,ymm12,6 + vpslld ymm2,ymm12,26 + vmovdqu YMMWORD[(256-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm15 + + vpsrld ymm1,ymm12,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm12,21 + vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm12,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm12,7 + vpandn ymm0,ymm12,ymm14 + vpand ymm3,ymm12,ymm13 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm15,ymm8,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm8,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm9,ymm8 + + vpxor ymm15,ymm15,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm8,13 + + vpslld ymm2,ymm8,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm15,ymm1 + + vpsrld ymm1,ymm8,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm8,10 + vpxor ymm15,ymm9,ymm4 + vpaddd ymm11,ymm11,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm15,ymm15,ymm5 + vpaddd ymm15,ymm15,ymm7 + vmovdqu ymm5,YMMWORD[((320-256-128))+rbx] + vpaddd ymm6,ymm6,YMMWORD[((64-128))+rax] + + vpsrld ymm7,ymm5,3 + vpsrld ymm1,ymm5,7 + vpslld ymm2,ymm5,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm5,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm5,14 + vmovdqu ymm0,YMMWORD[((224-128))+rax] + vpsrld ymm4,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm6,ymm6,ymm7 + vpxor ymm7,ymm4,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm6,ymm6,ymm7 + vpsrld ymm7,ymm11,6 + vpslld ymm2,ymm11,26 + vmovdqu YMMWORD[(288-256-128)+rbx],ymm6 + vpaddd ymm6,ymm6,ymm14 + + vpsrld ymm1,ymm11,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm11,21 + vpaddd ymm6,ymm6,YMMWORD[((-96))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm11,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm11,7 + vpandn ymm0,ymm11,ymm13 + vpand ymm4,ymm11,ymm12 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm14,ymm15,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm15,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm8,ymm15 + + vpxor ymm14,ymm14,ymm1 + vpaddd ymm6,ymm6,ymm7 + + vpsrld ymm1,ymm15,13 + + vpslld ymm2,ymm15,19 + vpaddd ymm6,ymm6,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm14,ymm1 + + vpsrld ymm1,ymm15,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm15,10 + vpxor ymm14,ymm8,ymm3 + vpaddd ymm10,ymm10,ymm6 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm14,ymm14,ymm6 + vpaddd ymm14,ymm14,ymm7 + vmovdqu ymm6,YMMWORD[((352-256-128))+rbx] + vpaddd ymm5,ymm5,YMMWORD[((96-128))+rax] + + vpsrld ymm7,ymm6,3 + vpsrld ymm1,ymm6,7 + vpslld ymm2,ymm6,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm6,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm6,14 + vmovdqu ymm0,YMMWORD[((256-256-128))+rbx] + vpsrld ymm3,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm5,ymm5,ymm7 + vpxor ymm7,ymm3,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm5,ymm5,ymm7 + vpsrld ymm7,ymm10,6 + vpslld ymm2,ymm10,26 + vmovdqu YMMWORD[(320-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm13 + + vpsrld ymm1,ymm10,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm10,21 + vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm10,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm10,7 + vpandn ymm0,ymm10,ymm12 + vpand ymm3,ymm10,ymm11 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm13,ymm14,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm14,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm15,ymm14 + + vpxor ymm13,ymm13,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm14,13 + + vpslld ymm2,ymm14,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm13,ymm1 + + vpsrld ymm1,ymm14,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm14,10 + vpxor ymm13,ymm15,ymm4 + vpaddd ymm9,ymm9,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm13,ymm13,ymm5 + vpaddd ymm13,ymm13,ymm7 + vmovdqu ymm5,YMMWORD[((384-256-128))+rbx] + vpaddd ymm6,ymm6,YMMWORD[((128-128))+rax] + + vpsrld ymm7,ymm5,3 + vpsrld ymm1,ymm5,7 + vpslld ymm2,ymm5,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm5,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm5,14 + vmovdqu ymm0,YMMWORD[((288-256-128))+rbx] + vpsrld ymm4,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm6,ymm6,ymm7 + vpxor ymm7,ymm4,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm6,ymm6,ymm7 + vpsrld ymm7,ymm9,6 + vpslld ymm2,ymm9,26 + vmovdqu YMMWORD[(352-256-128)+rbx],ymm6 + vpaddd ymm6,ymm6,ymm12 + + vpsrld ymm1,ymm9,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm9,21 + vpaddd ymm6,ymm6,YMMWORD[((-32))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm9,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm9,7 + vpandn ymm0,ymm9,ymm11 + vpand ymm4,ymm9,ymm10 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm12,ymm13,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm13,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm14,ymm13 + + vpxor ymm12,ymm12,ymm1 + vpaddd ymm6,ymm6,ymm7 + + vpsrld ymm1,ymm13,13 + + vpslld ymm2,ymm13,19 + vpaddd ymm6,ymm6,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm12,ymm1 + + vpsrld ymm1,ymm13,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm13,10 + vpxor ymm12,ymm14,ymm3 + vpaddd ymm8,ymm8,ymm6 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm12,ymm12,ymm6 + vpaddd ymm12,ymm12,ymm7 + vmovdqu ymm6,YMMWORD[((416-256-128))+rbx] + vpaddd ymm5,ymm5,YMMWORD[((160-128))+rax] + + vpsrld ymm7,ymm6,3 + vpsrld ymm1,ymm6,7 + vpslld ymm2,ymm6,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm6,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm6,14 + vmovdqu ymm0,YMMWORD[((320-256-128))+rbx] + vpsrld ymm3,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm5,ymm5,ymm7 + vpxor ymm7,ymm3,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm5,ymm5,ymm7 + vpsrld ymm7,ymm8,6 + vpslld ymm2,ymm8,26 + vmovdqu YMMWORD[(384-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm11 + + vpsrld ymm1,ymm8,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm8,21 + vpaddd ymm5,ymm5,YMMWORD[rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm8,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm8,7 + vpandn ymm0,ymm8,ymm10 + vpand ymm3,ymm8,ymm9 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm11,ymm12,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm12,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm13,ymm12 + + vpxor ymm11,ymm11,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm12,13 + + vpslld ymm2,ymm12,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm11,ymm1 + + vpsrld ymm1,ymm12,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm12,10 + vpxor ymm11,ymm13,ymm4 + vpaddd ymm15,ymm15,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm11,ymm11,ymm5 + vpaddd ymm11,ymm11,ymm7 + vmovdqu ymm5,YMMWORD[((448-256-128))+rbx] + vpaddd ymm6,ymm6,YMMWORD[((192-128))+rax] + + vpsrld ymm7,ymm5,3 + vpsrld ymm1,ymm5,7 + vpslld ymm2,ymm5,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm5,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm5,14 + vmovdqu ymm0,YMMWORD[((352-256-128))+rbx] + vpsrld ymm4,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm6,ymm6,ymm7 + vpxor ymm7,ymm4,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm6,ymm6,ymm7 + vpsrld ymm7,ymm15,6 + vpslld ymm2,ymm15,26 + vmovdqu YMMWORD[(416-256-128)+rbx],ymm6 + vpaddd ymm6,ymm6,ymm10 + + vpsrld ymm1,ymm15,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm15,21 + vpaddd ymm6,ymm6,YMMWORD[32+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm15,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm15,7 + vpandn ymm0,ymm15,ymm9 + vpand ymm4,ymm15,ymm8 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm10,ymm11,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm11,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm12,ymm11 + + vpxor ymm10,ymm10,ymm1 + vpaddd ymm6,ymm6,ymm7 + + vpsrld ymm1,ymm11,13 + + vpslld ymm2,ymm11,19 + vpaddd ymm6,ymm6,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm10,ymm1 + + vpsrld ymm1,ymm11,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm11,10 + vpxor ymm10,ymm12,ymm3 + vpaddd ymm14,ymm14,ymm6 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm10,ymm10,ymm6 + vpaddd ymm10,ymm10,ymm7 + vmovdqu ymm6,YMMWORD[((480-256-128))+rbx] + vpaddd ymm5,ymm5,YMMWORD[((224-128))+rax] + + vpsrld ymm7,ymm6,3 + vpsrld ymm1,ymm6,7 + vpslld ymm2,ymm6,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm6,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm6,14 + vmovdqu ymm0,YMMWORD[((384-256-128))+rbx] + vpsrld ymm3,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm5,ymm5,ymm7 + vpxor ymm7,ymm3,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm5,ymm5,ymm7 + vpsrld ymm7,ymm14,6 + vpslld ymm2,ymm14,26 + vmovdqu YMMWORD[(448-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm9 + + vpsrld ymm1,ymm14,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm14,21 + vpaddd ymm5,ymm5,YMMWORD[64+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm14,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm14,7 + vpandn ymm0,ymm14,ymm8 + vpand ymm3,ymm14,ymm15 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm9,ymm10,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm10,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm11,ymm10 + + vpxor ymm9,ymm9,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm10,13 + + vpslld ymm2,ymm10,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm9,ymm1 + + vpsrld ymm1,ymm10,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm10,10 + vpxor ymm9,ymm11,ymm4 + vpaddd ymm13,ymm13,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm9,ymm9,ymm5 + vpaddd ymm9,ymm9,ymm7 + vmovdqu ymm5,YMMWORD[((0-128))+rax] + vpaddd ymm6,ymm6,YMMWORD[((256-256-128))+rbx] + + vpsrld ymm7,ymm5,3 + vpsrld ymm1,ymm5,7 + vpslld ymm2,ymm5,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm5,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm5,14 + vmovdqu ymm0,YMMWORD[((416-256-128))+rbx] + vpsrld ymm4,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm6,ymm6,ymm7 + vpxor ymm7,ymm4,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm6,ymm6,ymm7 + vpsrld ymm7,ymm13,6 + vpslld ymm2,ymm13,26 + vmovdqu YMMWORD[(480-256-128)+rbx],ymm6 + vpaddd ymm6,ymm6,ymm8 + + vpsrld ymm1,ymm13,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm13,21 + vpaddd ymm6,ymm6,YMMWORD[96+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm13,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm13,7 + vpandn ymm0,ymm13,ymm15 + vpand ymm4,ymm13,ymm14 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm8,ymm9,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm9,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm10,ymm9 + + vpxor ymm8,ymm8,ymm1 + vpaddd ymm6,ymm6,ymm7 + + vpsrld ymm1,ymm9,13 + + vpslld ymm2,ymm9,19 + vpaddd ymm6,ymm6,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm8,ymm1 + + vpsrld ymm1,ymm9,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm9,10 + vpxor ymm8,ymm10,ymm3 + vpaddd ymm12,ymm12,ymm6 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm8,ymm8,ymm6 + vpaddd ymm8,ymm8,ymm7 + add rbp,256 + dec ecx + jnz NEAR $L$oop_16_xx_avx2 + + mov ecx,1 + lea rbx,[512+rsp] + lea rbp,[((K256+128))] + cmp ecx,DWORD[rbx] + cmovge r12,rbp + cmp ecx,DWORD[4+rbx] + cmovge r13,rbp + cmp ecx,DWORD[8+rbx] + cmovge r14,rbp + cmp ecx,DWORD[12+rbx] + cmovge r15,rbp + cmp ecx,DWORD[16+rbx] + cmovge r8,rbp + cmp ecx,DWORD[20+rbx] + cmovge r9,rbp + cmp ecx,DWORD[24+rbx] + cmovge r10,rbp + cmp ecx,DWORD[28+rbx] + cmovge r11,rbp + vmovdqa ymm7,YMMWORD[rbx] + vpxor ymm0,ymm0,ymm0 + vmovdqa ymm6,ymm7 + vpcmpgtd ymm6,ymm6,ymm0 + vpaddd ymm7,ymm7,ymm6 + + vmovdqu ymm0,YMMWORD[((0-128))+rdi] + vpand ymm8,ymm8,ymm6 + vmovdqu ymm1,YMMWORD[((32-128))+rdi] + vpand ymm9,ymm9,ymm6 + vmovdqu ymm2,YMMWORD[((64-128))+rdi] + vpand ymm10,ymm10,ymm6 + vmovdqu ymm5,YMMWORD[((96-128))+rdi] + vpand ymm11,ymm11,ymm6 + vpaddd ymm8,ymm8,ymm0 + vmovdqu ymm0,YMMWORD[((128-128))+rdi] + vpand ymm12,ymm12,ymm6 + vpaddd ymm9,ymm9,ymm1 + vmovdqu ymm1,YMMWORD[((160-128))+rdi] + vpand ymm13,ymm13,ymm6 + vpaddd ymm10,ymm10,ymm2 + vmovdqu ymm2,YMMWORD[((192-128))+rdi] + vpand ymm14,ymm14,ymm6 + vpaddd ymm11,ymm11,ymm5 + vmovdqu ymm5,YMMWORD[((224-128))+rdi] + vpand ymm15,ymm15,ymm6 + vpaddd ymm12,ymm12,ymm0 + vpaddd ymm13,ymm13,ymm1 + vmovdqu YMMWORD[(0-128)+rdi],ymm8 + vpaddd ymm14,ymm14,ymm2 + vmovdqu YMMWORD[(32-128)+rdi],ymm9 + vpaddd ymm15,ymm15,ymm5 + vmovdqu YMMWORD[(64-128)+rdi],ymm10 + vmovdqu YMMWORD[(96-128)+rdi],ymm11 + vmovdqu YMMWORD[(128-128)+rdi],ymm12 + vmovdqu YMMWORD[(160-128)+rdi],ymm13 + vmovdqu YMMWORD[(192-128)+rdi],ymm14 + vmovdqu YMMWORD[(224-128)+rdi],ymm15 + + vmovdqu YMMWORD[rbx],ymm7 + lea rbx,[((256+128))+rsp] + vmovdqu ymm6,YMMWORD[$L$pbswap] + dec edx + jnz NEAR $L$oop_avx2 + + + + + + + +$L$done_avx2: + mov rax,QWORD[544+rsp] + + vzeroupper + movaps xmm6,XMMWORD[((-216))+rax] + movaps xmm7,XMMWORD[((-200))+rax] + movaps xmm8,XMMWORD[((-184))+rax] + movaps xmm9,XMMWORD[((-168))+rax] + movaps xmm10,XMMWORD[((-152))+rax] + movaps xmm11,XMMWORD[((-136))+rax] + movaps xmm12,XMMWORD[((-120))+rax] + movaps xmm13,XMMWORD[((-104))+rax] + movaps xmm14,XMMWORD[((-88))+rax] + movaps xmm15,XMMWORD[((-72))+rax] + mov r15,QWORD[((-48))+rax] + + mov r14,QWORD[((-40))+rax] + + mov r13,QWORD[((-32))+rax] + + mov r12,QWORD[((-24))+rax] + + mov rbp,QWORD[((-16))+rax] + + mov rbx,QWORD[((-8))+rax] + + lea rsp,[rax] + +$L$epilogue_avx2: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha256_multi_block_avx2: +ALIGN 256 +K256: + DD 1116352408,1116352408,1116352408,1116352408 + DD 1116352408,1116352408,1116352408,1116352408 + DD 1899447441,1899447441,1899447441,1899447441 + DD 1899447441,1899447441,1899447441,1899447441 + DD 3049323471,3049323471,3049323471,3049323471 + DD 3049323471,3049323471,3049323471,3049323471 + DD 3921009573,3921009573,3921009573,3921009573 + DD 3921009573,3921009573,3921009573,3921009573 + DD 961987163,961987163,961987163,961987163 + DD 961987163,961987163,961987163,961987163 + DD 1508970993,1508970993,1508970993,1508970993 + DD 1508970993,1508970993,1508970993,1508970993 + DD 2453635748,2453635748,2453635748,2453635748 + DD 2453635748,2453635748,2453635748,2453635748 + DD 2870763221,2870763221,2870763221,2870763221 + DD 2870763221,2870763221,2870763221,2870763221 + DD 3624381080,3624381080,3624381080,3624381080 + DD 3624381080,3624381080,3624381080,3624381080 + DD 310598401,310598401,310598401,310598401 + DD 310598401,310598401,310598401,310598401 + DD 607225278,607225278,607225278,607225278 + DD 607225278,607225278,607225278,607225278 + DD 1426881987,1426881987,1426881987,1426881987 + DD 1426881987,1426881987,1426881987,1426881987 + DD 1925078388,1925078388,1925078388,1925078388 + DD 1925078388,1925078388,1925078388,1925078388 + DD 2162078206,2162078206,2162078206,2162078206 + DD 2162078206,2162078206,2162078206,2162078206 + DD 2614888103,2614888103,2614888103,2614888103 + DD 2614888103,2614888103,2614888103,2614888103 + DD 3248222580,3248222580,3248222580,3248222580 + DD 3248222580,3248222580,3248222580,3248222580 + DD 3835390401,3835390401,3835390401,3835390401 + DD 3835390401,3835390401,3835390401,3835390401 + DD 4022224774,4022224774,4022224774,4022224774 + DD 4022224774,4022224774,4022224774,4022224774 + DD 264347078,264347078,264347078,264347078 + DD 264347078,264347078,264347078,264347078 + DD 604807628,604807628,604807628,604807628 + DD 604807628,604807628,604807628,604807628 + DD 770255983,770255983,770255983,770255983 + DD 770255983,770255983,770255983,770255983 + DD 1249150122,1249150122,1249150122,1249150122 + DD 1249150122,1249150122,1249150122,1249150122 + DD 1555081692,1555081692,1555081692,1555081692 + DD 1555081692,1555081692,1555081692,1555081692 + DD 1996064986,1996064986,1996064986,1996064986 + DD 1996064986,1996064986,1996064986,1996064986 + DD 2554220882,2554220882,2554220882,2554220882 + DD 2554220882,2554220882,2554220882,2554220882 + DD 2821834349,2821834349,2821834349,2821834349 + DD 2821834349,2821834349,2821834349,2821834349 + DD 2952996808,2952996808,2952996808,2952996808 + DD 2952996808,2952996808,2952996808,2952996808 + DD 3210313671,3210313671,3210313671,3210313671 + DD 3210313671,3210313671,3210313671,3210313671 + DD 3336571891,3336571891,3336571891,3336571891 + DD 3336571891,3336571891,3336571891,3336571891 + DD 3584528711,3584528711,3584528711,3584528711 + DD 3584528711,3584528711,3584528711,3584528711 + DD 113926993,113926993,113926993,113926993 + DD 113926993,113926993,113926993,113926993 + DD 338241895,338241895,338241895,338241895 + DD 338241895,338241895,338241895,338241895 + DD 666307205,666307205,666307205,666307205 + DD 666307205,666307205,666307205,666307205 + DD 773529912,773529912,773529912,773529912 + DD 773529912,773529912,773529912,773529912 + DD 1294757372,1294757372,1294757372,1294757372 + DD 1294757372,1294757372,1294757372,1294757372 + DD 1396182291,1396182291,1396182291,1396182291 + DD 1396182291,1396182291,1396182291,1396182291 + DD 1695183700,1695183700,1695183700,1695183700 + DD 1695183700,1695183700,1695183700,1695183700 + DD 1986661051,1986661051,1986661051,1986661051 + DD 1986661051,1986661051,1986661051,1986661051 + DD 2177026350,2177026350,2177026350,2177026350 + DD 2177026350,2177026350,2177026350,2177026350 + DD 2456956037,2456956037,2456956037,2456956037 + DD 2456956037,2456956037,2456956037,2456956037 + DD 2730485921,2730485921,2730485921,2730485921 + DD 2730485921,2730485921,2730485921,2730485921 + DD 2820302411,2820302411,2820302411,2820302411 + DD 2820302411,2820302411,2820302411,2820302411 + DD 3259730800,3259730800,3259730800,3259730800 + DD 3259730800,3259730800,3259730800,3259730800 + DD 3345764771,3345764771,3345764771,3345764771 + DD 3345764771,3345764771,3345764771,3345764771 + DD 3516065817,3516065817,3516065817,3516065817 + DD 3516065817,3516065817,3516065817,3516065817 + DD 3600352804,3600352804,3600352804,3600352804 + DD 3600352804,3600352804,3600352804,3600352804 + DD 4094571909,4094571909,4094571909,4094571909 + DD 4094571909,4094571909,4094571909,4094571909 + DD 275423344,275423344,275423344,275423344 + DD 275423344,275423344,275423344,275423344 + DD 430227734,430227734,430227734,430227734 + DD 430227734,430227734,430227734,430227734 + DD 506948616,506948616,506948616,506948616 + DD 506948616,506948616,506948616,506948616 + DD 659060556,659060556,659060556,659060556 + DD 659060556,659060556,659060556,659060556 + DD 883997877,883997877,883997877,883997877 + DD 883997877,883997877,883997877,883997877 + DD 958139571,958139571,958139571,958139571 + DD 958139571,958139571,958139571,958139571 + DD 1322822218,1322822218,1322822218,1322822218 + DD 1322822218,1322822218,1322822218,1322822218 + DD 1537002063,1537002063,1537002063,1537002063 + DD 1537002063,1537002063,1537002063,1537002063 + DD 1747873779,1747873779,1747873779,1747873779 + DD 1747873779,1747873779,1747873779,1747873779 + DD 1955562222,1955562222,1955562222,1955562222 + DD 1955562222,1955562222,1955562222,1955562222 + DD 2024104815,2024104815,2024104815,2024104815 + DD 2024104815,2024104815,2024104815,2024104815 + DD 2227730452,2227730452,2227730452,2227730452 + DD 2227730452,2227730452,2227730452,2227730452 + DD 2361852424,2361852424,2361852424,2361852424 + DD 2361852424,2361852424,2361852424,2361852424 + DD 2428436474,2428436474,2428436474,2428436474 + DD 2428436474,2428436474,2428436474,2428436474 + DD 2756734187,2756734187,2756734187,2756734187 + DD 2756734187,2756734187,2756734187,2756734187 + DD 3204031479,3204031479,3204031479,3204031479 + DD 3204031479,3204031479,3204031479,3204031479 + DD 3329325298,3329325298,3329325298,3329325298 + DD 3329325298,3329325298,3329325298,3329325298 +$L$pbswap: + DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +K256_shaext: + DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +DB 83,72,65,50,53,54,32,109,117,108,116,105,45,98,108,111 +DB 99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114 +DB 32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71 +DB 65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112 +DB 101,110,115,115,108,46,111,114,103,62,0 +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$in_prologue + + mov rax,QWORD[272+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + + lea rsi,[((-24-160))+rax] + lea rdi,[512+r8] + mov ecx,20 + DD 0xa548f3fc + +$L$in_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +ALIGN 16 +avx2_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$in_prologue + + mov rax,QWORD[544+r8] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + + lea rsi,[((-56-160))+rax] + lea rdi,[512+r8] + mov ecx,20 + DD 0xa548f3fc + + jmp NEAR $L$in_prologue + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_sha256_multi_block wrt ..imagebase + DD $L$SEH_end_sha256_multi_block wrt ..imagebase + DD $L$SEH_info_sha256_multi_block wrt ..imagebase + DD $L$SEH_begin_sha256_multi_block_shaext wrt ..imagebase + DD $L$SEH_end_sha256_multi_block_shaext wrt ..imagebase + DD $L$SEH_info_sha256_multi_block_shaext wrt ..imagebase + DD $L$SEH_begin_sha256_multi_block_avx wrt ..imagebase + DD $L$SEH_end_sha256_multi_block_avx wrt ..imagebase + DD $L$SEH_info_sha256_multi_block_avx wrt ..imagebase + DD $L$SEH_begin_sha256_multi_block_avx2 wrt ..imagebase + DD $L$SEH_end_sha256_multi_block_avx2 wrt ..imagebase + DD $L$SEH_info_sha256_multi_block_avx2 wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_sha256_multi_block: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$body wrt ..imagebase,$L$epilogue wrt ..imagebase +$L$SEH_info_sha256_multi_block_shaext: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$body_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase +$L$SEH_info_sha256_multi_block_avx: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$body_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase +$L$SEH_info_sha256_multi_block_avx2: +DB 9,0,0,0 + DD avx2_handler wrt ..imagebase + DD $L$body_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase diff --git a/vere/ext/openssl/gen/windows-x86_64/crypto/sha/sha256-x86_64.asm b/vere/ext/openssl/gen/windows-x86_64/crypto/sha/sha256-x86_64.asm new file mode 100644 index 0000000..9ff3cbb --- /dev/null +++ b/vere/ext/openssl/gen/windows-x86_64/crypto/sha/sha256-x86_64.asm @@ -0,0 +1,5711 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + +EXTERN OPENSSL_ia32cap_P +global sha256_block_data_order + +ALIGN 16 +sha256_block_data_order: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + + lea r11,[OPENSSL_ia32cap_P] + mov r9d,DWORD[r11] + mov r10d,DWORD[4+r11] + mov r11d,DWORD[8+r11] + test r11d,536870912 + jnz NEAR _shaext_shortcut + and r11d,296 + cmp r11d,296 + je NEAR $L$avx2_shortcut + and r9d,1073741824 + and r10d,268435968 + or r10d,r9d + cmp r10d,1342177792 + je NEAR $L$avx_shortcut + test r10d,512 + jnz NEAR $L$ssse3_shortcut + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + shl rdx,4 + sub rsp,16*4+4*8 + lea rdx,[rdx*4+rsi] + and rsp,-64 + mov QWORD[((64+0))+rsp],rdi + mov QWORD[((64+8))+rsp],rsi + mov QWORD[((64+16))+rsp],rdx + mov QWORD[88+rsp],rax + +$L$prologue: + + mov eax,DWORD[rdi] + mov ebx,DWORD[4+rdi] + mov ecx,DWORD[8+rdi] + mov edx,DWORD[12+rdi] + mov r8d,DWORD[16+rdi] + mov r9d,DWORD[20+rdi] + mov r10d,DWORD[24+rdi] + mov r11d,DWORD[28+rdi] + jmp NEAR $L$loop + +ALIGN 16 +$L$loop: + mov edi,ebx + lea rbp,[K256] + xor edi,ecx + mov r12d,DWORD[rsi] + mov r13d,r8d + mov r14d,eax + bswap r12d + ror r13d,14 + mov r15d,r9d + + xor r13d,r8d + ror r14d,9 + xor r15d,r10d + + mov DWORD[rsp],r12d + xor r14d,eax + and r15d,r8d + + ror r13d,5 + add r12d,r11d + xor r15d,r10d + + ror r14d,11 + xor r13d,r8d + add r12d,r15d + + mov r15d,eax + add r12d,DWORD[rbp] + xor r14d,eax + + xor r15d,ebx + ror r13d,6 + mov r11d,ebx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r11d,edi + add edx,r12d + add r11d,r12d + + lea rbp,[4+rbp] + add r11d,r14d + mov r12d,DWORD[4+rsi] + mov r13d,edx + mov r14d,r11d + bswap r12d + ror r13d,14 + mov edi,r8d + + xor r13d,edx + ror r14d,9 + xor edi,r9d + + mov DWORD[4+rsp],r12d + xor r14d,r11d + and edi,edx + + ror r13d,5 + add r12d,r10d + xor edi,r9d + + ror r14d,11 + xor r13d,edx + add r12d,edi + + mov edi,r11d + add r12d,DWORD[rbp] + xor r14d,r11d + + xor edi,eax + ror r13d,6 + mov r10d,eax + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r10d,r15d + add ecx,r12d + add r10d,r12d + + lea rbp,[4+rbp] + add r10d,r14d + mov r12d,DWORD[8+rsi] + mov r13d,ecx + mov r14d,r10d + bswap r12d + ror r13d,14 + mov r15d,edx + + xor r13d,ecx + ror r14d,9 + xor r15d,r8d + + mov DWORD[8+rsp],r12d + xor r14d,r10d + and r15d,ecx + + ror r13d,5 + add r12d,r9d + xor r15d,r8d + + ror r14d,11 + xor r13d,ecx + add r12d,r15d + + mov r15d,r10d + add r12d,DWORD[rbp] + xor r14d,r10d + + xor r15d,r11d + ror r13d,6 + mov r9d,r11d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r9d,edi + add ebx,r12d + add r9d,r12d + + lea rbp,[4+rbp] + add r9d,r14d + mov r12d,DWORD[12+rsi] + mov r13d,ebx + mov r14d,r9d + bswap r12d + ror r13d,14 + mov edi,ecx + + xor r13d,ebx + ror r14d,9 + xor edi,edx + + mov DWORD[12+rsp],r12d + xor r14d,r9d + and edi,ebx + + ror r13d,5 + add r12d,r8d + xor edi,edx + + ror r14d,11 + xor r13d,ebx + add r12d,edi + + mov edi,r9d + add r12d,DWORD[rbp] + xor r14d,r9d + + xor edi,r10d + ror r13d,6 + mov r8d,r10d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r8d,r15d + add eax,r12d + add r8d,r12d + + lea rbp,[20+rbp] + add r8d,r14d + mov r12d,DWORD[16+rsi] + mov r13d,eax + mov r14d,r8d + bswap r12d + ror r13d,14 + mov r15d,ebx + + xor r13d,eax + ror r14d,9 + xor r15d,ecx + + mov DWORD[16+rsp],r12d + xor r14d,r8d + and r15d,eax + + ror r13d,5 + add r12d,edx + xor r15d,ecx + + ror r14d,11 + xor r13d,eax + add r12d,r15d + + mov r15d,r8d + add r12d,DWORD[rbp] + xor r14d,r8d + + xor r15d,r9d + ror r13d,6 + mov edx,r9d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor edx,edi + add r11d,r12d + add edx,r12d + + lea rbp,[4+rbp] + add edx,r14d + mov r12d,DWORD[20+rsi] + mov r13d,r11d + mov r14d,edx + bswap r12d + ror r13d,14 + mov edi,eax + + xor r13d,r11d + ror r14d,9 + xor edi,ebx + + mov DWORD[20+rsp],r12d + xor r14d,edx + and edi,r11d + + ror r13d,5 + add r12d,ecx + xor edi,ebx + + ror r14d,11 + xor r13d,r11d + add r12d,edi + + mov edi,edx + add r12d,DWORD[rbp] + xor r14d,edx + + xor edi,r8d + ror r13d,6 + mov ecx,r8d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor ecx,r15d + add r10d,r12d + add ecx,r12d + + lea rbp,[4+rbp] + add ecx,r14d + mov r12d,DWORD[24+rsi] + mov r13d,r10d + mov r14d,ecx + bswap r12d + ror r13d,14 + mov r15d,r11d + + xor r13d,r10d + ror r14d,9 + xor r15d,eax + + mov DWORD[24+rsp],r12d + xor r14d,ecx + and r15d,r10d + + ror r13d,5 + add r12d,ebx + xor r15d,eax + + ror r14d,11 + xor r13d,r10d + add r12d,r15d + + mov r15d,ecx + add r12d,DWORD[rbp] + xor r14d,ecx + + xor r15d,edx + ror r13d,6 + mov ebx,edx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor ebx,edi + add r9d,r12d + add ebx,r12d + + lea rbp,[4+rbp] + add ebx,r14d + mov r12d,DWORD[28+rsi] + mov r13d,r9d + mov r14d,ebx + bswap r12d + ror r13d,14 + mov edi,r10d + + xor r13d,r9d + ror r14d,9 + xor edi,r11d + + mov DWORD[28+rsp],r12d + xor r14d,ebx + and edi,r9d + + ror r13d,5 + add r12d,eax + xor edi,r11d + + ror r14d,11 + xor r13d,r9d + add r12d,edi + + mov edi,ebx + add r12d,DWORD[rbp] + xor r14d,ebx + + xor edi,ecx + ror r13d,6 + mov eax,ecx + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor eax,r15d + add r8d,r12d + add eax,r12d + + lea rbp,[20+rbp] + add eax,r14d + mov r12d,DWORD[32+rsi] + mov r13d,r8d + mov r14d,eax + bswap r12d + ror r13d,14 + mov r15d,r9d + + xor r13d,r8d + ror r14d,9 + xor r15d,r10d + + mov DWORD[32+rsp],r12d + xor r14d,eax + and r15d,r8d + + ror r13d,5 + add r12d,r11d + xor r15d,r10d + + ror r14d,11 + xor r13d,r8d + add r12d,r15d + + mov r15d,eax + add r12d,DWORD[rbp] + xor r14d,eax + + xor r15d,ebx + ror r13d,6 + mov r11d,ebx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r11d,edi + add edx,r12d + add r11d,r12d + + lea rbp,[4+rbp] + add r11d,r14d + mov r12d,DWORD[36+rsi] + mov r13d,edx + mov r14d,r11d + bswap r12d + ror r13d,14 + mov edi,r8d + + xor r13d,edx + ror r14d,9 + xor edi,r9d + + mov DWORD[36+rsp],r12d + xor r14d,r11d + and edi,edx + + ror r13d,5 + add r12d,r10d + xor edi,r9d + + ror r14d,11 + xor r13d,edx + add r12d,edi + + mov edi,r11d + add r12d,DWORD[rbp] + xor r14d,r11d + + xor edi,eax + ror r13d,6 + mov r10d,eax + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r10d,r15d + add ecx,r12d + add r10d,r12d + + lea rbp,[4+rbp] + add r10d,r14d + mov r12d,DWORD[40+rsi] + mov r13d,ecx + mov r14d,r10d + bswap r12d + ror r13d,14 + mov r15d,edx + + xor r13d,ecx + ror r14d,9 + xor r15d,r8d + + mov DWORD[40+rsp],r12d + xor r14d,r10d + and r15d,ecx + + ror r13d,5 + add r12d,r9d + xor r15d,r8d + + ror r14d,11 + xor r13d,ecx + add r12d,r15d + + mov r15d,r10d + add r12d,DWORD[rbp] + xor r14d,r10d + + xor r15d,r11d + ror r13d,6 + mov r9d,r11d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r9d,edi + add ebx,r12d + add r9d,r12d + + lea rbp,[4+rbp] + add r9d,r14d + mov r12d,DWORD[44+rsi] + mov r13d,ebx + mov r14d,r9d + bswap r12d + ror r13d,14 + mov edi,ecx + + xor r13d,ebx + ror r14d,9 + xor edi,edx + + mov DWORD[44+rsp],r12d + xor r14d,r9d + and edi,ebx + + ror r13d,5 + add r12d,r8d + xor edi,edx + + ror r14d,11 + xor r13d,ebx + add r12d,edi + + mov edi,r9d + add r12d,DWORD[rbp] + xor r14d,r9d + + xor edi,r10d + ror r13d,6 + mov r8d,r10d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r8d,r15d + add eax,r12d + add r8d,r12d + + lea rbp,[20+rbp] + add r8d,r14d + mov r12d,DWORD[48+rsi] + mov r13d,eax + mov r14d,r8d + bswap r12d + ror r13d,14 + mov r15d,ebx + + xor r13d,eax + ror r14d,9 + xor r15d,ecx + + mov DWORD[48+rsp],r12d + xor r14d,r8d + and r15d,eax + + ror r13d,5 + add r12d,edx + xor r15d,ecx + + ror r14d,11 + xor r13d,eax + add r12d,r15d + + mov r15d,r8d + add r12d,DWORD[rbp] + xor r14d,r8d + + xor r15d,r9d + ror r13d,6 + mov edx,r9d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor edx,edi + add r11d,r12d + add edx,r12d + + lea rbp,[4+rbp] + add edx,r14d + mov r12d,DWORD[52+rsi] + mov r13d,r11d + mov r14d,edx + bswap r12d + ror r13d,14 + mov edi,eax + + xor r13d,r11d + ror r14d,9 + xor edi,ebx + + mov DWORD[52+rsp],r12d + xor r14d,edx + and edi,r11d + + ror r13d,5 + add r12d,ecx + xor edi,ebx + + ror r14d,11 + xor r13d,r11d + add r12d,edi + + mov edi,edx + add r12d,DWORD[rbp] + xor r14d,edx + + xor edi,r8d + ror r13d,6 + mov ecx,r8d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor ecx,r15d + add r10d,r12d + add ecx,r12d + + lea rbp,[4+rbp] + add ecx,r14d + mov r12d,DWORD[56+rsi] + mov r13d,r10d + mov r14d,ecx + bswap r12d + ror r13d,14 + mov r15d,r11d + + xor r13d,r10d + ror r14d,9 + xor r15d,eax + + mov DWORD[56+rsp],r12d + xor r14d,ecx + and r15d,r10d + + ror r13d,5 + add r12d,ebx + xor r15d,eax + + ror r14d,11 + xor r13d,r10d + add r12d,r15d + + mov r15d,ecx + add r12d,DWORD[rbp] + xor r14d,ecx + + xor r15d,edx + ror r13d,6 + mov ebx,edx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor ebx,edi + add r9d,r12d + add ebx,r12d + + lea rbp,[4+rbp] + add ebx,r14d + mov r12d,DWORD[60+rsi] + mov r13d,r9d + mov r14d,ebx + bswap r12d + ror r13d,14 + mov edi,r10d + + xor r13d,r9d + ror r14d,9 + xor edi,r11d + + mov DWORD[60+rsp],r12d + xor r14d,ebx + and edi,r9d + + ror r13d,5 + add r12d,eax + xor edi,r11d + + ror r14d,11 + xor r13d,r9d + add r12d,edi + + mov edi,ebx + add r12d,DWORD[rbp] + xor r14d,ebx + + xor edi,ecx + ror r13d,6 + mov eax,ecx + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor eax,r15d + add r8d,r12d + add eax,r12d + + lea rbp,[20+rbp] + jmp NEAR $L$rounds_16_xx +ALIGN 16 +$L$rounds_16_xx: + mov r13d,DWORD[4+rsp] + mov r15d,DWORD[56+rsp] + + mov r12d,r13d + ror r13d,11 + add eax,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[36+rsp] + + add r12d,DWORD[rsp] + mov r13d,r8d + add r12d,r15d + mov r14d,eax + ror r13d,14 + mov r15d,r9d + + xor r13d,r8d + ror r14d,9 + xor r15d,r10d + + mov DWORD[rsp],r12d + xor r14d,eax + and r15d,r8d + + ror r13d,5 + add r12d,r11d + xor r15d,r10d + + ror r14d,11 + xor r13d,r8d + add r12d,r15d + + mov r15d,eax + add r12d,DWORD[rbp] + xor r14d,eax + + xor r15d,ebx + ror r13d,6 + mov r11d,ebx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r11d,edi + add edx,r12d + add r11d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[8+rsp] + mov edi,DWORD[60+rsp] + + mov r12d,r13d + ror r13d,11 + add r11d,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[40+rsp] + + add r12d,DWORD[4+rsp] + mov r13d,edx + add r12d,edi + mov r14d,r11d + ror r13d,14 + mov edi,r8d + + xor r13d,edx + ror r14d,9 + xor edi,r9d + + mov DWORD[4+rsp],r12d + xor r14d,r11d + and edi,edx + + ror r13d,5 + add r12d,r10d + xor edi,r9d + + ror r14d,11 + xor r13d,edx + add r12d,edi + + mov edi,r11d + add r12d,DWORD[rbp] + xor r14d,r11d + + xor edi,eax + ror r13d,6 + mov r10d,eax + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r10d,r15d + add ecx,r12d + add r10d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[12+rsp] + mov r15d,DWORD[rsp] + + mov r12d,r13d + ror r13d,11 + add r10d,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[44+rsp] + + add r12d,DWORD[8+rsp] + mov r13d,ecx + add r12d,r15d + mov r14d,r10d + ror r13d,14 + mov r15d,edx + + xor r13d,ecx + ror r14d,9 + xor r15d,r8d + + mov DWORD[8+rsp],r12d + xor r14d,r10d + and r15d,ecx + + ror r13d,5 + add r12d,r9d + xor r15d,r8d + + ror r14d,11 + xor r13d,ecx + add r12d,r15d + + mov r15d,r10d + add r12d,DWORD[rbp] + xor r14d,r10d + + xor r15d,r11d + ror r13d,6 + mov r9d,r11d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r9d,edi + add ebx,r12d + add r9d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[16+rsp] + mov edi,DWORD[4+rsp] + + mov r12d,r13d + ror r13d,11 + add r9d,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[48+rsp] + + add r12d,DWORD[12+rsp] + mov r13d,ebx + add r12d,edi + mov r14d,r9d + ror r13d,14 + mov edi,ecx + + xor r13d,ebx + ror r14d,9 + xor edi,edx + + mov DWORD[12+rsp],r12d + xor r14d,r9d + and edi,ebx + + ror r13d,5 + add r12d,r8d + xor edi,edx + + ror r14d,11 + xor r13d,ebx + add r12d,edi + + mov edi,r9d + add r12d,DWORD[rbp] + xor r14d,r9d + + xor edi,r10d + ror r13d,6 + mov r8d,r10d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r8d,r15d + add eax,r12d + add r8d,r12d + + lea rbp,[20+rbp] + mov r13d,DWORD[20+rsp] + mov r15d,DWORD[8+rsp] + + mov r12d,r13d + ror r13d,11 + add r8d,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[52+rsp] + + add r12d,DWORD[16+rsp] + mov r13d,eax + add r12d,r15d + mov r14d,r8d + ror r13d,14 + mov r15d,ebx + + xor r13d,eax + ror r14d,9 + xor r15d,ecx + + mov DWORD[16+rsp],r12d + xor r14d,r8d + and r15d,eax + + ror r13d,5 + add r12d,edx + xor r15d,ecx + + ror r14d,11 + xor r13d,eax + add r12d,r15d + + mov r15d,r8d + add r12d,DWORD[rbp] + xor r14d,r8d + + xor r15d,r9d + ror r13d,6 + mov edx,r9d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor edx,edi + add r11d,r12d + add edx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[24+rsp] + mov edi,DWORD[12+rsp] + + mov r12d,r13d + ror r13d,11 + add edx,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[56+rsp] + + add r12d,DWORD[20+rsp] + mov r13d,r11d + add r12d,edi + mov r14d,edx + ror r13d,14 + mov edi,eax + + xor r13d,r11d + ror r14d,9 + xor edi,ebx + + mov DWORD[20+rsp],r12d + xor r14d,edx + and edi,r11d + + ror r13d,5 + add r12d,ecx + xor edi,ebx + + ror r14d,11 + xor r13d,r11d + add r12d,edi + + mov edi,edx + add r12d,DWORD[rbp] + xor r14d,edx + + xor edi,r8d + ror r13d,6 + mov ecx,r8d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor ecx,r15d + add r10d,r12d + add ecx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[28+rsp] + mov r15d,DWORD[16+rsp] + + mov r12d,r13d + ror r13d,11 + add ecx,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[60+rsp] + + add r12d,DWORD[24+rsp] + mov r13d,r10d + add r12d,r15d + mov r14d,ecx + ror r13d,14 + mov r15d,r11d + + xor r13d,r10d + ror r14d,9 + xor r15d,eax + + mov DWORD[24+rsp],r12d + xor r14d,ecx + and r15d,r10d + + ror r13d,5 + add r12d,ebx + xor r15d,eax + + ror r14d,11 + xor r13d,r10d + add r12d,r15d + + mov r15d,ecx + add r12d,DWORD[rbp] + xor r14d,ecx + + xor r15d,edx + ror r13d,6 + mov ebx,edx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor ebx,edi + add r9d,r12d + add ebx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[32+rsp] + mov edi,DWORD[20+rsp] + + mov r12d,r13d + ror r13d,11 + add ebx,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[rsp] + + add r12d,DWORD[28+rsp] + mov r13d,r9d + add r12d,edi + mov r14d,ebx + ror r13d,14 + mov edi,r10d + + xor r13d,r9d + ror r14d,9 + xor edi,r11d + + mov DWORD[28+rsp],r12d + xor r14d,ebx + and edi,r9d + + ror r13d,5 + add r12d,eax + xor edi,r11d + + ror r14d,11 + xor r13d,r9d + add r12d,edi + + mov edi,ebx + add r12d,DWORD[rbp] + xor r14d,ebx + + xor edi,ecx + ror r13d,6 + mov eax,ecx + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor eax,r15d + add r8d,r12d + add eax,r12d + + lea rbp,[20+rbp] + mov r13d,DWORD[36+rsp] + mov r15d,DWORD[24+rsp] + + mov r12d,r13d + ror r13d,11 + add eax,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[4+rsp] + + add r12d,DWORD[32+rsp] + mov r13d,r8d + add r12d,r15d + mov r14d,eax + ror r13d,14 + mov r15d,r9d + + xor r13d,r8d + ror r14d,9 + xor r15d,r10d + + mov DWORD[32+rsp],r12d + xor r14d,eax + and r15d,r8d + + ror r13d,5 + add r12d,r11d + xor r15d,r10d + + ror r14d,11 + xor r13d,r8d + add r12d,r15d + + mov r15d,eax + add r12d,DWORD[rbp] + xor r14d,eax + + xor r15d,ebx + ror r13d,6 + mov r11d,ebx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r11d,edi + add edx,r12d + add r11d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[40+rsp] + mov edi,DWORD[28+rsp] + + mov r12d,r13d + ror r13d,11 + add r11d,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[8+rsp] + + add r12d,DWORD[36+rsp] + mov r13d,edx + add r12d,edi + mov r14d,r11d + ror r13d,14 + mov edi,r8d + + xor r13d,edx + ror r14d,9 + xor edi,r9d + + mov DWORD[36+rsp],r12d + xor r14d,r11d + and edi,edx + + ror r13d,5 + add r12d,r10d + xor edi,r9d + + ror r14d,11 + xor r13d,edx + add r12d,edi + + mov edi,r11d + add r12d,DWORD[rbp] + xor r14d,r11d + + xor edi,eax + ror r13d,6 + mov r10d,eax + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r10d,r15d + add ecx,r12d + add r10d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[44+rsp] + mov r15d,DWORD[32+rsp] + + mov r12d,r13d + ror r13d,11 + add r10d,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[12+rsp] + + add r12d,DWORD[40+rsp] + mov r13d,ecx + add r12d,r15d + mov r14d,r10d + ror r13d,14 + mov r15d,edx + + xor r13d,ecx + ror r14d,9 + xor r15d,r8d + + mov DWORD[40+rsp],r12d + xor r14d,r10d + and r15d,ecx + + ror r13d,5 + add r12d,r9d + xor r15d,r8d + + ror r14d,11 + xor r13d,ecx + add r12d,r15d + + mov r15d,r10d + add r12d,DWORD[rbp] + xor r14d,r10d + + xor r15d,r11d + ror r13d,6 + mov r9d,r11d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r9d,edi + add ebx,r12d + add r9d,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[48+rsp] + mov edi,DWORD[36+rsp] + + mov r12d,r13d + ror r13d,11 + add r9d,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[16+rsp] + + add r12d,DWORD[44+rsp] + mov r13d,ebx + add r12d,edi + mov r14d,r9d + ror r13d,14 + mov edi,ecx + + xor r13d,ebx + ror r14d,9 + xor edi,edx + + mov DWORD[44+rsp],r12d + xor r14d,r9d + and edi,ebx + + ror r13d,5 + add r12d,r8d + xor edi,edx + + ror r14d,11 + xor r13d,ebx + add r12d,edi + + mov edi,r9d + add r12d,DWORD[rbp] + xor r14d,r9d + + xor edi,r10d + ror r13d,6 + mov r8d,r10d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r8d,r15d + add eax,r12d + add r8d,r12d + + lea rbp,[20+rbp] + mov r13d,DWORD[52+rsp] + mov r15d,DWORD[40+rsp] + + mov r12d,r13d + ror r13d,11 + add r8d,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[20+rsp] + + add r12d,DWORD[48+rsp] + mov r13d,eax + add r12d,r15d + mov r14d,r8d + ror r13d,14 + mov r15d,ebx + + xor r13d,eax + ror r14d,9 + xor r15d,ecx + + mov DWORD[48+rsp],r12d + xor r14d,r8d + and r15d,eax + + ror r13d,5 + add r12d,edx + xor r15d,ecx + + ror r14d,11 + xor r13d,eax + add r12d,r15d + + mov r15d,r8d + add r12d,DWORD[rbp] + xor r14d,r8d + + xor r15d,r9d + ror r13d,6 + mov edx,r9d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor edx,edi + add r11d,r12d + add edx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[56+rsp] + mov edi,DWORD[44+rsp] + + mov r12d,r13d + ror r13d,11 + add edx,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[24+rsp] + + add r12d,DWORD[52+rsp] + mov r13d,r11d + add r12d,edi + mov r14d,edx + ror r13d,14 + mov edi,eax + + xor r13d,r11d + ror r14d,9 + xor edi,ebx + + mov DWORD[52+rsp],r12d + xor r14d,edx + and edi,r11d + + ror r13d,5 + add r12d,ecx + xor edi,ebx + + ror r14d,11 + xor r13d,r11d + add r12d,edi + + mov edi,edx + add r12d,DWORD[rbp] + xor r14d,edx + + xor edi,r8d + ror r13d,6 + mov ecx,r8d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor ecx,r15d + add r10d,r12d + add ecx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[60+rsp] + mov r15d,DWORD[48+rsp] + + mov r12d,r13d + ror r13d,11 + add ecx,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD[28+rsp] + + add r12d,DWORD[56+rsp] + mov r13d,r10d + add r12d,r15d + mov r14d,ecx + ror r13d,14 + mov r15d,r11d + + xor r13d,r10d + ror r14d,9 + xor r15d,eax + + mov DWORD[56+rsp],r12d + xor r14d,ecx + and r15d,r10d + + ror r13d,5 + add r12d,ebx + xor r15d,eax + + ror r14d,11 + xor r13d,r10d + add r12d,r15d + + mov r15d,ecx + add r12d,DWORD[rbp] + xor r14d,ecx + + xor r15d,edx + ror r13d,6 + mov ebx,edx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor ebx,edi + add r9d,r12d + add ebx,r12d + + lea rbp,[4+rbp] + mov r13d,DWORD[rsp] + mov edi,DWORD[52+rsp] + + mov r12d,r13d + ror r13d,11 + add ebx,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD[32+rsp] + + add r12d,DWORD[60+rsp] + mov r13d,r9d + add r12d,edi + mov r14d,ebx + ror r13d,14 + mov edi,r10d + + xor r13d,r9d + ror r14d,9 + xor edi,r11d + + mov DWORD[60+rsp],r12d + xor r14d,ebx + and edi,r9d + + ror r13d,5 + add r12d,eax + xor edi,r11d + + ror r14d,11 + xor r13d,r9d + add r12d,edi + + mov edi,ebx + add r12d,DWORD[rbp] + xor r14d,ebx + + xor edi,ecx + ror r13d,6 + mov eax,ecx + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor eax,r15d + add r8d,r12d + add eax,r12d + + lea rbp,[20+rbp] + cmp BYTE[3+rbp],0 + jnz NEAR $L$rounds_16_xx + + mov rdi,QWORD[((64+0))+rsp] + add eax,r14d + lea rsi,[64+rsi] + + add eax,DWORD[rdi] + add ebx,DWORD[4+rdi] + add ecx,DWORD[8+rdi] + add edx,DWORD[12+rdi] + add r8d,DWORD[16+rdi] + add r9d,DWORD[20+rdi] + add r10d,DWORD[24+rdi] + add r11d,DWORD[28+rdi] + + cmp rsi,QWORD[((64+16))+rsp] + + mov DWORD[rdi],eax + mov DWORD[4+rdi],ebx + mov DWORD[8+rdi],ecx + mov DWORD[12+rdi],edx + mov DWORD[16+rdi],r8d + mov DWORD[20+rdi],r9d + mov DWORD[24+rdi],r10d + mov DWORD[28+rdi],r11d + jb NEAR $L$loop + + mov rsi,QWORD[88+rsp] + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha256_block_data_order: +ALIGN 64 + +K256: + DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + + DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + DD 0x03020100,0x0b0a0908,0xffffffff,0xffffffff + DD 0x03020100,0x0b0a0908,0xffffffff,0xffffffff + DD 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 + DD 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +DB 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54 +DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +DB 111,114,103,62,0 + +ALIGN 64 +sha256_block_data_order_shaext: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order_shaext: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_shaext_shortcut: + + lea rsp,[((-88))+rsp] + movaps XMMWORD[(-8-80)+rax],xmm6 + movaps XMMWORD[(-8-64)+rax],xmm7 + movaps XMMWORD[(-8-48)+rax],xmm8 + movaps XMMWORD[(-8-32)+rax],xmm9 + movaps XMMWORD[(-8-16)+rax],xmm10 +$L$prologue_shaext: + lea rcx,[((K256+128))] + movdqu xmm1,XMMWORD[rdi] + movdqu xmm2,XMMWORD[16+rdi] + movdqa xmm7,XMMWORD[((512-128))+rcx] + + pshufd xmm0,xmm1,0x1b + pshufd xmm1,xmm1,0xb1 + pshufd xmm2,xmm2,0x1b + movdqa xmm8,xmm7 +DB 102,15,58,15,202,8 + punpcklqdq xmm2,xmm0 + jmp NEAR $L$oop_shaext + +ALIGN 16 +$L$oop_shaext: + movdqu xmm3,XMMWORD[rsi] + movdqu xmm4,XMMWORD[16+rsi] + movdqu xmm5,XMMWORD[32+rsi] +DB 102,15,56,0,223 + movdqu xmm6,XMMWORD[48+rsi] + + movdqa xmm0,XMMWORD[((0-128))+rcx] + paddd xmm0,xmm3 +DB 102,15,56,0,231 + movdqa xmm10,xmm2 +DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + nop + movdqa xmm9,xmm1 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD[((32-128))+rcx] + paddd xmm0,xmm4 +DB 102,15,56,0,239 +DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + lea rsi,[64+rsi] +DB 15,56,204,220 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD[((64-128))+rcx] + paddd xmm0,xmm5 +DB 102,15,56,0,247 +DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD[((96-128))+rcx] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD[((128-128))+rcx] + paddd xmm0,xmm3 +DB 15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD[((160-128))+rcx] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 + nop + paddd xmm6,xmm7 +DB 15,56,204,220 +DB 15,56,203,202 + movdqa xmm0,XMMWORD[((192-128))+rcx] + paddd xmm0,xmm5 +DB 15,56,205,245 +DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + movdqa xmm0,XMMWORD[((224-128))+rcx] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD[((256-128))+rcx] + paddd xmm0,xmm3 +DB 15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD[((288-128))+rcx] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 + nop + paddd xmm6,xmm7 +DB 15,56,204,220 +DB 15,56,203,202 + movdqa xmm0,XMMWORD[((320-128))+rcx] + paddd xmm0,xmm5 +DB 15,56,205,245 +DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + movdqa xmm0,XMMWORD[((352-128))+rcx] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD[((384-128))+rcx] + paddd xmm0,xmm3 +DB 15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD[((416-128))+rcx] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 +DB 15,56,203,202 + paddd xmm6,xmm7 + + movdqa xmm0,XMMWORD[((448-128))+rcx] + paddd xmm0,xmm5 +DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e +DB 15,56,205,245 + movdqa xmm7,xmm8 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD[((480-128))+rcx] + paddd xmm0,xmm6 + nop +DB 15,56,203,209 + pshufd xmm0,xmm0,0x0e + dec rdx + nop +DB 15,56,203,202 + + paddd xmm2,xmm10 + paddd xmm1,xmm9 + jnz NEAR $L$oop_shaext + + pshufd xmm2,xmm2,0xb1 + pshufd xmm7,xmm1,0x1b + pshufd xmm1,xmm1,0xb1 + punpckhqdq xmm1,xmm2 +DB 102,15,58,15,215,8 + + movdqu XMMWORD[rdi],xmm1 + movdqu XMMWORD[16+rdi],xmm2 + movaps xmm6,XMMWORD[((-8-80))+rax] + movaps xmm7,XMMWORD[((-8-64))+rax] + movaps xmm8,XMMWORD[((-8-48))+rax] + movaps xmm9,XMMWORD[((-8-32))+rax] + movaps xmm10,XMMWORD[((-8-16))+rax] + mov rsp,rax +$L$epilogue_shaext: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha256_block_data_order_shaext: + +ALIGN 64 +sha256_block_data_order_ssse3: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order_ssse3: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +$L$ssse3_shortcut: + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + shl rdx,4 + sub rsp,160 + lea rdx,[rdx*4+rsi] + and rsp,-64 + mov QWORD[((64+0))+rsp],rdi + mov QWORD[((64+8))+rsp],rsi + mov QWORD[((64+16))+rsp],rdx + mov QWORD[88+rsp],rax + + movaps XMMWORD[(64+32)+rsp],xmm6 + movaps XMMWORD[(64+48)+rsp],xmm7 + movaps XMMWORD[(64+64)+rsp],xmm8 + movaps XMMWORD[(64+80)+rsp],xmm9 +$L$prologue_ssse3: + + mov eax,DWORD[rdi] + mov ebx,DWORD[4+rdi] + mov ecx,DWORD[8+rdi] + mov edx,DWORD[12+rdi] + mov r8d,DWORD[16+rdi] + mov r9d,DWORD[20+rdi] + mov r10d,DWORD[24+rdi] + mov r11d,DWORD[28+rdi] + + + jmp NEAR $L$loop_ssse3 +ALIGN 16 +$L$loop_ssse3: + movdqa xmm7,XMMWORD[((K256+512))] + movdqu xmm0,XMMWORD[rsi] + movdqu xmm1,XMMWORD[16+rsi] + movdqu xmm2,XMMWORD[32+rsi] +DB 102,15,56,0,199 + movdqu xmm3,XMMWORD[48+rsi] + lea rbp,[K256] +DB 102,15,56,0,207 + movdqa xmm4,XMMWORD[rbp] + movdqa xmm5,XMMWORD[32+rbp] +DB 102,15,56,0,215 + paddd xmm4,xmm0 + movdqa xmm6,XMMWORD[64+rbp] +DB 102,15,56,0,223 + movdqa xmm7,XMMWORD[96+rbp] + paddd xmm5,xmm1 + paddd xmm6,xmm2 + paddd xmm7,xmm3 + movdqa XMMWORD[rsp],xmm4 + mov r14d,eax + movdqa XMMWORD[16+rsp],xmm5 + mov edi,ebx + movdqa XMMWORD[32+rsp],xmm6 + xor edi,ecx + movdqa XMMWORD[48+rsp],xmm7 + mov r13d,r8d + jmp NEAR $L$ssse3_00_47 + +ALIGN 16 +$L$ssse3_00_47: + sub rbp,-128 + ror r13d,14 + movdqa xmm4,xmm1 + mov eax,r14d + mov r12d,r9d + movdqa xmm7,xmm3 + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax +DB 102,15,58,15,224,4 + and r12d,r8d + xor r13d,r8d +DB 102,15,58,15,250,4 + add r11d,DWORD[rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,ebx + add r11d,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,eax + add r11d,r13d + xor edi,ebx + paddd xmm0,xmm7 + ror r14d,2 + add edx,r11d + psrld xmm6,7 + add r11d,edi + mov r13d,edx + pshufd xmm7,xmm3,250 + add r14d,r11d + ror r13d,14 + pslld xmm5,14 + mov r11d,r14d + mov r12d,r8d + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + psrld xmm6,11 + xor r14d,r11d + pxor xmm4,xmm5 + and r12d,edx + xor r13d,edx + pslld xmm5,11 + add r10d,DWORD[4+rsp] + mov edi,r11d + pxor xmm4,xmm6 + xor r12d,r9d + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,eax + add r10d,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,r11d + psrld xmm7,10 + add r10d,r13d + xor r15d,eax + paddd xmm0,xmm4 + ror r14d,2 + add ecx,r10d + psrlq xmm6,17 + add r10d,r15d + mov r13d,ecx + add r14d,r10d + pxor xmm7,xmm6 + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + psrlq xmm6,2 + xor r13d,ecx + xor r12d,r8d + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,r10d + and r12d,ecx + pshufd xmm7,xmm7,128 + xor r13d,ecx + add r9d,DWORD[8+rsp] + mov r15d,r10d + psrldq xmm7,8 + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + paddd xmm0,xmm7 + and edi,r15d + xor r14d,r10d + add r9d,r13d + pshufd xmm7,xmm0,80 + xor edi,r11d + ror r14d,2 + add ebx,r9d + movdqa xmm6,xmm7 + add r9d,edi + mov r13d,ebx + psrld xmm7,10 + add r14d,r9d + ror r13d,14 + psrlq xmm6,17 + mov r9d,r14d + mov r12d,ecx + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + psrlq xmm6,2 + and r12d,ebx + xor r13d,ebx + add r8d,DWORD[12+rsp] + pxor xmm7,xmm6 + mov edi,r9d + xor r12d,edx + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,r10d + add r8d,r12d + movdqa xmm6,XMMWORD[rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + paddd xmm0,xmm7 + ror r14d,2 + add eax,r8d + add r8d,r15d + paddd xmm6,xmm0 + mov r13d,eax + add r14d,r8d + movdqa XMMWORD[rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm2 + mov r8d,r14d + mov r12d,ebx + movdqa xmm7,xmm0 + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d +DB 102,15,58,15,225,4 + and r12d,eax + xor r13d,eax +DB 102,15,58,15,251,4 + add edx,DWORD[16+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,r9d + add edx,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,r8d + add edx,r13d + xor edi,r9d + paddd xmm1,xmm7 + ror r14d,2 + add r11d,edx + psrld xmm6,7 + add edx,edi + mov r13d,r11d + pshufd xmm7,xmm0,250 + add r14d,edx + ror r13d,14 + pslld xmm5,14 + mov edx,r14d + mov r12d,eax + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + psrld xmm6,11 + xor r14d,edx + pxor xmm4,xmm5 + and r12d,r11d + xor r13d,r11d + pslld xmm5,11 + add ecx,DWORD[20+rsp] + mov edi,edx + pxor xmm4,xmm6 + xor r12d,ebx + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,r8d + add ecx,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,edx + psrld xmm7,10 + add ecx,r13d + xor r15d,r8d + paddd xmm1,xmm4 + ror r14d,2 + add r10d,ecx + psrlq xmm6,17 + add ecx,r15d + mov r13d,r10d + add r14d,ecx + pxor xmm7,xmm6 + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + psrlq xmm6,2 + xor r13d,r10d + xor r12d,eax + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,ecx + and r12d,r10d + pshufd xmm7,xmm7,128 + xor r13d,r10d + add ebx,DWORD[24+rsp] + mov r15d,ecx + psrldq xmm7,8 + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + paddd xmm1,xmm7 + and edi,r15d + xor r14d,ecx + add ebx,r13d + pshufd xmm7,xmm1,80 + xor edi,edx + ror r14d,2 + add r9d,ebx + movdqa xmm6,xmm7 + add ebx,edi + mov r13d,r9d + psrld xmm7,10 + add r14d,ebx + ror r13d,14 + psrlq xmm6,17 + mov ebx,r14d + mov r12d,r10d + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + psrlq xmm6,2 + and r12d,r9d + xor r13d,r9d + add eax,DWORD[28+rsp] + pxor xmm7,xmm6 + mov edi,ebx + xor r12d,r11d + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,ecx + add eax,r12d + movdqa xmm6,XMMWORD[32+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,ebx + add eax,r13d + xor r15d,ecx + paddd xmm1,xmm7 + ror r14d,2 + add r8d,eax + add eax,r15d + paddd xmm6,xmm1 + mov r13d,r8d + add r14d,eax + movdqa XMMWORD[16+rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm3 + mov eax,r14d + mov r12d,r9d + movdqa xmm7,xmm1 + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax +DB 102,15,58,15,226,4 + and r12d,r8d + xor r13d,r8d +DB 102,15,58,15,248,4 + add r11d,DWORD[32+rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,ebx + add r11d,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,eax + add r11d,r13d + xor edi,ebx + paddd xmm2,xmm7 + ror r14d,2 + add edx,r11d + psrld xmm6,7 + add r11d,edi + mov r13d,edx + pshufd xmm7,xmm1,250 + add r14d,r11d + ror r13d,14 + pslld xmm5,14 + mov r11d,r14d + mov r12d,r8d + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + psrld xmm6,11 + xor r14d,r11d + pxor xmm4,xmm5 + and r12d,edx + xor r13d,edx + pslld xmm5,11 + add r10d,DWORD[36+rsp] + mov edi,r11d + pxor xmm4,xmm6 + xor r12d,r9d + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,eax + add r10d,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,r11d + psrld xmm7,10 + add r10d,r13d + xor r15d,eax + paddd xmm2,xmm4 + ror r14d,2 + add ecx,r10d + psrlq xmm6,17 + add r10d,r15d + mov r13d,ecx + add r14d,r10d + pxor xmm7,xmm6 + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + psrlq xmm6,2 + xor r13d,ecx + xor r12d,r8d + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,r10d + and r12d,ecx + pshufd xmm7,xmm7,128 + xor r13d,ecx + add r9d,DWORD[40+rsp] + mov r15d,r10d + psrldq xmm7,8 + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + paddd xmm2,xmm7 + and edi,r15d + xor r14d,r10d + add r9d,r13d + pshufd xmm7,xmm2,80 + xor edi,r11d + ror r14d,2 + add ebx,r9d + movdqa xmm6,xmm7 + add r9d,edi + mov r13d,ebx + psrld xmm7,10 + add r14d,r9d + ror r13d,14 + psrlq xmm6,17 + mov r9d,r14d + mov r12d,ecx + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + psrlq xmm6,2 + and r12d,ebx + xor r13d,ebx + add r8d,DWORD[44+rsp] + pxor xmm7,xmm6 + mov edi,r9d + xor r12d,edx + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,r10d + add r8d,r12d + movdqa xmm6,XMMWORD[64+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + paddd xmm2,xmm7 + ror r14d,2 + add eax,r8d + add r8d,r15d + paddd xmm6,xmm2 + mov r13d,eax + add r14d,r8d + movdqa XMMWORD[32+rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm0 + mov r8d,r14d + mov r12d,ebx + movdqa xmm7,xmm2 + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d +DB 102,15,58,15,227,4 + and r12d,eax + xor r13d,eax +DB 102,15,58,15,249,4 + add edx,DWORD[48+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,r9d + add edx,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,r8d + add edx,r13d + xor edi,r9d + paddd xmm3,xmm7 + ror r14d,2 + add r11d,edx + psrld xmm6,7 + add edx,edi + mov r13d,r11d + pshufd xmm7,xmm2,250 + add r14d,edx + ror r13d,14 + pslld xmm5,14 + mov edx,r14d + mov r12d,eax + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + psrld xmm6,11 + xor r14d,edx + pxor xmm4,xmm5 + and r12d,r11d + xor r13d,r11d + pslld xmm5,11 + add ecx,DWORD[52+rsp] + mov edi,edx + pxor xmm4,xmm6 + xor r12d,ebx + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,r8d + add ecx,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,edx + psrld xmm7,10 + add ecx,r13d + xor r15d,r8d + paddd xmm3,xmm4 + ror r14d,2 + add r10d,ecx + psrlq xmm6,17 + add ecx,r15d + mov r13d,r10d + add r14d,ecx + pxor xmm7,xmm6 + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + psrlq xmm6,2 + xor r13d,r10d + xor r12d,eax + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,ecx + and r12d,r10d + pshufd xmm7,xmm7,128 + xor r13d,r10d + add ebx,DWORD[56+rsp] + mov r15d,ecx + psrldq xmm7,8 + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + paddd xmm3,xmm7 + and edi,r15d + xor r14d,ecx + add ebx,r13d + pshufd xmm7,xmm3,80 + xor edi,edx + ror r14d,2 + add r9d,ebx + movdqa xmm6,xmm7 + add ebx,edi + mov r13d,r9d + psrld xmm7,10 + add r14d,ebx + ror r13d,14 + psrlq xmm6,17 + mov ebx,r14d + mov r12d,r10d + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + psrlq xmm6,2 + and r12d,r9d + xor r13d,r9d + add eax,DWORD[60+rsp] + pxor xmm7,xmm6 + mov edi,ebx + xor r12d,r11d + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,ecx + add eax,r12d + movdqa xmm6,XMMWORD[96+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,ebx + add eax,r13d + xor r15d,ecx + paddd xmm3,xmm7 + ror r14d,2 + add r8d,eax + add eax,r15d + paddd xmm6,xmm3 + mov r13d,r8d + add r14d,eax + movdqa XMMWORD[48+rsp],xmm6 + cmp BYTE[131+rbp],0 + jne NEAR $L$ssse3_00_47 + ror r13d,14 + mov eax,r14d + mov r12d,r9d + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD[rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + xor r15d,ebx + add r11d,r12d + ror r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + ror r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + ror r13d,14 + mov r11d,r14d + mov r12d,r8d + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD[4+rsp] + mov edi,r11d + xor r12d,r9d + ror r14d,11 + xor edi,eax + add r10d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + ror r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + xor r13d,ecx + xor r12d,r8d + ror r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD[8+rsp] + mov r15d,r10d + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + ror r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + ror r13d,14 + mov r9d,r14d + mov r12d,ecx + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD[12+rsp] + mov edi,r9d + xor r12d,edx + ror r14d,11 + xor edi,r10d + add r8d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + ror r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + ror r13d,14 + mov r8d,r14d + mov r12d,ebx + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD[16+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + xor r15d,r9d + add edx,r12d + ror r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + ror r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + ror r13d,14 + mov edx,r14d + mov r12d,eax + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD[20+rsp] + mov edi,edx + xor r12d,ebx + ror r14d,11 + xor edi,r8d + add ecx,r12d + ror r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + ror r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + xor r13d,r10d + xor r12d,eax + ror r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD[24+rsp] + mov r15d,ecx + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + ror r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD[28+rsp] + mov edi,ebx + xor r12d,r11d + ror r14d,11 + xor edi,ecx + add eax,r12d + ror r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + ror r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + ror r13d,14 + mov eax,r14d + mov r12d,r9d + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD[32+rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + xor r15d,ebx + add r11d,r12d + ror r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + ror r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + ror r13d,14 + mov r11d,r14d + mov r12d,r8d + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD[36+rsp] + mov edi,r11d + xor r12d,r9d + ror r14d,11 + xor edi,eax + add r10d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + ror r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + xor r13d,ecx + xor r12d,r8d + ror r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD[40+rsp] + mov r15d,r10d + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + ror r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + ror r13d,14 + mov r9d,r14d + mov r12d,ecx + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD[44+rsp] + mov edi,r9d + xor r12d,edx + ror r14d,11 + xor edi,r10d + add r8d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + ror r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + ror r13d,14 + mov r8d,r14d + mov r12d,ebx + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD[48+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + xor r15d,r9d + add edx,r12d + ror r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + ror r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + ror r13d,14 + mov edx,r14d + mov r12d,eax + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD[52+rsp] + mov edi,edx + xor r12d,ebx + ror r14d,11 + xor edi,r8d + add ecx,r12d + ror r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + ror r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + xor r13d,r10d + xor r12d,eax + ror r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD[56+rsp] + mov r15d,ecx + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + ror r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD[60+rsp] + mov edi,ebx + xor r12d,r11d + ror r14d,11 + xor edi,ecx + add eax,r12d + ror r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + ror r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + mov rdi,QWORD[((64+0))+rsp] + mov eax,r14d + + add eax,DWORD[rdi] + lea rsi,[64+rsi] + add ebx,DWORD[4+rdi] + add ecx,DWORD[8+rdi] + add edx,DWORD[12+rdi] + add r8d,DWORD[16+rdi] + add r9d,DWORD[20+rdi] + add r10d,DWORD[24+rdi] + add r11d,DWORD[28+rdi] + + cmp rsi,QWORD[((64+16))+rsp] + + mov DWORD[rdi],eax + mov DWORD[4+rdi],ebx + mov DWORD[8+rdi],ecx + mov DWORD[12+rdi],edx + mov DWORD[16+rdi],r8d + mov DWORD[20+rdi],r9d + mov DWORD[24+rdi],r10d + mov DWORD[28+rdi],r11d + jb NEAR $L$loop_ssse3 + + mov rsi,QWORD[88+rsp] + + movaps xmm6,XMMWORD[((64+32))+rsp] + movaps xmm7,XMMWORD[((64+48))+rsp] + movaps xmm8,XMMWORD[((64+64))+rsp] + movaps xmm9,XMMWORD[((64+80))+rsp] + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$epilogue_ssse3: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha256_block_data_order_ssse3: + +ALIGN 64 +sha256_block_data_order_avx: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order_avx: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +$L$avx_shortcut: + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + shl rdx,4 + sub rsp,160 + lea rdx,[rdx*4+rsi] + and rsp,-64 + mov QWORD[((64+0))+rsp],rdi + mov QWORD[((64+8))+rsp],rsi + mov QWORD[((64+16))+rsp],rdx + mov QWORD[88+rsp],rax + + movaps XMMWORD[(64+32)+rsp],xmm6 + movaps XMMWORD[(64+48)+rsp],xmm7 + movaps XMMWORD[(64+64)+rsp],xmm8 + movaps XMMWORD[(64+80)+rsp],xmm9 +$L$prologue_avx: + + vzeroupper + mov eax,DWORD[rdi] + mov ebx,DWORD[4+rdi] + mov ecx,DWORD[8+rdi] + mov edx,DWORD[12+rdi] + mov r8d,DWORD[16+rdi] + mov r9d,DWORD[20+rdi] + mov r10d,DWORD[24+rdi] + mov r11d,DWORD[28+rdi] + vmovdqa xmm8,XMMWORD[((K256+512+32))] + vmovdqa xmm9,XMMWORD[((K256+512+64))] + jmp NEAR $L$loop_avx +ALIGN 16 +$L$loop_avx: + vmovdqa xmm7,XMMWORD[((K256+512))] + vmovdqu xmm0,XMMWORD[rsi] + vmovdqu xmm1,XMMWORD[16+rsi] + vmovdqu xmm2,XMMWORD[32+rsi] + vmovdqu xmm3,XMMWORD[48+rsi] + vpshufb xmm0,xmm0,xmm7 + lea rbp,[K256] + vpshufb xmm1,xmm1,xmm7 + vpshufb xmm2,xmm2,xmm7 + vpaddd xmm4,xmm0,XMMWORD[rbp] + vpshufb xmm3,xmm3,xmm7 + vpaddd xmm5,xmm1,XMMWORD[32+rbp] + vpaddd xmm6,xmm2,XMMWORD[64+rbp] + vpaddd xmm7,xmm3,XMMWORD[96+rbp] + vmovdqa XMMWORD[rsp],xmm4 + mov r14d,eax + vmovdqa XMMWORD[16+rsp],xmm5 + mov edi,ebx + vmovdqa XMMWORD[32+rsp],xmm6 + xor edi,ecx + vmovdqa XMMWORD[48+rsp],xmm7 + mov r13d,r8d + jmp NEAR $L$avx_00_47 + +ALIGN 16 +$L$avx_00_47: + sub rbp,-128 + vpalignr xmm4,xmm1,xmm0,4 + shrd r13d,r13d,14 + mov eax,r14d + mov r12d,r9d + vpalignr xmm7,xmm3,xmm2,4 + shrd r14d,r14d,9 + xor r13d,r8d + xor r12d,r10d + vpsrld xmm6,xmm4,7 + shrd r13d,r13d,5 + xor r14d,eax + and r12d,r8d + vpaddd xmm0,xmm0,xmm7 + xor r13d,r8d + add r11d,DWORD[rsp] + mov r15d,eax + vpsrld xmm7,xmm4,3 + xor r12d,r10d + shrd r14d,r14d,11 + xor r15d,ebx + vpslld xmm5,xmm4,14 + add r11d,r12d + shrd r13d,r13d,6 + and edi,r15d + vpxor xmm4,xmm7,xmm6 + xor r14d,eax + add r11d,r13d + xor edi,ebx + vpshufd xmm7,xmm3,250 + shrd r14d,r14d,2 + add edx,r11d + add r11d,edi + vpsrld xmm6,xmm6,11 + mov r13d,edx + add r14d,r11d + shrd r13d,r13d,14 + vpxor xmm4,xmm4,xmm5 + mov r11d,r14d + mov r12d,r8d + shrd r14d,r14d,9 + vpslld xmm5,xmm5,11 + xor r13d,edx + xor r12d,r9d + shrd r13d,r13d,5 + vpxor xmm4,xmm4,xmm6 + xor r14d,r11d + and r12d,edx + xor r13d,edx + vpsrld xmm6,xmm7,10 + add r10d,DWORD[4+rsp] + mov edi,r11d + xor r12d,r9d + vpxor xmm4,xmm4,xmm5 + shrd r14d,r14d,11 + xor edi,eax + add r10d,r12d + vpsrlq xmm7,xmm7,17 + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r11d + vpaddd xmm0,xmm0,xmm4 + add r10d,r13d + xor r15d,eax + shrd r14d,r14d,2 + vpxor xmm6,xmm6,xmm7 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + vpsrlq xmm7,xmm7,2 + add r14d,r10d + shrd r13d,r13d,14 + mov r10d,r14d + vpxor xmm6,xmm6,xmm7 + mov r12d,edx + shrd r14d,r14d,9 + xor r13d,ecx + vpshufb xmm6,xmm6,xmm8 + xor r12d,r8d + shrd r13d,r13d,5 + xor r14d,r10d + vpaddd xmm0,xmm0,xmm6 + and r12d,ecx + xor r13d,ecx + add r9d,DWORD[8+rsp] + vpshufd xmm7,xmm0,80 + mov r15d,r10d + xor r12d,r8d + shrd r14d,r14d,11 + vpsrld xmm6,xmm7,10 + xor r15d,r11d + add r9d,r12d + shrd r13d,r13d,6 + vpsrlq xmm7,xmm7,17 + and edi,r15d + xor r14d,r10d + add r9d,r13d + vpxor xmm6,xmm6,xmm7 + xor edi,r11d + shrd r14d,r14d,2 + add ebx,r9d + vpsrlq xmm7,xmm7,2 + add r9d,edi + mov r13d,ebx + add r14d,r9d + vpxor xmm6,xmm6,xmm7 + shrd r13d,r13d,14 + mov r9d,r14d + mov r12d,ecx + vpshufb xmm6,xmm6,xmm9 + shrd r14d,r14d,9 + xor r13d,ebx + xor r12d,edx + vpaddd xmm0,xmm0,xmm6 + shrd r13d,r13d,5 + xor r14d,r9d + and r12d,ebx + vpaddd xmm6,xmm0,XMMWORD[rbp] + xor r13d,ebx + add r8d,DWORD[12+rsp] + mov edi,r9d + xor r12d,edx + shrd r14d,r14d,11 + xor edi,r10d + add r8d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + shrd r14d,r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + vmovdqa XMMWORD[rsp],xmm6 + vpalignr xmm4,xmm2,xmm1,4 + shrd r13d,r13d,14 + mov r8d,r14d + mov r12d,ebx + vpalignr xmm7,xmm0,xmm3,4 + shrd r14d,r14d,9 + xor r13d,eax + xor r12d,ecx + vpsrld xmm6,xmm4,7 + shrd r13d,r13d,5 + xor r14d,r8d + and r12d,eax + vpaddd xmm1,xmm1,xmm7 + xor r13d,eax + add edx,DWORD[16+rsp] + mov r15d,r8d + vpsrld xmm7,xmm4,3 + xor r12d,ecx + shrd r14d,r14d,11 + xor r15d,r9d + vpslld xmm5,xmm4,14 + add edx,r12d + shrd r13d,r13d,6 + and edi,r15d + vpxor xmm4,xmm7,xmm6 + xor r14d,r8d + add edx,r13d + xor edi,r9d + vpshufd xmm7,xmm0,250 + shrd r14d,r14d,2 + add r11d,edx + add edx,edi + vpsrld xmm6,xmm6,11 + mov r13d,r11d + add r14d,edx + shrd r13d,r13d,14 + vpxor xmm4,xmm4,xmm5 + mov edx,r14d + mov r12d,eax + shrd r14d,r14d,9 + vpslld xmm5,xmm5,11 + xor r13d,r11d + xor r12d,ebx + shrd r13d,r13d,5 + vpxor xmm4,xmm4,xmm6 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + vpsrld xmm6,xmm7,10 + add ecx,DWORD[20+rsp] + mov edi,edx + xor r12d,ebx + vpxor xmm4,xmm4,xmm5 + shrd r14d,r14d,11 + xor edi,r8d + add ecx,r12d + vpsrlq xmm7,xmm7,17 + shrd r13d,r13d,6 + and r15d,edi + xor r14d,edx + vpaddd xmm1,xmm1,xmm4 + add ecx,r13d + xor r15d,r8d + shrd r14d,r14d,2 + vpxor xmm6,xmm6,xmm7 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + vpsrlq xmm7,xmm7,2 + add r14d,ecx + shrd r13d,r13d,14 + mov ecx,r14d + vpxor xmm6,xmm6,xmm7 + mov r12d,r11d + shrd r14d,r14d,9 + xor r13d,r10d + vpshufb xmm6,xmm6,xmm8 + xor r12d,eax + shrd r13d,r13d,5 + xor r14d,ecx + vpaddd xmm1,xmm1,xmm6 + and r12d,r10d + xor r13d,r10d + add ebx,DWORD[24+rsp] + vpshufd xmm7,xmm1,80 + mov r15d,ecx + xor r12d,eax + shrd r14d,r14d,11 + vpsrld xmm6,xmm7,10 + xor r15d,edx + add ebx,r12d + shrd r13d,r13d,6 + vpsrlq xmm7,xmm7,17 + and edi,r15d + xor r14d,ecx + add ebx,r13d + vpxor xmm6,xmm6,xmm7 + xor edi,edx + shrd r14d,r14d,2 + add r9d,ebx + vpsrlq xmm7,xmm7,2 + add ebx,edi + mov r13d,r9d + add r14d,ebx + vpxor xmm6,xmm6,xmm7 + shrd r13d,r13d,14 + mov ebx,r14d + mov r12d,r10d + vpshufb xmm6,xmm6,xmm9 + shrd r14d,r14d,9 + xor r13d,r9d + xor r12d,r11d + vpaddd xmm1,xmm1,xmm6 + shrd r13d,r13d,5 + xor r14d,ebx + and r12d,r9d + vpaddd xmm6,xmm1,XMMWORD[32+rbp] + xor r13d,r9d + add eax,DWORD[28+rsp] + mov edi,ebx + xor r12d,r11d + shrd r14d,r14d,11 + xor edi,ecx + add eax,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + shrd r14d,r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + vmovdqa XMMWORD[16+rsp],xmm6 + vpalignr xmm4,xmm3,xmm2,4 + shrd r13d,r13d,14 + mov eax,r14d + mov r12d,r9d + vpalignr xmm7,xmm1,xmm0,4 + shrd r14d,r14d,9 + xor r13d,r8d + xor r12d,r10d + vpsrld xmm6,xmm4,7 + shrd r13d,r13d,5 + xor r14d,eax + and r12d,r8d + vpaddd xmm2,xmm2,xmm7 + xor r13d,r8d + add r11d,DWORD[32+rsp] + mov r15d,eax + vpsrld xmm7,xmm4,3 + xor r12d,r10d + shrd r14d,r14d,11 + xor r15d,ebx + vpslld xmm5,xmm4,14 + add r11d,r12d + shrd r13d,r13d,6 + and edi,r15d + vpxor xmm4,xmm7,xmm6 + xor r14d,eax + add r11d,r13d + xor edi,ebx + vpshufd xmm7,xmm1,250 + shrd r14d,r14d,2 + add edx,r11d + add r11d,edi + vpsrld xmm6,xmm6,11 + mov r13d,edx + add r14d,r11d + shrd r13d,r13d,14 + vpxor xmm4,xmm4,xmm5 + mov r11d,r14d + mov r12d,r8d + shrd r14d,r14d,9 + vpslld xmm5,xmm5,11 + xor r13d,edx + xor r12d,r9d + shrd r13d,r13d,5 + vpxor xmm4,xmm4,xmm6 + xor r14d,r11d + and r12d,edx + xor r13d,edx + vpsrld xmm6,xmm7,10 + add r10d,DWORD[36+rsp] + mov edi,r11d + xor r12d,r9d + vpxor xmm4,xmm4,xmm5 + shrd r14d,r14d,11 + xor edi,eax + add r10d,r12d + vpsrlq xmm7,xmm7,17 + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r11d + vpaddd xmm2,xmm2,xmm4 + add r10d,r13d + xor r15d,eax + shrd r14d,r14d,2 + vpxor xmm6,xmm6,xmm7 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + vpsrlq xmm7,xmm7,2 + add r14d,r10d + shrd r13d,r13d,14 + mov r10d,r14d + vpxor xmm6,xmm6,xmm7 + mov r12d,edx + shrd r14d,r14d,9 + xor r13d,ecx + vpshufb xmm6,xmm6,xmm8 + xor r12d,r8d + shrd r13d,r13d,5 + xor r14d,r10d + vpaddd xmm2,xmm2,xmm6 + and r12d,ecx + xor r13d,ecx + add r9d,DWORD[40+rsp] + vpshufd xmm7,xmm2,80 + mov r15d,r10d + xor r12d,r8d + shrd r14d,r14d,11 + vpsrld xmm6,xmm7,10 + xor r15d,r11d + add r9d,r12d + shrd r13d,r13d,6 + vpsrlq xmm7,xmm7,17 + and edi,r15d + xor r14d,r10d + add r9d,r13d + vpxor xmm6,xmm6,xmm7 + xor edi,r11d + shrd r14d,r14d,2 + add ebx,r9d + vpsrlq xmm7,xmm7,2 + add r9d,edi + mov r13d,ebx + add r14d,r9d + vpxor xmm6,xmm6,xmm7 + shrd r13d,r13d,14 + mov r9d,r14d + mov r12d,ecx + vpshufb xmm6,xmm6,xmm9 + shrd r14d,r14d,9 + xor r13d,ebx + xor r12d,edx + vpaddd xmm2,xmm2,xmm6 + shrd r13d,r13d,5 + xor r14d,r9d + and r12d,ebx + vpaddd xmm6,xmm2,XMMWORD[64+rbp] + xor r13d,ebx + add r8d,DWORD[44+rsp] + mov edi,r9d + xor r12d,edx + shrd r14d,r14d,11 + xor edi,r10d + add r8d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + shrd r14d,r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + vmovdqa XMMWORD[32+rsp],xmm6 + vpalignr xmm4,xmm0,xmm3,4 + shrd r13d,r13d,14 + mov r8d,r14d + mov r12d,ebx + vpalignr xmm7,xmm2,xmm1,4 + shrd r14d,r14d,9 + xor r13d,eax + xor r12d,ecx + vpsrld xmm6,xmm4,7 + shrd r13d,r13d,5 + xor r14d,r8d + and r12d,eax + vpaddd xmm3,xmm3,xmm7 + xor r13d,eax + add edx,DWORD[48+rsp] + mov r15d,r8d + vpsrld xmm7,xmm4,3 + xor r12d,ecx + shrd r14d,r14d,11 + xor r15d,r9d + vpslld xmm5,xmm4,14 + add edx,r12d + shrd r13d,r13d,6 + and edi,r15d + vpxor xmm4,xmm7,xmm6 + xor r14d,r8d + add edx,r13d + xor edi,r9d + vpshufd xmm7,xmm2,250 + shrd r14d,r14d,2 + add r11d,edx + add edx,edi + vpsrld xmm6,xmm6,11 + mov r13d,r11d + add r14d,edx + shrd r13d,r13d,14 + vpxor xmm4,xmm4,xmm5 + mov edx,r14d + mov r12d,eax + shrd r14d,r14d,9 + vpslld xmm5,xmm5,11 + xor r13d,r11d + xor r12d,ebx + shrd r13d,r13d,5 + vpxor xmm4,xmm4,xmm6 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + vpsrld xmm6,xmm7,10 + add ecx,DWORD[52+rsp] + mov edi,edx + xor r12d,ebx + vpxor xmm4,xmm4,xmm5 + shrd r14d,r14d,11 + xor edi,r8d + add ecx,r12d + vpsrlq xmm7,xmm7,17 + shrd r13d,r13d,6 + and r15d,edi + xor r14d,edx + vpaddd xmm3,xmm3,xmm4 + add ecx,r13d + xor r15d,r8d + shrd r14d,r14d,2 + vpxor xmm6,xmm6,xmm7 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + vpsrlq xmm7,xmm7,2 + add r14d,ecx + shrd r13d,r13d,14 + mov ecx,r14d + vpxor xmm6,xmm6,xmm7 + mov r12d,r11d + shrd r14d,r14d,9 + xor r13d,r10d + vpshufb xmm6,xmm6,xmm8 + xor r12d,eax + shrd r13d,r13d,5 + xor r14d,ecx + vpaddd xmm3,xmm3,xmm6 + and r12d,r10d + xor r13d,r10d + add ebx,DWORD[56+rsp] + vpshufd xmm7,xmm3,80 + mov r15d,ecx + xor r12d,eax + shrd r14d,r14d,11 + vpsrld xmm6,xmm7,10 + xor r15d,edx + add ebx,r12d + shrd r13d,r13d,6 + vpsrlq xmm7,xmm7,17 + and edi,r15d + xor r14d,ecx + add ebx,r13d + vpxor xmm6,xmm6,xmm7 + xor edi,edx + shrd r14d,r14d,2 + add r9d,ebx + vpsrlq xmm7,xmm7,2 + add ebx,edi + mov r13d,r9d + add r14d,ebx + vpxor xmm6,xmm6,xmm7 + shrd r13d,r13d,14 + mov ebx,r14d + mov r12d,r10d + vpshufb xmm6,xmm6,xmm9 + shrd r14d,r14d,9 + xor r13d,r9d + xor r12d,r11d + vpaddd xmm3,xmm3,xmm6 + shrd r13d,r13d,5 + xor r14d,ebx + and r12d,r9d + vpaddd xmm6,xmm3,XMMWORD[96+rbp] + xor r13d,r9d + add eax,DWORD[60+rsp] + mov edi,ebx + xor r12d,r11d + shrd r14d,r14d,11 + xor edi,ecx + add eax,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + shrd r14d,r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + vmovdqa XMMWORD[48+rsp],xmm6 + cmp BYTE[131+rbp],0 + jne NEAR $L$avx_00_47 + shrd r13d,r13d,14 + mov eax,r14d + mov r12d,r9d + shrd r14d,r14d,9 + xor r13d,r8d + xor r12d,r10d + shrd r13d,r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD[rsp] + mov r15d,eax + xor r12d,r10d + shrd r14d,r14d,11 + xor r15d,ebx + add r11d,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + shrd r14d,r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + shrd r13d,r13d,14 + mov r11d,r14d + mov r12d,r8d + shrd r14d,r14d,9 + xor r13d,edx + xor r12d,r9d + shrd r13d,r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD[4+rsp] + mov edi,r11d + xor r12d,r9d + shrd r14d,r14d,11 + xor edi,eax + add r10d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + shrd r14d,r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + shrd r13d,r13d,14 + mov r10d,r14d + mov r12d,edx + shrd r14d,r14d,9 + xor r13d,ecx + xor r12d,r8d + shrd r13d,r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD[8+rsp] + mov r15d,r10d + xor r12d,r8d + shrd r14d,r14d,11 + xor r15d,r11d + add r9d,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + shrd r14d,r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + shrd r13d,r13d,14 + mov r9d,r14d + mov r12d,ecx + shrd r14d,r14d,9 + xor r13d,ebx + xor r12d,edx + shrd r13d,r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD[12+rsp] + mov edi,r9d + xor r12d,edx + shrd r14d,r14d,11 + xor edi,r10d + add r8d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + shrd r14d,r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + shrd r13d,r13d,14 + mov r8d,r14d + mov r12d,ebx + shrd r14d,r14d,9 + xor r13d,eax + xor r12d,ecx + shrd r13d,r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD[16+rsp] + mov r15d,r8d + xor r12d,ecx + shrd r14d,r14d,11 + xor r15d,r9d + add edx,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + shrd r14d,r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + shrd r13d,r13d,14 + mov edx,r14d + mov r12d,eax + shrd r14d,r14d,9 + xor r13d,r11d + xor r12d,ebx + shrd r13d,r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD[20+rsp] + mov edi,edx + xor r12d,ebx + shrd r14d,r14d,11 + xor edi,r8d + add ecx,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + shrd r14d,r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + shrd r13d,r13d,14 + mov ecx,r14d + mov r12d,r11d + shrd r14d,r14d,9 + xor r13d,r10d + xor r12d,eax + shrd r13d,r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD[24+rsp] + mov r15d,ecx + xor r12d,eax + shrd r14d,r14d,11 + xor r15d,edx + add ebx,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + shrd r14d,r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + shrd r13d,r13d,14 + mov ebx,r14d + mov r12d,r10d + shrd r14d,r14d,9 + xor r13d,r9d + xor r12d,r11d + shrd r13d,r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD[28+rsp] + mov edi,ebx + xor r12d,r11d + shrd r14d,r14d,11 + xor edi,ecx + add eax,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + shrd r14d,r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + shrd r13d,r13d,14 + mov eax,r14d + mov r12d,r9d + shrd r14d,r14d,9 + xor r13d,r8d + xor r12d,r10d + shrd r13d,r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD[32+rsp] + mov r15d,eax + xor r12d,r10d + shrd r14d,r14d,11 + xor r15d,ebx + add r11d,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + shrd r14d,r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + shrd r13d,r13d,14 + mov r11d,r14d + mov r12d,r8d + shrd r14d,r14d,9 + xor r13d,edx + xor r12d,r9d + shrd r13d,r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD[36+rsp] + mov edi,r11d + xor r12d,r9d + shrd r14d,r14d,11 + xor edi,eax + add r10d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + shrd r14d,r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + shrd r13d,r13d,14 + mov r10d,r14d + mov r12d,edx + shrd r14d,r14d,9 + xor r13d,ecx + xor r12d,r8d + shrd r13d,r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD[40+rsp] + mov r15d,r10d + xor r12d,r8d + shrd r14d,r14d,11 + xor r15d,r11d + add r9d,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + shrd r14d,r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + shrd r13d,r13d,14 + mov r9d,r14d + mov r12d,ecx + shrd r14d,r14d,9 + xor r13d,ebx + xor r12d,edx + shrd r13d,r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD[44+rsp] + mov edi,r9d + xor r12d,edx + shrd r14d,r14d,11 + xor edi,r10d + add r8d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + shrd r14d,r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + shrd r13d,r13d,14 + mov r8d,r14d + mov r12d,ebx + shrd r14d,r14d,9 + xor r13d,eax + xor r12d,ecx + shrd r13d,r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD[48+rsp] + mov r15d,r8d + xor r12d,ecx + shrd r14d,r14d,11 + xor r15d,r9d + add edx,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + shrd r14d,r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + shrd r13d,r13d,14 + mov edx,r14d + mov r12d,eax + shrd r14d,r14d,9 + xor r13d,r11d + xor r12d,ebx + shrd r13d,r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD[52+rsp] + mov edi,edx + xor r12d,ebx + shrd r14d,r14d,11 + xor edi,r8d + add ecx,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + shrd r14d,r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + shrd r13d,r13d,14 + mov ecx,r14d + mov r12d,r11d + shrd r14d,r14d,9 + xor r13d,r10d + xor r12d,eax + shrd r13d,r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD[56+rsp] + mov r15d,ecx + xor r12d,eax + shrd r14d,r14d,11 + xor r15d,edx + add ebx,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + shrd r14d,r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + shrd r13d,r13d,14 + mov ebx,r14d + mov r12d,r10d + shrd r14d,r14d,9 + xor r13d,r9d + xor r12d,r11d + shrd r13d,r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD[60+rsp] + mov edi,ebx + xor r12d,r11d + shrd r14d,r14d,11 + xor edi,ecx + add eax,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + shrd r14d,r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + mov rdi,QWORD[((64+0))+rsp] + mov eax,r14d + + add eax,DWORD[rdi] + lea rsi,[64+rsi] + add ebx,DWORD[4+rdi] + add ecx,DWORD[8+rdi] + add edx,DWORD[12+rdi] + add r8d,DWORD[16+rdi] + add r9d,DWORD[20+rdi] + add r10d,DWORD[24+rdi] + add r11d,DWORD[28+rdi] + + cmp rsi,QWORD[((64+16))+rsp] + + mov DWORD[rdi],eax + mov DWORD[4+rdi],ebx + mov DWORD[8+rdi],ecx + mov DWORD[12+rdi],edx + mov DWORD[16+rdi],r8d + mov DWORD[20+rdi],r9d + mov DWORD[24+rdi],r10d + mov DWORD[28+rdi],r11d + jb NEAR $L$loop_avx + + mov rsi,QWORD[88+rsp] + + vzeroupper + movaps xmm6,XMMWORD[((64+32))+rsp] + movaps xmm7,XMMWORD[((64+48))+rsp] + movaps xmm8,XMMWORD[((64+64))+rsp] + movaps xmm9,XMMWORD[((64+80))+rsp] + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$epilogue_avx: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha256_block_data_order_avx: + +ALIGN 64 +sha256_block_data_order_avx2: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order_avx2: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +$L$avx2_shortcut: + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,608 + shl rdx,4 + and rsp,-256*4 + lea rdx,[rdx*4+rsi] + add rsp,448 + mov QWORD[((64+0))+rsp],rdi + mov QWORD[((64+8))+rsp],rsi + mov QWORD[((64+16))+rsp],rdx + mov QWORD[88+rsp],rax + + movaps XMMWORD[(64+32)+rsp],xmm6 + movaps XMMWORD[(64+48)+rsp],xmm7 + movaps XMMWORD[(64+64)+rsp],xmm8 + movaps XMMWORD[(64+80)+rsp],xmm9 +$L$prologue_avx2: + + vzeroupper + sub rsi,-16*4 + mov eax,DWORD[rdi] + mov r12,rsi + mov ebx,DWORD[4+rdi] + cmp rsi,rdx + mov ecx,DWORD[8+rdi] + cmove r12,rsp + mov edx,DWORD[12+rdi] + mov r8d,DWORD[16+rdi] + mov r9d,DWORD[20+rdi] + mov r10d,DWORD[24+rdi] + mov r11d,DWORD[28+rdi] + vmovdqa ymm8,YMMWORD[((K256+512+32))] + vmovdqa ymm9,YMMWORD[((K256+512+64))] + jmp NEAR $L$oop_avx2 +ALIGN 16 +$L$oop_avx2: + vmovdqa ymm7,YMMWORD[((K256+512))] + vmovdqu xmm0,XMMWORD[((-64+0))+rsi] + vmovdqu xmm1,XMMWORD[((-64+16))+rsi] + vmovdqu xmm2,XMMWORD[((-64+32))+rsi] + vmovdqu xmm3,XMMWORD[((-64+48))+rsi] + + vinserti128 ymm0,ymm0,XMMWORD[r12],1 + vinserti128 ymm1,ymm1,XMMWORD[16+r12],1 + vpshufb ymm0,ymm0,ymm7 + vinserti128 ymm2,ymm2,XMMWORD[32+r12],1 + vpshufb ymm1,ymm1,ymm7 + vinserti128 ymm3,ymm3,XMMWORD[48+r12],1 + + lea rbp,[K256] + vpshufb ymm2,ymm2,ymm7 + vpaddd ymm4,ymm0,YMMWORD[rbp] + vpshufb ymm3,ymm3,ymm7 + vpaddd ymm5,ymm1,YMMWORD[32+rbp] + vpaddd ymm6,ymm2,YMMWORD[64+rbp] + vpaddd ymm7,ymm3,YMMWORD[96+rbp] + vmovdqa YMMWORD[rsp],ymm4 + xor r14d,r14d + vmovdqa YMMWORD[32+rsp],ymm5 + lea rsp,[((-64))+rsp] + mov edi,ebx + vmovdqa YMMWORD[rsp],ymm6 + xor edi,ecx + vmovdqa YMMWORD[32+rsp],ymm7 + mov r12d,r9d + sub rbp,-16*2*4 + jmp NEAR $L$avx2_00_47 + +ALIGN 16 +$L$avx2_00_47: + lea rsp,[((-64))+rsp] + vpalignr ymm4,ymm1,ymm0,4 + add r11d,DWORD[((0+128))+rsp] + and r12d,r8d + rorx r13d,r8d,25 + vpalignr ymm7,ymm3,ymm2,4 + rorx r15d,r8d,11 + lea eax,[r14*1+rax] + lea r11d,[r12*1+r11] + vpsrld ymm6,ymm4,7 + andn r12d,r8d,r10d + xor r13d,r15d + rorx r14d,r8d,6 + vpaddd ymm0,ymm0,ymm7 + lea r11d,[r12*1+r11] + xor r13d,r14d + mov r15d,eax + vpsrld ymm7,ymm4,3 + rorx r12d,eax,22 + lea r11d,[r13*1+r11] + xor r15d,ebx + vpslld ymm5,ymm4,14 + rorx r14d,eax,13 + rorx r13d,eax,2 + lea edx,[r11*1+rdx] + vpxor ymm4,ymm7,ymm6 + and edi,r15d + xor r14d,r12d + xor edi,ebx + vpshufd ymm7,ymm3,250 + xor r14d,r13d + lea r11d,[rdi*1+r11] + mov r12d,r8d + vpsrld ymm6,ymm6,11 + add r10d,DWORD[((4+128))+rsp] + and r12d,edx + rorx r13d,edx,25 + vpxor ymm4,ymm4,ymm5 + rorx edi,edx,11 + lea r11d,[r14*1+r11] + lea r10d,[r12*1+r10] + vpslld ymm5,ymm5,11 + andn r12d,edx,r9d + xor r13d,edi + rorx r14d,edx,6 + vpxor ymm4,ymm4,ymm6 + lea r10d,[r12*1+r10] + xor r13d,r14d + mov edi,r11d + vpsrld ymm6,ymm7,10 + rorx r12d,r11d,22 + lea r10d,[r13*1+r10] + xor edi,eax + vpxor ymm4,ymm4,ymm5 + rorx r14d,r11d,13 + rorx r13d,r11d,2 + lea ecx,[r10*1+rcx] + vpsrlq ymm7,ymm7,17 + and r15d,edi + xor r14d,r12d + xor r15d,eax + vpaddd ymm0,ymm0,ymm4 + xor r14d,r13d + lea r10d,[r15*1+r10] + mov r12d,edx + vpxor ymm6,ymm6,ymm7 + add r9d,DWORD[((8+128))+rsp] + and r12d,ecx + rorx r13d,ecx,25 + vpsrlq ymm7,ymm7,2 + rorx r15d,ecx,11 + lea r10d,[r14*1+r10] + lea r9d,[r12*1+r9] + vpxor ymm6,ymm6,ymm7 + andn r12d,ecx,r8d + xor r13d,r15d + rorx r14d,ecx,6 + vpshufb ymm6,ymm6,ymm8 + lea r9d,[r12*1+r9] + xor r13d,r14d + mov r15d,r10d + vpaddd ymm0,ymm0,ymm6 + rorx r12d,r10d,22 + lea r9d,[r13*1+r9] + xor r15d,r11d + vpshufd ymm7,ymm0,80 + rorx r14d,r10d,13 + rorx r13d,r10d,2 + lea ebx,[r9*1+rbx] + vpsrld ymm6,ymm7,10 + and edi,r15d + xor r14d,r12d + xor edi,r11d + vpsrlq ymm7,ymm7,17 + xor r14d,r13d + lea r9d,[rdi*1+r9] + mov r12d,ecx + vpxor ymm6,ymm6,ymm7 + add r8d,DWORD[((12+128))+rsp] + and r12d,ebx + rorx r13d,ebx,25 + vpsrlq ymm7,ymm7,2 + rorx edi,ebx,11 + lea r9d,[r14*1+r9] + lea r8d,[r12*1+r8] + vpxor ymm6,ymm6,ymm7 + andn r12d,ebx,edx + xor r13d,edi + rorx r14d,ebx,6 + vpshufb ymm6,ymm6,ymm9 + lea r8d,[r12*1+r8] + xor r13d,r14d + mov edi,r9d + vpaddd ymm0,ymm0,ymm6 + rorx r12d,r9d,22 + lea r8d,[r13*1+r8] + xor edi,r10d + vpaddd ymm6,ymm0,YMMWORD[rbp] + rorx r14d,r9d,13 + rorx r13d,r9d,2 + lea eax,[r8*1+rax] + and r15d,edi + xor r14d,r12d + xor r15d,r10d + xor r14d,r13d + lea r8d,[r15*1+r8] + mov r12d,ebx + vmovdqa YMMWORD[rsp],ymm6 + vpalignr ymm4,ymm2,ymm1,4 + add edx,DWORD[((32+128))+rsp] + and r12d,eax + rorx r13d,eax,25 + vpalignr ymm7,ymm0,ymm3,4 + rorx r15d,eax,11 + lea r8d,[r14*1+r8] + lea edx,[r12*1+rdx] + vpsrld ymm6,ymm4,7 + andn r12d,eax,ecx + xor r13d,r15d + rorx r14d,eax,6 + vpaddd ymm1,ymm1,ymm7 + lea edx,[r12*1+rdx] + xor r13d,r14d + mov r15d,r8d + vpsrld ymm7,ymm4,3 + rorx r12d,r8d,22 + lea edx,[r13*1+rdx] + xor r15d,r9d + vpslld ymm5,ymm4,14 + rorx r14d,r8d,13 + rorx r13d,r8d,2 + lea r11d,[rdx*1+r11] + vpxor ymm4,ymm7,ymm6 + and edi,r15d + xor r14d,r12d + xor edi,r9d + vpshufd ymm7,ymm0,250 + xor r14d,r13d + lea edx,[rdi*1+rdx] + mov r12d,eax + vpsrld ymm6,ymm6,11 + add ecx,DWORD[((36+128))+rsp] + and r12d,r11d + rorx r13d,r11d,25 + vpxor ymm4,ymm4,ymm5 + rorx edi,r11d,11 + lea edx,[r14*1+rdx] + lea ecx,[r12*1+rcx] + vpslld ymm5,ymm5,11 + andn r12d,r11d,ebx + xor r13d,edi + rorx r14d,r11d,6 + vpxor ymm4,ymm4,ymm6 + lea ecx,[r12*1+rcx] + xor r13d,r14d + mov edi,edx + vpsrld ymm6,ymm7,10 + rorx r12d,edx,22 + lea ecx,[r13*1+rcx] + xor edi,r8d + vpxor ymm4,ymm4,ymm5 + rorx r14d,edx,13 + rorx r13d,edx,2 + lea r10d,[rcx*1+r10] + vpsrlq ymm7,ymm7,17 + and r15d,edi + xor r14d,r12d + xor r15d,r8d + vpaddd ymm1,ymm1,ymm4 + xor r14d,r13d + lea ecx,[r15*1+rcx] + mov r12d,r11d + vpxor ymm6,ymm6,ymm7 + add ebx,DWORD[((40+128))+rsp] + and r12d,r10d + rorx r13d,r10d,25 + vpsrlq ymm7,ymm7,2 + rorx r15d,r10d,11 + lea ecx,[r14*1+rcx] + lea ebx,[r12*1+rbx] + vpxor ymm6,ymm6,ymm7 + andn r12d,r10d,eax + xor r13d,r15d + rorx r14d,r10d,6 + vpshufb ymm6,ymm6,ymm8 + lea ebx,[r12*1+rbx] + xor r13d,r14d + mov r15d,ecx + vpaddd ymm1,ymm1,ymm6 + rorx r12d,ecx,22 + lea ebx,[r13*1+rbx] + xor r15d,edx + vpshufd ymm7,ymm1,80 + rorx r14d,ecx,13 + rorx r13d,ecx,2 + lea r9d,[rbx*1+r9] + vpsrld ymm6,ymm7,10 + and edi,r15d + xor r14d,r12d + xor edi,edx + vpsrlq ymm7,ymm7,17 + xor r14d,r13d + lea ebx,[rdi*1+rbx] + mov r12d,r10d + vpxor ymm6,ymm6,ymm7 + add eax,DWORD[((44+128))+rsp] + and r12d,r9d + rorx r13d,r9d,25 + vpsrlq ymm7,ymm7,2 + rorx edi,r9d,11 + lea ebx,[r14*1+rbx] + lea eax,[r12*1+rax] + vpxor ymm6,ymm6,ymm7 + andn r12d,r9d,r11d + xor r13d,edi + rorx r14d,r9d,6 + vpshufb ymm6,ymm6,ymm9 + lea eax,[r12*1+rax] + xor r13d,r14d + mov edi,ebx + vpaddd ymm1,ymm1,ymm6 + rorx r12d,ebx,22 + lea eax,[r13*1+rax] + xor edi,ecx + vpaddd ymm6,ymm1,YMMWORD[32+rbp] + rorx r14d,ebx,13 + rorx r13d,ebx,2 + lea r8d,[rax*1+r8] + and r15d,edi + xor r14d,r12d + xor r15d,ecx + xor r14d,r13d + lea eax,[r15*1+rax] + mov r12d,r9d + vmovdqa YMMWORD[32+rsp],ymm6 + lea rsp,[((-64))+rsp] + vpalignr ymm4,ymm3,ymm2,4 + add r11d,DWORD[((0+128))+rsp] + and r12d,r8d + rorx r13d,r8d,25 + vpalignr ymm7,ymm1,ymm0,4 + rorx r15d,r8d,11 + lea eax,[r14*1+rax] + lea r11d,[r12*1+r11] + vpsrld ymm6,ymm4,7 + andn r12d,r8d,r10d + xor r13d,r15d + rorx r14d,r8d,6 + vpaddd ymm2,ymm2,ymm7 + lea r11d,[r12*1+r11] + xor r13d,r14d + mov r15d,eax + vpsrld ymm7,ymm4,3 + rorx r12d,eax,22 + lea r11d,[r13*1+r11] + xor r15d,ebx + vpslld ymm5,ymm4,14 + rorx r14d,eax,13 + rorx r13d,eax,2 + lea edx,[r11*1+rdx] + vpxor ymm4,ymm7,ymm6 + and edi,r15d + xor r14d,r12d + xor edi,ebx + vpshufd ymm7,ymm1,250 + xor r14d,r13d + lea r11d,[rdi*1+r11] + mov r12d,r8d + vpsrld ymm6,ymm6,11 + add r10d,DWORD[((4+128))+rsp] + and r12d,edx + rorx r13d,edx,25 + vpxor ymm4,ymm4,ymm5 + rorx edi,edx,11 + lea r11d,[r14*1+r11] + lea r10d,[r12*1+r10] + vpslld ymm5,ymm5,11 + andn r12d,edx,r9d + xor r13d,edi + rorx r14d,edx,6 + vpxor ymm4,ymm4,ymm6 + lea r10d,[r12*1+r10] + xor r13d,r14d + mov edi,r11d + vpsrld ymm6,ymm7,10 + rorx r12d,r11d,22 + lea r10d,[r13*1+r10] + xor edi,eax + vpxor ymm4,ymm4,ymm5 + rorx r14d,r11d,13 + rorx r13d,r11d,2 + lea ecx,[r10*1+rcx] + vpsrlq ymm7,ymm7,17 + and r15d,edi + xor r14d,r12d + xor r15d,eax + vpaddd ymm2,ymm2,ymm4 + xor r14d,r13d + lea r10d,[r15*1+r10] + mov r12d,edx + vpxor ymm6,ymm6,ymm7 + add r9d,DWORD[((8+128))+rsp] + and r12d,ecx + rorx r13d,ecx,25 + vpsrlq ymm7,ymm7,2 + rorx r15d,ecx,11 + lea r10d,[r14*1+r10] + lea r9d,[r12*1+r9] + vpxor ymm6,ymm6,ymm7 + andn r12d,ecx,r8d + xor r13d,r15d + rorx r14d,ecx,6 + vpshufb ymm6,ymm6,ymm8 + lea r9d,[r12*1+r9] + xor r13d,r14d + mov r15d,r10d + vpaddd ymm2,ymm2,ymm6 + rorx r12d,r10d,22 + lea r9d,[r13*1+r9] + xor r15d,r11d + vpshufd ymm7,ymm2,80 + rorx r14d,r10d,13 + rorx r13d,r10d,2 + lea ebx,[r9*1+rbx] + vpsrld ymm6,ymm7,10 + and edi,r15d + xor r14d,r12d + xor edi,r11d + vpsrlq ymm7,ymm7,17 + xor r14d,r13d + lea r9d,[rdi*1+r9] + mov r12d,ecx + vpxor ymm6,ymm6,ymm7 + add r8d,DWORD[((12+128))+rsp] + and r12d,ebx + rorx r13d,ebx,25 + vpsrlq ymm7,ymm7,2 + rorx edi,ebx,11 + lea r9d,[r14*1+r9] + lea r8d,[r12*1+r8] + vpxor ymm6,ymm6,ymm7 + andn r12d,ebx,edx + xor r13d,edi + rorx r14d,ebx,6 + vpshufb ymm6,ymm6,ymm9 + lea r8d,[r12*1+r8] + xor r13d,r14d + mov edi,r9d + vpaddd ymm2,ymm2,ymm6 + rorx r12d,r9d,22 + lea r8d,[r13*1+r8] + xor edi,r10d + vpaddd ymm6,ymm2,YMMWORD[64+rbp] + rorx r14d,r9d,13 + rorx r13d,r9d,2 + lea eax,[r8*1+rax] + and r15d,edi + xor r14d,r12d + xor r15d,r10d + xor r14d,r13d + lea r8d,[r15*1+r8] + mov r12d,ebx + vmovdqa YMMWORD[rsp],ymm6 + vpalignr ymm4,ymm0,ymm3,4 + add edx,DWORD[((32+128))+rsp] + and r12d,eax + rorx r13d,eax,25 + vpalignr ymm7,ymm2,ymm1,4 + rorx r15d,eax,11 + lea r8d,[r14*1+r8] + lea edx,[r12*1+rdx] + vpsrld ymm6,ymm4,7 + andn r12d,eax,ecx + xor r13d,r15d + rorx r14d,eax,6 + vpaddd ymm3,ymm3,ymm7 + lea edx,[r12*1+rdx] + xor r13d,r14d + mov r15d,r8d + vpsrld ymm7,ymm4,3 + rorx r12d,r8d,22 + lea edx,[r13*1+rdx] + xor r15d,r9d + vpslld ymm5,ymm4,14 + rorx r14d,r8d,13 + rorx r13d,r8d,2 + lea r11d,[rdx*1+r11] + vpxor ymm4,ymm7,ymm6 + and edi,r15d + xor r14d,r12d + xor edi,r9d + vpshufd ymm7,ymm2,250 + xor r14d,r13d + lea edx,[rdi*1+rdx] + mov r12d,eax + vpsrld ymm6,ymm6,11 + add ecx,DWORD[((36+128))+rsp] + and r12d,r11d + rorx r13d,r11d,25 + vpxor ymm4,ymm4,ymm5 + rorx edi,r11d,11 + lea edx,[r14*1+rdx] + lea ecx,[r12*1+rcx] + vpslld ymm5,ymm5,11 + andn r12d,r11d,ebx + xor r13d,edi + rorx r14d,r11d,6 + vpxor ymm4,ymm4,ymm6 + lea ecx,[r12*1+rcx] + xor r13d,r14d + mov edi,edx + vpsrld ymm6,ymm7,10 + rorx r12d,edx,22 + lea ecx,[r13*1+rcx] + xor edi,r8d + vpxor ymm4,ymm4,ymm5 + rorx r14d,edx,13 + rorx r13d,edx,2 + lea r10d,[rcx*1+r10] + vpsrlq ymm7,ymm7,17 + and r15d,edi + xor r14d,r12d + xor r15d,r8d + vpaddd ymm3,ymm3,ymm4 + xor r14d,r13d + lea ecx,[r15*1+rcx] + mov r12d,r11d + vpxor ymm6,ymm6,ymm7 + add ebx,DWORD[((40+128))+rsp] + and r12d,r10d + rorx r13d,r10d,25 + vpsrlq ymm7,ymm7,2 + rorx r15d,r10d,11 + lea ecx,[r14*1+rcx] + lea ebx,[r12*1+rbx] + vpxor ymm6,ymm6,ymm7 + andn r12d,r10d,eax + xor r13d,r15d + rorx r14d,r10d,6 + vpshufb ymm6,ymm6,ymm8 + lea ebx,[r12*1+rbx] + xor r13d,r14d + mov r15d,ecx + vpaddd ymm3,ymm3,ymm6 + rorx r12d,ecx,22 + lea ebx,[r13*1+rbx] + xor r15d,edx + vpshufd ymm7,ymm3,80 + rorx r14d,ecx,13 + rorx r13d,ecx,2 + lea r9d,[rbx*1+r9] + vpsrld ymm6,ymm7,10 + and edi,r15d + xor r14d,r12d + xor edi,edx + vpsrlq ymm7,ymm7,17 + xor r14d,r13d + lea ebx,[rdi*1+rbx] + mov r12d,r10d + vpxor ymm6,ymm6,ymm7 + add eax,DWORD[((44+128))+rsp] + and r12d,r9d + rorx r13d,r9d,25 + vpsrlq ymm7,ymm7,2 + rorx edi,r9d,11 + lea ebx,[r14*1+rbx] + lea eax,[r12*1+rax] + vpxor ymm6,ymm6,ymm7 + andn r12d,r9d,r11d + xor r13d,edi + rorx r14d,r9d,6 + vpshufb ymm6,ymm6,ymm9 + lea eax,[r12*1+rax] + xor r13d,r14d + mov edi,ebx + vpaddd ymm3,ymm3,ymm6 + rorx r12d,ebx,22 + lea eax,[r13*1+rax] + xor edi,ecx + vpaddd ymm6,ymm3,YMMWORD[96+rbp] + rorx r14d,ebx,13 + rorx r13d,ebx,2 + lea r8d,[rax*1+r8] + and r15d,edi + xor r14d,r12d + xor r15d,ecx + xor r14d,r13d + lea eax,[r15*1+rax] + mov r12d,r9d + vmovdqa YMMWORD[32+rsp],ymm6 + lea rbp,[128+rbp] + cmp BYTE[3+rbp],0 + jne NEAR $L$avx2_00_47 + add r11d,DWORD[((0+64))+rsp] + and r12d,r8d + rorx r13d,r8d,25 + rorx r15d,r8d,11 + lea eax,[r14*1+rax] + lea r11d,[r12*1+r11] + andn r12d,r8d,r10d + xor r13d,r15d + rorx r14d,r8d,6 + lea r11d,[r12*1+r11] + xor r13d,r14d + mov r15d,eax + rorx r12d,eax,22 + lea r11d,[r13*1+r11] + xor r15d,ebx + rorx r14d,eax,13 + rorx r13d,eax,2 + lea edx,[r11*1+rdx] + and edi,r15d + xor r14d,r12d + xor edi,ebx + xor r14d,r13d + lea r11d,[rdi*1+r11] + mov r12d,r8d + add r10d,DWORD[((4+64))+rsp] + and r12d,edx + rorx r13d,edx,25 + rorx edi,edx,11 + lea r11d,[r14*1+r11] + lea r10d,[r12*1+r10] + andn r12d,edx,r9d + xor r13d,edi + rorx r14d,edx,6 + lea r10d,[r12*1+r10] + xor r13d,r14d + mov edi,r11d + rorx r12d,r11d,22 + lea r10d,[r13*1+r10] + xor edi,eax + rorx r14d,r11d,13 + rorx r13d,r11d,2 + lea ecx,[r10*1+rcx] + and r15d,edi + xor r14d,r12d + xor r15d,eax + xor r14d,r13d + lea r10d,[r15*1+r10] + mov r12d,edx + add r9d,DWORD[((8+64))+rsp] + and r12d,ecx + rorx r13d,ecx,25 + rorx r15d,ecx,11 + lea r10d,[r14*1+r10] + lea r9d,[r12*1+r9] + andn r12d,ecx,r8d + xor r13d,r15d + rorx r14d,ecx,6 + lea r9d,[r12*1+r9] + xor r13d,r14d + mov r15d,r10d + rorx r12d,r10d,22 + lea r9d,[r13*1+r9] + xor r15d,r11d + rorx r14d,r10d,13 + rorx r13d,r10d,2 + lea ebx,[r9*1+rbx] + and edi,r15d + xor r14d,r12d + xor edi,r11d + xor r14d,r13d + lea r9d,[rdi*1+r9] + mov r12d,ecx + add r8d,DWORD[((12+64))+rsp] + and r12d,ebx + rorx r13d,ebx,25 + rorx edi,ebx,11 + lea r9d,[r14*1+r9] + lea r8d,[r12*1+r8] + andn r12d,ebx,edx + xor r13d,edi + rorx r14d,ebx,6 + lea r8d,[r12*1+r8] + xor r13d,r14d + mov edi,r9d + rorx r12d,r9d,22 + lea r8d,[r13*1+r8] + xor edi,r10d + rorx r14d,r9d,13 + rorx r13d,r9d,2 + lea eax,[r8*1+rax] + and r15d,edi + xor r14d,r12d + xor r15d,r10d + xor r14d,r13d + lea r8d,[r15*1+r8] + mov r12d,ebx + add edx,DWORD[((32+64))+rsp] + and r12d,eax + rorx r13d,eax,25 + rorx r15d,eax,11 + lea r8d,[r14*1+r8] + lea edx,[r12*1+rdx] + andn r12d,eax,ecx + xor r13d,r15d + rorx r14d,eax,6 + lea edx,[r12*1+rdx] + xor r13d,r14d + mov r15d,r8d + rorx r12d,r8d,22 + lea edx,[r13*1+rdx] + xor r15d,r9d + rorx r14d,r8d,13 + rorx r13d,r8d,2 + lea r11d,[rdx*1+r11] + and edi,r15d + xor r14d,r12d + xor edi,r9d + xor r14d,r13d + lea edx,[rdi*1+rdx] + mov r12d,eax + add ecx,DWORD[((36+64))+rsp] + and r12d,r11d + rorx r13d,r11d,25 + rorx edi,r11d,11 + lea edx,[r14*1+rdx] + lea ecx,[r12*1+rcx] + andn r12d,r11d,ebx + xor r13d,edi + rorx r14d,r11d,6 + lea ecx,[r12*1+rcx] + xor r13d,r14d + mov edi,edx + rorx r12d,edx,22 + lea ecx,[r13*1+rcx] + xor edi,r8d + rorx r14d,edx,13 + rorx r13d,edx,2 + lea r10d,[rcx*1+r10] + and r15d,edi + xor r14d,r12d + xor r15d,r8d + xor r14d,r13d + lea ecx,[r15*1+rcx] + mov r12d,r11d + add ebx,DWORD[((40+64))+rsp] + and r12d,r10d + rorx r13d,r10d,25 + rorx r15d,r10d,11 + lea ecx,[r14*1+rcx] + lea ebx,[r12*1+rbx] + andn r12d,r10d,eax + xor r13d,r15d + rorx r14d,r10d,6 + lea ebx,[r12*1+rbx] + xor r13d,r14d + mov r15d,ecx + rorx r12d,ecx,22 + lea ebx,[r13*1+rbx] + xor r15d,edx + rorx r14d,ecx,13 + rorx r13d,ecx,2 + lea r9d,[rbx*1+r9] + and edi,r15d + xor r14d,r12d + xor edi,edx + xor r14d,r13d + lea ebx,[rdi*1+rbx] + mov r12d,r10d + add eax,DWORD[((44+64))+rsp] + and r12d,r9d + rorx r13d,r9d,25 + rorx edi,r9d,11 + lea ebx,[r14*1+rbx] + lea eax,[r12*1+rax] + andn r12d,r9d,r11d + xor r13d,edi + rorx r14d,r9d,6 + lea eax,[r12*1+rax] + xor r13d,r14d + mov edi,ebx + rorx r12d,ebx,22 + lea eax,[r13*1+rax] + xor edi,ecx + rorx r14d,ebx,13 + rorx r13d,ebx,2 + lea r8d,[rax*1+r8] + and r15d,edi + xor r14d,r12d + xor r15d,ecx + xor r14d,r13d + lea eax,[r15*1+rax] + mov r12d,r9d + add r11d,DWORD[rsp] + and r12d,r8d + rorx r13d,r8d,25 + rorx r15d,r8d,11 + lea eax,[r14*1+rax] + lea r11d,[r12*1+r11] + andn r12d,r8d,r10d + xor r13d,r15d + rorx r14d,r8d,6 + lea r11d,[r12*1+r11] + xor r13d,r14d + mov r15d,eax + rorx r12d,eax,22 + lea r11d,[r13*1+r11] + xor r15d,ebx + rorx r14d,eax,13 + rorx r13d,eax,2 + lea edx,[r11*1+rdx] + and edi,r15d + xor r14d,r12d + xor edi,ebx + xor r14d,r13d + lea r11d,[rdi*1+r11] + mov r12d,r8d + add r10d,DWORD[4+rsp] + and r12d,edx + rorx r13d,edx,25 + rorx edi,edx,11 + lea r11d,[r14*1+r11] + lea r10d,[r12*1+r10] + andn r12d,edx,r9d + xor r13d,edi + rorx r14d,edx,6 + lea r10d,[r12*1+r10] + xor r13d,r14d + mov edi,r11d + rorx r12d,r11d,22 + lea r10d,[r13*1+r10] + xor edi,eax + rorx r14d,r11d,13 + rorx r13d,r11d,2 + lea ecx,[r10*1+rcx] + and r15d,edi + xor r14d,r12d + xor r15d,eax + xor r14d,r13d + lea r10d,[r15*1+r10] + mov r12d,edx + add r9d,DWORD[8+rsp] + and r12d,ecx + rorx r13d,ecx,25 + rorx r15d,ecx,11 + lea r10d,[r14*1+r10] + lea r9d,[r12*1+r9] + andn r12d,ecx,r8d + xor r13d,r15d + rorx r14d,ecx,6 + lea r9d,[r12*1+r9] + xor r13d,r14d + mov r15d,r10d + rorx r12d,r10d,22 + lea r9d,[r13*1+r9] + xor r15d,r11d + rorx r14d,r10d,13 + rorx r13d,r10d,2 + lea ebx,[r9*1+rbx] + and edi,r15d + xor r14d,r12d + xor edi,r11d + xor r14d,r13d + lea r9d,[rdi*1+r9] + mov r12d,ecx + add r8d,DWORD[12+rsp] + and r12d,ebx + rorx r13d,ebx,25 + rorx edi,ebx,11 + lea r9d,[r14*1+r9] + lea r8d,[r12*1+r8] + andn r12d,ebx,edx + xor r13d,edi + rorx r14d,ebx,6 + lea r8d,[r12*1+r8] + xor r13d,r14d + mov edi,r9d + rorx r12d,r9d,22 + lea r8d,[r13*1+r8] + xor edi,r10d + rorx r14d,r9d,13 + rorx r13d,r9d,2 + lea eax,[r8*1+rax] + and r15d,edi + xor r14d,r12d + xor r15d,r10d + xor r14d,r13d + lea r8d,[r15*1+r8] + mov r12d,ebx + add edx,DWORD[32+rsp] + and r12d,eax + rorx r13d,eax,25 + rorx r15d,eax,11 + lea r8d,[r14*1+r8] + lea edx,[r12*1+rdx] + andn r12d,eax,ecx + xor r13d,r15d + rorx r14d,eax,6 + lea edx,[r12*1+rdx] + xor r13d,r14d + mov r15d,r8d + rorx r12d,r8d,22 + lea edx,[r13*1+rdx] + xor r15d,r9d + rorx r14d,r8d,13 + rorx r13d,r8d,2 + lea r11d,[rdx*1+r11] + and edi,r15d + xor r14d,r12d + xor edi,r9d + xor r14d,r13d + lea edx,[rdi*1+rdx] + mov r12d,eax + add ecx,DWORD[36+rsp] + and r12d,r11d + rorx r13d,r11d,25 + rorx edi,r11d,11 + lea edx,[r14*1+rdx] + lea ecx,[r12*1+rcx] + andn r12d,r11d,ebx + xor r13d,edi + rorx r14d,r11d,6 + lea ecx,[r12*1+rcx] + xor r13d,r14d + mov edi,edx + rorx r12d,edx,22 + lea ecx,[r13*1+rcx] + xor edi,r8d + rorx r14d,edx,13 + rorx r13d,edx,2 + lea r10d,[rcx*1+r10] + and r15d,edi + xor r14d,r12d + xor r15d,r8d + xor r14d,r13d + lea ecx,[r15*1+rcx] + mov r12d,r11d + add ebx,DWORD[40+rsp] + and r12d,r10d + rorx r13d,r10d,25 + rorx r15d,r10d,11 + lea ecx,[r14*1+rcx] + lea ebx,[r12*1+rbx] + andn r12d,r10d,eax + xor r13d,r15d + rorx r14d,r10d,6 + lea ebx,[r12*1+rbx] + xor r13d,r14d + mov r15d,ecx + rorx r12d,ecx,22 + lea ebx,[r13*1+rbx] + xor r15d,edx + rorx r14d,ecx,13 + rorx r13d,ecx,2 + lea r9d,[rbx*1+r9] + and edi,r15d + xor r14d,r12d + xor edi,edx + xor r14d,r13d + lea ebx,[rdi*1+rbx] + mov r12d,r10d + add eax,DWORD[44+rsp] + and r12d,r9d + rorx r13d,r9d,25 + rorx edi,r9d,11 + lea ebx,[r14*1+rbx] + lea eax,[r12*1+rax] + andn r12d,r9d,r11d + xor r13d,edi + rorx r14d,r9d,6 + lea eax,[r12*1+rax] + xor r13d,r14d + mov edi,ebx + rorx r12d,ebx,22 + lea eax,[r13*1+rax] + xor edi,ecx + rorx r14d,ebx,13 + rorx r13d,ebx,2 + lea r8d,[rax*1+r8] + and r15d,edi + xor r14d,r12d + xor r15d,ecx + xor r14d,r13d + lea eax,[r15*1+rax] + mov r12d,r9d + mov rdi,QWORD[512+rsp] + add eax,r14d + + lea rbp,[448+rsp] + + add eax,DWORD[rdi] + add ebx,DWORD[4+rdi] + add ecx,DWORD[8+rdi] + add edx,DWORD[12+rdi] + add r8d,DWORD[16+rdi] + add r9d,DWORD[20+rdi] + add r10d,DWORD[24+rdi] + add r11d,DWORD[28+rdi] + + mov DWORD[rdi],eax + mov DWORD[4+rdi],ebx + mov DWORD[8+rdi],ecx + mov DWORD[12+rdi],edx + mov DWORD[16+rdi],r8d + mov DWORD[20+rdi],r9d + mov DWORD[24+rdi],r10d + mov DWORD[28+rdi],r11d + + cmp rsi,QWORD[80+rbp] + je NEAR $L$done_avx2 + + xor r14d,r14d + mov edi,ebx + xor edi,ecx + mov r12d,r9d + jmp NEAR $L$ower_avx2 +ALIGN 16 +$L$ower_avx2: + add r11d,DWORD[((0+16))+rbp] + and r12d,r8d + rorx r13d,r8d,25 + rorx r15d,r8d,11 + lea eax,[r14*1+rax] + lea r11d,[r12*1+r11] + andn r12d,r8d,r10d + xor r13d,r15d + rorx r14d,r8d,6 + lea r11d,[r12*1+r11] + xor r13d,r14d + mov r15d,eax + rorx r12d,eax,22 + lea r11d,[r13*1+r11] + xor r15d,ebx + rorx r14d,eax,13 + rorx r13d,eax,2 + lea edx,[r11*1+rdx] + and edi,r15d + xor r14d,r12d + xor edi,ebx + xor r14d,r13d + lea r11d,[rdi*1+r11] + mov r12d,r8d + add r10d,DWORD[((4+16))+rbp] + and r12d,edx + rorx r13d,edx,25 + rorx edi,edx,11 + lea r11d,[r14*1+r11] + lea r10d,[r12*1+r10] + andn r12d,edx,r9d + xor r13d,edi + rorx r14d,edx,6 + lea r10d,[r12*1+r10] + xor r13d,r14d + mov edi,r11d + rorx r12d,r11d,22 + lea r10d,[r13*1+r10] + xor edi,eax + rorx r14d,r11d,13 + rorx r13d,r11d,2 + lea ecx,[r10*1+rcx] + and r15d,edi + xor r14d,r12d + xor r15d,eax + xor r14d,r13d + lea r10d,[r15*1+r10] + mov r12d,edx + add r9d,DWORD[((8+16))+rbp] + and r12d,ecx + rorx r13d,ecx,25 + rorx r15d,ecx,11 + lea r10d,[r14*1+r10] + lea r9d,[r12*1+r9] + andn r12d,ecx,r8d + xor r13d,r15d + rorx r14d,ecx,6 + lea r9d,[r12*1+r9] + xor r13d,r14d + mov r15d,r10d + rorx r12d,r10d,22 + lea r9d,[r13*1+r9] + xor r15d,r11d + rorx r14d,r10d,13 + rorx r13d,r10d,2 + lea ebx,[r9*1+rbx] + and edi,r15d + xor r14d,r12d + xor edi,r11d + xor r14d,r13d + lea r9d,[rdi*1+r9] + mov r12d,ecx + add r8d,DWORD[((12+16))+rbp] + and r12d,ebx + rorx r13d,ebx,25 + rorx edi,ebx,11 + lea r9d,[r14*1+r9] + lea r8d,[r12*1+r8] + andn r12d,ebx,edx + xor r13d,edi + rorx r14d,ebx,6 + lea r8d,[r12*1+r8] + xor r13d,r14d + mov edi,r9d + rorx r12d,r9d,22 + lea r8d,[r13*1+r8] + xor edi,r10d + rorx r14d,r9d,13 + rorx r13d,r9d,2 + lea eax,[r8*1+rax] + and r15d,edi + xor r14d,r12d + xor r15d,r10d + xor r14d,r13d + lea r8d,[r15*1+r8] + mov r12d,ebx + add edx,DWORD[((32+16))+rbp] + and r12d,eax + rorx r13d,eax,25 + rorx r15d,eax,11 + lea r8d,[r14*1+r8] + lea edx,[r12*1+rdx] + andn r12d,eax,ecx + xor r13d,r15d + rorx r14d,eax,6 + lea edx,[r12*1+rdx] + xor r13d,r14d + mov r15d,r8d + rorx r12d,r8d,22 + lea edx,[r13*1+rdx] + xor r15d,r9d + rorx r14d,r8d,13 + rorx r13d,r8d,2 + lea r11d,[rdx*1+r11] + and edi,r15d + xor r14d,r12d + xor edi,r9d + xor r14d,r13d + lea edx,[rdi*1+rdx] + mov r12d,eax + add ecx,DWORD[((36+16))+rbp] + and r12d,r11d + rorx r13d,r11d,25 + rorx edi,r11d,11 + lea edx,[r14*1+rdx] + lea ecx,[r12*1+rcx] + andn r12d,r11d,ebx + xor r13d,edi + rorx r14d,r11d,6 + lea ecx,[r12*1+rcx] + xor r13d,r14d + mov edi,edx + rorx r12d,edx,22 + lea ecx,[r13*1+rcx] + xor edi,r8d + rorx r14d,edx,13 + rorx r13d,edx,2 + lea r10d,[rcx*1+r10] + and r15d,edi + xor r14d,r12d + xor r15d,r8d + xor r14d,r13d + lea ecx,[r15*1+rcx] + mov r12d,r11d + add ebx,DWORD[((40+16))+rbp] + and r12d,r10d + rorx r13d,r10d,25 + rorx r15d,r10d,11 + lea ecx,[r14*1+rcx] + lea ebx,[r12*1+rbx] + andn r12d,r10d,eax + xor r13d,r15d + rorx r14d,r10d,6 + lea ebx,[r12*1+rbx] + xor r13d,r14d + mov r15d,ecx + rorx r12d,ecx,22 + lea ebx,[r13*1+rbx] + xor r15d,edx + rorx r14d,ecx,13 + rorx r13d,ecx,2 + lea r9d,[rbx*1+r9] + and edi,r15d + xor r14d,r12d + xor edi,edx + xor r14d,r13d + lea ebx,[rdi*1+rbx] + mov r12d,r10d + add eax,DWORD[((44+16))+rbp] + and r12d,r9d + rorx r13d,r9d,25 + rorx edi,r9d,11 + lea ebx,[r14*1+rbx] + lea eax,[r12*1+rax] + andn r12d,r9d,r11d + xor r13d,edi + rorx r14d,r9d,6 + lea eax,[r12*1+rax] + xor r13d,r14d + mov edi,ebx + rorx r12d,ebx,22 + lea eax,[r13*1+rax] + xor edi,ecx + rorx r14d,ebx,13 + rorx r13d,ebx,2 + lea r8d,[rax*1+r8] + and r15d,edi + xor r14d,r12d + xor r15d,ecx + xor r14d,r13d + lea eax,[r15*1+rax] + mov r12d,r9d + lea rbp,[((-64))+rbp] + cmp rbp,rsp + jae NEAR $L$ower_avx2 + + mov rdi,QWORD[512+rsp] + add eax,r14d + + lea rsp,[448+rsp] + + + + add eax,DWORD[rdi] + add ebx,DWORD[4+rdi] + add ecx,DWORD[8+rdi] + add edx,DWORD[12+rdi] + add r8d,DWORD[16+rdi] + add r9d,DWORD[20+rdi] + lea rsi,[128+rsi] + add r10d,DWORD[24+rdi] + mov r12,rsi + add r11d,DWORD[28+rdi] + cmp rsi,QWORD[((64+16))+rsp] + + mov DWORD[rdi],eax + cmove r12,rsp + mov DWORD[4+rdi],ebx + mov DWORD[8+rdi],ecx + mov DWORD[12+rdi],edx + mov DWORD[16+rdi],r8d + mov DWORD[20+rdi],r9d + mov DWORD[24+rdi],r10d + mov DWORD[28+rdi],r11d + + jbe NEAR $L$oop_avx2 + lea rbp,[rsp] + + + + +$L$done_avx2: + mov rsi,QWORD[88+rbp] + + vzeroupper + movaps xmm6,XMMWORD[((64+32))+rbp] + movaps xmm7,XMMWORD[((64+48))+rbp] + movaps xmm8,XMMWORD[((64+64))+rbp] + movaps xmm9,XMMWORD[((64+80))+rbp] + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$epilogue_avx2: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha256_block_data_order_avx2: +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$in_prologue + lea r10,[$L$avx2_shortcut] + cmp rbx,r10 + jb NEAR $L$not_in_avx2 + + and rax,-256*4 + add rax,448 +$L$not_in_avx2: + mov rsi,rax + mov rax,QWORD[((64+24))+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + + lea r10,[$L$epilogue] + cmp rbx,r10 + jb NEAR $L$in_prologue + + lea rsi,[((64+32))+rsi] + lea rdi,[512+r8] + mov ecx,8 + DD 0xa548f3fc + +$L$in_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +ALIGN 16 +shaext_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + lea r10,[$L$prologue_shaext] + cmp rbx,r10 + jb NEAR $L$in_prologue + + lea r10,[$L$epilogue_shaext] + cmp rbx,r10 + jae NEAR $L$in_prologue + + lea rsi,[((-8-80))+rax] + lea rdi,[512+r8] + mov ecx,10 + DD 0xa548f3fc + + jmp NEAR $L$in_prologue + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_sha256_block_data_order wrt ..imagebase + DD $L$SEH_end_sha256_block_data_order wrt ..imagebase + DD $L$SEH_info_sha256_block_data_order wrt ..imagebase + DD $L$SEH_begin_sha256_block_data_order_shaext wrt ..imagebase + DD $L$SEH_end_sha256_block_data_order_shaext wrt ..imagebase + DD $L$SEH_info_sha256_block_data_order_shaext wrt ..imagebase + DD $L$SEH_begin_sha256_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_end_sha256_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_info_sha256_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_begin_sha256_block_data_order_avx wrt ..imagebase + DD $L$SEH_end_sha256_block_data_order_avx wrt ..imagebase + DD $L$SEH_info_sha256_block_data_order_avx wrt ..imagebase + DD $L$SEH_begin_sha256_block_data_order_avx2 wrt ..imagebase + DD $L$SEH_end_sha256_block_data_order_avx2 wrt ..imagebase + DD $L$SEH_info_sha256_block_data_order_avx2 wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_sha256_block_data_order: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase +$L$SEH_info_sha256_block_data_order_shaext: +DB 9,0,0,0 + DD shaext_handler wrt ..imagebase +$L$SEH_info_sha256_block_data_order_ssse3: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase +$L$SEH_info_sha256_block_data_order_avx: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase +$L$SEH_info_sha256_block_data_order_avx2: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase diff --git a/vere/ext/openssl/gen/windows-x86_64/crypto/sha/sha512-x86_64.asm b/vere/ext/openssl/gen/windows-x86_64/crypto/sha/sha512-x86_64.asm new file mode 100644 index 0000000..fc2269c --- /dev/null +++ b/vere/ext/openssl/gen/windows-x86_64/crypto/sha/sha512-x86_64.asm @@ -0,0 +1,5665 @@ +default rel +%define XMMWORD +%define YMMWORD +%define ZMMWORD +section .text code align=64 + + +EXTERN OPENSSL_ia32cap_P +global sha512_block_data_order + +ALIGN 16 +sha512_block_data_order: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha512_block_data_order: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + + lea r11,[OPENSSL_ia32cap_P] + mov r9d,DWORD[r11] + mov r10d,DWORD[4+r11] + mov r11d,DWORD[8+r11] + test r10d,2048 + jnz NEAR $L$xop_shortcut + and r11d,296 + cmp r11d,296 + je NEAR $L$avx2_shortcut + and r9d,1073741824 + and r10d,268435968 + or r10d,r9d + cmp r10d,1342177792 + je NEAR $L$avx_shortcut + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + shl rdx,4 + sub rsp,16*8+4*8 + lea rdx,[rdx*8+rsi] + and rsp,-64 + mov QWORD[((128+0))+rsp],rdi + mov QWORD[((128+8))+rsp],rsi + mov QWORD[((128+16))+rsp],rdx + mov QWORD[152+rsp],rax + +$L$prologue: + + mov rax,QWORD[rdi] + mov rbx,QWORD[8+rdi] + mov rcx,QWORD[16+rdi] + mov rdx,QWORD[24+rdi] + mov r8,QWORD[32+rdi] + mov r9,QWORD[40+rdi] + mov r10,QWORD[48+rdi] + mov r11,QWORD[56+rdi] + jmp NEAR $L$loop + +ALIGN 16 +$L$loop: + mov rdi,rbx + lea rbp,[K512] + xor rdi,rcx + mov r12,QWORD[rsi] + mov r13,r8 + mov r14,rax + bswap r12 + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD[rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,[8+rbp] + add r11,r14 + mov r12,QWORD[8+rsi] + mov r13,rdx + mov r14,r11 + bswap r12 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD[8+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,[24+rbp] + add r10,r14 + mov r12,QWORD[16+rsi] + mov r13,rcx + mov r14,r10 + bswap r12 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD[16+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,[8+rbp] + add r9,r14 + mov r12,QWORD[24+rsi] + mov r13,rbx + mov r14,r9 + bswap r12 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD[24+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,[24+rbp] + add r8,r14 + mov r12,QWORD[32+rsi] + mov r13,rax + mov r14,r8 + bswap r12 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD[32+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,[8+rbp] + add rdx,r14 + mov r12,QWORD[40+rsi] + mov r13,r11 + mov r14,rdx + bswap r12 + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD[40+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,[24+rbp] + add rcx,r14 + mov r12,QWORD[48+rsi] + mov r13,r10 + mov r14,rcx + bswap r12 + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD[48+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,[8+rbp] + add rbx,r14 + mov r12,QWORD[56+rsi] + mov r13,r9 + mov r14,rbx + bswap r12 + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD[56+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,[24+rbp] + add rax,r14 + mov r12,QWORD[64+rsi] + mov r13,r8 + mov r14,rax + bswap r12 + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD[64+rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,[8+rbp] + add r11,r14 + mov r12,QWORD[72+rsi] + mov r13,rdx + mov r14,r11 + bswap r12 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD[72+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,[24+rbp] + add r10,r14 + mov r12,QWORD[80+rsi] + mov r13,rcx + mov r14,r10 + bswap r12 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD[80+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,[8+rbp] + add r9,r14 + mov r12,QWORD[88+rsi] + mov r13,rbx + mov r14,r9 + bswap r12 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD[88+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,[24+rbp] + add r8,r14 + mov r12,QWORD[96+rsi] + mov r13,rax + mov r14,r8 + bswap r12 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD[96+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,[8+rbp] + add rdx,r14 + mov r12,QWORD[104+rsi] + mov r13,r11 + mov r14,rdx + bswap r12 + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD[104+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,[24+rbp] + add rcx,r14 + mov r12,QWORD[112+rsi] + mov r13,r10 + mov r14,rcx + bswap r12 + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD[112+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,[8+rbp] + add rbx,r14 + mov r12,QWORD[120+rsi] + mov r13,r9 + mov r14,rbx + bswap r12 + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD[120+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,[24+rbp] + jmp NEAR $L$rounds_16_xx +ALIGN 16 +$L$rounds_16_xx: + mov r13,QWORD[8+rsp] + mov r15,QWORD[112+rsp] + + mov r12,r13 + ror r13,7 + add rax,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[72+rsp] + + add r12,QWORD[rsp] + mov r13,r8 + add r12,r15 + mov r14,rax + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD[rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[16+rsp] + mov rdi,QWORD[120+rsp] + + mov r12,r13 + ror r13,7 + add r11,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[80+rsp] + + add r12,QWORD[8+rsp] + mov r13,rdx + add r12,rdi + mov r14,r11 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD[8+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[24+rsp] + mov r15,QWORD[rsp] + + mov r12,r13 + ror r13,7 + add r10,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[88+rsp] + + add r12,QWORD[16+rsp] + mov r13,rcx + add r12,r15 + mov r14,r10 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD[16+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[32+rsp] + mov rdi,QWORD[8+rsp] + + mov r12,r13 + ror r13,7 + add r9,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[96+rsp] + + add r12,QWORD[24+rsp] + mov r13,rbx + add r12,rdi + mov r14,r9 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD[24+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[40+rsp] + mov r15,QWORD[16+rsp] + + mov r12,r13 + ror r13,7 + add r8,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[104+rsp] + + add r12,QWORD[32+rsp] + mov r13,rax + add r12,r15 + mov r14,r8 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD[32+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[48+rsp] + mov rdi,QWORD[24+rsp] + + mov r12,r13 + ror r13,7 + add rdx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[112+rsp] + + add r12,QWORD[40+rsp] + mov r13,r11 + add r12,rdi + mov r14,rdx + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD[40+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[56+rsp] + mov r15,QWORD[32+rsp] + + mov r12,r13 + ror r13,7 + add rcx,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[120+rsp] + + add r12,QWORD[48+rsp] + mov r13,r10 + add r12,r15 + mov r14,rcx + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD[48+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[64+rsp] + mov rdi,QWORD[40+rsp] + + mov r12,r13 + ror r13,7 + add rbx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[rsp] + + add r12,QWORD[56+rsp] + mov r13,r9 + add r12,rdi + mov r14,rbx + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD[56+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[72+rsp] + mov r15,QWORD[48+rsp] + + mov r12,r13 + ror r13,7 + add rax,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[8+rsp] + + add r12,QWORD[64+rsp] + mov r13,r8 + add r12,r15 + mov r14,rax + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD[64+rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[80+rsp] + mov rdi,QWORD[56+rsp] + + mov r12,r13 + ror r13,7 + add r11,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[16+rsp] + + add r12,QWORD[72+rsp] + mov r13,rdx + add r12,rdi + mov r14,r11 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD[72+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[88+rsp] + mov r15,QWORD[64+rsp] + + mov r12,r13 + ror r13,7 + add r10,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[24+rsp] + + add r12,QWORD[80+rsp] + mov r13,rcx + add r12,r15 + mov r14,r10 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD[80+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[96+rsp] + mov rdi,QWORD[72+rsp] + + mov r12,r13 + ror r13,7 + add r9,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[32+rsp] + + add r12,QWORD[88+rsp] + mov r13,rbx + add r12,rdi + mov r14,r9 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD[88+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[104+rsp] + mov r15,QWORD[80+rsp] + + mov r12,r13 + ror r13,7 + add r8,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[40+rsp] + + add r12,QWORD[96+rsp] + mov r13,rax + add r12,r15 + mov r14,r8 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD[96+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[112+rsp] + mov rdi,QWORD[88+rsp] + + mov r12,r13 + ror r13,7 + add rdx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[48+rsp] + + add r12,QWORD[104+rsp] + mov r13,r11 + add r12,rdi + mov r14,rdx + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD[104+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,[24+rbp] + mov r13,QWORD[120+rsp] + mov r15,QWORD[96+rsp] + + mov r12,r13 + ror r13,7 + add rcx,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD[56+rsp] + + add r12,QWORD[112+rsp] + mov r13,r10 + add r12,r15 + mov r14,rcx + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD[112+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,[8+rbp] + mov r13,QWORD[rsp] + mov rdi,QWORD[104+rsp] + + mov r12,r13 + ror r13,7 + add rbx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD[64+rsp] + + add r12,QWORD[120+rsp] + mov r13,r9 + add r12,rdi + mov r14,rbx + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD[120+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,[24+rbp] + cmp BYTE[7+rbp],0 + jnz NEAR $L$rounds_16_xx + + mov rdi,QWORD[((128+0))+rsp] + add rax,r14 + lea rsi,[128+rsi] + + add rax,QWORD[rdi] + add rbx,QWORD[8+rdi] + add rcx,QWORD[16+rdi] + add rdx,QWORD[24+rdi] + add r8,QWORD[32+rdi] + add r9,QWORD[40+rdi] + add r10,QWORD[48+rdi] + add r11,QWORD[56+rdi] + + cmp rsi,QWORD[((128+16))+rsp] + + mov QWORD[rdi],rax + mov QWORD[8+rdi],rbx + mov QWORD[16+rdi],rcx + mov QWORD[24+rdi],rdx + mov QWORD[32+rdi],r8 + mov QWORD[40+rdi],r9 + mov QWORD[48+rdi],r10 + mov QWORD[56+rdi],r11 + jb NEAR $L$loop + + mov rsi,QWORD[152+rsp] + + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$epilogue: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha512_block_data_order: +ALIGN 64 + +K512: + DQ 0x428a2f98d728ae22,0x7137449123ef65cd + DQ 0x428a2f98d728ae22,0x7137449123ef65cd + DQ 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + DQ 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + DQ 0x3956c25bf348b538,0x59f111f1b605d019 + DQ 0x3956c25bf348b538,0x59f111f1b605d019 + DQ 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + DQ 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + DQ 0xd807aa98a3030242,0x12835b0145706fbe + DQ 0xd807aa98a3030242,0x12835b0145706fbe + DQ 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + DQ 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + DQ 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + DQ 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + DQ 0x9bdc06a725c71235,0xc19bf174cf692694 + DQ 0x9bdc06a725c71235,0xc19bf174cf692694 + DQ 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + DQ 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + DQ 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + DQ 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + DQ 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + DQ 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + DQ 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + DQ 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + DQ 0x983e5152ee66dfab,0xa831c66d2db43210 + DQ 0x983e5152ee66dfab,0xa831c66d2db43210 + DQ 0xb00327c898fb213f,0xbf597fc7beef0ee4 + DQ 0xb00327c898fb213f,0xbf597fc7beef0ee4 + DQ 0xc6e00bf33da88fc2,0xd5a79147930aa725 + DQ 0xc6e00bf33da88fc2,0xd5a79147930aa725 + DQ 0x06ca6351e003826f,0x142929670a0e6e70 + DQ 0x06ca6351e003826f,0x142929670a0e6e70 + DQ 0x27b70a8546d22ffc,0x2e1b21385c26c926 + DQ 0x27b70a8546d22ffc,0x2e1b21385c26c926 + DQ 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + DQ 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + DQ 0x650a73548baf63de,0x766a0abb3c77b2a8 + DQ 0x650a73548baf63de,0x766a0abb3c77b2a8 + DQ 0x81c2c92e47edaee6,0x92722c851482353b + DQ 0x81c2c92e47edaee6,0x92722c851482353b + DQ 0xa2bfe8a14cf10364,0xa81a664bbc423001 + DQ 0xa2bfe8a14cf10364,0xa81a664bbc423001 + DQ 0xc24b8b70d0f89791,0xc76c51a30654be30 + DQ 0xc24b8b70d0f89791,0xc76c51a30654be30 + DQ 0xd192e819d6ef5218,0xd69906245565a910 + DQ 0xd192e819d6ef5218,0xd69906245565a910 + DQ 0xf40e35855771202a,0x106aa07032bbd1b8 + DQ 0xf40e35855771202a,0x106aa07032bbd1b8 + DQ 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + DQ 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + DQ 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + DQ 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + DQ 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + DQ 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + DQ 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + DQ 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + DQ 0x748f82ee5defb2fc,0x78a5636f43172f60 + DQ 0x748f82ee5defb2fc,0x78a5636f43172f60 + DQ 0x84c87814a1f0ab72,0x8cc702081a6439ec + DQ 0x84c87814a1f0ab72,0x8cc702081a6439ec + DQ 0x90befffa23631e28,0xa4506cebde82bde9 + DQ 0x90befffa23631e28,0xa4506cebde82bde9 + DQ 0xbef9a3f7b2c67915,0xc67178f2e372532b + DQ 0xbef9a3f7b2c67915,0xc67178f2e372532b + DQ 0xca273eceea26619c,0xd186b8c721c0c207 + DQ 0xca273eceea26619c,0xd186b8c721c0c207 + DQ 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + DQ 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + DQ 0x06f067aa72176fba,0x0a637dc5a2c898a6 + DQ 0x06f067aa72176fba,0x0a637dc5a2c898a6 + DQ 0x113f9804bef90dae,0x1b710b35131c471b + DQ 0x113f9804bef90dae,0x1b710b35131c471b + DQ 0x28db77f523047d84,0x32caab7b40c72493 + DQ 0x28db77f523047d84,0x32caab7b40c72493 + DQ 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + DQ 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + DQ 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + DQ 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + DQ 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + DQ 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + + DQ 0x0001020304050607,0x08090a0b0c0d0e0f + DQ 0x0001020304050607,0x08090a0b0c0d0e0f +DB 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54 +DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +DB 111,114,103,62,0 + +ALIGN 64 +sha512_block_data_order_xop: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha512_block_data_order_xop: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +$L$xop_shortcut: + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + shl rdx,4 + sub rsp,256 + lea rdx,[rdx*8+rsi] + and rsp,-64 + mov QWORD[((128+0))+rsp],rdi + mov QWORD[((128+8))+rsp],rsi + mov QWORD[((128+16))+rsp],rdx + mov QWORD[152+rsp],rax + + movaps XMMWORD[(128+32)+rsp],xmm6 + movaps XMMWORD[(128+48)+rsp],xmm7 + movaps XMMWORD[(128+64)+rsp],xmm8 + movaps XMMWORD[(128+80)+rsp],xmm9 + movaps XMMWORD[(128+96)+rsp],xmm10 + movaps XMMWORD[(128+112)+rsp],xmm11 +$L$prologue_xop: + + vzeroupper + mov rax,QWORD[rdi] + mov rbx,QWORD[8+rdi] + mov rcx,QWORD[16+rdi] + mov rdx,QWORD[24+rdi] + mov r8,QWORD[32+rdi] + mov r9,QWORD[40+rdi] + mov r10,QWORD[48+rdi] + mov r11,QWORD[56+rdi] + jmp NEAR $L$loop_xop +ALIGN 16 +$L$loop_xop: + vmovdqa xmm11,XMMWORD[((K512+1280))] + vmovdqu xmm0,XMMWORD[rsi] + lea rbp,[((K512+128))] + vmovdqu xmm1,XMMWORD[16+rsi] + vmovdqu xmm2,XMMWORD[32+rsi] + vpshufb xmm0,xmm0,xmm11 + vmovdqu xmm3,XMMWORD[48+rsi] + vpshufb xmm1,xmm1,xmm11 + vmovdqu xmm4,XMMWORD[64+rsi] + vpshufb xmm2,xmm2,xmm11 + vmovdqu xmm5,XMMWORD[80+rsi] + vpshufb xmm3,xmm3,xmm11 + vmovdqu xmm6,XMMWORD[96+rsi] + vpshufb xmm4,xmm4,xmm11 + vmovdqu xmm7,XMMWORD[112+rsi] + vpshufb xmm5,xmm5,xmm11 + vpaddq xmm8,xmm0,XMMWORD[((-128))+rbp] + vpshufb xmm6,xmm6,xmm11 + vpaddq xmm9,xmm1,XMMWORD[((-96))+rbp] + vpshufb xmm7,xmm7,xmm11 + vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp] + vpaddq xmm11,xmm3,XMMWORD[((-32))+rbp] + vmovdqa XMMWORD[rsp],xmm8 + vpaddq xmm8,xmm4,XMMWORD[rbp] + vmovdqa XMMWORD[16+rsp],xmm9 + vpaddq xmm9,xmm5,XMMWORD[32+rbp] + vmovdqa XMMWORD[32+rsp],xmm10 + vpaddq xmm10,xmm6,XMMWORD[64+rbp] + vmovdqa XMMWORD[48+rsp],xmm11 + vpaddq xmm11,xmm7,XMMWORD[96+rbp] + vmovdqa XMMWORD[64+rsp],xmm8 + mov r14,rax + vmovdqa XMMWORD[80+rsp],xmm9 + mov rdi,rbx + vmovdqa XMMWORD[96+rsp],xmm10 + xor rdi,rcx + vmovdqa XMMWORD[112+rsp],xmm11 + mov r13,r8 + jmp NEAR $L$xop_00_47 + +ALIGN 16 +$L$xop_00_47: + add rbp,256 + vpalignr xmm8,xmm1,xmm0,8 + ror r13,23 + mov rax,r14 + vpalignr xmm11,xmm5,xmm4,8 + mov r12,r9 + ror r14,5 +DB 143,72,120,195,200,56 + xor r13,r8 + xor r12,r10 + vpsrlq xmm8,xmm8,7 + ror r13,4 + xor r14,rax + vpaddq xmm0,xmm0,xmm11 + and r12,r8 + xor r13,r8 + add r11,QWORD[rsp] + mov r15,rax +DB 143,72,120,195,209,7 + xor r12,r10 + ror r14,6 + vpxor xmm8,xmm8,xmm9 + xor r15,rbx + add r11,r12 + ror r13,14 + and rdi,r15 +DB 143,104,120,195,223,3 + xor r14,rax + add r11,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rbx + ror r14,28 + vpsrlq xmm10,xmm7,6 + add rdx,r11 + add r11,rdi + vpaddq xmm0,xmm0,xmm8 + mov r13,rdx + add r14,r11 +DB 143,72,120,195,203,42 + ror r13,23 + mov r11,r14 + vpxor xmm11,xmm11,xmm10 + mov r12,r8 + ror r14,5 + xor r13,rdx + xor r12,r9 + vpxor xmm11,xmm11,xmm9 + ror r13,4 + xor r14,r11 + and r12,rdx + xor r13,rdx + vpaddq xmm0,xmm0,xmm11 + add r10,QWORD[8+rsp] + mov rdi,r11 + xor r12,r9 + ror r14,6 + vpaddq xmm10,xmm0,XMMWORD[((-128))+rbp] + xor rdi,rax + add r10,r12 + ror r13,14 + and r15,rdi + xor r14,r11 + add r10,r13 + xor r15,rax + ror r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + vmovdqa XMMWORD[rsp],xmm10 + vpalignr xmm8,xmm2,xmm1,8 + ror r13,23 + mov r10,r14 + vpalignr xmm11,xmm6,xmm5,8 + mov r12,rdx + ror r14,5 +DB 143,72,120,195,200,56 + xor r13,rcx + xor r12,r8 + vpsrlq xmm8,xmm8,7 + ror r13,4 + xor r14,r10 + vpaddq xmm1,xmm1,xmm11 + and r12,rcx + xor r13,rcx + add r9,QWORD[16+rsp] + mov r15,r10 +DB 143,72,120,195,209,7 + xor r12,r8 + ror r14,6 + vpxor xmm8,xmm8,xmm9 + xor r15,r11 + add r9,r12 + ror r13,14 + and rdi,r15 +DB 143,104,120,195,216,3 + xor r14,r10 + add r9,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r11 + ror r14,28 + vpsrlq xmm10,xmm0,6 + add rbx,r9 + add r9,rdi + vpaddq xmm1,xmm1,xmm8 + mov r13,rbx + add r14,r9 +DB 143,72,120,195,203,42 + ror r13,23 + mov r9,r14 + vpxor xmm11,xmm11,xmm10 + mov r12,rcx + ror r14,5 + xor r13,rbx + xor r12,rdx + vpxor xmm11,xmm11,xmm9 + ror r13,4 + xor r14,r9 + and r12,rbx + xor r13,rbx + vpaddq xmm1,xmm1,xmm11 + add r8,QWORD[24+rsp] + mov rdi,r9 + xor r12,rdx + ror r14,6 + vpaddq xmm10,xmm1,XMMWORD[((-96))+rbp] + xor rdi,r10 + add r8,r12 + ror r13,14 + and r15,rdi + xor r14,r9 + add r8,r13 + xor r15,r10 + ror r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + vmovdqa XMMWORD[16+rsp],xmm10 + vpalignr xmm8,xmm3,xmm2,8 + ror r13,23 + mov r8,r14 + vpalignr xmm11,xmm7,xmm6,8 + mov r12,rbx + ror r14,5 +DB 143,72,120,195,200,56 + xor r13,rax + xor r12,rcx + vpsrlq xmm8,xmm8,7 + ror r13,4 + xor r14,r8 + vpaddq xmm2,xmm2,xmm11 + and r12,rax + xor r13,rax + add rdx,QWORD[32+rsp] + mov r15,r8 +DB 143,72,120,195,209,7 + xor r12,rcx + ror r14,6 + vpxor xmm8,xmm8,xmm9 + xor r15,r9 + add rdx,r12 + ror r13,14 + and rdi,r15 +DB 143,104,120,195,217,3 + xor r14,r8 + add rdx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r9 + ror r14,28 + vpsrlq xmm10,xmm1,6 + add r11,rdx + add rdx,rdi + vpaddq xmm2,xmm2,xmm8 + mov r13,r11 + add r14,rdx +DB 143,72,120,195,203,42 + ror r13,23 + mov rdx,r14 + vpxor xmm11,xmm11,xmm10 + mov r12,rax + ror r14,5 + xor r13,r11 + xor r12,rbx + vpxor xmm11,xmm11,xmm9 + ror r13,4 + xor r14,rdx + and r12,r11 + xor r13,r11 + vpaddq xmm2,xmm2,xmm11 + add rcx,QWORD[40+rsp] + mov rdi,rdx + xor r12,rbx + ror r14,6 + vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp] + xor rdi,r8 + add rcx,r12 + ror r13,14 + and r15,rdi + xor r14,rdx + add rcx,r13 + xor r15,r8 + ror r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + vmovdqa XMMWORD[32+rsp],xmm10 + vpalignr xmm8,xmm4,xmm3,8 + ror r13,23 + mov rcx,r14 + vpalignr xmm11,xmm0,xmm7,8 + mov r12,r11 + ror r14,5 +DB 143,72,120,195,200,56 + xor r13,r10 + xor r12,rax + vpsrlq xmm8,xmm8,7 + ror r13,4 + xor r14,rcx + vpaddq xmm3,xmm3,xmm11 + and r12,r10 + xor r13,r10 + add rbx,QWORD[48+rsp] + mov r15,rcx +DB 143,72,120,195,209,7 + xor r12,rax + ror r14,6 + vpxor xmm8,xmm8,xmm9 + xor r15,rdx + add rbx,r12 + ror r13,14 + and rdi,r15 +DB 143,104,120,195,218,3 + xor r14,rcx + add rbx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rdx + ror r14,28 + vpsrlq xmm10,xmm2,6 + add r9,rbx + add rbx,rdi + vpaddq xmm3,xmm3,xmm8 + mov r13,r9 + add r14,rbx +DB 143,72,120,195,203,42 + ror r13,23 + mov rbx,r14 + vpxor xmm11,xmm11,xmm10 + mov r12,r10 + ror r14,5 + xor r13,r9 + xor r12,r11 + vpxor xmm11,xmm11,xmm9 + ror r13,4 + xor r14,rbx + and r12,r9 + xor r13,r9 + vpaddq xmm3,xmm3,xmm11 + add rax,QWORD[56+rsp] + mov rdi,rbx + xor r12,r11 + ror r14,6 + vpaddq xmm10,xmm3,XMMWORD[((-32))+rbp] + xor rdi,rcx + add rax,r12 + ror r13,14 + and r15,rdi + xor r14,rbx + add rax,r13 + xor r15,rcx + ror r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + vmovdqa XMMWORD[48+rsp],xmm10 + vpalignr xmm8,xmm5,xmm4,8 + ror r13,23 + mov rax,r14 + vpalignr xmm11,xmm1,xmm0,8 + mov r12,r9 + ror r14,5 +DB 143,72,120,195,200,56 + xor r13,r8 + xor r12,r10 + vpsrlq xmm8,xmm8,7 + ror r13,4 + xor r14,rax + vpaddq xmm4,xmm4,xmm11 + and r12,r8 + xor r13,r8 + add r11,QWORD[64+rsp] + mov r15,rax +DB 143,72,120,195,209,7 + xor r12,r10 + ror r14,6 + vpxor xmm8,xmm8,xmm9 + xor r15,rbx + add r11,r12 + ror r13,14 + and rdi,r15 +DB 143,104,120,195,219,3 + xor r14,rax + add r11,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rbx + ror r14,28 + vpsrlq xmm10,xmm3,6 + add rdx,r11 + add r11,rdi + vpaddq xmm4,xmm4,xmm8 + mov r13,rdx + add r14,r11 +DB 143,72,120,195,203,42 + ror r13,23 + mov r11,r14 + vpxor xmm11,xmm11,xmm10 + mov r12,r8 + ror r14,5 + xor r13,rdx + xor r12,r9 + vpxor xmm11,xmm11,xmm9 + ror r13,4 + xor r14,r11 + and r12,rdx + xor r13,rdx + vpaddq xmm4,xmm4,xmm11 + add r10,QWORD[72+rsp] + mov rdi,r11 + xor r12,r9 + ror r14,6 + vpaddq xmm10,xmm4,XMMWORD[rbp] + xor rdi,rax + add r10,r12 + ror r13,14 + and r15,rdi + xor r14,r11 + add r10,r13 + xor r15,rax + ror r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + vmovdqa XMMWORD[64+rsp],xmm10 + vpalignr xmm8,xmm6,xmm5,8 + ror r13,23 + mov r10,r14 + vpalignr xmm11,xmm2,xmm1,8 + mov r12,rdx + ror r14,5 +DB 143,72,120,195,200,56 + xor r13,rcx + xor r12,r8 + vpsrlq xmm8,xmm8,7 + ror r13,4 + xor r14,r10 + vpaddq xmm5,xmm5,xmm11 + and r12,rcx + xor r13,rcx + add r9,QWORD[80+rsp] + mov r15,r10 +DB 143,72,120,195,209,7 + xor r12,r8 + ror r14,6 + vpxor xmm8,xmm8,xmm9 + xor r15,r11 + add r9,r12 + ror r13,14 + and rdi,r15 +DB 143,104,120,195,220,3 + xor r14,r10 + add r9,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r11 + ror r14,28 + vpsrlq xmm10,xmm4,6 + add rbx,r9 + add r9,rdi + vpaddq xmm5,xmm5,xmm8 + mov r13,rbx + add r14,r9 +DB 143,72,120,195,203,42 + ror r13,23 + mov r9,r14 + vpxor xmm11,xmm11,xmm10 + mov r12,rcx + ror r14,5 + xor r13,rbx + xor r12,rdx + vpxor xmm11,xmm11,xmm9 + ror r13,4 + xor r14,r9 + and r12,rbx + xor r13,rbx + vpaddq xmm5,xmm5,xmm11 + add r8,QWORD[88+rsp] + mov rdi,r9 + xor r12,rdx + ror r14,6 + vpaddq xmm10,xmm5,XMMWORD[32+rbp] + xor rdi,r10 + add r8,r12 + ror r13,14 + and r15,rdi + xor r14,r9 + add r8,r13 + xor r15,r10 + ror r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + vmovdqa XMMWORD[80+rsp],xmm10 + vpalignr xmm8,xmm7,xmm6,8 + ror r13,23 + mov r8,r14 + vpalignr xmm11,xmm3,xmm2,8 + mov r12,rbx + ror r14,5 +DB 143,72,120,195,200,56 + xor r13,rax + xor r12,rcx + vpsrlq xmm8,xmm8,7 + ror r13,4 + xor r14,r8 + vpaddq xmm6,xmm6,xmm11 + and r12,rax + xor r13,rax + add rdx,QWORD[96+rsp] + mov r15,r8 +DB 143,72,120,195,209,7 + xor r12,rcx + ror r14,6 + vpxor xmm8,xmm8,xmm9 + xor r15,r9 + add rdx,r12 + ror r13,14 + and rdi,r15 +DB 143,104,120,195,221,3 + xor r14,r8 + add rdx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r9 + ror r14,28 + vpsrlq xmm10,xmm5,6 + add r11,rdx + add rdx,rdi + vpaddq xmm6,xmm6,xmm8 + mov r13,r11 + add r14,rdx +DB 143,72,120,195,203,42 + ror r13,23 + mov rdx,r14 + vpxor xmm11,xmm11,xmm10 + mov r12,rax + ror r14,5 + xor r13,r11 + xor r12,rbx + vpxor xmm11,xmm11,xmm9 + ror r13,4 + xor r14,rdx + and r12,r11 + xor r13,r11 + vpaddq xmm6,xmm6,xmm11 + add rcx,QWORD[104+rsp] + mov rdi,rdx + xor r12,rbx + ror r14,6 + vpaddq xmm10,xmm6,XMMWORD[64+rbp] + xor rdi,r8 + add rcx,r12 + ror r13,14 + and r15,rdi + xor r14,rdx + add rcx,r13 + xor r15,r8 + ror r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + vmovdqa XMMWORD[96+rsp],xmm10 + vpalignr xmm8,xmm0,xmm7,8 + ror r13,23 + mov rcx,r14 + vpalignr xmm11,xmm4,xmm3,8 + mov r12,r11 + ror r14,5 +DB 143,72,120,195,200,56 + xor r13,r10 + xor r12,rax + vpsrlq xmm8,xmm8,7 + ror r13,4 + xor r14,rcx + vpaddq xmm7,xmm7,xmm11 + and r12,r10 + xor r13,r10 + add rbx,QWORD[112+rsp] + mov r15,rcx +DB 143,72,120,195,209,7 + xor r12,rax + ror r14,6 + vpxor xmm8,xmm8,xmm9 + xor r15,rdx + add rbx,r12 + ror r13,14 + and rdi,r15 +DB 143,104,120,195,222,3 + xor r14,rcx + add rbx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rdx + ror r14,28 + vpsrlq xmm10,xmm6,6 + add r9,rbx + add rbx,rdi + vpaddq xmm7,xmm7,xmm8 + mov r13,r9 + add r14,rbx +DB 143,72,120,195,203,42 + ror r13,23 + mov rbx,r14 + vpxor xmm11,xmm11,xmm10 + mov r12,r10 + ror r14,5 + xor r13,r9 + xor r12,r11 + vpxor xmm11,xmm11,xmm9 + ror r13,4 + xor r14,rbx + and r12,r9 + xor r13,r9 + vpaddq xmm7,xmm7,xmm11 + add rax,QWORD[120+rsp] + mov rdi,rbx + xor r12,r11 + ror r14,6 + vpaddq xmm10,xmm7,XMMWORD[96+rbp] + xor rdi,rcx + add rax,r12 + ror r13,14 + and r15,rdi + xor r14,rbx + add rax,r13 + xor r15,rcx + ror r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + vmovdqa XMMWORD[112+rsp],xmm10 + cmp BYTE[135+rbp],0 + jne NEAR $L$xop_00_47 + ror r13,23 + mov rax,r14 + mov r12,r9 + ror r14,5 + xor r13,r8 + xor r12,r10 + ror r13,4 + xor r14,rax + and r12,r8 + xor r13,r8 + add r11,QWORD[rsp] + mov r15,rax + xor r12,r10 + ror r14,6 + xor r15,rbx + add r11,r12 + ror r13,14 + and rdi,r15 + xor r14,rax + add r11,r13 + xor rdi,rbx + ror r14,28 + add rdx,r11 + add r11,rdi + mov r13,rdx + add r14,r11 + ror r13,23 + mov r11,r14 + mov r12,r8 + ror r14,5 + xor r13,rdx + xor r12,r9 + ror r13,4 + xor r14,r11 + and r12,rdx + xor r13,rdx + add r10,QWORD[8+rsp] + mov rdi,r11 + xor r12,r9 + ror r14,6 + xor rdi,rax + add r10,r12 + ror r13,14 + and r15,rdi + xor r14,r11 + add r10,r13 + xor r15,rax + ror r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + ror r13,23 + mov r10,r14 + mov r12,rdx + ror r14,5 + xor r13,rcx + xor r12,r8 + ror r13,4 + xor r14,r10 + and r12,rcx + xor r13,rcx + add r9,QWORD[16+rsp] + mov r15,r10 + xor r12,r8 + ror r14,6 + xor r15,r11 + add r9,r12 + ror r13,14 + and rdi,r15 + xor r14,r10 + add r9,r13 + xor rdi,r11 + ror r14,28 + add rbx,r9 + add r9,rdi + mov r13,rbx + add r14,r9 + ror r13,23 + mov r9,r14 + mov r12,rcx + ror r14,5 + xor r13,rbx + xor r12,rdx + ror r13,4 + xor r14,r9 + and r12,rbx + xor r13,rbx + add r8,QWORD[24+rsp] + mov rdi,r9 + xor r12,rdx + ror r14,6 + xor rdi,r10 + add r8,r12 + ror r13,14 + and r15,rdi + xor r14,r9 + add r8,r13 + xor r15,r10 + ror r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + ror r13,23 + mov r8,r14 + mov r12,rbx + ror r14,5 + xor r13,rax + xor r12,rcx + ror r13,4 + xor r14,r8 + and r12,rax + xor r13,rax + add rdx,QWORD[32+rsp] + mov r15,r8 + xor r12,rcx + ror r14,6 + xor r15,r9 + add rdx,r12 + ror r13,14 + and rdi,r15 + xor r14,r8 + add rdx,r13 + xor rdi,r9 + ror r14,28 + add r11,rdx + add rdx,rdi + mov r13,r11 + add r14,rdx + ror r13,23 + mov rdx,r14 + mov r12,rax + ror r14,5 + xor r13,r11 + xor r12,rbx + ror r13,4 + xor r14,rdx + and r12,r11 + xor r13,r11 + add rcx,QWORD[40+rsp] + mov rdi,rdx + xor r12,rbx + ror r14,6 + xor rdi,r8 + add rcx,r12 + ror r13,14 + and r15,rdi + xor r14,rdx + add rcx,r13 + xor r15,r8 + ror r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + ror r13,23 + mov rcx,r14 + mov r12,r11 + ror r14,5 + xor r13,r10 + xor r12,rax + ror r13,4 + xor r14,rcx + and r12,r10 + xor r13,r10 + add rbx,QWORD[48+rsp] + mov r15,rcx + xor r12,rax + ror r14,6 + xor r15,rdx + add rbx,r12 + ror r13,14 + and rdi,r15 + xor r14,rcx + add rbx,r13 + xor rdi,rdx + ror r14,28 + add r9,rbx + add rbx,rdi + mov r13,r9 + add r14,rbx + ror r13,23 + mov rbx,r14 + mov r12,r10 + ror r14,5 + xor r13,r9 + xor r12,r11 + ror r13,4 + xor r14,rbx + and r12,r9 + xor r13,r9 + add rax,QWORD[56+rsp] + mov rdi,rbx + xor r12,r11 + ror r14,6 + xor rdi,rcx + add rax,r12 + ror r13,14 + and r15,rdi + xor r14,rbx + add rax,r13 + xor r15,rcx + ror r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + ror r13,23 + mov rax,r14 + mov r12,r9 + ror r14,5 + xor r13,r8 + xor r12,r10 + ror r13,4 + xor r14,rax + and r12,r8 + xor r13,r8 + add r11,QWORD[64+rsp] + mov r15,rax + xor r12,r10 + ror r14,6 + xor r15,rbx + add r11,r12 + ror r13,14 + and rdi,r15 + xor r14,rax + add r11,r13 + xor rdi,rbx + ror r14,28 + add rdx,r11 + add r11,rdi + mov r13,rdx + add r14,r11 + ror r13,23 + mov r11,r14 + mov r12,r8 + ror r14,5 + xor r13,rdx + xor r12,r9 + ror r13,4 + xor r14,r11 + and r12,rdx + xor r13,rdx + add r10,QWORD[72+rsp] + mov rdi,r11 + xor r12,r9 + ror r14,6 + xor rdi,rax + add r10,r12 + ror r13,14 + and r15,rdi + xor r14,r11 + add r10,r13 + xor r15,rax + ror r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + ror r13,23 + mov r10,r14 + mov r12,rdx + ror r14,5 + xor r13,rcx + xor r12,r8 + ror r13,4 + xor r14,r10 + and r12,rcx + xor r13,rcx + add r9,QWORD[80+rsp] + mov r15,r10 + xor r12,r8 + ror r14,6 + xor r15,r11 + add r9,r12 + ror r13,14 + and rdi,r15 + xor r14,r10 + add r9,r13 + xor rdi,r11 + ror r14,28 + add rbx,r9 + add r9,rdi + mov r13,rbx + add r14,r9 + ror r13,23 + mov r9,r14 + mov r12,rcx + ror r14,5 + xor r13,rbx + xor r12,rdx + ror r13,4 + xor r14,r9 + and r12,rbx + xor r13,rbx + add r8,QWORD[88+rsp] + mov rdi,r9 + xor r12,rdx + ror r14,6 + xor rdi,r10 + add r8,r12 + ror r13,14 + and r15,rdi + xor r14,r9 + add r8,r13 + xor r15,r10 + ror r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + ror r13,23 + mov r8,r14 + mov r12,rbx + ror r14,5 + xor r13,rax + xor r12,rcx + ror r13,4 + xor r14,r8 + and r12,rax + xor r13,rax + add rdx,QWORD[96+rsp] + mov r15,r8 + xor r12,rcx + ror r14,6 + xor r15,r9 + add rdx,r12 + ror r13,14 + and rdi,r15 + xor r14,r8 + add rdx,r13 + xor rdi,r9 + ror r14,28 + add r11,rdx + add rdx,rdi + mov r13,r11 + add r14,rdx + ror r13,23 + mov rdx,r14 + mov r12,rax + ror r14,5 + xor r13,r11 + xor r12,rbx + ror r13,4 + xor r14,rdx + and r12,r11 + xor r13,r11 + add rcx,QWORD[104+rsp] + mov rdi,rdx + xor r12,rbx + ror r14,6 + xor rdi,r8 + add rcx,r12 + ror r13,14 + and r15,rdi + xor r14,rdx + add rcx,r13 + xor r15,r8 + ror r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + ror r13,23 + mov rcx,r14 + mov r12,r11 + ror r14,5 + xor r13,r10 + xor r12,rax + ror r13,4 + xor r14,rcx + and r12,r10 + xor r13,r10 + add rbx,QWORD[112+rsp] + mov r15,rcx + xor r12,rax + ror r14,6 + xor r15,rdx + add rbx,r12 + ror r13,14 + and rdi,r15 + xor r14,rcx + add rbx,r13 + xor rdi,rdx + ror r14,28 + add r9,rbx + add rbx,rdi + mov r13,r9 + add r14,rbx + ror r13,23 + mov rbx,r14 + mov r12,r10 + ror r14,5 + xor r13,r9 + xor r12,r11 + ror r13,4 + xor r14,rbx + and r12,r9 + xor r13,r9 + add rax,QWORD[120+rsp] + mov rdi,rbx + xor r12,r11 + ror r14,6 + xor rdi,rcx + add rax,r12 + ror r13,14 + and r15,rdi + xor r14,rbx + add rax,r13 + xor r15,rcx + ror r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + mov rdi,QWORD[((128+0))+rsp] + mov rax,r14 + + add rax,QWORD[rdi] + lea rsi,[128+rsi] + add rbx,QWORD[8+rdi] + add rcx,QWORD[16+rdi] + add rdx,QWORD[24+rdi] + add r8,QWORD[32+rdi] + add r9,QWORD[40+rdi] + add r10,QWORD[48+rdi] + add r11,QWORD[56+rdi] + + cmp rsi,QWORD[((128+16))+rsp] + + mov QWORD[rdi],rax + mov QWORD[8+rdi],rbx + mov QWORD[16+rdi],rcx + mov QWORD[24+rdi],rdx + mov QWORD[32+rdi],r8 + mov QWORD[40+rdi],r9 + mov QWORD[48+rdi],r10 + mov QWORD[56+rdi],r11 + jb NEAR $L$loop_xop + + mov rsi,QWORD[152+rsp] + + vzeroupper + movaps xmm6,XMMWORD[((128+32))+rsp] + movaps xmm7,XMMWORD[((128+48))+rsp] + movaps xmm8,XMMWORD[((128+64))+rsp] + movaps xmm9,XMMWORD[((128+80))+rsp] + movaps xmm10,XMMWORD[((128+96))+rsp] + movaps xmm11,XMMWORD[((128+112))+rsp] + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$epilogue_xop: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha512_block_data_order_xop: + +ALIGN 64 +sha512_block_data_order_avx: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha512_block_data_order_avx: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +$L$avx_shortcut: + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + shl rdx,4 + sub rsp,256 + lea rdx,[rdx*8+rsi] + and rsp,-64 + mov QWORD[((128+0))+rsp],rdi + mov QWORD[((128+8))+rsp],rsi + mov QWORD[((128+16))+rsp],rdx + mov QWORD[152+rsp],rax + + movaps XMMWORD[(128+32)+rsp],xmm6 + movaps XMMWORD[(128+48)+rsp],xmm7 + movaps XMMWORD[(128+64)+rsp],xmm8 + movaps XMMWORD[(128+80)+rsp],xmm9 + movaps XMMWORD[(128+96)+rsp],xmm10 + movaps XMMWORD[(128+112)+rsp],xmm11 +$L$prologue_avx: + + vzeroupper + mov rax,QWORD[rdi] + mov rbx,QWORD[8+rdi] + mov rcx,QWORD[16+rdi] + mov rdx,QWORD[24+rdi] + mov r8,QWORD[32+rdi] + mov r9,QWORD[40+rdi] + mov r10,QWORD[48+rdi] + mov r11,QWORD[56+rdi] + jmp NEAR $L$loop_avx +ALIGN 16 +$L$loop_avx: + vmovdqa xmm11,XMMWORD[((K512+1280))] + vmovdqu xmm0,XMMWORD[rsi] + lea rbp,[((K512+128))] + vmovdqu xmm1,XMMWORD[16+rsi] + vmovdqu xmm2,XMMWORD[32+rsi] + vpshufb xmm0,xmm0,xmm11 + vmovdqu xmm3,XMMWORD[48+rsi] + vpshufb xmm1,xmm1,xmm11 + vmovdqu xmm4,XMMWORD[64+rsi] + vpshufb xmm2,xmm2,xmm11 + vmovdqu xmm5,XMMWORD[80+rsi] + vpshufb xmm3,xmm3,xmm11 + vmovdqu xmm6,XMMWORD[96+rsi] + vpshufb xmm4,xmm4,xmm11 + vmovdqu xmm7,XMMWORD[112+rsi] + vpshufb xmm5,xmm5,xmm11 + vpaddq xmm8,xmm0,XMMWORD[((-128))+rbp] + vpshufb xmm6,xmm6,xmm11 + vpaddq xmm9,xmm1,XMMWORD[((-96))+rbp] + vpshufb xmm7,xmm7,xmm11 + vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp] + vpaddq xmm11,xmm3,XMMWORD[((-32))+rbp] + vmovdqa XMMWORD[rsp],xmm8 + vpaddq xmm8,xmm4,XMMWORD[rbp] + vmovdqa XMMWORD[16+rsp],xmm9 + vpaddq xmm9,xmm5,XMMWORD[32+rbp] + vmovdqa XMMWORD[32+rsp],xmm10 + vpaddq xmm10,xmm6,XMMWORD[64+rbp] + vmovdqa XMMWORD[48+rsp],xmm11 + vpaddq xmm11,xmm7,XMMWORD[96+rbp] + vmovdqa XMMWORD[64+rsp],xmm8 + mov r14,rax + vmovdqa XMMWORD[80+rsp],xmm9 + mov rdi,rbx + vmovdqa XMMWORD[96+rsp],xmm10 + xor rdi,rcx + vmovdqa XMMWORD[112+rsp],xmm11 + mov r13,r8 + jmp NEAR $L$avx_00_47 + +ALIGN 16 +$L$avx_00_47: + add rbp,256 + vpalignr xmm8,xmm1,xmm0,8 + shrd r13,r13,23 + mov rax,r14 + vpalignr xmm11,xmm5,xmm4,8 + mov r12,r9 + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,r8 + xor r12,r10 + vpaddq xmm0,xmm0,xmm11 + shrd r13,r13,4 + xor r14,rax + vpsrlq xmm11,xmm8,7 + and r12,r8 + xor r13,r8 + vpsllq xmm9,xmm8,56 + add r11,QWORD[rsp] + mov r15,rax + vpxor xmm8,xmm11,xmm10 + xor r12,r10 + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,rbx + add r11,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,rax + add r11,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rbx + shrd r14,r14,28 + vpsrlq xmm11,xmm7,6 + add rdx,r11 + add r11,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,rdx + add r14,r11 + vpsllq xmm10,xmm7,3 + shrd r13,r13,23 + mov r11,r14 + vpaddq xmm0,xmm0,xmm8 + mov r12,r8 + shrd r14,r14,5 + vpsrlq xmm9,xmm7,19 + xor r13,rdx + xor r12,r9 + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,r11 + vpsllq xmm10,xmm10,42 + and r12,rdx + xor r13,rdx + vpxor xmm11,xmm11,xmm9 + add r10,QWORD[8+rsp] + mov rdi,r11 + vpsrlq xmm9,xmm9,42 + xor r12,r9 + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,rax + add r10,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm0,xmm0,xmm11 + xor r14,r11 + add r10,r13 + vpaddq xmm10,xmm0,XMMWORD[((-128))+rbp] + xor r15,rax + shrd r14,r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + vmovdqa XMMWORD[rsp],xmm10 + vpalignr xmm8,xmm2,xmm1,8 + shrd r13,r13,23 + mov r10,r14 + vpalignr xmm11,xmm6,xmm5,8 + mov r12,rdx + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,rcx + xor r12,r8 + vpaddq xmm1,xmm1,xmm11 + shrd r13,r13,4 + xor r14,r10 + vpsrlq xmm11,xmm8,7 + and r12,rcx + xor r13,rcx + vpsllq xmm9,xmm8,56 + add r9,QWORD[16+rsp] + mov r15,r10 + vpxor xmm8,xmm11,xmm10 + xor r12,r8 + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,r11 + add r9,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,r10 + add r9,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r11 + shrd r14,r14,28 + vpsrlq xmm11,xmm0,6 + add rbx,r9 + add r9,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,rbx + add r14,r9 + vpsllq xmm10,xmm0,3 + shrd r13,r13,23 + mov r9,r14 + vpaddq xmm1,xmm1,xmm8 + mov r12,rcx + shrd r14,r14,5 + vpsrlq xmm9,xmm0,19 + xor r13,rbx + xor r12,rdx + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,r9 + vpsllq xmm10,xmm10,42 + and r12,rbx + xor r13,rbx + vpxor xmm11,xmm11,xmm9 + add r8,QWORD[24+rsp] + mov rdi,r9 + vpsrlq xmm9,xmm9,42 + xor r12,rdx + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,r10 + add r8,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm1,xmm1,xmm11 + xor r14,r9 + add r8,r13 + vpaddq xmm10,xmm1,XMMWORD[((-96))+rbp] + xor r15,r10 + shrd r14,r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + vmovdqa XMMWORD[16+rsp],xmm10 + vpalignr xmm8,xmm3,xmm2,8 + shrd r13,r13,23 + mov r8,r14 + vpalignr xmm11,xmm7,xmm6,8 + mov r12,rbx + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,rax + xor r12,rcx + vpaddq xmm2,xmm2,xmm11 + shrd r13,r13,4 + xor r14,r8 + vpsrlq xmm11,xmm8,7 + and r12,rax + xor r13,rax + vpsllq xmm9,xmm8,56 + add rdx,QWORD[32+rsp] + mov r15,r8 + vpxor xmm8,xmm11,xmm10 + xor r12,rcx + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,r9 + add rdx,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,r8 + add rdx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r9 + shrd r14,r14,28 + vpsrlq xmm11,xmm1,6 + add r11,rdx + add rdx,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,r11 + add r14,rdx + vpsllq xmm10,xmm1,3 + shrd r13,r13,23 + mov rdx,r14 + vpaddq xmm2,xmm2,xmm8 + mov r12,rax + shrd r14,r14,5 + vpsrlq xmm9,xmm1,19 + xor r13,r11 + xor r12,rbx + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,rdx + vpsllq xmm10,xmm10,42 + and r12,r11 + xor r13,r11 + vpxor xmm11,xmm11,xmm9 + add rcx,QWORD[40+rsp] + mov rdi,rdx + vpsrlq xmm9,xmm9,42 + xor r12,rbx + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,r8 + add rcx,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm2,xmm2,xmm11 + xor r14,rdx + add rcx,r13 + vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp] + xor r15,r8 + shrd r14,r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + vmovdqa XMMWORD[32+rsp],xmm10 + vpalignr xmm8,xmm4,xmm3,8 + shrd r13,r13,23 + mov rcx,r14 + vpalignr xmm11,xmm0,xmm7,8 + mov r12,r11 + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,r10 + xor r12,rax + vpaddq xmm3,xmm3,xmm11 + shrd r13,r13,4 + xor r14,rcx + vpsrlq xmm11,xmm8,7 + and r12,r10 + xor r13,r10 + vpsllq xmm9,xmm8,56 + add rbx,QWORD[48+rsp] + mov r15,rcx + vpxor xmm8,xmm11,xmm10 + xor r12,rax + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,rdx + add rbx,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,rcx + add rbx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rdx + shrd r14,r14,28 + vpsrlq xmm11,xmm2,6 + add r9,rbx + add rbx,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,r9 + add r14,rbx + vpsllq xmm10,xmm2,3 + shrd r13,r13,23 + mov rbx,r14 + vpaddq xmm3,xmm3,xmm8 + mov r12,r10 + shrd r14,r14,5 + vpsrlq xmm9,xmm2,19 + xor r13,r9 + xor r12,r11 + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,rbx + vpsllq xmm10,xmm10,42 + and r12,r9 + xor r13,r9 + vpxor xmm11,xmm11,xmm9 + add rax,QWORD[56+rsp] + mov rdi,rbx + vpsrlq xmm9,xmm9,42 + xor r12,r11 + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,rcx + add rax,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm3,xmm3,xmm11 + xor r14,rbx + add rax,r13 + vpaddq xmm10,xmm3,XMMWORD[((-32))+rbp] + xor r15,rcx + shrd r14,r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + vmovdqa XMMWORD[48+rsp],xmm10 + vpalignr xmm8,xmm5,xmm4,8 + shrd r13,r13,23 + mov rax,r14 + vpalignr xmm11,xmm1,xmm0,8 + mov r12,r9 + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,r8 + xor r12,r10 + vpaddq xmm4,xmm4,xmm11 + shrd r13,r13,4 + xor r14,rax + vpsrlq xmm11,xmm8,7 + and r12,r8 + xor r13,r8 + vpsllq xmm9,xmm8,56 + add r11,QWORD[64+rsp] + mov r15,rax + vpxor xmm8,xmm11,xmm10 + xor r12,r10 + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,rbx + add r11,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,rax + add r11,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rbx + shrd r14,r14,28 + vpsrlq xmm11,xmm3,6 + add rdx,r11 + add r11,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,rdx + add r14,r11 + vpsllq xmm10,xmm3,3 + shrd r13,r13,23 + mov r11,r14 + vpaddq xmm4,xmm4,xmm8 + mov r12,r8 + shrd r14,r14,5 + vpsrlq xmm9,xmm3,19 + xor r13,rdx + xor r12,r9 + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,r11 + vpsllq xmm10,xmm10,42 + and r12,rdx + xor r13,rdx + vpxor xmm11,xmm11,xmm9 + add r10,QWORD[72+rsp] + mov rdi,r11 + vpsrlq xmm9,xmm9,42 + xor r12,r9 + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,rax + add r10,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm4,xmm4,xmm11 + xor r14,r11 + add r10,r13 + vpaddq xmm10,xmm4,XMMWORD[rbp] + xor r15,rax + shrd r14,r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + vmovdqa XMMWORD[64+rsp],xmm10 + vpalignr xmm8,xmm6,xmm5,8 + shrd r13,r13,23 + mov r10,r14 + vpalignr xmm11,xmm2,xmm1,8 + mov r12,rdx + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,rcx + xor r12,r8 + vpaddq xmm5,xmm5,xmm11 + shrd r13,r13,4 + xor r14,r10 + vpsrlq xmm11,xmm8,7 + and r12,rcx + xor r13,rcx + vpsllq xmm9,xmm8,56 + add r9,QWORD[80+rsp] + mov r15,r10 + vpxor xmm8,xmm11,xmm10 + xor r12,r8 + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,r11 + add r9,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,r10 + add r9,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r11 + shrd r14,r14,28 + vpsrlq xmm11,xmm4,6 + add rbx,r9 + add r9,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,rbx + add r14,r9 + vpsllq xmm10,xmm4,3 + shrd r13,r13,23 + mov r9,r14 + vpaddq xmm5,xmm5,xmm8 + mov r12,rcx + shrd r14,r14,5 + vpsrlq xmm9,xmm4,19 + xor r13,rbx + xor r12,rdx + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,r9 + vpsllq xmm10,xmm10,42 + and r12,rbx + xor r13,rbx + vpxor xmm11,xmm11,xmm9 + add r8,QWORD[88+rsp] + mov rdi,r9 + vpsrlq xmm9,xmm9,42 + xor r12,rdx + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,r10 + add r8,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm5,xmm5,xmm11 + xor r14,r9 + add r8,r13 + vpaddq xmm10,xmm5,XMMWORD[32+rbp] + xor r15,r10 + shrd r14,r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + vmovdqa XMMWORD[80+rsp],xmm10 + vpalignr xmm8,xmm7,xmm6,8 + shrd r13,r13,23 + mov r8,r14 + vpalignr xmm11,xmm3,xmm2,8 + mov r12,rbx + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,rax + xor r12,rcx + vpaddq xmm6,xmm6,xmm11 + shrd r13,r13,4 + xor r14,r8 + vpsrlq xmm11,xmm8,7 + and r12,rax + xor r13,rax + vpsllq xmm9,xmm8,56 + add rdx,QWORD[96+rsp] + mov r15,r8 + vpxor xmm8,xmm11,xmm10 + xor r12,rcx + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,r9 + add rdx,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,r8 + add rdx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r9 + shrd r14,r14,28 + vpsrlq xmm11,xmm5,6 + add r11,rdx + add rdx,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,r11 + add r14,rdx + vpsllq xmm10,xmm5,3 + shrd r13,r13,23 + mov rdx,r14 + vpaddq xmm6,xmm6,xmm8 + mov r12,rax + shrd r14,r14,5 + vpsrlq xmm9,xmm5,19 + xor r13,r11 + xor r12,rbx + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,rdx + vpsllq xmm10,xmm10,42 + and r12,r11 + xor r13,r11 + vpxor xmm11,xmm11,xmm9 + add rcx,QWORD[104+rsp] + mov rdi,rdx + vpsrlq xmm9,xmm9,42 + xor r12,rbx + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,r8 + add rcx,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm6,xmm6,xmm11 + xor r14,rdx + add rcx,r13 + vpaddq xmm10,xmm6,XMMWORD[64+rbp] + xor r15,r8 + shrd r14,r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + vmovdqa XMMWORD[96+rsp],xmm10 + vpalignr xmm8,xmm0,xmm7,8 + shrd r13,r13,23 + mov rcx,r14 + vpalignr xmm11,xmm4,xmm3,8 + mov r12,r11 + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,r10 + xor r12,rax + vpaddq xmm7,xmm7,xmm11 + shrd r13,r13,4 + xor r14,rcx + vpsrlq xmm11,xmm8,7 + and r12,r10 + xor r13,r10 + vpsllq xmm9,xmm8,56 + add rbx,QWORD[112+rsp] + mov r15,rcx + vpxor xmm8,xmm11,xmm10 + xor r12,rax + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,rdx + add rbx,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,rcx + add rbx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rdx + shrd r14,r14,28 + vpsrlq xmm11,xmm6,6 + add r9,rbx + add rbx,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,r9 + add r14,rbx + vpsllq xmm10,xmm6,3 + shrd r13,r13,23 + mov rbx,r14 + vpaddq xmm7,xmm7,xmm8 + mov r12,r10 + shrd r14,r14,5 + vpsrlq xmm9,xmm6,19 + xor r13,r9 + xor r12,r11 + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,rbx + vpsllq xmm10,xmm10,42 + and r12,r9 + xor r13,r9 + vpxor xmm11,xmm11,xmm9 + add rax,QWORD[120+rsp] + mov rdi,rbx + vpsrlq xmm9,xmm9,42 + xor r12,r11 + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,rcx + add rax,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm7,xmm7,xmm11 + xor r14,rbx + add rax,r13 + vpaddq xmm10,xmm7,XMMWORD[96+rbp] + xor r15,rcx + shrd r14,r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + vmovdqa XMMWORD[112+rsp],xmm10 + cmp BYTE[135+rbp],0 + jne NEAR $L$avx_00_47 + shrd r13,r13,23 + mov rax,r14 + mov r12,r9 + shrd r14,r14,5 + xor r13,r8 + xor r12,r10 + shrd r13,r13,4 + xor r14,rax + and r12,r8 + xor r13,r8 + add r11,QWORD[rsp] + mov r15,rax + xor r12,r10 + shrd r14,r14,6 + xor r15,rbx + add r11,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,rax + add r11,r13 + xor rdi,rbx + shrd r14,r14,28 + add rdx,r11 + add r11,rdi + mov r13,rdx + add r14,r11 + shrd r13,r13,23 + mov r11,r14 + mov r12,r8 + shrd r14,r14,5 + xor r13,rdx + xor r12,r9 + shrd r13,r13,4 + xor r14,r11 + and r12,rdx + xor r13,rdx + add r10,QWORD[8+rsp] + mov rdi,r11 + xor r12,r9 + shrd r14,r14,6 + xor rdi,rax + add r10,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,r11 + add r10,r13 + xor r15,rax + shrd r14,r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + shrd r13,r13,23 + mov r10,r14 + mov r12,rdx + shrd r14,r14,5 + xor r13,rcx + xor r12,r8 + shrd r13,r13,4 + xor r14,r10 + and r12,rcx + xor r13,rcx + add r9,QWORD[16+rsp] + mov r15,r10 + xor r12,r8 + shrd r14,r14,6 + xor r15,r11 + add r9,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,r10 + add r9,r13 + xor rdi,r11 + shrd r14,r14,28 + add rbx,r9 + add r9,rdi + mov r13,rbx + add r14,r9 + shrd r13,r13,23 + mov r9,r14 + mov r12,rcx + shrd r14,r14,5 + xor r13,rbx + xor r12,rdx + shrd r13,r13,4 + xor r14,r9 + and r12,rbx + xor r13,rbx + add r8,QWORD[24+rsp] + mov rdi,r9 + xor r12,rdx + shrd r14,r14,6 + xor rdi,r10 + add r8,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,r9 + add r8,r13 + xor r15,r10 + shrd r14,r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + shrd r13,r13,23 + mov r8,r14 + mov r12,rbx + shrd r14,r14,5 + xor r13,rax + xor r12,rcx + shrd r13,r13,4 + xor r14,r8 + and r12,rax + xor r13,rax + add rdx,QWORD[32+rsp] + mov r15,r8 + xor r12,rcx + shrd r14,r14,6 + xor r15,r9 + add rdx,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,r8 + add rdx,r13 + xor rdi,r9 + shrd r14,r14,28 + add r11,rdx + add rdx,rdi + mov r13,r11 + add r14,rdx + shrd r13,r13,23 + mov rdx,r14 + mov r12,rax + shrd r14,r14,5 + xor r13,r11 + xor r12,rbx + shrd r13,r13,4 + xor r14,rdx + and r12,r11 + xor r13,r11 + add rcx,QWORD[40+rsp] + mov rdi,rdx + xor r12,rbx + shrd r14,r14,6 + xor rdi,r8 + add rcx,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,rdx + add rcx,r13 + xor r15,r8 + shrd r14,r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + shrd r13,r13,23 + mov rcx,r14 + mov r12,r11 + shrd r14,r14,5 + xor r13,r10 + xor r12,rax + shrd r13,r13,4 + xor r14,rcx + and r12,r10 + xor r13,r10 + add rbx,QWORD[48+rsp] + mov r15,rcx + xor r12,rax + shrd r14,r14,6 + xor r15,rdx + add rbx,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,rcx + add rbx,r13 + xor rdi,rdx + shrd r14,r14,28 + add r9,rbx + add rbx,rdi + mov r13,r9 + add r14,rbx + shrd r13,r13,23 + mov rbx,r14 + mov r12,r10 + shrd r14,r14,5 + xor r13,r9 + xor r12,r11 + shrd r13,r13,4 + xor r14,rbx + and r12,r9 + xor r13,r9 + add rax,QWORD[56+rsp] + mov rdi,rbx + xor r12,r11 + shrd r14,r14,6 + xor rdi,rcx + add rax,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,rbx + add rax,r13 + xor r15,rcx + shrd r14,r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + shrd r13,r13,23 + mov rax,r14 + mov r12,r9 + shrd r14,r14,5 + xor r13,r8 + xor r12,r10 + shrd r13,r13,4 + xor r14,rax + and r12,r8 + xor r13,r8 + add r11,QWORD[64+rsp] + mov r15,rax + xor r12,r10 + shrd r14,r14,6 + xor r15,rbx + add r11,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,rax + add r11,r13 + xor rdi,rbx + shrd r14,r14,28 + add rdx,r11 + add r11,rdi + mov r13,rdx + add r14,r11 + shrd r13,r13,23 + mov r11,r14 + mov r12,r8 + shrd r14,r14,5 + xor r13,rdx + xor r12,r9 + shrd r13,r13,4 + xor r14,r11 + and r12,rdx + xor r13,rdx + add r10,QWORD[72+rsp] + mov rdi,r11 + xor r12,r9 + shrd r14,r14,6 + xor rdi,rax + add r10,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,r11 + add r10,r13 + xor r15,rax + shrd r14,r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + shrd r13,r13,23 + mov r10,r14 + mov r12,rdx + shrd r14,r14,5 + xor r13,rcx + xor r12,r8 + shrd r13,r13,4 + xor r14,r10 + and r12,rcx + xor r13,rcx + add r9,QWORD[80+rsp] + mov r15,r10 + xor r12,r8 + shrd r14,r14,6 + xor r15,r11 + add r9,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,r10 + add r9,r13 + xor rdi,r11 + shrd r14,r14,28 + add rbx,r9 + add r9,rdi + mov r13,rbx + add r14,r9 + shrd r13,r13,23 + mov r9,r14 + mov r12,rcx + shrd r14,r14,5 + xor r13,rbx + xor r12,rdx + shrd r13,r13,4 + xor r14,r9 + and r12,rbx + xor r13,rbx + add r8,QWORD[88+rsp] + mov rdi,r9 + xor r12,rdx + shrd r14,r14,6 + xor rdi,r10 + add r8,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,r9 + add r8,r13 + xor r15,r10 + shrd r14,r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + shrd r13,r13,23 + mov r8,r14 + mov r12,rbx + shrd r14,r14,5 + xor r13,rax + xor r12,rcx + shrd r13,r13,4 + xor r14,r8 + and r12,rax + xor r13,rax + add rdx,QWORD[96+rsp] + mov r15,r8 + xor r12,rcx + shrd r14,r14,6 + xor r15,r9 + add rdx,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,r8 + add rdx,r13 + xor rdi,r9 + shrd r14,r14,28 + add r11,rdx + add rdx,rdi + mov r13,r11 + add r14,rdx + shrd r13,r13,23 + mov rdx,r14 + mov r12,rax + shrd r14,r14,5 + xor r13,r11 + xor r12,rbx + shrd r13,r13,4 + xor r14,rdx + and r12,r11 + xor r13,r11 + add rcx,QWORD[104+rsp] + mov rdi,rdx + xor r12,rbx + shrd r14,r14,6 + xor rdi,r8 + add rcx,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,rdx + add rcx,r13 + xor r15,r8 + shrd r14,r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + shrd r13,r13,23 + mov rcx,r14 + mov r12,r11 + shrd r14,r14,5 + xor r13,r10 + xor r12,rax + shrd r13,r13,4 + xor r14,rcx + and r12,r10 + xor r13,r10 + add rbx,QWORD[112+rsp] + mov r15,rcx + xor r12,rax + shrd r14,r14,6 + xor r15,rdx + add rbx,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,rcx + add rbx,r13 + xor rdi,rdx + shrd r14,r14,28 + add r9,rbx + add rbx,rdi + mov r13,r9 + add r14,rbx + shrd r13,r13,23 + mov rbx,r14 + mov r12,r10 + shrd r14,r14,5 + xor r13,r9 + xor r12,r11 + shrd r13,r13,4 + xor r14,rbx + and r12,r9 + xor r13,r9 + add rax,QWORD[120+rsp] + mov rdi,rbx + xor r12,r11 + shrd r14,r14,6 + xor rdi,rcx + add rax,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,rbx + add rax,r13 + xor r15,rcx + shrd r14,r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + mov rdi,QWORD[((128+0))+rsp] + mov rax,r14 + + add rax,QWORD[rdi] + lea rsi,[128+rsi] + add rbx,QWORD[8+rdi] + add rcx,QWORD[16+rdi] + add rdx,QWORD[24+rdi] + add r8,QWORD[32+rdi] + add r9,QWORD[40+rdi] + add r10,QWORD[48+rdi] + add r11,QWORD[56+rdi] + + cmp rsi,QWORD[((128+16))+rsp] + + mov QWORD[rdi],rax + mov QWORD[8+rdi],rbx + mov QWORD[16+rdi],rcx + mov QWORD[24+rdi],rdx + mov QWORD[32+rdi],r8 + mov QWORD[40+rdi],r9 + mov QWORD[48+rdi],r10 + mov QWORD[56+rdi],r11 + jb NEAR $L$loop_avx + + mov rsi,QWORD[152+rsp] + + vzeroupper + movaps xmm6,XMMWORD[((128+32))+rsp] + movaps xmm7,XMMWORD[((128+48))+rsp] + movaps xmm8,XMMWORD[((128+64))+rsp] + movaps xmm9,XMMWORD[((128+80))+rsp] + movaps xmm10,XMMWORD[((128+96))+rsp] + movaps xmm11,XMMWORD[((128+112))+rsp] + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$epilogue_avx: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha512_block_data_order_avx: + +ALIGN 64 +sha512_block_data_order_avx2: + mov QWORD[8+rsp],rdi ;WIN64 prologue + mov QWORD[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha512_block_data_order_avx2: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + +$L$avx2_shortcut: + mov rax,rsp + + push rbx + + push rbp + + push r12 + + push r13 + + push r14 + + push r15 + + sub rsp,1408 + shl rdx,4 + and rsp,-256*8 + lea rdx,[rdx*8+rsi] + add rsp,1152 + mov QWORD[((128+0))+rsp],rdi + mov QWORD[((128+8))+rsp],rsi + mov QWORD[((128+16))+rsp],rdx + mov QWORD[152+rsp],rax + + movaps XMMWORD[(128+32)+rsp],xmm6 + movaps XMMWORD[(128+48)+rsp],xmm7 + movaps XMMWORD[(128+64)+rsp],xmm8 + movaps XMMWORD[(128+80)+rsp],xmm9 + movaps XMMWORD[(128+96)+rsp],xmm10 + movaps XMMWORD[(128+112)+rsp],xmm11 +$L$prologue_avx2: + + vzeroupper + sub rsi,-16*8 + mov rax,QWORD[rdi] + mov r12,rsi + mov rbx,QWORD[8+rdi] + cmp rsi,rdx + mov rcx,QWORD[16+rdi] + cmove r12,rsp + mov rdx,QWORD[24+rdi] + mov r8,QWORD[32+rdi] + mov r9,QWORD[40+rdi] + mov r10,QWORD[48+rdi] + mov r11,QWORD[56+rdi] + jmp NEAR $L$oop_avx2 +ALIGN 16 +$L$oop_avx2: + vmovdqu xmm0,XMMWORD[((-128))+rsi] + vmovdqu xmm1,XMMWORD[((-128+16))+rsi] + vmovdqu xmm2,XMMWORD[((-128+32))+rsi] + lea rbp,[((K512+128))] + vmovdqu xmm3,XMMWORD[((-128+48))+rsi] + vmovdqu xmm4,XMMWORD[((-128+64))+rsi] + vmovdqu xmm5,XMMWORD[((-128+80))+rsi] + vmovdqu xmm6,XMMWORD[((-128+96))+rsi] + vmovdqu xmm7,XMMWORD[((-128+112))+rsi] + + vmovdqa ymm10,YMMWORD[1152+rbp] + vinserti128 ymm0,ymm0,XMMWORD[r12],1 + vinserti128 ymm1,ymm1,XMMWORD[16+r12],1 + vpshufb ymm0,ymm0,ymm10 + vinserti128 ymm2,ymm2,XMMWORD[32+r12],1 + vpshufb ymm1,ymm1,ymm10 + vinserti128 ymm3,ymm3,XMMWORD[48+r12],1 + vpshufb ymm2,ymm2,ymm10 + vinserti128 ymm4,ymm4,XMMWORD[64+r12],1 + vpshufb ymm3,ymm3,ymm10 + vinserti128 ymm5,ymm5,XMMWORD[80+r12],1 + vpshufb ymm4,ymm4,ymm10 + vinserti128 ymm6,ymm6,XMMWORD[96+r12],1 + vpshufb ymm5,ymm5,ymm10 + vinserti128 ymm7,ymm7,XMMWORD[112+r12],1 + + vpaddq ymm8,ymm0,YMMWORD[((-128))+rbp] + vpshufb ymm6,ymm6,ymm10 + vpaddq ymm9,ymm1,YMMWORD[((-96))+rbp] + vpshufb ymm7,ymm7,ymm10 + vpaddq ymm10,ymm2,YMMWORD[((-64))+rbp] + vpaddq ymm11,ymm3,YMMWORD[((-32))+rbp] + vmovdqa YMMWORD[rsp],ymm8 + vpaddq ymm8,ymm4,YMMWORD[rbp] + vmovdqa YMMWORD[32+rsp],ymm9 + vpaddq ymm9,ymm5,YMMWORD[32+rbp] + vmovdqa YMMWORD[64+rsp],ymm10 + vpaddq ymm10,ymm6,YMMWORD[64+rbp] + vmovdqa YMMWORD[96+rsp],ymm11 + lea rsp,[((-128))+rsp] + vpaddq ymm11,ymm7,YMMWORD[96+rbp] + vmovdqa YMMWORD[rsp],ymm8 + xor r14,r14 + vmovdqa YMMWORD[32+rsp],ymm9 + mov rdi,rbx + vmovdqa YMMWORD[64+rsp],ymm10 + xor rdi,rcx + vmovdqa YMMWORD[96+rsp],ymm11 + mov r12,r9 + add rbp,16*2*8 + jmp NEAR $L$avx2_00_47 + +ALIGN 16 +$L$avx2_00_47: + lea rsp,[((-128))+rsp] + vpalignr ymm8,ymm1,ymm0,8 + add r11,QWORD[((0+256))+rsp] + and r12,r8 + rorx r13,r8,41 + vpalignr ymm11,ymm5,ymm4,8 + rorx r15,r8,18 + lea rax,[r14*1+rax] + lea r11,[r12*1+r11] + vpsrlq ymm10,ymm8,1 + andn r12,r8,r10 + xor r13,r15 + rorx r14,r8,14 + vpaddq ymm0,ymm0,ymm11 + vpsrlq ymm11,ymm8,7 + lea r11,[r12*1+r11] + xor r13,r14 + mov r15,rax + vpsllq ymm9,ymm8,56 + vpxor ymm8,ymm11,ymm10 + rorx r12,rax,39 + lea r11,[r13*1+r11] + xor r15,rbx + vpsrlq ymm10,ymm10,7 + vpxor ymm8,ymm8,ymm9 + rorx r14,rax,34 + rorx r13,rax,28 + lea rdx,[r11*1+rdx] + vpsllq ymm9,ymm9,7 + vpxor ymm8,ymm8,ymm10 + and rdi,r15 + xor r14,r12 + xor rdi,rbx + vpsrlq ymm11,ymm7,6 + vpxor ymm8,ymm8,ymm9 + xor r14,r13 + lea r11,[rdi*1+r11] + mov r12,r8 + vpsllq ymm10,ymm7,3 + vpaddq ymm0,ymm0,ymm8 + add r10,QWORD[((8+256))+rsp] + and r12,rdx + rorx r13,rdx,41 + vpsrlq ymm9,ymm7,19 + vpxor ymm11,ymm11,ymm10 + rorx rdi,rdx,18 + lea r11,[r14*1+r11] + lea r10,[r12*1+r10] + vpsllq ymm10,ymm10,42 + vpxor ymm11,ymm11,ymm9 + andn r12,rdx,r9 + xor r13,rdi + rorx r14,rdx,14 + vpsrlq ymm9,ymm9,42 + vpxor ymm11,ymm11,ymm10 + lea r10,[r12*1+r10] + xor r13,r14 + mov rdi,r11 + vpxor ymm11,ymm11,ymm9 + rorx r12,r11,39 + lea r10,[r13*1+r10] + xor rdi,rax + vpaddq ymm0,ymm0,ymm11 + rorx r14,r11,34 + rorx r13,r11,28 + lea rcx,[r10*1+rcx] + vpaddq ymm10,ymm0,YMMWORD[((-128))+rbp] + and r15,rdi + xor r14,r12 + xor r15,rax + xor r14,r13 + lea r10,[r15*1+r10] + mov r12,rdx + vmovdqa YMMWORD[rsp],ymm10 + vpalignr ymm8,ymm2,ymm1,8 + add r9,QWORD[((32+256))+rsp] + and r12,rcx + rorx r13,rcx,41 + vpalignr ymm11,ymm6,ymm5,8 + rorx r15,rcx,18 + lea r10,[r14*1+r10] + lea r9,[r12*1+r9] + vpsrlq ymm10,ymm8,1 + andn r12,rcx,r8 + xor r13,r15 + rorx r14,rcx,14 + vpaddq ymm1,ymm1,ymm11 + vpsrlq ymm11,ymm8,7 + lea r9,[r12*1+r9] + xor r13,r14 + mov r15,r10 + vpsllq ymm9,ymm8,56 + vpxor ymm8,ymm11,ymm10 + rorx r12,r10,39 + lea r9,[r13*1+r9] + xor r15,r11 + vpsrlq ymm10,ymm10,7 + vpxor ymm8,ymm8,ymm9 + rorx r14,r10,34 + rorx r13,r10,28 + lea rbx,[r9*1+rbx] + vpsllq ymm9,ymm9,7 + vpxor ymm8,ymm8,ymm10 + and rdi,r15 + xor r14,r12 + xor rdi,r11 + vpsrlq ymm11,ymm0,6 + vpxor ymm8,ymm8,ymm9 + xor r14,r13 + lea r9,[rdi*1+r9] + mov r12,rcx + vpsllq ymm10,ymm0,3 + vpaddq ymm1,ymm1,ymm8 + add r8,QWORD[((40+256))+rsp] + and r12,rbx + rorx r13,rbx,41 + vpsrlq ymm9,ymm0,19 + vpxor ymm11,ymm11,ymm10 + rorx rdi,rbx,18 + lea r9,[r14*1+r9] + lea r8,[r12*1+r8] + vpsllq ymm10,ymm10,42 + vpxor ymm11,ymm11,ymm9 + andn r12,rbx,rdx + xor r13,rdi + rorx r14,rbx,14 + vpsrlq ymm9,ymm9,42 + vpxor ymm11,ymm11,ymm10 + lea r8,[r12*1+r8] + xor r13,r14 + mov rdi,r9 + vpxor ymm11,ymm11,ymm9 + rorx r12,r9,39 + lea r8,[r13*1+r8] + xor rdi,r10 + vpaddq ymm1,ymm1,ymm11 + rorx r14,r9,34 + rorx r13,r9,28 + lea rax,[r8*1+rax] + vpaddq ymm10,ymm1,YMMWORD[((-96))+rbp] + and r15,rdi + xor r14,r12 + xor r15,r10 + xor r14,r13 + lea r8,[r15*1+r8] + mov r12,rbx + vmovdqa YMMWORD[32+rsp],ymm10 + vpalignr ymm8,ymm3,ymm2,8 + add rdx,QWORD[((64+256))+rsp] + and r12,rax + rorx r13,rax,41 + vpalignr ymm11,ymm7,ymm6,8 + rorx r15,rax,18 + lea r8,[r14*1+r8] + lea rdx,[r12*1+rdx] + vpsrlq ymm10,ymm8,1 + andn r12,rax,rcx + xor r13,r15 + rorx r14,rax,14 + vpaddq ymm2,ymm2,ymm11 + vpsrlq ymm11,ymm8,7 + lea rdx,[r12*1+rdx] + xor r13,r14 + mov r15,r8 + vpsllq ymm9,ymm8,56 + vpxor ymm8,ymm11,ymm10 + rorx r12,r8,39 + lea rdx,[r13*1+rdx] + xor r15,r9 + vpsrlq ymm10,ymm10,7 + vpxor ymm8,ymm8,ymm9 + rorx r14,r8,34 + rorx r13,r8,28 + lea r11,[rdx*1+r11] + vpsllq ymm9,ymm9,7 + vpxor ymm8,ymm8,ymm10 + and rdi,r15 + xor r14,r12 + xor rdi,r9 + vpsrlq ymm11,ymm1,6 + vpxor ymm8,ymm8,ymm9 + xor r14,r13 + lea rdx,[rdi*1+rdx] + mov r12,rax + vpsllq ymm10,ymm1,3 + vpaddq ymm2,ymm2,ymm8 + add rcx,QWORD[((72+256))+rsp] + and r12,r11 + rorx r13,r11,41 + vpsrlq ymm9,ymm1,19 + vpxor ymm11,ymm11,ymm10 + rorx rdi,r11,18 + lea rdx,[r14*1+rdx] + lea rcx,[r12*1+rcx] + vpsllq ymm10,ymm10,42 + vpxor ymm11,ymm11,ymm9 + andn r12,r11,rbx + xor r13,rdi + rorx r14,r11,14 + vpsrlq ymm9,ymm9,42 + vpxor ymm11,ymm11,ymm10 + lea rcx,[r12*1+rcx] + xor r13,r14 + mov rdi,rdx + vpxor ymm11,ymm11,ymm9 + rorx r12,rdx,39 + lea rcx,[r13*1+rcx] + xor rdi,r8 + vpaddq ymm2,ymm2,ymm11 + rorx r14,rdx,34 + rorx r13,rdx,28 + lea r10,[rcx*1+r10] + vpaddq ymm10,ymm2,YMMWORD[((-64))+rbp] + and r15,rdi + xor r14,r12 + xor r15,r8 + xor r14,r13 + lea rcx,[r15*1+rcx] + mov r12,r11 + vmovdqa YMMWORD[64+rsp],ymm10 + vpalignr ymm8,ymm4,ymm3,8 + add rbx,QWORD[((96+256))+rsp] + and r12,r10 + rorx r13,r10,41 + vpalignr ymm11,ymm0,ymm7,8 + rorx r15,r10,18 + lea rcx,[r14*1+rcx] + lea rbx,[r12*1+rbx] + vpsrlq ymm10,ymm8,1 + andn r12,r10,rax + xor r13,r15 + rorx r14,r10,14 + vpaddq ymm3,ymm3,ymm11 + vpsrlq ymm11,ymm8,7 + lea rbx,[r12*1+rbx] + xor r13,r14 + mov r15,rcx + vpsllq ymm9,ymm8,56 + vpxor ymm8,ymm11,ymm10 + rorx r12,rcx,39 + lea rbx,[r13*1+rbx] + xor r15,rdx + vpsrlq ymm10,ymm10,7 + vpxor ymm8,ymm8,ymm9 + rorx r14,rcx,34 + rorx r13,rcx,28 + lea r9,[rbx*1+r9] + vpsllq ymm9,ymm9,7 + vpxor ymm8,ymm8,ymm10 + and rdi,r15 + xor r14,r12 + xor rdi,rdx + vpsrlq ymm11,ymm2,6 + vpxor ymm8,ymm8,ymm9 + xor r14,r13 + lea rbx,[rdi*1+rbx] + mov r12,r10 + vpsllq ymm10,ymm2,3 + vpaddq ymm3,ymm3,ymm8 + add rax,QWORD[((104+256))+rsp] + and r12,r9 + rorx r13,r9,41 + vpsrlq ymm9,ymm2,19 + vpxor ymm11,ymm11,ymm10 + rorx rdi,r9,18 + lea rbx,[r14*1+rbx] + lea rax,[r12*1+rax] + vpsllq ymm10,ymm10,42 + vpxor ymm11,ymm11,ymm9 + andn r12,r9,r11 + xor r13,rdi + rorx r14,r9,14 + vpsrlq ymm9,ymm9,42 + vpxor ymm11,ymm11,ymm10 + lea rax,[r12*1+rax] + xor r13,r14 + mov rdi,rbx + vpxor ymm11,ymm11,ymm9 + rorx r12,rbx,39 + lea rax,[r13*1+rax] + xor rdi,rcx + vpaddq ymm3,ymm3,ymm11 + rorx r14,rbx,34 + rorx r13,rbx,28 + lea r8,[rax*1+r8] + vpaddq ymm10,ymm3,YMMWORD[((-32))+rbp] + and r15,rdi + xor r14,r12 + xor r15,rcx + xor r14,r13 + lea rax,[r15*1+rax] + mov r12,r9 + vmovdqa YMMWORD[96+rsp],ymm10 + lea rsp,[((-128))+rsp] + vpalignr ymm8,ymm5,ymm4,8 + add r11,QWORD[((0+256))+rsp] + and r12,r8 + rorx r13,r8,41 + vpalignr ymm11,ymm1,ymm0,8 + rorx r15,r8,18 + lea rax,[r14*1+rax] + lea r11,[r12*1+r11] + vpsrlq ymm10,ymm8,1 + andn r12,r8,r10 + xor r13,r15 + rorx r14,r8,14 + vpaddq ymm4,ymm4,ymm11 + vpsrlq ymm11,ymm8,7 + lea r11,[r12*1+r11] + xor r13,r14 + mov r15,rax + vpsllq ymm9,ymm8,56 + vpxor ymm8,ymm11,ymm10 + rorx r12,rax,39 + lea r11,[r13*1+r11] + xor r15,rbx + vpsrlq ymm10,ymm10,7 + vpxor ymm8,ymm8,ymm9 + rorx r14,rax,34 + rorx r13,rax,28 + lea rdx,[r11*1+rdx] + vpsllq ymm9,ymm9,7 + vpxor ymm8,ymm8,ymm10 + and rdi,r15 + xor r14,r12 + xor rdi,rbx + vpsrlq ymm11,ymm3,6 + vpxor ymm8,ymm8,ymm9 + xor r14,r13 + lea r11,[rdi*1+r11] + mov r12,r8 + vpsllq ymm10,ymm3,3 + vpaddq ymm4,ymm4,ymm8 + add r10,QWORD[((8+256))+rsp] + and r12,rdx + rorx r13,rdx,41 + vpsrlq ymm9,ymm3,19 + vpxor ymm11,ymm11,ymm10 + rorx rdi,rdx,18 + lea r11,[r14*1+r11] + lea r10,[r12*1+r10] + vpsllq ymm10,ymm10,42 + vpxor ymm11,ymm11,ymm9 + andn r12,rdx,r9 + xor r13,rdi + rorx r14,rdx,14 + vpsrlq ymm9,ymm9,42 + vpxor ymm11,ymm11,ymm10 + lea r10,[r12*1+r10] + xor r13,r14 + mov rdi,r11 + vpxor ymm11,ymm11,ymm9 + rorx r12,r11,39 + lea r10,[r13*1+r10] + xor rdi,rax + vpaddq ymm4,ymm4,ymm11 + rorx r14,r11,34 + rorx r13,r11,28 + lea rcx,[r10*1+rcx] + vpaddq ymm10,ymm4,YMMWORD[rbp] + and r15,rdi + xor r14,r12 + xor r15,rax + xor r14,r13 + lea r10,[r15*1+r10] + mov r12,rdx + vmovdqa YMMWORD[rsp],ymm10 + vpalignr ymm8,ymm6,ymm5,8 + add r9,QWORD[((32+256))+rsp] + and r12,rcx + rorx r13,rcx,41 + vpalignr ymm11,ymm2,ymm1,8 + rorx r15,rcx,18 + lea r10,[r14*1+r10] + lea r9,[r12*1+r9] + vpsrlq ymm10,ymm8,1 + andn r12,rcx,r8 + xor r13,r15 + rorx r14,rcx,14 + vpaddq ymm5,ymm5,ymm11 + vpsrlq ymm11,ymm8,7 + lea r9,[r12*1+r9] + xor r13,r14 + mov r15,r10 + vpsllq ymm9,ymm8,56 + vpxor ymm8,ymm11,ymm10 + rorx r12,r10,39 + lea r9,[r13*1+r9] + xor r15,r11 + vpsrlq ymm10,ymm10,7 + vpxor ymm8,ymm8,ymm9 + rorx r14,r10,34 + rorx r13,r10,28 + lea rbx,[r9*1+rbx] + vpsllq ymm9,ymm9,7 + vpxor ymm8,ymm8,ymm10 + and rdi,r15 + xor r14,r12 + xor rdi,r11 + vpsrlq ymm11,ymm4,6 + vpxor ymm8,ymm8,ymm9 + xor r14,r13 + lea r9,[rdi*1+r9] + mov r12,rcx + vpsllq ymm10,ymm4,3 + vpaddq ymm5,ymm5,ymm8 + add r8,QWORD[((40+256))+rsp] + and r12,rbx + rorx r13,rbx,41 + vpsrlq ymm9,ymm4,19 + vpxor ymm11,ymm11,ymm10 + rorx rdi,rbx,18 + lea r9,[r14*1+r9] + lea r8,[r12*1+r8] + vpsllq ymm10,ymm10,42 + vpxor ymm11,ymm11,ymm9 + andn r12,rbx,rdx + xor r13,rdi + rorx r14,rbx,14 + vpsrlq ymm9,ymm9,42 + vpxor ymm11,ymm11,ymm10 + lea r8,[r12*1+r8] + xor r13,r14 + mov rdi,r9 + vpxor ymm11,ymm11,ymm9 + rorx r12,r9,39 + lea r8,[r13*1+r8] + xor rdi,r10 + vpaddq ymm5,ymm5,ymm11 + rorx r14,r9,34 + rorx r13,r9,28 + lea rax,[r8*1+rax] + vpaddq ymm10,ymm5,YMMWORD[32+rbp] + and r15,rdi + xor r14,r12 + xor r15,r10 + xor r14,r13 + lea r8,[r15*1+r8] + mov r12,rbx + vmovdqa YMMWORD[32+rsp],ymm10 + vpalignr ymm8,ymm7,ymm6,8 + add rdx,QWORD[((64+256))+rsp] + and r12,rax + rorx r13,rax,41 + vpalignr ymm11,ymm3,ymm2,8 + rorx r15,rax,18 + lea r8,[r14*1+r8] + lea rdx,[r12*1+rdx] + vpsrlq ymm10,ymm8,1 + andn r12,rax,rcx + xor r13,r15 + rorx r14,rax,14 + vpaddq ymm6,ymm6,ymm11 + vpsrlq ymm11,ymm8,7 + lea rdx,[r12*1+rdx] + xor r13,r14 + mov r15,r8 + vpsllq ymm9,ymm8,56 + vpxor ymm8,ymm11,ymm10 + rorx r12,r8,39 + lea rdx,[r13*1+rdx] + xor r15,r9 + vpsrlq ymm10,ymm10,7 + vpxor ymm8,ymm8,ymm9 + rorx r14,r8,34 + rorx r13,r8,28 + lea r11,[rdx*1+r11] + vpsllq ymm9,ymm9,7 + vpxor ymm8,ymm8,ymm10 + and rdi,r15 + xor r14,r12 + xor rdi,r9 + vpsrlq ymm11,ymm5,6 + vpxor ymm8,ymm8,ymm9 + xor r14,r13 + lea rdx,[rdi*1+rdx] + mov r12,rax + vpsllq ymm10,ymm5,3 + vpaddq ymm6,ymm6,ymm8 + add rcx,QWORD[((72+256))+rsp] + and r12,r11 + rorx r13,r11,41 + vpsrlq ymm9,ymm5,19 + vpxor ymm11,ymm11,ymm10 + rorx rdi,r11,18 + lea rdx,[r14*1+rdx] + lea rcx,[r12*1+rcx] + vpsllq ymm10,ymm10,42 + vpxor ymm11,ymm11,ymm9 + andn r12,r11,rbx + xor r13,rdi + rorx r14,r11,14 + vpsrlq ymm9,ymm9,42 + vpxor ymm11,ymm11,ymm10 + lea rcx,[r12*1+rcx] + xor r13,r14 + mov rdi,rdx + vpxor ymm11,ymm11,ymm9 + rorx r12,rdx,39 + lea rcx,[r13*1+rcx] + xor rdi,r8 + vpaddq ymm6,ymm6,ymm11 + rorx r14,rdx,34 + rorx r13,rdx,28 + lea r10,[rcx*1+r10] + vpaddq ymm10,ymm6,YMMWORD[64+rbp] + and r15,rdi + xor r14,r12 + xor r15,r8 + xor r14,r13 + lea rcx,[r15*1+rcx] + mov r12,r11 + vmovdqa YMMWORD[64+rsp],ymm10 + vpalignr ymm8,ymm0,ymm7,8 + add rbx,QWORD[((96+256))+rsp] + and r12,r10 + rorx r13,r10,41 + vpalignr ymm11,ymm4,ymm3,8 + rorx r15,r10,18 + lea rcx,[r14*1+rcx] + lea rbx,[r12*1+rbx] + vpsrlq ymm10,ymm8,1 + andn r12,r10,rax + xor r13,r15 + rorx r14,r10,14 + vpaddq ymm7,ymm7,ymm11 + vpsrlq ymm11,ymm8,7 + lea rbx,[r12*1+rbx] + xor r13,r14 + mov r15,rcx + vpsllq ymm9,ymm8,56 + vpxor ymm8,ymm11,ymm10 + rorx r12,rcx,39 + lea rbx,[r13*1+rbx] + xor r15,rdx + vpsrlq ymm10,ymm10,7 + vpxor ymm8,ymm8,ymm9 + rorx r14,rcx,34 + rorx r13,rcx,28 + lea r9,[rbx*1+r9] + vpsllq ymm9,ymm9,7 + vpxor ymm8,ymm8,ymm10 + and rdi,r15 + xor r14,r12 + xor rdi,rdx + vpsrlq ymm11,ymm6,6 + vpxor ymm8,ymm8,ymm9 + xor r14,r13 + lea rbx,[rdi*1+rbx] + mov r12,r10 + vpsllq ymm10,ymm6,3 + vpaddq ymm7,ymm7,ymm8 + add rax,QWORD[((104+256))+rsp] + and r12,r9 + rorx r13,r9,41 + vpsrlq ymm9,ymm6,19 + vpxor ymm11,ymm11,ymm10 + rorx rdi,r9,18 + lea rbx,[r14*1+rbx] + lea rax,[r12*1+rax] + vpsllq ymm10,ymm10,42 + vpxor ymm11,ymm11,ymm9 + andn r12,r9,r11 + xor r13,rdi + rorx r14,r9,14 + vpsrlq ymm9,ymm9,42 + vpxor ymm11,ymm11,ymm10 + lea rax,[r12*1+rax] + xor r13,r14 + mov rdi,rbx + vpxor ymm11,ymm11,ymm9 + rorx r12,rbx,39 + lea rax,[r13*1+rax] + xor rdi,rcx + vpaddq ymm7,ymm7,ymm11 + rorx r14,rbx,34 + rorx r13,rbx,28 + lea r8,[rax*1+r8] + vpaddq ymm10,ymm7,YMMWORD[96+rbp] + and r15,rdi + xor r14,r12 + xor r15,rcx + xor r14,r13 + lea rax,[r15*1+rax] + mov r12,r9 + vmovdqa YMMWORD[96+rsp],ymm10 + lea rbp,[256+rbp] + cmp BYTE[((-121))+rbp],0 + jne NEAR $L$avx2_00_47 + add r11,QWORD[((0+128))+rsp] + and r12,r8 + rorx r13,r8,41 + rorx r15,r8,18 + lea rax,[r14*1+rax] + lea r11,[r12*1+r11] + andn r12,r8,r10 + xor r13,r15 + rorx r14,r8,14 + lea r11,[r12*1+r11] + xor r13,r14 + mov r15,rax + rorx r12,rax,39 + lea r11,[r13*1+r11] + xor r15,rbx + rorx r14,rax,34 + rorx r13,rax,28 + lea rdx,[r11*1+rdx] + and rdi,r15 + xor r14,r12 + xor rdi,rbx + xor r14,r13 + lea r11,[rdi*1+r11] + mov r12,r8 + add r10,QWORD[((8+128))+rsp] + and r12,rdx + rorx r13,rdx,41 + rorx rdi,rdx,18 + lea r11,[r14*1+r11] + lea r10,[r12*1+r10] + andn r12,rdx,r9 + xor r13,rdi + rorx r14,rdx,14 + lea r10,[r12*1+r10] + xor r13,r14 + mov rdi,r11 + rorx r12,r11,39 + lea r10,[r13*1+r10] + xor rdi,rax + rorx r14,r11,34 + rorx r13,r11,28 + lea rcx,[r10*1+rcx] + and r15,rdi + xor r14,r12 + xor r15,rax + xor r14,r13 + lea r10,[r15*1+r10] + mov r12,rdx + add r9,QWORD[((32+128))+rsp] + and r12,rcx + rorx r13,rcx,41 + rorx r15,rcx,18 + lea r10,[r14*1+r10] + lea r9,[r12*1+r9] + andn r12,rcx,r8 + xor r13,r15 + rorx r14,rcx,14 + lea r9,[r12*1+r9] + xor r13,r14 + mov r15,r10 + rorx r12,r10,39 + lea r9,[r13*1+r9] + xor r15,r11 + rorx r14,r10,34 + rorx r13,r10,28 + lea rbx,[r9*1+rbx] + and rdi,r15 + xor r14,r12 + xor rdi,r11 + xor r14,r13 + lea r9,[rdi*1+r9] + mov r12,rcx + add r8,QWORD[((40+128))+rsp] + and r12,rbx + rorx r13,rbx,41 + rorx rdi,rbx,18 + lea r9,[r14*1+r9] + lea r8,[r12*1+r8] + andn r12,rbx,rdx + xor r13,rdi + rorx r14,rbx,14 + lea r8,[r12*1+r8] + xor r13,r14 + mov rdi,r9 + rorx r12,r9,39 + lea r8,[r13*1+r8] + xor rdi,r10 + rorx r14,r9,34 + rorx r13,r9,28 + lea rax,[r8*1+rax] + and r15,rdi + xor r14,r12 + xor r15,r10 + xor r14,r13 + lea r8,[r15*1+r8] + mov r12,rbx + add rdx,QWORD[((64+128))+rsp] + and r12,rax + rorx r13,rax,41 + rorx r15,rax,18 + lea r8,[r14*1+r8] + lea rdx,[r12*1+rdx] + andn r12,rax,rcx + xor r13,r15 + rorx r14,rax,14 + lea rdx,[r12*1+rdx] + xor r13,r14 + mov r15,r8 + rorx r12,r8,39 + lea rdx,[r13*1+rdx] + xor r15,r9 + rorx r14,r8,34 + rorx r13,r8,28 + lea r11,[rdx*1+r11] + and rdi,r15 + xor r14,r12 + xor rdi,r9 + xor r14,r13 + lea rdx,[rdi*1+rdx] + mov r12,rax + add rcx,QWORD[((72+128))+rsp] + and r12,r11 + rorx r13,r11,41 + rorx rdi,r11,18 + lea rdx,[r14*1+rdx] + lea rcx,[r12*1+rcx] + andn r12,r11,rbx + xor r13,rdi + rorx r14,r11,14 + lea rcx,[r12*1+rcx] + xor r13,r14 + mov rdi,rdx + rorx r12,rdx,39 + lea rcx,[r13*1+rcx] + xor rdi,r8 + rorx r14,rdx,34 + rorx r13,rdx,28 + lea r10,[rcx*1+r10] + and r15,rdi + xor r14,r12 + xor r15,r8 + xor r14,r13 + lea rcx,[r15*1+rcx] + mov r12,r11 + add rbx,QWORD[((96+128))+rsp] + and r12,r10 + rorx r13,r10,41 + rorx r15,r10,18 + lea rcx,[r14*1+rcx] + lea rbx,[r12*1+rbx] + andn r12,r10,rax + xor r13,r15 + rorx r14,r10,14 + lea rbx,[r12*1+rbx] + xor r13,r14 + mov r15,rcx + rorx r12,rcx,39 + lea rbx,[r13*1+rbx] + xor r15,rdx + rorx r14,rcx,34 + rorx r13,rcx,28 + lea r9,[rbx*1+r9] + and rdi,r15 + xor r14,r12 + xor rdi,rdx + xor r14,r13 + lea rbx,[rdi*1+rbx] + mov r12,r10 + add rax,QWORD[((104+128))+rsp] + and r12,r9 + rorx r13,r9,41 + rorx rdi,r9,18 + lea rbx,[r14*1+rbx] + lea rax,[r12*1+rax] + andn r12,r9,r11 + xor r13,rdi + rorx r14,r9,14 + lea rax,[r12*1+rax] + xor r13,r14 + mov rdi,rbx + rorx r12,rbx,39 + lea rax,[r13*1+rax] + xor rdi,rcx + rorx r14,rbx,34 + rorx r13,rbx,28 + lea r8,[rax*1+r8] + and r15,rdi + xor r14,r12 + xor r15,rcx + xor r14,r13 + lea rax,[r15*1+rax] + mov r12,r9 + add r11,QWORD[rsp] + and r12,r8 + rorx r13,r8,41 + rorx r15,r8,18 + lea rax,[r14*1+rax] + lea r11,[r12*1+r11] + andn r12,r8,r10 + xor r13,r15 + rorx r14,r8,14 + lea r11,[r12*1+r11] + xor r13,r14 + mov r15,rax + rorx r12,rax,39 + lea r11,[r13*1+r11] + xor r15,rbx + rorx r14,rax,34 + rorx r13,rax,28 + lea rdx,[r11*1+rdx] + and rdi,r15 + xor r14,r12 + xor rdi,rbx + xor r14,r13 + lea r11,[rdi*1+r11] + mov r12,r8 + add r10,QWORD[8+rsp] + and r12,rdx + rorx r13,rdx,41 + rorx rdi,rdx,18 + lea r11,[r14*1+r11] + lea r10,[r12*1+r10] + andn r12,rdx,r9 + xor r13,rdi + rorx r14,rdx,14 + lea r10,[r12*1+r10] + xor r13,r14 + mov rdi,r11 + rorx r12,r11,39 + lea r10,[r13*1+r10] + xor rdi,rax + rorx r14,r11,34 + rorx r13,r11,28 + lea rcx,[r10*1+rcx] + and r15,rdi + xor r14,r12 + xor r15,rax + xor r14,r13 + lea r10,[r15*1+r10] + mov r12,rdx + add r9,QWORD[32+rsp] + and r12,rcx + rorx r13,rcx,41 + rorx r15,rcx,18 + lea r10,[r14*1+r10] + lea r9,[r12*1+r9] + andn r12,rcx,r8 + xor r13,r15 + rorx r14,rcx,14 + lea r9,[r12*1+r9] + xor r13,r14 + mov r15,r10 + rorx r12,r10,39 + lea r9,[r13*1+r9] + xor r15,r11 + rorx r14,r10,34 + rorx r13,r10,28 + lea rbx,[r9*1+rbx] + and rdi,r15 + xor r14,r12 + xor rdi,r11 + xor r14,r13 + lea r9,[rdi*1+r9] + mov r12,rcx + add r8,QWORD[40+rsp] + and r12,rbx + rorx r13,rbx,41 + rorx rdi,rbx,18 + lea r9,[r14*1+r9] + lea r8,[r12*1+r8] + andn r12,rbx,rdx + xor r13,rdi + rorx r14,rbx,14 + lea r8,[r12*1+r8] + xor r13,r14 + mov rdi,r9 + rorx r12,r9,39 + lea r8,[r13*1+r8] + xor rdi,r10 + rorx r14,r9,34 + rorx r13,r9,28 + lea rax,[r8*1+rax] + and r15,rdi + xor r14,r12 + xor r15,r10 + xor r14,r13 + lea r8,[r15*1+r8] + mov r12,rbx + add rdx,QWORD[64+rsp] + and r12,rax + rorx r13,rax,41 + rorx r15,rax,18 + lea r8,[r14*1+r8] + lea rdx,[r12*1+rdx] + andn r12,rax,rcx + xor r13,r15 + rorx r14,rax,14 + lea rdx,[r12*1+rdx] + xor r13,r14 + mov r15,r8 + rorx r12,r8,39 + lea rdx,[r13*1+rdx] + xor r15,r9 + rorx r14,r8,34 + rorx r13,r8,28 + lea r11,[rdx*1+r11] + and rdi,r15 + xor r14,r12 + xor rdi,r9 + xor r14,r13 + lea rdx,[rdi*1+rdx] + mov r12,rax + add rcx,QWORD[72+rsp] + and r12,r11 + rorx r13,r11,41 + rorx rdi,r11,18 + lea rdx,[r14*1+rdx] + lea rcx,[r12*1+rcx] + andn r12,r11,rbx + xor r13,rdi + rorx r14,r11,14 + lea rcx,[r12*1+rcx] + xor r13,r14 + mov rdi,rdx + rorx r12,rdx,39 + lea rcx,[r13*1+rcx] + xor rdi,r8 + rorx r14,rdx,34 + rorx r13,rdx,28 + lea r10,[rcx*1+r10] + and r15,rdi + xor r14,r12 + xor r15,r8 + xor r14,r13 + lea rcx,[r15*1+rcx] + mov r12,r11 + add rbx,QWORD[96+rsp] + and r12,r10 + rorx r13,r10,41 + rorx r15,r10,18 + lea rcx,[r14*1+rcx] + lea rbx,[r12*1+rbx] + andn r12,r10,rax + xor r13,r15 + rorx r14,r10,14 + lea rbx,[r12*1+rbx] + xor r13,r14 + mov r15,rcx + rorx r12,rcx,39 + lea rbx,[r13*1+rbx] + xor r15,rdx + rorx r14,rcx,34 + rorx r13,rcx,28 + lea r9,[rbx*1+r9] + and rdi,r15 + xor r14,r12 + xor rdi,rdx + xor r14,r13 + lea rbx,[rdi*1+rbx] + mov r12,r10 + add rax,QWORD[104+rsp] + and r12,r9 + rorx r13,r9,41 + rorx rdi,r9,18 + lea rbx,[r14*1+rbx] + lea rax,[r12*1+rax] + andn r12,r9,r11 + xor r13,rdi + rorx r14,r9,14 + lea rax,[r12*1+rax] + xor r13,r14 + mov rdi,rbx + rorx r12,rbx,39 + lea rax,[r13*1+rax] + xor rdi,rcx + rorx r14,rbx,34 + rorx r13,rbx,28 + lea r8,[rax*1+r8] + and r15,rdi + xor r14,r12 + xor r15,rcx + xor r14,r13 + lea rax,[r15*1+rax] + mov r12,r9 + mov rdi,QWORD[1280+rsp] + add rax,r14 + + lea rbp,[1152+rsp] + + add rax,QWORD[rdi] + add rbx,QWORD[8+rdi] + add rcx,QWORD[16+rdi] + add rdx,QWORD[24+rdi] + add r8,QWORD[32+rdi] + add r9,QWORD[40+rdi] + add r10,QWORD[48+rdi] + add r11,QWORD[56+rdi] + + mov QWORD[rdi],rax + mov QWORD[8+rdi],rbx + mov QWORD[16+rdi],rcx + mov QWORD[24+rdi],rdx + mov QWORD[32+rdi],r8 + mov QWORD[40+rdi],r9 + mov QWORD[48+rdi],r10 + mov QWORD[56+rdi],r11 + + cmp rsi,QWORD[144+rbp] + je NEAR $L$done_avx2 + + xor r14,r14 + mov rdi,rbx + xor rdi,rcx + mov r12,r9 + jmp NEAR $L$ower_avx2 +ALIGN 16 +$L$ower_avx2: + add r11,QWORD[((0+16))+rbp] + and r12,r8 + rorx r13,r8,41 + rorx r15,r8,18 + lea rax,[r14*1+rax] + lea r11,[r12*1+r11] + andn r12,r8,r10 + xor r13,r15 + rorx r14,r8,14 + lea r11,[r12*1+r11] + xor r13,r14 + mov r15,rax + rorx r12,rax,39 + lea r11,[r13*1+r11] + xor r15,rbx + rorx r14,rax,34 + rorx r13,rax,28 + lea rdx,[r11*1+rdx] + and rdi,r15 + xor r14,r12 + xor rdi,rbx + xor r14,r13 + lea r11,[rdi*1+r11] + mov r12,r8 + add r10,QWORD[((8+16))+rbp] + and r12,rdx + rorx r13,rdx,41 + rorx rdi,rdx,18 + lea r11,[r14*1+r11] + lea r10,[r12*1+r10] + andn r12,rdx,r9 + xor r13,rdi + rorx r14,rdx,14 + lea r10,[r12*1+r10] + xor r13,r14 + mov rdi,r11 + rorx r12,r11,39 + lea r10,[r13*1+r10] + xor rdi,rax + rorx r14,r11,34 + rorx r13,r11,28 + lea rcx,[r10*1+rcx] + and r15,rdi + xor r14,r12 + xor r15,rax + xor r14,r13 + lea r10,[r15*1+r10] + mov r12,rdx + add r9,QWORD[((32+16))+rbp] + and r12,rcx + rorx r13,rcx,41 + rorx r15,rcx,18 + lea r10,[r14*1+r10] + lea r9,[r12*1+r9] + andn r12,rcx,r8 + xor r13,r15 + rorx r14,rcx,14 + lea r9,[r12*1+r9] + xor r13,r14 + mov r15,r10 + rorx r12,r10,39 + lea r9,[r13*1+r9] + xor r15,r11 + rorx r14,r10,34 + rorx r13,r10,28 + lea rbx,[r9*1+rbx] + and rdi,r15 + xor r14,r12 + xor rdi,r11 + xor r14,r13 + lea r9,[rdi*1+r9] + mov r12,rcx + add r8,QWORD[((40+16))+rbp] + and r12,rbx + rorx r13,rbx,41 + rorx rdi,rbx,18 + lea r9,[r14*1+r9] + lea r8,[r12*1+r8] + andn r12,rbx,rdx + xor r13,rdi + rorx r14,rbx,14 + lea r8,[r12*1+r8] + xor r13,r14 + mov rdi,r9 + rorx r12,r9,39 + lea r8,[r13*1+r8] + xor rdi,r10 + rorx r14,r9,34 + rorx r13,r9,28 + lea rax,[r8*1+rax] + and r15,rdi + xor r14,r12 + xor r15,r10 + xor r14,r13 + lea r8,[r15*1+r8] + mov r12,rbx + add rdx,QWORD[((64+16))+rbp] + and r12,rax + rorx r13,rax,41 + rorx r15,rax,18 + lea r8,[r14*1+r8] + lea rdx,[r12*1+rdx] + andn r12,rax,rcx + xor r13,r15 + rorx r14,rax,14 + lea rdx,[r12*1+rdx] + xor r13,r14 + mov r15,r8 + rorx r12,r8,39 + lea rdx,[r13*1+rdx] + xor r15,r9 + rorx r14,r8,34 + rorx r13,r8,28 + lea r11,[rdx*1+r11] + and rdi,r15 + xor r14,r12 + xor rdi,r9 + xor r14,r13 + lea rdx,[rdi*1+rdx] + mov r12,rax + add rcx,QWORD[((72+16))+rbp] + and r12,r11 + rorx r13,r11,41 + rorx rdi,r11,18 + lea rdx,[r14*1+rdx] + lea rcx,[r12*1+rcx] + andn r12,r11,rbx + xor r13,rdi + rorx r14,r11,14 + lea rcx,[r12*1+rcx] + xor r13,r14 + mov rdi,rdx + rorx r12,rdx,39 + lea rcx,[r13*1+rcx] + xor rdi,r8 + rorx r14,rdx,34 + rorx r13,rdx,28 + lea r10,[rcx*1+r10] + and r15,rdi + xor r14,r12 + xor r15,r8 + xor r14,r13 + lea rcx,[r15*1+rcx] + mov r12,r11 + add rbx,QWORD[((96+16))+rbp] + and r12,r10 + rorx r13,r10,41 + rorx r15,r10,18 + lea rcx,[r14*1+rcx] + lea rbx,[r12*1+rbx] + andn r12,r10,rax + xor r13,r15 + rorx r14,r10,14 + lea rbx,[r12*1+rbx] + xor r13,r14 + mov r15,rcx + rorx r12,rcx,39 + lea rbx,[r13*1+rbx] + xor r15,rdx + rorx r14,rcx,34 + rorx r13,rcx,28 + lea r9,[rbx*1+r9] + and rdi,r15 + xor r14,r12 + xor rdi,rdx + xor r14,r13 + lea rbx,[rdi*1+rbx] + mov r12,r10 + add rax,QWORD[((104+16))+rbp] + and r12,r9 + rorx r13,r9,41 + rorx rdi,r9,18 + lea rbx,[r14*1+rbx] + lea rax,[r12*1+rax] + andn r12,r9,r11 + xor r13,rdi + rorx r14,r9,14 + lea rax,[r12*1+rax] + xor r13,r14 + mov rdi,rbx + rorx r12,rbx,39 + lea rax,[r13*1+rax] + xor rdi,rcx + rorx r14,rbx,34 + rorx r13,rbx,28 + lea r8,[rax*1+r8] + and r15,rdi + xor r14,r12 + xor r15,rcx + xor r14,r13 + lea rax,[r15*1+rax] + mov r12,r9 + lea rbp,[((-128))+rbp] + cmp rbp,rsp + jae NEAR $L$ower_avx2 + + mov rdi,QWORD[1280+rsp] + add rax,r14 + + lea rsp,[1152+rsp] + + + + add rax,QWORD[rdi] + add rbx,QWORD[8+rdi] + add rcx,QWORD[16+rdi] + add rdx,QWORD[24+rdi] + add r8,QWORD[32+rdi] + add r9,QWORD[40+rdi] + lea rsi,[256+rsi] + add r10,QWORD[48+rdi] + mov r12,rsi + add r11,QWORD[56+rdi] + cmp rsi,QWORD[((128+16))+rsp] + + mov QWORD[rdi],rax + cmove r12,rsp + mov QWORD[8+rdi],rbx + mov QWORD[16+rdi],rcx + mov QWORD[24+rdi],rdx + mov QWORD[32+rdi],r8 + mov QWORD[40+rdi],r9 + mov QWORD[48+rdi],r10 + mov QWORD[56+rdi],r11 + + jbe NEAR $L$oop_avx2 + lea rbp,[rsp] + + + + +$L$done_avx2: + mov rsi,QWORD[152+rbp] + + vzeroupper + movaps xmm6,XMMWORD[((128+32))+rbp] + movaps xmm7,XMMWORD[((128+48))+rbp] + movaps xmm8,XMMWORD[((128+64))+rbp] + movaps xmm9,XMMWORD[((128+80))+rbp] + movaps xmm10,XMMWORD[((128+96))+rbp] + movaps xmm11,XMMWORD[((128+112))+rbp] + mov r15,QWORD[((-48))+rsi] + + mov r14,QWORD[((-40))+rsi] + + mov r13,QWORD[((-32))+rsi] + + mov r12,QWORD[((-24))+rsi] + + mov rbp,QWORD[((-16))+rsi] + + mov rbx,QWORD[((-8))+rsi] + + lea rsp,[rsi] + +$L$epilogue_avx2: + mov rdi,QWORD[8+rsp] ;WIN64 epilogue + mov rsi,QWORD[16+rsp] + DB 0F3h,0C3h ;repret + +$L$SEH_end_sha512_block_data_order_avx2: +EXTERN __imp_RtlVirtualUnwind + +ALIGN 16 +se_handler: + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD[120+r8] + mov rbx,QWORD[248+r8] + + mov rsi,QWORD[8+r9] + mov r11,QWORD[56+r9] + + mov r10d,DWORD[r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jb NEAR $L$in_prologue + + mov rax,QWORD[152+r8] + + mov r10d,DWORD[4+r11] + lea r10,[r10*1+rsi] + cmp rbx,r10 + jae NEAR $L$in_prologue + lea r10,[$L$avx2_shortcut] + cmp rbx,r10 + jb NEAR $L$not_in_avx2 + + and rax,-256*8 + add rax,1152 +$L$not_in_avx2: + mov rsi,rax + mov rax,QWORD[((128+24))+rax] + + mov rbx,QWORD[((-8))+rax] + mov rbp,QWORD[((-16))+rax] + mov r12,QWORD[((-24))+rax] + mov r13,QWORD[((-32))+rax] + mov r14,QWORD[((-40))+rax] + mov r15,QWORD[((-48))+rax] + mov QWORD[144+r8],rbx + mov QWORD[160+r8],rbp + mov QWORD[216+r8],r12 + mov QWORD[224+r8],r13 + mov QWORD[232+r8],r14 + mov QWORD[240+r8],r15 + + lea r10,[$L$epilogue] + cmp rbx,r10 + jb NEAR $L$in_prologue + + lea rsi,[((128+32))+rsi] + lea rdi,[512+r8] + mov ecx,12 + DD 0xa548f3fc + +$L$in_prologue: + mov rdi,QWORD[8+rax] + mov rsi,QWORD[16+rax] + mov QWORD[152+r8],rax + mov QWORD[168+r8],rsi + mov QWORD[176+r8],rdi + + mov rdi,QWORD[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0xa548f3fc + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD[8+rsi] + mov r8,QWORD[rsi] + mov r9,QWORD[16+rsi] + mov r10,QWORD[40+rsi] + lea r11,[56+rsi] + lea r12,[24+rsi] + mov QWORD[32+rsp],r10 + mov QWORD[40+rsp],r11 + mov QWORD[48+rsp],r12 + mov QWORD[56+rsp],rcx + call QWORD[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + +section .pdata rdata align=4 +ALIGN 4 + DD $L$SEH_begin_sha512_block_data_order wrt ..imagebase + DD $L$SEH_end_sha512_block_data_order wrt ..imagebase + DD $L$SEH_info_sha512_block_data_order wrt ..imagebase + DD $L$SEH_begin_sha512_block_data_order_xop wrt ..imagebase + DD $L$SEH_end_sha512_block_data_order_xop wrt ..imagebase + DD $L$SEH_info_sha512_block_data_order_xop wrt ..imagebase + DD $L$SEH_begin_sha512_block_data_order_avx wrt ..imagebase + DD $L$SEH_end_sha512_block_data_order_avx wrt ..imagebase + DD $L$SEH_info_sha512_block_data_order_avx wrt ..imagebase + DD $L$SEH_begin_sha512_block_data_order_avx2 wrt ..imagebase + DD $L$SEH_end_sha512_block_data_order_avx2 wrt ..imagebase + DD $L$SEH_info_sha512_block_data_order_avx2 wrt ..imagebase +section .xdata rdata align=8 +ALIGN 8 +$L$SEH_info_sha512_block_data_order: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase +$L$SEH_info_sha512_block_data_order_xop: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$prologue_xop wrt ..imagebase,$L$epilogue_xop wrt ..imagebase +$L$SEH_info_sha512_block_data_order_avx: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase +$L$SEH_info_sha512_block_data_order_avx2: +DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase |