gotosocial/vendor/github.com/zeebo/xxh3/accum_vector_sse_amd64.s
kim 07207e71e9
[performance] cache library performance enhancements (updates go-structr => v0.2.0) (#2575)
* update go-structr => v0.2.0

* update readme

* whoops, fix the link
2024-01-26 12:14:10 +00:00

1237 lines
23 KiB
ArmAsm

// Code generated by command: go run gen.go -sse -out ../accum_vector_sse_amd64.s -pkg xxh3. DO NOT EDIT.
#include "textflag.h"
// prime_sse is a file-local (<>), 16-byte, read-only, pointer-free constant
// holding the 32-bit value 0x9e3779b1 in each of its four dword slots.
// The routines in this file load it into X0 and use it as the multiplier in
// their accumulator scramble step.
// NOTE(review): 0x9e3779b1 is presumably xxHash's PRIME32_1 - confirm against
// the generator (gen.go) before relying on that name.
DATA prime_sse<>+0(SB)/4, $0x9e3779b1
DATA prime_sse<>+4(SB)/4, $0x9e3779b1
DATA prime_sse<>+8(SB)/4, $0x9e3779b1
DATA prime_sse<>+12(SB)/4, $0x9e3779b1
GLOBL prime_sse<>(SB), RODATA|NOPTR, $16
// accumSSE mixes len bytes starting at data into the 64-byte accumulator
// block at acc, using the bytes at key as the mixing secret.
//
// Register roles:
//   AX      acc pointer (read at entry, written back at exit)
//   CX      data cursor
//   DX      key base (never modified)
//   BX      key cursor used by the single-stripe loop (advances 8 per stripe)
//   SI      bytes still to be consumed
//   X1-X4   the four 128-bit accumulator lanes (acc[0:2], [2:4], [4:6], [6:8])
//   X0      prime_sse while the 1024-byte loop runs; scratch afterwards
//   X5-X7   scratch
//
// Control flow:
//   1. While more than 1024 bytes remain: consume 16 stripes of 64 bytes
//      (stripe i is keyed at key+8*i), then scramble every lane with
//      key+128 .. key+191 and multiply it by the prime in X0.
//   2. While more than 64 bytes remain: consume one stripe keyed at BX and
//      advance BX by 8.
//   3. If any bytes remain: consume the 64 bytes that END at the end of the
//      input (cursor - 64 + remaining), keyed at key+121.
//   4. Store X1-X4 back to acc.
//
// NOTE(review): the loop tests use JLE, a signed comparison, on a uint64
// length; presumably callers never pass len >= 1<<63 - confirm.
// NOTE(review): step 3 reads from up to 63 bytes before the cursor, so it
// assumes at least 64 bytes of input precede the end - verify against caller.

// ACC_LANE folds one 16-byte data chunk into one accumulator lane:
//   td   = data[doff : doff+16]
//   tk   = td XOR key[koff : koff+16]
//   tp   = (low dword * high dword) of each 64-bit half of tk
//   lane += td with its 64-bit halves swapped
//   lane += tp
#define ACC_LANE(doff, kbase, koff, lane, td, tk, tp) \
	MOVOU   doff(CX), td; \
	MOVOU   koff(kbase), tk; \
	PXOR    td, tk; \
	PSHUFD  $0x31, tk, tp; \
	PMULULQ tk, tp; \
	PSHUFD  $0x4e, td, td; \
	PADDQ   td, lane; \
	PADDQ   tp, lane

// ACC_WIDE_STRIPE consumes stripe idx (64 bytes at CX+64*idx) of a 1024-byte
// chunk, keyed at DX+8*idx, using X5/X6/X7 as scratch so X0 stays intact.
#define ACC_WIDE_STRIPE(idx) \
	ACC_LANE((idx*64+0), DX, (idx*8+0), X1, X5, X6, X7); \
	ACC_LANE((idx*64+16), DX, (idx*8+16), X2, X5, X6, X7); \
	ACC_LANE((idx*64+32), DX, (idx*8+32), X3, X5, X6, X7); \
	ACC_LANE((idx*64+48), DX, (idx*8+48), X4, X5, X6, X7)

// ACC_NARROW_STRIPE consumes the 64 bytes at CX, keyed at kbase+kofs, using
// X0/X5/X6 as scratch (this overwrites the prime held in X0).
#define ACC_NARROW_STRIPE(kbase, kofs) \
	ACC_LANE(0, kbase, (kofs+0), X1, X0, X5, X6); \
	ACC_LANE(16, kbase, (kofs+16), X2, X0, X5, X6); \
	ACC_LANE(32, kbase, (kofs+32), X3, X0, X5, X6); \
	ACC_LANE(48, kbase, (kofs+48), X4, X0, X5, X6)

// ACC_SCRAMBLE rewrites one lane, per 64-bit half, as
//   v = lane XOR (lane >> 47) XOR key[koff : koff+16]
//   lane = v * prime (low 64 bits)
// The 64x32 multiply is built from two 32x32 PMULULQ products: the low-dword
// product plus the high-dword product shifted left by 32.
#define ACC_SCRAMBLE(koff, lane) \
	MOVOU   lane, X5; \
	PSRLQ   $0x2f, X5; \
	PXOR    X5, lane; \
	MOVOU   koff(DX), X5; \
	PXOR    X5, lane; \
	PSHUFD  $0xf5, lane, X5; \
	PMULULQ X0, lane; \
	PMULULQ X0, X5; \
	PSLLQ   $0x20, X5; \
	PADDQ   X5, lane

// func accumSSE(acc *[8]uint64, data *byte, key *byte, len uint64)
// Requires: SSE2
TEXT ·accumSSE(SB), NOSPLIT, $0-32
	MOVQ  acc+0(FP), AX
	MOVQ  data+8(FP), CX
	MOVQ  key+16(FP), DX
	MOVQ  key+16(FP), BX
	MOVQ  len+24(FP), SI
	MOVOU (AX), X1
	MOVOU 16(AX), X2
	MOVOU 32(AX), X3
	MOVOU 48(AX), X4
	MOVOU prime_sse<>+0(SB), X0

	// Step 1: full 1024-byte chunks (only while strictly more than 1024 remain).
chunk_loop:
	CMPQ SI, $0x00000400
	JLE  stripe_loop
	ACC_WIDE_STRIPE(0)
	ACC_WIDE_STRIPE(1)
	ACC_WIDE_STRIPE(2)
	ACC_WIDE_STRIPE(3)
	ACC_WIDE_STRIPE(4)
	ACC_WIDE_STRIPE(5)
	ACC_WIDE_STRIPE(6)
	ACC_WIDE_STRIPE(7)
	ACC_WIDE_STRIPE(8)
	ACC_WIDE_STRIPE(9)
	ACC_WIDE_STRIPE(10)
	ACC_WIDE_STRIPE(11)
	ACC_WIDE_STRIPE(12)
	ACC_WIDE_STRIPE(13)
	ACC_WIDE_STRIPE(14)
	ACC_WIDE_STRIPE(15)
	ADDQ $0x00000400, CX
	SUBQ $0x00000400, SI
	ACC_SCRAMBLE(128, X1)
	ACC_SCRAMBLE(144, X2)
	ACC_SCRAMBLE(160, X3)
	ACC_SCRAMBLE(176, X4)
	JMP  chunk_loop

	// Step 2: whole 64-byte stripes (only while strictly more than 64 remain).
	// Control never returns to chunk_loop, so X0 is free to be clobbered here.
stripe_loop:
	CMPQ SI, $0x40
	JLE  last_stripe
	ACC_NARROW_STRIPE(BX, 0)
	ADDQ $0x00000040, CX
	SUBQ $0x00000040, SI
	ADDQ $0x00000008, BX
	JMP  stripe_loop

	// Step 3: if 1..64 bytes remain, re-anchor the cursor so the stripe ends
	// exactly at the end of the input, then consume it with the key at +121.
last_stripe:
	CMPQ SI, $0x00
	JE   store_acc
	SUBQ $0x40, CX
	ADDQ SI, CX
	ACC_NARROW_STRIPE(DX, 121)

	// Step 4: write the four lanes back to acc.
store_acc:
	MOVOU X1, (AX)
	MOVOU X2, 16(AX)
	MOVOU X3, 32(AX)
	MOVOU X4, 48(AX)
	RET

#undef ACC_SCRAMBLE
#undef ACC_NARROW_STRIPE
#undef ACC_WIDE_STRIPE
#undef ACC_LANE
// accumBlockSSE mixes exactly one 1024-byte chunk at data into the 64-byte
// accumulator block at acc and then scrambles the accumulators. It has no
// length argument and no loop: it always reads data[0:1024] and key[0:192].
//
// Register roles:
//   AX      acc pointer (read at entry, written back at exit)
//   CX      data base
//   DX      key base
//   X1-X4   the four 128-bit accumulator lanes (acc[0:2], [2:4], [4:6], [6:8])
//   X0      prime_sse, the scramble multiplier
//   X5-X7   scratch

// BLK_LANE folds data[doff : doff+16] into one accumulator lane:
//   X5   = data chunk
//   X6   = X5 XOR key[koff : koff+16]
//   X7   = (low dword * high dword) of each 64-bit half of X6
//   lane += X5 with its 64-bit halves swapped
//   lane += X7
#define BLK_LANE(doff, koff, lane) \
	MOVOU   doff(CX), X5; \
	MOVOU   koff(DX), X6; \
	PXOR    X5, X6; \
	PSHUFD  $0x31, X6, X7; \
	PMULULQ X6, X7; \
	PSHUFD  $0x4e, X5, X5; \
	PADDQ   X5, lane; \
	PADDQ   X7, lane

// BLK_STRIPE consumes stripe idx: the 64 bytes at CX+64*idx, keyed at
// DX+8*idx, one 16-byte chunk per accumulator lane.
#define BLK_STRIPE(idx) \
	BLK_LANE((idx*64+0), (idx*8+0), X1); \
	BLK_LANE((idx*64+16), (idx*8+16), X2); \
	BLK_LANE((idx*64+32), (idx*8+32), X3); \
	BLK_LANE((idx*64+48), (idx*8+48), X4)

// BLK_SCRAMBLE rewrites one lane, per 64-bit half, as
//   v = lane XOR (lane >> 47) XOR key[koff : koff+16]
//   lane = v * prime (low 64 bits)
// The 64x32 multiply is assembled from two 32x32 PMULULQ products: the
// low-dword product plus the high-dword product shifted left by 32.
#define BLK_SCRAMBLE(koff, lane) \
	MOVOU   lane, X5; \
	PSRLQ   $0x2f, X5; \
	PXOR    X5, lane; \
	MOVOU   koff(DX), X5; \
	PXOR    X5, lane; \
	PSHUFD  $0xf5, lane, X5; \
	PMULULQ X0, lane; \
	PMULULQ X0, X5; \
	PSLLQ   $0x20, X5; \
	PADDQ   X5, lane

// func accumBlockSSE(acc *[8]uint64, data *byte, key *byte)
// Requires: SSE2
TEXT ·accumBlockSSE(SB), NOSPLIT, $0-24
	MOVQ  acc+0(FP), AX
	MOVQ  data+8(FP), CX
	MOVQ  key+16(FP), DX
	MOVOU (AX), X1
	MOVOU 16(AX), X2
	MOVOU 32(AX), X3
	MOVOU 48(AX), X4
	MOVOU prime_sse<>+0(SB), X0

	// Sixteen 64-byte stripes cover data[0:1024]; the key window slides by
	// 8 bytes per stripe.
	BLK_STRIPE(0)
	BLK_STRIPE(1)
	BLK_STRIPE(2)
	BLK_STRIPE(3)
	BLK_STRIPE(4)
	BLK_STRIPE(5)
	BLK_STRIPE(6)
	BLK_STRIPE(7)
	BLK_STRIPE(8)
	BLK_STRIPE(9)
	BLK_STRIPE(10)
	BLK_STRIPE(11)
	BLK_STRIPE(12)
	BLK_STRIPE(13)
	BLK_STRIPE(14)
	BLK_STRIPE(15)

	// Scramble each lane with key[128:192].
	BLK_SCRAMBLE(128, X1)
	BLK_SCRAMBLE(144, X2)
	BLK_SCRAMBLE(160, X3)
	BLK_SCRAMBLE(176, X4)

	// Write the four lanes back to acc.
	MOVOU X1, (AX)
	MOVOU X2, 16(AX)
	MOVOU X3, 32(AX)
	MOVOU X4, 48(AX)
	RET

#undef BLK_SCRAMBLE
#undef BLK_STRIPE
#undef BLK_LANE