mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2024-05-18 16:28:07 +00:00
07207e71e9
* update go-structr => v0.2.0 * update readme * whoops, fix the link
1237 lines
23 KiB
ArmAsm
1237 lines
23 KiB
ArmAsm
// Code generated by command: go run gen.go -sse -out ../accum_vector_sse_amd64.s -pkg xxh3. DO NOT EDIT.
|
|
|
|
#include "textflag.h"
|
|
|
|
// prime_sse<> is a file-local, read-only, pointer-free 16-byte constant.
// The same 32-bit value is stored in each of its four dword lanes, so a
// single 16-byte load places the multiplier in every lane of an X register.
#define SSE_PRIME32 0x9e3779b1
DATA prime_sse<>+0(SB)/4, $SSE_PRIME32
DATA prime_sse<>+4(SB)/4, $SSE_PRIME32
DATA prime_sse<>+8(SB)/4, $SSE_PRIME32
DATA prime_sse<>+12(SB)/4, $SSE_PRIME32
GLOBL prime_sse<>(SB), RODATA|NOPTR, $16
#undef SSE_PRIME32
|
|
|
|
// Helper macros for ·accumSSE. They expand to the same instruction sequence
// as the fully unrolled form and are removed again with #undef after the
// function, so they do not leak into the rest of the file.
//
// SSE_ACCUM_LANE(dOff, kOff, kBase, sum, dat, mix, prd)
//   dat  = 16 input bytes at dOff(CX)
//   mix  = 16 key bytes at kOff(kBase), XORed with dat
//   prd  = per 64-bit lane: low dword of mix times high dword of mix
//          (PSHUFD $0x31 moves the high dwords down, PMULULQ multiplies)
//   sum += dat with its two 64-bit halves swapped (PSHUFD $0x4e)
//   sum += prd
//
// SSE_ACCUM_STRIPE(dOff, kOff, kBase, dat, mix, prd)
//   Runs SSE_ACCUM_LANE on four consecutive 16-byte lanes (64 bytes), feeding
//   X1, X2, X3 and X4 in that order.
//
// SSE_SCRAMBLE(sum, kOff)
//   sum ^= sum >> 47; sum ^= 16 key bytes at kOff(DX); then each 64-bit lane
//   of sum is multiplied by the 32-bit constant held in X0 (low and high
//   dword products are computed separately and recombined). Uses X5 as
//   scratch.
#define SSE_ACCUM_LANE(dOff, kOff, kBase, sum, dat, mix, prd) \
	MOVOU   dOff(CX), dat; \
	MOVOU   kOff(kBase), mix; \
	PXOR    dat, mix; \
	PSHUFD  $0x31, mix, prd; \
	PMULULQ mix, prd; \
	PSHUFD  $0x4e, dat, dat; \
	PADDQ   dat, sum; \
	PADDQ   prd, sum

#define SSE_ACCUM_STRIPE(dOff, kOff, kBase, dat, mix, prd) \
	SSE_ACCUM_LANE((dOff+0), (kOff+0), kBase, X1, dat, mix, prd); \
	SSE_ACCUM_LANE((dOff+16), (kOff+16), kBase, X2, dat, mix, prd); \
	SSE_ACCUM_LANE((dOff+32), (kOff+32), kBase, X3, dat, mix, prd); \
	SSE_ACCUM_LANE((dOff+48), (kOff+48), kBase, X4, dat, mix, prd)

#define SSE_SCRAMBLE(sum, kOff) \
	MOVOU   sum, X5; \
	PSRLQ   $0x2f, X5; \
	PXOR    X5, sum; \
	MOVOU   kOff(DX), X5; \
	PXOR    X5, sum; \
	PSHUFD  $0xf5, sum, X5; \
	PMULULQ X0, sum; \
	PMULULQ X0, X5; \
	PSLLQ   $0x20, X5; \
	PADDQ   X5, sum

// func accumSSE(acc *[8]uint64, data *byte, key *byte, len uint64)
// Requires: SSE2
//
// Folds len bytes starting at data into the 64-byte accumulator at acc,
// using key as the mixing material. The accumulator is loaded into X1..X4
// on entry and written back on return.
//
// Register roles:
//   AX     acc pointer
//   CX     data cursor (advanced as input is consumed)
//   DX     key base (never modified)
//   BX     key cursor for the 64-byte loop (starts at key, +8 per stripe)
//   SI     bytes remaining
//   X1-X4  accumulator lanes
//   X0     prime_sse constant while in accum_large; reused as a data
//          register afterwards (control never returns to accum_large)
//
// NOTE(review): the length comparisons use JLE, a signed condition, although
// len is declared uint64. This is only equivalent to an unsigned compare for
// len < 1<<63 - confirm callers never exceed that.
TEXT ·accumSSE(SB), NOSPLIT, $0-32
	MOVQ  acc+0(FP), AX
	MOVQ  data+8(FP), CX
	MOVQ  key+16(FP), DX
	MOVQ  key+16(FP), BX
	MOVQ  len+24(FP), SI
	MOVOU (AX), X1
	MOVOU 16(AX), X2
	MOVOU 32(AX), X3
	MOVOU 48(AX), X4
	MOVOU prime_sse<>+0(SB), X0

	// Phase 1: while more than 1024 bytes remain, consume one 1024-byte block
	// as 16 stripes of 64 bytes. Stripe i reads data at 64*i and key at 8*i.
accum_large:
	CMPQ SI, $0x400
	JLE  accum
	SSE_ACCUM_STRIPE(0, 0, DX, X5, X6, X7)
	SSE_ACCUM_STRIPE(64, 8, DX, X5, X6, X7)
	SSE_ACCUM_STRIPE(128, 16, DX, X5, X6, X7)
	SSE_ACCUM_STRIPE(192, 24, DX, X5, X6, X7)
	SSE_ACCUM_STRIPE(256, 32, DX, X5, X6, X7)
	SSE_ACCUM_STRIPE(320, 40, DX, X5, X6, X7)
	SSE_ACCUM_STRIPE(384, 48, DX, X5, X6, X7)
	SSE_ACCUM_STRIPE(448, 56, DX, X5, X6, X7)
	SSE_ACCUM_STRIPE(512, 64, DX, X5, X6, X7)
	SSE_ACCUM_STRIPE(576, 72, DX, X5, X6, X7)
	SSE_ACCUM_STRIPE(640, 80, DX, X5, X6, X7)
	SSE_ACCUM_STRIPE(704, 88, DX, X5, X6, X7)
	SSE_ACCUM_STRIPE(768, 96, DX, X5, X6, X7)
	SSE_ACCUM_STRIPE(832, 104, DX, X5, X6, X7)
	SSE_ACCUM_STRIPE(896, 112, DX, X5, X6, X7)
	SSE_ACCUM_STRIPE(960, 120, DX, X5, X6, X7)
	ADDQ $0x400, CX
	SUBQ $0x400, SI

	// Scramble every accumulator lane with key bytes 128..191 after each block.
	SSE_SCRAMBLE(X1, 128)
	SSE_SCRAMBLE(X2, 144)
	SSE_SCRAMBLE(X3, 160)
	SSE_SCRAMBLE(X4, 176)
	JMP  accum_large

	// Phase 2: while more than 64 bytes remain, consume one 64-byte stripe.
	// BX still points at the start of the key on first entry and advances by
	// 8 bytes per stripe. X0 now carries input data instead of the prime.
accum:
	CMPQ SI, $0x40
	JLE  finalize
	SSE_ACCUM_STRIPE(0, 0, BX, X0, X5, X6)
	ADDQ $0x40, CX
	SUBQ $0x40, SI
	ADDQ $0x08, BX
	JMP  accum

	// Phase 3: if any bytes remain (1..64 at this point), fold in the 64 bytes
	// that END at the end of the input, using key offset 121. CX is moved to
	// (end of remaining data - 64), so this stripe may overlap bytes that were
	// already consumed.
	// NOTE(review): assumes 64 readable bytes precede the end of the input -
	// confirm callers guarantee len >= 64.
finalize:
	CMPQ SI, $0x00
	JE   return
	SUBQ $0x40, CX
	ADDQ SI, CX
	SSE_ACCUM_STRIPE(0, 121, DX, X0, X5, X6)

	// Write the four accumulator lanes back to *acc.
return:
	MOVOU X1, (AX)
	MOVOU X2, 16(AX)
	MOVOU X3, 32(AX)
	MOVOU X4, 48(AX)
	RET

#undef SSE_SCRAMBLE
#undef SSE_ACCUM_STRIPE
#undef SSE_ACCUM_LANE
|
|
|
|
// Helper macros for ·accumBlockSSE. They expand to the same instruction
// sequence as the fully unrolled form and are removed again with #undef
// after the function, so they do not leak into the rest of the file.
//
// SSE_ACCUM_LANE(dOff, kOff, sum)
//   X5   = 16 input bytes at dOff(CX)
//   X6   = 16 key bytes at kOff(DX), XORed with X5
//   X7   = per 64-bit lane: low dword of X6 times high dword of X6
//          (PSHUFD $0x31 moves the high dwords down, PMULULQ multiplies)
//   sum += X5 with its two 64-bit halves swapped (PSHUFD $0x4e)
//   sum += X7
//
// SSE_ACCUM_STRIPE(dOff, kOff)
//   Runs SSE_ACCUM_LANE on four consecutive 16-byte lanes (64 bytes), feeding
//   X1, X2, X3 and X4 in that order.
//
// SSE_SCRAMBLE(sum, kOff)
//   sum ^= sum >> 47; sum ^= 16 key bytes at kOff(DX); then each 64-bit lane
//   of sum is multiplied by the 32-bit constant held in X0 (low and high
//   dword products are computed separately and recombined). Uses X5 as
//   scratch.
#define SSE_ACCUM_LANE(dOff, kOff, sum) \
	MOVOU   dOff(CX), X5; \
	MOVOU   kOff(DX), X6; \
	PXOR    X5, X6; \
	PSHUFD  $0x31, X6, X7; \
	PMULULQ X6, X7; \
	PSHUFD  $0x4e, X5, X5; \
	PADDQ   X5, sum; \
	PADDQ   X7, sum

#define SSE_ACCUM_STRIPE(dOff, kOff) \
	SSE_ACCUM_LANE((dOff+0), (kOff+0), X1); \
	SSE_ACCUM_LANE((dOff+16), (kOff+16), X2); \
	SSE_ACCUM_LANE((dOff+32), (kOff+32), X3); \
	SSE_ACCUM_LANE((dOff+48), (kOff+48), X4)

#define SSE_SCRAMBLE(sum, kOff) \
	MOVOU   sum, X5; \
	PSRLQ   $0x2f, X5; \
	PXOR    X5, sum; \
	MOVOU   kOff(DX), X5; \
	PXOR    X5, sum; \
	PSHUFD  $0xf5, sum, X5; \
	PMULULQ X0, sum; \
	PMULULQ X0, X5; \
	PSLLQ   $0x20, X5; \
	PADDQ   X5, sum

// func accumBlockSSE(acc *[8]uint64, data *byte, key *byte)
// Requires: SSE2
//
// Folds exactly one 1024-byte block starting at data into the 64-byte
// accumulator at acc and then scrambles the accumulator. There is no length
// argument: 1024 bytes of data and key bytes 0..191 are read unconditionally.
//
// Register roles:
//   AX     acc pointer
//   CX     data base
//   DX     key base
//   X1-X4  accumulator lanes (loaded on entry, stored before RET)
//   X0     prime_sse constant used by the scramble step
//   X5-X7  scratch
TEXT ·accumBlockSSE(SB), NOSPLIT, $0-24
	MOVQ  acc+0(FP), AX
	MOVQ  data+8(FP), CX
	MOVQ  key+16(FP), DX
	MOVOU (AX), X1
	MOVOU 16(AX), X2
	MOVOU 32(AX), X3
	MOVOU 48(AX), X4
	MOVOU prime_sse<>+0(SB), X0

	// 16 stripes of 64 bytes; stripe i reads data at 64*i and key at 8*i.
	SSE_ACCUM_STRIPE(0, 0)
	SSE_ACCUM_STRIPE(64, 8)
	SSE_ACCUM_STRIPE(128, 16)
	SSE_ACCUM_STRIPE(192, 24)
	SSE_ACCUM_STRIPE(256, 32)
	SSE_ACCUM_STRIPE(320, 40)
	SSE_ACCUM_STRIPE(384, 48)
	SSE_ACCUM_STRIPE(448, 56)
	SSE_ACCUM_STRIPE(512, 64)
	SSE_ACCUM_STRIPE(576, 72)
	SSE_ACCUM_STRIPE(640, 80)
	SSE_ACCUM_STRIPE(704, 88)
	SSE_ACCUM_STRIPE(768, 96)
	SSE_ACCUM_STRIPE(832, 104)
	SSE_ACCUM_STRIPE(896, 112)
	SSE_ACCUM_STRIPE(960, 120)

	// Scramble every accumulator lane with key bytes 128..191.
	SSE_SCRAMBLE(X1, 128)
	SSE_SCRAMBLE(X2, 144)
	SSE_SCRAMBLE(X3, 160)
	SSE_SCRAMBLE(X4, 176)

	// Write the four accumulator lanes back to *acc.
	MOVOU X1, (AX)
	MOVOU X2, 16(AX)
	MOVOU X3, 32(AX)
	MOVOU X4, 48(AX)
	RET

#undef SSE_SCRAMBLE
#undef SSE_ACCUM_STRIPE
#undef SSE_ACCUM_LANE
|