gotosocial/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
Dominik Süß 9d0df426da
[feature] S3 support (#674)
* feat: vendor minio client

* feat: introduce storage package with s3 support

* feat: serve s3 files directly

this saves a lot of bandwidth as the files are fetched from the object
store directly

* fix: use explicit local storage in tests

* feat: integrate s3 storage with the main server

* fix: add s3 config to cli tests

* docs: explicitly set values in example config

also adds license header to the storage package

* fix: use better http status code on s3 redirect

HTTP 302 Found is the best fit, as it signifies that the resource
requested was found but not under its presumed URL

307/TemporaryRedirect would mean that this resource is usually located
here, not in this case

303/SeeOther indicates that the redirection does not link to the
requested resource but to another page

* refactor: use context in storage driver interface
2022-07-03 12:08:30 +02:00

15678 lines
393 KiB
ArmAsm

// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.
// +build !appengine
// +build !noasm
// +build gc
#include "textflag.h"
// func encodeBlockAsm(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and returns the number of bytes written to dst.
// Returns 0 as soon as the projected output position reaches the limit
// pointer kept in 0(SP) (see below), i.e. when the output would not be
// sufficiently smaller than the input.
//
// Stack frame layout (65560 bytes of locals):
//   0(SP)   8 bytes   dst limit pointer = dst_base + src_len - 9 - (src_len >> 5)
//   8(SP)   4 bytes   search limit      = src_len - 8
//   12(SP)  4 bytes   nextEmit: src index of the first byte not yet emitted
//   16(SP)  4 bytes   current repeat offset (initialised to 1)
//   20(SP)  4 bytes   src index to continue from when no candidate matches
//   24(SP)  65536 bytes hash table: 16384 32-bit src indices
//
// Register roles for most of the function:
//   AX = dst write pointer
//   CX = current src index
//   DX = src base pointer (reused as scratch from emit_remainder onwards)
// BX (BL) is used as a byte scratch register.
//
// NOTE(review): the code reads 8 bytes at a time from src and writes whole
// 8/16/32-byte chunks to dst; it presumably relies on the caller guaranteeing
// a minimum src length and enough dst capacity - confirm against the Go caller.
TEXT ·encodeBlockAsm(SB), $65560-56
MOVQ dst_base+0(FP), AX
// Clear the hash table: 0x200 iterations of 0x80 bytes = 65536 bytes at 24(SP).
MOVQ $0x00000200, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeBlockAsm:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeBlockAsm
// nextEmit = 0.
MOVL $0x00000000, 12(SP)
// 8(SP) = src_len - 8 (search limit).
// 0(SP) = dst_base + (src_len - 9 - src_len>>5) (output limit pointer).
MOVQ src_len+32(FP), CX
LEAQ -9(CX), DX
LEAQ -8(CX), SI
MOVL SI, 8(SP)
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
// Start searching at src index 1 with repeat offset 1; DX = src base.
MOVL $0x00000001, CX
MOVL CX, 16(SP)
MOVQ src_base+24(FP), DX
// Main search loop. CX = current src index s.
search_loop_encodeBlockAsm:
// SI = s + 4 + ((s - nextEmit) >> 6): the skip distance grows with the
// number of pending literal bytes. Stop searching once SI >= search limit.
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x06, SI
LEAL 4(CX)(SI*1), SI
CMPL SI, 8(SP)
JGE emit_remainder_encodeBlockAsm
// DI = 8 bytes at src[s]; remember SI as the next position in 20(SP).
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
// Hash: shift left 16 (drop the top two bytes, so 6 bytes are hashed),
// multiply by the constant in R9, keep the top 14 bits (shift right 50).
// R10 = hash of bytes at s, R11 = hash of bytes at s+1.
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ DI, R10
MOVQ DI, R11
SHRQ $0x08, R11
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x32, R10
SHLQ $0x10, R11
IMULQ R9, R11
SHRQ $0x32, R11
// SI = table candidate for s, R8 = table candidate for s+1;
// then store s and s+1 into those two slots.
MOVL 24(SP)(R10*4), SI
MOVL 24(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
LEAL 1(CX), R10
MOVL R10, 24(SP)(R11*4)
// R10 = hash of bytes at s+2 (used in no_repeat_found).
MOVQ DI, R10
SHRQ $0x10, R10
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x32, R10
// Repeat check: compare the 4 bytes at src[s+1] (low 32 bits of DI>>8)
// with the 4 bytes at src[s+1-repeat].
MOVL CX, R9
SUBL 16(SP), R9
MOVL 1(DX)(R9*1), R11
MOVQ DI, R9
SHRQ $0x08, R9
CMPL R9, R11
JNE no_repeat_found_encodeBlockAsm
// Repeat found. DI = s+1 (match start), R8 = nextEmit (kept until the
// TESTL R8, R8 below), SI = match start - repeat (candidate index).
LEAL 1(CX), DI
MOVL 12(SP), R8
MOVL DI, SI
SUBL 16(SP), SI
JZ repeat_extend_back_end_encodeBlockAsm
// Extend the match backwards while DI > nextEmit, the preceding bytes are
// equal, and the candidate index has not reached 0.
repeat_extend_back_loop_encodeBlockAsm:
CMPL DI, R8
JLE repeat_extend_back_end_encodeBlockAsm
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(DI*1), R9
CMPB BL, R9
JNE repeat_extend_back_end_encodeBlockAsm
LEAL -1(DI), DI
DECL SI
JNZ repeat_extend_back_loop_encodeBlockAsm
repeat_extend_back_end_encodeBlockAsm:
// Emit pending literals src[nextEmit:DI] (skipped when empty).
// R10 = literal source pointer, R9 = literal length, SI = length - 1.
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeBlockAsm
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R10
SUBL SI, R9
LEAL -1(R9), SI
// Choose the literal header size from length-1:
//   < 60      : 1 byte,  (length-1)<<2
//   < 1<<8    : 0xf0 + 1 length byte
//   < 1<<16   : 0xf4 + 2 length bytes
//   < 1<<24   : 0xf8 + 3 length bytes
//   otherwise : 0xfc + 4 length bytes
CMPL SI, $0x3c
JLT one_byte_repeat_emit_encodeBlockAsm
CMPL SI, $0x00000100
JLT two_bytes_repeat_emit_encodeBlockAsm
CMPL SI, $0x00010000
JLT three_bytes_repeat_emit_encodeBlockAsm
CMPL SI, $0x01000000
JLT four_bytes_repeat_emit_encodeBlockAsm
MOVB $0xfc, (AX)
MOVL SI, 1(AX)
ADDQ $0x05, AX
JMP memmove_long_repeat_emit_encodeBlockAsm
four_bytes_repeat_emit_encodeBlockAsm:
MOVL SI, R11
SHRL $0x10, R11
MOVB $0xf8, (AX)
MOVW SI, 1(AX)
MOVB R11, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_repeat_emit_encodeBlockAsm
three_bytes_repeat_emit_encodeBlockAsm:
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_repeat_emit_encodeBlockAsm
two_bytes_repeat_emit_encodeBlockAsm:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
// length-1 < 64 uses the short copy (at most 64 bytes), otherwise the long copy.
CMPL SI, $0x40
JL memmove_repeat_emit_encodeBlockAsm
JMP memmove_long_repeat_emit_encodeBlockAsm
one_byte_repeat_emit_encodeBlockAsm:
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_repeat_emit_encodeBlockAsm:
// SI = dst pointer after the literal bytes.
LEAQ (AX)(R9*1), SI
// genMemMoveShort
// Copy R9 (<= 64) bytes from R10 to AX using overlapping head/tail moves.
// The <= 8 case always moves a full 8 bytes, so it may read and write past
// the literal; AX is set to the exact end (SI) afterwards.
CMPQ R9, $0x08
JLE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8:
MOVQ (R10), R11
MOVQ R11, (AX)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16:
MOVQ (R10), R11
MOVQ -8(R10)(R9*1), R10
MOVQ R11, (AX)
MOVQ R10, -8(AX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm
emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_repeat_emit_encodeBlockAsm:
MOVQ SI, AX
JMP emit_literal_done_repeat_emit_encodeBlockAsm
memmove_long_repeat_emit_encodeBlockAsm:
// SI = dst pointer after the literal bytes.
LEAQ (AX)(R9*1), SI
// genMemMoveLong
// Load the first and last 32 bytes up front (X0..X3); they are stored
// unaligned at the end. The middle is copied in 32-byte chunks with aligned
// stores: R13 = 64 - (dst & 31), so dst + R13 - 32 is 32-byte aligned.
// R12 = length / 32 is the chunk counter.
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R12
SHRQ $0x05, R12
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R13
SUBQ R11, R13
// NOTE(review): DECQ does not modify CF, so the JA/JNA branches below see CF
// from an earlier instruction combined with ZF from DECQ - confirm intended.
DECQ R12
JA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
LEAQ -32(R10)(R13*1), R11
LEAQ -32(AX)(R13*1), R14
emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R11
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32:
// Copy 32 bytes ending at offset R13; repeat while length >= R13.
MOVOU -32(R10)(R13*1), X4
MOVOU -16(R10)(R13*1), X5
MOVOA X4, -32(AX)(R13*1)
MOVOA X5, -16(AX)(R13*1)
ADDQ $0x20, R13
CMPQ R9, R13
JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
// Store the saved head and tail, then advance AX past the literal.
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ SI, AX
emit_literal_done_repeat_emit_encodeBlockAsm:
// s += 5: skip past s+1 plus the 4 bytes already verified equal.
// R9 = bytes left in src, R10 = src+s, SI = src+(s-repeat).
ADDL $0x05, CX
MOVL CX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), R9
SUBL CX, R9
LEAQ (DX)(CX*1), R10
LEAQ (DX)(SI*1), SI
// matchLen
// R12 = number of equal bytes at R10 and SI, at most R9.
XORL R12, R12
CMPL R9, $0x08
JL matchlen_single_repeat_extend_encodeBlockAsm
matchlen_loopback_repeat_extend_encodeBlockAsm:
// Compare 8 bytes at a time; on a difference, the index of the lowest set
// bit of the XOR divided by 8 is the number of additional equal bytes.
MOVQ (R10)(R12*1), R11
XORQ (SI)(R12*1), R11
TESTQ R11, R11
JZ matchlen_loop_repeat_extend_encodeBlockAsm
BSFQ R11, R11
SARQ $0x03, R11
LEAL (R12)(R11*1), R12
JMP repeat_extend_forward_end_encodeBlockAsm
matchlen_loop_repeat_extend_encodeBlockAsm:
LEAL -8(R9), R9
LEAL 8(R12), R12
CMPL R9, $0x08
JGE matchlen_loopback_repeat_extend_encodeBlockAsm
matchlen_single_repeat_extend_encodeBlockAsm:
// Fewer than 8 bytes left: compare byte by byte.
TESTL R9, R9
JZ repeat_extend_forward_end_encodeBlockAsm
matchlen_single_loopback_repeat_extend_encodeBlockAsm:
MOVB (R10)(R12*1), R11
CMPB (SI)(R12*1), R11
JNE repeat_extend_forward_end_encodeBlockAsm
LEAL 1(R12), R12
DECL R9
JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm
repeat_extend_forward_end_encodeBlockAsm:
// s += matched bytes; SI = match length (s - match start);
// DI = repeat offset. If nextEmit was 0 when the repeat was found (R8),
// encode it as a regular copy instead of a repeat.
ADDL R12, CX
MOVL CX, SI
SUBL DI, SI
MOVL 16(SP), DI
TESTL R8, R8
JZ repeat_as_copy_encodeBlockAsm
// emitRepeat
// SI = length on entry; R8 keeps length, SI becomes length - 4.
emit_repeat_again_match_repeat_encodeBlockAsm:
MOVL SI, R8
LEAL -4(SI), SI
// length <= 8: 2-byte form. length < 12 and offset < 2048: 2-byte form
// that also carries the offset. Otherwise 3-, 4- or 5-byte forms.
CMPL R8, $0x08
JLE repeat_two_match_repeat_encodeBlockAsm
CMPL R8, $0x0c
JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm
CMPL DI, $0x00000800
JLT repeat_two_offset_match_repeat_encodeBlockAsm
cant_repeat_two_offset_match_repeat_encodeBlockAsm:
CMPL SI, $0x00000104
JLT repeat_three_match_repeat_encodeBlockAsm
CMPL SI, $0x00010100
JLT repeat_four_match_repeat_encodeBlockAsm
CMPL SI, $0x0100ffff
JLT repeat_five_match_repeat_encodeBlockAsm
// Too long for one 5-byte repeat: emit a fixed 5-byte repeat
// (1d 00 fb ff ff), subtract 16842747 and go round again.
LEAL -16842747(SI), SI
MOVW $0x001d, (AX)
MOVW $0xfffb, 2(AX)
MOVB $0xff, 4(AX)
ADDQ $0x05, AX
JMP emit_repeat_again_match_repeat_encodeBlockAsm
repeat_five_match_repeat_encodeBlockAsm:
// 5 bytes: 0x001d followed by the 24-bit value (length - 4 - 65536).
LEAL -65536(SI), SI
MOVL SI, DI
MOVW $0x001d, (AX)
MOVW SI, 2(AX)
SARL $0x10, DI
MOVB DI, 4(AX)
ADDQ $0x05, AX
JMP repeat_end_emit_encodeBlockAsm
repeat_four_match_repeat_encodeBlockAsm:
// 4 bytes: 0x0019 followed by the 16-bit value (length - 4 - 256).
LEAL -256(SI), SI
MOVW $0x0019, (AX)
MOVW SI, 2(AX)
ADDQ $0x04, AX
JMP repeat_end_emit_encodeBlockAsm
repeat_three_match_repeat_encodeBlockAsm:
// 3 bytes: 0x0015 followed by the 8-bit value (length - 4 - 4).
LEAL -4(SI), SI
MOVW $0x0015, (AX)
MOVB SI, 2(AX)
ADDQ $0x03, AX
JMP repeat_end_emit_encodeBlockAsm
repeat_two_match_repeat_encodeBlockAsm:
// 2 bytes: 16-bit value ((length - 4) << 2) | 1.
SHLL $0x02, SI
ORL $0x01, SI
MOVW SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm
repeat_two_offset_match_repeat_encodeBlockAsm:
// 2 bytes: first byte = 1 | (length-4)<<2 | (offset>>8)<<5,
// second byte = low 8 bits of the offset.
XORQ R8, R8
LEAL 1(R8)(SI*4), SI
MOVB DI, 1(AX)
SARL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm
repeat_as_copy_encodeBlockAsm:
// emitCopy
// DI = offset, SI = length. Offsets >= 65536 use the 4-byte-offset form.
CMPL DI, $0x00010000
JL two_byte_offset_repeat_as_copy_encodeBlockAsm
four_bytes_loop_back_repeat_as_copy_encodeBlockAsm:
// length > 64: emit 0xff + 32-bit offset, subtract 64 from the length,
// and encode the rest as a repeat if at least 4 bytes remain.
CMPL SI, $0x40
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm
MOVB $0xff, (AX)
MOVL DI, 1(AX)
LEAL -64(SI), SI
ADDQ $0x05, AX
CMPL SI, $0x04
JL four_bytes_remain_repeat_as_copy_encodeBlockAsm
// emitRepeat
// Same encoding logic as emit_repeat_again_match_repeat above.
emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
MOVL SI, R8
LEAL -4(SI), SI
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
CMPL DI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
CMPL SI, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
CMPL SI, $0x00010100
JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
CMPL SI, $0x0100ffff
JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
LEAL -16842747(SI), SI
MOVW $0x001d, (AX)
MOVW $0xfffb, 2(AX)
MOVB $0xff, 4(AX)
ADDQ $0x05, AX
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy
repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
LEAL -65536(SI), SI
MOVL SI, DI
MOVW $0x001d, (AX)
MOVW SI, 2(AX)
SARL $0x10, DI
MOVB DI, 4(AX)
ADDQ $0x05, AX
JMP repeat_end_emit_encodeBlockAsm
repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
LEAL -256(SI), SI
MOVW $0x0019, (AX)
MOVW SI, 2(AX)
ADDQ $0x04, AX
JMP repeat_end_emit_encodeBlockAsm
repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
LEAL -4(SI), SI
MOVW $0x0015, (AX)
MOVB SI, 2(AX)
ADDQ $0x03, AX
JMP repeat_end_emit_encodeBlockAsm
repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
SHLL $0x02, SI
ORL $0x01, SI
MOVW SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm
repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
XORQ R8, R8
LEAL 1(R8)(SI*4), SI
MOVB DI, 1(AX)
SARL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm
// The next jump is unreachable: it follows an unconditional JMP and has no label.
JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm
four_bytes_remain_repeat_as_copy_encodeBlockAsm:
// Remaining length (0 means nothing left): 5 bytes,
// first byte = 3 | (length-1)<<2, then the 32-bit offset.
TESTL SI, SI
JZ repeat_end_emit_encodeBlockAsm
MOVB $0x03, BL
LEAL -4(BX)(SI*4), SI
MOVB SI, (AX)
MOVL DI, 1(AX)
ADDQ $0x05, AX
JMP repeat_end_emit_encodeBlockAsm
two_byte_offset_repeat_as_copy_encodeBlockAsm:
// Offset < 65536. length > 64: emit 0xee + 16-bit offset, subtract 60
// from the length and encode the rest as a repeat.
CMPL SI, $0x40
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm
MOVB $0xee, (AX)
MOVW DI, 1(AX)
LEAL -60(SI), SI
ADDQ $0x03, AX
// emitRepeat
// Same encoding logic as emit_repeat_again_match_repeat above.
emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
MOVL SI, R8
LEAL -4(SI), SI
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
CMPL DI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
CMPL SI, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
CMPL SI, $0x00010100
JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
CMPL SI, $0x0100ffff
JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
LEAL -16842747(SI), SI
MOVW $0x001d, (AX)
MOVW $0xfffb, 2(AX)
MOVB $0xff, 4(AX)
ADDQ $0x05, AX
JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short
repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
LEAL -65536(SI), SI
MOVL SI, DI
MOVW $0x001d, (AX)
MOVW SI, 2(AX)
SARL $0x10, DI
MOVB DI, 4(AX)
ADDQ $0x05, AX
JMP repeat_end_emit_encodeBlockAsm
repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
LEAL -256(SI), SI
MOVW $0x0019, (AX)
MOVW SI, 2(AX)
ADDQ $0x04, AX
JMP repeat_end_emit_encodeBlockAsm
repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
LEAL -4(SI), SI
MOVW $0x0015, (AX)
MOVB SI, 2(AX)
ADDQ $0x03, AX
JMP repeat_end_emit_encodeBlockAsm
repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
SHLL $0x02, SI
ORL $0x01, SI
MOVW SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm
repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
XORQ R8, R8
LEAL 1(R8)(SI*4), SI
MOVB DI, 1(AX)
SARL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm
// The next jump is unreachable: it follows an unconditional JMP and has no label.
JMP two_byte_offset_repeat_as_copy_encodeBlockAsm
two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
// length < 12 and offset < 2048: 2 bytes,
// first byte = 1 | (length-4)<<2 | (offset>>8)<<5, second = offset low byte.
CMPL SI, $0x0c
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
CMPL DI, $0x00000800
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
MOVB $0x01, BL
LEAL -16(BX)(SI*4), SI
MOVB DI, 1(AX)
SHRL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm
emit_copy_three_repeat_as_copy_encodeBlockAsm:
// 3 bytes: first byte = 2 | (length-1)<<2, then the 16-bit offset.
MOVB $0x02, BL
LEAL -4(BX)(SI*4), SI
MOVB SI, (AX)
MOVW DI, 1(AX)
ADDQ $0x03, AX
repeat_end_emit_encodeBlockAsm:
// Everything up to s has been emitted: nextEmit = s, resume searching.
MOVL CX, 12(SP)
JMP search_loop_encodeBlockAsm
no_repeat_found_encodeBlockAsm:
// Try the three hash candidates in turn, comparing 4 bytes each time:
//   candidate for s   (SI),
//   candidate for s+1 (R8),
//   candidate for s+2 (loaded from the table slot R10).
// The s+2 slot is updated with s+2 (R9) on either remaining path.
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBlockAsm
SHRQ $0x08, DI
MOVL 24(SP)(R10*4), SI
LEAL 2(CX), R9
CMPL (DX)(R8*1), DI
JEQ candidate2_match_encodeBlockAsm
MOVL R9, 24(SP)(R10*4)
SHRQ $0x08, DI
CMPL (DX)(SI*1), DI
JEQ candidate3_match_encodeBlockAsm
// No match: continue from the position saved in 20(SP).
MOVL 20(SP), CX
JMP search_loop_encodeBlockAsm
candidate3_match_encodeBlockAsm:
// Match is at s+2.
ADDL $0x02, CX
JMP candidate_match_encodeBlockAsm
candidate2_match_encodeBlockAsm:
// Match is at s+1; the candidate index is R8.
MOVL R9, 24(SP)(R10*4)
INCL CX
MOVL R8, SI
candidate_match_encodeBlockAsm:
// CX = match position, SI = candidate index. Extend the match backwards
// while CX > nextEmit, the preceding bytes are equal, and SI has not reached 0.
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeBlockAsm
match_extend_back_loop_encodeBlockAsm:
CMPL CX, DI
JLE match_extend_back_end_encodeBlockAsm
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeBlockAsm
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeBlockAsm
JMP match_extend_back_loop_encodeBlockAsm
match_extend_back_end_encodeBlockAsm:
// Output bound check: if dst pointer + 5 + pending literal length reaches
// the limit in 0(SP), give up and return 0.
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 5(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeBlockAsm
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeBlockAsm:
// Emit pending literals src[nextEmit:s] (skipped when empty).
// DI = literal source pointer, R9 = literal length, R8 = length - 1.
// Header selection is the same as for the repeat path above.
MOVL CX, DI
MOVL 12(SP), R8
CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeBlockAsm
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(R8*1), DI
SUBL R8, R9
LEAL -1(R9), R8
CMPL R8, $0x3c
JLT one_byte_match_emit_encodeBlockAsm
CMPL R8, $0x00000100
JLT two_bytes_match_emit_encodeBlockAsm
CMPL R8, $0x00010000
JLT three_bytes_match_emit_encodeBlockAsm
CMPL R8, $0x01000000
JLT four_bytes_match_emit_encodeBlockAsm
MOVB $0xfc, (AX)
MOVL R8, 1(AX)
ADDQ $0x05, AX
JMP memmove_long_match_emit_encodeBlockAsm
four_bytes_match_emit_encodeBlockAsm:
MOVL R8, R10
SHRL $0x10, R10
MOVB $0xf8, (AX)
MOVW R8, 1(AX)
MOVB R10, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_match_emit_encodeBlockAsm
three_bytes_match_emit_encodeBlockAsm:
MOVB $0xf4, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeBlockAsm
two_bytes_match_emit_encodeBlockAsm:
MOVB $0xf0, (AX)
MOVB R8, 1(AX)
ADDQ $0x02, AX
CMPL R8, $0x40
JL memmove_match_emit_encodeBlockAsm
JMP memmove_long_match_emit_encodeBlockAsm
one_byte_match_emit_encodeBlockAsm:
SHLB $0x02, R8
MOVB R8, (AX)
ADDQ $0x01, AX
memmove_match_emit_encodeBlockAsm:
// R8 = dst pointer after the literal bytes.
LEAQ (AX)(R9*1), R8
// genMemMoveShort
// Copy R9 (<= 64) bytes from DI to AX using overlapping head/tail moves.
CMPQ R9, $0x08
JLE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8:
MOVQ (DI), R10
MOVQ R10, (AX)
JMP memmove_end_copy_match_emit_encodeBlockAsm
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16:
MOVQ (DI), R10
MOVQ -8(DI)(R9*1), DI
MOVQ R10, (AX)
MOVQ DI, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm
emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeBlockAsm:
MOVQ R8, AX
JMP emit_literal_done_match_emit_encodeBlockAsm
memmove_long_match_emit_encodeBlockAsm:
// R8 = dst pointer after the literal bytes.
LEAQ (AX)(R9*1), R8
// genMemMoveLong
// Same scheme as the long copy in the repeat path: head and tail are kept
// in X0..X3, the middle is copied in 32-byte chunks with aligned stores.
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVQ R9, R11
SHRQ $0x05, R11
MOVQ AX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
DECQ R11
JA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
LEAQ -32(DI)(R12*1), R10
LEAQ -32(AX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32:
MOVOU -32(DI)(R12*1), X4
MOVOU -16(DI)(R12*1), X5
MOVOA X4, -32(AX)(R12*1)
MOVOA X5, -16(AX)(R12*1)
ADDQ $0x20, R12
CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ R8, AX
emit_literal_done_match_emit_encodeBlockAsm:
match_nolit_loop_encodeBlockAsm:
// A 4-byte match at s (CX) against candidate (SI) with no pending literals.
// repeat offset = s - candidate; skip the 4 known-equal bytes on both sides.
// DI = bytes left in src, R8 = src+s, SI = src+candidate.
MOVL CX, DI
SUBL SI, DI
MOVL DI, 16(SP)
ADDL $0x04, CX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL CX, DI
LEAQ (DX)(CX*1), R8
LEAQ (DX)(SI*1), SI
// matchLen
// R10 = number of equal bytes at R8 and SI, at most DI.
XORL R10, R10
CMPL DI, $0x08
JL matchlen_single_match_nolit_encodeBlockAsm
matchlen_loopback_match_nolit_encodeBlockAsm:
MOVQ (R8)(R10*1), R9
XORQ (SI)(R10*1), R9
TESTQ R9, R9
JZ matchlen_loop_match_nolit_encodeBlockAsm
BSFQ R9, R9
SARQ $0x03, R9
LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeBlockAsm
matchlen_loop_match_nolit_encodeBlockAsm:
LEAL -8(DI), DI
LEAL 8(R10), R10
CMPL DI, $0x08
JGE matchlen_loopback_match_nolit_encodeBlockAsm
matchlen_single_match_nolit_encodeBlockAsm:
TESTL DI, DI
JZ match_nolit_end_encodeBlockAsm
matchlen_single_loopback_match_nolit_encodeBlockAsm:
MOVB (R8)(R10*1), R9
CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeBlockAsm
LEAL 1(R10), R10
DECL DI
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm
match_nolit_end_encodeBlockAsm:
// s += extra matched bytes; SI = offset; R10 = total match length
// (extra + 4); nextEmit = s.
ADDL R10, CX
MOVL 16(SP), SI
ADDL $0x04, R10
MOVL CX, 12(SP)
// emitCopy
// SI = offset, R10 = length. Same encoding as repeat_as_copy above.
CMPL SI, $0x00010000
JL two_byte_offset_match_nolit_encodeBlockAsm
four_bytes_loop_back_match_nolit_encodeBlockAsm:
CMPL R10, $0x40
JLE four_bytes_remain_match_nolit_encodeBlockAsm
MOVB $0xff, (AX)
MOVL SI, 1(AX)
LEAL -64(R10), R10
ADDQ $0x05, AX
CMPL R10, $0x04
JL four_bytes_remain_match_nolit_encodeBlockAsm
// emitRepeat
// R10 = length on entry; DI keeps length, R10 becomes length - 4.
emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
MOVL R10, DI
LEAL -4(R10), R10
CMPL DI, $0x08
JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy
CMPL DI, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
CMPL R10, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy
CMPL R10, $0x00010100
JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy
CMPL R10, $0x0100ffff
JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy
LEAL -16842747(R10), R10
MOVW $0x001d, (AX)
MOVW $0xfffb, 2(AX)
MOVB $0xff, 4(AX)
ADDQ $0x05, AX
JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy
repeat_five_match_nolit_encodeBlockAsm_emit_copy:
LEAL -65536(R10), R10
MOVL R10, SI
MOVW $0x001d, (AX)
MOVW R10, 2(AX)
SARL $0x10, SI
MOVB SI, 4(AX)
ADDQ $0x05, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_four_match_nolit_encodeBlockAsm_emit_copy:
LEAL -256(R10), R10
MOVW $0x0019, (AX)
MOVW R10, 2(AX)
ADDQ $0x04, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_three_match_nolit_encodeBlockAsm_emit_copy:
LEAL -4(R10), R10
MOVW $0x0015, (AX)
MOVB R10, 2(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_two_match_nolit_encodeBlockAsm_emit_copy:
SHLL $0x02, R10
ORL $0x01, R10
MOVW R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
XORQ DI, DI
LEAL 1(DI)(R10*4), R10
MOVB SI, 1(AX)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm
// The next jump is unreachable: it follows an unconditional JMP and has no label.
JMP four_bytes_loop_back_match_nolit_encodeBlockAsm
four_bytes_remain_match_nolit_encodeBlockAsm:
// Remaining length (0 means nothing left): 5 bytes,
// first byte = 3 | (length-1)<<2, then the 32-bit offset.
TESTL R10, R10
JZ match_nolit_emitcopy_end_encodeBlockAsm
MOVB $0x03, BL
LEAL -4(BX)(R10*4), R10
MOVB R10, (AX)
MOVL SI, 1(AX)
ADDQ $0x05, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm
two_byte_offset_match_nolit_encodeBlockAsm:
// Offset < 65536. length > 64: emit 0xee + 16-bit offset, subtract 60
// from the length and encode the rest as a repeat.
CMPL R10, $0x40
JLE two_byte_offset_short_match_nolit_encodeBlockAsm
MOVB $0xee, (AX)
MOVW SI, 1(AX)
LEAL -60(R10), R10
ADDQ $0x03, AX
// emitRepeat
// R10 = length on entry; DI keeps length, R10 becomes length - 4.
emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
MOVL R10, DI
LEAL -4(R10), R10
CMPL DI, $0x08
JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
CMPL DI, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
CMPL R10, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
CMPL R10, $0x00010100
JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
CMPL R10, $0x0100ffff
JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
LEAL -16842747(R10), R10
MOVW $0x001d, (AX)
MOVW $0xfffb, 2(AX)
MOVB $0xff, 4(AX)
ADDQ $0x05, AX
JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short
repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
LEAL -65536(R10), R10
MOVL R10, SI
MOVW $0x001d, (AX)
MOVW R10, 2(AX)
SARL $0x10, SI
MOVB SI, 4(AX)
ADDQ $0x05, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
LEAL -256(R10), R10
MOVW $0x0019, (AX)
MOVW R10, 2(AX)
ADDQ $0x04, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
LEAL -4(R10), R10
MOVW $0x0015, (AX)
MOVB R10, 2(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
SHLL $0x02, R10
ORL $0x01, R10
MOVW R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm
repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
XORQ DI, DI
LEAL 1(DI)(R10*4), R10
MOVB SI, 1(AX)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm
// The next jump is unreachable: it follows an unconditional JMP and has no label.
JMP two_byte_offset_match_nolit_encodeBlockAsm
two_byte_offset_short_match_nolit_encodeBlockAsm:
// length < 12 and offset < 2048: 2 bytes,
// first byte = 1 | (length-4)<<2 | (offset>>8)<<5, second = offset low byte.
CMPL R10, $0x0c
JGE emit_copy_three_match_nolit_encodeBlockAsm
CMPL SI, $0x00000800
JGE emit_copy_three_match_nolit_encodeBlockAsm
MOVB $0x01, BL
LEAL -16(BX)(R10*4), R10
MOVB SI, 1(AX)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm
emit_copy_three_match_nolit_encodeBlockAsm:
// 3 bytes: first byte = 2 | (length-1)<<2, then the 16-bit offset.
MOVB $0x02, BL
LEAL -4(BX)(R10*4), R10
MOVB R10, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
match_nolit_emitcopy_end_encodeBlockAsm:
// Past the search limit: flush the remaining literals.
CMPL CX, 8(SP)
JGE emit_remainder_encodeBlockAsm
// DI = 8 bytes at src[s-2]. Return 0 if the dst pointer reached the limit.
MOVQ -2(DX)(CX*1), DI
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeBlockAsm
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeBlockAsm:
// Hash the bytes at s-2 (R8) and at s (SI), store s-2 and s into the
// table, and take the previous entry for s as the new candidate (SI).
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ DI, R8
SHRQ $0x10, DI
MOVQ DI, SI
SHLQ $0x10, R8
IMULQ R9, R8
SHRQ $0x32, R8
SHLQ $0x10, SI
IMULQ R9, SI
SHRQ $0x32, SI
LEAL -2(CX), R9
LEAQ 24(SP)(SI*4), R10
MOVL (R10), SI
MOVL R9, 24(SP)(R8*4)
MOVL CX, (R10)
// If the 4 bytes at the candidate equal the 4 bytes at s, emit another
// match immediately; otherwise advance s by one and search again.
CMPL (DX)(SI*1), DI
JEQ match_nolit_loop_encodeBlockAsm
INCL CX
JMP search_loop_encodeBlockAsm
emit_remainder_encodeBlockAsm:
// Output bound check for the trailing literals src[nextEmit:src_len]:
// return 0 if dst pointer + 5 + remaining length reaches the limit in 0(SP).
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 5(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeBlockAsm
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeBlockAsm:
// Emit src[nextEmit:src_len] as one literal (skipped when empty).
// CX = literal source pointer, SI = literal length, DX = length - 1
// (DX no longer holds the src base from here on).
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeBlockAsm
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeBlockAsm
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeBlockAsm
CMPL DX, $0x00010000
JLT three_bytes_emit_remainder_encodeBlockAsm
CMPL DX, $0x01000000
JLT four_bytes_emit_remainder_encodeBlockAsm
MOVB $0xfc, (AX)
MOVL DX, 1(AX)
ADDQ $0x05, AX
JMP memmove_long_emit_remainder_encodeBlockAsm
four_bytes_emit_remainder_encodeBlockAsm:
MOVL DX, BX
SHRL $0x10, BX
MOVB $0xf8, (AX)
MOVW DX, 1(AX)
MOVB BL, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_emit_remainder_encodeBlockAsm
three_bytes_emit_remainder_encodeBlockAsm:
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeBlockAsm
two_bytes_emit_remainder_encodeBlockAsm:
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
CMPL DX, $0x40
JL memmove_emit_remainder_encodeBlockAsm
JMP memmove_long_emit_remainder_encodeBlockAsm
one_byte_emit_remainder_encodeBlockAsm:
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
memmove_emit_remainder_encodeBlockAsm:
// DX = dst pointer after the literal bytes, BX = literal length.
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// Copy BX (<= 64) bytes from CX to AX using overlapping head/tail moves.
CMPQ BX, $0x08
JLE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8:
MOVQ (CX), SI
MOVQ SI, (AX)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16:
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm
emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeBlockAsm:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeBlockAsm
memmove_long_emit_remainder_encodeBlockAsm:
// DX = dst pointer after the literal bytes, BX = literal length.
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
// Same scheme as the other long copies: head and tail are kept in X0..X3,
// the middle is copied in 32-byte chunks with aligned stores.
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32:
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
emit_literal_done_emit_remainder_encodeBlockAsm:
// Return the number of bytes written: dst write pointer - dst_base.
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeBlockAsm4MB(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeBlockAsm4MB(SB), $65560-56
MOVQ dst_base+0(FP), AX
MOVQ $0x00000200, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeBlockAsm4MB:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeBlockAsm4MB
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), CX
LEAQ -9(CX), DX
LEAQ -8(CX), SI
MOVL SI, 8(SP)
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
MOVL $0x00000001, CX
MOVL CX, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeBlockAsm4MB:
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x06, SI
LEAL 4(CX)(SI*1), SI
CMPL SI, 8(SP)
JGE emit_remainder_encodeBlockAsm4MB
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ DI, R10
MOVQ DI, R11
SHRQ $0x08, R11
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x32, R10
SHLQ $0x10, R11
IMULQ R9, R11
SHRQ $0x32, R11
MOVL 24(SP)(R10*4), SI
MOVL 24(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
LEAL 1(CX), R10
MOVL R10, 24(SP)(R11*4)
MOVQ DI, R10
SHRQ $0x10, R10
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x32, R10
MOVL CX, R9
SUBL 16(SP), R9
MOVL 1(DX)(R9*1), R11
MOVQ DI, R9
SHRQ $0x08, R9
CMPL R9, R11
JNE no_repeat_found_encodeBlockAsm4MB
LEAL 1(CX), DI
MOVL 12(SP), R8
MOVL DI, SI
SUBL 16(SP), SI
JZ repeat_extend_back_end_encodeBlockAsm4MB
repeat_extend_back_loop_encodeBlockAsm4MB:
CMPL DI, R8
JLE repeat_extend_back_end_encodeBlockAsm4MB
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(DI*1), R9
CMPB BL, R9
JNE repeat_extend_back_end_encodeBlockAsm4MB
LEAL -1(DI), DI
DECL SI
JNZ repeat_extend_back_loop_encodeBlockAsm4MB
repeat_extend_back_end_encodeBlockAsm4MB:
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeBlockAsm4MB
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R10
SUBL SI, R9
LEAL -1(R9), SI
CMPL SI, $0x3c
JLT one_byte_repeat_emit_encodeBlockAsm4MB
CMPL SI, $0x00000100
JLT two_bytes_repeat_emit_encodeBlockAsm4MB
CMPL SI, $0x00010000
JLT three_bytes_repeat_emit_encodeBlockAsm4MB
MOVL SI, R11
SHRL $0x10, R11
MOVB $0xf8, (AX)
MOVW SI, 1(AX)
MOVB R11, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_repeat_emit_encodeBlockAsm4MB
three_bytes_repeat_emit_encodeBlockAsm4MB:
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_repeat_emit_encodeBlockAsm4MB
two_bytes_repeat_emit_encodeBlockAsm4MB:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
CMPL SI, $0x40
JL memmove_repeat_emit_encodeBlockAsm4MB
JMP memmove_long_repeat_emit_encodeBlockAsm4MB
one_byte_repeat_emit_encodeBlockAsm4MB:
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_repeat_emit_encodeBlockAsm4MB:
LEAQ (AX)(R9*1), SI
// genMemMoveShort
CMPQ R9, $0x08
JLE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8:
MOVQ (R10), R11
MOVQ R11, (AX)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB
emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16:
MOVQ (R10), R11
MOVQ -8(R10)(R9*1), R10
MOVQ R11, (AX)
MOVQ R10, -8(AX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB
emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB
emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_repeat_emit_encodeBlockAsm4MB:
MOVQ SI, AX
JMP emit_literal_done_repeat_emit_encodeBlockAsm4MB
memmove_long_repeat_emit_encodeBlockAsm4MB:
LEAQ (AX)(R9*1), SI
// genMemMoveLong
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R12
SHRQ $0x05, R12
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R13
SUBQ R11, R13
DECQ R12
JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
LEAQ -32(R10)(R13*1), R11
LEAQ -32(AX)(R13*1), R14
emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R11
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
MOVOU -32(R10)(R13*1), X4
MOVOU -16(R10)(R13*1), X5
MOVOA X4, -32(AX)(R13*1)
MOVOA X5, -16(AX)(R13*1)
ADDQ $0x20, R13
CMPQ R9, R13
JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ SI, AX
// Forward extension of a repeat match: counts how many bytes at DX+CX equal
// the bytes one stored offset (16(SP)) earlier. The count is left in R12.
// Register use inside this section:
//   CX  = source index, advanced by 5 before comparing
//   R10 = DX + CX, the position being extended
//   SI  = DX + (CX - 16(SP)), the earlier position compared against
//   R9  = src_len - CX, number of bytes still available
// NOTE(review): DX is used as the source base pointer; it is loaded before
// this excerpt - confirm against the function prologue.
emit_literal_done_repeat_emit_encodeBlockAsm4MB:
ADDL $0x05, CX
MOVL CX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), R9
SUBL CX, R9
LEAQ (DX)(CX*1), R10
LEAQ (DX)(SI*1), SI
// matchLen
XORL R12, R12
CMPL R9, $0x08
JL matchlen_single_repeat_extend_encodeBlockAsm4MB
matchlen_loopback_repeat_extend_encodeBlockAsm4MB:
// Compare 8 bytes per iteration; the XOR is zero when all eight agree.
MOVQ (R10)(R12*1), R11
XORQ (SI)(R12*1), R11
TESTQ R11, R11
JZ matchlen_loop_repeat_extend_encodeBlockAsm4MB
// Mismatch inside this quadword: BSFQ finds the lowest differing bit and
// the shift by 3 turns that bit index into a count of equal bytes.
BSFQ R11, R11
SARQ $0x03, R11
LEAL (R12)(R11*1), R12
JMP repeat_extend_forward_end_encodeBlockAsm4MB
matchlen_loop_repeat_extend_encodeBlockAsm4MB:
// All 8 bytes matched: account for them and loop while 8 or more remain.
LEAL -8(R9), R9
LEAL 8(R12), R12
CMPL R9, $0x08
JGE matchlen_loopback_repeat_extend_encodeBlockAsm4MB
matchlen_single_repeat_extend_encodeBlockAsm4MB:
// Fewer than 8 bytes left: finish byte by byte (nothing to do if R9 is 0).
TESTL R9, R9
JZ repeat_extend_forward_end_encodeBlockAsm4MB
matchlen_single_loopback_repeat_extend_encodeBlockAsm4MB:
MOVB (R10)(R12*1), R11
CMPB (SI)(R12*1), R11
JNE repeat_extend_forward_end_encodeBlockAsm4MB
LEAL 1(R12), R12
DECL R9
JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm4MB
// End of the forward extension: CX is advanced by the R12 bytes that matched,
// SI becomes CX - DI (the length to encode) and DI is reloaded with the
// offset stored in 16(SP).
// NOTE(review): DI (before the reload) and R8 are set before this excerpt;
// in the 12B variant later in this file DI is the match start and R8 is a
// copy of 12(SP) - confirm the same holds here.
// When R8 is zero the match is encoded by the emitCopy path instead.
repeat_extend_forward_end_encodeBlockAsm4MB:
ADDL R12, CX
MOVL CX, SI
SUBL DI, SI
MOVL 16(SP), DI
TESTL R8, R8
JZ repeat_as_copy_encodeBlockAsm4MB
// emitRepeat
// R8 keeps the full length, SI becomes length-4. The encoding is chosen by size:
//   length <= 8                      -> 2 bytes
//   length < 12 and offset < 2048    -> 2 bytes carrying the offset
//   length-4 < 260                   -> 3 bytes
//   length-4 < 65792                 -> 4 bytes
//   otherwise                        -> 5 bytes
MOVL SI, R8
LEAL -4(SI), SI
CMPL R8, $0x08
JLE repeat_two_match_repeat_encodeBlockAsm4MB
CMPL R8, $0x0c
JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB
CMPL DI, $0x00000800
JLT repeat_two_offset_match_repeat_encodeBlockAsm4MB
cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB:
CMPL SI, $0x00000104
JLT repeat_three_match_repeat_encodeBlockAsm4MB
CMPL SI, $0x00010100
JLT repeat_four_match_repeat_encodeBlockAsm4MB
// 5-byte form: word 0x001d, then the low 16 bits of (length-4-65536),
// then bits 16..23 of that value.
LEAL -65536(SI), SI
MOVL SI, DI
MOVW $0x001d, (AX)
MOVW SI, 2(AX)
SARL $0x10, DI
MOVB DI, 4(AX)
ADDQ $0x05, AX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_four_match_repeat_encodeBlockAsm4MB:
// 4-byte form: word 0x0019 followed by the 16-bit value (length-4-256).
LEAL -256(SI), SI
MOVW $0x0019, (AX)
MOVW SI, 2(AX)
ADDQ $0x04, AX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_three_match_repeat_encodeBlockAsm4MB:
// 3-byte form: word 0x0015 followed by the byte (length-4-4).
LEAL -4(SI), SI
MOVW $0x0015, (AX)
MOVB SI, 2(AX)
ADDQ $0x03, AX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_two_match_repeat_encodeBlockAsm4MB:
// 2-byte form: the 16-bit value ((length-4)<<2 | 1); its high byte is zero
// because length <= 8 on this path.
SHLL $0x02, SI
ORL $0x01, SI
MOVW SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_two_offset_match_repeat_encodeBlockAsm4MB:
// 2-byte form with offset: byte 1 is the low 8 bits of the offset, byte 0 is
// ((length-4)*4 + 1) OR-ed with the offset bits above 8 shifted left by 5.
XORQ R8, R8
LEAL 1(R8)(SI*4), SI
MOVB DI, 1(AX)
SARL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm4MB
// Encodes the match as a copy (offset in DI, length in SI) and writes it at
// AX. Every path ends at repeat_end_emit, which stores CX in 12(SP) and
// returns to the search loop.
// Offsets of 65536 and above take the 5-byte form with a 32-bit offset;
// smaller offsets take the 2- or 3-byte forms.
repeat_as_copy_encodeBlockAsm4MB:
// emitCopy
CMPL DI, $0x00010000
JL two_byte_offset_repeat_as_copy_encodeBlockAsm4MB
four_bytes_loop_back_repeat_as_copy_encodeBlockAsm4MB:
// Length above 64: write 0xff plus the 32-bit offset (0xff is the
// four_bytes_remain header formula evaluated at length 64), deduct 64 and
// encode what is left.
CMPL SI, $0x40
JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
MOVB $0xff, (AX)
MOVL DI, 1(AX)
LEAL -64(SI), SI
ADDQ $0x05, AX
CMPL SI, $0x04
JL four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
// emitRepeat
// At least 4 bytes are left: encode them with the same size ladder as the
// other emitRepeat expansions (2, 2-with-offset, 3, 4 or 5 bytes).
MOVL SI, R8
LEAL -4(SI), SI
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
CMPL DI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
CMPL SI, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy
CMPL SI, $0x00010100
JLT repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy
// 5-byte form: word 0x001d, low 16 bits of (length-4-65536), then bits 16..23.
LEAL -65536(SI), SI
MOVL SI, DI
MOVW $0x001d, (AX)
MOVW SI, 2(AX)
SARL $0x10, DI
MOVB DI, 4(AX)
ADDQ $0x05, AX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
// 4-byte form: word 0x0019 followed by the 16-bit value (length-4-256).
LEAL -256(SI), SI
MOVW $0x0019, (AX)
MOVW SI, 2(AX)
ADDQ $0x04, AX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
// 3-byte form: word 0x0015 followed by the byte (length-4-4).
LEAL -4(SI), SI
MOVW $0x0015, (AX)
MOVB SI, 2(AX)
ADDQ $0x03, AX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
// 2-byte form: the 16-bit value ((length-4)<<2 | 1).
SHLL $0x02, SI
ORL $0x01, SI
MOVW SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
// 2-byte form with offset: low offset byte at 1(AX); byte 0 combines
// (length-4)*4 + 1 with the offset bits above 8 shifted left by 5.
XORQ R8, R8
LEAL 1(R8)(SI*4), SI
MOVB DI, 1(AX)
SARL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm4MB
// The next JMP follows an unconditional JMP and carries no label, so control
// never reaches it.
JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm4MB
four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB:
// Remaining length of at most 64 (or fewer than 4 after a 64-byte element):
// nothing to write when it is zero, otherwise one byte (length-1)*4 + 3
// followed by the 32-bit offset. Only BL is initialised, which is enough
// because only the low byte of SI is stored.
TESTL SI, SI
JZ repeat_end_emit_encodeBlockAsm4MB
MOVB $0x03, BL
LEAL -4(BX)(SI*4), SI
MOVB SI, (AX)
MOVL DI, 1(AX)
ADDQ $0x05, AX
JMP repeat_end_emit_encodeBlockAsm4MB
two_byte_offset_repeat_as_copy_encodeBlockAsm4MB:
// Offset below 65536. Length above 64: write 0xee plus the 16-bit offset
// (0xee is the emit_copy_three header formula evaluated at length 60),
// deduct 60 and encode the rest with emitRepeat.
CMPL SI, $0x40
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB
MOVB $0xee, (AX)
MOVW DI, 1(AX)
LEAL -60(SI), SI
ADDQ $0x03, AX
// emitRepeat
MOVL SI, R8
LEAL -4(SI), SI
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
CMPL DI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
CMPL SI, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
CMPL SI, $0x00010100
JLT repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
// 5-byte form: word 0x001d, low 16 bits of (length-4-65536), then bits 16..23.
LEAL -65536(SI), SI
MOVL SI, DI
MOVW $0x001d, (AX)
MOVW SI, 2(AX)
SARL $0x10, DI
MOVB DI, 4(AX)
ADDQ $0x05, AX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
// 4-byte form: word 0x0019 followed by the 16-bit value (length-4-256).
LEAL -256(SI), SI
MOVW $0x0019, (AX)
MOVW SI, 2(AX)
ADDQ $0x04, AX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
// 3-byte form: word 0x0015 followed by the byte (length-4-4).
LEAL -4(SI), SI
MOVW $0x0015, (AX)
MOVB SI, 2(AX)
ADDQ $0x03, AX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
// 2-byte form: the 16-bit value ((length-4)<<2 | 1).
SHLL $0x02, SI
ORL $0x01, SI
MOVW SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm4MB
repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
// 2-byte form with offset: low offset byte at 1(AX); byte 0 combines
// (length-4)*4 + 1 with the offset bits above 8 shifted left by 5.
XORQ R8, R8
LEAL 1(R8)(SI*4), SI
MOVB DI, 1(AX)
SARL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm4MB
// The next JMP follows an unconditional JMP and carries no label, so control
// never reaches it.
JMP two_byte_offset_repeat_as_copy_encodeBlockAsm4MB
two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB:
// Length of at most 64. With length < 12 and offset < 2048 a 2-byte element
// is written: byte 0 = ((length-4)*4 + 1) | (offset>>8)<<5, byte 1 = low
// offset byte. Otherwise the 3-byte element below is used.
CMPL SI, $0x0c
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
CMPL DI, $0x00000800
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
MOVB $0x01, BL
LEAL -16(BX)(SI*4), SI
MOVB DI, 1(AX)
SHRL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm4MB
emit_copy_three_repeat_as_copy_encodeBlockAsm4MB:
// 3-byte element: byte 0 = (length-1)*4 + 2, followed by the 16-bit offset.
MOVB $0x02, BL
LEAL -4(BX)(SI*4), SI
MOVB SI, (AX)
MOVW DI, 1(AX)
ADDQ $0x03, AX
repeat_end_emit_encodeBlockAsm4MB:
// Record the current source index in 12(SP) and resume searching.
MOVL CX, 12(SP)
JMP search_loop_encodeBlockAsm4MB
// No repeat at this position: test up to three hash-table candidates.
// NOTE(review): SI, R8, R10 and DI are prepared before this excerpt. In the
// 12B variant later in this file DI holds 8 source bytes loaded at CX, SI and
// R8 are the table entries for CX and CX+1, and R10 is the table index for
// CX+2 - confirm the same roles apply here.
// Each test compares 4 bytes at DX+candidate with the low 32 bits of DI; DI
// is shifted right by one byte between tests so it lines up with CX+1, CX+2.
no_repeat_found_encodeBlockAsm4MB:
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBlockAsm4MB
SHRQ $0x08, DI
// Fetch the third candidate from the table slot R10 before that slot is
// overwritten with CX+2.
MOVL 24(SP)(R10*4), SI
LEAL 2(CX), R9
CMPL (DX)(R8*1), DI
JEQ candidate2_match_encodeBlockAsm4MB
MOVL R9, 24(SP)(R10*4)
SHRQ $0x08, DI
CMPL (DX)(SI*1), DI
JEQ candidate3_match_encodeBlockAsm4MB
// Nothing matched: continue the search at the position saved in 20(SP).
MOVL 20(SP), CX
JMP search_loop_encodeBlockAsm4MB
candidate3_match_encodeBlockAsm4MB:
// Third candidate matched; the match starts at CX+2.
ADDL $0x02, CX
JMP candidate_match_encodeBlockAsm4MB
candidate2_match_encodeBlockAsm4MB:
// Second candidate matched: still record CX+2 in the table, then continue
// with the match starting at CX+1 and R8 as the candidate index.
MOVL R9, 24(SP)(R10*4)
INCL CX
MOVL R8, SI
// A candidate at index SI matches the data at index CX. Extend the match
// backwards one byte at a time while CX is still above 12(SP), SI is above
// zero and the preceding bytes are equal.
candidate_match_encodeBlockAsm4MB:
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeBlockAsm4MB
match_extend_back_loop_encodeBlockAsm4MB:
CMPL CX, DI
JLE match_extend_back_end_encodeBlockAsm4MB
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeBlockAsm4MB
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeBlockAsm4MB
JMP match_extend_back_loop_encodeBlockAsm4MB
match_extend_back_end_encodeBlockAsm4MB:
// Output space check: AX + 4 + (CX - 12(SP)) must stay below the pointer
// stored at (SP); otherwise the function returns 0.
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 4(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeBlockAsm4MB
MOVQ $0x00000000, ret+48(FP)
RET
// Emits the pending literal bytes, i.e. the source range [12(SP), CX), ahead
// of the match. Skipped entirely when the range is empty.
// Register use inside this section:
//   R9 = number of literal bytes
//   DI = DX + old 12(SP), the first literal byte
//   R8 = R9 - 1 while the header is chosen, later AX + R9 (new write pointer)
// 12(SP) is updated to CX before the copy.
match_dst_size_check_encodeBlockAsm4MB:
MOVL CX, DI
MOVL 12(SP), R8
CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeBlockAsm4MB
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(R8*1), DI
SUBL R8, R9
LEAL -1(R9), R8
// Header size depends on (length-1): below 60 one byte, below 256 two bytes,
// below 65536 three bytes, otherwise four bytes.
CMPL R8, $0x3c
JLT one_byte_match_emit_encodeBlockAsm4MB
CMPL R8, $0x00000100
JLT two_bytes_match_emit_encodeBlockAsm4MB
CMPL R8, $0x00010000
JLT three_bytes_match_emit_encodeBlockAsm4MB
// Four-byte header: 0xf8, the low 16 bits of (length-1), then bits 16..23.
MOVL R8, R10
SHRL $0x10, R10
MOVB $0xf8, (AX)
MOVW R8, 1(AX)
MOVB R10, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_match_emit_encodeBlockAsm4MB
three_bytes_match_emit_encodeBlockAsm4MB:
// Three-byte header: 0xf4 followed by the 16-bit value (length-1).
MOVB $0xf4, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeBlockAsm4MB
two_bytes_match_emit_encodeBlockAsm4MB:
// Two-byte header: 0xf0 followed by the byte (length-1). The short copy
// routine is used when (length-1) is below 64, the long one otherwise.
MOVB $0xf0, (AX)
MOVB R8, 1(AX)
ADDQ $0x02, AX
CMPL R8, $0x40
JL memmove_match_emit_encodeBlockAsm4MB
JMP memmove_long_match_emit_encodeBlockAsm4MB
one_byte_match_emit_encodeBlockAsm4MB:
// One-byte header: (length-1) shifted left by 2.
SHLB $0x02, R8
MOVB R8, (AX)
ADDQ $0x01, AX
memmove_match_emit_encodeBlockAsm4MB:
LEAQ (AX)(R9*1), R8
// genMemMoveShort
// Copies R9 bytes (at most 64 on the paths above) with possibly overlapping
// loads and stores selected by size class.
CMPQ R9, $0x08
JLE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8:
// Up to 8 bytes: a full 8-byte load and store is issued regardless of R9;
// AX is afterwards set to AX + R9, so only R9 bytes count as output.
MOVQ (DI), R10
MOVQ R10, (AX)
JMP memmove_end_copy_match_emit_encodeBlockAsm4MB
emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16:
// 9..16 bytes: first 8 and last 8 bytes.
MOVQ (DI), R10
MOVQ -8(DI)(R9*1), DI
MOVQ R10, (AX)
MOVQ DI, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm4MB
emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32:
// 17..32 bytes: first 16 and last 16 bytes.
MOVOU (DI), X0
MOVOU -16(DI)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm4MB
emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64:
// 33..64 bytes: first 32 and last 32 bytes.
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeBlockAsm4MB:
MOVQ R8, AX
JMP emit_literal_done_match_emit_encodeBlockAsm4MB
memmove_long_match_emit_encodeBlockAsm4MB:
LEAQ (AX)(R9*1), R8
// genMemMoveLong
// The first and last 32 source bytes are loaded into X0..X3 up front and
// stored last with unaligned stores. In between, 32-byte chunks are copied
// with MOVOA stores to AX + R12 - 32, where R12 starts at 64 - (AX & 31),
// which makes that destination address a multiple of 32.
// NOTE(review): DECQ does not modify CF, so the JA/JNA below observe CF from
// the preceding SUBQ/ADDQ - check the generator before changing this sequence.
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVQ R9, R11
SHRQ $0x05, R11
MOVQ AX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
DECQ R11
JA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
LEAQ -32(DI)(R12*1), R10
LEAQ -32(AX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
// Copy 32 bytes per iteration while R12 has not passed the length R9.
MOVOU -32(DI)(R12*1), X4
MOVOU -16(DI)(R12*1), X5
MOVOA X4, -32(AX)(R12*1)
MOVOA X5, -16(AX)(R12*1)
ADDQ $0x20, R12
CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
// Finally write the saved head and tail, then advance AX past the literals.
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ R8, AX
// Match with no pending literals: CX is the match position and SI the
// candidate index. The distance CX - SI is stored in 16(SP), both indexes
// skip the 4 bytes already known to match, and matchLen counts how many
// further bytes agree. The count is left in R10.
// Register use inside this section:
//   R8 = DX + CX, SI = DX + candidate, DI = src_len - CX (bytes available)
emit_literal_done_match_emit_encodeBlockAsm4MB:
match_nolit_loop_encodeBlockAsm4MB:
MOVL CX, DI
SUBL SI, DI
MOVL DI, 16(SP)
ADDL $0x04, CX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL CX, DI
LEAQ (DX)(CX*1), R8
LEAQ (DX)(SI*1), SI
// matchLen
XORL R10, R10
CMPL DI, $0x08
JL matchlen_single_match_nolit_encodeBlockAsm4MB
matchlen_loopback_match_nolit_encodeBlockAsm4MB:
// Compare 8 bytes per iteration; the XOR is zero when all eight agree.
MOVQ (R8)(R10*1), R9
XORQ (SI)(R10*1), R9
TESTQ R9, R9
JZ matchlen_loop_match_nolit_encodeBlockAsm4MB
// Mismatch: lowest differing bit index divided by 8 gives the equal bytes.
BSFQ R9, R9
SARQ $0x03, R9
LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeBlockAsm4MB
matchlen_loop_match_nolit_encodeBlockAsm4MB:
// All 8 bytes matched: account for them and loop while 8 or more remain.
LEAL -8(DI), DI
LEAL 8(R10), R10
CMPL DI, $0x08
JGE matchlen_loopback_match_nolit_encodeBlockAsm4MB
matchlen_single_match_nolit_encodeBlockAsm4MB:
// Fewer than 8 bytes left: finish byte by byte (nothing to do if DI is 0).
TESTL DI, DI
JZ match_nolit_end_encodeBlockAsm4MB
matchlen_single_loopback_match_nolit_encodeBlockAsm4MB:
MOVB (R8)(R10*1), R9
CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeBlockAsm4MB
LEAL 1(R10), R10
DECL DI
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm4MB
// Match length is known: CX advances by the R10 extra bytes, SI is reloaded
// with the offset from 16(SP), R10 gets the 4 skipped bytes added back so it
// is the full length, and 12(SP) is set to the new CX. The copy is then
// written at AX; every path ends at match_nolit_emitcopy_end.
match_nolit_end_encodeBlockAsm4MB:
ADDL R10, CX
MOVL 16(SP), SI
ADDL $0x04, R10
MOVL CX, 12(SP)
// emitCopy
// Offsets of 65536 and above take the 5-byte form with a 32-bit offset.
CMPL SI, $0x00010000
JL two_byte_offset_match_nolit_encodeBlockAsm4MB
four_bytes_loop_back_match_nolit_encodeBlockAsm4MB:
// Length above 64: write 0xff plus the 32-bit offset, deduct 64 and encode
// what is left.
CMPL R10, $0x40
JLE four_bytes_remain_match_nolit_encodeBlockAsm4MB
MOVB $0xff, (AX)
MOVL SI, 1(AX)
LEAL -64(R10), R10
ADDQ $0x05, AX
CMPL R10, $0x04
JL four_bytes_remain_match_nolit_encodeBlockAsm4MB
// emitRepeat
// DI keeps the remaining length, R10 becomes length-4; size ladder as in
// the other emitRepeat expansions (2, 2-with-offset, 3, 4 or 5 bytes).
MOVL R10, DI
LEAL -4(R10), R10
CMPL DI, $0x08
JLE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy
CMPL DI, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
CMPL R10, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy
CMPL R10, $0x00010100
JLT repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy
// 5-byte form: word 0x001d, low 16 bits of (length-4-65536), then bits 16..23.
LEAL -65536(R10), R10
MOVL R10, SI
MOVW $0x001d, (AX)
MOVW R10, 2(AX)
SARL $0x10, SI
MOVB SI, 4(AX)
ADDQ $0x05, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy:
// 4-byte form: word 0x0019 followed by the 16-bit value (length-4-256).
LEAL -256(R10), R10
MOVW $0x0019, (AX)
MOVW R10, 2(AX)
ADDQ $0x04, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy:
// 3-byte form: word 0x0015 followed by the byte (length-4-4).
LEAL -4(R10), R10
MOVW $0x0015, (AX)
MOVB R10, 2(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy:
// 2-byte form: the 16-bit value ((length-4)<<2 | 1).
SHLL $0x02, R10
ORL $0x01, R10
MOVW R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
// 2-byte form with offset: low offset byte at 1(AX); byte 0 combines
// (length-4)*4 + 1 with the offset bits above 8 shifted left by 5.
XORQ DI, DI
LEAL 1(DI)(R10*4), R10
MOVB SI, 1(AX)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
// The next JMP follows an unconditional JMP and carries no label, so control
// never reaches it.
JMP four_bytes_loop_back_match_nolit_encodeBlockAsm4MB
four_bytes_remain_match_nolit_encodeBlockAsm4MB:
// Remaining length: nothing to write when it is zero, otherwise one byte
// (length-1)*4 + 3 followed by the 32-bit offset. Only BL is initialised,
// which is enough because only the low byte of R10 is stored.
TESTL R10, R10
JZ match_nolit_emitcopy_end_encodeBlockAsm4MB
MOVB $0x03, BL
LEAL -4(BX)(R10*4), R10
MOVB R10, (AX)
MOVL SI, 1(AX)
ADDQ $0x05, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
two_byte_offset_match_nolit_encodeBlockAsm4MB:
// Offset below 65536. Length above 64: write 0xee plus the 16-bit offset,
// deduct 60 and encode the rest with emitRepeat.
CMPL R10, $0x40
JLE two_byte_offset_short_match_nolit_encodeBlockAsm4MB
MOVB $0xee, (AX)
MOVW SI, 1(AX)
LEAL -60(R10), R10
ADDQ $0x03, AX
// emitRepeat
MOVL R10, DI
LEAL -4(R10), R10
CMPL DI, $0x08
JLE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short
CMPL DI, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
CMPL R10, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short
CMPL R10, $0x00010100
JLT repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short
// 5-byte form: word 0x001d, low 16 bits of (length-4-65536), then bits 16..23.
LEAL -65536(R10), R10
MOVL R10, SI
MOVW $0x001d, (AX)
MOVW R10, 2(AX)
SARL $0x10, SI
MOVB SI, 4(AX)
ADDQ $0x05, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short:
// 4-byte form: word 0x0019 followed by the 16-bit value (length-4-256).
LEAL -256(R10), R10
MOVW $0x0019, (AX)
MOVW R10, 2(AX)
ADDQ $0x04, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short:
// 3-byte form: word 0x0015 followed by the byte (length-4-4).
LEAL -4(R10), R10
MOVW $0x0015, (AX)
MOVB R10, 2(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short:
// 2-byte form: the 16-bit value ((length-4)<<2 | 1).
SHLL $0x02, R10
ORL $0x01, R10
MOVW R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
// 2-byte form with offset: low offset byte at 1(AX); byte 0 combines
// (length-4)*4 + 1 with the offset bits above 8 shifted left by 5.
XORQ DI, DI
LEAL 1(DI)(R10*4), R10
MOVB SI, 1(AX)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
// The next JMP follows an unconditional JMP and carries no label, so control
// never reaches it.
JMP two_byte_offset_match_nolit_encodeBlockAsm4MB
two_byte_offset_short_match_nolit_encodeBlockAsm4MB:
// Length of at most 64. With length < 12 and offset < 2048 a 2-byte element
// is written: byte 0 = ((length-4)*4 + 1) | (offset>>8)<<5, byte 1 = low
// offset byte. Otherwise the 3-byte element below is used.
CMPL R10, $0x0c
JGE emit_copy_three_match_nolit_encodeBlockAsm4MB
CMPL SI, $0x00000800
JGE emit_copy_three_match_nolit_encodeBlockAsm4MB
MOVB $0x01, BL
LEAL -16(BX)(R10*4), R10
MOVB SI, 1(AX)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
emit_copy_three_match_nolit_encodeBlockAsm4MB:
// 3-byte element: byte 0 = (length-1)*4 + 2, followed by the 16-bit offset.
MOVB $0x02, BL
LEAL -4(BX)(R10*4), R10
MOVB R10, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
// After a copy has been written: stop searching when CX has reached the
// limit in 8(SP), fail with 0 when AX has reached the limit in (SP), and
// otherwise refresh two hash-table slots and test for an immediate match.
match_nolit_emitcopy_end_encodeBlockAsm4MB:
CMPL CX, 8(SP)
JGE emit_remainder_encodeBlockAsm4MB
// DI = 8 source bytes starting at CX-2.
MOVQ -2(DX)(CX*1), DI
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeBlockAsm4MB
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeBlockAsm4MB:
// Hash = ((value << 16) * constant) >> 50: the shift by 16 discards the two
// highest bytes so 6 bytes are hashed, and the result is a 14-bit index.
// R8 = hash of the bytes at CX-2, SI = hash of the bytes at CX (DI is
// shifted right by 16 first so that it starts at CX).
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ DI, R8
SHRQ $0x10, DI
MOVQ DI, SI
SHLQ $0x10, R8
IMULQ R9, R8
SHRQ $0x32, R8
SHLQ $0x10, SI
IMULQ R9, SI
SHRQ $0x32, SI
// Read the candidate stored for CX, then record CX-2 and CX in their slots.
LEAL -2(CX), R9
LEAQ 24(SP)(SI*4), R10
MOVL (R10), SI
MOVL R9, 24(SP)(R8*4)
MOVL CX, (R10)
// If 4 bytes at the candidate equal the 4 bytes at CX, handle another match
// right away; otherwise step one byte forward and return to the search loop.
CMPL (DX)(SI*1), DI
JEQ match_nolit_loop_encodeBlockAsm4MB
INCL CX
JMP search_loop_encodeBlockAsm4MB
// Final stage: emits the source bytes from index 12(SP) up to src_len as one
// literal and returns the number of bytes written (AX - dst_base).
// Returns 0 instead when AX + 4 + remaining length reaches the limit at (SP).
emit_remainder_encodeBlockAsm4MB:
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 4(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeBlockAsm4MB
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeBlockAsm4MB:
// Nothing left to emit when 12(SP) already equals src_len.
// From here on: SI = literal length, CX = pointer to the first literal byte,
// and DX is reused for (length-1), so it no longer holds the source base.
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeBlockAsm4MB
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
// Header size depends on (length-1): below 60 one byte, below 256 two bytes,
// below 65536 three bytes, otherwise four bytes.
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeBlockAsm4MB
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeBlockAsm4MB
CMPL DX, $0x00010000
JLT three_bytes_emit_remainder_encodeBlockAsm4MB
// Four-byte header: 0xf8, the low 16 bits of (length-1), then bits 16..23.
MOVL DX, BX
SHRL $0x10, BX
MOVB $0xf8, (AX)
MOVW DX, 1(AX)
MOVB BL, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_emit_remainder_encodeBlockAsm4MB
three_bytes_emit_remainder_encodeBlockAsm4MB:
// Three-byte header: 0xf4 followed by the 16-bit value (length-1).
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeBlockAsm4MB
two_bytes_emit_remainder_encodeBlockAsm4MB:
// Two-byte header: 0xf0 followed by the byte (length-1). The short copy
// routine is used when (length-1) is below 64, the long one otherwise.
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
CMPL DX, $0x40
JL memmove_emit_remainder_encodeBlockAsm4MB
JMP memmove_long_emit_remainder_encodeBlockAsm4MB
one_byte_emit_remainder_encodeBlockAsm4MB:
// One-byte header: (length-1) shifted left by 2.
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
memmove_emit_remainder_encodeBlockAsm4MB:
// DX = AX + length (the write pointer after the copy), BX = length.
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// Copies BX bytes (at most 64 on the paths above) with possibly overlapping
// loads and stores selected by size class.
CMPQ BX, $0x08
JLE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8:
// Up to 8 bytes: a full 8-byte load and store is issued regardless of BX;
// AX is afterwards set to AX + BX, so only BX bytes count as output.
// NOTE(review): this reads 8 bytes at CX even when fewer remain before
// src_len - confirm the caller guarantees that read is in bounds.
MOVQ (CX), SI
MOVQ SI, (AX)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB
emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16:
// 9..16 bytes: first 8 and last 8 bytes.
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB
emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32:
// 17..32 bytes: first 16 and last 16 bytes.
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB
emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64:
// 33..64 bytes: first 32 and last 32 bytes.
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeBlockAsm4MB:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeBlockAsm4MB
memmove_long_emit_remainder_encodeBlockAsm4MB:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
// The first and last 32 source bytes are loaded into X0..X3 up front and
// stored last with unaligned stores. In between, 32-byte chunks are copied
// with MOVOA stores to AX + R8 - 32, where R8 starts at 64 - (AX & 31),
// which makes that destination address a multiple of 32.
// NOTE(review): DECQ does not modify CF, so the JA/JNA below observe CF from
// the preceding SUBQ/ADDQ - check the generator before changing this sequence.
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32:
// Copy 32 bytes per iteration while R8 has not passed the length BX.
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
// Finally write the saved head and tail, then advance AX past the literals.
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
emit_literal_done_emit_remainder_encodeBlockAsm4MB:
// Return value: bytes written = final write pointer minus dst_base.
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeBlockAsm12B(dst []byte, src []byte) int
// Requires: SSE2
//
// Prologue. The 16408-byte frame is laid out as follows:
//   0(SP)   8 bytes  output limit pointer: dst_base + src_len - 9 - src_len/32
//   8(SP)   4 bytes  search limit: src_len - 8
//   12(SP)  4 bytes  source index of the next byte still to be emitted (starts at 0)
//   16(SP)  4 bytes  offset used for repeat checks (starts at 1)
//   20(SP)  4 bytes  next search position (written by the search loop)
//   24(SP)  16384 bytes of 32-bit hash-table entries, zeroed below
// On leaving the prologue AX = dst write pointer, CX = 1 (source index) and
// DX = src_base.
TEXT ·encodeBlockAsm12B(SB), $16408-56
MOVQ dst_base+0(FP), AX
// Clear the table: 0x80 iterations of 128 bytes each (8 x 16-byte stores).
MOVQ $0x00000080, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeBlockAsm12B:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeBlockAsm12B
MOVL $0x00000000, 12(SP)
// Compute the two limits from src_len.
MOVQ src_len+32(FP), CX
LEAQ -9(CX), DX
LEAQ -8(CX), SI
MOVL SI, 8(SP)
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
MOVL $0x00000001, CX
MOVL CX, 16(SP)
MOVQ src_base+24(FP), DX
// Main search step at source index CX.
//  1. Next position = CX + 4 + ((CX - 12(SP)) >> 5), saved in 20(SP); when
//     it reaches the limit in 8(SP) the remainder is emitted.
//  2. DI = 8 source bytes at CX. Three hashes are taken, each as
//     ((value << 24) * constant) >> 52: the shift by 24 keeps the low 5
//     bytes and the result is a 12-bit table index.
//  3. SI / R8 receive the previous table entries for CX / CX+1, and the
//     table is updated with CX and CX+1. R10 ends up holding the index for
//     CX+2 (used by no_repeat_found).
//  4. The 4 bytes at CX+1 are compared with the 4 bytes at CX+1-16(SP); on
//     equality the repeat path below is taken.
search_loop_encodeBlockAsm12B:
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x05, SI
LEAL 4(CX)(SI*1), SI
CMPL SI, 8(SP)
JGE emit_remainder_encodeBlockAsm12B
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
MOVQ $0x000000cf1bbcdcbb, R9
MOVQ DI, R10
MOVQ DI, R11
SHRQ $0x08, R11
SHLQ $0x18, R10
IMULQ R9, R10
SHRQ $0x34, R10
SHLQ $0x18, R11
IMULQ R9, R11
SHRQ $0x34, R11
MOVL 24(SP)(R10*4), SI
MOVL 24(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
LEAL 1(CX), R10
MOVL R10, 24(SP)(R11*4)
MOVQ DI, R10
SHRQ $0x10, R10
SHLQ $0x18, R10
IMULQ R9, R10
SHRQ $0x34, R10
// Repeat check: R11 = 4 bytes at (CX - 16(SP)) + 1, R9 = bytes of DI from CX+1.
MOVL CX, R9
SUBL 16(SP), R9
MOVL 1(DX)(R9*1), R11
MOVQ DI, R9
SHRQ $0x08, R9
CMPL R9, R11
JNE no_repeat_found_encodeBlockAsm12B
// Repeat found at CX+1. DI = match start, SI = DI - 16(SP) (the earlier
// position), R8 = 12(SP). Extend backwards while DI is above R8, SI is above
// zero and the preceding bytes are equal.
LEAL 1(CX), DI
MOVL 12(SP), R8
MOVL DI, SI
SUBL 16(SP), SI
JZ repeat_extend_back_end_encodeBlockAsm12B
repeat_extend_back_loop_encodeBlockAsm12B:
CMPL DI, R8
JLE repeat_extend_back_end_encodeBlockAsm12B
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(DI*1), R9
CMPB BL, R9
JNE repeat_extend_back_end_encodeBlockAsm12B
LEAL -1(DI), DI
DECL SI
JNZ repeat_extend_back_loop_encodeBlockAsm12B
// Emits the pending literal bytes, i.e. the source range [12(SP), DI), ahead
// of the repeat. Skipped entirely when the range is empty.
// Register use inside this section:
//   R9  = number of literal bytes
//   R10 = DX + old 12(SP), the first literal byte
//   SI  = R9 - 1 while the header is chosen, later AX + R9 (new write pointer)
// 12(SP) is updated to DI before the copy.
repeat_extend_back_end_encodeBlockAsm12B:
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R10
SUBL SI, R9
LEAL -1(R9), SI
// Header size depends on (length-1): below 60 one byte, below 256 two bytes,
// otherwise three bytes (0xf4 followed by the 16-bit value).
CMPL SI, $0x3c
JLT one_byte_repeat_emit_encodeBlockAsm12B
CMPL SI, $0x00000100
JLT two_bytes_repeat_emit_encodeBlockAsm12B
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_repeat_emit_encodeBlockAsm12B
two_bytes_repeat_emit_encodeBlockAsm12B:
// Two-byte header: 0xf0 followed by the byte (length-1). The short copy
// routine is used when (length-1) is below 64, the long one otherwise.
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
CMPL SI, $0x40
JL memmove_repeat_emit_encodeBlockAsm12B
JMP memmove_long_repeat_emit_encodeBlockAsm12B
one_byte_repeat_emit_encodeBlockAsm12B:
// One-byte header: (length-1) shifted left by 2.
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_repeat_emit_encodeBlockAsm12B:
LEAQ (AX)(R9*1), SI
// genMemMoveShort
// Copies R9 bytes (at most 64 on the paths above) with possibly overlapping
// loads and stores selected by size class.
CMPQ R9, $0x08
JLE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8:
// Up to 8 bytes: a full 8-byte load and store is issued regardless of R9;
// AX is afterwards set to AX + R9, so only R9 bytes count as output.
MOVQ (R10), R11
MOVQ R11, (AX)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16:
// 9..16 bytes: first 8 and last 8 bytes.
MOVQ (R10), R11
MOVQ -8(R10)(R9*1), R10
MOVQ R11, (AX)
MOVQ R10, -8(AX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32:
// 17..32 bytes: first 16 and last 16 bytes.
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64:
// 33..64 bytes: first 32 and last 32 bytes.
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_repeat_emit_encodeBlockAsm12B:
MOVQ SI, AX
JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
memmove_long_repeat_emit_encodeBlockAsm12B:
LEAQ (AX)(R9*1), SI
// genMemMoveLong
// The first and last 32 source bytes are loaded into X0..X3 up front and
// stored last with unaligned stores. In between, 32-byte chunks are copied
// with MOVOA stores to AX + R13 - 32, where R13 starts at 64 - (AX & 31),
// which makes that destination address a multiple of 32.
// NOTE(review): DECQ does not modify CF, so the JA/JNA below observe CF from
// the preceding SUBQ/ADDQ - check the generator before changing this sequence.
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R12
SHRQ $0x05, R12
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R13
SUBQ R11, R13
DECQ R12
JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
LEAQ -32(R10)(R13*1), R11
LEAQ -32(AX)(R13*1), R14
emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R11
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
// Copy 32 bytes per iteration while R13 has not passed the length R9.
MOVOU -32(R10)(R13*1), X4
MOVOU -16(R10)(R13*1), X5
MOVOA X4, -32(AX)(R13*1)
MOVOA X5, -16(AX)(R13*1)
ADDQ $0x20, R13
CMPQ R9, R13
JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
// Finally write the saved head and tail, then advance AX past the literals.
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ SI, AX
// Forward extension of the repeat match. The repeat was detected on the 4
// bytes at CX+1, so CX advances by 5 to the first byte not yet compared.
// matchLen then counts how many bytes at DX+CX equal the bytes 16(SP)
// earlier; the count is left in R12.
// Register use inside this section:
//   R10 = DX + CX, SI = DX + (CX - 16(SP)), R9 = src_len - CX (bytes available)
emit_literal_done_repeat_emit_encodeBlockAsm12B:
ADDL $0x05, CX
MOVL CX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), R9
SUBL CX, R9
LEAQ (DX)(CX*1), R10
LEAQ (DX)(SI*1), SI
// matchLen
XORL R12, R12
CMPL R9, $0x08
JL matchlen_single_repeat_extend_encodeBlockAsm12B
matchlen_loopback_repeat_extend_encodeBlockAsm12B:
// Compare 8 bytes per iteration; the XOR is zero when all eight agree.
MOVQ (R10)(R12*1), R11
XORQ (SI)(R12*1), R11
TESTQ R11, R11
JZ matchlen_loop_repeat_extend_encodeBlockAsm12B
// Mismatch: lowest differing bit index divided by 8 gives the equal bytes.
BSFQ R11, R11
SARQ $0x03, R11
LEAL (R12)(R11*1), R12
JMP repeat_extend_forward_end_encodeBlockAsm12B
matchlen_loop_repeat_extend_encodeBlockAsm12B:
// All 8 bytes matched: account for them and loop while 8 or more remain.
LEAL -8(R9), R9
LEAL 8(R12), R12
CMPL R9, $0x08
JGE matchlen_loopback_repeat_extend_encodeBlockAsm12B
matchlen_single_repeat_extend_encodeBlockAsm12B:
// Fewer than 8 bytes left: finish byte by byte (nothing to do if R9 is 0).
TESTL R9, R9
JZ repeat_extend_forward_end_encodeBlockAsm12B
matchlen_single_loopback_repeat_extend_encodeBlockAsm12B:
MOVB (R10)(R12*1), R11
CMPB (SI)(R12*1), R11
JNE repeat_extend_forward_end_encodeBlockAsm12B
LEAL 1(R12), R12
DECL R9
JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm12B
// End of the forward extension: CX advances by the R12 bytes that matched,
// SI becomes CX - DI (length from the match start in DI) and DI is reloaded
// with the offset from 16(SP).
// R8 holds the value 12(SP) had when the repeat was detected; when it is
// zero the match is encoded by the emitCopy path instead of emitRepeat.
repeat_extend_forward_end_encodeBlockAsm12B:
ADDL R12, CX
MOVL CX, SI
SUBL DI, SI
MOVL 16(SP), DI
TESTL R8, R8
JZ repeat_as_copy_encodeBlockAsm12B
// emitRepeat
// R8 keeps the full length, SI becomes length-4. The encoding is chosen by size:
//   length <= 8                      -> 2 bytes
//   length < 12 and offset < 2048    -> 2 bytes carrying the offset
//   length-4 < 260                   -> 3 bytes
//   otherwise                        -> 4 bytes
MOVL SI, R8
LEAL -4(SI), SI
CMPL R8, $0x08
JLE repeat_two_match_repeat_encodeBlockAsm12B
CMPL R8, $0x0c
JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B
CMPL DI, $0x00000800
JLT repeat_two_offset_match_repeat_encodeBlockAsm12B
cant_repeat_two_offset_match_repeat_encodeBlockAsm12B:
CMPL SI, $0x00000104
JLT repeat_three_match_repeat_encodeBlockAsm12B
// 4-byte form: word 0x0019 followed by the 16-bit value (length-4-256).
LEAL -256(SI), SI
MOVW $0x0019, (AX)
MOVW SI, 2(AX)
ADDQ $0x04, AX
JMP repeat_end_emit_encodeBlockAsm12B
repeat_three_match_repeat_encodeBlockAsm12B:
// 3-byte form: word 0x0015 followed by the byte (length-4-4).
LEAL -4(SI), SI
MOVW $0x0015, (AX)
MOVB SI, 2(AX)
ADDQ $0x03, AX
JMP repeat_end_emit_encodeBlockAsm12B
repeat_two_match_repeat_encodeBlockAsm12B:
// 2-byte form: the 16-bit value ((length-4)<<2 | 1).
SHLL $0x02, SI
ORL $0x01, SI
MOVW SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm12B
repeat_two_offset_match_repeat_encodeBlockAsm12B:
// 2-byte form with offset: low offset byte at 1(AX); byte 0 combines
// (length-4)*4 + 1 with the offset bits above 8 shifted left by 5.
XORQ R8, R8
LEAL 1(R8)(SI*4), SI
MOVB DI, 1(AX)
SARL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm12B
// Encodes the match as a copy (offset in DI, length in SI) and writes it at
// AX. This variant has only the forms with an offset of at most 16 bits.
// Every path ends at repeat_end_emit, which stores CX in 12(SP) and returns
// to the search loop.
repeat_as_copy_encodeBlockAsm12B:
// emitCopy
two_byte_offset_repeat_as_copy_encodeBlockAsm12B:
// Length above 64: write 0xee plus the 16-bit offset (0xee is the
// emit_copy_three header formula evaluated at length 60), deduct 60 and
// encode the rest with emitRepeat.
CMPL SI, $0x40
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B
MOVB $0xee, (AX)
MOVW DI, 1(AX)
LEAL -60(SI), SI
ADDQ $0x03, AX
// emitRepeat
MOVL SI, R8
LEAL -4(SI), SI
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
CMPL DI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
CMPL SI, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
// 4-byte form: word 0x0019 followed by the 16-bit value (length-4-256).
LEAL -256(SI), SI
MOVW $0x0019, (AX)
MOVW SI, 2(AX)
ADDQ $0x04, AX
JMP repeat_end_emit_encodeBlockAsm12B
repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
// 3-byte form: word 0x0015 followed by the byte (length-4-4).
LEAL -4(SI), SI
MOVW $0x0015, (AX)
MOVB SI, 2(AX)
ADDQ $0x03, AX
JMP repeat_end_emit_encodeBlockAsm12B
repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
// 2-byte form: the 16-bit value ((length-4)<<2 | 1).
SHLL $0x02, SI
ORL $0x01, SI
MOVW SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm12B
repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
// 2-byte form with offset: low offset byte at 1(AX); byte 0 combines
// (length-4)*4 + 1 with the offset bits above 8 shifted left by 5.
XORQ R8, R8
LEAL 1(R8)(SI*4), SI
MOVB DI, 1(AX)
SARL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm12B
// The next JMP follows an unconditional JMP and carries no label, so control
// never reaches it.
JMP two_byte_offset_repeat_as_copy_encodeBlockAsm12B
two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B:
// Length of at most 64. With length < 12 and offset < 2048 a 2-byte element
// is written: byte 0 = ((length-4)*4 + 1) | (offset>>8)<<5, byte 1 = low
// offset byte. Otherwise the 3-byte element below is used. Only BL is
// initialised, which is enough because only the low byte of SI is stored.
CMPL SI, $0x0c
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
CMPL DI, $0x00000800
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
MOVB $0x01, BL
LEAL -16(BX)(SI*4), SI
MOVB DI, 1(AX)
SHRL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm12B
emit_copy_three_repeat_as_copy_encodeBlockAsm12B:
// 3-byte element: byte 0 = (length-1)*4 + 2, followed by the 16-bit offset.
MOVB $0x02, BL
LEAL -4(BX)(SI*4), SI
MOVB SI, (AX)
MOVW DI, 1(AX)
ADDQ $0x03, AX
repeat_end_emit_encodeBlockAsm12B:
// Record the current source index in 12(SP) and resume searching.
MOVL CX, 12(SP)
JMP search_loop_encodeBlockAsm12B
// No repeat at this position: test up to three hash-table candidates.
// On entry (set by the search loop) DI holds the 8 source bytes at CX, SI
// and R8 are the previous table entries for CX and CX+1, and R10 is the
// table index computed for CX+2.
// Each test compares 4 bytes at DX+candidate with the low 32 bits of DI; DI
// is shifted right by one byte between tests so it lines up with CX+1, CX+2.
no_repeat_found_encodeBlockAsm12B:
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBlockAsm12B
SHRQ $0x08, DI
// Fetch the third candidate from the table slot R10 before that slot is
// overwritten with CX+2.
MOVL 24(SP)(R10*4), SI
LEAL 2(CX), R9
CMPL (DX)(R8*1), DI
JEQ candidate2_match_encodeBlockAsm12B
MOVL R9, 24(SP)(R10*4)
SHRQ $0x08, DI
CMPL (DX)(SI*1), DI
JEQ candidate3_match_encodeBlockAsm12B
// Nothing matched: continue the search at the position saved in 20(SP).
MOVL 20(SP), CX
JMP search_loop_encodeBlockAsm12B
candidate3_match_encodeBlockAsm12B:
// Third candidate matched; the match starts at CX+2.
ADDL $0x02, CX
JMP candidate_match_encodeBlockAsm12B
candidate2_match_encodeBlockAsm12B:
// Second candidate matched: still record CX+2 in the table, then continue
// with the match starting at CX+1 and R8 as the candidate index.
MOVL R9, 24(SP)(R10*4)
INCL CX
MOVL R8, SI
// A candidate at index SI matches the data at index CX. Extend the match
// backwards one byte at a time while CX is still above 12(SP), SI is above
// zero and the preceding bytes are equal.
candidate_match_encodeBlockAsm12B:
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeBlockAsm12B
match_extend_back_loop_encodeBlockAsm12B:
CMPL CX, DI
JLE match_extend_back_end_encodeBlockAsm12B
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeBlockAsm12B
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeBlockAsm12B
JMP match_extend_back_loop_encodeBlockAsm12B
match_extend_back_end_encodeBlockAsm12B:
// Output space check: AX + 3 + (CX - 12(SP)) must stay below the pointer
// stored at (SP); otherwise the function returns 0.
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 3(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeBlockAsm12B
MOVQ $0x00000000, ret+48(FP)
RET
// Emits the pending literal bytes, i.e. the source range [12(SP), CX), ahead
// of the match. Skipped entirely when the range is empty.
// Register use inside this section:
//   R9 = number of literal bytes
//   DI = DX + old 12(SP), the first literal byte
//   R8 = R9 - 1 while the header is chosen, later AX + R9 (new write pointer)
// 12(SP) is updated to CX before the copy.
match_dst_size_check_encodeBlockAsm12B:
MOVL CX, DI
MOVL 12(SP), R8
CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeBlockAsm12B
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(R8*1), DI
SUBL R8, R9
LEAL -1(R9), R8
// Header size depends on (length-1): below 60 one byte, below 256 two bytes,
// otherwise three bytes (0xf4 followed by the 16-bit value).
CMPL R8, $0x3c
JLT one_byte_match_emit_encodeBlockAsm12B
CMPL R8, $0x00000100
JLT two_bytes_match_emit_encodeBlockAsm12B
MOVB $0xf4, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeBlockAsm12B
two_bytes_match_emit_encodeBlockAsm12B:
// Two-byte header: 0xf0 followed by the byte (length-1). The short copy
// routine is used when (length-1) is below 64, the long one otherwise.
MOVB $0xf0, (AX)
MOVB R8, 1(AX)
ADDQ $0x02, AX
CMPL R8, $0x40
JL memmove_match_emit_encodeBlockAsm12B
JMP memmove_long_match_emit_encodeBlockAsm12B
one_byte_match_emit_encodeBlockAsm12B:
// One-byte header: (length-1) shifted left by 2.
SHLB $0x02, R8
MOVB R8, (AX)
ADDQ $0x01, AX
memmove_match_emit_encodeBlockAsm12B:
LEAQ (AX)(R9*1), R8
// genMemMoveShort
// Copies R9 bytes (at most 64 on the paths above) with possibly overlapping
// loads and stores selected by size class.
CMPQ R9, $0x08
JLE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8:
// Up to 8 bytes: a full 8-byte load and store is issued regardless of R9;
// AX is afterwards set to AX + R9, so only R9 bytes count as output.
MOVQ (DI), R10
MOVQ R10, (AX)
JMP memmove_end_copy_match_emit_encodeBlockAsm12B
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16:
// 9..16 bytes: first 8 and last 8 bytes.
MOVQ (DI), R10
MOVQ -8(DI)(R9*1), DI
MOVQ R10, (AX)
MOVQ DI, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm12B
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32:
// 17..32 bytes: first 16 and last 16 bytes.
MOVOU (DI), X0
MOVOU -16(DI)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm12B
emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64:
// 33..64 bytes: first 32 and last 32 bytes.
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeBlockAsm12B:
// Advance the write pointer past the copied literals.
MOVQ R8, AX
JMP emit_literal_done_match_emit_encodeBlockAsm12B
memmove_long_match_emit_encodeBlockAsm12B:
LEAQ (AX)(R9*1), R8
// genMemMoveLong
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVQ R9, R11
SHRQ $0x05, R11
MOVQ AX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
DECQ R11
JA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
LEAQ -32(DI)(R12*1), R10
LEAQ -32(AX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
MOVOU -32(DI)(R12*1), X4
MOVOU -16(DI)(R12*1), X5
MOVOA X4, -32(AX)(R12*1)
MOVOA X5, -16(AX)(R12*1)
ADDQ $0x20, R12
CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ R8, AX
emit_literal_done_match_emit_encodeBlockAsm12B:
match_nolit_loop_encodeBlockAsm12B:
MOVL CX, DI
SUBL SI, DI
MOVL DI, 16(SP)
ADDL $0x04, CX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL CX, DI
LEAQ (DX)(CX*1), R8
LEAQ (DX)(SI*1), SI
// matchLen
XORL R10, R10
CMPL DI, $0x08
JL matchlen_single_match_nolit_encodeBlockAsm12B
matchlen_loopback_match_nolit_encodeBlockAsm12B:
MOVQ (R8)(R10*1), R9
XORQ (SI)(R10*1), R9
TESTQ R9, R9
JZ matchlen_loop_match_nolit_encodeBlockAsm12B
BSFQ R9, R9
SARQ $0x03, R9
LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeBlockAsm12B
matchlen_loop_match_nolit_encodeBlockAsm12B:
LEAL -8(DI), DI
LEAL 8(R10), R10
CMPL DI, $0x08
JGE matchlen_loopback_match_nolit_encodeBlockAsm12B
matchlen_single_match_nolit_encodeBlockAsm12B:
TESTL DI, DI
JZ match_nolit_end_encodeBlockAsm12B
matchlen_single_loopback_match_nolit_encodeBlockAsm12B:
MOVB (R8)(R10*1), R9
CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeBlockAsm12B
LEAL 1(R10), R10
DECL DI
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm12B
match_nolit_end_encodeBlockAsm12B:
ADDL R10, CX
MOVL 16(SP), SI
ADDL $0x04, R10
MOVL CX, 12(SP)
// emitCopy
two_byte_offset_match_nolit_encodeBlockAsm12B:
CMPL R10, $0x40
JLE two_byte_offset_short_match_nolit_encodeBlockAsm12B
MOVB $0xee, (AX)
MOVW SI, 1(AX)
LEAL -60(R10), R10
ADDQ $0x03, AX
// emitRepeat
MOVL R10, DI
LEAL -4(R10), R10
CMPL DI, $0x08
JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short
CMPL DI, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
CMPL R10, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short
LEAL -256(R10), R10
MOVW $0x0019, (AX)
MOVW R10, 2(AX)
ADDQ $0x04, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short:
LEAL -4(R10), R10
MOVW $0x0015, (AX)
MOVB R10, 2(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short:
SHLL $0x02, R10
ORL $0x01, R10
MOVW R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
XORQ DI, DI
LEAL 1(DI)(R10*4), R10
MOVB SI, 1(AX)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
JMP two_byte_offset_match_nolit_encodeBlockAsm12B
two_byte_offset_short_match_nolit_encodeBlockAsm12B:
CMPL R10, $0x0c
JGE emit_copy_three_match_nolit_encodeBlockAsm12B
CMPL SI, $0x00000800
JGE emit_copy_three_match_nolit_encodeBlockAsm12B
MOVB $0x01, BL
LEAL -16(BX)(R10*4), R10
MOVB SI, 1(AX)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm12B
emit_copy_three_match_nolit_encodeBlockAsm12B:
MOVB $0x02, BL
LEAL -4(BX)(R10*4), R10
MOVB R10, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
match_nolit_emitcopy_end_encodeBlockAsm12B:
CMPL CX, 8(SP)
JGE emit_remainder_encodeBlockAsm12B
MOVQ -2(DX)(CX*1), DI
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeBlockAsm12B
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeBlockAsm12B:
MOVQ $0x000000cf1bbcdcbb, R9
MOVQ DI, R8
SHRQ $0x10, DI
MOVQ DI, SI
SHLQ $0x18, R8
IMULQ R9, R8
SHRQ $0x34, R8
SHLQ $0x18, SI
IMULQ R9, SI
SHRQ $0x34, SI
LEAL -2(CX), R9
LEAQ 24(SP)(SI*4), R10
MOVL (R10), SI
MOVL R9, 24(SP)(R8*4)
MOVL CX, (R10)
CMPL (DX)(SI*1), DI
JEQ match_nolit_loop_encodeBlockAsm12B
INCL CX
JMP search_loop_encodeBlockAsm12B
emit_remainder_encodeBlockAsm12B:
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 3(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeBlockAsm12B
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeBlockAsm12B:
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeBlockAsm12B
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeBlockAsm12B
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeBlockAsm12B
two_bytes_emit_remainder_encodeBlockAsm12B:
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
CMPL DX, $0x40
JL memmove_emit_remainder_encodeBlockAsm12B
JMP memmove_long_emit_remainder_encodeBlockAsm12B
one_byte_emit_remainder_encodeBlockAsm12B:
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
memmove_emit_remainder_encodeBlockAsm12B:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
CMPQ BX, $0x08
JLE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8:
MOVQ (CX), SI
MOVQ SI, (AX)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16:
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32:
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64:
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeBlockAsm12B:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
memmove_long_emit_remainder_encodeBlockAsm12B:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32:
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
emit_literal_done_emit_remainder_encodeBlockAsm12B:
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeBlockAsm10B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst as a stream of literal, copy and repeat operations
// and returns the number of bytes written to dst (AX - dst_base). It returns
// 0 instead whenever the output would reach the destination limit computed
// during setup. This variant hashes 4 source bytes to 10 bits and keeps a
// 1024-entry table of uint32 source positions on the stack.
//
// Frame layout (names are the ones used in the comments below):
//    0(SP)  8 bytes     dstLimit = dst_base + (src_len - 9 - src_len>>5)
//    8(SP)  4 bytes     sLimit   = src_len - 8; searching stops at this offset
//   12(SP)  4 bytes     nextEmit = src offset of the first byte not yet emitted
//   16(SP)  4 bytes     repeat   = distance of the most recent match (starts at 1)
//   20(SP)  4 bytes     nextS    = offset to resume at if nothing matches at s
//   24(SP)  4096 bytes  table    = 1024 x uint32 candidate positions
//
// Long-lived registers:
//   AX = write pointer into dst
//   CX = s, the current offset into src
//   DX = src base pointer (reused as scratch after emit_remainder_ok)
//
// NOTE(review): no checks on the minimum sizes of dst and src are visible in
// this function; presumably the Go caller guarantees them - confirm there.
TEXT ·encodeBlockAsm10B(SB), $4120-56
MOVQ dst_base+0(FP), AX
// Clear the hash table: 0x20 iterations x 128 bytes = 4096 bytes from 24(SP).
MOVQ $0x00000020, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeBlockAsm10B:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeBlockAsm10B
// nextEmit = 0.
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), CX
// sLimit = src_len - 8.
LEAQ -9(CX), DX
LEAQ -8(CX), SI
MOVL SI, 8(SP)
// dstLimit = dst_base + (src_len - 9 - src_len/32). The subtraction is done
// in 32 bits (SUBL), which zero-extends the result into DX.
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
// s = 1 and repeat = 1.
MOVL $0x00000001, CX
MOVL CX, 16(SP)
MOVQ src_base+24(FP), DX
// Main search loop: look for a repeat at s+1, then for hash-table candidates
// at s, s+1 and s+2.
search_loop_encodeBlockAsm10B:
// nextS = s + 4 + ((s - nextEmit) >> 5): the skip distance grows with the
// number of bytes scanned since the last emit.
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x05, SI
LEAL 4(CX)(SI*1), SI
CMPL SI, 8(SP)
JGE emit_remainder_encodeBlockAsm10B
// DI = 8 source bytes starting at s.
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
// hash(x) = ((x << 32) * 0x9e3779b1) >> 54. The left shift discards the
// upper 4 bytes, so only 4 bytes are hashed; the result is a 10-bit index.
// R10 = hash of the bytes at s, R11 = hash of the bytes at s+1.
MOVQ $0x9e3779b1, R9
MOVQ DI, R10
MOVQ DI, R11
SHRQ $0x08, R11
SHLQ $0x20, R10
IMULQ R9, R10
SHRQ $0x36, R10
SHLQ $0x20, R11
IMULQ R9, R11
SHRQ $0x36, R11
// SI = candidate for s, R8 = candidate for s+1; then record s and s+1 in
// their table slots.
MOVL 24(SP)(R10*4), SI
MOVL 24(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
LEAL 1(CX), R10
MOVL R10, 24(SP)(R11*4)
// R10 = hash of the bytes at s+2 (table slot is read/written further down).
MOVQ DI, R10
SHRQ $0x10, R10
SHLQ $0x20, R10
IMULQ R9, R10
SHRQ $0x36, R10
// Repeat check: compare the 4 bytes at s+1 (low 32 bits of DI>>8) with the
// 4 bytes at s+1-repeat.
MOVL CX, R9
SUBL 16(SP), R9
MOVL 1(DX)(R9*1), R11
MOVQ DI, R9
SHRQ $0x08, R9
CMPL R9, R11
JNE no_repeat_found_encodeBlockAsm10B
// Repeat found. DI = base = s+1, R8 = nextEmit as it was on entry (tested
// later to choose between repeat and copy encoding), SI = base - repeat.
LEAL 1(CX), DI
MOVL 12(SP), R8
MOVL DI, SI
SUBL 16(SP), SI
JZ repeat_extend_back_end_encodeBlockAsm10B
// Extend the match backwards while base > nextEmit, the earlier position is
// above 0, and the preceding bytes are equal.
repeat_extend_back_loop_encodeBlockAsm10B:
CMPL DI, R8
JLE repeat_extend_back_end_encodeBlockAsm10B
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(DI*1), R9
CMPB BL, R9
JNE repeat_extend_back_end_encodeBlockAsm10B
LEAL -1(DI), DI
DECL SI
JNZ repeat_extend_back_loop_encodeBlockAsm10B
repeat_extend_back_end_encodeBlockAsm10B:
// Emit src[nextEmit:base] as a literal, unless it is empty.
// R10 = source pointer, R9 = literal length, SI = length - 1; nextEmit = base.
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R10
SUBL SI, R9
LEAL -1(R9), SI
// Literal header size depends on length-1: < 60 -> 1 byte, < 256 -> 2 bytes,
// otherwise 3 bytes (Snappy/S2 literal tags; low two tag bits are 00).
CMPL SI, $0x3c
JLT one_byte_repeat_emit_encodeBlockAsm10B
CMPL SI, $0x00000100
JLT two_bytes_repeat_emit_encodeBlockAsm10B
// 3-byte header: tag 0xf4 (61<<2) followed by 16-bit length-1.
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_repeat_emit_encodeBlockAsm10B
two_bytes_repeat_emit_encodeBlockAsm10B:
// 2-byte header: tag 0xf0 (60<<2) followed by 8-bit length-1.
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
// Lengths up to 64 use the short copy; longer ones use the long copy.
CMPL SI, $0x40
JL memmove_repeat_emit_encodeBlockAsm10B
JMP memmove_long_repeat_emit_encodeBlockAsm10B
one_byte_repeat_emit_encodeBlockAsm10B:
// 1-byte header: tag = (length-1) << 2.
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_repeat_emit_encodeBlockAsm10B:
// SI = dst pointer just past the literal; becomes AX after the copy.
LEAQ (AX)(R9*1), SI
// genMemMoveShort
// Copies R9 (<= 64) bytes from (R10) to (AX) with at most four loads and
// stores per size class; the loads/stores of a class overlap to cover any
// length inside it.
CMPQ R9, $0x08
JLE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8:
// Length <= 8: always moves a full 8 bytes, whatever the exact length is;
// only the first R9 of them are kept because AX is advanced by R9.
MOVQ (R10), R11
MOVQ R11, (AX)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16:
// First 8 and last 8 bytes (overlapping).
MOVQ (R10), R11
MOVQ -8(R10)(R9*1), R10
MOVQ R11, (AX)
MOVQ R10, -8(AX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32:
// First 16 and last 16 bytes (overlapping).
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64:
// First 32 and last 32 bytes (overlapping).
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_repeat_emit_encodeBlockAsm10B:
MOVQ SI, AX
JMP emit_literal_done_repeat_emit_encodeBlockAsm10B
memmove_long_repeat_emit_encodeBlockAsm10B:
// Reached only with length >= 65 (length-1 >= 0x40 on both paths above).
LEAQ (AX)(R9*1), SI
// genMemMoveLong
// X0/X1 = first 32 source bytes, X2/X3 = last 32 source bytes; they are
// stored last, with unaligned stores, to cover the unaligned head and tail.
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
// R12 = length / 32. R13 = 64 - (AX & 31), so AX + R13 - 32 is the first
// 32-byte aligned dst address past AX; the MOVOA stores below rely on that.
MOVQ R9, R12
SHRQ $0x05, R12
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R13
SUBQ R11, R13
// DECQ leaves CF untouched, so JA tests CF from the SUBQ above together with
// ZF from the DECQ.
// NOTE(review): with length >= 65, R12 is at least 2 here, so this JA looks
// always taken and the large_big_loop_back block looks unreachable - confirm.
DECQ R12
JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
LEAQ -32(R10)(R13*1), R11
LEAQ -32(AX)(R13*1), R14
emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R11
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
// Copy 32 bytes per iteration (unaligned loads, aligned stores) while
// length >= R13.
MOVOU -32(R10)(R13*1), X4
MOVOU -16(R10)(R13*1), X5
MOVOA X4, -32(AX)(R13*1)
MOVOA X5, -16(AX)(R13*1)
ADDQ $0x20, R13
CMPQ R9, R13
JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
// Write the saved head and tail.
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ SI, AX
emit_literal_done_repeat_emit_encodeBlockAsm10B:
// Extend the repeat forwards. s += 5 skips the byte at the old s plus the 4
// bytes already verified at s+1. R9 = bytes left in src from s,
// R10 = src+s, SI = src+s-repeat.
ADDL $0x05, CX
MOVL CX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), R9
SUBL CX, R9
LEAQ (DX)(CX*1), R10
LEAQ (DX)(SI*1), SI
// matchLen
// R12 = number of equal bytes found so far. Compare 8 bytes at a time while
// at least 8 remain, then byte by byte.
XORL R12, R12
CMPL R9, $0x08
JL matchlen_single_repeat_extend_encodeBlockAsm10B
matchlen_loopback_repeat_extend_encodeBlockAsm10B:
MOVQ (R10)(R12*1), R11
XORQ (SI)(R12*1), R11
TESTQ R11, R11
JZ matchlen_loop_repeat_extend_encodeBlockAsm10B
// Mismatch inside this 8-byte word: index of lowest differing bit / 8 is
// the count of equal leading bytes.
BSFQ R11, R11
SARQ $0x03, R11
LEAL (R12)(R11*1), R12
JMP repeat_extend_forward_end_encodeBlockAsm10B
matchlen_loop_repeat_extend_encodeBlockAsm10B:
LEAL -8(R9), R9
LEAL 8(R12), R12
CMPL R9, $0x08
JGE matchlen_loopback_repeat_extend_encodeBlockAsm10B
matchlen_single_repeat_extend_encodeBlockAsm10B:
TESTL R9, R9
JZ repeat_extend_forward_end_encodeBlockAsm10B
matchlen_single_loopback_repeat_extend_encodeBlockAsm10B:
MOVB (R10)(R12*1), R11
CMPB (SI)(R12*1), R11
JNE repeat_extend_forward_end_encodeBlockAsm10B
LEAL 1(R12), R12
DECL R9
JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm10B
repeat_extend_forward_end_encodeBlockAsm10B:
// s += matched bytes; SI = match length = s - base; DI = repeat offset.
ADDL R12, CX
MOVL CX, SI
SUBL DI, SI
MOVL 16(SP), DI
// R8 still holds nextEmit from when the repeat was found. If it was 0,
// encode the match as an ordinary copy instead of a repeat.
TESTL R8, R8
JZ repeat_as_copy_encodeBlockAsm10B
// emitRepeat
// R8 = length, SI = length - 4. Picks the shortest of four encodings.
MOVL SI, R8
LEAL -4(SI), SI
CMPL R8, $0x08
JLE repeat_two_match_repeat_encodeBlockAsm10B
CMPL R8, $0x0c
JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B
CMPL DI, $0x00000800
JLT repeat_two_offset_match_repeat_encodeBlockAsm10B
cant_repeat_two_offset_match_repeat_encodeBlockAsm10B:
CMPL SI, $0x00000104
JLT repeat_three_match_repeat_encodeBlockAsm10B
// 4 bytes: 0x19, 0x00, then 16-bit (length - 4 - 256).
LEAL -256(SI), SI
MOVW $0x0019, (AX)
MOVW SI, 2(AX)
ADDQ $0x04, AX
JMP repeat_end_emit_encodeBlockAsm10B
repeat_three_match_repeat_encodeBlockAsm10B:
// 3 bytes: 0x15, 0x00, then 8-bit (length - 8).
LEAL -4(SI), SI
MOVW $0x0015, (AX)
MOVB SI, 2(AX)
ADDQ $0x03, AX
JMP repeat_end_emit_encodeBlockAsm10B
repeat_two_match_repeat_encodeBlockAsm10B:
// 2 bytes (length <= 8): 16-bit value ((length-4)<<2 | 1); the value fits in
// the low byte, so the second byte written is 0.
SHLL $0x02, SI
ORL $0x01, SI
MOVW SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm10B
repeat_two_offset_match_repeat_encodeBlockAsm10B:
// 2 bytes (length 9..11, offset < 2048): byte 0 = 1 + (length-4)*4 with
// (offset>>8)<<5 OR-ed in, byte 1 = low 8 bits of the offset.
XORQ R8, R8
LEAL 1(R8)(SI*4), SI
MOVB DI, 1(AX)
SARL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm10B
repeat_as_copy_encodeBlockAsm10B:
// emitCopy
// SI = length, DI = offset.
two_byte_offset_repeat_as_copy_encodeBlockAsm10B:
CMPL SI, $0x40
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B
// Length > 64: emit a 3-byte copy of length 60 (tag 0xee = 59<<2 | 2, then
// the 16-bit offset) and encode the remaining length-60 as a repeat.
MOVB $0xee, (AX)
MOVW DI, 1(AX)
LEAL -60(SI), SI
ADDQ $0x03, AX
// emitRepeat
// Same four encodings as the emitRepeat above; R8 = remaining length,
// SI = remaining length - 4.
MOVL SI, R8
LEAL -4(SI), SI
CMPL R8, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
CMPL R8, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
CMPL DI, $0x00000800
JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
CMPL SI, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
LEAL -256(SI), SI
MOVW $0x0019, (AX)
MOVW SI, 2(AX)
ADDQ $0x04, AX
JMP repeat_end_emit_encodeBlockAsm10B
repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
LEAL -4(SI), SI
MOVW $0x0015, (AX)
MOVB SI, 2(AX)
ADDQ $0x03, AX
JMP repeat_end_emit_encodeBlockAsm10B
repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
SHLL $0x02, SI
ORL $0x01, SI
MOVW SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm10B
repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
XORQ R8, R8
LEAL 1(R8)(SI*4), SI
MOVB DI, 1(AX)
SARL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm10B
// NOTE(review): the next JMP directly follows an unconditional JMP and has
// no label of its own, so it appears to be unreachable.
JMP two_byte_offset_repeat_as_copy_encodeBlockAsm10B
two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B:
// Length <= 64. Use the 2-byte form only when length < 12 and offset < 2048.
CMPL SI, $0x0c
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B
CMPL DI, $0x00000800
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B
// 2-byte copy: byte 0 = 1 + (length-4)*4 with (offset>>8)<<5 OR-ed in,
// byte 1 = low 8 bits of the offset. Only BL is set before BX is used in the
// LEAL; that is sufficient because only the low byte of SI is stored.
MOVB $0x01, BL
LEAL -16(BX)(SI*4), SI
MOVB DI, 1(AX)
SHRL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm10B
emit_copy_three_repeat_as_copy_encodeBlockAsm10B:
// 3-byte copy: byte 0 = 2 + (length-1)*4 (low byte only), then the 16-bit
// offset.
MOVB $0x02, BL
LEAL -4(BX)(SI*4), SI
MOVB SI, (AX)
MOVW DI, 1(AX)
ADDQ $0x03, AX
repeat_end_emit_encodeBlockAsm10B:
// nextEmit = s; continue searching.
MOVL CX, 12(SP)
JMP search_loop_encodeBlockAsm10B
no_repeat_found_encodeBlockAsm10B:
// Candidate for s: do its 4 bytes equal the 4 bytes at s (low 32 bits of DI)?
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBlockAsm10B
// Candidate for s+1 (R8) against the 4 bytes at s+1. SI is reloaded with the
// table entry for s+2 and R9 = s+2 is what gets stored into that slot.
SHRQ $0x08, DI
MOVL 24(SP)(R10*4), SI
LEAL 2(CX), R9
CMPL (DX)(R8*1), DI
JEQ candidate2_match_encodeBlockAsm10B
// Candidate for s+2 (SI) against the 4 bytes at s+2.
MOVL R9, 24(SP)(R10*4)
SHRQ $0x08, DI
CMPL (DX)(SI*1), DI
JEQ candidate3_match_encodeBlockAsm10B
// Nothing matched: s = nextS.
MOVL 20(SP), CX
JMP search_loop_encodeBlockAsm10B
candidate3_match_encodeBlockAsm10B:
// Match is at s+2; SI already holds its candidate.
ADDL $0x02, CX
JMP candidate_match_encodeBlockAsm10B
candidate2_match_encodeBlockAsm10B:
// Match is at s+1: still record s+2 in the table, then s += 1 and use R8 as
// the candidate.
MOVL R9, 24(SP)(R10*4)
INCL CX
MOVL R8, SI
candidate_match_encodeBlockAsm10B:
// Here CX = s (match start) and SI = candidate (earlier position with the
// same 4 bytes). Extend backwards while s > nextEmit, candidate > 0 and the
// preceding bytes are equal.
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeBlockAsm10B
match_extend_back_loop_encodeBlockAsm10B:
CMPL CX, DI
JLE match_extend_back_end_encodeBlockAsm10B
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeBlockAsm10B
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeBlockAsm10B
JMP match_extend_back_loop_encodeBlockAsm10B
match_extend_back_end_encodeBlockAsm10B:
// Destination check: if AX + 3 + (s - nextEmit) is not below dstLimit, store
// 0 as the result and return.
// NOTE(review): presumably the caller treats 0 as "did not fit / not worth
// compressing" - confirm in the Go wrapper.
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 3(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeBlockAsm10B
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeBlockAsm10B:
// Emit src[nextEmit:s] as a literal, unless it is empty.
// DI = source pointer, R9 = literal length, R8 = length - 1; nextEmit = s.
// Header selection and copy routines mirror the literal emit in the repeat
// path above.
MOVL CX, DI
MOVL 12(SP), R8
CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeBlockAsm10B
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(R8*1), DI
SUBL R8, R9
LEAL -1(R9), R8
CMPL R8, $0x3c
JLT one_byte_match_emit_encodeBlockAsm10B
CMPL R8, $0x00000100
JLT two_bytes_match_emit_encodeBlockAsm10B
// 3-byte header: tag 0xf4 followed by 16-bit length-1.
MOVB $0xf4, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeBlockAsm10B
two_bytes_match_emit_encodeBlockAsm10B:
// 2-byte header: tag 0xf0 followed by 8-bit length-1.
MOVB $0xf0, (AX)
MOVB R8, 1(AX)
ADDQ $0x02, AX
CMPL R8, $0x40
JL memmove_match_emit_encodeBlockAsm10B
JMP memmove_long_match_emit_encodeBlockAsm10B
one_byte_match_emit_encodeBlockAsm10B:
// 1-byte header: tag = (length-1) << 2.
SHLB $0x02, R8
MOVB R8, (AX)
ADDQ $0x01, AX
memmove_match_emit_encodeBlockAsm10B:
// R8 = dst pointer just past the literal; becomes AX after the copy.
LEAQ (AX)(R9*1), R8
// genMemMoveShort
// Copies R9 (<= 64) bytes from (DI) to (AX) using overlapping loads/stores
// per size class; the <= 8 class always moves a full 8 bytes.
CMPQ R9, $0x08
JLE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8:
MOVQ (DI), R10
MOVQ R10, (AX)
JMP memmove_end_copy_match_emit_encodeBlockAsm10B
emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16:
MOVQ (DI), R10
MOVQ -8(DI)(R9*1), DI
MOVQ R10, (AX)
MOVQ DI, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm10B
emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm10B
emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeBlockAsm10B:
MOVQ R8, AX
JMP emit_literal_done_match_emit_encodeBlockAsm10B
memmove_long_match_emit_encodeBlockAsm10B:
// Reached only with length >= 65.
LEAQ (AX)(R9*1), R8
// genMemMoveLong
// Save the first and last 32 source bytes in X0-X3, copy the middle in
// 32-byte chunks with 32-byte aligned stores, then write head and tail.
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
// R11 = length / 32; R12 = 64 - (AX & 31), so AX + R12 - 32 is 32-byte
// aligned.
MOVQ R9, R11
SHRQ $0x05, R11
MOVQ AX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
// JA tests CF from the SUBQ (DECQ does not modify CF) and ZF from the DECQ.
DECQ R11
JA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
LEAQ -32(DI)(R12*1), R10
LEAQ -32(AX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
// 32 bytes per iteration while length >= R12.
MOVOU -32(DI)(R12*1), X4
MOVOU -16(DI)(R12*1), X5
MOVOA X4, -32(AX)(R12*1)
MOVOA X5, -16(AX)(R12*1)
ADDQ $0x20, R12
CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ R8, AX
emit_literal_done_match_emit_encodeBlockAsm10B:
match_nolit_loop_encodeBlockAsm10B:
// A match with no pending literal. repeat = s - candidate. The first 4 bytes
// are already known to match, so skip them on both sides before measuring.
// DI = bytes left in src from s, R8 = src+s, SI = src+candidate.
MOVL CX, DI
SUBL SI, DI
MOVL DI, 16(SP)
ADDL $0x04, CX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL CX, DI
LEAQ (DX)(CX*1), R8
LEAQ (DX)(SI*1), SI
// matchLen
// R10 = number of further equal bytes; 8 bytes at a time, then byte by byte.
XORL R10, R10
CMPL DI, $0x08
JL matchlen_single_match_nolit_encodeBlockAsm10B
matchlen_loopback_match_nolit_encodeBlockAsm10B:
MOVQ (R8)(R10*1), R9
XORQ (SI)(R10*1), R9
TESTQ R9, R9
JZ matchlen_loop_match_nolit_encodeBlockAsm10B
// Lowest differing bit index / 8 = equal bytes within this word.
BSFQ R9, R9
SARQ $0x03, R9
LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeBlockAsm10B
matchlen_loop_match_nolit_encodeBlockAsm10B:
LEAL -8(DI), DI
LEAL 8(R10), R10
CMPL DI, $0x08
JGE matchlen_loopback_match_nolit_encodeBlockAsm10B
matchlen_single_match_nolit_encodeBlockAsm10B:
TESTL DI, DI
JZ match_nolit_end_encodeBlockAsm10B
matchlen_single_loopback_match_nolit_encodeBlockAsm10B:
MOVB (R8)(R10*1), R9
CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeBlockAsm10B
LEAL 1(R10), R10
DECL DI
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm10B
match_nolit_end_encodeBlockAsm10B:
// s += extra bytes; SI = offset (repeat); R10 = total length including the
// initial 4 bytes; nextEmit = s.
ADDL R10, CX
MOVL 16(SP), SI
ADDL $0x04, R10
MOVL CX, 12(SP)
// emitCopy
// R10 = length, SI = offset. Same encoding choices as the emitCopy in the
// repeat path above.
two_byte_offset_match_nolit_encodeBlockAsm10B:
CMPL R10, $0x40
JLE two_byte_offset_short_match_nolit_encodeBlockAsm10B
// Length > 64: 3-byte copy of length 60 (tag 0xee, 16-bit offset), remainder
// as a repeat.
MOVB $0xee, (AX)
MOVW SI, 1(AX)
LEAL -60(R10), R10
ADDQ $0x03, AX
// emitRepeat
// DI = remaining length, R10 = remaining length - 4.
MOVL R10, DI
LEAL -4(R10), R10
CMPL DI, $0x08
JLE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short
CMPL DI, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
CMPL SI, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
CMPL R10, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short
// 4 bytes: 0x19, 0x00, then 16-bit (length - 4 - 256).
LEAL -256(R10), R10
MOVW $0x0019, (AX)
MOVW R10, 2(AX)
ADDQ $0x04, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm10B
repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short:
// 3 bytes: 0x15, 0x00, then 8-bit (length - 8).
LEAL -4(R10), R10
MOVW $0x0015, (AX)
MOVB R10, 2(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm10B
repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short:
// 2 bytes: 16-bit value ((length-4)<<2 | 1).
SHLL $0x02, R10
ORL $0x01, R10
MOVW R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm10B
repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
// 2 bytes: byte 0 = 1 + (length-4)*4 with (offset>>8)<<5 OR-ed in,
// byte 1 = low 8 bits of the offset.
XORQ DI, DI
LEAL 1(DI)(R10*4), R10
MOVB SI, 1(AX)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm10B
// NOTE(review): the next JMP directly follows an unconditional JMP and has
// no label of its own, so it appears to be unreachable.
JMP two_byte_offset_match_nolit_encodeBlockAsm10B
two_byte_offset_short_match_nolit_encodeBlockAsm10B:
// Length <= 64. 2-byte form only when length < 12 and offset < 2048.
CMPL R10, $0x0c
JGE emit_copy_three_match_nolit_encodeBlockAsm10B
CMPL SI, $0x00000800
JGE emit_copy_three_match_nolit_encodeBlockAsm10B
// 2-byte copy; only the low byte of R10 is stored, so setting just BL is
// enough.
MOVB $0x01, BL
LEAL -16(BX)(R10*4), R10
MOVB SI, 1(AX)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm10B
emit_copy_three_match_nolit_encodeBlockAsm10B:
// 3-byte copy: byte 0 = 2 + (length-1)*4 (low byte only), then the 16-bit
// offset.
MOVB $0x02, BL
LEAL -4(BX)(R10*4), R10
MOVB R10, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
match_nolit_emitcopy_end_encodeBlockAsm10B:
// Stop searching once s >= sLimit.
CMPL CX, 8(SP)
JGE emit_remainder_encodeBlockAsm10B
// DI = 8 source bytes starting at s-2 (loaded before the dst check so it is
// ready for hashing below).
MOVQ -2(DX)(CX*1), DI
// If AX has reached dstLimit, store 0 as the result and return.
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeBlockAsm10B
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeBlockAsm10B:
// Hash the bytes at s-2 (R8) and at s (SI, from DI >> 16) with the same
// 10-bit hash as in the search loop.
MOVQ $0x9e3779b1, R9
MOVQ DI, R8
SHRQ $0x10, DI
MOVQ DI, SI
SHLQ $0x20, R8
IMULQ R9, R8
SHRQ $0x36, R8
SHLQ $0x20, SI
IMULQ R9, SI
SHRQ $0x36, SI
// SI = previous table entry for hash(s) (the new candidate); then record
// s-2 and s in their slots.
LEAL -2(CX), R9
LEAQ 24(SP)(SI*4), R10
MOVL (R10), SI
MOVL R9, 24(SP)(R8*4)
MOVL CX, (R10)
// If the candidate's 4 bytes equal the 4 bytes at s, handle another match
// right away with no literal in between.
CMPL (DX)(SI*1), DI
JEQ match_nolit_loop_encodeBlockAsm10B
// Otherwise s += 1 and go back to searching.
INCL CX
JMP search_loop_encodeBlockAsm10B
emit_remainder_encodeBlockAsm10B:
// Destination check for the tail: if AX + 3 + (src_len - nextEmit) is not
// below dstLimit, store 0 as the result and return.
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 3(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeBlockAsm10B
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeBlockAsm10B:
// Emit src[nextEmit:src_len] as a final literal, unless it is empty.
// CX = source pointer, SI = literal length, DX = length - 1 (the src base in
// DX is overwritten here and not used again); nextEmit = src_len.
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeBlockAsm10B
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeBlockAsm10B
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeBlockAsm10B
// 3-byte header: tag 0xf4 followed by 16-bit length-1.
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeBlockAsm10B
two_bytes_emit_remainder_encodeBlockAsm10B:
// 2-byte header: tag 0xf0 followed by 8-bit length-1.
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
CMPL DX, $0x40
JL memmove_emit_remainder_encodeBlockAsm10B
JMP memmove_long_emit_remainder_encodeBlockAsm10B
one_byte_emit_remainder_encodeBlockAsm10B:
// 1-byte header: tag = (length-1) << 2.
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
memmove_emit_remainder_encodeBlockAsm10B:
// DX = dst pointer just past the literal; BX = length (zero-extended copy
// of SI, which frees SI for use as scratch).
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// Copies BX (<= 64) bytes from (CX) to (AX) using overlapping loads/stores
// per size class; the <= 8 class always moves a full 8 bytes.
CMPQ BX, $0x08
JLE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8:
MOVQ (CX), SI
MOVQ SI, (AX)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16:
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32:
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64:
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeBlockAsm10B:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeBlockAsm10B
memmove_long_emit_remainder_encodeBlockAsm10B:
// Reached only with length >= 65.
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
// Save the first and last 32 source bytes in X0-X3, copy the middle in
// 32-byte chunks with 32-byte aligned stores, then write head and tail.
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
// DI = length / 32; R8 = 64 - (AX & 31), so AX + R8 - 32 is 32-byte aligned.
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
// JA tests CF from the SUBQ (DECQ does not modify CF) and ZF from the DECQ.
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32:
// 32 bytes per iteration while length >= R8.
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
emit_literal_done_emit_remainder_encodeBlockAsm10B:
// Result = number of bytes written = AX - dst_base.
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeBlockAsm8B(SB), $1048-56
MOVQ dst_base+0(FP), AX
MOVQ $0x00000008, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeBlockAsm8B:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeBlockAsm8B
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), CX
LEAQ -9(CX), DX
LEAQ -8(CX), SI
MOVL SI, 8(SP)
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
MOVL $0x00000001, CX
MOVL CX, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeBlockAsm8B:
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x04, SI
LEAL 4(CX)(SI*1), SI
CMPL SI, 8(SP)
JGE emit_remainder_encodeBlockAsm8B
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
MOVQ $0x9e3779b1, R9
MOVQ DI, R10
MOVQ DI, R11
SHRQ $0x08, R11
SHLQ $0x20, R10
IMULQ R9, R10
SHRQ $0x38, R10
SHLQ $0x20, R11
IMULQ R9, R11
SHRQ $0x38, R11
MOVL 24(SP)(R10*4), SI
MOVL 24(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
LEAL 1(CX), R10
MOVL R10, 24(SP)(R11*4)
MOVQ DI, R10
SHRQ $0x10, R10
SHLQ $0x20, R10
IMULQ R9, R10
SHRQ $0x38, R10
MOVL CX, R9
SUBL 16(SP), R9
MOVL 1(DX)(R9*1), R11
MOVQ DI, R9
SHRQ $0x08, R9
CMPL R9, R11
JNE no_repeat_found_encodeBlockAsm8B
LEAL 1(CX), DI
MOVL 12(SP), R8
MOVL DI, SI
SUBL 16(SP), SI
JZ repeat_extend_back_end_encodeBlockAsm8B
repeat_extend_back_loop_encodeBlockAsm8B:
CMPL DI, R8
JLE repeat_extend_back_end_encodeBlockAsm8B
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(DI*1), R9
CMPB BL, R9
JNE repeat_extend_back_end_encodeBlockAsm8B
LEAL -1(DI), DI
DECL SI
JNZ repeat_extend_back_loop_encodeBlockAsm8B
repeat_extend_back_end_encodeBlockAsm8B:
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R10
SUBL SI, R9
LEAL -1(R9), SI
CMPL SI, $0x3c
JLT one_byte_repeat_emit_encodeBlockAsm8B
CMPL SI, $0x00000100
JLT two_bytes_repeat_emit_encodeBlockAsm8B
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_repeat_emit_encodeBlockAsm8B
two_bytes_repeat_emit_encodeBlockAsm8B:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
CMPL SI, $0x40
JL memmove_repeat_emit_encodeBlockAsm8B
JMP memmove_long_repeat_emit_encodeBlockAsm8B
one_byte_repeat_emit_encodeBlockAsm8B:
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_repeat_emit_encodeBlockAsm8B:
LEAQ (AX)(R9*1), SI
// genMemMoveShort
CMPQ R9, $0x08
JLE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8:
MOVQ (R10), R11
MOVQ R11, (AX)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16:
MOVQ (R10), R11
MOVQ -8(R10)(R9*1), R10
MOVQ R11, (AX)
MOVQ R10, -8(AX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_repeat_emit_encodeBlockAsm8B:
MOVQ SI, AX
JMP emit_literal_done_repeat_emit_encodeBlockAsm8B
memmove_long_repeat_emit_encodeBlockAsm8B:
LEAQ (AX)(R9*1), SI
// genMemMoveLong
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R12
SHRQ $0x05, R12
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R13
SUBQ R11, R13
DECQ R12
JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
LEAQ -32(R10)(R13*1), R11
LEAQ -32(AX)(R13*1), R14
emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R14)
MOVOA X5, 16(R14)
ADDQ $0x20, R14
ADDQ $0x20, R11
ADDQ $0x20, R13
DECQ R12
JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
MOVOU -32(R10)(R13*1), X4
MOVOU -16(R10)(R13*1), X5
MOVOA X4, -32(AX)(R13*1)
MOVOA X5, -16(AX)(R13*1)
ADDQ $0x20, R13
CMPQ R9, R13
JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ SI, AX
emit_literal_done_repeat_emit_encodeBlockAsm8B:
ADDL $0x05, CX
MOVL CX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), R9
SUBL CX, R9
LEAQ (DX)(CX*1), R10
LEAQ (DX)(SI*1), SI
// matchLen
XORL R12, R12
CMPL R9, $0x08
JL matchlen_single_repeat_extend_encodeBlockAsm8B
matchlen_loopback_repeat_extend_encodeBlockAsm8B:
MOVQ (R10)(R12*1), R11
XORQ (SI)(R12*1), R11
TESTQ R11, R11
JZ matchlen_loop_repeat_extend_encodeBlockAsm8B
BSFQ R11, R11
SARQ $0x03, R11
LEAL (R12)(R11*1), R12
JMP repeat_extend_forward_end_encodeBlockAsm8B
matchlen_loop_repeat_extend_encodeBlockAsm8B:
LEAL -8(R9), R9
LEAL 8(R12), R12
CMPL R9, $0x08
JGE matchlen_loopback_repeat_extend_encodeBlockAsm8B
matchlen_single_repeat_extend_encodeBlockAsm8B:
TESTL R9, R9
JZ repeat_extend_forward_end_encodeBlockAsm8B
matchlen_single_loopback_repeat_extend_encodeBlockAsm8B:
MOVB (R10)(R12*1), R11
CMPB (SI)(R12*1), R11
JNE repeat_extend_forward_end_encodeBlockAsm8B
LEAL 1(R12), R12
DECL R9
JNZ matchlen_single_loopback_repeat_extend_encodeBlockAsm8B
repeat_extend_forward_end_encodeBlockAsm8B:
ADDL R12, CX
MOVL CX, SI
SUBL DI, SI
MOVL 16(SP), DI
TESTL R8, R8
JZ repeat_as_copy_encodeBlockAsm8B
// emitRepeat
MOVL SI, DI
LEAL -4(SI), SI
CMPL DI, $0x08
JLE repeat_two_match_repeat_encodeBlockAsm8B
CMPL DI, $0x0c
JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B
cant_repeat_two_offset_match_repeat_encodeBlockAsm8B:
CMPL SI, $0x00000104
JLT repeat_three_match_repeat_encodeBlockAsm8B
LEAL -256(SI), SI
MOVW $0x0019, (AX)
MOVW SI, 2(AX)
ADDQ $0x04, AX
JMP repeat_end_emit_encodeBlockAsm8B
repeat_three_match_repeat_encodeBlockAsm8B:
LEAL -4(SI), SI
MOVW $0x0015, (AX)
MOVB SI, 2(AX)
ADDQ $0x03, AX
JMP repeat_end_emit_encodeBlockAsm8B
repeat_two_match_repeat_encodeBlockAsm8B:
SHLL $0x02, SI
ORL $0x01, SI
MOVW SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm8B
XORQ R8, R8
LEAL 1(R8)(SI*4), SI
MOVB DI, 1(AX)
SARL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm8B
repeat_as_copy_encodeBlockAsm8B:
// emitCopy
two_byte_offset_repeat_as_copy_encodeBlockAsm8B:
CMPL SI, $0x40
JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B
MOVB $0xee, (AX)
MOVW DI, 1(AX)
LEAL -60(SI), SI
ADDQ $0x03, AX
// emitRepeat
MOVL SI, DI
LEAL -4(SI), SI
CMPL DI, $0x08
JLE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
CMPL DI, $0x0c
JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
CMPL SI, $0x00000104
JLT repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
LEAL -256(SI), SI
MOVW $0x0019, (AX)
MOVW SI, 2(AX)
ADDQ $0x04, AX
JMP repeat_end_emit_encodeBlockAsm8B
repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
LEAL -4(SI), SI
MOVW $0x0015, (AX)
MOVB SI, 2(AX)
ADDQ $0x03, AX
JMP repeat_end_emit_encodeBlockAsm8B
repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
SHLL $0x02, SI
ORL $0x01, SI
MOVW SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm8B
XORQ R8, R8
LEAL 1(R8)(SI*4), SI
MOVB DI, 1(AX)
SARL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm8B
JMP two_byte_offset_repeat_as_copy_encodeBlockAsm8B
two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B:
CMPL SI, $0x0c
JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8B
MOVB $0x01, BL
LEAL -16(BX)(SI*4), SI
MOVB DI, 1(AX)
SHRL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeBlockAsm8B
emit_copy_three_repeat_as_copy_encodeBlockAsm8B:
MOVB $0x02, BL
LEAL -4(BX)(SI*4), SI
MOVB SI, (AX)
MOVW DI, 1(AX)
ADDQ $0x03, AX
repeat_end_emit_encodeBlockAsm8B:
MOVL CX, 12(SP)
JMP search_loop_encodeBlockAsm8B
no_repeat_found_encodeBlockAsm8B:
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBlockAsm8B
SHRQ $0x08, DI
MOVL 24(SP)(R10*4), SI
LEAL 2(CX), R9
CMPL (DX)(R8*1), DI
JEQ candidate2_match_encodeBlockAsm8B
MOVL R9, 24(SP)(R10*4)
SHRQ $0x08, DI
CMPL (DX)(SI*1), DI
JEQ candidate3_match_encodeBlockAsm8B
MOVL 20(SP), CX
JMP search_loop_encodeBlockAsm8B
candidate3_match_encodeBlockAsm8B:
ADDL $0x02, CX
JMP candidate_match_encodeBlockAsm8B
candidate2_match_encodeBlockAsm8B:
MOVL R9, 24(SP)(R10*4)
INCL CX
MOVL R8, SI
candidate_match_encodeBlockAsm8B:
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeBlockAsm8B
match_extend_back_loop_encodeBlockAsm8B:
CMPL CX, DI
JLE match_extend_back_end_encodeBlockAsm8B
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeBlockAsm8B
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeBlockAsm8B
JMP match_extend_back_loop_encodeBlockAsm8B
match_extend_back_end_encodeBlockAsm8B:
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 3(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeBlockAsm8B
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeBlockAsm8B:
MOVL CX, DI
MOVL 12(SP), R8
CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeBlockAsm8B
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(R8*1), DI
SUBL R8, R9
LEAL -1(R9), R8
CMPL R8, $0x3c
JLT one_byte_match_emit_encodeBlockAsm8B
CMPL R8, $0x00000100
JLT two_bytes_match_emit_encodeBlockAsm8B
MOVB $0xf4, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeBlockAsm8B
two_bytes_match_emit_encodeBlockAsm8B:
MOVB $0xf0, (AX)
MOVB R8, 1(AX)
ADDQ $0x02, AX
CMPL R8, $0x40
JL memmove_match_emit_encodeBlockAsm8B
JMP memmove_long_match_emit_encodeBlockAsm8B
one_byte_match_emit_encodeBlockAsm8B:
SHLB $0x02, R8
MOVB R8, (AX)
ADDQ $0x01, AX
memmove_match_emit_encodeBlockAsm8B:
LEAQ (AX)(R9*1), R8
// genMemMoveShort
CMPQ R9, $0x08
JLE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8:
MOVQ (DI), R10
MOVQ R10, (AX)
JMP memmove_end_copy_match_emit_encodeBlockAsm8B
emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16:
MOVQ (DI), R10
MOVQ -8(DI)(R9*1), DI
MOVQ R10, (AX)
MOVQ DI, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm8B
emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBlockAsm8B
emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeBlockAsm8B:
MOVQ R8, AX
JMP emit_literal_done_match_emit_encodeBlockAsm8B
memmove_long_match_emit_encodeBlockAsm8B:
LEAQ (AX)(R9*1), R8
// genMemMoveLong
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVQ R9, R11
SHRQ $0x05, R11
MOVQ AX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
DECQ R11
JA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
LEAQ -32(DI)(R12*1), R10
LEAQ -32(AX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
MOVOU -32(DI)(R12*1), X4
MOVOU -16(DI)(R12*1), X5
MOVOA X4, -32(AX)(R12*1)
MOVOA X5, -16(AX)(R12*1)
ADDQ $0x20, R12
CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ R8, AX
emit_literal_done_match_emit_encodeBlockAsm8B:
match_nolit_loop_encodeBlockAsm8B:
MOVL CX, DI
SUBL SI, DI
MOVL DI, 16(SP)
ADDL $0x04, CX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL CX, DI
LEAQ (DX)(CX*1), R8
LEAQ (DX)(SI*1), SI
// matchLen
XORL R10, R10
CMPL DI, $0x08
JL matchlen_single_match_nolit_encodeBlockAsm8B
matchlen_loopback_match_nolit_encodeBlockAsm8B:
MOVQ (R8)(R10*1), R9
XORQ (SI)(R10*1), R9
TESTQ R9, R9
JZ matchlen_loop_match_nolit_encodeBlockAsm8B
BSFQ R9, R9
SARQ $0x03, R9
LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeBlockAsm8B
matchlen_loop_match_nolit_encodeBlockAsm8B:
LEAL -8(DI), DI
LEAL 8(R10), R10
CMPL DI, $0x08
JGE matchlen_loopback_match_nolit_encodeBlockAsm8B
matchlen_single_match_nolit_encodeBlockAsm8B:
TESTL DI, DI
JZ match_nolit_end_encodeBlockAsm8B
matchlen_single_loopback_match_nolit_encodeBlockAsm8B:
MOVB (R8)(R10*1), R9
CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeBlockAsm8B
LEAL 1(R10), R10
DECL DI
JNZ matchlen_single_loopback_match_nolit_encodeBlockAsm8B
match_nolit_end_encodeBlockAsm8B:
ADDL R10, CX
MOVL 16(SP), SI
ADDL $0x04, R10
MOVL CX, 12(SP)
// emitCopy
two_byte_offset_match_nolit_encodeBlockAsm8B:
CMPL R10, $0x40
JLE two_byte_offset_short_match_nolit_encodeBlockAsm8B
MOVB $0xee, (AX)
MOVW SI, 1(AX)
LEAL -60(R10), R10
ADDQ $0x03, AX
// emitRepeat
MOVL R10, SI
LEAL -4(R10), R10
CMPL SI, $0x08
JLE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short
CMPL SI, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
CMPL R10, $0x00000104
JLT repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short
LEAL -256(R10), R10
MOVW $0x0019, (AX)
MOVW R10, 2(AX)
ADDQ $0x04, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm8B
repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short:
LEAL -4(R10), R10
MOVW $0x0015, (AX)
MOVB R10, 2(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm8B
repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short:
SHLL $0x02, R10
ORL $0x01, R10
MOVW R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm8B
XORQ DI, DI
LEAL 1(DI)(R10*4), R10
MOVB SI, 1(AX)
SARL $0x08, SI
SHLL $0x05, SI
ORL SI, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm8B
JMP two_byte_offset_match_nolit_encodeBlockAsm8B
two_byte_offset_short_match_nolit_encodeBlockAsm8B:
CMPL R10, $0x0c
JGE emit_copy_three_match_nolit_encodeBlockAsm8B
MOVB $0x01, BL
LEAL -16(BX)(R10*4), R10
MOVB SI, 1(AX)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBlockAsm8B
emit_copy_three_match_nolit_encodeBlockAsm8B:
MOVB $0x02, BL
LEAL -4(BX)(R10*4), R10
MOVB R10, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
match_nolit_emitcopy_end_encodeBlockAsm8B:
CMPL CX, 8(SP)
JGE emit_remainder_encodeBlockAsm8B
MOVQ -2(DX)(CX*1), DI
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeBlockAsm8B
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeBlockAsm8B:
MOVQ $0x9e3779b1, R9
MOVQ DI, R8
SHRQ $0x10, DI
MOVQ DI, SI
SHLQ $0x20, R8
IMULQ R9, R8
SHRQ $0x38, R8
SHLQ $0x20, SI
IMULQ R9, SI
SHRQ $0x38, SI
LEAL -2(CX), R9
LEAQ 24(SP)(SI*4), R10
MOVL (R10), SI
MOVL R9, 24(SP)(R8*4)
MOVL CX, (R10)
CMPL (DX)(SI*1), DI
JEQ match_nolit_loop_encodeBlockAsm8B
INCL CX
JMP search_loop_encodeBlockAsm8B
emit_remainder_encodeBlockAsm8B:
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 3(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeBlockAsm8B
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeBlockAsm8B:
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeBlockAsm8B
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeBlockAsm8B
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeBlockAsm8B
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeBlockAsm8B
two_bytes_emit_remainder_encodeBlockAsm8B:
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
CMPL DX, $0x40
JL memmove_emit_remainder_encodeBlockAsm8B
JMP memmove_long_emit_remainder_encodeBlockAsm8B
one_byte_emit_remainder_encodeBlockAsm8B:
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
memmove_emit_remainder_encodeBlockAsm8B:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
CMPQ BX, $0x08
JLE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8:
MOVQ (CX), SI
MOVQ SI, (AX)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16:
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32:
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64:
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeBlockAsm8B:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeBlockAsm8B
memmove_long_emit_remainder_encodeBlockAsm8B:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32:
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
emit_literal_done_emit_remainder_encodeBlockAsm8B:
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeBetterBlockAsm(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeBetterBlockAsm(SB), $327704-56
MOVQ dst_base+0(FP), AX
MOVQ $0x00000a00, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeBetterBlockAsm:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeBetterBlockAsm
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), CX
LEAQ -6(CX), DX
LEAQ -8(CX), SI
MOVL SI, 8(SP)
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
MOVL $0x00000001, CX
MOVL $0x00000000, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeBetterBlockAsm:
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x07, SI
CMPL SI, $0x63
JLE check_maxskip_ok_encodeBetterBlockAsm
LEAL 100(CX), SI
JMP check_maxskip_cont_encodeBetterBlockAsm
check_maxskip_ok_encodeBetterBlockAsm:
LEAL 1(CX)(SI*1), SI
check_maxskip_cont_encodeBetterBlockAsm:
CMPL SI, 8(SP)
JGE emit_remainder_encodeBetterBlockAsm
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
MOVQ $0x00cf1bbcdcbfa563, R9
MOVQ $0x9e3779b1, SI
MOVQ DI, R10
MOVQ DI, R11
SHLQ $0x08, R10
IMULQ R9, R10
SHRQ $0x30, R10
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x32, R11
MOVL 24(SP)(R10*4), SI
MOVL 262168(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 262168(SP)(R11*4)
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm
CMPL (DX)(R8*1), DI
JEQ candidateS_match_encodeBetterBlockAsm
MOVL 20(SP), CX
JMP search_loop_encodeBetterBlockAsm
candidateS_match_encodeBetterBlockAsm:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x08, R10
IMULQ R9, R10
SHRQ $0x30, R10
MOVL 24(SP)(R10*4), SI
INCL CX
MOVL CX, 24(SP)(R10*4)
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm
DECL CX
MOVL R8, SI
candidate_match_encodeBetterBlockAsm:
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeBetterBlockAsm
match_extend_back_loop_encodeBetterBlockAsm:
CMPL CX, DI
JLE match_extend_back_end_encodeBetterBlockAsm
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeBetterBlockAsm
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeBetterBlockAsm
JMP match_extend_back_loop_encodeBetterBlockAsm
match_extend_back_end_encodeBetterBlockAsm:
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 5(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeBetterBlockAsm
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeBetterBlockAsm:
MOVL CX, DI
ADDL $0x04, CX
ADDL $0x04, SI
MOVQ src_len+32(FP), R8
SUBL CX, R8
LEAQ (DX)(CX*1), R9
LEAQ (DX)(SI*1), R10
// matchLen
XORL R12, R12
CMPL R8, $0x08
JL matchlen_single_match_nolit_encodeBetterBlockAsm
matchlen_loopback_match_nolit_encodeBetterBlockAsm:
MOVQ (R9)(R12*1), R11
XORQ (R10)(R12*1), R11
TESTQ R11, R11
JZ matchlen_loop_match_nolit_encodeBetterBlockAsm
BSFQ R11, R11
SARQ $0x03, R11
LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeBetterBlockAsm
matchlen_loop_match_nolit_encodeBetterBlockAsm:
LEAL -8(R8), R8
LEAL 8(R12), R12
CMPL R8, $0x08
JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm
matchlen_single_match_nolit_encodeBetterBlockAsm:
TESTL R8, R8
JZ match_nolit_end_encodeBetterBlockAsm
matchlen_single_loopback_match_nolit_encodeBetterBlockAsm:
MOVB (R9)(R12*1), R11
CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeBetterBlockAsm
LEAL 1(R12), R12
DECL R8
JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm
match_nolit_end_encodeBetterBlockAsm:
MOVL CX, R8
SUBL SI, R8
// Check if repeat
CMPL 16(SP), R8
JEQ match_is_repeat_encodeBetterBlockAsm
CMPL R12, $0x01
JG match_length_ok_encodeBetterBlockAsm
CMPL R8, $0x0000ffff
JLE match_length_ok_encodeBetterBlockAsm
MOVL 20(SP), CX
INCL CX
JMP search_loop_encodeBetterBlockAsm
match_length_ok_encodeBetterBlockAsm:
MOVL R8, 16(SP)
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeBetterBlockAsm
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R10
SUBL SI, R9
LEAL -1(R9), SI
CMPL SI, $0x3c
JLT one_byte_match_emit_encodeBetterBlockAsm
CMPL SI, $0x00000100
JLT two_bytes_match_emit_encodeBetterBlockAsm
CMPL SI, $0x00010000
JLT three_bytes_match_emit_encodeBetterBlockAsm
CMPL SI, $0x01000000
JLT four_bytes_match_emit_encodeBetterBlockAsm
MOVB $0xfc, (AX)
MOVL SI, 1(AX)
ADDQ $0x05, AX
JMP memmove_long_match_emit_encodeBetterBlockAsm
four_bytes_match_emit_encodeBetterBlockAsm:
MOVL SI, R11
SHRL $0x10, R11
MOVB $0xf8, (AX)
MOVW SI, 1(AX)
MOVB R11, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_match_emit_encodeBetterBlockAsm
three_bytes_match_emit_encodeBetterBlockAsm:
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeBetterBlockAsm
two_bytes_match_emit_encodeBetterBlockAsm:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
CMPL SI, $0x40
JL memmove_match_emit_encodeBetterBlockAsm
JMP memmove_long_match_emit_encodeBetterBlockAsm
one_byte_match_emit_encodeBetterBlockAsm:
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_match_emit_encodeBetterBlockAsm:
LEAQ (AX)(R9*1), SI
// genMemMoveShort
CMPQ R9, $0x04
JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4
CMPQ R9, $0x08
JB emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4:
MOVL (R10), R11
MOVL R11, (AX)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7:
MOVL (R10), R11
MOVL -4(R10)(R9*1), R10
MOVL R11, (AX)
MOVL R10, -4(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16:
MOVQ (R10), R11
MOVQ -8(R10)(R9*1), R10
MOVQ R11, (AX)
MOVQ R10, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeBetterBlockAsm:
MOVQ SI, AX
JMP emit_literal_done_match_emit_encodeBetterBlockAsm
memmove_long_match_emit_encodeBetterBlockAsm:
LEAQ (AX)(R9*1), SI
// genMemMoveLong
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R13
SHRQ $0x05, R13
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R14
SUBQ R11, R14
DECQ R13
JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
LEAQ -32(R10)(R14*1), R11
LEAQ -32(AX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R15)
MOVOA X5, 16(R15)
ADDQ $0x20, R15
ADDQ $0x20, R11
ADDQ $0x20, R14
DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32:
MOVOU -32(R10)(R14*1), X4
MOVOU -16(R10)(R14*1), X5
MOVOA X4, -32(AX)(R14*1)
MOVOA X5, -16(AX)(R14*1)
ADDQ $0x20, R14
CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ SI, AX
emit_literal_done_match_emit_encodeBetterBlockAsm:
ADDL R12, CX
ADDL $0x04, R12
MOVL CX, 12(SP)
// emitCopy
CMPL R8, $0x00010000
JL two_byte_offset_match_nolit_encodeBetterBlockAsm
four_bytes_loop_back_match_nolit_encodeBetterBlockAsm:
CMPL R12, $0x40
JLE four_bytes_remain_match_nolit_encodeBetterBlockAsm
MOVB $0xff, (AX)
MOVL R8, 1(AX)
LEAL -64(R12), R12
ADDQ $0x05, AX
CMPL R12, $0x04
JL four_bytes_remain_match_nolit_encodeBetterBlockAsm
// emitRepeat
emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy:
MOVL R12, SI
LEAL -4(R12), R12
CMPL SI, $0x08
JLE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy
CMPL SI, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
CMPL R8, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
CMPL R12, $0x00000104
JLT repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy
CMPL R12, $0x00010100
JLT repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy
CMPL R12, $0x0100ffff
JLT repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy
LEAL -16842747(R12), R12
MOVW $0x001d, (AX)
MOVW $0xfffb, 2(AX)
MOVB $0xff, 4(AX)
ADDQ $0x05, AX
JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy
repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy:
LEAL -65536(R12), R12
MOVL R12, R8
MOVW $0x001d, (AX)
MOVW R12, 2(AX)
SARL $0x10, R8
MOVB R8, 4(AX)
ADDQ $0x05, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy:
LEAL -256(R12), R12
MOVW $0x0019, (AX)
MOVW R12, 2(AX)
ADDQ $0x04, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy:
LEAL -4(R12), R12
MOVW $0x0015, (AX)
MOVB R12, 2(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy:
SHLL $0x02, R12
ORL $0x01, R12
MOVW R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
XORQ SI, SI
LEAL 1(SI)(R12*4), R12
MOVB R8, 1(AX)
SARL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
JMP four_bytes_loop_back_match_nolit_encodeBetterBlockAsm
four_bytes_remain_match_nolit_encodeBetterBlockAsm:
TESTL R12, R12
JZ match_nolit_emitcopy_end_encodeBetterBlockAsm
MOVB $0x03, BL
LEAL -4(BX)(R12*4), R12
MOVB R12, (AX)
MOVL R8, 1(AX)
ADDQ $0x05, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
two_byte_offset_match_nolit_encodeBetterBlockAsm:
CMPL R12, $0x40
JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm
MOVB $0xee, (AX)
MOVW R8, 1(AX)
LEAL -60(R12), R12
ADDQ $0x03, AX
// emitRepeat
emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short:
MOVL R12, SI
LEAL -4(R12), R12
CMPL SI, $0x08
JLE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short
CMPL SI, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
CMPL R8, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
CMPL R12, $0x00000104
JLT repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short
CMPL R12, $0x00010100
JLT repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short
CMPL R12, $0x0100ffff
JLT repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short
LEAL -16842747(R12), R12
MOVW $0x001d, (AX)
MOVW $0xfffb, 2(AX)
MOVB $0xff, 4(AX)
ADDQ $0x05, AX
JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short
repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short:
LEAL -65536(R12), R12
MOVL R12, R8
MOVW $0x001d, (AX)
MOVW R12, 2(AX)
SARL $0x10, R8
MOVB R8, 4(AX)
ADDQ $0x05, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short:
LEAL -256(R12), R12
MOVW $0x0019, (AX)
MOVW R12, 2(AX)
ADDQ $0x04, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short:
LEAL -4(R12), R12
MOVW $0x0015, (AX)
MOVB R12, 2(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short:
SHLL $0x02, R12
ORL $0x01, R12
MOVW R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
XORQ SI, SI
LEAL 1(SI)(R12*4), R12
MOVB R8, 1(AX)
SARL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
JMP two_byte_offset_match_nolit_encodeBetterBlockAsm
two_byte_offset_short_match_nolit_encodeBetterBlockAsm:
CMPL R12, $0x0c
JGE emit_copy_three_match_nolit_encodeBetterBlockAsm
CMPL R8, $0x00000800
JGE emit_copy_three_match_nolit_encodeBetterBlockAsm
MOVB $0x01, BL
LEAL -16(BX)(R12*4), R12
MOVB R8, 1(AX)
SHRL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
emit_copy_three_match_nolit_encodeBetterBlockAsm:
MOVB $0x02, BL
LEAL -4(BX)(R12*4), R12
MOVB R12, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
match_is_repeat_encodeBetterBlockAsm:
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R10
SUBL SI, R9
LEAL -1(R9), SI
CMPL SI, $0x3c
JLT one_byte_match_emit_repeat_encodeBetterBlockAsm
CMPL SI, $0x00000100
JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm
CMPL SI, $0x00010000
JLT three_bytes_match_emit_repeat_encodeBetterBlockAsm
CMPL SI, $0x01000000
JLT four_bytes_match_emit_repeat_encodeBetterBlockAsm
MOVB $0xfc, (AX)
MOVL SI, 1(AX)
ADDQ $0x05, AX
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
four_bytes_match_emit_repeat_encodeBetterBlockAsm:
MOVL SI, R11
SHRL $0x10, R11
MOVB $0xf8, (AX)
MOVW SI, 1(AX)
MOVB R11, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
three_bytes_match_emit_repeat_encodeBetterBlockAsm:
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
two_bytes_match_emit_repeat_encodeBetterBlockAsm:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
CMPL SI, $0x40
JL memmove_match_emit_repeat_encodeBetterBlockAsm
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
one_byte_match_emit_repeat_encodeBetterBlockAsm:
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_match_emit_repeat_encodeBetterBlockAsm:
LEAQ (AX)(R9*1), SI
// genMemMoveShort
CMPQ R9, $0x04
JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4
CMPQ R9, $0x08
JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4:
MOVL (R10), R11
MOVL R11, (AX)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7:
MOVL (R10), R11
MOVL -4(R10)(R9*1), R10
MOVL R11, (AX)
MOVL R10, -4(AX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16:
MOVQ (R10), R11
MOVQ -8(R10)(R9*1), R10
MOVQ R11, (AX)
MOVQ R10, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm:
MOVQ SI, AX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
memmove_long_match_emit_repeat_encodeBetterBlockAsm:
LEAQ (AX)(R9*1), SI
// genMemMoveLong
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R13
SHRQ $0x05, R13
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R14
SUBQ R11, R14
DECQ R13
JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
LEAQ -32(R10)(R14*1), R11
LEAQ -32(AX)(R14*1), R15
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R15)
MOVOA X5, 16(R15)
ADDQ $0x20, R15
ADDQ $0x20, R11
ADDQ $0x20, R14
DECQ R13
JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32:
MOVOU -32(R10)(R14*1), X4
MOVOU -16(R10)(R14*1), X5
MOVOA X4, -32(AX)(R14*1)
MOVOA X5, -16(AX)(R14*1)
ADDQ $0x20, R14
CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ SI, AX
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm:
ADDL R12, CX
ADDL $0x04, R12
MOVL CX, 12(SP)
// emitRepeat
emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm:
MOVL R12, SI
LEAL -4(R12), R12
CMPL SI, $0x08
JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm
CMPL SI, $0x0c
JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
CMPL R8, $0x00000800
JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
CMPL R12, $0x00000104
JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm
CMPL R12, $0x00010100
JLT repeat_four_match_nolit_repeat_encodeBetterBlockAsm
CMPL R12, $0x0100ffff
JLT repeat_five_match_nolit_repeat_encodeBetterBlockAsm
LEAL -16842747(R12), R12
MOVW $0x001d, (AX)
MOVW $0xfffb, 2(AX)
MOVB $0xff, 4(AX)
ADDQ $0x05, AX
JMP emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm
repeat_five_match_nolit_repeat_encodeBetterBlockAsm:
LEAL -65536(R12), R12
MOVL R12, R8
MOVW $0x001d, (AX)
MOVW R12, 2(AX)
SARL $0x10, R8
MOVB R8, 4(AX)
ADDQ $0x05, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_four_match_nolit_repeat_encodeBetterBlockAsm:
LEAL -256(R12), R12
MOVW $0x0019, (AX)
MOVW R12, 2(AX)
ADDQ $0x04, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_three_match_nolit_repeat_encodeBetterBlockAsm:
LEAL -4(R12), R12
MOVW $0x0015, (AX)
MOVB R12, 2(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_two_match_nolit_repeat_encodeBetterBlockAsm:
SHLL $0x02, R12
ORL $0x01, R12
MOVW R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
XORQ SI, SI
LEAL 1(SI)(R12*4), R12
MOVB R8, 1(AX)
SARL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
match_nolit_emitcopy_end_encodeBetterBlockAsm:
CMPL CX, 8(SP)
JGE emit_remainder_encodeBetterBlockAsm
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeBetterBlockAsm
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeBetterBlockAsm:
MOVQ $0x00cf1bbcdcbfa563, SI
MOVQ $0x9e3779b1, R8
INCL DI
MOVQ (DX)(DI*1), R9
MOVQ R9, R10
MOVQ R9, R11
MOVQ R9, R12
SHRQ $0x08, R11
MOVQ R11, R13
SHRQ $0x10, R12
LEAL 1(DI), R14
LEAL 2(DI), R15
MOVQ -2(DX)(CX*1), R9
SHLQ $0x08, R10
IMULQ SI, R10
SHRQ $0x30, R10
SHLQ $0x08, R13
IMULQ SI, R13
SHRQ $0x30, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x32, R11
SHLQ $0x20, R12
IMULQ R8, R12
SHRQ $0x32, R12
MOVL DI, 24(SP)(R10*4)
MOVL R14, 24(SP)(R13*4)
MOVL R14, 262168(SP)(R11*4)
MOVL R15, 262168(SP)(R12*4)
MOVQ R9, R10
MOVQ R9, R11
SHRQ $0x08, R11
MOVQ R11, R13
LEAL -2(CX), R9
LEAL -1(CX), DI
SHLQ $0x08, R10
IMULQ SI, R10
SHRQ $0x30, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x32, R11
SHLQ $0x08, R13
IMULQ SI, R13
SHRQ $0x30, R13
MOVL R9, 24(SP)(R10*4)
MOVL DI, 262168(SP)(R11*4)
MOVL DI, 24(SP)(R13*4)
JMP search_loop_encodeBetterBlockAsm
emit_remainder_encodeBetterBlockAsm:
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 5(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeBetterBlockAsm
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeBetterBlockAsm:
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeBetterBlockAsm
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeBetterBlockAsm
CMPL DX, $0x00010000
JLT three_bytes_emit_remainder_encodeBetterBlockAsm
CMPL DX, $0x01000000
JLT four_bytes_emit_remainder_encodeBetterBlockAsm
MOVB $0xfc, (AX)
MOVL DX, 1(AX)
ADDQ $0x05, AX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm
four_bytes_emit_remainder_encodeBetterBlockAsm:
MOVL DX, BX
SHRL $0x10, BX
MOVB $0xf8, (AX)
MOVW DX, 1(AX)
MOVB BL, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm
three_bytes_emit_remainder_encodeBetterBlockAsm:
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm
two_bytes_emit_remainder_encodeBetterBlockAsm:
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
CMPL DX, $0x40
JL memmove_emit_remainder_encodeBetterBlockAsm
JMP memmove_long_emit_remainder_encodeBetterBlockAsm
one_byte_emit_remainder_encodeBetterBlockAsm:
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
memmove_emit_remainder_encodeBetterBlockAsm:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
CMPQ BX, $0x04
JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4
CMPQ BX, $0x08
JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4:
MOVL (CX), SI
MOVL SI, (AX)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7:
MOVL (CX), SI
MOVL -4(CX)(BX*1), CX
MOVL SI, (AX)
MOVL CX, -4(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16:
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32:
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64:
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeBetterBlockAsm:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm
memmove_long_emit_remainder_encodeBetterBlockAsm:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32:
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
emit_literal_done_emit_remainder_encodeBetterBlockAsm:
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and returns the number of bytes written
// (final dst write pointer minus dst_base). Returns 0 whenever the
// output would reach the destination limit stored at 0(SP).
//
// Frame layout ($327704 bytes):
//   0(SP)       dst limit pointer: dst_base + src_len - 6 - (src_len >> 5)
//   8(SP)       src_len - 8; searching stops once the next position reaches it
//   12(SP)      src index of the first byte not yet emitted (pending literals start here)
//   16(SP)      offset of the most recently emitted non-repeat match
//   20(SP)      next src position to try when the current one gives no match
//   24(SP)      table of 65536 32-bit src positions, indexed by a 16-bit hash
//   262168(SP)  table of 16384 32-bit src positions, indexed by a 14-bit hash
//
// Main-loop register roles: AX = dst write pointer, DX = src base,
// CX = current src position. Other registers are scratch.
TEXT ·encodeBetterBlockAsm4MB(SB), $327704-56
MOVQ dst_base+0(FP), AX
// Zero both tables: 0xa00 iterations of 128 bytes = 327680 bytes from 24(SP).
MOVQ $0x00000a00, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeBetterBlockAsm4MB:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeBetterBlockAsm4MB
// Nothing emitted yet.
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), CX
LEAQ -6(CX), DX
LEAQ -8(CX), SI
// 8(SP) = src_len - 8 (search limit).
MOVL SI, 8(SP)
// 0(SP) = dst_base + (src_len - 6 - src_len>>5) (dst limit).
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
// Start searching at src position 1; no previous offset yet.
MOVL $0x00000001, CX
MOVL $0x00000000, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeBetterBlockAsm4MB:
// Compute the next position to try: CX + 1 + ((CX - 12(SP)) >> 7),
// or CX + 100 when the shifted distance exceeds 99.
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x07, SI
CMPL SI, $0x63
JLE check_maxskip_ok_encodeBetterBlockAsm4MB
LEAL 100(CX), SI
JMP check_maxskip_cont_encodeBetterBlockAsm4MB
check_maxskip_ok_encodeBetterBlockAsm4MB:
LEAL 1(CX)(SI*1), SI
check_maxskip_cont_encodeBetterBlockAsm4MB:
// Stop searching once the next position reaches the search limit.
CMPL SI, 8(SP)
JGE emit_remainder_encodeBetterBlockAsm4MB
// DI = 8 bytes at src[CX]; remember the next position in 20(SP).
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
MOVQ $0x00cf1bbcdcbfa563, R9
MOVQ $0x9e3779b1, SI
MOVQ DI, R10
MOVQ DI, R11
// R10 = 16-bit hash of the low 7 bytes of DI (the shift drops the top byte).
SHLQ $0x08, R10
IMULQ R9, R10
SHRQ $0x30, R10
// R11 = 14-bit hash of the low 4 bytes of DI.
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x32, R11
// Fetch both candidates (SI from the 16-bit table, R8 from the 14-bit
// table), then record CX in both slots.
MOVL 24(SP)(R10*4), SI
MOVL 262168(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 262168(SP)(R11*4)
// A candidate is accepted when its first 4 bytes equal the low 4 bytes of DI.
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm4MB
CMPL (DX)(R8*1), DI
JEQ candidateS_match_encodeBetterBlockAsm4MB
// No match: advance to the saved next position.
MOVL 20(SP), CX
JMP search_loop_encodeBetterBlockAsm4MB
candidateS_match_encodeBetterBlockAsm4MB:
// The 14-bit-table candidate (R8) matched. Before using it, probe the
// 16-bit table for position CX+1: shift DI down one byte, hash it, read
// the candidate and store CX+1 in that slot.
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x08, R10
IMULQ R9, R10
SHRQ $0x30, R10
MOVL 24(SP)(R10*4), SI
INCL CX
MOVL CX, 24(SP)(R10*4)
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm4MB
// Probe failed: restore CX and fall back to the R8 candidate.
DECL CX
MOVL R8, SI
candidate_match_encodeBetterBlockAsm4MB:
// Extend the match backwards while CX > 12(SP), SI > 0 and the bytes
// just before both positions are equal.
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeBetterBlockAsm4MB
match_extend_back_loop_encodeBetterBlockAsm4MB:
CMPL CX, DI
JLE match_extend_back_end_encodeBetterBlockAsm4MB
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeBetterBlockAsm4MB
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeBetterBlockAsm4MB
JMP match_extend_back_loop_encodeBetterBlockAsm4MB
match_extend_back_end_encodeBetterBlockAsm4MB:
// Bail out with 0 unless AX + 4 + (pending literal count) is below the dst limit.
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 4(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeBetterBlockAsm4MB
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeBetterBlockAsm4MB:
// DI = match start. Skip the 4 bytes already known to be equal and set up
// R8 = bytes remaining in src after CX, R9 = &src[CX], R10 = &src[SI].
MOVL CX, DI
ADDL $0x04, CX
ADDL $0x04, SI
MOVQ src_len+32(FP), R8
SUBL CX, R8
LEAQ (DX)(CX*1), R9
LEAQ (DX)(SI*1), R10
// matchLen
// R12 = number of additional equal bytes. Compare 8 bytes at a time; on a
// difference, BSFQ/8 gives the count of equal low-order bytes.
XORL R12, R12
CMPL R8, $0x08
JL matchlen_single_match_nolit_encodeBetterBlockAsm4MB
matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB:
MOVQ (R9)(R12*1), R11
XORQ (R10)(R12*1), R11
TESTQ R11, R11
JZ matchlen_loop_match_nolit_encodeBetterBlockAsm4MB
BSFQ R11, R11
SARQ $0x03, R11
LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeBetterBlockAsm4MB
matchlen_loop_match_nolit_encodeBetterBlockAsm4MB:
LEAL -8(R8), R8
LEAL 8(R12), R12
CMPL R8, $0x08
JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB
matchlen_single_match_nolit_encodeBetterBlockAsm4MB:
// Fewer than 8 bytes left: compare byte by byte.
TESTL R8, R8
JZ match_nolit_end_encodeBetterBlockAsm4MB
matchlen_single_loopback_match_nolit_encodeBetterBlockAsm4MB:
MOVB (R9)(R12*1), R11
CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeBetterBlockAsm4MB
LEAL 1(R12), R12
DECL R8
JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm4MB
match_nolit_end_encodeBetterBlockAsm4MB:
// R8 = match offset (CX - SI).
MOVL CX, R8
SUBL SI, R8
// Check if repeat
CMPL 16(SP), R8
JEQ match_is_repeat_encodeBetterBlockAsm4MB
// Reject matches that have at most 1 extra byte (R12 <= 1) and an offset
// above 0xffff: resume searching at 20(SP) + 1.
CMPL R12, $0x01
JG match_length_ok_encodeBetterBlockAsm4MB
CMPL R8, $0x0000ffff
JLE match_length_ok_encodeBetterBlockAsm4MB
MOVL 20(SP), CX
INCL CX
JMP search_loop_encodeBetterBlockAsm4MB
match_length_ok_encodeBetterBlockAsm4MB:
// Remember this offset for the next repeat check.
MOVL R8, 16(SP)
// Emit pending literals src[12(SP):DI], if any.
// R9 = literal length, R10 = source pointer, SI = length - 1.
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeBetterBlockAsm4MB
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R10
SUBL SI, R9
LEAL -1(R9), SI
// Choose the header size from length-1: <60 one byte, <256 two bytes,
// <65536 three bytes, otherwise four bytes.
CMPL SI, $0x3c
JLT one_byte_match_emit_encodeBetterBlockAsm4MB
CMPL SI, $0x00000100
JLT two_bytes_match_emit_encodeBetterBlockAsm4MB
CMPL SI, $0x00010000
JLT three_bytes_match_emit_encodeBetterBlockAsm4MB
// Four-byte header: 0xf8 followed by the low 24 bits of length-1.
MOVL SI, R11
SHRL $0x10, R11
MOVB $0xf8, (AX)
MOVW SI, 1(AX)
MOVB R11, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_match_emit_encodeBetterBlockAsm4MB
three_bytes_match_emit_encodeBetterBlockAsm4MB:
// Three-byte header: 0xf4 followed by 16-bit length-1.
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeBetterBlockAsm4MB
two_bytes_match_emit_encodeBetterBlockAsm4MB:
// Two-byte header: 0xf0 followed by 8-bit length-1. Lengths up to 64 use
// the short copy, longer ones the long copy.
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
CMPL SI, $0x40
JL memmove_match_emit_encodeBetterBlockAsm4MB
JMP memmove_long_match_emit_encodeBetterBlockAsm4MB
one_byte_match_emit_encodeBetterBlockAsm4MB:
// One-byte header: (length-1) << 2.
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_match_emit_encodeBetterBlockAsm4MB:
// SI = dst pointer after the literals; AX is set to it once the copy is done.
LEAQ (AX)(R9*1), SI
// genMemMoveShort
// Copy R9 bytes from (R10) to (AX) using overlapping head/tail moves
// selected by size.
CMPQ R9, $0x04
JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4
CMPQ R9, $0x08
JB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4:
// R9 <= 4: a full 4 bytes are read and written regardless of R9.
MOVL (R10), R11
MOVL R11, (AX)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7:
MOVL (R10), R11
MOVL -4(R10)(R9*1), R10
MOVL R11, (AX)
MOVL R10, -4(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16:
MOVQ (R10), R11
MOVQ -8(R10)(R9*1), R10
MOVQ R11, (AX)
MOVQ R10, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeBetterBlockAsm4MB:
MOVQ SI, AX
JMP emit_literal_done_match_emit_encodeBetterBlockAsm4MB
memmove_long_match_emit_encodeBetterBlockAsm4MB:
// SI = dst pointer after the literals; AX is set to it once the copy is done.
LEAQ (AX)(R9*1), SI
// genMemMoveLong
// Copy R9 bytes from (R10) to (AX). The first and last 32 bytes are
// loaded up front (X0-X3) and stored unaligned at the end; the middle is
// copied 32 bytes per iteration with MOVOA stores.
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R13
SHRQ $0x05, R13
// R14 = 64 - (AX & 31), so AX + R14 - 32 is 32-byte aligned for the
// MOVOA stores below.
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R14
SUBQ R11, R14
// NOTE(review): DECQ does not write CF, so the JA/JNA below see CF as left
// by the preceding SUBQ/ADDQ — confirm the intended trip count against the generator.
DECQ R13
JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
LEAQ -32(R10)(R14*1), R11
LEAQ -32(AX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R15)
MOVOA X5, 16(R15)
ADDQ $0x20, R15
ADDQ $0x20, R11
ADDQ $0x20, R14
DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
// Copy 32 bytes ending at offset R14; repeat while R9 >= R14.
MOVOU -32(R10)(R14*1), X4
MOVOU -16(R10)(R14*1), X5
MOVOA X4, -32(AX)(R14*1)
MOVOA X5, -16(AX)(R14*1)
ADDQ $0x20, R14
CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
// Store the saved head and tail.
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ SI, AX
emit_literal_done_match_emit_encodeBetterBlockAsm4MB:
// CX = src position just past the match; R12 = total match length
// (extra bytes + 4); everything up to CX is now accounted for.
ADDL R12, CX
ADDL $0x04, R12
MOVL CX, 12(SP)
// emitCopy
// R8 = offset, R12 = length. Offsets below 65536 use the two-byte-offset forms.
CMPL R8, $0x00010000
JL two_byte_offset_match_nolit_encodeBetterBlockAsm4MB
four_bytes_loop_back_match_nolit_encodeBetterBlockAsm4MB:
CMPL R12, $0x40
JLE four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
// Length > 64: write 0xff + 32-bit offset (5 bytes) and subtract 64 from
// the length. A remainder below 4 goes to the plain copy path, otherwise
// it is written via emitRepeat.
MOVB $0xff, (AX)
MOVL R8, 1(AX)
LEAL -64(R12), R12
ADDQ $0x05, AX
CMPL R12, $0x04
JL four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
// emitRepeat
// SI = length, R12 = length - 4. Pick the shortest form that fits.
MOVL R12, SI
LEAL -4(R12), R12
CMPL SI, $0x08
JLE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy
CMPL SI, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
CMPL R8, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
CMPL R12, $0x00000104
JLT repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy
CMPL R12, $0x00010100
JLT repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy
// Five bytes: word 0x001d, then the low 24 bits of (length - 4 - 65536).
LEAL -65536(R12), R12
MOVL R12, R8
MOVW $0x001d, (AX)
MOVW R12, 2(AX)
SARL $0x10, R8
MOVB R8, 4(AX)
ADDQ $0x05, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
// Four bytes: word 0x0019, then 16-bit (length - 4 - 256).
LEAL -256(R12), R12
MOVW $0x0019, (AX)
MOVW R12, 2(AX)
ADDQ $0x04, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
// Three bytes: word 0x0015, then 8-bit (length - 8).
LEAL -4(R12), R12
MOVW $0x0015, (AX)
MOVB R12, 2(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
// Two bytes: 16-bit ((length - 4) << 2) | 1.
SHLL $0x02, R12
ORL $0x01, R12
MOVW R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
// Two bytes with the offset inline: first byte is
// ((length - 4) << 2) | 1 | ((offset >> 8) << 5), second is the offset's low byte.
XORQ SI, SI
LEAL 1(SI)(R12*4), R12
MOVB R8, 1(AX)
SARL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
// Unreachable: directly preceded by an unconditional JMP.
JMP four_bytes_loop_back_match_nolit_encodeBetterBlockAsm4MB
four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB:
// Nothing left to emit when the remaining length is zero.
TESTL R12, R12
JZ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
// Low byte written = ((length - 1) << 2) | 3, followed by the 32-bit offset.
// Only BL is set; the upper bits of BX never reach the stored byte.
MOVB $0x03, BL
LEAL -4(BX)(R12*4), R12
MOVB R12, (AX)
MOVL R8, 1(AX)
ADDQ $0x05, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
two_byte_offset_match_nolit_encodeBetterBlockAsm4MB:
CMPL R12, $0x40
JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB
// Length > 64: write 0xee + 16-bit offset (3 bytes), subtract 60 from the
// length and write the rest via emitRepeat.
MOVB $0xee, (AX)
MOVW R8, 1(AX)
LEAL -60(R12), R12
ADDQ $0x03, AX
// emitRepeat
// Same form selection as the emitRepeat sequence above.
MOVL R12, SI
LEAL -4(R12), R12
CMPL SI, $0x08
JLE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
CMPL SI, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
CMPL R8, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
CMPL R12, $0x00000104
JLT repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
CMPL R12, $0x00010100
JLT repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
LEAL -65536(R12), R12
MOVL R12, R8
MOVW $0x001d, (AX)
MOVW R12, 2(AX)
SARL $0x10, R8
MOVB R8, 4(AX)
ADDQ $0x05, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
LEAL -256(R12), R12
MOVW $0x0019, (AX)
MOVW R12, 2(AX)
ADDQ $0x04, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
LEAL -4(R12), R12
MOVW $0x0015, (AX)
MOVB R12, 2(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
SHLL $0x02, R12
ORL $0x01, R12
MOVW R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
XORQ SI, SI
LEAL 1(SI)(R12*4), R12
MOVB R8, 1(AX)
SARL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
// Unreachable: directly preceded by an unconditional JMP.
JMP two_byte_offset_match_nolit_encodeBetterBlockAsm4MB
two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB:
// Length <= 64. Use the 2-byte form only when length < 12 and offset < 2048.
CMPL R12, $0x0c
JGE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
CMPL R8, $0x00000800
JGE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
// First byte = ((length - 4) << 2) | 1 | ((offset >> 8) << 5); second byte
// = low 8 bits of the offset.
MOVB $0x01, BL
LEAL -16(BX)(R12*4), R12
MOVB R8, 1(AX)
SHRL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
emit_copy_three_match_nolit_encodeBetterBlockAsm4MB:
// 3-byte form: first byte = ((length - 1) << 2) | 2, then the 16-bit offset.
MOVB $0x02, BL
LEAL -4(BX)(R12*4), R12
MOVB R12, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
match_is_repeat_encodeBetterBlockAsm4MB:
// The offset equals 16(SP). Emit pending literals src[12(SP):DI] exactly
// as in the non-repeat path, then write the match with emitRepeat only.
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R10
SUBL SI, R9
LEAL -1(R9), SI
// Header size chosen from length-1: <60, <256, <65536, otherwise four bytes.
CMPL SI, $0x3c
JLT one_byte_match_emit_repeat_encodeBetterBlockAsm4MB
CMPL SI, $0x00000100
JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
CMPL SI, $0x00010000
JLT three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
MOVL SI, R11
SHRL $0x10, R11
MOVB $0xf8, (AX)
MOVW SI, 1(AX)
MOVB R11, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
CMPL SI, $0x40
JL memmove_match_emit_repeat_encodeBetterBlockAsm4MB
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
one_byte_match_emit_repeat_encodeBetterBlockAsm4MB:
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_match_emit_repeat_encodeBetterBlockAsm4MB:
// SI = dst pointer after the literals.
LEAQ (AX)(R9*1), SI
// genMemMoveShort
// Copy R9 bytes from (R10) to (AX) with overlapping head/tail moves.
CMPQ R9, $0x04
JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4
CMPQ R9, $0x08
JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32
JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4:
// R9 <= 4: a full 4 bytes are read and written regardless of R9.
MOVL (R10), R11
MOVL R11, (AX)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7:
MOVL (R10), R11
MOVL -4(R10)(R9*1), R10
MOVL R11, (AX)
MOVL R10, -4(AX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16:
MOVQ (R10), R11
MOVQ -8(R10)(R9*1), R10
MOVQ R11, (AX)
MOVQ R10, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB:
MOVQ SI, AX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB:
// SI = dst pointer after the literals.
LEAQ (AX)(R9*1), SI
// genMemMoveLong
// Head/tail 32 bytes are saved in X0-X3 and stored last; the middle is
// copied in 32-byte steps with MOVOA stores.
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R13
SHRQ $0x05, R13
// R14 = 64 - (AX & 31), so AX + R14 - 32 is 32-byte aligned.
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R14
SUBQ R11, R14
DECQ R13
JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
LEAQ -32(R10)(R14*1), R11
LEAQ -32(AX)(R14*1), R15
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R15)
MOVOA X5, 16(R15)
ADDQ $0x20, R15
ADDQ $0x20, R11
ADDQ $0x20, R14
DECQ R13
JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
// Copy 32 bytes ending at offset R14; repeat while R9 >= R14.
MOVOU -32(R10)(R14*1), X4
MOVOU -16(R10)(R14*1), X5
MOVOA X4, -32(AX)(R14*1)
MOVOA X5, -16(AX)(R14*1)
ADDQ $0x20, R14
CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ SI, AX
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB:
// CX = src position just past the match; R12 = total match length.
ADDL R12, CX
ADDL $0x04, R12
MOVL CX, 12(SP)
// emitRepeat
// SI = length, R12 = length - 4. Pick the shortest form that fits.
MOVL R12, SI
LEAL -4(R12), R12
CMPL SI, $0x08
JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB
CMPL SI, $0x0c
JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
CMPL R8, $0x00000800
JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
CMPL R12, $0x00000104
JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB
CMPL R12, $0x00010100
JLT repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB
LEAL -65536(R12), R12
MOVL R12, R8
MOVW $0x001d, (AX)
MOVW R12, 2(AX)
SARL $0x10, R8
MOVB R8, 4(AX)
ADDQ $0x05, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB:
LEAL -256(R12), R12
MOVW $0x0019, (AX)
MOVW R12, 2(AX)
ADDQ $0x04, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB:
LEAL -4(R12), R12
MOVW $0x0015, (AX)
MOVB R12, 2(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB:
SHLL $0x02, R12
ORL $0x01, R12
MOVW R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
XORQ SI, SI
LEAL 1(SI)(R12*4), R12
MOVB R8, 1(AX)
SARL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
match_nolit_emitcopy_end_encodeBetterBlockAsm4MB:
// Past the search limit: flush the remaining bytes as literals.
CMPL CX, 8(SP)
JGE emit_remainder_encodeBetterBlockAsm4MB
// Return 0 if the write pointer has reached the dst limit.
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeBetterBlockAsm4MB
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeBetterBlockAsm4MB:
// Index positions around the match just emitted. DI = match start, CX = match end.
// SI / R8 = multipliers for the 16-bit and 14-bit hashes.
MOVQ $0x00cf1bbcdcbfa563, SI
MOVQ $0x9e3779b1, R8
// DI = start+1. R9 = 8 bytes at src[DI]; R10 = bytes at DI,
// R11 and R13 = bytes at DI+1, R12 = bytes at DI+2.
INCL DI
MOVQ (DX)(DI*1), R9
MOVQ R9, R10
MOVQ R9, R11
MOVQ R9, R12
SHRQ $0x08, R11
MOVQ R11, R13
SHRQ $0x10, R12
LEAL 1(DI), R14
LEAL 2(DI), R15
// R9 = 8 bytes at src[CX-2], used by the second group of inserts below.
MOVQ -2(DX)(CX*1), R9
SHLQ $0x08, R10
IMULQ SI, R10
SHRQ $0x30, R10
SHLQ $0x08, R13
IMULQ SI, R13
SHRQ $0x30, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x32, R11
SHLQ $0x20, R12
IMULQ R8, R12
SHRQ $0x32, R12
// 16-bit table gets DI and DI+1; 14-bit table gets DI+1 and DI+2.
MOVL DI, 24(SP)(R10*4)
MOVL R14, 24(SP)(R13*4)
MOVL R14, 262168(SP)(R11*4)
MOVL R15, 262168(SP)(R12*4)
// R10 = bytes at CX-2; R11 and R13 = bytes at CX-1.
MOVQ R9, R10
MOVQ R9, R11
SHRQ $0x08, R11
MOVQ R11, R13
LEAL -2(CX), R9
LEAL -1(CX), DI
SHLQ $0x08, R10
IMULQ SI, R10
SHRQ $0x30, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x32, R11
SHLQ $0x08, R13
IMULQ SI, R13
SHRQ $0x30, R13
// 16-bit table gets CX-2 and CX-1; 14-bit table gets CX-1.
MOVL R9, 24(SP)(R10*4)
MOVL DI, 262168(SP)(R11*4)
MOVL DI, 24(SP)(R13*4)
JMP search_loop_encodeBetterBlockAsm4MB
emit_remainder_encodeBetterBlockAsm4MB:
// Return 0 unless AX + 4 + (src_len - 12(SP)) is below the dst limit.
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 4(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeBetterBlockAsm4MB
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeBetterBlockAsm4MB:
// Emit src[12(SP):src_len] as literals, if non-empty.
// CX = source pointer, SI = length, DX = length - 1 (src base is no
// longer needed from here on).
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
// Header size chosen from length-1: <60, <256, <65536, otherwise four bytes.
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeBetterBlockAsm4MB
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeBetterBlockAsm4MB
CMPL DX, $0x00010000
JLT three_bytes_emit_remainder_encodeBetterBlockAsm4MB
// Four-byte header: 0xf8 followed by the low 24 bits of length-1.
MOVL DX, BX
SHRL $0x10, BX
MOVB $0xf8, (AX)
MOVW DX, 1(AX)
MOVB BL, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB
three_bytes_emit_remainder_encodeBetterBlockAsm4MB:
// Three-byte header: 0xf4 followed by 16-bit length-1.
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB
two_bytes_emit_remainder_encodeBetterBlockAsm4MB:
// Two-byte header: 0xf0 followed by 8-bit length-1.
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
CMPL DX, $0x40
JL memmove_emit_remainder_encodeBetterBlockAsm4MB
JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB
one_byte_emit_remainder_encodeBetterBlockAsm4MB:
// One-byte header: (length-1) << 2.
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
memmove_emit_remainder_encodeBetterBlockAsm4MB:
// DX = dst pointer after the literals; BX = length.
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// Copy BX bytes from (CX) to (AX) with overlapping head/tail moves.
CMPQ BX, $0x04
JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4
CMPQ BX, $0x08
JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4:
// BX <= 4: a full 4 bytes are read and written regardless of BX.
MOVL (CX), SI
MOVL SI, (AX)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7:
MOVL (CX), SI
MOVL -4(CX)(BX*1), CX
MOVL SI, (AX)
MOVL CX, -4(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16:
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32:
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64:
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
memmove_long_emit_remainder_encodeBetterBlockAsm4MB:
// DX = dst pointer after the literals; BX = length.
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
// Head/tail 32 bytes are saved in X0-X3 and stored last; the middle is
// copied in 32-byte steps with MOVOA stores.
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
// R8 = 64 - (AX & 31), so AX + R8 - 32 is 32-byte aligned.
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
// Copy 32 bytes ending at offset R8; repeat while BX >= R8.
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB:
// Return the number of bytes written: AX - dst_base.
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeBetterBlockAsm12B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and returns the number of bytes written to dst, or 0
// when one of the output-space checks against the dst limit (0(SP)) fails.
//
// Frame layout (81944 bytes):
//   0(SP)      8 bytes   dst limit pointer: dst_base + (src_len-6) - (src_len>>5)
//   8(SP)      4 bytes   search limit: src_len - 8
//   12(SP)     4 bytes   nextEmit: src offset of the first byte not yet emitted
//   16(SP)     4 bytes   offset of the previous match (repeat detection), starts at 0
//   20(SP)     4 bytes   next search position
//   24(SP)     65536 B   table 1: 1<<14 uint32 src offsets, hash over the low 6 loaded bytes
//   65560(SP)  16384 B   table 2: 1<<12 uint32 src offsets, hash over the low 4 loaded bytes
//
// Register roles in the main loop:
//   AX = dst write pointer, DX = src base pointer, CX = current src offset,
//   SI = candidate src offset, DI/R8..R15/BX/X0..X5 = scratch.
TEXT ·encodeBetterBlockAsm12B(SB), $81944-56
MOVQ dst_base+0(FP), AX
// Clear both tables: 0x280 iterations x 128 bytes = 81920 bytes starting at 24(SP).
MOVQ $0x00000280, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeBetterBlockAsm12B:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeBetterBlockAsm12B
// nextEmit = 0
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), CX
LEAQ -6(CX), DX
LEAQ -8(CX), SI
// search limit = src_len - 8 (the search loop loads 8 bytes at a time)
MOVL SI, 8(SP)
// dst limit = dst_base + (src_len - 6) - (src_len >> 5)
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
// Start searching at src offset 1; no previous match offset yet.
MOVL $0x00000001, CX
MOVL $0x00000000, 16(SP)
MOVQ src_base+24(FP), DX
// Main search loop. CX = current position s.
search_loop_encodeBetterBlockAsm12B:
// next position = s + 1 + ((s - nextEmit) >> 6): the step grows as the run of
// unmatched bytes gets longer.
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x06, SI
LEAL 1(CX)(SI*1), SI
CMPL SI, 8(SP)
JGE emit_remainder_encodeBetterBlockAsm12B
// DI = 8 bytes at src[s]; remember the next position in 20(SP).
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
// R9 / SI hold the multipliers for the two hashes.
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ $0x9e3779b1, SI
MOVQ DI, R10
MOVQ DI, R11
// R10 = table 1 index: drop the top 2 bytes, multiply, keep the top 14 bits.
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x32, R10
// R11 = table 2 index: drop the top 4 bytes, multiply, keep the top 12 bits.
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x34, R11
// Fetch both candidates (SI from table 1, R8 from table 2), then record s in both tables.
MOVL 24(SP)(R10*4), SI
MOVL 65560(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 65560(SP)(R11*4)
// A candidate is accepted when its first 4 bytes equal the 4 bytes at s.
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm12B
CMPL (DX)(R8*1), DI
JEQ candidateS_match_encodeBetterBlockAsm12B
// No match: advance to the precomputed next position.
MOVL 20(SP), CX
JMP search_loop_encodeBetterBlockAsm12B
// Table 2 candidate matched. First probe table 1 with the bytes at s+1; if that
// candidate matches, use it (with s+1), otherwise fall back to the table 2
// candidate (R8) at s.
candidateS_match_encodeBetterBlockAsm12B:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x32, R10
MOVL 24(SP)(R10*4), SI
INCL CX
MOVL CX, 24(SP)(R10*4)
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm12B
DECL CX
MOVL R8, SI
// Extend the match backwards: while candidate > 0, s > nextEmit and the
// preceding bytes are equal, step both positions back by one.
candidate_match_encodeBetterBlockAsm12B:
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeBetterBlockAsm12B
match_extend_back_loop_encodeBetterBlockAsm12B:
CMPL CX, DI
JLE match_extend_back_end_encodeBetterBlockAsm12B
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeBetterBlockAsm12B
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeBetterBlockAsm12B
JMP match_extend_back_loop_encodeBetterBlockAsm12B
// Output-space check: dst + 3 + (s - nextEmit) must stay below the dst limit,
// otherwise give up and return 0.
match_extend_back_end_encodeBetterBlockAsm12B:
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 3(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeBetterBlockAsm12B
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeBetterBlockAsm12B:
// DI = match start. Skip the 4 bytes already known to be equal.
MOVL CX, DI
ADDL $0x04, CX
ADDL $0x04, SI
// R8 = bytes left in src after s, R9 = &src[s], R10 = &src[candidate].
MOVQ src_len+32(FP), R8
SUBL CX, R8
LEAQ (DX)(CX*1), R9
LEAQ (DX)(SI*1), R10
// matchLen
// R12 = number of additional equal bytes. Compare 8 bytes per iteration while
// at least 8 remain, then finish byte by byte.
XORL R12, R12
CMPL R8, $0x08
JL matchlen_single_match_nolit_encodeBetterBlockAsm12B
matchlen_loopback_match_nolit_encodeBetterBlockAsm12B:
MOVQ (R9)(R12*1), R11
XORQ (R10)(R12*1), R11
TESTQ R11, R11
JZ matchlen_loop_match_nolit_encodeBetterBlockAsm12B
// Lowest set bit of the XOR, divided by 8, is the count of equal leading bytes.
BSFQ R11, R11
SARQ $0x03, R11
LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeBetterBlockAsm12B
matchlen_loop_match_nolit_encodeBetterBlockAsm12B:
LEAL -8(R8), R8
LEAL 8(R12), R12
CMPL R8, $0x08
JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm12B
matchlen_single_match_nolit_encodeBetterBlockAsm12B:
TESTL R8, R8
JZ match_nolit_end_encodeBetterBlockAsm12B
matchlen_single_loopback_match_nolit_encodeBetterBlockAsm12B:
MOVB (R9)(R12*1), R11
CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeBetterBlockAsm12B
LEAL 1(R12), R12
DECL R8
JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm12B
match_nolit_end_encodeBetterBlockAsm12B:
// R8 = match offset (s - candidate).
MOVL CX, R8
SUBL SI, R8
// Check if repeat
CMPL 16(SP), R8
JEQ match_is_repeat_encodeBetterBlockAsm12B
// New offset: remember it, then emit pending literals src[nextEmit:DI].
MOVL R8, 16(SP)
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeBetterBlockAsm12B
// R9 = literal length, R10 = &src[nextEmit], SI = length - 1.
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R10
SUBL SI, R9
LEAL -1(R9), SI
// Literal header: 1 byte when length-1 < 60, 2 bytes (0xf0 + 1 byte) when
// length-1 < 256, else 3 bytes (0xf4 + 2 bytes).
CMPL SI, $0x3c
JLT one_byte_match_emit_encodeBetterBlockAsm12B
CMPL SI, $0x00000100
JLT two_bytes_match_emit_encodeBetterBlockAsm12B
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeBetterBlockAsm12B
two_bytes_match_emit_encodeBetterBlockAsm12B:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
// Lengths up to 64 use the short copy, longer ones the long copy.
CMPL SI, $0x40
JL memmove_match_emit_encodeBetterBlockAsm12B
JMP memmove_long_match_emit_encodeBetterBlockAsm12B
one_byte_match_emit_encodeBetterBlockAsm12B:
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_match_emit_encodeBetterBlockAsm12B:
// SI = dst pointer after the literals.
LEAQ (AX)(R9*1), SI
// genMemMoveShort
// Copies R9 (<= 64) bytes from R10 to AX using overlapping head/tail moves.
// Lengths below 4 are copied as 4 bytes; the extra bytes read here belong to
// the match that follows the literals in src.
CMPQ R9, $0x04
JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4
CMPQ R9, $0x08
JB emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4:
MOVL (R10), R11
MOVL R11, (AX)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B
emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7:
MOVL (R10), R11
MOVL -4(R10)(R9*1), R10
MOVL R11, (AX)
MOVL R10, -4(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B
emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16:
MOVQ (R10), R11
MOVQ -8(R10)(R9*1), R10
MOVQ R11, (AX)
MOVQ R10, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B
emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B
emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeBetterBlockAsm12B:
MOVQ SI, AX
JMP emit_literal_done_match_emit_encodeBetterBlockAsm12B
memmove_long_match_emit_encodeBetterBlockAsm12B:
// SI = dst pointer after the literals.
LEAQ (AX)(R9*1), SI
// genMemMoveLong
// Copies R9 bytes from R10 to AX. The first and last 32 bytes are held in
// X0..X3 and stored last with unaligned moves; the middle is copied in 32-byte
// steps with aligned stores.
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R13
SHRQ $0x05, R13
// R14 = 64 - (dst & 31), so dst + R14 - 32 is 32-byte aligned.
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R14
SUBQ R11, R14
// NOTE(review): for the lengths that reach this routine (> 64, so R13 >= 2)
// this JA looks always taken, leaving big_loop_back unused - confirm.
DECQ R13
JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
LEAQ -32(R10)(R14*1), R11
LEAQ -32(AX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R15)
MOVOA X5, 16(R15)
ADDQ $0x20, R15
ADDQ $0x20, R11
ADDQ $0x20, R14
DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back
// Copy 32 bytes ending at offset R14 while R14 <= length.
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
MOVOU -32(R10)(R14*1), X4
MOVOU -16(R10)(R14*1), X5
MOVOA X4, -32(AX)(R14*1)
MOVOA X5, -16(AX)(R14*1)
ADDQ $0x20, R14
CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ SI, AX
emit_literal_done_match_emit_encodeBetterBlockAsm12B:
// s = end of match, R12 = total match length, nextEmit = s.
ADDL R12, CX
ADDL $0x04, R12
MOVL CX, 12(SP)
// emitCopy
// R12 = length, R8 = offset.
two_byte_offset_match_nolit_encodeBetterBlockAsm12B:
CMPL R12, $0x40
JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B
// Length > 64: emit a 3-byte copy (tag 0xee + 16-bit offset), subtract 60 from
// the length and encode the rest as a repeat.
MOVB $0xee, (AX)
MOVW R8, 1(AX)
LEAL -60(R12), R12
ADDQ $0x03, AX
// emitRepeat
// SI = remaining length, R12 = remaining length - 4.
MOVL R12, SI
LEAL -4(R12), R12
CMPL SI, $0x08
JLE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
CMPL SI, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
CMPL R8, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
CMPL R12, $0x00000104
JLT repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
// 4-byte repeat: 0x19 0x00 followed by 16-bit (length - 4 - 256).
LEAL -256(R12), R12
MOVW $0x0019, (AX)
MOVW R12, 2(AX)
ADDQ $0x04, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
// 3-byte repeat: 0x15 0x00 followed by 8-bit (length - 8).
repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
LEAL -4(R12), R12
MOVW $0x0015, (AX)
MOVB R12, 2(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
// 2-byte repeat: 16-bit value ((length - 4) << 2) | 1.
repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
SHLL $0x02, R12
ORL $0x01, R12
MOVW R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
// 2-byte form carrying the offset: byte 0 = 1 | (length-4)<<2 | (offset>>8)<<5,
// byte 1 = low byte of offset.
repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
XORQ SI, SI
LEAL 1(SI)(R12*4), R12
MOVB R8, 1(AX)
SARL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
// Unreachable (follows an unconditional JMP); kept as emitted by the generator.
JMP two_byte_offset_match_nolit_encodeBetterBlockAsm12B
// Length <= 64.
two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B:
CMPL R12, $0x0c
JGE emit_copy_three_match_nolit_encodeBetterBlockAsm12B
CMPL R8, $0x00000800
JGE emit_copy_three_match_nolit_encodeBetterBlockAsm12B
// 2-byte copy (length < 12, offset < 2048):
// byte 0 = (1 + (length-4)*4) | (offset>>8)<<5, byte 1 = low byte of offset.
// Only the low byte of the LEAL result is stored, so the upper bits of BX do not matter.
MOVB $0x01, BL
LEAL -16(BX)(R12*4), R12
MOVB R8, 1(AX)
SHRL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
// 3-byte copy: byte 0 = 2 + (length-1)*4, then the 16-bit offset.
emit_copy_three_match_nolit_encodeBetterBlockAsm12B:
MOVB $0x02, BL
LEAL -4(BX)(R12*4), R12
MOVB R12, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
// The match offset equals the previous one: emit pending literals
// src[nextEmit:DI], then a repeat instead of a copy.
match_is_repeat_encodeBetterBlockAsm12B:
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B
// R9 = literal length, R10 = &src[nextEmit], SI = length - 1.
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R10
SUBL SI, R9
LEAL -1(R9), SI
// Literal header: 1, 2 (0xf0) or 3 (0xf4) bytes depending on length - 1.
CMPL SI, $0x3c
JLT one_byte_match_emit_repeat_encodeBetterBlockAsm12B
CMPL SI, $0x00000100
JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm12B
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
two_bytes_match_emit_repeat_encodeBetterBlockAsm12B:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
CMPL SI, $0x40
JL memmove_match_emit_repeat_encodeBetterBlockAsm12B
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
one_byte_match_emit_repeat_encodeBetterBlockAsm12B:
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_match_emit_repeat_encodeBetterBlockAsm12B:
// SI = dst pointer after the literals.
LEAQ (AX)(R9*1), SI
// genMemMoveShort
// Copies R9 (<= 64) bytes from R10 to AX using overlapping head/tail moves.
// Lengths below 4 are copied as 4 bytes; the extra bytes read here belong to
// the match that follows the literals in src.
CMPQ R9, $0x04
JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4
CMPQ R9, $0x08
JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4:
MOVL (R10), R11
MOVL R11, (AX)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7:
MOVL (R10), R11
MOVL -4(R10)(R9*1), R10
MOVL R11, (AX)
MOVL R10, -4(AX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16:
MOVQ (R10), R11
MOVQ -8(R10)(R9*1), R10
MOVQ R11, (AX)
MOVQ R10, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B:
MOVQ SI, AX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B
memmove_long_match_emit_repeat_encodeBetterBlockAsm12B:
// SI = dst pointer after the literals.
LEAQ (AX)(R9*1), SI
// genMemMoveLong
// Copies R9 bytes from R10 to AX. The first and last 32 bytes are held in
// X0..X3 and stored last with unaligned moves; the middle is copied in 32-byte
// steps with aligned stores.
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R13
SHRQ $0x05, R13
// R14 = 64 - (dst & 31), so dst + R14 - 32 is 32-byte aligned.
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R14
SUBQ R11, R14
// NOTE(review): for the lengths that reach this routine (> 64, so R13 >= 2)
// this JA looks always taken, leaving big_loop_back unused - confirm.
DECQ R13
JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
LEAQ -32(R10)(R14*1), R11
LEAQ -32(AX)(R14*1), R15
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R15)
MOVOA X5, 16(R15)
ADDQ $0x20, R15
ADDQ $0x20, R11
ADDQ $0x20, R14
DECQ R13
JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back
// Copy 32 bytes ending at offset R14 while R14 <= length.
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
MOVOU -32(R10)(R14*1), X4
MOVOU -16(R10)(R14*1), X5
MOVOA X4, -32(AX)(R14*1)
MOVOA X5, -16(AX)(R14*1)
ADDQ $0x20, R14
CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ SI, AX
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B:
// s = end of match, R12 = total match length, nextEmit = s.
ADDL R12, CX
ADDL $0x04, R12
MOVL CX, 12(SP)
// emitRepeat
// SI = length, R12 = length - 4, R8 = offset.
MOVL R12, SI
LEAL -4(R12), R12
CMPL SI, $0x08
JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B
CMPL SI, $0x0c
JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B
CMPL R8, $0x00000800
JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B
cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
CMPL R12, $0x00000104
JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B
// 4-byte repeat: 0x19 0x00 followed by 16-bit (length - 4 - 256).
LEAL -256(R12), R12
MOVW $0x0019, (AX)
MOVW R12, 2(AX)
ADDQ $0x04, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
// 3-byte repeat: 0x15 0x00 followed by 8-bit (length - 8).
repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B:
LEAL -4(R12), R12
MOVW $0x0015, (AX)
MOVB R12, 2(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
// 2-byte repeat: 16-bit value ((length - 4) << 2) | 1.
repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B:
SHLL $0x02, R12
ORL $0x01, R12
MOVW R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
// 2-byte form carrying the offset: byte 0 = 1 | (length-4)<<2 | (offset>>8)<<5,
// byte 1 = low byte of offset.
repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
XORQ SI, SI
LEAL 1(SI)(R12*4), R12
MOVB R8, 1(AX)
SARL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
// After the copy/repeat: stop searching when s reached the search limit, and
// return 0 when dst reached the dst limit.
match_nolit_emitcopy_end_encodeBetterBlockAsm12B:
CMPL CX, 8(SP)
JGE emit_remainder_encodeBetterBlockAsm12B
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeBetterBlockAsm12B
MOVQ $0x00000000, ret+48(FP)
RET
// Seed the tables with positions around the match before resuming the search.
// DI still holds the match start (base); CX is the match end (s).
match_nolit_dst_ok_encodeBetterBlockAsm12B:
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ $0x9e3779b1, R8
// DI = base+1; R10 = bytes at base+1, R11/R13 = bytes at base+2,
// R12 = bytes at base+3, R14 = base+2, R15 = base+3.
INCL DI
MOVQ (DX)(DI*1), R9
MOVQ R9, R10
MOVQ R9, R11
MOVQ R9, R12
SHRQ $0x08, R11
MOVQ R11, R13
SHRQ $0x10, R12
LEAL 1(DI), R14
LEAL 2(DI), R15
// R9 = 8 bytes at s-2 (used for the second batch below).
MOVQ -2(DX)(CX*1), R9
// Table 1 indexes for base+1 (R10) and base+2 (R13).
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x32, R10
SHLQ $0x10, R13
IMULQ SI, R13
SHRQ $0x32, R13
// Table 2 indexes for base+2 (R11) and base+3 (R12).
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x34, R11
SHLQ $0x20, R12
IMULQ R8, R12
SHRQ $0x34, R12
MOVL DI, 24(SP)(R10*4)
MOVL R14, 24(SP)(R13*4)
MOVL R14, 65560(SP)(R11*4)
MOVL R15, 65560(SP)(R12*4)
// Second batch: R10 = bytes at s-2, R11/R13 = bytes at s-1,
// R9 = s-2, DI = s-1.
MOVQ R9, R10
MOVQ R9, R11
SHRQ $0x08, R11
MOVQ R11, R13
LEAL -2(CX), R9
LEAL -1(CX), DI
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x32, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x34, R11
SHLQ $0x10, R13
IMULQ SI, R13
SHRQ $0x32, R13
// table1[s-2] , table2[s-1], table1[s-1]
MOVL R9, 24(SP)(R10*4)
MOVL DI, 65560(SP)(R11*4)
MOVL DI, 24(SP)(R13*4)
JMP search_loop_encodeBetterBlockAsm12B
// Search finished. Emit src[nextEmit:src_len] as one final literal, provided
// dst + 3 + remaining stays below the dst limit; otherwise return 0.
emit_remainder_encodeBetterBlockAsm12B:
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 3(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeBetterBlockAsm12B
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeBetterBlockAsm12B:
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm12B
// From here DX (src base) is reused as scratch:
// CX = &src[nextEmit], SI = literal length, DX = length - 1.
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
// Literal header: 1, 2 (0xf0) or 3 (0xf4) bytes depending on length - 1.
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeBetterBlockAsm12B
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeBetterBlockAsm12B
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B
two_bytes_emit_remainder_encodeBetterBlockAsm12B:
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
CMPL DX, $0x40
JL memmove_emit_remainder_encodeBetterBlockAsm12B
JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B
one_byte_emit_remainder_encodeBetterBlockAsm12B:
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
memmove_emit_remainder_encodeBetterBlockAsm12B:
// DX = dst pointer after the literals, BX = length (>= 1 here).
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// These literals end exactly at src_len, so unlike the in-loop literal copies
// nothing follows them in src. Tails of 1..3 bytes are therefore copied with
// exact byte/word moves; a 4-byte load here would read past the end of src.
// NOTE: this dispatch deliberately differs from the generator output.
CMPQ BX, $0x03
JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2
JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3
CMPQ BX, $0x08
JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64
// Length 1 or 2: first byte and last byte (the same byte when length is 1).
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2:
MOVB (CX), SI
MOVB -1(CX)(BX*1), CL
MOVB SI, (AX)
MOVB CL, -1(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
// Length 3: one 2-byte move plus one byte.
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3:
MOVW (CX), SI
MOVB 2(CX), CL
MOVW SI, (AX)
MOVB CL, 2(AX)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
// Length 4..7: two (possibly fully overlapping) 4-byte moves.
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7:
MOVL (CX), SI
MOVL -4(CX)(BX*1), CX
MOVL SI, (AX)
MOVL CX, -4(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16:
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32:
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64:
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm12B
memmove_long_emit_remainder_encodeBetterBlockAsm12B:
// DX = dst pointer after the literals, BX = length.
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
// Copies BX bytes from CX to AX. The first and last 32 bytes are held in
// X0..X3 and stored last with unaligned moves; the middle is copied in 32-byte
// steps with aligned stores.
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
// R8 = 64 - (dst & 31), so dst + R8 - 32 is 32-byte aligned.
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
// NOTE(review): for the lengths that reach this routine (> 64, so DI >= 2)
// this JA looks always taken, leaving big_loop_back unused - confirm.
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back
// Copy 32 bytes ending at offset R8 while R8 <= length.
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
// Return the number of bytes written: dst write pointer - dst_base.
emit_literal_done_emit_remainder_encodeBetterBlockAsm12B:
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeBetterBlockAsm10B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeBetterBlockAsm10B(SB), $20504-56
MOVQ dst_base+0(FP), AX
MOVQ $0x000000a0, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeBetterBlockAsm10B:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeBetterBlockAsm10B
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), CX
LEAQ -6(CX), DX
LEAQ -8(CX), SI
MOVL SI, 8(SP)
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
MOVL $0x00000001, CX
MOVL $0x00000000, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeBetterBlockAsm10B:
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x05, SI
LEAL 1(CX)(SI*1), SI
CMPL SI, 8(SP)
JGE emit_remainder_encodeBetterBlockAsm10B
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ $0x9e3779b1, SI
MOVQ DI, R10
MOVQ DI, R11
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x34, R10
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x36, R11
MOVL 24(SP)(R10*4), SI
MOVL 16408(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 16408(SP)(R11*4)
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm10B
CMPL (DX)(R8*1), DI
JEQ candidateS_match_encodeBetterBlockAsm10B
MOVL 20(SP), CX
JMP search_loop_encodeBetterBlockAsm10B
candidateS_match_encodeBetterBlockAsm10B:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x34, R10
MOVL 24(SP)(R10*4), SI
INCL CX
MOVL CX, 24(SP)(R10*4)
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeBetterBlockAsm10B
DECL CX
MOVL R8, SI
candidate_match_encodeBetterBlockAsm10B:
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeBetterBlockAsm10B
match_extend_back_loop_encodeBetterBlockAsm10B:
CMPL CX, DI
JLE match_extend_back_end_encodeBetterBlockAsm10B
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeBetterBlockAsm10B
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeBetterBlockAsm10B
JMP match_extend_back_loop_encodeBetterBlockAsm10B
match_extend_back_end_encodeBetterBlockAsm10B:
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 3(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeBetterBlockAsm10B
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeBetterBlockAsm10B:
MOVL CX, DI
ADDL $0x04, CX
ADDL $0x04, SI
MOVQ src_len+32(FP), R8
SUBL CX, R8
LEAQ (DX)(CX*1), R9
LEAQ (DX)(SI*1), R10
// matchLen
XORL R12, R12
CMPL R8, $0x08
JL matchlen_single_match_nolit_encodeBetterBlockAsm10B
matchlen_loopback_match_nolit_encodeBetterBlockAsm10B:
MOVQ (R9)(R12*1), R11
XORQ (R10)(R12*1), R11
TESTQ R11, R11
JZ matchlen_loop_match_nolit_encodeBetterBlockAsm10B
BSFQ R11, R11
SARQ $0x03, R11
LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeBetterBlockAsm10B
matchlen_loop_match_nolit_encodeBetterBlockAsm10B:
LEAL -8(R8), R8
LEAL 8(R12), R12
CMPL R8, $0x08
JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm10B
matchlen_single_match_nolit_encodeBetterBlockAsm10B:
TESTL R8, R8
JZ match_nolit_end_encodeBetterBlockAsm10B
matchlen_single_loopback_match_nolit_encodeBetterBlockAsm10B:
MOVB (R9)(R12*1), R11
CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeBetterBlockAsm10B
LEAL 1(R12), R12
DECL R8
JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm10B
match_nolit_end_encodeBetterBlockAsm10B:
MOVL CX, R8
SUBL SI, R8
// Check if repeat
CMPL 16(SP), R8
JEQ match_is_repeat_encodeBetterBlockAsm10B
MOVL R8, 16(SP)
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeBetterBlockAsm10B
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R10
SUBL SI, R9
LEAL -1(R9), SI
CMPL SI, $0x3c
JLT one_byte_match_emit_encodeBetterBlockAsm10B
CMPL SI, $0x00000100
JLT two_bytes_match_emit_encodeBetterBlockAsm10B
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeBetterBlockAsm10B
two_bytes_match_emit_encodeBetterBlockAsm10B:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
CMPL SI, $0x40
JL memmove_match_emit_encodeBetterBlockAsm10B
JMP memmove_long_match_emit_encodeBetterBlockAsm10B
one_byte_match_emit_encodeBetterBlockAsm10B:
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_match_emit_encodeBetterBlockAsm10B:
LEAQ (AX)(R9*1), SI
// genMemMoveShort
CMPQ R9, $0x04
JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4
CMPQ R9, $0x08
JB emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4:
MOVL (R10), R11
MOVL R11, (AX)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B
emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7:
MOVL (R10), R11
MOVL -4(R10)(R9*1), R10
MOVL R11, (AX)
MOVL R10, -4(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B
emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16:
MOVQ (R10), R11
MOVQ -8(R10)(R9*1), R10
MOVQ R11, (AX)
MOVQ R10, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B
emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B
emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeBetterBlockAsm10B:
MOVQ SI, AX
JMP emit_literal_done_match_emit_encodeBetterBlockAsm10B
memmove_long_match_emit_encodeBetterBlockAsm10B:
LEAQ (AX)(R9*1), SI
// genMemMoveLong
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R13
SHRQ $0x05, R13
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R14
SUBQ R11, R14
DECQ R13
JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
LEAQ -32(R10)(R14*1), R11
LEAQ -32(AX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R15)
MOVOA X5, 16(R15)
ADDQ $0x20, R15
ADDQ $0x20, R11
ADDQ $0x20, R14
DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
MOVOU -32(R10)(R14*1), X4
MOVOU -16(R10)(R14*1), X5
MOVOA X4, -32(AX)(R14*1)
MOVOA X5, -16(AX)(R14*1)
ADDQ $0x20, R14
CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ SI, AX
emit_literal_done_match_emit_encodeBetterBlockAsm10B:
ADDL R12, CX
ADDL $0x04, R12
MOVL CX, 12(SP)
// emitCopy
two_byte_offset_match_nolit_encodeBetterBlockAsm10B:
CMPL R12, $0x40
JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B
MOVB $0xee, (AX)
MOVW R8, 1(AX)
LEAL -60(R12), R12
ADDQ $0x03, AX
// emitRepeat
MOVL R12, SI
LEAL -4(R12), R12
CMPL SI, $0x08
JLE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
CMPL SI, $0x0c
JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
CMPL R8, $0x00000800
JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
CMPL R12, $0x00000104
JLT repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
LEAL -256(R12), R12
MOVW $0x0019, (AX)
MOVW R12, 2(AX)
ADDQ $0x04, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
LEAL -4(R12), R12
MOVW $0x0015, (AX)
MOVB R12, 2(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
SHLL $0x02, R12
ORL $0x01, R12
MOVW R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
XORQ SI, SI
LEAL 1(SI)(R12*4), R12
MOVB R8, 1(AX)
SARL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
JMP two_byte_offset_match_nolit_encodeBetterBlockAsm10B
two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B:
CMPL R12, $0x0c
JGE emit_copy_three_match_nolit_encodeBetterBlockAsm10B
CMPL R8, $0x00000800
JGE emit_copy_three_match_nolit_encodeBetterBlockAsm10B
MOVB $0x01, BL
LEAL -16(BX)(R12*4), R12
MOVB R8, 1(AX)
SHRL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
emit_copy_three_match_nolit_encodeBetterBlockAsm10B:
MOVB $0x02, BL
LEAL -4(BX)(R12*4), R12
MOVB R12, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
match_is_repeat_encodeBetterBlockAsm10B:
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R10
SUBL SI, R9
LEAL -1(R9), SI
CMPL SI, $0x3c
JLT one_byte_match_emit_repeat_encodeBetterBlockAsm10B
CMPL SI, $0x00000100
JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm10B
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
two_bytes_match_emit_repeat_encodeBetterBlockAsm10B:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
CMPL SI, $0x40
JL memmove_match_emit_repeat_encodeBetterBlockAsm10B
JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
one_byte_match_emit_repeat_encodeBetterBlockAsm10B:
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_match_emit_repeat_encodeBetterBlockAsm10B:
LEAQ (AX)(R9*1), SI
// genMemMoveShort
CMPQ R9, $0x04
JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4
CMPQ R9, $0x08
JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4:
MOVL (R10), R11
MOVL R11, (AX)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7:
MOVL (R10), R11
MOVL -4(R10)(R9*1), R10
MOVL R11, (AX)
MOVL R10, -4(AX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16:
MOVQ (R10), R11
MOVQ -8(R10)(R9*1), R10
MOVQ R11, (AX)
MOVQ R10, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B:
MOVQ SI, AX
JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
memmove_long_match_emit_repeat_encodeBetterBlockAsm10B:
LEAQ (AX)(R9*1), SI
// genMemMoveLong
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R13
SHRQ $0x05, R13
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R14
SUBQ R11, R14
DECQ R13
JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
LEAQ -32(R10)(R14*1), R11
LEAQ -32(AX)(R14*1), R15
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R15)
MOVOA X5, 16(R15)
ADDQ $0x20, R15
ADDQ $0x20, R11
ADDQ $0x20, R14
DECQ R13
JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
MOVOU -32(R10)(R14*1), X4
MOVOU -16(R10)(R14*1), X5
MOVOA X4, -32(AX)(R14*1)
MOVOA X5, -16(AX)(R14*1)
ADDQ $0x20, R14
CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ SI, AX
emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B:
ADDL R12, CX
ADDL $0x04, R12
MOVL CX, 12(SP)
// emitRepeat
MOVL R12, SI
LEAL -4(R12), R12
CMPL SI, $0x08
JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B
CMPL SI, $0x0c
JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
CMPL R8, $0x00000800
JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
CMPL R12, $0x00000104
JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B
LEAL -256(R12), R12
MOVW $0x0019, (AX)
MOVW R12, 2(AX)
ADDQ $0x04, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B:
LEAL -4(R12), R12
MOVW $0x0015, (AX)
MOVB R12, 2(AX)
ADDQ $0x03, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B:
SHLL $0x02, R12
ORL $0x01, R12
MOVW R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
XORQ SI, SI
LEAL 1(SI)(R12*4), R12
MOVB R8, 1(AX)
SARL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
match_nolit_emitcopy_end_encodeBetterBlockAsm10B:
CMPL CX, 8(SP)
JGE emit_remainder_encodeBetterBlockAsm10B
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeBetterBlockAsm10B
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeBetterBlockAsm10B:
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ $0x9e3779b1, R8
INCL DI
MOVQ (DX)(DI*1), R9
MOVQ R9, R10
MOVQ R9, R11
MOVQ R9, R12
SHRQ $0x08, R11
MOVQ R11, R13
SHRQ $0x10, R12
LEAL 1(DI), R14
LEAL 2(DI), R15
MOVQ -2(DX)(CX*1), R9
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x34, R10
SHLQ $0x10, R13
IMULQ SI, R13
SHRQ $0x34, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x36, R11
SHLQ $0x20, R12
IMULQ R8, R12
SHRQ $0x36, R12
MOVL DI, 24(SP)(R10*4)
MOVL R14, 24(SP)(R13*4)
MOVL R14, 16408(SP)(R11*4)
MOVL R15, 16408(SP)(R12*4)
MOVQ R9, R10
MOVQ R9, R11
SHRQ $0x08, R11
MOVQ R11, R13
LEAL -2(CX), R9
LEAL -1(CX), DI
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x34, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x36, R11
SHLQ $0x10, R13
IMULQ SI, R13
SHRQ $0x34, R13
MOVL R9, 24(SP)(R10*4)
MOVL DI, 16408(SP)(R11*4)
MOVL DI, 24(SP)(R13*4)
JMP search_loop_encodeBetterBlockAsm10B
emit_remainder_encodeBetterBlockAsm10B:
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 3(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeBetterBlockAsm10B
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeBetterBlockAsm10B:
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeBetterBlockAsm10B
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeBetterBlockAsm10B
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B
two_bytes_emit_remainder_encodeBetterBlockAsm10B:
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
CMPL DX, $0x40
JL memmove_emit_remainder_encodeBetterBlockAsm10B
JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B
one_byte_emit_remainder_encodeBetterBlockAsm10B:
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
memmove_emit_remainder_encodeBetterBlockAsm10B:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
CMPQ BX, $0x04
JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4
CMPQ BX, $0x08
JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4:
MOVL (CX), SI
MOVL SI, (AX)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7:
MOVL (CX), SI
MOVL -4(CX)(BX*1), CX
MOVL SI, (AX)
MOVL CX, -4(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16:
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32:
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64:
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
memmove_long_emit_remainder_encodeBetterBlockAsm10B:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
emit_literal_done_emit_remainder_encodeBetterBlockAsm10B:
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeBetterBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and stores the number of bytes written in ret.
// ret is 0 when the output pointer would reach the limit
// dst + len(src) - len(src)>>5 - 6, i.e. encoding was abandoned.
//
// Frame layout (5144 bytes):
//   0(SP)     8 bytes     dst limit pointer
//   8(SP)     4 bytes     sLimit = len(src) - 8
//   12(SP)    4 bytes     nextEmit: src offset of the first byte not yet emitted
//   16(SP)    4 bytes     offset of the previous match (starts at 0)
//   20(SP)    4 bytes     nextS: position to try next when s has no match
//   24(SP)    4096 bytes  long table, 1024 x uint32, indexed by a 10-bit hash of 6 bytes
//   4120(SP)  1024 bytes  short table, 256 x uint32, indexed by an 8-bit hash of 4 bytes
//
// Long-lived registers: AX = dst write pointer, DX = src base pointer,
// CX = current src offset (s). All other registers are scratch.
//
// NOTE(review): literal runs are encoded with at most a 16-bit length and copy
// offsets with at most 16 bits, so the Go caller presumably only routes small
// inputs here - confirm against the dispatching code.
TEXT ·encodeBetterBlockAsm8B(SB), $5144-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000028, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Clear both hash tables: 40 iterations x 128 bytes = 5120 bytes.
zero_loop_encodeBetterBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeBetterBlockAsm8B

	// nextEmit = 0; sLimit = len(src)-8; dst limit = dst + len(src) - 6 - len(src)>>5.
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -6(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)

	// s = 1, previous-match offset = 0, DX = src base.
	MOVL $0x00000001, CX
	MOVL $0x00000000, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeBetterBlockAsm8B:
	// nextS = s + 1 + (s-nextEmit)>>4, so the stride grows while nothing matches.
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x04, SI
	LEAL 1(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeBetterBlockAsm8B
	MOVQ (DX)(CX*1), DI // DI = 8 bytes of src at s
	MOVL SI, 20(SP)

	// R10 = 10-bit hash of the low 6 bytes, R11 = 8-bit hash of the low 4 bytes.
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ $0x9e3779b1, SI
	MOVQ DI, R10
	MOVQ DI, R11
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x36, R10
	SHLQ $0x20, R11
	IMULQ SI, R11
	SHRQ $0x38, R11

	// Fetch both candidates (SI = long, R8 = short), then record s in both tables.
	MOVL 24(SP)(R10*4), SI
	MOVL 4120(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	MOVL CX, 4120(SP)(R11*4)

	// A candidate is accepted when its first 4 bytes equal the 4 bytes at s.
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBetterBlockAsm8B
	CMPL (DX)(R8*1), DI
	JEQ candidateS_match_encodeBetterBlockAsm8B
	MOVL 20(SP), CX
	JMP search_loop_encodeBetterBlockAsm8B

candidateS_match_encodeBetterBlockAsm8B:
	// Short candidate matched. Before using it, probe the long table at s+1;
	// if that candidate matches, continue from s+1 with it instead.
	SHRQ $0x08, DI
	MOVQ DI, R10
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x36, R10
	MOVL 24(SP)(R10*4), SI
	INCL CX
	MOVL CX, 24(SP)(R10*4)
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeBetterBlockAsm8B
	DECL CX
	MOVL R8, SI

candidate_match_encodeBetterBlockAsm8B:
	// Extend the match backwards while the preceding bytes are equal, stopping
	// at nextEmit (DI) or when the candidate reaches offset 0.
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeBetterBlockAsm8B

match_extend_back_loop_encodeBetterBlockAsm8B:
	CMPL CX, DI
	JLE match_extend_back_end_encodeBetterBlockAsm8B
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeBetterBlockAsm8B
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeBetterBlockAsm8B
	JMP match_extend_back_loop_encodeBetterBlockAsm8B

match_extend_back_end_encodeBetterBlockAsm8B:
	// Give up (return 0) unless dst + 3 + (s-nextEmit) is below the dst limit.
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 3(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeBetterBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeBetterBlockAsm8B:
	// DI = match start. Skip the 4 bytes already known to be equal, then count
	// further equal bytes into R12; R8 = bytes of src left after s.
	MOVL CX, DI
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(SI*1), R10

	// matchLen
	XORL R12, R12
	CMPL R8, $0x08
	JL matchlen_single_match_nolit_encodeBetterBlockAsm8B

matchlen_loopback_match_nolit_encodeBetterBlockAsm8B:
	// Compare 8 bytes at a time; on a difference the index of the lowest set
	// bit of the XOR, divided by 8, is the number of leading equal bytes.
	MOVQ (R9)(R12*1), R11
	XORQ (R10)(R12*1), R11
	TESTQ R11, R11
	JZ matchlen_loop_match_nolit_encodeBetterBlockAsm8B
	BSFQ R11, R11
	SARQ $0x03, R11
	LEAL (R12)(R11*1), R12
	JMP match_nolit_end_encodeBetterBlockAsm8B

matchlen_loop_match_nolit_encodeBetterBlockAsm8B:
	LEAL -8(R8), R8
	LEAL 8(R12), R12
	CMPL R8, $0x08
	JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm8B

matchlen_single_match_nolit_encodeBetterBlockAsm8B:
	// Fewer than 8 bytes left: compare byte by byte.
	TESTL R8, R8
	JZ match_nolit_end_encodeBetterBlockAsm8B

matchlen_single_loopback_match_nolit_encodeBetterBlockAsm8B:
	MOVB (R9)(R12*1), R11
	CMPB (R10)(R12*1), R11
	JNE match_nolit_end_encodeBetterBlockAsm8B
	LEAL 1(R12), R12
	DECL R8
	JNZ matchlen_single_loopback_match_nolit_encodeBetterBlockAsm8B

match_nolit_end_encodeBetterBlockAsm8B:
	// R8 = match offset (s - candidate).
	MOVL CX, R8
	SUBL SI, R8

	// Check if repeat
	CMPL 16(SP), R8
	JEQ match_is_repeat_encodeBetterBlockAsm8B
	MOVL R8, 16(SP)

	// Emit src[nextEmit:DI] as a literal run, unless it is empty.
	// R9 = run length, R10 = run source pointer, SI = length-1.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_match_emit_encodeBetterBlockAsm8B
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	LEAL -1(R9), SI

	// Header size depends on length-1: <60 one byte, <256 two bytes, else three.
	CMPL SI, $0x3c
	JLT one_byte_match_emit_encodeBetterBlockAsm8B
	CMPL SI, $0x00000100
	JLT two_bytes_match_emit_encodeBetterBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeBetterBlockAsm8B

two_bytes_match_emit_encodeBetterBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_match_emit_encodeBetterBlockAsm8B
	JMP memmove_long_match_emit_encodeBetterBlockAsm8B

one_byte_match_emit_encodeBetterBlockAsm8B:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeBetterBlockAsm8B:
	LEAQ (AX)(R9*1), SI // SI = dst pointer after the literals

	// genMemMoveShort
	// Runs of 1..4 bytes are copied as one 4-byte move. The extra source bytes
	// read are the start of the match that follows the run, so they are in src.
	CMPQ R9, $0x04
	JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4
	CMPQ R9, $0x08
	JB emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4:
	MOVL (R10), R11
	MOVL R11, (AX)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B

	// The remaining cases copy a head and an overlapping tail of the same size.
emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7:
	MOVL (R10), R11
	MOVL -4(R10)(R9*1), R10
	MOVL R11, (AX)
	MOVL R10, -4(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B

emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B

emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B

emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeBetterBlockAsm8B:
	MOVQ SI, AX
	JMP emit_literal_done_match_emit_encodeBetterBlockAsm8B

memmove_long_match_emit_encodeBetterBlockAsm8B:
	LEAQ (AX)(R9*1), SI // SI = dst pointer after the literals

	// genMemMoveLong
	// The first and last 32 bytes are loaded up front and stored last with
	// unaligned moves. The middle is copied 32 bytes per iteration; R14 starts
	// at 64-(dst&31) so that dst+R14-32 is 32-byte aligned for the MOVOA stores.
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R13
	SHRQ $0x05, R13
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R14
	SUBQ R11, R14
	DECQ R13 // DECQ leaves CF as set by the SUBQ above
	JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(R10)(R14*1), R11
	LEAQ -32(AX)(R14*1), R15

emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R15)
	MOVOA X5, 16(R15)
	ADDQ $0x20, R15
	ADDQ $0x20, R11
	ADDQ $0x20, R14
	DECQ R13
	JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(R10)(R14*1), X4
	MOVOU -16(R10)(R14*1), X5
	MOVOA X4, -32(AX)(R14*1)
	MOVOA X5, -16(AX)(R14*1)
	ADDQ $0x20, R14
	CMPQ R9, R14
	JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX

emit_literal_done_match_emit_encodeBetterBlockAsm8B:
	// s += extra match length; R12 = full match length; nextEmit = s.
	ADDL R12, CX
	ADDL $0x04, R12
	MOVL CX, 12(SP)

	// emitCopy
	// Longer than 64: write a 3-byte copy (tag 0xee + 16-bit offset) covering
	// 60 bytes, then encode the rest of the length as a repeat.
	CMPL R12, $0x40
	JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B
	MOVB $0xee, (AX)
	MOVW R8, 1(AX)
	LEAL -60(R12), R12
	ADDQ $0x03, AX

	// emitRepeat
	// SI = length, R12 = length-4. length <= 8 takes the 2-byte form,
	// length-4 < 260 the 3-byte form, anything longer the 4-byte form.
	MOVL R12, SI
	LEAL -4(R12), R12
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
	CMPL R12, $0x00000104
	JLT repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
	LEAL -256(R12), R12
	MOVW $0x0019, (AX)
	MOVW R12, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B

repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
	LEAL -4(R12), R12
	MOVW $0x0015, (AX)
	MOVB R12, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B

repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
	SHLL $0x02, R12
	ORL $0x01, R12
	MOVW R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B

two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B:
	CMPL R12, $0x0c
	JGE emit_copy_three_match_nolit_encodeBetterBlockAsm8B

	// Length 4..11: 2-byte copy. Tag = 1 + (length-4)<<2 + (offset>>8)<<5,
	// second byte = low 8 bits of the offset. Only the low byte of R12 is
	// stored, so stale upper bits of BX are harmless.
	MOVB $0x01, BL
	LEAL -16(BX)(R12*4), R12
	MOVB R8, 1(AX)
	SHRL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B

emit_copy_three_match_nolit_encodeBetterBlockAsm8B:
	// Length 12..64: 3-byte copy. Tag = 2 + (length-1)<<2, then 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(R12*4), R12
	MOVB R12, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B

match_is_repeat_encodeBetterBlockAsm8B:
	// Same offset as the previous match. Emit src[nextEmit:DI] as literals
	// (R8 = run length, R9 = run source pointer), then a repeat code.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
	MOVL DI, R8
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R9
	SUBL SI, R8
	LEAL -1(R8), SI
	CMPL SI, $0x3c
	JLT one_byte_match_emit_repeat_encodeBetterBlockAsm8B
	CMPL SI, $0x00000100
	JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B

two_bytes_match_emit_repeat_encodeBetterBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_match_emit_repeat_encodeBetterBlockAsm8B
	JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B

one_byte_match_emit_repeat_encodeBetterBlockAsm8B:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_repeat_encodeBetterBlockAsm8B:
	LEAQ (AX)(R8*1), SI // SI = dst pointer after the literals

	// genMemMoveShort
	// As above, the 4-byte move for runs of 1..4 reads into the match that
	// follows the run, which is inside src.
	CMPQ R8, $0x04
	JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4
	CMPQ R8, $0x08
	JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4:
	MOVL (R9), R10
	MOVL R10, (AX)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7:
	MOVL (R9), R10
	MOVL -4(R9)(R8*1), R9
	MOVL R10, (AX)
	MOVL R9, -4(AX)(R8*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B

emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B:
	MOVQ SI, AX
	JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B

memmove_long_match_emit_repeat_encodeBetterBlockAsm8B:
	LEAQ (AX)(R8*1), SI // SI = dst pointer after the literals

	// genMemMoveLong
	// Same scheme as the other long copy: head and tail saved in X0-X3,
	// middle copied in 32-byte steps with aligned stores.
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ R8, R11
	SHRQ $0x05, R11
	MOVQ AX, R10
	ANDL $0x0000001f, R10
	MOVQ $0x00000040, R13
	SUBQ R10, R13
	DECQ R11 // DECQ leaves CF as set by the SUBQ above
	JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(R9)(R13*1), R10
	LEAQ -32(AX)(R13*1), R14

emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back:
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOA X4, (R14)
	MOVOA X5, 16(R14)
	ADDQ $0x20, R14
	ADDQ $0x20, R10
	ADDQ $0x20, R13
	DECQ R11
	JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(R9)(R13*1), X4
	MOVOU -16(R9)(R13*1), X5
	MOVOA X4, -32(AX)(R13*1)
	MOVOA X5, -16(AX)(R13*1)
	ADDQ $0x20, R13
	CMPQ R8, R13
	JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ SI, AX

emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B:
	// s += extra match length; R12 = full match length; nextEmit = s.
	ADDL R12, CX
	ADDL $0x04, R12
	MOVL CX, 12(SP)

	// emitRepeat
	// SI = length, R12 = length-4. length <= 8 takes the 2-byte form,
	// length-4 < 260 the 3-byte form, anything longer the 4-byte form.
	MOVL R12, SI
	LEAL -4(R12), R12
	CMPL SI, $0x08
	JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B
	CMPL R12, $0x00000104
	JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B
	LEAL -256(R12), R12
	MOVW $0x0019, (AX)
	MOVW R12, 2(AX)
	ADDQ $0x04, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B

repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B:
	LEAL -4(R12), R12
	MOVW $0x0015, (AX)
	MOVB R12, 2(AX)
	ADDQ $0x03, AX
	JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B

repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B:
	SHLL $0x02, R12
	ORL $0x01, R12
	MOVW R12, (AX)
	ADDQ $0x02, AX

match_nolit_emitcopy_end_encodeBetterBlockAsm8B:
	// Finish with the tail once s reaches sLimit; return 0 if dst hit its limit.
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeBetterBlockAsm8B
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeBetterBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBetterBlockAsm8B:
	// Index positions around the match just emitted. DI becomes matchStart+1.
	// From the 8 bytes at matchStart+1:
	//   long[hash6(v)]     = matchStart+1    long[hash6(v>>8)]   = matchStart+2
	//   short[hash4(v>>8)] = matchStart+2    short[hash4(v>>16)] = matchStart+3
	MOVQ $0x0000cf1bbcdcbf9b, SI
	MOVQ $0x9e3779b1, R8
	INCL DI
	MOVQ (DX)(DI*1), R9
	MOVQ R9, R10
	MOVQ R9, R11
	MOVQ R9, R12
	SHRQ $0x08, R11
	MOVQ R11, R13
	SHRQ $0x10, R12
	LEAL 1(DI), R14
	LEAL 2(DI), R15
	MOVQ -2(DX)(CX*1), R9 // R9 = 8 bytes of src at s-2, hashed below
	SHLQ $0x10, R10
	IMULQ SI, R10
	SHRQ $0x36, R10
	SHLQ $0x10, R13
	IMULQ SI, R13
	SHRQ $0x36, R13
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x38, R11
	SHLQ $0x20, R12
	IMULQ R8, R12
	SHRQ $0x38, R12
	MOVL DI, 24(SP)(R10*4)
	MOVL R14, 24(SP)(R13*4)
	MOVL R14, 4120(SP)(R11*4)
	MOVL R15, 4120(SP)(R12*4)

	// From the 8 bytes at s-2:
	//   long[hash6(v)] = s-2    short[hash4(v>>8)] = s-1    long[hash6(v>>8)] = s-1
	MOVQ R9, R10
	MOVQ R9, R11
	SHRQ $0x08, R11
	MOVQ R11, R13
	LEAL -2(CX), R9
	LEAL -1(CX), DI
	SHLQ $0x10, R10
	IMULQ SI, R10
	SHRQ $0x36, R10
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x38, R11
	SHLQ $0x10, R13
	IMULQ SI, R13
	SHRQ $0x36, R13
	MOVL R9, 24(SP)(R10*4)
	MOVL DI, 4120(SP)(R11*4)
	MOVL DI, 24(SP)(R13*4)
	JMP search_loop_encodeBetterBlockAsm8B

emit_remainder_encodeBetterBlockAsm8B:
	// Return 0 unless dst + 3 + (len(src)-nextEmit) is below the dst limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 3(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeBetterBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeBetterBlockAsm8B:
	// Emit src[nextEmit:] as the final literal run, unless it is empty.
	// CX = run source pointer, SI = run length, DX = length-1 (src base is
	// no longer needed).
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeBetterBlockAsm8B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeBetterBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B

two_bytes_emit_remainder_encodeBetterBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeBetterBlockAsm8B
	JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B

one_byte_emit_remainder_encodeBetterBlockAsm8B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeBetterBlockAsm8B:
	LEAQ (AX)(SI*1), DX // DX = dst pointer after the literals
	MOVL SI, BX

	// genMemMoveShort
	// This run ends exactly at the end of src, so nothing after it may be
	// read: runs of 1..3 bytes are copied with exact-size moves instead of a
	// 4-byte load.
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2:
	// First and last byte; for a 1-byte run they are the same byte.
	MOVB (CX), SI
	MOVB -1(CX)(BX*1), CL
	MOVB SI, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3:
	MOVW (CX), SI
	MOVB 2(CX), CL
	MOVW SI, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7:
	MOVL (CX), SI
	MOVL -4(CX)(BX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm8B

memmove_long_emit_remainder_encodeBetterBlockAsm8B:
	LEAQ (AX)(SI*1), DX // DX = dst pointer after the literals
	MOVL SI, BX

	// genMemMoveLong
	// Same scheme as the other long copies; every load stays inside the run.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI // DECQ leaves CF as set by the SUBQ above
	JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeBetterBlockAsm8B:
	// ret = number of bytes written = dst write pointer - dst base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET
// func encodeSnappyBlockAsm(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeSnappyBlockAsm(SB), $65560-56
MOVQ dst_base+0(FP), AX
MOVQ $0x00000200, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeSnappyBlockAsm:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeSnappyBlockAsm
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), CX
LEAQ -9(CX), DX
LEAQ -8(CX), SI
MOVL SI, 8(SP)
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
MOVL $0x00000001, CX
MOVL CX, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeSnappyBlockAsm:
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x06, SI
LEAL 4(CX)(SI*1), SI
CMPL SI, 8(SP)
JGE emit_remainder_encodeSnappyBlockAsm
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ DI, R10
MOVQ DI, R11
SHRQ $0x08, R11
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x32, R10
SHLQ $0x10, R11
IMULQ R9, R11
SHRQ $0x32, R11
MOVL 24(SP)(R10*4), SI
MOVL 24(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
LEAL 1(CX), R10
MOVL R10, 24(SP)(R11*4)
MOVQ DI, R10
SHRQ $0x10, R10
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x32, R10
MOVL CX, R9
SUBL 16(SP), R9
MOVL 1(DX)(R9*1), R11
MOVQ DI, R9
SHRQ $0x08, R9
CMPL R9, R11
JNE no_repeat_found_encodeSnappyBlockAsm
LEAL 1(CX), DI
MOVL 12(SP), SI
MOVL DI, R8
SUBL 16(SP), R8
JZ repeat_extend_back_end_encodeSnappyBlockAsm
repeat_extend_back_loop_encodeSnappyBlockAsm:
CMPL DI, SI
JLE repeat_extend_back_end_encodeSnappyBlockAsm
MOVB -1(DX)(R8*1), BL
MOVB -1(DX)(DI*1), R9
CMPB BL, R9
JNE repeat_extend_back_end_encodeSnappyBlockAsm
LEAL -1(DI), DI
DECL R8
JNZ repeat_extend_back_loop_encodeSnappyBlockAsm
repeat_extend_back_end_encodeSnappyBlockAsm:
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm
MOVL DI, R8
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R9
SUBL SI, R8
LEAL -1(R8), SI
CMPL SI, $0x3c
JLT one_byte_repeat_emit_encodeSnappyBlockAsm
CMPL SI, $0x00000100
JLT two_bytes_repeat_emit_encodeSnappyBlockAsm
CMPL SI, $0x00010000
JLT three_bytes_repeat_emit_encodeSnappyBlockAsm
CMPL SI, $0x01000000
JLT four_bytes_repeat_emit_encodeSnappyBlockAsm
MOVB $0xfc, (AX)
MOVL SI, 1(AX)
ADDQ $0x05, AX
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm
four_bytes_repeat_emit_encodeSnappyBlockAsm:
MOVL SI, R10
SHRL $0x10, R10
MOVB $0xf8, (AX)
MOVW SI, 1(AX)
MOVB R10, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm
three_bytes_repeat_emit_encodeSnappyBlockAsm:
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm
two_bytes_repeat_emit_encodeSnappyBlockAsm:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
CMPL SI, $0x40
JL memmove_repeat_emit_encodeSnappyBlockAsm
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm
one_byte_repeat_emit_encodeSnappyBlockAsm:
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_repeat_emit_encodeSnappyBlockAsm:
LEAQ (AX)(R8*1), SI
// genMemMoveShort
CMPQ R8, $0x08
JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8
CMPQ R8, $0x10
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16
CMPQ R8, $0x20
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8:
MOVQ (R9), R10
MOVQ R10, (AX)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16:
MOVQ (R9), R10
MOVQ -8(R9)(R8*1), R9
MOVQ R10, (AX)
MOVQ R9, -8(AX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32:
MOVOU (R9), X0
MOVOU -16(R9)(R8*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64:
MOVOU (R9), X0
MOVOU 16(R9), X1
MOVOU -32(R9)(R8*1), X2
MOVOU -16(R9)(R8*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R8*1)
MOVOU X3, -16(AX)(R8*1)
memmove_end_copy_repeat_emit_encodeSnappyBlockAsm:
MOVQ SI, AX
JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm
memmove_long_repeat_emit_encodeSnappyBlockAsm:
LEAQ (AX)(R8*1), SI
// genMemMoveLong
MOVOU (R9), X0
MOVOU 16(R9), X1
MOVOU -32(R9)(R8*1), X2
MOVOU -16(R9)(R8*1), X3
MOVQ R8, R11
SHRQ $0x05, R11
MOVQ AX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
DECQ R11
JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
LEAQ -32(R9)(R12*1), R10
LEAQ -32(AX)(R12*1), R13
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
MOVOU -32(R9)(R12*1), X4
MOVOU -16(R9)(R12*1), X5
MOVOA X4, -32(AX)(R12*1)
MOVOA X5, -16(AX)(R12*1)
ADDQ $0x20, R12
CMPQ R8, R12
JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R8*1)
MOVOU X3, -16(AX)(R8*1)
MOVQ SI, AX
emit_literal_done_repeat_emit_encodeSnappyBlockAsm:
ADDL $0x05, CX
MOVL CX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), R8
SUBL CX, R8
LEAQ (DX)(CX*1), R9
LEAQ (DX)(SI*1), SI
// matchLen
XORL R11, R11
CMPL R8, $0x08
JL matchlen_single_repeat_extend_encodeSnappyBlockAsm
matchlen_loopback_repeat_extend_encodeSnappyBlockAsm:
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
TESTQ R10, R10
JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm
BSFQ R10, R10
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeSnappyBlockAsm
matchlen_loop_repeat_extend_encodeSnappyBlockAsm:
LEAL -8(R8), R8
LEAL 8(R11), R11
CMPL R8, $0x08
JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm
matchlen_single_repeat_extend_encodeSnappyBlockAsm:
TESTL R8, R8
JZ repeat_extend_forward_end_encodeSnappyBlockAsm
matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_encodeSnappyBlockAsm
LEAL 1(R11), R11
DECL R8
JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm
repeat_extend_forward_end_encodeSnappyBlockAsm:
ADDL R11, CX
MOVL CX, SI
SUBL DI, SI
MOVL 16(SP), DI
// emitCopy
CMPL DI, $0x00010000
JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm:
CMPL SI, $0x40
JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
MOVB $0xff, (AX)
MOVL DI, 1(AX)
LEAL -64(SI), SI
ADDQ $0x05, AX
CMPL SI, $0x04
JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm
four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm:
TESTL SI, SI
JZ repeat_end_emit_encodeSnappyBlockAsm
MOVB $0x03, BL
LEAL -4(BX)(SI*4), SI
MOVB SI, (AX)
MOVL DI, 1(AX)
ADDQ $0x05, AX
JMP repeat_end_emit_encodeSnappyBlockAsm
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm:
CMPL SI, $0x40
JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm
MOVB $0xee, (AX)
MOVW DI, 1(AX)
LEAL -60(SI), SI
ADDQ $0x03, AX
JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm:
CMPL SI, $0x0c
JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
CMPL DI, $0x00000800
JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
MOVB $0x01, BL
LEAL -16(BX)(SI*4), SI
MOVB DI, 1(AX)
SHRL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeSnappyBlockAsm
emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm:
MOVB $0x02, BL
LEAL -4(BX)(SI*4), SI
MOVB SI, (AX)
MOVW DI, 1(AX)
ADDQ $0x03, AX
repeat_end_emit_encodeSnappyBlockAsm:
MOVL CX, 12(SP)
JMP search_loop_encodeSnappyBlockAsm
no_repeat_found_encodeSnappyBlockAsm:
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeSnappyBlockAsm
SHRQ $0x08, DI
MOVL 24(SP)(R10*4), SI
LEAL 2(CX), R9
CMPL (DX)(R8*1), DI
JEQ candidate2_match_encodeSnappyBlockAsm
MOVL R9, 24(SP)(R10*4)
SHRQ $0x08, DI
CMPL (DX)(SI*1), DI
JEQ candidate3_match_encodeSnappyBlockAsm
MOVL 20(SP), CX
JMP search_loop_encodeSnappyBlockAsm
candidate3_match_encodeSnappyBlockAsm:
ADDL $0x02, CX
JMP candidate_match_encodeSnappyBlockAsm
candidate2_match_encodeSnappyBlockAsm:
MOVL R9, 24(SP)(R10*4)
INCL CX
MOVL R8, SI
candidate_match_encodeSnappyBlockAsm:
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBlockAsm
match_extend_back_loop_encodeSnappyBlockAsm:
CMPL CX, DI
JLE match_extend_back_end_encodeSnappyBlockAsm
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeSnappyBlockAsm
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeSnappyBlockAsm
JMP match_extend_back_loop_encodeSnappyBlockAsm
match_extend_back_end_encodeSnappyBlockAsm:
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 5(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeSnappyBlockAsm
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeSnappyBlockAsm:
MOVL CX, DI
MOVL 12(SP), R8
CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(R8*1), DI
SUBL R8, R9
LEAL -1(R9), R8
CMPL R8, $0x3c
JLT one_byte_match_emit_encodeSnappyBlockAsm
CMPL R8, $0x00000100
JLT two_bytes_match_emit_encodeSnappyBlockAsm
CMPL R8, $0x00010000
JLT three_bytes_match_emit_encodeSnappyBlockAsm
CMPL R8, $0x01000000
JLT four_bytes_match_emit_encodeSnappyBlockAsm
MOVB $0xfc, (AX)
MOVL R8, 1(AX)
ADDQ $0x05, AX
JMP memmove_long_match_emit_encodeSnappyBlockAsm
four_bytes_match_emit_encodeSnappyBlockAsm:
MOVL R8, R10
SHRL $0x10, R10
MOVB $0xf8, (AX)
MOVW R8, 1(AX)
MOVB R10, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_match_emit_encodeSnappyBlockAsm
three_bytes_match_emit_encodeSnappyBlockAsm:
MOVB $0xf4, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeSnappyBlockAsm
two_bytes_match_emit_encodeSnappyBlockAsm:
MOVB $0xf0, (AX)
MOVB R8, 1(AX)
ADDQ $0x02, AX
CMPL R8, $0x40
JL memmove_match_emit_encodeSnappyBlockAsm
JMP memmove_long_match_emit_encodeSnappyBlockAsm
one_byte_match_emit_encodeSnappyBlockAsm:
SHLB $0x02, R8
MOVB R8, (AX)
ADDQ $0x01, AX
memmove_match_emit_encodeSnappyBlockAsm:
LEAQ (AX)(R9*1), R8
// genMemMoveShort
CMPQ R9, $0x08
JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8:
MOVQ (DI), R10
MOVQ R10, (AX)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16:
MOVQ (DI), R10
MOVQ -8(DI)(R9*1), DI
MOVQ R10, (AX)
MOVQ DI, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBlockAsm:
MOVQ R8, AX
JMP emit_literal_done_match_emit_encodeSnappyBlockAsm
memmove_long_match_emit_encodeSnappyBlockAsm:
LEAQ (AX)(R9*1), R8
// genMemMoveLong
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVQ R9, R11
SHRQ $0x05, R11
MOVQ AX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
DECQ R11
JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
LEAQ -32(DI)(R12*1), R10
LEAQ -32(AX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
MOVOU -32(DI)(R12*1), X4
MOVOU -16(DI)(R12*1), X5
MOVOA X4, -32(AX)(R12*1)
MOVOA X5, -16(AX)(R12*1)
ADDQ $0x20, R12
CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ R8, AX
emit_literal_done_match_emit_encodeSnappyBlockAsm:
match_nolit_loop_encodeSnappyBlockAsm:
MOVL CX, DI
SUBL SI, DI
MOVL DI, 16(SP)
ADDL $0x04, CX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL CX, DI
LEAQ (DX)(CX*1), R8
LEAQ (DX)(SI*1), SI
// matchLen
XORL R10, R10
CMPL DI, $0x08
JL matchlen_single_match_nolit_encodeSnappyBlockAsm
matchlen_loopback_match_nolit_encodeSnappyBlockAsm:
MOVQ (R8)(R10*1), R9
XORQ (SI)(R10*1), R9
TESTQ R9, R9
JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm
BSFQ R9, R9
SARQ $0x03, R9
LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeSnappyBlockAsm
matchlen_loop_match_nolit_encodeSnappyBlockAsm:
LEAL -8(DI), DI
LEAL 8(R10), R10
CMPL DI, $0x08
JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm
matchlen_single_match_nolit_encodeSnappyBlockAsm:
TESTL DI, DI
JZ match_nolit_end_encodeSnappyBlockAsm
matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm:
MOVB (R8)(R10*1), R9
CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeSnappyBlockAsm
LEAL 1(R10), R10
DECL DI
JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm
match_nolit_end_encodeSnappyBlockAsm:
ADDL R10, CX
MOVL 16(SP), SI
ADDL $0x04, R10
MOVL CX, 12(SP)
// emitCopy
CMPL SI, $0x00010000
JL two_byte_offset_match_nolit_encodeSnappyBlockAsm
four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm:
CMPL R10, $0x40
JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm
MOVB $0xff, (AX)
MOVL SI, 1(AX)
LEAL -64(R10), R10
ADDQ $0x05, AX
CMPL R10, $0x04
JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm
JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm
four_bytes_remain_match_nolit_encodeSnappyBlockAsm:
TESTL R10, R10
JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm
MOVB $0x03, BL
LEAL -4(BX)(R10*4), R10
MOVB R10, (AX)
MOVL SI, 1(AX)
ADDQ $0x05, AX
JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm
two_byte_offset_match_nolit_encodeSnappyBlockAsm:
CMPL R10, $0x40
JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm
MOVB $0xee, (AX)
MOVW SI, 1(AX)
LEAL -60(R10), R10
ADDQ $0x03, AX
JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm
two_byte_offset_short_match_nolit_encodeSnappyBlockAsm:
CMPL R10, $0x0c
JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm
CMPL SI, $0x00000800
JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm
MOVB $0x01, BL
LEAL -16(BX)(R10*4), R10
MOVB SI, 1(AX)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm
emit_copy_three_match_nolit_encodeSnappyBlockAsm:
MOVB $0x02, BL
LEAL -4(BX)(R10*4), R10
MOVB R10, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
match_nolit_emitcopy_end_encodeSnappyBlockAsm:
CMPL CX, 8(SP)
JGE emit_remainder_encodeSnappyBlockAsm
MOVQ -2(DX)(CX*1), DI
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeSnappyBlockAsm
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeSnappyBlockAsm:
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ DI, R8
SHRQ $0x10, DI
MOVQ DI, SI
SHLQ $0x10, R8
IMULQ R9, R8
SHRQ $0x32, R8
SHLQ $0x10, SI
IMULQ R9, SI
SHRQ $0x32, SI
LEAL -2(CX), R9
LEAQ 24(SP)(SI*4), R10
MOVL (R10), SI
MOVL R9, 24(SP)(R8*4)
MOVL CX, (R10)
CMPL (DX)(SI*1), DI
JEQ match_nolit_loop_encodeSnappyBlockAsm
INCL CX
JMP search_loop_encodeSnappyBlockAsm
emit_remainder_encodeSnappyBlockAsm:
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 5(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeSnappyBlockAsm
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeSnappyBlockAsm:
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeSnappyBlockAsm
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeSnappyBlockAsm
CMPL DX, $0x00010000
JLT three_bytes_emit_remainder_encodeSnappyBlockAsm
CMPL DX, $0x01000000
JLT four_bytes_emit_remainder_encodeSnappyBlockAsm
MOVB $0xfc, (AX)
MOVL DX, 1(AX)
ADDQ $0x05, AX
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm
four_bytes_emit_remainder_encodeSnappyBlockAsm:
MOVL DX, BX
SHRL $0x10, BX
MOVB $0xf8, (AX)
MOVW DX, 1(AX)
MOVB BL, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm
three_bytes_emit_remainder_encodeSnappyBlockAsm:
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm
two_bytes_emit_remainder_encodeSnappyBlockAsm:
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
CMPL DX, $0x40
JL memmove_emit_remainder_encodeSnappyBlockAsm
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm
one_byte_emit_remainder_encodeSnappyBlockAsm:
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
memmove_emit_remainder_encodeSnappyBlockAsm:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
CMPQ BX, $0x08
JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8:
MOVQ (CX), SI
MOVQ SI, (AX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16:
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32:
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64:
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBlockAsm:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm
memmove_long_emit_remainder_encodeSnappyBlockAsm:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
emit_literal_done_emit_remainder_encodeSnappyBlockAsm:
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeSnappyBlockAsm64K(dst []byte, src []byte) int
// Requires: SSE2
//
// Compresses src into dst as a sequence of literal and copy elements and
// returns the number of bytes written to dst. Returns 0 when the output would
// reach the destination limit computed in the prologue.
//
// NOTE(review): the copy emitter in this variant only writes 8/16-bit offsets,
// so callers presumably guarantee len(src) <= 64KiB. Confirm against the Go
// side. No check of len(dst) is visible in this function.
//
// Frame layout (65560 bytes of locals):
//    0(SP)  8 bytes   dst limit pointer: dst_base + len(src) - 9 - len(src)/32
//    8(SP)  4 bytes   sLimit = len(src) - 8, the search loop stops here
//   12(SP)  4 bytes   nextEmit: src index of the first byte not yet emitted
//   16(SP)  4 bytes   repeat: offset of the most recent match, starts at 1
//   20(SP)  4 bytes   nextS: position to resume at after a failed probe
//   24(SP)  65536 B   hash table holding 16384 32-bit src positions
//
// Register roles in the main loop:
//   AX = dst write pointer, CX = s (current src index), DX = src base pointer.
// The emit_remainder section reuses CX and DX for other purposes.
//
// Hash used for table lookups: ((x << 16) * 0xcf1bbcdcbf9b) >> 50, which gives
// a 14-bit index computed from the low 6 bytes of x.
TEXT ·encodeSnappyBlockAsm64K(SB), $65560-56
MOVQ dst_base+0(FP), AX
// Zero the hash table: 0x200 iterations of 0x80 bytes each = 65536 bytes.
MOVQ $0x00000200, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeSnappyBlockAsm64K:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeSnappyBlockAsm64K
// nextEmit = 0.
MOVL $0x00000000, 12(SP)
// sLimit = len(src) - 8 and dst limit = dst + len(src) - 9 - len(src)>>5.
MOVQ src_len+32(FP), CX
LEAQ -9(CX), DX
LEAQ -8(CX), SI
MOVL SI, 8(SP)
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
// s = 1 and repeat = 1.
MOVL $0x00000001, CX
MOVL CX, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeSnappyBlockAsm64K:
// nextS = s + 4 + ((s - nextEmit) >> 6): the skip distance grows with the
// length of the pending literal run.
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x06, SI
LEAL 4(CX)(SI*1), SI
// Leave the search loop once nextS reaches sLimit.
CMPL SI, 8(SP)
JGE emit_remainder_encodeSnappyBlockAsm64K
// DI = 8 bytes of src starting at s. Remember nextS in 20(SP).
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
// R10 = hash of the bytes at s, R11 = hash of the bytes at s+1.
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ DI, R10
MOVQ DI, R11
SHRQ $0x08, R11
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x32, R10
SHLQ $0x10, R11
IMULQ R9, R11
SHRQ $0x32, R11
// SI = previous table entry for hash(s), R8 = previous entry for hash(s+1).
// Then record s and s+1 in those slots.
MOVL 24(SP)(R10*4), SI
MOVL 24(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
LEAL 1(CX), R10
MOVL R10, 24(SP)(R11*4)
// R10 = hash of the bytes at s+2, used by no_repeat_found below.
MOVQ DI, R10
SHRQ $0x10, R10
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x32, R10
// Repeat check: compare the 4 bytes at s+1 with the 4 bytes at s+1-repeat.
MOVL CX, R9
SUBL 16(SP), R9
MOVL 1(DX)(R9*1), R11
MOVQ DI, R9
SHRQ $0x08, R9
CMPL R9, R11
JNE no_repeat_found_encodeSnappyBlockAsm64K
// Repeat match at s+1. DI = match start, R8 = matching earlier position.
// Extend backwards while the preceding bytes agree, without going below
// nextEmit (SI) and without R8 reaching zero.
LEAL 1(CX), DI
MOVL 12(SP), SI
MOVL DI, R8
SUBL 16(SP), R8
JZ repeat_extend_back_end_encodeSnappyBlockAsm64K
repeat_extend_back_loop_encodeSnappyBlockAsm64K:
CMPL DI, SI
JLE repeat_extend_back_end_encodeSnappyBlockAsm64K
MOVB -1(DX)(R8*1), BL
MOVB -1(DX)(DI*1), R9
CMPB BL, R9
JNE repeat_extend_back_end_encodeSnappyBlockAsm64K
LEAL -1(DI), DI
DECL R8
JNZ repeat_extend_back_loop_encodeSnappyBlockAsm64K
repeat_extend_back_end_encodeSnappyBlockAsm64K:
// Emit src[nextEmit:DI] as a literal, unless it is empty.
// R9 = source pointer, R8 = literal length, SI = length - 1.
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
MOVL DI, R8
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R9
SUBL SI, R8
LEAL -1(R8), SI
// Literal header: length-1 below 60 is stored in the tag byte itself,
// below 256 uses tag 0xf0 plus one length byte, anything else uses tag
// 0xf4 plus a 16-bit length.
CMPL SI, $0x3c
JLT one_byte_repeat_emit_encodeSnappyBlockAsm64K
CMPL SI, $0x00000100
JLT two_bytes_repeat_emit_encodeSnappyBlockAsm64K
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K
two_bytes_repeat_emit_encodeSnappyBlockAsm64K:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
// The short copy below handles at most 64 bytes.
CMPL SI, $0x40
JL memmove_repeat_emit_encodeSnappyBlockAsm64K
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K
one_byte_repeat_emit_encodeSnappyBlockAsm64K:
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_repeat_emit_encodeSnappyBlockAsm64K:
// SI = dst pointer after the literal payload.
LEAQ (AX)(R8*1), SI
// genMemMoveShort: copy R8 bytes from R9 to AX using overlapping head and
// tail moves selected by length.
CMPQ R8, $0x08
JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8
CMPQ R8, $0x10
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
CMPQ R8, $0x20
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8:
// Always moves a full 8 bytes, even when R8 is smaller. Only R8 bytes are
// accounted for when AX is advanced afterwards.
MOVQ (R9), R10
MOVQ R10, (AX)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
MOVQ (R9), R10
MOVQ -8(R9)(R8*1), R9
MOVQ R10, (AX)
MOVQ R9, -8(AX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
MOVOU (R9), X0
MOVOU -16(R9)(R8*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
MOVOU (R9), X0
MOVOU 16(R9), X1
MOVOU -32(R9)(R8*1), X2
MOVOU -16(R9)(R8*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R8*1)
MOVOU X3, -16(AX)(R8*1)
memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K:
MOVQ SI, AX
JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
memmove_long_repeat_emit_encodeSnappyBlockAsm64K:
// SI = dst pointer after the literal payload.
LEAQ (AX)(R8*1), SI
// genMemMoveLong: load the first and last 32 bytes up front (X0-X3), copy
// the middle in 32-byte chunks, then store the saved head and tail last.
MOVOU (R9), X0
MOVOU 16(R9), X1
MOVOU -32(R9)(R8*1), X2
MOVOU -16(R9)(R8*1), X3
// R11 = number of 32-byte chunks. R12 = 64 - (AX & 31), so that
// AX - 32 + R12 is a 32-byte aligned dst address for the MOVOA stores.
MOVQ R8, R11
SHRQ $0x05, R11
MOVQ AX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
// NOTE(review): DECQ does not write CF on x86, so the JA and JNA below
// depend on ZF from DECQ combined with CF left by the preceding SUBQ/ADDQ.
// Confirm against the generator before touching this sequence.
DECQ R11
JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
LEAQ -32(R9)(R12*1), R10
LEAQ -32(AX)(R12*1), R13
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
// Copy 32 bytes per iteration while the length R8 is >= the offset R12.
MOVOU -32(R9)(R12*1), X4
MOVOU -16(R9)(R12*1), X5
MOVOA X4, -32(AX)(R12*1)
MOVOA X5, -16(AX)(R12*1)
ADDQ $0x20, R12
CMPQ R8, R12
JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
// Store the saved head and tail with unaligned moves.
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R8*1)
MOVOU X3, -16(AX)(R8*1)
MOVQ SI, AX
emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K:
// s += 5, which skips the byte at s plus the 4 bytes already verified at s+1.
// R9 = src + s, SI = src + s - repeat, R8 = bytes remaining in src.
ADDL $0x05, CX
MOVL CX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), R8
SUBL CX, R8
LEAQ (DX)(CX*1), R9
LEAQ (DX)(SI*1), SI
// matchLen: R11 = number of equal bytes at R9 and SI, limited to R8.
XORL R11, R11
CMPL R8, $0x08
JL matchlen_single_repeat_extend_encodeSnappyBlockAsm64K
matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K:
// Compare 8 bytes at a time. On a difference, the index of the lowest set
// bit of the XOR divided by 8 is the number of matching bytes in this word.
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
TESTQ R10, R10
JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K
BSFQ R10, R10
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K
matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K:
LEAL -8(R8), R8
LEAL 8(R11), R11
CMPL R8, $0x08
JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K
matchlen_single_repeat_extend_encodeSnappyBlockAsm64K:
// Fewer than 8 bytes remain: compare one byte at a time.
TESTL R8, R8
JZ repeat_extend_forward_end_encodeSnappyBlockAsm64K
matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm64K:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_encodeSnappyBlockAsm64K
LEAL 1(R11), R11
DECL R8
JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm64K
repeat_extend_forward_end_encodeSnappyBlockAsm64K:
// s += matched bytes. SI = total match length measured from the
// back-extended start in DI. DI is then reloaded with the repeat offset.
ADDL R11, CX
MOVL CX, SI
SUBL DI, SI
MOVL 16(SP), DI
// emitCopy: SI = length, DI = offset.
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K:
// While length > 64, write a 3-byte element (tag 0xee, 16-bit offset) and
// reduce the remaining length by 60.
CMPL SI, $0x40
JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K
MOVB $0xee, (AX)
MOVW DI, 1(AX)
LEAL -60(SI), SI
ADDQ $0x03, AX
JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K
two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K:
// The 2-byte form is used only when length < 12 and offset < 2048.
CMPL SI, $0x0c
JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
CMPL DI, $0x00000800
JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
// Tag byte = 1 + (length-4)*4 + (offset>>8)*32, followed by the low offset
// byte. Only the low byte of SI is stored, so the upper bits of BX do not
// matter here.
MOVB $0x01, BL
LEAL -16(BX)(SI*4), SI
MOVB DI, 1(AX)
SHRL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeSnappyBlockAsm64K
emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K:
// Tag byte = 2 + (length-1)*4, followed by the 16-bit offset.
MOVB $0x02, BL
LEAL -4(BX)(SI*4), SI
MOVB SI, (AX)
MOVW DI, 1(AX)
ADDQ $0x03, AX
repeat_end_emit_encodeSnappyBlockAsm64K:
// nextEmit = s, then continue searching.
MOVL CX, 12(SP)
JMP search_loop_encodeSnappyBlockAsm64K
no_repeat_found_encodeSnappyBlockAsm64K:
// Probe 1: 4 bytes at the candidate for hash(s) against the 4 bytes at s.
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeSnappyBlockAsm64K
// Probe 2: candidate for hash(s+1), held in R8, against the 4 bytes at s+1.
// SI is reloaded with the previous table entry for hash(s+2), R9 = s+2.
SHRQ $0x08, DI
MOVL 24(SP)(R10*4), SI
LEAL 2(CX), R9
CMPL (DX)(R8*1), DI
JEQ candidate2_match_encodeSnappyBlockAsm64K
// Probe 3: record s+2 in the table, then test the old entry against the
// 4 bytes at s+2.
MOVL R9, 24(SP)(R10*4)
SHRQ $0x08, DI
CMPL (DX)(SI*1), DI
JEQ candidate3_match_encodeSnappyBlockAsm64K
// Nothing matched: s = nextS and try again.
MOVL 20(SP), CX
JMP search_loop_encodeSnappyBlockAsm64K
candidate3_match_encodeSnappyBlockAsm64K:
ADDL $0x02, CX
JMP candidate_match_encodeSnappyBlockAsm64K
candidate2_match_encodeSnappyBlockAsm64K:
// The table update for s+2 was skipped on this path, so do it here.
MOVL R9, 24(SP)(R10*4)
INCL CX
MOVL R8, SI
candidate_match_encodeSnappyBlockAsm64K:
// CX = s, SI = candidate position. Extend the match backwards while the
// preceding bytes agree, s stays above nextEmit and the candidate stays
// above zero.
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBlockAsm64K
match_extend_back_loop_encodeSnappyBlockAsm64K:
CMPL CX, DI
JLE match_extend_back_end_encodeSnappyBlockAsm64K
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeSnappyBlockAsm64K
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeSnappyBlockAsm64K
JMP match_extend_back_loop_encodeSnappyBlockAsm64K
match_extend_back_end_encodeSnappyBlockAsm64K:
// Bail out with a return value of 0 unless
// dst + 3 + (s - nextEmit) is below the dst limit stored at (SP).
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 3(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeSnappyBlockAsm64K
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeSnappyBlockAsm64K:
// Emit src[nextEmit:s] as a literal, unless it is empty.
// DI = source pointer, R9 = literal length, R8 = length - 1.
MOVL CX, DI
MOVL 12(SP), R8
CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm64K
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(R8*1), DI
SUBL R8, R9
LEAL -1(R9), R8
// Literal header selection: 1, 2 or 3 bytes depending on length - 1.
CMPL R8, $0x3c
JLT one_byte_match_emit_encodeSnappyBlockAsm64K
CMPL R8, $0x00000100
JLT two_bytes_match_emit_encodeSnappyBlockAsm64K
MOVB $0xf4, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeSnappyBlockAsm64K
two_bytes_match_emit_encodeSnappyBlockAsm64K:
MOVB $0xf0, (AX)
MOVB R8, 1(AX)
ADDQ $0x02, AX
CMPL R8, $0x40
JL memmove_match_emit_encodeSnappyBlockAsm64K
JMP memmove_long_match_emit_encodeSnappyBlockAsm64K
one_byte_match_emit_encodeSnappyBlockAsm64K:
SHLB $0x02, R8
MOVB R8, (AX)
ADDQ $0x01, AX
memmove_match_emit_encodeSnappyBlockAsm64K:
// R8 = dst pointer after the literal payload.
LEAQ (AX)(R9*1), R8
// genMemMoveShort: copy R9 bytes from DI to AX using overlapping head and
// tail moves selected by length.
CMPQ R9, $0x08
JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8:
// Always moves a full 8 bytes, even when R9 is smaller.
MOVQ (DI), R10
MOVQ R10, (AX)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
MOVQ (DI), R10
MOVQ -8(DI)(R9*1), DI
MOVQ R10, (AX)
MOVQ DI, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBlockAsm64K:
MOVQ R8, AX
JMP emit_literal_done_match_emit_encodeSnappyBlockAsm64K
memmove_long_match_emit_encodeSnappyBlockAsm64K:
// R8 = dst pointer after the literal payload.
LEAQ (AX)(R9*1), R8
// genMemMoveLong: save the first and last 32 bytes in X0-X3, copy the
// middle in 32-byte chunks with aligned stores, then write head and tail.
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVQ R9, R11
SHRQ $0x05, R11
MOVQ AX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
// NOTE(review): same DECQ followed by JA/JNA flag pattern as in the other
// long copies of this function. Confirm against the generator before
// changing it.
DECQ R11
JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
LEAQ -32(DI)(R12*1), R10
LEAQ -32(AX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
MOVOU -32(DI)(R12*1), X4
MOVOU -16(DI)(R12*1), X5
MOVOA X4, -32(AX)(R12*1)
MOVOA X5, -16(AX)(R12*1)
ADDQ $0x20, R12
CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ R8, AX
emit_literal_done_match_emit_encodeSnappyBlockAsm64K:
match_nolit_loop_encodeSnappyBlockAsm64K:
// A match of at least 4 bytes exists at s (CX) against candidate (SI).
// repeat = s - candidate. Then skip the 4 known bytes on both sides and
// measure how much further the match runs.
MOVL CX, DI
SUBL SI, DI
MOVL DI, 16(SP)
ADDL $0x04, CX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL CX, DI
LEAQ (DX)(CX*1), R8
LEAQ (DX)(SI*1), SI
// matchLen: R10 = number of equal bytes at R8 and SI, limited to DI.
XORL R10, R10
CMPL DI, $0x08
JL matchlen_single_match_nolit_encodeSnappyBlockAsm64K
matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K:
MOVQ (R8)(R10*1), R9
XORQ (SI)(R10*1), R9
TESTQ R9, R9
JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm64K
BSFQ R9, R9
SARQ $0x03, R9
LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeSnappyBlockAsm64K
matchlen_loop_match_nolit_encodeSnappyBlockAsm64K:
LEAL -8(DI), DI
LEAL 8(R10), R10
CMPL DI, $0x08
JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K
matchlen_single_match_nolit_encodeSnappyBlockAsm64K:
TESTL DI, DI
JZ match_nolit_end_encodeSnappyBlockAsm64K
matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm64K:
MOVB (R8)(R10*1), R9
CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeSnappyBlockAsm64K
LEAL 1(R10), R10
DECL DI
JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm64K
match_nolit_end_encodeSnappyBlockAsm64K:
// s += extra matched bytes. SI = offset, R10 = total match length
// including the first 4 bytes. nextEmit = s.
ADDL R10, CX
MOVL 16(SP), SI
ADDL $0x04, R10
MOVL CX, 12(SP)
// emitCopy: R10 = length, SI = offset. Same encoding as the repeat path.
two_byte_offset_match_nolit_encodeSnappyBlockAsm64K:
CMPL R10, $0x40
JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K
MOVB $0xee, (AX)
MOVW SI, 1(AX)
LEAL -60(R10), R10
ADDQ $0x03, AX
JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm64K
two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K:
CMPL R10, $0x0c
JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
CMPL SI, $0x00000800
JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
MOVB $0x01, BL
LEAL -16(BX)(R10*4), R10
MOVB SI, 1(AX)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm64K
emit_copy_three_match_nolit_encodeSnappyBlockAsm64K:
MOVB $0x02, BL
LEAL -4(BX)(R10*4), R10
MOVB R10, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
match_nolit_emitcopy_end_encodeSnappyBlockAsm64K:
// Finish if s reached sLimit. Otherwise load 8 bytes from s-2 and return 0
// if the dst pointer is no longer below the dst limit.
CMPL CX, 8(SP)
JGE emit_remainder_encodeSnappyBlockAsm64K
MOVQ -2(DX)(CX*1), DI
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeSnappyBlockAsm64K
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeSnappyBlockAsm64K:
// R8 = hash of the bytes at s-2, SI = hash of the bytes at s.
// After the shift DI holds the bytes starting at s.
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ DI, R8
SHRQ $0x10, DI
MOVQ DI, SI
SHLQ $0x10, R8
IMULQ R9, R8
SHRQ $0x32, R8
SHLQ $0x10, SI
IMULQ R9, SI
SHRQ $0x32, SI
// Record s-2 and s in the table. SI receives the previous entry for hash(s).
LEAL -2(CX), R9
LEAQ 24(SP)(SI*4), R10
MOVL (R10), SI
MOVL R9, 24(SP)(R8*4)
MOVL CX, (R10)
// If that previous entry matches the 4 bytes at s, emit another copy right
// away with no literal in between. Otherwise resume the search at s+1.
CMPL (DX)(SI*1), DI
JEQ match_nolit_loop_encodeSnappyBlockAsm64K
INCL CX
JMP search_loop_encodeSnappyBlockAsm64K
emit_remainder_encodeSnappyBlockAsm64K:
// Return 0 unless dst + 3 + (len(src) - nextEmit) is below the dst limit.
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 3(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeSnappyBlockAsm64K
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeSnappyBlockAsm64K:
// Emit src[nextEmit:] as a final literal, unless it is empty.
// From here CX = source pointer, SI = literal length and DX = length - 1.
// The src base pointer in DX is overwritten.
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeSnappyBlockAsm64K
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeSnappyBlockAsm64K
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K
two_bytes_emit_remainder_encodeSnappyBlockAsm64K:
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
CMPL DX, $0x40
JL memmove_emit_remainder_encodeSnappyBlockAsm64K
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K
one_byte_emit_remainder_encodeSnappyBlockAsm64K:
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
memmove_emit_remainder_encodeSnappyBlockAsm64K:
// DX = dst pointer after the literal payload, BX = literal length.
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort: copy BX bytes from CX to AX using overlapping head and
// tail moves selected by length.
CMPQ BX, $0x08
JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8:
// Always moves a full 8 bytes, even when BX is smaller.
// NOTE(review): at the end of src this reads up to 7 bytes past the
// literal. Confirm that callers tolerate this.
MOVQ (CX), SI
MOVQ SI, (AX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16:
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32:
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64:
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
memmove_long_emit_remainder_encodeSnappyBlockAsm64K:
// DX = dst pointer after the literal payload, BX = literal length.
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong: save the first and last 32 bytes in X0-X3, copy the
// middle in 32-byte chunks with aligned stores, then write head and tail.
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
// NOTE(review): same DECQ followed by JA/JNA flag pattern as in the other
// long copies of this function. Confirm against the generator before
// changing it.
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K:
// Return the number of bytes written: dst write pointer minus dst_base.
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeSnappyBlockAsm12B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst as Snappy literal / copy elements using a 12-bit
// hash table held on the stack, and returns the number of bytes written.
// Returns 0 whenever the output pointer would reach the limit computed at
// entry (see (SP) below).
//
// Frame layout ($16408 bytes):
//   0(SP)   8 bytes  dst limit = dst_base + src_len - 9 - (src_len >> 5)
//   8(SP)   4 bytes  search limit = src_len - 8
//   12(SP)  4 bytes  offset of the first source byte not yet emitted
//   16(SP)  4 bytes  current repeat offset (starts at 1)
//   20(SP)  4 bytes  next search position to use when nothing matches
//   24(SP)  16384 bytes: 4096 uint32 table entries, indexed by 12-bit hash
//
// Long-lived registers:
//   AX = current write pointer into dst
//   CX = current offset into src
//   DX = src_base (re-used as scratch once the remainder is being emitted)
//   BX = scratch; only BL is ever loaded, and only the low byte of values
//        derived from BX is stored.
TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56
MOVQ dst_base+0(FP), AX
// Clear the hash table: 0x80 iterations x 128 bytes = 16384 bytes.
MOVQ $0x00000080, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeSnappyBlockAsm12B:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeSnappyBlockAsm12B
// Nothing emitted yet.
MOVL $0x00000000, 12(SP)
// 8(SP) = src_len - 8: searching stops here so 8-byte loads stay in src.
MOVQ src_len+32(FP), CX
LEAQ -9(CX), DX
LEAQ -8(CX), SI
MOVL SI, 8(SP)
// (SP) = dst_base + src_len - 9 - src_len/32: output must stay below this.
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
// Start searching at offset 1 with an initial repeat offset of 1.
MOVL $0x00000001, CX
MOVL CX, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeSnappyBlockAsm12B:
// Next search position = CX + 4 + (CX - next_emit) / 32, so the stride
// grows the longer no match has been found.
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x05, SI
LEAL 4(CX)(SI*1), SI
CMPL SI, 8(SP)
JGE emit_remainder_encodeSnappyBlockAsm12B
// DI = 8 source bytes at CX; remember the next position in 20(SP).
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
// Hash: shift left by 24 to keep only the low 5 bytes, multiply by the
// constant, keep the top 12 bits (shift right by 52).
// R10 = hash of bytes at CX, R11 = hash of bytes at CX+1.
MOVQ $0x000000cf1bbcdcbb, R9
MOVQ DI, R10
MOVQ DI, R11
SHRQ $0x08, R11
SHLQ $0x18, R10
IMULQ R9, R10
SHRQ $0x34, R10
SHLQ $0x18, R11
IMULQ R9, R11
SHRQ $0x34, R11
// SI / R8 = previous table entries (candidates) for CX and CX+1;
// then record CX and CX+1 in those slots.
MOVL 24(SP)(R10*4), SI
MOVL 24(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
LEAL 1(CX), R10
MOVL R10, 24(SP)(R11*4)
// R10 = hash of bytes at CX+2 (table slot is read/updated later).
MOVQ DI, R10
SHRQ $0x10, R10
SHLQ $0x18, R10
IMULQ R9, R10
SHRQ $0x34, R10
// Repeat check: compare the 4 bytes at CX+1 with the 4 bytes at
// CX+1-repeat_offset.
MOVL CX, R9
SUBL 16(SP), R9
MOVL 1(DX)(R9*1), R11
MOVQ DI, R9
SHRQ $0x08, R9
CMPL R9, R11
JNE no_repeat_found_encodeSnappyBlockAsm12B
// Repeat found. DI = match start (CX+1), R8 = matching earlier position.
LEAL 1(CX), DI
MOVL 12(SP), SI
MOVL DI, R8
SUBL 16(SP), R8
JZ repeat_extend_back_end_encodeSnappyBlockAsm12B
repeat_extend_back_loop_encodeSnappyBlockAsm12B:
// Extend the match backwards while the preceding bytes are equal, without
// going below next_emit (SI) or below source offset 0 (R8).
CMPL DI, SI
JLE repeat_extend_back_end_encodeSnappyBlockAsm12B
MOVB -1(DX)(R8*1), BL
MOVB -1(DX)(DI*1), R9
CMPB BL, R9
JNE repeat_extend_back_end_encodeSnappyBlockAsm12B
LEAL -1(DI), DI
DECL R8
JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B
repeat_extend_back_end_encodeSnappyBlockAsm12B:
// Emit pending literal bytes [next_emit, DI), if any.
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
// R9 = literal source pointer, R8 = literal length, SI = length - 1.
MOVL DI, R8
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R9
SUBL SI, R8
LEAL -1(R8), SI
// Literal header: (len-1) < 60 -> 1 byte; < 256 -> 0xf0 + 1 length byte;
// otherwise 0xf4 + 2 length bytes.
CMPL SI, $0x3c
JLT one_byte_repeat_emit_encodeSnappyBlockAsm12B
CMPL SI, $0x00000100
JLT two_bytes_repeat_emit_encodeSnappyBlockAsm12B
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B
two_bytes_repeat_emit_encodeSnappyBlockAsm12B:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
// Lengths up to 64 use the short copy, longer ones the long copy.
CMPL SI, $0x40
JL memmove_repeat_emit_encodeSnappyBlockAsm12B
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B
one_byte_repeat_emit_encodeSnappyBlockAsm12B:
// Tag byte = (len-1) << 2 (low two bits zero).
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_repeat_emit_encodeSnappyBlockAsm12B:
// SI = dst pointer after the literal.
LEAQ (AX)(R8*1), SI
// genMemMoveShort
// Copy R8 (<= 64) bytes from R9 to AX using overlapping head/tail moves.
CMPQ R8, $0x08
JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8
CMPQ R8, $0x10
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
CMPQ R8, $0x20
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8:
// Always moves a full 8 bytes even when R8 < 8; AX only advances by R8.
// NOTE(review): this reads/writes up to 7 bytes past the literal and so
// depends on slack after it in src and dst - confirm with the caller.
MOVQ (R9), R10
MOVQ R10, (AX)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
MOVQ (R9), R10
MOVQ -8(R9)(R8*1), R9
MOVQ R10, (AX)
MOVQ R9, -8(AX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
MOVOU (R9), X0
MOVOU -16(R9)(R8*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
MOVOU (R9), X0
MOVOU 16(R9), X1
MOVOU -32(R9)(R8*1), X2
MOVOU -16(R9)(R8*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R8*1)
MOVOU X3, -16(AX)(R8*1)
memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B:
MOVQ SI, AX
JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
memmove_long_repeat_emit_encodeSnappyBlockAsm12B:
// SI = dst pointer after the literal.
LEAQ (AX)(R8*1), SI
// genMemMoveLong
// Load the first and last 32 bytes up front; they are stored unaligned
// at the very end and cover the edges the aligned loop does not.
MOVOU (R9), X0
MOVOU 16(R9), X1
MOVOU -32(R9)(R8*1), X2
MOVOU -16(R9)(R8*1), X3
// R11 = number of 32-byte chunks; R12 = 64 - (AX & 31), so AX+R12-32 is
// 32-byte aligned and the MOVOA stores below hit aligned addresses.
MOVQ R8, R11
SHRQ $0x05, R11
MOVQ AX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
// NOTE(review): DECQ does not modify CF, so JA here (and JNA in the loop
// below) combines ZF from DECQ with CF left by the preceding SUBQ/ADDQ.
// Confirm against the generator before changing this sequence.
DECQ R11
JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
LEAQ -32(R9)(R12*1), R10
LEAQ -32(AX)(R12*1), R13
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
// Copy 32 bytes per iteration (unaligned load, aligned store) while the
// running offset R12 has not passed the length R8.
MOVOU -32(R9)(R12*1), X4
MOVOU -16(R9)(R12*1), X5
MOVOA X4, -32(AX)(R12*1)
MOVOA X5, -16(AX)(R12*1)
ADDQ $0x20, R12
CMPQ R8, R12
JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
// Store the saved head and tail.
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R8*1)
MOVOU X3, -16(AX)(R8*1)
MOVQ SI, AX
emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B:
// Skip the 4 bytes already known to match at CX+1 and extend forwards.
// R9 = pointer at CX, SI = pointer at CX - repeat_offset,
// R8 = bytes left in src.
ADDL $0x05, CX
MOVL CX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), R8
SUBL CX, R8
LEAQ (DX)(CX*1), R9
LEAQ (DX)(SI*1), SI
// matchLen
// R11 = number of equal bytes. Compare 8 bytes at a time while at least 8
// remain; on a difference, BSFQ / 8 gives the count of equal low bytes.
XORL R11, R11
CMPL R8, $0x08
JL matchlen_single_repeat_extend_encodeSnappyBlockAsm12B
matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B:
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
TESTQ R10, R10
JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B
BSFQ R10, R10
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B
matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B:
LEAL -8(R8), R8
LEAL 8(R11), R11
CMPL R8, $0x08
JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B
matchlen_single_repeat_extend_encodeSnappyBlockAsm12B:
// Fewer than 8 bytes left: compare byte by byte.
TESTL R8, R8
JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B
matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm12B:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B
LEAL 1(R11), R11
DECL R8
JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm12B
repeat_extend_forward_end_encodeSnappyBlockAsm12B:
// CX = end of match; SI = match length (end - start in DI);
// DI = repeat offset.
ADDL R11, CX
MOVL CX, SI
SUBL DI, SI
MOVL 16(SP), DI
// emitCopy
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B:
// While more than 64 bytes remain, emit a 3-byte copy element
// (tag 0xee + 16-bit offset) that accounts for 60 bytes.
CMPL SI, $0x40
JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B
MOVB $0xee, (AX)
MOVW DI, 1(AX)
LEAL -60(SI), SI
ADDQ $0x03, AX
JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B
two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B:
// Length < 12 and offset < 2048 fit the 2-byte form, else the 3-byte form.
CMPL SI, $0x0c
JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
CMPL DI, $0x00000800
JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
// 2-byte form: tag = 1 | (len-4)<<2 | (offset>>8)<<5, then offset low byte.
// Only the low byte of SI is stored, so the upper bits of BX do not matter.
MOVB $0x01, BL
LEAL -16(BX)(SI*4), SI
MOVB DI, 1(AX)
SHRL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeSnappyBlockAsm12B
emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B:
// 3-byte form: tag = 2 | (len-1)<<2, then 16-bit offset.
MOVB $0x02, BL
LEAL -4(BX)(SI*4), SI
MOVB SI, (AX)
MOVW DI, 1(AX)
ADDQ $0x03, AX
repeat_end_emit_encodeSnappyBlockAsm12B:
// Everything up to CX has been emitted.
MOVL CX, 12(SP)
JMP search_loop_encodeSnappyBlockAsm12B
no_repeat_found_encodeSnappyBlockAsm12B:
// Candidate 1: do the 4 bytes at the table entry for CX equal those at CX?
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeSnappyBlockAsm12B
// Candidate 2: table entry for CX+1 against the bytes at CX+1.
// SI is reloaded with the table entry for CX+2; R9 = CX+2.
SHRQ $0x08, DI
MOVL 24(SP)(R10*4), SI
LEAL 2(CX), R9
CMPL (DX)(R8*1), DI
JEQ candidate2_match_encodeSnappyBlockAsm12B
// Candidate 3: record CX+2 in the table, then test its previous entry.
MOVL R9, 24(SP)(R10*4)
SHRQ $0x08, DI
CMPL (DX)(SI*1), DI
JEQ candidate3_match_encodeSnappyBlockAsm12B
// No match: continue at the precomputed next position.
MOVL 20(SP), CX
JMP search_loop_encodeSnappyBlockAsm12B
candidate3_match_encodeSnappyBlockAsm12B:
ADDL $0x02, CX
JMP candidate_match_encodeSnappyBlockAsm12B
candidate2_match_encodeSnappyBlockAsm12B:
MOVL R9, 24(SP)(R10*4)
INCL CX
MOVL R8, SI
candidate_match_encodeSnappyBlockAsm12B:
// CX = match position, SI = candidate position. Extend backwards while
// the preceding bytes match, bounded by next_emit (DI) and offset 0 (SI).
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBlockAsm12B
match_extend_back_loop_encodeSnappyBlockAsm12B:
CMPL CX, DI
JLE match_extend_back_end_encodeSnappyBlockAsm12B
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeSnappyBlockAsm12B
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeSnappyBlockAsm12B
JMP match_extend_back_loop_encodeSnappyBlockAsm12B
match_extend_back_end_encodeSnappyBlockAsm12B:
// Bail out with 0 if AX + 3 + pending literal length reaches the dst limit.
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 3(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeSnappyBlockAsm12B
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeSnappyBlockAsm12B:
// Emit pending literal bytes [next_emit, CX), if any.
// DI = literal source pointer, R9 = literal length, R8 = length - 1.
MOVL CX, DI
MOVL 12(SP), R8
CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(R8*1), DI
SUBL R8, R9
LEAL -1(R9), R8
// Literal header: (len-1) < 60 -> 1 byte; < 256 -> 0xf0 + 1 length byte;
// otherwise 0xf4 + 2 length bytes.
CMPL R8, $0x3c
JLT one_byte_match_emit_encodeSnappyBlockAsm12B
CMPL R8, $0x00000100
JLT two_bytes_match_emit_encodeSnappyBlockAsm12B
MOVB $0xf4, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeSnappyBlockAsm12B
two_bytes_match_emit_encodeSnappyBlockAsm12B:
MOVB $0xf0, (AX)
MOVB R8, 1(AX)
ADDQ $0x02, AX
CMPL R8, $0x40
JL memmove_match_emit_encodeSnappyBlockAsm12B
JMP memmove_long_match_emit_encodeSnappyBlockAsm12B
one_byte_match_emit_encodeSnappyBlockAsm12B:
SHLB $0x02, R8
MOVB R8, (AX)
ADDQ $0x01, AX
memmove_match_emit_encodeSnappyBlockAsm12B:
// R8 = dst pointer after the literal.
LEAQ (AX)(R9*1), R8
// genMemMoveShort
// Copy R9 (<= 64) bytes from DI to AX using overlapping head/tail moves.
CMPQ R9, $0x08
JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8:
// Always moves a full 8 bytes even when R9 < 8; AX only advances by R9.
MOVQ (DI), R10
MOVQ R10, (AX)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
MOVQ (DI), R10
MOVQ -8(DI)(R9*1), DI
MOVQ R10, (AX)
MOVQ DI, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBlockAsm12B:
MOVQ R8, AX
JMP emit_literal_done_match_emit_encodeSnappyBlockAsm12B
memmove_long_match_emit_encodeSnappyBlockAsm12B:
// R8 = dst pointer after the literal.
LEAQ (AX)(R9*1), R8
// genMemMoveLong
// Same scheme as the other long copies in this function: head and tail
// saved in X0-X3, 32-byte aligned stores in between.
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVQ R9, R11
SHRQ $0x05, R11
MOVQ AX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
// NOTE(review): DECQ leaves CF unchanged; JA/JNA below also test CF.
DECQ R11
JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
LEAQ -32(DI)(R12*1), R10
LEAQ -32(AX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
MOVOU -32(DI)(R12*1), X4
MOVOU -16(DI)(R12*1), X5
MOVOA X4, -32(AX)(R12*1)
MOVOA X5, -16(AX)(R12*1)
ADDQ $0x20, R12
CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ R8, AX
emit_literal_done_match_emit_encodeSnappyBlockAsm12B:
match_nolit_loop_encodeSnappyBlockAsm12B:
// Match at CX against candidate SI with no pending literals.
// Store offset = CX - SI as the new repeat offset, skip the 4 bytes
// already compared, and measure how much further the match runs.
MOVL CX, DI
SUBL SI, DI
MOVL DI, 16(SP)
ADDL $0x04, CX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL CX, DI
LEAQ (DX)(CX*1), R8
LEAQ (DX)(SI*1), SI
// matchLen
// R10 = number of additional equal bytes; DI = bytes left in src.
XORL R10, R10
CMPL DI, $0x08
JL matchlen_single_match_nolit_encodeSnappyBlockAsm12B
matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B:
MOVQ (R8)(R10*1), R9
XORQ (SI)(R10*1), R9
TESTQ R9, R9
JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm12B
BSFQ R9, R9
SARQ $0x03, R9
LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeSnappyBlockAsm12B
matchlen_loop_match_nolit_encodeSnappyBlockAsm12B:
LEAL -8(DI), DI
LEAL 8(R10), R10
CMPL DI, $0x08
JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B
matchlen_single_match_nolit_encodeSnappyBlockAsm12B:
TESTL DI, DI
JZ match_nolit_end_encodeSnappyBlockAsm12B
matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B:
MOVB (R8)(R10*1), R9
CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeSnappyBlockAsm12B
LEAL 1(R10), R10
DECL DI
JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B
match_nolit_end_encodeSnappyBlockAsm12B:
// CX = end of match, SI = offset, R10 = total match length (extension + 4).
// The match end becomes the new next_emit.
ADDL R10, CX
MOVL 16(SP), SI
ADDL $0x04, R10
MOVL CX, 12(SP)
// emitCopy
two_byte_offset_match_nolit_encodeSnappyBlockAsm12B:
// While more than 64 bytes remain, emit tag 0xee + 16-bit offset and
// subtract 60 from the remaining length.
CMPL R10, $0x40
JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B
MOVB $0xee, (AX)
MOVW SI, 1(AX)
LEAL -60(R10), R10
ADDQ $0x03, AX
JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B
two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B:
// Length < 12 and offset < 2048 fit the 2-byte form, else the 3-byte form.
CMPL R10, $0x0c
JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
CMPL SI, $0x00000800
JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
// 2-byte form: tag = 1 | (len-4)<<2 | (offset>>8)<<5, then offset low byte.
MOVB $0x01, BL
LEAL -16(BX)(R10*4), R10
MOVB SI, 1(AX)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B
emit_copy_three_match_nolit_encodeSnappyBlockAsm12B:
// 3-byte form: tag = 2 | (len-1)<<2, then 16-bit offset.
MOVB $0x02, BL
LEAL -4(BX)(R10*4), R10
MOVB R10, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
match_nolit_emitcopy_end_encodeSnappyBlockAsm12B:
// Past the search limit: only the tail is left to emit.
CMPL CX, 8(SP)
JGE emit_remainder_encodeSnappyBlockAsm12B
// DI = 8 source bytes starting at CX-2.
MOVQ -2(DX)(CX*1), DI
// Bail out with 0 if the output pointer reached the dst limit.
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeSnappyBlockAsm12B
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeSnappyBlockAsm12B:
// Hash positions CX-2 (into R8) and CX (into SI); DI becomes the bytes
// at CX after the shift by 16.
MOVQ $0x000000cf1bbcdcbb, R9
MOVQ DI, R8
SHRQ $0x10, DI
MOVQ DI, SI
SHLQ $0x18, R8
IMULQ R9, R8
SHRQ $0x34, R8
SHLQ $0x18, SI
IMULQ R9, SI
SHRQ $0x34, SI
// SI = previous table entry for CX (new candidate); store CX-2 and CX.
LEAL -2(CX), R9
LEAQ 24(SP)(SI*4), R10
MOVL (R10), SI
MOVL R9, 24(SP)(R8*4)
MOVL CX, (R10)
// If the candidate's 4 bytes equal those at CX, emit another copy right
// away; otherwise resume the search at CX+1.
CMPL (DX)(SI*1), DI
JEQ match_nolit_loop_encodeSnappyBlockAsm12B
INCL CX
JMP search_loop_encodeSnappyBlockAsm12B
emit_remainder_encodeSnappyBlockAsm12B:
// Bail out with 0 if AX + 3 + remaining literal length reaches the limit.
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 3(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeSnappyBlockAsm12B
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeSnappyBlockAsm12B:
// Emit src[next_emit:src_len] as one final literal, if non-empty.
// From here CX = literal source pointer, SI = length and DX = length - 1
// (DX no longer holds src_base).
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
// Literal header: (len-1) < 60 -> 1 byte; < 256 -> 0xf0 + 1 length byte;
// otherwise 0xf4 + 2 length bytes.
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeSnappyBlockAsm12B
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeSnappyBlockAsm12B
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B
two_bytes_emit_remainder_encodeSnappyBlockAsm12B:
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
CMPL DX, $0x40
JL memmove_emit_remainder_encodeSnappyBlockAsm12B
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B
one_byte_emit_remainder_encodeSnappyBlockAsm12B:
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
memmove_emit_remainder_encodeSnappyBlockAsm12B:
// DX = dst pointer after the literal, BX = length.
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
CMPQ BX, $0x08
JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8:
// NOTE(review): this literal ends at src_len, so when BX < 8 the 8-byte
// load below covers bytes past src_len. Confirm the caller guarantees
// readable memory there, or that the generator handles short tails.
MOVQ (CX), SI
MOVQ SI, (AX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16:
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32:
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64:
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
memmove_long_emit_remainder_encodeSnappyBlockAsm12B:
// DX = dst pointer after the literal, BX = length.
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
// Head and tail saved in X0-X3, 32-byte aligned stores in between.
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
// NOTE(review): DECQ leaves CF unchanged; JA/JNA below also test CF.
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B:
// Return the number of bytes written: AX - dst_base.
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56
MOVQ dst_base+0(FP), AX
MOVQ $0x00000020, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeSnappyBlockAsm10B:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeSnappyBlockAsm10B
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), CX
LEAQ -9(CX), DX
LEAQ -8(CX), SI
MOVL SI, 8(SP)
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
MOVL $0x00000001, CX
MOVL CX, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeSnappyBlockAsm10B:
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x05, SI
LEAL 4(CX)(SI*1), SI
CMPL SI, 8(SP)
JGE emit_remainder_encodeSnappyBlockAsm10B
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
MOVQ $0x9e3779b1, R9
MOVQ DI, R10
MOVQ DI, R11
SHRQ $0x08, R11
SHLQ $0x20, R10
IMULQ R9, R10
SHRQ $0x36, R10
SHLQ $0x20, R11
IMULQ R9, R11
SHRQ $0x36, R11
MOVL 24(SP)(R10*4), SI
MOVL 24(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
LEAL 1(CX), R10
MOVL R10, 24(SP)(R11*4)
MOVQ DI, R10
SHRQ $0x10, R10
SHLQ $0x20, R10
IMULQ R9, R10
SHRQ $0x36, R10
MOVL CX, R9
SUBL 16(SP), R9
MOVL 1(DX)(R9*1), R11
MOVQ DI, R9
SHRQ $0x08, R9
CMPL R9, R11
JNE no_repeat_found_encodeSnappyBlockAsm10B
LEAL 1(CX), DI
MOVL 12(SP), SI
MOVL DI, R8
SUBL 16(SP), R8
JZ repeat_extend_back_end_encodeSnappyBlockAsm10B
repeat_extend_back_loop_encodeSnappyBlockAsm10B:
CMPL DI, SI
JLE repeat_extend_back_end_encodeSnappyBlockAsm10B
MOVB -1(DX)(R8*1), BL
MOVB -1(DX)(DI*1), R9
CMPB BL, R9
JNE repeat_extend_back_end_encodeSnappyBlockAsm10B
LEAL -1(DI), DI
DECL R8
JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B
repeat_extend_back_end_encodeSnappyBlockAsm10B:
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
MOVL DI, R8
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R9
SUBL SI, R8
LEAL -1(R8), SI
CMPL SI, $0x3c
JLT one_byte_repeat_emit_encodeSnappyBlockAsm10B
CMPL SI, $0x00000100
JLT two_bytes_repeat_emit_encodeSnappyBlockAsm10B
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B
two_bytes_repeat_emit_encodeSnappyBlockAsm10B:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
CMPL SI, $0x40
JL memmove_repeat_emit_encodeSnappyBlockAsm10B
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B
one_byte_repeat_emit_encodeSnappyBlockAsm10B:
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_repeat_emit_encodeSnappyBlockAsm10B:
LEAQ (AX)(R8*1), SI
// genMemMoveShort
CMPQ R8, $0x08
JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8
CMPQ R8, $0x10
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
CMPQ R8, $0x20
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8:
MOVQ (R9), R10
MOVQ R10, (AX)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
MOVQ (R9), R10
MOVQ -8(R9)(R8*1), R9
MOVQ R10, (AX)
MOVQ R9, -8(AX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
MOVOU (R9), X0
MOVOU -16(R9)(R8*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
MOVOU (R9), X0
MOVOU 16(R9), X1
MOVOU -32(R9)(R8*1), X2
MOVOU -16(R9)(R8*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R8*1)
MOVOU X3, -16(AX)(R8*1)
memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B:
MOVQ SI, AX
JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
memmove_long_repeat_emit_encodeSnappyBlockAsm10B:
LEAQ (AX)(R8*1), SI
// genMemMoveLong
MOVOU (R9), X0
MOVOU 16(R9), X1
MOVOU -32(R9)(R8*1), X2
MOVOU -16(R9)(R8*1), X3
MOVQ R8, R11
SHRQ $0x05, R11
MOVQ AX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
DECQ R11
JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
LEAQ -32(R9)(R12*1), R10
LEAQ -32(AX)(R12*1), R13
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
MOVOU -32(R9)(R12*1), X4
MOVOU -16(R9)(R12*1), X5
MOVOA X4, -32(AX)(R12*1)
MOVOA X5, -16(AX)(R12*1)
ADDQ $0x20, R12
CMPQ R8, R12
JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R8*1)
MOVOU X3, -16(AX)(R8*1)
MOVQ SI, AX
emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B:
ADDL $0x05, CX
MOVL CX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), R8
SUBL CX, R8
LEAQ (DX)(CX*1), R9
LEAQ (DX)(SI*1), SI
// matchLen
XORL R11, R11
CMPL R8, $0x08
JL matchlen_single_repeat_extend_encodeSnappyBlockAsm10B
matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B:
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
TESTQ R10, R10
JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B
BSFQ R10, R10
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B
matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B:
LEAL -8(R8), R8
LEAL 8(R11), R11
CMPL R8, $0x08
JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B
matchlen_single_repeat_extend_encodeSnappyBlockAsm10B:
TESTL R8, R8
JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B
matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm10B:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B
LEAL 1(R11), R11
DECL R8
JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm10B
repeat_extend_forward_end_encodeSnappyBlockAsm10B:
ADDL R11, CX
MOVL CX, SI
SUBL DI, SI
MOVL 16(SP), DI
// emitCopy
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B:
CMPL SI, $0x40
JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B
MOVB $0xee, (AX)
MOVW DI, 1(AX)
LEAL -60(SI), SI
ADDQ $0x03, AX
JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B
two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B:
CMPL SI, $0x0c
JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
CMPL DI, $0x00000800
JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
MOVB $0x01, BL
LEAL -16(BX)(SI*4), SI
MOVB DI, 1(AX)
SHRL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeSnappyBlockAsm10B
emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B:
MOVB $0x02, BL
LEAL -4(BX)(SI*4), SI
MOVB SI, (AX)
MOVW DI, 1(AX)
ADDQ $0x03, AX
repeat_end_emit_encodeSnappyBlockAsm10B:
MOVL CX, 12(SP)
JMP search_loop_encodeSnappyBlockAsm10B
no_repeat_found_encodeSnappyBlockAsm10B:
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeSnappyBlockAsm10B
SHRQ $0x08, DI
MOVL 24(SP)(R10*4), SI
LEAL 2(CX), R9
CMPL (DX)(R8*1), DI
JEQ candidate2_match_encodeSnappyBlockAsm10B
MOVL R9, 24(SP)(R10*4)
SHRQ $0x08, DI
CMPL (DX)(SI*1), DI
JEQ candidate3_match_encodeSnappyBlockAsm10B
MOVL 20(SP), CX
JMP search_loop_encodeSnappyBlockAsm10B
candidate3_match_encodeSnappyBlockAsm10B:
ADDL $0x02, CX
JMP candidate_match_encodeSnappyBlockAsm10B
candidate2_match_encodeSnappyBlockAsm10B:
MOVL R9, 24(SP)(R10*4)
INCL CX
MOVL R8, SI
candidate_match_encodeSnappyBlockAsm10B:
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBlockAsm10B
match_extend_back_loop_encodeSnappyBlockAsm10B:
CMPL CX, DI
JLE match_extend_back_end_encodeSnappyBlockAsm10B
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeSnappyBlockAsm10B
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeSnappyBlockAsm10B
JMP match_extend_back_loop_encodeSnappyBlockAsm10B
match_extend_back_end_encodeSnappyBlockAsm10B:
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 3(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeSnappyBlockAsm10B
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeSnappyBlockAsm10B:
MOVL CX, DI
MOVL 12(SP), R8
CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(R8*1), DI
SUBL R8, R9
LEAL -1(R9), R8
CMPL R8, $0x3c
JLT one_byte_match_emit_encodeSnappyBlockAsm10B
CMPL R8, $0x00000100
JLT two_bytes_match_emit_encodeSnappyBlockAsm10B
MOVB $0xf4, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeSnappyBlockAsm10B
two_bytes_match_emit_encodeSnappyBlockAsm10B:
MOVB $0xf0, (AX)
MOVB R8, 1(AX)
ADDQ $0x02, AX
CMPL R8, $0x40
JL memmove_match_emit_encodeSnappyBlockAsm10B
JMP memmove_long_match_emit_encodeSnappyBlockAsm10B
one_byte_match_emit_encodeSnappyBlockAsm10B:
SHLB $0x02, R8
MOVB R8, (AX)
ADDQ $0x01, AX
memmove_match_emit_encodeSnappyBlockAsm10B:
LEAQ (AX)(R9*1), R8
// genMemMoveShort
CMPQ R9, $0x08
JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8:
MOVQ (DI), R10
MOVQ R10, (AX)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
MOVQ (DI), R10
MOVQ -8(DI)(R9*1), DI
MOVQ R10, (AX)
MOVQ DI, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
MOVOU (DI), X0
MOVOU -16(DI)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBlockAsm10B:
MOVQ R8, AX
JMP emit_literal_done_match_emit_encodeSnappyBlockAsm10B
memmove_long_match_emit_encodeSnappyBlockAsm10B:
LEAQ (AX)(R9*1), R8
// genMemMoveLong
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVQ R9, R11
SHRQ $0x05, R11
MOVQ AX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
DECQ R11
JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
LEAQ -32(DI)(R12*1), R10
LEAQ -32(AX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
MOVOU -32(DI)(R12*1), X4
MOVOU -16(DI)(R12*1), X5
MOVOA X4, -32(AX)(R12*1)
MOVOA X5, -16(AX)(R12*1)
ADDQ $0x20, R12
CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ R8, AX
emit_literal_done_match_emit_encodeSnappyBlockAsm10B:
match_nolit_loop_encodeSnappyBlockAsm10B:
MOVL CX, DI
SUBL SI, DI
MOVL DI, 16(SP)
ADDL $0x04, CX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL CX, DI
LEAQ (DX)(CX*1), R8
LEAQ (DX)(SI*1), SI
// matchLen
XORL R10, R10
CMPL DI, $0x08
JL matchlen_single_match_nolit_encodeSnappyBlockAsm10B
matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B:
MOVQ (R8)(R10*1), R9
XORQ (SI)(R10*1), R9
TESTQ R9, R9
JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm10B
BSFQ R9, R9
SARQ $0x03, R9
LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeSnappyBlockAsm10B
matchlen_loop_match_nolit_encodeSnappyBlockAsm10B:
LEAL -8(DI), DI
LEAL 8(R10), R10
CMPL DI, $0x08
JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B
matchlen_single_match_nolit_encodeSnappyBlockAsm10B:
TESTL DI, DI
JZ match_nolit_end_encodeSnappyBlockAsm10B
matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B:
MOVB (R8)(R10*1), R9
CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeSnappyBlockAsm10B
LEAL 1(R10), R10
DECL DI
JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B
match_nolit_end_encodeSnappyBlockAsm10B:
ADDL R10, CX
MOVL 16(SP), SI
ADDL $0x04, R10
MOVL CX, 12(SP)
// emitCopy
two_byte_offset_match_nolit_encodeSnappyBlockAsm10B:
CMPL R10, $0x40
JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B
MOVB $0xee, (AX)
MOVW SI, 1(AX)
LEAL -60(R10), R10
ADDQ $0x03, AX
JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B
two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B:
CMPL R10, $0x0c
JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
CMPL SI, $0x00000800
JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
MOVB $0x01, BL
LEAL -16(BX)(R10*4), R10
MOVB SI, 1(AX)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B
emit_copy_three_match_nolit_encodeSnappyBlockAsm10B:
MOVB $0x02, BL
LEAL -4(BX)(R10*4), R10
MOVB R10, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
match_nolit_emitcopy_end_encodeSnappyBlockAsm10B:
CMPL CX, 8(SP)
JGE emit_remainder_encodeSnappyBlockAsm10B
MOVQ -2(DX)(CX*1), DI
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeSnappyBlockAsm10B
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeSnappyBlockAsm10B:
MOVQ $0x9e3779b1, R9
MOVQ DI, R8
SHRQ $0x10, DI
MOVQ DI, SI
SHLQ $0x20, R8
IMULQ R9, R8
SHRQ $0x36, R8
SHLQ $0x20, SI
IMULQ R9, SI
SHRQ $0x36, SI
LEAL -2(CX), R9
LEAQ 24(SP)(SI*4), R10
MOVL (R10), SI
MOVL R9, 24(SP)(R8*4)
MOVL CX, (R10)
CMPL (DX)(SI*1), DI
JEQ match_nolit_loop_encodeSnappyBlockAsm10B
INCL CX
JMP search_loop_encodeSnappyBlockAsm10B
emit_remainder_encodeSnappyBlockAsm10B:
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 3(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeSnappyBlockAsm10B
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeSnappyBlockAsm10B:
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeSnappyBlockAsm10B
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeSnappyBlockAsm10B
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B
two_bytes_emit_remainder_encodeSnappyBlockAsm10B:
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
CMPL DX, $0x40
JL memmove_emit_remainder_encodeSnappyBlockAsm10B
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B
one_byte_emit_remainder_encodeSnappyBlockAsm10B:
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
memmove_emit_remainder_encodeSnappyBlockAsm10B:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
CMPQ BX, $0x08
JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8:
MOVQ (CX), SI
MOVQ SI, (AX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16:
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32:
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64:
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
memmove_long_emit_remainder_encodeSnappyBlockAsm10B:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B:
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and stores the number of bytes written in ret+48(FP).
// Stores 0 instead when one of the destination-limit checks below fails.
//
// Frame layout (1048 bytes of locals):
//   0(SP)   8 bytes     dst limit pointer: dst_base + src_len - 9 - (src_len >> 5)
//   8(SP)   4 bytes     search limit: src_len - 8
//   12(SP)  4 bytes     nextEmit: src offset of the first byte not yet emitted
//   16(SP)  4 bytes     repeat: distance of the most recent match (starts at 1)
//   20(SP)  4 bytes     next search position computed at the top of search_loop
//   24(SP)  1024 bytes  hash table: 256 entries of 4 bytes, each a src offset
//
// Register roles in the main loop:
//   AX  current dst write pointer
//   CX  current src offset
//   DX  src base pointer (reused as scratch once emit_remainder is reached)
//   BX  scratch; only BL is written before it feeds a LEAL whose low byte is stored
//
// Hash used throughout: ((x << 32) * 0x9e3779b1) >> 56, i.e. an 8-bit index
// derived from the low 4 bytes of x.
TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56
MOVQ dst_base+0(FP), AX
// Zero the hash table: 8 iterations x 128 bytes = 1024 bytes starting at 24(SP).
MOVQ $0x00000008, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeSnappyBlockAsm8B:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeSnappyBlockAsm8B
// nextEmit = 0
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), CX
LEAQ -9(CX), DX
LEAQ -8(CX), SI
// search limit = src_len - 8
MOVL SI, 8(SP)
// dst limit = dst_base + (src_len - 9 - src_len/32)
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
// Start at src offset 1 with repeat = 1.
MOVL $0x00000001, CX
MOVL CX, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeSnappyBlockAsm8B:
// next position = CX + 4 + ((CX - nextEmit) >> 4): the step grows with the
// number of pending literal bytes.
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x04, SI
LEAL 4(CX)(SI*1), SI
CMPL SI, 8(SP)
JGE emit_remainder_encodeSnappyBlockAsm8B
// DI = 8 bytes of src at the current offset; remember the next position.
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
// R10 = hash of bytes at CX, R11 = hash of bytes at CX+1.
MOVQ $0x9e3779b1, R9
MOVQ DI, R10
MOVQ DI, R11
SHRQ $0x08, R11
SHLQ $0x20, R10
IMULQ R9, R10
SHRQ $0x38, R10
SHLQ $0x20, R11
IMULQ R9, R11
SHRQ $0x38, R11
// Fetch both candidates (SI for CX, R8 for CX+1), then record CX and CX+1
// in the table.
MOVL 24(SP)(R10*4), SI
MOVL 24(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
LEAL 1(CX), R10
MOVL R10, 24(SP)(R11*4)
// R10 = hash of bytes at CX+2 (used by the third candidate check below).
MOVQ DI, R10
SHRQ $0x10, R10
SHLQ $0x20, R10
IMULQ R9, R10
SHRQ $0x38, R10
// Repeat check: compare the 4 bytes at CX+1 with the 4 bytes at CX+1-repeat.
MOVL CX, R9
SUBL 16(SP), R9
MOVL 1(DX)(R9*1), R11
MOVQ DI, R9
SHRQ $0x08, R9
CMPL R9, R11
JNE no_repeat_found_encodeSnappyBlockAsm8B
// Repeat match at CX+1. DI = match start, R8 = start of the earlier copy.
LEAL 1(CX), DI
MOVL 12(SP), SI
MOVL DI, R8
SUBL 16(SP), R8
JZ repeat_extend_back_end_encodeSnappyBlockAsm8B
// Extend the match backwards while the preceding bytes agree, stopping at
// nextEmit (SI) or when the earlier position reaches 0.
repeat_extend_back_loop_encodeSnappyBlockAsm8B:
CMPL DI, SI
JLE repeat_extend_back_end_encodeSnappyBlockAsm8B
MOVB -1(DX)(R8*1), BL
MOVB -1(DX)(DI*1), R9
CMPB BL, R9
JNE repeat_extend_back_end_encodeSnappyBlockAsm8B
LEAL -1(DI), DI
DECL R8
JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B
repeat_extend_back_end_encodeSnappyBlockAsm8B:
// Emit the pending literal bytes src[nextEmit:DI], if any.
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
// R9 = literal source pointer, R8 = literal length, SI = length - 1;
// nextEmit is advanced to DI.
MOVL DI, R8
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R9
SUBL SI, R8
LEAL -1(R8), SI
// Header size depends on length-1: < 60 one byte, < 256 two bytes,
// otherwise three bytes.
CMPL SI, $0x3c
JLT one_byte_repeat_emit_encodeSnappyBlockAsm8B
CMPL SI, $0x00000100
JLT two_bytes_repeat_emit_encodeSnappyBlockAsm8B
// Three-byte header: 0xf4 followed by length-1 as a 16-bit value.
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B
two_bytes_repeat_emit_encodeSnappyBlockAsm8B:
// Two-byte header: 0xf0 followed by length-1 as one byte.
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
// length-1 < 64 uses the short copy, otherwise the long copy.
CMPL SI, $0x40
JL memmove_repeat_emit_encodeSnappyBlockAsm8B
JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B
one_byte_repeat_emit_encodeSnappyBlockAsm8B:
// One-byte header: (length-1) << 2.
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_repeat_emit_encodeSnappyBlockAsm8B:
// SI = dst pointer after the literal bytes.
LEAQ (AX)(R8*1), SI
// genMemMoveShort
// Copies R8 bytes (at most 64) from (R9) to (AX) using overlapping
// fixed-size loads and stores selected by length range.
CMPQ R8, $0x08
JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8
CMPQ R8, $0x10
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
CMPQ R8, $0x20
JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8:
// Length <= 8: one 8-byte load and store, regardless of the exact length.
MOVQ (R9), R10
MOVQ R10, (AX)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
// First 8 bytes and last 8 bytes (may overlap).
MOVQ (R9), R10
MOVQ -8(R9)(R8*1), R9
MOVQ R10, (AX)
MOVQ R9, -8(AX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
// First 16 bytes and last 16 bytes (may overlap).
MOVOU (R9), X0
MOVOU -16(R9)(R8*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R8*1)
JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
// First 32 bytes and last 32 bytes (may overlap).
MOVOU (R9), X0
MOVOU 16(R9), X1
MOVOU -32(R9)(R8*1), X2
MOVOU -16(R9)(R8*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R8*1)
MOVOU X3, -16(AX)(R8*1)
memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B:
MOVQ SI, AX
JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
memmove_long_repeat_emit_encodeSnappyBlockAsm8B:
// SI = dst pointer after the literal bytes.
LEAQ (AX)(R8*1), SI
// genMemMoveLong
// Load the first and last 32 bytes up front; they are stored last so the
// unaligned head and the tail are always covered.
MOVOU (R9), X0
MOVOU 16(R9), X1
MOVOU -32(R9)(R8*1), X2
MOVOU -16(R9)(R8*1), X3
// R11 = length / 32; R12 = 64 - (AX & 31), so AX + R12 - 32 is a multiple
// of 32 and the MOVOA stores below hit aligned addresses.
MOVQ R8, R11
SHRQ $0x05, R11
MOVQ AX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
// NOTE(review): DECQ leaves CF unchanged, so JA/JNA here combine ZF from
// DECQ with CF from the preceding SUBQ/ADDQ — verify against the generator.
DECQ R11
JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
LEAQ -32(R9)(R12*1), R10
LEAQ -32(AX)(R12*1), R13
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
// Copy 32 bytes ending at offset R12, then advance; repeat while
// length >= R12 (unsigned).
MOVOU -32(R9)(R12*1), X4
MOVOU -16(R9)(R12*1), X5
MOVOA X4, -32(AX)(R12*1)
MOVOA X5, -16(AX)(R12*1)
ADDQ $0x20, R12
CMPQ R8, R12
JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
// Store the saved head and tail.
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R8*1)
MOVOU X3, -16(AX)(R8*1)
MOVQ SI, AX
emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B:
// Skip the 4 bytes already compared at CX+1, then extend the match forward.
// R9 = current pointer, SI = pointer repeat bytes earlier, R8 = bytes left in src.
ADDL $0x05, CX
MOVL CX, SI
SUBL 16(SP), SI
MOVQ src_len+32(FP), R8
SUBL CX, R8
LEAQ (DX)(CX*1), R9
LEAQ (DX)(SI*1), SI
// matchLen
// R11 = number of equal bytes. Compares 8 bytes at a time while at least 8
// remain; on a difference, BSFQ finds the lowest differing bit and >> 3
// converts it to a byte count.
XORL R11, R11
CMPL R8, $0x08
JL matchlen_single_repeat_extend_encodeSnappyBlockAsm8B
matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B:
MOVQ (R9)(R11*1), R10
XORQ (SI)(R11*1), R10
TESTQ R10, R10
JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B
BSFQ R10, R10
SARQ $0x03, R10
LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B
matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B:
LEAL -8(R8), R8
LEAL 8(R11), R11
CMPL R8, $0x08
JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B
matchlen_single_repeat_extend_encodeSnappyBlockAsm8B:
// Fewer than 8 bytes left: compare one byte at a time.
TESTL R8, R8
JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B
matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm8B:
MOVB (R9)(R11*1), R10
CMPB (SI)(R11*1), R10
JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B
LEAL 1(R11), R11
DECL R8
JNZ matchlen_single_loopback_repeat_extend_encodeSnappyBlockAsm8B
repeat_extend_forward_end_encodeSnappyBlockAsm8B:
// CX = end of match; SI = match length (end - start in DI); DI = repeat offset.
ADDL R11, CX
MOVL CX, SI
SUBL DI, SI
MOVL 16(SP), DI
// emitCopy
// While length > 64: write 0xee + 16-bit offset and subtract 60 from length.
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B:
CMPL SI, $0x40
JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B
MOVB $0xee, (AX)
MOVW DI, 1(AX)
LEAL -60(SI), SI
ADDQ $0x03, AX
JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B
two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B:
// length >= 12 uses the three-byte form; shorter lengths use two bytes.
CMPL SI, $0x0c
JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B
// Two-byte form: byte 0 = 1 + (length-4)*4 | (offset>>8)<<5, byte 1 = low
// 8 bits of offset.
// NOTE(review): there is no offset range check before this form; presumably
// the caller bounds the input size so offsets fit — confirm.
MOVB $0x01, BL
LEAL -16(BX)(SI*4), SI
MOVB DI, 1(AX)
SHRL $0x08, DI
SHLL $0x05, DI
ORL DI, SI
MOVB SI, (AX)
ADDQ $0x02, AX
JMP repeat_end_emit_encodeSnappyBlockAsm8B
emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B:
// Three-byte form: byte 0 = 2 + (length-1)*4, then the 16-bit offset.
MOVB $0x02, BL
LEAL -4(BX)(SI*4), SI
MOVB SI, (AX)
MOVW DI, 1(AX)
ADDQ $0x03, AX
repeat_end_emit_encodeSnappyBlockAsm8B:
// nextEmit = end of match; resume searching from CX.
MOVL CX, 12(SP)
JMP search_loop_encodeSnappyBlockAsm8B
no_repeat_found_encodeSnappyBlockAsm8B:
// Candidate 1: 4 bytes at table entry SI against the 4 bytes at CX.
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeSnappyBlockAsm8B
// Candidate 2: 4 bytes at R8 against the 4 bytes at CX+1. SI is reloaded
// with the table entry for the hash of CX+2.
SHRQ $0x08, DI
MOVL 24(SP)(R10*4), SI
LEAL 2(CX), R9
CMPL (DX)(R8*1), DI
JEQ candidate2_match_encodeSnappyBlockAsm8B
// Record CX+2 in the table, then candidate 3: 4 bytes at SI against CX+2.
MOVL R9, 24(SP)(R10*4)
SHRQ $0x08, DI
CMPL (DX)(SI*1), DI
JEQ candidate3_match_encodeSnappyBlockAsm8B
// No match: continue at the next position saved in 20(SP).
MOVL 20(SP), CX
JMP search_loop_encodeSnappyBlockAsm8B
candidate3_match_encodeSnappyBlockAsm8B:
ADDL $0x02, CX
JMP candidate_match_encodeSnappyBlockAsm8B
candidate2_match_encodeSnappyBlockAsm8B:
// Record CX+2 in the table, advance to CX+1 and use R8 as the candidate.
MOVL R9, 24(SP)(R10*4)
INCL CX
MOVL R8, SI
candidate_match_encodeSnappyBlockAsm8B:
// Here CX = match position, SI = candidate position. Extend backwards while
// preceding bytes agree, stopping at nextEmit (DI) or when SI reaches 0.
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBlockAsm8B
match_extend_back_loop_encodeSnappyBlockAsm8B:
CMPL CX, DI
JLE match_extend_back_end_encodeSnappyBlockAsm8B
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeSnappyBlockAsm8B
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeSnappyBlockAsm8B
JMP match_extend_back_loop_encodeSnappyBlockAsm8B
match_extend_back_end_encodeSnappyBlockAsm8B:
// Destination check: AX + 3 + pending literal length must be below the dst
// limit in (SP); otherwise store 0 as the result and return.
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 3(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeSnappyBlockAsm8B
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeSnappyBlockAsm8B:
// Emit the pending literal bytes src[nextEmit:CX], if any.
MOVL CX, DI
MOVL 12(SP), R8
CMPL R8, DI
JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B
// DI = literal source pointer, R9 = literal length, R8 = length - 1;
// nextEmit is advanced to CX.
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(R8*1), DI
SUBL R8, R9
LEAL -1(R9), R8
// Header size depends on length-1: < 60 one byte, < 256 two bytes,
// otherwise three bytes.
CMPL R8, $0x3c
JLT one_byte_match_emit_encodeSnappyBlockAsm8B
CMPL R8, $0x00000100
JLT two_bytes_match_emit_encodeSnappyBlockAsm8B
// Three-byte header: 0xf4 followed by length-1 as a 16-bit value.
MOVB $0xf4, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeSnappyBlockAsm8B
two_bytes_match_emit_encodeSnappyBlockAsm8B:
// Two-byte header: 0xf0 followed by length-1 as one byte.
MOVB $0xf0, (AX)
MOVB R8, 1(AX)
ADDQ $0x02, AX
// length-1 < 64 uses the short copy, otherwise the long copy.
CMPL R8, $0x40
JL memmove_match_emit_encodeSnappyBlockAsm8B
JMP memmove_long_match_emit_encodeSnappyBlockAsm8B
one_byte_match_emit_encodeSnappyBlockAsm8B:
// One-byte header: (length-1) << 2.
SHLB $0x02, R8
MOVB R8, (AX)
ADDQ $0x01, AX
memmove_match_emit_encodeSnappyBlockAsm8B:
// R8 = dst pointer after the literal bytes.
LEAQ (AX)(R9*1), R8
// genMemMoveShort
// Copies R9 bytes (at most 64) from (DI) to (AX) using overlapping
// fixed-size loads and stores selected by length range.
CMPQ R9, $0x08
JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8:
// Length <= 8: one 8-byte load and store, regardless of the exact length.
MOVQ (DI), R10
MOVQ R10, (AX)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
// First 8 bytes and last 8 bytes (may overlap).
MOVQ (DI), R10
MOVQ -8(DI)(R9*1), DI
MOVQ R10, (AX)
MOVQ DI, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
// First 16 bytes and last 16 bytes (may overlap).
MOVOU (DI), X0
MOVOU -16(DI)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
// First 32 bytes and last 32 bytes (may overlap).
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBlockAsm8B:
MOVQ R8, AX
JMP emit_literal_done_match_emit_encodeSnappyBlockAsm8B
memmove_long_match_emit_encodeSnappyBlockAsm8B:
// R8 = dst pointer after the literal bytes.
LEAQ (AX)(R9*1), R8
// genMemMoveLong
// Load the first and last 32 bytes up front; they are stored last so the
// unaligned head and the tail are always covered.
MOVOU (DI), X0
MOVOU 16(DI), X1
MOVOU -32(DI)(R9*1), X2
MOVOU -16(DI)(R9*1), X3
// R11 = length / 32; R12 = 64 - (AX & 31), so AX + R12 - 32 is a multiple
// of 32 and the MOVOA stores below hit aligned addresses.
MOVQ R9, R11
SHRQ $0x05, R11
MOVQ AX, R10
ANDL $0x0000001f, R10
MOVQ $0x00000040, R12
SUBQ R10, R12
// NOTE(review): DECQ leaves CF unchanged, so JA/JNA here combine ZF from
// DECQ with CF from the preceding SUBQ/ADDQ — verify against the generator.
DECQ R11
JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
LEAQ -32(DI)(R12*1), R10
LEAQ -32(AX)(R12*1), R13
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
MOVOU (R10), X4
MOVOU 16(R10), X5
MOVOA X4, (R13)
MOVOA X5, 16(R13)
ADDQ $0x20, R13
ADDQ $0x20, R10
ADDQ $0x20, R12
DECQ R11
JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
// Copy 32 bytes ending at offset R12, then advance; repeat while
// length >= R12 (unsigned).
MOVOU -32(DI)(R12*1), X4
MOVOU -16(DI)(R12*1), X5
MOVOA X4, -32(AX)(R12*1)
MOVOA X5, -16(AX)(R12*1)
ADDQ $0x20, R12
CMPQ R9, R12
JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
// Store the saved head and tail.
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ R8, AX
emit_literal_done_match_emit_encodeSnappyBlockAsm8B:
match_nolit_loop_encodeSnappyBlockAsm8B:
// repeat = CX - SI (distance between match and candidate); skip the 4 bytes
// already known equal and measure the rest of the match.
// R8 = current pointer, SI = candidate pointer, DI = bytes left in src.
MOVL CX, DI
SUBL SI, DI
MOVL DI, 16(SP)
ADDL $0x04, CX
ADDL $0x04, SI
MOVQ src_len+32(FP), DI
SUBL CX, DI
LEAQ (DX)(CX*1), R8
LEAQ (DX)(SI*1), SI
// matchLen
// R10 = number of equal bytes. Compares 8 bytes at a time while at least 8
// remain; on a difference, BSFQ finds the lowest differing bit and >> 3
// converts it to a byte count.
XORL R10, R10
CMPL DI, $0x08
JL matchlen_single_match_nolit_encodeSnappyBlockAsm8B
matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B:
MOVQ (R8)(R10*1), R9
XORQ (SI)(R10*1), R9
TESTQ R9, R9
JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm8B
BSFQ R9, R9
SARQ $0x03, R9
LEAL (R10)(R9*1), R10
JMP match_nolit_end_encodeSnappyBlockAsm8B
matchlen_loop_match_nolit_encodeSnappyBlockAsm8B:
LEAL -8(DI), DI
LEAL 8(R10), R10
CMPL DI, $0x08
JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B
matchlen_single_match_nolit_encodeSnappyBlockAsm8B:
// Fewer than 8 bytes left: compare one byte at a time.
TESTL DI, DI
JZ match_nolit_end_encodeSnappyBlockAsm8B
matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B:
MOVB (R8)(R10*1), R9
CMPB (SI)(R10*1), R9
JNE match_nolit_end_encodeSnappyBlockAsm8B
LEAL 1(R10), R10
DECL DI
JNZ matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B
match_nolit_end_encodeSnappyBlockAsm8B:
// CX = end of match; SI = offset; R10 = total match length (extension + 4);
// nextEmit = CX.
ADDL R10, CX
MOVL 16(SP), SI
ADDL $0x04, R10
MOVL CX, 12(SP)
// emitCopy
// While length > 64: write 0xee + 16-bit offset and subtract 60 from length.
two_byte_offset_match_nolit_encodeSnappyBlockAsm8B:
CMPL R10, $0x40
JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B
MOVB $0xee, (AX)
MOVW SI, 1(AX)
LEAL -60(R10), R10
ADDQ $0x03, AX
JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B
two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B:
// length >= 12 uses the three-byte form; shorter lengths use two bytes.
CMPL R10, $0x0c
JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B
// Two-byte form: byte 0 = 1 + (length-4)*4 | (offset>>8)<<5, byte 1 = low
// 8 bits of offset.
// NOTE(review): there is no offset range check before this form; presumably
// the caller bounds the input size so offsets fit — confirm.
MOVB $0x01, BL
LEAL -16(BX)(R10*4), R10
MOVB SI, 1(AX)
SHRL $0x08, SI
SHLL $0x05, SI
ORL SI, R10
MOVB R10, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B
emit_copy_three_match_nolit_encodeSnappyBlockAsm8B:
// Three-byte form: byte 0 = 2 + (length-1)*4, then the 16-bit offset.
MOVB $0x02, BL
LEAL -4(BX)(R10*4), R10
MOVB R10, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
match_nolit_emitcopy_end_encodeSnappyBlockAsm8B:
// Stop searching once CX reaches the search limit.
CMPL CX, 8(SP)
JGE emit_remainder_encodeSnappyBlockAsm8B
// DI = 8 bytes of src starting at CX-2.
MOVQ -2(DX)(CX*1), DI
// Destination check: AX must be below the dst limit, otherwise store 0 as
// the result and return.
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeSnappyBlockAsm8B
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeSnappyBlockAsm8B:
// R8 = hash of bytes at CX-2, SI = hash of bytes at CX (DI is shifted so
// its low 4 bytes are the bytes at CX).
MOVQ $0x9e3779b1, R9
MOVQ DI, R8
SHRQ $0x10, DI
MOVQ DI, SI
SHLQ $0x20, R8
IMULQ R9, R8
SHRQ $0x38, R8
SHLQ $0x20, SI
IMULQ R9, SI
SHRQ $0x38, SI
// Read the candidate for CX into SI, then record CX-2 and CX in the table.
LEAL -2(CX), R9
LEAQ 24(SP)(SI*4), R10
MOVL (R10), SI
MOVL R9, 24(SP)(R8*4)
MOVL CX, (R10)
// If the candidate's 4 bytes equal those at CX, handle it as a new match
// with no literal bytes in between.
CMPL (DX)(SI*1), DI
JEQ match_nolit_loop_encodeSnappyBlockAsm8B
INCL CX
JMP search_loop_encodeSnappyBlockAsm8B
emit_remainder_encodeSnappyBlockAsm8B:
// Destination check: AX + 3 + (src_len - nextEmit) must be below the dst
// limit, otherwise store 0 as the result and return.
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 3(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeSnappyBlockAsm8B
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeSnappyBlockAsm8B:
// Emit src[nextEmit:src_len] as a final literal, if non-empty.
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
// CX = literal source pointer, SI = literal length, DX = length - 1
// (DX no longer holds the src base from here on); nextEmit = src_len.
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
// Header size depends on length-1: < 60 one byte, < 256 two bytes,
// otherwise three bytes.
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeSnappyBlockAsm8B
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeSnappyBlockAsm8B
// Three-byte header: 0xf4 followed by length-1 as a 16-bit value.
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B
two_bytes_emit_remainder_encodeSnappyBlockAsm8B:
// Two-byte header: 0xf0 followed by length-1 as one byte.
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
// length-1 < 64 uses the short copy, otherwise the long copy.
CMPL DX, $0x40
JL memmove_emit_remainder_encodeSnappyBlockAsm8B
JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B
one_byte_emit_remainder_encodeSnappyBlockAsm8B:
// One-byte header: (length-1) << 2.
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
memmove_emit_remainder_encodeSnappyBlockAsm8B:
// DX = dst pointer after the literal bytes, BX = length.
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
// Copies BX bytes (at most 64) from (CX) to (AX) using overlapping
// fixed-size loads and stores selected by length range.
CMPQ BX, $0x08
JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8:
// Length <= 8: one 8-byte load and store, regardless of the exact length.
// NOTE(review): this literal ends at src_len, so if a length below 8
// reaches here the load reads past src+src_len — confirm the caller
// guarantees readable slack after src.
MOVQ (CX), SI
MOVQ SI, (AX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16:
// First 8 bytes and last 8 bytes (may overlap).
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32:
// First 16 bytes and last 16 bytes (may overlap).
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64:
// First 32 bytes and last 32 bytes (may overlap).
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
memmove_long_emit_remainder_encodeSnappyBlockAsm8B:
// DX = dst pointer after the literal bytes, BX = length.
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
// Load the first and last 32 bytes up front; they are stored last so the
// unaligned head and the tail are always covered.
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
// DI = length / 32; R8 = 64 - (AX & 31), so AX + R8 - 32 is a multiple
// of 32 and the MOVOA stores below hit aligned addresses.
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
// NOTE(review): DECQ leaves CF unchanged, so JA/JNA here combine ZF from
// DECQ with CF from the preceding SUBQ/ADDQ — verify against the generator.
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
// Copy 32 bytes ending at offset R8, then advance; repeat while
// length >= R8 (unsigned).
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
// Store the saved head and tail.
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B:
// Result = bytes written = AX - dst_base.
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeSnappyBetterBlockAsm(SB), $327704-56
MOVQ dst_base+0(FP), AX
MOVQ $0x00000a00, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeSnappyBetterBlockAsm:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeSnappyBetterBlockAsm
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), CX
LEAQ -9(CX), DX
LEAQ -8(CX), SI
MOVL SI, 8(SP)
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
MOVL $0x00000001, CX
MOVL $0x00000000, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeSnappyBetterBlockAsm:
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x07, SI
CMPL SI, $0x63
JLE check_maxskip_ok_encodeSnappyBetterBlockAsm
LEAL 100(CX), SI
JMP check_maxskip_cont_encodeSnappyBetterBlockAsm
check_maxskip_ok_encodeSnappyBetterBlockAsm:
LEAL 1(CX)(SI*1), SI
check_maxskip_cont_encodeSnappyBetterBlockAsm:
CMPL SI, 8(SP)
JGE emit_remainder_encodeSnappyBetterBlockAsm
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
MOVQ $0x00cf1bbcdcbfa563, R9
MOVQ $0x9e3779b1, SI
MOVQ DI, R10
MOVQ DI, R11
SHLQ $0x08, R10
IMULQ R9, R10
SHRQ $0x30, R10
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x32, R11
MOVL 24(SP)(R10*4), SI
MOVL 262168(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 262168(SP)(R11*4)
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeSnappyBetterBlockAsm
CMPL (DX)(R8*1), DI
JEQ candidateS_match_encodeSnappyBetterBlockAsm
MOVL 20(SP), CX
JMP search_loop_encodeSnappyBetterBlockAsm
candidateS_match_encodeSnappyBetterBlockAsm:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x08, R10
IMULQ R9, R10
SHRQ $0x30, R10
MOVL 24(SP)(R10*4), SI
INCL CX
MOVL CX, 24(SP)(R10*4)
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeSnappyBetterBlockAsm
DECL CX
MOVL R8, SI
candidate_match_encodeSnappyBetterBlockAsm:
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm
match_extend_back_loop_encodeSnappyBetterBlockAsm:
CMPL CX, DI
JLE match_extend_back_end_encodeSnappyBetterBlockAsm
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeSnappyBetterBlockAsm
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm
JMP match_extend_back_loop_encodeSnappyBetterBlockAsm
match_extend_back_end_encodeSnappyBetterBlockAsm:
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 5(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeSnappyBetterBlockAsm
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeSnappyBetterBlockAsm:
MOVL CX, DI
ADDL $0x04, CX
ADDL $0x04, SI
MOVQ src_len+32(FP), R8
SUBL CX, R8
LEAQ (DX)(CX*1), R9
LEAQ (DX)(SI*1), R10
// matchLen
XORL R12, R12
CMPL R8, $0x08
JL matchlen_single_match_nolit_encodeSnappyBetterBlockAsm
matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm:
MOVQ (R9)(R12*1), R11
XORQ (R10)(R12*1), R11
TESTQ R11, R11
JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm
BSFQ R11, R11
SARQ $0x03, R11
LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeSnappyBetterBlockAsm
matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm:
LEAL -8(R8), R8
LEAL 8(R12), R12
CMPL R8, $0x08
JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm
matchlen_single_match_nolit_encodeSnappyBetterBlockAsm:
TESTL R8, R8
JZ match_nolit_end_encodeSnappyBetterBlockAsm
matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm:
MOVB (R9)(R12*1), R11
CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeSnappyBetterBlockAsm
LEAL 1(R12), R12
DECL R8
JNZ matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm
match_nolit_end_encodeSnappyBetterBlockAsm:
MOVL CX, R8
SUBL SI, R8
// Check if repeat
CMPL R12, $0x01
JG match_length_ok_encodeSnappyBetterBlockAsm
CMPL R8, $0x0000ffff
JLE match_length_ok_encodeSnappyBetterBlockAsm
MOVL 20(SP), CX
INCL CX
JMP search_loop_encodeSnappyBetterBlockAsm
match_length_ok_encodeSnappyBetterBlockAsm:
MOVL R8, 16(SP)
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R10
SUBL SI, R9
LEAL -1(R9), SI
CMPL SI, $0x3c
JLT one_byte_match_emit_encodeSnappyBetterBlockAsm
CMPL SI, $0x00000100
JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm
CMPL SI, $0x00010000
JLT three_bytes_match_emit_encodeSnappyBetterBlockAsm
CMPL SI, $0x01000000
JLT four_bytes_match_emit_encodeSnappyBetterBlockAsm
MOVB $0xfc, (AX)
MOVL SI, 1(AX)
ADDQ $0x05, AX
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
four_bytes_match_emit_encodeSnappyBetterBlockAsm:
MOVL SI, R11
SHRL $0x10, R11
MOVB $0xf8, (AX)
MOVW SI, 1(AX)
MOVB R11, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
three_bytes_match_emit_encodeSnappyBetterBlockAsm:
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
two_bytes_match_emit_encodeSnappyBetterBlockAsm:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
CMPL SI, $0x40
JL memmove_match_emit_encodeSnappyBetterBlockAsm
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
one_byte_match_emit_encodeSnappyBetterBlockAsm:
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_match_emit_encodeSnappyBetterBlockAsm:
LEAQ (AX)(R9*1), SI
// genMemMoveShort
CMPQ R9, $0x08
JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8:
MOVQ (R10), R11
MOVQ R11, (AX)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16:
MOVQ (R10), R11
MOVQ -8(R10)(R9*1), R10
MOVQ R11, (AX)
MOVQ R10, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm:
MOVQ SI, AX
JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
memmove_long_match_emit_encodeSnappyBetterBlockAsm:
LEAQ (AX)(R9*1), SI
// genMemMoveLong
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R13
SHRQ $0x05, R13
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R14
SUBQ R11, R14
DECQ R13
JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
LEAQ -32(R10)(R14*1), R11
LEAQ -32(AX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R15)
MOVOA X5, 16(R15)
ADDQ $0x20, R15
ADDQ $0x20, R11
ADDQ $0x20, R14
DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
MOVOU -32(R10)(R14*1), X4
MOVOU -16(R10)(R14*1), X5
MOVOA X4, -32(AX)(R14*1)
MOVOA X5, -16(AX)(R14*1)
ADDQ $0x20, R14
CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ SI, AX
emit_literal_done_match_emit_encodeSnappyBetterBlockAsm:
ADDL R12, CX
ADDL $0x04, R12
MOVL CX, 12(SP)
// emitCopy
CMPL R8, $0x00010000
JL two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm:
CMPL R12, $0x40
JLE four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
MOVB $0xff, (AX)
MOVL R8, 1(AX)
LEAL -64(R12), R12
ADDQ $0x05, AX
CMPL R12, $0x04
JL four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
JMP four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm
four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm:
TESTL R12, R12
JZ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
MOVB $0x03, BL
LEAL -4(BX)(R12*4), R12
MOVB R12, (AX)
MOVL R8, 1(AX)
ADDQ $0x05, AX
JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm:
CMPL R12, $0x40
JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm
MOVB $0xee, (AX)
MOVW R8, 1(AX)
LEAL -60(R12), R12
ADDQ $0x03, AX
JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm:
CMPL R12, $0x0c
JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
CMPL R8, $0x00000800
JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
MOVB $0x01, BL
LEAL -16(BX)(R12*4), R12
MOVB R8, 1(AX)
SHRL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm:
MOVB $0x02, BL
LEAL -4(BX)(R12*4), R12
MOVB R12, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm:
CMPL CX, 8(SP)
JGE emit_remainder_encodeSnappyBetterBlockAsm
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeSnappyBetterBlockAsm:
MOVQ $0x00cf1bbcdcbfa563, SI
MOVQ $0x9e3779b1, R8
INCL DI
MOVQ (DX)(DI*1), R9
MOVQ R9, R10
MOVQ R9, R11
MOVQ R9, R12
SHRQ $0x08, R11
MOVQ R11, R13
SHRQ $0x10, R12
LEAL 1(DI), R14
LEAL 2(DI), R15
MOVQ -2(DX)(CX*1), R9
SHLQ $0x08, R10
IMULQ SI, R10
SHRQ $0x30, R10
SHLQ $0x08, R13
IMULQ SI, R13
SHRQ $0x30, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x32, R11
SHLQ $0x20, R12
IMULQ R8, R12
SHRQ $0x32, R12
MOVL DI, 24(SP)(R10*4)
MOVL R14, 24(SP)(R13*4)
MOVL R14, 262168(SP)(R11*4)
MOVL R15, 262168(SP)(R12*4)
MOVQ R9, R10
MOVQ R9, R11
SHRQ $0x08, R11
MOVQ R11, R13
LEAL -2(CX), R9
LEAL -1(CX), DI
SHLQ $0x08, R10
IMULQ SI, R10
SHRQ $0x30, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x32, R11
SHLQ $0x08, R13
IMULQ SI, R13
SHRQ $0x30, R13
MOVL R9, 24(SP)(R10*4)
MOVL DI, 262168(SP)(R11*4)
MOVL DI, 24(SP)(R13*4)
JMP search_loop_encodeSnappyBetterBlockAsm
emit_remainder_encodeSnappyBetterBlockAsm:
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 5(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeSnappyBetterBlockAsm
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeSnappyBetterBlockAsm:
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm
CMPL DX, $0x00010000
JLT three_bytes_emit_remainder_encodeSnappyBetterBlockAsm
CMPL DX, $0x01000000
JLT four_bytes_emit_remainder_encodeSnappyBetterBlockAsm
MOVB $0xfc, (AX)
MOVL DX, 1(AX)
ADDQ $0x05, AX
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
four_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
MOVL DX, BX
SHRL $0x10, BX
MOVB $0xf8, (AX)
MOVW DX, 1(AX)
MOVB BL, 3(AX)
ADDQ $0x04, AX
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
three_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
two_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
CMPL DX, $0x40
JL memmove_emit_remainder_encodeSnappyBetterBlockAsm
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
one_byte_emit_remainder_encodeSnappyBetterBlockAsm:
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
memmove_emit_remainder_encodeSnappyBetterBlockAsm:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
CMPQ BX, $0x08
JLE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8:
MOVQ (CX), SI
MOVQ SI, (AX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16:
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32:
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64:
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
memmove_long_emit_remainder_encodeSnappyBetterBlockAsm:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm:
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and returns the number of bytes written to dst
// (final dst pointer minus dst_base). Returns 0 whenever the output would
// reach the dst limit computed in the prologue; presumably the caller treats
// 0 as "could not compress" - confirm in the Go wrapper.
//
// Literal lengths and copy offsets are emitted as at most 16-bit values, so
// this variant presumably relies on the caller keeping len(src) within 64KiB
// - TODO confirm against the Go caller.
//
// Stack frame (offsets from SP, frame size 327704 = 24 + 327680):
//   0(SP)       8 bytes       dst limit: dst_base + len(src) - 9 - len(src)>>5
//   8(SP)       4 bytes       search limit: len(src) - 8
//   12(SP)      4 bytes       nextEmit: src offset of the first byte not yet emitted
//   16(SP)      4 bytes       offset of the latest match (written, never read here)
//   20(SP)      4 bytes       next search position, saved while probing
//   24(SP)      262144 bytes  table A: 4-byte entries, 16-bit hash of 7 src bytes
//   262168(SP)  65536 bytes   table B: 4-byte entries, 14-bit hash of 4 src bytes
//
// Register roles in the search/match loop:
//   AX = dst write pointer, CX = current src offset (s), DX = src base pointer.
TEXT ·encodeSnappyBetterBlockAsm64K(SB), $327704-56
MOVQ dst_base+0(FP), AX
// Zero both hash tables: 0xa00 iterations x 128 bytes = 327680 bytes from 24(SP).
MOVQ $0x00000a00, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeSnappyBetterBlockAsm64K:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeSnappyBetterBlockAsm64K
// nextEmit = 0.
MOVL $0x00000000, 12(SP)
// 8(SP) = len(src) - 8; (SP) = dst_base + len(src) - 9 - len(src)>>5.
MOVQ src_len+32(FP), CX
LEAQ -9(CX), DX
LEAQ -8(CX), SI
MOVL SI, 8(SP)
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
// s = 1, 16(SP) = 0, DX = src base.
MOVL $0x00000001, CX
MOVL $0x00000000, 16(SP)
MOVQ src_base+24(FP), DX
// Search loop: probe position s in both tables.
// Next position = s + 1 + (s - nextEmit)>>7, so the step grows with the
// distance since the last emitted byte. Leaves for the remainder once the
// next position reaches the search limit.
search_loop_encodeSnappyBetterBlockAsm64K:
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x07, SI
LEAL 1(CX)(SI*1), SI
CMPL SI, 8(SP)
JGE emit_remainder_encodeSnappyBetterBlockAsm64K
// DI = 8 bytes at src[s]; save the next position in 20(SP).
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
// R9 / SI = multipliers for the table A / table B hashes.
MOVQ $0x00cf1bbcdcbfa563, R9
MOVQ $0x9e3779b1, SI
MOVQ DI, R10
MOVQ DI, R11
// R10 = table A index: low 7 bytes (top byte shifted out), multiplied, top 16 bits kept.
SHLQ $0x08, R10
IMULQ R9, R10
SHRQ $0x30, R10
// R11 = table B index: low 4 bytes, multiplied, top 14 bits kept.
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x32, R11
// Load both candidates (SI from table A, R8 from table B), then record s in both tables.
MOVL 24(SP)(R10*4), SI
MOVL 262168(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 262168(SP)(R11*4)
// Compare 4 bytes at each candidate with the 4 bytes at s; table A candidate first.
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeSnappyBetterBlockAsm64K
CMPL (DX)(R8*1), DI
JEQ candidateS_match_encodeSnappyBetterBlockAsm64K
// No match: advance to the saved next position.
MOVL 20(SP), CX
JMP search_loop_encodeSnappyBetterBlockAsm64K
// Table B candidate matched at s. Before using it, probe table A for s+1
// (DI shifted down one byte) and record s+1 there. If that candidate matches,
// continue with s+1 and the table A candidate; otherwise restore s and use
// the table B candidate (R8).
candidateS_match_encodeSnappyBetterBlockAsm64K:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x08, R10
IMULQ R9, R10
SHRQ $0x30, R10
MOVL 24(SP)(R10*4), SI
INCL CX
MOVL CX, 24(SP)(R10*4)
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeSnappyBetterBlockAsm64K
DECL CX
MOVL R8, SI
// Match found: CX = s, SI = candidate offset.
// Extend the match backwards one byte at a time while s > nextEmit, the
// candidate offset is non-zero, and the preceding bytes are equal.
candidate_match_encodeSnappyBetterBlockAsm64K:
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K
match_extend_back_loop_encodeSnappyBetterBlockAsm64K:
CMPL CX, DI
JLE match_extend_back_end_encodeSnappyBetterBlockAsm64K
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeSnappyBetterBlockAsm64K
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K
JMP match_extend_back_loop_encodeSnappyBetterBlockAsm64K
// Output space check: return 0 unless dst + (s - nextEmit) + 3 is below the dst limit.
match_extend_back_end_encodeSnappyBetterBlockAsm64K:
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 3(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeSnappyBetterBlockAsm64K
MOVQ $0x00000000, ret+48(FP)
RET
// DI = match start. Skip the 4 bytes already compared on both sides.
// R8 = bytes left in src after s+4, R9 = &src[s+4], R10 = &src[candidate+4].
match_dst_size_check_encodeSnappyBetterBlockAsm64K:
MOVL CX, DI
ADDL $0x04, CX
ADDL $0x04, SI
MOVQ src_len+32(FP), R8
SUBL CX, R8
LEAQ (DX)(CX*1), R9
LEAQ (DX)(SI*1), R10
// matchLen
// R12 = number of further matching bytes. Compares 8 bytes at a time; on the
// first difference, the index of the lowest set bit of the XOR divided by 8
// gives the count of equal bytes in that word.
XORL R12, R12
CMPL R8, $0x08
JL matchlen_single_match_nolit_encodeSnappyBetterBlockAsm64K
matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K:
MOVQ (R9)(R12*1), R11
XORQ (R10)(R12*1), R11
TESTQ R11, R11
JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K
BSFQ R11, R11
SARQ $0x03, R11
LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeSnappyBetterBlockAsm64K
matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K:
LEAL -8(R8), R8
LEAL 8(R12), R12
CMPL R8, $0x08
JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K
// Fewer than 8 bytes left: compare byte by byte.
matchlen_single_match_nolit_encodeSnappyBetterBlockAsm64K:
TESTL R8, R8
JZ match_nolit_end_encodeSnappyBetterBlockAsm64K
matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm64K:
MOVB (R9)(R12*1), R11
CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeSnappyBetterBlockAsm64K
LEAL 1(R12), R12
DECL R8
JNZ matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm64K
// R8 = match offset (distance between s and the candidate).
match_nolit_end_encodeSnappyBetterBlockAsm64K:
MOVL CX, R8
SUBL SI, R8
// Check if repeat
// The offset is stored in 16(SP); no repeat comparison follows in this function.
MOVL R8, 16(SP)
// Emit pending literals src[nextEmit:match start], unless there are none.
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
// R9 = literal length, R10 = &src[old nextEmit], SI = length - 1; nextEmit = match start.
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R10
SUBL SI, R9
LEAL -1(R9), SI
// Literal header, selected by length-1:
//   < 60   one byte:    (length-1)<<2
//   < 256  two bytes:   0xf0, then length-1 as one byte
//   else   three bytes: 0xf4, then length-1 as a 16-bit word
CMPL SI, $0x3c
JLT one_byte_match_emit_encodeSnappyBetterBlockAsm64K
CMPL SI, $0x00000100
JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm64K
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
two_bytes_match_emit_encodeSnappyBetterBlockAsm64K:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
// Lengths up to 64 use the short copy, longer ones the long copy.
CMPL SI, $0x40
JL memmove_match_emit_encodeSnappyBetterBlockAsm64K
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
one_byte_match_emit_encodeSnappyBetterBlockAsm64K:
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
// Short literal copy (length <= 64). SI = dst pointer after the literals.
memmove_match_emit_encodeSnappyBetterBlockAsm64K:
LEAQ (AX)(R9*1), SI
// genMemMoveShort
// Dispatch on length: <=8, 9..16, 17..32, 33..64. Each case uses fixed-width
// loads/stores anchored at the start and the end of the range, which may overlap.
CMPQ R9, $0x08
JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
// Always moves a full 8 bytes, even when the length is smaller.
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8:
MOVQ (R10), R11
MOVQ R11, (AX)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
MOVQ (R10), R11
MOVQ -8(R10)(R9*1), R10
MOVQ R11, (AX)
MOVQ R10, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K:
MOVQ SI, AX
JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
// Long literal copy (length >= 65). SI = dst pointer after the literals.
memmove_long_match_emit_encodeSnappyBetterBlockAsm64K:
LEAQ (AX)(R9*1), SI
// genMemMoveLong
// The first and last 32 bytes are loaded into X0-X3 up front and stored last
// with unaligned stores. The middle is copied in 32-byte steps with aligned
// stores (MOVOA): R14 = 64 - (AX & 31), so AX + R14 - 32 is a multiple of 32.
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R13
SHRQ $0x05, R13
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R14
SUBQ R11, R14
// NOTE(review): DECQ leaves CF untouched, so JA/JNA below combine ZF from DECQ
// with CF from the preceding SUBQ/ADDQ. Intent is not obvious from here; code
// is kept exactly as generated.
DECQ R13
JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
LEAQ -32(R10)(R14*1), R11
LEAQ -32(AX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R15)
MOVOA X5, 16(R15)
ADDQ $0x20, R15
ADDQ $0x20, R11
ADDQ $0x20, R14
DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
// Copy 32 bytes ending at offset R14, advance R14 by 32, repeat while length >= R14.
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
MOVOU -32(R10)(R14*1), X4
MOVOU -16(R10)(R14*1), X5
MOVOA X4, -32(AX)(R14*1)
MOVOA X5, -16(AX)(R14*1)
ADDQ $0x20, R14
CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
// Store the saved head and tail blocks.
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ SI, AX
// s = end of match; R12 = full match length (the 4 compared bytes plus the
// extension); nextEmit = s.
emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K:
ADDL R12, CX
ADDL $0x04, R12
MOVL CX, 12(SP)
// emitCopy
// While more than 64 bytes remain, write 0xee followed by the 16-bit offset
// and subtract 60 from the remaining length.
two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K:
CMPL R12, $0x40
JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K
MOVB $0xee, (AX)
MOVW R8, 1(AX)
LEAL -60(R12), R12
ADDQ $0x03, AX
JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K
// Final copy element. Two-byte form only when length < 12 and offset < 2048:
//   byte 0 = 1 + (length-4)*4, OR'd with (offset>>8)<<5; byte 1 = low offset byte.
// Only BL is set before the LEAL; just the low byte of the result is stored.
two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K:
CMPL R12, $0x0c
JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
CMPL R8, $0x00000800
JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
MOVB $0x01, BL
LEAL -16(BX)(R12*4), R12
MOVB R8, 1(AX)
SHRL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K
// Three-byte form: byte 0 = 2 + (length-1)*4, then the 16-bit offset.
emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K:
MOVB $0x02, BL
LEAL -4(BX)(R12*4), R12
MOVB R12, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
// After the copy: go to the remainder if s reached the search limit, and
// return 0 if the dst pointer reached the dst limit.
match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K:
CMPL CX, 8(SP)
JGE emit_remainder_encodeSnappyBetterBlockAsm64K
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K
MOVQ $0x00000000, ret+48(FP)
RET
// Index extra positions around the match before searching again.
// With m = match start (DI before the INCL) and s = match end (CX):
//   table A: m+1, m+2, s-2, s-1
//   table B: m+2, m+3, s-1
match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K:
MOVQ $0x00cf1bbcdcbfa563, SI
MOVQ $0x9e3779b1, R8
INCL DI
// R9 = 8 bytes at src[m+1]; R11/R13 = same shifted by one byte (m+2), R12 by two (m+3).
MOVQ (DX)(DI*1), R9
MOVQ R9, R10
MOVQ R9, R11
MOVQ R9, R12
SHRQ $0x08, R11
MOVQ R11, R13
SHRQ $0x10, R12
LEAL 1(DI), R14
LEAL 2(DI), R15
// R9 is reloaded with the 8 bytes at src[s-2] for the second group below.
MOVQ -2(DX)(CX*1), R9
SHLQ $0x08, R10
IMULQ SI, R10
SHRQ $0x30, R10
SHLQ $0x08, R13
IMULQ SI, R13
SHRQ $0x30, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x32, R11
SHLQ $0x20, R12
IMULQ R8, R12
SHRQ $0x32, R12
MOVL DI, 24(SP)(R10*4)
MOVL R14, 24(SP)(R13*4)
MOVL R14, 262168(SP)(R11*4)
MOVL R15, 262168(SP)(R12*4)
// Second group: R10 hashes the bytes at s-2, R11/R13 the bytes at s-1.
MOVQ R9, R10
MOVQ R9, R11
SHRQ $0x08, R11
MOVQ R11, R13
LEAL -2(CX), R9
LEAL -1(CX), DI
SHLQ $0x08, R10
IMULQ SI, R10
SHRQ $0x30, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x32, R11
SHLQ $0x08, R13
IMULQ SI, R13
SHRQ $0x30, R13
MOVL R9, 24(SP)(R10*4)
MOVL DI, 262168(SP)(R11*4)
MOVL DI, 24(SP)(R13*4)
JMP search_loop_encodeSnappyBetterBlockAsm64K
// Remainder: emit src[nextEmit:] as literals.
// Returns 0 unless dst + (len(src) - nextEmit) + 3 is below the dst limit.
emit_remainder_encodeSnappyBetterBlockAsm64K:
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 3(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeSnappyBetterBlockAsm64K
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeSnappyBetterBlockAsm64K:
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
// Nothing pending when nextEmit == len(src).
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
// From here CX = &src[nextEmit], SI = literal length, DX = length - 1
// (DX no longer holds the src base); nextEmit = len(src).
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
// Literal header: same three encodings as in the match path above.
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K:
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
CMPL DX, $0x40
JL memmove_emit_remainder_encodeSnappyBetterBlockAsm64K
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K:
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
// Short copy (length <= 64). DX = dst pointer after the literals, BX = length.
memmove_emit_remainder_encodeSnappyBetterBlockAsm64K:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
CMPQ BX, $0x08
JLE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
// NOTE(review): this path loads and stores a full 8 bytes even when fewer
// remain, so it can read past src[len(src)-1] when the remainder is shorter
// than 8 bytes - confirm the caller guarantees that this is safe.
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8:
MOVQ (CX), SI
MOVQ SI, (AX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
// Long copy (length >= 65); same scheme as the long copy in the match path:
// head/tail saved in X0-X3, middle copied in 32-byte steps with aligned stores.
memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
// NOTE(review): as above, JA/JNA after DECQ see CF from SUBQ/ADDQ; kept as generated.
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
// Return the number of bytes written: dst pointer - dst_base.
emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K:
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and returns the number of bytes written to dst
// (final dst pointer minus dst_base). Returns 0 whenever the output would
// reach the dst limit computed in the prologue; presumably the caller treats
// 0 as "could not compress" - confirm in the Go wrapper.
//
// Literal lengths and copy offsets are emitted as at most 16-bit values; the
// maximum input size this variant is meant for is not visible here - TODO
// confirm against the Go caller.
//
// Stack frame (offsets from SP, frame size 81944 = 24 + 81920):
//   0(SP)      8 bytes      dst limit: dst_base + len(src) - 9 - len(src)>>5
//   8(SP)      4 bytes      search limit: len(src) - 8
//   12(SP)     4 bytes      nextEmit: src offset of the first byte not yet emitted
//   16(SP)     4 bytes      offset of the latest match (written, never read here)
//   20(SP)     4 bytes      next search position, saved while probing
//   24(SP)     65536 bytes  table A: 4-byte entries, 14-bit hash of 6 src bytes
//   65560(SP)  16384 bytes  table B: 4-byte entries, 12-bit hash of 4 src bytes
//
// Register roles in the search/match loop:
//   AX = dst write pointer, CX = current src offset (s), DX = src base pointer.
TEXT ·encodeSnappyBetterBlockAsm12B(SB), $81944-56
MOVQ dst_base+0(FP), AX
// Zero both hash tables: 0x280 iterations x 128 bytes = 81920 bytes from 24(SP).
MOVQ $0x00000280, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeSnappyBetterBlockAsm12B:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeSnappyBetterBlockAsm12B
// nextEmit = 0.
MOVL $0x00000000, 12(SP)
// 8(SP) = len(src) - 8; (SP) = dst_base + len(src) - 9 - len(src)>>5.
MOVQ src_len+32(FP), CX
LEAQ -9(CX), DX
LEAQ -8(CX), SI
MOVL SI, 8(SP)
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
// s = 1, 16(SP) = 0, DX = src base.
MOVL $0x00000001, CX
MOVL $0x00000000, 16(SP)
MOVQ src_base+24(FP), DX
// Search loop: probe position s in both tables.
// Next position = s + 1 + (s - nextEmit)>>6, so the step grows with the
// distance since the last emitted byte. Leaves for the remainder once the
// next position reaches the search limit.
search_loop_encodeSnappyBetterBlockAsm12B:
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x06, SI
LEAL 1(CX)(SI*1), SI
CMPL SI, 8(SP)
JGE emit_remainder_encodeSnappyBetterBlockAsm12B
// DI = 8 bytes at src[s]; save the next position in 20(SP).
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
// R9 / SI = multipliers for the table A / table B hashes.
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ $0x9e3779b1, SI
MOVQ DI, R10
MOVQ DI, R11
// R10 = table A index: low 6 bytes (top two shifted out), multiplied, top 14 bits kept.
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x32, R10
// R11 = table B index: low 4 bytes, multiplied, top 12 bits kept.
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x34, R11
// Load both candidates (SI from table A, R8 from table B), then record s in both tables.
MOVL 24(SP)(R10*4), SI
MOVL 65560(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 65560(SP)(R11*4)
// Compare 4 bytes at each candidate with the 4 bytes at s; table A candidate first.
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeSnappyBetterBlockAsm12B
CMPL (DX)(R8*1), DI
JEQ candidateS_match_encodeSnappyBetterBlockAsm12B
// No match: advance to the saved next position.
MOVL 20(SP), CX
JMP search_loop_encodeSnappyBetterBlockAsm12B
// Table B candidate matched at s. Before using it, probe table A for s+1
// (DI shifted down one byte) and record s+1 there. If that candidate matches,
// continue with s+1 and the table A candidate; otherwise restore s and use
// the table B candidate (R8).
candidateS_match_encodeSnappyBetterBlockAsm12B:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x32, R10
MOVL 24(SP)(R10*4), SI
INCL CX
MOVL CX, 24(SP)(R10*4)
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeSnappyBetterBlockAsm12B
DECL CX
MOVL R8, SI
// Match found: CX = s, SI = candidate offset.
// Extend the match backwards one byte at a time while s > nextEmit, the
// candidate offset is non-zero, and the preceding bytes are equal.
candidate_match_encodeSnappyBetterBlockAsm12B:
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B
match_extend_back_loop_encodeSnappyBetterBlockAsm12B:
CMPL CX, DI
JLE match_extend_back_end_encodeSnappyBetterBlockAsm12B
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeSnappyBetterBlockAsm12B
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B
JMP match_extend_back_loop_encodeSnappyBetterBlockAsm12B
// Output space check: return 0 unless dst + (s - nextEmit) + 3 is below the dst limit.
match_extend_back_end_encodeSnappyBetterBlockAsm12B:
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 3(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeSnappyBetterBlockAsm12B
MOVQ $0x00000000, ret+48(FP)
RET
// DI = match start. Skip the 4 bytes already compared on both sides.
// R8 = bytes left in src after s+4, R9 = &src[s+4], R10 = &src[candidate+4].
match_dst_size_check_encodeSnappyBetterBlockAsm12B:
MOVL CX, DI
ADDL $0x04, CX
ADDL $0x04, SI
MOVQ src_len+32(FP), R8
SUBL CX, R8
LEAQ (DX)(CX*1), R9
LEAQ (DX)(SI*1), R10
// matchLen
// R12 = number of further matching bytes. Compares 8 bytes at a time; on the
// first difference, the index of the lowest set bit of the XOR divided by 8
// gives the count of equal bytes in that word.
XORL R12, R12
CMPL R8, $0x08
JL matchlen_single_match_nolit_encodeSnappyBetterBlockAsm12B
matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B:
MOVQ (R9)(R12*1), R11
XORQ (R10)(R12*1), R11
TESTQ R11, R11
JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B
BSFQ R11, R11
SARQ $0x03, R11
LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeSnappyBetterBlockAsm12B
matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B:
LEAL -8(R8), R8
LEAL 8(R12), R12
CMPL R8, $0x08
JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B
// Fewer than 8 bytes left: compare byte by byte.
matchlen_single_match_nolit_encodeSnappyBetterBlockAsm12B:
TESTL R8, R8
JZ match_nolit_end_encodeSnappyBetterBlockAsm12B
matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm12B:
MOVB (R9)(R12*1), R11
CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeSnappyBetterBlockAsm12B
LEAL 1(R12), R12
DECL R8
JNZ matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm12B
// R8 = match offset (distance between s and the candidate).
match_nolit_end_encodeSnappyBetterBlockAsm12B:
MOVL CX, R8
SUBL SI, R8
// Check if repeat
// The offset is stored in 16(SP); no repeat comparison follows in this function.
MOVL R8, 16(SP)
// Emit pending literals src[nextEmit:match start], unless there are none.
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B
// R9 = literal length, R10 = &src[old nextEmit], SI = length - 1; nextEmit = match start.
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R10
SUBL SI, R9
LEAL -1(R9), SI
// Literal header, selected by length-1:
//   < 60   one byte:    (length-1)<<2
//   < 256  two bytes:   0xf0, then length-1 as one byte
//   else   three bytes: 0xf4, then length-1 as a 16-bit word
CMPL SI, $0x3c
JLT one_byte_match_emit_encodeSnappyBetterBlockAsm12B
CMPL SI, $0x00000100
JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm12B
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
two_bytes_match_emit_encodeSnappyBetterBlockAsm12B:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
// Lengths up to 64 use the short copy, longer ones the long copy.
CMPL SI, $0x40
JL memmove_match_emit_encodeSnappyBetterBlockAsm12B
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
one_byte_match_emit_encodeSnappyBetterBlockAsm12B:
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
// Short literal copy (length <= 64). SI = dst pointer after the literals.
memmove_match_emit_encodeSnappyBetterBlockAsm12B:
LEAQ (AX)(R9*1), SI
// genMemMoveShort
// Dispatch on length: <=8, 9..16, 17..32, 33..64. Each case uses fixed-width
// loads/stores anchored at the start and the end of the range, which may overlap.
CMPQ R9, $0x08
JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
// Always moves a full 8 bytes, even when the length is smaller.
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8:
MOVQ (R10), R11
MOVQ R11, (AX)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16:
MOVQ (R10), R11
MOVQ -8(R10)(R9*1), R10
MOVQ R11, (AX)
MOVQ R10, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B:
MOVQ SI, AX
JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B
// Long literal copy (length >= 65). SI = dst pointer after the literals.
memmove_long_match_emit_encodeSnappyBetterBlockAsm12B:
LEAQ (AX)(R9*1), SI
// genMemMoveLong
// The first and last 32 bytes are loaded into X0-X3 up front and stored last
// with unaligned stores. The middle is copied in 32-byte steps with aligned
// stores (MOVOA): R14 = 64 - (AX & 31), so AX + R14 - 32 is a multiple of 32.
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R13
SHRQ $0x05, R13
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R14
SUBQ R11, R14
// NOTE(review): DECQ leaves CF untouched, so JA/JNA below combine ZF from DECQ
// with CF from the preceding SUBQ/ADDQ. Intent is not obvious from here; code
// is kept exactly as generated.
DECQ R13
JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
LEAQ -32(R10)(R14*1), R11
LEAQ -32(AX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R15)
MOVOA X5, 16(R15)
ADDQ $0x20, R15
ADDQ $0x20, R11
ADDQ $0x20, R14
DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
// Copy 32 bytes ending at offset R14, advance R14 by 32, repeat while length >= R14.
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32:
MOVOU -32(R10)(R14*1), X4
MOVOU -16(R10)(R14*1), X5
MOVOA X4, -32(AX)(R14*1)
MOVOA X5, -16(AX)(R14*1)
ADDQ $0x20, R14
CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
// Store the saved head and tail blocks.
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ SI, AX
// s = end of match; R12 = full match length (the 4 compared bytes plus the
// extension); nextEmit = s.
emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B:
ADDL R12, CX
ADDL $0x04, R12
MOVL CX, 12(SP)
// emitCopy
// While more than 64 bytes remain, write 0xee followed by the 16-bit offset
// and subtract 60 from the remaining length.
two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B:
CMPL R12, $0x40
JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B
MOVB $0xee, (AX)
MOVW R8, 1(AX)
LEAL -60(R12), R12
ADDQ $0x03, AX
JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B
// Final copy element. Two-byte form only when length < 12 and offset < 2048:
//   byte 0 = 1 + (length-4)*4, OR'd with (offset>>8)<<5; byte 1 = low offset byte.
// Only BL is set before the LEAL; just the low byte of the result is stored.
two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B:
CMPL R12, $0x0c
JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B
CMPL R8, $0x00000800
JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B
MOVB $0x01, BL
LEAL -16(BX)(R12*4), R12
MOVB R8, 1(AX)
SHRL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B
// Three-byte form: byte 0 = 2 + (length-1)*4, then the 16-bit offset.
emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B:
MOVB $0x02, BL
LEAL -4(BX)(R12*4), R12
MOVB R12, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
// After the copy: go to the remainder if s reached the search limit, and
// return 0 if the dst pointer reached the dst limit.
match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B:
CMPL CX, 8(SP)
JGE emit_remainder_encodeSnappyBetterBlockAsm12B
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B
MOVQ $0x00000000, ret+48(FP)
RET
// Index extra positions around the match before searching again.
// With m = match start (DI before the INCL) and s = match end (CX):
//   table A: m+1, m+2, s-2, s-1
//   table B: m+2, m+3, s-1
match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B:
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ $0x9e3779b1, R8
INCL DI
// R9 = 8 bytes at src[m+1]; R11/R13 = same shifted by one byte (m+2), R12 by two (m+3).
MOVQ (DX)(DI*1), R9
MOVQ R9, R10
MOVQ R9, R11
MOVQ R9, R12
SHRQ $0x08, R11
MOVQ R11, R13
SHRQ $0x10, R12
LEAL 1(DI), R14
LEAL 2(DI), R15
// R9 is reloaded with the 8 bytes at src[s-2] for the second group below.
MOVQ -2(DX)(CX*1), R9
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x32, R10
SHLQ $0x10, R13
IMULQ SI, R13
SHRQ $0x32, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x34, R11
SHLQ $0x20, R12
IMULQ R8, R12
SHRQ $0x34, R12
MOVL DI, 24(SP)(R10*4)
MOVL R14, 24(SP)(R13*4)
MOVL R14, 65560(SP)(R11*4)
MOVL R15, 65560(SP)(R12*4)
// Second group: R10 hashes the bytes at s-2, R11/R13 the bytes at s-1.
MOVQ R9, R10
MOVQ R9, R11
SHRQ $0x08, R11
MOVQ R11, R13
LEAL -2(CX), R9
LEAL -1(CX), DI
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x32, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x34, R11
SHLQ $0x10, R13
IMULQ SI, R13
SHRQ $0x32, R13
MOVL R9, 24(SP)(R10*4)
MOVL DI, 65560(SP)(R11*4)
MOVL DI, 24(SP)(R13*4)
JMP search_loop_encodeSnappyBetterBlockAsm12B
// Remainder: emit src[nextEmit:] as literals.
// Returns 0 unless dst + (len(src) - nextEmit) + 3 is below the dst limit.
emit_remainder_encodeSnappyBetterBlockAsm12B:
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 3(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeSnappyBetterBlockAsm12B
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeSnappyBetterBlockAsm12B:
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
// Nothing pending when nextEmit == len(src).
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B
// From here CX = &src[nextEmit], SI = literal length, DX = length - 1
// (DX no longer holds the src base); nextEmit = len(src).
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
// Literal header: same three encodings as in the match path above.
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B:
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
CMPL DX, $0x40
JL memmove_emit_remainder_encodeSnappyBetterBlockAsm12B
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B:
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
// Short copy (length <= 64). DX = dst pointer after the literals, BX = length.
memmove_emit_remainder_encodeSnappyBetterBlockAsm12B:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
CMPQ BX, $0x08
JLE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
// NOTE(review): this path loads and stores a full 8 bytes even when fewer
// remain, so it can read past src[len(src)-1] when the remainder is shorter
// than 8 bytes - confirm the caller guarantees that this is safe.
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8:
MOVQ (CX), SI
MOVQ SI, (AX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16:
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32:
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64:
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B
// Long copy (length >= 65); same scheme as the long copy in the match path:
// head/tail saved in X0-X3, middle copied in 32-byte steps with aligned stores.
memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
// NOTE(review): as above, JA/JNA after DECQ see CF from SUBQ/ADDQ; kept as generated.
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32:
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
// Return the number of bytes written: dst pointer - dst_base.
emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B:
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeSnappyBetterBlockAsm10B(SB), $20504-56
MOVQ dst_base+0(FP), AX
MOVQ $0x000000a0, CX
LEAQ 24(SP), DX
PXOR X0, X0
zero_loop_encodeSnappyBetterBlockAsm10B:
MOVOU X0, (DX)
MOVOU X0, 16(DX)
MOVOU X0, 32(DX)
MOVOU X0, 48(DX)
MOVOU X0, 64(DX)
MOVOU X0, 80(DX)
MOVOU X0, 96(DX)
MOVOU X0, 112(DX)
ADDQ $0x80, DX
DECQ CX
JNZ zero_loop_encodeSnappyBetterBlockAsm10B
MOVL $0x00000000, 12(SP)
MOVQ src_len+32(FP), CX
LEAQ -9(CX), DX
LEAQ -8(CX), SI
MOVL SI, 8(SP)
SHRQ $0x05, CX
SUBL CX, DX
LEAQ (AX)(DX*1), DX
MOVQ DX, (SP)
MOVL $0x00000001, CX
MOVL $0x00000000, 16(SP)
MOVQ src_base+24(FP), DX
search_loop_encodeSnappyBetterBlockAsm10B:
MOVL CX, SI
SUBL 12(SP), SI
SHRL $0x05, SI
LEAL 1(CX)(SI*1), SI
CMPL SI, 8(SP)
JGE emit_remainder_encodeSnappyBetterBlockAsm10B
MOVQ (DX)(CX*1), DI
MOVL SI, 20(SP)
MOVQ $0x0000cf1bbcdcbf9b, R9
MOVQ $0x9e3779b1, SI
MOVQ DI, R10
MOVQ DI, R11
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x34, R10
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x36, R11
MOVL 24(SP)(R10*4), SI
MOVL 16408(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 16408(SP)(R11*4)
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeSnappyBetterBlockAsm10B
CMPL (DX)(R8*1), DI
JEQ candidateS_match_encodeSnappyBetterBlockAsm10B
MOVL 20(SP), CX
JMP search_loop_encodeSnappyBetterBlockAsm10B
candidateS_match_encodeSnappyBetterBlockAsm10B:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x10, R10
IMULQ R9, R10
SHRQ $0x34, R10
MOVL 24(SP)(R10*4), SI
INCL CX
MOVL CX, 24(SP)(R10*4)
CMPL (DX)(SI*1), DI
JEQ candidate_match_encodeSnappyBetterBlockAsm10B
DECL CX
MOVL R8, SI
candidate_match_encodeSnappyBetterBlockAsm10B:
MOVL 12(SP), DI
TESTL SI, SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B
match_extend_back_loop_encodeSnappyBetterBlockAsm10B:
CMPL CX, DI
JLE match_extend_back_end_encodeSnappyBetterBlockAsm10B
MOVB -1(DX)(SI*1), BL
MOVB -1(DX)(CX*1), R8
CMPB BL, R8
JNE match_extend_back_end_encodeSnappyBetterBlockAsm10B
LEAL -1(CX), CX
DECL SI
JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B
JMP match_extend_back_loop_encodeSnappyBetterBlockAsm10B
match_extend_back_end_encodeSnappyBetterBlockAsm10B:
MOVL CX, DI
SUBL 12(SP), DI
LEAQ 3(AX)(DI*1), DI
CMPQ DI, (SP)
JL match_dst_size_check_encodeSnappyBetterBlockAsm10B
MOVQ $0x00000000, ret+48(FP)
RET
match_dst_size_check_encodeSnappyBetterBlockAsm10B:
MOVL CX, DI
ADDL $0x04, CX
ADDL $0x04, SI
MOVQ src_len+32(FP), R8
SUBL CX, R8
LEAQ (DX)(CX*1), R9
LEAQ (DX)(SI*1), R10
// matchLen
XORL R12, R12
CMPL R8, $0x08
JL matchlen_single_match_nolit_encodeSnappyBetterBlockAsm10B
matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B:
MOVQ (R9)(R12*1), R11
XORQ (R10)(R12*1), R11
TESTQ R11, R11
JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B
BSFQ R11, R11
SARQ $0x03, R11
LEAL (R12)(R11*1), R12
JMP match_nolit_end_encodeSnappyBetterBlockAsm10B
matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B:
LEAL -8(R8), R8
LEAL 8(R12), R12
CMPL R8, $0x08
JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B
matchlen_single_match_nolit_encodeSnappyBetterBlockAsm10B:
TESTL R8, R8
JZ match_nolit_end_encodeSnappyBetterBlockAsm10B
matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm10B:
MOVB (R9)(R12*1), R11
CMPB (R10)(R12*1), R11
JNE match_nolit_end_encodeSnappyBetterBlockAsm10B
LEAL 1(R12), R12
DECL R8
JNZ matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm10B
match_nolit_end_encodeSnappyBetterBlockAsm10B:
MOVL CX, R8
SUBL SI, R8
// Check if repeat
MOVL R8, 16(SP)
MOVL 12(SP), SI
CMPL SI, DI
JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B
MOVL DI, R9
MOVL DI, 12(SP)
LEAQ (DX)(SI*1), R10
SUBL SI, R9
LEAL -1(R9), SI
CMPL SI, $0x3c
JLT one_byte_match_emit_encodeSnappyBetterBlockAsm10B
CMPL SI, $0x00000100
JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm10B
MOVB $0xf4, (AX)
MOVW SI, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
two_bytes_match_emit_encodeSnappyBetterBlockAsm10B:
MOVB $0xf0, (AX)
MOVB SI, 1(AX)
ADDQ $0x02, AX
CMPL SI, $0x40
JL memmove_match_emit_encodeSnappyBetterBlockAsm10B
JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
one_byte_match_emit_encodeSnappyBetterBlockAsm10B:
SHLB $0x02, SI
MOVB SI, (AX)
ADDQ $0x01, AX
memmove_match_emit_encodeSnappyBetterBlockAsm10B:
LEAQ (AX)(R9*1), SI
// genMemMoveShort
CMPQ R9, $0x08
JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8
CMPQ R9, $0x10
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
CMPQ R9, $0x20
JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8:
MOVQ (R10), R11
MOVQ R11, (AX)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16:
MOVQ (R10), R11
MOVQ -8(R10)(R9*1), R10
MOVQ R11, (AX)
MOVQ R10, -8(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32:
MOVOU (R10), X0
MOVOU -16(R10)(R9*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(R9*1)
JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64:
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B:
MOVQ SI, AX
JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B
memmove_long_match_emit_encodeSnappyBetterBlockAsm10B:
LEAQ (AX)(R9*1), SI
// genMemMoveLong
MOVOU (R10), X0
MOVOU 16(R10), X1
MOVOU -32(R10)(R9*1), X2
MOVOU -16(R10)(R9*1), X3
MOVQ R9, R13
SHRQ $0x05, R13
MOVQ AX, R11
ANDL $0x0000001f, R11
MOVQ $0x00000040, R14
SUBQ R11, R14
DECQ R13
JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
LEAQ -32(R10)(R14*1), R11
LEAQ -32(AX)(R14*1), R15
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back:
MOVOU (R11), X4
MOVOU 16(R11), X5
MOVOA X4, (R15)
MOVOA X5, 16(R15)
ADDQ $0x20, R15
ADDQ $0x20, R11
ADDQ $0x20, R14
DECQ R13
JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32:
MOVOU -32(R10)(R14*1), X4
MOVOU -16(R10)(R14*1), X5
MOVOA X4, -32(AX)(R14*1)
MOVOA X5, -16(AX)(R14*1)
ADDQ $0x20, R14
CMPQ R9, R14
JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(R9*1)
MOVOU X3, -16(AX)(R9*1)
MOVQ SI, AX
emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B:
ADDL R12, CX
ADDL $0x04, R12
MOVL CX, 12(SP)
// emitCopy
two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B:
CMPL R12, $0x40
JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B
MOVB $0xee, (AX)
MOVW R8, 1(AX)
LEAL -60(R12), R12
ADDQ $0x03, AX
JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B
two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B:
CMPL R12, $0x0c
JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B
CMPL R8, $0x00000800
JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B
MOVB $0x01, BL
LEAL -16(BX)(R12*4), R12
MOVB R8, 1(AX)
SHRL $0x08, R8
SHLL $0x05, R8
ORL R8, R12
MOVB R12, (AX)
ADDQ $0x02, AX
JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B
emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B:
MOVB $0x02, BL
LEAL -4(BX)(R12*4), R12
MOVB R12, (AX)
MOVW R8, 1(AX)
ADDQ $0x03, AX
match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B:
CMPL CX, 8(SP)
JGE emit_remainder_encodeSnappyBetterBlockAsm10B
CMPQ AX, (SP)
JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B
MOVQ $0x00000000, ret+48(FP)
RET
match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B:
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ $0x9e3779b1, R8
INCL DI
MOVQ (DX)(DI*1), R9
MOVQ R9, R10
MOVQ R9, R11
MOVQ R9, R12
SHRQ $0x08, R11
MOVQ R11, R13
SHRQ $0x10, R12
LEAL 1(DI), R14
LEAL 2(DI), R15
MOVQ -2(DX)(CX*1), R9
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x34, R10
SHLQ $0x10, R13
IMULQ SI, R13
SHRQ $0x34, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x36, R11
SHLQ $0x20, R12
IMULQ R8, R12
SHRQ $0x36, R12
MOVL DI, 24(SP)(R10*4)
MOVL R14, 24(SP)(R13*4)
MOVL R14, 16408(SP)(R11*4)
MOVL R15, 16408(SP)(R12*4)
MOVQ R9, R10
MOVQ R9, R11
SHRQ $0x08, R11
MOVQ R11, R13
LEAL -2(CX), R9
LEAL -1(CX), DI
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x34, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x36, R11
SHLQ $0x10, R13
IMULQ SI, R13
SHRQ $0x34, R13
MOVL R9, 24(SP)(R10*4)
MOVL DI, 16408(SP)(R11*4)
MOVL DI, 24(SP)(R13*4)
JMP search_loop_encodeSnappyBetterBlockAsm10B
emit_remainder_encodeSnappyBetterBlockAsm10B:
MOVQ src_len+32(FP), CX
SUBL 12(SP), CX
LEAQ 3(AX)(CX*1), CX
CMPQ CX, (SP)
JL emit_remainder_ok_encodeSnappyBetterBlockAsm10B
MOVQ $0x00000000, ret+48(FP)
RET
emit_remainder_ok_encodeSnappyBetterBlockAsm10B:
MOVQ src_len+32(FP), CX
MOVL 12(SP), BX
CMPL BX, CX
JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B
MOVL CX, SI
MOVL CX, 12(SP)
LEAQ (DX)(BX*1), CX
SUBL BX, SI
LEAL -1(SI), DX
CMPL DX, $0x3c
JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B
CMPL DX, $0x00000100
JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B
MOVB $0xf4, (AX)
MOVW DX, 1(AX)
ADDQ $0x03, AX
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B:
MOVB $0xf0, (AX)
MOVB DL, 1(AX)
ADDQ $0x02, AX
CMPL DX, $0x40
JL memmove_emit_remainder_encodeSnappyBetterBlockAsm10B
JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B:
SHLB $0x02, DL
MOVB DL, (AX)
ADDQ $0x01, AX
memmove_emit_remainder_encodeSnappyBetterBlockAsm10B:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveShort
CMPQ BX, $0x08
JLE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8
CMPQ BX, $0x10
JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
CMPQ BX, $0x20
JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8:
MOVQ (CX), SI
MOVQ SI, (AX)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16:
MOVQ (CX), SI
MOVQ -8(CX)(BX*1), CX
MOVQ SI, (AX)
MOVQ CX, -8(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32:
MOVOU (CX), X0
MOVOU -16(CX)(BX*1), X1
MOVOU X0, (AX)
MOVOU X1, -16(AX)(BX*1)
JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64:
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B:
MOVQ DX, AX
JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B
memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B:
LEAQ (AX)(SI*1), DX
MOVL SI, BX
// genMemMoveLong
MOVOU (CX), X0
MOVOU 16(CX), X1
MOVOU -32(CX)(BX*1), X2
MOVOU -16(CX)(BX*1), X3
MOVQ BX, DI
SHRQ $0x05, DI
MOVQ AX, SI
ANDL $0x0000001f, SI
MOVQ $0x00000040, R8
SUBQ SI, R8
DECQ DI
JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
LEAQ -32(CX)(R8*1), SI
LEAQ -32(AX)(R8*1), R9
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back:
MOVOU (SI), X4
MOVOU 16(SI), X5
MOVOA X4, (R9)
MOVOA X5, 16(R9)
ADDQ $0x20, R9
ADDQ $0x20, SI
ADDQ $0x20, R8
DECQ DI
JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back
emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32:
MOVOU -32(CX)(R8*1), X4
MOVOU -16(CX)(R8*1), X5
MOVOA X4, -32(AX)(R8*1)
MOVOA X5, -16(AX)(R8*1)
ADDQ $0x20, R8
CMPQ BX, R8
JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
MOVOU X0, (AX)
MOVOU X1, 16(AX)
MOVOU X2, -32(AX)(BX*1)
MOVOU X3, -16(AX)(BX*1)
MOVQ DX, AX
emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B:
MOVQ dst_base+0(FP), CX
SUBQ CX, AX
MOVQ AX, ret+48(FP)
RET
// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst as a sequence of literal and copy elements and stores
// the number of bytes written in ret. ret is set to 0 whenever the output
// pointer would reach the destination limit computed at entry.
//
// Frame layout (as used by the code below):
//      0(SP)  8 bytes  destination limit pointer: dst_base + src_len - 9 - src_len>>5
//      8(SP)  4 bytes  sLimit = src_len - 8
//     12(SP)  4 bytes  nextEmit: start of the literals not yet written
//     16(SP)  4 bytes  last match offset (written here, never read back in this function)
//     20(SP)  4 bytes  nextS: position to try after a failed lookup
//     24(SP)  4096 B   long hash table  (10-bit index, 4-byte entries, hashes 6 bytes)
//   4120(SP)  1024 B   short hash table ( 8-bit index, 4-byte entries, hashes 4 bytes)
//
// Register roles in the main loop: AX = dst write pointer, DX = src base,
// CX = s (current position), SI = candidate position.
//
// NOTE(review): the 2-byte copy form is emitted without checking that the
// offset is below 2048 and literal lengths are stored in at most 16 bits, so
// the caller presumably restricts src to small inputs - confirm.
// NOTE: this file is generated (see the header at the top of the file); a
// permanent version of the remainder-copy fix below belongs in the generator.
TEXT ·encodeSnappyBetterBlockAsm8B(SB), $5144-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000028, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Clear both hash tables: 0x28 iterations of 128 bytes = 5120 bytes from 24(SP).
zero_loop_encodeSnappyBetterBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ $0x80, DX
	DECQ CX
	JNZ zero_loop_encodeSnappyBetterBlockAsm8B

	// nextEmit = 0; sLimit = src_len - 8; dst limit = dst + src_len - 9 - src_len>>5.
	MOVL $0x00000000, 12(SP)
	MOVQ src_len+32(FP), CX
	LEAQ -9(CX), DX
	LEAQ -8(CX), SI
	MOVL SI, 8(SP)
	SHRQ $0x05, CX
	SUBL CX, DX
	LEAQ (AX)(DX*1), DX
	MOVQ DX, (SP)

	// s = 1, stored offset = 0, DX = src base for the rest of the function.
	MOVL $0x00000001, CX
	MOVL $0x00000000, 16(SP)
	MOVQ src_base+24(FP), DX

search_loop_encodeSnappyBetterBlockAsm8B:
	// nextS = s + 1 + (s - nextEmit)>>4; leave the loop once nextS >= sLimit.
	MOVL CX, SI
	SUBL 12(SP), SI
	SHRL $0x04, SI
	LEAL 1(CX)(SI*1), SI
	CMPL SI, 8(SP)
	JGE emit_remainder_encodeSnappyBetterBlockAsm8B

	// DI = 8 bytes at src[s]. R10 = long hash (low 6 bytes), R11 = short hash (low 4 bytes).
	MOVQ (DX)(CX*1), DI
	MOVL SI, 20(SP)
	MOVQ $0x0000cf1bbcdcbf9b, R9
	MOVQ $0x9e3779b1, SI
	MOVQ DI, R10
	MOVQ DI, R11
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x36, R10
	SHLQ $0x20, R11
	IMULQ SI, R11
	SHRQ $0x38, R11

	// Fetch both candidates (SI = long, R8 = short) and record s in both tables.
	MOVL 24(SP)(R10*4), SI
	MOVL 4120(SP)(R11*4), R8
	MOVL CX, 24(SP)(R10*4)
	MOVL CX, 4120(SP)(R11*4)

	// A candidate matches when its first 4 bytes equal the 4 bytes at s.
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeSnappyBetterBlockAsm8B
	CMPL (DX)(R8*1), DI
	JEQ candidateS_match_encodeSnappyBetterBlockAsm8B
	MOVL 20(SP), CX
	JMP search_loop_encodeSnappyBetterBlockAsm8B

candidateS_match_encodeSnappyBetterBlockAsm8B:
	// Short candidate matched. Probe the long table for s+1 (DI>>8 is the data
	// at s+1) and prefer that candidate if it matches; otherwise undo the
	// increment of s and fall back to the short candidate in R8.
	SHRQ $0x08, DI
	MOVQ DI, R10
	SHLQ $0x10, R10
	IMULQ R9, R10
	SHRQ $0x36, R10
	MOVL 24(SP)(R10*4), SI
	INCL CX
	MOVL CX, 24(SP)(R10*4)
	CMPL (DX)(SI*1), DI
	JEQ candidate_match_encodeSnappyBetterBlockAsm8B
	DECL CX
	MOVL R8, SI

candidate_match_encodeSnappyBetterBlockAsm8B:
	// Extend the match backwards while s > nextEmit, candidate > 0 and the
	// preceding bytes are equal.
	MOVL 12(SP), DI
	TESTL SI, SI
	JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B

match_extend_back_loop_encodeSnappyBetterBlockAsm8B:
	CMPL CX, DI
	JLE match_extend_back_end_encodeSnappyBetterBlockAsm8B
	MOVB -1(DX)(SI*1), BL
	MOVB -1(DX)(CX*1), R8
	CMPB BL, R8
	JNE match_extend_back_end_encodeSnappyBetterBlockAsm8B
	LEAL -1(CX), CX
	DECL SI
	JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B
	JMP match_extend_back_loop_encodeSnappyBetterBlockAsm8B

match_extend_back_end_encodeSnappyBetterBlockAsm8B:
	// Give up (return 0) unless dst + 3 + pending literal length is below the dst limit.
	MOVL CX, DI
	SUBL 12(SP), DI
	LEAQ 3(AX)(DI*1), DI
	CMPQ DI, (SP)
	JL match_dst_size_check_encodeSnappyBetterBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeSnappyBetterBlockAsm8B:
	// DI = match start. Skip the 4 bytes already known to match on both sides,
	// then count further equal bytes into R12 (R8 = bytes left in src after s).
	MOVL CX, DI
	ADDL $0x04, CX
	ADDL $0x04, SI
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(SI*1), R10

	// matchLen
	XORL R12, R12
	CMPL R8, $0x08
	JL matchlen_single_match_nolit_encodeSnappyBetterBlockAsm8B

matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B:
	// Compare 8 bytes at a time; on a difference the index of the lowest set
	// bit of the XOR divided by 8 is the number of equal bytes.
	MOVQ (R9)(R12*1), R11
	XORQ (R10)(R12*1), R11
	TESTQ R11, R11
	JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B
	BSFQ R11, R11
	SARQ $0x03, R11
	LEAL (R12)(R11*1), R12
	JMP match_nolit_end_encodeSnappyBetterBlockAsm8B

matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B:
	LEAL -8(R8), R8
	LEAL 8(R12), R12
	CMPL R8, $0x08
	JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B

matchlen_single_match_nolit_encodeSnappyBetterBlockAsm8B:
	// Fewer than 8 bytes left: compare one byte at a time.
	TESTL R8, R8
	JZ match_nolit_end_encodeSnappyBetterBlockAsm8B

matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm8B:
	MOVB (R9)(R12*1), R11
	CMPB (R10)(R12*1), R11
	JNE match_nolit_end_encodeSnappyBetterBlockAsm8B
	LEAL 1(R12), R12
	DECL R8
	JNZ matchlen_single_loopback_match_nolit_encodeSnappyBetterBlockAsm8B

match_nolit_end_encodeSnappyBetterBlockAsm8B:
	// R8 = offset = s - candidate.
	MOVL CX, R8
	SUBL SI, R8

	// Check if repeat
	MOVL R8, 16(SP)

	// Emit src[nextEmit:matchStart] as a literal unless it is empty.
	// R10 = literal source, R9 = literal length, SI = length - 1.
	MOVL 12(SP), SI
	CMPL SI, DI
	JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B
	MOVL DI, R9
	MOVL DI, 12(SP)
	LEAQ (DX)(SI*1), R10
	SUBL SI, R9
	LEAL -1(R9), SI

	// Header: (len-1)<<2 in one byte when len-1 < 60; 0xf0 plus one length
	// byte when len-1 < 256; otherwise 0xf4 plus a 16-bit length.
	CMPL SI, $0x3c
	JLT one_byte_match_emit_encodeSnappyBetterBlockAsm8B
	CMPL SI, $0x00000100
	JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B

two_bytes_match_emit_encodeSnappyBetterBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, AX
	CMPL SI, $0x40
	JL memmove_match_emit_encodeSnappyBetterBlockAsm8B
	JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B

one_byte_match_emit_encodeSnappyBetterBlockAsm8B:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeSnappyBetterBlockAsm8B:
	// SI = dst position after the literal.
	LEAQ (AX)(R9*1), SI

	// genMemMoveShort
	// Lengths up to 8 are copied with one full 8-byte load/store. The literal
	// is followed in src by the match and the 8 bytes that the search loop
	// already loaded at s, so this load stays inside src.
	CMPQ R9, $0x08
	JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8
	CMPQ R9, $0x10
	JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
	CMPQ R9, $0x20
	JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8:
	MOVQ (R10), R11
	MOVQ R11, (AX)
	JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
	MOVQ (R10), R11
	MOVQ -8(R10)(R9*1), R10
	MOVQ R11, (AX)
	MOVQ R10, -8(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
	MOVOU (R10), X0
	MOVOU -16(R10)(R9*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R9*1)
	JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)

memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B:
	MOVQ SI, AX
	JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B

memmove_long_match_emit_encodeSnappyBetterBlockAsm8B:
	LEAQ (AX)(R9*1), SI

	// genMemMoveLong
	// The first and last 32 bytes are loaded up front (X0..X3) and stored
	// last; the loops in between store 32-byte chunks to 32-byte aligned
	// destination addresses (R14 starts at 64 - (AX & 31)).
	MOVOU (R10), X0
	MOVOU 16(R10), X1
	MOVOU -32(R10)(R9*1), X2
	MOVOU -16(R10)(R9*1), X3
	MOVQ R9, R13
	SHRQ $0x05, R13
	MOVQ AX, R11
	ANDL $0x0000001f, R11
	MOVQ $0x00000040, R14
	SUBQ R11, R14
	DECQ R13
	JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(R10)(R14*1), R11
	LEAQ -32(AX)(R14*1), R15

emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
	MOVOU (R11), X4
	MOVOU 16(R11), X5
	MOVOA X4, (R15)
	MOVOA X5, 16(R15)
	ADDQ $0x20, R15
	ADDQ $0x20, R11
	ADDQ $0x20, R14
	DECQ R13
	JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(R10)(R14*1), X4
	MOVOU -16(R10)(R14*1), X5
	MOVOA X4, -32(AX)(R14*1)
	MOVOA X5, -16(AX)(R14*1)
	ADDQ $0x20, R14
	CMPQ R9, R14
	JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R9*1)
	MOVOU X3, -16(AX)(R9*1)
	MOVQ SI, AX

emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B:
	// s += extra match length; R12 = total match length; nextEmit = s.
	ADDL R12, CX
	ADDL $0x04, R12
	MOVL CX, 12(SP)

	// emitCopy
two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B:
	// While more than 64 bytes remain, write 0xee plus the 16-bit offset and
	// reduce the length by 60.
	CMPL R12, $0x40
	JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B
	MOVB $0xee, (AX)
	MOVW R8, 1(AX)
	LEAL -60(R12), R12
	ADDQ $0x03, AX
	JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B

two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B:
	CMPL R12, $0x0c
	JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B

	// 2-byte form: tag = 1 | (length-4)<<2 | (offset>>8)<<5, then the low offset byte.
	// Only the low byte of R12 is stored, so the other bytes of BX do not matter.
	MOVB $0x01, BL
	LEAL -16(BX)(R12*4), R12
	MOVB R8, 1(AX)
	SHRL $0x08, R8
	SHLL $0x05, R8
	ORL R8, R12
	MOVB R12, (AX)
	ADDQ $0x02, AX
	JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B

emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B:
	// 3-byte form: tag = 2 | (length-1)<<2, then the 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(R12*4), R12
	MOVB R12, (AX)
	MOVW R8, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B:
	// Finish with the remainder once s >= sLimit; return 0 once dst reached its limit.
	CMPL CX, 8(SP)
	JGE emit_remainder_encodeSnappyBetterBlockAsm8B
	CMPQ AX, (SP)
	JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B:
	// Index positions around the match. DI becomes matchStart+1.
	// From the 8 bytes at matchStart+1:
	//   long[hash6(matchStart+1)]  = matchStart+1
	//   long[hash6(matchStart+2)]  = matchStart+2
	//   short[hash4(matchStart+2)] = matchStart+2
	//   short[hash4(matchStart+3)] = matchStart+3
	MOVQ $0x0000cf1bbcdcbf9b, SI
	MOVQ $0x9e3779b1, R8
	INCL DI
	MOVQ (DX)(DI*1), R9
	MOVQ R9, R10
	MOVQ R9, R11
	MOVQ R9, R12
	SHRQ $0x08, R11
	MOVQ R11, R13
	SHRQ $0x10, R12
	LEAL 1(DI), R14
	LEAL 2(DI), R15
	MOVQ -2(DX)(CX*1), R9
	SHLQ $0x10, R10
	IMULQ SI, R10
	SHRQ $0x36, R10
	SHLQ $0x10, R13
	IMULQ SI, R13
	SHRQ $0x36, R13
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x38, R11
	SHLQ $0x20, R12
	IMULQ R8, R12
	SHRQ $0x38, R12
	MOVL DI, 24(SP)(R10*4)
	MOVL R14, 24(SP)(R13*4)
	MOVL R14, 4120(SP)(R11*4)
	MOVL R15, 4120(SP)(R12*4)

	// From the 8 bytes at s-2:
	//   long[hash6(s-2)]  = s-2
	//   short[hash4(s-1)] = s-1
	//   long[hash6(s-1)]  = s-1
	MOVQ R9, R10
	MOVQ R9, R11
	SHRQ $0x08, R11
	MOVQ R11, R13
	LEAL -2(CX), R9
	LEAL -1(CX), DI
	SHLQ $0x10, R10
	IMULQ SI, R10
	SHRQ $0x36, R10
	SHLQ $0x20, R11
	IMULQ R8, R11
	SHRQ $0x38, R11
	SHLQ $0x10, R13
	IMULQ SI, R13
	SHRQ $0x36, R13
	MOVL R9, 24(SP)(R10*4)
	MOVL DI, 4120(SP)(R11*4)
	MOVL DI, 24(SP)(R13*4)
	JMP search_loop_encodeSnappyBetterBlockAsm8B

emit_remainder_encodeSnappyBetterBlockAsm8B:
	// Return 0 unless dst + 3 + (src_len - nextEmit) is below the dst limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 3(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL emit_remainder_ok_encodeSnappyBetterBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeSnappyBetterBlockAsm8B:
	// Emit src[nextEmit:] as the final literal unless it is empty.
	// CX = literal source, SI = literal length, DX = length - 1
	// (the src base in DX is no longer needed from here on).
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B
	MOVL CX, SI
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, SI
	LEAL -1(SI), DX
	CMPL DX, $0x3c
	JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B
	CMPL DX, $0x00000100
	JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B

two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL memmove_emit_remainder_encodeSnappyBetterBlockAsm8B
	JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B

one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeSnappyBetterBlockAsm8B:
	// DX = dst position after the literal, BX = literal length (1..64).
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveShort
	// The literal ends exactly at src_base+src_len, so every load must stay
	// within the BX remaining bytes. Lengths below 8 therefore use exact-size
	// 1/2/3/4..7 byte copies instead of a full 8-byte load, which would read
	// up to 7 bytes past the end of src.
	CMPQ BX, $0x03
	JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2
	JE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3
	CMPQ BX, $0x08
	JB emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
	JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2:
	// First and last byte (the same byte when the length is 1).
	MOVB (CX), SI
	MOVB -1(CX)(BX*1), CL
	MOVB SI, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3:
	MOVW (CX), SI
	MOVB 2(CX), CL
	MOVW SI, (AX)
	MOVB CL, 2(AX)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7:
	// Two possibly overlapping 4-byte moves cover 4..7 bytes.
	MOVL (CX), SI
	MOVL -4(CX)(BX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(BX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B:
	MOVQ DX, AX
	JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B

memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B:
	LEAQ (AX)(SI*1), DX
	MOVL SI, BX

	// genMemMoveLong
	// Same scheme as the long literal copy above: head and tail 32 bytes are
	// loaded first and stored last, the middle is stored in aligned 32-byte chunks.
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ BX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ BX, R8
	JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ DX, AX

emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B:
	// ret = bytes written = dst write pointer - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET
// func emitLiteral(dst []byte, lit []byte) int
// Requires: SSE2
//
// Writes a literal header followed by the bytes of lit to dst and returns the
// total number of bytes written (header size + len(lit)). Returns 0 and writes
// nothing when lit is empty. No bounds check on dst is performed here.
//
// Header, chosen from n = len(lit)-1 (compared as a 32-bit value):
//   n < 60          1 byte : n<<2
//   n < 1<<8        2 bytes: 0xf0, n
//   n < 1<<16       3 bytes: 0xf4, n as 16 bits
//   n < 1<<24       4 bytes: 0xf8, n as 24 bits
//   otherwise       5 bytes: 0xfc, n as 32 bits
//
// Registers: AX = dst write pointer, CX = lit pointer, DX = len(lit),
// BX = return value, SI = n / scratch.
TEXT ·emitLiteral(SB), NOSPLIT, $0-56
	MOVQ lit_len+32(FP), DX
	MOVQ dst_base+0(FP), AX
	MOVQ lit_base+24(FP), CX
	TESTQ DX, DX
	JZ emit_literal_end_standalone_skip
	MOVL DX, BX
	LEAL -1(DX), SI
	CMPL SI, $0x3c
	JLT one_byte_standalone
	CMPL SI, $0x00000100
	JLT two_bytes_standalone
	CMPL SI, $0x00010000
	JLT three_bytes_standalone
	CMPL SI, $0x01000000
	JLT four_bytes_standalone
	MOVB $0xfc, (AX)
	MOVL SI, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP memmove_long_standalone

four_bytes_standalone:
	// 24-bit length: low 16 bits via MOVW, bits 16..23 via DI.
	MOVL SI, DI
	SHRL $0x10, DI
	MOVB $0xf8, (AX)
	MOVW SI, 1(AX)
	MOVB DI, 3(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP memmove_long_standalone

three_bytes_standalone:
	MOVB $0xf4, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP memmove_long_standalone

two_bytes_standalone:
	MOVB $0xf0, (AX)
	MOVB SI, 1(AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX

	// len(lit) <= 64 still fits the short copy below.
	CMPL SI, $0x40
	JL memmove_standalone
	JMP memmove_long_standalone

one_byte_standalone:
	SHLB $0x02, SI
	MOVB SI, (AX)
	ADDQ $0x01, BX
	ADDQ $0x01, AX

memmove_standalone:
	// genMemMoveShort
	// Exact-size copy for 1..64 bytes; each size class uses a head and a tail
	// move that may overlap, so nothing outside lit is read and nothing
	// outside the literal's destination range is written.
	CMPQ DX, $0x03
	JB emit_lit_memmove_standalone_memmove_move_1or2
	JE emit_lit_memmove_standalone_memmove_move_3
	CMPQ DX, $0x08
	JB emit_lit_memmove_standalone_memmove_move_4through7
	CMPQ DX, $0x10
	JBE emit_lit_memmove_standalone_memmove_move_8through16
	CMPQ DX, $0x20
	JBE emit_lit_memmove_standalone_memmove_move_17through32
	JMP emit_lit_memmove_standalone_memmove_move_33through64

emit_lit_memmove_standalone_memmove_move_1or2:
	MOVB (CX), SI
	MOVB -1(CX)(DX*1), CL
	MOVB SI, (AX)
	MOVB CL, -1(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_3:
	MOVW (CX), SI
	MOVB 2(CX), CL
	MOVW SI, (AX)
	MOVB CL, 2(AX)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_4through7:
	MOVL (CX), SI
	MOVL -4(CX)(DX*1), CX
	MOVL SI, (AX)
	MOVL CX, -4(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_8through16:
	MOVQ (CX), SI
	MOVQ -8(CX)(DX*1), CX
	MOVQ SI, (AX)
	MOVQ CX, -8(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(DX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP emit_literal_end_standalone

memmove_long_standalone:
	// genMemMoveLong
	// Reached only with len(lit) > 64. The first and last 32 bytes are loaded
	// up front (X0..X3) and stored last; in between, 32-byte chunks are
	// stored to 32-byte aligned destination addresses (R8 starts at
	// 64 - (AX & 31)).
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVQ DX, DI
	SHRQ $0x05, DI
	MOVQ AX, SI
	ANDL $0x0000001f, SI
	MOVQ $0x00000040, R8
	SUBQ SI, R8
	DECQ DI
	JA emit_lit_memmove_long_standalonelarge_forward_sse_loop_32
	LEAQ -32(CX)(R8*1), SI
	LEAQ -32(AX)(R8*1), R9

emit_lit_memmove_long_standalonelarge_big_loop_back:
	MOVOU (SI), X4
	MOVOU 16(SI), X5
	MOVOA X4, (R9)
	MOVOA X5, 16(R9)
	ADDQ $0x20, R9
	ADDQ $0x20, SI
	ADDQ $0x20, R8
	DECQ DI
	JNA emit_lit_memmove_long_standalonelarge_big_loop_back

emit_lit_memmove_long_standalonelarge_forward_sse_loop_32:
	MOVOU -32(CX)(R8*1), X4
	MOVOU -16(CX)(R8*1), X5
	MOVOA X4, -32(AX)(R8*1)
	MOVOA X5, -16(AX)(R8*1)
	ADDQ $0x20, R8
	CMPQ DX, R8
	JAE emit_lit_memmove_long_standalonelarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP emit_literal_end_standalone

emit_literal_end_standalone_skip:
	// Empty literal: nothing written.
	XORQ BX, BX

emit_literal_end_standalone:
	MOVQ BX, ret+48(FP)
	RET
// func emitRepeat(dst []byte, offset int, length int) int
//
// Writes one or more repeat elements for `length` bytes to dst and returns the
// number of bytes written. No bounds check on dst is performed here.
//
// With L = length-4 the element is chosen as follows:
//   length <= 8                    2 bytes: 16-bit word L<<2 | 1
//   length < 12 and offset < 2048  2 bytes: (offset>>8)<<5 | L<<2 | 1, low offset byte
//   L < 0x104                      3 bytes: 0x15, 0x00, L-4
//   L < 0x10100                    4 bytes: 0x19, 0x00, 16-bit L-256
//   L < 0x100ffff                  5 bytes: 0x1d, 0x00, 24-bit L-65536
//   otherwise                      5 bytes: 0x1d, 0x00, 0xfb, 0xff, 0xff, then the
//                                  remaining length (L - 0x100fffb) is encoded again.
//
// Registers: AX = dst write pointer, BX = bytes written, CX = offset,
// DX = length / L, SI = scratch.
TEXT ·emitRepeat(SB), NOSPLIT, $0-48
	MOVQ length+32(FP), DX
	MOVQ offset+24(FP), CX
	MOVQ dst_base+0(FP), AX
	XORQ BX, BX

	// emitRepeat
encode_chunk:
	MOVL DX, SI
	LEAL -4(DX), DX
	CMPL SI, $0x08
	JLE len_in_tag
	CMPL SI, $0x0c
	JGE explicit_len
	CMPL CX, $0x00000800
	JLT len_and_offset_in_tag

explicit_len:
	CMPL DX, $0x00000104
	JLT len_one_byte
	CMPL DX, $0x00010100
	JLT len_two_bytes
	CMPL DX, $0x0100ffff
	JLT len_three_bytes

	// Too long for a single element: emit the largest one and go around again
	// with what is left.
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	LEAL -16842747(DX), DX
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP encode_chunk

len_three_bytes:
	MOVW $0x001d, (AX)
	LEAL -65536(DX), DX
	MOVW DX, 2(AX)

	// Bits 16..23 of the length go into the fifth byte; the offset in CX is
	// no longer needed.
	MOVL DX, CX
	SARL $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP done

len_two_bytes:
	MOVW $0x0019, (AX)
	LEAL -256(DX), DX
	MOVW DX, 2(AX)
	ADDQ $0x04, AX
	ADDQ $0x04, BX
	JMP done

len_one_byte:
	MOVW $0x0015, (AX)
	LEAL -4(DX), DX
	MOVB DL, 2(AX)
	ADDQ $0x03, AX
	ADDQ $0x03, BX
	JMP done

len_in_tag:
	// L <= 4 here, so the high byte of the stored word is zero.
	SHLL $0x02, DX
	ORL $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, AX
	ADDQ $0x02, BX
	JMP done

len_and_offset_in_tag:
	MOVB CL, 1(AX)
	XORQ SI, SI
	LEAL 1(SI)(DX*4), DX
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, AX
	ADDQ $0x02, BX

done:
	MOVQ BX, ret+40(FP)
	RET
// func emitCopy(dst []byte, offset int, length int) int
//
// Writes copy elements (and, for long matches, repeat elements) describing a
// match of `length` bytes at distance `offset` to dst and returns the number
// of bytes written. No bounds check on dst is performed here.
//
//   offset >= 65536: if length > 64, write 0xff + 32-bit offset (covers 64
//       bytes); a remainder of 4 or more is written as repeat elements,
//       a smaller non-zero remainder as one more 5-byte element with tag
//       (length-1)<<2 | 3.
//   offset <  65536: if length > 64, write 0xee + 16-bit offset (covers 60
//       bytes) and the rest as repeat elements. Otherwise write a 2-byte
//       element when length < 12 and offset < 2048, else a 3-byte element.
//
// Registers: AX = dst write pointer, BX = bytes written, CX = offset,
// DX = remaining length, SI = scratch. Where a tag is built with
// MOVB $imm, SI followed by LEAL, only DL is stored afterwards, so the
// remaining bytes of SI do not influence the output.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX

	// emitCopy
	CMPL CX, $0x00010000
	JL two_byte_offset_standalone

	// Four-byte offset path.
	CMPL DX, $0x40
	JLE four_bytes_remain_standalone
	MOVB $0xff, (AX)
	MOVL CX, 1(AX)
	LEAL -64(DX), DX
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	CMPL DX, $0x04
	JL four_bytes_remain_standalone

	// emitRepeat
emit_repeat_again_standalone_emit_copy:
	MOVL DX, SI
	LEAL -4(DX), DX
	CMPL SI, $0x08
	JLE repeat_two_standalone_emit_copy
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_standalone_emit_copy
	CMPL CX, $0x00000800
	JLT repeat_two_offset_standalone_emit_copy

cant_repeat_two_offset_standalone_emit_copy:
	CMPL DX, $0x00000104
	JLT repeat_three_standalone_emit_copy
	CMPL DX, $0x00010100
	JLT repeat_four_standalone_emit_copy
	CMPL DX, $0x0100ffff
	JLT repeat_five_standalone_emit_copy

	// Longer than one repeat element can hold: emit the maximum and loop.
	LEAL -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP emit_repeat_again_standalone_emit_copy

repeat_five_standalone_emit_copy:
	LEAL -65536(DX), DX
	MOVL DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARL $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end

repeat_four_standalone_emit_copy:
	LEAL -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP gen_emit_copy_end

repeat_three_standalone_emit_copy:
	LEAL -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP gen_emit_copy_end

repeat_two_standalone_emit_copy:
	SHLL $0x02, DX
	ORL $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

repeat_two_offset_standalone_emit_copy:
	XORQ SI, SI
	LEAL 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

four_bytes_remain_standalone:
	// At most 64 bytes left with a four-byte offset; nothing to do for 0.
	TESTL DX, DX
	JZ gen_emit_copy_end
	MOVB $0x03, SI
	LEAL -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVL CX, 1(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end

two_byte_offset_standalone:
	CMPL DX, $0x40
	JLE two_byte_offset_short_standalone
	MOVB $0xee, (AX)
	MOVW CX, 1(AX)
	LEAL -60(DX), DX
	ADDQ $0x03, AX
	ADDQ $0x03, BX

	// emitRepeat
emit_repeat_again_standalone_emit_copy_short:
	MOVL DX, SI
	LEAL -4(DX), DX
	CMPL SI, $0x08
	JLE repeat_two_standalone_emit_copy_short
	CMPL SI, $0x0c
	JGE cant_repeat_two_offset_standalone_emit_copy_short
	CMPL CX, $0x00000800
	JLT repeat_two_offset_standalone_emit_copy_short

cant_repeat_two_offset_standalone_emit_copy_short:
	CMPL DX, $0x00000104
	JLT repeat_three_standalone_emit_copy_short
	CMPL DX, $0x00010100
	JLT repeat_four_standalone_emit_copy_short
	CMPL DX, $0x0100ffff
	JLT repeat_five_standalone_emit_copy_short
	LEAL -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP emit_repeat_again_standalone_emit_copy_short

repeat_five_standalone_emit_copy_short:
	LEAL -65536(DX), DX
	MOVL DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARL $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP gen_emit_copy_end

repeat_four_standalone_emit_copy_short:
	LEAL -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP gen_emit_copy_end

repeat_three_standalone_emit_copy_short:
	LEAL -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP gen_emit_copy_end

repeat_two_standalone_emit_copy_short:
	SHLL $0x02, DX
	ORL $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

repeat_two_offset_standalone_emit_copy_short:
	XORQ SI, SI
	LEAL 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

two_byte_offset_short_standalone:
	CMPL DX, $0x0c
	JGE emit_copy_three_standalone
	CMPL CX, $0x00000800
	JGE emit_copy_three_standalone

	// 2-byte element: tag = 1 | (length-4)<<2 | (offset>>8)<<5, then the low offset byte.
	MOVB $0x01, SI
	LEAL -16(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SHRL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP gen_emit_copy_end

emit_copy_three_standalone:
	// 3-byte element: tag = 2 | (length-1)<<2, then the 16-bit offset.
	MOVB $0x02, SI
	LEAL -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVW CX, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX

gen_emit_copy_end:
	MOVQ BX, ret+40(FP)
	RET
// func emitCopyNoRepeat(dst []byte, offset int, length int) int
//
// Writes copy elements describing a match of `length` bytes at distance
// `offset` to dst, using copy elements only, and returns the number of bytes
// written. No bounds check on dst is performed here.
//
//   offset >= 65536: while length > 64, write 0xff + 32-bit offset (covers 64
//       bytes) and continue as long as at least 4 bytes are left; a non-zero
//       rest is written as a 5-byte element with tag (length-1)<<2 | 3.
//   offset <  65536: while length > 64, write 0xee + 16-bit offset (covers 60
//       bytes); the rest is a 2-byte element when length < 12 and
//       offset < 2048, else a 3-byte element.
//
// Registers: AX = dst write pointer, BX = bytes written, CX = offset,
// DX = remaining length, SI = scratch. Where a tag is built with
// MOVB $imm, SI followed by LEAL, only DL is stored afterwards, so the
// remaining bytes of SI do not influence the output.
TEXT ·emitCopyNoRepeat(SB), NOSPLIT, $0-48
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX
	XORQ BX, BX

	// emitCopy
	CMPL CX, $0x00010000
	JL short_offset

long_offset_loop:
	CMPL DX, $0x40
	JLE long_offset_rest
	MOVB $0xff, (AX)
	MOVL CX, 1(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	SUBL $0x40, DX

	// Keep looping only while a full minimum-length (4 byte) match is left.
	CMPL DX, $0x04
	JGE long_offset_loop

long_offset_rest:
	TESTL DX, DX
	JZ done
	MOVB $0x03, SI
	LEAL -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVL CX, 1(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP done

short_offset:
	CMPL DX, $0x40
	JLE short_offset_rest
	MOVB $0xee, (AX)
	MOVW CX, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	LEAL -60(DX), DX
	JMP short_offset

short_offset_rest:
	CMPL DX, $0x0c
	JGE three_byte_element
	CMPL CX, $0x00000800
	JGE three_byte_element

	// 2-byte element: tag = 1 | (length-4)<<2 | (offset>>8)<<5, then the low offset byte.
	MOVB CL, 1(AX)
	MOVB $0x01, SI
	LEAL -16(SI)(DX*4), DX
	SHRL $0x08, CX
	SHLL $0x05, CX
	ORL CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, AX
	ADDQ $0x02, BX
	JMP done

three_byte_element:
	// 3-byte element: tag = 2 | (length-1)<<2, then the 16-bit offset.
	MOVW CX, 1(AX)
	MOVB $0x02, SI
	LEAL -4(SI)(DX*4), DX
	MOVB DL, (AX)
	ADDQ $0x03, AX
	ADDQ $0x03, BX

done:
	MOVQ BX, ret+40(FP)
	RET
// func matchLen(a []byte, b []byte) int
//
// Returns the number of leading bytes that are equal in a and b. Only a_len
// is read as the bound (as a 32-bit value); b_len is never loaded, so b is
// presumably at least as long as a - confirm with callers.
//
// Registers: AX = a base, CX = b base, DX = bytes left to compare,
// SI = number of matching bytes so far, BX = scratch.
TEXT ·matchLen(SB), NOSPLIT, $0-56
	MOVQ a_len+8(FP), DX
	MOVQ a_base+0(FP), AX
	MOVQ b_base+24(FP), CX

	// matchLen
	XORL SI, SI
	CMPL DX, $0x08
	JL tail_check

qword_compare:
	// XOR of 8 bytes from each side is zero when all 8 are equal.
	MOVQ (AX)(SI*1), BX
	XORQ (CX)(SI*1), BX
	TESTQ BX, BX
	JZ qword_equal

	// Index of the lowest differing bit divided by 8 = equal low-order bytes.
	BSFQ BX, BX
	SARQ $0x03, BX
	ADDL BX, SI
	JMP done

qword_equal:
	SUBL $0x08, DX
	ADDL $0x08, SI
	CMPL DX, $0x08
	JGE qword_compare

tail_check:
	// Fewer than 8 bytes left: finish one byte at a time.
	TESTL DX, DX
	JZ done

byte_compare:
	MOVB (AX)(SI*1), BL
	CMPB (CX)(SI*1), BL
	JNE done
	INCL SI
	DECL DX
	JNZ byte_compare

done:
	MOVQ SI, ret+48(FP)
	RET