// Copyright 2011 The Snappy-Go Authors. All rights reserved. // Copyright (c) 2019 Klaus Post. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package s2 import ( "encoding/binary" "math" "math/bits" ) // Encode returns the encoded form of src. The returned slice may be a sub- // slice of dst if dst was large enough to hold the entire encoded block. // Otherwise, a newly allocated slice will be returned. // // The dst and src must not overlap. It is valid to pass a nil dst. // // The blocks will require the same amount of memory to decode as encoding, // and does not make for concurrent decoding. // Also note that blocks do not contain CRC information, so corruption may be undetected. // // If you need to encode larger amounts of data, consider using // the streaming interface which gives all of these features. func Encode(dst, src []byte) []byte { if n := MaxEncodedLen(len(src)); n < 0 { panic(ErrTooLarge) } else if cap(dst) < n { dst = make([]byte, n) } else { dst = dst[:n] } // The block starts with the varint-encoded length of the decompressed bytes. d := binary.PutUvarint(dst, uint64(len(src))) if len(src) == 0 { return dst[:d] } if len(src) < minNonLiteralBlockSize { d += emitLiteral(dst[d:], src) return dst[:d] } n := encodeBlock(dst[d:], src) if n > 0 { d += n return dst[:d] } // Not compressible d += emitLiteral(dst[d:], src) return dst[:d] } // EstimateBlockSize will perform a very fast compression // without outputting the result and return the compressed output size. // The function returns -1 if no improvement could be achieved. // Using actual compression will most often produce better compression than the estimate. func EstimateBlockSize(src []byte) (d int) { if len(src) < 6 || int64(len(src)) > 0xffffffff { return -1 } if len(src) <= 1024 { d = calcBlockSizeSmall(src) } else { d = calcBlockSize(src) } if d == 0 { return -1 } // Size of the varint encoded block size. d += (bits.Len64(uint64(len(src))) + 7) / 7 if d >= len(src) { return -1 } return d } // EncodeBetter returns the encoded form of src. The returned slice may be a sub- // slice of dst if dst was large enough to hold the entire encoded block. // Otherwise, a newly allocated slice will be returned. // // EncodeBetter compresses better than Encode but typically with a // 10-40% speed decrease on both compression and decompression. // // The dst and src must not overlap. It is valid to pass a nil dst. // // The blocks will require the same amount of memory to decode as encoding, // and does not make for concurrent decoding. // Also note that blocks do not contain CRC information, so corruption may be undetected. // // If you need to encode larger amounts of data, consider using // the streaming interface which gives all of these features. func EncodeBetter(dst, src []byte) []byte { if n := MaxEncodedLen(len(src)); n < 0 { panic(ErrTooLarge) } else if len(dst) < n { dst = make([]byte, n) } // The block starts with the varint-encoded length of the decompressed bytes. d := binary.PutUvarint(dst, uint64(len(src))) if len(src) == 0 { return dst[:d] } if len(src) < minNonLiteralBlockSize { d += emitLiteral(dst[d:], src) return dst[:d] } n := encodeBlockBetter(dst[d:], src) if n > 0 { d += n return dst[:d] } // Not compressible d += emitLiteral(dst[d:], src) return dst[:d] } // EncodeBest returns the encoded form of src. The returned slice may be a sub- // slice of dst if dst was large enough to hold the entire encoded block. // Otherwise, a newly allocated slice will be returned. // // EncodeBest compresses as good as reasonably possible but with a // big speed decrease. // // The dst and src must not overlap. It is valid to pass a nil dst. // // The blocks will require the same amount of memory to decode as encoding, // and does not make for concurrent decoding. // Also note that blocks do not contain CRC information, so corruption may be undetected. // // If you need to encode larger amounts of data, consider using // the streaming interface which gives all of these features. func EncodeBest(dst, src []byte) []byte { if n := MaxEncodedLen(len(src)); n < 0 { panic(ErrTooLarge) } else if len(dst) < n { dst = make([]byte, n) } // The block starts with the varint-encoded length of the decompressed bytes. d := binary.PutUvarint(dst, uint64(len(src))) if len(src) == 0 { return dst[:d] } if len(src) < minNonLiteralBlockSize { d += emitLiteral(dst[d:], src) return dst[:d] } n := encodeBlockBest(dst[d:], src, nil) if n > 0 { d += n return dst[:d] } // Not compressible d += emitLiteral(dst[d:], src) return dst[:d] } // EncodeSnappy returns the encoded form of src. The returned slice may be a sub- // slice of dst if dst was large enough to hold the entire encoded block. // Otherwise, a newly allocated slice will be returned. // // The output is Snappy compatible and will likely decompress faster. // // The dst and src must not overlap. It is valid to pass a nil dst. // // The blocks will require the same amount of memory to decode as encoding, // and does not make for concurrent decoding. // Also note that blocks do not contain CRC information, so corruption may be undetected. // // If you need to encode larger amounts of data, consider using // the streaming interface which gives all of these features. func EncodeSnappy(dst, src []byte) []byte { if n := MaxEncodedLen(len(src)); n < 0 { panic(ErrTooLarge) } else if cap(dst) < n { dst = make([]byte, n) } else { dst = dst[:n] } // The block starts with the varint-encoded length of the decompressed bytes. d := binary.PutUvarint(dst, uint64(len(src))) if len(src) == 0 { return dst[:d] } if len(src) < minNonLiteralBlockSize { d += emitLiteral(dst[d:], src) return dst[:d] } n := encodeBlockSnappy(dst[d:], src) if n > 0 { d += n return dst[:d] } // Not compressible d += emitLiteral(dst[d:], src) return dst[:d] } // EncodeSnappyBetter returns the encoded form of src. The returned slice may be a sub- // slice of dst if dst was large enough to hold the entire encoded block. // Otherwise, a newly allocated slice will be returned. // // The output is Snappy compatible and will likely decompress faster. // // The dst and src must not overlap. It is valid to pass a nil dst. // // The blocks will require the same amount of memory to decode as encoding, // and does not make for concurrent decoding. // Also note that blocks do not contain CRC information, so corruption may be undetected. // // If you need to encode larger amounts of data, consider using // the streaming interface which gives all of these features. func EncodeSnappyBetter(dst, src []byte) []byte { if n := MaxEncodedLen(len(src)); n < 0 { panic(ErrTooLarge) } else if cap(dst) < n { dst = make([]byte, n) } else { dst = dst[:n] } // The block starts with the varint-encoded length of the decompressed bytes. d := binary.PutUvarint(dst, uint64(len(src))) if len(src) == 0 { return dst[:d] } if len(src) < minNonLiteralBlockSize { d += emitLiteral(dst[d:], src) return dst[:d] } n := encodeBlockBetterSnappy(dst[d:], src) if n > 0 { d += n return dst[:d] } // Not compressible d += emitLiteral(dst[d:], src) return dst[:d] } // EncodeSnappyBest returns the encoded form of src. The returned slice may be a sub- // slice of dst if dst was large enough to hold the entire encoded block. // Otherwise, a newly allocated slice will be returned. // // The output is Snappy compatible and will likely decompress faster. // // The dst and src must not overlap. It is valid to pass a nil dst. // // The blocks will require the same amount of memory to decode as encoding, // and does not make for concurrent decoding. // Also note that blocks do not contain CRC information, so corruption may be undetected. // // If you need to encode larger amounts of data, consider using // the streaming interface which gives all of these features. func EncodeSnappyBest(dst, src []byte) []byte { if n := MaxEncodedLen(len(src)); n < 0 { panic(ErrTooLarge) } else if cap(dst) < n { dst = make([]byte, n) } else { dst = dst[:n] } // The block starts with the varint-encoded length of the decompressed bytes. d := binary.PutUvarint(dst, uint64(len(src))) if len(src) == 0 { return dst[:d] } if len(src) < minNonLiteralBlockSize { d += emitLiteral(dst[d:], src) return dst[:d] } n := encodeBlockBestSnappy(dst[d:], src) if n > 0 { d += n return dst[:d] } // Not compressible d += emitLiteral(dst[d:], src) return dst[:d] } // ConcatBlocks will concatenate the supplied blocks and append them to the supplied destination. // If the destination is nil or too small, a new will be allocated. // The blocks are not validated, so garbage in = garbage out. // dst may not overlap block data. // Any data in dst is preserved as is, so it will not be considered a block. func ConcatBlocks(dst []byte, blocks ...[]byte) ([]byte, error) { totalSize := uint64(0) compSize := 0 for _, b := range blocks { l, hdr, err := decodedLen(b) if err != nil { return nil, err } totalSize += uint64(l) compSize += len(b) - hdr } if totalSize == 0 { dst = append(dst, 0) return dst, nil } if totalSize > math.MaxUint32 { return nil, ErrTooLarge } var tmp [binary.MaxVarintLen32]byte hdrSize := binary.PutUvarint(tmp[:], totalSize) wantSize := hdrSize + compSize if cap(dst)-len(dst) < wantSize { dst = append(make([]byte, 0, wantSize+len(dst)), dst...) } dst = append(dst, tmp[:hdrSize]...) for _, b := range blocks { _, hdr, err := decodedLen(b) if err != nil { return nil, err } dst = append(dst, b[hdr:]...) } return dst, nil } // inputMargin is the minimum number of extra input bytes to keep, inside // encodeBlock's inner loop. On some architectures, this margin lets us // implement a fast path for emitLiteral, where the copy of short (<= 16 byte) // literals can be implemented as a single load to and store from a 16-byte // register. That literal's actual length can be as short as 1 byte, so this // can copy up to 15 bytes too much, but that's OK as subsequent iterations of // the encoding loop will fix up the copy overrun, and this inputMargin ensures // that we don't overrun the dst and src buffers. const inputMargin = 8 // minNonLiteralBlockSize is the minimum size of the input to encodeBlock that // will be accepted by the encoder. const minNonLiteralBlockSize = 32 const intReduction = 2 - (1 << (^uint(0) >> 63)) // 1 (32 bits) or 0 (64 bits) // MaxBlockSize is the maximum value where MaxEncodedLen will return a valid block size. // Blocks this big are highly discouraged, though. // Half the size on 32 bit systems. const MaxBlockSize = (1<<(32-intReduction) - 1) - binary.MaxVarintLen32 - 5 // MaxEncodedLen returns the maximum length of a snappy block, given its // uncompressed length. // // It will return a negative value if srcLen is too large to encode. // 32 bit platforms will have lower thresholds for rejecting big content. func MaxEncodedLen(srcLen int) int { n := uint64(srcLen) if intReduction == 1 { // 32 bits if n > math.MaxInt32 { // Also includes negative. return -1 } } else if n > 0xffffffff { // 64 bits // Also includes negative. return -1 } // Size of the varint encoded block size. n = n + uint64((bits.Len64(n)+7)/7) // Add maximum size of encoding block as literals. n += uint64(literalExtraSize(int64(srcLen))) if intReduction == 1 { // 32 bits if n > math.MaxInt32 { return -1 } } else if n > 0xffffffff { // 64 bits // Also includes negative. return -1 } return int(n) }