mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2024-07-01 19:40:33 +00:00
98263a7de6
* start fixing up tests * fix up tests + automate with drone * fiddle with linting * messing about with drone.yml * some more fiddling * hmmm * add cache * add vendor directory * verbose * ci updates * update some little things * update sig
278 lines
6.7 KiB
Go
278 lines
6.7 KiB
Go
package iptc
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"strings"
|
|
"unicode"
|
|
|
|
"encoding/binary"
|
|
|
|
"github.com/dsoprea/go-logging"
|
|
)
|
|
|
|
// defaultEncoding is the byte order used to decode every multi-byte integer
// in an IPTC stream.
//
// TODO(dustin): We're still not sure if this is the right endianness. No search to IPTC or IIM seems to state one or the other.
var defaultEncoding = binary.BigEndian
|
|
|
|
// ErrInvalidTagMarker is returned when the byte at the position where a tag
// is expected to start is not the tag boundary marker, so the tag can not be
// parsed.
var ErrInvalidTagMarker = errors.New("invalid tag marker")
|
|
|
|
// Tag is the header of a single dataset as read from the stream: the
// record/dataset pair that identifies it and the size of the data that
// follows.
type Tag struct {
	// recordNumber is the first identifying byte read after the marker.
	recordNumber uint8

	// datasetNumber is the second identifying byte read after the marker.
	datasetNumber uint8

	// dataSize is the number of data bytes that follow the header.
	dataSize uint64
}

// String returns a human-readable summary of the tag header.
func (tag *Tag) String() string {
	const layout = "Tag<DATASET=(%d:%d) DATA-SIZE=(%d)>"

	return fmt.Sprintf(layout, tag.recordNumber, tag.datasetNumber, tag.dataSize)
}
|
|
|
|
// DecodeTag parses one tag from the stream.
|
|
func DecodeTag(r io.Reader) (tag Tag, err error) {
|
|
defer func() {
|
|
if state := recover(); state != nil {
|
|
err = log.Wrap(state.(error))
|
|
}
|
|
}()
|
|
|
|
tagMarker := uint8(0)
|
|
err = binary.Read(r, defaultEncoding, &tagMarker)
|
|
if err != nil {
|
|
if err == io.EOF {
|
|
return tag, err
|
|
}
|
|
|
|
log.Panic(err)
|
|
}
|
|
|
|
if tagMarker != 0x1c {
|
|
return tag, ErrInvalidTagMarker
|
|
}
|
|
|
|
recordNumber := uint8(0)
|
|
err = binary.Read(r, defaultEncoding, &recordNumber)
|
|
log.PanicIf(err)
|
|
|
|
datasetNumber := uint8(0)
|
|
err = binary.Read(r, defaultEncoding, &datasetNumber)
|
|
log.PanicIf(err)
|
|
|
|
dataSize16Raw := uint16(0)
|
|
err = binary.Read(r, defaultEncoding, &dataSize16Raw)
|
|
log.PanicIf(err)
|
|
|
|
var dataSize uint64
|
|
|
|
if dataSize16Raw < 32768 {
|
|
// We only had 16-bits (has the MSB set to (0)).
|
|
dataSize = uint64(dataSize16Raw)
|
|
} else {
|
|
// This field is just the length of the length (has the MSB set to (1)).
|
|
|
|
// Clear the MSB.
|
|
lengthLength := dataSize16Raw & 32767
|
|
|
|
if lengthLength == 4 {
|
|
dataSize32Raw := uint32(0)
|
|
err := binary.Read(r, defaultEncoding, &dataSize32Raw)
|
|
log.PanicIf(err)
|
|
|
|
dataSize = uint64(dataSize32Raw)
|
|
} else if lengthLength == 8 {
|
|
err := binary.Read(r, defaultEncoding, &dataSize)
|
|
log.PanicIf(err)
|
|
} else {
|
|
// No specific sizes or limits are specified in the specification
|
|
// so we need to impose our own limits in order to implement.
|
|
|
|
log.Panicf("extended data-set tag size is not supported: (%d)", lengthLength)
|
|
}
|
|
}
|
|
|
|
tag = Tag{
|
|
recordNumber: recordNumber,
|
|
datasetNumber: datasetNumber,
|
|
dataSize: dataSize,
|
|
}
|
|
|
|
return tag, nil
|
|
}
|
|
|
|
// StreamTagKey identifies a dataset by its record and dataset numbers. It is
// a comparable value type so that it can be used directly as a map key.
type StreamTagKey struct {
	// RecordNumber is the major classification of the dataset.
	RecordNumber uint8

	// DatasetNumber is the minor classification of the dataset.
	DatasetNumber uint8
}

// String renders the key as "<record>:<dataset>" in decimal.
func (stk StreamTagKey) String() string {
	record := fmt.Sprint(stk.RecordNumber)
	dataset := fmt.Sprint(stk.DatasetNumber)

	return record + ":" + dataset
}
|
|
|
|
// TagData is the raw value of one dataset, with a few convenience methods
// layered over the underlying byte-slice.
type TagData []byte

// IsPrintable reports whether every byte of the data is printable. Each byte
// is examined on its own as a single code point; multi-byte sequences are not
// decoded. Empty data is considered printable.
func (tg TagData) IsPrintable() bool {
	for i := 0; i < len(tg); i++ {
		switch c := rune(tg[i]); {
		case c == 0x0d, c == 0x0a:
			// Carriage returns and line feeds are tolerated even though the
			// unicode package does not class them as printable.
		case unicode.IsGraphic(c) && unicode.IsPrint(c):
			// An ordinary printable character.
		default:
			return false
		}
	}

	return true
}

// String returns the data itself when it is entirely printable, and a
// placeholder stating the byte count otherwise.
func (tg TagData) String() string {
	if !tg.IsPrintable() {
		return fmt.Sprintf("BINARY<(%d) bytes>", len(tg))
	}

	return string(tg)
}
|
|
|
|
// ParsedTags is the complete, unordered set of tags parsed from the stream.
|
|
type ParsedTags map[StreamTagKey][]TagData
|
|
|
|
// ParseStream parses a serial sequence of tags and tag data out of the stream.
|
|
func ParseStream(r io.Reader) (tags map[StreamTagKey][]TagData, err error) {
|
|
defer func() {
|
|
if state := recover(); state != nil {
|
|
err = log.Wrap(state.(error))
|
|
}
|
|
}()
|
|
|
|
tags = make(ParsedTags)
|
|
|
|
for {
|
|
tag, err := DecodeTag(r)
|
|
if err != nil {
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
|
|
log.Panic(err)
|
|
}
|
|
|
|
raw := make([]byte, tag.dataSize)
|
|
|
|
_, err = io.ReadFull(r, raw)
|
|
log.PanicIf(err)
|
|
|
|
data := TagData(raw)
|
|
|
|
stk := StreamTagKey{
|
|
RecordNumber: tag.recordNumber,
|
|
DatasetNumber: tag.datasetNumber,
|
|
}
|
|
|
|
if existing, found := tags[stk]; found == true {
|
|
tags[stk] = append(existing, data)
|
|
} else {
|
|
tags[stk] = []TagData{data}
|
|
}
|
|
}
|
|
|
|
return tags, nil
|
|
}
|
|
|
|
// GetSimpleDictionaryFromParsedTags returns a dictionary of tag names to tag
|
|
// values, where all values are strings and any tag that had a non-printable
|
|
// value is omitted. We will also only return the first value, therefore
|
|
// dropping any follow-up values for repeatable tags. This will ignore non-
|
|
// standard tags. This will trim whitespace from the ends of strings.
|
|
//
|
|
// This is a convenience function for quickly displaying only the summary IPTC
|
|
// metadata that a user might actually be interested in at first glance.
|
|
func GetSimpleDictionaryFromParsedTags(pt ParsedTags) (distilled map[string]string) {
|
|
distilled = make(map[string]string)
|
|
|
|
for stk, dataSlice := range pt {
|
|
sti, err := GetTagInfo(int(stk.RecordNumber), int(stk.DatasetNumber))
|
|
if err != nil {
|
|
if err == ErrTagNotStandard {
|
|
continue
|
|
} else {
|
|
log.Panic(err)
|
|
}
|
|
}
|
|
|
|
data := dataSlice[0]
|
|
|
|
if data.IsPrintable() == false {
|
|
continue
|
|
}
|
|
|
|
// TODO(dustin): Trim leading whitespace, too.
|
|
distilled[sti.Description] = strings.Trim(string(data), "\r\n")
|
|
}
|
|
|
|
return distilled
|
|
}
|
|
|
|
// GetDictionaryFromParsedTags returns all tags. It will keep non-printable
|
|
// values, though will not print a placeholder instead. This will keep non-
|
|
// standard tags (and print the fully-qualified dataset ID rather than the
|
|
// name). It will keep repeated values (with the counter value appended to the
|
|
// end).
|
|
func GetDictionaryFromParsedTags(pt ParsedTags) (distilled map[string]string) {
|
|
distilled = make(map[string]string)
|
|
for stk, dataSlice := range pt {
|
|
var keyPhrase string
|
|
|
|
sti, err := GetTagInfo(int(stk.RecordNumber), int(stk.DatasetNumber))
|
|
if err != nil {
|
|
if err == ErrTagNotStandard {
|
|
keyPhrase = fmt.Sprintf("%s (not a standard tag)", stk.String())
|
|
} else {
|
|
log.Panic(err)
|
|
}
|
|
} else {
|
|
keyPhrase = sti.Description
|
|
}
|
|
|
|
for i, data := range dataSlice {
|
|
currentKeyPhrase := keyPhrase
|
|
if len(dataSlice) > 1 {
|
|
currentKeyPhrase = fmt.Sprintf("%s (%d)", currentKeyPhrase, i+1)
|
|
}
|
|
|
|
var presentable string
|
|
if data.IsPrintable() == false {
|
|
presentable = fmt.Sprintf("[BINARY] %s", DumpBytesToString(data))
|
|
} else {
|
|
presentable = string(data)
|
|
}
|
|
|
|
distilled[currentKeyPhrase] = presentable
|
|
}
|
|
}
|
|
|
|
return distilled
|
|
}
|