gotosocial/vendor/github.com/dsoprea/go-iptc/tag.go
Tobi Smethurst 98263a7de6
Grand test fixup (#138)
* start fixing up tests

* fix up tests + automate with drone

* fiddle with linting

* messing about with drone.yml

* some more fiddling

* hmmm

* add cache

* add vendor directory

* verbose

* ci updates

* update some little things

* update sig
2021-08-12 21:03:24 +02:00

278 lines
6.7 KiB
Go

package iptc
import (
"errors"
"fmt"
"io"
"strings"
"unicode"
"encoding/binary"
"github.com/dsoprea/go-logging"
)
var (
// defaultEncoding is the byte order used when decoding the multi-byte
// fields of the IPTC stream.
//
// TODO(dustin): We're still not sure whether this is the right endianness.
// No search of the IPTC or IIM material seems to state one or the other.
defaultEncoding = binary.BigEndian
)
var (
// ErrInvalidTagMarker indicates that the tag cannot be parsed because the
// tag boundary marker is not the expected value. DecodeTag returns it when
// the first byte it reads is not 0x1c.
ErrInvalidTagMarker = errors.New("invalid tag marker")
)
// Tag holds the header fields of a single dataset read from the stream.
type Tag struct {
	// recordNumber is the record part of the dataset identifier.
	recordNumber uint8

	// datasetNumber is the dataset part of the dataset identifier.
	datasetNumber uint8

	// dataSize is the size, in bytes, that the header declares for the data.
	dataSize uint64
}

// String renders the tag header in a compact, human-readable form.
func (tag *Tag) String() string {
	const layout = "Tag<DATASET=(%d:%d) DATA-SIZE=(%d)>"

	return fmt.Sprintf(layout, tag.recordNumber, tag.datasetNumber, tag.dataSize)
}
// DecodeTag reads one dataset header from r: a one-byte marker (0x1c), a
// one-byte record number, a one-byte dataset number, and a 16-bit size field.
//
// When the most-significant bit of the size field is clear, the field is the
// data size itself. When it is set, the remaining 15 bits give the width, in
// bytes, of an extended size field that follows; only widths of 4 and 8 are
// supported.
//
// io.EOF is returned as-is when the stream ends before the marker, and
// ErrInvalidTagMarker is returned as-is when the marker does not match. Any
// other failure is raised as a panic internally and returned as a wrapped
// error. The returned Tag is the zero value whenever err is non-nil.
func DecodeTag(r io.Reader) (tag Tag, err error) {
	defer func() {
		if recovered := recover(); recovered != nil {
			err = log.Wrap(recovered.(error))
		}
	}()

	var marker uint8

	if err = binary.Read(r, defaultEncoding, &marker); err != nil {
		// A clean end-of-stream between tags is reported to the caller
		// untouched so that it can be used as the loop terminator.
		if err == io.EOF {
			return tag, err
		}

		log.Panic(err)
	}

	if marker != 0x1c {
		return tag, ErrInvalidTagMarker
	}

	var record uint8

	err = binary.Read(r, defaultEncoding, &record)
	log.PanicIf(err)

	var dataset uint8

	err = binary.Read(r, defaultEncoding, &dataset)
	log.PanicIf(err)

	var sizeField uint16

	err = binary.Read(r, defaultEncoding, &sizeField)
	log.PanicIf(err)

	var size uint64

	if sizeField&0x8000 == 0 {
		// Standard form: the 16-bit field is the size.
		size = uint64(sizeField)
	} else {
		// Extended form: strip the flag bit to get the width of the real
		// size field, then read that field.
		switch sizeWidth := sizeField & 0x7fff; sizeWidth {
		case 4:
			var size32 uint32

			readErr := binary.Read(r, defaultEncoding, &size32)
			log.PanicIf(readErr)

			size = uint64(size32)
		case 8:
			readErr := binary.Read(r, defaultEncoding, &size)
			log.PanicIf(readErr)
		default:
			// No specific sizes or limits are given by the specification,
			// so we impose our own in order to have something to implement.
			log.Panicf("extended data-set tag size is not supported: (%d)", sizeWidth)
		}
	}

	tag = Tag{
		recordNumber:  record,
		datasetNumber: dataset,
		dataSize:      size,
	}

	return tag, nil
}
// StreamTagKey identifies a dataset by its record and dataset numbers. It is
// a comparable value type so that it can be used directly as a map key.
type StreamTagKey struct {
	// RecordNumber is the major classification of the dataset.
	RecordNumber uint8

	// DatasetNumber is the minor classification of the dataset.
	DatasetNumber uint8
}

// String formats the key as "<record>:<dataset>" using decimal numbers.
func (stk StreamTagKey) String() string {
	record, dataset := stk.RecordNumber, stk.DatasetNumber

	return fmt.Sprintf("%d:%d", record, dataset)
}
// TagData is the raw value of one dataset, with helpers for displaying it.
type TagData []byte

// IsPrintable reports whether the data can be shown as text. Each byte is
// judged on its own as a code point: it must be either a carriage-return or
// line-feed, or be both graphic and printable according to the unicode
// package. Empty data is considered printable.
func (tg TagData) IsPrintable() bool {
	for i := 0; i < len(tg); i++ {
		switch c := rune(tg[i]); {
		case c == 0x0a, c == 0x0d:
			// The unicode package does not treat line breaks as printable,
			// but they are accepted here.
		case unicode.IsGraphic(c) && unicode.IsPrint(c):
			// Acceptable character; keep scanning.
		default:
			return false
		}
	}

	return true
}

// String returns the data itself when it is printable, and otherwise a
// placeholder that reports only the number of bytes.
func (tg TagData) String() string {
	if !tg.IsPrintable() {
		return fmt.Sprintf("BINARY<(%d) bytes>", len(tg))
	}

	return string(tg)
}
// ParsedTags is the complete, unordered set of tags parsed from the stream.
// Each key maps to a slice of values rather than a single value, so that a
// dataset that appears more than once keeps all of its values.
type ParsedTags map[StreamTagKey][]TagData
// ParseStream parses a serial sequence of tags and tag data out of the stream.
//
// Tags are read with DecodeTag until it reports io.EOF at a tag boundary.
// The data of each tag is stored under its record/dataset key, and repeated
// datasets accumulate their values in the order they were read.
//
// Any failure (an invalid marker, a truncated header, or data shorter than
// the size announced by its header) is returned as a wrapped error. In that
// case `tags` still holds whatever was parsed before the failure.
func ParseStream(r io.Reader) (tags map[StreamTagKey][]TagData, err error) {
	defer func() {
		if state := recover(); state != nil {
			err = log.Wrap(state.(error))
		}
	}()

	tags = make(ParsedTags)

	for {
		tag, decodeErr := DecodeTag(r)
		if decodeErr != nil {
			if decodeErr == io.EOF {
				break
			}

			log.Panic(decodeErr)
		}

		// The size comes straight from the (untrusted) stream and may be
		// up to 64 bits wide, so it must not be used to size an allocation
		// up front.
		raw, readErr := readTagPayload(r, tag.dataSize)
		log.PanicIf(readErr)

		stk := StreamTagKey{
			RecordNumber:  tag.recordNumber,
			DatasetNumber: tag.datasetNumber,
		}

		// Appending to the nil slice of a missing key creates the entry.
		tags[stk] = append(tags[stk], TagData(raw))
	}

	return tags, nil
}

// readTagPayload reads exactly size bytes from r, growing the buffer in
// bounded chunks so that memory use tracks the bytes actually received
// rather than the size a header claims. Like io.ReadFull, it returns io.EOF
// if nothing could be read and io.ErrUnexpectedEOF if the data ends early.
func readTagPayload(r io.Reader, size uint64) ([]byte, error) {
	const chunkSize = 64 * 1024

	initial := size
	if initial > chunkSize {
		initial = chunkSize
	}

	payload := make([]byte, 0, initial)

	for remaining := size; remaining > 0; {
		n := remaining
		if n > chunkSize {
			n = chunkSize
		}

		offset := len(payload)
		payload = append(payload, make([]byte, n)...)

		if _, err := io.ReadFull(r, payload[offset:]); err != nil {
			// Running dry at a chunk boundary is still a short read from
			// the caller's point of view if earlier chunks were filled.
			if err == io.EOF && offset > 0 {
				err = io.ErrUnexpectedEOF
			}

			return nil, err
		}

		remaining -= n
	}

	return payload, nil
}
// GetSimpleDictionaryFromParsedTags returns a dictionary of tag names to tag
// values, where all values are strings and any tag that had a non-printable
// value is omitted. We will also only return the first value, therefore
// dropping any follow-up values for repeatable tags. This will ignore non-
// standard tags, as well as any key that has no values at all. Carriage-
// return and line-feed characters are trimmed from both ends of each value;
// other whitespace is kept.
//
// This is a convenience function for quickly displaying only the summary IPTC
// metadata that a user might actually be interested in at first glance.
//
// There is no error return: any GetTagInfo failure other than
// ErrTagNotStandard is raised as a panic.
func GetSimpleDictionaryFromParsedTags(pt ParsedTags) (distilled map[string]string) {
	distilled = make(map[string]string)

	for stk, dataSlice := range pt {
		sti, err := GetTagInfo(int(stk.RecordNumber), int(stk.DatasetNumber))
		if err != nil {
			if err == ErrTagNotStandard {
				continue
			}

			log.Panic(err)
		}

		// ParsedTags is a plain map type, so a caller-built value may carry
		// a key with no values. Skip it rather than indexing out of range.
		if len(dataSlice) == 0 {
			continue
		}

		data := dataSlice[0]
		if !data.IsPrintable() {
			continue
		}

		// TODO(dustin): Trim other leading/trailing whitespace (e.g. spaces), too.
		distilled[sti.Description] = strings.Trim(string(data), "\r\n")
	}

	return distilled
}
// GetDictionaryFromParsedTags flattens every parsed tag into a dictionary of
// display strings.
//
// Standard tags are keyed by their description. Non-standard tags are kept
// and keyed by their "record:dataset" ID followed by " (not a standard tag)".
// When a dataset has more than one value, each value gets its own entry with
// a one-based counter appended to the key, e.g. "Name (2)". Printable values
// are stored as-is; non-printable values are stored as "[BINARY] " followed
// by the output of DumpBytesToString.
//
// There is no error return: any GetTagInfo failure other than
// ErrTagNotStandard is raised as a panic.
func GetDictionaryFromParsedTags(pt ParsedTags) (distilled map[string]string) {
	distilled = make(map[string]string)

	for stk, dataSlice := range pt {
		var label string

		sti, err := GetTagInfo(int(stk.RecordNumber), int(stk.DatasetNumber))

		switch {
		case err == nil:
			label = sti.Description
		case err == ErrTagNotStandard:
			label = fmt.Sprintf("%s (not a standard tag)", stk.String())
		default:
			log.Panic(err)
		}

		// Only repeated datasets get a numeric suffix on their key.
		numbered := len(dataSlice) > 1

		for i, data := range dataSlice {
			key := label
			if numbered {
				key = fmt.Sprintf("%s (%d)", key, i+1)
			}

			if data.IsPrintable() {
				distilled[key] = string(data)
				continue
			}

			distilled[key] = fmt.Sprintf("[BINARY] %s", DumpBytesToString(data))
		}
	}

	return distilled
}