package iptc import ( "errors" "fmt" "io" "strings" "unicode" "encoding/binary" "github.com/dsoprea/go-logging" ) var ( // TODO(dustin): We're still not sure if this is the right endianness. No search to IPTC or IIM seems to state one or the other. // DefaultEncoding is the standard encoding for the IPTC format. defaultEncoding = binary.BigEndian ) var ( // ErrInvalidTagMarker indicates that the tag can not be parsed because the // tag boundary marker is not the expected value. ErrInvalidTagMarker = errors.New("invalid tag marker") ) // Tag describes one tag read from the stream. type Tag struct { recordNumber uint8 datasetNumber uint8 dataSize uint64 } // String expresses state as a string. func (tag *Tag) String() string { return fmt.Sprintf( "Tag", tag.recordNumber, tag.datasetNumber, tag.dataSize) } // DecodeTag parses one tag from the stream. func DecodeTag(r io.Reader) (tag Tag, err error) { defer func() { if state := recover(); state != nil { err = log.Wrap(state.(error)) } }() tagMarker := uint8(0) err = binary.Read(r, defaultEncoding, &tagMarker) if err != nil { if err == io.EOF { return tag, err } log.Panic(err) } if tagMarker != 0x1c { return tag, ErrInvalidTagMarker } recordNumber := uint8(0) err = binary.Read(r, defaultEncoding, &recordNumber) log.PanicIf(err) datasetNumber := uint8(0) err = binary.Read(r, defaultEncoding, &datasetNumber) log.PanicIf(err) dataSize16Raw := uint16(0) err = binary.Read(r, defaultEncoding, &dataSize16Raw) log.PanicIf(err) var dataSize uint64 if dataSize16Raw < 32768 { // We only had 16-bits (has the MSB set to (0)). dataSize = uint64(dataSize16Raw) } else { // This field is just the length of the length (has the MSB set to (1)). // Clear the MSB. lengthLength := dataSize16Raw & 32767 if lengthLength == 4 { dataSize32Raw := uint32(0) err := binary.Read(r, defaultEncoding, &dataSize32Raw) log.PanicIf(err) dataSize = uint64(dataSize32Raw) } else if lengthLength == 8 { err := binary.Read(r, defaultEncoding, &dataSize) log.PanicIf(err) } else { // No specific sizes or limits are specified in the specification // so we need to impose our own limits in order to implement. log.Panicf("extended data-set tag size is not supported: (%d)", lengthLength) } } tag = Tag{ recordNumber: recordNumber, datasetNumber: datasetNumber, dataSize: dataSize, } return tag, nil } // StreamTagKey is a convenience type that lets us key our index with a high- // level type. type StreamTagKey struct { // RecordNumber is the major classification of the dataset. RecordNumber uint8 // DatasetNumber is the minor classification of the dataset. DatasetNumber uint8 } // String returns a descriptive string. func (stk StreamTagKey) String() string { return fmt.Sprintf("%d:%d", stk.RecordNumber, stk.DatasetNumber) } // TagData is a convenience wrapper around a byte-slice. type TagData []byte // IsPrintable returns true if all characters are printable. func (tg TagData) IsPrintable() bool { for _, b := range tg { r := rune(b) // Newline characters aren't considered printable. if r == 0x0d || r == 0x0a { continue } if unicode.IsGraphic(r) == false || unicode.IsPrint(r) == false { return false } } return true } // String returns a descriptive string. If the data doesn't include any non- // printable characters, it will include the value itself. func (tg TagData) String() string { if tg.IsPrintable() == true { return string(tg) } return fmt.Sprintf("BINARY<(%d) bytes>", len(tg)) } // ParsedTags is the complete, unordered set of tags parsed from the stream. type ParsedTags map[StreamTagKey][]TagData // ParseStream parses a serial sequence of tags and tag data out of the stream. func ParseStream(r io.Reader) (tags map[StreamTagKey][]TagData, err error) { defer func() { if state := recover(); state != nil { err = log.Wrap(state.(error)) } }() tags = make(ParsedTags) for { tag, err := DecodeTag(r) if err != nil { if err == io.EOF { break } log.Panic(err) } raw := make([]byte, tag.dataSize) _, err = io.ReadFull(r, raw) log.PanicIf(err) data := TagData(raw) stk := StreamTagKey{ RecordNumber: tag.recordNumber, DatasetNumber: tag.datasetNumber, } if existing, found := tags[stk]; found == true { tags[stk] = append(existing, data) } else { tags[stk] = []TagData{data} } } return tags, nil } // GetSimpleDictionaryFromParsedTags returns a dictionary of tag names to tag // values, where all values are strings and any tag that had a non-printable // value is omitted. We will also only return the first value, therefore // dropping any follow-up values for repeatable tags. This will ignore non- // standard tags. This will trim whitespace from the ends of strings. // // This is a convenience function for quickly displaying only the summary IPTC // metadata that a user might actually be interested in at first glance. func GetSimpleDictionaryFromParsedTags(pt ParsedTags) (distilled map[string]string) { distilled = make(map[string]string) for stk, dataSlice := range pt { sti, err := GetTagInfo(int(stk.RecordNumber), int(stk.DatasetNumber)) if err != nil { if err == ErrTagNotStandard { continue } else { log.Panic(err) } } data := dataSlice[0] if data.IsPrintable() == false { continue } // TODO(dustin): Trim leading whitespace, too. distilled[sti.Description] = strings.Trim(string(data), "\r\n") } return distilled } // GetDictionaryFromParsedTags returns all tags. It will keep non-printable // values, though will not print a placeholder instead. This will keep non- // standard tags (and print the fully-qualified dataset ID rather than the // name). It will keep repeated values (with the counter value appended to the // end). func GetDictionaryFromParsedTags(pt ParsedTags) (distilled map[string]string) { distilled = make(map[string]string) for stk, dataSlice := range pt { var keyPhrase string sti, err := GetTagInfo(int(stk.RecordNumber), int(stk.DatasetNumber)) if err != nil { if err == ErrTagNotStandard { keyPhrase = fmt.Sprintf("%s (not a standard tag)", stk.String()) } else { log.Panic(err) } } else { keyPhrase = sti.Description } for i, data := range dataSlice { currentKeyPhrase := keyPhrase if len(dataSlice) > 1 { currentKeyPhrase = fmt.Sprintf("%s (%d)", currentKeyPhrase, i+1) } var presentable string if data.IsPrintable() == false { presentable = fmt.Sprintf("[BINARY] %s", DumpBytesToString(data)) } else { presentable = string(data) } distilled[currentKeyPhrase] = presentable } } return distilled }