// Copyright (c) 1994 James Clark // See the file COPYING for copying permission. #ifdef __GNUG__ #pragma implementation #endif #include "splib.h" #include "ExternalId.h" #include "CharsetInfo.h" #include "macros.h" #include "ParserMessages.h" #ifdef SP_NAMESPACE namespace SP_NAMESPACE { #endif ExternalId::ExternalId() : haveSystem_(0), havePublic_(0) { } void ExternalId::setSystem(Text &text) { text.swap(system_); haveSystem_ = 1; } PublicId::Type ExternalId::setPublic(Text &text, const CharsetInfo &charset, Char space, const MessageType1 *&fpierror, const MessageType1 *&urnerror) { havePublic_ = 1; return public_.init(text, charset, space, fpierror, urnerror); } void ExternalId::setLocation(const Location &loc) { loc_ = loc; } PublicId::PublicId() : type_(PublicId::informal) { } PublicId::Type PublicId::init(Text &text, const CharsetInfo &charset, Char space, const MessageType1 *&fpierror, const MessageType1 *&urnerror) { text.swap(text_); const StringC &str = text_.string(); type_ = informal; if (initFpi(str, charset, space, fpierror)) type_ = fpi; if (initUrn(str, charset, space, urnerror)) type_ = urn; return type_; } Boolean PublicId::initUrn(const StringC &str, const CharsetInfo &charset, Char space, const MessageType1 *&error) { const Char *next = str.data(); const Char *lim = str.data() + str.size(); Char sep = charset.execToDesc(':'); Char lcU = charset.execToDesc('u'); Char ucU = charset.execToDesc('U'); Char lcR = charset.execToDesc('r'); Char ucR = charset.execToDesc('R'); Char lcN = charset.execToDesc('n'); Char ucN = charset.execToDesc('N'); const Char *fieldStart; size_t fieldLength; if (!nextField(sep, next, lim, fieldStart, fieldLength, 0)) { error = &ParserMessages::urnMissingField; return 0; } if (fieldLength != 3 || (fieldStart[0] != lcU && fieldStart[0] != ucU) || (fieldStart[1] != lcR && fieldStart[1] != ucR) || (fieldStart[2] != lcN && fieldStart[2] != ucN)) { error = &ParserMessages::urnMissingPrefix; return 0; } if (!nextField(sep, next, lim, fieldStart, fieldLength, 0)) { error = &ParserMessages::urnMissingField; return 0; } if (fieldLength < 1) { //FIXME I don't fully understand the regexp notation in rfc2141: // is fieldLength also supposed to be <= 31 ? error = &ParserMessages::urnInvalidNid; return 0; } UnivChar percent, lparen, rparen, plus, comma, minus, period, colon; UnivChar equal, at, semicolon, dollar, uline, bang, star, apos; charset.descToUniv('%', percent); charset.descToUniv('(', lparen); charset.descToUniv(')', rparen); charset.descToUniv('+', plus); charset.descToUniv(',', comma); charset.descToUniv('-', minus); charset.descToUniv('.', period); charset.descToUniv(':', colon); charset.descToUniv('=', equal); charset.descToUniv('@', at); charset.descToUniv(';', semicolon); charset.descToUniv('$', dollar); charset.descToUniv('_', uline); charset.descToUniv('!', bang); charset.descToUniv('*', star); charset.descToUniv('\'', apos); for (size_t i = 0; i < fieldLength; i++) { UnivChar c; if (!charset.descToUniv(fieldStart[i], c) || ((!(c == minus) || i == 0) && !(c >= UnivCharsetDesc::a && c < UnivCharsetDesc::a + 26) && !(c >= UnivCharsetDesc::A && c < UnivCharsetDesc::A + 26) && !(c >= UnivCharsetDesc::zero && c < UnivCharsetDesc::zero + 10))) { error = &ParserMessages::urnInvalidNid; return 0; } } nid_.assign(fieldStart, fieldLength); if (next == 0) { error = &ParserMessages::urnMissingField; return 0; } fieldStart = next; fieldLength = lim - next; if (fieldLength < 1) { error = &ParserMessages::urnInvalidNss; return 0; } for (size_t i = 0; i < fieldLength; i++) { UnivChar c; if (!charset.descToUniv(fieldStart[i], c)) { error = &ParserMessages::urnInvalidNss; return 0; } if (c == percent) { if (fieldLength - i < 2) { error = &ParserMessages::urnInvalidNss; return 0; } Boolean zeros = 1; for (size_t j = 0; j < 2; j++) { i++; if (!charset.descToUniv(fieldStart[i], c) || (!(c >= UnivCharsetDesc::a && c < UnivCharsetDesc::a + 6) && !(c >= UnivCharsetDesc::A && c < UnivCharsetDesc::A + 6) && !(c >= UnivCharsetDesc::zero && c < UnivCharsetDesc::zero + 10))) { error = &ParserMessages::urnInvalidNss; return 0; } if (c != UnivCharsetDesc::zero) zeros = 0; } if (zeros) { // FIXME we're not checking that other chars are escaped // which must not be escaped according to rfc2141 error = &ParserMessages::urnInvalidNss; return 0; } } else { if (!(c >= UnivCharsetDesc::a && c < UnivCharsetDesc::a + 26) && !(c >= UnivCharsetDesc::A && c < UnivCharsetDesc::A + 26) && !(c >= UnivCharsetDesc::zero && c < UnivCharsetDesc::zero + 10) && !(c == lparen) && !(c == rparen) && !(c == plus) && !(c == comma) && !(c == minus) && !(c == period) && !(c == colon) && !(c == equal) && !(c == at) && !(c == semicolon) && !(c == dollar) && !(c == uline) && !(c == bang) && !(c == star) && !(c == apos)) { error = &ParserMessages::urnInvalidNss; return 0; } } } nss_.assign(fieldStart, fieldLength); return 1; } Boolean PublicId::initFpi(const StringC &str, const CharsetInfo &charset, Char space, const MessageType1 *&error) { const Char *next = str.data(); const Char *lim = str.data() + str.size(); Char solidus = charset.execToDesc('/'); Char minus = charset.execToDesc('-'); Char plus = charset.execToDesc('+'); const Char *fieldStart; size_t fieldLength; if (!nextField(solidus, next, lim, fieldStart, fieldLength)) { error = &ParserMessages::fpiMissingField; return 0; } if (fieldLength == 1 && (*fieldStart == minus || *fieldStart == plus)) { ownerType_ = (*fieldStart == plus ? registered : unregistered); if (!nextField(solidus, next, lim, fieldStart, fieldLength)) { error = &ParserMessages::fpiMissingField; return 0; } } else ownerType_ = ISO; owner_.assign(fieldStart, fieldLength); if (!nextField(solidus, next, lim, fieldStart, fieldLength)) { error = &ParserMessages::fpiMissingField; return 0; } size_t i; for (i = 0; i < fieldLength; i++) if (fieldStart[i] == space) break; if (i >= fieldLength) { error = &ParserMessages::fpiMissingTextClassSpace; return 0; } StringC textClassString(fieldStart, i); if (!lookupTextClass(textClassString, charset, textClass_)) { error = &ParserMessages::fpiInvalidTextClass; return 0; } i++; // skip the space fieldStart += i; fieldLength -= i; if (fieldLength == 1 && *fieldStart == minus) { unavailable_ = 1; if (!nextField(solidus, next, lim, fieldStart, fieldLength)) { error = &ParserMessages::fpiMissingField; return 0; } } else unavailable_ = 0; description_.assign(fieldStart, fieldLength); if (!nextField(solidus, next, lim, fieldStart, fieldLength)) { error = &ParserMessages::fpiMissingField; return 0; } if (textClass_ != CHARSET) { for (i = 0; i < fieldLength; i++) { UnivChar c; if (!charset.descToUniv(fieldStart[i], c) || c < UnivCharsetDesc::A || c >= UnivCharsetDesc::A + 26) { error = &ParserMessages::fpiInvalidLanguage; return 0; } } // The public text language must be a name. // Names cannot be empty. if (fieldLength == 0) { error = &ParserMessages::fpiInvalidLanguage; return 0; } } languageOrDesignatingSequence_.assign(fieldStart, fieldLength); if (nextField(solidus, next, lim, fieldStart, fieldLength)) { switch (textClass_) { case CAPACITY: case CHARSET: case NOTATION: case SYNTAX: error = &ParserMessages::fpiIllegalDisplayVersion; return 0; default: break; } haveDisplayVersion_ = 1; displayVersion_.assign(fieldStart, fieldLength); } else haveDisplayVersion_ = 0; if (next != 0) { error = &ParserMessages::fpiExtraField; return 0; } return 1; } Boolean PublicId::nextField(Char solidus, const Char *&next, const Char *lim, const Char *&fieldStart, size_t &fieldLength, Boolean dup) { if (next == 0) return 0; fieldStart = next; for (; next < lim; next++) { if (next[0] == solidus && next + dup < lim && next[dup] == solidus) { fieldLength = next - fieldStart; next += 1 + dup; return 1; } } fieldLength = lim - fieldStart; next = 0; return 1; } const char *const PublicId::textClasses[] = { "CAPACITY", "CHARSET", "DOCUMENT", "DTD", "ELEMENTS", "ENTITIES", "LPD", "NONSGML", "NOTATION", "SD", "SHORTREF", "SUBDOC", "SYNTAX", "TEXT", }; Boolean PublicId::lookupTextClass(const StringC &str, const CharsetInfo &charset, TextClass &textClass) { for (size_t i = 0; i < SIZEOF(textClasses); i++) if (str == charset.execToDesc(textClasses[i])) { textClass = TextClass(i); return 1; } return 0; } Boolean PublicId::getOwnerType(OwnerType &result) const { if (type_ != fpi) return 0; result = ownerType_; return 1; } Boolean PublicId::getOwner(StringC &result) const { if (type_ != fpi) return 0; result = owner_; return 1; } Boolean PublicId::getTextClass(TextClass &result) const { if (type_ != fpi) return 0; result = textClass_; return 1; } Boolean PublicId::getUnavailable(Boolean &result) const { if (type_ != fpi) return 0; result = unavailable_; return 1; } Boolean PublicId::getDescription(StringC &result) const { if (type_ != fpi) return 0; result = description_; return 1; } Boolean PublicId::getLanguage(StringC &result) const { if ((type_ != fpi) || textClass_ == CHARSET) return 0; result = languageOrDesignatingSequence_; return 1; } Boolean PublicId::getDesignatingSequence(StringC &result) const { if ((type_ != fpi) || textClass_ != CHARSET) return 0; result = languageOrDesignatingSequence_; return 1; } Boolean PublicId::getDisplayVersion(StringC &result) const { if (type_ != fpi) return 0; if (haveDisplayVersion_) result = displayVersion_; return 1; } Boolean PublicId::getNamespaceIdentifier(StringC &result) const { if (type_ != urn) return 0; result = nid_; return 1; } Boolean PublicId::getNamespaceSpecificString(StringC &result) const { if (type_ != urn) return 0; result = nss_; return 1; } #ifdef SP_NAMESPACE } #endif