// Copyright (c) 1994 James Clark, 1999 Matthias Clasen // See the file COPYING for copying permission. #include "splib.h" #include "Parser.h" #include "ParserMessages.h" #include "MessageArg.h" #include "TokenMessageArg.h" #include "token.h" #include "macros.h" #ifdef SP_NAMESPACE namespace SP_NAMESPACE { #endif void Parser::doInstanceStart() { if (cancelled()) { allDone(); return; } // FIXME check here that we have a valid dtd compileInstanceModes(); setPhase(contentPhase); Token token = getToken(currentMode()); switch (token) { case tokenEe: case tokenStagoNameStart: case tokenStagoTagc: case tokenStagoGrpo: case tokenEtagoNameStart: case tokenEtagoTagc: case tokenEtagoGrpo: break; default: if (sd().omittag()) { unsigned startImpliedCount = 0; unsigned attributeListIndex = 0; IList undoList; IList eventList; if (!tryImplyTag(currentLocation(), startImpliedCount, attributeListIndex, undoList, eventList)) CANNOT_HAPPEN(); queueElementEvents(eventList); } else message(ParserMessages::instanceStartOmittag); } currentInput()->ungetToken(); } void Parser::endInstance() { // Do checking before popping entity stack so that there's a // current location for error messages. endAllElements(); while (markedSectionLevel() > 0) { message(ParserMessages::unclosedMarkedSection, currentMarkedSectionStartLocation()); endMarkedSection(); } checkIdrefs(); popInputStack(); allDone(); } void Parser::checkIdrefs() { IdTableIter iter(idTableIter()); Id *id; while ((id = iter.next()) != 0) { for (size_t i = 0; i < id->pendingRefs().size(); i++) { Messenger::setNextLocation(id->pendingRefs()[i]); message(ParserMessages::missingId, StringMessageArg(id->name())); } } } void Parser::doContent() { do { if (cancelled()) { allDone(); return; } Token token = getToken(currentMode()); switch (token) { case tokenEe: if (inputLevel() == 1) { endInstance(); return; } if (inputLevel() == specialParseInputLevel()) { // FIXME have separate messages for each type of special parse // perhaps force end of marked section or element message(ParserMessages::specialParseEntityEnd); } if (eventsWanted().wantInstanceMarkup()) eventHandler().entityEnd(new (eventAllocator()) EntityEndEvent(currentLocation())); if (afterDocumentElement()) message(ParserMessages::afterDocumentElementEntityEnd); if (sd().integrallyStored() && tagLevel() && currentElement().index() != currentInputElementIndex()) message(ParserMessages::contentAsyncEntityRef); popInputStack(); break; case tokenCroDigit: case tokenHcroHexDigit: { if (afterDocumentElement()) message(ParserMessages::characterReferenceAfterDocumentElement); Char ch; Location loc; if (parseNumericCharRef(token == tokenHcroHexDigit, ch, loc)) { acceptPcdata(loc); noteData(); Boolean isSgmlChar; if (!translateNumericCharRef(ch, isSgmlChar)) break; if (!isSgmlChar) { eventHandler().nonSgmlChar(new (eventAllocator()) NonSgmlCharEvent(ch, loc)); break; } eventHandler().data(new (eventAllocator()) ImmediateDataEvent(Event::characterData, &ch, 1, loc, 1)); break; } } break; case tokenCroNameStart: if (afterDocumentElement()) message(ParserMessages::characterReferenceAfterDocumentElement); parseNamedCharRef(); break; case tokenEroGrpo: case tokenEroNameStart: { if (afterDocumentElement()) message(ParserMessages::entityReferenceAfterDocumentElement); ConstPtr entity; Ptr origin; if (parseEntityReference(0, token == tokenEroGrpo, entity, origin)) { if (!entity.isNull()) { if (entity->isCharacterData()) acceptPcdata(Location(origin.pointer(), 0)); if (inputLevel() == specialParseInputLevel()) entity->rcdataReference(*this, origin); else entity->contentReference(*this, origin); } } } break; case tokenEtagoNameStart: acceptEndTag(parseEndTag()); break; case tokenEtagoTagc: parseEmptyEndTag(); break; case tokenEtagoGrpo: parseGroupEndTag(); break; case tokenMdoNameStart: if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation())) currentMarkup()->addDelim(Syntax::dMDO); Syntax::ReservedName name; Boolean result; unsigned startLevel; startLevel = inputLevel(); if (parseDeclarationName(&name)) { switch (name) { case Syntax::rUSEMAP: if (afterDocumentElement()) message(ParserMessages::declarationAfterDocumentElement, StringMessageArg(syntax().reservedName(name))); result = parseUsemapDecl(); break; case Syntax::rUSELINK: if (afterDocumentElement()) message(ParserMessages::declarationAfterDocumentElement, StringMessageArg(syntax().reservedName(name))); result = parseUselinkDecl(); break; case Syntax::rDOCTYPE: case Syntax::rLINKTYPE: case Syntax::rELEMENT: case Syntax::rATTLIST: case Syntax::rENTITY: case Syntax::rNOTATION: case Syntax::rSHORTREF: case Syntax::rLINK: case Syntax::rIDLINK: message(ParserMessages::instanceDeclaration, StringMessageArg(syntax().reservedName(name))); result = 0; break; default: message(ParserMessages::noSuchDeclarationType, StringMessageArg(syntax().reservedName(name))); result = 0; break; } } else result = 0; if (!result) skipDeclaration(startLevel); noteMarkup(); break; case tokenMdoMdc: // empty comment emptyCommentDecl(); noteMarkup(); break; case tokenMdoCom: parseCommentDecl(); noteMarkup(); break; case tokenMdoDso: if (afterDocumentElement()) message(ParserMessages::markedSectionAfterDocumentElement); parseMarkedSectionDeclStart(); noteMarkup(); break; case tokenMscMdc: handleMarkedSectionEnd(); noteMarkup(); break; case tokenNet: parseNullEndTag(); break; case tokenPio: parseProcessingInstruction(); break; case tokenStagoNameStart: parseStartTag(); break; case tokenStagoTagc: parseEmptyStartTag(); break; case tokenStagoGrpo: parseGroupStartTag(); break; case tokenRe: acceptPcdata(currentLocation()); queueRe(currentLocation()); break; case tokenRs: acceptPcdata(currentLocation()); noteRs(); if (eventsWanted().wantInstanceMarkup()) eventHandler().ignoredRs(new (eventAllocator()) IgnoredRsEvent(currentChar(), currentLocation())); break; case tokenS: extendContentS(); if (eventsWanted().wantInstanceMarkup()) eventHandler().sSep(new (eventAllocator()) SSepEvent(currentInput()->currentTokenStart(), currentInput()->currentTokenLength(), currentLocation(), 0)); break; case tokenIgnoredChar: extendData(); if (eventsWanted().wantMarkedSections()) eventHandler().ignoredChars(new (eventAllocator()) IgnoredCharsEvent(currentInput()->currentTokenStart(), currentInput()->currentTokenLength(), currentLocation(), 0)); break; case tokenUnrecognized: reportNonSgmlCharacter(); parsePcdata(); break; case tokenCharDelim: message(ParserMessages::dataCharDelim, StringMessageArg(StringC(currentInput()->currentTokenStart(), currentInput()->currentTokenLength()))); // fall through case tokenChar: parsePcdata(); break; default: ASSERT(token >= tokenFirstShortref); handleShortref(token - tokenFirstShortref); break; } } while (eventQueueEmpty()); } void Parser::skipDeclaration(unsigned startLevel) { const unsigned skipMax = 250; unsigned skipCount = 0; for (;;) { Token token = getToken(mdMode); if (inputLevel() == startLevel) skipCount++; switch (token) { case tokenUnrecognized: (void)getChar(); break; case tokenEe: if (inputLevel() <= startLevel) return; popInputStack(); return; case tokenMdc: if (inputLevel() == startLevel) return; break; case tokenS: if (inputLevel() == startLevel && skipCount >= skipMax && currentChar() == syntax().standardFunction(Syntax::fRE)) return; break; default: break; } } } void Parser::handleShortref(int index) { const ConstPtr &entity = currentElement().map()->entity(index); if (!entity.isNull()) { Owner markupPtr; if (eventsWanted().wantInstanceMarkup()) { markupPtr = new Markup; markupPtr->addShortref(currentInput()); } Ptr origin = EntityOrigin::make(internalAllocator(), entity, currentLocation(), currentInput()->currentTokenLength(), markupPtr); entity->contentReference(*this, origin); return; } InputSource *in = currentInput(); size_t length = in->currentTokenLength(); const Char *s = in->currentTokenStart(); size_t i = 0; if (currentMode() == econMode || currentMode() == econnetMode) { // FIXME do this in advance (what about B sequence?) for (i = 0; i < length && syntax().isS(s[i]); i++) ; if (i > 0 && eventsWanted().wantInstanceMarkup()) eventHandler().sSep(new (eventAllocator()) SSepEvent(s, i, currentLocation(), 0)); } if (i < length) { Location location(currentLocation()); location += i; s += i; length -= i; acceptPcdata(location); if (sd().keeprsre()) { noteData(); eventHandler().data(new (eventAllocator()) ImmediateDataEvent(Event::characterData, s, length, location, 0)); return; } // FIXME speed this up for (; length > 0; location += 1, length--, s++) { if (*s == syntax().standardFunction(Syntax::fRS)) { noteRs(); if (eventsWanted().wantInstanceMarkup()) eventHandler().ignoredRs(new (eventAllocator()) IgnoredRsEvent(*s, location)); } else if (*s == syntax().standardFunction(Syntax::fRE)) queueRe(location); else { noteData(); eventHandler().data(new (eventAllocator()) ImmediateDataEvent(Event::characterData, s, 1, location, 0)); } } } } void Parser::parsePcdata() { extendData(); acceptPcdata(currentLocation()); noteData(); eventHandler().data(new (eventAllocator()) ImmediateDataEvent(Event::characterData, currentInput()->currentTokenStart(), currentInput()->currentTokenLength(), currentLocation(), 0)); } void Parser::parseStartTag() { InputSource *in = currentInput(); Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(), in->currentLocation()); if (markup) markup->addDelim(Syntax::dSTAGO); Boolean netEnabling; StartElementEvent *event = doParseStartTag(netEnabling); acceptStartTag(event->elementType(), event, netEnabling); } StartElementEvent *Parser::doParseStartTag(Boolean &netEnabling) { Markup *markup = currentMarkup(); InputSource *in = currentInput(); in->discardInitial(); extendNameToken(syntax().namelen(), ParserMessages::nameLength); if (markup) markup->addName(in); StringC &name = nameBuffer(); getCurrentToken(syntax().generalSubstTable(), name); ElementType *e = currentDtdNonConst().lookupElementType(name); if (sd().rank()) { if (!e) e = completeRankStem(name); else if (e->isRankedElement()) handleRankedElement(e); } if (!e) e = lookupCreateUndefinedElement(name, currentLocation(), currentDtdNonConst(), (implydefElement() != Sd::implydefElementAnyother)); AttributeList *attributes = allocAttributeList(e->attributeDef(), 0); Token closeToken = getToken(tagMode); if (closeToken == tokenTagc) { if (name.size() > syntax().taglen()) checkTaglen(markupLocation().index()); attributes->finish(*this); netEnabling = 0; if (markup) markup->addDelim(Syntax::dTAGC); } else { in->ungetToken(); Ptr newAttDef; if (parseAttributeSpec(tagMode, *attributes, netEnabling, newAttDef)) { // The difference between the indices will be the difference // in offsets plus 1 for each named character reference. if (in->currentLocation().index() - markupLocation().index() > syntax().taglen()) checkTaglen(markupLocation().index()); } else netEnabling = 0; if (!newAttDef.isNull()) { newAttDef->setIndex(currentDtdNonConst().allocAttributeDefinitionListIndex()); e->setAttributeDef(newAttDef); } } return new (eventAllocator()) StartElementEvent(e, currentDtdPointer(), attributes, markupLocation(), markup); } ElementType *Parser::completeRankStem(const StringC &name) { const RankStem *rankStem = currentDtd().lookupRankStem(name); if (rankStem) { StringC name(rankStem->name()); if (!appendCurrentRank(name, rankStem)) message(ParserMessages::noCurrentRank, StringMessageArg(name)); else return currentDtdNonConst().lookupElementType(name); } return 0; } void Parser::handleRankedElement(const ElementType *e) { StringC rankSuffix(e->definition()->rankSuffix()); const RankStem *rankStem = e->rankedElementRankStem(); for (size_t i = 0; i < rankStem->nDefinitions(); i++) { const ElementDefinition *def = rankStem->definition(i); for (size_t j = 0; j < def->nRankStems(); j++) setCurrentRank(def->rankStem(j), rankSuffix); } } void Parser::checkTaglen(Index tagStartIndex) { const InputSourceOrigin *origin = currentLocation().origin()->asInputSourceOrigin(); ASSERT(origin != 0); if (origin->startOffset(currentLocation().index()) - origin->startOffset(tagStartIndex + syntax().delimGeneral(Syntax::dSTAGO).size()) > syntax().taglen()) message(ParserMessages::taglen, NumberMessageArg(syntax().taglen())); } void Parser::parseEmptyStartTag() { if (options().warnEmptyTag) message(ParserMessages::emptyStartTag); if (!currentDtd().isBase()) message(ParserMessages::emptyStartTagBaseDtd); const ElementType *e = 0; if (!sd().omittag()) e = lastEndedElementType(); else if (tagLevel() > 0) e = currentElement().type(); if (!e) e = currentDtd().documentElementType(); AttributeList *attributes = allocAttributeList(e->attributeDef(), 0); attributes->finish(*this); Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation()); if (markup) { markup->addDelim(Syntax::dSTAGO); markup->addDelim(Syntax::dTAGC); } acceptStartTag(e, new (eventAllocator()) StartElementEvent(e, currentDtdPointer(), attributes, markupLocation(), markup), 0); } void Parser::parseGroupStartTag() { InputSource *in = currentInput(); if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation())) { currentMarkup()->addDelim(Syntax::dSTAGO); currentMarkup()->addDelim(Syntax::dGRPO); } Boolean active; if (!parseTagNameGroup(active, 1)) return; in->startToken(); // Location startLocation = in->currentLocation(); Xchar c = in->tokenChar(messenger()); if (!syntax().isNameStartCharacter(c)) { message(ParserMessages::startTagMissingName); return; } if (active) { Boolean netEnabling; StartElementEvent *event = doParseStartTag(netEnabling); if (netEnabling) message(ParserMessages::startTagGroupNet); acceptStartTag(event->elementType(), event, netEnabling); } else { in->discardInitial(); extendNameToken(syntax().namelen(), ParserMessages::nameLength); if (currentMarkup()) currentMarkup()->addName(currentInput()); skipAttributeSpec(); if (currentMarkup()) eventHandler().ignoredMarkup(new (eventAllocator()) IgnoredMarkupEvent(markupLocation(), currentMarkup())); noteMarkup(); } } void Parser::parseGroupEndTag() { InputSource *in = currentInput(); if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation())) { currentMarkup()->addDelim(Syntax::dSTAGO); currentMarkup()->addDelim(Syntax::dGRPO); } Boolean active; if (!parseTagNameGroup(active, 0)) return; in->startToken(); // Location startLocation = in->currentLocation(); Xchar c = in->tokenChar(messenger()); if (!syntax().isNameStartCharacter(c)) { message(ParserMessages::endTagMissingName); return; } if (active) acceptEndTag(doParseEndTag()); else { in->discardInitial(); extendNameToken(syntax().namelen(), ParserMessages::nameLength); if (currentMarkup()) currentMarkup()->addName(currentInput()); parseEndTagClose(); if (currentMarkup()) eventHandler().ignoredMarkup(new (eventAllocator()) IgnoredMarkupEvent(markupLocation(), currentMarkup())); noteMarkup(); } } void Parser::acceptPcdata(const Location &startLocation) { if (currentElement().tryTransitionPcdata()) return; // Need to test here since implying tags may turn off pcdataRecovering. if (pcdataRecovering()) return; IList undoList; IList eventList; unsigned startImpliedCount = 0; unsigned attributeListIndex = 0; keepMessages(); while (tryImplyTag(startLocation, startImpliedCount, attributeListIndex, undoList, eventList)) if (currentElement().tryTransitionPcdata()) { queueElementEvents(eventList); return; } discardKeptMessages(); undo(undoList); if (validate() || afterDocumentElement()) message(ParserMessages::pcdataNotAllowed); pcdataRecover(); } void Parser::acceptStartTag(const ElementType *e, StartElementEvent *event, Boolean netEnabling) { if (e->definition()->undefined() && (implydefElement() == Sd::implydefElementNo)) message(ParserMessages::undefinedElement, StringMessageArg(e->name())); if (elementIsExcluded(e)) { keepMessages(); if (validate()) checkExclusion(e); } else { if (currentElement().tryTransition(e)) { pushElementCheck(e, event, netEnabling); return; } if (elementIsIncluded(e)) { event->setIncluded(); pushElementCheck(e, event, netEnabling); return; } keepMessages(); } IList undoList; IList eventList; unsigned startImpliedCount = 0; unsigned attributeListIndex = 1; while (tryImplyTag(event->location(), startImpliedCount, attributeListIndex, undoList, eventList)) if (tryStartTag(e, event, netEnabling, eventList)) return; discardKeptMessages(); undo(undoList); if (validate() && !e->definition()->undefined()) handleBadStartTag(e, event, netEnabling); else { if (validate() ? (implydefElement() != Sd::implydefElementNo) : afterDocumentElement()) message(ParserMessages::elementNotAllowed, StringMessageArg(e->name())); // If element couldn't occur because it was excluded, then // do the transition here. (void)currentElement().tryTransition(e); pushElementCheck(e, event, netEnabling); } } void Parser::undo(IList &undoList) { while (!undoList.empty()) { Undo *p = undoList.get(); p->undo(this); delete p; } } void Parser::queueElementEvents(IList &events) { releaseKeptMessages(); // FIXME provide IList::reverse function // reverse it IList tem; while (!events.empty()) tem.insert(events.get()); while (!tem.empty()) { Event *e = tem.get(); if (e->type() == Event::startElement) { noteStartElement(((StartElementEvent *)e)->included()); eventHandler().startElement((StartElementEvent *)e); } else { noteEndElement(((EndElementEvent *)e)->included()); eventHandler().endElement((EndElementEvent *)e); } } } void Parser::checkExclusion(const ElementType *e) { const LeafContentToken *token = currentElement().invalidExclusion(e); if (token) message(ParserMessages::invalidExclusion, OrdinalMessageArg(token->typeIndex() + 1), StringMessageArg(token->elementType()->name()), StringMessageArg(currentElement().type()->name())); } Boolean Parser::tryStartTag(const ElementType *e, StartElementEvent *event, Boolean netEnabling, IList &impliedEvents) { if (elementIsExcluded(e)) { checkExclusion(e); return 0; } if (currentElement().tryTransition(e)) { queueElementEvents(impliedEvents); pushElementCheck(e, event, netEnabling); return 1; } if (elementIsIncluded(e)) { queueElementEvents(impliedEvents); event->setIncluded(); pushElementCheck(e, event, netEnabling); return 1; } return 0; } Boolean Parser::tryImplyTag(const Location &loc, unsigned &startImpliedCount, unsigned &attributeListIndex, IList &undo, IList &eventList) { if (!sd().omittag()) return 0; if (currentElement().isFinished()) { if (tagLevel() == 0) return 0; #if 1 const ElementDefinition *def = currentElement().type()->definition(); if (def && !def->canOmitEndTag()) return 0; #endif // imply an end tag if (startImpliedCount > 0) { message(ParserMessages::startTagEmptyElement, StringMessageArg(currentElement().type()->name())); startImpliedCount--; } #if 0 const ElementDefinition *def = currentElement().type()->definition(); if (def && !def->canOmitEndTag()) message(ParserMessages::omitEndTagDeclare, StringMessageArg(currentElement().type()->name()), currentElement().startLocation()); #endif EndElementEvent *event = new (eventAllocator()) EndElementEvent(currentElement().type(), currentDtdPointer(), loc, 0); eventList.insert(event); undo.insert(new (internalAllocator()) UndoEndTag(popSaveElement())); return 1; } const LeafContentToken *token = currentElement().impliedStartTag(); if (!token) return 0; const ElementType *e = token->elementType(); if (elementIsExcluded(e)) message(ParserMessages::requiredElementExcluded, OrdinalMessageArg(token->typeIndex() + 1), StringMessageArg(e->name()), StringMessageArg(currentElement().type()->name())); if (tagLevel() != 0) undo.insert(new (internalAllocator()) UndoTransition(currentElement().matchState())); currentElement().doRequiredTransition(); const ElementDefinition *def = e->definition(); if (def->declaredContent() != ElementDefinition::modelGroup && def->declaredContent() != ElementDefinition::any) message(ParserMessages::omitStartTagDeclaredContent, StringMessageArg(e->name())); if (def->undefined()) message(ParserMessages::undefinedElement, StringMessageArg(e->name())); else if (!def->canOmitStartTag()) message(ParserMessages::omitStartTagDeclare, StringMessageArg(e->name())); AttributeList *attributes = allocAttributeList(e->attributeDef(), attributeListIndex++); // this will give an error if the element has a required attribute attributes->finish(*this); startImpliedCount++; StartElementEvent *event = new (eventAllocator()) StartElementEvent(e, currentDtdPointer(), attributes, loc, 0); pushElementCheck(e, event, undo, eventList); const int implyCheckLimit = 30; // this is fairly arbitrary if (startImpliedCount > implyCheckLimit && !checkImplyLoop(startImpliedCount)) return 0; return 1; } void Parser::pushElementCheck(const ElementType *e, StartElementEvent *event, Boolean netEnabling) { if (tagLevel() == syntax().taglvl()) message(ParserMessages::taglvlOpenElements, NumberMessageArg(syntax().taglvl())); noteStartElement(event->included()); if (event->mustOmitEnd()) { if (sd().emptyElementNormal()) { Boolean included = event->included(); Location loc(event->location()); eventHandler().startElement(event); endTagEmptyElement(e, netEnabling, included, loc); } else { EndElementEvent *end = new (eventAllocator()) EndElementEvent(e, currentDtdPointer(), event->location(), 0); if (event->included()) { end->setIncluded(); noteEndElement(1); } else noteEndElement(0); eventHandler().startElement(event); eventHandler().endElement(end); } } else { const ShortReferenceMap *map = e->map(); if (!map) map = currentElement().map(); if (options().warnImmediateRecursion && e == currentElement().type()) message(ParserMessages::immediateRecursion); pushElement(new (internalAllocator()) OpenElement(e, netEnabling, event->included(), map, event->location())); // Can't access event after it's passed to the event handler. eventHandler().startElement(event); } } void Parser::endTagEmptyElement(const ElementType *e, Boolean netEnabling, Boolean included, const Location &startLoc) { Token token = getToken(netEnabling ? econnetMode : econMode); switch (token) { case tokenNet: if (netEnabling) { Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation()); if (markup) markup->addDelim(Syntax::dNET); EndElementEvent *end = new (eventAllocator()) EndElementEvent(e, currentDtdPointer(), currentLocation(), markup); if (included) end->setIncluded(); eventHandler().endElement(end); noteEndElement(included); return; } break; case tokenEtagoTagc: { if (options().warnEmptyTag) message(ParserMessages::emptyEndTag); Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation()); if (markup) { markup->addDelim(Syntax::dETAGO); markup->addDelim(Syntax::dTAGC); } EndElementEvent *end = new (eventAllocator()) EndElementEvent(e, currentDtdPointer(), currentLocation(), markup); if (included) end->setIncluded(); eventHandler().endElement(end); noteEndElement(included); return; } case tokenEtagoNameStart: { EndElementEvent *end = parseEndTag(); if (end->elementType() == e) { if (included) end->setIncluded(); eventHandler().endElement(end); noteEndElement(included); return; } if (!elementIsOpen(end->elementType())) { message(ParserMessages::elementNotOpen, StringMessageArg(end->elementType()->name())); delete end; break; } implyEmptyElementEnd(e, included, startLoc); acceptEndTag(end); return; } default: break; } implyEmptyElementEnd(e, included, startLoc); currentInput()->ungetToken(); } void Parser::implyEmptyElementEnd(const ElementType *e, Boolean included, const Location &startLoc) { if (!sd().omittag()) message(ParserMessages::omitEndTagOmittag, StringMessageArg(e->name()), startLoc); else { const ElementDefinition *def = e->definition(); if (def && !def->canOmitEndTag()) message(ParserMessages::omitEndTagDeclare, StringMessageArg(e->name()), startLoc); } EndElementEvent *end = new (eventAllocator()) EndElementEvent(e, currentDtdPointer(), currentLocation(), 0); if (included) end->setIncluded(); noteEndElement(included); eventHandler().endElement(end); } void Parser::pushElementCheck(const ElementType *e, StartElementEvent *event, IList &undoList, IList &eventList) { if (tagLevel() == syntax().taglvl()) message(ParserMessages::taglvlOpenElements, NumberMessageArg(syntax().taglvl())); eventList.insert(event); if (event->mustOmitEnd()) { EndElementEvent *end = new (eventAllocator()) EndElementEvent(e, currentDtdPointer(), event->location(), 0); if (event->included()) end->setIncluded(); eventList.insert(end); } else { undoList.insert(new (internalAllocator()) UndoStartTag); const ShortReferenceMap *map = e->map(); if (!map) map = currentElement().map(); pushElement(new (internalAllocator()) OpenElement(e, 0, event->included(), map, event->location())); } } EndElementEvent *Parser::parseEndTag() { Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation()); if (markup) markup->addDelim(Syntax::dETAGO); return doParseEndTag(); } EndElementEvent *Parser::doParseEndTag() { Markup *markup = currentMarkup(); currentInput()->discardInitial(); extendNameToken(syntax().namelen(), ParserMessages::nameLength); if (markup) markup->addName(currentInput()); StringC &name = nameBuffer(); getCurrentToken(syntax().generalSubstTable(), name); const ElementType *e = currentDtd().lookupElementType(name); if (sd().rank()) { if (!e) e = completeRankStem(name); } if (!e) e = lookupCreateUndefinedElement(name, currentLocation(), currentDtdNonConst(), (implydefElement() != Sd::implydefElementAnyother)); parseEndTagClose(); return new (eventAllocator()) EndElementEvent(e, currentDtdPointer(), markupLocation(), markup); } void Parser::parseEndTagClose() { for (;;) { Token token = getToken(tagMode); switch (token) { case tokenUnrecognized: if (!reportNonSgmlCharacter()) message(ParserMessages::endTagCharacter, StringMessageArg(currentToken())); return; case tokenEe: message(ParserMessages::endTagEntityEnd); return; case tokenEtago: case tokenStago: if (!sd().endTagUnclosed()) message(ParserMessages::unclosedEndTagShorttag); currentInput()->ungetToken(); return; case tokenTagc: if (currentMarkup()) currentMarkup()->addDelim(Syntax::dTAGC); return; case tokenS: if (currentMarkup()) currentMarkup()->addS(currentChar()); break; default: message(ParserMessages::endTagInvalidToken, TokenMessageArg(token, tagMode, syntaxPointer(), sdPointer())); return; } } } void Parser::parseEmptyEndTag() { if (options().warnEmptyTag) message(ParserMessages::emptyEndTag); if (!currentDtd().isBase()) message(ParserMessages::emptyEndTagBaseDtd); if (tagLevel() == 0) message(ParserMessages::emptyEndTagNoOpenElements); else { Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation()); if (markup) { markup->addDelim(Syntax::dETAGO); markup->addDelim(Syntax::dTAGC); } acceptEndTag(new (eventAllocator()) EndElementEvent(currentElement().type(), currentDtdPointer(), currentLocation(), markup)); } } void Parser::parseNullEndTag() { // If a null end tag was recognized, then there must be a net enabling // element on the stack. for (;;) { ASSERT(tagLevel() > 0); if (currentElement().netEnabling()) break; if (!currentElement().isFinished() && validate()) message(ParserMessages::elementNotFinished, StringMessageArg(currentElement().type()->name())); implyCurrentElementEnd(currentLocation()); } if (!currentElement().isFinished() && validate()) message(ParserMessages::elementEndTagNotFinished, StringMessageArg(currentElement().type()->name())); Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation()); if (markup) markup->addDelim(Syntax::dNET); acceptEndTag(new (eventAllocator()) EndElementEvent(currentElement().type(), currentDtdPointer(), currentLocation(), markup)); } void Parser::endAllElements() { while (tagLevel() > 0) { if (!currentElement().isFinished()) message(ParserMessages::elementNotFinishedDocumentEnd, StringMessageArg(currentElement().type()->name())); implyCurrentElementEnd(currentLocation()); } if (!currentElement().isFinished() && validate()) message(ParserMessages::noDocumentElement); } void Parser::acceptEndTag(EndElementEvent *event) { const ElementType *e = event->elementType(); if (!elementIsOpen(e)) { message(ParserMessages::elementNotOpen, StringMessageArg(e->name())); delete event; return; } for (;;){ if (currentElement().type() == e) break; if (!currentElement().isFinished() && validate()) message(ParserMessages::elementNotFinished, StringMessageArg(currentElement().type()->name())); implyCurrentElementEnd(event->location()); } if (!currentElement().isFinished() && validate()) message(ParserMessages::elementEndTagNotFinished, StringMessageArg(currentElement().type()->name())); if (currentElement().included()) event->setIncluded(); noteEndElement(event->included()); eventHandler().endElement(event); popElement(); } void Parser::implyCurrentElementEnd(const Location &loc) { if (!sd().omittag()) message(ParserMessages::omitEndTagOmittag, StringMessageArg(currentElement().type()->name()), currentElement().startLocation()); else { const ElementDefinition *def = currentElement().type()->definition(); if (def && !def->canOmitEndTag()) message(ParserMessages::omitEndTagDeclare, StringMessageArg(currentElement().type()->name()), currentElement().startLocation()); } EndElementEvent *event = new (eventAllocator()) EndElementEvent(currentElement().type(), currentDtdPointer(), loc, 0); if (currentElement().included()) event->setIncluded(); noteEndElement(event->included()); eventHandler().endElement(event); popElement(); } void Parser::extendData() { XcharMap isNormal(normalMap()); InputSource *in = currentInput(); size_t length = in->currentTokenLength(); // This is one of the parser's inner loops, so it needs to be fast. while (isNormal[in->tokenCharInBuffer(messenger())]) length++; in->endToken(length); } void Parser::extendContentS() { InputSource *in = currentInput(); size_t length = in->currentTokenLength(); XcharMap isNormal(normalMap()); for (;;) { Xchar ch = in->tokenChar(messenger()); if (!syntax().isS(ch) || !isNormal[ch]) break; length++; } in->endToken(length); } void Parser::handleBadStartTag(const ElementType *e, StartElementEvent *event, Boolean netEnabling) { IList undoList; IList eventList; keepMessages(); for (;;) { Vector missing; findMissingTag(e, missing); if (missing.size() == 1) { queueElementEvents(eventList); const ElementType *m = missing[0]; message(ParserMessages::missingElementInferred, StringMessageArg(e->name()), StringMessageArg(m->name())); AttributeList *attributes = allocAttributeList(m->attributeDef(), 1); // this will give an error if the element has a required attribute attributes->finish(*this); StartElementEvent *inferEvent = new (eventAllocator()) StartElementEvent(m, currentDtdPointer(), attributes, event->location(), 0); if (!currentElement().tryTransition(m)) inferEvent->setIncluded(); pushElementCheck(m, inferEvent, 0); if (!currentElement().tryTransition(e)) event->setIncluded(); pushElementCheck(e, event, netEnabling); return; } if (missing.size() > 0) { queueElementEvents(eventList); Vector missingNames; for (size_t i = 0; i < missing.size(); i++) missingNames.push_back(missing[i]->name()); message(ParserMessages::missingElementMultiple, StringMessageArg(e->name()), StringVectorMessageArg(missingNames)); pushElementCheck(e, event, netEnabling); return; } if (!sd().omittag() || !currentElement().isFinished() || tagLevel() == 0 || !currentElement().type()->definition()->canOmitEndTag()) break; EndElementEvent *endEvent = new (eventAllocator()) EndElementEvent(currentElement().type(), currentDtdPointer(), event->location(), 0); eventList.insert(endEvent); undoList.insert(new (internalAllocator()) UndoEndTag(popSaveElement())); } discardKeptMessages(); undo(undoList); message(ParserMessages::elementNotAllowed, StringMessageArg(e->name())); // If element couldn't occur because it was excluded, then // do the transition here. (void)currentElement().tryTransition(e); pushElementCheck(e, event, netEnabling); } void Parser::findMissingTag(const ElementType *e, Vector &v) { if (!currentElement().currentPosition()) { if (!e) v.push_back((const ElementType *)0); return; } if (elementIsExcluded(e)) return; size_t newSize = 0; currentElement().matchState().possibleTransitions(v); // FIXME also get currentInclusions for (size_t i = 0; i < v.size(); i++) { if (v[i] && !elementIsExcluded(v[i])) { Boolean success = 0; switch (v[i]->definition()->declaredContent()) { case ElementDefinition::modelGroup: { const CompiledModelGroup *grp = v[i]->definition()->compiledModelGroup(); MatchState state(grp); if (!e) { if (state.tryTransitionPcdata()) success = 1; } else { if (state.tryTransition(e)) success = 1; if (!success) { for (size_t j = 0; j < v[i]->definition()->nInclusions(); j++) if (v[i]->definition()->inclusion(j) == e) { success = 1; break; } } if (success) { for (size_t j = 0; j < v[i]->definition()->nExclusions(); j++) if (v[i]->definition()->exclusion(j) == e) { success = 0; break; } } } } break; #if 0 case ElementDefinition::any: success = 1; break; #endif case ElementDefinition::cdata: case ElementDefinition::rcdata: if (e == 0) success = 1; break; default: break; } if (success) v[newSize++] = v[i]; } } v.resize(newSize); // Sort them according to the order of their occurrence in the DTD. // Do an insertion sort. for (size_t i = 1; i < v.size(); i++) { const ElementType *tem = v[i]; size_t j; for (j = i; j > 0 && v[j - 1]->index() > tem->index(); j--) v[j] = v[j - 1]; v[j] = tem; } } #if 0 // This produces messages that are too verbose // This doesn't try to be very efficient. // 0 for #pcdata void Parser::getAllowedElementTypes(Vector &v) { v.clear(); // FIXME get a list of all inclusions first // getCurrentInclusions(v); // x says whether each element of v was excluded Vector x(v.size(), 0); unsigned startImpliedCount = 0; IList undoList; for (;;) { if (currentElement().currentPosition()) { // have a model group size_t i = v.size(); currentElement().matchState().possibleTransitions(v); x.resize(v.size()); for (size_t j = i; j < v.size(); j++) x[j] = (v[j] && elementIsExcluded(v[j])); if (!sd().omittag()) break; // Try to imply a tag if (currentElement().isFinished()) { if (tagLevel() == 0) break; if (startImpliedCount) break; const ElementDefinition *def = currentElement().type()->definition(); if (def && def->canOmitEndTag()) undoList.insert(new (internalAllocator()) UndoEndTag(popSaveElement())); else break; } else { const LeafContentToken *token = currentElement().impliedStartTag(); if (!token) break; const ElementType *e = token->elementType(); if (elementIsExcluded(e)) break; const ElementDefinition *def = e->definition(); if (!def || def->undefined() || (def->declaredContent() != ElementDefinition::modelGroup && def->declaredContent() != ElementDefinition::any) || !def->canOmitStartTag()) break; undoList.insert(new (internalAllocator()) UndoStartTag); startImpliedCount++; pushElement(new (internalAllocator()) OpenElement(e, 0, 0, 0, Location())); if (checkImplyLoop(startImpliedCount)) break; for (size_t i = 0; i < def->nInclusions(); i++) if (!elementIsExcluded(def->inclusion(i))) { v.push_back(def->inclusion(i)); x.push_back(0); } } } else { // must be allowed #pcdata v.push_back((const ElementType *)0); x.push_back((PackedBoolean)0); break; } } undo(undoList); // Remove exclusions and duplicates and undefined size_t newSize = 0; for (size_t i = 0; i < v.size(); i++) if (!x[i] && (!v[i] || !v[i]->definition()->undefined())) { Boolean dup = 0; for (size_t j = 0; j < newSize; j++) if (v[i] == v[j]) { dup = 1; break; } if (!dup) v[newSize++] = v[i]; } v.resize(newSize); } #endif #ifdef SP_NAMESPACE } #endif