1443 lines
40 KiB
C++
1443 lines
40 KiB
C++
|
// Copyright (c) 1994 James Clark, 1999 Matthias Clasen
|
||
|
// See the file COPYING for copying permission.
|
||
|
|
||
|
#include "splib.h"
|
||
|
#include "Parser.h"
|
||
|
#include "ParserMessages.h"
|
||
|
#include "MessageArg.h"
|
||
|
#include "TokenMessageArg.h"
|
||
|
#include "token.h"
|
||
|
#include "macros.h"
|
||
|
|
||
|
#ifdef SP_NAMESPACE
|
||
|
namespace SP_NAMESPACE {
|
||
|
#endif
|
||
|
|
||
|
void Parser::doInstanceStart()
|
||
|
{
|
||
|
if (cancelled()) {
|
||
|
allDone();
|
||
|
return;
|
||
|
}
|
||
|
// FIXME check here that we have a valid dtd
|
||
|
compileInstanceModes();
|
||
|
setPhase(contentPhase);
|
||
|
Token token = getToken(currentMode());
|
||
|
switch (token) {
|
||
|
case tokenEe:
|
||
|
case tokenStagoNameStart:
|
||
|
case tokenStagoTagc:
|
||
|
case tokenStagoGrpo:
|
||
|
case tokenEtagoNameStart:
|
||
|
case tokenEtagoTagc:
|
||
|
case tokenEtagoGrpo:
|
||
|
break;
|
||
|
default:
|
||
|
if (sd().omittag()) {
|
||
|
unsigned startImpliedCount = 0;
|
||
|
unsigned attributeListIndex = 0;
|
||
|
IList<Undo> undoList;
|
||
|
IList<Event> eventList;
|
||
|
if (!tryImplyTag(currentLocation(),
|
||
|
startImpliedCount,
|
||
|
attributeListIndex,
|
||
|
undoList,
|
||
|
eventList))
|
||
|
CANNOT_HAPPEN();
|
||
|
queueElementEvents(eventList);
|
||
|
}
|
||
|
else
|
||
|
message(ParserMessages::instanceStartOmittag);
|
||
|
}
|
||
|
currentInput()->ungetToken();
|
||
|
}
|
||
|
|
||
|
void Parser::endInstance()
|
||
|
{
|
||
|
// Do checking before popping entity stack so that there's a
|
||
|
// current location for error messages.
|
||
|
endAllElements();
|
||
|
while (markedSectionLevel() > 0) {
|
||
|
message(ParserMessages::unclosedMarkedSection,
|
||
|
currentMarkedSectionStartLocation());
|
||
|
endMarkedSection();
|
||
|
}
|
||
|
checkIdrefs();
|
||
|
popInputStack();
|
||
|
allDone();
|
||
|
}
|
||
|
|
||
|
void Parser::checkIdrefs()
|
||
|
{
|
||
|
IdTableIter iter(idTableIter());
|
||
|
Id *id;
|
||
|
while ((id = iter.next()) != 0) {
|
||
|
for (size_t i = 0; i < id->pendingRefs().size(); i++) {
|
||
|
Messenger::setNextLocation(id->pendingRefs()[i]);
|
||
|
message(ParserMessages::missingId, StringMessageArg(id->name()));
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void Parser::doContent()
|
||
|
{
|
||
|
do {
|
||
|
if (cancelled()) {
|
||
|
allDone();
|
||
|
return;
|
||
|
}
|
||
|
Token token = getToken(currentMode());
|
||
|
switch (token) {
|
||
|
case tokenEe:
|
||
|
if (inputLevel() == 1) {
|
||
|
endInstance();
|
||
|
return;
|
||
|
}
|
||
|
if (inputLevel() == specialParseInputLevel()) {
|
||
|
// FIXME have separate messages for each type of special parse
|
||
|
// perhaps force end of marked section or element
|
||
|
message(ParserMessages::specialParseEntityEnd);
|
||
|
}
|
||
|
if (eventsWanted().wantInstanceMarkup())
|
||
|
eventHandler().entityEnd(new (eventAllocator())
|
||
|
EntityEndEvent(currentLocation()));
|
||
|
if (afterDocumentElement())
|
||
|
message(ParserMessages::afterDocumentElementEntityEnd);
|
||
|
if (sd().integrallyStored()
|
||
|
&& tagLevel()
|
||
|
&& currentElement().index() != currentInputElementIndex())
|
||
|
message(ParserMessages::contentAsyncEntityRef);
|
||
|
popInputStack();
|
||
|
break;
|
||
|
case tokenCroDigit:
|
||
|
case tokenHcroHexDigit:
|
||
|
{
|
||
|
if (afterDocumentElement())
|
||
|
message(ParserMessages::characterReferenceAfterDocumentElement);
|
||
|
Char ch;
|
||
|
Location loc;
|
||
|
if (parseNumericCharRef(token == tokenHcroHexDigit, ch, loc)) {
|
||
|
acceptPcdata(loc);
|
||
|
noteData();
|
||
|
Boolean isSgmlChar;
|
||
|
if (!translateNumericCharRef(ch, isSgmlChar))
|
||
|
break;
|
||
|
if (!isSgmlChar) {
|
||
|
eventHandler().nonSgmlChar(new (eventAllocator())
|
||
|
NonSgmlCharEvent(ch, loc));
|
||
|
break;
|
||
|
}
|
||
|
eventHandler().data(new (eventAllocator())
|
||
|
ImmediateDataEvent(Event::characterData,
|
||
|
&ch, 1, loc, 1));
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
case tokenCroNameStart:
|
||
|
if (afterDocumentElement())
|
||
|
message(ParserMessages::characterReferenceAfterDocumentElement);
|
||
|
parseNamedCharRef();
|
||
|
break;
|
||
|
case tokenEroGrpo:
|
||
|
case tokenEroNameStart:
|
||
|
{
|
||
|
if (afterDocumentElement())
|
||
|
message(ParserMessages::entityReferenceAfterDocumentElement);
|
||
|
ConstPtr<Entity> entity;
|
||
|
Ptr<EntityOrigin> origin;
|
||
|
if (parseEntityReference(0, token == tokenEroGrpo, entity, origin)) {
|
||
|
if (!entity.isNull()) {
|
||
|
if (entity->isCharacterData())
|
||
|
acceptPcdata(Location(origin.pointer(), 0));
|
||
|
if (inputLevel() == specialParseInputLevel())
|
||
|
entity->rcdataReference(*this, origin);
|
||
|
else
|
||
|
entity->contentReference(*this, origin);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
case tokenEtagoNameStart:
|
||
|
acceptEndTag(parseEndTag());
|
||
|
break;
|
||
|
case tokenEtagoTagc:
|
||
|
parseEmptyEndTag();
|
||
|
break;
|
||
|
case tokenEtagoGrpo:
|
||
|
parseGroupEndTag();
|
||
|
break;
|
||
|
case tokenMdoNameStart:
|
||
|
if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation()))
|
||
|
currentMarkup()->addDelim(Syntax::dMDO);
|
||
|
Syntax::ReservedName name;
|
||
|
Boolean result;
|
||
|
unsigned startLevel;
|
||
|
startLevel = inputLevel();
|
||
|
if (parseDeclarationName(&name)) {
|
||
|
switch (name) {
|
||
|
case Syntax::rUSEMAP:
|
||
|
if (afterDocumentElement())
|
||
|
message(ParserMessages::declarationAfterDocumentElement,
|
||
|
StringMessageArg(syntax().reservedName(name)));
|
||
|
result = parseUsemapDecl();
|
||
|
break;
|
||
|
case Syntax::rUSELINK:
|
||
|
if (afterDocumentElement())
|
||
|
message(ParserMessages::declarationAfterDocumentElement,
|
||
|
StringMessageArg(syntax().reservedName(name)));
|
||
|
result = parseUselinkDecl();
|
||
|
break;
|
||
|
case Syntax::rDOCTYPE:
|
||
|
case Syntax::rLINKTYPE:
|
||
|
case Syntax::rELEMENT:
|
||
|
case Syntax::rATTLIST:
|
||
|
case Syntax::rENTITY:
|
||
|
case Syntax::rNOTATION:
|
||
|
case Syntax::rSHORTREF:
|
||
|
case Syntax::rLINK:
|
||
|
case Syntax::rIDLINK:
|
||
|
message(ParserMessages::instanceDeclaration,
|
||
|
StringMessageArg(syntax().reservedName(name)));
|
||
|
result = 0;
|
||
|
break;
|
||
|
default:
|
||
|
message(ParserMessages::noSuchDeclarationType,
|
||
|
StringMessageArg(syntax().reservedName(name)));
|
||
|
result = 0;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
result = 0;
|
||
|
if (!result)
|
||
|
skipDeclaration(startLevel);
|
||
|
noteMarkup();
|
||
|
break;
|
||
|
case tokenMdoMdc:
|
||
|
// empty comment
|
||
|
emptyCommentDecl();
|
||
|
noteMarkup();
|
||
|
break;
|
||
|
case tokenMdoCom:
|
||
|
parseCommentDecl();
|
||
|
noteMarkup();
|
||
|
break;
|
||
|
case tokenMdoDso:
|
||
|
if (afterDocumentElement())
|
||
|
message(ParserMessages::markedSectionAfterDocumentElement);
|
||
|
parseMarkedSectionDeclStart();
|
||
|
noteMarkup();
|
||
|
break;
|
||
|
case tokenMscMdc:
|
||
|
handleMarkedSectionEnd();
|
||
|
noteMarkup();
|
||
|
break;
|
||
|
case tokenNet:
|
||
|
parseNullEndTag();
|
||
|
break;
|
||
|
case tokenPio:
|
||
|
parseProcessingInstruction();
|
||
|
break;
|
||
|
case tokenStagoNameStart:
|
||
|
parseStartTag();
|
||
|
break;
|
||
|
case tokenStagoTagc:
|
||
|
parseEmptyStartTag();
|
||
|
break;
|
||
|
case tokenStagoGrpo:
|
||
|
parseGroupStartTag();
|
||
|
break;
|
||
|
case tokenRe:
|
||
|
acceptPcdata(currentLocation());
|
||
|
queueRe(currentLocation());
|
||
|
break;
|
||
|
case tokenRs:
|
||
|
acceptPcdata(currentLocation());
|
||
|
noteRs();
|
||
|
if (eventsWanted().wantInstanceMarkup())
|
||
|
eventHandler().ignoredRs(new (eventAllocator())
|
||
|
IgnoredRsEvent(currentChar(),
|
||
|
currentLocation()));
|
||
|
break;
|
||
|
case tokenS:
|
||
|
extendContentS();
|
||
|
if (eventsWanted().wantInstanceMarkup())
|
||
|
eventHandler().sSep(new (eventAllocator())
|
||
|
SSepEvent(currentInput()->currentTokenStart(),
|
||
|
currentInput()->currentTokenLength(),
|
||
|
currentLocation(),
|
||
|
0));
|
||
|
break;
|
||
|
case tokenIgnoredChar:
|
||
|
extendData();
|
||
|
if (eventsWanted().wantMarkedSections())
|
||
|
eventHandler().ignoredChars(new (eventAllocator())
|
||
|
IgnoredCharsEvent(currentInput()->currentTokenStart(),
|
||
|
currentInput()->currentTokenLength(),
|
||
|
currentLocation(),
|
||
|
0));
|
||
|
break;
|
||
|
case tokenUnrecognized:
|
||
|
reportNonSgmlCharacter();
|
||
|
parsePcdata();
|
||
|
break;
|
||
|
case tokenCharDelim:
|
||
|
message(ParserMessages::dataCharDelim,
|
||
|
StringMessageArg(StringC(currentInput()->currentTokenStart(),
|
||
|
currentInput()->currentTokenLength())));
|
||
|
// fall through
|
||
|
case tokenChar:
|
||
|
parsePcdata();
|
||
|
break;
|
||
|
default:
|
||
|
ASSERT(token >= tokenFirstShortref);
|
||
|
handleShortref(token - tokenFirstShortref);
|
||
|
break;
|
||
|
}
|
||
|
} while (eventQueueEmpty());
|
||
|
}
|
||
|
|
||
|
void Parser::skipDeclaration(unsigned startLevel)
|
||
|
{
|
||
|
const unsigned skipMax = 250;
|
||
|
unsigned skipCount = 0;
|
||
|
for (;;) {
|
||
|
Token token = getToken(mdMode);
|
||
|
if (inputLevel() == startLevel)
|
||
|
skipCount++;
|
||
|
switch (token) {
|
||
|
case tokenUnrecognized:
|
||
|
(void)getChar();
|
||
|
break;
|
||
|
case tokenEe:
|
||
|
if (inputLevel() <= startLevel)
|
||
|
return;
|
||
|
popInputStack();
|
||
|
return;
|
||
|
case tokenMdc:
|
||
|
if (inputLevel() == startLevel)
|
||
|
return;
|
||
|
break;
|
||
|
case tokenS:
|
||
|
if (inputLevel() == startLevel && skipCount >= skipMax
|
||
|
&& currentChar() == syntax().standardFunction(Syntax::fRE))
|
||
|
return;
|
||
|
break;
|
||
|
default:
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void Parser::handleShortref(int index)
|
||
|
{
|
||
|
const ConstPtr<Entity> &entity
|
||
|
= currentElement().map()->entity(index);
|
||
|
if (!entity.isNull()) {
|
||
|
Owner<Markup> markupPtr;
|
||
|
if (eventsWanted().wantInstanceMarkup()) {
|
||
|
markupPtr = new Markup;
|
||
|
markupPtr->addShortref(currentInput());
|
||
|
}
|
||
|
Ptr<EntityOrigin> origin
|
||
|
= EntityOrigin::make(internalAllocator(),
|
||
|
entity,
|
||
|
currentLocation(),
|
||
|
currentInput()->currentTokenLength(),
|
||
|
markupPtr);
|
||
|
entity->contentReference(*this, origin);
|
||
|
return;
|
||
|
}
|
||
|
InputSource *in = currentInput();
|
||
|
size_t length = in->currentTokenLength();
|
||
|
const Char *s = in->currentTokenStart();
|
||
|
size_t i = 0;
|
||
|
if (currentMode() == econMode || currentMode() == econnetMode) {
|
||
|
// FIXME do this in advance (what about B sequence?)
|
||
|
for (i = 0; i < length && syntax().isS(s[i]); i++)
|
||
|
;
|
||
|
if (i > 0 && eventsWanted().wantInstanceMarkup())
|
||
|
eventHandler().sSep(new (eventAllocator())
|
||
|
SSepEvent(s, i, currentLocation(), 0));
|
||
|
}
|
||
|
if (i < length) {
|
||
|
Location location(currentLocation());
|
||
|
location += i;
|
||
|
s += i;
|
||
|
length -= i;
|
||
|
acceptPcdata(location);
|
||
|
if (sd().keeprsre()) {
|
||
|
noteData();
|
||
|
eventHandler().data(new (eventAllocator())
|
||
|
ImmediateDataEvent(Event::characterData, s, length,
|
||
|
location, 0));
|
||
|
return;
|
||
|
}
|
||
|
// FIXME speed this up
|
||
|
for (; length > 0; location += 1, length--, s++) {
|
||
|
if (*s == syntax().standardFunction(Syntax::fRS)) {
|
||
|
noteRs();
|
||
|
if (eventsWanted().wantInstanceMarkup())
|
||
|
eventHandler().ignoredRs(new (eventAllocator())
|
||
|
IgnoredRsEvent(*s, location));
|
||
|
}
|
||
|
else if (*s == syntax().standardFunction(Syntax::fRE))
|
||
|
queueRe(location);
|
||
|
else {
|
||
|
noteData();
|
||
|
eventHandler().data(new (eventAllocator())
|
||
|
ImmediateDataEvent(Event::characterData, s, 1,
|
||
|
location, 0));
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void Parser::parsePcdata()
|
||
|
{
|
||
|
extendData();
|
||
|
acceptPcdata(currentLocation());
|
||
|
noteData();
|
||
|
eventHandler().data(new (eventAllocator())
|
||
|
ImmediateDataEvent(Event::characterData,
|
||
|
currentInput()->currentTokenStart(),
|
||
|
currentInput()->currentTokenLength(),
|
||
|
currentLocation(),
|
||
|
0));
|
||
|
}
|
||
|
|
||
|
void Parser::parseStartTag()
|
||
|
{
|
||
|
InputSource *in = currentInput();
|
||
|
Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
|
||
|
in->currentLocation());
|
||
|
if (markup)
|
||
|
markup->addDelim(Syntax::dSTAGO);
|
||
|
Boolean netEnabling;
|
||
|
StartElementEvent *event = doParseStartTag(netEnabling);
|
||
|
acceptStartTag(event->elementType(), event, netEnabling);
|
||
|
}
|
||
|
|
||
|
// Parse the generic identifier and attribute specification list of a
// start-tag and build the corresponding StartElementEvent.
//
// netEnabling is set to 0 when the tag is closed directly by TAGC or when
// parseAttributeSpec() fails; otherwise it is whatever parseAttributeSpec()
// stored in it.  An element type that cannot be found (also not through a
// rank stem when RANK is enabled) is created as an undefined element.
StartElementEvent *Parser::doParseStartTag(Boolean &netEnabling)
{
  Markup *markup = currentMarkup();
  InputSource *in = currentInput();
  in->discardInitial();
  extendNameToken(syntax().namelen(), ParserMessages::nameLength);
  if (markup)
    markup->addName(in);

  StringC &gi = nameBuffer();
  getCurrentToken(syntax().generalSubstTable(), gi);
  ElementType *elementType = currentDtdNonConst().lookupElementType(gi);
  if (sd().rank()) {
    if (elementType) {
      if (elementType->isRankedElement())
        handleRankedElement(elementType);
    }
    else
      elementType = completeRankStem(gi);
  }
  if (!elementType) {
    elementType = lookupCreateUndefinedElement(
      gi, currentLocation(), currentDtdNonConst(),
      (implydefElement() != Sd::implydefElementAnyother));
  }

  AttributeList *attributes
    = allocAttributeList(elementType->attributeDef(), 0);
  if (getToken(tagMode) != tokenTagc) {
    // Something other than TAGC follows the name: parse attributes.
    in->ungetToken();
    Ptr<AttributeDefinitionList> newAttDef;
    if (!parseAttributeSpec(tagMode, *attributes, netEnabling, newAttDef))
      netEnabling = 0;
    else if (in->currentLocation().index() - markupLocation().index()
             > syntax().taglen()) {
      // The difference between the indices will be the difference
      // in offsets plus 1 for each named character reference.
      checkTaglen(markupLocation().index());
    }
    if (!newAttDef.isNull()) {
      newAttDef->setIndex(
        currentDtdNonConst().allocAttributeDefinitionListIndex());
      elementType->setAttributeDef(newAttDef);
    }
  }
  else {
    // The tag consists of the name only.
    if (gi.size() > syntax().taglen())
      checkTaglen(markupLocation().index());
    attributes->finish(*this);
    netEnabling = 0;
    if (markup)
      markup->addDelim(Syntax::dTAGC);
  }

  return new (eventAllocator()) StartElementEvent(elementType,
                                                  currentDtdPointer(),
                                                  attributes,
                                                  markupLocation(),
                                                  markup);
}
|
||
|
|
||
|
// Resolve a rank stem to an element type using the current rank.
//
// Returns 0 when `name` is not a rank stem in the current DTD or when no
// current rank can be appended (noCurrentRank is reported in that case);
// otherwise returns the result of looking up the stem name with the
// current rank appended.
ElementType *Parser::completeRankStem(const StringC &name)
{
  const RankStem *stem = currentDtd().lookupRankStem(name);
  if (!stem)
    return 0;

  StringC rankedName(stem->name());
  if (appendCurrentRank(rankedName, stem))
    return currentDtdNonConst().lookupElementType(rankedName);

  message(ParserMessages::noCurrentRank, StringMessageArg(rankedName));
  return 0;
}
|
||
|
|
||
|
void Parser::handleRankedElement(const ElementType *e)
|
||
|
{
|
||
|
StringC rankSuffix(e->definition()->rankSuffix());
|
||
|
const RankStem *rankStem = e->rankedElementRankStem();
|
||
|
for (size_t i = 0; i < rankStem->nDefinitions(); i++) {
|
||
|
const ElementDefinition *def = rankStem->definition(i);
|
||
|
for (size_t j = 0; j < def->nRankStems(); j++)
|
||
|
setCurrentRank(def->rankStem(j), rankSuffix);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Report a taglen message when the tag is longer than TAGLEN.
//
// The length is the difference between the start offset of the current
// location and the start offset of the position just after the STAGO
// delimiter, where the tag begins at tagStartIndex.
void Parser::checkTaglen(Index tagStartIndex)
{
  const InputSourceOrigin *sourceOrigin
    = currentLocation().origin()->asInputSourceOrigin();
  ASSERT(sourceOrigin != 0);

  const bool tooLong
    = (sourceOrigin->startOffset(currentLocation().index())
       - sourceOrigin->startOffset(tagStartIndex
                                   + syntax().delimGeneral(Syntax::dSTAGO).size())
       > syntax().taglen());
  if (!tooLong)
    return;
  message(ParserMessages::taglen, NumberMessageArg(syntax().taglen()));
}
|
||
|
|
||
|
void Parser::parseEmptyStartTag()
|
||
|
{
|
||
|
if (options().warnEmptyTag)
|
||
|
message(ParserMessages::emptyStartTag);
|
||
|
if (!currentDtd().isBase())
|
||
|
message(ParserMessages::emptyStartTagBaseDtd);
|
||
|
const ElementType *e = 0;
|
||
|
if (!sd().omittag())
|
||
|
e = lastEndedElementType();
|
||
|
else if (tagLevel() > 0)
|
||
|
e = currentElement().type();
|
||
|
if (!e)
|
||
|
e = currentDtd().documentElementType();
|
||
|
AttributeList *attributes = allocAttributeList(e->attributeDef(), 0);
|
||
|
attributes->finish(*this);
|
||
|
Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
|
||
|
currentLocation());
|
||
|
if (markup) {
|
||
|
markup->addDelim(Syntax::dSTAGO);
|
||
|
markup->addDelim(Syntax::dTAGC);
|
||
|
}
|
||
|
acceptStartTag(e,
|
||
|
new (eventAllocator())
|
||
|
StartElementEvent(e,
|
||
|
currentDtdPointer(),
|
||
|
attributes,
|
||
|
markupLocation(),
|
||
|
markup),
|
||
|
0);
|
||
|
}
|
||
|
|
||
|
void Parser::parseGroupStartTag()
|
||
|
{
|
||
|
InputSource *in = currentInput();
|
||
|
if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation())) {
|
||
|
currentMarkup()->addDelim(Syntax::dSTAGO);
|
||
|
currentMarkup()->addDelim(Syntax::dGRPO);
|
||
|
}
|
||
|
Boolean active;
|
||
|
if (!parseTagNameGroup(active, 1))
|
||
|
return;
|
||
|
in->startToken();
|
||
|
// Location startLocation = in->currentLocation();
|
||
|
Xchar c = in->tokenChar(messenger());
|
||
|
if (!syntax().isNameStartCharacter(c)) {
|
||
|
message(ParserMessages::startTagMissingName);
|
||
|
return;
|
||
|
}
|
||
|
if (active) {
|
||
|
Boolean netEnabling;
|
||
|
StartElementEvent *event = doParseStartTag(netEnabling);
|
||
|
if (netEnabling)
|
||
|
message(ParserMessages::startTagGroupNet);
|
||
|
acceptStartTag(event->elementType(), event, netEnabling);
|
||
|
}
|
||
|
else {
|
||
|
in->discardInitial();
|
||
|
|
||
|
extendNameToken(syntax().namelen(), ParserMessages::nameLength);
|
||
|
if (currentMarkup())
|
||
|
currentMarkup()->addName(currentInput());
|
||
|
skipAttributeSpec();
|
||
|
if (currentMarkup())
|
||
|
eventHandler().ignoredMarkup(new (eventAllocator())
|
||
|
IgnoredMarkupEvent(markupLocation(),
|
||
|
currentMarkup()));
|
||
|
noteMarkup();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void Parser::parseGroupEndTag()
|
||
|
{
|
||
|
InputSource *in = currentInput();
|
||
|
if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation())) {
|
||
|
currentMarkup()->addDelim(Syntax::dSTAGO);
|
||
|
currentMarkup()->addDelim(Syntax::dGRPO);
|
||
|
}
|
||
|
Boolean active;
|
||
|
if (!parseTagNameGroup(active, 0))
|
||
|
return;
|
||
|
in->startToken();
|
||
|
// Location startLocation = in->currentLocation();
|
||
|
Xchar c = in->tokenChar(messenger());
|
||
|
if (!syntax().isNameStartCharacter(c)) {
|
||
|
message(ParserMessages::endTagMissingName);
|
||
|
return;
|
||
|
}
|
||
|
if (active)
|
||
|
acceptEndTag(doParseEndTag());
|
||
|
else {
|
||
|
in->discardInitial();
|
||
|
extendNameToken(syntax().namelen(), ParserMessages::nameLength);
|
||
|
if (currentMarkup())
|
||
|
currentMarkup()->addName(currentInput());
|
||
|
parseEndTagClose();
|
||
|
if (currentMarkup())
|
||
|
eventHandler().ignoredMarkup(new (eventAllocator())
|
||
|
IgnoredMarkupEvent(markupLocation(),
|
||
|
currentMarkup()));
|
||
|
noteMarkup();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void Parser::acceptPcdata(const Location &startLocation)
|
||
|
{
|
||
|
if (currentElement().tryTransitionPcdata())
|
||
|
return;
|
||
|
// Need to test here since implying tags may turn off pcdataRecovering.
|
||
|
if (pcdataRecovering())
|
||
|
return;
|
||
|
IList<Undo> undoList;
|
||
|
IList<Event> eventList;
|
||
|
unsigned startImpliedCount = 0;
|
||
|
unsigned attributeListIndex = 0;
|
||
|
keepMessages();
|
||
|
while (tryImplyTag(startLocation, startImpliedCount, attributeListIndex,
|
||
|
undoList, eventList))
|
||
|
if (currentElement().tryTransitionPcdata()) {
|
||
|
queueElementEvents(eventList);
|
||
|
return;
|
||
|
}
|
||
|
discardKeptMessages();
|
||
|
undo(undoList);
|
||
|
if (validate() || afterDocumentElement())
|
||
|
message(ParserMessages::pcdataNotAllowed);
|
||
|
pcdataRecover();
|
||
|
}
|
||
|
|
||
|
void Parser::acceptStartTag(const ElementType *e,
|
||
|
StartElementEvent *event,
|
||
|
Boolean netEnabling)
|
||
|
{
|
||
|
if (e->definition()->undefined() && (implydefElement() == Sd::implydefElementNo))
|
||
|
message(ParserMessages::undefinedElement, StringMessageArg(e->name()));
|
||
|
if (elementIsExcluded(e)) {
|
||
|
keepMessages();
|
||
|
if (validate())
|
||
|
checkExclusion(e);
|
||
|
}
|
||
|
else {
|
||
|
if (currentElement().tryTransition(e)) {
|
||
|
pushElementCheck(e, event, netEnabling);
|
||
|
return;
|
||
|
}
|
||
|
if (elementIsIncluded(e)) {
|
||
|
event->setIncluded();
|
||
|
pushElementCheck(e, event, netEnabling);
|
||
|
return;
|
||
|
}
|
||
|
keepMessages();
|
||
|
}
|
||
|
IList<Undo> undoList;
|
||
|
IList<Event> eventList;
|
||
|
unsigned startImpliedCount = 0;
|
||
|
unsigned attributeListIndex = 1;
|
||
|
while (tryImplyTag(event->location(), startImpliedCount,
|
||
|
attributeListIndex, undoList, eventList))
|
||
|
if (tryStartTag(e, event, netEnabling, eventList))
|
||
|
return;
|
||
|
discardKeptMessages();
|
||
|
undo(undoList);
|
||
|
if (validate() && !e->definition()->undefined())
|
||
|
handleBadStartTag(e, event, netEnabling);
|
||
|
else {
|
||
|
if (validate() ? (implydefElement() != Sd::implydefElementNo) : afterDocumentElement())
|
||
|
message(ParserMessages::elementNotAllowed, StringMessageArg(e->name()));
|
||
|
// If element couldn't occur because it was excluded, then
|
||
|
// do the transition here.
|
||
|
(void)currentElement().tryTransition(e);
|
||
|
pushElementCheck(e, event, netEnabling);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Apply and delete every entry of undoList, taking entries from the list
// one at a time until it is empty.
void Parser::undo(IList<Undo> &undoList)
{
  for (; !undoList.empty(); ) {
    Undo *action = undoList.get();
    action->undo(this);
    delete action;
  }
}
|
||
|
|
||
|
// Release kept messages and pass the given start/end element events to
// the event handler in the reverse of the order in which they are taken
// from `events`.  `events` is left empty.
void Parser::queueElementEvents(IList<Event> &events)
{
  releaseKeptMessages();
  // FIXME provide IList<T>::reverse function
  // Moving every entry into a second list reverses the order.
  IList<Event> reversed;
  for (; !events.empty(); )
    reversed.insert(events.get());

  for (; !reversed.empty(); ) {
    Event *next = reversed.get();
    if (next->type() != Event::startElement) {
      EndElementEvent *endEvent = static_cast<EndElementEvent *>(next);
      noteEndElement(endEvent->included());
      eventHandler().endElement(endEvent);
    }
    else {
      StartElementEvent *startEvent = static_cast<StartElementEvent *>(next);
      noteStartElement(startEvent->included());
      eventHandler().startElement(startEvent);
    }
  }
}
|
||
|
|
||
|
void Parser::checkExclusion(const ElementType *e)
|
||
|
{
|
||
|
const LeafContentToken *token = currentElement().invalidExclusion(e);
|
||
|
if (token)
|
||
|
message(ParserMessages::invalidExclusion,
|
||
|
OrdinalMessageArg(token->typeIndex() + 1),
|
||
|
StringMessageArg(token->elementType()->name()),
|
||
|
StringMessageArg(currentElement().type()->name()));
|
||
|
}
|
||
|
|
||
|
// Try to start element e in the current (possibly implied) context.
//
// Returns 0 when e is excluded (after checkExclusion()) or is neither
// allowed by the content model nor an applicable inclusion.  Otherwise
// the implied events are queued, the element is pushed and 1 is returned.
Boolean Parser::tryStartTag(const ElementType *e,
                            StartElementEvent *event,
                            Boolean netEnabling,
                            IList<Event> &impliedEvents)
{
  if (elementIsExcluded(e)) {
    checkExclusion(e);
    return 0;
  }
  const bool allowedByModel = currentElement().tryTransition(e);
  if (!allowedByModel && !elementIsIncluded(e))
    return 0;

  queueElementEvents(impliedEvents);
  if (!allowedByModel)
    event->setIncluded();
  pushElementCheck(e, event, netEnabling);
  return 1;
}
|
||
|
|
||
|
// Try to imply one tag at loc; returns 1 if a tag was implied, else 0.
//
// Nothing is implied unless OMITTAG is enabled.  When the current element
// is finished an end-tag is implied (not at tag level 0, and not when the
// element's definition forbids end-tag omission).  Otherwise a start-tag
// is implied for the element the current content model requires, if any.
// Each implied event is inserted into eventList and each state change gets
// an entry in `undo`.  After more than implyCheckLimit implied start-tags,
// 0 is returned when checkImplyLoop() returns false.
Boolean Parser::tryImplyTag(const Location &loc,
                            unsigned &startImpliedCount,
                            unsigned &attributeListIndex,
                            IList<Undo> &undo,
                            IList<Event> &eventList)
{
  if (!sd().omittag())
    return 0;

  if (currentElement().isFinished()) {
    if (tagLevel() == 0)
      return 0;
#if 1
    const ElementDefinition *def = currentElement().type()->definition();
    if (def && !def->canOmitEndTag())
      return 0;
#endif
    // imply an end tag
    if (startImpliedCount > 0) {
      message(ParserMessages::startTagEmptyElement,
              StringMessageArg(currentElement().type()->name()));
      startImpliedCount--;
    }
#if 0
    const ElementDefinition *def = currentElement().type()->definition();
    if (def && !def->canOmitEndTag())
      message(ParserMessages::omitEndTagDeclare,
              StringMessageArg(currentElement().type()->name()),
              currentElement().startLocation());
#endif
    EndElementEvent *endEvent
      = new (eventAllocator()) EndElementEvent(currentElement().type(),
                                               currentDtdPointer(),
                                               loc,
                                               0);
    eventList.insert(endEvent);
    undo.insert(new (internalAllocator()) UndoEndTag(popSaveElement()));
    return 1;
  }

  // imply a start tag
  const LeafContentToken *required = currentElement().impliedStartTag();
  if (!required)
    return 0;
  const ElementType *e = required->elementType();
  if (elementIsExcluded(e)) {
    message(ParserMessages::requiredElementExcluded,
            OrdinalMessageArg(required->typeIndex() + 1),
            StringMessageArg(e->name()),
            StringMessageArg(currentElement().type()->name()));
  }
  if (tagLevel() != 0) {
    undo.insert(new (internalAllocator())
                UndoTransition(currentElement().matchState()));
  }
  currentElement().doRequiredTransition();

  const ElementDefinition *def = e->definition();
  if (def->declaredContent() != ElementDefinition::modelGroup
      && def->declaredContent() != ElementDefinition::any) {
    message(ParserMessages::omitStartTagDeclaredContent,
            StringMessageArg(e->name()));
  }
  if (def->undefined())
    message(ParserMessages::undefinedElement, StringMessageArg(e->name()));
  else if (!def->canOmitStartTag())
    message(ParserMessages::omitStartTagDeclare, StringMessageArg(e->name()));

  AttributeList *attributes
    = allocAttributeList(e->attributeDef(),
                         attributeListIndex++);
  // this will give an error if the element has a required attribute
  attributes->finish(*this);
  startImpliedCount++;
  StartElementEvent *startEvent
    = new (eventAllocator()) StartElementEvent(e,
                                               currentDtdPointer(),
                                               attributes,
                                               loc,
                                               0);
  pushElementCheck(e, startEvent, undo, eventList);

  const unsigned implyCheckLimit = 30; // this is fairly arbitrary
  if (startImpliedCount <= implyCheckLimit
      || checkImplyLoop(startImpliedCount))
    return 1;
  return 0;
}
|
||
|
|
||
|
void Parser::pushElementCheck(const ElementType *e, StartElementEvent *event,
|
||
|
Boolean netEnabling)
|
||
|
{
|
||
|
if (tagLevel() == syntax().taglvl())
|
||
|
message(ParserMessages::taglvlOpenElements, NumberMessageArg(syntax().taglvl()));
|
||
|
noteStartElement(event->included());
|
||
|
if (event->mustOmitEnd()) {
|
||
|
if (sd().emptyElementNormal()) {
|
||
|
Boolean included = event->included();
|
||
|
Location loc(event->location());
|
||
|
eventHandler().startElement(event);
|
||
|
endTagEmptyElement(e, netEnabling, included, loc);
|
||
|
}
|
||
|
else {
|
||
|
EndElementEvent *end
|
||
|
= new (eventAllocator()) EndElementEvent(e,
|
||
|
currentDtdPointer(),
|
||
|
event->location(),
|
||
|
0);
|
||
|
if (event->included()) {
|
||
|
end->setIncluded();
|
||
|
noteEndElement(1);
|
||
|
}
|
||
|
else
|
||
|
noteEndElement(0);
|
||
|
eventHandler().startElement(event);
|
||
|
eventHandler().endElement(end);
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
const ShortReferenceMap *map = e->map();
|
||
|
if (!map)
|
||
|
map = currentElement().map();
|
||
|
if (options().warnImmediateRecursion
|
||
|
&& e == currentElement().type())
|
||
|
message(ParserMessages::immediateRecursion);
|
||
|
pushElement(new (internalAllocator()) OpenElement(e,
|
||
|
netEnabling,
|
||
|
event->included(),
|
||
|
map,
|
||
|
event->location()));
|
||
|
// Can't access event after it's passed to the event handler.
|
||
|
eventHandler().startElement(event);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void Parser::endTagEmptyElement(const ElementType *e,
|
||
|
Boolean netEnabling,
|
||
|
Boolean included,
|
||
|
const Location &startLoc)
|
||
|
{
|
||
|
Token token = getToken(netEnabling ? econnetMode : econMode);
|
||
|
switch (token) {
|
||
|
case tokenNet:
|
||
|
if (netEnabling) {
|
||
|
Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
|
||
|
currentLocation());
|
||
|
if (markup)
|
||
|
markup->addDelim(Syntax::dNET);
|
||
|
EndElementEvent *end
|
||
|
= new (eventAllocator()) EndElementEvent(e,
|
||
|
currentDtdPointer(),
|
||
|
currentLocation(),
|
||
|
markup);
|
||
|
if (included)
|
||
|
end->setIncluded();
|
||
|
eventHandler().endElement(end);
|
||
|
noteEndElement(included);
|
||
|
return;
|
||
|
}
|
||
|
break;
|
||
|
case tokenEtagoTagc:
|
||
|
{
|
||
|
if (options().warnEmptyTag)
|
||
|
message(ParserMessages::emptyEndTag);
|
||
|
Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
|
||
|
currentLocation());
|
||
|
if (markup) {
|
||
|
markup->addDelim(Syntax::dETAGO);
|
||
|
markup->addDelim(Syntax::dTAGC);
|
||
|
}
|
||
|
EndElementEvent *end
|
||
|
= new (eventAllocator()) EndElementEvent(e,
|
||
|
currentDtdPointer(),
|
||
|
currentLocation(),
|
||
|
markup);
|
||
|
if (included)
|
||
|
end->setIncluded();
|
||
|
eventHandler().endElement(end);
|
||
|
noteEndElement(included);
|
||
|
return;
|
||
|
}
|
||
|
case tokenEtagoNameStart:
|
||
|
{
|
||
|
EndElementEvent *end = parseEndTag();
|
||
|
if (end->elementType() == e) {
|
||
|
if (included)
|
||
|
end->setIncluded();
|
||
|
eventHandler().endElement(end);
|
||
|
noteEndElement(included);
|
||
|
return;
|
||
|
}
|
||
|
if (!elementIsOpen(end->elementType())) {
|
||
|
message(ParserMessages::elementNotOpen,
|
||
|
StringMessageArg(end->elementType()->name()));
|
||
|
delete end;
|
||
|
break;
|
||
|
}
|
||
|
implyEmptyElementEnd(e, included, startLoc);
|
||
|
acceptEndTag(end);
|
||
|
return;
|
||
|
}
|
||
|
default:
|
||
|
break;
|
||
|
}
|
||
|
implyEmptyElementEnd(e, included, startLoc);
|
||
|
currentInput()->ungetToken();
|
||
|
}
|
||
|
|
||
|
void Parser::implyEmptyElementEnd(const ElementType *e,
|
||
|
Boolean included,
|
||
|
const Location &startLoc)
|
||
|
{
|
||
|
if (!sd().omittag())
|
||
|
message(ParserMessages::omitEndTagOmittag,
|
||
|
StringMessageArg(e->name()),
|
||
|
startLoc);
|
||
|
else {
|
||
|
const ElementDefinition *def = e->definition();
|
||
|
if (def && !def->canOmitEndTag())
|
||
|
message(ParserMessages::omitEndTagDeclare,
|
||
|
StringMessageArg(e->name()),
|
||
|
startLoc);
|
||
|
}
|
||
|
EndElementEvent *end
|
||
|
= new (eventAllocator()) EndElementEvent(e,
|
||
|
currentDtdPointer(),
|
||
|
currentLocation(),
|
||
|
0);
|
||
|
if (included)
|
||
|
end->setIncluded();
|
||
|
noteEndElement(included);
|
||
|
eventHandler().endElement(end);
|
||
|
}
|
||
|
|
||
|
void Parser::pushElementCheck(const ElementType *e, StartElementEvent *event,
|
||
|
IList<Undo> &undoList,
|
||
|
IList<Event> &eventList)
|
||
|
{
|
||
|
if (tagLevel() == syntax().taglvl())
|
||
|
message(ParserMessages::taglvlOpenElements, NumberMessageArg(syntax().taglvl()));
|
||
|
eventList.insert(event);
|
||
|
if (event->mustOmitEnd()) {
|
||
|
EndElementEvent *end
|
||
|
= new (eventAllocator()) EndElementEvent(e,
|
||
|
currentDtdPointer(),
|
||
|
event->location(),
|
||
|
0);
|
||
|
if (event->included())
|
||
|
end->setIncluded();
|
||
|
eventList.insert(end);
|
||
|
}
|
||
|
else {
|
||
|
undoList.insert(new (internalAllocator()) UndoStartTag);
|
||
|
const ShortReferenceMap *map = e->map();
|
||
|
if (!map)
|
||
|
map = currentElement().map();
|
||
|
pushElement(new (internalAllocator()) OpenElement(e,
|
||
|
0,
|
||
|
event->included(),
|
||
|
map,
|
||
|
event->location()));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Parse an end-tag: start markup recording (if instance markup is wanted),
// note the ETAGO delimiter, then let doParseEndTag() do the rest.
// Returns the end-element event built by doParseEndTag().
EndElementEvent *Parser::parseEndTag()
{
  Markup *tagMarkup = startMarkup(eventsWanted().wantInstanceMarkup(),
                                  currentLocation());
  if (tagMarkup != 0) {
    tagMarkup->addDelim(Syntax::dETAGO);
  }
  return doParseEndTag();
}
|
||
|
|
||
|
// Read the generic identifier of an end-tag, resolve it to an element type
// and consume the rest of the tag.
// Returns a newly allocated EndElementEvent for the resolved element type.
EndElementEvent *Parser::doParseEndTag()
{
  Markup *tagMarkup = currentMarkup();

  // Drop the initial part of the token and extend it over the whole name.
  currentInput()->discardInitial();
  extendNameToken(syntax().namelen(), ParserMessages::nameLength);
  if (tagMarkup != 0) {
    tagMarkup->addName(currentInput());
  }

  // Apply the general substitution table to the name before lookup.
  StringC &gi = nameBuffer();
  getCurrentToken(syntax().generalSubstTable(), gi);

  const ElementType *elementType = currentDtd().lookupElementType(gi);
  // With the RANK feature, an unknown name may be a rank stem.
  if (sd().rank() && !elementType) {
    elementType = completeRankStem(gi);
  }
  // Still unknown: fall back to an undefined element type for this name.
  if (!elementType) {
    elementType = lookupCreateUndefinedElement(gi,
                                               currentLocation(),
                                               currentDtdNonConst(),
                                               (implydefElement() != Sd::implydefElementAnyother));
  }

  parseEndTagClose();

  EndElementEvent *result
    = new (eventAllocator()) EndElementEvent(elementType,
                                             currentDtdPointer(),
                                             markupLocation(),
                                             tagMarkup);
  return result;
}
|
||
|
|
||
|
void Parser::parseEndTagClose()
|
||
|
{
|
||
|
for (;;) {
|
||
|
Token token = getToken(tagMode);
|
||
|
switch (token) {
|
||
|
case tokenUnrecognized:
|
||
|
if (!reportNonSgmlCharacter())
|
||
|
message(ParserMessages::endTagCharacter, StringMessageArg(currentToken()));
|
||
|
return;
|
||
|
case tokenEe:
|
||
|
message(ParserMessages::endTagEntityEnd);
|
||
|
return;
|
||
|
case tokenEtago:
|
||
|
case tokenStago:
|
||
|
if (!sd().endTagUnclosed())
|
||
|
message(ParserMessages::unclosedEndTagShorttag);
|
||
|
currentInput()->ungetToken();
|
||
|
return;
|
||
|
case tokenTagc:
|
||
|
if (currentMarkup())
|
||
|
currentMarkup()->addDelim(Syntax::dTAGC);
|
||
|
return;
|
||
|
case tokenS:
|
||
|
if (currentMarkup())
|
||
|
currentMarkup()->addS(currentChar());
|
||
|
break;
|
||
|
default:
|
||
|
message(ParserMessages::endTagInvalidToken,
|
||
|
TokenMessageArg(token, tagMode, syntaxPointer(), sdPointer()));
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void Parser::parseEmptyEndTag()
|
||
|
{
|
||
|
if (options().warnEmptyTag)
|
||
|
message(ParserMessages::emptyEndTag);
|
||
|
if (!currentDtd().isBase())
|
||
|
message(ParserMessages::emptyEndTagBaseDtd);
|
||
|
if (tagLevel() == 0)
|
||
|
message(ParserMessages::emptyEndTagNoOpenElements);
|
||
|
else {
|
||
|
Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
|
||
|
currentLocation());
|
||
|
if (markup) {
|
||
|
markup->addDelim(Syntax::dETAGO);
|
||
|
markup->addDelim(Syntax::dTAGC);
|
||
|
}
|
||
|
acceptEndTag(new (eventAllocator()) EndElementEvent(currentElement().type(),
|
||
|
currentDtdPointer(),
|
||
|
currentLocation(),
|
||
|
markup));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void Parser::parseNullEndTag()
|
||
|
{
|
||
|
// If a null end tag was recognized, then there must be a net enabling
|
||
|
// element on the stack.
|
||
|
for (;;) {
|
||
|
ASSERT(tagLevel() > 0);
|
||
|
if (currentElement().netEnabling())
|
||
|
break;
|
||
|
if (!currentElement().isFinished() && validate())
|
||
|
message(ParserMessages::elementNotFinished,
|
||
|
StringMessageArg(currentElement().type()->name()));
|
||
|
implyCurrentElementEnd(currentLocation());
|
||
|
}
|
||
|
if (!currentElement().isFinished() && validate())
|
||
|
message(ParserMessages::elementEndTagNotFinished,
|
||
|
StringMessageArg(currentElement().type()->name()));
|
||
|
Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
|
||
|
currentLocation());
|
||
|
if (markup)
|
||
|
markup->addDelim(Syntax::dNET);
|
||
|
acceptEndTag(new (eventAllocator()) EndElementEvent(currentElement().type(),
|
||
|
currentDtdPointer(),
|
||
|
currentLocation(),
|
||
|
markup));
|
||
|
}
|
||
|
|
||
|
void Parser::endAllElements()
|
||
|
{
|
||
|
while (tagLevel() > 0) {
|
||
|
if (!currentElement().isFinished())
|
||
|
message(ParserMessages::elementNotFinishedDocumentEnd,
|
||
|
StringMessageArg(currentElement().type()->name()));
|
||
|
implyCurrentElementEnd(currentLocation());
|
||
|
}
|
||
|
if (!currentElement().isFinished() && validate())
|
||
|
message(ParserMessages::noDocumentElement);
|
||
|
}
|
||
|
|
||
|
// Process an explicit end-tag event.
// If the named element is not open the event is reported and deleted.
// Otherwise every element above it is closed by implication, and then the
// named element is closed and the event is handed to the event handler.
void Parser::acceptEndTag(EndElementEvent *event)
{
  const ElementType *target = event->elementType();

  if (!elementIsOpen(target)) {
    message(ParserMessages::elementNotOpen, StringMessageArg(target->name()));
    delete event;
    return;
  }

  // Unwind the stack until the target element is the current one.
  while (currentElement().type() != target) {
    if (!currentElement().isFinished() && validate()) {
      message(ParserMessages::elementNotFinished,
              StringMessageArg(currentElement().type()->name()));
    }
    implyCurrentElementEnd(event->location());
  }

  if (!currentElement().isFinished() && validate()) {
    message(ParserMessages::elementEndTagNotFinished,
            StringMessageArg(currentElement().type()->name()));
  }

  // Carry the "included" status of the open element over to its end event.
  if (currentElement().included()) {
    event->setIncluded();
  }
  noteEndElement(event->included());
  eventHandler().endElement(event);
  popElement();
}
|
||
|
|
||
|
void Parser::implyCurrentElementEnd(const Location &loc)
|
||
|
{
|
||
|
if (!sd().omittag())
|
||
|
message(ParserMessages::omitEndTagOmittag,
|
||
|
StringMessageArg(currentElement().type()->name()),
|
||
|
currentElement().startLocation());
|
||
|
else {
|
||
|
const ElementDefinition *def = currentElement().type()->definition();
|
||
|
if (def && !def->canOmitEndTag())
|
||
|
message(ParserMessages::omitEndTagDeclare,
|
||
|
StringMessageArg(currentElement().type()->name()),
|
||
|
currentElement().startLocation());
|
||
|
}
|
||
|
EndElementEvent *event
|
||
|
= new (eventAllocator()) EndElementEvent(currentElement().type(),
|
||
|
currentDtdPointer(),
|
||
|
loc,
|
||
|
0);
|
||
|
if (currentElement().included())
|
||
|
event->setIncluded();
|
||
|
noteEndElement(event->included());
|
||
|
eventHandler().endElement(event);
|
||
|
popElement();
|
||
|
}
|
||
|
|
||
|
void Parser::extendData()
|
||
|
{
|
||
|
XcharMap<PackedBoolean> isNormal(normalMap());
|
||
|
InputSource *in = currentInput();
|
||
|
size_t length = in->currentTokenLength();
|
||
|
// This is one of the parser's inner loops, so it needs to be fast.
|
||
|
while (isNormal[in->tokenCharInBuffer(messenger())])
|
||
|
length++;
|
||
|
in->endToken(length);
|
||
|
}
|
||
|
|
||
|
void Parser::extendContentS()
|
||
|
{
|
||
|
InputSource *in = currentInput();
|
||
|
size_t length = in->currentTokenLength();
|
||
|
XcharMap<PackedBoolean> isNormal(normalMap());
|
||
|
for (;;) {
|
||
|
Xchar ch = in->tokenChar(messenger());
|
||
|
if (!syntax().isS(ch) || !isNormal[ch])
|
||
|
break;
|
||
|
length++;
|
||
|
}
|
||
|
in->endToken(length);
|
||
|
}
|
||
|
|
||
|
void Parser::handleBadStartTag(const ElementType *e,
|
||
|
StartElementEvent *event,
|
||
|
Boolean netEnabling)
|
||
|
{
|
||
|
IList<Undo> undoList;
|
||
|
IList<Event> eventList;
|
||
|
keepMessages();
|
||
|
for (;;) {
|
||
|
Vector<const ElementType *> missing;
|
||
|
findMissingTag(e, missing);
|
||
|
if (missing.size() == 1) {
|
||
|
queueElementEvents(eventList);
|
||
|
const ElementType *m = missing[0];
|
||
|
message(ParserMessages::missingElementInferred,
|
||
|
StringMessageArg(e->name()),
|
||
|
StringMessageArg(m->name()));
|
||
|
AttributeList *attributes
|
||
|
= allocAttributeList(m->attributeDef(), 1);
|
||
|
// this will give an error if the element has a required attribute
|
||
|
attributes->finish(*this);
|
||
|
StartElementEvent *inferEvent
|
||
|
= new (eventAllocator()) StartElementEvent(m,
|
||
|
currentDtdPointer(),
|
||
|
attributes,
|
||
|
event->location(),
|
||
|
0);
|
||
|
if (!currentElement().tryTransition(m))
|
||
|
inferEvent->setIncluded();
|
||
|
pushElementCheck(m, inferEvent, 0);
|
||
|
if (!currentElement().tryTransition(e))
|
||
|
event->setIncluded();
|
||
|
pushElementCheck(e, event, netEnabling);
|
||
|
return;
|
||
|
}
|
||
|
if (missing.size() > 0) {
|
||
|
queueElementEvents(eventList);
|
||
|
Vector<StringC> missingNames;
|
||
|
for (size_t i = 0; i < missing.size(); i++)
|
||
|
missingNames.push_back(missing[i]->name());
|
||
|
message(ParserMessages::missingElementMultiple,
|
||
|
StringMessageArg(e->name()),
|
||
|
StringVectorMessageArg(missingNames));
|
||
|
pushElementCheck(e, event, netEnabling);
|
||
|
return;
|
||
|
}
|
||
|
if (!sd().omittag()
|
||
|
|| !currentElement().isFinished()
|
||
|
|| tagLevel() == 0
|
||
|
|| !currentElement().type()->definition()->canOmitEndTag())
|
||
|
break;
|
||
|
EndElementEvent *endEvent
|
||
|
= new (eventAllocator()) EndElementEvent(currentElement().type(),
|
||
|
currentDtdPointer(),
|
||
|
event->location(),
|
||
|
0);
|
||
|
eventList.insert(endEvent);
|
||
|
undoList.insert(new (internalAllocator()) UndoEndTag(popSaveElement()));
|
||
|
}
|
||
|
discardKeptMessages();
|
||
|
undo(undoList);
|
||
|
message(ParserMessages::elementNotAllowed, StringMessageArg(e->name()));
|
||
|
// If element couldn't occur because it was excluded, then
|
||
|
// do the transition here.
|
||
|
(void)currentElement().tryTransition(e);
|
||
|
pushElementCheck(e, event, netEnabling);
|
||
|
}
|
||
|
|
||
|
void Parser::findMissingTag(const ElementType *e,
|
||
|
Vector<const ElementType *> &v)
|
||
|
{
|
||
|
if (!currentElement().currentPosition()) {
|
||
|
if (!e)
|
||
|
v.push_back((const ElementType *)0);
|
||
|
return;
|
||
|
}
|
||
|
if (elementIsExcluded(e))
|
||
|
return;
|
||
|
size_t newSize = 0;
|
||
|
currentElement().matchState().possibleTransitions(v);
|
||
|
// FIXME also get currentInclusions
|
||
|
for (size_t i = 0; i < v.size(); i++) {
|
||
|
if (v[i] && !elementIsExcluded(v[i])) {
|
||
|
Boolean success = 0;
|
||
|
switch (v[i]->definition()->declaredContent()) {
|
||
|
case ElementDefinition::modelGroup:
|
||
|
{
|
||
|
const CompiledModelGroup *grp
|
||
|
= v[i]->definition()->compiledModelGroup();
|
||
|
MatchState state(grp);
|
||
|
if (!e) {
|
||
|
if (state.tryTransitionPcdata())
|
||
|
success = 1;
|
||
|
}
|
||
|
else {
|
||
|
if (state.tryTransition(e))
|
||
|
success = 1;
|
||
|
if (!success) {
|
||
|
for (size_t j = 0; j < v[i]->definition()->nInclusions(); j++)
|
||
|
if (v[i]->definition()->inclusion(j) == e) {
|
||
|
success = 1;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
if (success) {
|
||
|
for (size_t j = 0; j < v[i]->definition()->nExclusions(); j++)
|
||
|
if (v[i]->definition()->exclusion(j) == e) {
|
||
|
success = 0;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
#if 0
|
||
|
case ElementDefinition::any:
|
||
|
success = 1;
|
||
|
break;
|
||
|
#endif
|
||
|
case ElementDefinition::cdata:
|
||
|
case ElementDefinition::rcdata:
|
||
|
if (e == 0)
|
||
|
success = 1;
|
||
|
break;
|
||
|
default:
|
||
|
break;
|
||
|
}
|
||
|
if (success)
|
||
|
v[newSize++] = v[i];
|
||
|
}
|
||
|
}
|
||
|
v.resize(newSize);
|
||
|
// Sort them according to the order of their occurrence in the DTD.
|
||
|
// Do an insertion sort.
|
||
|
for (size_t i = 1; i < v.size(); i++) {
|
||
|
const ElementType *tem = v[i];
|
||
|
size_t j;
|
||
|
for (j = i; j > 0 && v[j - 1]->index() > tem->index(); j--)
|
||
|
v[j] = v[j - 1];
|
||
|
v[j] = tem;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#if 0
|
||
|
// This produces messages that are too verbose
|
||
|
// This doesn't try to be very efficient.
|
||
|
// 0 for #pcdata
|
||
|
|
||
|
// Fill `v` with the element types allowed at the current point, following
// tags that could be implied; a null entry stands for #pcdata.
// The element stack is modified while searching and restored before return.
void Parser::getAllowedElementTypes(Vector<const ElementType *> &v)
{
  v.clear();
  // FIXME get a list of all inclusions first
  // getCurrentInclusions(v);
  // excluded says whether each element of v was excluded
  Vector<PackedBoolean> excluded(v.size(), 0);
  unsigned startImpliedCount = 0;
  IList<Undo> undoList;
  for (;;) {
    if (!currentElement().currentPosition()) {
      // must be allowed #pcdata
      v.push_back((const ElementType *)0);
      excluded.push_back((PackedBoolean)0);
      break;
    }

    // have a model group
    const size_t firstNew = v.size();
    currentElement().matchState().possibleTransitions(v);
    excluded.resize(v.size());
    for (size_t k = firstNew; k < v.size(); k++)
      excluded[k] = (v[k] && elementIsExcluded(v[k]));
    if (!sd().omittag())
      break;

    // Try to imply a tag
    if (currentElement().isFinished()) {
      // Imply an end-tag, but never after a start-tag has been implied.
      if (tagLevel() == 0 || startImpliedCount)
        break;
      const ElementDefinition *openDef = currentElement().type()->definition();
      if (!openDef || !openDef->canOmitEndTag())
        break;
      undoList.insert(new (internalAllocator())
                      UndoEndTag(popSaveElement()));
      continue;
    }

    // Imply a start-tag.
    const LeafContentToken *token = currentElement().impliedStartTag();
    if (!token)
      break;
    const ElementType *implied = token->elementType();
    if (elementIsExcluded(implied))
      break;
    const ElementDefinition *impliedDef = implied->definition();
    if (!impliedDef
        || impliedDef->undefined()
        || (impliedDef->declaredContent() != ElementDefinition::modelGroup
            && impliedDef->declaredContent() != ElementDefinition::any)
        || !impliedDef->canOmitStartTag())
      break;
    undoList.insert(new (internalAllocator()) UndoStartTag);
    startImpliedCount++;
    pushElement(new (internalAllocator()) OpenElement(implied,
                                                      0,
                                                      0,
                                                      0,
                                                      Location()));
    if (checkImplyLoop(startImpliedCount))
      break;
    // The implied element's inclusions become allowed as well.
    for (size_t n = 0; n < impliedDef->nInclusions(); n++)
      if (!elementIsExcluded(impliedDef->inclusion(n))) {
        v.push_back(impliedDef->inclusion(n));
        excluded.push_back(0);
      }
  }
  undo(undoList);

  // Remove exclusions and duplicates and undefined
  size_t kept = 0;
  for (size_t i = 0; i < v.size(); i++) {
    if (excluded[i] || (v[i] && v[i]->definition()->undefined()))
      continue;
    // Look for an earlier kept entry with the same element type.
    size_t j = 0;
    while (j < kept && v[i] != v[j])
      j++;
    if (j == kept)
      v[kept++] = v[i];
  }
  v.resize(kept);
}
|
||
|
#endif
|
||
|
|
||
|
#ifdef SP_NAMESPACE
|
||
|
}
|
||
|
#endif
|