// actual/packages/node-libofx/OpenSP-1.5.2/nsgmls/SgmlsEventHandler.cxx
// 2022-04-28 22:44:38 -04:00
//
// 739 lines
// 19 KiB
// C++

// Copyright (c) 1994 James Clark
// See the file COPYING for copying permission.
#include "config.h"
#include "SgmlsEventHandler.h"
#include "SgmlParser.h"
#include "ParserOptions.h"
#include "Entity.h"
#include "Notation.h"
#include "Attribute.h"
#include "ExtendEntityManager.h"
#include "StorageManager.h"
#include "macros.h"
#include "sptchar.h"
#include "nsgmls.h"
#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif
// Command codes.  Each one is written as the first character of an output
// line and identifies the kind of record on that line; features() also
// emits the whole set as the list of "possible codes".
const char dataCode = '-';
const char piCode = '?';
const char conformingCode = 'C';
const char appinfoCode = '#';
const char startElementCode = '(';
const char endElementCode = ')';
const char referenceEntityCode = '&';
const char attributeCode = 'A';
const char dataAttributeCode = 'D';
const char linkAttributeCode = 'a';
const char defineNotationCode = 'N';
const char defineExternalEntityCode = 'E';
const char defineInternalEntityCode = 'I';
const char defineSubdocEntityCode = 'S';
const char defineExternalTextEntityCode = 'T';
const char pubidCode = 'p';
const char sysidCode = 's';
const char startSubdocCode = '{';
const char endSubdocCode = '}';
const char fileCode = 'f';
const char locationCode = 'L';
const char includedElementCode = 'i';
const char emptyElementCode = 'e';
const char commentCode = '_';
const char omissionCode = 'o';
const char featuresCode = 'V';
// Sub-codes written immediately after featuresCode by features().
const char featuresSubCodeOptions = 'o';
const char featuresSubCodeVersion = 'v';
const char featuresSubCodePackage = 'p';
const char featuresSubCodePossibleCodes = 'c';
// Line terminator and field separator used for every record.
const OutputCharStream::Newline nl = OutputCharStream::newline;
const char space = ' ';
// Carriage return; outputString() never writes it literally but replaces it
// with escapePrefix followed by newlineEscape.
const char re = '\r';
const Char reChar = re;
// Escape syntax: every escape sequence starts with escapePrefix.
const char escapePrefix = '\\';
const Char escapePrefixChar = escapePrefix;
// escapePrefix + sdataDelim brackets SDATA text.
const char sdataDelim = '|';
// escapePrefix + nonSgmlEscape + decimal number + escapeEnd encodes a
// non-SGML character.
const char nonSgmlEscape = '%';
const char newlineEscape = 'n';
// escapePrefix + numEscape + decimal number + escapeEnd is what escaper()
// writes for a character handed to it by the output stream.
const char numEscape = '#';
const char escapeEnd = ';';
// Open a data line if none is pending: write dataCode once and remember
// that the line still needs to be terminated by flushData().
inline
void SgmlsEventHandler::startData()
{
  if (haveData_)
    return;
  os() << dataCode;
  haveData_ = 1;
}
// Terminate the pending data line (if there is one) with a newline.
inline
void SgmlsEventHandler::flushData()
{
  if (!haveData_)
    return;
  os() << nl;
  haveData_ = 0;
}
// Emit a location record for loc, but only when line output was requested;
// the real work is done by outputLocation1().
inline
void SgmlsEventHandler::outputLocation(const Location &loc)
{
  if (!outputLine_)
    return;
  outputLocation1(loc);
}
// Construct a handler that writes its records to os and forwards messages to
// messenger.
//
//   parser      - parser handed to the SgmlsSubdocState base.
//   os          - output stream; the destructor deletes it.
//   messenger   - receiver of every dispatched message.
//   outputFlags - bit set of output* flags; each bit is decoded into one of
//                 the boolean output*_ members.
//
// When parser-information output is requested, the feature records are
// written immediately.
SgmlsEventHandler::SgmlsEventHandler(const SgmlParser *parser,
                                     OutputCharStream *os,
                                     Messenger *messenger,
                                     unsigned outputFlags)
: SgmlsSubdocState(parser), os_(os), messenger_(messenger),
  outputLine_((outputFlags & outputLine) != 0),
  outputEntity_((outputFlags & outputEntity) != 0),
  outputId_((outputFlags & outputId) != 0),
  outputNotationSysid_((outputFlags & outputNotationSysid) != 0),
  outputIncluded_((outputFlags & outputIncluded) != 0),
  outputNonSgml_((outputFlags & outputNonSgml) != 0),
  outputEmpty_((outputFlags & outputEmpty) != 0),
  outputDataAtt_((outputFlags & outputDataAtt) != 0),
  outputComment_((outputFlags & outputComment) != 0),
  outputTagOmission_((outputFlags & outputTagOmission) != 0),
  outputAttributeOmission_((outputFlags & outputAttributeOmission) != 0),
  outputParserInformation_((outputFlags & outputParserInformation) != 0),
  haveData_(0), lastSos_(0), inDocument_(0)
{
  // outputString() may increment lastLineno_ before outputLocation1() has
  // ever assigned it (outputLocation1() can return early), so give it a
  // defined starting value.  Assigned here rather than in the initializer
  // list so that the member declaration order does not matter.
  lastLineno_ = 0;
  os_->setEscaper(escaper);
  if (outputParserInformation_)
    features(outputFlags);
}
// Finish the output: close any pending data line, write the conforming
// record when no errors were counted, then destroy the output stream.
SgmlsEventHandler::~SgmlsEventHandler()
{
  flushData();
  const bool conforming = (errorCount() == 0);
  if (conforming)
    os() << conformingCode << nl;
  delete os_;
}
// Handle a message event: pass the message on to the messenger, then let
// the ErrorCountEventHandler base class process the event itself.
void SgmlsEventHandler::message(MessageEvent *event)
{
  messenger_->dispatchMessage(event->message());

  // The base class is called last and receives the event pointer.
  ErrorCountEventHandler::message(event);
}
void SgmlsEventHandler::features(unsigned outputFlags)
{
os() << featuresCode << featuresSubCodePackage << space
<< SP_PACKAGE
<< nl;
os() << featuresCode << featuresSubCodeVersion << space
<< SP_VERSION
<< nl;
os() << featuresCode << featuresSubCodePossibleCodes << space;
os() << dataCode;
os() << piCode;
os() << conformingCode;
os() << appinfoCode;
os() << startElementCode;
os() << endElementCode;
os() << referenceEntityCode;
os() << attributeCode;
os() << dataAttributeCode;
os() << linkAttributeCode;
os() << defineNotationCode;
os() << defineExternalEntityCode;
os() << defineInternalEntityCode;
os() << defineSubdocEntityCode;
os() << defineExternalTextEntityCode;
os() << pubidCode;
os() << sysidCode;
os() << startSubdocCode;
os() << endSubdocCode;
os() << fileCode;
os() << locationCode;
os() << includedElementCode;
os() << emptyElementCode;
os() << commentCode;
os() << omissionCode;
os() << featuresCode;
os() << nl;
os() << featuresCode << featuresSubCodeOptions
<< space << SP_T("esis");
for (size_t i = 1; NsgmlsApp::outputOptions[i].flag != 0; i++) {
if (NsgmlsApp::outputOptions[i].flag == 0)
break;
if (0 != (outputFlags & NsgmlsApp::outputOptions[i].flag)) {
os() << space << NsgmlsApp::outputOptions[i].name;
}
}
os() << nl;
}
// Handle an APPINFO event: when the event carries a literal, write it as an
// appinfo record.  The event is always deleted.
void SgmlsEventHandler::appinfo(AppinfoEvent *event)
{
  const StringC *literal;
  const Boolean hasLiteral = event->literal(literal);
  if (hasLiteral) {
    outputLocation(event->location());
    flushData();
    os() << appinfoCode;
    outputString(*literal);
    os() << nl;
  }
  delete event;
}
// Handle the end of the prolog:
//   - with entity output enabled, define every general entity of the DTD;
//   - with comment output enabled, start accepting comment declarations;
//   - set up link processing when the event carries a link process
//     definition;
//   - write the attributes of every simple link.
// The event is deleted at the end.
void SgmlsEventHandler::endProlog(EndPrologEvent *event)
{
  if (outputEntity_) {
    flushData();
    const Dtd &dtd = event->dtd();
    Dtd::ConstEntityIter iter(dtd.generalEntityIter());
    // next() yields a null pointer once the iteration is exhausted.
    while (const Entity *entity = iter.next().pointer())
      defineEntity(entity);
  }

  if (outputComment_)
    inDocument_ = true;

  if (!event->lpdPointer().isNull()) {
    linkProcess_.init(event->lpdPointer());
    haveLinkProcess_ = 1;
    flushData();
  }

  for (size_t link = 0; link < event->simpleLinkNames().size(); link++) {
    flushData();
    attributes(event->simpleLinkAttributes()[link],
               linkAttributeCode,
               &event->simpleLinkNames()[link]);
  }

  delete event;
}
// Handle a defaulted entity: with entity output enabled, write its
// definition.  The event is always deleted.
void SgmlsEventHandler::entityDefaulted(EntityDefaultedEvent *event)
{
  if (outputEntity_) {
    flushData();
    const Entity *entity = event->entityPointer().pointer();
    defineEntity(entity);
  }
  delete event;
}
// Handle a USELINK event by forwarding its link set, restore flag and link
// process definition to the link process, then delete the event.
void SgmlsEventHandler::uselink(UselinkEvent *event)
{
  linkProcess_.uselink(event->linkSet(),
                       event->restore(),
                       event->lpd().pointer());
  delete event;
}
// Handle the SGML declaration event: remember the SGML declaration and the
// instance syntax it carries, then delete the event.
void SgmlsEventHandler::sgmlDecl(SgmlDeclEvent *event)
{
  sd_ = event->sdPointer();
  // FIXME which syntax?
  syntax_ = event->instanceSyntaxPointer();
  delete event;
}
// Handle character data: append it, escaped, to the current data line
// (opening one if necessary).  The event is deleted.
void SgmlsEventHandler::data(DataEvent *event)
{
  outputLocation(event->location());
  startData();
  const Char *chars = event->data();
  const size_t nChars = event->dataLength();
  outputString(chars, nChars);
  delete event;
}
// Handle an SDATA entity reference: append its text to the current data
// line, bracketed by the escaped SDATA delimiter.  The event is deleted.
void SgmlsEventHandler::sdataEntity(SdataEntityEvent *event)
{
  outputLocation(event->location());
  startData();

  os() << escapePrefix << sdataDelim;   // opening delimiter
  outputString(event->data(), event->dataLength());
  os() << escapePrefix << sdataDelim;   // closing delimiter

  delete event;
}
// Handle a processing instruction: write it as a record of its own.  The
// event is deleted.
void SgmlsEventHandler::pi(PiEvent *event)
{
  outputLocation(event->location());
  flushData();

  os() << piCode;
  outputString(event->data(), event->dataLength());
  os() << nl;

  delete event;
}
// Handle a comment declaration: once inDocument_ is set, write one comment
// record per comment contained in the declaration's markup.  The event is
// always deleted.
void SgmlsEventHandler::commentDecl(CommentDeclEvent *event)
{
  // only receive this event if outputComment_ true
  if (inDocument_) {
    outputLocation(event->location());
    flushData();
    for (MarkupIter iter(event->markup()); iter.valid(); iter.advance()) {
      // Markup items other than comments are ignored.
      if (iter.type() != Markup::comment)
        continue;
      os() << commentCode;
      outputString(iter.charsPointer(), iter.charsLength());
      os() << nl;
    }
  }
  delete event;
}
// Handle a non-SGML character: when that output is enabled, append its
// numeric escape to the current data line.  The event is always deleted.
void SgmlsEventHandler::nonSgmlChar(NonSgmlCharEvent *event)
{
  if (outputNonSgml_) {
    outputLocation(event->location());
    startData();
    os() << escapePrefix << nonSgmlEscape
         << (unsigned long)event->character()
         << escapeEnd;
  }
  delete event;
}
// Handle a start-tag.  Records are written in this order: link attributes
// (when link processing is active and yields some), the element's own
// attributes, the optional omitted/included/empty markers, the location,
// and finally the start-element record.  The event is deleted.
void SgmlsEventHandler::startElement(StartElementEvent *event)
{
  flushData();

  // Messages raised while attributes are processed are attributed to this
  // element's location (see initMessage()).
  currentLocation_ = event->location();
  if (haveLinkProcess_) {
    const AttributeList *linkAttributes;
    const ResultElementSpec *resultElementSpec;
    linkProcess_.startElement(event->elementType(),
                              event->attributes(),
                              event->location(),
                              *this, // Messenger &
                              linkAttributes,
                              resultElementSpec);
    if (linkAttributes)
      attributes(*linkAttributes, linkAttributeCode, &linkProcess_.name());
  }
  attributes(event->attributes(), attributeCode, 0);
  currentLocation_.clear();

  // Optional one-character marker records that precede the element.
  if (outputTagOmission_ && !event->markupPtr())
    os() << omissionCode << nl;
  if (outputIncluded_ && event->included())
    os() << includedElementCode << nl;
  if (outputEmpty_ && event->mustOmitEnd())
    os() << emptyElementCode << nl;

  outputLocation(event->location());
  os() << startElementCode << event->name() << nl;
  delete event;
}
void SgmlsEventHandler::attributes(const AttributeList &attributes,
char code,
const StringC *ownerName)
{
size_t nAttributes = attributes.size();
for (size_t i = 0; i < nAttributes; i++) {
const Text *text;
const StringC *string;
const AttributeValue *value = attributes.value(i);
if (value) {
if (outputAttributeOmission_) {
if (! attributes.specified(i)) {
os() << omissionCode << nl;
}
}
switch (value->info(text, string)) {
case AttributeValue::implied:
startAttribute(attributes.name(i), code, ownerName);
os() << "IMPLIED" << nl;
break;
case AttributeValue::tokenized:
{
const char *typeString = "TOKEN";
const AttributeSemantics *semantics = attributes.semantics(i);
if (semantics) {
ConstPtr<Notation> notation
= semantics->notation();
if (!notation.isNull()) {
defineNotation(notation.pointer());
typeString = "NOTATION";
}
else {
size_t nEntities = semantics->nEntities();
if (nEntities) {
typeString = "ENTITY";
if (!outputEntity_)
for (size_t i = 0; i < nEntities; i++) {
const Entity *entity = semantics->entity(i).pointer();
if (!markEntity(entity))
defineEntity(entity);
}
}
}
}
if (outputId_ && attributes.id(i))
typeString = "ID";
startAttribute(attributes.name(i), code, ownerName);
os() << typeString << space << *string << nl;
}
break;
case AttributeValue::cdata:
{
startAttribute(attributes.name(i), code, ownerName);
CdataAttributeValue *cdataValue = (CdataAttributeValue *)value;
if (outputDataAtt_ && cdataValue->notation())
os() << "DATA " << cdataValue->notation()->name() << " ";
else
os() << "CDATA ";
TextIter iter(*text);
TextItem::Type type;
const Char *p;
size_t length;
const Location *loc;
while (iter.next(type, p, length, loc))
switch (type) {
case TextItem::data:
case TextItem::cdata:
outputString(p, length);
break;
case TextItem::sdata:
os() << escapePrefix << sdataDelim;
outputString(p, length);
os() << escapePrefix << sdataDelim;
break;
case TextItem::nonSgml:
if (outputNonSgml_)
os() << escapePrefix << nonSgmlEscape << (unsigned long)*p << escapeEnd;
break;
default:
break;
}
os() << nl;
if (outputDataAtt_ && cdataValue->notation()) {
defineNotation(cdataValue->notation());
DataAttributeValue *dataValue = (DataAttributeValue *)cdataValue;
SgmlsEventHandler::attributes(dataValue->attributes(),
dataAttributeCode,
&attributes.name(i));
}
}
break;
}
}
}
}
// Write the leading part of an attribute record: the command code, the
// owner name (when given) and the attribute name, each name followed by a
// space.
void SgmlsEventHandler::startAttribute(const StringC &name,
                                       char code,
                                       const StringC *ownerName)
{
  os() << code;
  if (ownerName != 0)
    os() << *ownerName << space;
  os() << name << space;
}
// Handle an end-tag: notify the link process (when active), then write the
// location, the optional omitted-tag marker and the end-element record.
// The event is deleted.
void SgmlsEventHandler::endElement(EndElementEvent *event)
{
  flushData();
  if (haveLinkProcess_)
    linkProcess_.endElement();

  outputLocation(event->location());

  const bool tagOmitted = outputTagOmission_ && !event->markupPtr();
  if (tagOmitted)
    os() << omissionCode << nl;

  os() << endElementCode << event->name() << nl;
  delete event;
}
// Handle a reference to an external data entity: define the entity on first
// use (unless all entities are output anyway), then write the reference
// record.  The event is deleted.
void SgmlsEventHandler::externalDataEntity(ExternalDataEntityEvent *event)
{
  // Messages raised while the entity is defined are attributed to the
  // reference's location (see initMessage()).
  currentLocation_ = event->location();
  outputLocation(event->location());
  flushData();

  if (!outputEntity_) {
    if (!markEntity(event->entity()))
      defineExternalDataEntity(event->entity());
  }
  currentLocation_.clear();

  os() << referenceEntityCode << event->entity()->name() << nl;
  delete event;
}
// Handle a reference to a subdocument entity: define the entity on first use
// (unless all entities are output anyway), write the start-subdoc record,
// parse the subdocument with this handler using a fresh per-document state,
// restore the previous state and write the end-subdoc record.  The event is
// deleted.
void SgmlsEventHandler::subdocEntity(SubdocEntityEvent *event)
{
  currentLocation_ = event->location();
  outputLocation(event->location());
  flushData();

  const SubdocEntity *entity = event->entity();
  if (!outputEntity_) {
    if (!markEntity(entity))
      defineSubdocEntity(entity);
  }
  currentLocation_.clear();
  os() << startSubdocCode << entity->name() << nl;

  // Configure a nested parser for the subdocument.
  SgmlParser::Params params;
  params.subdocInheritActiveLinkTypes = 1;
  params.subdocReferenced = 1;
  params.origin = event->entityOrigin()->copy();
  params.parent = parser_;
  params.sysid = entity->externalId().effectiveSystemId();
  params.entityType = SgmlParser::Params::subdoc;
  SgmlParser parser(params);

  // Park the current per-document state in savedState, run the nested
  // parse on a re-initialised state, then swap the saved state back in.
  SgmlsSubdocState savedState;
  SgmlsSubdocState::swap(savedState);
  SgmlsSubdocState::init(&parser);
  parser.parseAll(*this);
  savedState.swap(*this);

  os() << endSubdocCode << entity->name() << nl;
  delete event;
}
// Write the definition of entity, dispatching on its kind: internal
// entities first, otherwise by data type to the external data, subdocument
// or external text variant.
void SgmlsEventHandler::defineEntity(const Entity *entity)
{
  const InternalEntity *internalEntity = entity->asInternalEntity();
  if (internalEntity) {
    defineInternalEntity(internalEntity);
    return;
  }

  switch (entity->dataType()) {
  case Entity::cdata:
  case Entity::sdata:
  case Entity::ndata:
    defineExternalDataEntity(entity->asExternalDataEntity());
    break;
  case Entity::subdoc:
    defineSubdocEntity(entity->asSubdocEntity());
    break;
  case Entity::sgmlText:
    defineExternalTextEntity(entity->asExternalEntity());
    break;
  default:
    CANNOT_HAPPEN();
  }
}
// Write the definition of an external data entity: its notation (defined on
// first use), its external identifier, the entity record itself
// (name, data type, notation name) and finally its data attributes.
void SgmlsEventHandler::defineExternalDataEntity(const ExternalDataEntity *entity)
{
  const Notation *notation = entity->notation();
  defineNotation(notation);
  externalId(entity->externalId());

  // Initialised so that the stream below never reads an indeterminate
  // pointer should the default branch fall through.
  // NOTE(review): CANNOT_HAPPEN() is defined outside this file; this matters
  // only if it can return (e.g. with assertions disabled) -- confirm.
  const char *typeString = "";
  switch (entity->dataType()) {
  case Entity::cdata:
    typeString = "CDATA";
    break;
  case Entity::sdata:
    typeString = "SDATA";
    break;
  case Entity::ndata:
    typeString = "NDATA";
    break;
  default:
    CANNOT_HAPPEN();
  }

  os() << defineExternalEntityCode << entity->name()
       << space << typeString
       << space << notation->name()
       << nl;
  attributes(entity->attributes(), dataAttributeCode, &entity->name());
}
// Write the definition of a subdocument entity: its external identifier
// followed by the subdoc-entity record carrying its name.
void SgmlsEventHandler::defineSubdocEntity(const SubdocEntity *entity)
{
  externalId(entity->externalId());
  os() << defineSubdocEntityCode;
  os() << entity->name() << nl;
}
// Write the definition of an external text entity: its external identifier
// followed by the external-text-entity record carrying its name.
void SgmlsEventHandler::defineExternalTextEntity(const ExternalEntity *entity)
{
  externalId(entity->externalId());
  os() << defineExternalTextEntityCode;
  os() << entity->name() << nl;
}
// Write the definition of an internal entity as a single record: name,
// data type keyword (SDATA, CDATA, TEXT or PI) and the escaped replacement
// text.
void SgmlsEventHandler::defineInternalEntity(const InternalEntity *entity)
{
  os() << defineInternalEntityCode << entity->name() << space;

  // Initialised so that the stream below never reads an indeterminate
  // pointer should the default branch fall through.
  // NOTE(review): CANNOT_HAPPEN() is defined outside this file; this matters
  // only if it can return (e.g. with assertions disabled) -- confirm.
  const char *s = "";
  switch (entity->dataType()) {
  case Entity::sdata:
    s = "SDATA";
    break;
  case Entity::cdata:
    s = "CDATA";
    break;
  case Entity::sgmlText:
    s = "TEXT";
    break;
  case Entity::pi:
    s = "PI";
    break;
  default:
    CANNOT_HAPPEN();
  }

  os() << s << space;
  outputString(entity->string());
  os() << nl;
}
void SgmlsEventHandler::defineNotation(const Notation *notation)
{
if (markNotation(notation))
return;
externalId(notation->externalId(), outputNotationSysid_);
os() << defineNotationCode << notation->name() << nl;
}
void SgmlsEventHandler::externalId(const ExternalId &id, Boolean outputFile)
{
const StringC *str = id.publicIdString();
if (str) {
os() << pubidCode;
outputString(*str);
os() << nl;
}
str = id.systemIdString();
if (str) {
os() << sysidCode;
outputString(*str);
os() << nl;
}
if (outputFile && id.effectiveSystemId().size()) {
os() << fileCode;
outputString(id.effectiveSystemId());
os() << nl;
}
}
// Record the entity's name in definedEntities_ and return the result of
// that add() call.  Callers define the entity only when this returns false.
Boolean SgmlsEventHandler::markEntity(const Entity *entity)
{
  const StringC &entityName = entity->name();
  return definedEntities_.add(entityName);
}
// Record the notation's name in definedNotations_ and return the result of
// that add() call.  defineNotation() writes nothing when this returns true.
Boolean SgmlsEventHandler::markNotation(const Notation *notation)
{
  const StringC &notationName = notation->name();
  return definedNotations_.add(notationName);
}
void SgmlsEventHandler::outputString(const Char *p, size_t n)
{
for (; n > 0; p++, n--) {
switch (*p) {
case escapePrefixChar:
os() << escapePrefix << escapePrefix;
break;
case reChar:
os() << escapePrefix << newlineEscape;
if (outputLine_ && haveData_)
lastLineno_++;
break;
default:
// FIXME not clear what to do here given possibility of wide characters
unsigned long c = *p;
if (c < 040) {
static const char digits[] = "0123456789";
os() << escapePrefix << '0' << digits[(c / 8) % 8] << digits[c % 8];
}
else
os().put(*p);
break;
}
}
}
// Escaper installed on the output stream by the constructor: writes c as
// escapePrefix + numEscape + its decimal value + escapeEnd.
void SgmlsEventHandler::escaper(OutputCharStream &s, Char c)
{
  const unsigned long charNumber = (unsigned long)c;
  s << escapePrefix << numEscape << charNumber << escapeEnd;
}
void SgmlsEventHandler::outputLocation1(const Location &loc)
{
const Origin *origin = loc.origin().pointer();
const InputSourceOrigin *inputSourceOrigin;
const ExternalInfo *info;
Index index = loc.index();
for (;;) {
if (!origin)
return;
inputSourceOrigin = origin->asInputSourceOrigin();
if (inputSourceOrigin) {
info = inputSourceOrigin->externalInfo();
if (info)
break;
}
const Location &loc = origin->parent();
index = loc.index();
origin = loc.origin().pointer();
}
Offset off = inputSourceOrigin->startOffset(index);
StorageObjectLocation soLoc;
if (!ExtendEntityManager::externalize(info, off, soLoc))
return;
if (soLoc.lineNumber == (unsigned long)-1)
return;
if (soLoc.storageObjectSpec == lastSos_) {
if (soLoc.lineNumber == lastLineno_)
return;
flushData();
os() << locationCode << soLoc.lineNumber << nl;
lastLineno_ = soLoc.lineNumber;
}
else {
flushData();
os() << locationCode << soLoc.lineNumber << space;
outputString(soLoc.actualStorageId);
os() << nl;
lastLineno_ = soLoc.lineNumber;
lastSos_ = soLoc.storageObjectSpec;
lastLoc_ = loc; // make sure lastSos_ doesn't get freed
}
}
// Messenger interface: unless processing has been cancelled, note the
// message and pass it on to the messenger.
void SgmlsEventHandler::dispatchMessage(const Message &msg)
{
  if (cancelled())
    return;
  noteMessage(msg);
  messenger_->dispatchMessage(msg);
}
// Messenger interface: stamp a message with the location currently held in
// currentLocation_ (set by the handlers around attribute and entity
// processing).
void SgmlsEventHandler::initMessage(Message &msg)
{
  msg.loc = currentLocation_;
}
// Create an empty state: no link process and no parser.
SgmlsSubdocState::SgmlsSubdocState()
: haveLinkProcess_(0),
  parser_(0)
{
}
// Create a state bound to parser, with no link process yet.
SgmlsSubdocState::SgmlsSubdocState(const SgmlParser *parser)
: haveLinkProcess_(0),
  parser_(parser)
{
}
// Reset the state for a new document parsed by parser: forget every defined
// notation and entity and drop any link process.
void SgmlsSubdocState::init(const SgmlParser *parser)
{
  parser_ = parser;

  // Nothing has been defined yet in the new document.
  definedNotations_.clear();
  definedEntities_.clear();

  // Link processing starts out inactive.
  haveLinkProcess_ = 0;
  linkProcess_.clear();
}
// Exchange the complete state (parser, link-process flag, link process,
// defined notations and defined entities) with to.
void SgmlsSubdocState::swap(SgmlsSubdocState &to)
{
  // Plain members are exchanged by hand.
  const SgmlParser *otherParser = to.parser_;
  to.parser_ = parser_;
  parser_ = otherParser;

  const Boolean otherHaveLinkProcess = to.haveLinkProcess_;
  to.haveLinkProcess_ = haveLinkProcess_;
  haveLinkProcess_ = otherHaveLinkProcess;

  // The remaining members provide their own swap().
  linkProcess_.swap(to.linkProcess_);
  definedNotations_.swap(to.definedNotations_);
  definedEntities_.swap(to.definedEntities_);
}
#ifdef SP_NAMESPACE
}
#endif