actual/packages/node-libofx/OpenSP-1.5.2/sx/XmlOutputEventHandler.cxx
2022-04-28 22:44:38 -04:00

1674 lines
42 KiB
C++

// Copyright (c) 1997 James Clark
// See the file COPYING for copying permission.
#ifdef __GNUG__
#pragma implementation
#endif
#include "config.h"
#include "ExtendEntityManager.h"
#include "ParserApp.h"
#include "XmlOutputEventHandler.h"
#include "XmlOutputMessages.h"
#include "Message.h"
#include "MessageArg.h"
#include "macros.h"
#if 0
#include "UTF8CodingSystem.h"
#endif
#include "InternalInputSource.h"
#include "StorageManager.h"
#include "HashTable.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#ifndef _MSC_VER
#include <libgen.h>
#include <sys/param.h>
#include <unistd.h>
#else
#define MAXPATHLEN _MAX_PATH
#include <direct.h>
char* dirname(const char*);
void realpath(const char*,char*);
#endif
#include <sys/stat.h>
#include <sys/types.h>
#include <errno.h>
#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif
#define EXT_ENT_FILE "extEntities.dtf"
#define INT_ENT_FILE "intEntities.dtf"
inline
void operator+=(StringC &str, const char *s)
{
while (*s)
str += *s++;
}
inline
void XmlOutputEventHandler::closeCdataSection()
{
useCdata_ = 0;
if (inCdata_) {
os() << "]]>";
inCdata_ = 0;
nCdataEndMatched_ = 0;
}
}
const char RE = '\r';
XmlOutputEventHandler::Options::Options()
{
memset(this, 0, sizeof(*this));
}
static
void escape(OutputCharStream &s, Char c)
{
s << "&#" << (unsigned long)c << ";";
}
XmlOutputEventHandler::XmlOutputEventHandler(const Options &options,
OutputCharStream *os,
const StringC &encodingName,
const char *outputDir,
const char *dtdLoc,
const Ptr<ExtendEntityManager> &entityManager,
const CharsetInfo &systemCharset,
CmdLineApp *app)
: options_(options),
os_(os),
entityManager_(entityManager),
systemCharset_(&systemCharset),
app_(app),
outputDir_(outputDir),
dtdLoc_(dtdLoc),
inDtd_(0),
inCdata_(0),
useCdata_(0),
nCdataEndMatched_(0),
namecaseGeneral_(0),
extEntFile_(0),
intEntFile_(0),
extEnts_(0),
intEnts_(0)
{
// Create output stream for main output
os_->setEscaper(escape);
*os_ << "<?xml version=\"1.0\"";
if (encodingName.size())
*os_ << " encoding=\"" << encodingName << '"';
*os_ << "?>" << RE;
// Set directory for output files
if (outputDir_ == NULL || strlen(outputDir_) == 0)
outputDir_ = ".";
// Open file for writing external entity declarations if we are preserving
// any entities
if (! options_.expExt) {
ParserApp::AppChar* filePath=new ParserApp::AppChar[strlen(outputDir_) + 21];
strcpy (filePath, outputDir_);
strcat (filePath, "/");
strcat (filePath, EXT_ENT_FILE);
if (! options_.overwrite) {
uniqueFilename(filePath);
}
// Create directories if necessary
char *dirs = strdup (filePath);
maybeCreateDirectories(dirname(dirs));
extEntFile_ = new FileOutputByteStream;
// Open the file, exiting if we fail to do so.
if (!extEntFile_->open(filePath)) {
app_->message(XmlOutputMessages::cannotOpenOutputFile,
StringMessageArg
(app_->codingSystem()->convertIn(filePath)));
exit(1);
}
extEnts_ =
new EncodeOutputCharStream(extEntFile_, app_->outputCodingSystem());
/* Write some header information to this file. */
*extEnts_ << "<!-- This file automatically generated by osx -->\n";
extEnts_->flush();
delete [] filePath;
}
// Open file for writing external entity declarations if we are preserving
// any entities
if (! options_.expInt) {
char* filePath=new char[strlen(outputDir_) + 21];
strcpy (filePath, outputDir_);
strcat (filePath, "/");
strcat (filePath, INT_ENT_FILE);
if (! options_.overwrite) {
uniqueFilename(filePath);
}
// Create directories if necessary
char *dirs = strdup (filePath);
maybeCreateDirectories(dirname(dirs));
intEntFile_ = new FileOutputByteStream;
// Open the file, exiting if we fail to do so.
if (!intEntFile_->open(filePath)) {
app_->message(XmlOutputMessages::cannotOpenOutputFile,
StringMessageArg
(app_->codingSystem()->convertIn(filePath)));
exit(1);
}
intEnts_ =
new EncodeOutputCharStream(intEntFile_, app_->outputCodingSystem());
/* Write some header information to this file. */
*intEnts_ << "<!-- This file automatically generated by osx -->\n";
intEnts_->flush();
delete [] filePath;
}
/* If we are expanding internal entities, we are definitely not
generating a reference in the internal subset to an internal
entities driver file. Likewise for external entities. */
if (options_.expInt)
options_.intDecl= false;
if (options_.expExt)
options_.extDecl= false;
}
XmlOutputEventHandler::~XmlOutputEventHandler()
{
os() << RE;
delete os_;
if (! options_.expExt)
delete extEntFile_;
if (! options_.expInt) {
delete intEntFile_;
}
}
void XmlOutputEventHandler::message(MessageEvent *event)
{
app_->dispatchMessage(event->message());
ErrorCountEventHandler::message(event);
}
void XmlOutputEventHandler::sgmlDecl(SgmlDeclEvent *event)
{
const Syntax &syntax = event->instanceSyntax();
if (syntax.namecaseGeneral()) {
namecaseGeneral_ = 1;
syntax.generalSubstTable()->inverseTable(lowerSubst_);
}
delete event;
}
void XmlOutputEventHandler::data(DataEvent *event)
{
const Entity *entity = event->entity();
if (! options_.expInt && entity != NULL) {
Boolean firstSeen = checkFirstSeen(entity->name());
// output entity reference
os() << "&" << entity->name() << ";";
// save old output stream
outputStack_.insert(os_);
// output beginning of entity declaration
if (firstSeen) {
// Point default output stream (os_) to the entities
// declaration file.
os_ = intEnts_;
os() << "<!ENTITY " << entity->name() << " CDATA \"";
} // end if firstSeen
else {
os_ = (OutputCharStream *)
new EncodeOutputCharStream(new NullOutputByteStream,
app_->outputCodingSystem());
} // end else (not firstSeen)
} // end if expanding internal entities
// Now, no matter what, output the entity's data
if (useCdata_)
outputCdata(event->data(), event->dataLength());
else
outputData(event->data(), event->dataLength(), 0, 0);
// If necessary, end entity decl and replace old output stream
if (! options_.expInt && entity != NULL) {
os() << "\">" << RE;
os_->flush();
os_ = outputStack_.get();
}
delete event;
}
void XmlOutputEventHandler::startElement(StartElementEvent *event)
{
// This shouldn't happen (although useCdata_ can be true).
if (inCdata_) {
inCdata_ = 0;
nCdataEndMatched_ = 0;
os() << "]]>";
}
if (options_.preserveCase) {
os() << '<' << event->elementType()->origName();
} else {
os() << '<' << generalName(event->name(), nameBuf_);
}
size_t nAttributes = event->attributes().size();
for (size_t i = 0; i < nAttributes; i++)
outputAttribute(event->attributes(), i);
if (options_.nlInTag)
os() << RE;
if (options_.cdata
&& event->elementType()->definition()->declaredContent() == ElementDefinition::cdata)
useCdata_ = 1;
if (options_.empty
&& event->elementType()->definition()->declaredContent() == ElementDefinition::empty)
os() << "/>";
else
os() << '>';
delete event;
}
void XmlOutputEventHandler::outputAttribute(const AttributeList &attributes, size_t i)
{
const AttributeValue *value = attributes.value(i);
if (!value)
return;
const Text *text;
const StringC *string;
AttributeValue::Type type = value->info(text, string);
if (type == AttributeValue::implied)
return;
if (options_.nlInTag)
os() << RE;
else
os() << ' ';
if (options_.preserveCase) {
os() << attributes.def()->def(i)->origName() << "=\"";
} else {
os() << generalName(attributes.name(i), nameBuf_) << "=\"";
}
if (type == AttributeValue::cdata) {
TextIter iter(*text);
TextItem::Type type;
const Char *p;
size_t length;
const Location *loc;
while (iter.next(type, p, length, loc)) {
switch (type) {
case TextItem::data:
case TextItem::cdata:
outputData(p, length, 1, 0);
break;
case TextItem::sdata:
{
app_->setNextLocation(loc->origin()->parent());
const Entity *entity = loc->origin()->asEntityOrigin()->entity();
Boolean superLit = 0;
if (! options_.expInt) {
Boolean firstSeen = checkFirstSeen(entity->name());
// output entity reference
os() << "&" << entity->name() << ";";
// save old output stream
outputStack_.insert(os_);
if (firstSeen) {
// Point default output stream (os_) to the entities
// declaration file and output start of declaration
os_ = intEnts_;
if (options_.sdataAsPi) {
os() << "<!ENTITY " << entity->name()
<< " '<?sdataEntity " << entity->name() << " \"";
superLit = 1;
} else {
os() << "<!ENTITY " << entity->name() << " \"";
}
} else { // we've seen it before; throw away expansion data
os_ = (OutputCharStream *)
new EncodeOutputCharStream(new NullOutputByteStream,
app_->outputCodingSystem());
}
}
// We are expanding internal entities; expand this one as a PI,
// since XML does not have SDATA entities
else {
if (options_.sdataAsPi)
os() << "<?sdataEntity " << entity->name() << " \"";
}
// Now, no matter what, output the entity's data
outputData(p, length, 1, superLit);
// If necessary, end entity decl and replace old output stream
if (! options_.expInt) {
if (options_.sdataAsPi)
os() << "\" ?>'>" << RE;
else
os() << "\"> <!-- originally sdata entity -->\n";
os_->flush();
os_ = outputStack_.get();
} else {
if (options_.sdataAsPi)
os() << "\" ?>";
}
}
break;
case TextItem::nonSgml:
// FIXME
break;
default:
break;
}
}
}
else if (attributes.def()->def(i)->isEntity())
os() << *string;
else
if (options_.preserveCase) {
const Vector<StringC> *tokensPtr =
attributes.def()->def(i)->getOrigTokens();
if (tokensPtr) {
size_t nTokens = tokensPtr->size();
Vector<StringC>::const_iterator tokens = tokensPtr->begin();
for (i = 0; i < nTokens; i++) {
if (equalsIgnoreCase(*string, (StringC &)tokens[i])) {
os() << tokens[i];
}
}
} else {
os() << *string;
}
} else {
os() << generalName(*string, nameBuf_);
}
os() << '"';
}
void XmlOutputEventHandler::endElement(EndElementEvent *event)
{
closeCdataSection();
if (options_.empty
&& event->elementType()->definition()->declaredContent() == ElementDefinition::empty)
;
else {
if (options_.preserveCase) {
os() << "</" << event->elementType()->origName();
} else {
os() << "</" << generalName(event->name(), nameBuf_);
}
#if 0
if (options_.nlInTag)
os() << RE;
#endif
os() << '>';
}
delete event;
}
static Boolean isXmlS(Char c)
{
switch (c) {
case ' ':
case '\r':
case '\n':
case '\t':
return 1;
}
return 0;
}
static Boolean containsQuestionLt(const Char *s, size_t n)
{
for (; n > 1; n--, s++)
if (*s == '?' && s[1] == '>')
return 1;
return 0;
}
// These do not handle Unicode chars properly.
// They treat all Unicode chars >= 170 as name start characters
static Boolean isXmlNameStartChar(Char c)
{
switch (c) {
case ':':
case '.':
case '_':
case '-':
return 1;
}
if (c >= 170)
return 1;
if ('a' <= c && c <= 'z')
return 1;
if ('A' <= c && c <= 'Z')
return 1;
return 0;
}
static Boolean isXmlNameChar(Char c)
{
if (isXmlNameStartChar(c))
return 1;
if ('0' <= c && c <= '9')
return 1;
return 0;
}
static Boolean startsWithXmlName(const Char *s, size_t n)
{
if (n == 0)
return 0;
if (!isXmlNameStartChar(*s))
return 0;
for (s++, n--; n > 0; n--, s++) {
if (isXmlS(*s))
return 1;
if (!isXmlNameChar(*s))
return 0;
}
return 1;
}
void XmlOutputEventHandler::pi(PiEvent *event)
{
const Char *s = event->data();
size_t n = event->dataLength();
if (n >= 3 && s[0] == 'x' && s[1] == 'm' && s[2] == 'l'
&& (n == 3 || isXmlS(s[3])))
; // Probably came from an encoding PI.
else if (!startsWithXmlName(s, n)) {
app_->setNextLocation(event->location());
app_->message(XmlOutputMessages::piNoName);
}
else if (options_.piEscape) {
os() << "<?";
outputData(s, n, 0, 0);
os() << "?>";
}
else if (containsQuestionLt(s, n)) {
app_->setNextLocation(event->location());
app_->message(XmlOutputMessages::piQuestionLt);
}
else {
os() << "<?";
os().write(s, n);
os() << "?>";
}
delete event;
}
void XmlOutputEventHandler::sdataEntity(SdataEntityEvent *event)
{
const Entity *entity = event->entity();
app_->setNextLocation(event->location().origin()->parent());
Boolean superLit = 0;
if (! options_.expInt) {
Boolean firstSeen = checkFirstSeen(entity->name());
// output entity reference
os() << "&" << entity->name() << ";";
// save old output stream
outputStack_.insert(os_);
if (firstSeen) {
// Point default output stream (os_) to the entities
// declaration file and output start of declaration
os_ = intEnts_;
if (options_.sdataAsPi) {
os() << "<!ENTITY " << entity->name()
<< " '<?sdataEntity " << entity->name() << " ";
superLit = 1;
} else {
os() << "<!ENTITY " << entity->name() << " \"";
}
} else { // we've seen it before; throw away expansion data
os_ = (OutputCharStream *)
new EncodeOutputCharStream(new NullOutputByteStream,
app_->outputCodingSystem());
}
}
// We are expanding internal entities; expand this one as a PI,
// since XML does not have SDATA entities
else {
if (options_.sdataAsPi)
os() << "<?sdataEntity " << entity->name() << " ";
}
// Now, no matter what, output the entity's data
outputData(event->data(), event->dataLength(), 0, superLit);
// If necessary, end entity decl and replace old output stream
if (! options_.expInt) {
if (options_.sdataAsPi)
os() << " ?>'>" << RE;
else
os() << "\"> <!-- originally sdata entity -->\n";
os_->flush();
os_ = outputStack_.get();
} else {
if (options_.sdataAsPi)
os() << " ?>";
}
delete event;
}
/** External data entities may be referenced in attributes only. If
one is referenced in content, error and exit. */
void XmlOutputEventHandler::externalDataEntity(ExternalDataEntityEvent *event)
{
app_->message(XmlOutputMessages::externalDataEntityReference,
StringMessageArg(event->entity()->name()));
exit (1);
}
void XmlOutputEventHandler::subdocEntity(SubdocEntityEvent *event)
{
const SubdocEntity *entity = event->entity();
app_->setNextLocation(event->location().origin()->parent());
if (options_.expExt) {
SgmlParser::Params params;
params.subdocInheritActiveLinkTypes = 1;
params.subdocReferenced = 1;
params.origin = event->entityOrigin()->copy();
params.parent = & ((ParserApp *)app_)->parser();
params.sysid = entity->externalId().effectiveSystemId();
params.entityType = SgmlParser::Params::subdoc;
SgmlParser parser(params);
parser.parseAll(*this);
} else {
os() << "&" << entity->name() << ";";
}
delete event;
}
void XmlOutputEventHandler::startDtd(StartDtdEvent *event)
{
inDtd_ = 1;
delete event;
}
void XmlOutputEventHandler::endDtd(EndDtdEvent *event)
{
inDtd_ = 0;
delete event;
}
void XmlOutputEventHandler::maybeStartDoctype(Boolean &doctypeStarted, const Dtd &dtd)
{
if (doctypeStarted)
return;
doctypeStarted = 1;
const StringC &name = dtd.documentElementType()->name();
StringC buf;
// if appropriate, lowercase the doctype name -jphekman
if (options_.lower) {
for (size_t i = 0; i < name.size(); i++) {
Char c = lowerSubst_[name[i]];
if (c != name[i]) {
buf = name;
buf[i] = c;
for (i++; i < name.size(); i++)
lowerSubst_.subst(buf[i]);
}
}
} else {
buf = name;
}
/* Output the doctype declaration. If requested, specify a local
file containing the DTD. */
os() << "<!DOCTYPE " << buf;
if (dtdLoc_ != NULL) {
os() << " SYSTEM \"" << dtdLoc_ << "\"";
}
os() << " [" << RE;
/* If requested, include pointers in the instance's internal subset
to driver files which define internal/external entities. */
if (options_.extDecl) {
os() << "<!ENTITY % external-entities SYSTEM \""
<< app_->codingSystem()->convertIn(outputDir_) << "/"
<< EXT_ENT_FILE << "\">"
<< RE << "%external-entities;" << RE;
}
if (options_.intDecl) {
os() << "<!ENTITY % internal-entities SYSTEM \""
<< app_->codingSystem()->convertIn(outputDir_) << "/"
<< INT_ENT_FILE << "\">"
<< RE << "%internal-entities;" << RE;
}
}
void XmlOutputEventHandler::endProlog(EndPrologEvent *event)
{
const Dtd &dtd = event->dtd();
Boolean doctypeStarted = 0;
if (options_.extDecl || options_.intDecl) {
maybeStartDoctype(doctypeStarted, dtd);
}
if (options_.notation) {
Dtd::ConstNotationIter iter(dtd.notationIter());
for (;;) {
const Notation *notation = iter.nextTemp();
if (!notation)
break;
maybeStartDoctype(doctypeStarted, dtd);
os() << "<!NOTATION " << generalName(notation->name(), nameBuf_);
outputExternalId(*notation);
os() << ">" << RE;
if (notation->attributeDefTemp()) {
app_->setNextLocation(notation->defLocation());
app_->message(XmlOutputMessages::notationAttributes,
StringMessageArg(notation->name()));
}
}
}
if (options_.ndata) {
Dtd::ConstEntityIter iter(dtd.generalEntityIter());
for (;;) {
const Entity *entity = iter.nextTemp();
if (!entity)
break;
const ExternalDataEntity *extDataEntity = entity->asExternalDataEntity();
if (extDataEntity) {
maybeStartDoctype(doctypeStarted, dtd);
os() << "<!ENTITY " << entity->name();
outputExternalId(*entity);
if (extDataEntity->dataType() != EntityDecl::ndata) {
app_->setNextLocation(entity->defLocation());
app_->message(XmlOutputMessages::externalDataNdata,
StringMessageArg(entity->name()));
}
os() << " NDATA " << generalName(extDataEntity->notation()->name(), nameBuf_) << ">" << RE;
}
}
}
if (options_.id || options_.attlist) {
Dtd::ConstElementTypeIter iter(dtd.elementTypeIter());
for (;;) {
const ElementType *elementType = iter.next();
if (!elementType)
break;
const AttributeDefinitionList *adl = elementType->attributeDefTemp();
if (adl) {
if (options_.attlist) {
maybeStartDoctype(doctypeStarted, dtd);
if (options_.preserveCase) {
os() << "<!ATTLIST " << elementType->origName();
} else {
os() << "<!ATTLIST " <<
generalName(elementType->name(), nameBuf_);
}
for (size_t i = 0; i < adl->size(); i++) {
const AttributeDefinition *def = adl->def(i);
if (options_.preserveCase) {
os() << RE << def->origName();
} else {
os() << RE << generalName(def->name(), nameBuf_);
}
AttributeDefinitionDesc desc;
def->getDesc(desc);
switch (desc.declaredValue) {
case AttributeDefinitionDesc::cdata:
os() << " CDATA #IMPLIED";
break;
case AttributeDefinitionDesc::name:
case AttributeDefinitionDesc::number:
case AttributeDefinitionDesc::nmtoken:
case AttributeDefinitionDesc::nutoken:
os() << " NMTOKEN #IMPLIED";
break;
case AttributeDefinitionDesc::entity:
os() << " ENTITY #IMPLIED";
break;
case AttributeDefinitionDesc::idref:
os() << " IDREF #IMPLIED";
break;
case AttributeDefinitionDesc::names:
case AttributeDefinitionDesc::numbers:
case AttributeDefinitionDesc::nmtokens:
case AttributeDefinitionDesc::nutokens:
os() << " NMTOKENS #IMPLIED";
break;
case AttributeDefinitionDesc::entities:
os() << " ENTITIES #IMPLIED";
break;
case AttributeDefinitionDesc::idrefs:
os() << " IDREFS #IMPLIED";
break;
case AttributeDefinitionDesc::id:
os() << " ID #IMPLIED";
break;
case AttributeDefinitionDesc::notation:
os() << " NOTATION";
// fall through
case AttributeDefinitionDesc::nameTokenGroup:
{
os() << " (";
if (options_.preserveCase) {
for (size_t j = 0;
j < desc.origAllowedValues.size(); j++) {
if (j > 0)
os() << '|';
os() << desc.origAllowedValues[j];
}
os() << ") #IMPLIED";
} else {
for (size_t j = 0; j < desc.allowedValues.size(); j++) {
if (j > 0)
os() << '|';
os() << desc.allowedValues[j];
}
os() << ") #IMPLIED";
}
}
break;
default:
CANNOT_HAPPEN();
}
}
os() << '>' << RE;
}
else {
size_t idIndex = adl->idIndex();
if (idIndex != size_t(-1)) {
maybeStartDoctype(doctypeStarted, dtd);
os() << "<!ATTLIST " << generalName(elementType->name(), nameBuf_);
os() << ' ' << generalName(adl->def(idIndex)->name(), nameBuf_)
<< " ID #IMPLIED>" << RE;
}
}
}
}
}
if (doctypeStarted)
os() << "]>" << RE;
delete event;
}
void XmlOutputEventHandler::outputExternalId(const EntityDecl &decl)
{
const StringC *pubIdP = decl.publicIdPointer();
const StringC *sysIdP = decl.effectiveSystemIdPointer();
if (pubIdP) {
os() << " PUBLIC \"" << *pubIdP << "\"";
if (decl.declType() == EntityDecl::notation && !sysIdP)
return;
os() << " \"";
}
else
os() << " SYSTEM \"";
if (sysIdP) {
StringC url;
switch(fsiToUrl(*sysIdP, decl.defLocation(), url)) {
case 1:
os() << url;
break;
case 0:
break;
default:
app_->setNextLocation(decl.defLocation());
app_->message(XmlOutputMessages::cannotConvertFsiToUrl,
StringMessageArg(*sysIdP));
break;
}
}
os() << "\"";
}
// Return 1 if OK; return -1 to generate generic error; return 0 if error already generated.
int XmlOutputEventHandler::fsiToUrl(const StringC &fsi, const Location &loc, StringC &url)
{
ParsedSystemId parsedBuf;
if (!entityManager_->parseSystemId(fsi, *systemCharset_, 0, 0, *app_, parsedBuf))
return 0;
if (parsedBuf.size() != 1)
return -1;
if (strcmp(parsedBuf[0].storageManager->type(), "URL") == 0) {
url = parsedBuf[0].specId;
return 1;
}
Owner<InputSource> in(entityManager_->open(fsi,
*systemCharset_,
InputSourceOrigin::make(),
0,
*app_));
if (!in)
return 0;
Xchar c = in->get(*app_);
StorageObjectLocation soLoc;
if (c == InputSource::eE && in->accessError()) {
if (parsedBuf[0].baseId.size())
return 0;
soLoc.storageObjectSpec = &parsedBuf[0];
soLoc.actualStorageId = parsedBuf[0].specId;
}
else {
const Location &loc = in->currentLocation();
if (loc.origin().isNull())
return -1;
const InputSourceOrigin *tem = loc.origin()->asInputSourceOrigin();
if (!tem)
return -1;
const ParsedSystemId *psi
= ExtendEntityManager::externalInfoParsedSystemId(tem->externalInfo());
if (!psi || psi->size() != 1)
return -1;
if (!ExtendEntityManager::externalize(tem->externalInfo(), 0, soLoc))
return -1;
}
if (strcmp(soLoc.storageObjectSpec->storageManager->type(), "OSFILE") != 0)
return -1;
return filenameToUrl(soLoc.actualStorageId, loc, url);
}
static
StringC hexEncode(unsigned char c)
{
static const char hexDigits[] = "0123456789ABCDEF";
StringC result;
result.resize(2);
result[0] = hexDigits[c >> 4];
result[1] = hexDigits[c & 0xF];
return result;
}
static
StringC urlEncode(Char c)
{
if (c >= 0x80) {
StringC tem;
tem += c;
#if 0
// This is what the URN spec says.
UTF8CodingSystem utf8;
String<char> bytes(utf8.convertOut(tem));
tem.resize(0);
for (size_t i = 0; i < bytes.size(); i++)
tem += hexEncode(bytes[i]);
#endif
return tem;
}
if (strchr("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$-_.+!*'(),",
char(c))) {
StringC tem;
tem += c;
return tem;
}
return hexEncode((unsigned char)c);
}
int XmlOutputEventHandler::filenameToUrl(const StringC &filename,
const Location &loc,
StringC &url)
{
#ifdef SP_MSDOS_FILENAMES
if (filename.size() >= 3 && filename[1] == ':'
&& (('a' <= filename[0] && filename[0] <= 'z')
|| ('A' <= filename[0] && filename[0] <= 'Z'))
&& (filename[2] == '/' || filename[2] == '\\')) {
url += "file://";
url += filename;
return 1;
}
#endif
for (size_t i = 0; i < filename.size(); i++) {
if (filename[i] == '/'
#ifdef SP_MSDOS_FILENAMES
|| filename[i] == '\\'
#endif
) {
url += '/';
}
else
url += urlEncode(filename[i]);
}
return 1;
}
void XmlOutputEventHandler::commentDecl(CommentDeclEvent *event)
{
if (!inDtd_ && options_.comment) {
Boolean started = 0;
for (MarkupIter iter(event->markup()); iter.valid(); iter.advance()) {
if (iter.type() == Markup::comment) {
if (!started) {
os() << "<!--";
started = 1;
}
os().write(iter.charsPointer(), iter.charsLength());
}
}
if (started)
os() << "-->";
}
delete event;
}
void XmlOutputEventHandler::markedSectionStart(MarkedSectionStartEvent *event)
{
if (options_.cdata && event->status() == MarkedSectionEvent::cdata)
useCdata_ = 1;
delete event;
}
void XmlOutputEventHandler::markedSectionEnd(MarkedSectionEndEvent *event)
{
if (event->status() == MarkedSectionEvent::cdata)
closeCdataSection();
delete event;
}
void XmlOutputEventHandler::outputCdata(const Char *s, size_t n)
{
static const char cdataEnd[] = "]]>";
for (; n > 0; s++, n--) {
Char c = *s;
if (!inCdata_) {
os() << "<![CDATA[";
inCdata_ = 1;
}
if (c == cdataEnd[nCdataEndMatched_]) {
if (nCdataEndMatched_ == 2) {
os() << "]]><![CDATA[";
nCdataEndMatched_ = 0;
}
else
nCdataEndMatched_++;
}
else
nCdataEndMatched_ = 0;
os().put(c);
}
}
void XmlOutputEventHandler::outputData(const Char *s, size_t n, Boolean inLit,
Boolean inSuperLit)
{
for (; n > 0; s++, n--) {
Char c = *s;
switch (c) {
case '&':
os() << "&amp;";
break;
case '<':
os() << "&lt;";
break;
case '>':
os() << "&gt;";
break;
case 9:
case 10:
case 13:
if (inLit)
os() << "&#" << int(c) << ';';
else
os().put(c);
break;
case '"':
if (inLit) {
os() << "&quot;";
break;
}
case '\'':
if (inSuperLit) {
os() << "&apos;";
break;
}
// fall through
default:
os().put(c);
break;
}
}
}
void XmlOutputEventHandler::entityDefaulted(EntityDefaultedEvent *event)
{
if (options_.reportEnts) {
ConstPtr<Entity> entity = event->entityPointer();
const ExternalEntity *extEntity = entity->asExternalEntity();
// If we are dealing with an external entity (else it will be null)
// -jphekman
if (extEntity != 0) {
const StringC *systemIdPointer = extEntity->systemIdPointer();
if (systemIdPointer != 0) {
os () << "<?entityResolved " << *systemIdPointer << " ?>";
}
}
}
}
void XmlOutputEventHandler::inputOpened(InputSource *in)
{
if (!inDtd_) {
const CodingSystem *outputCodingSystem = app_->outputCodingSystem();
const EntityDecl *entDecl = in->currentLocation().origin()->entityDecl();
if (entDecl == NULL ) {
if (options_.reportIS) {
os() << "<?inputOpened effectiveSystemID=\"NULL\" "
<< "systemID=\"NULL\" publicID=\"NULL\" ?>";
}
return;
}
const Entity *ent = in->currentLocation().origin()->entity();
const StringC *effectiveSystemIdPointer =
entDecl->effectiveSystemIdPointer();
const StringC *systemIdPointer = entDecl->systemIdPointer();
const StringC *publicIdPointer = entDecl->publicIdPointer();
if (options_.reportIS && ent->asExternalEntity() != NULL) {
os() << "<?inputOpened effectiveSystemID=\"";
if (effectiveSystemIdPointer == 0)
os() << "NULL";
else
os () << *effectiveSystemIdPointer;
os() << "\" systemID=\"";
if (systemIdPointer == 0)
os() << "NULL";
else
os () << *systemIdPointer;
os() << "\" publicID=\"";
if (publicIdPointer == 0)
os() << "NULL";
else
os () << *publicIdPointer;
os() << "\" ?>";
}
/* Output entity declaration and, in the case of external
entities, write file containing entity replacement text. */
Boolean firstSeen = checkFirstSeen(ent->name());
if (ent->asExternalEntity() != NULL) {
if (! options_.expExt) {
// output entity reference
os() << "&" << entDecl->name() << ";";
// output entity declaration
if (systemIdPointer == 0) {
app_->message(XmlOutputMessages::missingSystemId,
StringMessageArg(entDecl->name()));
exit (1);
}
// save old output stream
outputStack_.insert(os_);
if (firstSeen) {
char *originalFilePath =
strdup(outputCodingSystem->convertOut(*systemIdPointer).data());
/* Construct new output path, prepending the output
directory: so, for example, /usr/local/lib/ents/foo
becomes ./usr/local/lib/ents/foo.xml; possible 2-digit
suffix (for uniqueness) */
char* filePath=new char[strlen(outputDir_) + 9 +
outputCodingSystem->convertOut
(*systemIdPointer).size()];
strcpy (filePath, outputDir_);
/* Drop the scheme if there is one (so
"http://www.bar.com/foo" becomes
"output_dir/www.bar.com/foo") */
char *scheme = strchr(originalFilePath, ':');
if (scheme != NULL) {
// drop initial colon
originalFilePath = scheme + 1;
} else {
// check for leading slashes in non-scheme filenames
// and warn that we're going to remove them
if (strncmp(originalFilePath, "/", 1) == 0) {
app_->message(XmlOutputMessages::removingLeadingSlashes,
StringMessageArg(*systemIdPointer));
}
}
// drop initial slashes (there may be multiple)
while (strncmp(originalFilePath, "/", 1) == 0) {
originalFilePath++;
}
// replace exactly one initial slash before prepending
// output directory
strcat (filePath, "/");
strcat(filePath, originalFilePath);
// Check to make sure we haven't passed outside of the
// output directory
char *dirs = strdup (filePath);
char realDirs[MAXPATHLEN];
char realOutputDir[MAXPATHLEN];
char *outputDir = strdup(outputDir_);
realpath((const char *)dirname(dirs), realDirs);
realpath((const char *)dirname(outputDir), realOutputDir);
if (strncmp(realDirs, realOutputDir, strlen (realOutputDir)) != 0) {
app_->message(XmlOutputMessages::pathOutsideOutputDirectory,
StringMessageArg(*systemIdPointer));
if (! options_.writeOutsideOutDir) {
exit (1);
}
}
// Make the necessary directories
maybeCreateDirectories(dirname(dirs));
// Set the suffix to ".xml"
convertSuffix(filePath);
*extEnts_ << "<!ENTITY " << entDecl->name() << " SYSTEM \""
<< filePath << "\">\n";
extEnts_->flush();
if (! options_.overwrite) {
uniqueFilename(filePath);
}
// Open the file, exiting if we fail to do so.
FileOutputByteStream *file = new FileOutputByteStream;
outputFileStack_.insert(file);
StringC filePathStrC = app_->codingSystem()->convertIn(filePath);
// If we've never seen this exact input filename before,
// then we're being asked to overwrite an old generated file
// with a new one (for example, "/text" and "text" will both
// map to the top level of the output directory). In that
// case, produce an error.
for (size_t i = 0; i < filesCreated_.size(); i++) {
if (filesCreated_[i] == filePathStrC) {
bool seenOrigPath = false;
for (size_t j = 0; j < originalFilePaths_.size(); j++) {
if (originalFilePaths_[j] == *systemIdPointer) {
seenOrigPath = true;
}
}
if (! seenOrigPath) {
app_->message(XmlOutputMessages::alreadyWroteOutputFile,
StringMessageArg(filePathStrC));
exit (1);
}
}
}
// Save so we can ensure not overwriting later.
filesCreated_.push_back(filePathStrC);
originalFilePaths_.push_back(*systemIdPointer);
if (!file->open(filePath)) {
app_->message(XmlOutputMessages::cannotOpenOutputFile,
StringMessageArg(filePathStrC));
exit (1);
}
// Create output stream to file and set os_ to it.
os_ = (OutputCharStream *)
new EncodeOutputCharStream(file, outputCodingSystem);
delete [] filePath;
} // end if firstSeen
else {
// push null os onto file output stack, set os_ to it
NullOutputByteStream *nobs = new NullOutputByteStream;
outputFileStack_.insert(nobs);
// Create output stream to file and set os_ to it.
os_ = (OutputCharStream *)
new EncodeOutputCharStream(nobs, outputCodingSystem);
} // end else (notfirst Seen)
} // end if not expanding external entities
} // end if asExternalEntity()
else if (ent->asInternalEntity() != NULL) {
char *entName =
strdup(outputCodingSystem->convertOut(entDecl->name()).data());
if (! options_.expInt) {
// output entity reference
os() << "&" << entDecl->name() << ";";
// save old output stream and point output stream at null
outputStack_.insert(os_);
NullOutputByteStream *nobs = new NullOutputByteStream;
os_ = (OutputCharStream *)
new EncodeOutputCharStream(nobs, outputCodingSystem);
if (firstSeen) {
// don't define predefined entities
if (strcmp (entName, "lt") != 0
&& strcmp (entName, "gt") != 0
&& strcmp (entName, "amp") != 0
&& strcmp (entName, "apos") != 0
&& strcmp (entName, "quot") != 0) {
// output declaration to internal entities driver file
InternalInputSource *intIn = in->asInternalInputSource();
*intEnts_ << "<!ENTITY " << entDecl->name() << " "
<< getQuoteMark (intIn->contents())
<< (*intIn->contents())
<< getQuoteMark (intIn->contents())
<< " >\n";
intEnts_->flush();
}
} // end if firstSeen
} // end if expanding internal entities
} // end if this is an internal entity
else {
// We should only get InternalText and ExternalText entities here.
app_->message(XmlOutputMessages::unexpectedEntityType,
StringMessageArg (ent->name()));
exit(1);
}
}
}
void XmlOutputEventHandler::inputClosed(InputSource *in)
{
if (! inDtd_) {
const EntityDecl *entDecl = in->currentLocation().origin()->entityDecl();
const Entity *ent = in->currentLocation().origin()->entity();
if (entDecl == NULL || ent == NULL ) {
if ( options_.reportIS) {
os() << "<?inputOpened effectiveSystemID=\"NULL\" "
<< "systemID=\"NULL\" publicID=\"NULL\" ?>";
}
return;
}
// Close external entity
if (ent->asExternalEntity() != NULL) {
if (! options_.expExt) {
// delete current output stream
os_->flush();
delete os_;
// restore previous output stream
os_ = outputStack_.get();
// close file
OutputByteStream *file = outputFileStack_.get();
delete file;
}
}
// Close internal entity
else if (ent->asInternalEntity() != NULL) {
if (! options_.expInt) {
os_->flush();
delete os_;
// restore previous output stream
os_ = outputStack_.get();
}
}
else {
// We should only get InternalText and ExternalText entities here.
app_->message(XmlOutputMessages::unexpectedEntityType,
StringMessageArg( ent->name() ));
exit(1);
}
if (options_.reportIS && ent->asExternalEntity() != NULL) {
os() << "<?inputClosed ?>";
}
}
}
const StringC &XmlOutputEventHandler::generalName(const StringC &name,
StringC &buf)
{
if (options_.lower && namecaseGeneral_) {
for (size_t i = 0; i < name.size(); i++) {
Char c = lowerSubst_[name[i]];
if (c != name[i]) {
buf = name;
buf[i] = c;
for (i++; i < name.size(); i++)
lowerSubst_.subst(buf[i]);
return buf;
}
}
}
return name;
}
Boolean XmlOutputEventHandler::equalsIgnoreCase(const StringC &str1,
StringC &str2)
{
if (str1.size() != str2.size())
return false;
for (size_t i = 0; i < str1.size(); i++) {
if (lowerSubst_[str1[i]] != lowerSubst_[str2[i]])
return false;
}
return true;
}
/** Make this string's suffix ".xml", attempting to do the right thing
if we encounter ".sgm", ".sgml", or ".xml" as the original
suffix. */
char *XmlOutputEventHandler::convertSuffix(char *name)
{
// Get a pointer to the last occurrence of ".":
char *suffix = strrchr(name, '.');
// If there is no "." in "name", append ".xml" and return
if (suffix == NULL) {
strcat(name, ".xml");
return name;
}
// Suffix is "sgm[l]": subsitite ".xml"
if (strcmp(suffix, ".sgm") == 0 || strcmp (suffix, ".sgml") == 0) {
strcpy (suffix, ".xml");
return name;
}
// "xml": do nothing
if (strcmp(suffix, ".xml") == 0) {
return name;
}
// default: append ".xml"
strcat (name, ".xml");
return name;
}
/** Create all the directories mentioned in this path, unless they
already exist. */
int XmlOutputEventHandler::maybeCreateDirectories (char *path)
{
int ret;
char *pp = strrchr(path, '/');
#ifdef SP_MSDOS_FILENAMES
char *ppp = strrchr(path,'\\');
if (ppp && (ppp > pp || pp==NULL)) pp=ppp;
#endif
if ((pp) && (pp != path)) {
*pp = '\0';
maybeCreateDirectories(path);
*pp = '/';
}
#ifndef _MSC_VER
struct stat statbuf;
pp = strrchr(path, '/');
statbuf.st_mode = 0777;
/* stat the directory */
if ((pp) && (pp != path)) {
*pp = '\0';
stat(path, &statbuf);
*pp = '/';
}
ret = mkdir(path, statbuf.st_mode);
#else
ret=_mkdir(path);
#endif
if (ret == -1) {
if (errno == EEXIST) {
ret = 0;
} else {
app_->message(XmlOutputMessages::cannotCreateDirectory,
StringMessageArg(app_->codingSystem()->convertIn(path)));
}
}
return(ret);
}
/** If this is the first time we have encountered entity "name",
return true and add it to a list of entities we've seen. Else,
return false. */
Boolean XmlOutputEventHandler::checkFirstSeen(const StringC &name)
{
Named *id = entTable_.lookup(name);
if (!id) {
entTable_.insert(new Named(name));
return true;
}
return false;
}
/** Set the newFilename to a filename which, while similar or
identical to originalFilename, does not correspond to an existing
file. Gives an error if called more than 99 times on the same
filename (during the same or different executions of osx). Assumes
that the char array it's given is long enough to accept a two-digit
suffix in addition to the string that's already in there.
*/
void XmlOutputEventHandler::uniqueFilename(char *filePath) {
char* baseFilePath=new char[strlen(filePath)];
strcpy (baseFilePath, filePath);
struct stat statbuf;
int num = 0;
char numStr[3];
while (stat(filePath, &statbuf) == 0 && num <= 100) {
num++;
strcpy (filePath, baseFilePath);
strcat (filePath, ".");
sprintf(numStr, "%d", num);
strcat (filePath, numStr);
}
if (num >= 100) {
app_->message(XmlOutputMessages::tooManyOutputFiles,
StringMessageArg
(app_->codingSystem()->convertIn(baseFilePath)));
exit(1);
}
delete [] baseFilePath;
}
char XmlOutputEventHandler::getQuoteMark(const StringC *contents)
{
char *contentsStr =
strdup(app_->outputCodingSystem()->convertOut(*contents).data());
if (strchr(contentsStr, '"') == NULL) {
return '"';
} else {
return '\'';
}
}
/**
* NullOutputByteStream
*/
NullOutputByteStream::NullOutputByteStream()
{
}
NullOutputByteStream::~NullOutputByteStream()
{
}
void NullOutputByteStream::flush()
{
}
void NullOutputByteStream::sputc(char c)
{
}
void NullOutputByteStream::sputn(const char *, size_t)
{
}
OutputByteStream &NullOutputByteStream::operator<<(char)
{
return *this;
}
OutputByteStream &NullOutputByteStream::operator<<(unsigned char)
{
return *this;
}
OutputByteStream &NullOutputByteStream::operator<<(const char *)
{
return *this;
}
OutputByteStream &NullOutputByteStream::operator<<(int)
{
return *this;
}
OutputByteStream &NullOutputByteStream::operator<<(unsigned)
{
return *this;
}
OutputByteStream &NullOutputByteStream::operator<<(long)
{
return *this;
}
OutputByteStream &NullOutputByteStream::operator<<(unsigned long)
{
return *this;
}
OutputByteStream &NullOutputByteStream::operator<<(const String<char> &)
{
return *this;
}
/* Note: Returning NULL is probably not the best solution here, but as
* nothing actually uses getBufferPtr(), it is hard to see what sort
* of no-op behavior would actually be appropriate. */
char *NullOutputByteStream::getBufferPtr() const
{
return NULL;
}
size_t NullOutputByteStream::getBufferSize() const
{
return 0;
}
void NullOutputByteStream::usedBuffer(size_t)
{
}
void NullOutputByteStream::flushBuf(char)
{
}
#ifdef SP_NAMESPACE
}
#endif
#ifdef _MSC_VER
char* dirname(const char* path)
{
static char buffer[MAXPATHLEN];
char* p=strrchr((char*)path,'/');
#ifdef SP_MSDOS_FILENAMES
char *ppp = strrchr((char*)path,'\\');
if (ppp && (ppp > p || p==NULL)) p=ppp;
#endif
if (p==NULL) return NULL;
strncpy(buffer,path,p-path);
buffer[p-path]=0;
return buffer;
}
void realpath(const char* in,char* out)
{
_fullpath(out,in,MAXPATHLEN);
return;
}
#endif