1675 lines
42 KiB
C++
1675 lines
42 KiB
C++
|
// Copyright (c) 1997 James Clark
|
||
|
// See the file COPYING for copying permission.
|
||
|
|
||
|
#ifdef __GNUG__
|
||
|
#pragma implementation
|
||
|
#endif
|
||
|
|
||
|
#include "config.h"
|
||
|
#include "ExtendEntityManager.h"
|
||
|
#include "ParserApp.h"
|
||
|
#include "XmlOutputEventHandler.h"
|
||
|
#include "XmlOutputMessages.h"
|
||
|
#include "Message.h"
|
||
|
#include "MessageArg.h"
|
||
|
#include "macros.h"
|
||
|
#if 0
|
||
|
#include "UTF8CodingSystem.h"
|
||
|
#endif
|
||
|
#include "InternalInputSource.h"
|
||
|
#include "StorageManager.h"
|
||
|
#include "HashTable.h"
|
||
|
#include <string.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <stdio.h>
|
||
|
#ifndef _MSC_VER
|
||
|
#include <libgen.h>
|
||
|
#include <sys/param.h>
|
||
|
#include <unistd.h>
|
||
|
#else
|
||
|
#define MAXPATHLEN _MAX_PATH
|
||
|
#include <direct.h>
|
||
|
char* dirname(const char*);
|
||
|
void realpath(const char*,char*);
|
||
|
#endif
|
||
|
#include <sys/stat.h>
|
||
|
#include <sys/types.h>
|
||
|
#include <errno.h>
|
||
|
|
||
|
|
||
|
#ifdef SP_NAMESPACE
|
||
|
namespace SP_NAMESPACE {
|
||
|
#endif
|
||
|
|
||
|
// Name of the driver file, created under the output directory, that receives
// declarations for external entities that are preserved rather than expanded.
#define EXT_ENT_FILE "extEntities.dtf"
|
||
|
// Name of the driver file, created under the output directory, that receives
// declarations for internal entities that are preserved rather than expanded.
#define INT_ENT_FILE "intEntities.dtf"
|
||
|
|
||
|
inline
|
||
|
void operator+=(StringC &str, const char *s)
|
||
|
{
|
||
|
while (*s)
|
||
|
str += *s++;
|
||
|
}
|
||
|
|
||
|
inline
|
||
|
void XmlOutputEventHandler::closeCdataSection()
|
||
|
{
|
||
|
useCdata_ = 0;
|
||
|
if (inCdata_) {
|
||
|
os() << "]]>";
|
||
|
inCdata_ = 0;
|
||
|
nCdataEndMatched_ = 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Line terminator (a carriage return) that the handler writes after the XML
// declaration, after generated declarations and, optionally, inside tags.
const char RE = '\r';
|
||
|
|
||
|
// Every option starts out cleared: the whole object is zero-filled.
// NOTE(review): this relies on Options holding only trivially
// zero-initialisable members -- confirm against the class declaration.
XmlOutputEventHandler::Options::Options()
{
  memset(this, 0, sizeof *this);
}
|
||
|
|
||
|
static
|
||
|
void escape(OutputCharStream &s, Char c)
|
||
|
{
|
||
|
s << "&#" << (unsigned long)c << ";";
|
||
|
}
|
||
|
|
||
|
/**
 * Build the handler, write the XML declaration to the main output stream
 * and open the entity-declaration driver files that will be needed.
 *
 * @param options       Conversion options; copied into options_.
 * @param os            Main output stream; the escaper is installed on it.
 * @param encodingName  If non-empty, written as the encoding pseudo-attribute
 *                      of the XML declaration.
 * @param outputDir     Directory for the driver files; NULL or "" means ".".
 * @param dtdLoc        Stored for later use in the DOCTYPE declaration.
 * @param entityManager Stored for resolving system identifiers.
 * @param systemCharset Only its address is kept, so it must outlive the handler.
 * @param app           Application used for messages and coding systems.
 *
 * Terminates the process with exit(1) if a driver file cannot be opened.
 */
XmlOutputEventHandler::XmlOutputEventHandler(const Options &options,
                                             OutputCharStream *os,
                                             const StringC &encodingName,
                                             const char *outputDir,
                                             const char *dtdLoc,
                                             const Ptr<ExtendEntityManager> &entityManager,
                                             const CharsetInfo &systemCharset,
                                             CmdLineApp *app)
: options_(options),
  os_(os),
  entityManager_(entityManager),
  systemCharset_(&systemCharset),
  app_(app),
  outputDir_(outputDir),
  dtdLoc_(dtdLoc),
  inDtd_(0),
  inCdata_(0),
  useCdata_(0),
  nCdataEndMatched_(0),
  namecaseGeneral_(0),
  extEntFile_(0),
  intEntFile_(0),
  extEnts_(0),
  intEnts_(0)
{
  // Create output stream for main output
  os_->setEscaper(escape);
  *os_ << "<?xml version=\"1.0\"";
  if (encodingName.size())
    *os_ << " encoding=\"" << encodingName << '"';
  *os_ << "?>" << RE;

  // Set directory for output files
  if (outputDir_ == NULL || strlen(outputDir_) == 0)
    outputDir_ = ".";

  // Open file for writing external entity declarations if we are preserving
  // any external entities
  if (! options_.expExt) {
    // Room for the directory, "/", the file name, the terminator and the
    // few extra bytes the original reserved beyond that.
    ParserApp::AppChar* filePath=new ParserApp::AppChar[strlen(outputDir_) + 21];
    strcpy (filePath, outputDir_);
    strcat (filePath, "/");
    strcat (filePath, EXT_ENT_FILE);

    if (! options_.overwrite) {
      uniqueFilename(filePath);
    }

    // Create directories if necessary.  dirname() may modify its argument,
    // so it works on a scratch copy, which is released once the directories
    // have been handled (it used to be leaked).
    char *dirs = strdup (filePath);
    maybeCreateDirectories(dirname(dirs));
    free(dirs);

    extEntFile_ = new FileOutputByteStream;

    // Open the file, exiting if we fail to do so.
    if (!extEntFile_->open(filePath)) {
      app_->message(XmlOutputMessages::cannotOpenOutputFile,
                    StringMessageArg
                    (app_->codingSystem()->convertIn(filePath)));
      exit(1);
    }

    extEnts_ =
      new EncodeOutputCharStream(extEntFile_, app_->outputCodingSystem());

    /* Write some header information to this file. */
    *extEnts_ << "<!-- This file automatically generated by osx -->\n";
    extEnts_->flush();
    delete [] filePath;
  }

  // Open file for writing internal entity declarations if we are preserving
  // any internal entities
  if (! options_.expInt) {
    char* filePath=new char[strlen(outputDir_) + 21];
    strcpy (filePath, outputDir_);
    strcat (filePath, "/");
    strcat (filePath, INT_ENT_FILE);

    if (! options_.overwrite) {
      uniqueFilename(filePath);
    }

    // Create directories if necessary (scratch copy, see above).
    char *dirs = strdup (filePath);
    maybeCreateDirectories(dirname(dirs));
    free(dirs);

    intEntFile_ = new FileOutputByteStream;

    // Open the file, exiting if we fail to do so.
    if (!intEntFile_->open(filePath)) {
      app_->message(XmlOutputMessages::cannotOpenOutputFile,
                    StringMessageArg
                    (app_->codingSystem()->convertIn(filePath)));
      exit(1);
    }

    intEnts_ =
      new EncodeOutputCharStream(intEntFile_, app_->outputCodingSystem());

    /* Write some header information to this file. */
    *intEnts_ << "<!-- This file automatically generated by osx -->\n";
    intEnts_->flush();
    delete [] filePath;
  }

  /* If we are expanding internal entities, we are definitely not
     generating a reference in the internal subset to an internal
     entities driver file.  Likewise for external entities. */
  if (options_.expInt)
    options_.intDecl= false;

  if (options_.expExt)
    options_.extDecl= false;
}
|
||
|
|
||
|
// Finish the main output with a final line terminator, then release the
// main stream and whichever driver-file byte streams were opened.
// NOTE(review): the character-stream wrappers extEnts_ / intEnts_ are not
// deleted here -- confirm whether they are released elsewhere.
XmlOutputEventHandler::~XmlOutputEventHandler()
{
  os() << RE;
  delete os_;

  // The driver files exist only when the matching kind of entity is
  // being preserved rather than expanded.
  if (!options_.expExt)
    delete extEntFile_;
  if (!options_.expInt)
    delete intEntFile_;
}
|
||
|
|
||
|
// Forward a parser message to the application, then hand the event to the
// base class.  The event is not deleted here; it is passed on to
// ErrorCountEventHandler::message().
void XmlOutputEventHandler::message(MessageEvent *event)
{
  const Message &msg = event->message();
  app_->dispatchMessage(msg);
  ErrorCountEventHandler::message(event);
}
|
||
|
|
||
|
// Record whether the instance syntax folds general names and, if it does,
// fill lowerSubst_ with the inverse of the general substitution table.
// Takes ownership of (and deletes) the event.
void XmlOutputEventHandler::sgmlDecl(SgmlDeclEvent *event)
{
  const Syntax &instanceSyntax = event->instanceSyntax();
  if (instanceSyntax.namecaseGeneral()) {
    namecaseGeneral_ = 1;
    instanceSyntax.generalSubstTable()->inverseTable(lowerSubst_);
  }
  delete event;
}
|
||
|
|
||
|
/**
 * Handle character data.
 *
 * When internal entities are being preserved and the data comes from an
 * entity, an entity reference is written to the current output and the data
 * itself is redirected: to the internal-entities driver file (wrapped in an
 * entity declaration) the first time the entity is seen, or to a throw-away
 * stream on later occurrences.  Otherwise the data is written straight to
 * the current output, as CDATA when CDATA mode is active.
 *
 * Takes ownership of (and deletes) the event.
 */
void XmlOutputEventHandler::data(DataEvent *event)
{
  const Entity *entity = event->entity();
  const Boolean viaEntity = (! options_.expInt && entity != NULL);

  // Throw-away streams used when the entity was declared earlier.  They are
  // tracked so they can be released afterwards; previously both objects
  // were leaked on every repeated reference.
  NullOutputByteStream *discardBytes = 0;
  OutputCharStream *discardChars = 0;

  if (viaEntity) {
    Boolean firstSeen = checkFirstSeen(entity->name());

    // output entity reference
    os() << "&" << entity->name() << ";";

    // save old output stream
    outputStack_.insert(os_);

    if (firstSeen) {
      // Point default output stream (os_) to the entities
      // declaration file and output the beginning of the declaration.
      os_ = intEnts_;
      os() << "<!ENTITY " << entity->name() << " CDATA \"";
    }
    else {
      // Seen before: swallow the expansion.
      discardBytes = new NullOutputByteStream;
      discardChars = (OutputCharStream *)
        new EncodeOutputCharStream(discardBytes,
                                   app_->outputCodingSystem());
      os_ = discardChars;
    }
  }

  // Now, no matter what, output the entity's data
  if (useCdata_)
    outputCdata(event->data(), event->dataLength());
  else
    outputData(event->data(), event->dataLength(), 0, 0);

  // If necessary, end entity decl and replace old output stream
  if (viaEntity) {
    os() << "\">" << RE;
    os_->flush();
    os_ = outputStack_.get();

    // Release the character stream before the byte stream it writes to.
    delete discardChars;
    delete discardBytes;
  }

  delete event;
}
|
||
|
|
||
|
// Write a start-tag: element name, every specified attribute, and either
// ">" or, for declared-EMPTY elements when the "empty" option is on, "/>".
// Also switches CDATA mode on for declared-CDATA elements when the "cdata"
// option is on.  Takes ownership of (and deletes) the event.
void XmlOutputEventHandler::startElement(StartElementEvent *event)
{
  // This shouldn't happen (although useCdata_ can be true): close any
  // CDATA section that is still open before the tag.
  if (inCdata_) {
    os() << "]]>";
    nCdataEndMatched_ = 0;
    inCdata_ = 0;
  }

  // Element name, in original or general (possibly lower-cased) form.
  os() << '<';
  if (options_.preserveCase)
    os() << event->elementType()->origName();
  else
    os() << generalName(event->name(), nameBuf_);

  // Attributes, in definition order.
  const size_t attributeCount = event->attributes().size();
  for (size_t idx = 0; idx != attributeCount; ++idx)
    outputAttribute(event->attributes(), idx);

  if (options_.nlInTag)
    os() << RE;

  // The element definition is consulted only when the relevant option is set.
  if (options_.cdata
      && event->elementType()->definition()->declaredContent() == ElementDefinition::cdata)
    useCdata_ = 1;

  if (options_.empty
      && event->elementType()->definition()->declaredContent() == ElementDefinition::empty) {
    os() << "/>";
  }
  else {
    os() << '>';
  }

  delete event;
}
|
||
|
|
||
|
void XmlOutputEventHandler::outputAttribute(const AttributeList &attributes, size_t i)
|
||
|
{
|
||
|
const AttributeValue *value = attributes.value(i);
|
||
|
if (!value)
|
||
|
return;
|
||
|
const Text *text;
|
||
|
const StringC *string;
|
||
|
AttributeValue::Type type = value->info(text, string);
|
||
|
if (type == AttributeValue::implied)
|
||
|
return;
|
||
|
if (options_.nlInTag)
|
||
|
os() << RE;
|
||
|
else
|
||
|
os() << ' ';
|
||
|
|
||
|
if (options_.preserveCase) {
|
||
|
os() << attributes.def()->def(i)->origName() << "=\"";
|
||
|
} else {
|
||
|
os() << generalName(attributes.name(i), nameBuf_) << "=\"";
|
||
|
}
|
||
|
if (type == AttributeValue::cdata) {
|
||
|
TextIter iter(*text);
|
||
|
TextItem::Type type;
|
||
|
const Char *p;
|
||
|
size_t length;
|
||
|
const Location *loc;
|
||
|
while (iter.next(type, p, length, loc)) {
|
||
|
switch (type) {
|
||
|
case TextItem::data:
|
||
|
case TextItem::cdata:
|
||
|
outputData(p, length, 1, 0);
|
||
|
break;
|
||
|
case TextItem::sdata:
|
||
|
{
|
||
|
app_->setNextLocation(loc->origin()->parent());
|
||
|
const Entity *entity = loc->origin()->asEntityOrigin()->entity();
|
||
|
Boolean superLit = 0;
|
||
|
|
||
|
if (! options_.expInt) {
|
||
|
Boolean firstSeen = checkFirstSeen(entity->name());
|
||
|
|
||
|
// output entity reference
|
||
|
os() << "&" << entity->name() << ";";
|
||
|
|
||
|
// save old output stream
|
||
|
outputStack_.insert(os_);
|
||
|
|
||
|
if (firstSeen) {
|
||
|
// Point default output stream (os_) to the entities
|
||
|
// declaration file and output start of declaration
|
||
|
os_ = intEnts_;
|
||
|
|
||
|
if (options_.sdataAsPi) {
|
||
|
os() << "<!ENTITY " << entity->name()
|
||
|
<< " '<?sdataEntity " << entity->name() << " \"";
|
||
|
superLit = 1;
|
||
|
} else {
|
||
|
os() << "<!ENTITY " << entity->name() << " \"";
|
||
|
}
|
||
|
} else { // we've seen it before; throw away expansion data
|
||
|
os_ = (OutputCharStream *)
|
||
|
new EncodeOutputCharStream(new NullOutputByteStream,
|
||
|
app_->outputCodingSystem());
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// We are expanding internal entities; expand this one as a PI,
|
||
|
// since XML does not have SDATA entities
|
||
|
else {
|
||
|
if (options_.sdataAsPi)
|
||
|
os() << "<?sdataEntity " << entity->name() << " \"";
|
||
|
}
|
||
|
|
||
|
// Now, no matter what, output the entity's data
|
||
|
outputData(p, length, 1, superLit);
|
||
|
|
||
|
// If necessary, end entity decl and replace old output stream
|
||
|
if (! options_.expInt) {
|
||
|
if (options_.sdataAsPi)
|
||
|
os() << "\" ?>'>" << RE;
|
||
|
else
|
||
|
os() << "\"> <!-- originally sdata entity -->\n";
|
||
|
os_->flush();
|
||
|
os_ = outputStack_.get();
|
||
|
} else {
|
||
|
if (options_.sdataAsPi)
|
||
|
os() << "\" ?>";
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
case TextItem::nonSgml:
|
||
|
// FIXME
|
||
|
break;
|
||
|
default:
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else if (attributes.def()->def(i)->isEntity())
|
||
|
os() << *string;
|
||
|
else
|
||
|
if (options_.preserveCase) {
|
||
|
const Vector<StringC> *tokensPtr =
|
||
|
attributes.def()->def(i)->getOrigTokens();
|
||
|
if (tokensPtr) {
|
||
|
size_t nTokens = tokensPtr->size();
|
||
|
Vector<StringC>::const_iterator tokens = tokensPtr->begin();
|
||
|
for (i = 0; i < nTokens; i++) {
|
||
|
if (equalsIgnoreCase(*string, (StringC &)tokens[i])) {
|
||
|
os() << tokens[i];
|
||
|
}
|
||
|
}
|
||
|
} else {
|
||
|
os() << *string;
|
||
|
}
|
||
|
|
||
|
} else {
|
||
|
os() << generalName(*string, nameBuf_);
|
||
|
}
|
||
|
os() << '"';
|
||
|
}
|
||
|
|
||
|
// Write an end-tag, after closing any open CDATA section.  No end-tag is
// written for declared-EMPTY elements when the "empty" option is on (their
// start-tag was already written as "<name/>").  Takes ownership of (and
// deletes) the event.
void XmlOutputEventHandler::endElement(EndElementEvent *event)
{
  closeCdataSection();

  const Boolean selfClosed =
    options_.empty
    && event->elementType()->definition()->declaredContent() == ElementDefinition::empty;

  if (!selfClosed) {
    os() << "</";
    if (options_.preserveCase)
      os() << event->elementType()->origName();
    else
      os() << generalName(event->name(), nameBuf_);
#if 0
    if (options_.nlInTag)
      os() << RE;
#endif
    os() << '>';
  }

  delete event;
}
|
||
|
|
||
|
// True if `c` is one of the four XML white-space characters:
// space, carriage return, line feed or tab.
static Boolean isXmlS(Char c)
{
  if (c == ' ' || c == '\r' || c == '\n' || c == '\t')
    return 1;
  return 0;
}
|
||
|
|
||
|
// True if the `n` characters at `s` contain the two-character sequence
// "?>", which would end a processing instruction prematurely.
static Boolean containsQuestionLt(const Char *s, size_t n)
{
  for (size_t pos = 0; pos + 1 < n; pos++) {
    if (s[pos] == '?' && s[pos + 1] == '>')
      return 1;
  }
  return 0;
}
|
||
|
|
||
|
// These do not handle Unicode chars properly.
|
||
|
// They treat all Unicode chars >= 170 as name start characters
|
||
|
|
||
|
// True if `c` may begin an XML name: a colon, an underscore, an ASCII
// letter, or (as an approximation) any character code >= 170.
// '.' and '-' are deliberately NOT accepted here: XML allows them inside a
// name but not as its first character.
static Boolean isXmlNameStartChar(Char c)
{
  switch (c) {
  case ':':
  case '_':
    return 1;
  }
  if (c >= 170)
    return 1;
  if ('a' <= c && c <= 'z')
    return 1;
  if ('A' <= c && c <= 'Z')
    return 1;
  return 0;
}

// True if `c` may appear inside an XML name: any name-start character,
// plus '.', '-' and the ASCII digits.
static Boolean isXmlNameChar(Char c)
{
  if (isXmlNameStartChar(c))
    return 1;
  switch (c) {
  case '.':
  case '-':
    return 1;
  }
  if ('0' <= c && c <= '9')
    return 1;
  return 0;
}
|
||
|
|
||
|
// True if the `n` characters at `s` begin with an XML name that is either
// followed by XML white space or extends to the end of the text.
static Boolean startsWithXmlName(const Char *s, size_t n)
{
  if (n == 0 || !isXmlNameStartChar(s[0]))
    return 0;

  for (size_t pos = 1; pos < n; pos++) {
    // White space ends the name; whatever follows is not inspected.
    if (isXmlS(s[pos]))
      return 1;
    if (!isXmlNameChar(s[pos]))
      return 0;
  }
  return 1;
}
|
||
|
|
||
|
// Handle a processing instruction.
//  - A PI whose target is exactly "xml" is dropped silently.
//  - A PI that does not start with an XML name is reported (piNoName) and
//    dropped.
//  - With the piEscape option the content is written with markup
//    characters escaped.
//  - Otherwise a PI containing "?>" is reported (piQuestionLt) and dropped,
//    and any other PI is written verbatim.
// Takes ownership of (and deletes) the event.
void XmlOutputEventHandler::pi(PiEvent *event)
{
  const Char *text = event->data();
  const size_t len = event->dataLength();

  const Boolean targetIsXml =
    len >= 3 && text[0] == 'x' && text[1] == 'm' && text[2] == 'l'
    && (len == 3 || isXmlS(text[3]));

  if (targetIsXml) {
    // Probably came from an encoding PI.
  }
  else if (!startsWithXmlName(text, len)) {
    app_->setNextLocation(event->location());
    app_->message(XmlOutputMessages::piNoName);
  }
  else if (options_.piEscape) {
    os() << "<?";
    outputData(text, len, 0, 0);
    os() << "?>";
  }
  else if (containsQuestionLt(text, len)) {
    app_->setNextLocation(event->location());
    app_->message(XmlOutputMessages::piQuestionLt);
  }
  else {
    os() << "<?";
    os().write(text, len);
    os() << "?>";
  }

  delete event;
}
|
||
|
|
||
|
/**
 * Handle a reference to an SDATA entity in content.
 *
 * When internal entities are preserved, an entity reference is written to
 * the current output and the replacement text is redirected: to the
 * internal-entities driver file (as an entity declaration, optionally
 * wrapping an "sdataEntity" processing instruction) the first time the
 * entity is seen, or to a throw-away stream afterwards.  When internal
 * entities are expanded, the text is written in place, wrapped in an
 * "sdataEntity" processing instruction if the sdataAsPi option is set.
 *
 * Takes ownership of (and deletes) the event.
 */
void XmlOutputEventHandler::sdataEntity(SdataEntityEvent *event)
{
  const Entity *entity = event->entity();
  app_->setNextLocation(event->location().origin()->parent());
  Boolean superLit = 0;

  // Throw-away streams used when the entity was declared earlier; tracked
  // so they can be released afterwards (they used to be leaked).
  NullOutputByteStream *discardBytes = 0;
  OutputCharStream *discardChars = 0;

  if (! options_.expInt) {
    Boolean firstSeen = checkFirstSeen(entity->name());

    // output entity reference
    os() << "&" << entity->name() << ";";

    // save old output stream
    outputStack_.insert(os_);

    if (firstSeen) {
      // Point default output stream (os_) to the entities
      // declaration file and output start of declaration
      os_ = intEnts_;

      if (options_.sdataAsPi) {
        os() << "<!ENTITY " << entity->name()
             << " '<?sdataEntity " << entity->name() << " ";
        superLit = 1;
      } else {
        os() << "<!ENTITY " << entity->name() << " \"";
      }
    } else { // we've seen it before; throw away expansion data
      discardBytes = new NullOutputByteStream;
      discardChars = (OutputCharStream *)
        new EncodeOutputCharStream(discardBytes,
                                   app_->outputCodingSystem());
      os_ = discardChars;
    }
  }

  // We are expanding internal entities; expand this one as a PI,
  // since XML does not have SDATA entities
  else {
    if (options_.sdataAsPi)
      os() << "<?sdataEntity " << entity->name() << " ";
  }

  // Now, no matter what, output the entity's data
  outputData(event->data(), event->dataLength(), 0, superLit);

  // If necessary, end entity decl and replace old output stream
  if (! options_.expInt) {
    if (options_.sdataAsPi)
      os() << " ?>'>" << RE;
    else
      os() << "\"> <!-- originally sdata entity -->\n";
    os_->flush();
    os_ = outputStack_.get();

    // Release the character stream before the byte stream it writes to.
    delete discardChars;
    delete discardBytes;
  } else {
    if (options_.sdataAsPi)
      os() << " ?>";
  }

  delete event;
}
|
||
|
|
||
|
/** External data entities may be referenced in attributes only. If
|
||
|
one is referenced in content, error and exit. */
|
||
|
void XmlOutputEventHandler::externalDataEntity(ExternalDataEntityEvent *event)
{
  // Report the offending entity by name, then terminate the process.
  const StringMessageArg entityName(event->entity()->name());
  app_->message(XmlOutputMessages::externalDataEntityReference, entityName);
  exit (1);
}
|
||
|
|
||
|
// Handle a reference to a SUBDOC entity.  When external entities are
// preserved only an entity reference is written; when they are expanded
// the subdocument is parsed with a nested parser that feeds its events
// back into this handler.  Takes ownership of (and deletes) the event.
void XmlOutputEventHandler::subdocEntity(SubdocEntityEvent *event)
{
  const SubdocEntity *entity = event->entity();
  app_->setNextLocation(event->location().origin()->parent());

  if (! options_.expExt) {
    os() << "&" << entity->name() << ";";
  }
  else {
    SgmlParser::Params params;
    params.subdocInheritActiveLinkTypes = 1;
    params.subdocReferenced = 1;
    params.origin = event->entityOrigin()->copy();
    params.parent = & ((ParserApp *)app_)->parser();
    params.sysid = entity->externalId().effectiveSystemId();
    params.entityType = SgmlParser::Params::subdoc;

    SgmlParser subdocParser(params);
    subdocParser.parseAll(*this);
  }

  delete event;
}
|
||
|
|
||
|
// Note that the parser has entered the DTD; comments and opened inputs are
// not reported while this flag is set.  Takes ownership of the event.
void XmlOutputEventHandler::startDtd(StartDtdEvent *event)
{
  delete event;
  inDtd_ = 1;
}
|
||
|
|
||
|
// Note that the parser has left the DTD.  Takes ownership of the event.
void XmlOutputEventHandler::endDtd(EndDtdEvent *event)
{
  delete event;
  inDtd_ = 0;
}
|
||
|
|
||
|
/**
 * Open the DOCTYPE declaration and its internal subset, once.
 *
 * @param doctypeStarted  In/out flag; if already set the call does nothing,
 *                        otherwise it is set and the declaration is begun.
 * @param dtd             Supplies the document element type name.
 *
 * Writes "<!DOCTYPE name", an optional SYSTEM identifier (dtdLoc_), the
 * opening " [" and, when requested, parameter-entity references that pull
 * in the external/internal entity driver files.  The caller is responsible
 * for writing the closing "]>".
 */
void XmlOutputEventHandler::maybeStartDoctype(Boolean &doctypeStarted, const Dtd &dtd)
{
  if (doctypeStarted)
    return;
  doctypeStarted = 1;
  const StringC &name = dtd.documentElementType()->name();

  // Always start from the declared name.  Previously the buffer was only
  // filled when lower-casing actually changed a character, so a name that
  // was already lower-case produced an empty DOCTYPE name.
  StringC buf;
  buf = name;

  // if appropriate, lowercase the doctype name -jphekman
  if (options_.lower) {
    for (size_t i = 0; i < buf.size(); i++)
      lowerSubst_.subst(buf[i]);
  }

  /* Output the doctype declaration. If requested, specify a local
     file containing the DTD. */
  os() << "<!DOCTYPE " << buf;
  if (dtdLoc_ != NULL) {
    os() << " SYSTEM \"" << dtdLoc_ << "\"";
  }
  os() << " [" << RE;

  /* If requested, include pointers in the instance's internal subset
     to driver files which define internal/external entities. */
  if (options_.extDecl) {
    os() << "<!ENTITY % external-entities SYSTEM \""
         << app_->codingSystem()->convertIn(outputDir_) << "/"
         << EXT_ENT_FILE << "\">"
         << RE << "%external-entities;" << RE;
  }

  if (options_.intDecl) {
    os() << "<!ENTITY % internal-entities SYSTEM \""
         << app_->codingSystem()->convertIn(outputDir_) << "/"
         << INT_ENT_FILE << "\">"
         << RE << "%internal-entities;" << RE;
  }
}
|
||
|
|
||
|
/**
 * At the end of the prolog, write the internal DTD subset requested by the
 * options:
 *   - references to the entity driver files (extDecl / intDecl),
 *   - NOTATION declarations (notation),
 *   - external data entity declarations (ndata),
 *   - ATTLIST declarations: full lists (attlist) or ID attributes only (id).
 * The DOCTYPE declaration is opened lazily, the first time something has to
 * be written, and closed at the end if it was opened.
 *
 * Takes ownership of (and deletes) the event.
 */
void XmlOutputEventHandler::endProlog(EndPrologEvent *event)
{
  const Dtd &dtd = event->dtd();
  Boolean doctypeStarted = 0;

  if (options_.extDecl || options_.intDecl) {
    maybeStartDoctype(doctypeStarted, dtd);
  }

  if (options_.notation) {
    Dtd::ConstNotationIter iter(dtd.notationIter());
    for (;;) {
      const Notation *notation = iter.nextTemp();
      if (!notation)
        break;
      maybeStartDoctype(doctypeStarted, dtd);
      os() << "<!NOTATION " << generalName(notation->name(), nameBuf_);
      outputExternalId(*notation);
      os() << ">" << RE;
      // Attributes declared for the notation are reported via the
      // notationAttributes message; nothing is written for them.
      if (notation->attributeDefTemp()) {
        app_->setNextLocation(notation->defLocation());
        app_->message(XmlOutputMessages::notationAttributes,
                      StringMessageArg(notation->name()));
      }
    }
  }
  if (options_.ndata) {
    Dtd::ConstEntityIter iter(dtd.generalEntityIter());
    for (;;) {
      const Entity *entity = iter.nextTemp();
      if (!entity)
        break;
      const ExternalDataEntity *extDataEntity = entity->asExternalDataEntity();
      if (extDataEntity) {
        maybeStartDoctype(doctypeStarted, dtd);
        os() << "<!ENTITY " << entity->name();
        outputExternalId(*entity);
        // Every external data entity is written as NDATA; report the ones
        // that were declared with another data type.
        if (extDataEntity->dataType() != EntityDecl::ndata) {
          app_->setNextLocation(entity->defLocation());
          app_->message(XmlOutputMessages::externalDataNdata,
                        StringMessageArg(entity->name()));
        }
        os() << " NDATA " << generalName(extDataEntity->notation()->name(), nameBuf_) << ">" << RE;
      }
    }
  }
  if (options_.id || options_.attlist) {
    Dtd::ConstElementTypeIter iter(dtd.elementTypeIter());
    for (;;) {
      const ElementType *elementType = iter.next();
      if (!elementType)
        break;
      const AttributeDefinitionList *adl = elementType->attributeDefTemp();
      if (adl) {
        if (options_.attlist) {
          maybeStartDoctype(doctypeStarted, dtd);
          if (options_.preserveCase) {
            os() << "<!ATTLIST " << elementType->origName();
          } else {
            os() << "<!ATTLIST " <<
              generalName(elementType->name(), nameBuf_);
          }
          for (size_t i = 0; i < adl->size(); i++) {
            const AttributeDefinition *def = adl->def(i);
            if (options_.preserveCase) {
              os() << RE << def->origName();
            } else {
              os() << RE << generalName(def->name(), nameBuf_);
            }
            AttributeDefinitionDesc desc;
            def->getDesc(desc);
            // Map the SGML declared value onto the closest XML attribute
            // type; every attribute is written as #IMPLIED.
            switch (desc.declaredValue) {
            case AttributeDefinitionDesc::cdata:
              os() << " CDATA #IMPLIED";
              break;
            case AttributeDefinitionDesc::name:
            case AttributeDefinitionDesc::number:
            case AttributeDefinitionDesc::nmtoken:
            case AttributeDefinitionDesc::nutoken:
              os() << " NMTOKEN #IMPLIED";
              break;
            case AttributeDefinitionDesc::entity:
              os() << " ENTITY #IMPLIED";
              break;
            case AttributeDefinitionDesc::idref:
              os() << " IDREF #IMPLIED";
              break;
            case AttributeDefinitionDesc::names:
            case AttributeDefinitionDesc::numbers:
            case AttributeDefinitionDesc::nmtokens:
            case AttributeDefinitionDesc::nutokens:
              os() << " NMTOKENS #IMPLIED";
              break;
            case AttributeDefinitionDesc::entities:
              os() << " ENTITIES #IMPLIED";
              break;
            case AttributeDefinitionDesc::idrefs:
              os() << " IDREFS #IMPLIED";
              break;
            case AttributeDefinitionDesc::id:
              os() << " ID #IMPLIED";
              break;
            case AttributeDefinitionDesc::notation:
              os() << " NOTATION";
              // fall through
            case AttributeDefinitionDesc::nameTokenGroup:
              {
                os() << " (";
                if (options_.preserveCase) {
                  for (size_t j = 0;
                       j < desc.origAllowedValues.size(); j++) {
                    if (j > 0)
                      os() << '|';
                    os() << desc.origAllowedValues[j];
                  }
                  os() << ") #IMPLIED";
                } else {
                  for (size_t j = 0; j < desc.allowedValues.size(); j++) {
                    if (j > 0)
                      os() << '|';
                    os() << desc.allowedValues[j];
                  }
                  os() << ") #IMPLIED";
                }
              }
              break;
            default:
              CANNOT_HAPPEN();
            }
          }
          os() << '>' << RE;
        }
        else {
          // Only ID attributes are declared.
          size_t idIndex = adl->idIndex();
          if (idIndex != size_t(-1)) {
            maybeStartDoctype(doctypeStarted, dtd);
            // Use the same spelling of the names as the tags written by
            // startElement()/outputAttribute(); XML names are
            // case-sensitive, so a differently-cased declaration would not
            // apply.  (This branch used to ignore preserveCase.)
            if (options_.preserveCase) {
              os() << "<!ATTLIST " << elementType->origName();
              os() << ' ' << adl->def(idIndex)->origName()
                   << " ID #IMPLIED>" << RE;
            } else {
              // Two statements: both names go through the shared nameBuf_.
              os() << "<!ATTLIST " << generalName(elementType->name(), nameBuf_);
              os() << ' ' << generalName(adl->def(idIndex)->name(), nameBuf_)
                   << " ID #IMPLIED>" << RE;
            }
          }
        }
      }
    }
  }
  if (doctypeStarted)
    os() << "]>" << RE;
  delete event;
}
|
||
|
|
||
|
void XmlOutputEventHandler::outputExternalId(const EntityDecl &decl)
|
||
|
|
||
|
{
|
||
|
const StringC *pubIdP = decl.publicIdPointer();
|
||
|
const StringC *sysIdP = decl.effectiveSystemIdPointer();
|
||
|
if (pubIdP) {
|
||
|
os() << " PUBLIC \"" << *pubIdP << "\"";
|
||
|
if (decl.declType() == EntityDecl::notation && !sysIdP)
|
||
|
return;
|
||
|
os() << " \"";
|
||
|
}
|
||
|
else
|
||
|
os() << " SYSTEM \"";
|
||
|
if (sysIdP) {
|
||
|
StringC url;
|
||
|
switch(fsiToUrl(*sysIdP, decl.defLocation(), url)) {
|
||
|
case 1:
|
||
|
os() << url;
|
||
|
break;
|
||
|
case 0:
|
||
|
break;
|
||
|
default:
|
||
|
app_->setNextLocation(decl.defLocation());
|
||
|
app_->message(XmlOutputMessages::cannotConvertFsiToUrl,
|
||
|
StringMessageArg(*sysIdP));
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
os() << "\"";
|
||
|
}
|
||
|
|
||
|
// Return 1 if OK; return -1 to generate generic error; return 0 if error already generated.
|
||
|
|
||
|
int XmlOutputEventHandler::fsiToUrl(const StringC &fsi, const Location &loc, StringC &url)
|
||
|
{
|
||
|
ParsedSystemId parsedBuf;
|
||
|
if (!entityManager_->parseSystemId(fsi, *systemCharset_, 0, 0, *app_, parsedBuf))
|
||
|
return 0;
|
||
|
if (parsedBuf.size() != 1)
|
||
|
return -1;
|
||
|
if (strcmp(parsedBuf[0].storageManager->type(), "URL") == 0) {
|
||
|
url = parsedBuf[0].specId;
|
||
|
return 1;
|
||
|
}
|
||
|
Owner<InputSource> in(entityManager_->open(fsi,
|
||
|
*systemCharset_,
|
||
|
InputSourceOrigin::make(),
|
||
|
0,
|
||
|
*app_));
|
||
|
if (!in)
|
||
|
return 0;
|
||
|
Xchar c = in->get(*app_);
|
||
|
StorageObjectLocation soLoc;
|
||
|
if (c == InputSource::eE && in->accessError()) {
|
||
|
if (parsedBuf[0].baseId.size())
|
||
|
return 0;
|
||
|
soLoc.storageObjectSpec = &parsedBuf[0];
|
||
|
soLoc.actualStorageId = parsedBuf[0].specId;
|
||
|
}
|
||
|
else {
|
||
|
const Location &loc = in->currentLocation();
|
||
|
if (loc.origin().isNull())
|
||
|
return -1;
|
||
|
const InputSourceOrigin *tem = loc.origin()->asInputSourceOrigin();
|
||
|
if (!tem)
|
||
|
return -1;
|
||
|
const ParsedSystemId *psi
|
||
|
= ExtendEntityManager::externalInfoParsedSystemId(tem->externalInfo());
|
||
|
if (!psi || psi->size() != 1)
|
||
|
return -1;
|
||
|
if (!ExtendEntityManager::externalize(tem->externalInfo(), 0, soLoc))
|
||
|
return -1;
|
||
|
}
|
||
|
if (strcmp(soLoc.storageObjectSpec->storageManager->type(), "OSFILE") != 0)
|
||
|
return -1;
|
||
|
return filenameToUrl(soLoc.actualStorageId, loc, url);
|
||
|
}
|
||
|
|
||
|
static
|
||
|
StringC hexEncode(unsigned char c)
|
||
|
{
|
||
|
static const char hexDigits[] = "0123456789ABCDEF";
|
||
|
StringC result;
|
||
|
result.resize(2);
|
||
|
result[0] = hexDigits[c >> 4];
|
||
|
result[1] = hexDigits[c & 0xF];
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
static
|
||
|
StringC urlEncode(Char c)
|
||
|
{
|
||
|
if (c >= 0x80) {
|
||
|
StringC tem;
|
||
|
tem += c;
|
||
|
#if 0
|
||
|
// This is what the URN spec says.
|
||
|
UTF8CodingSystem utf8;
|
||
|
String<char> bytes(utf8.convertOut(tem));
|
||
|
tem.resize(0);
|
||
|
for (size_t i = 0; i < bytes.size(); i++)
|
||
|
tem += hexEncode(bytes[i]);
|
||
|
#endif
|
||
|
return tem;
|
||
|
}
|
||
|
if (strchr("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$-_.+!*'(),",
|
||
|
char(c))) {
|
||
|
StringC tem;
|
||
|
tem += c;
|
||
|
return tem;
|
||
|
}
|
||
|
return hexEncode((unsigned char)c);
|
||
|
}
|
||
|
|
||
|
int XmlOutputEventHandler::filenameToUrl(const StringC &filename,
|
||
|
const Location &loc,
|
||
|
StringC &url)
|
||
|
{
|
||
|
#ifdef SP_MSDOS_FILENAMES
|
||
|
if (filename.size() >= 3 && filename[1] == ':'
|
||
|
&& (('a' <= filename[0] && filename[0] <= 'z')
|
||
|
|| ('A' <= filename[0] && filename[0] <= 'Z'))
|
||
|
&& (filename[2] == '/' || filename[2] == '\\')) {
|
||
|
url += "file://";
|
||
|
url += filename;
|
||
|
return 1;
|
||
|
}
|
||
|
#endif
|
||
|
for (size_t i = 0; i < filename.size(); i++) {
|
||
|
if (filename[i] == '/'
|
||
|
#ifdef SP_MSDOS_FILENAMES
|
||
|
|| filename[i] == '\\'
|
||
|
#endif
|
||
|
) {
|
||
|
url += '/';
|
||
|
}
|
||
|
else
|
||
|
url += urlEncode(filename[i]);
|
||
|
}
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
// Copy a comment declaration to the output as a single XML comment, but
// only outside the DTD and when the "comment" option is set.  All comment
// items of the declaration are written back to back inside one
// "<!--" ... "-->" pair; a declaration without comment items produces
// nothing.  Takes ownership of (and deletes) the event.
void XmlOutputEventHandler::commentDecl(CommentDeclEvent *event)
{
  if (inDtd_ || !options_.comment) {
    delete event;
    return;
  }

  Boolean opened = 0;
  MarkupIter iter(event->markup());
  while (iter.valid()) {
    if (iter.type() == Markup::comment) {
      if (!opened) {
        os() << "<!--";
        opened = 1;
      }
      os().write(iter.charsPointer(), iter.charsLength());
    }
    iter.advance();
  }
  if (opened)
    os() << "-->";

  delete event;
}
|
||
|
|
||
|
// On entering a CDATA marked section, request CDATA output for the data
// that follows (only when the "cdata" option is set).  Takes ownership of
// (and deletes) the event.
void XmlOutputEventHandler::markedSectionStart(MarkedSectionStartEvent *event)
{
  if (options_.cdata && event->status() == MarkedSectionEvent::cdata) {
    useCdata_ = 1;
  }
  delete event;
}
|
||
|
|
||
|
// On leaving a CDATA marked section, close any CDATA section that is open
// on the output.  Takes ownership of (and deletes) the event.
void XmlOutputEventHandler::markedSectionEnd(MarkedSectionEndEvent *event)
{
  const Boolean wasCdata = (event->status() == MarkedSectionEvent::cdata);
  if (wasCdata) {
    closeCdataSection();
  }
  delete event;
}
|
||
|
|
||
|
void XmlOutputEventHandler::outputCdata(const Char *s, size_t n)
|
||
|
{
|
||
|
static const char cdataEnd[] = "]]>";
|
||
|
for (; n > 0; s++, n--) {
|
||
|
Char c = *s;
|
||
|
if (!inCdata_) {
|
||
|
os() << "<![CDATA[";
|
||
|
inCdata_ = 1;
|
||
|
}
|
||
|
if (c == cdataEnd[nCdataEndMatched_]) {
|
||
|
if (nCdataEndMatched_ == 2) {
|
||
|
os() << "]]><![CDATA[";
|
||
|
nCdataEndMatched_ = 0;
|
||
|
}
|
||
|
else
|
||
|
nCdataEndMatched_++;
|
||
|
}
|
||
|
else
|
||
|
nCdataEndMatched_ = 0;
|
||
|
os().put(c);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void XmlOutputEventHandler::outputData(const Char *s, size_t n, Boolean inLit,
|
||
|
Boolean inSuperLit)
|
||
|
{
|
||
|
for (; n > 0; s++, n--) {
|
||
|
Char c = *s;
|
||
|
switch (c) {
|
||
|
case '&':
|
||
|
os() << "&";
|
||
|
break;
|
||
|
case '<':
|
||
|
os() << "<";
|
||
|
break;
|
||
|
case '>':
|
||
|
os() << ">";
|
||
|
break;
|
||
|
case 9:
|
||
|
case 10:
|
||
|
case 13:
|
||
|
if (inLit)
|
||
|
os() << "&#" << int(c) << ';';
|
||
|
else
|
||
|
os().put(c);
|
||
|
break;
|
||
|
case '"':
|
||
|
if (inLit) {
|
||
|
os() << """;
|
||
|
break;
|
||
|
}
|
||
|
case '\'':
|
||
|
if (inSuperLit) {
|
||
|
os() << "'";
|
||
|
break;
|
||
|
}
|
||
|
// fall through
|
||
|
default:
|
||
|
os().put(c);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
 * Called when an entity reference was resolved through the default entity.
 * With the reportEnts option, an external entity that has a system
 * identifier is reported on the output as an "entityResolved" processing
 * instruction.
 *
 * Takes ownership of (and deletes) the event, like the other handlers in
 * this class; previously the event was never released.
 */
void XmlOutputEventHandler::entityDefaulted(EntityDefaultedEvent *event)
{
  if (options_.reportEnts) {
    ConstPtr<Entity> entity = event->entityPointer();
    const ExternalEntity *extEntity = entity->asExternalEntity();

    // If we are dealing with an external entity (else it will be null)
    // -jphekman
    if (extEntity != 0) {
      const StringC *systemIdPointer = extEntity->systemIdPointer();
      if (systemIdPointer != 0) {
        os () << "<?entityResolved " << *systemIdPointer << " ?>";
      }
    }
  }

  delete event;
}
|
||
|
|
||
|
|
||
|
/** Called when an input source is opened.  Does nothing inside the DTD.

    - With options_.reportIS, writes an "<?inputOpened ... ?>" processing
      instruction for external entities (or one with NULL ids when the
      input has no entity attached).
    - For an external entity that is not being expanded
      (!options_.expExt): writes an entity reference to the current
      stream, pushes that stream on outputStack_, and redirects output
      either to a newly created file below outputDir_ (first time the
      entity is seen; a declaration is also appended to *extEnts_) or to
      a null stream (later references).
    - For an internal entity that is not being expanded
      (!options_.expInt): writes an entity reference, redirects output
      to a null stream and, the first time, appends a declaration to
      *intEnts_ unless it is one of the five predefined XML entities.
    - Any other entity kind is reported and the process exits.

    The redirection set up here is undone by inputClosed(). */
void XmlOutputEventHandler::inputOpened(InputSource *in)
{
  if (!inDtd_) {
    const CodingSystem *outputCodingSystem = app_->outputCodingSystem();
    const EntityDecl *entDecl = in->currentLocation().origin()->entityDecl();
    const Entity *ent = in->currentLocation().origin()->entity();

    // `ent` is dereferenced below, so guard it the same way
    // inputClosed() does.
    if (entDecl == NULL || ent == NULL) {
      if (options_.reportIS) {
        os() << "<?inputOpened effectiveSystemID=\"NULL\" "
             << "systemID=\"NULL\" publicID=\"NULL\" ?>";
      }
      return;
    }

    const StringC *effectiveSystemIdPointer =
      entDecl->effectiveSystemIdPointer();
    const StringC *systemIdPointer = entDecl->systemIdPointer();
    const StringC *publicIdPointer = entDecl->publicIdPointer();

    if (options_.reportIS && ent->asExternalEntity() != NULL) {
      os() << "<?inputOpened effectiveSystemID=\"";

      if (effectiveSystemIdPointer == 0)
        os() << "NULL";
      else
        os () << *effectiveSystemIdPointer;

      os() << "\" systemID=\"";
      if (systemIdPointer == 0)
        os() << "NULL";
      else
        os () << *systemIdPointer;

      os() << "\" publicID=\"";
      if (publicIdPointer == 0)
        os() << "NULL";
      else
        os () << *publicIdPointer;

      os() << "\" ?>";
    }

    /* Output entity declaration and, in the case of external
       entities, write file containing entity replacement text. */

    Boolean firstSeen = checkFirstSeen(ent->name());

    if (ent->asExternalEntity() != NULL) {
      if (! options_.expExt) {
        // output entity reference
        os() << "&" << entDecl->name() << ";";

        // output entity declaration
        if (systemIdPointer == 0) {
          app_->message(XmlOutputMessages::missingSystemId,
                        StringMessageArg(entDecl->name()));
          exit (1);
        }

        // save old output stream
        outputStack_.insert(os_);

        if (firstSeen) {

          // Encode the system id once and keep the result alive for the
          // whole branch; originalFilePath walks through this buffer, so
          // no separately allocated copy is needed.
          String<char> systemIdBytes =
            outputCodingSystem->convertOut(*systemIdPointer);
          const char *originalFilePath = systemIdBytes.data();

          /* Construct new output path, prepending the output
             directory: so, for example, /usr/local/lib/ents/foo
             becomes ./usr/local/lib/ents/foo.xml; possible 2-digit
             suffix (for uniqueness) */

          char* filePath=new char[strlen(outputDir_) + 9 +
                                  systemIdBytes.size()];

          strcpy (filePath, outputDir_);

          /* Drop the scheme if there is one (so
             "http://www.bar.com/foo" becomes
             "output_dir/www.bar.com/foo") */

          const char *scheme = strchr(originalFilePath, ':');
          if (scheme != NULL) {
            // drop initial colon
            originalFilePath = scheme + 1;
          } else {
            // check for leading slashes in non-scheme filenames
            // and warn that we're going to remove them
            if (strncmp(originalFilePath, "/", 1) == 0) {
              app_->message(XmlOutputMessages::removingLeadingSlashes,
                            StringMessageArg(*systemIdPointer));
            }
          }

          // drop initial slashes (there may be multiple)
          while (strncmp(originalFilePath, "/", 1) == 0) {
            originalFilePath++;
          }

          // replace exactly one initial slash before prepending
          // output directory
          strcat (filePath, "/");
          strcat(filePath, originalFilePath);

          // Check to make sure we haven't passed outside of the
          // output directory.  dirname() may modify its argument, so it
          // is only ever handed scratch copies, and each result is
          // consumed by realpath() before the next dirname() call.
          char realDirs[MAXPATHLEN];
          char realOutputDir[MAXPATHLEN];
          char *dirs = strdup (filePath);
          char *outputDir = strdup(outputDir_);

          realpath((const char *)dirname(dirs), realDirs);
          realpath((const char *)dirname(outputDir), realOutputDir);

          free(outputDir);
          free(dirs);

          // NOTE(review): the results of realpath() are not checked, and
          // this is a plain string-prefix comparison.
          if (strncmp(realDirs, realOutputDir, strlen (realOutputDir)) != 0) {
            app_->message(XmlOutputMessages::pathOutsideOutputDirectory,
                          StringMessageArg(*systemIdPointer));
            if (! options_.writeOutsideOutDir) {
              exit (1);
            }
          }

          // Make the necessary directories.  A fresh copy is used:
          // calling dirname() again on a buffer it has already
          // truncated would strip one more component, and the file's
          // own directory would then never be created.
          char *dirsToCreate = strdup (filePath);
          maybeCreateDirectories(dirname(dirsToCreate));
          free(dirsToCreate);

          // Set the suffix to ".xml"
          convertSuffix(filePath);

          *extEnts_ << "<!ENTITY " << entDecl->name() << " SYSTEM \""
                    << filePath << "\">\n";
          extEnts_->flush();

          if (! options_.overwrite) {
            uniqueFilename(filePath);
          }

          // Open the file, exiting if we fail to do so.
          FileOutputByteStream *file = new FileOutputByteStream;
          outputFileStack_.insert(file);
          StringC filePathStrC = app_->codingSystem()->convertIn(filePath);

          // If we've never seen this exact input filename before,
          // then we're being asked to overwrite an old generated file
          // with a new one (for example, "/text" and "text" will both
          // map to the top level of the output directory). In that
          // case, produce an error.
          for (size_t i = 0; i < filesCreated_.size(); i++) {

            if (filesCreated_[i] == filePathStrC) {
              bool seenOrigPath = false;
              for (size_t j = 0; j < originalFilePaths_.size(); j++) {
                if (originalFilePaths_[j] == *systemIdPointer) {
                  seenOrigPath = true;
                }
              }

              if (! seenOrigPath) {
                app_->message(XmlOutputMessages::alreadyWroteOutputFile,
                              StringMessageArg(filePathStrC));
                exit (1);
              }
            }
          }

          // Save so we can ensure not overwriting later.
          filesCreated_.push_back(filePathStrC);
          originalFilePaths_.push_back(*systemIdPointer);
          if (!file->open(filePath)) {
            app_->message(XmlOutputMessages::cannotOpenOutputFile,
                          StringMessageArg(filePathStrC));
            exit (1);
          }

          // Create output stream to file and set os_ to it.
          os_ = (OutputCharStream *)
            new EncodeOutputCharStream(file, outputCodingSystem);
          delete [] filePath;
        } // end if firstSeen
        else {
          // push null os onto file output stack, set os_ to it
          NullOutputByteStream *nobs = new NullOutputByteStream;
          outputFileStack_.insert(nobs);

          // Create output stream to the null sink and set os_ to it.
          os_ = (OutputCharStream *)
            new EncodeOutputCharStream(nobs, outputCodingSystem);
        } // end else (not firstSeen)

      } // end if not expanding external entities

    } // end if asExternalEntity()

    else if (ent->asInternalEntity() != NULL) {

      if (! options_.expInt) {

        // output entity reference
        os() << "&" << entDecl->name() << ";";

        // save old output stream and point output stream at null
        // NOTE(review): unlike the external case, this null byte stream
        // is not pushed on outputFileStack_; confirm who releases it.
        outputStack_.insert(os_);
        NullOutputByteStream *nobs = new NullOutputByteStream;
        os_ = (OutputCharStream *)
          new EncodeOutputCharStream(nobs, outputCodingSystem);

        if (firstSeen) {

          // The encoded name is only needed for the comparison below;
          // holding it in a local string releases it automatically.
          String<char> entNameBytes =
            outputCodingSystem->convertOut(entDecl->name());
          const char *entName = entNameBytes.data();

          // don't define predefined entities
          if (strcmp (entName, "lt") != 0
              && strcmp (entName, "gt") != 0
              && strcmp (entName, "amp") != 0
              && strcmp (entName, "apos") != 0
              && strcmp (entName, "quot") != 0) {

            // output declaration to internal entities driver file
            InternalInputSource *intIn = in->asInternalInputSource();
            *intEnts_ << "<!ENTITY " << entDecl->name() << " "
                      << getQuoteMark (intIn->contents())
                      << (*intIn->contents())
                      << getQuoteMark (intIn->contents())
                      << " >\n";
            intEnts_->flush();
          }
        } // end if firstSeen
      } // end if not expanding internal entities
    } // end if this is an internal entity

    else {
      // We should only get InternalText and ExternalText entities here.
      app_->message(XmlOutputMessages::unexpectedEntityType,
                    StringMessageArg (ent->name()));
      exit(1);
    }
  }
}
|
||
|
|
||
|
/** Called when an input source is closed.  Does nothing inside the DTD.

    For an external entity that was not expanded, and for an internal
    entity that was not expanded, the current output stream is flushed
    and deleted and the previous one is popped from outputStack_; for
    the external case the byte stream on top of outputFileStack_ is
    deleted as well.  With options_.reportIS an "<?inputClosed ?>"
    processing instruction is written for external entities.  Any other
    entity kind is reported and the process exits. */
void XmlOutputEventHandler::inputClosed(InputSource *in)
{
  if (inDtd_)
    return;

  const EntityDecl *entDecl = in->currentLocation().origin()->entityDecl();
  const Entity *ent = in->currentLocation().origin()->entity();

  if (entDecl == NULL || ent == NULL) {
    // NOTE(review): this writes an "inputOpened" instruction from the
    // close handler; it looks copied from inputOpened() -- confirm
    // whether "<?inputClosed ?>" was intended.
    if (options_.reportIS)
      os() << "<?inputOpened effectiveSystemID=\"NULL\" "
           << "systemID=\"NULL\" publicID=\"NULL\" ?>";
    return;
  }

  const bool isExternal = (ent->asExternalEntity() != NULL);

  if (isExternal) {
    if (!options_.expExt) {
      // Drop the per-entity stream and go back to the enclosing one.
      os_->flush();
      delete os_;
      os_ = outputStack_.get();

      // Closing the file means deleting its byte stream.
      OutputByteStream *closing = outputFileStack_.get();
      delete closing;
    }
  }
  else if (ent->asInternalEntity() != NULL) {
    if (!options_.expInt) {
      os_->flush();
      delete os_;
      os_ = outputStack_.get();
    }
  }
  else {
    // Only internal and external text entities are expected here.
    app_->message(XmlOutputMessages::unexpectedEntityType,
                  StringMessageArg( ent->name() ));
    exit(1);
  }

  if (isExternal && options_.reportIS)
    os() << "<?inputClosed ?>";
}
|
||
|
|
||
|
/** Return the form of a general name to use on output.

    When options_.lower and namecaseGeneral_ are both set and
    lowerSubst_ changes at least one character of name, the substituted
    copy is stored in buf and buf is returned.  In every other case name
    itself is returned and buf is left untouched. */
const StringC &XmlOutputEventHandler::generalName(const StringC &name,
                                                  StringC &buf)
{
  if (!options_.lower || !namecaseGeneral_)
    return name;

  const size_t len = name.size();
  for (size_t pos = 0; pos < len; pos++) {
    const Char lowered = lowerSubst_[name[pos]];
    if (lowered == name[pos])
      continue;

    // First character that changes: copy lazily, then substitute the
    // remainder in place.
    buf = name;
    buf[pos] = lowered;
    while (++pos < len)
      lowerSubst_.subst(buf[pos]);
    return buf;
  }
  return name;
}
|
||
|
|
||
|
/** Return true when str1 and str2 have the same length and every pair
    of corresponding characters is equal after mapping both through
    lowerSubst_. */
Boolean XmlOutputEventHandler::equalsIgnoreCase(const StringC &str1,
                                                StringC &str2)
{
  const size_t len = str1.size();
  if (len != str2.size())
    return false;

  // Advance while the substituted characters agree.
  size_t pos = 0;
  while (pos < len && lowerSubst_[str1[pos]] == lowerSubst_[str2[pos]])
    pos++;

  return pos == len;
}
|
||
|
|
||
|
/** Make this string's suffix ".xml", attempting to do the right thing
|
||
|
if we encounter ".sgm", ".sgml", or ".xml" as the original
|
||
|
suffix. */
|
||
|
char *XmlOutputEventHandler::convertSuffix(char *name)
|
||
|
{
|
||
|
// Get a pointer to the last occurrence of ".":
|
||
|
char *suffix = strrchr(name, '.');
|
||
|
|
||
|
// If there is no "." in "name", append ".xml" and return
|
||
|
if (suffix == NULL) {
|
||
|
strcat(name, ".xml");
|
||
|
return name;
|
||
|
}
|
||
|
|
||
|
// Suffix is "sgm[l]": subsitite ".xml"
|
||
|
if (strcmp(suffix, ".sgm") == 0 || strcmp (suffix, ".sgml") == 0) {
|
||
|
strcpy (suffix, ".xml");
|
||
|
return name;
|
||
|
}
|
||
|
|
||
|
// "xml": do nothing
|
||
|
if (strcmp(suffix, ".xml") == 0) {
|
||
|
return name;
|
||
|
}
|
||
|
|
||
|
// default: append ".xml"
|
||
|
strcat (name, ".xml");
|
||
|
return name;
|
||
|
}
|
||
|
|
||
|
/** Create all the directories mentioned in this path, unless they
|
||
|
already exist. */
|
||
|
|
||
|
int XmlOutputEventHandler::maybeCreateDirectories (char *path)
|
||
|
{
|
||
|
int ret;
|
||
|
|
||
|
char *pp = strrchr(path, '/');
|
||
|
#ifdef SP_MSDOS_FILENAMES
|
||
|
char *ppp = strrchr(path,'\\');
|
||
|
if (ppp && (ppp > pp || pp==NULL)) pp=ppp;
|
||
|
#endif
|
||
|
|
||
|
if ((pp) && (pp != path)) {
|
||
|
*pp = '\0';
|
||
|
maybeCreateDirectories(path);
|
||
|
*pp = '/';
|
||
|
}
|
||
|
|
||
|
#ifndef _MSC_VER
|
||
|
struct stat statbuf;
|
||
|
pp = strrchr(path, '/');
|
||
|
|
||
|
statbuf.st_mode = 0777;
|
||
|
|
||
|
/* stat the directory */
|
||
|
if ((pp) && (pp != path)) {
|
||
|
*pp = '\0';
|
||
|
stat(path, &statbuf);
|
||
|
*pp = '/';
|
||
|
}
|
||
|
|
||
|
ret = mkdir(path, statbuf.st_mode);
|
||
|
#else
|
||
|
ret=_mkdir(path);
|
||
|
#endif
|
||
|
if (ret == -1) {
|
||
|
if (errno == EEXIST) {
|
||
|
ret = 0;
|
||
|
} else {
|
||
|
app_->message(XmlOutputMessages::cannotCreateDirectory,
|
||
|
StringMessageArg(app_->codingSystem()->convertIn(path)));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return(ret);
|
||
|
}
|
||
|
|
||
|
|
||
|
/** If this is the first time we have encountered entity "name",
|
||
|
return true and add it to a list of entities we've seen. Else,
|
||
|
return false. */
|
||
|
Boolean XmlOutputEventHandler::checkFirstSeen(const StringC &name)
|
||
|
{
|
||
|
Named *id = entTable_.lookup(name);
|
||
|
|
||
|
if (!id) {
|
||
|
entTable_.insert(new Named(name));
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
|
||
|
/** Set the newFilename to a filename which, while similar or
|
||
|
identical to originalFilename, does not correspond to an existing
|
||
|
file. Gives an error if called more than 99 times on the same
|
||
|
filename (during the same or different executions of osx). Assumes
|
||
|
that the char array it's given is long enough to accept a two-digit
|
||
|
suffix in addition to the string that's already in there.
|
||
|
*/
|
||
|
void XmlOutputEventHandler::uniqueFilename(char *filePath) {
|
||
|
|
||
|
char* baseFilePath=new char[strlen(filePath)];
|
||
|
strcpy (baseFilePath, filePath);
|
||
|
|
||
|
struct stat statbuf;
|
||
|
int num = 0;
|
||
|
char numStr[3];
|
||
|
|
||
|
while (stat(filePath, &statbuf) == 0 && num <= 100) {
|
||
|
num++;
|
||
|
strcpy (filePath, baseFilePath);
|
||
|
strcat (filePath, ".");
|
||
|
sprintf(numStr, "%d", num);
|
||
|
strcat (filePath, numStr);
|
||
|
}
|
||
|
|
||
|
if (num >= 100) {
|
||
|
app_->message(XmlOutputMessages::tooManyOutputFiles,
|
||
|
StringMessageArg
|
||
|
(app_->codingSystem()->convertIn(baseFilePath)));
|
||
|
exit(1);
|
||
|
}
|
||
|
delete [] baseFilePath;
|
||
|
}
|
||
|
|
||
|
char XmlOutputEventHandler::getQuoteMark(const StringC *contents)
|
||
|
{
|
||
|
char *contentsStr =
|
||
|
strdup(app_->outputCodingSystem()->convertOut(*contents).data());
|
||
|
if (strchr(contentsStr, '"') == NULL) {
|
||
|
return '"';
|
||
|
} else {
|
||
|
return '\'';
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* NullOutputByteStream
|
||
|
*/
|
||
|
|
||
|
NullOutputByteStream::NullOutputByteStream()
|
||
|
{
|
||
|
}
|
||
|
|
||
|
NullOutputByteStream::~NullOutputByteStream()
|
||
|
{
|
||
|
}
|
||
|
|
||
|
void NullOutputByteStream::flush()
|
||
|
{
|
||
|
}
|
||
|
|
||
|
void NullOutputByteStream::sputc(char c)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
void NullOutputByteStream::sputn(const char *, size_t)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
OutputByteStream &NullOutputByteStream::operator<<(char)
|
||
|
{
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
OutputByteStream &NullOutputByteStream::operator<<(unsigned char)
|
||
|
{
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
OutputByteStream &NullOutputByteStream::operator<<(const char *)
|
||
|
{
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
OutputByteStream &NullOutputByteStream::operator<<(int)
|
||
|
{
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
OutputByteStream &NullOutputByteStream::operator<<(unsigned)
|
||
|
{
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
OutputByteStream &NullOutputByteStream::operator<<(long)
|
||
|
{
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
OutputByteStream &NullOutputByteStream::operator<<(unsigned long)
|
||
|
{
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
OutputByteStream &NullOutputByteStream::operator<<(const String<char> &)
|
||
|
{
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
/* Note: Returning NULL is probably not the best solution here, but as
|
||
|
* nothing actually uses getBufferPtr(), it is hard to see what sort
|
||
|
* of no-op behavior would actually be appropriate. */
|
||
|
char *NullOutputByteStream::getBufferPtr() const
|
||
|
{
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
size_t NullOutputByteStream::getBufferSize() const
|
||
|
{
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
void NullOutputByteStream::usedBuffer(size_t)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
void NullOutputByteStream::flushBuf(char)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
#ifdef SP_NAMESPACE
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
#ifdef _MSC_VER
|
||
|
/* Minimal replacement for POSIX dirname(), used where <libgen.h> is not
 * available.
 *
 * Returns the part of path before its last separator ('/', and also
 * '\\' when SP_MSDOS_FILENAMES is defined).  When path is NULL or has
 * no separator the result is "."; when the only separator is the first
 * character the result is that separator.  Trailing separators are not
 * given special treatment.
 *
 * The result lives in a static buffer that the next call overwrites;
 * path itself is never modified.  Results longer than the buffer are
 * truncated. */
char* dirname(const char* path)
{
  static char buffer[MAXPATHLEN];
  const char* p = path ? strrchr(path, '/') : NULL;
#ifdef SP_MSDOS_FILENAMES
  const char* ppp = path ? strrchr(path, '\\') : NULL;
  if (ppp && (p == NULL || ppp > p)) p = ppp;
#endif

  if (p == NULL) {
    // No directory part: callers dereference the result, so hand back
    // "." as POSIX does instead of NULL.
    buffer[0] = '.';
    buffer[1] = 0;
  }
  else if (p == path) {
    // "/name": the directory is the root itself, not the empty string.
    buffer[0] = *p;
    buffer[1] = 0;
  }
  else {
    size_t len = (size_t)(p - path);
    if (len >= sizeof buffer)
      len = sizeof buffer - 1;
    memcpy(buffer, path, len);
    buffer[len] = 0;
  }
  return buffer;
}
|
||
|
|
||
|
void realpath(const char* in,char* out)
|
||
|
{
|
||
|
_fullpath(out,in,MAXPATHLEN);
|
||
|
return;
|
||
|
}
|
||
|
#endif
|
||
|
|