440 lines
9.8 KiB
C++
440 lines
9.8 KiB
C++
|
// Copyright (c) 1995 James Clark
|
||
|
// See the file COPYING for copying permission.
|
||
|
|
||
|
#ifdef __GNUG__
|
||
|
#pragma implementation
|
||
|
#endif
|
||
|
|
||
|
#include "config.h"
|
||
|
#include "SGMLGenerator.h"
|
||
|
|
||
|
const SGMLApplication::Char RE = '\r';
|
||
|
const SGMLApplication::Char RS = '\n';
|
||
|
const SGMLApplication::Char TAB = '\t';
|
||
|
|
||
|
inline
|
||
|
SP_NAMESPACE_SCOPE OutputCharStream &
|
||
|
operator<<(SP_NAMESPACE_SCOPE OutputCharStream &os,
|
||
|
const SGMLApplication::CharString &str)
|
||
|
{
|
||
|
return os.write(str.ptr, str.len);
|
||
|
}
|
||
|
|
||
|
inline
|
||
|
bool operator==(const SGMLApplication::CharString &str1,
|
||
|
const SGMLApplication::CharString &str2)
|
||
|
{
|
||
|
return (str1.len == str2.len
|
||
|
&& (str1.len == 0
|
||
|
|| memcmp(str1.ptr,
|
||
|
str2.ptr,
|
||
|
str1.len*sizeof(SGMLApplication::Char)) == 0));
|
||
|
}
|
||
|
|
||
|
inline
|
||
|
bool operator!=(const SGMLApplication::CharString &str1,
|
||
|
const SGMLApplication::CharString &str2)
|
||
|
{
|
||
|
return !(str1 == str2);
|
||
|
}
|
||
|
|
||
|
inline
|
||
|
SP_NAMESPACE_SCOPE OutputCharStream &SGMLGenerator::os()
|
||
|
{
|
||
|
return *os_;
|
||
|
}
|
||
|
|
||
|
inline
|
||
|
void SGMLGenerator::outputRe()
|
||
|
{
|
||
|
os().put(RE);
|
||
|
}
|
||
|
|
||
|
inline
|
||
|
void SGMLGenerator::flushRe()
|
||
|
{
|
||
|
if (outputState_ == pendingRe) {
|
||
|
outputRe();
|
||
|
outputState_ = other;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
inline
|
||
|
bool SGMLGenerator::isSgmlChar(Char c)
|
||
|
{
|
||
|
return c >= 256 || !nonSgml_[c];
|
||
|
}
|
||
|
|
||
|
static
|
||
|
void escape(SP_NAMESPACE_SCOPE OutputCharStream &s, SP_NAMESPACE_SCOPE Char c)
|
||
|
{
|
||
|
s << "&#" << (unsigned long)c << ";";
|
||
|
}
|
||
|
|
||
|
SGMLGenerator::SGMLGenerator(SP_NAMESPACE_SCOPE OutputCharStream *os,
|
||
|
unsigned flags)
|
||
|
: os_(os), parseMode_(normalMode), outputState_(other), mustOmitEndTag_(0),
|
||
|
inInstance_(0), flags_(flags)
|
||
|
{
|
||
|
os_->setEscaper(escape);
|
||
|
for (size_t i = 0; i < 256; i++)
|
||
|
switch (i) {
|
||
|
case RE:
|
||
|
case RS:
|
||
|
case TAB:
|
||
|
nonSgml_[i] = 0;
|
||
|
break;
|
||
|
case 255:
|
||
|
nonSgml_[i] = 1;
|
||
|
break;
|
||
|
default:
|
||
|
nonSgml_[i] = (i < 32 || (i >= 127 && i < 160));
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
SGMLGenerator::~SGMLGenerator()
|
||
|
{
|
||
|
outputRe();
|
||
|
delete os_;
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::startDtd(const StartDtdEvent &event)
|
||
|
{
|
||
|
if (!(flags_ & generateDtd))
|
||
|
return;
|
||
|
if (event.haveExternalId) {
|
||
|
os() << "<!DOCTYPE " << event.name << ' ';
|
||
|
if (event.externalId.havePublicId)
|
||
|
os() << "PUBLIC \"" << event.externalId.publicId << '\"';
|
||
|
else
|
||
|
os() << "SYSTEM";
|
||
|
if (event.externalId.haveSystemId) {
|
||
|
const CharString &str = event.externalId.systemId;
|
||
|
bool useLit = 1;
|
||
|
bool useLita = 1;
|
||
|
for (size_t i = 0; i < str.len; i++)
|
||
|
if (str.ptr[i] == '"')
|
||
|
useLit = 0;
|
||
|
else if (str.ptr[i] == '\'')
|
||
|
useLita = 0;
|
||
|
if (useLit)
|
||
|
os() << " \"" << event.externalId.systemId << '"';
|
||
|
else if (useLita)
|
||
|
os() << " '" << event.externalId.systemId << '\'';
|
||
|
}
|
||
|
os() << '>';
|
||
|
os().put(RE);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::startElement(const StartElementEvent &event)
|
||
|
{
|
||
|
// This can happen with OMITTAG.
|
||
|
if (parseMode_ != normalMode)
|
||
|
os() << "]]><![ INCLUDE [";
|
||
|
// The output is more readable if, within element content,
|
||
|
// tags begin on a new line.
|
||
|
if (isElementContentStack_.size() > 0
|
||
|
&& isElementContentStack_[isElementContentStack_.size() - 1])
|
||
|
outputRe();
|
||
|
isElementContentStack_
|
||
|
+= char(event.contentType == StartElementEvent::element);
|
||
|
if (event.included) {
|
||
|
if (outputState_ == pendingRe) {
|
||
|
numCharRef(RE); // would be moved otherwise
|
||
|
outputState_ = other;
|
||
|
}
|
||
|
outputStateStack_ += char(outputState_);
|
||
|
}
|
||
|
else {
|
||
|
outputStateStack_ += char(other);
|
||
|
flushRe();
|
||
|
}
|
||
|
switch (event.contentType) {
|
||
|
case StartElementEvent::cdata:
|
||
|
parseMode_ = cdataMode;
|
||
|
break;
|
||
|
case StartElementEvent::rcdata:
|
||
|
parseMode_ = rcdataMode;
|
||
|
break;
|
||
|
default:
|
||
|
parseMode_ = normalMode;
|
||
|
break;
|
||
|
}
|
||
|
outputState_ = afterStartTag;
|
||
|
os() << '<' << event.gi;
|
||
|
for (size_t i = 0; i < event.nAttributes; i++) {
|
||
|
const Attribute &a = event.attributes[i];
|
||
|
switch (a.type) {
|
||
|
case Attribute::cdata:
|
||
|
if (a.defaulted != Attribute::definition) {
|
||
|
os() << ' ' << a.name << '=' << '"';
|
||
|
for (size_t j = 0; j < a.nCdataChunks; j++)
|
||
|
if (a.cdataChunks[j].isSdata)
|
||
|
entityRef(a.cdataChunks[j].entityName);
|
||
|
else if (a.cdataChunks[j].isNonSgml)
|
||
|
numCharRefNum(a.cdataChunks[j].nonSgmlChar);
|
||
|
else
|
||
|
attributeChars(a.cdataChunks[j].data);
|
||
|
os() << '"';
|
||
|
}
|
||
|
break;
|
||
|
case Attribute::tokenized:
|
||
|
if (a.defaulted != Attribute::definition) {
|
||
|
if (a.isGroup && a.name == a.tokens)
|
||
|
os() << ' ' << a.tokens;
|
||
|
else
|
||
|
os() << ' ' << a.name << '=' << '"' << a.tokens << '"';
|
||
|
}
|
||
|
break;
|
||
|
default:
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
os() << '>';
|
||
|
mustOmitEndTag_ = (event.contentType == StartElementEvent::empty);
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::attributeChars(const CharString &str)
|
||
|
{
|
||
|
const Char *p = str.ptr;
|
||
|
for (size_t n = str.len; n > 0; n--, p++)
|
||
|
switch (*p) {
|
||
|
case '"':
|
||
|
case '&':
|
||
|
case RS:
|
||
|
case RE:
|
||
|
case TAB:
|
||
|
numCharRef(*p);
|
||
|
break;
|
||
|
default:
|
||
|
if (isSgmlChar(*p))
|
||
|
os().put(*p);
|
||
|
else
|
||
|
numCharRef(*p);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::endElement(const EndElementEvent &event)
|
||
|
{
|
||
|
if (mustOmitEndTag_)
|
||
|
mustOmitEndTag_ = 0;
|
||
|
else {
|
||
|
if (outputState_ == pendingRe) {
|
||
|
outputRe();
|
||
|
outputRe();
|
||
|
}
|
||
|
else if (isElementContentStack_[isElementContentStack_.size() - 1])
|
||
|
outputRe();
|
||
|
os() << "</" << event.gi << '>';
|
||
|
}
|
||
|
outputState_ = outputStateStack_[outputStateStack_.size() - 1];
|
||
|
outputStateStack_.resize(outputStateStack_.size() - 1);
|
||
|
parseMode_ = normalMode;
|
||
|
isElementContentStack_.resize(isElementContentStack_.size() - 1);
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::data(const DataEvent &event)
|
||
|
{
|
||
|
const Char *p = event.data.ptr;
|
||
|
for (size_t len = event.data.len; len > 0; len--, p++) {
|
||
|
switch (*p) {
|
||
|
case RE:
|
||
|
flushRe();
|
||
|
if (outputState_ == afterStartTag)
|
||
|
outputRe();
|
||
|
outputState_ = pendingRe;
|
||
|
break;
|
||
|
case '&':
|
||
|
case '<':
|
||
|
case '>': // prevents recognition of MSE
|
||
|
flushRe();
|
||
|
outputState_ = other;
|
||
|
if (parseMode_ == cdataMode)
|
||
|
os().put(*p);
|
||
|
else
|
||
|
numCharRef(*p);
|
||
|
break;
|
||
|
default:
|
||
|
flushRe();
|
||
|
outputState_ = other;
|
||
|
if (isSgmlChar(*p))
|
||
|
os().put(*p);
|
||
|
else if (parseMode_ == cdataMode) {
|
||
|
// The output will not be valid, but I don't think this
|
||
|
// situation can arise with valid SGML source.
|
||
|
os().put(*p);
|
||
|
}
|
||
|
else
|
||
|
numCharRef(*p);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::nonSgmlChar(const NonSgmlCharEvent &event)
|
||
|
{
|
||
|
flushRe();
|
||
|
outputState_ = other;
|
||
|
numCharRefNum(event.c);
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::sdata(const SdataEvent &event)
|
||
|
{
|
||
|
flushRe();
|
||
|
entityRef(event.entityName);
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::pi(const PiEvent &event)
|
||
|
{
|
||
|
if (!inInstance_)
|
||
|
return;
|
||
|
if (outputState_ == pendingRe) {
|
||
|
numCharRef(RE); // would be moved otherwise
|
||
|
outputState_ = other;
|
||
|
}
|
||
|
if (event.entityName.len)
|
||
|
entityRef(event.entityName);
|
||
|
else
|
||
|
os() << "<?" << event.data << '>';
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::externalDataEntityRef(const ExternalDataEntityRefEvent &event)
|
||
|
{
|
||
|
flushRe();
|
||
|
entityRef(event.entity.name);
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::subdocEntityRef(const SubdocEntityRefEvent &event)
|
||
|
{
|
||
|
flushRe();
|
||
|
entityRef(event.entity.name);
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::endProlog(const EndPrologEvent &)
|
||
|
{
|
||
|
inInstance_ = 1;
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::commentDecl(const CommentDeclEvent &event)
|
||
|
{
|
||
|
if (!inInstance_)
|
||
|
return;
|
||
|
if (outputState_ == pendingRe) {
|
||
|
numCharRef(RE); // would be moved otherwise
|
||
|
outputState_ = other;
|
||
|
}
|
||
|
os() << "<!";
|
||
|
for (size_t i = 0; i < event.nComments; i++) {
|
||
|
os() << "--" << event.comments[i] << "--";
|
||
|
if (i + 1 < event.nComments)
|
||
|
os() << event.seps[i]; // ensure comment declarations end with -->
|
||
|
}
|
||
|
os() << '>';
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::markedSectionStart(const MarkedSectionStartEvent &event)
|
||
|
{
|
||
|
if (outputState_ == pendingRe) {
|
||
|
numCharRef(RE); // would be moved otherwise
|
||
|
outputState_ = other;
|
||
|
}
|
||
|
os() << "<![";
|
||
|
for (size_t i = 0; i < event.nParams; i++)
|
||
|
switch (event.params[i].type) {
|
||
|
case MarkedSectionStartEvent::Param::temp:
|
||
|
os() << " TEMP";
|
||
|
break;
|
||
|
case MarkedSectionStartEvent::Param::include:
|
||
|
os() << " INCLUDE";
|
||
|
break;
|
||
|
case MarkedSectionStartEvent::Param::rcdata:
|
||
|
os() << " RCDATA";
|
||
|
break;
|
||
|
case MarkedSectionStartEvent::Param::cdata:
|
||
|
os() << " CDATA";
|
||
|
break;
|
||
|
case MarkedSectionStartEvent::Param::ignore:
|
||
|
os() << " IGNORE";
|
||
|
break;
|
||
|
case MarkedSectionStartEvent::Param::entityRef:
|
||
|
os() << " %" << event.params[i].entityName << ";";
|
||
|
break;
|
||
|
}
|
||
|
switch (event.status) {
|
||
|
case MarkedSectionStartEvent::cdata:
|
||
|
parseMode_ = cdataMode;
|
||
|
break;
|
||
|
case MarkedSectionStartEvent::rcdata:
|
||
|
parseMode_ = rcdataMode;
|
||
|
break;
|
||
|
default:
|
||
|
break;
|
||
|
}
|
||
|
os() << " [";
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::markedSectionEnd(const MarkedSectionEndEvent &)
|
||
|
{
|
||
|
if (outputState_ == pendingRe) {
|
||
|
numCharRef(RE); // would be moved otherwise
|
||
|
outputState_ = other;
|
||
|
}
|
||
|
os() << "]]>";
|
||
|
parseMode_ = normalMode;
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::ignoredChars(const IgnoredCharsEvent &event)
|
||
|
{
|
||
|
os() << event.data;
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::entityRef(const CharString &name)
|
||
|
{
|
||
|
os() << '&' << name << ';';
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::numCharRef(Char c)
|
||
|
{
|
||
|
const SP_NAMESPACE_SCOPE StringC *name = charEntityName(c);
|
||
|
if (name && name->size())
|
||
|
os() << '&' << *name << ';';
|
||
|
else
|
||
|
numCharRefNum(c);
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::numCharRefNum(unsigned long n)
|
||
|
{
|
||
|
os() << "&#" << n << ';';
|
||
|
}
|
||
|
|
||
|
void SGMLGenerator::generalEntity(const GeneralEntityEvent &event)
|
||
|
{
|
||
|
if (event.entity.isInternal
|
||
|
&& event.entity.text.len == 1
|
||
|
&& event.entity.dataType != Entity::sdata) {
|
||
|
SP_NAMESPACE_SCOPE StringC *p = charEntityName(event.entity.text.ptr[0]);
|
||
|
if (p)
|
||
|
p->assign(event.entity.name.ptr, event.entity.name.len);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
SP_NAMESPACE_SCOPE StringC *SGMLGenerator::charEntityName(Char c)
|
||
|
{
|
||
|
switch (c) {
|
||
|
case '<':
|
||
|
return <_;
|
||
|
case '>':
|
||
|
return >_;
|
||
|
case '&':
|
||
|
return &_;
|
||
|
case '"':
|
||
|
return "_;
|
||
|
default:
|
||
|
break;
|
||
|
}
|
||
|
return 0;
|
||
|
}
|