// Copyright (c) 1997 James Clark // See the file COPYING for copying permission. #ifdef __GNUG__ #pragma implementation #endif #include "config.h" #include "ExtendEntityManager.h" #include "ParserApp.h" #include "XmlOutputEventHandler.h" #include "XmlOutputMessages.h" #include "Message.h" #include "MessageArg.h" #include "macros.h" #if 0 #include "UTF8CodingSystem.h" #endif #include "InternalInputSource.h" #include "StorageManager.h" #include "HashTable.h" #include #include #include #ifndef _MSC_VER #include #include #include #else #define MAXPATHLEN _MAX_PATH #include char* dirname(const char*); void realpath(const char*,char*); #endif #include #include #include #ifdef SP_NAMESPACE namespace SP_NAMESPACE { #endif #define EXT_ENT_FILE "extEntities.dtf" #define INT_ENT_FILE "intEntities.dtf" inline void operator+=(StringC &str, const char *s) { while (*s) str += *s++; } inline void XmlOutputEventHandler::closeCdataSection() { useCdata_ = 0; if (inCdata_) { os() << "]]>"; inCdata_ = 0; nCdataEndMatched_ = 0; } } const char RE = '\r'; XmlOutputEventHandler::Options::Options() { memset(this, 0, sizeof(*this)); } static void escape(OutputCharStream &s, Char c) { s << "&#" << (unsigned long)c << ";"; } XmlOutputEventHandler::XmlOutputEventHandler(const Options &options, OutputCharStream *os, const StringC &encodingName, const char *outputDir, const char *dtdLoc, const Ptr &entityManager, const CharsetInfo &systemCharset, CmdLineApp *app) : options_(options), os_(os), entityManager_(entityManager), systemCharset_(&systemCharset), app_(app), outputDir_(outputDir), dtdLoc_(dtdLoc), inDtd_(0), inCdata_(0), useCdata_(0), nCdataEndMatched_(0), namecaseGeneral_(0), extEntFile_(0), intEntFile_(0), extEnts_(0), intEnts_(0) { // Create output stream for main output os_->setEscaper(escape); *os_ << "" << RE; // Set directory for output files if (outputDir_ == NULL || strlen(outputDir_) == 0) outputDir_ = "."; // Open file for writing external entity declarations if we are preserving // any entities if (! options_.expExt) { ParserApp::AppChar* filePath=new ParserApp::AppChar[strlen(outputDir_) + 21]; strcpy (filePath, outputDir_); strcat (filePath, "/"); strcat (filePath, EXT_ENT_FILE); if (! options_.overwrite) { uniqueFilename(filePath); } // Create directories if necessary char *dirs = strdup (filePath); maybeCreateDirectories(dirname(dirs)); extEntFile_ = new FileOutputByteStream; // Open the file, exiting if we fail to do so. if (!extEntFile_->open(filePath)) { app_->message(XmlOutputMessages::cannotOpenOutputFile, StringMessageArg (app_->codingSystem()->convertIn(filePath))); exit(1); } extEnts_ = new EncodeOutputCharStream(extEntFile_, app_->outputCodingSystem()); /* Write some header information to this file. */ *extEnts_ << "\n"; extEnts_->flush(); delete [] filePath; } // Open file for writing external entity declarations if we are preserving // any entities if (! options_.expInt) { char* filePath=new char[strlen(outputDir_) + 21]; strcpy (filePath, outputDir_); strcat (filePath, "/"); strcat (filePath, INT_ENT_FILE); if (! options_.overwrite) { uniqueFilename(filePath); } // Create directories if necessary char *dirs = strdup (filePath); maybeCreateDirectories(dirname(dirs)); intEntFile_ = new FileOutputByteStream; // Open the file, exiting if we fail to do so. if (!intEntFile_->open(filePath)) { app_->message(XmlOutputMessages::cannotOpenOutputFile, StringMessageArg (app_->codingSystem()->convertIn(filePath))); exit(1); } intEnts_ = new EncodeOutputCharStream(intEntFile_, app_->outputCodingSystem()); /* Write some header information to this file. */ *intEnts_ << "\n"; intEnts_->flush(); delete [] filePath; } /* If we are expanding internal entities, we are definitely not generating a reference in the internal subset to an internal entities driver file. Likewise for external entities. */ if (options_.expInt) options_.intDecl= false; if (options_.expExt) options_.extDecl= false; } XmlOutputEventHandler::~XmlOutputEventHandler() { os() << RE; delete os_; if (! options_.expExt) delete extEntFile_; if (! options_.expInt) { delete intEntFile_; } } void XmlOutputEventHandler::message(MessageEvent *event) { app_->dispatchMessage(event->message()); ErrorCountEventHandler::message(event); } void XmlOutputEventHandler::sgmlDecl(SgmlDeclEvent *event) { const Syntax &syntax = event->instanceSyntax(); if (syntax.namecaseGeneral()) { namecaseGeneral_ = 1; syntax.generalSubstTable()->inverseTable(lowerSubst_); } delete event; } void XmlOutputEventHandler::data(DataEvent *event) { const Entity *entity = event->entity(); if (! options_.expInt && entity != NULL) { Boolean firstSeen = checkFirstSeen(entity->name()); // output entity reference os() << "&" << entity->name() << ";"; // save old output stream outputStack_.insert(os_); // output beginning of entity declaration if (firstSeen) { // Point default output stream (os_) to the entities // declaration file. os_ = intEnts_; os() << "name() << " CDATA \""; } // end if firstSeen else { os_ = (OutputCharStream *) new EncodeOutputCharStream(new NullOutputByteStream, app_->outputCodingSystem()); } // end else (not firstSeen) } // end if expanding internal entities // Now, no matter what, output the entity's data if (useCdata_) outputCdata(event->data(), event->dataLength()); else outputData(event->data(), event->dataLength(), 0, 0); // If necessary, end entity decl and replace old output stream if (! options_.expInt && entity != NULL) { os() << "\">" << RE; os_->flush(); os_ = outputStack_.get(); } delete event; } void XmlOutputEventHandler::startElement(StartElementEvent *event) { // This shouldn't happen (although useCdata_ can be true). if (inCdata_) { inCdata_ = 0; nCdataEndMatched_ = 0; os() << "]]>"; } if (options_.preserveCase) { os() << '<' << event->elementType()->origName(); } else { os() << '<' << generalName(event->name(), nameBuf_); } size_t nAttributes = event->attributes().size(); for (size_t i = 0; i < nAttributes; i++) outputAttribute(event->attributes(), i); if (options_.nlInTag) os() << RE; if (options_.cdata && event->elementType()->definition()->declaredContent() == ElementDefinition::cdata) useCdata_ = 1; if (options_.empty && event->elementType()->definition()->declaredContent() == ElementDefinition::empty) os() << "/>"; else os() << '>'; delete event; } void XmlOutputEventHandler::outputAttribute(const AttributeList &attributes, size_t i) { const AttributeValue *value = attributes.value(i); if (!value) return; const Text *text; const StringC *string; AttributeValue::Type type = value->info(text, string); if (type == AttributeValue::implied) return; if (options_.nlInTag) os() << RE; else os() << ' '; if (options_.preserveCase) { os() << attributes.def()->def(i)->origName() << "=\""; } else { os() << generalName(attributes.name(i), nameBuf_) << "=\""; } if (type == AttributeValue::cdata) { TextIter iter(*text); TextItem::Type type; const Char *p; size_t length; const Location *loc; while (iter.next(type, p, length, loc)) { switch (type) { case TextItem::data: case TextItem::cdata: outputData(p, length, 1, 0); break; case TextItem::sdata: { app_->setNextLocation(loc->origin()->parent()); const Entity *entity = loc->origin()->asEntityOrigin()->entity(); Boolean superLit = 0; if (! options_.expInt) { Boolean firstSeen = checkFirstSeen(entity->name()); // output entity reference os() << "&" << entity->name() << ";"; // save old output stream outputStack_.insert(os_); if (firstSeen) { // Point default output stream (os_) to the entities // declaration file and output start of declaration os_ = intEnts_; if (options_.sdataAsPi) { os() << "name() << " 'name() << " \""; superLit = 1; } else { os() << "name() << " \""; } } else { // we've seen it before; throw away expansion data os_ = (OutputCharStream *) new EncodeOutputCharStream(new NullOutputByteStream, app_->outputCodingSystem()); } } // We are expanding internal entities; expand this one as a PI, // since XML does not have SDATA entities else { if (options_.sdataAsPi) os() << "name() << " \""; } // Now, no matter what, output the entity's data outputData(p, length, 1, superLit); // If necessary, end entity decl and replace old output stream if (! options_.expInt) { if (options_.sdataAsPi) os() << "\" ?>'>" << RE; else os() << "\"> \n"; os_->flush(); os_ = outputStack_.get(); } else { if (options_.sdataAsPi) os() << "\" ?>"; } } break; case TextItem::nonSgml: // FIXME break; default: break; } } } else if (attributes.def()->def(i)->isEntity()) os() << *string; else if (options_.preserveCase) { const Vector *tokensPtr = attributes.def()->def(i)->getOrigTokens(); if (tokensPtr) { size_t nTokens = tokensPtr->size(); Vector::const_iterator tokens = tokensPtr->begin(); for (i = 0; i < nTokens; i++) { if (equalsIgnoreCase(*string, (StringC &)tokens[i])) { os() << tokens[i]; } } } else { os() << *string; } } else { os() << generalName(*string, nameBuf_); } os() << '"'; } void XmlOutputEventHandler::endElement(EndElementEvent *event) { closeCdataSection(); if (options_.empty && event->elementType()->definition()->declaredContent() == ElementDefinition::empty) ; else { if (options_.preserveCase) { os() << "elementType()->origName(); } else { os() << "name(), nameBuf_); } #if 0 if (options_.nlInTag) os() << RE; #endif os() << '>'; } delete event; } static Boolean isXmlS(Char c) { switch (c) { case ' ': case '\r': case '\n': case '\t': return 1; } return 0; } static Boolean containsQuestionLt(const Char *s, size_t n) { for (; n > 1; n--, s++) if (*s == '?' && s[1] == '>') return 1; return 0; } // These do not handle Unicode chars properly. // They treat all Unicode chars >= 170 as name start characters static Boolean isXmlNameStartChar(Char c) { switch (c) { case ':': case '.': case '_': case '-': return 1; } if (c >= 170) return 1; if ('a' <= c && c <= 'z') return 1; if ('A' <= c && c <= 'Z') return 1; return 0; } static Boolean isXmlNameChar(Char c) { if (isXmlNameStartChar(c)) return 1; if ('0' <= c && c <= '9') return 1; return 0; } static Boolean startsWithXmlName(const Char *s, size_t n) { if (n == 0) return 0; if (!isXmlNameStartChar(*s)) return 0; for (s++, n--; n > 0; n--, s++) { if (isXmlS(*s)) return 1; if (!isXmlNameChar(*s)) return 0; } return 1; } void XmlOutputEventHandler::pi(PiEvent *event) { const Char *s = event->data(); size_t n = event->dataLength(); if (n >= 3 && s[0] == 'x' && s[1] == 'm' && s[2] == 'l' && (n == 3 || isXmlS(s[3]))) ; // Probably came from an encoding PI. else if (!startsWithXmlName(s, n)) { app_->setNextLocation(event->location()); app_->message(XmlOutputMessages::piNoName); } else if (options_.piEscape) { os() << ""; } else if (containsQuestionLt(s, n)) { app_->setNextLocation(event->location()); app_->message(XmlOutputMessages::piQuestionLt); } else { os() << ""; } delete event; } void XmlOutputEventHandler::sdataEntity(SdataEntityEvent *event) { const Entity *entity = event->entity(); app_->setNextLocation(event->location().origin()->parent()); Boolean superLit = 0; if (! options_.expInt) { Boolean firstSeen = checkFirstSeen(entity->name()); // output entity reference os() << "&" << entity->name() << ";"; // save old output stream outputStack_.insert(os_); if (firstSeen) { // Point default output stream (os_) to the entities // declaration file and output start of declaration os_ = intEnts_; if (options_.sdataAsPi) { os() << "name() << " 'name() << " "; superLit = 1; } else { os() << "name() << " \""; } } else { // we've seen it before; throw away expansion data os_ = (OutputCharStream *) new EncodeOutputCharStream(new NullOutputByteStream, app_->outputCodingSystem()); } } // We are expanding internal entities; expand this one as a PI, // since XML does not have SDATA entities else { if (options_.sdataAsPi) os() << "name() << " "; } // Now, no matter what, output the entity's data outputData(event->data(), event->dataLength(), 0, superLit); // If necessary, end entity decl and replace old output stream if (! options_.expInt) { if (options_.sdataAsPi) os() << " ?>'>" << RE; else os() << "\"> \n"; os_->flush(); os_ = outputStack_.get(); } else { if (options_.sdataAsPi) os() << " ?>"; } delete event; } /** External data entities may be referenced in attributes only. If one is referenced in content, error and exit. */ void XmlOutputEventHandler::externalDataEntity(ExternalDataEntityEvent *event) { app_->message(XmlOutputMessages::externalDataEntityReference, StringMessageArg(event->entity()->name())); exit (1); } void XmlOutputEventHandler::subdocEntity(SubdocEntityEvent *event) { const SubdocEntity *entity = event->entity(); app_->setNextLocation(event->location().origin()->parent()); if (options_.expExt) { SgmlParser::Params params; params.subdocInheritActiveLinkTypes = 1; params.subdocReferenced = 1; params.origin = event->entityOrigin()->copy(); params.parent = & ((ParserApp *)app_)->parser(); params.sysid = entity->externalId().effectiveSystemId(); params.entityType = SgmlParser::Params::subdoc; SgmlParser parser(params); parser.parseAll(*this); } else { os() << "&" << entity->name() << ";"; } delete event; } void XmlOutputEventHandler::startDtd(StartDtdEvent *event) { inDtd_ = 1; delete event; } void XmlOutputEventHandler::endDtd(EndDtdEvent *event) { inDtd_ = 0; delete event; } void XmlOutputEventHandler::maybeStartDoctype(Boolean &doctypeStarted, const Dtd &dtd) { if (doctypeStarted) return; doctypeStarted = 1; const StringC &name = dtd.documentElementType()->name(); StringC buf; // if appropriate, lowercase the doctype name -jphekman if (options_.lower) { for (size_t i = 0; i < name.size(); i++) { Char c = lowerSubst_[name[i]]; if (c != name[i]) { buf = name; buf[i] = c; for (i++; i < name.size(); i++) lowerSubst_.subst(buf[i]); } } } else { buf = name; } /* Output the doctype declaration. If requested, specify a local file containing the DTD. */ os() << "codingSystem()->convertIn(outputDir_) << "/" << EXT_ENT_FILE << "\">" << RE << "%external-entities;" << RE; } if (options_.intDecl) { os() << "codingSystem()->convertIn(outputDir_) << "/" << INT_ENT_FILE << "\">" << RE << "%internal-entities;" << RE; } } void XmlOutputEventHandler::endProlog(EndPrologEvent *event) { const Dtd &dtd = event->dtd(); Boolean doctypeStarted = 0; if (options_.extDecl || options_.intDecl) { maybeStartDoctype(doctypeStarted, dtd); } if (options_.notation) { Dtd::ConstNotationIter iter(dtd.notationIter()); for (;;) { const Notation *notation = iter.nextTemp(); if (!notation) break; maybeStartDoctype(doctypeStarted, dtd); os() << "name(), nameBuf_); outputExternalId(*notation); os() << ">" << RE; if (notation->attributeDefTemp()) { app_->setNextLocation(notation->defLocation()); app_->message(XmlOutputMessages::notationAttributes, StringMessageArg(notation->name())); } } } if (options_.ndata) { Dtd::ConstEntityIter iter(dtd.generalEntityIter()); for (;;) { const Entity *entity = iter.nextTemp(); if (!entity) break; const ExternalDataEntity *extDataEntity = entity->asExternalDataEntity(); if (extDataEntity) { maybeStartDoctype(doctypeStarted, dtd); os() << "name(); outputExternalId(*entity); if (extDataEntity->dataType() != EntityDecl::ndata) { app_->setNextLocation(entity->defLocation()); app_->message(XmlOutputMessages::externalDataNdata, StringMessageArg(entity->name())); } os() << " NDATA " << generalName(extDataEntity->notation()->name(), nameBuf_) << ">" << RE; } } } if (options_.id || options_.attlist) { Dtd::ConstElementTypeIter iter(dtd.elementTypeIter()); for (;;) { const ElementType *elementType = iter.next(); if (!elementType) break; const AttributeDefinitionList *adl = elementType->attributeDefTemp(); if (adl) { if (options_.attlist) { maybeStartDoctype(doctypeStarted, dtd); if (options_.preserveCase) { os() << "origName(); } else { os() << "name(), nameBuf_); } for (size_t i = 0; i < adl->size(); i++) { const AttributeDefinition *def = adl->def(i); if (options_.preserveCase) { os() << RE << def->origName(); } else { os() << RE << generalName(def->name(), nameBuf_); } AttributeDefinitionDesc desc; def->getDesc(desc); switch (desc.declaredValue) { case AttributeDefinitionDesc::cdata: os() << " CDATA #IMPLIED"; break; case AttributeDefinitionDesc::name: case AttributeDefinitionDesc::number: case AttributeDefinitionDesc::nmtoken: case AttributeDefinitionDesc::nutoken: os() << " NMTOKEN #IMPLIED"; break; case AttributeDefinitionDesc::entity: os() << " ENTITY #IMPLIED"; break; case AttributeDefinitionDesc::idref: os() << " IDREF #IMPLIED"; break; case AttributeDefinitionDesc::names: case AttributeDefinitionDesc::numbers: case AttributeDefinitionDesc::nmtokens: case AttributeDefinitionDesc::nutokens: os() << " NMTOKENS #IMPLIED"; break; case AttributeDefinitionDesc::entities: os() << " ENTITIES #IMPLIED"; break; case AttributeDefinitionDesc::idrefs: os() << " IDREFS #IMPLIED"; break; case AttributeDefinitionDesc::id: os() << " ID #IMPLIED"; break; case AttributeDefinitionDesc::notation: os() << " NOTATION"; // fall through case AttributeDefinitionDesc::nameTokenGroup: { os() << " ("; if (options_.preserveCase) { for (size_t j = 0; j < desc.origAllowedValues.size(); j++) { if (j > 0) os() << '|'; os() << desc.origAllowedValues[j]; } os() << ") #IMPLIED"; } else { for (size_t j = 0; j < desc.allowedValues.size(); j++) { if (j > 0) os() << '|'; os() << desc.allowedValues[j]; } os() << ") #IMPLIED"; } } break; default: CANNOT_HAPPEN(); } } os() << '>' << RE; } else { size_t idIndex = adl->idIndex(); if (idIndex != size_t(-1)) { maybeStartDoctype(doctypeStarted, dtd); os() << "name(), nameBuf_); os() << ' ' << generalName(adl->def(idIndex)->name(), nameBuf_) << " ID #IMPLIED>" << RE; } } } } } if (doctypeStarted) os() << "]>" << RE; delete event; } void XmlOutputEventHandler::outputExternalId(const EntityDecl &decl) { const StringC *pubIdP = decl.publicIdPointer(); const StringC *sysIdP = decl.effectiveSystemIdPointer(); if (pubIdP) { os() << " PUBLIC \"" << *pubIdP << "\""; if (decl.declType() == EntityDecl::notation && !sysIdP) return; os() << " \""; } else os() << " SYSTEM \""; if (sysIdP) { StringC url; switch(fsiToUrl(*sysIdP, decl.defLocation(), url)) { case 1: os() << url; break; case 0: break; default: app_->setNextLocation(decl.defLocation()); app_->message(XmlOutputMessages::cannotConvertFsiToUrl, StringMessageArg(*sysIdP)); break; } } os() << "\""; } // Return 1 if OK; return -1 to generate generic error; return 0 if error already generated. int XmlOutputEventHandler::fsiToUrl(const StringC &fsi, const Location &loc, StringC &url) { ParsedSystemId parsedBuf; if (!entityManager_->parseSystemId(fsi, *systemCharset_, 0, 0, *app_, parsedBuf)) return 0; if (parsedBuf.size() != 1) return -1; if (strcmp(parsedBuf[0].storageManager->type(), "URL") == 0) { url = parsedBuf[0].specId; return 1; } Owner in(entityManager_->open(fsi, *systemCharset_, InputSourceOrigin::make(), 0, *app_)); if (!in) return 0; Xchar c = in->get(*app_); StorageObjectLocation soLoc; if (c == InputSource::eE && in->accessError()) { if (parsedBuf[0].baseId.size()) return 0; soLoc.storageObjectSpec = &parsedBuf[0]; soLoc.actualStorageId = parsedBuf[0].specId; } else { const Location &loc = in->currentLocation(); if (loc.origin().isNull()) return -1; const InputSourceOrigin *tem = loc.origin()->asInputSourceOrigin(); if (!tem) return -1; const ParsedSystemId *psi = ExtendEntityManager::externalInfoParsedSystemId(tem->externalInfo()); if (!psi || psi->size() != 1) return -1; if (!ExtendEntityManager::externalize(tem->externalInfo(), 0, soLoc)) return -1; } if (strcmp(soLoc.storageObjectSpec->storageManager->type(), "OSFILE") != 0) return -1; return filenameToUrl(soLoc.actualStorageId, loc, url); } static StringC hexEncode(unsigned char c) { static const char hexDigits[] = "0123456789ABCDEF"; StringC result; result.resize(2); result[0] = hexDigits[c >> 4]; result[1] = hexDigits[c & 0xF]; return result; } static StringC urlEncode(Char c) { if (c >= 0x80) { StringC tem; tem += c; #if 0 // This is what the URN spec says. UTF8CodingSystem utf8; String bytes(utf8.convertOut(tem)); tem.resize(0); for (size_t i = 0; i < bytes.size(); i++) tem += hexEncode(bytes[i]); #endif return tem; } if (strchr("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$-_.+!*'(),", char(c))) { StringC tem; tem += c; return tem; } return hexEncode((unsigned char)c); } int XmlOutputEventHandler::filenameToUrl(const StringC &filename, const Location &loc, StringC &url) { #ifdef SP_MSDOS_FILENAMES if (filename.size() >= 3 && filename[1] == ':' && (('a' <= filename[0] && filename[0] <= 'z') || ('A' <= filename[0] && filename[0] <= 'Z')) && (filename[2] == '/' || filename[2] == '\\')) { url += "file://"; url += filename; return 1; } #endif for (size_t i = 0; i < filename.size(); i++) { if (filename[i] == '/' #ifdef SP_MSDOS_FILENAMES || filename[i] == '\\' #endif ) { url += '/'; } else url += urlEncode(filename[i]); } return 1; } void XmlOutputEventHandler::commentDecl(CommentDeclEvent *event) { if (!inDtd_ && options_.comment) { Boolean started = 0; for (MarkupIter iter(event->markup()); iter.valid(); iter.advance()) { if (iter.type() == Markup::comment) { if (!started) { os() << ""; } delete event; } void XmlOutputEventHandler::markedSectionStart(MarkedSectionStartEvent *event) { if (options_.cdata && event->status() == MarkedSectionEvent::cdata) useCdata_ = 1; delete event; } void XmlOutputEventHandler::markedSectionEnd(MarkedSectionEndEvent *event) { if (event->status() == MarkedSectionEvent::cdata) closeCdataSection(); delete event; } void XmlOutputEventHandler::outputCdata(const Char *s, size_t n) { static const char cdataEnd[] = "]]>"; for (; n > 0; s++, n--) { Char c = *s; if (!inCdata_) { os() << " 0; s++, n--) { Char c = *s; switch (c) { case '&': os() << "&"; break; case '<': os() << "<"; break; case '>': os() << ">"; break; case 9: case 10: case 13: if (inLit) os() << "&#" << int(c) << ';'; else os().put(c); break; case '"': if (inLit) { os() << """; break; } case '\'': if (inSuperLit) { os() << "'"; break; } // fall through default: os().put(c); break; } } } void XmlOutputEventHandler::entityDefaulted(EntityDefaultedEvent *event) { if (options_.reportEnts) { ConstPtr entity = event->entityPointer(); const ExternalEntity *extEntity = entity->asExternalEntity(); // If we are dealing with an external entity (else it will be null) // -jphekman if (extEntity != 0) { const StringC *systemIdPointer = extEntity->systemIdPointer(); if (systemIdPointer != 0) { os () << ""; } } } } void XmlOutputEventHandler::inputOpened(InputSource *in) { if (!inDtd_) { const CodingSystem *outputCodingSystem = app_->outputCodingSystem(); const EntityDecl *entDecl = in->currentLocation().origin()->entityDecl(); if (entDecl == NULL ) { if (options_.reportIS) { os() << ""; } return; } const Entity *ent = in->currentLocation().origin()->entity(); const StringC *effectiveSystemIdPointer = entDecl->effectiveSystemIdPointer(); const StringC *systemIdPointer = entDecl->systemIdPointer(); const StringC *publicIdPointer = entDecl->publicIdPointer(); if (options_.reportIS && ent->asExternalEntity() != NULL) { os() << ""; } /* Output entity declaration and, in the case of external entities, write file containing entity replacement text. */ Boolean firstSeen = checkFirstSeen(ent->name()); if (ent->asExternalEntity() != NULL) { if (! options_.expExt) { // output entity reference os() << "&" << entDecl->name() << ";"; // output entity declaration if (systemIdPointer == 0) { app_->message(XmlOutputMessages::missingSystemId, StringMessageArg(entDecl->name())); exit (1); } // save old output stream outputStack_.insert(os_); if (firstSeen) { char *originalFilePath = strdup(outputCodingSystem->convertOut(*systemIdPointer).data()); /* Construct new output path, prepending the output directory: so, for example, /usr/local/lib/ents/foo becomes ./usr/local/lib/ents/foo.xml; possible 2-digit suffix (for uniqueness) */ char* filePath=new char[strlen(outputDir_) + 9 + outputCodingSystem->convertOut (*systemIdPointer).size()]; strcpy (filePath, outputDir_); /* Drop the scheme if there is one (so "http://www.bar.com/foo" becomes "output_dir/www.bar.com/foo") */ char *scheme = strchr(originalFilePath, ':'); if (scheme != NULL) { // drop initial colon originalFilePath = scheme + 1; } else { // check for leading slashes in non-scheme filenames // and warn that we're going to remove them if (strncmp(originalFilePath, "/", 1) == 0) { app_->message(XmlOutputMessages::removingLeadingSlashes, StringMessageArg(*systemIdPointer)); } } // drop initial slashes (there may be multiple) while (strncmp(originalFilePath, "/", 1) == 0) { originalFilePath++; } // replace exactly one initial slash before prepending // output directory strcat (filePath, "/"); strcat(filePath, originalFilePath); // Check to make sure we haven't passed outside of the // output directory char *dirs = strdup (filePath); char realDirs[MAXPATHLEN]; char realOutputDir[MAXPATHLEN]; char *outputDir = strdup(outputDir_); realpath((const char *)dirname(dirs), realDirs); realpath((const char *)dirname(outputDir), realOutputDir); if (strncmp(realDirs, realOutputDir, strlen (realOutputDir)) != 0) { app_->message(XmlOutputMessages::pathOutsideOutputDirectory, StringMessageArg(*systemIdPointer)); if (! options_.writeOutsideOutDir) { exit (1); } } // Make the necessary directories maybeCreateDirectories(dirname(dirs)); // Set the suffix to ".xml" convertSuffix(filePath); *extEnts_ << "name() << " SYSTEM \"" << filePath << "\">\n"; extEnts_->flush(); if (! options_.overwrite) { uniqueFilename(filePath); } // Open the file, exiting if we fail to do so. FileOutputByteStream *file = new FileOutputByteStream; outputFileStack_.insert(file); StringC filePathStrC = app_->codingSystem()->convertIn(filePath); // If we've never seen this exact input filename before, // then we're being asked to overwrite an old generated file // with a new one (for example, "/text" and "text" will both // map to the top level of the output directory). In that // case, produce an error. for (size_t i = 0; i < filesCreated_.size(); i++) { if (filesCreated_[i] == filePathStrC) { bool seenOrigPath = false; for (size_t j = 0; j < originalFilePaths_.size(); j++) { if (originalFilePaths_[j] == *systemIdPointer) { seenOrigPath = true; } } if (! seenOrigPath) { app_->message(XmlOutputMessages::alreadyWroteOutputFile, StringMessageArg(filePathStrC)); exit (1); } } } // Save so we can ensure not overwriting later. filesCreated_.push_back(filePathStrC); originalFilePaths_.push_back(*systemIdPointer); if (!file->open(filePath)) { app_->message(XmlOutputMessages::cannotOpenOutputFile, StringMessageArg(filePathStrC)); exit (1); } // Create output stream to file and set os_ to it. os_ = (OutputCharStream *) new EncodeOutputCharStream(file, outputCodingSystem); delete [] filePath; } // end if firstSeen else { // push null os onto file output stack, set os_ to it NullOutputByteStream *nobs = new NullOutputByteStream; outputFileStack_.insert(nobs); // Create output stream to file and set os_ to it. os_ = (OutputCharStream *) new EncodeOutputCharStream(nobs, outputCodingSystem); } // end else (notfirst Seen) } // end if not expanding external entities } // end if asExternalEntity() else if (ent->asInternalEntity() != NULL) { char *entName = strdup(outputCodingSystem->convertOut(entDecl->name()).data()); if (! options_.expInt) { // output entity reference os() << "&" << entDecl->name() << ";"; // save old output stream and point output stream at null outputStack_.insert(os_); NullOutputByteStream *nobs = new NullOutputByteStream; os_ = (OutputCharStream *) new EncodeOutputCharStream(nobs, outputCodingSystem); if (firstSeen) { // don't define predefined entities if (strcmp (entName, "lt") != 0 && strcmp (entName, "gt") != 0 && strcmp (entName, "amp") != 0 && strcmp (entName, "apos") != 0 && strcmp (entName, "quot") != 0) { // output declaration to internal entities driver file InternalInputSource *intIn = in->asInternalInputSource(); *intEnts_ << "name() << " " << getQuoteMark (intIn->contents()) << (*intIn->contents()) << getQuoteMark (intIn->contents()) << " >\n"; intEnts_->flush(); } } // end if firstSeen } // end if expanding internal entities } // end if this is an internal entity else { // We should only get InternalText and ExternalText entities here. app_->message(XmlOutputMessages::unexpectedEntityType, StringMessageArg (ent->name())); exit(1); } } } void XmlOutputEventHandler::inputClosed(InputSource *in) { if (! inDtd_) { const EntityDecl *entDecl = in->currentLocation().origin()->entityDecl(); const Entity *ent = in->currentLocation().origin()->entity(); if (entDecl == NULL || ent == NULL ) { if ( options_.reportIS) { os() << ""; } return; } // Close external entity if (ent->asExternalEntity() != NULL) { if (! options_.expExt) { // delete current output stream os_->flush(); delete os_; // restore previous output stream os_ = outputStack_.get(); // close file OutputByteStream *file = outputFileStack_.get(); delete file; } } // Close internal entity else if (ent->asInternalEntity() != NULL) { if (! options_.expInt) { os_->flush(); delete os_; // restore previous output stream os_ = outputStack_.get(); } } else { // We should only get InternalText and ExternalText entities here. app_->message(XmlOutputMessages::unexpectedEntityType, StringMessageArg( ent->name() )); exit(1); } if (options_.reportIS && ent->asExternalEntity() != NULL) { os() << ""; } } } const StringC &XmlOutputEventHandler::generalName(const StringC &name, StringC &buf) { if (options_.lower && namecaseGeneral_) { for (size_t i = 0; i < name.size(); i++) { Char c = lowerSubst_[name[i]]; if (c != name[i]) { buf = name; buf[i] = c; for (i++; i < name.size(); i++) lowerSubst_.subst(buf[i]); return buf; } } } return name; } Boolean XmlOutputEventHandler::equalsIgnoreCase(const StringC &str1, StringC &str2) { if (str1.size() != str2.size()) return false; for (size_t i = 0; i < str1.size(); i++) { if (lowerSubst_[str1[i]] != lowerSubst_[str2[i]]) return false; } return true; } /** Make this string's suffix ".xml", attempting to do the right thing if we encounter ".sgm", ".sgml", or ".xml" as the original suffix. */ char *XmlOutputEventHandler::convertSuffix(char *name) { // Get a pointer to the last occurrence of ".": char *suffix = strrchr(name, '.'); // If there is no "." in "name", append ".xml" and return if (suffix == NULL) { strcat(name, ".xml"); return name; } // Suffix is "sgm[l]": subsitite ".xml" if (strcmp(suffix, ".sgm") == 0 || strcmp (suffix, ".sgml") == 0) { strcpy (suffix, ".xml"); return name; } // "xml": do nothing if (strcmp(suffix, ".xml") == 0) { return name; } // default: append ".xml" strcat (name, ".xml"); return name; } /** Create all the directories mentioned in this path, unless they already exist. */ int XmlOutputEventHandler::maybeCreateDirectories (char *path) { int ret; char *pp = strrchr(path, '/'); #ifdef SP_MSDOS_FILENAMES char *ppp = strrchr(path,'\\'); if (ppp && (ppp > pp || pp==NULL)) pp=ppp; #endif if ((pp) && (pp != path)) { *pp = '\0'; maybeCreateDirectories(path); *pp = '/'; } #ifndef _MSC_VER struct stat statbuf; pp = strrchr(path, '/'); statbuf.st_mode = 0777; /* stat the directory */ if ((pp) && (pp != path)) { *pp = '\0'; stat(path, &statbuf); *pp = '/'; } ret = mkdir(path, statbuf.st_mode); #else ret=_mkdir(path); #endif if (ret == -1) { if (errno == EEXIST) { ret = 0; } else { app_->message(XmlOutputMessages::cannotCreateDirectory, StringMessageArg(app_->codingSystem()->convertIn(path))); } } return(ret); } /** If this is the first time we have encountered entity "name", return true and add it to a list of entities we've seen. Else, return false. */ Boolean XmlOutputEventHandler::checkFirstSeen(const StringC &name) { Named *id = entTable_.lookup(name); if (!id) { entTable_.insert(new Named(name)); return true; } return false; } /** Set the newFilename to a filename which, while similar or identical to originalFilename, does not correspond to an existing file. Gives an error if called more than 99 times on the same filename (during the same or different executions of osx). Assumes that the char array it's given is long enough to accept a two-digit suffix in addition to the string that's already in there. */ void XmlOutputEventHandler::uniqueFilename(char *filePath) { char* baseFilePath=new char[strlen(filePath)]; strcpy (baseFilePath, filePath); struct stat statbuf; int num = 0; char numStr[3]; while (stat(filePath, &statbuf) == 0 && num <= 100) { num++; strcpy (filePath, baseFilePath); strcat (filePath, "."); sprintf(numStr, "%d", num); strcat (filePath, numStr); } if (num >= 100) { app_->message(XmlOutputMessages::tooManyOutputFiles, StringMessageArg (app_->codingSystem()->convertIn(baseFilePath))); exit(1); } delete [] baseFilePath; } char XmlOutputEventHandler::getQuoteMark(const StringC *contents) { char *contentsStr = strdup(app_->outputCodingSystem()->convertOut(*contents).data()); if (strchr(contentsStr, '"') == NULL) { return '"'; } else { return '\''; } } /** * NullOutputByteStream */ NullOutputByteStream::NullOutputByteStream() { } NullOutputByteStream::~NullOutputByteStream() { } void NullOutputByteStream::flush() { } void NullOutputByteStream::sputc(char c) { } void NullOutputByteStream::sputn(const char *, size_t) { } OutputByteStream &NullOutputByteStream::operator<<(char) { return *this; } OutputByteStream &NullOutputByteStream::operator<<(unsigned char) { return *this; } OutputByteStream &NullOutputByteStream::operator<<(const char *) { return *this; } OutputByteStream &NullOutputByteStream::operator<<(int) { return *this; } OutputByteStream &NullOutputByteStream::operator<<(unsigned) { return *this; } OutputByteStream &NullOutputByteStream::operator<<(long) { return *this; } OutputByteStream &NullOutputByteStream::operator<<(unsigned long) { return *this; } OutputByteStream &NullOutputByteStream::operator<<(const String &) { return *this; } /* Note: Returning NULL is probably not the best solution here, but as * nothing actually uses getBufferPtr(), it is hard to see what sort * of no-op behavior would actually be appropriate. */ char *NullOutputByteStream::getBufferPtr() const { return NULL; } size_t NullOutputByteStream::getBufferSize() const { return 0; } void NullOutputByteStream::usedBuffer(size_t) { } void NullOutputByteStream::flushBuf(char) { } #ifdef SP_NAMESPACE } #endif #ifdef _MSC_VER char* dirname(const char* path) { static char buffer[MAXPATHLEN]; char* p=strrchr((char*)path,'/'); #ifdef SP_MSDOS_FILENAMES char *ppp = strrchr((char*)path,'\\'); if (ppp && (ppp > p || p==NULL)) p=ppp; #endif if (p==NULL) return NULL; strncpy(buffer,path,p-path); buffer[p-path]=0; return buffer; } void realpath(const char* in,char* out) { _fullpath(out,in,MAXPATHLEN); return; } #endif