// Copyright (c) 1994, 1995, 1996 James Clark // See the file COPYING for copying permission. #ifdef __GNUG__ #pragma implementation #endif #include "splib.h" #include "ExtendEntityManager.h" #include "Message.h" #include "MessageArg.h" #include "OffsetOrderedList.h" #include "rtti.h" #include "StorageManager.h" #include "Vector.h" #include "NCVector.h" #include "Owner.h" #include "constant.h" #include "EntityManagerMessages.h" #include "StorageObjectPosition.h" #include "Owner.h" #include "CodingSystem.h" #include "CodingSystemKit.h" #include "InputSource.h" #include "Mutex.h" #include "macros.h" #include "EntityCatalog.h" #include "CharMap.h" #include #include #include #include #include #ifdef DECLARE_MEMMOVE extern "C" { void *memmove(void *, const void *, size_t); } #endif #ifdef SP_NAMESPACE namespace SP_NAMESPACE { #endif const char EOFCHAR = '\032'; // Control-Z class ExternalInputSource; class EntityManagerImpl : public ExtendEntityManager { public: EntityManagerImpl(StorageManager *defaultStorageManager, const InputCodingSystem *defaultCodingSystem, const ConstPtr &, Boolean internalCharsetIsDocCharset); void setCatalogManager(CatalogManager *catalogManager); void registerStorageManager(StorageManager *); InputSource *open(const StringC &sysid, const CharsetInfo &, InputSourceOrigin *, unsigned flags, Messenger &); const CharsetInfo &charset() const; Boolean internalCharsetIsDocCharset() const; ConstPtr makeCatalog(StringC &systemId, const CharsetInfo &charset, Messenger &mgr); Boolean expandSystemId(const StringC &, const Location &, Boolean isNdata, const CharsetInfo &, const StringC *, Messenger &, StringC &); Boolean mergeSystemIds(const Vector &, Boolean mapCatalogDocument, const CharsetInfo &, Messenger &mgr, StringC &) const; StorageManager *lookupStorageType(const StringC &, const CharsetInfo &) const; StorageManager *lookupStorageType(const char *) const; StorageManager *guessStorageType(const StringC &, const CharsetInfo &) const; const InputCodingSystem *lookupCodingSystem(const StringC &, const CharsetInfo &, Boolean isBctf, const char *&) const; Boolean resolveSystemId(const StringC &str, const CharsetInfo &idCharset, Messenger &mgr, const Location &defLocation, Boolean isNdata, ParsedSystemId &parsedSysid) const; Boolean parseSystemId(const StringC &str, const CharsetInfo &idCharset, Boolean isNdata, const StorageObjectLocation *def, Messenger &mgr, ParsedSystemId &parsedSysid) const; const CharsetInfo &internalCharset(const CharsetInfo &docCharset) const { if (internalCharsetIsDocCharset_) return docCharset; else return charset(); } private: EntityManagerImpl(const EntityManagerImpl &); // undefined void operator=(const EntityManagerImpl &); // undefined static Boolean defLocation(const Location &, StorageObjectLocation &); static Boolean matchKey(const StringC &type, const char *s, const CharsetInfo &internalCharset); NCVector > storageManagers_; Owner defaultStorageManager_; const InputCodingSystem *defaultCodingSystem_; Owner catalogManager_; Boolean internalCharsetIsDocCharset_; ConstPtr codingSystemKit_; friend class FSIParser; }; class ExternalInfoImpl : public ExternalInfo { RTTI_CLASS public: ExternalInfoImpl(ParsedSystemId &parsedSysid); const StorageObjectSpec &spec(size_t i) const; size_t nSpecs() const; const ParsedSystemId &parsedSystemId() const; void noteRS(Offset); void noteStorageObjectEnd(Offset); void noteInsertedRSs(); void setDecoder(size_t i, Decoder *); void setId(size_t i, StringC &); void getId(size_t i, StringC &) const; Boolean convertOffset(Offset, StorageObjectLocation &) const; private: ParsedSystemId parsedSysid_; NCVector position_; size_t currentIndex_; // list of inserted RSs OffsetOrderedList rsList_; Boolean notrack_; Mutex mutex_; }; class ExternalInputSource : public InputSource { public: ExternalInputSource(ParsedSystemId &parsedSysid, const CharsetInfo &internalCharset, const CharsetInfo &docCharset, Boolean internalCharsetIsDocCharset, Char replacementChar, InputSourceOrigin *origin, unsigned flags); void pushCharRef(Char, const NamedCharRef &); ~ExternalInputSource(); private: Xchar fill(Messenger &); Boolean rewind(Messenger &); void willNotRewind(); void setDocCharset(const CharsetInfo &, const CharsetInfo &); void willNotSetDocCharset(); void init(); void noteRS(); void noteRSAt(const Char *); void reallocateBuffer(size_t size); void insertChar(Char); void buildMap(const CharsetInfo &internalCharset, const CharsetInfo &docCharset); void buildMap1(const CharsetInfo &, const CharsetInfo &); static const Char *findNextCr(const Char *start, const Char *end); static const Char *findNextLf(const Char *start, const Char *end); static const Char *findNextCrOrLf(const Char *start, const Char *end); ExternalInfoImpl *info_; Char *buf_; const Char *bufLim_; Offset bufLimOffset_; size_t bufSize_; size_t readSize_; NCVector > sov_; StorageObject *so_; size_t soIndex_; Boolean insertRS_; Decoder *decoder_; const char *leftOver_; size_t nLeftOver_; Boolean mayRewind_; Boolean maySetDocCharset_; Boolean mayNotExist_; enum RecordType { unknown, crUnknown, crlf, lf, cr, asis }; RecordType recordType_; Boolean zapEof_; Boolean internalCharsetIsDocCharset_; Char replacementChar_; Ptr > map_; }; class FSIParser { public: FSIParser(const StringC &, const CharsetInfo &idCharset, Boolean isNdata, const StorageObjectLocation *defLoc, const EntityManagerImpl *em, Messenger &mgr); Boolean parse(ParsedSystemId &parsedSysid); static const char *recordsName(StorageObjectSpec::Records records); struct RecordType { const char *name; StorageObjectSpec::Records value; }; private: Boolean handleInformal(size_t startIndex, ParsedSystemId &parsedSysid); Boolean convertId(StringC &, Xchar smcrd, const StorageManager *); Xchar get(); void unget(); StorageManager *lookupStorageType(const StringC &key, Boolean &neutral); Boolean matchKey(const StringC &, const char *); Boolean matchChar(Xchar, char); Boolean isS(Xchar); Boolean convertDigit(Xchar c, int &weight); void uncharref(StringC &); Boolean setAttributes(StorageObjectSpec &sos, Boolean neutral, Xchar &smcrd, Boolean &fold); Boolean setCatalogAttributes(ParsedSystemId &parsedSysid); void setDefaults(StorageObjectSpec &sos); Boolean parseAttribute(StringC &token, Boolean &gotValue, StringC &value); Boolean lookupRecords(const StringC &token, StorageObjectSpec::Records &); void convertMinimumLiteral(const StringC &from, StringC &to); const StringC &str_; size_t strIndex_; Messenger &mgr_; const EntityManagerImpl *em_; const StorageObjectSpec *defSpec_; const StringC *defId_; const CharsetInfo &idCharset_; Boolean isNdata_; static RecordType recordTypeTable[]; }; const Char RS = '\n'; const Char RE = '\r'; ExtendEntityManager::~ExtendEntityManager() { } ExtendEntityManager::CatalogManager::~CatalogManager() { } ExtendEntityManager *ExtendEntityManager::make(StorageManager *sm, const InputCodingSystem *cs, const ConstPtr &csKit, Boolean internalCharsetIsDocCharset) { return new EntityManagerImpl(sm, cs, csKit, internalCharsetIsDocCharset); } Boolean ExtendEntityManager::externalize(const ExternalInfo *info, Offset off, StorageObjectLocation &loc) { if (!info) return false; const ExternalInfoImpl *p = DYNAMIC_CAST_CONST_PTR(ExternalInfoImpl, info); if (!p) return false; return p->convertOffset(off, loc); } const ParsedSystemId * ExtendEntityManager::externalInfoParsedSystemId(const ExternalInfo *info) { if (!info) return 0; const ExternalInfoImpl *p = DYNAMIC_CAST_CONST_PTR(ExternalInfoImpl, info); if (!p) return 0; return &p->parsedSystemId(); } EntityManagerImpl::EntityManagerImpl(StorageManager *defaultStorageManager, const InputCodingSystem *defaultCodingSystem, const ConstPtr &codingSystemKit, Boolean internalCharsetIsDocCharset) : defaultStorageManager_(defaultStorageManager), defaultCodingSystem_(defaultCodingSystem), codingSystemKit_(codingSystemKit), internalCharsetIsDocCharset_(internalCharsetIsDocCharset) { } Boolean EntityManagerImpl::internalCharsetIsDocCharset() const { return internalCharsetIsDocCharset_; } const CharsetInfo &EntityManagerImpl::charset() const { return codingSystemKit_->systemCharset(); } InputSource *EntityManagerImpl::open(const StringC &sysid, const CharsetInfo &docCharset, InputSourceOrigin *origin, unsigned flags, Messenger &mgr) { ParsedSystemId parsedSysid; if (!parseSystemId(sysid, docCharset, (flags & ExtendEntityManager::isNdata) != 0, 0, mgr, parsedSysid) || !catalogManager_->mapCatalog(parsedSysid, this, mgr)) return 0; return new ExternalInputSource(parsedSysid, charset(), docCharset, internalCharsetIsDocCharset_, codingSystemKit_->replacementChar(), origin, flags); } ConstPtr EntityManagerImpl::makeCatalog(StringC &systemId, const CharsetInfo &docCharset, Messenger &mgr) { return catalogManager_->makeCatalog(systemId, docCharset, this, mgr); } Boolean EntityManagerImpl::mergeSystemIds(const Vector &sysids, Boolean mapCatalogDocument, const CharsetInfo &docCharset, Messenger &mgr, StringC &result) const { ParsedSystemId parsedSysid; if (mapCatalogDocument) { parsedSysid.maps.resize(parsedSysid.maps.size() + 1); parsedSysid.maps.back().type = ParsedSystemId::Map::catalogDocument; } for (size_t i = 0; i < sysids.size(); i++) if (!parseSystemId(sysids[i], docCharset, 0, 0, mgr, parsedSysid)) return 0; parsedSysid.unparse(internalCharset(docCharset), 0, result); return 1; } Boolean EntityManagerImpl::expandSystemId(const StringC &str, const Location &defLoc, Boolean isNdata, const CharsetInfo &docCharset, const StringC *mapCatalogPublic, Messenger &mgr, StringC &result) { ParsedSystemId parsedSysid; StorageObjectLocation defSoLoc; const StorageObjectLocation *defSoLocP; if (defLocation(defLoc, defSoLoc)) defSoLocP = &defSoLoc; else defSoLocP = 0; if (!parseSystemId(str, docCharset, isNdata, defSoLocP, mgr, parsedSysid)) return 0; if (mapCatalogPublic) { ParsedSystemId::Map map; map.type = ParsedSystemId::Map::catalogPublic; map.publicId = *mapCatalogPublic; parsedSysid.maps.insert(parsedSysid.maps.begin(), 1, map); } parsedSysid.unparse(internalCharset(docCharset), isNdata, result); return 1; } Boolean EntityManagerImpl::parseSystemId(const StringC &str, const CharsetInfo &docCharset, Boolean isNdata, const StorageObjectLocation *defLoc, Messenger &mgr, ParsedSystemId &parsedSysid) const { FSIParser fsiParser(str, internalCharset(docCharset), isNdata, defLoc, this, mgr); return fsiParser.parse(parsedSysid); } StorageManager * EntityManagerImpl::guessStorageType(const StringC &type, const CharsetInfo &internalCharset) const { for (size_t i = 0; i < storageManagers_.size(); i++) if (storageManagers_[i]->guessIsId(type, internalCharset)) return storageManagers_[i].pointer(); if (defaultStorageManager_->guessIsId(type, internalCharset)) return defaultStorageManager_.pointer(); return 0; } StorageManager * EntityManagerImpl::lookupStorageType(const StringC &type, const CharsetInfo &internalCharset) const { if (type.size() == 0) return 0; if (matchKey(type, defaultStorageManager_->type(), internalCharset)) return defaultStorageManager_.pointer(); for (size_t i = 0; i < storageManagers_.size(); i++) if (matchKey(type, storageManagers_[i]->type(), internalCharset)) return storageManagers_[i].pointer(); return 0; } StorageManager * EntityManagerImpl::lookupStorageType(const char *type) const { if (type == defaultStorageManager_->type()) return defaultStorageManager_.pointer(); for (size_t i = 0; i < storageManagers_.size(); i++) if (type == storageManagers_[i]->type()) return storageManagers_[i].pointer(); return 0; } const InputCodingSystem * EntityManagerImpl::lookupCodingSystem(const StringC &type, const CharsetInfo &internalCharset, Boolean isBctf, const char *&name) const { return codingSystemKit_->makeInputCodingSystem(type, internalCharset, isBctf, name); } Boolean EntityManagerImpl::matchKey(const StringC &type, const char *s, const CharsetInfo &internalCharset) { if (strlen(s) != type.size()) return false; for (size_t i = 0; i < type.size(); i++) if (internalCharset.execToDesc(toupper(s[i])) != type[i] && internalCharset.execToDesc(tolower(s[i])) != type[i]) return false; return true; } void EntityManagerImpl::registerStorageManager(StorageManager *sm) { storageManagers_.resize(storageManagers_.size() + 1); storageManagers_.back() = sm; } void EntityManagerImpl::setCatalogManager(CatalogManager *catalogManager) { catalogManager_ = catalogManager; } Boolean EntityManagerImpl::defLocation(const Location &defLocation, StorageObjectLocation &soLoc) { Offset off; const ExternalInfo *info; const Origin *origin = defLocation.origin().pointer(); Index index = defLocation.index(); for (;;) { if (!origin) return 0; const InputSourceOrigin *inputSourceOrigin = origin->asInputSourceOrigin(); if (inputSourceOrigin) { off = inputSourceOrigin->startOffset(index); info = inputSourceOrigin->externalInfo(); if (info) break; if (!inputSourceOrigin->defLocation(off, origin, index)) return 0; } else { const Location &parentLoc = origin->parent(); origin = parentLoc.origin().pointer(); index = parentLoc.index(); } } return ExtendEntityManager::externalize(info, off, soLoc); } class UnbufferingStorageObject : public StorageObject { public: UnbufferingStorageObject(StorageObject *sub, const Boolean *unbuffer) : sub_(sub), buf_(0), bufAvail_(0), bufNext_(0), unbuffer_(unbuffer) { } ~UnbufferingStorageObject() { delete [] buf_; } Boolean read(char *buf, size_t bufSize, Messenger &mgr, size_t &nread) { if (bufNext_ >= bufAvail_) { bufAvail_ = bufNext_ = 0; if (!*unbuffer_) return sub_->read(buf, bufSize, mgr, nread); if (buf_ == 0) buf_ = new char[bufSize_ = bufSize]; if (!sub_->read(buf_, bufSize_, mgr, bufAvail_)) return 0; } *buf = buf_[bufNext_++]; nread = 1; return 1; } Boolean rewind(Messenger &mgr) { bufAvail_ = bufNext_ = 0; return sub_->rewind(mgr); } void willNotRewind() { sub_->willNotRewind(); } size_t getBlockSize() const { return sub_->getBlockSize(); } private: Owner sub_; size_t bufSize_; size_t bufAvail_; size_t bufNext_; char *buf_; const Boolean *unbuffer_; }; class MappingDecoder : public Decoder { public: MappingDecoder(Decoder *, const ConstPtr > &); Boolean convertOffset(unsigned long &offset) const; size_t decode(Char *, const char *, size_t, const char **); private: Owner sub_; ConstPtr > map_; }; MappingDecoder::MappingDecoder(Decoder *sub, const ConstPtr > &map) : Decoder(sub->minBytesPerChar()), sub_(sub), map_(map) { } size_t MappingDecoder::decode(Char *to, const char *s, size_t slen, const char **rest) { size_t n = sub_->decode(to, s, slen, rest); const CharMap &map = *map_; for (size_t i = 0; i < n; i++) { Unsigned32 d = map[to[i]]; if (d & (unsigned(1) << 31)) to[i] = (d & ~(unsigned(1) << 31)); else to[i] += d; } return n; } Boolean MappingDecoder::convertOffset(unsigned long &offset) const { return sub_->convertOffset(offset); } ExternalInputSource::ExternalInputSource(ParsedSystemId &parsedSysid, const CharsetInfo &systemCharset, const CharsetInfo &docCharset, Boolean internalCharsetIsDocCharset, Char replacementChar, InputSourceOrigin *origin, unsigned flags) : InputSource(origin, 0, 0), mayRewind_((flags & EntityManager::mayRewind) != 0), mayNotExist_((flags & ExtendEntityManager::mayNotExist) != 0), sov_(parsedSysid.size()), internalCharsetIsDocCharset_(internalCharsetIsDocCharset), // hack maySetDocCharset_((flags & EntityManager::maySetDocCharset) != 0), replacementChar_(replacementChar) { for (size_t i = 0; i < parsedSysid.size(); i++) { if (parsedSysid[i].codingSystemType != (internalCharsetIsDocCharset ? StorageObjectSpec::bctf : StorageObjectSpec::encoding) && parsedSysid[i].codingSystemType != StorageObjectSpec::special) { map_ = new CharMapResource; buildMap(systemCharset, docCharset); break; } } for (size_t i = 0; i < sov_.size(); i++) sov_[i] = 0; init(); info_ = new ExternalInfoImpl(parsedSysid); origin->setExternalInfo(info_); } void ExternalInputSource::setDocCharset(const CharsetInfo &docCharset, const CharsetInfo &systemCharset) { if (!map_.isNull()) buildMap(systemCharset, docCharset); willNotSetDocCharset(); } void ExternalInputSource::willNotSetDocCharset() { maySetDocCharset_ = 0; } void ExternalInputSource::buildMap(const CharsetInfo &systemCharset, const CharsetInfo &docCharset) { CharMap &map = *map_; // FIXME How should invalidChar be chosen when internalCharsetIsDocCharset_? Char invalidChar = internalCharsetIsDocCharset_ ? 0 : replacementChar_; map.setAll((Unsigned32(1) << 31) | invalidChar); if (internalCharsetIsDocCharset_) buildMap1(systemCharset, docCharset); else buildMap1(docCharset, systemCharset); } void ExternalInputSource::buildMap1(const CharsetInfo &fromCharset, const CharsetInfo &toCharset) { UnivCharsetDescIter iter(fromCharset.desc()); for (;;) { WideChar descMin, descMax; UnivChar univMin; if (!iter.next(descMin, descMax, univMin)) break; if (descMin > charMax) break; if (descMax > charMax) descMax = charMax; WideChar totalCount = 1 + (descMax - descMin); do { WideChar count; WideChar toMin; ISet set; int nMap = toCharset.univToDesc(univMin, toMin, set, count); if (count > totalCount) count = totalCount; if (nMap && toMin <= charMax) { Char toMax; if (count - 1 > charMax - toMin) toMax = charMax; else toMax = toMin + (count - 1); map_->setRange(descMin, descMin + (toMax - toMin), Char(toMin - descMin)); } descMin += count; univMin += count; totalCount -= count; } while (totalCount > 0); } } void ExternalInputSource::init() { so_ = 0; buf_ = 0; bufSize_ = 0; bufLim_ = 0; bufLimOffset_ = 0; insertRS_ = true; soIndex_ = 0; leftOver_ = 0; nLeftOver_ = 0; } ExternalInputSource::~ExternalInputSource() { if (buf_) delete [] buf_; } Boolean ExternalInputSource::rewind(Messenger &mgr) { reset(0, 0); if (buf_) delete [] buf_; // reset makes a new EntityOrigin ParsedSystemId parsedSysid(info_->parsedSystemId()); ExternalInfoImpl *oldInfo = info_; info_ = new ExternalInfoImpl(parsedSysid); so_ = 0; for (size_t i = 0; i < soIndex_; i++) { if (sov_[i] && !sov_[i]->rewind(mgr)) return 0; StringC tem; oldInfo->getId(i, tem); info_->setId(i, tem); } inputSourceOrigin()->setExternalInfo(info_); init(); return 1; } void ExternalInputSource::willNotRewind() { for (size_t i = 0; i < sov_.size(); i++) if (sov_[i]) sov_[i]->willNotRewind(); mayRewind_ = 0; } // Round up N so that it is a power of TO. // TO must be a power of 2. inline size_t roundUp(size_t n, size_t to) { return (n + (to - 1)) & ~(to - 1); } inline void ExternalInputSource::noteRSAt(const Char *p) { info_->noteRS(bufLimOffset_ - (bufLim_ - p)); } inline void ExternalInputSource::noteRS() { noteRSAt(cur()); } Xchar ExternalInputSource::fill(Messenger &mgr) { ASSERT(cur() == end()); while (end() >= bufLim_) { // need more data while (so_ == 0) { if (soIndex_ >= sov_.size()) return eE; if (soIndex_ > 0) info_->noteStorageObjectEnd(bufLimOffset_ - (bufLim_ - end())); const StorageObjectSpec &spec = info_->spec(soIndex_); if (!sov_[soIndex_]) { StringC id; if (mayNotExist_) { NullMessenger nullMgr; sov_[soIndex_] = spec.storageManager->makeStorageObject(spec.specId, spec.baseId, spec.search, mayRewind_, nullMgr, id); } else sov_[soIndex_] = spec.storageManager->makeStorageObject(spec.specId, spec.baseId, spec.search, mayRewind_, mgr, id); info_->setId(soIndex_, id); } so_ = sov_[soIndex_].pointer(); if (so_) { decoder_ = spec.codingSystem->makeDecoder(); if (spec.codingSystemType != StorageObjectSpec::special && spec.codingSystemType != (internalCharsetIsDocCharset_ ? StorageObjectSpec::bctf : StorageObjectSpec::encoding)) { decoder_ = new MappingDecoder(decoder_, map_); if (maySetDocCharset_) { sov_[soIndex_] = new UnbufferingStorageObject(sov_[soIndex_].extract(), &maySetDocCharset_); so_ = sov_[soIndex_].pointer(); } } info_->setDecoder(soIndex_, decoder_); zapEof_ = spec.zapEof; switch (spec.records) { case StorageObjectSpec::asis: recordType_ = asis; insertRS_ = false; break; case StorageObjectSpec::cr: recordType_ = cr; break; case StorageObjectSpec::lf: recordType_ = lf; break; case StorageObjectSpec::crlf: recordType_ = crlf; break; case StorageObjectSpec::find: recordType_ = unknown; break; default: CANNOT_HAPPEN(); } soIndex_++; readSize_ = so_->getBlockSize(); nLeftOver_ = 0; break; } else setAccessError(); soIndex_++; } size_t keepSize = end() - start(); const size_t align = sizeof(int)/sizeof(Char); size_t readSizeChars = (readSize_ + (sizeof(Char) - 1))/sizeof(Char); readSizeChars = roundUp(readSizeChars, align); size_t neededSize; // in Chars size_t startOffset; // compute neededSize and readSize unsigned minBytesPerChar = decoder_->minBytesPerChar(); if (nLeftOver_ == 0 && minBytesPerChar >= sizeof(Char)) { // In this case we want to do decoding in place. // FIXME It might be a win on some systems (Irix?) to arrange that the // read buffer is on a page boundary. if (keepSize >= size_t(-1)/sizeof(Char) - (align - 1) - insertRS_) abort(); // FIXME throw an exception // Now size_t(-1)/sizeof(Char) - (align - 1) - insertRS_ - keepSize > 0 if (readSizeChars > size_t(-1)/sizeof(Char) - (align - 1) - insertRS_ - keepSize) abort(); neededSize = roundUp(readSizeChars + keepSize + insertRS_, align); startOffset = ((neededSize > bufSize_ ? neededSize : bufSize_) - readSizeChars - insertRS_ - keepSize); } else { // Needs to be room for everything before decoding. neededSize = (keepSize + insertRS_ + readSizeChars + (nLeftOver_ + sizeof(Char) - 1)/sizeof(Char)); // Also must be room for everything after decoding. size_t neededSize2 = (keepSize + insertRS_ // all the converted characters + (nLeftOver_ + readSize_)/minBytesPerChar // enough Chars to contain left over bytes + ((readSize_ % minBytesPerChar + sizeof(Char) - 1) / sizeof(Char))); if (neededSize2 > neededSize) neededSize = neededSize2; neededSize = roundUp(neededSize, align); if (neededSize > size_t(-1)/sizeof(Char)) abort(); startOffset = 0; } if (bufSize_ < neededSize) reallocateBuffer(neededSize); Char *newStart = buf_ + startOffset; if (newStart != start() && keepSize > 0) memmove(newStart, start(), keepSize*sizeof(Char)); char *bytesStart = (char *)(buf_ + bufSize_ - readSizeChars) - nLeftOver_; if (nLeftOver_ > 0 && leftOver_ != bytesStart) memmove(bytesStart, leftOver_, nLeftOver_); moveStart(newStart); bufLim_ = end(); size_t nread; if (so_->read((char *)(buf_ + bufSize_ - readSizeChars), readSize_, mgr, nread)) { if (nread > 0) { const char *bytesEnd = bytesStart + nLeftOver_ + nread; size_t nChars = decoder_->decode((Char *)end() + insertRS_, bytesStart, nLeftOver_ + nread - (zapEof_ && bytesEnd[-1] == EOFCHAR), &leftOver_); nLeftOver_ = bytesEnd - leftOver_; if (nChars > 0) { if (insertRS_) { noteRS(); *(Char *)end() = RS; advanceEnd(end() + 1); insertRS_ = false; bufLim_ += 1; bufLimOffset_ += 1; } bufLim_ += nChars; bufLimOffset_ += nChars; break; } } } else so_ = 0; } ASSERT(end() < bufLim_); if (insertRS_) { noteRS(); insertChar(RS); insertRS_ = false; bufLimOffset_ += 1; } switch (recordType_) { case unknown: { const Char *e = findNextCrOrLf(end(), bufLim_); if (e) { if (*e == '\n') { recordType_ = lf; info_->noteInsertedRSs(); *(Char *)e = RE; advanceEnd(e + 1); insertRS_ = true; } else { if (e + 1 < bufLim_) { if (e[1] == '\n') { recordType_ = crlf; advanceEnd(e + 1); if (e + 2 == bufLim_) { bufLim_--; bufLimOffset_--; insertRS_ = true; } } else { advanceEnd(e + 1); recordType_ = cr; info_->noteInsertedRSs(); insertRS_ = true; } } else { recordType_ = crUnknown; advanceEnd(e + 1); } } } else advanceEnd(bufLim_); } break; case crUnknown: { if (*cur() == '\n') { noteRS(); advanceEnd(cur() + 1); recordType_ = crlf; } else { advanceEnd(cur() + 1); insertRS_ = true; recordType_ = cr; info_->noteInsertedRSs(); } } break; case lf: { Char *e = (Char *)findNextLf(end(), bufLim_); if (e) { advanceEnd(e + 1); *e = RE; insertRS_ = true; } else advanceEnd(bufLim_); } break; case cr: { const Char *e = findNextCr(end(), bufLim_); if (e) { advanceEnd(e + 1); insertRS_ = true; } else advanceEnd(bufLim_); } break; case crlf: { const Char *e = end(); for (;;) { e = findNextLf(e, bufLim_); if (!e) { advanceEnd(bufLim_); break; } // Need to delete final RS if not followed by anything. if (e + 1 == bufLim_) { bufLim_--; bufLimOffset_--; advanceEnd(e); insertRS_ = true; if (cur() == end()) return fill(mgr); break; } noteRSAt(e); e++; } } break; case asis: advanceEnd(bufLim_); break; default: CANNOT_HAPPEN(); } ASSERT(cur() < end()); return nextChar(); } const Char *ExternalInputSource::findNextCr(const Char *start, const Char *end) { for (; start < end; start++) if (*start == '\r') return start; return 0; } const Char *ExternalInputSource::findNextLf(const Char *start, const Char *end) { for (; start < end; start++) if (*start == '\n') return start; return 0; } const Char *ExternalInputSource::findNextCrOrLf(const Char *start, const Char *end) { for (; start < end; start++) if (*start == '\n' || *start == '\r') return start; return 0; } void ExternalInputSource::pushCharRef(Char ch, const NamedCharRef &ref) { ASSERT(cur() == start()); noteCharRef(startIndex() + (cur() - start()), ref); insertChar(ch); } void ExternalInputSource::insertChar(Char ch) { if (start() > buf_) { if (cur() > start()) memmove((Char *)start() - 1, start(), (cur() - start())*sizeof(Char)); moveLeft(); *(Char *)cur() = ch; } else { // must have start == buf if (buf_ + (bufSize_ - (nLeftOver_ + sizeof(Char) - 1)/sizeof(Char)) == bufLim_) { if (bufSize_ == size_t(-1)) abort(); // FIXME throw an exception reallocateBuffer(bufSize_ + 1); } else if (nLeftOver_ > 0 && ((char *)(bufLim_ + 1) > leftOver_)) { char *s = (char *)(buf_ + bufSize_) - nLeftOver_; memmove(s, leftOver_, nLeftOver_); leftOver_ = s; } if (cur() < bufLim_) memmove((Char *)cur() + 1, cur(), (bufLim_ - cur())*sizeof(Char)); *(Char *)cur() = ch; advanceEnd(end() + 1); bufLim_ += 1; } } void ExternalInputSource::reallocateBuffer(size_t newSize) { Char *newBuf = new Char[newSize]; memcpy(newBuf, buf_, bufSize_*sizeof(Char)); bufSize_ = newSize; changeBuffer(newBuf, buf_); bufLim_ = newBuf + (bufLim_ - buf_); if (nLeftOver_ > 0) { char *s = (char *)(newBuf + bufSize_) - nLeftOver_; memmove(s, (char *)newBuf + (leftOver_ - (char *)buf_), nLeftOver_); leftOver_ = s; } delete [] buf_; buf_ = newBuf; } RTTI_DEF1(ExternalInfoImpl, ExternalInfo) ExternalInfoImpl::ExternalInfoImpl(ParsedSystemId &parsedSysid) : currentIndex_(0), position_(parsedSysid.size()) { parsedSysid.swap(parsedSysid_); if (parsedSysid_.size() > 0) notrack_ = parsedSysid_[0].notrack; } void ExternalInfoImpl::setId(size_t i, StringC &id) { Mutex::Lock lock(&mutex_); id.swap(position_[i].id); } void ExternalInfoImpl::getId(size_t i, StringC &id) const { Mutex::Lock lock(&((ExternalInfoImpl *)this)->mutex_); id = position_[i].id; } void ExternalInfoImpl::setDecoder(size_t i, Decoder *decoder) { Mutex::Lock lock(&mutex_); position_[i].decoder = decoder; } void ExternalInfoImpl::noteInsertedRSs() { position_[currentIndex_].insertedRSs = 1; } void ExternalInfoImpl::noteRS(Offset offset) { // We do the locking in OffsetOrderedList. if (!notrack_) rsList_.append(offset); if (offset == (currentIndex_ == 0 ? 0 : position_[currentIndex_- 1].endOffset)) position_[currentIndex_].startsWithRS = 1; } void ExternalInfoImpl::noteStorageObjectEnd(Offset offset) { Mutex::Lock lock(&mutex_); ASSERT(currentIndex_ < position_.size()); // The last endOffset_ must be -1. if (currentIndex_ < position_.size() - 1) { position_[currentIndex_++].endOffset = offset; position_[currentIndex_].line1RS = rsList_.size(); notrack_ = parsedSysid_[currentIndex_].notrack; } } Boolean ExternalInfoImpl::convertOffset(Offset off, StorageObjectLocation &ret) const { Mutex::Lock lock(&((ExternalInfoImpl *)this)->mutex_); if (off == Offset(-1) || position_.size() == 0) return false; // the last endOffset_ is Offset(-1), so this will // terminate int i; for (i = 0; off >= position_[i].endOffset; i++) ; for (; position_[i].id.size() == 0; i--) if (i == 0) return false; ret.storageObjectSpec = &parsedSysid_[i]; ret.actualStorageId = position_[i].id; Offset startOffset = i == 0 ? 0 : position_[i - 1].endOffset; ret.storageObjectOffset = off - startOffset; ret.byteIndex = ret.storageObjectOffset; if (parsedSysid_[i].notrack || parsedSysid_[i].records == StorageObjectSpec::asis) { ret.lineNumber = (unsigned long)-1; if (parsedSysid_[i].records != StorageObjectSpec::asis) { if (position_[i].insertedRSs) ret.byteIndex = (unsigned long)-1; else if (ret.byteIndex > 0 && position_[i].startsWithRS) ret.byteIndex--; // first RS is inserted } ret.columnNumber = (unsigned long)-1; return true; } else { size_t line1RS = position_[i].line1RS; // line1RS is now the number of RSs that are before or on the current line. size_t j; Offset colStart; if (rsList_.findPreceding(off, j, colStart)) { if (position_[i].insertedRSs) ret.byteIndex -= j + 1 - line1RS; else if (ret.byteIndex > 0 && position_[i].startsWithRS) ret.byteIndex--; // first RS is inserted j++; colStart++; } else { j = 0; colStart = 0; } // j is now the number of RSs that are before or on the current line // colStart is the offset of the first column ret.lineNumber = j - line1RS + 1 - position_[i].startsWithRS; // the offset of the first column if (colStart < startOffset) colStart = startOffset; // the RS that starts a line will be in column 0; // the first real character of a line will be column 1 ret.columnNumber = 1 + off - colStart; } if (!position_[i].decoder || !position_[i].decoder->convertOffset(ret.byteIndex)) ret.byteIndex = (unsigned long)-1; return true; } const StorageObjectSpec &ExternalInfoImpl::spec(size_t i) const { return parsedSysid_[i]; } size_t ExternalInfoImpl::nSpecs() const { return parsedSysid_.size(); } const ParsedSystemId &ExternalInfoImpl::parsedSystemId() const { return parsedSysid_; } StorageObjectSpec::StorageObjectSpec() : storageManager(0), codingSystem(0), codingSystemName(0), notrack(0), records(find), zapEof(1), search(1) { } StorageObjectSpec::StorageObjectSpec(const StorageObjectSpec& x) : codingSystemName(x.codingSystemName), codingSystem(x.codingSystem), specId(x.specId), baseId(x.baseId), records(x.records), notrack(x.notrack), zapEof(x.zapEof), search(x.search), codingSystemType(x.codingSystemType) { } StorageObjectSpec& StorageObjectSpec::operator=(const StorageObjectSpec& x) { if (this != &x) { codingSystemName = x.codingSystemName; codingSystem = x.codingSystem; specId = x.specId; baseId = x.baseId; records = x.records; notrack = x.notrack; zapEof = x.zapEof; search = x.search; codingSystemType = x.codingSystemType; } return *this; } StorageObjectSpec::~StorageObjectSpec() { } StorageObjectPosition::StorageObjectPosition() : endOffset(Offset(-1)), line1RS(0), startsWithRS(0), insertedRSs(0) { } FSIParser::FSIParser(const StringC &str, const CharsetInfo &idCharset, Boolean isNdata, const StorageObjectLocation *defLoc, const EntityManagerImpl *em, Messenger &mgr) : str_(str), strIndex_(0), idCharset_(idCharset), isNdata_(isNdata), defSpec_(defLoc ? defLoc->storageObjectSpec : 0), defId_(defLoc ? &defLoc->actualStorageId : 0), em_(em), mgr_(mgr) { } Xchar FSIParser::get() { if (strIndex_ < str_.size()) return str_[strIndex_++]; else return -1; } void FSIParser::unget() { if (strIndex_ > 0) strIndex_ -= 1; } Boolean FSIParser::matchKey(const StringC &str, const char *s) { if (strlen(s) != str.size()) return false; for (size_t i = 0; i < str.size(); i++) if (idCharset_.execToDesc(toupper(s[i])) != str[i] && idCharset_.execToDesc(tolower(s[i])) != str[i]) return false; return true; } Boolean FSIParser::matchChar(Xchar ch, char execC) { return ch == idCharset_.execToDesc(execC); } Boolean FSIParser::isS(Xchar c) { return (matchChar(c, ' ') || matchChar(c, '\r') || matchChar(c, '\n') || matchChar(c, ' ')); } Boolean FSIParser::convertDigit(Xchar c, int &weight) { static const char digits[] = "0123456789"; for (int i = 0; digits[i] != '\0'; i++) if (matchChar(c, digits[i])) { weight = i; return 1; } return 0; } Boolean FSIParser::parse(ParsedSystemId &parsedSysid) { size_t startIndex = strIndex_; if (!matchChar(get(), '<')) return handleInformal(startIndex, parsedSysid); StringC key; for (;;) { Xchar c = get(); if (c == -1) return handleInformal(startIndex, parsedSysid); if (isS(c) || matchChar(c, '>')) break; key += Char(c); } unget(); if (matchKey(key, "CATALOG")) { if (!setCatalogAttributes(parsedSysid)) return 0; return parse(parsedSysid); } Boolean neutral; StorageManager *sm = lookupStorageType(key, neutral); if (!sm) return handleInformal(startIndex, parsedSysid); for (;;) { parsedSysid.resize(parsedSysid.size() + 1); StorageObjectSpec &sos = parsedSysid.back(); sos.storageManager = sm; Xchar smcrd; Boolean fold; if (!setAttributes(sos, neutral, smcrd, fold)) return 0; sm = 0; StringC id; Boolean hadData = 0; for (;;) { Xchar c = get(); if (c == -1) break; if (matchChar(c, '<')) { hadData = 1; Char stago = c; key.resize(0); for (;;) { c = get(); if (c == -1) { id += stago; id += key; break; } if (isS(c) || matchChar(c, '>')) { unget(); sm = lookupStorageType(key, neutral); if (!sm) { id += stago; id += key; } break; } key += c; } if (sm) break; } else if (!((!hadData && matchChar(c, '\r')) // ignored RE || matchChar(c, '\n') )) { // ignored RS hadData = 1; id += c; } } if (id.size() > 0 && matchChar(id[id.size() - 1], '\r')) id.resize(id.size() - 1); uncharref(id); id.swap(sos.specId); if (!convertId(sos.specId, smcrd, sos.storageManager)) return 0; if (neutral) { if (!sos.storageManager->transformNeutral(sos.specId, fold, mgr_)) return 0; } if (sos.storageManager->resolveRelative(sos.baseId, sos.specId, sos.search)) sos.baseId.resize(0); if (!sm) break; } return 1; } Boolean FSIParser::handleInformal(size_t index, ParsedSystemId &parsedSysid) { parsedSysid.resize(parsedSysid.size() + 1); StorageObjectSpec &sos = parsedSysid.back(); sos.specId.assign(str_.data() + index, str_.size() - index); sos.storageManager = em_->guessStorageType(sos.specId, idCharset_); if (!sos.storageManager) { if (defSpec_ && defSpec_->storageManager->inheritable()) sos.storageManager = defSpec_->storageManager; else sos.storageManager = em_->defaultStorageManager_.pointer(); } setDefaults(sos); if (!convertId(sos.specId, -1, sos.storageManager)) return 0; if (sos.storageManager->resolveRelative(sos.baseId, sos.specId, sos.search)) sos.baseId.resize(0); return 1; } StorageManager *FSIParser::lookupStorageType(const StringC &key, Boolean &neutral) { if (matchKey(key, "NEUTRAL")) { neutral = 1; if (defSpec_ && defSpec_->storageManager->inheritable()) return defSpec_->storageManager; else return em_->defaultStorageManager_.pointer(); } else { StorageManager *sm = em_->lookupStorageType(key, idCharset_); if (sm) neutral = 0; return sm; } } Boolean FSIParser::setCatalogAttributes(ParsedSystemId &parsedSysid) { Boolean hadPublic = 0; parsedSysid.maps.resize(parsedSysid.maps.size() + 1); parsedSysid.maps.back().type = ParsedSystemId::Map::catalogDocument; for (;;) { StringC token, value; Boolean gotValue; if (!parseAttribute(token, gotValue, value)) { mgr_.message(EntityManagerMessages::fsiSyntax, StringMessageArg(str_)); return 0; } if (token.size() == 0) break; if (matchKey(token, "PUBLIC")) { if (hadPublic) mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, StringMessageArg(idCharset_.execToDesc("PUBLIC"))); else if (gotValue) { convertMinimumLiteral(value, parsedSysid.maps.back().publicId); parsedSysid.maps.back().type = ParsedSystemId::Map::catalogPublic; } else mgr_.message(EntityManagerMessages::fsiMissingValue, StringMessageArg(token)); hadPublic = 1; } else mgr_.message(gotValue ? EntityManagerMessages::fsiUnsupportedAttribute : EntityManagerMessages::fsiUnsupportedAttributeToken, StringMessageArg(token)); } return 1; } void FSIParser::convertMinimumLiteral(const StringC &from, StringC &to) { // Do just enough to ensure it can be reparsed. to.resize(0); for (size_t i = 0; i < from.size(); i++) { Char c = from[i]; if (matchChar(c, '"') || matchChar(c, '#')) mgr_.message(EntityManagerMessages::fsiLookupChar, NumberMessageArg(c)); else if (matchChar(c, ' ')) { if (to.size() && to[to.size() - 1] != c) to += c; } else to += c; } if (to.size() && matchChar(to[to.size() - 1], ' ')) to.resize(to.size() - 1); } // FIXME This should be table driven. Boolean FSIParser::setAttributes(StorageObjectSpec &sos, Boolean neutral, Xchar &smcrd, Boolean &fold) { Boolean hadBctf = 0; Boolean hadEncoding = 0; Boolean hadTracking = 0; Boolean hadSmcrd = 0; smcrd = -1; fold = 1; Boolean hadRecords = 0; Boolean hadBase = 0; Boolean hadZapeof = 0; Boolean hadSearch = 0; Boolean hadFold = 0; StorageObjectSpec::Records records; setDefaults(sos); for (;;) { StringC token, value; Boolean gotValue; if (!parseAttribute(token, gotValue, value)) { mgr_.message(EntityManagerMessages::fsiSyntax, StringMessageArg(str_)); return 0; } if (token.size() == 0) break; if (matchKey(token, "BCTF")) { if (sos.storageManager->requiredCodingSystem()) mgr_.message(EntityManagerMessages::fsiBctfEncodingNotApplicable); else if (hadBctf) mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, StringMessageArg(token)); else if (hadEncoding) mgr_.message(EntityManagerMessages::fsiBctfAndEncoding); else if (gotValue) { const char *codingSystemName; const InputCodingSystem *codingSystem = em_->lookupCodingSystem(value, idCharset_, 1, codingSystemName); if (codingSystem) { sos.codingSystem = codingSystem; sos.codingSystemName = codingSystemName; sos.codingSystemType = StorageObjectSpec::bctf; } else if (matchKey(value, "SAME")) { if (!isNdata_) { if (defSpec_) { sos.codingSystem = defSpec_->codingSystem; sos.codingSystemName = defSpec_->codingSystemName; sos.codingSystemType = defSpec_->codingSystemType; } else { sos.codingSystem = em_->defaultCodingSystem_; sos.codingSystemName = 0; sos.codingSystemType = (em_->internalCharsetIsDocCharset_ ? StorageObjectSpec::bctf : StorageObjectSpec::encoding); } } } else mgr_.message(EntityManagerMessages::fsiUnknownBctf, StringMessageArg(value)); } else mgr_.message(EntityManagerMessages::fsiMissingValue, StringMessageArg(token)); hadBctf = 1; } else if (matchKey(token, "ENCODING")) { if (sos.storageManager->requiredCodingSystem()) mgr_.message(EntityManagerMessages::fsiBctfEncodingNotApplicable); else if (hadEncoding) mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, StringMessageArg(token)); else if (hadBctf) mgr_.message(EntityManagerMessages::fsiBctfAndEncoding); else if (gotValue) { const char *codingSystemName; const InputCodingSystem *codingSystem = em_->lookupCodingSystem(value, idCharset_, 0, codingSystemName); if (codingSystem) { sos.codingSystem = codingSystem; sos.codingSystemName = codingSystemName; sos.codingSystemType = StorageObjectSpec::encoding; } else if (matchKey(value, "SAME")) { if (!isNdata_) { if (defSpec_) { sos.codingSystem = defSpec_->codingSystem; sos.codingSystemName = defSpec_->codingSystemName; sos.codingSystemType = defSpec_->codingSystemType; } else { sos.codingSystem = em_->defaultCodingSystem_; sos.codingSystemName = 0; sos.codingSystemType = (em_->internalCharsetIsDocCharset_ ? StorageObjectSpec::bctf : StorageObjectSpec::encoding); } } } else mgr_.message(EntityManagerMessages::fsiUnknownEncoding, StringMessageArg(value)); } else mgr_.message(EntityManagerMessages::fsiMissingValue, StringMessageArg(token)); hadEncoding = 1; } else if (matchKey(token, "TRACKING")) { if (hadTracking) mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, StringMessageArg(token)); else if (gotValue) { if (matchKey(value, "NOTRACK")) sos.notrack = 1; else if (!matchKey(value, "TRACK")) mgr_.message(EntityManagerMessages::fsiBadTracking, StringMessageArg(value)); } else mgr_.message(EntityManagerMessages::fsiMissingValue, StringMessageArg(token)); hadTracking = 1; } else if (matchKey(token, "ZAPEOF")) { if (sos.storageManager->requiredCodingSystem()) mgr_.message(EntityManagerMessages::fsiZapeofNotApplicable); else if (hadZapeof) mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, StringMessageArg(token)); else if (gotValue) { if (matchKey(value, "ZAPEOF")) sos.zapEof = 1; else if (matchKey(value, "NOZAPEOF")) sos.zapEof = 0; else mgr_.message(EntityManagerMessages::fsiBadZapeof, StringMessageArg(value)); } else sos.zapEof = 1; hadZapeof = 1; } else if (matchKey(token, "NOZAPEOF")) { if (sos.storageManager->requiredCodingSystem()) mgr_.message(EntityManagerMessages::fsiZapeofNotApplicable); else if (hadZapeof) mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, StringMessageArg(idCharset_.execToDesc("ZAPEOF"))); else if (gotValue) mgr_.message(EntityManagerMessages::fsiValueAsName, StringMessageArg(token)); else sos.zapEof = 0; hadZapeof = 1; } else if (matchKey(token, "SEARCH")) { if (hadSearch) mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, StringMessageArg(token)); else if (gotValue) { if (matchKey(value, "SEARCH")) sos.search = 1; else if (matchKey(value, "NOSEARCH")) sos.search = 0; else mgr_.message(EntityManagerMessages::fsiBadSearch, StringMessageArg(value)); } else sos.search = 1; hadSearch = 1; } else if (matchKey(token, "NOSEARCH")) { if (hadSearch) mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, StringMessageArg(idCharset_.execToDesc("SEARCH"))); else if (gotValue) mgr_.message(EntityManagerMessages::fsiValueAsName, StringMessageArg(token)); else sos.search = 0; hadSearch = 1; } else if (matchKey(token, "FOLD")) { if (!neutral) mgr_.message(EntityManagerMessages::fsiFoldNotNeutral); else if (hadFold) mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, StringMessageArg(token)); else if (gotValue) { if (matchKey(value, "FOLD")) fold = 1; else if (matchKey(value, "NOFOLD")) fold = 0; else mgr_.message(EntityManagerMessages::fsiBadFold, StringMessageArg(value)); } else fold = 1; hadFold = 1; } else if (matchKey(token, "NOFOLD")) { if (!neutral) mgr_.message(EntityManagerMessages::fsiFoldNotNeutral); else if (hadFold) mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, StringMessageArg(idCharset_.execToDesc("FOLD"))); else if (gotValue) mgr_.message(EntityManagerMessages::fsiValueAsName, StringMessageArg(token)); else fold = 0; hadFold = 1; } else if (matchKey(token, "SMCRD")) { if (hadSmcrd) mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, StringMessageArg(token)); else if (gotValue) { if (value.size() == 0) smcrd = -1; else if (value.size() == 1) smcrd = value[0]; else mgr_.message(EntityManagerMessages::fsiBadSmcrd, StringMessageArg(value)); } else mgr_.message(EntityManagerMessages::fsiMissingValue, StringMessageArg(token)); hadSmcrd = 1; } else if (matchKey(token, "RECORDS")) { if (sos.storageManager->requiresCr()) mgr_.message(EntityManagerMessages::fsiRecordsNotApplicable); else if (hadRecords) mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, StringMessageArg(token)); else if (gotValue) { if (!lookupRecords(value, sos.records)) mgr_.message(EntityManagerMessages::fsiUnsupportedRecords, StringMessageArg(value)); } else mgr_.message(EntityManagerMessages::fsiMissingValue, StringMessageArg(token)); hadRecords = 1; } else if (matchKey(token, "SOIBASE")) { if (hadBase) mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, StringMessageArg(token)); else if (gotValue) value.swap(sos.baseId); else { mgr_.message(EntityManagerMessages::fsiMissingValue, StringMessageArg(token)); sos.baseId.resize(0); } hadBase = 1; } else if (lookupRecords(token, records)) { if (sos.storageManager->requiresCr()) mgr_.message(EntityManagerMessages::fsiRecordsNotApplicable); else if (hadRecords) mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, StringMessageArg(idCharset_.execToDesc("RECORDS"))); else if (!gotValue) sos.records = records; else mgr_.message(EntityManagerMessages::fsiValueAsName, StringMessageArg(token)); hadRecords = 1; } else if (matchKey(token, "NOTRACK")) { if (hadTracking) mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, StringMessageArg(idCharset_.execToDesc("TRACKING"))); else if (!gotValue) sos.notrack = 1; else mgr_.message(EntityManagerMessages::fsiValueAsName, StringMessageArg(token)); hadTracking = 1; } else if (matchKey(token, "TRACK")) { if (hadTracking) mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, StringMessageArg(idCharset_.execToDesc("TRACKING"))); else if (gotValue) mgr_.message(EntityManagerMessages::fsiValueAsName, StringMessageArg(token)); hadTracking = 1; } else mgr_.message(gotValue ? EntityManagerMessages::fsiUnsupportedAttribute : EntityManagerMessages::fsiUnsupportedAttributeToken, StringMessageArg(token)); } if (hadBase && sos.baseId.size() > 0) { convertId(sos.baseId, smcrd, sos.storageManager); if (neutral) { if (!sos.storageManager->transformNeutral(sos.baseId, fold, mgr_)) sos.baseId.resize(0); } } if (!hadZapeof && hadRecords && sos.records == StorageObjectSpec::asis) sos.zapEof = 0; return 1; } FSIParser::RecordType FSIParser::recordTypeTable[] = { { "FIND", StorageObjectSpec::find }, { "ASIS", StorageObjectSpec::asis }, { "CR", StorageObjectSpec::cr }, { "LF", StorageObjectSpec::lf }, { "CRLF", StorageObjectSpec::crlf } }; const char *FSIParser::recordsName(StorageObjectSpec::Records records) { for (size_t i = 0; i < SIZEOF(recordTypeTable); i++) if (records == recordTypeTable[i].value) return recordTypeTable[i].name; return 0; } Boolean FSIParser::lookupRecords(const StringC &token, StorageObjectSpec::Records &result) { for (size_t i = 0; i < SIZEOF(recordTypeTable); i++) if (matchKey(token, recordTypeTable[i].name)) { result = recordTypeTable[i].value; return 1; } return 0; } void FSIParser::setDefaults(StorageObjectSpec &sos) { if (sos.storageManager->requiresCr()) sos.records = StorageObjectSpec::cr; else if (isNdata_ || (defSpec_ && defSpec_->records == StorageObjectSpec::asis)) sos.records = StorageObjectSpec::asis; if (isNdata_ || (defSpec_ && !defSpec_->zapEof)) sos.zapEof = 0; if (defSpec_ && defSpec_->storageManager == sos.storageManager) { if (defId_) sos.baseId = *defId_; else { sos.baseId = defSpec_->specId; sos.storageManager->resolveRelative(defSpec_->baseId, sos.baseId, 0); } } sos.codingSystem = sos.storageManager->requiredCodingSystem(); if (sos.codingSystem) { sos.zapEof = 0; // hack sos.codingSystemType = StorageObjectSpec::special; } else { sos.codingSystem = em_->defaultCodingSystem_; sos.codingSystemType = (em_->internalCharsetIsDocCharset_ ? StorageObjectSpec::bctf : StorageObjectSpec::encoding); if (isNdata_) { sos.codingSystem = em_->codingSystemKit_->identityInputCodingSystem(); sos.codingSystemType = StorageObjectSpec::special; } else if (defSpec_) { sos.codingSystem = defSpec_->codingSystem; sos.codingSystemName = defSpec_->codingSystemName; sos.codingSystemType = defSpec_->codingSystemType; } } } Boolean FSIParser::parseAttribute(StringC &token, Boolean &gotValue, StringC &value) { Xchar c = get(); while (isS(c)) c = get(); if (c == -1) { return 0; } token.resize(0); if (matchChar(c, '>')) return 1; if (matchChar(c, '"') || matchChar(c, '\'') || matchChar(c, '=')) return 0; for (;;) { token += c; c = get(); if (c == -1) return 0; if (isS(c)) break; if (matchChar(c, '>') || matchChar(c, '=')) break; } while (isS(c)) c = get(); if (c == -1) return 0; if (!matchChar(c, '=')) { unget(); gotValue = 0; return 1; } gotValue = 1; value.resize(0); c = get(); while (isS(c)) c = get(); if (matchChar(c, '>') || matchChar(c, '=')) return 0; if (matchChar(c, '"') || matchChar(c, '\'')) { Char lit = c; for (;;) { Xchar c = get(); if (c == lit) break; if (c == -1) return 0; if (matchChar(c, '\n')) ; else if (matchChar(c, '\r') || matchChar(c, '\t')) value += idCharset_.execToDesc(' '); else value += c; } uncharref(value); } else { for (;;) { value += c; c = get(); if (c == -1) return 0; if (isS(c)) break; if (matchChar(c, '>') || matchChar(c, '=')) { unget(); break; } } } return 1; } void FSIParser::uncharref(StringC &str) { size_t j = 0; size_t i = 0; while (i < str.size()) { int digit; if (matchChar(str[i], '&') && i + 2 < str.size() && matchChar(str[i + 1], '#') && convertDigit(str[i + 2], digit)) { unsigned long val = digit; i += 3; while (i < str.size() && convertDigit(str[i], digit)) { val = val*10 + digit; i++; } str[j++] = val; if (i < str.size() && matchChar(str[i], ';')) i++; } else str[j++] = str[i++]; } str.resize(j); } Boolean FSIParser::convertId(StringC &id, Xchar smcrd, const StorageManager *sm) { const CharsetInfo *smCharset = sm->idCharset(); StringC newId; size_t i = 0; while (i < id.size()) { UnivChar univ; WideChar wide; ISet wideSet; int digit; if (Xchar(id[i]) == smcrd && i + 1 < id.size() && convertDigit(id[i + 1], digit)) { i += 2; Char val = digit; while (i < id.size() && convertDigit(id[i], digit)) { val = val*10 + digit; i++; } newId += val; if (i < id.size() && matchChar(id[i], ';')) i++; } else if (smCharset) { if (!idCharset_.descToUniv(id[i++], univ)) return 0; if (univ == UnivCharsetDesc::rs) ; else if (univ == UnivCharsetDesc::re && sm->reString()) newId += *sm->reString(); else if (smCharset->univToDesc(univ, wide, wideSet) != 1 || wide > charMax) return 0; // FIXME give error else newId += Char(wide); } else newId += id[i++]; } newId.swap(id); return 1; } ParsedSystemId::ParsedSystemId() { } ParsedSystemId::Map::Map() { } ParsedSystemId::Map::Map(const ParsedSystemId::Map& x) : type(x.type), publicId(x.publicId) { } ParsedSystemId::Map::~Map() { } ParsedSystemId::Map& ParsedSystemId::Map::operator=(const ParsedSystemId::Map& x) { if (this != &x) { type = x.type; publicId = x.publicId; } return *this; } static void unparseSoi(const StringC &soi, const CharsetInfo *idCharset, const CharsetInfo &resultCharset, StringC &result, Boolean &needSmcrd); void ParsedSystemId::unparse(const CharsetInfo &resultCharset, Boolean isNdata, StringC &result) const { size_t len = size(); result.resize(0); size_t i; for (i = 0; i < maps.size(); i++) { if (maps[i].type == Map::catalogDocument) result += resultCharset.execToDesc(""); else if (maps[i].type == Map::catalogPublic) { result += resultCharset.execToDesc(""); } } for (i = 0; i < len; i++) { const StorageObjectSpec &sos = (*this)[i]; result += resultCharset.execToDesc('<'); result += resultCharset.execToDesc(sos.storageManager->type()); if (sos.notrack) result += resultCharset.execToDesc(" NOTRACK"); if (!sos.search) result += resultCharset.execToDesc(" NOSEARCH"); if (!sos.storageManager->requiresCr() && sos.records != (isNdata ? StorageObjectSpec::asis : StorageObjectSpec::find)) { result += resultCharset.execToDesc(' '); result += resultCharset.execToDesc(FSIParser::recordsName(sos.records)); } if (sos.codingSystemName && sos.codingSystemType != StorageObjectSpec::special) { if (!sos.zapEof) result += resultCharset.execToDesc(" NOZAPEOF"); result += resultCharset.execToDesc(sos.codingSystemType == StorageObjectSpec::bctf ? " BCTF=" : " ENCODING="); result += resultCharset.execToDesc(sos.codingSystemName); } Boolean needSmcrd = 0; if (sos.baseId.size() != 0) { result += resultCharset.execToDesc(" SOIBASE='"); unparseSoi(sos.baseId, sos.storageManager->idCharset(), resultCharset, result, needSmcrd); result += resultCharset.execToDesc('\''); } StringC tem; unparseSoi(sos.specId, sos.storageManager->idCharset(), resultCharset, tem, needSmcrd); if (needSmcrd) result += resultCharset.execToDesc(" SMCRD='^'"); result += resultCharset.execToDesc('>'); result += tem; } } void unparseSoi(const StringC &soi, const CharsetInfo *idCharset, const CharsetInfo &resultCharset, StringC &result, Boolean &needSmcrd) { if (!idCharset) { for (size_t i = 0; i < soi.size(); i++) { char buf[32]; sprintf(buf, "&#%lu;", (unsigned long)soi[i]); result += resultCharset.execToDesc(buf); } return; } for (size_t i = 0; i < soi.size(); i++) { UnivChar univ; WideChar to; ISet toSet; if (!idCharset->descToUniv(soi[i], univ) || univ >= 127 || univ < 32 || univ == 36 // $ || univ == 96 // ` #ifndef SP_MSDOS_FILENAMES || univ == 92 // backslash #endif || univ == 94 // ^ || resultCharset.univToDesc(univ, to, toSet) != 1) { needSmcrd = 1; char buf[32]; sprintf(buf, "^%lu;", (unsigned long)soi[i]); result += resultCharset.execToDesc(buf); } else { switch (univ) { case 34: // double quote case 35: // # case 39: // apostrophe case 60: // < { char buf[32]; sprintf(buf, "&#%lu;", (unsigned long)to); result += resultCharset.execToDesc(buf); } break; default: result += Char(to); break; } } } } #ifdef SP_NAMESPACE } #endif