530 lines
11 KiB
C++
530 lines
11 KiB
C++
// Copyright (c) 1994 James Clark
|
|
// See the file COPYING for copying permission.
|
|
|
|
#ifndef Syntax_INCLUDED
|
|
#define Syntax_INCLUDED 1
|
|
#ifdef __GNUG__
|
|
#pragma interface
|
|
#endif
|
|
|
|
#include "types.h"
|
|
#include "Boolean.h"
|
|
#include "ISet.h"
|
|
#include "StringC.h"
|
|
#include "SubstTable.h"
|
|
#include "HashTable.h"
|
|
#include "Vector.h"
|
|
#include "Resource.h"
|
|
#include "XcharMap.h"
|
|
#include "EntityCatalog.h"
|
|
|
|
#ifdef SP_NAMESPACE
|
|
namespace SP_NAMESPACE {
|
|
#endif
|
|
|
|
class Sd;
|
|
class CharsetInfo;
|
|
|
|
class SP_API Syntax : public Resource, public EntityCatalog::Syntax {
|
|
public:
|
|
enum ReservedName {
|
|
rALL,
|
|
rANY,
|
|
rATTLIST,
|
|
rCDATA,
|
|
rCONREF,
|
|
rCURRENT,
|
|
rDATA,
|
|
rDEFAULT,
|
|
rDOCTYPE,
|
|
rELEMENT,
|
|
rEMPTY,
|
|
rENDTAG,
|
|
rENTITIES,
|
|
rENTITY,
|
|
rFIXED,
|
|
rID,
|
|
rIDLINK,
|
|
rIDREF,
|
|
rIDREFS,
|
|
rIGNORE,
|
|
rIMPLICIT,
|
|
rIMPLIED,
|
|
rINCLUDE,
|
|
rINITIAL,
|
|
rLINK,
|
|
rLINKTYPE,
|
|
rMD,
|
|
rMS,
|
|
rNAME,
|
|
rNAMES,
|
|
rNDATA,
|
|
rNMTOKEN,
|
|
rNMTOKENS,
|
|
rNOTATION,
|
|
rNUMBER,
|
|
rNUMBERS,
|
|
rNUTOKEN,
|
|
rNUTOKENS,
|
|
rO,
|
|
rPCDATA,
|
|
rPI,
|
|
rPOSTLINK,
|
|
rPUBLIC,
|
|
rRCDATA,
|
|
rRE,
|
|
rREQUIRED,
|
|
rRESTORE,
|
|
rRS,
|
|
rSDATA,
|
|
rSHORTREF,
|
|
rSIMPLE,
|
|
rSPACE,
|
|
rSTARTTAG,
|
|
rSUBDOC,
|
|
rSYSTEM,
|
|
rTEMP,
|
|
rUSELINK,
|
|
rUSEMAP
|
|
};
|
|
enum { nNames = rUSEMAP + 1 };
|
|
enum Quantity {
|
|
qATTCNT,
|
|
qATTSPLEN,
|
|
qBSEQLEN,
|
|
qDTAGLEN,
|
|
qDTEMPLEN,
|
|
qENTLVL,
|
|
qGRPCNT,
|
|
qGRPGTCNT,
|
|
qGRPLVL,
|
|
qLITLEN,
|
|
qNAMELEN,
|
|
qNORMSEP,
|
|
qPILEN,
|
|
qTAGLEN,
|
|
qTAGLVL
|
|
};
|
|
enum { nQuantity = qTAGLVL + 1 };
|
|
enum { unlimited = 100000000 };
|
|
enum DelimGeneral {
|
|
dAND,
|
|
dCOM,
|
|
dCRO,
|
|
dDSC,
|
|
dDSO,
|
|
dDTGC,
|
|
dDTGO,
|
|
dERO,
|
|
dETAGO,
|
|
dGRPC,
|
|
dGRPO,
|
|
dHCRO, // WWW TC addition
|
|
dLIT,
|
|
dLITA,
|
|
dMDC,
|
|
dMDO,
|
|
dMINUS,
|
|
dMSC,
|
|
dNET,
|
|
dNESTC, // WWW TC addition
|
|
dOPT,
|
|
dOR,
|
|
dPERO,
|
|
dPIC,
|
|
dPIO,
|
|
dPLUS,
|
|
dREFC,
|
|
dREP,
|
|
dRNI,
|
|
dSEQ,
|
|
dSTAGO,
|
|
dTAGC,
|
|
dVI
|
|
};
|
|
enum { nDelimGeneral = dVI + 1 };
|
|
enum StandardFunction {
|
|
fRE,
|
|
fRS,
|
|
fSPACE
|
|
};
|
|
enum FunctionClass {
|
|
cFUNCHAR,
|
|
cSEPCHAR,
|
|
cMSOCHAR,
|
|
cMSICHAR,
|
|
cMSSCHAR
|
|
};
|
|
enum Set {
|
|
nameStart,
|
|
digit,
|
|
hexDigit,
|
|
nmchar, // LCNMCHAR or UCNMCHAR
|
|
s,
|
|
blank,
|
|
sepchar,
|
|
minimumData,
|
|
significant,
|
|
functionChar, // function character
|
|
sgmlChar
|
|
};
|
|
enum { nSet = sgmlChar + 1 };
|
|
enum Category {
|
|
otherCategory = 0,
|
|
sCategory = 01,
|
|
nameStartCategory = 02,
|
|
digitCategory = 04,
|
|
otherNameCategory = 010
|
|
};
|
|
|
|
Syntax(const Sd &);
|
|
virtual ~Syntax();
|
|
Boolean lookupFunctionChar(const StringC &, Char *) const;
|
|
HashTableIter<StringC,Char> functionIter() const;
|
|
Boolean charFunctionName(Char c, const StringC *&name) const;
|
|
Boolean lookupReservedName(const StringC &, ReservedName *) const;
|
|
const StringC &reservedName(ReservedName) const;
|
|
StringC rniReservedName(ReservedName) const;
|
|
Number quantity(Quantity) const;
|
|
Char standardFunction(int) const;
|
|
Boolean getStandardFunction(int, Char &) const;
|
|
const StringC &delim() const;
|
|
const ISet<Char> *charSet(int i) const;
|
|
const SubstTable *generalSubstTable() const;
|
|
const SubstTable *entitySubstTable() const;
|
|
const SubstTable &upperSubstTable() const;
|
|
Boolean namecaseGeneral() const;
|
|
Boolean namecaseEntity() const;
|
|
const StringC &peroDelim() const;
|
|
const StringC &delimGeneral(int) const;
|
|
const StringC &delimShortrefComplex(size_t) const;
|
|
const ISet<Char> &delimShortrefSimple() const;
|
|
int nDelimShortrefComplex() const;
|
|
Boolean isValidShortref(const StringC &) const;
|
|
Boolean hasShortrefs() const;
|
|
Boolean isNameCharacter(Xchar) const;
|
|
Boolean isNameStartCharacter(Xchar) const;
|
|
Boolean isDigit(Xchar) const;
|
|
Boolean isHexDigit(Xchar) const;
|
|
Boolean isS(Xchar) const;
|
|
Boolean isB(Xchar c) const;
|
|
Category charCategory(Xchar) const;
|
|
Boolean isSgmlChar(Xchar) const;
|
|
size_t attcnt() const;
|
|
size_t attsplen() const;
|
|
size_t namelen() const;
|
|
size_t penamelen() const;
|
|
size_t litlen() const;
|
|
size_t normsep() const;
|
|
size_t dtemplen() const;
|
|
size_t grpcnt() const;
|
|
size_t grpgtcnt() const;
|
|
size_t grplvl() const;
|
|
size_t taglvl() const;
|
|
size_t taglen() const;
|
|
size_t entlvl() const;
|
|
size_t pilen() const;
|
|
Char space() const;
|
|
|
|
void setStandardFunction(StandardFunction, Char);
|
|
void enterStandardFunctionNames();
|
|
void addFunctionChar(const StringC &, FunctionClass, Char);
|
|
void setNamecaseGeneral(Boolean);
|
|
void setNamecaseEntity(Boolean);
|
|
void setDelimGeneral(int, const StringC &);
|
|
void addDelimShortref(const StringC &, const CharsetInfo &);
|
|
void addDelimShortrefs(const ISet<Char> &shortrefChars,
|
|
const CharsetInfo &charset);
|
|
void addNameCharacters(const ISet<Char> &);
|
|
void addNameStartCharacters(const ISet<Char> &);
|
|
void addSubst(Char lc, Char uc);
|
|
void addShunchar(Char);
|
|
void setShuncharControls();
|
|
void setQuantity(int, Number);
|
|
void setName(int, const StringC &);
|
|
void setSgmlChar(const ISet<Char> &);
|
|
void implySgmlChar(const Sd &);
|
|
// :: is for Watcom 10.0a
|
|
void checkSgmlChar(const Sd &,
|
|
const ::SP_NAMESPACE_SCOPE Syntax *otherSyntax,
|
|
Boolean invalidUseDocumentCharset,
|
|
ISet<WideChar> &invalid)
|
|
const;
|
|
static int referenceQuantity(Quantity);
|
|
const XcharMap<unsigned char> &markupScanTable() const;
|
|
Boolean multicode() const;
|
|
void addEntity(const StringC &, Char);
|
|
size_t nEntities() const;
|
|
const StringC &entityName(size_t) const;
|
|
Char entityChar(size_t) const;
|
|
private:
|
|
void subst(Char, Char);
|
|
void checkUnivControlChar(UnivChar univChar,
|
|
const CharsetInfo &docCharset,
|
|
const ::SP_NAMESPACE_SCOPE Syntax *otherSyntax,
|
|
ISet<WideChar> &invalid) const;
|
|
|
|
ISet<Char> shunchar_;
|
|
PackedBoolean shuncharControls_;
|
|
ISet<Char> set_[nSet];
|
|
Char standardFunction_[3];
|
|
PackedBoolean standardFunctionValid_[3];
|
|
Boolean namecaseGeneral_;
|
|
Boolean namecaseEntity_;
|
|
StringC delimGeneral_[nDelimGeneral];
|
|
Vector<StringC> delimShortrefComplex_;
|
|
ISet<Char> delimShortrefSimple_;
|
|
StringC names_[nNames];
|
|
Number quantity_[nQuantity];
|
|
HashTable<StringC,int> nameTable_;
|
|
HashTable<StringC,Char> functionTable_;
|
|
SubstTable upperSubst_;
|
|
SubstTable identitySubst_;
|
|
const SubstTable *generalSubst_;
|
|
const SubstTable *entitySubst_;
|
|
XcharMap<unsigned char> categoryTable_;
|
|
Boolean multicode_;
|
|
XcharMap<unsigned char> markupScanTable_;
|
|
Boolean hasMarkupScanTable_;
|
|
Vector<StringC> entityNames_;
|
|
StringC entityChars_;
|
|
static const int referenceQuantity_[];
|
|
};
|
|
|
|
inline Number Syntax::quantity(Quantity q) const
|
|
{
|
|
return quantity_[q];
|
|
}
|
|
|
|
inline void Syntax::setQuantity(int i, Number n)
|
|
{
|
|
quantity_[i] = n;
|
|
}
|
|
|
|
inline const SubstTable *Syntax::generalSubstTable() const
|
|
{
|
|
return generalSubst_;
|
|
}
|
|
|
|
inline const SubstTable *Syntax::entitySubstTable() const
|
|
{
|
|
return entitySubst_;
|
|
}
|
|
|
|
inline int Syntax::nDelimShortrefComplex() const
|
|
{
|
|
return int(delimShortrefComplex_.size());
|
|
}
|
|
|
|
inline const StringC &Syntax::delimGeneral(int i) const
|
|
{
|
|
return delimGeneral_[i];
|
|
}
|
|
|
|
inline const StringC &Syntax::delimShortrefComplex(size_t i) const
|
|
{
|
|
return delimShortrefComplex_[i];
|
|
}
|
|
|
|
inline const ISet<Char> &Syntax::delimShortrefSimple() const
|
|
{
|
|
return delimShortrefSimple_;
|
|
}
|
|
|
|
inline Boolean Syntax::hasShortrefs() const
|
|
{
|
|
return delimShortrefComplex_.size() > 0 || !delimShortrefSimple_.isEmpty();
|
|
}
|
|
|
|
inline Char Syntax::standardFunction(int i) const
|
|
{
|
|
return standardFunction_[i];
|
|
}
|
|
|
|
inline Boolean Syntax::getStandardFunction(int i, Char &result) const
|
|
{
|
|
if (standardFunctionValid_[i]) {
|
|
result = standardFunction_[i];
|
|
return 1;
|
|
}
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
inline const ISet<Char> *Syntax::charSet(int i) const
|
|
{
|
|
return &set_[i];
|
|
}
|
|
|
|
inline Boolean Syntax::isNameCharacter(Xchar c) const
|
|
{
|
|
return categoryTable_[c] >= nameStartCategory;
|
|
}
|
|
|
|
inline Boolean Syntax::isNameStartCharacter(Xchar c) const
|
|
{
|
|
return categoryTable_[c] == nameStartCategory;
|
|
}
|
|
|
|
inline Boolean Syntax::isDigit(Xchar c) const
|
|
{
|
|
return categoryTable_[c] == digitCategory;
|
|
}
|
|
|
|
inline Boolean Syntax::isS(Xchar c) const
|
|
{
|
|
return categoryTable_[c] == sCategory;
|
|
}
|
|
|
|
inline Boolean Syntax::isB(Xchar c) const
|
|
{
|
|
return (categoryTable_[c] == sCategory
|
|
&& !(standardFunctionValid_[fRE] && c == standardFunction_[fRE])
|
|
&& !(standardFunctionValid_[fRS] && c == standardFunction_[fRS]));
|
|
}
|
|
|
|
inline Syntax::Category Syntax::charCategory(Xchar c) const
|
|
{
|
|
return Category(categoryTable_[c]);
|
|
}
|
|
|
|
inline Boolean Syntax::isSgmlChar(Xchar c) const
|
|
{
|
|
return c >= 0 && set_[sgmlChar].contains(Char(c));
|
|
}
|
|
|
|
inline const StringC &Syntax::reservedName(ReservedName i) const
|
|
{
|
|
return names_[i];
|
|
}
|
|
|
|
inline size_t Syntax::attcnt() const
|
|
{
|
|
return quantity(Syntax::qATTCNT);
|
|
}
|
|
|
|
inline size_t Syntax::attsplen() const
|
|
{
|
|
return quantity(Syntax::qATTSPLEN);
|
|
}
|
|
|
|
inline size_t Syntax::namelen() const
|
|
{
|
|
return quantity(Syntax::qNAMELEN);
|
|
}
|
|
|
|
inline size_t Syntax::penamelen() const
|
|
{
|
|
return quantity(Syntax::qNAMELEN) - delimGeneral(Syntax::dPERO).size();
|
|
}
|
|
|
|
inline size_t Syntax::litlen() const
|
|
{
|
|
return quantity(Syntax::qLITLEN);
|
|
}
|
|
|
|
inline size_t Syntax::normsep() const
|
|
{
|
|
return quantity(Syntax::qNORMSEP);
|
|
}
|
|
|
|
inline size_t Syntax::dtemplen() const
|
|
{
|
|
return quantity(Syntax::qDTEMPLEN);
|
|
}
|
|
|
|
inline size_t Syntax::grpcnt() const
|
|
{
|
|
return quantity(Syntax::qGRPCNT);
|
|
}
|
|
|
|
inline size_t Syntax::grpgtcnt() const
|
|
{
|
|
return quantity(Syntax::qGRPGTCNT);
|
|
}
|
|
|
|
inline size_t Syntax::grplvl() const
|
|
{
|
|
return quantity(Syntax::qGRPLVL);
|
|
}
|
|
|
|
inline size_t Syntax::taglvl() const
|
|
{
|
|
return quantity(Syntax::qTAGLVL);
|
|
}
|
|
|
|
inline size_t Syntax::taglen() const
|
|
{
|
|
return quantity(Syntax::qTAGLEN);
|
|
}
|
|
|
|
inline size_t Syntax::entlvl() const
|
|
{
|
|
return quantity(Syntax::qENTLVL);
|
|
}
|
|
|
|
inline size_t Syntax::pilen() const
|
|
{
|
|
return quantity(Syntax::qPILEN);
|
|
}
|
|
|
|
inline Char Syntax::space() const
|
|
{
|
|
return standardFunction(Syntax::fSPACE);
|
|
}
|
|
|
|
inline void Syntax::setSgmlChar(const ISet<Char> &set)
|
|
{
|
|
set_[sgmlChar] = set;
|
|
}
|
|
|
|
inline int Syntax::referenceQuantity(Quantity i)
|
|
{
|
|
return referenceQuantity_[i];
|
|
}
|
|
|
|
inline void Syntax::setShuncharControls()
|
|
{
|
|
shuncharControls_ = 1;
|
|
}
|
|
|
|
inline const XcharMap<unsigned char> &Syntax::markupScanTable() const
|
|
{
|
|
return markupScanTable_;
|
|
}
|
|
|
|
inline Boolean Syntax::multicode() const
|
|
{
|
|
return multicode_;
|
|
}
|
|
|
|
inline Boolean Syntax::namecaseGeneral() const
|
|
{
|
|
return namecaseGeneral_;
|
|
}
|
|
|
|
inline Boolean Syntax::namecaseEntity() const
|
|
{
|
|
return namecaseEntity_;
|
|
}
|
|
|
|
inline size_t Syntax::nEntities() const
|
|
{
|
|
return entityNames_.size();
|
|
}
|
|
|
|
inline const StringC &Syntax::entityName(size_t i) const
|
|
{
|
|
return entityNames_[i];
|
|
}
|
|
|
|
inline Char Syntax::entityChar(size_t i) const
|
|
{
|
|
return entityChars_[i];
|
|
}
|
|
|
|
#ifdef SP_NAMESPACE
|
|
}
|
|
#endif
|
|
|
|
#endif /* Syntax_INCLUDED */
|