// Copyright (c) 1994, 1997 James Clark // See the file COPYING for copying permission. #ifdef __GNUG__ #pragma implementation #endif #include "splib.h" #include "CharsetRegistry.h" #include "CharsetInfo.h" #include "StringC.h" #include "types.h" #include "macros.h" #ifdef SP_NAMESPACE namespace SP_NAMESPACE { #endif CharsetRegistry::Iter::~Iter() { } class CharsetRegistryRangeIter : public CharsetRegistry::Iter { public: CharsetRegistryRangeIter(const UnivCharsetDesc::Range *p, size_t n) : p_(p), n_(n) { } Boolean next(WideChar &min, WideChar &max, UnivChar &univ) { if (n_) { min = p_->descMin; max = p_->descMin + (p_->count - 1); univ = p_->univMin; p_++; n_--; return 1; } else return 0; } private: const UnivCharsetDesc::Range *p_; size_t n_; }; class CharsetRegistryDescIter : public CharsetRegistry::Iter { public: CharsetRegistryDescIter(const unsigned short *p) : p_(p + 2), n_(p[0]), c_(p[1]) { } Boolean next(WideChar &min, WideChar &max, UnivChar &univ) { if (n_ == 0) { n_ = *p_; if (n_ == 0) return 0; p_++; c_ = *p_++; } int i = 1; for (; i < n_; i++) if (p_[i] != p_[i - 1] + 1) break; min = c_; max = min + (i - 1); univ = p_[0]; p_ += i; c_ += i; n_ -= i; return 1; } private: const unsigned short *p_; size_t n_; WideChar c_; }; static struct { const char *esc; CharsetRegistry::ISORegistrationNumber number; } escTable[] = { { "\x1B\x25\x40", CharsetRegistry::ISO646_ASCII_G0 }, { "\x1B\x28\x40", CharsetRegistry::ISO646_ASCII_G0 }, { "\x1B\x28\x42", CharsetRegistry::ISO646_ASCII_G0 }, // ASCII { "\x1B\x21\x40", CharsetRegistry::ISO646_C0 }, { "\x1B\x2D\x41", CharsetRegistry::ISO8859_1 }, { "\x1B\x2D\x42", CharsetRegistry::ISO8859_2 }, { "\x1B\x2D\x43", CharsetRegistry::ISO8859_3 }, { "\x1B\x2D\x44", CharsetRegistry::ISO8859_4 }, { "\x1B\x2D\x4C", CharsetRegistry::ISO8859_5 }, { "\x1B\x2D\x47", CharsetRegistry::ISO8859_6 }, { "\x1B\x2D\x46", CharsetRegistry::ISO8859_7 }, { "\x1B\x2D\x48", CharsetRegistry::ISO8859_8 }, { "\x1B\x2D\x4D", CharsetRegistry::ISO8859_9 }, { "\x1B\x28\x4A", CharsetRegistry::ISO646_JIS_G0 }, { "\x1B\x28\x49", CharsetRegistry::JIS0201 }, { "\x1B\x24\x42", CharsetRegistry::JIS0208 }, { "\x1B\x26\x40\x1B\x24\x42", CharsetRegistry::JIS0208 }, { "\x1B\x24\x28\x44", CharsetRegistry::JIS0212 }, { "\x1B\x24\x41", CharsetRegistry::GB2312 }, { "\x1B\x24\x28\x43", CharsetRegistry::KSC5601 }, { "\x1B\x25\x2F\x40", CharsetRegistry::ISO10646_UCS2 }, { "\x1B\x25\x2F\x41", CharsetRegistry::ISO10646_UCS4 }, { "\x1B\x25\x2F\x43", CharsetRegistry::ISO10646_UCS2 }, { "\x1B\x25\x2F\x44", CharsetRegistry::ISO10646_UCS4 }, { "\x1B\x25\x2F\x45", CharsetRegistry::ISO10646_UCS2 }, { "\x1B\x25\x2F\x46", CharsetRegistry::ISO10646_UCS4 }, }; static const UnivCharsetDesc::Range iso646_ascii[] = { { 0, 128, 0 }, }; static const UnivCharsetDesc::Range iso646_C0[] = { { 0, 32, 0 }, { 127, 1, 127 }, }; static const UnivCharsetDesc::Range iso6429[] = { { 0, 32, 128 }, }; static const UnivCharsetDesc::Range iso8859_1[] = { { 32, 96, 160 }, }; static const UnivCharsetDesc::Range iso10646_ucs2[] = { { 0, 65536, 0 }, }; static const UnivCharsetDesc::Range iso10646_ucs4[] = { { 0, 0x80000000, 0 }, }; static struct { CharsetRegistry::ISORegistrationNumber number; const UnivCharsetDesc::Range *ranges; size_t nRanges; } rangeTable[] = { { CharsetRegistry::ISO646_ASCII_G0, iso646_ascii, SIZEOF(iso646_ascii) }, { CharsetRegistry::ISO646_C0, iso646_C0, SIZEOF(iso646_C0) }, { CharsetRegistry::ISO6429, iso6429, SIZEOF(iso6429) }, { CharsetRegistry::ISO8859_1, iso8859_1, SIZEOF(iso8859_1) }, { CharsetRegistry::ISO10646_UCS2, iso10646_ucs2, SIZEOF(iso10646_ucs2) }, { CharsetRegistry::ISO10646_UCS4, iso10646_ucs4, SIZEOF(iso10646_ucs4) }, }; static const unsigned short iso8859_2[] = { #include "iso8859-2.h" }; static const unsigned short iso8859_3[] = { #include "iso8859-3.h" }; static const unsigned short iso8859_4[] = { #include "iso8859-4.h" }; static const unsigned short iso8859_5[] = { #include "iso8859-5.h" }; static const unsigned short iso8859_6[] = { #include "iso8859-6.h" }; static const unsigned short iso8859_7[] = { #include "iso8859-7.h" }; static const unsigned short iso8859_8[] = { #include "iso8859-8.h" }; static const unsigned short iso8859_9[] = { #include "iso8859-9.h" }; static const unsigned short koi8_r[] = { #include "koi8-r.h" }; static const unsigned short iso646_jis_G0[] = { #include "iso646-jis.h" }; static const unsigned short jis0201[] = { #include "jis0201.h" }; #ifdef SP_MULTI_BYTE static const unsigned short jis0208[] = { #include "jis0208.h" }; static const unsigned short jis0212[] = { #include "jis0212.h" }; static const unsigned short gb2312[] = { #include "gb2312.h" }; static const unsigned short ksc5601[] = { #include "ksc5601.h" }; static const unsigned short big5[] = { #include "big5.h" }; #endif /* SP_MULTI_BYTE */ static const struct { CharsetRegistry::ISORegistrationNumber number; const unsigned short *desc; } descTable[] = { { CharsetRegistry::ISO8859_2, iso8859_2 }, { CharsetRegistry::ISO8859_3, iso8859_3 }, { CharsetRegistry::ISO8859_4, iso8859_4 }, { CharsetRegistry::ISO8859_5, iso8859_5 }, { CharsetRegistry::ISO8859_6, iso8859_6 }, { CharsetRegistry::ISO8859_7, iso8859_7 }, { CharsetRegistry::ISO8859_8, iso8859_8 }, { CharsetRegistry::ISO8859_9, iso8859_9 }, { CharsetRegistry::KOI8_R, koi8_r }, { CharsetRegistry::ISO646_JIS_G0, iso646_jis_G0 }, { CharsetRegistry::JIS0201, jis0201 }, #ifdef SP_MULTI_BYTE { CharsetRegistry::JIS0208, jis0208 }, { CharsetRegistry::JIS0212, jis0212 }, { CharsetRegistry::GB2312, gb2312 }, { CharsetRegistry::KSC5601, ksc5601 }, { CharsetRegistry::BIG5, big5 }, #endif }; CharsetRegistry::ISORegistrationNumber CharsetRegistry::getRegistrationNumber(const StringC &sequence, const CharsetInfo &charset) { // Canonicalize the escape sequence by mapping esc -> ESC, // removing leading zeros from escape sequences, and removing // initial spaces. StringC s; for (size_t i = 0; i < sequence.size(); i++) { Char c = sequence[i]; if (c == charset.execToDesc('e')) s += charset.execToDesc('E'); else if (c == charset.execToDesc('s')) s += charset.execToDesc('S'); else if (c == charset.execToDesc('c')) s += charset.execToDesc('C'); else if (charset.digitWeight(c) >= 0 && s.size() > 0 && s[s.size() - 1] == charset.execToDesc('0') && (s.size() == 1 || charset.digitWeight(s[s.size() - 2]) >= 0)) s[s.size() - 1] = c; else if (c != charset.execToDesc(' ') || s.size() > 0) s += c; } for (size_t i = 0; i < SIZEOF(escTable); i++) { StringC esc; for (const char *p = escTable[i].esc; *p; p++) { if (*p == 0x1B) esc += charset.execToDesc("ESC"); else { static const char digits[] = "0123456789"; int c = (unsigned char)*p >> 4; if (c >= 10) esc += charset.execToDesc('1'); esc += charset.execToDesc(digits[c % 10]); esc += charset.execToDesc('/'); c = (*p & 0xf); if (c >= 10) esc += charset.execToDesc('1'); esc += charset.execToDesc(digits[c % 10]); } if (p[1]) esc += charset.execToDesc(' '); } if (s == esc) return escTable[i].number; } return UNREGISTERED; } CharsetRegistry::Iter *CharsetRegistry::makeIter(ISORegistrationNumber number) { for (size_t i = 0; i < SIZEOF(rangeTable); i++) { if (number == rangeTable[i].number) return new CharsetRegistryRangeIter(rangeTable[i].ranges, rangeTable[i].nRanges); } for (size_t i = 0; i < SIZEOF(descTable); i++) { if (number == descTable[i].number) return new CharsetRegistryDescIter(descTable[i].desc); } return 0; } #ifdef SP_NAMESPACE } #endif