actual/packages/node-libofx/OpenSP-1.5.2/lib/UTF8CodingSystem.cxx

// Copyright (c) 1994 James Clark
// See the file COPYING for copying permission.

#include "splib.h"

#ifdef SP_MULTI_BYTE

#include "UTF8CodingSystem.h"
#include "constant.h"

#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif

enum {
  // cmaskN is mask for first byte to test for N byte sequence
  cmask1 = 0x80,
  cmask2 = 0xe0,
  cmask3 = 0xf0,
  cmask4 = 0xf8,
  cmask5 = 0xfc,
  cmask6 = 0xfe,
  // cvalN is value of masked first byte of N byte sequence
  cval1 = 0x00,
  cval2 = 0xc0,
  cval3 = 0xe0,
  cval4 = 0xf0,
  cval5 = 0xf8,
  cval6 = 0xfc,
  // vmaskN is mask to get value from first byte in N byte sequence
  vmask2 = 0x1f,
  vmask3 = 0xf,
  vmask4 = 0x7,
  vmask5 = 0x3,
  vmask6 = 0x1,
  // minN is minimum legal resulting value for N byte sequence
  min2 = 0x80,
  min3 = 0x800,
  min4 = 0x10000,
  min5 = 0x200000,
  min6 = 0x4000000,
  max6 = 0x7fffffff
};

class UTF8Decoder : public Decoder {
public:
  UTF8Decoder();
  size_t decode(Char *, const char *, size_t, const char **);
  Boolean convertOffset(unsigned long &offset) const;
private:
  // value for encoding error
  enum { invalid = 0xfffd };
  Boolean recovering_;
  PackedBoolean hadFirstChar_;
  PackedBoolean hadByteOrderMark_;
};

class UTF8Encoder : public Encoder {
public:
  UTF8Encoder();
  void output(const Char *, size_t, OutputByteStream *);
};

Decoder *UTF8CodingSystem::makeDecoder() const
{
  return new UTF8Decoder;
}

Encoder *UTF8CodingSystem::makeEncoder() const
{
  return new UTF8Encoder;
}


UTF8Decoder::UTF8Decoder()
: recovering_(0), hadFirstChar_(0), hadByteOrderMark_(0)
{
}

size_t UTF8Decoder::decode(Char *to, const char *s,
			  size_t slen, const char **result)
{
  // Check for byte-order mark
  if (!hadFirstChar_ && slen >= 3) {
    hadFirstChar_ = 1;

    if ((unsigned char)s[0] == 0xEF &&
        (unsigned char)s[1] == 0xBB &&
        (unsigned char)s[2] == 0xBF) {
      s += 3;
      slen -= 3;
      hadByteOrderMark_ = 1;
    }
  }
  Char *start = to;
  const unsigned char *us = (const unsigned char *)s;
  if (recovering_) {
    recovering_ = 0;
    goto recover;
  }
  while (slen > 0) {
    unsigned c0;
    c0 = us[0];
    if ((c0 & cmask1) == cval1) {
      *to++ = c0;
      us++;
      slen--;
    }
    else if ((c0 & cmask2) == cval2) {
      if (slen < 2)
	goto done;
      unsigned c1 = us[1] ^ 0x80;
      if (c1 & 0xc0)
	goto error;
      unsigned c = ((c0 & vmask2) << 6) | c1;
      if (c < min2)
	c = invalid;
      *to++ = c;
      slen -= 2;
      us += 2;
    }
    else if ((c0 & cmask3) == cval3) {
      if (slen < 3)
	goto done;
      unsigned c1 = us[1] ^ 0x80;
      unsigned c2 = us[2] ^ 0x80;
      if ((c1 | c2) & 0xc0)
	goto error;
      unsigned c = ((((c0 & vmask3) << 6) | c1) << 6) | c2;
      if (c < min3)
	c = invalid;
      *to++ = c;
      slen -= 3;
      us += 3;
    }
    else if ((c0 & cmask4) == cval4) {
      if (slen < 4)
	goto done;
      unsigned c1 = us[1] ^ 0x80;
      unsigned c2 = us[2] ^ 0x80;
      unsigned c3 = us[3] ^ 0x80;
      if ((c1 | c2 | c3) & 0xc0)
	goto error;
      if (charMax < min5 - 1)
	*to++ = invalid;
      else {
	unsigned long c = ((((c0 & vmask4) << 6) | c1) << 6) | c2;
	c = (c << 6) | c3;
	if (c < min4)
	  c = invalid;
	*to++ = c;
      }
      slen -= 4;
      us += 4;
    }
    else if ((c0 & cmask5) == cval5) {
      if (slen < 5)
	goto done;
      unsigned c1 = us[1] ^ 0x80;
      unsigned c2 = us[2] ^ 0x80;
      unsigned c3 = us[3] ^ 0x80;
      unsigned c4 = us[4] ^ 0x80;
      if ((c1 | c2 | c3 | c4) & 0xc0)
	goto error;
      if (charMax < min6 - 1)
	*to++ = invalid;
      else {
	unsigned long c = ((((c0 & vmask5) << 6) | c1) << 6) | c2;
	c = (((c << 6) | c3) << 6) | c4;
	if (c < min5)
	  c = invalid;
	*to++ = c;
      }
      slen -= 5;
      us += 5;
    }
    else if ((c0 & cmask6) == cval6) {
      if (slen < 6)
	goto done;
      unsigned c1 = us[1] ^ 0x80;
      unsigned c2 = us[2] ^ 0x80;
      unsigned c3 = us[3] ^ 0x80;
      unsigned c4 = us[4] ^ 0x80;
      unsigned c5 = us[5] ^ 0x80;
      if ((c1 | c2 | c3 | c4 | c5) & 0xc0)
	goto error;
      if (charMax < max6)
	*to++ = invalid;
      else {
	unsigned long c = ((((c0 & vmask6) << 6) | c1) << 6) | c2;
	c = (((((c << 6) | c3) << 6) | c4) << 6) | c5;
	if (c < min6)
	  c = invalid;
	*to++ = c;
      }
      slen -= 6;
      us += 6;
    }
    else {
    error:
      us++;
      slen--;
      *to++ = invalid;
    recover:
      for (;;) {
	if (slen == 0) {
	  recovering_ = 1;
	  goto done;
	}
	if ((*us & 0xc0) != 0x80)
	  break;
	us++;
	slen--;
      }
    }
  }
 done:
  *result = (char *)us;
  return to - start;
}

Boolean UTF8Decoder::convertOffset(unsigned long &n) const
{
  if (hadByteOrderMark_)
    n += 3;

  return true;
}

UTF8Encoder::UTF8Encoder()
{
}

void UTF8Encoder::output(const Char *s, size_t n, OutputByteStream *sb)
{
  for (; n > 0; s++, n--) {
    Char c = *s;
    if (c < min2)
      sb->sputc((unsigned char)c);
    else if (c < min3) {
      sb->sputc((c >> 6) | cval2);
      sb->sputc((c & 0x3f) | 0x80);
    }
    else if (c < min4) {
      sb->sputc((c >> 12) | cval3);
      sb->sputc(((c >> 6) & 0x3f) | 0x80);
      sb->sputc((c & 0x3f) | 0x80);
    }
    else if (c < min5) {
      sb->sputc((c >> 18) | cval4);
      sb->sputc(((c >> 12) & 0x3f) | 0x80);
      sb->sputc(((c >> 6) & 0x3f) | 0x80);
      sb->sputc((c & 0x3f) | 0x80);
    }
    else if (c < min6) {
      sb->sputc((c >> 24) | cval5);
      sb->sputc(((c >> 18) & 0x3f) | 0x80);
      sb->sputc(((c >> 12) & 0x3f) | 0x80);
      sb->sputc(((c >> 6) & 0x3f) | 0x80);
      sb->sputc((c & 0x3f) | 0x80);
    }
    else if (c <= max6) {
      sb->sputc((c >> 30) | cval6);
      sb->sputc(((c >> 24) & 0x3f) | 0x80);
      sb->sputc(((c >> 18) & 0x3f) | 0x80);
      sb->sputc(((c >> 12) & 0x3f) | 0x80);
      sb->sputc(((c >> 6) & 0x3f) | 0x80);
      sb->sputc((c & 0x3f) | 0x80);
    }
  }
}
#ifdef SP_NAMESPACE
}
#endif

#else /* not SP_MULTI_BYTE */

#ifndef __GNUG__
static char non_empty_translation_unit;	// sigh
#endif

#endif /* not SP_MULTI_BYTE */
Initial (open-source) 2022-04-29 02:44:38 +00:00			`// Copyright (c) 1994 James Clark`
			`// See the file COPYING for copying permission.`

			`#include "splib.h"`

			`#ifdef SP_MULTI_BYTE`

			`#include "UTF8CodingSystem.h"`
			`#include "constant.h"`

			`#ifdef SP_NAMESPACE`
			`namespace SP_NAMESPACE {`
			`#endif`

			`enum {`
			`// cmaskN is mask for first byte to test for N byte sequence`
			`cmask1 = 0x80,`
			`cmask2 = 0xe0,`
			`cmask3 = 0xf0,`
			`cmask4 = 0xf8,`
			`cmask5 = 0xfc,`
			`cmask6 = 0xfe,`
			`// cvalN is value of masked first byte of N byte sequence`
			`cval1 = 0x00,`
			`cval2 = 0xc0,`
			`cval3 = 0xe0,`
			`cval4 = 0xf0,`
			`cval5 = 0xf8,`
			`cval6 = 0xfc,`
			`// vmaskN is mask to get value from first byte in N byte sequence`
			`vmask2 = 0x1f,`
			`vmask3 = 0xf,`
			`vmask4 = 0x7,`
			`vmask5 = 0x3,`
			`vmask6 = 0x1,`
			`// minN is minimum legal resulting value for N byte sequence`
			`min2 = 0x80,`
			`min3 = 0x800,`
			`min4 = 0x10000,`
			`min5 = 0x200000,`
			`min6 = 0x4000000,`
			`max6 = 0x7fffffff`
			`};`

			`class UTF8Decoder : public Decoder {`
			`public:`
			`UTF8Decoder();`
			`size_t decode(Char , const char , size_t, const char **);`
			`Boolean convertOffset(unsigned long &offset) const;`
			`private:`
			`// value for encoding error`
			`enum { invalid = 0xfffd };`
			`Boolean recovering_;`
			`PackedBoolean hadFirstChar_;`
			`PackedBoolean hadByteOrderMark_;`
			`};`

			`class UTF8Encoder : public Encoder {`
			`public:`
			`UTF8Encoder();`
			`void output(const Char , size_t, OutputByteStream );`
			`};`

			`Decoder *UTF8CodingSystem::makeDecoder() const`
			`{`
			`return new UTF8Decoder;`
			`}`

			`Encoder *UTF8CodingSystem::makeEncoder() const`
			`{`
			`return new UTF8Encoder;`
			`}`


			`UTF8Decoder::UTF8Decoder()`
			`: recovering_(0), hadFirstChar_(0), hadByteOrderMark_(0)`
			`{`
			`}`

			`size_t UTF8Decoder::decode(Char to, const char s,`
			`size_t slen, const char **result)`
			`{`
			`// Check for byte-order mark`
			`if (!hadFirstChar_ && slen >= 3) {`
			`hadFirstChar_ = 1;`

			`if ((unsigned char)s[0] == 0xEF &&`
			`(unsigned char)s[1] == 0xBB &&`
			`(unsigned char)s[2] == 0xBF) {`
			`s += 3;`
			`slen -= 3;`
			`hadByteOrderMark_ = 1;`
			`}`
			`}`
			`Char *start = to;`
			`const unsigned char us = (const unsigned char )s;`
			`if (recovering_) {`
			`recovering_ = 0;`
			`goto recover;`
			`}`
			`while (slen > 0) {`
			`unsigned c0;`
			`c0 = us[0];`
			`if ((c0 & cmask1) == cval1) {`
			`*to++ = c0;`
			`us++;`
			`slen--;`
			`}`
			`else if ((c0 & cmask2) == cval2) {`
			`if (slen < 2)`
			`goto done;`
			`unsigned c1 = us[1] ^ 0x80;`
			`if (c1 & 0xc0)`
			`goto error;`
			`unsigned c = ((c0 & vmask2) << 6) \| c1;`
			`if (c < min2)`
			`c = invalid;`
			`*to++ = c;`
			`slen -= 2;`
			`us += 2;`
			`}`
			`else if ((c0 & cmask3) == cval3) {`
			`if (slen < 3)`
			`goto done;`
			`unsigned c1 = us[1] ^ 0x80;`
			`unsigned c2 = us[2] ^ 0x80;`
			`if ((c1 \| c2) & 0xc0)`
			`goto error;`
			`unsigned c = ((((c0 & vmask3) << 6) \| c1) << 6) \| c2;`
			`if (c < min3)`
			`c = invalid;`
			`*to++ = c;`
			`slen -= 3;`
			`us += 3;`
			`}`
			`else if ((c0 & cmask4) == cval4) {`
			`if (slen < 4)`
			`goto done;`
			`unsigned c1 = us[1] ^ 0x80;`
			`unsigned c2 = us[2] ^ 0x80;`
			`unsigned c3 = us[3] ^ 0x80;`
			`if ((c1 \| c2 \| c3) & 0xc0)`
			`goto error;`
			`if (charMax < min5 - 1)`
			`*to++ = invalid;`
			`else {`
			`unsigned long c = ((((c0 & vmask4) << 6) \| c1) << 6) \| c2;`
			`c = (c << 6) \| c3;`
			`if (c < min4)`
			`c = invalid;`
			`*to++ = c;`
			`}`
			`slen -= 4;`
			`us += 4;`
			`}`
			`else if ((c0 & cmask5) == cval5) {`
			`if (slen < 5)`
			`goto done;`
			`unsigned c1 = us[1] ^ 0x80;`
			`unsigned c2 = us[2] ^ 0x80;`
			`unsigned c3 = us[3] ^ 0x80;`
			`unsigned c4 = us[4] ^ 0x80;`
			`if ((c1 \| c2 \| c3 \| c4) & 0xc0)`
			`goto error;`
			`if (charMax < min6 - 1)`
			`*to++ = invalid;`
			`else {`
			`unsigned long c = ((((c0 & vmask5) << 6) \| c1) << 6) \| c2;`
			`c = (((c << 6) \| c3) << 6) \| c4;`
			`if (c < min5)`
			`c = invalid;`
			`*to++ = c;`
			`}`
			`slen -= 5;`
			`us += 5;`
			`}`
			`else if ((c0 & cmask6) == cval6) {`
			`if (slen < 6)`
			`goto done;`
			`unsigned c1 = us[1] ^ 0x80;`
			`unsigned c2 = us[2] ^ 0x80;`
			`unsigned c3 = us[3] ^ 0x80;`
			`unsigned c4 = us[4] ^ 0x80;`
			`unsigned c5 = us[5] ^ 0x80;`
			`if ((c1 \| c2 \| c3 \| c4 \| c5) & 0xc0)`
			`goto error;`
			`if (charMax < max6)`
			`*to++ = invalid;`
			`else {`
			`unsigned long c = ((((c0 & vmask6) << 6) \| c1) << 6) \| c2;`
			`c = (((((c << 6) \| c3) << 6) \| c4) << 6) \| c5;`
			`if (c < min6)`
			`c = invalid;`
			`*to++ = c;`
			`}`
			`slen -= 6;`
			`us += 6;`
			`}`
			`else {`
			`error:`
			`us++;`
			`slen--;`
			`*to++ = invalid;`
			`recover:`
			`for (;;) {`
			`if (slen == 0) {`
			`recovering_ = 1;`
			`goto done;`
			`}`
			`if ((*us & 0xc0) != 0x80)`
			`break;`
			`us++;`
			`slen--;`
			`}`
			`}`
			`}`
			`done:`
			`result = (char )us;`
			`return to - start;`
			`}`

			`Boolean UTF8Decoder::convertOffset(unsigned long &n) const`
			`{`
			`if (hadByteOrderMark_)`
			`n += 3;`

			`return true;`
			`}`

			`UTF8Encoder::UTF8Encoder()`
			`{`
			`}`

			`void UTF8Encoder::output(const Char s, size_t n, OutputByteStream sb)`
			`{`
			`for (; n > 0; s++, n--) {`
			`Char c = *s;`
			`if (c < min2)`
			`sb->sputc((unsigned char)c);`
			`else if (c < min3) {`
			`sb->sputc((c >> 6) \| cval2);`
			`sb->sputc((c & 0x3f) \| 0x80);`
			`}`
			`else if (c < min4) {`
			`sb->sputc((c >> 12) \| cval3);`
			`sb->sputc(((c >> 6) & 0x3f) \| 0x80);`
			`sb->sputc((c & 0x3f) \| 0x80);`
			`}`
			`else if (c < min5) {`
			`sb->sputc((c >> 18) \| cval4);`
			`sb->sputc(((c >> 12) & 0x3f) \| 0x80);`
			`sb->sputc(((c >> 6) & 0x3f) \| 0x80);`
			`sb->sputc((c & 0x3f) \| 0x80);`
			`}`
			`else if (c < min6) {`
			`sb->sputc((c >> 24) \| cval5);`
			`sb->sputc(((c >> 18) & 0x3f) \| 0x80);`
			`sb->sputc(((c >> 12) & 0x3f) \| 0x80);`
			`sb->sputc(((c >> 6) & 0x3f) \| 0x80);`
			`sb->sputc((c & 0x3f) \| 0x80);`
			`}`
			`else if (c <= max6) {`
			`sb->sputc((c >> 30) \| cval6);`
			`sb->sputc(((c >> 24) & 0x3f) \| 0x80);`
			`sb->sputc(((c >> 18) & 0x3f) \| 0x80);`
			`sb->sputc(((c >> 12) & 0x3f) \| 0x80);`
			`sb->sputc(((c >> 6) & 0x3f) \| 0x80);`
			`sb->sputc((c & 0x3f) \| 0x80);`
			`}`
			`}`
			`}`
			`#ifdef SP_NAMESPACE`
			`}`
			`#endif`

			`#else /* not SP_MULTI_BYTE */`

			`#ifndef __GNUG__`
			`static char non_empty_translation_unit; // sigh`
			`#endif`

			`#endif /* not SP_MULTI_BYTE */`