210 lines
5 KiB
C++
210 lines
5 KiB
C++
|
// Copyright (c) 1997 James Clark
|
||
|
// See the file COPYING for copying permission.
|
||
|
|
||
|
#include "splib.h"
|
||
|
|
||
|
#include "TranslateCodingSystem.h"
|
||
|
#include "types.h"
|
||
|
#include "Owner.h"
|
||
|
|
||
|
#ifdef SP_NAMESPACE
|
||
|
namespace SP_NAMESPACE {
|
||
|
#endif
|
||
|
|
||
|
class TranslateDecoder : public Decoder {
|
||
|
public:
|
||
|
TranslateDecoder(Decoder *, const ConstPtr<CharMapResource<Char> > &);
|
||
|
size_t decode(Char *, const char *, size_t, const char **);
|
||
|
Boolean convertOffset(unsigned long &offset) const;
|
||
|
private:
|
||
|
Owner<Decoder> decoder_;
|
||
|
ConstPtr<CharMapResource<Char> > map_;
|
||
|
};
|
||
|
|
||
|
TranslateDecoder::TranslateDecoder(Decoder *decoder,
|
||
|
const ConstPtr<CharMapResource<Char> > &map)
|
||
|
: Decoder(decoder->minBytesPerChar()), decoder_(decoder), map_(map)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
|
||
|
Boolean TranslateDecoder::convertOffset(unsigned long &offset) const
|
||
|
{
|
||
|
return decoder_->convertOffset(offset);
|
||
|
}
|
||
|
|
||
|
size_t TranslateDecoder::decode(Char *to, const char *s,
|
||
|
size_t slen, const char **rest)
|
||
|
{
|
||
|
size_t n = decoder_->decode(to, s, slen, rest);
|
||
|
for (size_t i = 0; i < n; i++)
|
||
|
to[i] = (*map_)[to[i]];
|
||
|
return n;
|
||
|
}
|
||
|
|
||
|
// FIXME set unencodeable handler for underlying encoder
|
||
|
|
||
|
class TranslateEncoder : public RecoveringEncoder {
|
||
|
public:
|
||
|
TranslateEncoder(Encoder *, const ConstPtr<CharMapResource<Char> > &map,
|
||
|
Char illegalChar);
|
||
|
void output(const Char *, size_t, OutputByteStream *);
|
||
|
void output(Char *, size_t, OutputByteStream *);
|
||
|
void startFile(OutputByteStream *);
|
||
|
private:
|
||
|
Owner<Encoder> encoder_;
|
||
|
ConstPtr<CharMapResource<Char> > map_;
|
||
|
Char illegalChar_;
|
||
|
enum { bufSize = 256 };
|
||
|
Char buf_[bufSize];
|
||
|
};
|
||
|
|
||
|
TranslateEncoder::TranslateEncoder(Encoder *encoder,
|
||
|
const ConstPtr<CharMapResource<Char> > &map,
|
||
|
Char illegalChar)
|
||
|
: encoder_(encoder), map_(map), illegalChar_(illegalChar)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
void TranslateEncoder::startFile(OutputByteStream *sbuf)
|
||
|
{
|
||
|
encoder_->startFile(sbuf);
|
||
|
}
|
||
|
|
||
|
void TranslateEncoder::output(const Char *s, size_t n, OutputByteStream *sbuf)
|
||
|
{
|
||
|
size_t j = 0;
|
||
|
for (; n > 0; s++, n--) {
|
||
|
Char c = (*map_)[*s];
|
||
|
if (c == illegalChar_) {
|
||
|
if (j > 0) {
|
||
|
encoder_->output(buf_, j, sbuf);
|
||
|
j = 0;
|
||
|
}
|
||
|
handleUnencodable(*s, sbuf);
|
||
|
}
|
||
|
else {
|
||
|
if (j >= bufSize) {
|
||
|
encoder_->output(buf_, j, sbuf);
|
||
|
j = 0;
|
||
|
}
|
||
|
buf_[j++] = c;
|
||
|
}
|
||
|
}
|
||
|
if (j > 0)
|
||
|
encoder_->output(buf_, j, sbuf);
|
||
|
}
|
||
|
|
||
|
void TranslateEncoder::output(Char *s, size_t n, OutputByteStream *sbuf)
|
||
|
{
|
||
|
size_t i = 0;
|
||
|
for (;;) {
|
||
|
if (i == n) {
|
||
|
if (n > 0)
|
||
|
encoder_->output(s, n, sbuf);
|
||
|
break;
|
||
|
}
|
||
|
Char c = (*map_)[s[i]];
|
||
|
if (c == illegalChar_) {
|
||
|
if (i > 0)
|
||
|
encoder_->output(s, i, sbuf);
|
||
|
handleUnencodable(s[i], sbuf);
|
||
|
i++;
|
||
|
s += i;
|
||
|
n -= i;
|
||
|
i = 0;
|
||
|
}
|
||
|
else
|
||
|
s[i++] = c;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
TranslateCodingSystem::TranslateCodingSystem(const CodingSystem *sub,
|
||
|
const Desc *desc,
|
||
|
const CharsetInfo *charset,
|
||
|
Char illegalChar,
|
||
|
Char replacementChar)
|
||
|
: sub_(sub),
|
||
|
desc_(desc),
|
||
|
charset_(charset),
|
||
|
illegalChar_(illegalChar),
|
||
|
replacementChar_(replacementChar)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
Decoder *TranslateCodingSystem::makeDecoder() const
|
||
|
{
|
||
|
if (decodeMap_.isNull()) {
|
||
|
CharMapResource<Char> *map = new CharMapResource<Char>(replacementChar_);
|
||
|
*(ConstPtr<CharMapResource<Char> > *)&decodeMap_ = map;
|
||
|
for (const Desc *d = desc_; d->number != CharsetRegistry::UNREGISTERED; d++) {
|
||
|
Owner<CharsetRegistry::Iter> iter(CharsetRegistry::makeIter(d->number));
|
||
|
if (iter) {
|
||
|
WideChar min;
|
||
|
WideChar max;
|
||
|
UnivChar univ;
|
||
|
while (iter->next(min, max, univ)) {
|
||
|
do {
|
||
|
ISet<WideChar> set;
|
||
|
WideChar sysChar;
|
||
|
WideChar count;
|
||
|
int n = charset_->univToDesc(univ, sysChar, set, count);
|
||
|
if (count > (max - min) + 1)
|
||
|
count = (max - min) + 1;
|
||
|
if (n) {
|
||
|
for (WideChar i = 0; i < count; i++)
|
||
|
map->setChar(min + d->add + i, sysChar + i);
|
||
|
}
|
||
|
min += count - 1;
|
||
|
univ += count;
|
||
|
} while (min++ != max);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return new TranslateDecoder(sub_->makeDecoder(), decodeMap_);
|
||
|
}
|
||
|
|
||
|
Encoder *TranslateCodingSystem::makeEncoder() const
|
||
|
{
|
||
|
if (encodeMap_.isNull()) {
|
||
|
CharMapResource<Char> *map = new CharMapResource<Char>(illegalChar_);
|
||
|
*(ConstPtr<CharMapResource<Char> > *)&encodeMap_ = map;
|
||
|
for (const Desc *d = desc_; d->number != CharsetRegistry::UNREGISTERED; d++) {
|
||
|
Owner<CharsetRegistry::Iter> iter(CharsetRegistry::makeIter(d->number));
|
||
|
if (iter) {
|
||
|
WideChar min;
|
||
|
WideChar max;
|
||
|
UnivChar univ;
|
||
|
while (iter->next(min, max, univ)) {
|
||
|
do {
|
||
|
ISet<WideChar> set;
|
||
|
WideChar sysChar;
|
||
|
WideChar count;
|
||
|
int n = charset_->univToDesc(univ, sysChar, set, count);
|
||
|
if (count > (max - min) + 1)
|
||
|
count = (max - min) + 1;
|
||
|
if (n) {
|
||
|
for (WideChar i = 0; i < count; i++)
|
||
|
map->setChar(sysChar + i, min + d->add + i);
|
||
|
}
|
||
|
min += count - 1;
|
||
|
univ += count;
|
||
|
} while (min++ != max);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return new TranslateEncoder(sub_->makeEncoder(), encodeMap_, illegalChar_);
|
||
|
}
|
||
|
|
||
|
unsigned TranslateCodingSystem::fixedBytesPerChar() const
|
||
|
{
|
||
|
return sub_->fixedBytesPerChar();
|
||
|
}
|
||
|
|
||
|
#ifdef SP_NAMESPACE
|
||
|
}
|
||
|
#endif
|
||
|
|