/*
 * gstmpegtsdescriptor.c - 
 * Copyright (C) 2013 Edward Hervey
 * 
 * Authors:
 *   Edward Hervey <edward@collabora.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */
#include <stdlib.h>
#include <string.h>

#include "mpegts.h"
#include "gstmpegts-private.h"

/**
 * SECTION:gstmpegtsdescriptor
 * @short_description: Convenience library for using MPEG-TS descriptors
 *
 * For more details, refer to the ITU H.222.0 or ISO/IEC 13818-1 specifications
 * and other specifications mentionned in the documentation.
 */

/*
 * TODO
 *
 * * Add common validation code for data presence and minimum/maximum expected
 *   size.
 * * Add parsing methods for the following descriptors that were previously 
 *   handled in mpegtsbase:
 *   * GST_MTS_DESC_DVB_DATA_BROADCAST_ID
 *   * GST_MTS_DESC_DVB_DATA_BROADCAST
 *   * GST_MTS_DESC_DVB_CAROUSEL_IDENTIFIER
 *   * GST_MTS_DESC_DVB_STREAM_IDENTIFIER
 *   * GST_MTS_DESC_DVB_EXTENDED_EVENT
 *   * GST_MTS_DESC_DVB_COMPONENT
 *   * GST_MTS_DESC_DVB_TERRESTRIAL_DELIVERY_SYSTEM
 *   * GST_MTS_DESC_DVB_FREQUENCY_LIST
 */

#define MAX_KNOWN_ICONV 25

static GIConv __iconvs[MAX_KNOWN_ICONV];

/* All these conversions will be to UTF8 */
typedef enum
{
  _ICONV_UNKNOWN = -1,
  _ICONV_ISO8859_1,
  _ICONV_ISO8859_2,
  _ICONV_ISO8859_3,
  _ICONV_ISO8859_4,
  _ICONV_ISO8859_5,
  _ICONV_ISO8859_6,
  _ICONV_ISO8859_7,
  _ICONV_ISO8859_8,
  _ICONV_ISO8859_9,
  _ICONV_ISO8859_10,
  _ICONV_ISO8859_11,
  _ICONV_ISO8859_12,
  _ICONV_ISO8859_13,
  _ICONV_ISO8859_14,
  _ICONV_ISO8859_15,
  _ICONV_ISO10646_UC2,
  _ICONV_EUC_KR,
  _ICONV_GB2312,
  _ICONV_UTF_16BE,
  _ICONV_ISO10646_UTF8,
  _ICONV_ISO6937,
  /* Insert more here if needed */
  _ICONV_MAX
} LocalIconvCode;

static const gchar *iconvtablename[] = {
  "iso-8859-1",
  "iso-8859-2",
  "iso-8859-3",
  "iso-8859-4",
  "iso-8859-5",
  "iso-8859-6",
  "iso-8859-7",
  "iso-8859-8",
  "iso-8859-9",
  "iso-8859-10",
  "iso-8859-11",
  "iso-8859-12",
  "iso-8859-13",
  "iso-8859-14",
  "iso-8859-15",
  "ISO-10646/UCS2",
  "EUC-KR",
  "GB2312",
  "UTF-16BE",
  "ISO-10646/UTF8",
  "iso6937"
      /* Insert more here if needed */
};

void
__initialize_descriptors (void)
{
  guint i;

  /* Initialize converters */
  /* FIXME : How/when should we close them ??? */
  for (i = 0; i < MAX_KNOWN_ICONV; i++)
    __iconvs[i] = ((GIConv) - 1);
}

/*
 * @text: The text you want to get the encoding from
 * @start_text: Location where the beginning of the actual text is stored
 * @is_multibyte: Location where information whether it's a multibyte encoding
 * or not is stored
 * @returns: GIconv for conversion or NULL
 */
static LocalIconvCode
get_encoding (const gchar * text, guint * start_text, gboolean * is_multibyte)
{
  LocalIconvCode encoding;
  guint8 firstbyte;

  *is_multibyte = FALSE;
  *start_text = 0;

  firstbyte = (guint8) text[0];

  /* A wrong value */
  g_return_val_if_fail (firstbyte != 0x00, _ICONV_UNKNOWN);

  if (firstbyte <= 0x0B) {
    /* 0x01 => iso 8859-5 */
    encoding = firstbyte + _ICONV_ISO8859_4;
    *start_text = 1;
    goto beach;
  }

  /* ETSI EN 300 468, "Selection of character table" */
  switch (firstbyte) {
    case 0x0C:
    case 0x0D:
    case 0x0E:
    case 0x0F:
      /* RESERVED */
      encoding = _ICONV_UNKNOWN;
      break;
    case 0x10:
    {
      guint16 table;

      table = GST_READ_UINT16_BE (text + 1);

      if (table < 17)
        encoding = _ICONV_UNKNOWN + table;
      else
        encoding = _ICONV_UNKNOWN;;
      *start_text = 3;
      break;
    }
    case 0x11:
      encoding = _ICONV_ISO10646_UC2;
      *start_text = 1;
      *is_multibyte = TRUE;
      break;
    case 0x12:
      /*  EUC-KR implements KSX1001 */
      encoding = _ICONV_EUC_KR;
      *start_text = 1;
      *is_multibyte = TRUE;
      break;
    case 0x13:
      encoding = _ICONV_GB2312;
      *start_text = 1;
      break;
    case 0x14:
      encoding = _ICONV_UTF_16BE;
      *start_text = 1;
      *is_multibyte = TRUE;
      break;
    case 0x15:
      /* TODO : Where does this come from ?? */
      encoding = _ICONV_ISO10646_UTF8;
      *start_text = 1;
      break;
    case 0x16:
    case 0x17:
    case 0x18:
    case 0x19:
    case 0x1A:
    case 0x1B:
    case 0x1C:
    case 0x1D:
    case 0x1E:
    case 0x1F:
      /* RESERVED */
      encoding = _ICONV_UNKNOWN;
      break;
    default:
      encoding = _ICONV_ISO6937;
      break;
  }

beach:
  GST_DEBUG
      ("Found encoding %d, first byte is 0x%02x, start_text: %u, is_multibyte: %d",
      encoding, firstbyte, *start_text, *is_multibyte);

  return encoding;
}

/*
 * @text: The text to convert. It may include pango markup (<b> and </b>)
 * @length: The length of the string -1 if it's nul-terminated
 * @start: Where to start converting in the text
 * @encoding: The encoding of text
 * @is_multibyte: Whether the encoding is a multibyte encoding
 * @error: The location to store the error, or NULL to ignore errors
 * @returns: UTF-8 encoded string
 *
 * Convert text to UTF-8.
 */
static gchar *
convert_to_utf8 (const gchar * text, gint length, guint start,
    GIConv giconv, gboolean is_multibyte, GError ** error)
{
  gchar *new_text;
  gchar *tmp, *pos;
  gint i;

  text += start;

  pos = tmp = g_malloc (length * 2);

  if (is_multibyte) {
    if (length == -1) {
      while (*text != '\0') {
        guint16 code = GST_READ_UINT16_BE (text);

        switch (code) {
          case 0xE086:         /* emphasis on */
          case 0xE087:         /* emphasis off */
            /* skip it */
            break;
          case 0xE08A:{
            pos[0] = 0x00;      /* 0x00 0x0A is a new line */
            pos[1] = 0x0A;
            pos += 2;
            break;
          }
          default:
            pos[0] = text[0];
            pos[1] = text[1];
            pos += 2;
            break;
        }

        text += 2;
      }
    } else {
      for (i = 0; i < length; i += 2) {
        guint16 code = GST_READ_UINT16_BE (text);

        switch (code) {
          case 0xE086:         /* emphasis on */
          case 0xE087:         /* emphasis off */
            /* skip it */
            break;
          case 0xE08A:{
            pos[0] = 0x00;      /* 0x00 0x0A is a new line */
            pos[1] = 0x0A;
            pos += 2;
            break;
          }
          default:
            pos[0] = text[0];
            pos[1] = text[1];
            pos += 2;
            break;
        }

        text += 2;
      }
    }
  } else {
    if (length == -1) {
      while (*text != '\0') {
        guint8 code = (guint8) (*text);

        switch (code) {
          case 0x86:           /* emphasis on */
          case 0x87:           /* emphasis off */
            /* skip it */
            break;
          case 0x8A:
            *pos = '\n';
            pos += 1;
            break;
          default:
            *pos = *text;
            pos += 1;
            break;
        }

        text++;
      }
    } else {
      for (i = 0; i < length; i++) {
        guint8 code = (guint8) (*text);

        switch (code) {
          case 0x86:           /* emphasis on */
          case 0x87:           /* emphasis off */
            /* skip it */
            break;
          case 0x8A:
            *pos = '\n';
            pos += 1;
            break;
          default:
            *pos = *text;
            pos += 1;
            break;
        }

        text++;
      }
    }
  }

  if (pos > tmp) {
    gsize bread = 0;
    new_text =
        g_convert_with_iconv (tmp, pos - tmp, giconv, &bread, NULL, error);
    GST_DEBUG ("Converted to : %s", new_text);
  } else {
    new_text = g_strdup ("");
  }

  g_free (tmp);

  return new_text;
}

gchar *
get_encoding_and_convert (const gchar * text, guint length)
{
  GError *error = NULL;
  gchar *converted_str;
  guint start_text = 0;
  gboolean is_multibyte;
  LocalIconvCode encoding;
  GIConv giconv = (GIConv) - 1;

  g_return_val_if_fail (text != NULL, NULL);

  if (text == NULL || length == 0)
    return g_strdup ("");

  encoding = get_encoding (text, &start_text, &is_multibyte);

  if (encoding > _ICONV_UNKNOWN && encoding < _ICONV_MAX) {
    GST_DEBUG ("Encoding %s", iconvtablename[encoding]);
    if (__iconvs[encoding] == ((GIConv) - 1))
      __iconvs[encoding] = g_iconv_open ("utf-8", iconvtablename[encoding]);
    giconv = __iconvs[encoding];
  }

  if (giconv == ((GIConv) - 1)) {
    GST_WARNING ("Could not detect encoding");
    converted_str = g_strndup (text, length);
    goto beach;
  }

  converted_str = convert_to_utf8 (text, length - start_text, start_text,
      giconv, is_multibyte, &error);
  if (error != NULL) {
    GST_WARNING ("Could not convert string: %s", error->message);
    if (converted_str)
      g_free (converted_str);
    g_error_free (error);
    error = NULL;

    if (encoding >= _ICONV_ISO8859_2 && encoding <= _ICONV_ISO8859_15) {
      /* Sometimes using the standard 8859-1 set fixes issues */
      GST_DEBUG ("Encoding %s", iconvtablename[_ICONV_ISO8859_1]);
      if (__iconvs[_ICONV_ISO8859_1] == (GIConv) - 1)
        __iconvs[_ICONV_ISO8859_1] =
            g_iconv_open ("utf-8", iconvtablename[_ICONV_ISO8859_1]);
      giconv = __iconvs[_ICONV_ISO8859_1];

      GST_INFO ("Trying encoding ISO 8859-1");
      converted_str = convert_to_utf8 (text, length, 1, giconv, FALSE, &error);
      if (error != NULL) {
        GST_WARNING
            ("Could not convert string while assuming encoding ISO 8859-1: %s",
            error->message);
        g_error_free (error);
        goto failed;
      }
    } else if (encoding == _ICONV_ISO6937) {

      /* The first part of ISO 6937 is identical to ISO 8859-9, but
       * they differ in the second part. Some channels don't
       * provide the first byte that indicates ISO 8859-9 encoding.
       * If decoding from ISO 6937 failed, we try ISO 8859-9 here.
       */
      if (__iconvs[_ICONV_ISO8859_9] == (GIConv) - 1)
        __iconvs[_ICONV_ISO8859_9] =
            g_iconv_open ("utf-8", iconvtablename[_ICONV_ISO8859_9]);
      giconv = __iconvs[_ICONV_ISO8859_9];

      GST_INFO ("Trying encoding ISO 8859-9");
      converted_str = convert_to_utf8 (text, length, 0, giconv, FALSE, &error);
      if (error != NULL) {
        GST_WARNING
            ("Could not convert string while assuming encoding ISO 8859-9: %s",
            error->message);
        g_error_free (error);
        goto failed;
      }
    } else
      goto failed;
  }

beach:
  return converted_str;

failed:
  {
    text += start_text;
    return g_strndup (text, length - start_text);
  }
}

/**
 * gst_mpegts_parse_descriptors:
 * @buffer: (transfer none): descriptors to parse
 * @buf_len: Size of @buffer
 *
 * Parses the descriptors present in @buffer and returns them as an
 * array.
 *
 * Note: The data provided in @buffer will not be copied.
 *
 * Returns: (transfer full) (element-type GstMpegTsDescriptor): an
 * array of the parsed descriptors or %NULL if there was an error.
 * Release with #g_array_unref when done with it.
 */
GArray *
gst_mpegts_parse_descriptors (guint8 * buffer, gsize buf_len)
{
  GArray *res;
  guint8 length;
  guint8 *data;
  guint i, nb_desc = 0;

  /* fast-path */
  if (buf_len == 0)
    return g_array_new (FALSE, FALSE, sizeof (GstMpegTsDescriptor));

  data = buffer;

  while (data - buffer < buf_len) {
    data++;                     /* skip tag */
    length = *data++;

    if (data - buffer > buf_len) {
      GST_WARNING ("invalid descriptor length %d now at %d max %"
          G_GSIZE_FORMAT, length, (gint) (data - buffer), buf_len);
      return NULL;
    }

    data += length;
    nb_desc++;
  }

  GST_DEBUG ("Saw %d descriptors, read %" G_GSIZE_FORMAT " bytes",
      nb_desc, data - buffer);

  if (data - buffer != buf_len) {
    GST_WARNING ("descriptors size %d expected %" G_GSIZE_FORMAT,
        (gint) (data - buffer), buf_len);
    return NULL;
  }

  res = g_array_sized_new (FALSE, FALSE, sizeof (GstMpegTsDescriptor), nb_desc);

  data = buffer;

  for (i = 0; i < nb_desc; i++) {
    GstMpegTsDescriptor *desc = &g_array_index (res, GstMpegTsDescriptor, i);

    desc->descriptor_data = data;
    desc->descriptor_tag = *data++;
    desc->descriptor_length = *data++;
    GST_LOG ("descriptor 0x%02x length:%d", desc->descriptor_tag,
        desc->descriptor_length);
    GST_MEMDUMP ("descriptor", desc->descriptor_data + 2,
        desc->descriptor_length);
    /* Adjust for extended descriptors */
    if (G_UNLIKELY (desc->descriptor_tag == 0x7f)) {
      desc->descriptor_tag_extension = *data++;
      desc->descriptor_length -= 1;
    }
    data += desc->descriptor_length;
  }

  res->len = nb_desc;

  return res;
}

/**
 * gst_mpegts_find_descriptor:
 * @descriptors: (element-type GstMpegTsDescriptor) (transfer none): an array
 * of #GstMpegTsDescriptor
 * @tag: the tag to look for
 *
 * Finds the first descriptor of type @tag in the array.
 *
 * Note: To look for descriptors that can be present more than once in an
 * array of descriptors, iterate the #GArray manually.
 *
 * Returns: (transfer none): the first descriptor matchin @tag, else %NULL.
 */
const GstMpegTsDescriptor *
gst_mpegts_find_descriptor (GArray * descriptors, guint8 tag)
{
  guint i, nb_desc;

  g_return_val_if_fail (descriptors != NULL, NULL);

  nb_desc = descriptors->len;
  for (i = 0; i < nb_desc; i++) {
    GstMpegTsDescriptor *desc =
        &g_array_index (descriptors, GstMpegTsDescriptor, i);
    if (desc->descriptor_tag == tag)
      return (const GstMpegTsDescriptor *) desc;
  }
  return NULL;
}

static GstMpegTsDescriptor *
_copy_descriptor (GstMpegTsDescriptor * desc)
{
  GstMpegTsDescriptor *copy;

  copy = g_new0 (GstMpegTsDescriptor, 1);
  copy->descriptor_tag = desc->descriptor_tag;
  copy->descriptor_tag_extension = desc->descriptor_tag_extension;
  copy->descriptor_length = desc->descriptor_length;
  copy->descriptor_data =
      g_memdup (desc->descriptor_data, desc->descriptor_length);

  return copy;
}

/* This freefunc will only ever be used with descriptors returned by the 
 * above function. That is why we free the descriptor data (unlike the
 * descriptors created in _parse_descriptors()) */
static void
_free_descriptor (GstMpegTsDescriptor * desc)
{
  g_free ((gpointer) desc->descriptor_data);
  g_free (desc);
}

G_DEFINE_BOXED_TYPE (GstMpegTsDescriptor, gst_mpegts_descriptor,
    (GBoxedCopyFunc) _copy_descriptor, (GBoxedFreeFunc) _free_descriptor);

/* GST_MTS_DESC_ISO_639_LANGUAGE (0x0A) */
/**
 * gst_mpegts_descriptor_parse_iso_639_language:
 * @descriptor: a %GST_MTS_DESC_ISO_639_LANGUAGE #GstMpegTsDescriptor
 * @res: (out) (transfer none): the #GstMpegTsISO639LanguageDescriptor to fill
 *
 * Extracts the iso 639-2 language information from @descriptor.
 *
 * Note: Use #gst_tag_get_language_code if you want to get the the
 * ISO 639-1 language code from the returned ISO 639-2 one.
 *
 * Returns: %TRUE if parsing succeeded, else %FALSE.
 */
gboolean
gst_mpegts_descriptor_parse_iso_639_language (const GstMpegTsDescriptor *
    descriptor, GstMpegTsISO639LanguageDescriptor * res)
{
  guint i;
  guint8 *data;

  g_return_val_if_fail (descriptor != NULL
      && descriptor->descriptor_data != NULL, FALSE);
  g_return_val_if_fail (res != NULL, FALSE);
  g_return_val_if_fail (descriptor->descriptor_tag == 0x0A, FALSE);

  data = (guint8 *) descriptor->descriptor_data + 2;
  /* Each language is 3 + 1 bytes */
  res->nb_language = descriptor->descriptor_length / 4;
  for (i = 0; i < res->nb_language; i++) {
    memcpy (res->language[i], data, 3);
    res->audio_type[i] = data[3];
    data += 4;
  }
  return TRUE;

}

/**
 * gst_mpegts_descriptor_parse_logical_channel:
 * @descriptor: a %GST_MTS_DESC_DTG_LOGICAL_CHANNEL #GstMpegTsDescriptor
 * @res: (out) (transfer none): the #GstMpegTsLogicalChannelDescriptor to fill
 *
 * Extracts the logical channels from @descriptor.
 *
 * Returns: %TRUE if parsing succeeded, else %FALSE.
 */
gboolean
gst_mpegts_descriptor_parse_logical_channel (const GstMpegTsDescriptor *
    descriptor, GstMpegTsLogicalChannelDescriptor * res)
{
  guint i;
  guint8 *data;

  g_return_val_if_fail (descriptor != NULL
      && descriptor->descriptor_data != NULL, FALSE);
  g_return_val_if_fail (descriptor->descriptor_tag == 0x83, FALSE);

  data = (guint8 *) descriptor->descriptor_data;
  res->nb_channels = descriptor->descriptor_length / 4;

  for (i = 0; i < res->nb_channels; i++) {
    res->channels[i].service_id = GST_READ_UINT16_BE (data);
    data += 2;
    res->channels[i].visible_service = *data >> 7;
    res->channels[i].logical_channel_number =
        GST_READ_UINT16_BE (data) & 0x03ff;
    data += 2;
  }

  return TRUE;
}