mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-02-20 21:16:24 +00:00
gst/mpegtsparse/mpegtspacketizer.c: Handle character sets in strings coming from DVB SI according to the DVB SI spec.
Original commit message from CVS: patch by: Sebastian Pölsterl * gst/mpegtsparse/mpegtspacketizer.c: Handle character sets in strings coming from DVB SI according to the DVB SI spec.
This commit is contained in:
parent
10574b5d4c
commit
7ce74ccd5d
2 changed files with 280 additions and 25 deletions
|
@ -1,3 +1,11 @@
|
|||
2008-06-13 Zaheer Abbas Merali <zaheerabbas at merali dot org>
|
||||
|
||||
patch by: Sebastian Pölsterl
|
||||
|
||||
* gst/mpegtsparse/mpegtspacketizer.c:
|
||||
Handle character sets in strings coming from DVB SI according
|
||||
to the DVB SI spec.
|
||||
|
||||
2008-06-13 Stefan Kost <ensonic@users.sf.net>
|
||||
|
||||
* gst/replaygain/gstrganalysis.c:
|
||||
|
|
|
@ -31,6 +31,11 @@ G_DEFINE_TYPE (MpegTSPacketizer, mpegts_packetizer, G_TYPE_OBJECT);
|
|||
|
||||
static void mpegts_packetizer_dispose (GObject * object);
|
||||
static void mpegts_packetizer_finalize (GObject * object);
|
||||
static gchar *convert_to_utf8 (const gchar * text, gint length, guint start,
|
||||
const gchar * encoding, gboolean is_multibyte, GError ** error);
|
||||
static gchar *get_encoding (const gchar * text, guint * start_text,
|
||||
gboolean * is_multibyte);
|
||||
static gchar *get_encoding_and_convert (const gchar * text, guint length);
|
||||
|
||||
#define CONTINUITY_UNSET 255
|
||||
#define MAX_CONTINUITY 15
|
||||
|
@ -606,11 +611,9 @@ mpegts_packetizer_parse_nit (MpegTSPacketizer * packetizer,
|
|||
DESC_DVB_NETWORK_NAME_length (networkname_descriptor);
|
||||
gchar *networkname =
|
||||
(gchar *) DESC_DVB_NETWORK_NAME_text (networkname_descriptor);
|
||||
if (networkname[0] < 0x20) {
|
||||
networkname_length -= 1;
|
||||
networkname += 1;
|
||||
}
|
||||
networkname_tmp = g_strndup (networkname, networkname_length);
|
||||
|
||||
networkname_tmp =
|
||||
get_encoding_and_convert (networkname, networkname_length);
|
||||
gst_structure_set (nit, "network-name", G_TYPE_STRING, networkname_tmp,
|
||||
NULL);
|
||||
g_free (networkname_tmp);
|
||||
|
@ -1213,17 +1216,13 @@ mpegts_packetizer_parse_sdt (MpegTSPacketizer * packetizer,
|
|||
(gchar *) DESC_DVB_SERVICE_name_text (service_descriptor);
|
||||
if (servicename_length + serviceprovider_name_length + 2 <=
|
||||
DESC_LENGTH (service_descriptor)) {
|
||||
if (servicename[0] < 0x20) {
|
||||
servicename_length -= 1;
|
||||
servicename += 1;
|
||||
}
|
||||
if (serviceprovider_name[0] < 0x20) {
|
||||
serviceprovider_name_length -= 1;
|
||||
serviceprovider_name += 1;
|
||||
}
|
||||
servicename_tmp = g_strndup (servicename, servicename_length);
|
||||
|
||||
servicename_tmp =
|
||||
get_encoding_and_convert (servicename, servicename_length);
|
||||
serviceprovider_name_tmp =
|
||||
g_strndup (serviceprovider_name, serviceprovider_name_length);
|
||||
get_encoding_and_convert (serviceprovider_name,
|
||||
serviceprovider_name_length);
|
||||
|
||||
gst_structure_set (service, "name", G_TYPE_STRING, servicename_tmp,
|
||||
NULL);
|
||||
gst_structure_set (service, "provider-name", G_TYPE_STRING,
|
||||
|
@ -1444,17 +1443,12 @@ mpegts_packetizer_parse_eit (MpegTSPacketizer * packetizer,
|
|||
(gchar *) DESC_DVB_SHORT_EVENT_description_text (event_descriptor);
|
||||
if (eventname_length + eventdescription_length + 2 <=
|
||||
DESC_LENGTH (event_descriptor)) {
|
||||
if (eventname[0] < 0x20) {
|
||||
eventname_length -= 1;
|
||||
eventname += 1;
|
||||
}
|
||||
if (eventdescription[0] < 0x20) {
|
||||
eventdescription_length -= 1;
|
||||
eventdescription += 1;
|
||||
}
|
||||
eventname_tmp = g_strndup (eventname, eventname_length),
|
||||
|
||||
eventname_tmp =
|
||||
get_encoding_and_convert (eventname, eventname_length),
|
||||
eventdescription_tmp =
|
||||
g_strndup (eventdescription, eventdescription_length);
|
||||
get_encoding_and_convert (eventdescription,
|
||||
eventdescription_length);
|
||||
|
||||
gst_structure_set (event, "name", G_TYPE_STRING, eventname_tmp, NULL);
|
||||
gst_structure_set (event, "description", G_TYPE_STRING,
|
||||
|
@ -1933,3 +1927,256 @@ mpegts_packetizer_init_debug ()
|
|||
GST_DEBUG_CATEGORY_INIT (mpegts_packetizer_debug, "mpegtspacketizer", 0,
|
||||
"MPEG transport stream parser");
|
||||
}
|
||||
|
||||
/**
|
||||
* @text: The text you want to get the encoding from
|
||||
* @start_text: Location where the beginning of the actual text is stored
|
||||
* @is_multibyte: Location where information whether it's a multibyte encoding
|
||||
* or not is stored
|
||||
* @returns: Name of encoding or NULL of encoding could not be detected.
|
||||
*
|
||||
* The returned string should be freed with g_free () when no longer needed.
|
||||
*/
|
||||
static gchar *
|
||||
get_encoding (const gchar * text, guint * start_text, gboolean * is_multibyte)
|
||||
{
|
||||
gchar *encoding;
|
||||
guint8 firstbyte;
|
||||
|
||||
g_return_val_if_fail (text != NULL, NULL);
|
||||
|
||||
firstbyte = (guint8) text[0];
|
||||
|
||||
if (firstbyte == 0x01) {
|
||||
encoding = g_strdup ("iso8859-5");
|
||||
*start_text = 1;
|
||||
*is_multibyte = FALSE;
|
||||
} else if (firstbyte == 0x02) {
|
||||
encoding = g_strdup ("iso8859-6");
|
||||
*start_text = 1;
|
||||
*is_multibyte = FALSE;
|
||||
} else if (firstbyte == 0x03) {
|
||||
encoding = g_strdup ("iso8859-7");
|
||||
*start_text = 1;
|
||||
*is_multibyte = FALSE;
|
||||
} else if (firstbyte == 0x04) {
|
||||
encoding = g_strdup ("iso8859-8");
|
||||
*start_text = 1;
|
||||
*is_multibyte = FALSE;
|
||||
} else if (firstbyte == 0x05) {
|
||||
encoding = g_strdup ("iso8859-9");
|
||||
*start_text = 1;
|
||||
*is_multibyte = FALSE;
|
||||
} else if (firstbyte >= 0x20) {
|
||||
encoding = g_strdup ("iso6937");
|
||||
*start_text = 0;
|
||||
*is_multibyte = FALSE;
|
||||
} else if (firstbyte == 0x10) {
|
||||
guint16 table;
|
||||
gchar table_str[6];
|
||||
|
||||
text++;
|
||||
table = GST_READ_UINT16_BE (text);
|
||||
g_snprintf (table_str, 6, "%d", table);
|
||||
|
||||
encoding = g_strconcat ("iso8859-", table_str, NULL);
|
||||
*start_text = 3;
|
||||
*is_multibyte = FALSE;
|
||||
} else if (firstbyte == 0x11) {
|
||||
encoding = g_strdup ("ISO-10646/UCS2");
|
||||
*start_text = 1;
|
||||
*is_multibyte = TRUE;
|
||||
} else if (firstbyte == 0x12) {
|
||||
// That's korean encoding.
|
||||
// The spec says it's encoded in KSC 5601, but iconv only knows KSC 5636.
|
||||
// Couldn't find any information about either of them.
|
||||
encoding = NULL;
|
||||
*start_text = 1;
|
||||
*is_multibyte = TRUE;
|
||||
} else {
|
||||
// reserved
|
||||
encoding = NULL;
|
||||
}
|
||||
|
||||
return encoding;
|
||||
}
|
||||
|
||||
/**
|
||||
* @text: The text to convert. It may include pango markup (<b> and </b>)
|
||||
* @length: The length of the string -1 if it's nul-terminated
|
||||
* @start: Where to start converting in the text
|
||||
* @encoding: The encoding of text
|
||||
* @is_multibyte: Whether the encoding is a multibyte encoding
|
||||
* @error: The location to store the error, or NULL to ignore errors
|
||||
* @returns: UTF-8 encoded string
|
||||
*
|
||||
* Convert text to UTF-8.
|
||||
*/
|
||||
static gchar *
|
||||
convert_to_utf8 (const gchar * text, gint length, guint start,
|
||||
const gchar * encoding, gboolean is_multibyte, GError ** error)
|
||||
{
|
||||
gchar *new_text;
|
||||
GByteArray *sb;
|
||||
gint i;
|
||||
|
||||
g_return_val_if_fail (text != NULL, NULL);
|
||||
g_return_val_if_fail (encoding != NULL, NULL);
|
||||
|
||||
text += start;
|
||||
|
||||
sb = g_byte_array_sized_new (length * 1.1);
|
||||
|
||||
if (is_multibyte) {
|
||||
if (length == -1) {
|
||||
while (*text != '\0') {
|
||||
guint16 code = GST_READ_UINT16_BE (text);
|
||||
|
||||
switch (code) {
|
||||
case 0xE086:{
|
||||
guint8 emph_on[] = { 0x3C, 0x00, // <
|
||||
0x62, 0x00, // b
|
||||
0x3E, 0x00 // >
|
||||
};
|
||||
g_byte_array_append (sb, emph_on, 6);
|
||||
break;
|
||||
}
|
||||
case 0xE087:{
|
||||
guint8 emph_on[] = { 0x3C, 0x00, // <
|
||||
0x2F, 0x00, // /
|
||||
0x62, 0x00, // b
|
||||
0x3E, 0x00 // >
|
||||
};
|
||||
g_byte_array_append (sb, emph_on, 8);
|
||||
break;
|
||||
}
|
||||
case 0xE08A:{
|
||||
guint8 nl[] = { 0x0A, 0x00 }; // new line
|
||||
g_byte_array_append (sb, nl, 2);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
g_byte_array_append (sb, (guint8 *) text, 2);
|
||||
break;
|
||||
}
|
||||
|
||||
text += 2;
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < length; i += 2) {
|
||||
guint16 code = GST_READ_UINT16_BE (text);
|
||||
|
||||
switch (code) {
|
||||
case 0xE086:{
|
||||
guint8 emph_on[] = { 0x3C, 0x00, // <
|
||||
0x62, 0x00, // b
|
||||
0x3E, 0x00 // >
|
||||
};
|
||||
g_byte_array_append (sb, emph_on, 6);
|
||||
break;
|
||||
}
|
||||
case 0xE087:{
|
||||
guint8 emph_on[] = { 0x3C, 0x00, // <
|
||||
0x2F, 0x00, // /
|
||||
0x62, 0x00, // b
|
||||
0x3E, 0x00 // >
|
||||
};
|
||||
g_byte_array_append (sb, emph_on, 8);
|
||||
break;
|
||||
}
|
||||
case 0xE08A:{
|
||||
guint8 nl[] = { 0x0A, 0x00 }; // new line
|
||||
g_byte_array_append (sb, nl, 2);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
g_byte_array_append (sb, (guint8 *) text, 2);
|
||||
break;
|
||||
}
|
||||
|
||||
text += 2;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (length == -1) {
|
||||
while (*text != '\0') {
|
||||
guint8 code = (guint8) (*text);
|
||||
|
||||
switch (code) {
|
||||
case 0x86:
|
||||
g_byte_array_append (sb, (guint8 *) "<b>", 3);
|
||||
break;
|
||||
case 0x87:
|
||||
g_byte_array_append (sb, (guint8 *) "</b>", 4);
|
||||
break;
|
||||
case 0x8A:
|
||||
g_byte_array_append (sb, (guint8 *) "\n", 1);
|
||||
break;
|
||||
default:
|
||||
g_byte_array_append (sb, &code, 1);
|
||||
break;
|
||||
}
|
||||
|
||||
text++;
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < length; i++) {
|
||||
guint8 code = (guint8) (*text);
|
||||
|
||||
switch (code) {
|
||||
case 0x86:
|
||||
g_byte_array_append (sb, (guint8 *) "<b>", 3);
|
||||
break;
|
||||
case 0x87:
|
||||
g_byte_array_append (sb, (guint8 *) "</b>", 4);
|
||||
break;
|
||||
case 0x8A:
|
||||
g_byte_array_append (sb, (guint8 *) "\n", 1);
|
||||
break;
|
||||
default:
|
||||
g_byte_array_append (sb, &code, 1);
|
||||
break;
|
||||
}
|
||||
|
||||
text++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
new_text =
|
||||
g_convert ((gchar *) sb->data, sb->len, "utf-8", encoding, NULL, NULL,
|
||||
error);
|
||||
|
||||
g_byte_array_free (sb, TRUE);
|
||||
|
||||
return new_text;
|
||||
}
|
||||
|
||||
static gchar *
|
||||
get_encoding_and_convert (const gchar * text, guint length)
|
||||
{
|
||||
GError *error = NULL;
|
||||
gchar *converted_str;
|
||||
gchar *encoding;
|
||||
guint start_text = 0;
|
||||
gboolean is_multibyte;
|
||||
|
||||
encoding = get_encoding (text, &start_text, &is_multibyte);
|
||||
|
||||
if (encoding == NULL) {
|
||||
converted_str = g_strndup (text, length);
|
||||
} else {
|
||||
converted_str = convert_to_utf8 (text, length - start_text, start_text,
|
||||
encoding, is_multibyte, &error);
|
||||
if (error != NULL) {
|
||||
g_critical ("Could not convert string: %s", error->message);
|
||||
g_error_free (error);
|
||||
text += start_text;
|
||||
converted_str = g_strndup (text, length - start_text);
|
||||
}
|
||||
|
||||
g_free (encoding);
|
||||
}
|
||||
|
||||
return converted_str;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue