mpegtsparse: Ignore emphasis on/off bytes, and do fallback string encoding.

For fallback, try ISO 8859-9 encoding if ISO 6937 failed.
Add more debug calls.
This commit is contained in:
Sebastian Pölsterl 2009-10-06 18:36:15 +02:00 committed by Zaheer Abbas Merali
parent 1beac5913d
commit bf3cf014ab

View file

@ -2303,8 +2303,14 @@ get_encoding (const gchar * text, guint * start_text, gboolean * is_multibyte)
} else { } else {
// reserved // reserved
encoding = NULL; encoding = NULL;
*start_text = 0;
*is_multibyte = FALSE;
} }
GST_DEBUG
("Found encoding %s, first byte is 0x%02x, start_text: %u, is_multibyte: %d",
encoding, firstbyte, *start_text, *is_multibyte);
return encoding; return encoding;
} }
@ -2340,23 +2346,10 @@ convert_to_utf8 (const gchar * text, gint length, guint start,
guint16 code = GST_READ_UINT16_BE (text); guint16 code = GST_READ_UINT16_BE (text);
switch (code) { switch (code) {
case 0xE086:{ case 0xE086: /* emphasis on */
guint8 emph_on[] = { 0x3C, 0x00, // < case 0xE087: /* emphasis off */
0x62, 0x00, // b /* skip it */
0x3E, 0x00 // >
};
g_byte_array_append (sb, emph_on, 6);
break; break;
}
case 0xE087:{
guint8 emph_on[] = { 0x3C, 0x00, // <
0x2F, 0x00, // /
0x62, 0x00, // b
0x3E, 0x00 // >
};
g_byte_array_append (sb, emph_on, 8);
break;
}
case 0xE08A:{ case 0xE08A:{
guint8 nl[] = { 0x0A, 0x00 }; // new line guint8 nl[] = { 0x0A, 0x00 }; // new line
g_byte_array_append (sb, nl, 2); g_byte_array_append (sb, nl, 2);
@ -2374,23 +2367,10 @@ convert_to_utf8 (const gchar * text, gint length, guint start,
guint16 code = GST_READ_UINT16_BE (text); guint16 code = GST_READ_UINT16_BE (text);
switch (code) { switch (code) {
case 0xE086:{ case 0xE086: /* emphasis on */
guint8 emph_on[] = { 0x3C, 0x00, // < case 0xE087: /* emphasis off */
0x62, 0x00, // b /* skip it */
0x3E, 0x00 // >
};
g_byte_array_append (sb, emph_on, 6);
break; break;
}
case 0xE087:{
guint8 emph_on[] = { 0x3C, 0x00, // <
0x2F, 0x00, // /
0x62, 0x00, // b
0x3E, 0x00 // >
};
g_byte_array_append (sb, emph_on, 8);
break;
}
case 0xE08A:{ case 0xE08A:{
guint8 nl[] = { 0x0A, 0x00 }; // new line guint8 nl[] = { 0x0A, 0x00 }; // new line
g_byte_array_append (sb, nl, 2); g_byte_array_append (sb, nl, 2);
@ -2410,11 +2390,9 @@ convert_to_utf8 (const gchar * text, gint length, guint start,
guint8 code = (guint8) (*text); guint8 code = (guint8) (*text);
switch (code) { switch (code) {
case 0x86: case 0x86: /* emphasis on */
g_byte_array_append (sb, (guint8 *) "<b>", 3); case 0x87: /* emphasis off */
break; /* skip it */
case 0x87:
g_byte_array_append (sb, (guint8 *) "</b>", 4);
break; break;
case 0x8A: case 0x8A:
g_byte_array_append (sb, (guint8 *) "\n", 1); g_byte_array_append (sb, (guint8 *) "\n", 1);
@ -2431,11 +2409,9 @@ convert_to_utf8 (const gchar * text, gint length, guint start,
guint8 code = (guint8) (*text); guint8 code = (guint8) (*text);
switch (code) { switch (code) {
case 0x86: case 0x86: /* emphasis on */
g_byte_array_append (sb, (guint8 *) "<b>", 3); case 0x87: /* emphasis off */
break; /* skip it */
case 0x87:
g_byte_array_append (sb, (guint8 *) "</b>", 4);
break; break;
case 0x8A: case 0x8A:
g_byte_array_append (sb, (guint8 *) "\n", 1); g_byte_array_append (sb, (guint8 *) "\n", 1);
@ -2480,19 +2456,47 @@ get_encoding_and_convert (const gchar * text, guint length)
encoding = get_encoding (text, &start_text, &is_multibyte); encoding = get_encoding (text, &start_text, &is_multibyte);
if (encoding == NULL) { if (encoding == NULL) {
GST_WARNING ("Could not detect encoding");
converted_str = g_strndup (text, length); converted_str = g_strndup (text, length);
} else { } else {
converted_str = convert_to_utf8 (text, length - start_text, start_text, converted_str = convert_to_utf8 (text, length - start_text, start_text,
encoding, is_multibyte, &error); encoding, is_multibyte, &error);
if (error != NULL) { if (error != NULL) {
g_critical ("Could not convert string: %s", error->message); GST_WARNING ("Could not convert string, encoding is %s: %s",
encoding, error->message);
g_error_free (error); g_error_free (error);
text += start_text; error = NULL;
converted_str = g_strndup (text, length - start_text);
/* The first part of ISO 6937 is identical to ISO 8859-9, but
* they differ in the second part. Some channels don't
* provide the first byte that indicates ISO 8859-9 encoding.
* If decoding from ISO 6937 failed, we try ISO 8859-9 here.
*/
if (strcmp (encoding, "iso6937") == 0) {
GST_INFO ("Trying encoding ISO 8859-9");
converted_str = convert_to_utf8 (text, length, 0,
"iso8859-9", FALSE, &error);
if (error != NULL) {
GST_WARNING
("Could not convert string while assuming encoding ISO 8859-9: %s",
error->message);
g_error_free (error);
goto failed;
}
} else {
goto failed;
}
} }
g_free (encoding); g_free (encoding);
} }
return converted_str; return converted_str;
failed:
{
g_free (encoding);
text += start_text;
return g_strndup (text, length - start_text);
}
} }