gst/id3demux/id3v2frames.c: Add more validation to ensure that a char encoding conversion produced a valid UTF-8 string.

Original commit message from CVS:
* gst/id3demux/id3v2frames.c: (parse_insert_string_field),
(parse_split_strings):
Add more validation to ensure that a char encoding conversion
produced a valid UTF-8 string.
This commit is contained in:
Jan Schmidt 2006-02-13 12:00:51 +00:00
parent d502325ee9
commit 8d38cd443f
3 changed files with 43 additions and 34 deletions

View file

@ -1,3 +1,10 @@
2006-02-13 Jan Schmidt <thaytan@mad.scientist.com>
* gst/id3demux/id3v2frames.c: (parse_insert_string_field),
(parse_split_strings):
Add more validation to ensure that a char encoding conversion
produced a valid UTF-8 string.
2006-02-13 Mark Nauwelaerts <manauw@skynet.be> 2006-02-13 Mark Nauwelaerts <manauw@skynet.be>
Reviewed by: Edward Hervey <edward@fluendo.com> Reviewed by: Edward Hervey <edward@fluendo.com>

2
common

@ -1 +1 @@
Subproject commit 58567e5519f2d00a4592491db1a6e8302993279e Subproject commit c30611ac38336030fed6d258c6e558cc537adbc5

View file

@ -457,12 +457,27 @@ id3v2_genre_fields_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
return result; return result;
} }
/*
 * Convert data_size bytes starting at data from the character set named by
 * encoding into UTF-8 and, when the result is a valid UTF-8 string, append
 * the resulting string pointer to the fields array.
 *
 * Nothing is appended when the conversion yields NULL or when the converted
 * text fails UTF-8 validation; in the latter case the converted buffer is
 * freed here. A pointer that is appended is not freed by this function.
 */
static void
parse_insert_string_field (const gchar * encoding, gchar * data, gint data_size,
    GArray * fields)
{
  gchar *converted =
      g_convert (data, data_size, "UTF-8", encoding, NULL, NULL, NULL);

  /* Conversion produced nothing: there is no string to store. */
  if (converted == NULL)
    return;

  /* A non-NULL conversion result is still checked: only strings that
   * validate as UTF-8 up to their NUL terminator are kept. */
  if (!g_utf8_validate (converted, -1, NULL)) {
    GST_DEBUG ("%s was bad UTF-8. Ignoring", converted);
    g_free (converted);
    return;
  }

  g_array_append_val (fields, converted);
}
static void static void
parse_split_strings (guint8 encoding, gchar * data, gint data_size, parse_split_strings (guint8 encoding, gchar * data, gint data_size,
GArray ** out_fields) GArray ** out_fields)
{ {
GArray *fields = g_array_new (FALSE, TRUE, sizeof (gchar *)); GArray *fields = g_array_new (FALSE, TRUE, sizeof (gchar *));
gchar *field;
gint text_pos; gint text_pos;
gint prev = 0; gint prev = 0;
@ -472,52 +487,46 @@ parse_split_strings (guint8 encoding, gchar * data, gint data_size,
case ID3V2_ENCODING_ISO8859: case ID3V2_ENCODING_ISO8859:
for (text_pos = 0; text_pos < data_size; text_pos++) { for (text_pos = 0; text_pos < data_size; text_pos++) {
if (data[text_pos] == 0) { if (data[text_pos] == 0) {
field = g_convert (data + prev, text_pos - prev + 1, parse_insert_string_field ("ISO-8859-1", data + prev,
"UTF-8", "ISO-8859-1", NULL, NULL, NULL); text_pos - prev + 1, fields);
if (field)
g_array_append_val (fields, field);
prev = text_pos + 1; prev = text_pos + 1;
} }
} }
if (data_size - prev > 0 && data[prev] != 0x00) { if (data_size - prev > 0 && data[prev] != 0x00) {
field = g_convert (data + prev, data_size - prev, parse_insert_string_field ("ISO-8859-1", data + prev,
"UTF-8", "ISO-8859-1", NULL, NULL, NULL); data_size - prev, fields);
if (field)
g_array_append_val (fields, field);
} }
break; break;
case ID3V2_ENCODING_UTF8: case ID3V2_ENCODING_UTF8:
for (prev = 0, text_pos = 0; text_pos < data_size; text_pos++) { for (prev = 0, text_pos = 0; text_pos < data_size; text_pos++) {
if (data[text_pos] == '\0') { if (data[text_pos] == '\0') {
field = g_strndup (data + prev, text_pos - prev + 1); parse_insert_string_field ("UTF-8", data + prev,
if (field) text_pos - prev + 1, fields);
g_array_append_val (fields, field);
prev = text_pos + 1; prev = text_pos + 1;
} }
} }
if (data_size - prev > 0 && data[prev] != 0x00) { if (data_size - prev > 0 && data[prev] != 0x00) {
field = g_strndup (data + prev, data_size - prev); parse_insert_string_field ("UTF-8", data + prev,
if (field) data_size - prev, fields);
g_array_append_val (fields, field);
} }
break; break;
case ID3V2_ENCODING_UTF16: case ID3V2_ENCODING_UTF16:
case ID3V2_ENCODING_UTF16BE: case ID3V2_ENCODING_UTF16BE:
{ {
const gchar *in_encode;
if (encoding == ID3V2_ENCODING_UTF16)
in_encode = "UTF-16";
else
in_encode = "UTF-16BE";
/* Find '\0\0' terminator */ /* Find '\0\0' terminator */
for (text_pos = 0; text_pos < data_size - 1; text_pos += 2) { for (text_pos = 0; text_pos < data_size - 1; text_pos += 2) {
if (data[text_pos] == '\0' && data[text_pos + 1] == '\0') { if (data[text_pos] == '\0' && data[text_pos + 1] == '\0') {
/* found a delimiter */ /* found a delimiter */
if (encoding == ID3V2_ENCODING_UTF16) { parse_insert_string_field (in_encode, data + prev,
field = g_convert (data + prev, text_pos - prev + 2, text_pos - prev + 2, fields);
"UTF-8", "UTF-16", NULL, NULL, NULL);
} else {
field = g_convert (data + prev, text_pos - prev + 2,
"UTF-8", "UTF-16BE", NULL, NULL, NULL);
}
if (field)
g_array_append_val (fields, field);
text_pos++; /* Advance to the 2nd NULL terminator */ text_pos++; /* Advance to the 2nd NULL terminator */
prev = text_pos + 1; prev = text_pos + 1;
break; break;
@ -526,15 +535,8 @@ parse_split_strings (guint8 encoding, gchar * data, gint data_size,
if (data_size - prev > 1 && if (data_size - prev > 1 &&
(data[prev] != 0x00 || data[prev + 1] != 0x00)) { (data[prev] != 0x00 || data[prev + 1] != 0x00)) {
/* There were 2 or more non-null chars left, convert those too */ /* There were 2 or more non-null chars left, convert those too */
if (encoding == ID3V2_ENCODING_UTF16) { parse_insert_string_field (in_encode, data + prev,
field = g_convert (data + prev, data_size - prev, data_size - prev, fields);
"UTF-8", "UTF-16", NULL, NULL, NULL);
} else {
field = g_convert (data + prev, data_size - prev,
"UTF-8", "UTF-16BE", NULL, NULL, NULL);
}
if (field)
g_array_append_val (fields, field);
} }
break; break;
} }