From 8d38cd443f4edd9554f676860cda7850da7ed246 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Mon, 13 Feb 2006 12:00:51 +0000 Subject: [PATCH] gst/id3demux/id3v2frames.c: Add more validation to ensure that a char encoding conversion produced a valid UTF-8 string. Original commit message from CVS: * gst/id3demux/id3v2frames.c: (parse_insert_string_field), (parse_split_strings): Add more validation to ensure that a char encoding conversion produced a valid UTF-8 string. --- ChangeLog | 7 ++++ common | 2 +- gst/id3demux/id3v2frames.c | 68 ++++++++++++++++++++------------------ 3 files changed, 43 insertions(+), 34 deletions(-) diff --git a/ChangeLog b/ChangeLog index dccb29c5b0..42e4e4a3f7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2006-02-13 Jan Schmidt + + * gst/id3demux/id3v2frames.c: (parse_insert_string_field), + (parse_split_strings): + Add more validation to ensure that a char encoding conversion + produced a valid UTF-8 string. + 2006-02-13 Mark Nauwelaerts Reviewed by: Edward Hervey diff --git a/common b/common index 58567e5519..c30611ac38 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit 58567e5519f2d00a4592491db1a6e8302993279e +Subproject commit c30611ac38336030fed6d258c6e558cc537adbc5 diff --git a/gst/id3demux/id3v2frames.c b/gst/id3demux/id3v2frames.c index 48fe9f1c66..cf356433c8 100644 --- a/gst/id3demux/id3v2frames.c +++ b/gst/id3demux/id3v2frames.c @@ -457,12 +457,27 @@ id3v2_genre_fields_to_taglist (ID3TagsWorking * work, const gchar * tag_name, return result; } +static void +parse_insert_string_field (const gchar * encoding, gchar * data, gint data_size, + GArray * fields) +{ + gchar *field; + + field = g_convert (data, data_size, "UTF-8", encoding, NULL, NULL, NULL); + if (field && !g_utf8_validate (field, -1, NULL)) { + GST_DEBUG ("%s was bad UTF-8. Ignoring", field); + g_free (field); + field = NULL; + } + if (field) + g_array_append_val (fields, field); +} + static void parse_split_strings (guint8 encoding, gchar * data, gint data_size, GArray ** out_fields) { GArray *fields = g_array_new (FALSE, TRUE, sizeof (gchar *)); - gchar *field; gint text_pos; gint prev = 0; @@ -472,52 +487,46 @@ parse_split_strings (guint8 encoding, gchar * data, gint data_size, case ID3V2_ENCODING_ISO8859: for (text_pos = 0; text_pos < data_size; text_pos++) { if (data[text_pos] == 0) { - field = g_convert (data + prev, text_pos - prev + 1, - "UTF-8", "ISO-8859-1", NULL, NULL, NULL); - if (field) - g_array_append_val (fields, field); + parse_insert_string_field ("ISO-8859-1", data + prev, + text_pos - prev + 1, fields); prev = text_pos + 1; } } if (data_size - prev > 0 && data[prev] != 0x00) { - field = g_convert (data + prev, data_size - prev, - "UTF-8", "ISO-8859-1", NULL, NULL, NULL); - if (field) - g_array_append_val (fields, field); + parse_insert_string_field ("ISO-8859-1", data + prev, + data_size - prev, fields); } break; case ID3V2_ENCODING_UTF8: for (prev = 0, text_pos = 0; text_pos < data_size; text_pos++) { if (data[text_pos] == '\0') { - field = g_strndup (data + prev, text_pos - prev + 1); - if (field) - g_array_append_val (fields, field); + parse_insert_string_field ("UTF-8", data + prev, + text_pos - prev + 1, fields); prev = text_pos + 1; } } if (data_size - prev > 0 && data[prev] != 0x00) { - field = g_strndup (data + prev, data_size - prev); - if (field) - g_array_append_val (fields, field); + parse_insert_string_field ("UTF-8", data + prev, + data_size - prev, fields); } break; case ID3V2_ENCODING_UTF16: case ID3V2_ENCODING_UTF16BE: { + const gchar *in_encode; + + if (encoding == ID3V2_ENCODING_UTF16) + in_encode = "UTF-16"; + else + in_encode = "UTF-16BE"; + /* Find '\0\0' terminator */ for (text_pos = 0; text_pos < data_size - 1; text_pos += 2) { if (data[text_pos] == '\0' && data[text_pos + 1] == '\0') { /* found a delimiter */ - if (encoding == ID3V2_ENCODING_UTF16) { - field = g_convert (data + prev, text_pos - prev + 2, - "UTF-8", "UTF-16", NULL, NULL, NULL); - } else { - field = g_convert (data + prev, text_pos - prev + 2, - "UTF-8", "UTF-16BE", NULL, NULL, NULL); - } - if (field) - g_array_append_val (fields, field); + parse_insert_string_field (in_encode, data + prev, + text_pos - prev + 2, fields); text_pos++; /* Advance to the 2nd NULL terminator */ prev = text_pos + 1; break; @@ -526,15 +535,8 @@ parse_split_strings (guint8 encoding, gchar * data, gint data_size, if (data_size - prev > 1 && (data[prev] != 0x00 || data[prev + 1] != 0x00)) { /* There were 2 or more non-null chars left, convert those too */ - if (encoding == ID3V2_ENCODING_UTF16) { - field = g_convert (data + prev, data_size - prev, - "UTF-8", "UTF-16", NULL, NULL, NULL); - } else { - field = g_convert (data + prev, data_size - prev, - "UTF-8", "UTF-16BE", NULL, NULL, NULL); - } - if (field) - g_array_append_val (fields, field); + parse_insert_string_field (in_encode, data + prev, + data_size - prev, fields); } break; }