gst-libs/gst/tag/tags.c: Don't pass trailing zeroes in fixed-size string arrays in ID3v1 tags to utf8-validate; fixes...

Original commit message from CVS:
* gst-libs/gst/tag/tags.c: (gst_tag_freeform_string_to_utf8):
Don't pass trailing zeroes in fixed-size string arrays in ID3v1 tags
to utf8-validate; fixes recognition of ID3v1 tags in UTF-8 encoding
(#451707); also, output some debugging info when dealing with
freeform strings.
* tests/check/libs/tag.c: (GST_START_TEST), (tag_suite):
Add unit test for the above.
This commit is contained in:
Tim-Philipp Müller 2007-06-27 22:30:19 +00:00
parent f637e3b80c
commit 28ef3f5ddf
3 changed files with 102 additions and 3 deletions

View file

@ -1,3 +1,14 @@
2007-06-27 Tim-Philipp Müller <tim at centricular dot net>
* gst-libs/gst/tag/tags.c: (gst_tag_freeform_string_to_utf8):
Don't pass trailing zeroes in fixed-size string arrays in ID3v1 tags
to utf8-validate; fixes recognition of ID3v1 tags in UTF-8 encoding
(#451707); also, output some debugging info when dealing with
freeform strings.
* tests/check/libs/tag.c: (GST_START_TEST), (tag_suite):
Add unit test for the above.
2007-06-27 Tim-Philipp Müller <tim at centricular dot net>
* gst-libs/gst/pbutils/descriptions.c: (caps_are_rtp_caps):

View file

@ -255,10 +255,18 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size,
if (size < 0)
size = strlen (data);
/* chop off trailing string terminators to make sure utf8_validate doesn't
* get to see them (since that would make the utf8 check fail) */
while (size > 0 && data[size - 1] == '\0')
--size;
/* Should we try the charsets specified
* via environment variables FIRST ? */
if (g_utf8_validate (data, size, NULL))
return g_strndup (data, size);
if (g_utf8_validate (data, size, NULL)) {
utf8 = g_strndup (data, size);
GST_LOG ("String '%s' is valid UTF-8 already", utf8);
goto beach;
}
while (env_vars && *env_vars != NULL) {
const gchar *env = NULL;
@ -271,6 +279,7 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size,
csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1);
for (c = csets; c && *c; ++c) {
GST_LOG ("Trying to convert freeform string to UTF-8 from '%s'", *c);
if ((utf8 =
g_convert (data, size, "UTF-8", *c, &bytes_read, NULL, NULL))) {
if (bytes_read == size) {
@ -289,6 +298,7 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size,
/* Try current locale (if not UTF-8) */
if (!g_get_charset (&cur_loc)) {
GST_LOG ("Trying to convert freeform string using locale ('%s')", cur_loc);
if ((utf8 = g_locale_to_utf8 (data, size, &bytes_read, NULL, NULL))) {
if (bytes_read == size) {
goto beach;
@ -299,6 +309,7 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size,
}
/* Try ISO-8859-1 */
GST_LOG ("Trying to convert freeform string using ISO-8859-1 fallback");
utf8 = g_convert (data, size, "UTF-8", "ISO-8859-1", &bytes_read, NULL, NULL);
if (utf8 != NULL && bytes_read == size) {
goto beach;
@ -310,8 +321,10 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size,
beach:
g_strchomp (utf8);
if (utf8 && utf8[0] != '\0')
if (utf8 && utf8[0] != '\0') {
GST_LOG ("Returning '%s'", utf8);
return utf8;
}
g_free (utf8);
return NULL;

View file

@ -600,6 +600,80 @@ GST_START_TEST (test_id3_tags)
GST_END_TEST;
/* Checks that gst_tag_list_new_from_id3v1() extracts the expected title,
 * artist, album and year from a 128-byte ID3v1 block whose fixed-size text
 * fields contain UTF-8 text padded with trailing zero bytes (see #451707). */
GST_START_TEST (test_id3v1_utf8_tag)
{
/* Hand-built ID3v1 block; the field sizes below add up to 128 bytes
 * (3 + 30 + 30 + 30 + 4 + 28 + 2 + 1). Multi-byte sequences such as
 * 0xc3 0xad are the UTF-8 encodings of the accented characters that
 * appear in the expected strings further down. */
const guint8 id3v1[128] = {
/* marker */
'T', 'A', 'G',
/* title (30 bytes) */
'D', 0xc3, 0xad, 'v', 'k', 'a', ' ', 's',
' ', 'p', 'e', 'r', 'l', 'a', 'm', 'i',
' ', 'v', 'e', ' ', 'v', 'l', 'a', 's',
'e', 'c', 'h', 0, 0, 0,
/* artist (30 bytes) */
'A', 'l', 'e', 0xc5, 0xa1, ' ', 'B', 'r', 'i', 'c', 'h', 't', 'a',
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* album (30 bytes) */
'B', 'e', 's', 't', ' ', 'o', 'f', ' ', '(', 'P', 'r', 'o', 's', 't',
0xc4, 0x9b, ' ', 0xc3, 0xba, 0xc5, 0xbe, 'a', 's', 'n', 0xc3, 0xbd, ')',
0, 0, 0,
/* year (4 bytes) */
'2', '0', '0', '0',
/* comment (28 bytes) -- present in the data but not asserted on below */
'-', '-', '-', ' ', 0xc4, 0x8d, 'e', 's', 'k', 0xc3, 0xa9, ' ', 'p',
0xc3, 0xad, 's', 'n', 'i', 0xc4, 0x8d, 'k', 'y', ' ', '-', '-', '-',
0, 0,
/* track number -- not asserted on below */
0, 0,
/* genre -- not asserted on below */
0x11
};
GstTagList *tags;
GDate *d;
gchar *s;
/* set this, to make sure UTF-8 strings are really interpreted properly
 * as UTF-8, regardless of the locale set */
g_setenv ("GST_ID3V1_TAG_ENCODING", "WINDOWS-1250", TRUE);
tags = gst_tag_list_new_from_id3v1 (id3v1);
fail_unless (tags != NULL);
GST_LOG ("Got tags: %" GST_PTR_FORMAT, tags);
/* title: expected string is the UTF-8 text without the zero padding;
 * the string handed back through &s is freed by the test */
s = NULL;
fail_unless (gst_tag_list_get_string (tags, GST_TAG_TITLE, &s));
fail_unless (s != NULL);
fail_unless_equals_string (s, "Dívka s perlami ve vlasech");
g_free (s);
/* artist */
s = NULL;
fail_unless (gst_tag_list_get_string (tags, GST_TAG_ARTIST, &s));
fail_unless (s != NULL);
fail_unless_equals_string (s, "Aleš Brichta");
g_free (s);
/* album */
s = NULL;
fail_unless (gst_tag_list_get_string (tags, GST_TAG_ALBUM, &s));
fail_unless (s != NULL);
fail_unless_equals_string (s, "Best of (Prostě úžasný)");
g_free (s);
/* date: only the year is compared against the 4-byte year field */
d = NULL;
fail_unless (gst_tag_list_get_date (tags, GST_TAG_DATE, &d));
fail_unless (d != NULL);
fail_unless_equals_int (g_date_get_year (d), 2000);
g_date_free (d);
d = NULL;
gst_tag_list_free (tags);
/* undo the environment override set at the top of the test */
g_unsetenv ("GST_ID3V1_TAG_ENCODING");
}
GST_END_TEST;
static Suite *
tag_suite (void)
{
@ -611,6 +685,7 @@ tag_suite (void)
tcase_add_test (tc_chain, test_parse_extended_comment);
tcase_add_test (tc_chain, test_vorbis_tags);
tcase_add_test (tc_chain, test_id3_tags);
tcase_add_test (tc_chain, test_id3v1_utf8_tag);
return s;
}