diff --git a/ChangeLog b/ChangeLog index 7132d8bc22..6160f9d903 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2007-06-27 Tim-Philipp Müller + + * gst-libs/gst/tag/tags.c: (gst_tag_freeform_string_to_utf8): + Don't pass trailing zeroes in fixed-size string arrays in ID3v1 tags + to utf8-validate; fixes recognition of ID3v1 tags in UTF-8 encoding + (#451707); also, output some debugging info when dealing with + freeform strings. + + * tests/check/libs/tag.c: (GST_START_TEST), (tag_suite): + Add unit test for the above. + 2007-06-27 Tim-Philipp Müller * gst-libs/gst/pbutils/descriptions.c: (caps_are_rtp_caps): diff --git a/gst-libs/gst/tag/tags.c b/gst-libs/gst/tag/tags.c index 706ab96547..19ebb5876c 100644 --- a/gst-libs/gst/tag/tags.c +++ b/gst-libs/gst/tag/tags.c @@ -255,10 +255,18 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size, if (size < 0) size = strlen (data); + /* chop off trailing string terminators to make sure utf8_validate doesn't + * get to see them (since that would make the utf8 check fail) */ + while (size > 0 && data[size - 1] == '\0') + --size; + /* Should we try the charsets specified * via environment variables FIRST ? */ - if (g_utf8_validate (data, size, NULL)) - return g_strndup (data, size); + if (g_utf8_validate (data, size, NULL)) { + utf8 = g_strndup (data, size); + GST_LOG ("String '%s' is valid UTF-8 already", utf8); + goto beach; + } while (env_vars && *env_vars != NULL) { const gchar *env = NULL; @@ -271,6 +279,7 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size, csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1); for (c = csets; c && *c; ++c) { + GST_LOG ("Trying to convert freeform string to UTF-8 from '%s'", *c); if ((utf8 = g_convert (data, size, "UTF-8", *c, &bytes_read, NULL, NULL))) { if (bytes_read == size) { @@ -289,6 +298,7 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size, /* Try current locale (if not UTF-8) */ if (!g_get_charset (&cur_loc)) { + GST_LOG ("Trying to convert freeform string using locale ('%s')", cur_loc); if ((utf8 = g_locale_to_utf8 (data, size, &bytes_read, NULL, NULL))) { if (bytes_read == size) { goto beach; @@ -299,6 +309,7 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size, } /* Try ISO-8859-1 */ + GST_LOG ("Trying to convert freeform string using ISO-8859-1 fallback"); utf8 = g_convert (data, size, "UTF-8", "ISO-8859-1", &bytes_read, NULL, NULL); if (utf8 != NULL && bytes_read == size) { goto beach; @@ -310,8 +321,10 @@ gst_tag_freeform_string_to_utf8 (const gchar * data, gint size, beach: g_strchomp (utf8); - if (utf8 && utf8[0] != '\0') + if (utf8 && utf8[0] != '\0') { + GST_LOG ("Returning '%s'", utf8); return utf8; + } g_free (utf8); return NULL; diff --git a/tests/check/libs/tag.c b/tests/check/libs/tag.c index 2f3c5c8c4a..1cfd422d13 100644 --- a/tests/check/libs/tag.c +++ b/tests/check/libs/tag.c @@ -600,6 +600,80 @@ GST_START_TEST (test_id3_tags) GST_END_TEST; + +GST_START_TEST (test_id3v1_utf8_tag) +{ + const guint8 id3v1[128] = { + /* marker */ + 'T', 'A', 'G', + /* title (30 bytes) */ + 'D', 0xc3, 0xad, 'v', 'k', 'a', ' ', 's', + ' ', 'p', 'e', 'r', 'l', 'a', 'm', 'i', + ' ', 'v', 'e', ' ', 'v', 'l', 'a', 's', + 'e', 'c', 'h', 0, 0, 0, + /* artist (30 bytes) */ + 'A', 'l', 'e', 0xc5, 0xa1, ' ', 'B', 'r', 'i', 'c', 'h', 't', 'a', + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* album (30 bytes) */ + 'B', 'e', 's', 't', ' ', 'o', 'f', ' ', '(', 'P', 'r', 'o', 's', 't', + 0xc4, 0x9b, ' ', 0xc3, 0xba, 0xc5, 0xbe, 'a', 's', 'n', 0xc3, 0xbd, ')', + 0, 0, 0, + /* year (4 bytes) */ + '2', '0', '0', '0', + /* comment (28 bytes) */ + '-', '-', '-', ' ', 0xc4, 0x8d, 'e', 's', 'k', 0xc3, 0xa9, ' ', 'p', + 0xc3, 0xad, 's', 'n', 'i', 0xc4, 0x8d, 'k', 'y', ' ', '-', '-', '-', + 0, 0, + /* track number */ + 0, 0, + /* genre */ + 0x11 + }; + GstTagList *tags; + GDate *d; + gchar *s; + + /* set this, to make sure UTF-8 strings are really interpreted properly + * as UTF-8, regardless of the locale set */ + g_setenv ("GST_ID3V1_TAG_ENCODING", "WINDOWS-1250", TRUE); + + tags = gst_tag_list_new_from_id3v1 (id3v1); + fail_unless (tags != NULL); + + GST_LOG ("Got tags: %" GST_PTR_FORMAT, tags); + + s = NULL; + fail_unless (gst_tag_list_get_string (tags, GST_TAG_TITLE, &s)); + fail_unless (s != NULL); + fail_unless_equals_string (s, "Dívka s perlami ve vlasech"); + g_free (s); + + s = NULL; + fail_unless (gst_tag_list_get_string (tags, GST_TAG_ARTIST, &s)); + fail_unless (s != NULL); + fail_unless_equals_string (s, "Aleš Brichta"); + g_free (s); + + s = NULL; + fail_unless (gst_tag_list_get_string (tags, GST_TAG_ALBUM, &s)); + fail_unless (s != NULL); + fail_unless_equals_string (s, "Best of (Prostě úžasný)"); + g_free (s); + + d = NULL; + fail_unless (gst_tag_list_get_date (tags, GST_TAG_DATE, &d)); + fail_unless (d != NULL); + fail_unless_equals_int (g_date_get_year (d), 2000); + g_date_free (d); + d = NULL; + + gst_tag_list_free (tags); + + g_unsetenv ("GST_ID3V1_TAG_ENCODING"); +} + +GST_END_TEST; + static Suite * tag_suite (void) { @@ -611,6 +685,7 @@ tag_suite (void) tcase_add_test (tc_chain, test_parse_extended_comment); tcase_add_test (tc_chain, test_vorbis_tags); tcase_add_test (tc_chain, test_id3_tags); + tcase_add_test (tc_chain, test_id3v1_utf8_tag); return s; }