Check environment variables (GST_ID3V2_TAG_ENCODING etc.) for ID3 tag character encodings

Original commit message from CVS:
Check environment variables GST_ID3V2_TAG_ENCODING,
GST_ID3_TAG_ENCODING and GST_TAG_ENCODING for a colon-separated
list of character encodings with which to interpret non-Unicode
strings stored in an ID3v2 tag. If none is specified, try the
current locale's encoding, then fall back to ISO-8859-1 (which
will always succeed). (Resolves #149274)

Check environment variables GST_ID3V1_TAG_ENCODING,
GST_ID3_TAG_ENCODING and GST_TAG_ENCODING for a colon-separated
list of character encodings to use in case a string encountered
in an ID3v1 tag is not valid UTF-8 already. If no encoding is
specified, try to use the current locale's encoding, then fall
back to ISO-8859-1 (which will always succeed).
This commit is contained in:
Tim-Philipp Müller 2005-01-26 12:38:02 +00:00
parent b5f4c4dd65
commit 37eab7f284
2 changed files with 89 additions and 28 deletions

View file

@ -1,3 +1,22 @@
2005-01-26 Tim-Philipp Müller <tim at centricular dot net>
* ext/mad/gstid3tag.c: (mad_id3_parse_latin1_string),
(mad_id3_parse_comment_frame), (gst_mad_id3_to_tag_list):
Check environment variables GST_ID3V2_TAG_ENCODING,
GST_ID3_TAG_ENCODING and GST_TAG_ENCODING for a colon-separated
list of character encodings with which to interpret non-Unicode
strings stored in an ID3v2 tag. If none is specified, try the
current locale's encoding, then fall back to ISO-8859-1 (which
will always succeed). (Resolves #149274)
* gst/tags/gstid3tag.c: (gst_tag_from_id3_tag),
(gst_tag_extract_id3v1_string), (gst_tag_list_new_from_id3v1):
Check environment variables GST_ID3V1_TAG_ENCODING,
GST_ID3_TAG_ENCODING and GST_TAG_ENCODING for a colon-separated
list of character encodings to use in case a string encountered
in an ID3v1 tag is not valid UTF-8 already. If no encoding is
specified, try to use the current locale's encoding, then fall
back to ISO-8859-1 (which will always succeed).
2005-01-25 Benjamin Otte <otte@gnome.org>
* ext/mad/gstmad.c: (gst_mad_check_caps_reset), (gst_mad_chain):

View file

@ -208,7 +208,7 @@ gst_tag_from_id3_tag (const gchar * id3_tag)
g_return_val_if_fail (id3_tag != NULL, NULL);
while (tag_matches[i].gstreamer_tag != NULL) {
if (strcmp (id3_tag, tag_matches[i].original_tag) == 0) {
if (strncmp (id3_tag, tag_matches[i].original_tag, 5) == 0) {
break;
}
i++;
@ -239,34 +239,76 @@ gst_tag_to_id3_tag (const gchar * gst_tag)
}
return NULL;
}
static void
gst_tag_extract (GstTagList * list, const gchar * tag, const gchar * start,
const guint size)
{
gsize bytes_read;
gchar *conv;
/* FIXME: better charset detection? */
static void
gst_tag_extract_id3v1_string (GstTagList * list, const gchar * tag,
const gchar * start, const guint size)
{
const gchar *env;
gsize bytes_read;
gchar *utf8;
/* Should we try the charsets specified
* via environment variables FIRST ? */
if (g_utf8_validate (start, size, NULL)) {
conv = g_strchomp (g_strndup (start, size));
} else {
conv = g_locale_to_utf8 (start, size, &bytes_read, NULL, NULL);
if (bytes_read != size) {
g_free (conv);
conv =
g_convert (start, size, "UTF-8", "ISO-8859-1", &bytes_read, NULL,
NULL);
if (bytes_read != size) {
g_free (conv);
return;
utf8 = g_strndup (start, size);
goto beach;
}
env = g_getenv ("GST_ID3V1_TAG_ENCODING");
if (!env || *env == '\0')
env = g_getenv ("GST_ID3_TAG_ENCODING");
if (!env || *env == '\0')
env = g_getenv ("GST_TAG_ENCODING");
/* Try charsets specified via the environment */
if (env && *env != '\0') {
gchar **c, **csets;
csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1);
for (c = csets; c && *c; ++c) {
if ((utf8 =
g_convert (start, size, "UTF-8", *c, &bytes_read, NULL, NULL))) {
if (bytes_read == size) {
g_strfreev (csets);
goto beach;
}
g_free (utf8);
utf8 = NULL;
}
}
conv = g_strchomp (conv);
}
if (conv[0] != '\0') {
gst_tag_list_add (list, GST_TAG_MERGE_REPLACE, tag, conv, NULL);
/* Try current locale (if not UTF-8) */
if (!g_get_charset (&env)) {
if ((utf8 = g_locale_to_utf8 (start, size, &bytes_read, NULL, NULL))) {
if (bytes_read == size) {
goto beach;
}
g_free (utf8);
utf8 = NULL;
}
}
g_free (conv);
/* Try ISO-8859-1 */
utf8 =
g_convert (start, size, "UTF-8", "ISO-8859-1", &bytes_read, NULL, NULL);
if (utf8 != NULL && bytes_read == size) {
goto beach;
}
g_free (utf8);
return;
beach:
g_strchomp (utf8);
if (utf8 && utf8[0] != '\0') {
gst_tag_list_add (list, GST_TAG_MERGE_REPLACE, tag, utf8, NULL);
}
g_free (utf8);
}
/**
@ -290,9 +332,9 @@ gst_tag_list_new_from_id3v1 (const guint8 * data)
if (data[0] != 'T' || data[1] != 'A' || data[2] != 'G')
return NULL;
list = gst_tag_list_new ();
gst_tag_extract (list, GST_TAG_TITLE, &data[3], 30);
gst_tag_extract (list, GST_TAG_ARTIST, &data[33], 30);
gst_tag_extract (list, GST_TAG_ALBUM, &data[63], 30);
gst_tag_extract_id3v1_string (list, GST_TAG_TITLE, &data[3], 30);
gst_tag_extract_id3v1_string (list, GST_TAG_ARTIST, &data[33], 30);
gst_tag_extract_id3v1_string (list, GST_TAG_ALBUM, &data[63], 30);
ystr = g_strndup (&data[93], 4);
year = strtoul (ystr, NULL, 10);
g_free (ystr);
@ -304,11 +346,11 @@ gst_tag_list_new_from_id3v1 (const guint8 * data)
gst_tag_list_add (list, GST_TAG_MERGE_REPLACE, GST_TAG_DATE, year, NULL);
}
if (data[125] == 0) {
gst_tag_extract (list, GST_TAG_COMMENT, &data[97], 28);
gst_tag_extract_id3v1_string (list, GST_TAG_COMMENT, &data[97], 28);
gst_tag_list_add (list, GST_TAG_MERGE_REPLACE, GST_TAG_TRACK_NUMBER,
(guint) data[126], NULL);
} else {
gst_tag_extract (list, GST_TAG_COMMENT, &data[97], 30);
gst_tag_extract_id3v1_string (list, GST_TAG_COMMENT, &data[97], 30);
}
if (data[127] < gst_tag_id3_genre_count ()) {
gst_tag_list_add (list, GST_TAG_MERGE_REPLACE, GST_TAG_GENRE,