mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-02-02 12:32:29 +00:00
Check environment variables GST_ID3V2_TAG_ENCODING,
Original commit message from CVS: Check environment variables GST_ID3V2_TAG_ENCODING, GST_ID3_TAG_ENCODING and GST_TAG_ENCODING for a colon-separated list of character encodings to force interpretation of non-unicode strings stored in an ID3v2 tag to a particular encoding. If none is specified, try to use current locale's encoding, then fall back to ISO-8859-1 (which will always succeed). (Resolves #149274) Check environment variables GST_ID3V1_TAG_ENCODING, GST_ID3_TAG_ENCODING and GST_TAG_ENCODING for a colon-separated list of character encodings to use in case a string encountered in an ID3v1 tag is not valid UTF-8 already. If no encoding is specified, try to use the current locale's encoding, then fall back to ISO-8859-1 (which will always succeed).
This commit is contained in:
parent
804052d1ef
commit
913dd3f78e
2 changed files with 147 additions and 28 deletions
19
ChangeLog
19
ChangeLog
|
@ -1,3 +1,22 @@
|
|||
2005-01-26 Tim-Philipp Müller <tim at centricular dot net>
|
||||
|
||||
* ext/mad/gstid3tag.c: (mad_id3_parse_latin1_string),
|
||||
(mad_id3_parse_comment_frame), (gst_mad_id3_to_tag_list):
|
||||
Check environment variables GST_ID3V2_TAG_ENCODING,
|
||||
GST_ID3_TAG_ENCODING and GST_TAG_ENCODING for a colon-separated
|
||||
list of character encodings to force interpretation of non-unicode
|
||||
strings stored in an ID3v2 tag to a particular encoding. If none
|
||||
is specified, try to use current locale's encoding, then fall back
|
||||
to ISO-8859-1 (which will always succeed). (Resolves #149274)
|
||||
* gst/tags/gstid3tag.c: (gst_tag_from_id3_tag),
|
||||
(gst_tag_extract_id3v1_string), (gst_tag_list_new_from_id3v1):
|
||||
Check environment variables GST_ID3V1_TAG_ENCODING,
|
||||
GST_ID3_TAG_ENCODING and GST_TAG_ENCODING for a colon-separated
|
||||
list of character encodings to use in case a string encountered
|
||||
in an ID3v1 tag is not valid UTF-8 already. If no encoding is
|
||||
specified, try to use the current locale's encoding, then fall
|
||||
back to ISO-8859-1 (which will always succeed).
|
||||
|
||||
2005-01-25 Benjamin Otte <otte@gnome.org>
|
||||
|
||||
* ext/mad/gstmad.c: (gst_mad_check_caps_reset), (gst_mad_chain):
|
||||
|
|
|
@ -522,6 +522,113 @@ gst_id3_tag_src_event (GstPad * pad, GstEvent * event)
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
static id3_utf8_t *
|
||||
mad_id3_parse_latin1_string (const id3_ucs4_t * ucs4)
|
||||
{
|
||||
gsize bytes_read, size;
|
||||
const gchar *env;
|
||||
char *latin1, *ret = NULL;
|
||||
|
||||
latin1 = id3_ucs4_latin1duplicate (ucs4);
|
||||
if (latin1 == NULL)
|
||||
return NULL;
|
||||
|
||||
size = strlen (latin1);
|
||||
|
||||
env = g_getenv ("GST_ID3V2_TAG_ENCODING");
|
||||
if (!env || *env == '\0')
|
||||
env = g_getenv ("GST_ID3_TAG_ENCODING");
|
||||
if (!env || *env == '\0')
|
||||
env = g_getenv ("GST_TAG_ENCODING");
|
||||
|
||||
if (env && *env != '\0') {
|
||||
gchar **c, **csets;
|
||||
|
||||
csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1);
|
||||
|
||||
for (c = csets; !ret && c && *c; ++c) {
|
||||
gchar *utf8;
|
||||
|
||||
if ((utf8 =
|
||||
g_convert (latin1, size, "UTF-8", *c, &bytes_read, NULL, NULL))) {
|
||||
if (bytes_read == size) {
|
||||
ret = strdup (utf8);
|
||||
}
|
||||
g_free (utf8);
|
||||
}
|
||||
}
|
||||
g_strfreev (csets);
|
||||
}
|
||||
|
||||
/* Try current locale (if not UTF-8). Should we really do this?
|
||||
* What if the tag is really correct and in ISO-8859-1 and the
|
||||
* current locale is some other charset where the full byte range
|
||||
* is valid? In those cases ISO-8859-1 would have to be put into
|
||||
* one of the above environment variables. Do the most common
|
||||
* non-Western and non-UTF8 character sets modify only the range
|
||||
* from 0x80-0xff, so that ASCII is still covered at least?) */
|
||||
if (!ret && !g_get_charset (&env)) {
|
||||
gchar *utf8;
|
||||
|
||||
if ((utf8 = g_locale_to_utf8 (latin1, size, &bytes_read, NULL, NULL))) {
|
||||
if (bytes_read == size) {
|
||||
ret = strdup (utf8);
|
||||
}
|
||||
g_free (utf8);
|
||||
}
|
||||
}
|
||||
|
||||
/* Try ISO-8859-1 (this conversion should always succeed) */
|
||||
if (!ret) {
|
||||
gchar *utf8;
|
||||
|
||||
utf8 =
|
||||
g_convert (latin1, size, "UTF-8", "ISO-8859-1", &bytes_read, NULL,
|
||||
NULL);
|
||||
if (utf8 != NULL && bytes_read == size) {
|
||||
ret = strdup (utf8);
|
||||
}
|
||||
g_free (utf8);
|
||||
}
|
||||
|
||||
free (latin1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
mad_id3_parse_comment_frame (GstTagList * tlist, const struct id3_frame *frame)
|
||||
{
|
||||
const id3_ucs4_t *ucs4;
|
||||
id3_utf8_t *utf8;
|
||||
|
||||
g_assert (frame->nfields >= 4);
|
||||
|
||||
ucs4 = id3_field_getfullstring (&frame->fields[3]);
|
||||
g_assert (ucs4);
|
||||
|
||||
if (frame->fields[0].type == ID3_FIELD_TYPE_TEXTENCODING
|
||||
&& frame->fields[0].number.value == ID3_FIELD_TEXTENCODING_ISO_8859_1) {
|
||||
utf8 = mad_id3_parse_latin1_string (ucs4);
|
||||
} else {
|
||||
utf8 = id3_ucs4_utf8duplicate (ucs4);
|
||||
}
|
||||
|
||||
if (utf8 == NULL)
|
||||
return;
|
||||
|
||||
if (!g_utf8_validate (utf8, -1, NULL)) {
|
||||
g_warning ("converted string is not valid utf-8");
|
||||
g_free (utf8);
|
||||
return;
|
||||
}
|
||||
|
||||
g_strchomp (utf8);
|
||||
|
||||
gst_tag_list_add (tlist, GST_TAG_MERGE_APPEND, GST_TAG_COMMENT, utf8, NULL);
|
||||
|
||||
g_free (utf8);
|
||||
}
|
||||
|
||||
GstTagList *
|
||||
gst_mad_id3_to_tag_list (const struct id3_tag * tag)
|
||||
{
|
||||
|
@ -534,52 +641,45 @@ gst_mad_id3_to_tag_list (const struct id3_tag * tag)
|
|||
tag_list = gst_tag_list_new ();
|
||||
|
||||
while ((frame = id3_tag_findframe (tag, NULL, i++)) != NULL) {
|
||||
const union id3_field *field;
|
||||
const union id3_field *field, *encfield;
|
||||
unsigned int nstrings, j;
|
||||
const gchar *tag_name;
|
||||
|
||||
/* find me the function to query the frame id */
|
||||
gchar *id = g_strndup (frame->id, 5);
|
||||
tag_name = gst_tag_from_id3_tag (frame->id);
|
||||
if (tag_name == NULL)
|
||||
continue;
|
||||
|
||||
tag_name = gst_tag_from_id3_tag (id);
|
||||
if (tag_name == NULL) {
|
||||
g_free (id);
|
||||
if (strncmp (frame->id, "COMM", 5) == 0) {
|
||||
mad_id3_parse_comment_frame (tag_list, frame);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (strcmp (id, "COMM") == 0) {
|
||||
ucs4 = id3_field_getfullstring (&frame->fields[3]);
|
||||
g_assert (ucs4);
|
||||
|
||||
utf8 = id3_ucs4_utf8duplicate (ucs4);
|
||||
if (utf8 == 0)
|
||||
continue;
|
||||
|
||||
if (!g_utf8_validate (utf8, -1, NULL)) {
|
||||
g_warning ("converted string is not valid utf-8");
|
||||
g_free (utf8);
|
||||
continue;
|
||||
}
|
||||
|
||||
gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
|
||||
GST_TAG_COMMENT, utf8, NULL);
|
||||
|
||||
g_free (utf8);
|
||||
if (frame->id[0] != 'T') {
|
||||
g_warning ("don't know how to parse ID3v2 frame with ID '%s'", frame->id);
|
||||
continue;
|
||||
}
|
||||
|
||||
g_assert (frame->nfields >= 2);
|
||||
|
||||
field = &frame->fields[1];
|
||||
nstrings = id3_field_getnstrings (field);
|
||||
encfield = &frame->fields[0];
|
||||
|
||||
for (j = 0; j < nstrings; ++j) {
|
||||
ucs4 = id3_field_getstrings (field, j);
|
||||
g_assert (ucs4);
|
||||
|
||||
if (strcmp (id, ID3_FRAME_GENRE) == 0)
|
||||
if (strncmp (frame->id, ID3_FRAME_GENRE, 5) == 0)
|
||||
ucs4 = id3_genre_name (ucs4);
|
||||
|
||||
utf8 = id3_ucs4_utf8duplicate (ucs4);
|
||||
if (utf8 == 0)
|
||||
if (encfield->type == ID3_FIELD_TYPE_TEXTENCODING
|
||||
&& encfield->number.value == ID3_FIELD_TEXTENCODING_ISO_8859_1) {
|
||||
utf8 = mad_id3_parse_latin1_string (ucs4);
|
||||
} else {
|
||||
utf8 = id3_ucs4_utf8duplicate (ucs4);
|
||||
}
|
||||
|
||||
if (utf8 == NULL)
|
||||
continue;
|
||||
|
||||
if (!g_utf8_validate (utf8, -1, NULL)) {
|
||||
|
@ -654,13 +754,13 @@ gst_mad_id3_to_tag_list (const struct id3_tag * tag)
|
|||
}
|
||||
default:
|
||||
g_assert (gst_tag_get_type (tag_name) == G_TYPE_STRING);
|
||||
g_strchomp (utf8);
|
||||
gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND, tag_name, utf8,
|
||||
NULL);
|
||||
break;
|
||||
}
|
||||
free (utf8);
|
||||
}
|
||||
g_free (id);
|
||||
}
|
||||
|
||||
return tag_list;
|
||||
|
|
Loading…
Reference in a new issue