mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-06-05 06:58:56 +00:00
Check environment variables GST_ID3V2_TAG_ENCODING,
Original commit message from CVS:

ID3v2 tags: check the environment variables GST_ID3V2_TAG_ENCODING, GST_ID3_TAG_ENCODING and GST_TAG_ENCODING (in that order) for a colon-separated list of character encodings, and interpret non-Unicode strings stored in an ID3v2 tag as the first of those encodings that converts cleanly. If none is specified, try the current locale's encoding, then fall back to ISO-8859-1 (which will always succeed). (Resolves #149274)

ID3v1 tags: check the environment variables GST_ID3V1_TAG_ENCODING, GST_ID3_TAG_ENCODING and GST_TAG_ENCODING (in that order) for a colon-separated list of character encodings to use when a string encountered in an ID3v1 tag is not already valid UTF-8. If no encoding is specified, try the current locale's encoding, then fall back to ISO-8859-1 (which will always succeed).
This commit is contained in:
parent
804052d1ef
commit
913dd3f78e
2 changed files with 147 additions and 28 deletions
19
ChangeLog
19
ChangeLog
|
@ -1,3 +1,22 @@
|
||||||
|
2005-01-26 Tim-Philipp Müller <tim at centricular dot net>
|
||||||
|
|
||||||
|
* ext/mad/gstid3tag.c: (mad_id3_parse_latin1_string),
|
||||||
|
(mad_id3_parse_comment_frame), (gst_mad_id3_to_tag_list):
|
||||||
|
Check environment variables GST_ID3V2_TAG_ENCODING,
|
||||||
|
GST_ID3_TAG_ENCODING and GST_TAG_ENCODING for a colon-separated
|
||||||
|
list of character encodings to force interpretation of non-unicode
|
||||||
|
strings stored in an ID3v2 tag to a particular encoding. If none
|
||||||
|
is specified, try to use current locale's encoding, then fall back
|
||||||
|
to ISO-8859-1 (which will always succeed). (Resolves #149274)
|
||||||
|
* gst/tags/gstid3tag.c: (gst_tag_from_id3_tag),
|
||||||
|
(gst_tag_extract_id3v1_string), (gst_tag_list_new_from_id3v1):
|
||||||
|
Check environment variables GST_ID3V1_TAG_ENCODING,
|
||||||
|
GST_ID3_TAG_ENCODING and GST_TAG_ENCODING for a colon-separated
|
||||||
|
list of character encodings to use in case a string encountered
|
||||||
|
in an ID3v1 tag is not valid UTF-8 already. If no encoding is
|
||||||
|
specified, try to use the current locale's encoding, then fall
|
||||||
|
back to ISO-8859-1 (which will always succeed).
|
||||||
|
|
||||||
2005-01-25 Benjamin Otte <otte@gnome.org>
|
2005-01-25 Benjamin Otte <otte@gnome.org>
|
||||||
|
|
||||||
* ext/mad/gstmad.c: (gst_mad_check_caps_reset), (gst_mad_chain):
|
* ext/mad/gstmad.c: (gst_mad_check_caps_reset), (gst_mad_chain):
|
||||||
|
|
|
@ -522,6 +522,113 @@ gst_id3_tag_src_event (GstPad * pad, GstEvent * event)
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static id3_utf8_t *
|
||||||
|
mad_id3_parse_latin1_string (const id3_ucs4_t * ucs4)
|
||||||
|
{
|
||||||
|
gsize bytes_read, size;
|
||||||
|
const gchar *env;
|
||||||
|
char *latin1, *ret = NULL;
|
||||||
|
|
||||||
|
latin1 = id3_ucs4_latin1duplicate (ucs4);
|
||||||
|
if (latin1 == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
size = strlen (latin1);
|
||||||
|
|
||||||
|
env = g_getenv ("GST_ID3V2_TAG_ENCODING");
|
||||||
|
if (!env || *env == '\0')
|
||||||
|
env = g_getenv ("GST_ID3_TAG_ENCODING");
|
||||||
|
if (!env || *env == '\0')
|
||||||
|
env = g_getenv ("GST_TAG_ENCODING");
|
||||||
|
|
||||||
|
if (env && *env != '\0') {
|
||||||
|
gchar **c, **csets;
|
||||||
|
|
||||||
|
csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1);
|
||||||
|
|
||||||
|
for (c = csets; !ret && c && *c; ++c) {
|
||||||
|
gchar *utf8;
|
||||||
|
|
||||||
|
if ((utf8 =
|
||||||
|
g_convert (latin1, size, "UTF-8", *c, &bytes_read, NULL, NULL))) {
|
||||||
|
if (bytes_read == size) {
|
||||||
|
ret = strdup (utf8);
|
||||||
|
}
|
||||||
|
g_free (utf8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
g_strfreev (csets);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Try current locale (if not UTF-8). Should we really do this?
|
||||||
|
* What if the tag is really correct and in ISO-8859-1 and the
|
||||||
|
* current locale is some other charset where the full byte range
|
||||||
|
* is valid? In those cases ISO-8859-1 would have to be put into
|
||||||
|
* one of the above environment variables. Do the most common
|
||||||
|
* non-Western and non-UTF8 character sets modify only the range
|
||||||
|
* from 0x80-0xff, so that ASCII is still covered at least?) */
|
||||||
|
if (!ret && !g_get_charset (&env)) {
|
||||||
|
gchar *utf8;
|
||||||
|
|
||||||
|
if ((utf8 = g_locale_to_utf8 (latin1, size, &bytes_read, NULL, NULL))) {
|
||||||
|
if (bytes_read == size) {
|
||||||
|
ret = strdup (utf8);
|
||||||
|
}
|
||||||
|
g_free (utf8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Try ISO-8859-1 (this conversion should always suceed) */
|
||||||
|
if (!ret) {
|
||||||
|
gchar *utf8;
|
||||||
|
|
||||||
|
utf8 =
|
||||||
|
g_convert (latin1, size, "UTF-8", "ISO-8859-1", &bytes_read, NULL,
|
||||||
|
NULL);
|
||||||
|
if (utf8 != NULL && bytes_read == size) {
|
||||||
|
ret = strdup (utf8);
|
||||||
|
}
|
||||||
|
g_free (utf8);
|
||||||
|
}
|
||||||
|
|
||||||
|
free (latin1);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
mad_id3_parse_comment_frame (GstTagList * tlist, const struct id3_frame *frame)
|
||||||
|
{
|
||||||
|
const id3_ucs4_t *ucs4;
|
||||||
|
id3_utf8_t *utf8;
|
||||||
|
|
||||||
|
g_assert (frame->nfields >= 4);
|
||||||
|
|
||||||
|
ucs4 = id3_field_getfullstring (&frame->fields[3]);
|
||||||
|
g_assert (ucs4);
|
||||||
|
|
||||||
|
if (frame->fields[0].type == ID3_FIELD_TYPE_TEXTENCODING
|
||||||
|
&& frame->fields[0].number.value == ID3_FIELD_TEXTENCODING_ISO_8859_1) {
|
||||||
|
utf8 = mad_id3_parse_latin1_string (ucs4);
|
||||||
|
} else {
|
||||||
|
utf8 = id3_ucs4_utf8duplicate (ucs4);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (utf8 == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (!g_utf8_validate (utf8, -1, NULL)) {
|
||||||
|
g_warning ("converted string is not valid utf-8");
|
||||||
|
g_free (utf8);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_strchomp (utf8);
|
||||||
|
|
||||||
|
gst_tag_list_add (tlist, GST_TAG_MERGE_APPEND, GST_TAG_COMMENT, utf8, NULL);
|
||||||
|
|
||||||
|
g_free (utf8);
|
||||||
|
}
|
||||||
|
|
||||||
GstTagList *
|
GstTagList *
|
||||||
gst_mad_id3_to_tag_list (const struct id3_tag * tag)
|
gst_mad_id3_to_tag_list (const struct id3_tag * tag)
|
||||||
{
|
{
|
||||||
|
@ -534,52 +641,45 @@ gst_mad_id3_to_tag_list (const struct id3_tag * tag)
|
||||||
tag_list = gst_tag_list_new ();
|
tag_list = gst_tag_list_new ();
|
||||||
|
|
||||||
while ((frame = id3_tag_findframe (tag, NULL, i++)) != NULL) {
|
while ((frame = id3_tag_findframe (tag, NULL, i++)) != NULL) {
|
||||||
const union id3_field *field;
|
const union id3_field *field, *encfield;
|
||||||
unsigned int nstrings, j;
|
unsigned int nstrings, j;
|
||||||
const gchar *tag_name;
|
const gchar *tag_name;
|
||||||
|
|
||||||
/* find me the function to query the frame id */
|
tag_name = gst_tag_from_id3_tag (frame->id);
|
||||||
gchar *id = g_strndup (frame->id, 5);
|
if (tag_name == NULL)
|
||||||
|
continue;
|
||||||
|
|
||||||
tag_name = gst_tag_from_id3_tag (id);
|
if (strncmp (frame->id, "COMM", 5) == 0) {
|
||||||
if (tag_name == NULL) {
|
mad_id3_parse_comment_frame (tag_list, frame);
|
||||||
g_free (id);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strcmp (id, "COMM") == 0) {
|
if (frame->id[0] != 'T') {
|
||||||
ucs4 = id3_field_getfullstring (&frame->fields[3]);
|
g_warning ("don't know how to parse ID3v2 frame with ID '%s'", frame->id);
|
||||||
g_assert (ucs4);
|
|
||||||
|
|
||||||
utf8 = id3_ucs4_utf8duplicate (ucs4);
|
|
||||||
if (utf8 == 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (!g_utf8_validate (utf8, -1, NULL)) {
|
|
||||||
g_warning ("converted string is not valid utf-8");
|
|
||||||
g_free (utf8);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
|
|
||||||
GST_TAG_COMMENT, utf8, NULL);
|
|
||||||
|
|
||||||
g_free (utf8);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
g_assert (frame->nfields >= 2);
|
||||||
|
|
||||||
field = &frame->fields[1];
|
field = &frame->fields[1];
|
||||||
nstrings = id3_field_getnstrings (field);
|
nstrings = id3_field_getnstrings (field);
|
||||||
|
encfield = &frame->fields[0];
|
||||||
|
|
||||||
for (j = 0; j < nstrings; ++j) {
|
for (j = 0; j < nstrings; ++j) {
|
||||||
ucs4 = id3_field_getstrings (field, j);
|
ucs4 = id3_field_getstrings (field, j);
|
||||||
g_assert (ucs4);
|
g_assert (ucs4);
|
||||||
|
|
||||||
if (strcmp (id, ID3_FRAME_GENRE) == 0)
|
if (strncmp (frame->id, ID3_FRAME_GENRE, 5) == 0)
|
||||||
ucs4 = id3_genre_name (ucs4);
|
ucs4 = id3_genre_name (ucs4);
|
||||||
|
|
||||||
utf8 = id3_ucs4_utf8duplicate (ucs4);
|
if (encfield->type == ID3_FIELD_TYPE_TEXTENCODING
|
||||||
if (utf8 == 0)
|
&& encfield->number.value == ID3_FIELD_TEXTENCODING_ISO_8859_1) {
|
||||||
|
utf8 = mad_id3_parse_latin1_string (ucs4);
|
||||||
|
} else {
|
||||||
|
utf8 = id3_ucs4_utf8duplicate (ucs4);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (utf8 == NULL)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (!g_utf8_validate (utf8, -1, NULL)) {
|
if (!g_utf8_validate (utf8, -1, NULL)) {
|
||||||
|
@ -654,13 +754,13 @@ gst_mad_id3_to_tag_list (const struct id3_tag * tag)
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
g_assert (gst_tag_get_type (tag_name) == G_TYPE_STRING);
|
g_assert (gst_tag_get_type (tag_name) == G_TYPE_STRING);
|
||||||
|
g_strchomp (utf8);
|
||||||
gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND, tag_name, utf8,
|
gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND, tag_name, utf8,
|
||||||
NULL);
|
NULL);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
free (utf8);
|
free (utf8);
|
||||||
}
|
}
|
||||||
g_free (id);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return tag_list;
|
return tag_list;
|
||||||
|
|
Loading…
Reference in a new issue