API: add gst_tag_freeform_string_to_utf8() (#405072).

Original commit message from CVS:
* docs/libs/gst-plugins-base-libs-sections.txt:
* gst-libs/gst/tag/tag.h:
* gst-libs/gst/tag/tags.c: (gst_tag_freeform_string_to_utf8):
API: add gst_tag_freeform_string_to_utf8() (#405072).
* gst-libs/gst/tag/gstid3tag.c: (gst_tag_extract_id3v1_string):
Use gst_tag_freeform_string_to_utf8() here.
This commit is contained in:
Tim-Philipp Müller 2007-04-12 12:19:20 +00:00
parent 8a6b8cfb37
commit a208469078
5 changed files with 113 additions and 58 deletions

View file

@ -1,3 +1,13 @@
2007-04-12 Tim-Philipp Müller <tim at centricular dot net>
* docs/libs/gst-plugins-base-libs-sections.txt:
* gst-libs/gst/tag/tag.h:
* gst-libs/gst/tag/tags.c: (gst_tag_freeform_string_to_utf8):
API: add gst_tag_freeform_string_to_utf8() (#405072).
* gst-libs/gst/tag/gstid3tag.c: (gst_tag_extract_id3v1_string):
Use gst_tag_freeform_string_to_utf8() here.
2007-04-12 Wim Taymans <wim@fluendo.com>
* gst/gdp/gstgdppay.c: (gst_gdp_pay_chain),

View file

@ -960,6 +960,7 @@ GST_TAG_CMML_CLIP
GST_TAG_CMML_HEAD
GST_TAG_CMML_STREAM
gst_tag_register_musicbrainz_tags
gst_tag_freeform_string_to_utf8
gst_tag_parse_extended_comment
GstTagImageType
<SUBSECTION Standard>

View file

@ -320,66 +320,14 @@ static void
gst_tag_extract_id3v1_string (GstTagList * list, const gchar * tag,
const gchar * start, const guint size)
{
const gchar *env;
gsize bytes_read;
const gchar *env_vars[] = { "GST_ID3V1_TAG_ENCODING",
"GST_ID3_TAG_ENCODING", "GST_TAG_ENCODING", NULL
};
gchar *utf8;
/* Should we try the charsets specified
* via environment variables FIRST ? */
if (g_utf8_validate (start, size, NULL)) {
utf8 = g_strndup (start, size);
goto beach;
}
utf8 = gst_tag_freeform_string_to_utf8 (start, size, env_vars);
env = g_getenv ("GST_ID3V1_TAG_ENCODING");
if (!env || *env == '\0')
env = g_getenv ("GST_ID3_TAG_ENCODING");
if (!env || *env == '\0')
env = g_getenv ("GST_TAG_ENCODING");
/* Try charsets specified via the environment */
if (env && *env != '\0') {
gchar **c, **csets;
csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1);
for (c = csets; c && *c; ++c) {
if ((utf8 =
g_convert (start, size, "UTF-8", *c, &bytes_read, NULL, NULL))) {
if (bytes_read == size) {
g_strfreev (csets);
goto beach;
}
g_free (utf8);
utf8 = NULL;
}
}
}
/* Try current locale (if not UTF-8) */
if (!g_get_charset (&env)) {
if ((utf8 = g_locale_to_utf8 (start, size, &bytes_read, NULL, NULL))) {
if (bytes_read == size) {
goto beach;
}
g_free (utf8);
utf8 = NULL;
}
}
/* Try ISO-8859-1 */
utf8 =
g_convert (start, size, "UTF-8", "ISO-8859-1", &bytes_read, NULL, NULL);
if (utf8 != NULL && bytes_read == size) {
goto beach;
}
g_free (utf8);
return;
beach:
g_strchomp (utf8);
if (utf8 && utf8[0] != '\0') {
if (utf8 && *utf8 != '\0') {
gst_tag_list_add (list, GST_TAG_MERGE_REPLACE, tag, utf8, NULL);
}

View file

@ -210,8 +210,12 @@ gboolean gst_tag_parse_extended_comment (const gchar * ext_comme
gchar ** value,
gboolean fail_if_no_key);
gchar * gst_tag_freeform_string_to_utf8 (const gchar * data,
gint size,
const gchar ** env_vars);
/* FIXME 0.11: replace with a more general gst_tag_library_init() */
void gst_tag_register_musicbrainz_tags (void);
void gst_tag_register_musicbrainz_tags (void);
G_END_DECLS

View file

@ -222,3 +222,95 @@ gst_tag_parse_extended_comment (const gchar * ext_comment, gchar ** key,
return TRUE;
}
/**
 * gst_tag_freeform_string_to_utf8:
 * @data: string data
 * @size: length of string data, or -1 if the string is NUL-terminated
 * @env_vars: a NULL-terminated string array of environment variable names,
 *            or NULL
 *
 * Convenience function to read a string with unknown character encoding. If
 * the string is already valid UTF-8, a copy of it is used as is.
 * Otherwise, the environment will be searched for a number of environment
 * variables (whose names are specified in the NULL-terminated string array
 * @env_vars) containing a list of character encodings to try/use, separated
 * by G_SEARCHPATH_SEPARATOR_S. Only the first variable that is set to a
 * non-empty value is consulted. If none are specified, or none of the listed
 * encodings can convert the complete string, the current locale will be
 * tried (unless the locale charset is UTF-8). If that also doesn't work,
 * ISO-8859-1 is assumed (which will almost always succeed).
 *
 * Regardless of which of the above steps produced the result, trailing
 * whitespace is stripped from it, and a result that is empty after stripping
 * is reported as NULL rather than as an empty string.
 *
 * Returns: a newly-allocated string in UTF-8 encoding, or NULL. Free the
 *          returned string with g_free() when no longer needed.
 *
 * Since: 0.10.13
 */
gchar *
gst_tag_freeform_string_to_utf8 (const gchar * data, gint size,
    const gchar ** env_vars)
{
  const gchar *env = NULL;
  const gchar *locale_charset = NULL;
  gsize bytes_read;
  gchar *utf8 = NULL;

  g_return_val_if_fail (data != NULL, NULL);

  if (size < 0)
    size = strlen (data);

  /* Should we try the charsets specified
   * via environment variables FIRST ? */
  if (g_utf8_validate (data, size, NULL)) {
    utf8 = g_strndup (data, size);
    /* go through the common exit path so that input that is already UTF-8
     * is chomped and empty-checked exactly like converted input */
    goto beach;
  }

  /* pick the first environment variable that is set to a non-empty value */
  while ((env == NULL || *env == '\0') && env_vars && *env_vars != NULL) {
    env = g_getenv (*env_vars);
    ++env_vars;
  }

  /* Try charsets specified via the environment */
  if (env != NULL && *env != '\0') {
    gchar **c, **csets;

    csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1);

    for (c = csets; c && *c; ++c) {
      utf8 = g_convert (data, size, "UTF-8", *c, &bytes_read, NULL, NULL);
      /* only accept a conversion that consumed the complete input */
      if (utf8 != NULL && bytes_read == (gsize) size)
        break;
      g_free (utf8);
      utf8 = NULL;
    }

    g_strfreev (csets);

    if (utf8 != NULL)
      goto beach;
  }

  /* Try current locale (if not UTF-8) */
  if (!g_get_charset (&locale_charset)) {
    utf8 = g_locale_to_utf8 (data, size, &bytes_read, NULL, NULL);
    if (utf8 != NULL && bytes_read == (gsize) size)
      goto beach;
    g_free (utf8);
    utf8 = NULL;
  }

  /* Try ISO-8859-1 */
  utf8 = g_convert (data, size, "UTF-8", "ISO-8859-1", &bytes_read, NULL, NULL);
  if (utf8 != NULL && bytes_read == (gsize) size)
    goto beach;

  g_free (utf8);
  return NULL;

beach:

  /* utf8 is always non-NULL when we get here */
  g_strchomp (utf8);
  if (utf8[0] != '\0')
    return utf8;

  /* nothing left after stripping trailing whitespace */
  g_free (utf8);
  return NULL;
}