From af6ea56cce207a432728ac6b9d6a31d60d1595cd Mon Sep 17 00:00:00 2001 From: Michael Smith Date: Thu, 10 Sep 2009 12:12:26 -0700 Subject: [PATCH] id3tag: When writing id3v2.3, do not use UTF-8. UTF-8 is only permitted in v2.4. So instead use ISO-8859-1 for ascii-only strings, and UTF16 otherwise. Also, do not null terminate strings in text frames, except where required. These two allow windows media player to play (and correctly read tags) files created by id3mux. --- gst/id3tag/id3tag.c | 128 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 100 insertions(+), 28 deletions(-) diff --git a/gst/id3tag/id3tag.c b/gst/id3tag/id3tag.c index 43c7635a32..7af74fc65c 100644 --- a/gst/id3tag/id3tag.c +++ b/gst/id3tag/id3tag.c @@ -142,7 +142,9 @@ typedef struct typedef void (*GstId3v2AddTagFunc) (GstId3v2Tag * tag, const GstTagList * list, const gchar * gst_tag, guint num_tags, const gchar * data); -#define ID3V2_ENCODING_UTF8 0x03 +#define ID3V2_ENCODING_ISO_8859_1 0x00 +#define ID3V2_ENCODING_UTF16_BOM 0x01 +#define ID3V2_ENCODING_UTF8 0x03 static gboolean id3v2_tag_init (GstId3v2Tag * tag, guint major_version); static void id3v2_tag_unset (GstId3v2Tag * tag); @@ -320,12 +322,76 @@ id3v2_frame_unset (GstId3v2Frame * frame) memset (frame, 0, sizeof (GstId3v2Frame)); } +static gboolean +id3v2_string_is_ascii (const gchar * string) +{ + while (*string) { + if (!g_ascii_isprint (*string++)) + return FALSE; + } + + return TRUE; +} + +static int +id3v2_tag_string_encoding (GstId3v2Tag * tag, const gchar * string) +{ + int encoding; + if (tag->major_version == 4) { + /* ID3v2.4 supports UTF8, use it unconditionally as it's really the only + sensible encoding. */ + encoding = ID3V2_ENCODING_UTF8; + } else { + /* If we're not writing v2.4, then check to see if it's ASCII. + If it is, write ISO-8859-1 (compatible with ASCII). + Otherwise, write UTF-16-LE with a byte order marker. + Note that we don't write arbitrary ISO-8859-1 as ISO-8859-1, because much + software misuses this - and non-ASCII might confuse it. */ + if (id3v2_string_is_ascii (string)) + encoding = ID3V2_ENCODING_ISO_8859_1; + else + encoding = ID3V2_ENCODING_UTF16_BOM; + } + + return encoding; +} + +static void +id3v2_frame_write_string (GstId3v2Frame * frame, int encoding, + const gchar * string, gboolean null_terminate) +{ + int terminator_length; + if (encoding == ID3V2_ENCODING_UTF16_BOM) { + gsize utf16len; + /* This converts to little-endian UTF-16, WITH a byte-order-marker */ + gchar *utf16 = g_convert (string, -1, "UTF-16LE", "UTF-8", + NULL, &utf16len, NULL); + if (!utf16) { + GST_WARNING ("Failed to convert UTF-8 to UTF-16LE"); + return; + } + + /* NUL terminator is 2 bytes, if present */ + terminator_length = null_terminate ? 2 : 0; + id3v2_frame_write_bytes (frame, (const guint8 *) utf16, + utf16len + terminator_length); + + g_free (utf16); + } else { + /* write NUL terminator as well if requested */ + terminator_length = null_terminate ? 1 : 0; + id3v2_frame_write_bytes (frame, (const guint8 *) string, + strlen (string) + terminator_length); + } +} + static void id3v2_tag_add_text_frame (GstId3v2Tag * tag, const gchar * frame_id, gchar ** strings_utf8, int num_strings) { GstId3v2Frame frame; guint len, i; + int encoding; if (num_strings < 1 || strings_utf8 == NULL || strings_utf8[0] == NULL) { GST_LOG ("Not adding text frame, no strings"); @@ -333,7 +399,9 @@ id3v2_tag_add_text_frame (GstId3v2Tag * tag, const gchar * frame_id, } id3v2_frame_init (&frame, frame_id, 0); - id3v2_frame_write_uint8 (&frame, ID3V2_ENCODING_UTF8); + + encoding = id3v2_tag_string_encoding (tag, strings_utf8[0]); + id3v2_frame_write_uint8 (&frame, encoding); GST_LOG ("Adding text frame %s with %d strings", frame_id, num_strings); @@ -341,8 +409,8 @@ id3v2_tag_add_text_frame (GstId3v2Tag * tag, const gchar * frame_id, len = strlen (strings_utf8[i]); g_return_if_fail (g_utf8_validate (strings_utf8[i], len, NULL)); - /* write NUL terminator as well */ - id3v2_frame_write_bytes (&frame, (const guint8 *) strings_utf8[i], len + 1); + id3v2_frame_write_string (&frame, encoding, strings_utf8[i], + i != num_strings - 1); /* only v2.4.0 supports multiple strings per frame (according to the * earlier specs tag readers should just ignore everything after the first @@ -530,11 +598,10 @@ add_comment_tag (GstId3v2Tag * id3v2tag, const GstTagList * list, if (gst_tag_list_get_string_index (list, tag, n, &s) && s != NULL) { gchar *desc = NULL, *val = NULL, *lang = NULL; - int desclen, vallen; + int desclen, vallen, encoding1, encoding2, encoding; GstId3v2Frame frame; id3v2_frame_init (&frame, "COMM", 0); - id3v2_frame_write_uint8 (&frame, ID3V2_ENCODING_UTF8); if (strcmp (tag, GST_TAG_COMMENT) == 0 || !gst_tag_parse_extended_comment (s, &desc, &lang, &val, TRUE)) { @@ -556,10 +623,15 @@ add_comment_tag (GstId3v2Tag * id3v2tag, const GstTagList * list, GST_LOG ("%s[%u] = '%s' (%s|%s|%s)", tag, n, s, GST_STR_NULL (desc), GST_STR_NULL (lang), GST_STR_NULL (val)); + encoding1 = id3v2_tag_string_encoding (id3v2tag, desc); + encoding2 = id3v2_tag_string_encoding (id3v2tag, val); + encoding = MAX (encoding1, encoding2); + + id3v2_frame_write_uint8 (&frame, encoding); id3v2_frame_write_bytes (&frame, (const guint8 *) lang, 3); - /* write description and value, each including NULL terminator */ - id3v2_frame_write_bytes (&frame, (const guint8 *) desc, desclen + 1); - id3v2_frame_write_bytes (&frame, (const guint8 *) val, vallen + 1); + /* write description and value */ + id3v2_frame_write_string (&frame, encoding, desc, TRUE); + id3v2_frame_write_string (&frame, encoding, val, FALSE); g_free (lang); g_free (desc); @@ -597,6 +669,7 @@ add_image_tag (GstId3v2Tag * id3v2tag, const GstTagList * list, if (mime_type != NULL) { const gchar *desc; GstId3v2Frame frame; + int encoding; /* APIC frame specifies "-->" if we're providing a URL to the image rather than directly embedding it */ @@ -607,9 +680,14 @@ add_image_tag (GstId3v2Tag * id3v2tag, const GstTagList * list, GST_BUFFER_SIZE (image), mime_type); id3v2_frame_init (&frame, "APIC", 0); - id3v2_frame_write_uint8 (&frame, ID3V2_ENCODING_UTF8); - id3v2_frame_write_bytes (&frame, (const guint8 *) mime_type, - strlen (mime_type) + 1); + + desc = gst_structure_get_string (s, "image-description"); + if (!desc) + desc = ""; + encoding = id3v2_tag_string_encoding (id3v2tag, desc); + id3v2_frame_write_uint8 (&frame, encoding); + + id3v2_frame_write_string (&frame, encoding, mime_type, TRUE); /* FIXME set image type properly from caps */ if (strcmp (tag, GST_TAG_PREVIEW_IMAGE) == 0) @@ -617,11 +695,7 @@ add_image_tag (GstId3v2Tag * id3v2tag, const GstTagList * list, else id3v2_frame_write_uint8 (&frame, ID3V2_APIC_PICTURE_OTHER); - desc = gst_structure_get_string (s, "image-description"); - if (!desc) - desc = ""; - id3v2_frame_write_bytes (&frame, (const guint8 *) desc, - strlen (desc) + 1); + id3v2_frame_write_string (&frame, encoding, desc, TRUE); g_array_append_val (id3v2tag->frames, frame); } @@ -669,24 +743,22 @@ add_musicbrainz_tag (GstId3v2Tag * id3v2tag, const GstTagList * list, /* add two frames, one with the ID the musicbrainz.org spec mentions * and one with the ID that applications use in the real world */ GstId3v2Frame frame1, frame2; + int encoding; GST_DEBUG ("Setting '%s' to '%s'", mb_ids[idx].spec_id, id_str); + encoding = id3v2_tag_string_encoding (id3v2tag, id_str); id3v2_frame_init (&frame1, "TXXX", 0); - id3v2_frame_write_uint8 (&frame1, ID3V2_ENCODING_UTF8); - id3v2_frame_write_bytes (&frame1, (const guint8 *) mb_ids[idx].spec_id, - strlen (mb_ids[idx].spec_id) + 1); - id3v2_frame_write_bytes (&frame1, (const guint8 *) id_str, - strlen (id_str) + 1); + id3v2_frame_write_uint8 (&frame1, encoding); + id3v2_frame_write_string (&frame1, encoding, mb_ids[idx].spec_id, TRUE); + id3v2_frame_write_string (&frame1, encoding, id_str, FALSE); g_array_append_val (id3v2tag->frames, frame1); id3v2_frame_init (&frame2, "TXXX", 0); - id3v2_frame_write_uint8 (&frame2, ID3V2_ENCODING_UTF8); - id3v2_frame_write_bytes (&frame2, - (const guint8 *) mb_ids[idx].realworld_id, - strlen (mb_ids[idx].realworld_id) + 1); - id3v2_frame_write_bytes (&frame2, (const guint8 *) id_str, - strlen (id_str) + 1); + id3v2_frame_write_uint8 (&frame2, encoding); + id3v2_frame_write_string (&frame2, encoding, + mb_ids[idx].realworld_id, TRUE); + id3v2_frame_write_string (&frame2, encoding, id_str, FALSE); g_array_append_val (id3v2tag->frames, frame2); g_free (id_str);