mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-01-13 10:55:34 +00:00
tag: id3v2: Rewrite parsing of text tags to handle multiple NULL terminated strings. Parse numeric genre strings a...
Original commit message from CVS: * gst-libs/gst/tag/id3v2.c: (id3demux_read_id3v2_tag): * gst-libs/gst/tag/id3v2.h: * gst-libs/gst/tag/id3v2frames.c: (id3demux_id3v2_parse_frame), (parse_comment_frame), (parse_text_identification_frame), (id3v2_tag_to_taglist), (id3v2_are_digits), (id3v2_genre_string_to_taglist), (id3v2_genre_fields_to_taglist), (parse_split_strings), (free_tag_strings): Rewrite parsing of text tags to handle multiple NULL terminated strings. Parse numeric genre strings and ID3v2 type "(3)(6)Alternative" style genre strings. Parse dates that are only YYYY or YYYY-mm format.
This commit is contained in:
parent
a9c6822e3f
commit
a6f7ebffa2
3 changed files with 236 additions and 84 deletions
|
@ -198,6 +198,9 @@ id3demux_read_id3v2_tag (GstBuffer * buffer, guint * id3v2_size,
|
|||
*tags = work.tags;
|
||||
}
|
||||
|
||||
if (work.prev_genre)
|
||||
g_free (work.prev_genre);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -72,6 +72,9 @@ typedef struct {
|
|||
|
||||
guint8 *parse_data;
|
||||
guint parse_size;
|
||||
|
||||
/* Previous genre string, for simple duplicate removal */
|
||||
gchar *prev_genre;
|
||||
} ID3TagsWorking;
|
||||
|
||||
enum {
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#endif
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <gst/tag/tag.h>
|
||||
|
||||
#ifdef HAVE_ZLIB
|
||||
|
@ -35,11 +36,16 @@ GST_DEBUG_CATEGORY_EXTERN (id3demux_debug);
|
|||
#define GST_CAT_DEFAULT (id3demux_debug)
|
||||
|
||||
static gchar *parse_comment_frame (ID3TagsWorking * work);
|
||||
static gchar *parse_text_identification_frame (ID3TagsWorking * work);
|
||||
static GArray *parse_text_identification_frame (ID3TagsWorking * work);
|
||||
static gboolean id3v2_tag_to_taglist (ID3TagsWorking * work,
|
||||
const gchar * tag_name, gchar * tag_str);
|
||||
static void parse_split_strings (ID3TagsWorking * work, guint8 encoding,
|
||||
gchar ** field1, gchar ** field2);
|
||||
const gchar * tag_name, const gchar * tag_str);
|
||||
/* Parse a single string into an array of gchar* */
|
||||
static void parse_split_strings (guint8 encoding, gchar * data, gint data_size,
|
||||
GArray ** out_fields);
|
||||
static void free_tag_strings (GArray * fields);
|
||||
static gboolean
|
||||
id3v2_genre_fields_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
|
||||
GArray * tag_fields);
|
||||
|
||||
#define ID3V2_ENCODING_ISO8859 0x00
|
||||
#define ID3V2_ENCODING_UTF16 0x01
|
||||
|
@ -57,6 +63,7 @@ id3demux_id3v2_parse_frame (ID3TagsWorking * work)
|
|||
guint8 *frame_data = work->hdr.frame_data;
|
||||
guint frame_data_size = work->cur_frame_size;
|
||||
gchar *tag_str = NULL;
|
||||
GArray *tag_fields = NULL;
|
||||
|
||||
/* Check that the frame id is valid */
|
||||
for (i = 0; i < 5 && work->frame_id[i] != '\0'; i++) {
|
||||
|
@ -118,7 +125,7 @@ id3demux_id3v2_parse_frame (ID3TagsWorking * work)
|
|||
if (work->frame_id[0] == 'T') {
|
||||
if (strcmp (work->frame_id, "TXXX") != 0) {
|
||||
/* Text identification frame */
|
||||
tag_str = parse_text_identification_frame (work);
|
||||
tag_fields = parse_text_identification_frame (work);
|
||||
} else {
|
||||
/* Handle user text frame */
|
||||
}
|
||||
|
@ -142,6 +149,16 @@ id3demux_id3v2_parse_frame (ID3TagsWorking * work)
|
|||
result = id3v2_tag_to_taglist (work, tag_name, tag_str);
|
||||
g_free (tag_str);
|
||||
}
|
||||
if (tag_fields != NULL) {
|
||||
if (strcmp (work->frame_id, "TCON") == 0) {
|
||||
/* Genre strings need special treatment */
|
||||
result |= id3v2_genre_fields_to_taglist (work, tag_name, tag_fields);
|
||||
} else {
|
||||
tag_str = g_array_index (tag_fields, gchar *, 0);
|
||||
result |= id3v2_tag_to_taglist (work, tag_name, tag_str);
|
||||
}
|
||||
free_tag_strings (tag_fields);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@ -151,9 +168,9 @@ parse_comment_frame (ID3TagsWorking * work)
|
|||
{
|
||||
guint8 encoding;
|
||||
gchar language[4];
|
||||
gchar *description = NULL;
|
||||
gchar *text = NULL;
|
||||
GArray *fields = NULL;
|
||||
gchar *out_str = NULL;
|
||||
gchar *description, *text;
|
||||
|
||||
if (work->parse_size < 6)
|
||||
return NULL;
|
||||
|
@ -164,12 +181,15 @@ parse_comment_frame (ID3TagsWorking * work)
|
|||
language[2] = work->parse_data[3];
|
||||
language[3] = 0;
|
||||
|
||||
parse_split_strings (work, encoding, &description, &text);
|
||||
parse_split_strings (encoding, (gchar *) work->parse_data + 4,
|
||||
work->parse_size - 4, &fields);
|
||||
|
||||
if (text == NULL || description == NULL) {
|
||||
if (fields == NULL || fields->len < 2) {
|
||||
GST_WARNING ("Failed to decode comment frame");
|
||||
goto fail;
|
||||
}
|
||||
description = g_array_index (fields, gchar *, 0);
|
||||
text = g_array_index (fields, gchar *, 1);
|
||||
|
||||
if (!g_utf8_validate (text, -1, NULL)) {
|
||||
GST_WARNING ("Converted string is not valid utf-8");
|
||||
|
@ -184,53 +204,30 @@ parse_comment_frame (ID3TagsWorking * work)
|
|||
}
|
||||
|
||||
fail:
|
||||
g_free (description);
|
||||
g_free (text);
|
||||
free_tag_strings (fields);
|
||||
|
||||
return out_str;
|
||||
}
|
||||
|
||||
static gchar *
|
||||
static GArray *
|
||||
parse_text_identification_frame (ID3TagsWorking * work)
|
||||
{
|
||||
guchar encoding;
|
||||
gchar *text = NULL;
|
||||
GArray *fields = NULL;
|
||||
|
||||
if (work->parse_size < 2)
|
||||
return NULL;
|
||||
|
||||
encoding = work->parse_data[0];
|
||||
parse_split_strings (encoding, (gchar *) work->parse_data + 1,
|
||||
work->parse_size - 1, &fields);
|
||||
|
||||
switch (encoding) {
|
||||
case ID3V2_ENCODING_ISO8859:
|
||||
text = g_convert ((gchar *) (work->parse_data + 1),
|
||||
work->parse_size - 1, "UTF-8", "ISO-8859-1", NULL, NULL, NULL);
|
||||
break;
|
||||
case ID3V2_ENCODING_UTF8:
|
||||
text = g_strndup ((gchar *) (work->parse_data + 1), work->parse_size - 1);
|
||||
break;
|
||||
case ID3V2_ENCODING_UTF16:
|
||||
text = g_convert ((gchar *) (work->parse_data + 1),
|
||||
work->parse_size - 1, "UTF-8", "UTF-16", NULL, NULL, NULL);
|
||||
break;
|
||||
case ID3V2_ENCODING_UTF16BE:
|
||||
text = g_convert ((gchar *) (work->parse_data + 1),
|
||||
work->parse_size - 1, "UTF-8", "UTF-16BE", NULL, NULL, NULL);
|
||||
break;
|
||||
}
|
||||
|
||||
if (text != NULL && !g_utf8_validate (text, -1, NULL)) {
|
||||
GST_WARNING ("Converted string is not valid utf-8");
|
||||
g_free (text);
|
||||
text = NULL;
|
||||
}
|
||||
|
||||
return text;
|
||||
return fields;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
|
||||
gchar * tag_str)
|
||||
const gchar * tag_str)
|
||||
{
|
||||
GType tag_type = gst_tag_get_type (tag_name);
|
||||
GstTagList *tag_list = work->tags;
|
||||
|
@ -243,17 +240,7 @@ id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
|
|||
|
||||
tmp = strtoul ((char *) tag_str, &check, 10);
|
||||
|
||||
if (strcmp (tag_name, GST_TAG_DATE) == 0) {
|
||||
GDate *d;
|
||||
|
||||
if (*check != '\0')
|
||||
break;
|
||||
if (tmp == 0)
|
||||
break;
|
||||
d = g_date_new_dmy (1, 1, tmp);
|
||||
tmp = g_date_get_julian (d);
|
||||
g_date_free (d);
|
||||
} else if (strcmp (tag_name, GST_TAG_TRACK_NUMBER) == 0) {
|
||||
if (strcmp (tag_name, GST_TAG_TRACK_NUMBER) == 0) {
|
||||
if (*check == '/') {
|
||||
guint total;
|
||||
|
||||
|
@ -290,7 +277,7 @@ id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
|
|||
guint64 tmp;
|
||||
|
||||
g_assert (strcmp (tag_name, GST_TAG_DURATION) == 0);
|
||||
tmp = strtoul ((char *) tag_str, NULL, 10);
|
||||
tmp = strtoul (tag_str, NULL, 10);
|
||||
if (tmp == 0) {
|
||||
break;
|
||||
}
|
||||
|
@ -299,19 +286,41 @@ id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
|
|||
break;
|
||||
}
|
||||
case G_TYPE_STRING:{
|
||||
if (!strcmp (tag_name, GST_TAG_GENRE)) {
|
||||
if (work->prev_genre && !strcmp (tag_str, work->prev_genre))
|
||||
break; /* Same as the last genre */
|
||||
g_free (work->prev_genre);
|
||||
work->prev_genre = g_strdup (tag_str);
|
||||
}
|
||||
gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
|
||||
tag_name, (const gchar *) tag_str, NULL);
|
||||
tag_name, tag_str, NULL);
|
||||
break;
|
||||
}
|
||||
/* handles GST_TYPE_DATE and anything else */
|
||||
|
||||
default:{
|
||||
gchar *tmp = NULL;
|
||||
|
||||
if (tag_type == GST_TYPE_DATE) {
|
||||
guint year = 1901, month = 1, day = 1;
|
||||
|
||||
/* Dates can be yyyy-MM-dd, yyyy-MM or yyyy, but we need
|
||||
* the first type */
|
||||
if (sscanf (tag_str, "%04u-%02u-%02u", &year, &month, &day) == 0)
|
||||
break;
|
||||
|
||||
tmp = g_strdup_printf ("%04u-%02u-%02u", year, month, day);
|
||||
tag_str = tmp;
|
||||
break;
|
||||
}
|
||||
|
||||
/* handles anything else */
|
||||
GValue src = { 0, };
|
||||
GValue dest = { 0, };
|
||||
|
||||
g_value_init (&src, G_TYPE_STRING);
|
||||
g_value_set_string (&src, (const gchar *) tag_str);
|
||||
|
||||
g_value_init (&dest, tag_type);
|
||||
|
||||
if (g_value_transform (&src, &dest)) {
|
||||
gst_tag_list_add_values (tag_list, GST_TAG_MERGE_APPEND,
|
||||
tag_name, &dest, NULL);
|
||||
|
@ -319,8 +328,10 @@ id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
|
|||
GST_WARNING ("Failed to transform tag from string to type '%s'",
|
||||
g_type_name (tag_type));
|
||||
}
|
||||
|
||||
g_value_unset (&src);
|
||||
g_value_unset (&dest);
|
||||
g_free (tmp);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -328,61 +339,196 @@ id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
static void
|
||||
parse_split_strings (ID3TagsWorking * work, guint8 encoding,
|
||||
gchar ** field1, gchar ** field2)
|
||||
/* Check that an array of characters contains only digits */
|
||||
static gboolean
|
||||
id3v2_are_digits (const gchar * chars, gint size)
|
||||
{
|
||||
guint text_pos;
|
||||
gint i;
|
||||
|
||||
*field1 = *field2 = NULL;
|
||||
for (i = 0; i < size; i++) {
|
||||
if (!g_ascii_isdigit (chars[i]))
|
||||
return FALSE;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
id3v2_genre_string_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
|
||||
const gchar * tag_str, gint len)
|
||||
{
|
||||
g_return_val_if_fail (tag_str != NULL, FALSE);
|
||||
|
||||
/* If it's a number, it might be a defined genre */
|
||||
if (id3v2_are_digits (tag_str, len)) {
|
||||
tag_str = gst_tag_id3_genre_get (strtol (tag_str, NULL, 10));
|
||||
if (tag_str != NULL)
|
||||
return id3v2_tag_to_taglist (work, tag_name, tag_str);
|
||||
}
|
||||
/* Otherwise it might be "RX" or "CR" */
|
||||
if (len == 2) {
|
||||
if (g_ascii_strncasecmp ("rx", tag_str, len) == 0)
|
||||
return id3v2_tag_to_taglist (work, tag_name, "Remix");
|
||||
|
||||
if (g_ascii_strncasecmp ("cr", tag_str, len) == 0)
|
||||
return id3v2_tag_to_taglist (work, tag_name, "Cover");
|
||||
}
|
||||
|
||||
/* Otherwise it's a string */
|
||||
return id3v2_tag_to_taglist (work, tag_name, tag_str);
|
||||
}
|
||||
|
||||
static gboolean
|
||||
id3v2_genre_fields_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
|
||||
GArray * tag_fields)
|
||||
{
|
||||
gchar *tag_str = NULL;
|
||||
gboolean result = FALSE;
|
||||
gint i;
|
||||
|
||||
for (i = 0; i < tag_fields->len; i++) {
|
||||
gint len;
|
||||
|
||||
tag_str = g_array_index (tag_fields, gchar *, 0);
|
||||
if (tag_str == NULL)
|
||||
continue;
|
||||
|
||||
len = strlen (tag_str);
|
||||
if (work->hdr.version <= 0x300) { /* <= 2.3.0 */
|
||||
/* Check for genre numbers wrapped in parentheses, possibly
|
||||
* followed by a string */
|
||||
while (len >= 2) {
|
||||
gint pos;
|
||||
gboolean found = FALSE;
|
||||
|
||||
/* Double parenthesis ends the numeric genres */
|
||||
if (tag_str[0] == '(' && tag_str[1] == '(')
|
||||
break;
|
||||
|
||||
for (pos = 1; pos < len; pos++) {
|
||||
if (tag_str[pos] == ')') {
|
||||
gchar *tmp_str;
|
||||
|
||||
tmp_str = g_strndup (tag_str + 1, pos - 1);
|
||||
result |=
|
||||
id3v2_genre_string_to_taglist (work, tag_name, tmp_str,
|
||||
pos - 1);
|
||||
g_free (tmp_str);
|
||||
tag_str += pos + 1;
|
||||
len -= pos + 1;
|
||||
found = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
break; /* There was no closing parenthesis */
|
||||
}
|
||||
}
|
||||
|
||||
if (len > 0)
|
||||
result |= id3v2_genre_string_to_taglist (work, tag_name, tag_str, len);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static void
|
||||
parse_split_strings (guint8 encoding, gchar * data, gint data_size,
|
||||
GArray ** out_fields)
|
||||
{
|
||||
GArray *fields = g_array_new (FALSE, TRUE, sizeof (gchar *));
|
||||
gchar *field;
|
||||
gint text_pos;
|
||||
gint prev = 0;
|
||||
|
||||
g_return_if_fail (out_fields != NULL);
|
||||
|
||||
switch (encoding) {
|
||||
case ID3V2_ENCODING_ISO8859:
|
||||
for (text_pos = 4; text_pos < work->parse_size - 5; text_pos++) {
|
||||
if (work->parse_data[text_pos] == 0) {
|
||||
*field1 = g_convert ((gchar *) (work->parse_data + 4),
|
||||
text_pos - 4, "UTF-8", "ISO-8859-1", NULL, NULL, NULL);
|
||||
*field2 = g_convert ((gchar *) (work->parse_data + text_pos + 5),
|
||||
work->parse_size - text_pos - 5,
|
||||
for (text_pos = 0; text_pos < data_size; text_pos++) {
|
||||
if (data[text_pos] == 0) {
|
||||
field = g_convert (data + prev, text_pos - prev + 1,
|
||||
"UTF-8", "ISO-8859-1", NULL, NULL, NULL);
|
||||
break;
|
||||
if (field)
|
||||
g_array_append_val (fields, field);
|
||||
prev = text_pos + 1;
|
||||
}
|
||||
}
|
||||
if (data_size - prev > 0 && data[prev] != 0x00) {
|
||||
field = g_convert (data + prev, data_size - prev,
|
||||
"UTF-8", "ISO-8859-1", NULL, NULL, NULL);
|
||||
if (field)
|
||||
g_array_append_val (fields, field);
|
||||
}
|
||||
|
||||
break;
|
||||
case ID3V2_ENCODING_UTF8:
|
||||
*field1 = g_strndup ((gchar *) (work->parse_data + 4),
|
||||
work->parse_size - 4);
|
||||
text_pos = 4 + strlen (*field1) + 1; /* Offset by one more for the null */
|
||||
if (text_pos < work->parse_size) {
|
||||
*field2 = g_strndup ((gchar *) (work->parse_data + text_pos),
|
||||
work->parse_size - text_pos);
|
||||
for (prev = 0, text_pos = 0; text_pos < data_size; text_pos++) {
|
||||
if (data[text_pos]) {
|
||||
field = g_strndup (data + prev, text_pos - prev + 1);
|
||||
if (field)
|
||||
g_array_append_val (fields, field);
|
||||
prev = text_pos + 1;
|
||||
}
|
||||
}
|
||||
if (data_size - prev > 0 && data[prev] != 0x00) {
|
||||
field = g_strndup (data + prev, data_size - prev);
|
||||
if (field)
|
||||
g_array_append_val (fields, field);
|
||||
}
|
||||
break;
|
||||
case ID3V2_ENCODING_UTF16:
|
||||
case ID3V2_ENCODING_UTF16BE:
|
||||
{
|
||||
/* Find '\0\0' terminator */
|
||||
for (text_pos = 4; text_pos < work->parse_size - 6; text_pos++) {
|
||||
if (work->parse_data[text_pos] == 0 &&
|
||||
work->parse_data[text_pos + 1] == 0) {
|
||||
/* found our delimiter */
|
||||
for (text_pos = 0; text_pos < data_size - 1; text_pos += 2) {
|
||||
if (data[text_pos] == 0 && data[text_pos + 1] == 0) {
|
||||
/* found a delimiter */
|
||||
if (encoding == ID3V2_ENCODING_UTF16) {
|
||||
*field1 = g_convert ((gchar *) (work->parse_data + 4),
|
||||
text_pos - 4, "UTF-8", "UTF-16", NULL, NULL, NULL);
|
||||
*field2 = g_convert ((gchar *) (work->parse_data + text_pos + 6),
|
||||
work->parse_size - text_pos - 6,
|
||||
field = g_convert (data + prev, text_pos - prev + 2,
|
||||
"UTF-8", "UTF-16", NULL, NULL, NULL);
|
||||
} else {
|
||||
*field1 = g_convert ((gchar *) (work->parse_data + 4),
|
||||
text_pos - 4, "UTF-8", "UTF-16BE", NULL, NULL, NULL);
|
||||
*field2 = g_convert ((gchar *) (work->parse_data + text_pos + 6),
|
||||
work->parse_size - text_pos - 6,
|
||||
field = g_convert (data + prev, text_pos - prev + 2,
|
||||
"UTF-8", "UTF-16BE", NULL, NULL, NULL);
|
||||
}
|
||||
if (field)
|
||||
g_array_append_val (fields, field);
|
||||
text_pos++; /* Advance to the 2nd NULL terminator */
|
||||
prev = text_pos + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (data_size - prev > 1 &&
|
||||
(data[prev] != 0x00 || data[prev + 1] != 0x00)) {
|
||||
/* There were 2 or more non-null chars left, convert those too */
|
||||
if (encoding == ID3V2_ENCODING_UTF16) {
|
||||
field = g_convert (data + prev, data_size - prev,
|
||||
"UTF-8", "UTF-16", NULL, NULL, NULL);
|
||||
} else {
|
||||
field = g_convert (data + prev, data_size - prev,
|
||||
"UTF-8", "UTF-16BE", NULL, NULL, NULL);
|
||||
}
|
||||
if (field)
|
||||
g_array_append_val (fields, field);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (fields->len > 0)
|
||||
*out_fields = fields;
|
||||
else
|
||||
g_array_free (fields, TRUE);
|
||||
}
|
||||
|
||||
static void
|
||||
free_tag_strings (GArray * fields)
|
||||
{
|
||||
if (fields) {
|
||||
gint i;
|
||||
gchar *c;
|
||||
|
||||
for (i = 0; i < fields->len; i++) {
|
||||
c = g_array_index (fields, gchar *, i);
|
||||
g_free (c);
|
||||
}
|
||||
g_array_free (fields, TRUE);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue