fixes to mp3 typefinding:

Original commit message from CVS:
fixes to mp3 typefinding:
- removed workaround that detected files with valid ID3v2 tag as mp3 (not needed anymore)
Invalid files didn't occur because of broken length in the tag but because of padding
in the beginning of the audio data most of the time.
- fixed various assorted stuff in the old typefind function (like not adjusting buffer
size after skipping)
- added 2nd typefind function to detect mp3 streams (fixes #94113)
This commit is contained in:
Benjamin Otte 2003-04-10 02:34:13 +00:00
parent 7212df9f4b
commit 015f82aaf5

View file

@ -22,9 +22,11 @@
#include <string.h> /* memcmp */
static GstCaps* mp3_type_find(GstBuffer *buf, gpointer private);
static GstCaps* mp3_type_find_stream(GstBuffer *buf, gpointer private);
/*
 * Typefind definitions for MP3 audio.
 *
 * Both entries carry the mime type "audio/x-mp3" and the same extension list;
 * they differ only in their first string and in the detection function:
 * mp3_type_find for the first entry, mp3_type_find_stream for the second.
 * The final all-NULL entry is presumably the list terminator - the code that
 * walks this array is not part of this excerpt.
 */
static GstTypeDefinition mp3type_definitions[] = {
{ "mp3types_audio/x-mp3", "audio/x-mp3", ".mp3 .mp2 .mp1 .mpga", mp3_type_find },
{ "mp3types_stream_audio/x-mp3", "audio/x-mp3", ".mp3 .mp2 .mp1 .mpga", mp3_type_find_stream },
{ NULL, NULL, NULL, NULL },
};
@ -46,11 +48,10 @@ mp3_type_find(GstBuffer *buf, gpointer private)
data[0] == 'T' && data[1] == 'A' && data[2] == 'G') {
/* ID V1 tags */
data += 128;
size -= 128;
GST_DEBUG (0, "mp3typefind: detected ID3 Tag V1");
}
else {
if (size >= 10 &&
} else if (size >= 10 &&
(data[0] == 'I' && data[1] == 'D' && data[2] == '3') &&
data[3] < 0xff && data[4] < 0xff &&
data[6] < 0x80 && data[7] < 0x80 && data[8] < 0x80 && data[9] < 0x80)
@ -62,22 +63,20 @@ mp3_type_find(GstBuffer *buf, gpointer private)
skip = (skip << 7) | (data[8] & 0x7f);
skip = (skip << 7) | (data[9] & 0x7f);
if (data[0] == 'I') {
/* ID3V2 */
/* footer present? */
if (data[5] & 0x10)
/* include size of header */
skip += 10;
/* footer present? (only available since version 4) */
if (data[3] > 3 && (data[5] & 0x10))
skip += 10;
}
GST_DEBUG (0, "mp3typefind: detected ID3 Tag V2 with %u bytes", skip);
size -= skip;
data += skip;
}
if (size < 4)
return NULL;
/* we currently accept a valid ID3 tag as an mp3 as some ID3 tags have invalid
* offsets so the next check might fail */
goto done;
}
}
/* now with the right position, do typefinding */
head = GUINT32_FROM_BE(*((guint32 *)data));
if ((head & 0xffe00000) != 0xffe00000)
@ -91,11 +90,132 @@ mp3_type_find(GstBuffer *buf, gpointer private)
if (((head >> 10) & 0x3) == 0x3)
return NULL;
done:
caps = gst_caps_new ("mp3_type_find", "audio/x-mp3", NULL);
return caps;
}
/*
 * Bitrate lookup table, indexed as [version group][layer - 1][bitrate index].
 *
 * Group 0 is used when the header's version bits are 3 (MPEG-1), group 1 for
 * every other version. Values are in kbit/s (the frame length computation
 * multiplies them by constants that already include the factor 1000).
 * Index 0 is the "free format" marker. Each row lists only 15 values, so
 * index 15 is implicitly zero-initialised; mp3_type_frame_length_from_header
 * rejects bitrate indices 0 and 15 before it consults this table.
 */
static guint mp3types_bitrates[2][3][16] =
{ { {0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, },
{0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, },
{0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, } },
{ {0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, },
{0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, },
{0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, } },
};
/*
 * Sampling frequencies in Hz, indexed as [row][sampling frequency index].
 *
 * Row 0 holds the full rates (44100/48000/32000, the MPEG-1 set), row 1 the
 * halved rates (the MPEG-2 set) and row 2 the quartered rates (the MPEG-2.5
 * set). Only indices 0-2 exist; the reserved index 3 is rejected by
 * mp3_type_frame_length_from_header before the lookup.
 * NOTE(review): the row order here must agree with how the caller maps the
 * header's version bits (3 = MPEG-1, 2 = MPEG-2, 0 = MPEG-2.5) to a row -
 * verify the two stay in sync.
 */
static guint mp3types_freqs[3][3] =
{ {44100, 48000, 32000},
{22050, 24000, 16000},
{11025, 12000, 8000}};
/**
 * mp3_type_frame_length_from_header:
 * @header: 32-bit MPEG audio frame header in host byte order, i.e. with the
 *          sync word in the most significant bits. The sync word itself is
 *          not checked here; callers test it before calling.
 *
 * Decodes the padding, sampling frequency, bitrate, layer and version fields
 * of @header and derives the length of the frame that the header starts.
 *
 * Returns: the frame length in bytes, or 0 when the length cannot be
 * determined because a field holds an unusable value (sampling frequency
 * index 3, bitrate index 0 or 15, layer bits 0, version bits 1).
 */
static inline guint
mp3_type_frame_length_from_header (guint32 header)
{
  guint length;
  gulong samplerate, bitrate, layer, version;

  /* we don't need extension, mode, copyright, original or emphasis for the frame length */
  header >>= 9;
  /* padding */
  length = header & 0x1;
  header >>= 1;
  /* sampling frequency */
  samplerate = header & 0x3;
  if (samplerate == 3)
    return 0;
  header >>= 2;
  /* bitrate index */
  bitrate = header & 0xF;
  if (bitrate == 15 || bitrate == 0)
    return 0;
  /* ignore error correction, too (4 bitrate bits + 1 protection bit) */
  header >>= 5;
  /* layer: the field counts downwards, 3 = Layer I ... 1 = Layer III */
  layer = 4 - (header & 0x3);
  if (layer == 4)
    return 0;
  header >>= 2;
  /* version: 3 = MPEG-1, 2 = MPEG-2, 0 = MPEG-2.5, 1 is reserved */
  version = header & 0x3;
  if (version == 1)
    return 0;

  /* lookup */
  bitrate = mp3types_bitrates[version == 3 ? 0 : 1][layer - 1][bitrate];
  /* mp3types_freqs is ordered MPEG-1, MPEG-2, MPEG-2.5, so the version bits
   * 3, 2, 0 have to map to the rows 0, 1, 2 */
  samplerate = mp3types_freqs[version == 0 ? 2 : 3 - version][samplerate];

  /* calculating */
  if (layer == 1) {
    /* Layer I frames are made of 4 byte slots; padding adds one slot */
    length = ((12000 * bitrate / samplerate) + length) * 4;
  } else {
    /* Layer III of MPEG-2 and MPEG-2.5 carries half as many samples per frame
     * as Layer II and MPEG-1 Layer III, hence the halved coefficient */
    length += ((layer == 3 && version != 3) ? 72000 : 144000) * bitrate / samplerate;
  }

  GST_DEBUG (0, "Calculated mad frame length of %u bytes", length);
  GST_DEBUG (0, "samplerate = %lu - bitrate = %lu - layer = %lu - version = %lu", samplerate, bitrate, layer, version);
  return length;
}
/* Increase this value when mp3_type_find_stream finds too many false positives. */
/**
 * The chance that random data is identified as a valid mp3 header is 63 / 2^18
 * (0.024%) per try. This makes the function for calculating false positives
 *   1 - (1 - ((63 / 2^18) ^ GST_MP3_TYPEFIND_MIN_HEADERS)) ^ buffersize
 * This has the following probabilities of false positives:
 * bufsize            MIN_HEADERS
 * (bytes)    1       2       3       4
 * 4096       62.6%   0.02%   0%      0%
 * 16384      98%     0.09%   0%      0%
 * 1 MiB      100%    5.88%   0%      0%
 * 1 GiB      100%    100%    1.44%   0%
 * 1 TiB      100%    100%    100%    0.35%
 * This means that the current choice (3 headers with, most of the time,
 * 4096 byte buffers) is pretty safe for now.
 * It is however important to note that in a worst case example a buffer of size
 *   1440 * GST_MP3_TYPEFIND_MIN_HEADERS + 3
 * bytes is needed to reliably find the mp3 stream in a buffer when scanning
 * starts at a random position. This is currently (4323 bytes) slightly above
 * the default buffer size. But you rarely hit the worst case - average mp3
 * frames are in the 500 bytes range.
 */
#define GST_MP3_TYPEFIND_MIN_HEADERS 3
static GstCaps*
mp3_type_find_stream (GstBuffer *buf, gpointer private)
{
guint8 *data;
guint size;
guint32 head;
data = GST_BUFFER_DATA (buf);
size = GST_BUFFER_SIZE (buf);
while (size >= 4) {
head = GUINT32_FROM_BE(*((guint32 *)data));
if ((head & 0xffe00000) == 0xffe00000) {
guint pos = 0;
guint length;
guint found = 0; /* number of valid headers found */
do {
if ((length = mp3_type_frame_length_from_header (head))) {
pos += length;
found++;
if (pos + 4 >= size) {
if (found >= GST_MP3_TYPEFIND_MIN_HEADERS)
goto success;
}
head = GUINT32_FROM_BE(*((guint32 *) &(data[pos])));
if ((head & 0xffe00000) != 0xffe00000)
break;
} else {
break;
}
} while (TRUE);
}
data++;
size--;
}
return NULL;
success:
return gst_caps_new ("mp3_type_find", "audio/x-mp3", NULL);
}
static gboolean
plugin_init (GModule *module, GstPlugin *plugin)