mpegaudioparse: Support gapless playback

Gapless playback is handled by adjusting buffer timestamps & durations
and by adding GstAudioClippingMeta. Support for "Frankenstein" streams
(= poorly stitched together streams) is also added, so that gapless
playback support doesn't prevent those from being properly played.

Co-authored-by: Sebastian Dröge <sebastian@centricular.com>
Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/1028>
parent 2db283499e
commit 0431a0845c
4 changed files with 816 additions and 9 deletions
@ -35,6 +35,84 @@
 *
 */

/* Notes about gapless playback, "Frankenstein" streams, and the Xing header frame:
 *
 * Gapless playback is based on the LAME tag, which is located in the Xing
 * header frame. The tag contains the encoder delay and encoder padding.
 * The encoder delay specifies how many padding nullsamples have been prepended
 * by the encoder at the start of the mp3 stream, while the encoder padding
 * specifies how many padding nullsamples got added at the end of the stream.
 *
 * In addition, there is also a "decoder delay". This affects all existing
 * mp3 decoders - they themselves introduce a delay into the signal due to
 * the way mp3 decoding works. This delay is 529 samples long in all known
 * decoders. Unlike the encoder delay, the decoder delay is not specified
 * anywhere in the mp3 stream. Players/decoders therefore hardcode the
 * decoder delay as 529 samples.
 *
 * (The LAME tech FAQ mentions 528 samples instead of 529, but LAME seems to
 * use 529 samples. Also, decoders like mpg123 use 529 samples instead of 528.
 * The situation is a little unclear, but 529 samples seems to be standard.)
 *
 * For proper gapless playback, both mpegaudioparse and a downstream MPEG
 * audio decoder must do their part. mpegaudioparse adjusts buffer PTS/DTS
 * and durations, and adds GstAudioClippingMeta to outgoing buffers if
 * clipping is necessary. MPEG decoders then clip decoded frames according
 * to that meta (if present).
 *
 * To detect when to add GstAudioClippingMeta and when to adjust PTS/DTS/
 * durations, the number of the current frame is retrieved. Based on that, the
 * current stream position in samples is calculated. With the sample position,
 * it is determined whether the current playback position is still within
 * the actual playback range (= the playback range of the stream that
 * excludes padding samples), or if it is already outside, or partially
 * outside.
 *
 * start_of_actual_samples and end_of_actual_samples define the start/end
 * of this actual playback range, in samples. So:
 * If sample_pos >= start_of_actual_samples and sample_pos < end_of_actual_samples
 * -> sample_pos is inside the actual playback range.
 *
 * (The decoder delay could in theory be left for the decoder to worry
 * about. But then, the decoder would also have to adjust PTS/DTS/durations
 * of decoded buffers, which is not something a GstAudioDecoder based element
 * should have to deal with. So, for convenience, mpegaudioparse also factors
 * that delay into its calculations.)
 *
 *
 * "Frankenstein" streams are MPEG streams which contain frames beyond
 * what the Xing metadata indicates. Such streams typically are the
 * result of poorly stitching individual mp3s together, like this:
 *
 *   cat first.mp3 second.mp3 > joined.mp3
 *
 * The resulting mp3 is not guaranteed to be valid. In particular, this can
 * cause confusion when first.mp3 contains a Xing header frame. Its length
 * indicator then does not match the actual length (which is bigger). When
 * this is detected, a log line about this being a Frankenstein stream is
 * generated.
 *
 *
 * Xing header frames are empty dummy MPEG frames. They only exist for
 * supplying metadata. They are encoded as valid silent MPEG frames for
 * backwards compatibility with older hardware MP3 players, but can be safely
 * dropped.
 *
 * For more about Xing header frames, see:
 * https://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header#XINGHeader
 * https://www.compuphase.com/mp3/mp3loops.htm#PADDING_DELAYS
 *
 * To facilitate gapless playback and ensure that MPEG audio decoders don't
 * actually decode this frame as an empty MPEG frame, mpegaudioparse marks it
 * as GST_BUFFER_FLAG_DECODE_ONLY / GST_BUFFER_FLAG_DROPPABLE after its
 * metadata got extracted. It is also marked as such if it is encountered
 * again, for example after the user seeked back to the beginning of the mp3
 * stream. Its duration is also set to zero to make sure that the frame does
 * not cause baseparse to increment the timestamp of the frame that follows
 * this one.
 *
 */

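Editorial illustration, not part of this commit: a minimal sketch of how a downstream GstAudioDecoder-based element could honour the GstAudioClippingMeta described above. The helper name, the bpf parameter and the surrounding structure are assumptions made only for this example.

/* Minimal sketch: clip a decoded buffer according to GstAudioClippingMeta
 * found on the corresponding input buffer. bpf = bytes per audio frame
 * (sample width * channels). Assumes #include <gst/audio/audio.h>. */
static GstBuffer *
clip_decoded_buffer (GstBuffer * inbuf, GstBuffer * outbuf, gint bpf)
{
  GstAudioClippingMeta *cmeta = gst_buffer_get_audio_clipping_meta (inbuf);
  gsize size, clip_start, clip_end;

  if (cmeta == NULL || cmeta->format != GST_FORMAT_DEFAULT)
    return outbuf;              /* nothing to clip */

  size = gst_buffer_get_size (outbuf);
  clip_start = MIN ((gsize) cmeta->start * bpf, size);
  clip_end = MIN ((gsize) cmeta->end * bpf, size - clip_start);

  if (clip_start + clip_end >= size) {
    /* the whole buffer consists of padding samples */
    gst_buffer_unref (outbuf);
    return NULL;
  }

  /* keep only the samples between the clipped head and tail */
  return gst_buffer_copy_region (outbuf, GST_BUFFER_COPY_ALL,
      clip_start, size - clip_start - clip_end);
}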
/* FIXME: we should make the base class (GstBaseParse) aware of the
 * XING seek table somehow, so it can use it properly for things like
 * accurate seeks. Currently it can only do a lookup via the convert function,
@ -98,12 +176,20 @@ static GstFlowReturn gst_mpeg_audio_parse_handle_frame (GstBaseParse * parse,
|
|||
GstBaseParseFrame * frame, gint * skipsize);
|
||||
static GstFlowReturn gst_mpeg_audio_parse_pre_push_frame (GstBaseParse * parse,
|
||||
GstBaseParseFrame * frame);
|
||||
static gboolean gst_mpeg_audio_parse_src_query (GstBaseParse * parse,
|
||||
GstQuery * query);
|
||||
static gboolean gst_mpeg_audio_parse_sink_event (GstBaseParse * parse,
|
||||
GstEvent * event);
|
||||
static gboolean gst_mpeg_audio_parse_convert (GstBaseParse * parse,
|
||||
GstFormat src_format, gint64 src_value,
|
||||
GstFormat dest_format, gint64 * dest_value);
|
||||
static GstCaps *gst_mpeg_audio_parse_get_sink_caps (GstBaseParse * parse,
|
||||
GstCaps * filter);
|
||||
|
||||
static gboolean
|
||||
gst_mpeg_audio_parse_check_if_is_xing_header_frame (GstMpegAudioParse *
|
||||
mp3parse, GstBuffer * buf);
|
||||
|
||||
static void gst_mpeg_audio_parse_handle_first_frame (GstMpegAudioParse *
|
||||
mp3parse, GstBuffer * buf);
|
||||
|
||||
|
@ -166,6 +252,8 @@ gst_mpeg_audio_parse_class_init (GstMpegAudioParseClass * klass)
|
|||
GST_DEBUG_FUNCPTR (gst_mpeg_audio_parse_handle_frame);
|
||||
parse_class->pre_push_frame =
|
||||
GST_DEBUG_FUNCPTR (gst_mpeg_audio_parse_pre_push_frame);
|
||||
parse_class->src_query = GST_DEBUG_FUNCPTR (gst_mpeg_audio_parse_src_query);
|
||||
parse_class->sink_event = GST_DEBUG_FUNCPTR (gst_mpeg_audio_parse_sink_event);
|
||||
parse_class->convert = GST_DEBUG_FUNCPTR (gst_mpeg_audio_parse_convert);
|
||||
parse_class->get_sink_caps =
|
||||
GST_DEBUG_FUNCPTR (gst_mpeg_audio_parse_get_sink_caps);
|
||||
|
@ -194,12 +282,16 @@ gst_mpeg_audio_parse_class_init (GstMpegAudioParseClass * klass)
|
|||
static void
|
||||
gst_mpeg_audio_parse_reset (GstMpegAudioParse * mp3parse)
|
||||
{
|
||||
mp3parse->upstream_format = GST_FORMAT_UNDEFINED;
|
||||
mp3parse->channels = -1;
|
||||
mp3parse->rate = -1;
|
||||
mp3parse->sent_codec_tag = FALSE;
|
||||
mp3parse->last_posted_crc = CRC_UNKNOWN;
|
||||
mp3parse->last_posted_channel_mode = MPEG_AUDIO_CHANNEL_MODE_UNKNOWN;
|
||||
mp3parse->freerate = 0;
|
||||
mp3parse->spf = 0;
|
||||
|
||||
mp3parse->outgoing_frame_is_xing_header = FALSE;
|
||||
|
||||
mp3parse->hdr_bitrate = 0;
|
||||
mp3parse->bitrate_is_constant = TRUE;
|
||||
|
@ -224,6 +316,12 @@ gst_mpeg_audio_parse_reset (GstMpegAudioParse * mp3parse)
|
|||
|
||||
mp3parse->encoder_delay = 0;
|
||||
mp3parse->encoder_padding = 0;
|
||||
mp3parse->decoder_delay = 0;
|
||||
mp3parse->start_of_actual_samples = 0;
|
||||
mp3parse->end_of_actual_samples = 0;
|
||||
mp3parse->total_padding_time = GST_CLOCK_TIME_NONE;
|
||||
mp3parse->start_padding_time = GST_CLOCK_TIME_NONE;
|
||||
mp3parse->end_padding_time = GST_CLOCK_TIME_NONE;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -745,6 +843,11 @@ gst_mpeg_audio_parse_handle_frame (GstBaseParse * parse,
|
|||
mp3parse->spf = 576;
|
||||
}
|
||||
|
||||
/* We need the frame duration for calculating the frame number later
|
||||
* in gst_mpeg_audio_parse_pre_push_frame (). */
|
||||
mp3parse->frame_duration = gst_util_uint64_scale (GST_SECOND,
|
||||
mp3parse->spf, mp3parse->rate);
|
||||
|
||||
/* lead_in:
|
||||
* We start pushing 9 frames earlier (29 frames for MPEG2) than
|
||||
* segment start to be able to decode the first frame we want.
|
||||
|
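As a quick sanity check of the frame_duration formula above (using the 32 kHz Layer III test stream added later in this commit, which has the usual 1152 samples per frame):

/* frame_duration = GST_SECOND * spf / rate
 *                = 1000000000 * 1152 / 32000
 *                = 36000000 ns = 36 ms per frame */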
@ -764,6 +867,21 @@ gst_mpeg_audio_parse_handle_frame (GstBaseParse * parse,
|
|||
}
|
||||
mp3parse->hdr_bitrate = bitrate;
|
||||
|
||||
/* While during normal playback, the Xing header frame is seen only once
|
||||
* (right at the beginning), we may see it again if the user seeked back
|
||||
* to the beginning. To make sure it is dropped again and NOT pushed
|
||||
* downstream, we have to check every frame for Xing IDs.
|
||||
*
|
||||
* (sent_codec_tag is TRUE after this Xing frame got parsed.) */
|
||||
if (G_LIKELY (mp3parse->sent_codec_tag)) {
|
||||
if (G_UNLIKELY (gst_mpeg_audio_parse_check_if_is_xing_header_frame
|
||||
(mp3parse, buf))) {
|
||||
GST_DEBUG_OBJECT (mp3parse, "This is a Xing header frame, which "
|
||||
"contains no meaningful audio data, and can be safely dropped");
|
||||
mp3parse->outgoing_frame_is_xing_header = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
/* For first frame; check for seek tables and output a codec tag */
|
||||
gst_mpeg_audio_parse_handle_first_frame (mp3parse, buf);
|
||||
|
||||
|
@ -774,6 +892,17 @@ gst_mpeg_audio_parse_handle_frame (GstBaseParse * parse,
|
|||
cleanup:
|
||||
gst_buffer_unmap (buf, &map);
|
||||
|
||||
/* We don't actually drop the frame right here, but rather in
|
||||
* gst_mpeg_audio_parse_pre_push_frame (), since it is still important
|
||||
* to let other code bits do their work there even if we want to drop
|
||||
* the current frame. */
|
||||
if (G_UNLIKELY (mp3parse->outgoing_frame_is_xing_header)) {
|
||||
frame->flags |= GST_BASE_PARSE_FRAME_FLAG_NO_FRAME;
|
||||
/* Set duration to zero to prevent the baseparse class
|
||||
* from incrementing outgoing timestamps */
|
||||
GST_BUFFER_DURATION (frame->buffer) = 0;
|
||||
}
|
||||
|
||||
if (res && bpf <= map.size) {
|
||||
return gst_base_parse_finish_frame (parse, frame, bpf);
|
||||
}
|
||||
|
@ -781,6 +910,54 @@ cleanup:
|
|||
return GST_FLOW_OK;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
gst_mpeg_audio_parse_check_if_is_xing_header_frame (GstMpegAudioParse *
|
||||
mp3parse, GstBuffer * buf)
|
||||
{
|
||||
/* TODO: get rid of code duplication
|
||||
* (see gst_mpeg_audio_parse_handle_first_frame ()) */
|
||||
|
||||
const guint32 xing_id = 0x58696e67; /* 'Xing' in hex */
|
||||
const guint32 info_id = 0x496e666f; /* 'Info' in hex - found in LAME CBR files */
|
||||
|
||||
gint offset_xing;
|
||||
GstMapInfo map;
|
||||
guint8 *data;
|
||||
guint64 avail;
|
||||
guint32 read_id_xing = 0;
|
||||
gboolean ret = FALSE;
|
||||
|
||||
/* Check first frame for Xing info */
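/* (Editorial note: the offsets below are the Layer III side-info sizes -
 * 17 bytes for MPEG-1 mono, 32 bytes for the other MPEG-1 channel modes,
 * 9 bytes for MPEG-2 mono and 17 bytes otherwise. The Xing/Info tag starts
 * right after the frame header plus side info, which is why 4 more bytes
 * are added further below.) */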
|
||||
if (mp3parse->version == 1) { /* MPEG-1 file */
|
||||
if (mp3parse->channels == 1)
|
||||
offset_xing = 0x11;
|
||||
else
|
||||
offset_xing = 0x20;
|
||||
} else { /* MPEG-2 header */
|
||||
if (mp3parse->channels == 1)
|
||||
offset_xing = 0x09;
|
||||
else
|
||||
offset_xing = 0x11;
|
||||
}
|
||||
|
||||
/* Skip the 4 bytes of the MP3 header too */
|
||||
offset_xing += 4;
|
||||
|
||||
/* Check if we have enough data to read the Xing header */
|
||||
gst_buffer_map (buf, &map, GST_MAP_READ);
|
||||
data = map.data;
|
||||
avail = map.size;
|
||||
|
||||
if (avail >= offset_xing + 4) {
|
||||
read_id_xing = GST_READ_UINT32_BE (data + offset_xing);
|
||||
ret = (read_id_xing == xing_id || read_id_xing == info_id);
|
||||
}
|
||||
|
||||
gst_buffer_unmap (buf, &map);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
gst_mpeg_audio_parse_handle_first_frame (GstMpegAudioParse * mp3parse,
|
||||
GstBuffer * buf)
|
||||
|
@ -841,10 +1018,15 @@ gst_mpeg_audio_parse_handle_first_frame (GstMpegAudioParse * mp3parse,
|
|||
guint32 xing_flags;
|
||||
guint bytes_needed = offset_xing + 8;
|
||||
gint64 total_bytes;
|
||||
guint64 num_xing_samples = 0;
|
||||
GstClockTime total_time;
|
||||
|
||||
GST_DEBUG_OBJECT (mp3parse, "Found Xing header marker 0x%x", xing_id);
|
||||
|
||||
GST_DEBUG_OBJECT (mp3parse, "This is a Xing header frame, which contains "
|
||||
"no meaningful audio data, and can be safely dropped");
|
||||
mp3parse->outgoing_frame_is_xing_header = TRUE;
|
||||
|
||||
/* Move data after Xing header */
|
||||
data += offset_xing + 4;
|
||||
|
||||
|
@ -875,9 +1057,9 @@ gst_mpeg_audio_parse_handle_first_frame (GstMpegAudioParse * mp3parse,
|
|||
"Invalid number of frames in Xing header");
|
||||
mp3parse->xing_flags &= ~XING_FRAMES_FLAG;
|
||||
} else {
|
||||
num_xing_samples = (guint64) (mp3parse->xing_frames) * (mp3parse->spf);
|
||||
mp3parse->xing_total_time = gst_util_uint64_scale (GST_SECOND,
|
||||
(guint64) (mp3parse->xing_frames) * (mp3parse->spf),
|
||||
mp3parse->rate);
|
||||
num_xing_samples, mp3parse->rate);
|
||||
}
|
||||
|
||||
data += 4;
|
||||
|
@ -886,6 +1068,10 @@ gst_mpeg_audio_parse_handle_first_frame (GstMpegAudioParse * mp3parse,
|
|||
mp3parse->xing_total_time = 0;
|
||||
}
|
||||
|
||||
/* Store the entire time as actual total time for now. Should there be
|
||||
* any padding present, this value will get adjusted accordingly. */
|
||||
mp3parse->xing_actual_total_time = mp3parse->xing_total_time;
|
||||
|
||||
if (xing_flags & XING_BYTES_FLAG) {
|
||||
mp3parse->xing_bytes = GST_READ_UINT32_BE (data);
|
||||
if (mp3parse->xing_bytes == 0) {
|
||||
|
@ -967,8 +1153,10 @@ gst_mpeg_audio_parse_handle_first_frame (GstMpegAudioParse * mp3parse,
|
|||
} else
|
||||
mp3parse->xing_vbr_scale = 0;
|
||||
|
||||
GST_DEBUG_OBJECT (mp3parse, "Xing header reported %u frames, time %"
|
||||
GST_TIME_FORMAT ", %u bytes, vbr scale %u", mp3parse->xing_frames,
|
||||
GST_DEBUG_OBJECT (mp3parse, "Xing header reported %u frames, %"
|
||||
G_GUINT64_FORMAT " samples, time %" GST_TIME_FORMAT
|
||||
" (this includes potentially present padding data), %u bytes,"
|
||||
" vbr scale %u", mp3parse->xing_frames, num_xing_samples,
|
||||
GST_TIME_ARGS (mp3parse->xing_total_time), mp3parse->xing_bytes,
|
||||
mp3parse->xing_vbr_scale);
|
||||
|
||||
|
@ -986,6 +1174,8 @@ gst_mpeg_audio_parse_handle_first_frame (GstMpegAudioParse * mp3parse,
|
|||
gchar lame_version[10] = { 0, };
|
||||
guint tag_rev;
|
||||
guint32 encoder_delay, encoder_padding;
|
||||
guint64 total_padding_samples;
|
||||
guint64 actual_num_xing_samples;
|
||||
|
||||
memcpy (lame_version, data, 9);
|
||||
data += 9;
|
||||
|
@ -1001,11 +1191,63 @@ gst_mpeg_audio_parse_handle_first_frame (GstMpegAudioParse * mp3parse,
|
|||
encoder_padding = GST_READ_UINT24_BE (data);
|
||||
encoder_padding &= 0x000fff;
|
||||
|
||||
total_padding_samples = encoder_delay + encoder_padding;
|
||||
|
||||
mp3parse->encoder_delay = encoder_delay;
|
||||
mp3parse->encoder_padding = encoder_padding;
|
||||
|
||||
GST_DEBUG_OBJECT (mp3parse, "Encoder delay %u, encoder padding %u",
|
||||
encoder_delay, encoder_padding);
|
||||
/* As mentioned in the overview at the beginning of this source
|
||||
* file, decoders exhibit a delay of 529 samples. */
|
||||
mp3parse->decoder_delay = 529;
|
||||
|
||||
/* Where the actual, non-padding samples start & end, in sample offsets. */
|
||||
mp3parse->start_of_actual_samples = mp3parse->encoder_delay +
|
||||
mp3parse->decoder_delay;
|
||||
mp3parse->end_of_actual_samples = num_xing_samples +
|
||||
mp3parse->decoder_delay - mp3parse->encoder_padding;
|
||||
|
||||
/* Length of padding at the start and at the end of the stream,
|
||||
* in nanoseconds. */
|
||||
mp3parse->start_padding_time = gst_util_uint64_scale_int (GST_SECOND,
|
||||
mp3parse->start_of_actual_samples, mp3parse->rate);
|
||||
mp3parse->end_padding_time = mp3parse->xing_total_time -
|
||||
gst_util_uint64_scale_int (mp3parse->end_of_actual_samples,
|
||||
GST_SECOND, mp3parse->rate);
|
||||
|
||||
/* Total length of all combined padding samples, in nanoseconds. */
|
||||
mp3parse->total_padding_time = gst_util_uint64_scale_int (GST_SECOND,
|
||||
total_padding_samples, mp3parse->rate);
|
||||
|
||||
/* Length of media, in samples, without the number of padding samples. */
|
||||
actual_num_xing_samples = (num_xing_samples >= total_padding_samples) ?
|
||||
(num_xing_samples - total_padding_samples) : 0;
|
||||
/* Length of media, converted to nanoseconds. This is used for setting
|
||||
* baseparse's duration. */
|
||||
mp3parse->xing_actual_total_time = gst_util_uint64_scale (GST_SECOND,
|
||||
actual_num_xing_samples, mp3parse->rate);
|
||||
|
||||
GST_DEBUG_OBJECT (mp3parse, "Encoder delay: %u samples",
|
||||
mp3parse->encoder_delay);
|
||||
GST_DEBUG_OBJECT (mp3parse, "Encoder padding: %u samples",
|
||||
mp3parse->encoder_padding);
|
||||
GST_DEBUG_OBJECT (mp3parse, "Decoder delay: %u samples",
|
||||
mp3parse->decoder_delay);
|
||||
GST_DEBUG_OBJECT (mp3parse, "Start of actual samples: %"
|
||||
G_GUINT64_FORMAT, mp3parse->start_of_actual_samples);
|
||||
GST_DEBUG_OBJECT (mp3parse, "End of actual samples: %"
|
||||
G_GUINT64_FORMAT, mp3parse->end_of_actual_samples);
|
||||
GST_DEBUG_OBJECT (mp3parse, "Total padding samples: %" G_GUINT64_FORMAT,
|
||||
total_padding_samples);
|
||||
GST_DEBUG_OBJECT (mp3parse, "Start padding time: %" GST_TIME_FORMAT,
|
||||
GST_TIME_ARGS (mp3parse->start_padding_time));
|
||||
GST_DEBUG_OBJECT (mp3parse, "End padding time: %" GST_TIME_FORMAT,
|
||||
GST_TIME_ARGS (mp3parse->end_padding_time));
|
||||
GST_DEBUG_OBJECT (mp3parse, "Total padding time: %" GST_TIME_FORMAT,
|
||||
GST_TIME_ARGS (mp3parse->total_padding_time));
|
||||
GST_DEBUG_OBJECT (mp3parse, "Actual total media samples: %"
|
||||
G_GUINT64_FORMAT, actual_num_xing_samples);
|
||||
GST_DEBUG_OBJECT (mp3parse, "Actual total media length: %"
|
||||
GST_TIME_FORMAT, GST_TIME_ARGS (mp3parse->xing_actual_total_time));
|
||||
}
|
||||
} else if (read_id_vbri == vbri_id) {
|
||||
gint64 total_bytes, total_frames;
|
||||
|
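To make the arithmetic in the LAME-tag handling above concrete, here is a worked example. The encoder delay/padding figures are not read from this code; they are inferred so that the results match the test stream added by this commit (1105 start and 1167 end padding samples): rate = 32000, num_xing_samples = 34560 (30 frames of 1152 samples), encoder_delay = 576, encoder_padding = 1696.

/* start_of_actual_samples = 576 + 529                 =  1105 samples
 * end_of_actual_samples   = 34560 + 529 - 1696        = 33393 samples
 * start_padding_time      = 1105 / 32000 s            ~  34.5 ms
 * end_padding_time        = (34560 - 33393) / 32000 s ~  36.5 ms
 * total_padding_time      = (576 + 1696) / 32000 s    ~  71.0 ms
 * actual_num_xing_samples = 34560 - (576 + 1696)      = 32288 samples (1009 ms) */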
@ -1143,7 +1385,7 @@ gst_mpeg_audio_parse_handle_first_frame (GstMpegAudioParse * mp3parse,
|
|||
/* set duration if tables provided a valid one */
|
||||
if (mp3parse->xing_flags & XING_FRAMES_FLAG) {
|
||||
gst_base_parse_set_duration (GST_BASE_PARSE (mp3parse), GST_FORMAT_TIME,
|
||||
mp3parse->xing_total_time, 0);
|
||||
mp3parse->xing_actual_total_time, 0);
|
||||
}
|
||||
if (mp3parse->vbri_total_time != 0 && mp3parse->vbri_valid) {
|
||||
gst_base_parse_set_duration (GST_BASE_PARSE (mp3parse), GST_FORMAT_TIME,
|
||||
|
@ -1318,6 +1560,91 @@ gst_mpeg_audio_parse_bytepos_to_time (GstMpegAudioParse * mp3parse,
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
gst_mpeg_audio_parse_src_query (GstBaseParse * parse, GstQuery * query)
|
||||
{
|
||||
gboolean res = FALSE;
|
||||
GstMpegAudioParse *mp3parse = GST_MPEG_AUDIO_PARSE (parse);
|
||||
|
||||
res = GST_BASE_PARSE_CLASS (parent_class)->src_query (parse, query);
|
||||
if (!res)
|
||||
return FALSE;
|
||||
|
||||
/* If upstream operates in BYTE format then consider any parsed Xing/LAME
|
||||
* header to remove encoder/decoder delay and padding samples from the
|
||||
* position query. */
|
||||
if (mp3parse->upstream_format == GST_FORMAT_BYTES
|
||||
|| GST_PAD_MODE (GST_BASE_PARSE_SINK_PAD (parse)) == GST_PAD_MODE_PULL) {
|
||||
switch (GST_QUERY_TYPE (query)) {
|
||||
case GST_QUERY_POSITION:{
|
||||
GstFormat format;
|
||||
gint64 position, new_position;
|
||||
GstClockTime duration_to_skip;
|
||||
gst_query_parse_position (query, &format, &position);
|
||||
|
||||
/* Adjust the position to exclude padding samples. */
|
||||
|
||||
if ((position < 0) || (format != GST_FORMAT_TIME))
|
||||
break;
|
||||
|
||||
duration_to_skip = mp3parse->frame_duration +
|
||||
mp3parse->start_padding_time;
|
||||
|
||||
if (position < duration_to_skip)
|
||||
new_position = 0;
|
||||
else
|
||||
new_position = position - duration_to_skip;
|
||||
|
||||
if (new_position > (mp3parse->xing_actual_total_time))
|
||||
new_position = mp3parse->xing_actual_total_time;
|
||||
|
||||
GST_LOG_OBJECT (mp3parse, "applying gapless padding info to position "
|
||||
"query response: %" GST_TIME_FORMAT " -> %" GST_TIME_FORMAT,
|
||||
GST_TIME_ARGS (position), GST_TIME_ARGS (new_position));
|
||||
|
||||
gst_query_set_position (query, GST_FORMAT_TIME, new_position);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
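Worked example for the position-query adjustment above, reusing the illustrative figures from the LAME-tag example (frame_duration = 36 ms, start_padding_time ~ 34.5 ms):

/* duration_to_skip = frame_duration + start_padding_time = 70.5 ms
 * raw position 250 ms -> reported as 250 ms - 70.5 ms = 179.5 ms
 * raw position  50 ms -> clamped to 0
 * results are additionally clamped to xing_actual_total_time (1009 ms here) */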
|
||||
|
||||
static gboolean
|
||||
gst_mpeg_audio_parse_sink_event (GstBaseParse * parse, GstEvent * event)
|
||||
{
|
||||
gboolean res = FALSE;
|
||||
GstMpegAudioParse *mp3parse = GST_MPEG_AUDIO_PARSE (parse);
|
||||
|
||||
res =
|
||||
GST_BASE_PARSE_CLASS (parent_class)->sink_event (parse,
|
||||
gst_event_ref (event));
|
||||
if (!res) {
|
||||
gst_event_unref (event);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
switch (GST_EVENT_TYPE (event)) {
|
||||
case GST_EVENT_SEGMENT:{
|
||||
const GstSegment *segment;
|
||||
|
||||
gst_event_parse_segment (event, &segment);
|
||||
mp3parse->upstream_format = segment->format;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
gst_event_unref (event);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
gst_mpeg_audio_parse_convert (GstBaseParse * parse, GstFormat src_format,
|
||||
gint64 src_value, GstFormat dest_format, gint64 * dest_value)
|
||||
|
@ -1418,6 +1745,179 @@ gst_mpeg_audio_parse_pre_push_frame (GstBaseParse * parse,
|
|||
gst_tag_list_unref (taglist);
|
||||
}
|
||||
|
||||
/* adjust buffer PTS/DTS/durations according to gapless playback info */
|
||||
if ((mp3parse->upstream_format == GST_FORMAT_BYTES
|
||||
|| GST_PAD_MODE (GST_BASE_PARSE_SINK_PAD (parse)) ==
|
||||
GST_PAD_MODE_PULL)
|
||||
&& GST_CLOCK_TIME_IS_VALID (mp3parse->total_padding_time)) {
|
||||
guint64 frame_nr;
|
||||
GstClockTime pts, dts;
|
||||
gboolean add_clipping_meta = FALSE;
|
||||
guint32 start_clip = 0, end_clip = 0;
|
||||
GstClockTime timestamp_decrement;
|
||||
guint64 sample_pos;
|
||||
guint64 sample_pos_end;
|
||||
|
||||
/* Get the number of the current frame so we can determine where we
|
||||
* currently are in the MPEG stream.
|
||||
*
|
||||
* Gapless playback is best done based on samples, not timestamps,
|
||||
* to avoid potential rounding errors that can otherwise cause a few
|
||||
* samples to be incorrectly clipped or not clipped.
|
||||
*
|
||||
* TODO: At the moment, there is no dedicated baseparse API for finding
|
||||
* out what frame we are currently in. The frame number is calculated
|
||||
* out of the PTS of the current frame. Each frame has the same duration,
|
||||
* and at this point, the buffer's PTS has not been adjusted to exclude
|
||||
* the padding samples, so the PTS will be an integer multiple of
|
||||
* frame_duration. However, this is not an ideal solution. Investigate
|
||||
* how to properly implement this. */
|
||||
frame_nr = GST_BUFFER_PTS (frame->buffer) / mp3parse->frame_duration;
|
||||
GST_LOG_OBJECT (mp3parse, "Handling MP3 frame #%" G_GUINT64_FORMAT,
|
||||
frame_nr);
|
||||
|
||||
/* By default, we subtract the start_padding_time from the timestamps.
|
||||
* start_padding_time specifies the duration of the padding samples
|
||||
* at the beginning of the MPEG stream. To factor out these padding
|
||||
* samples, we have to shift the timestamps back, which is done with
|
||||
* this decrement. */
|
||||
timestamp_decrement = mp3parse->start_padding_time;
|
||||
|
||||
pts = GST_BUFFER_PTS (frame->buffer);
|
||||
dts = GST_BUFFER_DTS (frame->buffer);
|
||||
|
||||
/* sample_pos specifies the current position of the beginning of the
|
||||
* current frame, while sample_pos_end specifies the current position
|
||||
* of one sample past the end of the current frame. Both values are
|
||||
* in samples. */
|
||||
sample_pos = frame_nr * mp3parse->spf;
|
||||
sample_pos_end = sample_pos + mp3parse->spf;
|
||||
|
||||
/* Check if the frame is not (fully) within the actual playback range. */
|
||||
if (G_UNLIKELY (sample_pos <= mp3parse->start_of_actual_samples ||
|
||||
(sample_pos_end >= mp3parse->end_of_actual_samples))) {
|
||||
|
||||
if (G_UNLIKELY (frame_nr >= mp3parse->xing_frames)) {
|
||||
/* Test #1: Check if the current position lies past the length
|
||||
* that is specified by the Xing frame header. This normally does
|
||||
* not happen, but does occur with "Frankenstein" streams (see
|
||||
* the explanation at the beginning of this source file for more).
|
||||
* Do this first, since the other test may yield false positives
|
||||
* in this case. */
|
||||
GST_LOG_OBJECT (mp3parse, "There are frames beyond what the Xing "
|
||||
"metadata indicates; this is a Frankenstein stream!");
|
||||
|
||||
/* The frames past the "officially" last one (= the last one according
|
||||
* to the Xing header frame) are located past the padding samples
|
||||
* that follow the actual playback range. The length of these
|
||||
* padding samples in nanoseconds is stored in end_padding_time.
|
||||
* We need to shift the PTS to compensate for these padding samples,
|
||||
* otherwise there would be a timestamp discontinuity between the
|
||||
* last "official" frame and the first "Frankenstein" frame. */
|
||||
timestamp_decrement += mp3parse->end_padding_time;
|
||||
} else if (sample_pos_end <= mp3parse->start_of_actual_samples) {
|
||||
/* Test #2: Check if the frame lies completely before the actual
|
||||
* playback range. This happens if the number of padding samples
|
||||
* at the start of the stream exceeds the size of a frame, meaning
|
||||
* that the entire frame will be filled with padding samples.
|
||||
* This has not been observed so far. However, it is in theory
|
||||
* possible, so handle it here. */
|
||||
|
||||
/* We want to clip all samples in the frame. Since this is a frame
|
||||
* at the start of the stream, set start_clip to the frame size.
|
||||
* Also set the buffer duration to 0 to make sure baseparse does not
|
||||
* increment timestamps after this current frame is finished. */
|
||||
start_clip = mp3parse->spf;
|
||||
GST_BUFFER_DURATION (frame->buffer) = 0;
|
||||
|
||||
add_clipping_meta = TRUE;
|
||||
} else if (sample_pos <= mp3parse->start_of_actual_samples) {
|
||||
/* Test #3: Check if a portion of the frame lies before the actual
|
||||
* playback range. Set the duration to the number of samples that
|
||||
* remain after clipping. */
|
||||
|
||||
start_clip = mp3parse->start_of_actual_samples - sample_pos;
|
||||
GST_BUFFER_DURATION (frame->buffer) =
|
||||
gst_util_uint64_scale_int (sample_pos_end -
|
||||
mp3parse->start_of_actual_samples, GST_SECOND, mp3parse->rate);
|
||||
|
||||
add_clipping_meta = TRUE;
|
||||
} else if (sample_pos >= mp3parse->end_of_actual_samples) {
|
||||
/* Test #4: Check if the frame lies completely after the actual
|
||||
* playback range. Similar to test #2, this happens if the number
|
||||
* of padding samples at the end of the stream exceeds the size of
|
||||
* a frame, meaning that the entire frame will be filled with padding
|
||||
* samples. Unlike test #2, this has been observed in mp3s several
|
||||
* times: The penultimate frame is partially clipped, the final
|
||||
* frame is fully clipped. */
|
||||
|
||||
GstClockTime padding_ns;
|
||||
|
||||
/* We want to clip all samples in the frame. Since this is a frame
|
||||
* at the end of the stream, set end_clip to the frame size.
|
||||
* Also set the buffer duration to 0 to make sure baseparse does not
|
||||
* increment timestamps after this current frame is finished. */
|
||||
end_clip = mp3parse->spf;
|
||||
GST_BUFFER_DURATION (frame->buffer) = 0;
|
||||
|
||||
/* Even though this frame will be fully clipped, we still have to
|
||||
* make sure its timestamps are not discontinuous with the preceding
|
||||
* ones. To that end, it is necessary to subtract the time range
|
||||
* between the current position and the last valid playback range
|
||||
* position from the PTS and DTS. */
|
||||
padding_ns = gst_util_uint64_scale_int (sample_pos -
|
||||
mp3parse->end_of_actual_samples, GST_SECOND, mp3parse->rate);
|
||||
timestamp_decrement += padding_ns;
|
||||
|
||||
add_clipping_meta = TRUE;
|
||||
} else if (sample_pos_end >= mp3parse->end_of_actual_samples) {
|
||||
/* Test #5: Check if a portion of the frame lies after the actual
|
||||
* playback range. Set the duration to the number of samples that
|
||||
* remain after clipping. */
|
||||
|
||||
end_clip = sample_pos_end - mp3parse->end_of_actual_samples;
|
||||
GST_BUFFER_DURATION (frame->buffer) =
|
||||
gst_util_uint64_scale_int (mp3parse->end_of_actual_samples -
|
||||
sample_pos, GST_SECOND, mp3parse->rate);
|
||||
|
||||
add_clipping_meta = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (G_UNLIKELY (add_clipping_meta)) {
|
||||
GST_DEBUG_OBJECT (mp3parse, "Adding clipping meta: start %"
|
||||
G_GUINT32_FORMAT " end %" G_GUINT32_FORMAT, start_clip, end_clip);
|
||||
gst_buffer_add_audio_clipping_meta (frame->buffer, GST_FORMAT_DEFAULT,
|
||||
start_clip, end_clip);
|
||||
}
|
||||
|
||||
/* Adjust the timestamps by subtracting from them. The decrement
|
||||
* is computed above. */
|
||||
GST_BUFFER_PTS (frame->buffer) = (pts >= timestamp_decrement) ? (pts -
|
||||
timestamp_decrement) : 0;
|
||||
GST_BUFFER_DTS (frame->buffer) = (dts >= timestamp_decrement) ? (dts -
|
||||
timestamp_decrement) : 0;
|
||||
|
||||
/* NOTE: We do not adjust the size here, just the timestamps and duration.
|
||||
* We also do not drop fully clipped frames. This is because downstream
|
||||
* MPEG audio decoders still need the data of the frame, even if it gets
|
||||
* fully clipped later. They do need these frames for their decoding process.
|
||||
* If these frames were dropped, the decoders would not fully decode all
|
||||
* of the data from the MPEG stream. */
|
||||
|
||||
/* TODO: Should offset/offset_end also be adjusted? */
|
||||
}
|
||||
|
||||
/* Check if this frame can safely be dropped (for example, because it is an
|
||||
* empty Xing header frame). */
|
||||
if (G_UNLIKELY (mp3parse->outgoing_frame_is_xing_header)) {
|
||||
GST_DEBUG_OBJECT (mp3parse, "Marking frame as decode-only / droppable");
|
||||
mp3parse->outgoing_frame_is_xing_header = FALSE;
|
||||
GST_BUFFER_DURATION (frame->buffer) = 0;
|
||||
GST_BUFFER_FLAG_SET (frame->buffer, GST_BUFFER_FLAG_DECODE_ONLY);
|
||||
GST_BUFFER_FLAG_SET (frame->buffer, GST_BUFFER_FLAG_DROPPABLE);
|
||||
}
|
||||
|
||||
/* usual clipping applies */
|
||||
frame->flags |= GST_BASE_PARSE_FRAME_FLAG_CLIP;
|
||||
|
||||
|
|
|
@ -51,14 +51,19 @@ struct _GstMpegAudioParse {
|
|||
GstBaseParse baseparse;
|
||||
|
||||
/*< private >*/
|
||||
GstFormat upstream_format;
|
||||
|
||||
gint rate;
|
||||
gint channels;
|
||||
gint layer;
|
||||
gint version;
|
||||
|
||||
GstClockTime max_bitreservoir;
|
||||
/* samples per frame */
|
||||
gint spf;
|
||||
|
||||
/* Samples per frame */
|
||||
gint spf;
|
||||
|
||||
GstClockTime frame_duration;
|
||||
|
||||
gint freerate;
|
||||
|
||||
|
@ -67,6 +72,8 @@ struct _GstMpegAudioParse {
|
|||
gint last_posted_crc, last_crc;
|
||||
guint last_posted_channel_mode, last_mode;
|
||||
|
||||
gboolean outgoing_frame_is_xing_header;
|
||||
|
||||
/* Bitrate from non-vbr headers */
|
||||
guint32 hdr_bitrate;
|
||||
gboolean bitrate_is_constant;
|
||||
|
@ -75,6 +82,7 @@ struct _GstMpegAudioParse {
|
|||
guint32 xing_flags;
|
||||
guint32 xing_frames;
|
||||
GstClockTime xing_total_time;
|
||||
GstClockTime xing_actual_total_time;
|
||||
guint32 xing_bytes;
|
||||
/* percent -> filepos mapping */
|
||||
guchar xing_seek_table[100];
|
||||
|
@ -95,6 +103,14 @@ struct _GstMpegAudioParse {
|
|||
/* LAME info */
|
||||
guint32 encoder_delay;
|
||||
guint32 encoder_padding;
|
||||
|
||||
/* Gapless playback states */
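/* (Editorial note: start_of_actual_samples and end_of_actual_samples are
 * sample offsets into the stream; the *_padding_time fields are the
 * corresponding amounts in nanoseconds. See the gapless playback notes at
 * the top of gstmpegaudioparse.c.) */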
|
||||
guint32 decoder_delay;
|
||||
guint64 start_of_actual_samples;
|
||||
guint64 end_of_actual_samples;
|
||||
GstClockTime start_padding_time;
|
||||
GstClockTime end_padding_time;
|
||||
GstClockTime total_padding_time;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -24,6 +24,8 @@
|
|||
*/
|
||||
|
||||
#include <gst/check/gstcheck.h>
|
||||
#include <gst/app/gstappsink.h>
|
||||
#include <gst/audio/audio.h>
|
||||
#include "parser.h"
|
||||
|
||||
#define SRC_CAPS_TMPL "audio/mpeg, parsed=(boolean)false, mpegversion=(int)1"
|
||||
|
@ -123,6 +125,294 @@ GST_START_TEST (test_parse_detect_stream)
|
|||
GST_END_TEST;
|
||||
|
||||
/* Gapless tests are performed using a test signal that contains 30 MPEG
 * frames, has padding samples at the beginning and at the end, a LAME
 * tag to inform about said padding samples, and a sample rate of 32 kHz
 * and 1 channel. The test signal is 1009 ms long. setup_gapless_test_info()
 * fills the GaplessTestInfo struct with details about this test signal. */
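For reference, the constants set up below imply the following expected output timeline (derived from those figures; 1152 samples at 32 kHz is 36 ms per frame, and the first padded end frame works out to frame 29):

/* frame 0  (Xing/LAME dummy)      : PTS 0,          duration 0
 * frame 1  (1105 samples clipped) : PTS 0,          duration 47/32000 s   ~ 1.47 ms
 * frames 2..28 (regular)          : PTS 1.47 ms + (n - 2) * 36 ms, duration 36 ms
 * frame 29 (partially clipped)    : PTS ~ 973.5 ms, duration 1137/32000 s ~ 35.5 ms
 * frame 30 (fully clipped)        : PTS ~ 1009 ms,  duration 0 */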
|
||||
|
||||
typedef struct
|
||||
{
|
||||
const gchar *filename;
|
||||
guint num_mpeg_frames;
|
||||
guint num_samples_per_frame;
|
||||
guint num_start_padding_samples;
|
||||
guint num_end_padding_samples;
|
||||
guint sample_rate;
|
||||
|
||||
guint first_padded_end_frame;
|
||||
guint64 num_samples_with_padding;
|
||||
guint64 num_samples_without_padding;
|
||||
|
||||
GstClockTime first_frame_duration;
|
||||
GstClockTime regular_frame_duration;
|
||||
GstClockTime total_duration_without_padding;
|
||||
|
||||
GstElement *appsink;
|
||||
GstElement *parser;
|
||||
} GaplessTestInfo;
|
||||
|
||||
static void
|
||||
setup_gapless_test_info (GaplessTestInfo * info)
|
||||
{
|
||||
info->filename = "sine-1009ms-1ch-32000hz-gapless-with-lame-tag.mp3";
|
||||
info->num_mpeg_frames = 31;
|
||||
info->num_samples_per_frame = 1152; /* standard for MP3s */
|
||||
info->sample_rate = 32000;
|
||||
|
||||
/* Note that these start and end padding figures are not exactly like
|
||||
* those that we get from the LAME tag. That's because that tag only
|
||||
* contains the _encoder_ delay & padding. In the figures below, the
|
||||
* _decoder_ delay is also factored in (529 samples). mpegaudioparse
|
||||
* does the same, so we have to apply it here. */
|
||||
info->num_start_padding_samples = 1105;
|
||||
info->num_end_padding_samples = 1167;
|
||||
|
||||
/* In MP3s with LAME tags, the first frame is a frame made of Xing/LAME
|
||||
* metadata and dummy nullsamples (this is for backwards compatibility).
|
||||
* num_start_padding_samples defines how many padding samples are there
|
||||
* (this does not include the nullsamples from the first dummy frame).
|
||||
* Likewise, num_end_padding_samples defines how many padding samples
|
||||
* are there at the end of the MP3 stream.
|
||||
* There may be more padding samples than the size of one frame, meaning
|
||||
* that there may be frames that are made entirely of padding samples.
|
||||
* Such frames are output by mpegaudioparse, but their duration is set
|
||||
* to 0, and their PTS corresponds to the last valid PTS in the stream
|
||||
* (= the last PTS that is within the actual media data).
|
||||
* For this reason, we cannot just assume that the last frame is the
|
||||
* one containing padding - there may be more. So, calculate the number
|
||||
* of the first frame that contains padding samples from the _end_ of
|
||||
* the stream. We'll need that later for buffer PTS and duration checks. */
|
||||
info->first_padded_end_frame = (info->num_mpeg_frames - 1 -
|
||||
info->num_end_padding_samples / info->num_samples_per_frame);
|
||||
info->num_samples_with_padding = (info->num_mpeg_frames - 1) *
|
||||
info->num_samples_per_frame;
|
||||
info->num_samples_without_padding = info->num_samples_with_padding -
|
||||
info->num_start_padding_samples - info->num_end_padding_samples;
|
||||
|
||||
/* The first frame (excluding the dummy frame at the beginning) will be
|
||||
* clipped due to the padding samples at the start of the stream, so we
|
||||
* have to calculate this separately. */
|
||||
info->first_frame_duration =
|
||||
gst_util_uint64_scale_int (info->num_samples_per_frame -
|
||||
info->num_start_padding_samples, GST_SECOND, info->sample_rate);
|
||||
/* Regular, unclipped MPEG frame duration. */
|
||||
info->regular_frame_duration =
|
||||
gst_util_uint64_scale_int (info->num_samples_per_frame, GST_SECOND,
|
||||
info->sample_rate);
|
||||
/* The total actual playtime duration. */
|
||||
info->total_duration_without_padding =
|
||||
gst_util_uint64_scale_int (info->num_samples_without_padding, GST_SECOND,
|
||||
info->sample_rate);
|
||||
}
|
||||
|
||||
static void
|
||||
check_parsed_mpeg_frame (GaplessTestInfo * info, guint frame_num)
|
||||
{
|
||||
GstClockTime expected_pts = GST_CLOCK_TIME_NONE;
|
||||
GstClockTime expected_duration = GST_CLOCK_TIME_NONE;
|
||||
gboolean expect_audioclipmeta = FALSE;
|
||||
guint64 expected_audioclipmeta_start = 0;
|
||||
guint64 expected_audioclipmeta_end = 0;
|
||||
GstSample *sample;
|
||||
GstBuffer *buffer;
|
||||
GstAudioClippingMeta *audioclip_meta;
|
||||
|
||||
GST_DEBUG ("checking frame %u", frame_num);
|
||||
|
||||
/* This is called after the frame with the given number has been output by
|
||||
* mpegaudioparse. We can then pull that frame from appsink, and check its
|
||||
* PTS, duration, and audioclipmeta (if we expect it to be there). */
|
||||
|
||||
if (frame_num == 0) {
|
||||
expected_pts = 0;
|
||||
expected_duration = 0;
|
||||
expect_audioclipmeta = FALSE;
|
||||
} else if (frame_num == 1) {
|
||||
/* First frame (excluding the dummy metadata frame at the beginning of
|
||||
* the MPEG stream that mpegaudioparse internally drops). This one will be
|
||||
* clipped due to the padding samples at the beginning, so we expect a
|
||||
* clipping meta to be there. Also, its duration will be smaller than that
|
||||
* of regular, unclipped frames. */
|
||||
|
||||
expected_pts = 0;
|
||||
expected_duration = info->first_frame_duration;
|
||||
|
||||
expect_audioclipmeta = TRUE;
|
||||
expected_audioclipmeta_start = info->num_start_padding_samples;
|
||||
expected_audioclipmeta_end = 0;
|
||||
} else if (frame_num > 1 && frame_num < info->first_padded_end_frame) {
|
||||
/* Regular, unclipped frame. */
|
||||
|
||||
expected_pts = info->first_frame_duration + (frame_num - 2) *
|
||||
info->regular_frame_duration;
|
||||
expected_duration = info->regular_frame_duration;
|
||||
} else if (frame_num == info->first_padded_end_frame) {
|
||||
/* The first frame at the end with padding samples. This one will have
|
||||
* the last few valid samples, followed by the first padding samples. */
|
||||
|
||||
guint64 num_valid_samples = (info->num_samples_with_padding -
|
||||
info->num_end_padding_samples) - (frame_num - 1) *
|
||||
info->num_samples_per_frame;
|
||||
guint64 num_padding_samples = info->num_samples_per_frame -
|
||||
num_valid_samples;
|
||||
|
||||
expected_pts = info->first_frame_duration + (frame_num - 2) *
|
||||
info->regular_frame_duration;
|
||||
expected_duration = gst_util_uint64_scale_int (num_valid_samples,
|
||||
GST_SECOND, info->sample_rate);
|
||||
|
||||
expect_audioclipmeta = TRUE;
|
||||
expected_audioclipmeta_start = 0;
|
||||
expected_audioclipmeta_end = num_padding_samples;
|
||||
} else {
|
||||
/* A fully clipped frame at the end of the stream. */
|
||||
|
||||
expected_pts = info->total_duration_without_padding;
|
||||
expected_duration = 0;
|
||||
|
||||
expect_audioclipmeta = TRUE;
|
||||
expected_audioclipmeta_start = 0;
|
||||
expected_audioclipmeta_end = info->num_samples_per_frame;
|
||||
}
|
||||
|
||||
/* Pull the frame from appsink so we can check it. */
|
||||
|
||||
sample = gst_app_sink_pull_sample (GST_APP_SINK (info->appsink));
|
||||
fail_if (sample == NULL);
|
||||
fail_unless (GST_IS_SAMPLE (sample));
|
||||
|
||||
buffer = gst_sample_get_buffer (sample);
|
||||
fail_if (buffer == NULL);
|
||||
|
||||
/* Verify the sample's PTS and duration. */
|
||||
fail_unless_equals_uint64 (GST_BUFFER_PTS (buffer), expected_pts);
|
||||
fail_unless_equals_uint64 (GST_BUFFER_DURATION (buffer), expected_duration);
|
||||
/* Check if there's audio clip metadata, and verify it if it exists. */
|
||||
if (expect_audioclipmeta) {
|
||||
audioclip_meta = gst_buffer_get_audio_clipping_meta (buffer);
|
||||
fail_if (audioclip_meta == NULL);
|
||||
fail_unless_equals_uint64 (audioclip_meta->start,
|
||||
expected_audioclipmeta_start);
|
||||
fail_unless_equals_uint64 (audioclip_meta->end, expected_audioclipmeta_end);
|
||||
}
|
||||
|
||||
gst_sample_unref (sample);
|
||||
}
|
||||
|
||||
GST_START_TEST (test_parse_gapless_and_skip_padding_samples)
|
||||
{
|
||||
GstElement *source, *parser, *appsink, *pipeline;
|
||||
GstStateChangeReturn state_ret;
|
||||
guint frame_num;
|
||||
GaplessTestInfo info;
|
||||
|
||||
setup_gapless_test_info (&info);
|
||||
|
||||
pipeline = gst_pipeline_new (NULL);
|
||||
source = gst_element_factory_make ("filesrc", NULL);
|
||||
parser = gst_element_factory_make ("mpegaudioparse", NULL);
|
||||
appsink = gst_element_factory_make ("appsink", NULL);
|
||||
|
||||
info.appsink = appsink;
|
||||
info.parser = parser;
|
||||
|
||||
gst_bin_add_many (GST_BIN (pipeline), source, parser, appsink, NULL);
|
||||
gst_element_link_many (source, parser, appsink, NULL);
|
||||
|
||||
{
|
||||
char *full_filename =
|
||||
g_build_filename (GST_TEST_FILES_PATH, info.filename, NULL);
|
||||
g_object_set (G_OBJECT (source), "location", full_filename, NULL);
|
||||
g_free (full_filename);
|
||||
}
|
||||
|
||||
g_object_set (G_OBJECT (appsink), "async", FALSE, "sync", FALSE,
|
||||
"max-buffers", 1, "enable-last-sample", FALSE, "processing-deadline",
|
||||
G_MAXUINT64, NULL);
|
||||
|
||||
state_ret = gst_element_set_state (pipeline, GST_STATE_PLAYING);
|
||||
|
||||
fail_unless (state_ret != GST_STATE_CHANGE_FAILURE);
|
||||
|
||||
if (state_ret == GST_STATE_CHANGE_ASYNC) {
|
||||
GST_LOG ("waiting for pipeline to reach PAUSED state");
|
||||
state_ret = gst_element_get_state (pipeline, NULL, NULL, -1);
|
||||
fail_unless_equals_int (state_ret, GST_STATE_CHANGE_SUCCESS);
|
||||
}
|
||||
|
||||
/* Verify all frames from the test signal. */
|
||||
for (frame_num = 0; frame_num < info.num_mpeg_frames; ++frame_num)
|
||||
check_parsed_mpeg_frame (&info, frame_num);
|
||||
|
||||
/* Check what duration is returned by a query. This duration must exclude
|
||||
* the padding samples. */
|
||||
{
|
||||
GstQuery *query;
|
||||
gint64 duration;
|
||||
GstFormat format;
|
||||
|
||||
query = gst_query_new_duration (GST_FORMAT_TIME);
|
||||
fail_unless (gst_element_query (pipeline, query));
|
||||
|
||||
gst_query_parse_duration (query, &format, &duration);
|
||||
fail_unless_equals_int (format, GST_FORMAT_TIME);
|
||||
fail_unless_equals_uint64 ((guint64) duration,
|
||||
info.total_duration_without_padding);
|
||||
|
||||
gst_query_unref (query);
|
||||
}
|
||||
|
||||
/* Seek tests: Here we seek to a certain position that corresponds to a
|
||||
* certain frame. Then we check if we indeed got that frame. */
|
||||
|
||||
/* Seek back to the first frame. */
|
||||
{
|
||||
fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PAUSED),
|
||||
GST_STATE_CHANGE_SUCCESS);
|
||||
gst_element_seek_simple (pipeline, GST_FORMAT_TIME, GST_SEEK_FLAG_FLUSH |
|
||||
GST_SEEK_FLAG_KEY_UNIT, 0);
|
||||
fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PLAYING),
|
||||
GST_STATE_CHANGE_SUCCESS);
|
||||
|
||||
check_parsed_mpeg_frame (&info, 1);
|
||||
}
|
||||
|
||||
/* Seek to the second frame. */
|
||||
{
|
||||
fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PAUSED),
|
||||
GST_STATE_CHANGE_SUCCESS);
|
||||
gst_element_seek_simple (pipeline, GST_FORMAT_TIME, GST_SEEK_FLAG_FLUSH |
|
||||
GST_SEEK_FLAG_KEY_UNIT, info.first_frame_duration);
|
||||
fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PLAYING),
|
||||
GST_STATE_CHANGE_SUCCESS);
|
||||
|
||||
check_parsed_mpeg_frame (&info, 2);
|
||||
}
|
||||
|
||||
/* Seek to the last frame with valid samples (= the first frame with padding
|
||||
* samples at the end of the stream). */
|
||||
{
|
||||
GstClockTime pts = info.first_frame_duration +
|
||||
(info.first_padded_end_frame - 2) * info.regular_frame_duration;
|
||||
|
||||
fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PAUSED),
|
||||
GST_STATE_CHANGE_SUCCESS);
|
||||
gst_element_seek_simple (pipeline, GST_FORMAT_TIME, GST_SEEK_FLAG_FLUSH |
|
||||
GST_SEEK_FLAG_KEY_UNIT, pts);
|
||||
fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PLAYING),
|
||||
GST_STATE_CHANGE_SUCCESS);
|
||||
|
||||
check_parsed_mpeg_frame (&info, info.first_padded_end_frame);
|
||||
}
|
||||
|
||||
gst_element_set_state (pipeline, GST_STATE_NULL);
|
||||
gst_object_unref (pipeline);
|
||||
}
|
||||
|
||||
GST_END_TEST;
|
||||
|
||||
|
||||
static Suite *
|
||||
mpegaudioparse_suite (void)
|
||||
{
|
||||
|
@ -142,6 +432,7 @@ mpegaudioparse_suite (void)
|
|||
tcase_add_test (tc_chain, test_parse_split);
|
||||
tcase_add_test (tc_chain, test_parse_skip_garbage);
|
||||
tcase_add_test (tc_chain, test_parse_detect_stream);
|
||||
tcase_add_test (tc_chain, test_parse_gapless_and_skip_padding_samples);
|
||||
|
||||
return s;
|
||||
}
BIN  tests/files/sine-1009ms-1ch-32000hz-gapless-with-lame-tag.mp3 (binary file not shown)