qtdemux: Add audio clipping meta when playing gapless m4a content

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/4200>
Carlos Rafael Giani 2023-03-03 12:10:38 +01:00 committed by GStreamer Marge Bot
parent 51ebda4df5
commit 0071c97128
7 changed files with 775 additions and 3 deletions


@@ -386,6 +386,8 @@ static gboolean gst_qtdemux_stream_update_segment (GstQTDemux * qtdemux,
static void gst_qtdemux_send_gap_for_segment (GstQTDemux * demux,
QtDemuxStream * stream, gint segment_index, GstClockTime pos);
static void qtdemux_check_if_is_gapless_audio (GstQTDemux * qtdemux);
static gboolean qtdemux_pull_mfro_mfra (GstQTDemux * qtdemux);
static void check_update_duration (GstQTDemux * qtdemux, GstClockTime duration);
@@ -659,6 +661,11 @@ gst_qtdemux_get_duration (GstQTDemux * qtdemux, GstClockTime * duration)
if (qtdemux->duration != 0 &&
qtdemux->duration != G_MAXINT64 && qtdemux->timescale != 0) {
/* If this is single-stream audio media with gapless data,
* report the duration of the valid subset of the overall data. */
if (qtdemux->gapless_audio_info.type != GAPLESS_AUDIO_INFO_TYPE_NONE)
*duration = qtdemux->gapless_audio_info.valid_duration;
else
*duration = QTTIME_TO_GSTTIME (qtdemux, qtdemux->duration);
res = TRUE;
} else {
@@ -2048,6 +2055,11 @@ gst_qtdemux_reset (GstQTDemux * qtdemux, gboolean hard)
qtdemux->have_group_id = FALSE;
qtdemux->group_id = G_MAXUINT;
qtdemux->gapless_audio_info.type = GAPLESS_AUDIO_INFO_TYPE_NONE;
qtdemux->gapless_audio_info.num_start_padding_pcm_frames = 0;
qtdemux->gapless_audio_info.num_end_padding_pcm_frames = 0;
qtdemux->gapless_audio_info.num_valid_pcm_frames = 0;
g_queue_clear_full (&qtdemux->protection_event_queue,
(GDestroyNotify) gst_event_unref);
@@ -5507,6 +5519,14 @@ gst_qtdemux_stream_update_segment (GstQTDemux * qtdemux, QtDemuxStream * stream,
stream->segment.time = time;
stream->segment.position = stream->segment.start;
/* Gapless audio requires adjustments to the segment
* to reflect the actual playtime length. In
* particular, this must exclude padding data. */
if (qtdemux->gapless_audio_info.type != GAPLESS_AUDIO_INFO_TYPE_NONE) {
stream->segment.stop = stream->segment.start +
qtdemux->gapless_audio_info.valid_duration;
}
GST_DEBUG_OBJECT (stream->pad, "New segment: %" GST_SEGMENT_FORMAT,
&stream->segment);
@@ -6414,6 +6434,83 @@ gst_qtdemux_push_buffer (GstQTDemux * qtdemux, QtDemuxStream * stream,
GST_ERROR_OBJECT (qtdemux, "failed to attach aavd metadata to buffer");
}
if (qtdemux->gapless_audio_info.type != GAPLESS_AUDIO_INFO_TYPE_NONE) {
guint64 num_start_padding_pcm_frames;
guint64 audio_sample_offset;
guint64 audio_sample_offset_end;
guint64 start_of_trailing_padding;
guint64 start_clip = 0, end_clip = 0;
guint64 total_num_clipped_samples;
GstClockTime timestamp_decrement;
/* Attach GstAudioClippingMeta to exclude padding data. */
num_start_padding_pcm_frames =
qtdemux->gapless_audio_info.num_start_padding_pcm_frames;
audio_sample_offset = stream->sample_index * stream->stts_duration;
audio_sample_offset_end = audio_sample_offset + stream->stts_duration;
start_of_trailing_padding = num_start_padding_pcm_frames +
qtdemux->gapless_audio_info.num_valid_pcm_frames;
if (audio_sample_offset < num_start_padding_pcm_frames) {
guint64 num_padding_audio_samples =
num_start_padding_pcm_frames - audio_sample_offset;
start_clip = MIN (num_padding_audio_samples, stream->stts_duration);
}
timestamp_decrement = qtdemux->gapless_audio_info.start_padding_duration;
if (audio_sample_offset >= start_of_trailing_padding) {
/* This case happens when the buffer is located fully past
* the beginning of the padding area at the end of the stream.
* Add the end padding to the decrement amount to ensure
* continuous timestamps when transitioning from one gapless
* stream to the next. */
end_clip = stream->stts_duration;
timestamp_decrement += qtdemux->gapless_audio_info.end_padding_duration;
} else if (audio_sample_offset_end >= start_of_trailing_padding) {
/* This case happens when the beginning of the padding area that
* is located at the end of the stream intersects the buffer. */
end_clip = audio_sample_offset_end - start_of_trailing_padding;
}
total_num_clipped_samples = start_clip + end_clip;
if (total_num_clipped_samples != 0) {
GST_DEBUG_OBJECT (qtdemux, "adding audio clipping meta: start / "
"end clip: %" G_GUINT64_FORMAT " / %" G_GUINT64_FORMAT,
start_clip, end_clip);
gst_buffer_add_audio_clipping_meta (buf, GST_FORMAT_DEFAULT,
start_clip, end_clip);
if (total_num_clipped_samples >= stream->stts_duration) {
GST_BUFFER_DURATION (buf) = 0;
GST_BUFFER_FLAG_SET (buf, GST_BUFFER_FLAG_DECODE_ONLY);
GST_BUFFER_FLAG_SET (buf, GST_BUFFER_FLAG_DROPPABLE);
} else {
guint64 num_valid_samples =
stream->stts_duration - total_num_clipped_samples;
GST_BUFFER_DURATION (buf) =
QTSTREAMTIME_TO_GSTTIME (stream, num_valid_samples);
}
}
/* The timestamps need to be shifted to factor in the skipped padding data. */
if (GST_BUFFER_PTS_IS_VALID (buf)) {
GstClockTime ts = GST_BUFFER_PTS (buf);
GST_BUFFER_PTS (buf) =
(ts >= timestamp_decrement) ? (ts - timestamp_decrement) : 0;
}
if (GST_BUFFER_DTS_IS_VALID (buf)) {
GstClockTime ts = GST_BUFFER_DTS (buf);
GST_BUFFER_DTS (buf) =
(ts >= timestamp_decrement) ? (ts - timestamp_decrement) : 0;
}
}
if (stream->protected && (stream->protection_scheme_type == FOURCC_cenc
|| stream->protection_scheme_type == FOURCC_cbcs)) {
GstStructure *crypto_info;
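
As an aside, the clip amounts attached above reduce to a small piece of integer arithmetic. The following is a minimal standalone sketch of that logic with hypothetical names; the patch itself performs this inline in gst_qtdemux_push_buffer() and is not structured this way.

/* Hypothetical helper mirroring the start/end clip computation above;
 * not part of the patch. */
#include <glib.h>

static void
compute_clipping (guint64 buffer_first_sample, guint64 samples_per_buffer,
    guint64 num_start_padding, guint64 num_valid,
    guint64 * start_clip, guint64 * end_clip)
{
  guint64 buffer_end_sample = buffer_first_sample + samples_per_buffer;
  guint64 start_of_trailing_padding = num_start_padding + num_valid;

  *start_clip = 0;
  *end_clip = 0;

  /* Buffer begins inside the start padding: clip its leading samples. */
  if (buffer_first_sample < num_start_padding)
    *start_clip = MIN (num_start_padding - buffer_first_sample,
        samples_per_buffer);

  if (buffer_first_sample >= start_of_trailing_padding) {
    /* Buffer lies entirely within the end padding: clip everything. */
    *end_clip = samples_per_buffer;
  } else if (buffer_end_sample >= start_of_trailing_padding) {
    /* Buffer straddles the start of the end padding: clip the tail. */
    *end_clip = buffer_end_sample - start_of_trailing_padding;
  }
}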
@@ -7565,6 +7662,129 @@ gst_qtdemux_send_gap_for_segment (GstQTDemux * demux,
}
}
static void
qtdemux_check_if_is_gapless_audio (GstQTDemux * qtdemux)
{
QtDemuxStream *stream;
if (QTDEMUX_N_STREAMS (qtdemux) != 1)
goto incompatible_stream;
stream = QTDEMUX_NTH_STREAM (qtdemux, 0);
if (stream->subtype != FOURCC_soun || stream->n_segments != 1)
goto incompatible_stream;
/* Gapless audio info from revdns tags (most notably iTunSMPB) is
* detected in the main udta node. If it isn't present, try as
* fallback to recognize the encoder name, and apply known priming
* and padding quantities specific to the encoder. */
if (qtdemux->gapless_audio_info.type == GAPLESS_AUDIO_INFO_TYPE_NONE) {
const gchar *orig_encoder_name = NULL;
if (gst_tag_list_peek_string_index (qtdemux->tag_list, GST_TAG_ENCODER, 0,
&orig_encoder_name) && orig_encoder_name != NULL) {
gchar *lowercase_encoder_name = g_ascii_strdown (orig_encoder_name, -1);
if (strstr (lowercase_encoder_name, "nero") != NULL)
qtdemux->gapless_audio_info.type = GAPLESS_AUDIO_INFO_TYPE_NERO;
g_free (lowercase_encoder_name);
switch (qtdemux->gapless_audio_info.type) {
case GAPLESS_AUDIO_INFO_TYPE_NERO:{
guint64 total_length;
guint64 valid_length;
guint64 start_padding;
/* The Nero AAC encoder always uses a lead-in of 1600 PCM frames.
* Also, in Nero AAC's case, stream->duration contains the number
* of PCM frames with start padding but without end padding.
* The decoder delay equals 1 frame length, which is covered by
* factoring stream->stts_duration into the start padding. */
start_padding = 1600 + stream->stts_duration;
if (G_UNLIKELY (stream->duration < start_padding)) {
GST_ERROR_OBJECT (qtdemux, "stream duration is %" G_GUINT64_FORMAT
" but start_padding is %" G_GUINT64_FORMAT, stream->duration,
start_padding);
goto invalid_gapless_audio_info;
}
valid_length = stream->duration - start_padding;
qtdemux->gapless_audio_info.num_start_padding_pcm_frames =
start_padding;
qtdemux->gapless_audio_info.num_valid_pcm_frames = valid_length;
total_length = stream->n_samples * stream->stts_duration;
if (G_LIKELY (total_length >= valid_length)) {
guint64 total_padding = total_length - valid_length;
if (G_UNLIKELY (total_padding < start_padding)) {
GST_ERROR_OBJECT (qtdemux, "total_padding is %" G_GUINT64_FORMAT
" but start_padding is %" G_GUINT64_FORMAT, total_padding,
start_padding);
goto invalid_gapless_audio_info;
}
qtdemux->gapless_audio_info.num_end_padding_pcm_frames =
total_padding - start_padding;
} else {
qtdemux->gapless_audio_info.num_end_padding_pcm_frames = 0;
}
GST_DEBUG_OBJECT (qtdemux, "media was encoded with Nero AAC encoder; "
"using encoder specific lead-in and padding figures");
}
default:
break;
}
}
}
if (qtdemux->gapless_audio_info.type != GAPLESS_AUDIO_INFO_TYPE_NONE) {
qtdemux->gapless_audio_info.start_padding_duration =
QTSTREAMTIME_TO_GSTTIME (stream,
qtdemux->gapless_audio_info.num_start_padding_pcm_frames);
qtdemux->gapless_audio_info.end_padding_duration =
QTSTREAMTIME_TO_GSTTIME (stream,
qtdemux->gapless_audio_info.num_end_padding_pcm_frames);
qtdemux->gapless_audio_info.valid_duration =
QTSTREAMTIME_TO_GSTTIME (stream,
qtdemux->gapless_audio_info.num_valid_pcm_frames);
}
GST_DEBUG_OBJECT (qtdemux, "found valid gapless audio info: num start / end "
"PCM padding frames: %" G_GUINT64_FORMAT " / %" G_GUINT64_FORMAT "; "
"start / end padding durations: %" GST_TIME_FORMAT " / %" GST_TIME_FORMAT
"; num valid PCM frames: %" G_GUINT64_FORMAT "; valid duration: %"
GST_TIME_FORMAT, qtdemux->gapless_audio_info.num_start_padding_pcm_frames,
qtdemux->gapless_audio_info.num_end_padding_pcm_frames,
GST_TIME_ARGS (qtdemux->gapless_audio_info.start_padding_duration),
GST_TIME_ARGS (qtdemux->gapless_audio_info.end_padding_duration),
qtdemux->gapless_audio_info.num_valid_pcm_frames,
GST_TIME_ARGS (qtdemux->gapless_audio_info.valid_duration));
return;
incompatible_stream:
if (G_UNLIKELY (qtdemux->gapless_audio_info.type !=
GAPLESS_AUDIO_INFO_TYPE_NONE)) {
GST_WARNING_OBJECT (qtdemux,
"media contains gapless audio info, but it is not suitable for "
"gapless audio playback (media must be audio-only, single-stream, "
"single-segment; ignoring unusable gapless info");
qtdemux->gapless_audio_info.type = GAPLESS_AUDIO_INFO_TYPE_NONE;
}
return;
invalid_gapless_audio_info:
GST_WARNING_OBJECT (qtdemux,
"media contains invalid/unusable gapless audio info");
return;
}
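
To put numbers on the Nero case: with the usual AAC stts_duration of 1024, the lead-in is 1600 + 1024 = 2624 samples. For the Nero test file added later in this commit (198 frames of 1024 samples, i.e. 202752 samples total, 200000 of them valid), stream->duration would be 2624 + 200000 = 202624, total_padding = 202752 - 200000 = 2752, and the end padding works out to 2752 - 2624 = 128 samples, matching the figures used in the new unit tests.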
static GstFlowReturn
gst_qtdemux_chain (GstPad * sinkpad, GstObject * parent, GstBuffer * inbuf)
{
@@ -14009,6 +14229,8 @@ qtdemux_prepare_streams (GstQTDemux * qtdemux)
}
}
qtdemux_check_if_is_gapless_audio (qtdemux);
return ret;
}


@@ -54,6 +54,7 @@ typedef struct _QtDemuxSample QtDemuxSample;
typedef struct _QtDemuxSegment QtDemuxSegment;
typedef struct _QtDemuxRandomAccessEntry QtDemuxRandomAccessEntry;
typedef struct _QtDemuxStreamStsdEntry QtDemuxStreamStsdEntry;
typedef struct _QtDemuxGaplessAudioInfo QtDemuxGaplessAudioInfo;
typedef GstBuffer * (*QtDemuxProcessFunc)(GstQTDemux * qtdemux, QtDemuxStream * stream, GstBuffer * buf);
@@ -78,6 +79,36 @@ typedef enum {
VARIANT_MSS_FRAGMENTED,
} Variant;
typedef enum {
/* No valid gapless audio info present. Types other than this one
* are used only if all of these apply:
*
* 1. There is embedded gapless audio information available
* 2. Only one stream exists
* 3. Said stream has only one segment
* 4. Said stream is an audio stream
*/
GAPLESS_AUDIO_INFO_TYPE_NONE,
/* Using information from the iTunes iTunSMPB revdns tag. */
GAPLESS_AUDIO_INFO_TYPE_ITUNES,
/* Using known Nero encoder delay information. */
GAPLESS_AUDIO_INFO_TYPE_NERO
} QtDemuxGaplessAudioInfoType;
/* Gapless audio information, only used for single-stream audio-only media. */
struct _QtDemuxGaplessAudioInfo {
QtDemuxGaplessAudioInfoType type;
guint64 num_start_padding_pcm_frames;
guint64 num_end_padding_pcm_frames;
guint64 num_valid_pcm_frames;
/* PCM frame amounts converted to nanoseconds. */
GstClockTime start_padding_duration;
GstClockTime end_padding_duration;
GstClockTime valid_duration;
};
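
For illustration only (not part of the patch): the three duration fields are the PCM frame counts above scaled to nanoseconds. The demuxer does this with QTSTREAMTIME_TO_GSTTIME() using the track timescale, which for audio tracks normally equals the sample rate, so the conversion is equivalent to the following sketch.

/* Illustrative sketch, assuming the track timescale equals the sample rate. */
#include <gst/gst.h>

static GstClockTime
pcm_frames_to_duration (guint64 num_pcm_frames, guint32 sample_rate)
{
  return gst_util_uint64_scale (num_pcm_frames, GST_SECOND, sample_rate);
}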
struct _GstQTDemux {
GstElement element;
@@ -177,6 +208,8 @@ struct _GstQTDemux {
gint64 chapters_track_id;
QtDemuxGaplessAudioInfo gapless_audio_info;
/* protection support */
GPtrArray *protection_system_ids; /* Holds identifiers of all content protection systems for all tracks */
GQueue protection_event_queue; /* holds copy of upstream protection events */

View file

@@ -747,12 +747,111 @@ qtdemux_tag_add_revdns (GstQTDemux * demux, GstTagList * taglist,
break;
}
}
/* Some tags might not actually be used for metadata about the media,
* but for other purposes. One such tag is iTunSMPB, which contains
* padding information for gapless playback. Scan these separately. */
if (i == G_N_ELEMENTS (tags)) {
if (!g_ascii_strncasecmp ("iTunSMPB", namestr, 8)) {
/* iTunSMPB tag format goes as follows:
*
* " 00000000 xxxxxxxx yyyyyyyy zzzzzzzzzzzzzzzz 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000"
*
* The data is actually an ASCII string containing these hex fields.
* The description above is _not_ a description of a binary format!
* These need to be parsed with g_ascii_strtoull() and base 16.
*
* (The quotes are not part of it; they just emphasize the
* whitespace at the beginning of the string).
*
* Only the fields marked with x/y/z are of interest here.
*
* The x field is the priming, in samples.
* These are the padding samples at the beginning of the stream.
*
* The y field is the remainder, in samples.
* These are the padding samples at the end of the stream.
*
* The z field is the number of valid PCM frames, excluding the
* priming and remainder. (In other words, the number of PCM
* frames that make up the actual audio, without the padding.)
*
* The data starts at offset 16. All access to it must therefore skip
* the first 16 bytes.
*/
const gsize start_offset = 16;
const gsize priming_offset = start_offset + 10;
const gsize remainder_offset = start_offset + 19;
const gsize num_valid_pcm_frames_offset = start_offset + 28;
const gsize total_length = 44;
const gchar *str;
guint64 priming;
guint64 remainder;
guint64 num_valid_pcm_frames;
/* Temporary buffer for g_ascii_strtoull() calls.
* Add extra +1 space for nullbyte. */
gchar tmp[16 + 1];
/* Use the iTunSMPB info if no other info has been found yet. */
if (demux->gapless_audio_info.type != GAPLESS_AUDIO_INFO_TYPE_NONE) {
GST_DEBUG_OBJECT (demux, "iTunSMPB information found, "
"but other gapless audio info was already read");
goto finish;
}
if (G_UNLIKELY (datasize < (start_offset + total_length))) {
GST_WARNING_OBJECT (demux,
"iTunSMPB tag data size too small - not parsing");
goto finish;
}
str = (gchar *) ((guint8 *) data->data);
#define PARSE_ITUNSMPB_FIELD(FIELD_NAME, NUM_DIGITS) \
G_STMT_START \
{ \
gint str_idx; \
\
for (str_idx = 0; str_idx < (NUM_DIGITS); ++str_idx) { \
gchar ch = str[FIELD_NAME ## _offset + str_idx]; \
if (!g_ascii_isxdigit (ch)) { \
GST_WARNING_OBJECT (demux, #FIELD_NAME " field in iTunSMPB " \
"tag data has invalid character '%c'", ch); \
goto finish; \
} \
tmp[str_idx] = ch; \
} \
tmp[NUM_DIGITS] = 0; \
\
FIELD_NAME = g_ascii_strtoull (tmp, NULL, 16); \
} \
G_STMT_END
PARSE_ITUNSMPB_FIELD (priming, 8);
PARSE_ITUNSMPB_FIELD (remainder, 8);
PARSE_ITUNSMPB_FIELD (num_valid_pcm_frames, 16);
#undef PARSE_ITUNSMPB_FIELD
GST_DEBUG_OBJECT (demux, "iTunSMPB information: priming %"
G_GUINT64_FORMAT " remainder %" G_GUINT64_FORMAT
" num valid PCM frames %" G_GUINT64_FORMAT, priming, remainder,
num_valid_pcm_frames);
demux->gapless_audio_info.type = GAPLESS_AUDIO_INFO_TYPE_ITUNES;
demux->gapless_audio_info.num_start_padding_pcm_frames = priming;
demux->gapless_audio_info.num_end_padding_pcm_frames = remainder;
demux->gapless_audio_info.num_valid_pcm_frames = num_valid_pcm_frames;
} else {
goto unknown_tag;
}
}
} else {
goto unknown_tag;
}
finish:
return;
/* errors */
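
To make the iTunSMPB layout described in the comment above concrete, here is a small standalone sketch (not the patch code) that parses the ASCII payload by splitting on whitespace rather than by the fixed byte offsets the patch uses. The example values are chosen to correspond to the iTunes test file added below: 0x840 = 2112 priming samples, 0x280 = 640 remainder samples, 0x30D40 = 200000 valid PCM frames.

/* Standalone illustration; the patch itself reads fixed offsets into the
 * 'data' atom payload instead of tokenizing. */
#include <glib.h>
#include <stdio.h>

int
main (void)
{
  const gchar *itunsmpb =
      " 00000000 00000840 00000280 0000000000030D40"
      " 00000000 00000000 00000000 00000000"
      " 00000000 00000000 00000000 00000000";
  gchar *copy = g_strdup (itunsmpb);
  gchar **fields = g_strsplit (g_strstrip (copy), " ", -1);
  guint64 priming = g_ascii_strtoull (fields[1], NULL, 16);
  guint64 remainder = g_ascii_strtoull (fields[2], NULL, 16);
  guint64 num_valid_pcm_frames = g_ascii_strtoull (fields[3], NULL, 16);

  printf ("priming %" G_GUINT64_FORMAT ", remainder %" G_GUINT64_FORMAT
      ", valid PCM frames %" G_GUINT64_FORMAT "\n",
      priming, remainder, num_valid_pcm_frames);

  g_strfreev (fields);
  g_free (copy);
  return 0;
}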


@@ -27,6 +27,8 @@
#include <glib/gprintf.h>
#include <gst/check/check.h>
#include <gst/app/gstappsink.h>
#include <gst/audio/audio.h>
#define TEST_FILE_PREFIX GST_TEST_FILES_PATH G_DIR_SEPARATOR_S
@@ -1200,6 +1202,419 @@ GST_START_TEST (test_qtdemux_mss_fragment)
GST_END_TEST;
typedef struct
{
const gchar *filename;
/* Total number of AAC frames, including any and all dummy/empty/padding frames. */
guint num_aac_frames;
/* In AAC, this is 1024 in the vast majority of the cases.
* AAC can also use 960 samples per frame, but this is rare. */
guint num_samples_per_frame;
/* How many padding samples to expect at the beginning and the end.
* The amount of padding samples can exceed the size of a frame.
* This means that the first and last N frame(s) can actually be
* fully made of padding samples and thus need to be thrown away. */
guint num_start_padding_samples;
guint num_end_padding_samples;
guint sample_rate;
/* Some encoders produce data whose last frame uses a different
* (smaller) stts value to handle the padding at the end. Data
* produced by such encoders will not get a clipmeta added at the
* end. When using test data produced by such an encoder, this
* must be set to FALSE, otherwise it must be set to TRUE.
* Notably, anything that produces an iTunSMPB tag (iTunes itself
* as well as newer Nero encoders for example) will cause such
* a clipmeta to be added. */
gboolean expect_clipmeta_at_end;
/* Total number of samples available, with / without padding
* samples factored in. */
guint64 num_samples_with_padding;
guint64 num_samples_without_padding;
/* The index of the first / last frame that contains valid samples.
* Indices start with 0. Valid range is [0 , (num_aac_frames-1)].
* In virtually all cases, when the AAC data was encoded with iTunes,
* the first and last valid frames will be partially clipped. */
guint first_frame_with_valid_samples;
guint last_frame_with_valid_samples;
guint64 num_samples_in_first_valid_frame;
guint64 num_samples_in_last_valid_frame;
GstClockTime total_duration_without_padding;
GstElement *appsink;
} GaplessTestInfo;
static void
precalculate_gapless_test_factors (GaplessTestInfo * info)
{
info->num_samples_with_padding = info->num_aac_frames *
info->num_samples_per_frame;
info->num_samples_without_padding = info->num_samples_with_padding -
info->num_start_padding_samples - info->num_end_padding_samples;
info->first_frame_with_valid_samples = info->num_start_padding_samples /
info->num_samples_per_frame;
info->last_frame_with_valid_samples = (info->num_samples_with_padding -
info->num_end_padding_samples) / info->num_samples_per_frame;
info->num_samples_in_first_valid_frame =
(info->first_frame_with_valid_samples + 1) * info->num_samples_per_frame -
info->num_start_padding_samples;
info->num_samples_in_last_valid_frame =
(info->num_samples_with_padding - info->num_end_padding_samples) -
info->last_frame_with_valid_samples * info->num_samples_per_frame;
/* The total actual playtime duration. */
info->total_duration_without_padding =
gst_util_uint64_scale_int (info->num_samples_without_padding, GST_SECOND,
info->sample_rate);
GST_DEBUG ("num_samples_with_padding %" G_GUINT64_FORMAT
" num_samples_without_padding %" G_GUINT64_FORMAT
" first_frame_with_valid_samples %u"
" last_frame_with_valid_samples %u"
" num_samples_in_first_valid_frame %" G_GUINT64_FORMAT
" num_samples_in_last_valid_frame %" G_GUINT64_FORMAT
" total_duration_without_padding %" G_GUINT64_FORMAT,
info->num_samples_with_padding, info->num_samples_without_padding,
info->first_frame_with_valid_samples, info->last_frame_with_valid_samples,
info->num_samples_in_first_valid_frame,
info->num_samples_in_last_valid_frame,
info->total_duration_without_padding);
}
static void
setup_gapless_itunes_test_info (GaplessTestInfo * info)
{
info->filename =
"sine-1kHztone-48kHzrate-mono-s32le-200000samples-itunes.m4a";
info->num_aac_frames = 198;
info->num_samples_per_frame = 1024;
info->sample_rate = 48000;
info->expect_clipmeta_at_end = TRUE;
info->num_start_padding_samples = 2112;
info->num_end_padding_samples = 640;
precalculate_gapless_test_factors (info);
}
static void
setup_gapless_nero_with_itunsmpb_test_info (GaplessTestInfo * info)
{
info->filename =
"sine-1kHztone-48kHzrate-mono-s32le-200000samples-nero-with-itunsmpb.m4a";
info->num_aac_frames = 198;
info->num_samples_per_frame = 1024;
info->sample_rate = 48000;
info->expect_clipmeta_at_end = TRUE;
info->num_start_padding_samples = 2624;
info->num_end_padding_samples = 128;
precalculate_gapless_test_factors (info);
}
static void
setup_gapless_nero_without_itunsmpb_test_info (GaplessTestInfo * info)
{
info->filename =
"sine-1kHztone-48kHzrate-mono-s32le-200000samples-nero-without-itunsmpb.m4a";
info->num_aac_frames = 198;
info->num_samples_per_frame = 1024;
info->sample_rate = 48000;
/* Older Nero AAC encoders produce a different stts value for the
* last frame to skip padding data. In this file, all frames except
* the last one use an stts value of 1024, while the last value
* uses an stts value of 896. Consequently, the logic inside qtdemux
* won't deem it necessary to add an audioclipmeta - there are no
* padding samples to clip. */
info->expect_clipmeta_at_end = FALSE;
info->num_start_padding_samples = 2624;
info->num_end_padding_samples = 128;
precalculate_gapless_test_factors (info);
}
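
As a cross-check of these figures: every test file contains 198 AAC frames of 1024 samples, i.e. 202752 samples in total. For the iTunes file, 202752 - 2112 - 640 = 200000 valid samples; for the two Nero files, 202752 - 2624 - 128 = 200000 as well, which matches the "200000samples" part of the file names and corresponds to a padding-free duration of 200000 / 48000 ≈ 4.167 seconds.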
static void
check_parsed_aac_frame (GaplessTestInfo * info, guint frame_num)
{
GstClockTime expected_pts = GST_CLOCK_TIME_NONE;
GstClockTime expected_duration = GST_CLOCK_TIME_NONE;
GstClockTimeDiff ts_delta;
guint64 expected_sample_offset;
guint64 expected_num_samples;
gboolean expect_audioclipmeta = FALSE;
guint64 expected_audioclipmeta_start = 0;
guint64 expected_audioclipmeta_end = 0;
GstSample *sample;
GstBuffer *buffer;
GstAudioClippingMeta *audioclip_meta;
if (frame_num < info->first_frame_with_valid_samples) {
/* Frame is at the beginning and is fully clipped. */
expected_sample_offset = 0;
expected_num_samples = 0;
expected_audioclipmeta_start = info->num_samples_per_frame;
expected_audioclipmeta_end = 0;
} else if (frame_num == info->first_frame_with_valid_samples) {
/* Frame is at the beginning and is partially clipped. */
expected_sample_offset = 0;
expected_num_samples = info->num_samples_in_first_valid_frame;
expected_audioclipmeta_start = info->num_samples_per_frame -
info->num_samples_in_first_valid_frame;
expected_audioclipmeta_end = 0;
} else if (frame_num < info->last_frame_with_valid_samples) {
/* Regular, unclipped frame. */
expected_sample_offset = info->num_samples_in_first_valid_frame +
info->num_samples_per_frame * (frame_num -
info->first_frame_with_valid_samples - 1);
expected_num_samples = info->num_samples_per_frame;
} else if (frame_num == info->last_frame_with_valid_samples) {
/* The first frame at the end with padding samples. This one will have
* the last few valid samples, followed by the first padding samples. */
expected_sample_offset = info->num_samples_in_first_valid_frame +
info->num_samples_per_frame * (frame_num -
info->first_frame_with_valid_samples - 1);
expected_num_samples = info->num_samples_in_last_valid_frame;
if (info->expect_clipmeta_at_end) {
expect_audioclipmeta = TRUE;
expected_audioclipmeta_start = 0;
expected_audioclipmeta_end =
info->num_samples_per_frame - expected_num_samples;
}
} else {
/* A fully clipped frame at the end of the stream. */
expected_sample_offset = info->num_samples_in_first_valid_frame +
info->num_samples_without_padding;
expected_num_samples = 0;
if (info->expect_clipmeta_at_end) {
expect_audioclipmeta = TRUE;
expected_audioclipmeta_start = 0;
expected_audioclipmeta_end = info->num_samples_per_frame;
}
}
/* Pull the frame from appsink so we can check it. */
sample = gst_app_sink_pull_sample (GST_APP_SINK (info->appsink));
fail_if (sample == NULL);
fail_unless (GST_IS_SAMPLE (sample));
expected_pts = gst_util_uint64_scale_int (expected_sample_offset,
GST_SECOND, info->sample_rate);
expected_duration = gst_util_uint64_scale_int (expected_num_samples,
GST_SECOND, info->sample_rate);
buffer = gst_sample_get_buffer (sample);
fail_if (buffer == NULL);
/* Verify the sample's PTS and duration. Allow for 1 nanosecond difference
* to account for rounding errors in sample <-> timestamp conversions. */
ts_delta = GST_CLOCK_DIFF (GST_BUFFER_PTS (buffer), expected_pts);
fail_unless (ABS (ts_delta) <= 1);
ts_delta = GST_CLOCK_DIFF (GST_BUFFER_DURATION (buffer), expected_duration);
fail_unless (ABS (ts_delta) <= 1);
/* Check if there's audio clip metadata, and verify it if it exists. */
if (expect_audioclipmeta) {
audioclip_meta = gst_buffer_get_audio_clipping_meta (buffer);
fail_if (audioclip_meta == NULL);
fail_unless_equals_uint64 (audioclip_meta->start,
expected_audioclipmeta_start);
fail_unless_equals_uint64 (audioclip_meta->end, expected_audioclipmeta_end);
}
gst_sample_unref (sample);
}
static void
qtdemux_pad_added_cb_for_gapless (GstElement * demux, GstPad * pad,
GaplessTestInfo * info)
{
GstPad *appsink_pad;
GstPadLinkReturn ret;
appsink_pad = gst_element_get_static_pad (info->appsink, "sink");
if (gst_pad_is_linked (appsink_pad))
goto finish;
ret = gst_pad_link (pad, appsink_pad);
if (GST_PAD_LINK_FAILED (ret)) {
GST_ERROR ("Could not link qtdemux and appsink: %s",
gst_pad_link_get_name (ret));
}
finish:
gst_object_unref (GST_OBJECT (appsink_pad));
}
static void
perform_gapless_test (GaplessTestInfo * info)
{
GstElement *source, *demux, *appsink, *pipeline;
GstStateChangeReturn state_ret;
guint frame_num;
pipeline = gst_pipeline_new (NULL);
source = gst_element_factory_make ("filesrc", NULL);
demux = gst_element_factory_make ("qtdemux", NULL);
appsink = gst_element_factory_make ("appsink", NULL);
info->appsink = appsink;
g_signal_connect (demux, "pad-added", (GCallback)
qtdemux_pad_added_cb_for_gapless, info);
gst_bin_add_many (GST_BIN (pipeline), source, demux, appsink, NULL);
gst_element_link (source, demux);
{
char *full_filename =
g_build_filename (GST_TEST_FILES_PATH, info->filename, NULL);
g_object_set (G_OBJECT (source), "location", full_filename, NULL);
g_free (full_filename);
}
g_object_set (G_OBJECT (appsink), "async", FALSE, "sync", FALSE,
"max-buffers", 1, "enable-last-sample", FALSE, "processing-deadline",
G_MAXUINT64, NULL);
state_ret = gst_element_set_state (pipeline, GST_STATE_PLAYING);
fail_unless (state_ret != GST_STATE_CHANGE_FAILURE);
if (state_ret == GST_STATE_CHANGE_ASYNC) {
GST_LOG ("waiting for pipeline to reach PAUSED state");
state_ret = gst_element_get_state (pipeline, NULL, NULL, -1);
fail_unless_equals_int (state_ret, GST_STATE_CHANGE_SUCCESS);
}
/* Verify all frames from the test signal. */
for (frame_num = 0; frame_num < info->num_aac_frames; ++frame_num)
check_parsed_aac_frame (info, frame_num);
/* Check what duration is returned by a query. This duration must exclude
* the padding samples. */
{
GstQuery *query;
gint64 duration;
GstFormat format;
query = gst_query_new_duration (GST_FORMAT_TIME);
fail_unless (gst_element_query (pipeline, query));
gst_query_parse_duration (query, &format, &duration);
fail_unless_equals_int (format, GST_FORMAT_TIME);
fail_unless_equals_uint64 ((guint64) duration,
info->total_duration_without_padding);
gst_query_unref (query);
}
/* Seek tests: Here we seek to a certain position that corresponds to a
* certain frame. Then we check if we indeed got that frame. */
/* Seek back to the first frame. This will _not_ be the first valid frame.
* Instead, it will be a frame that gets only decoded and has duration
* zero. Other zero-duration frames may follow, until the first frame
* with valid data is encountered. This means that when the user seeks
* to position 0, downstream will subsequently get a number of buffers
* with PTS 0, and all of those buffers except the last will have a
* duration of 0. */
{
fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PAUSED),
GST_STATE_CHANGE_SUCCESS);
gst_element_seek_simple (pipeline, GST_FORMAT_TIME, GST_SEEK_FLAG_FLUSH, 0);
fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PLAYING),
GST_STATE_CHANGE_SUCCESS);
check_parsed_aac_frame (info, 0);
}
/* Now move to the frame past the very first one that contained valid samples.
* This very first frame will usually be clipped, and be output as the last
* buffer at PTS 0 (see above). */
{
GstClockTime position;
position =
gst_util_uint64_scale_int (info->num_samples_in_first_valid_frame,
GST_SECOND, info->sample_rate);
fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PAUSED),
GST_STATE_CHANGE_SUCCESS);
gst_element_seek_simple (pipeline, GST_FORMAT_TIME, GST_SEEK_FLAG_FLUSH,
position);
fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PLAYING),
GST_STATE_CHANGE_SUCCESS);
check_parsed_aac_frame (info, info->first_frame_with_valid_samples + 1);
}
/* Seek to the last frame with valid samples (= the first frame with padding
* samples at the end of the stream). */
{
GstClockTime position;
position =
gst_util_uint64_scale_int (info->num_samples_in_first_valid_frame +
info->num_samples_without_padding - info->num_samples_per_frame,
GST_SECOND, info->sample_rate);
fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PAUSED),
GST_STATE_CHANGE_SUCCESS);
gst_element_seek_simple (pipeline, GST_FORMAT_TIME, GST_SEEK_FLAG_FLUSH,
position);
fail_unless_equals_int (gst_element_set_state (pipeline, GST_STATE_PLAYING),
GST_STATE_CHANGE_SUCCESS);
check_parsed_aac_frame (info, info->last_frame_with_valid_samples);
}
gst_element_set_state (pipeline, GST_STATE_NULL);
gst_object_unref (pipeline);
}
GST_START_TEST (test_qtdemux_gapless_itunes_data)
{
GaplessTestInfo info;
setup_gapless_itunes_test_info (&info);
perform_gapless_test (&info);
}
GST_END_TEST;
GST_START_TEST (test_qtdemux_gapless_nero_data_with_itunsmpb)
{
GaplessTestInfo info;
setup_gapless_nero_with_itunsmpb_test_info (&info);
perform_gapless_test (&info);
}
GST_END_TEST;
GST_START_TEST (test_qtdemux_gapless_nero_data_without_itunsmpb)
{
GaplessTestInfo info;
setup_gapless_nero_without_itunsmpb_test_info (&info);
perform_gapless_test (&info);
}
GST_END_TEST;
static Suite *
qtdemux_suite (void)
{
@@ -1215,6 +1630,9 @@ qtdemux_suite (void)
tcase_add_test (tc_chain, test_qtdemux_pad_names);
tcase_add_test (tc_chain, test_qtdemux_compensate_data_offset);
tcase_add_test (tc_chain, test_qtdemux_mss_fragment);
tcase_add_test (tc_chain, test_qtdemux_gapless_itunes_data);
tcase_add_test (tc_chain, test_qtdemux_gapless_nero_data_with_itunsmpb);
tcase_add_test (tc_chain, test_qtdemux_gapless_nero_data_without_itunsmpb);
return s;
}