From d65c3bbe7e7e9f203bad70a0824065d7ea1e489e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim-Philipp=20M=C3=BCller?= <tim@centricular.com>
Date: Sun, 24 Aug 2014 13:38:08 +0100
Subject: [PATCH] qtdemux: implement seeking in fragmented mp4 files in pull
 mode based on the mfra table

---
 gst/isomp4/qtdemux.c | 160 +++++++++++++++++++++++++++++++++++++++++--
 gst/isomp4/qtdemux.h |   1 +
 2 files changed, 156 insertions(+), 5 deletions(-)

diff --git a/gst/isomp4/qtdemux.c b/gst/isomp4/qtdemux.c
index 7028125deb..5de4f44a92 100644
--- a/gst/isomp4/qtdemux.c
+++ b/gst/isomp4/qtdemux.c
@@ -359,6 +359,8 @@ struct _QtDemuxStream
   QtDemuxRandomAccessEntry *ra_entries;
   guint n_ra_entries;
 
+  const QtDemuxRandomAccessEntry *pending_seek;
+
   /* ctts */
   gboolean ctts_present;
   guint32 n_composition_times;
@@ -391,6 +393,8 @@ static GNode *qtdemux_tree_get_sibling_by_type (GNode * node, guint32 fourcc);
 static GNode *qtdemux_tree_get_sibling_by_type_full (GNode * node,
     guint32 fourcc, GstByteReader * parser);
 
+static GstFlowReturn qtdemux_add_fragmented_samples (GstQTDemux * qtdemux);
+
 static GstStaticPadTemplate gst_qtdemux_sink_template =
     GST_STATIC_PAD_TEMPLATE ("sink",
     GST_PAD_SINK,
@@ -1425,6 +1429,9 @@ gst_qtdemux_perform_seek (GstQTDemux * qtdemux, GstSegment * segment,
   if (segment->stop == -1)
     segment->stop = segment->duration;
 
+  if (qtdemux->fragmented)
+    qtdemux->fragmented_seek_pending = TRUE;
+
   return TRUE;
 }
 
@@ -2462,6 +2469,11 @@ qtdemux_parse_trun (GstQTDemux * qtdemux, GstByteReader * trun,
       "decode ts %" G_GINT64_FORMAT, stream->track_id, d_sample_duration,
       d_sample_size, d_sample_flags, *base_offset, decode_ts);
 
+  if (stream->pending_seek && moof_offset < stream->pending_seek->moof_offset) {
+    GST_INFO_OBJECT (stream->pad, "skipping trun before seek target fragment");
+    return TRUE;
+  }
+
   /* presence of stss or not can't really tell us much,
    * and flags and so on tend to be marginally reliable in these files */
   if (stream->subtype == FOURCC_soun) {
@@ -2571,10 +2583,20 @@ qtdemux_parse_trun (GstQTDemux * qtdemux, GstByteReader * trun,
     qtdemux->fragment_start = -1;
   } else {
     if (G_UNLIKELY (stream->n_samples == 0)) {
-      /* the timestamp of the first sample is also provided by the tfra entry
-       * but we shouldn't rely on it as it is at the end of files */
-      if (decode_ts >= 0) {
+      if (decode_ts > 0) {
         timestamp = decode_ts;
+      } else if (stream->pending_seek != NULL) {
+        /* if we don't have a timestamp from a tfdt box, we'll use the one
+         * from the mfra seek table */
+        GST_INFO_OBJECT (stream->pad, "pending seek ts = %" GST_TIME_FORMAT,
+            GST_TIME_ARGS (stream->pending_seek->ts));
+
+        /* FIXME: this is not fully correct, the timestamp refers to the random
+         * access sample referenced by the tfra entry, which may not necessarily
+         * be the first sample in the traf/trun (but hopefully/usually is) */
+        timestamp =
+            gst_util_uint64_scale (stream->pending_seek->ts,
+            stream->timescale, GST_SECOND);
       } else {
         timestamp = 0;
       }
@@ -2643,6 +2665,9 @@ qtdemux_parse_trun (GstQTDemux * qtdemux, GstByteReader * trun,
 
   stream->n_samples += samples_count;
 
+  if (stream->pending_seek != NULL)
+    stream->pending_seek = NULL;
+
   return TRUE;
 
 fail:
@@ -3605,6 +3630,17 @@ gst_qtdemux_activate_segment (GstQTDemux * qtdemux, QtDemuxStream * stream,
     gst_qtdemux_push_tags (qtdemux, stream);
   }
 
+  /* in the fragmented case, we pick a fragment that starts before our
+   * desired position and rely on downstream to wait for a keyframe
+   * (FIXME: doesn't seem to work so well with ismv and wmv, as no parser; the
+   * tfra entries tell us which trun/sample the key unit is in, but we don't
+   * make use of this additional information at the moment) */
+  if (qtdemux->fragmented) {
+    index = 0;
+    stream->to_sample = G_MAXUINT32;
+    return TRUE;
+  }
+
   /* and move to the keyframe before the indicated media time of the
    * segment */
   if (G_LIKELY (!QTSEGMENT_IS_EMPTY (segment))) {
@@ -3732,11 +3768,29 @@ gst_qtdemux_prepare_current_sample (GstQTDemux * qtdemux,
 
   *empty = FALSE;
 
+  if (stream->sample_index == -1)
+    stream->sample_index = 0;
+
   GST_LOG_OBJECT (qtdemux, "segment active, index = %u of %u",
       stream->sample_index, stream->n_samples);
 
-  if (G_UNLIKELY (stream->sample_index >= stream->n_samples))
-    goto eos;
+  if (G_UNLIKELY (stream->sample_index >= stream->n_samples)) {
+    if (!qtdemux->fragmented)
+      goto eos;
+
+    GST_INFO_OBJECT (qtdemux, "out of samples, trying to add more");
+    do {
+      GstFlowReturn flow;
+
+      GST_OBJECT_LOCK (qtdemux);
+      flow = qtdemux_add_fragmented_samples (qtdemux);
+      GST_OBJECT_UNLOCK (qtdemux);
+
+      if (flow != GST_FLOW_OK)
+        goto eos;
+    }
+    while (stream->sample_index >= stream->n_samples);
+  }
 
   if (!qtdemux_parse_samples (qtdemux, stream, stream->sample_index)) {
     GST_LOG_OBJECT (qtdemux, "Parsing of index %u failed!",
@@ -4244,6 +4298,95 @@ exit:
   return ret;
 }
 
+/* Returns the random access entry to start from for @pos: the last entry with
+ * ts <= @pos or, if @after is set, the first entry with ts > @pos (clamped to
+ * the final entry). The table must be non-empty and is assumed to be sorted. */
+static const QtDemuxRandomAccessEntry *
+gst_qtdemux_stream_seek_fragment (GstQTDemux * qtdemux, QtDemuxStream * stream,
+    GstClockTime pos, gboolean after)
+{
+  QtDemuxRandomAccessEntry *entries = stream->ra_entries;
+  guint n_entries = stream->n_ra_entries;
+  guint i;
+
+  for (i = 0; i < n_entries; ++i) {
+    if (entries[i].ts > pos)
+      break;
+  }
+
+  /* FIXME: maybe save first moof_offset somewhere instead, but for now it's
+   * probably okay to assume that the index lists the very first fragment */
+  if (i == 0)
+    return &entries[0];
+
+  /* i == n_entries means nothing lies after pos; entries[i] would be OOB */
+  return (after && i < n_entries) ? &entries[i] : &entries[i - 1];
+}
+
+/* Performs a pending seek in a fragmented file using the per-stream random
+ * access tables: drops all parsed samples, stores each stream's target entry
+ * in pending_seek, points moof_offset at the lowest audio/video target and adds
+ * samples from there. Returns FALSE if no audio/video stream has a table. */
+static gboolean
+gst_qtdemux_do_fragmented_seek (GstQTDemux * qtdemux)
+{
+  const QtDemuxRandomAccessEntry *best_entry = NULL;
+  guint i;
+
+  GST_OBJECT_LOCK (qtdemux);
+
+  g_assert (qtdemux->n_streams > 0);
+
+  for (i = 0; i < qtdemux->n_streams; i++) {
+    QtDemuxStream *stream = qtdemux->streams[i];
+    const QtDemuxRandomAccessEntry *entry;
+    gboolean is_audio_or_video;
+
+    /* reset the sample table of every stream, even those without an index */
+    g_free (stream->samples);
+    stream->samples = NULL;
+    stream->n_samples = 0;
+    stream->stbl_index = -1;    /* no samples have yet been parsed */
+    stream->sample_index = -1;
+
+    if (stream->ra_entries == NULL)
+      continue;
+
+    is_audio_or_video = (stream->subtype == FOURCC_vide
+        || stream->subtype == FOURCC_soun);
+
+    entry =
+        gst_qtdemux_stream_seek_fragment (qtdemux, stream,
+        stream->time_position, !is_audio_or_video);
+
+    GST_INFO_OBJECT (stream->pad, "%" GST_TIME_FORMAT " at offset "
+        "%" G_GUINT64_FORMAT, GST_TIME_ARGS (entry->ts), entry->moof_offset);
+
+    stream->pending_seek = entry;
+
+    /* decide position to jump to just based on audio/video tracks, not subs */
+    if (!is_audio_or_video)
+      continue;
+
+    if (best_entry == NULL || entry->moof_offset < best_entry->moof_offset)
+      best_entry = entry;
+  }
+
+  if (best_entry != NULL) {
+    GST_INFO_OBJECT (qtdemux, "seek to %" GST_TIME_FORMAT ", best fragment "
+        "moof offset: %" G_GUINT64_FORMAT ", ts %" GST_TIME_FORMAT,
+        GST_TIME_ARGS (qtdemux->streams[0]->time_position),
+        best_entry->moof_offset, GST_TIME_ARGS (best_entry->ts));
+
+    qtdemux->moof_offset = best_entry->moof_offset;
+    qtdemux_add_fragmented_samples (qtdemux);
+  }
+
+  /* single exit so the object lock is also released when no entry was found */
+  GST_OBJECT_UNLOCK (qtdemux);
+  return best_entry != NULL;
+}
+
 static GstFlowReturn
 gst_qtdemux_loop_state_movie (GstQTDemux * qtdemux)
 {
@@ -4264,6 +4407,13 @@ gst_qtdemux_loop_state_movie (GstQTDemux * qtdemux)
 
   gst_qtdemux_push_pending_newsegment (qtdemux);
 
+  if (qtdemux->fragmented_seek_pending) {
+    GST_INFO_OBJECT (qtdemux, "pending fragmented seek");
+    gst_qtdemux_do_fragmented_seek (qtdemux);
+    GST_INFO_OBJECT (qtdemux, "fragmented seek done!");
+    qtdemux->fragmented_seek_pending = FALSE;
+  }
+
   /* Figure out the next stream sample to output, min_time is expressed in
    * global time and runs over the edit list segments. */
   min_time = G_MAXUINT64;
diff --git a/gst/isomp4/qtdemux.h b/gst/isomp4/qtdemux.h
index d05050334e..6bfafae5f2 100644
--- a/gst/isomp4/qtdemux.h
+++ b/gst/isomp4/qtdemux.h
@@ -79,6 +79,7 @@ struct _GstQTDemux {
   guint64 duration;
 
   gboolean fragmented;
+  gboolean fragmented_seek_pending;
   guint64 moof_offset;
 
   gint state;