mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-09-09 05:38:58 +00:00
matroskademux: improve and simplify searching for cluster and position
... avoiding inefficiency proportional to file size
This commit is contained in:
parent
d7b736fa11
commit
84c6aaf45b
1 changed file with 112 additions and 76 deletions
|
@ -1654,7 +1654,8 @@ gst_matroska_cluster_compare (gint64 * i1, gint64 * i2)
|
||||||
/* searches for a cluster start from @pos,
|
/* searches for a cluster start from @pos,
|
||||||
* return GST_FLOW_OK and cluster position in @pos if found */
|
* return GST_FLOW_OK and cluster position in @pos if found */
|
||||||
static GstFlowReturn
|
static GstFlowReturn
|
||||||
gst_matroska_demux_search_cluster (GstMatroskaDemux * demux, gint64 * pos)
|
gst_matroska_demux_search_cluster (GstMatroskaDemux * demux, gint64 * pos,
|
||||||
|
gboolean forward)
|
||||||
{
|
{
|
||||||
gint64 newpos = *pos;
|
gint64 newpos = *pos;
|
||||||
gint64 orig_offset;
|
gint64 orig_offset;
|
||||||
|
@ -1671,8 +1672,8 @@ gst_matroska_demux_search_cluster (GstMatroskaDemux * demux, gint64 * pos)
|
||||||
|
|
||||||
orig_offset = demux->common.offset;
|
orig_offset = demux->common.offset;
|
||||||
|
|
||||||
GST_LOG_OBJECT (demux, "searching cluster following offset %" G_GINT64_FORMAT,
|
GST_LOG_OBJECT (demux, "searching cluster %s offset %" G_GINT64_FORMAT,
|
||||||
*pos);
|
forward ? "following" : "preceding", *pos);
|
||||||
|
|
||||||
if (demux->clusters) {
|
if (demux->clusters) {
|
||||||
gint64 *cpos;
|
gint64 *cpos;
|
||||||
|
@ -1680,7 +1681,7 @@ gst_matroska_demux_search_cluster (GstMatroskaDemux * demux, gint64 * pos)
|
||||||
cpos = gst_util_array_binary_search (demux->clusters->data,
|
cpos = gst_util_array_binary_search (demux->clusters->data,
|
||||||
demux->clusters->len, sizeof (gint64),
|
demux->clusters->len, sizeof (gint64),
|
||||||
(GCompareDataFunc) gst_matroska_cluster_compare,
|
(GCompareDataFunc) gst_matroska_cluster_compare,
|
||||||
GST_SEARCH_MODE_AFTER, pos, NULL);
|
forward ? GST_SEARCH_MODE_AFTER : GST_SEARCH_MODE_BEFORE, pos, NULL);
|
||||||
/* sanity check */
|
/* sanity check */
|
||||||
if (cpos) {
|
if (cpos) {
|
||||||
GST_DEBUG_OBJECT (demux,
|
GST_DEBUG_OBJECT (demux,
|
||||||
|
@ -1701,6 +1702,8 @@ gst_matroska_demux_search_cluster (GstMatroskaDemux * demux, gint64 * pos)
|
||||||
GstByteReader reader;
|
GstByteReader reader;
|
||||||
gint cluster_pos;
|
gint cluster_pos;
|
||||||
|
|
||||||
|
if (!forward)
|
||||||
|
newpos = MAX (0, newpos - chunk);
|
||||||
if (buf != NULL) {
|
if (buf != NULL) {
|
||||||
gst_buffer_unmap (buf, &map);
|
gst_buffer_unmap (buf, &map);
|
||||||
gst_buffer_unref (buf);
|
gst_buffer_unref (buf);
|
||||||
|
@ -1725,16 +1728,25 @@ gst_matroska_demux_search_cluster (GstMatroskaDemux * demux, gint64 * pos)
|
||||||
}
|
}
|
||||||
|
|
||||||
gst_byte_reader_init (&reader, data, size);
|
gst_byte_reader_init (&reader, data, size);
|
||||||
resume:
|
cluster_pos = -1;
|
||||||
cluster_pos = gst_byte_reader_masked_scan_uint32 (&reader, 0xffffffff,
|
while (1) {
|
||||||
GST_MATROSKA_ID_CLUSTER, 0, gst_byte_reader_get_remaining (&reader));
|
gint found = gst_byte_reader_masked_scan_uint32 (&reader, 0xffffffff,
|
||||||
|
GST_MATROSKA_ID_CLUSTER, 0, gst_byte_reader_get_remaining (&reader));
|
||||||
|
if (forward) {
|
||||||
|
cluster_pos = found;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* need last occurrence when searching backwards */
|
||||||
|
if (found >= 0) {
|
||||||
|
cluster_pos = gst_byte_reader_get_pos (&reader) + found;
|
||||||
|
gst_byte_reader_skip (&reader, found + 4);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (cluster_pos >= 0) {
|
if (cluster_pos >= 0) {
|
||||||
newpos += cluster_pos;
|
newpos += cluster_pos;
|
||||||
/* prepare resuming at next byte */
|
|
||||||
if (!gst_byte_reader_skip (&reader, cluster_pos + 1)) {
|
|
||||||
GST_DEBUG_OBJECT (demux, "Need more data -> continue");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
GST_DEBUG_OBJECT (demux,
|
GST_DEBUG_OBJECT (demux,
|
||||||
"found cluster ebml id at offset %" G_GINT64_FORMAT, newpos);
|
"found cluster ebml id at offset %" G_GINT64_FORMAT, newpos);
|
||||||
/* extra checks whether we really sync'ed to a cluster:
|
/* extra checks whether we really sync'ed to a cluster:
|
||||||
|
@ -1752,7 +1764,7 @@ gst_matroska_demux_search_cluster (GstMatroskaDemux * demux, gint64 * pos)
|
||||||
GST_ELEMENT_CAST (demux), &id, &length, &needed);
|
GST_ELEMENT_CAST (demux), &id, &length, &needed);
|
||||||
if (ret != GST_FLOW_OK) {
|
if (ret != GST_FLOW_OK) {
|
||||||
GST_DEBUG_OBJECT (demux, "need more data -> continue");
|
GST_DEBUG_OBJECT (demux, "need more data -> continue");
|
||||||
continue;
|
goto next;
|
||||||
}
|
}
|
||||||
g_assert (id == GST_MATROSKA_ID_CLUSTER);
|
g_assert (id == GST_MATROSKA_ID_CLUSTER);
|
||||||
GST_DEBUG_OBJECT (demux, "cluster size %" G_GUINT64_FORMAT ", prefix %d",
|
GST_DEBUG_OBJECT (demux, "cluster size %" G_GUINT64_FORMAT ", prefix %d",
|
||||||
|
@ -1766,26 +1778,19 @@ gst_matroska_demux_search_cluster (GstMatroskaDemux * demux, gint64 * pos)
|
||||||
demux->common.offset += length + needed;
|
demux->common.offset += length + needed;
|
||||||
ret = gst_matroska_read_common_peek_id_length_pull (&demux->common,
|
ret = gst_matroska_read_common_peek_id_length_pull (&demux->common,
|
||||||
GST_ELEMENT_CAST (demux), &id, &length, &needed);
|
GST_ELEMENT_CAST (demux), &id, &length, &needed);
|
||||||
if (ret != GST_FLOW_OK) {
|
if (ret != GST_FLOW_OK)
|
||||||
/* we skipped one byte in the reader above, need to accomodate for
|
goto next;
|
||||||
* that when resuming skipping from the reader instead of reading a
|
|
||||||
* new chunk */
|
|
||||||
newpos += 1;
|
|
||||||
goto resume;
|
|
||||||
}
|
|
||||||
GST_DEBUG_OBJECT (demux, "next element is %scluster",
|
GST_DEBUG_OBJECT (demux, "next element is %scluster",
|
||||||
id == GST_MATROSKA_ID_CLUSTER ? "" : "not ");
|
id == GST_MATROSKA_ID_CLUSTER ? "" : "not ");
|
||||||
if (id == GST_MATROSKA_ID_CLUSTER)
|
if (id == GST_MATROSKA_ID_CLUSTER)
|
||||||
break;
|
break;
|
||||||
/* not ok, resume
|
next:
|
||||||
* we skipped one byte in the reader above, need to accomodate for
|
if (forward)
|
||||||
* that when resuming skipping from the reader instead of reading a
|
newpos += 1;
|
||||||
* new chunk */
|
|
||||||
newpos += 1;
|
|
||||||
goto resume;
|
|
||||||
} else {
|
} else {
|
||||||
/* partial cluster id may have been in tail of buffer */
|
/* partial cluster id may have been in tail of buffer */
|
||||||
newpos += MAX (gst_byte_reader_get_remaining (&reader), 4) - 3;
|
newpos +=
|
||||||
|
forward ? MAX (gst_byte_reader_get_remaining (&reader), 4) - 3 : 3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1809,20 +1814,18 @@ gst_matroska_demux_search_pos (GstMatroskaDemux * demux, GstClockTime time)
|
||||||
GstMatroskaIndex *entry = NULL;
|
GstMatroskaIndex *entry = NULL;
|
||||||
GstMatroskaReadState current_state;
|
GstMatroskaReadState current_state;
|
||||||
GstClockTime otime, prev_cluster_time, current_cluster_time, cluster_time;
|
GstClockTime otime, prev_cluster_time, current_cluster_time, cluster_time;
|
||||||
gint64 opos, newpos, startpos = 0, current_offset;
|
GstClockTime atime;
|
||||||
|
gint64 opos, newpos, current_offset;
|
||||||
gint64 prev_cluster_offset = -1, current_cluster_offset, cluster_offset;
|
gint64 prev_cluster_offset = -1, current_cluster_offset, cluster_offset;
|
||||||
|
gint64 apos, maxpos;
|
||||||
guint64 cluster_size = 0;
|
guint64 cluster_size = 0;
|
||||||
const guint chunk = 64 * 1024;
|
|
||||||
GstFlowReturn ret;
|
GstFlowReturn ret;
|
||||||
guint64 length;
|
guint64 length;
|
||||||
guint32 id;
|
guint32 id;
|
||||||
guint needed;
|
guint needed;
|
||||||
|
|
||||||
/* (under)estimate new position, resync using cluster ebml id,
|
/* estimate new position, resync using cluster ebml id,
|
||||||
* and scan forward to appropriate cluster
|
* and bisect further or scan forward to appropriate cluster */
|
||||||
* (and re-estimate if need to go backward) */
|
|
||||||
|
|
||||||
prev_cluster_time = GST_CLOCK_TIME_NONE;
|
|
||||||
|
|
||||||
/* store some current state */
|
/* store some current state */
|
||||||
current_state = demux->common.state;
|
current_state = demux->common.state;
|
||||||
|
@ -1836,73 +1839,75 @@ gst_matroska_demux_search_pos (GstMatroskaDemux * demux, GstClockTime time)
|
||||||
|
|
||||||
/* estimate using start and current position */
|
/* estimate using start and current position */
|
||||||
GST_OBJECT_LOCK (demux);
|
GST_OBJECT_LOCK (demux);
|
||||||
opos = demux->common.offset - demux->common.ebml_segment_start;
|
apos = demux->first_cluster_offset;
|
||||||
|
atime = demux->stream_start_time;
|
||||||
|
opos = demux->common.offset;
|
||||||
otime = demux->common.segment.position;
|
otime = demux->common.segment.position;
|
||||||
GST_OBJECT_UNLOCK (demux);
|
GST_OBJECT_UNLOCK (demux);
|
||||||
|
|
||||||
/* sanitize */
|
/* sanitize */
|
||||||
time = MAX (time, demux->stream_start_time);
|
time = MAX (time, atime);
|
||||||
|
otime = MAX (otime, atime);
|
||||||
|
opos = MAX (opos, apos);
|
||||||
|
|
||||||
/* avoid division by zero in first estimation below */
|
maxpos = gst_matroska_read_common_get_length (&demux->common);
|
||||||
if (otime <= demux->stream_start_time)
|
|
||||||
otime = time;
|
/* invariants;
|
||||||
|
* apos <= opos
|
||||||
|
* atime <= otime
|
||||||
|
* apos always refer to a cluster before target time;
|
||||||
|
* opos may or may not be after target time, but if it is once so,
|
||||||
|
* then also in next iteration
|
||||||
|
* */
|
||||||
|
|
||||||
retry:
|
retry:
|
||||||
GST_LOG_OBJECT (demux,
|
GST_LOG_OBJECT (demux,
|
||||||
|
"apos: %" G_GUINT64_FORMAT ", atime: %" GST_TIME_FORMAT ", %"
|
||||||
|
GST_TIME_FORMAT " in stream time, "
|
||||||
"opos: %" G_GUINT64_FORMAT ", otime: %" GST_TIME_FORMAT ", %"
|
"opos: %" G_GUINT64_FORMAT ", otime: %" GST_TIME_FORMAT ", %"
|
||||||
GST_TIME_FORMAT " in stream time (start %" GST_TIME_FORMAT "), time %"
|
GST_TIME_FORMAT " in stream time (start %" GST_TIME_FORMAT "), time %"
|
||||||
GST_TIME_FORMAT, opos, GST_TIME_ARGS (otime),
|
GST_TIME_FORMAT, apos, GST_TIME_ARGS (atime),
|
||||||
GST_TIME_ARGS (otime - demux->stream_start_time),
|
GST_TIME_ARGS (atime - demux->stream_start_time), opos,
|
||||||
|
GST_TIME_ARGS (otime), GST_TIME_ARGS (otime - demux->stream_start_time),
|
||||||
GST_TIME_ARGS (demux->stream_start_time), GST_TIME_ARGS (time));
|
GST_TIME_ARGS (demux->stream_start_time), GST_TIME_ARGS (time));
|
||||||
|
|
||||||
if (otime <= demux->stream_start_time) {
|
g_assert (atime <= otime);
|
||||||
newpos = 0;
|
g_assert (apos <= opos);
|
||||||
|
if (time == GST_CLOCK_TIME_NONE) {
|
||||||
|
GST_DEBUG_OBJECT (demux, "searching last cluster");
|
||||||
|
newpos = maxpos;
|
||||||
|
if (newpos == -1) {
|
||||||
|
GST_DEBUG_OBJECT (demux, "unknown file size; bailing out");
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
} else if (otime <= atime) {
|
||||||
|
newpos = apos;
|
||||||
} else {
|
} else {
|
||||||
newpos =
|
newpos = apos +
|
||||||
gst_util_uint64_scale (opos - demux->common.ebml_segment_start,
|
gst_util_uint64_scale (opos - apos, time - atime, otime - atime);
|
||||||
time - demux->stream_start_time,
|
if (maxpos != -1 && newpos > maxpos)
|
||||||
otime - demux->stream_start_time) - chunk;
|
newpos = maxpos;
|
||||||
if (newpos < 0)
|
|
||||||
newpos = 0;
|
|
||||||
}
|
}
|
||||||
/* favour undershoot */
|
|
||||||
newpos = newpos * 90 / 100;
|
|
||||||
newpos += demux->common.ebml_segment_start;
|
|
||||||
|
|
||||||
GST_DEBUG_OBJECT (demux,
|
GST_DEBUG_OBJECT (demux,
|
||||||
"estimated offset for %" GST_TIME_FORMAT ": %" G_GINT64_FORMAT,
|
"estimated offset for %" GST_TIME_FORMAT ": %" G_GINT64_FORMAT,
|
||||||
GST_TIME_ARGS (time), newpos);
|
GST_TIME_ARGS (time), newpos);
|
||||||
|
|
||||||
/* and at least start scanning before previous scan start to avoid looping */
|
/* search backwards */
|
||||||
startpos = startpos * 90 / 100;
|
if (newpos > apos) {
|
||||||
if (startpos && startpos < newpos)
|
ret = gst_matroska_demux_search_cluster (demux, &newpos, FALSE);
|
||||||
newpos = startpos;
|
if (ret != GST_FLOW_OK)
|
||||||
|
|
||||||
/* read in at newpos and scan for ebml cluster id */
|
|
||||||
startpos = newpos;
|
|
||||||
while (1) {
|
|
||||||
|
|
||||||
ret = gst_matroska_demux_search_cluster (demux, &newpos);
|
|
||||||
if (ret == GST_FLOW_EOS) {
|
|
||||||
/* heuristic HACK */
|
|
||||||
newpos = startpos * 80 / 100;
|
|
||||||
GST_DEBUG_OBJECT (demux, "EOS; "
|
|
||||||
"new estimated offset for %" GST_TIME_FORMAT ": %" G_GINT64_FORMAT,
|
|
||||||
GST_TIME_ARGS (time), newpos);
|
|
||||||
startpos = newpos;
|
|
||||||
continue;
|
|
||||||
} else if (ret != GST_FLOW_OK) {
|
|
||||||
goto exit;
|
goto exit;
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* then start scanning and parsing for cluster time,
|
/* then start scanning and parsing for cluster time,
|
||||||
* re-estimate if overshoot, otherwise next cluster and so on */
|
* re-estimate if possible, otherwise next cluster and so on */
|
||||||
|
/* note that each re-estimate is entered with a change in apos or opos,
|
||||||
|
* avoiding infinite loop */
|
||||||
demux->common.offset = newpos;
|
demux->common.offset = newpos;
|
||||||
demux->cluster_time = cluster_time = GST_CLOCK_TIME_NONE;
|
demux->cluster_time = cluster_time = GST_CLOCK_TIME_NONE;
|
||||||
cluster_size = 0;
|
cluster_size = 0;
|
||||||
|
prev_cluster_time = GST_CLOCK_TIME_NONE;
|
||||||
while (1) {
|
while (1) {
|
||||||
/* peek and parse some elements */
|
/* peek and parse some elements */
|
||||||
ret = gst_matroska_read_common_peek_id_length_pull (&demux->common,
|
ret = gst_matroska_read_common_peek_id_length_pull (&demux->common,
|
||||||
|
@ -1930,6 +1935,12 @@ retry:
|
||||||
GST_DEBUG_OBJECT (demux, "found cluster at offset %" G_GINT64_FORMAT
|
GST_DEBUG_OBJECT (demux, "found cluster at offset %" G_GINT64_FORMAT
|
||||||
" with time %" GST_TIME_FORMAT, cluster_offset,
|
" with time %" GST_TIME_FORMAT, cluster_offset,
|
||||||
GST_TIME_ARGS (cluster_time));
|
GST_TIME_ARGS (cluster_time));
|
||||||
|
if (time == GST_CLOCK_TIME_NONE) {
|
||||||
|
GST_DEBUG_OBJECT (demux, "found last cluster");
|
||||||
|
prev_cluster_time = cluster_time;
|
||||||
|
prev_cluster_offset = cluster_offset;
|
||||||
|
break;
|
||||||
|
}
|
||||||
if (cluster_time > time) {
|
if (cluster_time > time) {
|
||||||
GST_DEBUG_OBJECT (demux, "overshot target");
|
GST_DEBUG_OBJECT (demux, "overshot target");
|
||||||
/* cluster overshoots */
|
/* cluster overshoots */
|
||||||
|
@ -1950,6 +1961,30 @@ retry:
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
/* cluster undershoots */
|
||||||
|
GST_DEBUG_OBJECT (demux, "undershot target");
|
||||||
|
/* ok if close enough */
|
||||||
|
if (GST_CLOCK_DIFF (cluster_time, time) < 5 * GST_SECOND) {
|
||||||
|
GST_DEBUG_OBJECT (demux, "target close enough");
|
||||||
|
prev_cluster_time = cluster_time;
|
||||||
|
prev_cluster_offset = cluster_offset;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (otime > time) {
|
||||||
|
/* we are in between atime and otime => can bisect if worthwhile */
|
||||||
|
if (prev_cluster_time != GST_CLOCK_TIME_NONE &&
|
||||||
|
cluster_time > prev_cluster_time &&
|
||||||
|
(GST_CLOCK_DIFF (prev_cluster_time, cluster_time) * 10 <
|
||||||
|
GST_CLOCK_DIFF (cluster_time, time))) {
|
||||||
|
/* we moved at least one cluster forward,
|
||||||
|
* and it looks like target is still far away,
|
||||||
|
* let's estimate again */
|
||||||
|
GST_DEBUG_OBJECT (demux, "bisecting with new apos");
|
||||||
|
apos = cluster_offset;
|
||||||
|
atime = cluster_time;
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
|
}
|
||||||
/* cluster undershoots, goto next one */
|
/* cluster undershoots, goto next one */
|
||||||
prev_cluster_time = cluster_time;
|
prev_cluster_time = cluster_time;
|
||||||
prev_cluster_offset = cluster_offset;
|
prev_cluster_offset = cluster_offset;
|
||||||
|
@ -4176,7 +4211,8 @@ gst_matroska_demux_check_parse_error (GstMatroskaDemux * demux)
|
||||||
* search for cluster mark following current pos */
|
* search for cluster mark following current pos */
|
||||||
pos = demux->common.offset;
|
pos = demux->common.offset;
|
||||||
GST_WARNING_OBJECT (demux, "parse error, looking for next cluster");
|
GST_WARNING_OBJECT (demux, "parse error, looking for next cluster");
|
||||||
if ((ret = gst_matroska_demux_search_cluster (demux, &pos)) != GST_FLOW_OK) {
|
if ((ret = gst_matroska_demux_search_cluster (demux, &pos, TRUE)) !=
|
||||||
|
GST_FLOW_OK) {
|
||||||
/* did not work, give up */
|
/* did not work, give up */
|
||||||
return ret;
|
return ret;
|
||||||
} else {
|
} else {
|
||||||
|
|
Loading…
Reference in a new issue