mp4mux: Add support for edit lists

So that we properly handle audio priming.
This commit is contained in:
Thibault Saunier 2024-01-09 08:47:51 -03:00
parent ff0da29f19
commit ecbf46e82b
3 changed files with 245 additions and 125 deletions

View file

@ -210,50 +210,12 @@ fn write_moov(v: &mut Vec<u8>, header: &super::Header) -> Result<(), Error> {
Ok(())
}
fn stream_to_timescale(stream: &super::Stream) -> u32 {
if stream.trak_timescale > 0 {
stream.trak_timescale
} else {
let s = stream.caps.structure(0).unwrap();
if let Ok(fps) = s.get::<gst::Fraction>("framerate") {
if fps.numer() == 0 {
return 10_000;
}
if fps.denom() != 1 && fps.denom() != 1001 {
if let Some(fps) = (fps.denom() as u64)
.nseconds()
.mul_div_round(1_000_000_000, fps.numer() as u64)
.and_then(gst_video::guess_framerate)
{
return (fps.numer() as u32)
.mul_div_round(100, fps.denom() as u32)
.unwrap_or(10_000);
}
}
if fps.denom() == 1001 {
fps.numer() as u32
} else {
(fps.numer() as u32)
.mul_div_round(100, fps.denom() as u32)
.unwrap_or(10_000)
}
} else if let Ok(rate) = s.get::<i32>("rate") {
rate as u32
} else {
10_000
}
}
}
fn header_to_timescale(header: &super::Header) -> u32 {
if header.movie_timescale > 0 {
header.movie_timescale
} else {
// Use the reference track timescale
stream_to_timescale(&header.streams[0])
header.streams[0].timescale
}
}
@ -337,7 +299,7 @@ fn write_trak(
if !references.is_empty() {
write_box(v, b"tref", |v| write_tref(v, header, references))?;
}
write_box(v, b"edts", |v| write_edts(v, header, stream))?;
write_box(v, b"edts", |v| write_edts(v, stream))?;
Ok(())
}
@ -474,7 +436,7 @@ fn write_mdhd(
stream: &super::Stream,
creation_time: u64,
) -> Result<(), Error> {
let timescale = stream_to_timescale(stream);
let timescale = stream.timescale;
// Creation time
v.extend(creation_time.to_be_bytes());
@ -1366,7 +1328,7 @@ fn write_stts(
_header: &super::Header,
stream: &super::Stream,
) -> Result<(), Error> {
let timescale = stream_to_timescale(stream);
let timescale = stream.timescale;
let entry_count_position = v.len();
// Entry count, rewritten in the end
@ -1420,7 +1382,7 @@ fn write_ctts(
stream: &super::Stream,
version: u8,
) -> Result<(), Error> {
let timescale = stream_to_timescale(stream);
let timescale = stream.timescale;
let entry_count_position = v.len();
// Entry count, rewritten in the end
@ -1490,7 +1452,7 @@ fn write_cslg(
_header: &super::Header,
stream: &super::Stream,
) -> Result<(), Error> {
let timescale = stream_to_timescale(stream);
let timescale = stream.timescale;
let (min_ctts, max_ctts) = stream
.chunks
@ -1706,82 +1668,31 @@ fn write_tref(
Ok(())
}
fn write_edts(
v: &mut Vec<u8>,
header: &super::Header,
stream: &super::Stream,
) -> Result<(), Error> {
write_full_box(v, b"elst", FULL_BOX_VERSION_1, 0, |v| {
write_elst(v, header, stream)
})?;
fn write_edts(v: &mut Vec<u8>, stream: &super::Stream) -> Result<(), Error> {
write_full_box(v, b"elst", FULL_BOX_VERSION_1, 0, |v| write_elst(v, stream))?;
Ok(())
}
fn write_elst(
v: &mut Vec<u8>,
header: &super::Header,
stream: &super::Stream,
) -> Result<(), Error> {
// In movie header timescale
let timescale = header_to_timescale(header);
fn write_elst(v: &mut Vec<u8>, stream: &super::Stream) -> Result<(), Error> {
// Entry count
v.extend((stream.elst_infos.len() as u32).to_be_bytes());
let min_earliest_pts = header.streams.iter().map(|s| s.earliest_pts).min().unwrap();
for elst_info in &stream.elst_infos {
v.extend(
elst_info
.duration
.expect("Should have been set by `get_elst_infos`")
.to_be_bytes(),
);
if min_earliest_pts != stream.earliest_pts {
let gap = (stream.earliest_pts - min_earliest_pts)
.nseconds()
.mul_div_round(timescale as u64, gst::ClockTime::SECOND.nseconds())
.context("too big gap")?;
// Media time
v.extend(elst_info.start.to_be_bytes());
if gap > 0 {
// Entry count
v.extend(2u32.to_be_bytes());
// First entry for the gap
// Edit duration
v.extend(gap.to_be_bytes());
// Media time
v.extend((-1i64).to_be_bytes());
// Media rate
v.extend(1u16.to_be_bytes());
v.extend(0u16.to_be_bytes());
} else {
// Entry count
v.extend(1u32.to_be_bytes());
}
} else {
// Entry count
v.extend(1u32.to_be_bytes());
// Media rate
v.extend(1u16.to_be_bytes());
v.extend(0u16.to_be_bytes());
}
// Edit duration
let duration = (stream.end_pts - stream.earliest_pts)
.nseconds()
.mul_div_round(timescale as u64, gst::ClockTime::SECOND.nseconds())
.context("too big track duration")?;
v.extend(duration.to_be_bytes());
// Media time
if let Some(start_dts) = stream.start_dts {
let shift = (gst::Signed::Positive(stream.earliest_pts) - start_dts)
.nseconds()
.positive()
.unwrap_or(0)
.mul_div_round(timescale as u64, gst::ClockTime::SECOND.nseconds())
.context("too big track duration")?;
v.extend(shift.to_be_bytes());
} else {
v.extend(0u64.to_be_bytes());
}
// Media rate
v.extend(1u16.to_be_bytes());
v.extend(0u16.to_be_bytes());
Ok(())
}

View file

@ -6,6 +6,7 @@
//
// SPDX-License-Identifier: MPL-2.0
use anyhow::{anyhow, Context};
use gst::glib;
use gst::prelude::*;
use gst::subclass::prelude::*;
@ -128,6 +129,10 @@ struct Stream {
/// Earliest PTS.
earliest_pts: Option<gst::ClockTime>,
/// Edit list entries for this stream.
elst_infos: Vec<super::ElstInfo>,
/// Current end PTS.
end_pts: Option<gst::ClockTime>,
@ -135,6 +140,121 @@ struct Stream {
running_time_utc_time_mapping: Option<(gst::Signed<gst::ClockTime>, gst::ClockTime)>,
}
impl Stream {
fn get_elst_infos(
&self,
min_earliest_pts: gst::ClockTime,
) -> Result<Vec<super::ElstInfo>, anyhow::Error> {
let mut elst_infos = self.elst_infos.clone();
let timescale = self.timescale();
let earliest_pts = self
.earliest_pts
.expect("Streams without earliest_pts should have been skipped");
let end_pts = self
.end_pts
.expect("Streams without end_pts should have been skipped");
// If no elst info were set, use the whole track
if self.elst_infos.is_empty() {
let start = if let Some(start_dts) = self.start_dts {
((gst::Signed::Positive(earliest_pts) - start_dts)
.nseconds()
.positive()
.unwrap_or(0)
.mul_div_round(timescale as u64, gst::ClockTime::SECOND.nseconds())
.context("too big track duration")?) as i64
} else {
0i64
};
elst_infos.push(super::ElstInfo {
start,
duration: Some(
(end_pts - earliest_pts)
.nseconds()
.mul_div_round(timescale as u64, gst::ClockTime::SECOND.nseconds())
.context("too big track duration")?,
),
});
}
// Add a gap at the beginning if needed
if min_earliest_pts != earliest_pts {
let gap_duration = (earliest_pts - min_earliest_pts).nseconds())
.mul_div_round(timescale as u64, gst::ClockTime::SECOND.nseconds())
.context("too big gap")?;
if gap_duration > 0 {
elst_infos.insert(
0,
super::ElstInfo {
start: -1,
duration: Some(gap_duration),
},
);
}
}
let mut iter = elst_infos.iter_mut().peekable();
while let Some(&mut ref mut elst_info) = iter.next() {
if elst_info.duration.unwrap_or(0u64) == 0u64 {
elst_info.duration = if let Some(next) = iter.peek_mut() {
Some((next.start - elst_info.start) as u64)
} else {
Some(
(end_pts - earliest_pts)
.nseconds()
.mul_div_round(timescale as u64, gst::ClockTime::SECOND.nseconds())
.context("too big track duration")?,
)
}
}
}
Ok(elst_infos)
}
fn timescale(&self) -> u32 {
let trak_timescale = { self.sinkpad.imp().settings.lock().unwrap().trak_timescale };
if trak_timescale > 0 {
return trak_timescale;
}
let s = self.caps.structure(0).unwrap();
if let Ok(fps) = s.get::<gst::Fraction>("framerate") {
if fps.numer() == 0 {
return 10_000;
}
if fps.denom() != 1 && fps.denom() != 1001 {
if let Some(fps) = (fps.denom() as u64)
.nseconds()
.mul_div_round(1_000_000_000, fps.numer() as u64)
.and_then(gst_video::guess_framerate)
{
return (fps.numer() as u32)
.mul_div_round(100, fps.denom() as u32)
.unwrap_or(10_000);
}
}
if fps.denom() == 1001 {
fps.numer() as u32
} else {
(fps.numer() as u32)
.mul_div_round(100, fps.denom() as u32)
.unwrap_or(10_000)
}
} else if let Ok(rate) = s.get::<i32>("rate") {
rate as u32
} else {
10_000
}
}
}
#[derive(Default)]
struct State {
/// List of streams when the muxer was started.
@ -184,6 +304,77 @@ impl MP4Mux {
Ok(())
}
/// Records an edit list entry on `stream` for `buffer`, if the buffer
/// carries a `gst_audio::AudioClippingMeta`.
///
/// Buffers without that meta are ignored and `Ok(())` is returned. Clipping
/// values in `Default` format are used as they are (a value of zero is
/// treated as unset); values in `Time` format are converted using the caps
/// `rate` field, falling back to the stream timescale when there is no
/// `rate`.
///
/// The pushed entry starts at the buffer PTS plus the start clip (or at 0
/// when there is no start clip). Its duration is the buffer end minus the
/// end clip, or `None` when there is no end clip.
///
/// # Errors
///
/// Returns an error if the meta has neither a usable start nor end value, if
/// a time conversion overflows, if an end clip is present but the buffer has
/// no duration, or if the end clip is larger than the buffer end.
fn add_elst_info(
    &self,
    buffer: &PendingBuffer,
    stream: &mut Stream,
) -> Result<(), anyhow::Error> {
    let cmeta = match buffer.buffer.meta::<gst_audio::AudioClippingMeta>() {
        Some(cmeta) => cmeta,
        None => return Ok(()),
    };

    let timescale = stream
        .caps
        .structure(0)
        .unwrap()
        .get::<i32>("rate")
        .unwrap_or_else(|_| stream.timescale() as i32);

    let gstclocktime_to_samples = move |v: gst::ClockTime| {
        v.nseconds()
            .mul_div_round(timescale as u64, gst::ClockTime::SECOND.nseconds())
            .context("Invalid start in the AudioClipMeta")
    };

    let generic_to_samples = move |t| -> Result<Option<u64>, anyhow::Error> {
        if let gst::GenericFormattedValue::Default(Some(v)) = t {
            let v: u64 = v.into();
            // A clip of zero is the same as no clip at all.
            Ok(Some(v).filter(|x| x != &0u64))
        } else if let gst::GenericFormattedValue::Time(Some(v)) = t {
            Ok(Some(gstclocktime_to_samples(v)?))
        } else {
            Ok(None)
        }
    };

    let start: Option<u64> = generic_to_samples(cmeta.start())?;
    let end: Option<u64> = generic_to_samples(cmeta.end())?;

    if end.is_none() && start.is_none() {
        return Err(anyhow!(
            "No start or end time in `default` format in the AudioClipingMeta"
        ));
    }

    // At least one of `start`/`end` is set from here on, and both need the
    // buffer PTS, so convert it exactly once.
    let pts = gstclocktime_to_samples(buffer.pts)?;

    let start = start.map_or(0, |s| s + pts);

    let duration = match end {
        Some(e) => {
            let buffer_duration = gstclocktime_to_samples(
                buffer
                    .duration
                    .context("No duration on buffer, we can't add edit list")?,
            )?;

            // Checked so that an end clip reaching past the buffer end is
            // reported instead of underflowing.
            Some(
                (pts + buffer_duration)
                    .checked_sub(e)
                    .context("End clipping is bigger than the buffer end")?,
            )
        }
        None => None,
    };

    stream.elst_infos.push(super::ElstInfo {
        start: start as i64,
        duration,
    });

    Ok(())
}
fn peek_buffer(
&self,
sinkpad: &super::MP4MuxPad,
@ -815,6 +1006,11 @@ impl MP4Mux {
let duration = buffer.duration.unwrap();
let composition_time_offset = buffer.composition_time_offset;
if let Err(err) = self.add_elst_info(&buffer, stream) {
gst::error!(CAT, "Failed to add elst info: {:#}", err);
}
let mut buffer = buffer.buffer;
stream.queued_chunk_time += duration;
@ -930,6 +1126,7 @@ impl MP4Mux {
queued_chunk_bytes: 0,
start_dts: None,
earliest_pts: None,
elst_infos: Default::default(),
end_pts: None,
running_time_utc_time_mapping: None,
});
@ -1319,9 +1516,14 @@ impl AggregatorImpl for MP4Mux {
state.mdat_size
);
let min_earliest_pts = state
.streams
.iter()
.filter_map(|s| s.earliest_pts)
.min()
.unwrap();
let mut streams = Vec::with_capacity(state.streams.len());
for stream in state.streams.drain(..) {
let pad_settings = stream.sinkpad.imp().settings.lock().unwrap().clone();
let (earliest_pts, end_pts) = match Option::zip(stream.earliest_pts, stream.end_pts)
{
Some(res) => res,
@ -1331,16 +1533,22 @@ impl AggregatorImpl for MP4Mux {
streams.push(super::Stream {
caps: stream.caps.clone(),
delta_frames: stream.delta_frames,
trak_timescale: pad_settings.trak_timescale,
start_dts: stream.start_dts,
timescale: stream.timescale(),
earliest_pts,
end_pts,
elst_infos: stream
.get_elst_infos(min_earliest_pts)
.unwrap_or_else(|e| {
gst::error!(CAT, "Could not prepare edit lists: {e:?}");
Vec::new()
}),
chunks: stream.chunks,
});
}
let moov = boxes::create_moov(super::Header {
variant: self.obj().class().as_ref().variant,
variant: obj.class().as_ref().variant,
movie_timescale: settings.movie_timescale,
streams,
})

View file

@ -97,6 +97,12 @@ pub(crate) struct Chunk {
samples: Vec<Sample>,
}
/// Information for one edit list (`elst`) entry of a stream.
#[derive(Debug, Clone)]
pub(crate) struct ElstInfo {
    /// Media time of the entry, written as-is into the `elst` box.
    ///
    /// `-1` is used for the gap entry inserted in front of a stream that
    /// starts later than the others.
    // NOTE(review): units are presumably the stream timescale (audio sample
    // rate for entries coming from clipping metas) — confirm.
    start: i64,
    /// Edit duration of the entry.
    ///
    /// May be `None` while entries are being collected; writing the `elst`
    /// box expects `get_elst_infos` to have filled it in.
    duration: Option<u64>,
}
#[derive(Debug)]
pub(crate) struct Stream {
/// Caps of this stream
@ -106,15 +112,7 @@ pub(crate) struct Stream {
delta_frames: DeltaFrames,
/// Pre-defined trak timescale if not 0.
trak_timescale: u32,
/// Start DTS
///
/// If this is negative then an edit list entry is needed to
/// make all sample times positive.
///
/// This is `None` for streams that have no concept of DTS.
start_dts: Option<gst::Signed<gst::ClockTime>>,
timescale: u32,
/// Earliest PTS
///
@ -126,6 +124,9 @@ pub(crate) struct Stream {
/// All the chunks stored for this stream
chunks: Vec<Chunk>,
/// Edit list clipping information
elst_infos: Vec<ElstInfo>,
}
#[derive(Debug)]