mp4/fmp4: support flac inside the iso (f)mp4 container

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/1401>
This commit is contained in:
Simonas Kazlauskas 2023-11-28 19:53:10 +02:00 committed by Sebastian Dröge
parent f4b086738b
commit 5d939498f1
7 changed files with 356 additions and 165 deletions

View file

@ -2038,7 +2038,7 @@
"long-name": "ISOFMP4Mux", "long-name": "ISOFMP4Mux",
"pad-templates": { "pad-templates": {
"sink_%%u": { "sink_%%u": {
"caps": "video/x-h264:\n stream-format: { (string)avc, (string)avc3 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-h265:\n stream-format: { (string)hvc1, (string)hev1 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-vp8:\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-vp9:\n profile: { (string)0, (string)1, (string)2, (string)3 }\n chroma-format: { (string)4:2:0, (string)4:2:2, (string)4:4:4 }\n bit-depth-luma: { (uint)8, (uint)10, (uint)12 }\nbit-depth-chroma: { (uint)8, (uint)10, (uint)12 }\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-av1:\n stream-format: obu-stream\n alignment: tu\n profile: { (string)main, (string)high, (string)professional }\n chroma-format: { (string)4:0:0, (string)4:2:0, (string)4:2:2, (string)4:4:4 }\n bit-depth-luma: { (uint)8, (uint)10, (uint)12 }\nbit-depth-chroma: { (uint)8, (uint)10, (uint)12 }\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\naudio/mpeg:\n mpegversion: 4\n stream-format: raw\n channels: [ 1, 65535 ]\n rate: [ 1, 2147483647 ]\naudio/x-opus:\nchannel-mapping-family: [ 0, 255 ]\n channels: [ 1, 8 ]\n rate: [ 1, 2147483647 ]\n", "caps": "video/x-h264:\n stream-format: { (string)avc, (string)avc3 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-h265:\n stream-format: { (string)hvc1, (string)hev1 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-vp8:\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-vp9:\n profile: { (string)0, (string)1, (string)2, (string)3 }\n chroma-format: { (string)4:2:0, (string)4:2:2, (string)4:4:4 }\n bit-depth-luma: { (uint)8, (uint)10, (uint)12 }\nbit-depth-chroma: { (uint)8, (uint)10, (uint)12 }\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-av1:\n stream-format: obu-stream\n alignment: tu\n profile: { (string)main, (string)high, (string)professional }\n chroma-format: { (string)4:0:0, (string)4:2:0, (string)4:2:2, (string)4:4:4 }\n bit-depth-luma: { (uint)8, (uint)10, (uint)12 
}\nbit-depth-chroma: { (uint)8, (uint)10, (uint)12 }\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\naudio/mpeg:\n mpegversion: 4\n stream-format: raw\n channels: [ 1, 65535 ]\n rate: [ 1, 2147483647 ]\naudio/x-opus:\nchannel-mapping-family: [ 0, 255 ]\n channels: [ 1, 8 ]\n rate: [ 1, 2147483647 ]\naudio/x-flac:\n framed: true\n channels: [ 1, 8 ]\n rate: [ 1, 655350 ]\n",
"direction": "sink", "direction": "sink",
"presence": "request", "presence": "request",
"type": "GstFMP4MuxPad" "type": "GstFMP4MuxPad"
@ -3318,7 +3318,7 @@
"klass": "Codec/Muxer", "klass": "Codec/Muxer",
"pad-templates": { "pad-templates": {
"sink_%%u": { "sink_%%u": {
"caps": "video/x-h264:\n stream-format: { (string)avc, (string)avc3 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-h265:\n stream-format: { (string)hvc1, (string)hev1 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-vp8:\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-vp9:\n profile: { (string)0, (string)1, (string)2, (string)3 }\n chroma-format: { (string)4:2:0, (string)4:2:2, (string)4:4:4 }\n bit-depth-luma: { (uint)8, (uint)10, (uint)12 }\nbit-depth-chroma: { (uint)8, (uint)10, (uint)12 }\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-av1:\n stream-format: obu-stream\n alignment: tu\n profile: { (string)main, (string)high, (string)professional }\n chroma-format: { (string)4:0:0, (string)4:2:0, (string)4:2:2, (string)4:4:4 }\n bit-depth-luma: { (uint)8, (uint)10, (uint)12 }\nbit-depth-chroma: { (uint)8, (uint)10, (uint)12 }\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\naudio/mpeg:\n mpegversion: 4\n stream-format: raw\n channels: [ 1, 65535 ]\n rate: [ 1, 2147483647 ]\naudio/x-opus:\nchannel-mapping-family: [ 0, 255 ]\n channels: [ 1, 8 ]\n rate: [ 1, 2147483647 ]\n", "caps": "video/x-h264:\n stream-format: { (string)avc, (string)avc3 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-h265:\n stream-format: { (string)hvc1, (string)hev1 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-vp8:\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-vp9:\n profile: { (string)0, (string)1, (string)2, (string)3 }\n chroma-format: { (string)4:2:0, (string)4:2:2, (string)4:4:4 }\n bit-depth-luma: { (uint)8, (uint)10, (uint)12 }\nbit-depth-chroma: { (uint)8, (uint)10, (uint)12 }\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-av1:\n stream-format: obu-stream\n alignment: tu\n profile: { (string)main, (string)high, (string)professional }\n chroma-format: { (string)4:0:0, (string)4:2:0, (string)4:2:2, (string)4:4:4 }\n bit-depth-luma: { (uint)8, (uint)10, (uint)12 
}\nbit-depth-chroma: { (uint)8, (uint)10, (uint)12 }\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\naudio/mpeg:\n mpegversion: 4\n stream-format: raw\n channels: [ 1, 65535 ]\n rate: [ 1, 2147483647 ]\naudio/x-opus:\nchannel-mapping-family: [ 0, 255 ]\n channels: [ 1, 8 ]\n rate: [ 1, 2147483647 ]\naudio/x-flac:\n framed: true\n channels: [ 1, 8 ]\n rate: [ 1, 655350 ]\n",
"direction": "sink", "direction": "sink",
"presence": "request", "presence": "request",
"type": "GstRsMP4MuxPad" "type": "GstRsMP4MuxPad"

View file

@ -9,6 +9,7 @@
use gst::prelude::*; use gst::prelude::*;
use anyhow::{anyhow, bail, Context, Error}; use anyhow::{anyhow, bail, Context, Error};
use std::convert::TryFrom;
use super::Buffer; use super::Buffer;
@ -604,9 +605,8 @@ fn write_tkhd(
// Volume // Volume
let s = stream.caps.structure(0).unwrap(); let s = stream.caps.structure(0).unwrap();
match s.name().as_str() { match s.name().as_str() {
"audio/mpeg" | "audio/x-opus" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { "audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw"
v.extend((1u16 << 8).to_be_bytes()) | "audio/x-adpcm" => v.extend((1u16 << 8).to_be_bytes()),
}
_ => v.extend(0u16.to_be_bytes()), _ => v.extend(0u16.to_be_bytes()),
} }
@ -745,9 +745,8 @@ fn write_hdlr(
let (handler_type, name) = match s.name().as_str() { let (handler_type, name) = match s.name().as_str() {
"video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9" | "video/x-av1" "video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9" | "video/x-av1"
| "image/jpeg" => (b"vide", b"VideoHandler\0".as_slice()), | "image/jpeg" => (b"vide", b"VideoHandler\0".as_slice()),
"audio/mpeg" | "audio/x-opus" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { "audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw"
(b"soun", b"SoundHandler\0".as_slice()) | "audio/x-adpcm" => (b"soun", b"SoundHandler\0".as_slice()),
}
"application/x-onvif-metadata" => (b"meta", b"MetadataHandler\0".as_slice()), "application/x-onvif-metadata" => (b"meta", b"MetadataHandler\0".as_slice()),
_ => unreachable!(), _ => unreachable!(),
}; };
@ -777,7 +776,8 @@ fn write_minf(
// Flags are always 1 for unspecified reasons // Flags are always 1 for unspecified reasons
write_full_box(v, b"vmhd", FULL_BOX_VERSION_0, 1, |v| write_vmhd(v, cfg))? write_full_box(v, b"vmhd", FULL_BOX_VERSION_0, 1, |v| write_vmhd(v, cfg))?
} }
"audio/mpeg" | "audio/x-opus" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { "audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw"
| "audio/x-adpcm" => {
write_full_box(v, b"smhd", FULL_BOX_VERSION_0, FULL_BOX_FLAGS_NONE, |v| { write_full_box(v, b"smhd", FULL_BOX_VERSION_0, FULL_BOX_FLAGS_NONE, |v| {
write_smhd(v, cfg) write_smhd(v, cfg)
})? })?
@ -886,9 +886,8 @@ fn write_stsd(
match s.name().as_str() { match s.name().as_str() {
"video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9" | "video/x-av1" "video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9" | "video/x-av1"
| "image/jpeg" => write_visual_sample_entry(v, cfg, stream)?, | "image/jpeg" => write_visual_sample_entry(v, cfg, stream)?,
"audio/mpeg" | "audio/x-opus" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { "audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw"
write_audio_sample_entry(v, cfg, stream)? | "audio/x-adpcm" => write_audio_sample_entry(v, cfg, stream)?,
}
"application/x-onvif-metadata" => write_xml_meta_data_sample_entry(v, cfg, stream)?, "application/x-onvif-metadata" => write_xml_meta_data_sample_entry(v, cfg, stream)?,
_ => unreachable!(), _ => unreachable!(),
} }
@ -1262,6 +1261,7 @@ fn write_audio_sample_entry(
let fourcc = match s.name().as_str() { let fourcc = match s.name().as_str() {
"audio/mpeg" => b"mp4a", "audio/mpeg" => b"mp4a",
"audio/x-opus" => b"Opus", "audio/x-opus" => b"Opus",
"audio/x-flac" => b"fLaC",
"audio/x-alaw" => b"alaw", "audio/x-alaw" => b"alaw",
"audio/x-mulaw" => b"ulaw", "audio/x-mulaw" => b"ulaw",
"audio/x-adpcm" => { "audio/x-adpcm" => {
@ -1280,6 +1280,10 @@ fn write_audio_sample_entry(
let bitrate = s.get::<i32>("bitrate").context("no ADPCM bitrate field")?; let bitrate = s.get::<i32>("bitrate").context("no ADPCM bitrate field")?;
(bitrate / 8000) as u16 (bitrate / 8000) as u16
} }
"audio/x-flac" => with_flac_metadata(&stream.caps, |streaminfo, _| {
1 + (u16::from_be_bytes([streaminfo[16], streaminfo[17]]) >> 4 & 0b11111)
})
.context("FLAC metadata error")?,
_ => 16u16, _ => 16u16,
}; };
@ -1322,6 +1326,9 @@ fn write_audio_sample_entry(
"audio/x-opus" => { "audio/x-opus" => {
write_dops(v, &stream.caps)?; write_dops(v, &stream.caps)?;
} }
"audio/x-flac" => {
write_dfla(v, &stream.caps)?;
}
"audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => {
// Nothing to do here // Nothing to do here
} }
@ -1516,6 +1523,35 @@ fn write_dops(v: &mut Vec<u8>, caps: &gst::Caps) -> Result<(), Error> {
}) })
} }
/// Extracts the FLAC stream information from the `streamheader` field of `caps` and passes it,
/// together with the remaining `streamheader` entries, to `cb`.
///
/// The first `streamheader` entry has to be a buffer of exactly 13 + 38 bytes that starts with
/// `\x7FFLAC\x01\x00`. `cb` receives the trailing 38 bytes as its first argument and all
/// following `streamheader` entries, untouched, as its second argument.
///
/// # Errors
///
/// Returns an error (instead of panicking) if the caps have no structure, if the `streamheader`
/// field is missing, empty or does not start with a buffer, if that buffer can't be mapped
/// readable, or if its contents do not have the expected size and prefix.
fn with_flac_metadata<R>(
    caps: &gst::Caps,
    cb: impl FnOnce(&[u8], &[gst::glib::SendValue]) -> R,
) -> Result<R, Error> {
    let s = caps.structure(0).context("FLAC caps have no structure")?;
    let header = s
        .get::<gst::ArrayRef>("streamheader")
        .context("no FLAC streamheader field")?;
    // The caps are provided by upstream: nothing guarantees that the array is non-empty or that
    // it actually holds buffers, so report an error instead of unwrapping.
    let (streaminfo, remainder) = header
        .as_ref()
        .split_first()
        .context("empty FLAC streamheader")?;
    let streaminfo = streaminfo
        .get::<&gst::BufferRef>()
        .context("first FLAC streamheader entry is not a buffer")?;
    let streaminfo = streaminfo
        .map_readable()
        .context("failed to map FLAC streaminfo buffer readable")?;
    // 13 bytes for the Ogg/FLAC prefix and 38 for the streaminfo itself.
    match <&[_; 13 + 38]>::try_from(streaminfo.as_slice()) {
        Ok(i) if i.starts_with(b"\x7FFLAC\x01\x00") => Ok(cb(&i[13..], remainder)),
        Ok(_) | Err(_) => bail!("Unknown streamheader format"),
    }
}
fn write_dfla(v: &mut Vec<u8>, caps: &gst::Caps) -> Result<(), Error> {
write_full_box(v, b"dfLa", 0, 0, move |v| {
with_flac_metadata(caps, |streaminfo, remainder| {
v.extend(streaminfo);
for metadata in remainder {
let metadata = metadata.get::<&gst::BufferRef>().unwrap();
let metadata = metadata.map_readable().unwrap();
v.extend(&metadata[..]);
}
})
})
}
fn write_xml_meta_data_sample_entry( fn write_xml_meta_data_sample_entry(
v: &mut Vec<u8>, v: &mut Vec<u8>,
_cfg: &super::HeaderConfiguration, _cfg: &super::HeaderConfiguration,

View file

@ -205,6 +205,8 @@ struct Stream {
caps: gst::Caps, caps: gst::Caps,
/// Whether this stream is intra-only and has frame reordering. /// Whether this stream is intra-only and has frame reordering.
delta_frames: DeltaFrames, delta_frames: DeltaFrames,
/// Whether this stream might have header frames without timestamps that should be ignored.
discard_header_buffers: bool,
/// Currently queued GOPs, including incomplete ones. /// Currently queued GOPs, including incomplete ones.
queued_gops: VecDeque<Gop>, queued_gops: VecDeque<Gop>,
@ -271,11 +273,17 @@ pub(crate) struct FMP4Mux {
impl FMP4Mux { impl FMP4Mux {
/// Checks if a buffer is valid according to the stream configuration. /// Checks if a buffer is valid according to the stream configuration.
fn check_buffer( fn check_buffer(buffer: &gst::BufferRef, stream: &Stream) -> Result<(), gst::FlowError> {
buffer: &gst::BufferRef, let Stream {
sinkpad: &super::FMP4MuxPad, sinkpad,
delta_frames: super::DeltaFrames, delta_frames,
) -> Result<(), gst::FlowError> { discard_header_buffers,
..
} = stream;
if *discard_header_buffers && buffer.flags().contains(gst::BufferFlags::HEADER) {
return Err(gst_base::AGGREGATOR_FLOW_NEED_DATA);
}
if delta_frames.requires_dts() && buffer.dts().is_none() { if delta_frames.requires_dts() && buffer.dts().is_none() {
gst::error!(CAT, obj: sinkpad, "Require DTS for video streams"); gst::error!(CAT, obj: sinkpad, "Require DTS for video streams");
return Err(gst::FlowError::Error); return Err(gst::FlowError::Error);
@ -314,12 +322,10 @@ impl FMP4Mux {
} }
// Pop buffer here, it will be stored in the pre-queue after calculating its timestamps // Pop buffer here, it will be stored in the pre-queue after calculating its timestamps
let mut buffer = match stream.sinkpad.pop_buffer() { let Some(mut buffer) = stream.sinkpad.pop_buffer() else {
None => return Ok(None), return Ok(None);
Some(buffer) => buffer,
}; };
Self::check_buffer(&buffer, stream)?;
Self::check_buffer(&buffer, &stream.sinkpad, stream.delta_frames)?;
let segment = match stream.sinkpad.segment().downcast::<gst::ClockTime>().ok() { let segment = match stream.sinkpad.segment().downcast::<gst::ClockTime>().ok() {
Some(segment) => segment, Some(segment) => segment,
@ -2555,6 +2561,7 @@ impl FMP4Mux {
let s = caps.structure(0).unwrap(); let s = caps.structure(0).unwrap();
let mut delta_frames = DeltaFrames::IntraOnly; let mut delta_frames = DeltaFrames::IntraOnly;
let mut discard_header_buffers = false;
match s.name().as_str() { match s.name().as_str() {
"video/x-h264" | "video/x-h265" => { "video/x-h264" | "video/x-h265" => {
if !s.has_field_with_type("codec_data", gst::Buffer::static_type()) { if !s.has_field_with_type("codec_data", gst::Buffer::static_type()) {
@ -2598,6 +2605,13 @@ impl FMP4Mux {
return Err(gst::FlowError::NotNegotiated); return Err(gst::FlowError::NotNegotiated);
} }
} }
"audio/x-flac" => {
discard_header_buffers = true;
if let Err(e) = s.get::<gst::ArrayRef>("streamheader") {
gst::error!(CAT, obj: pad, "Muxing FLAC into MP4 needs streamheader: {}", e);
return Err(gst::FlowError::NotNegotiated);
};
}
"audio/x-alaw" | "audio/x-mulaw" => (), "audio/x-alaw" | "audio/x-mulaw" => (),
"audio/x-adpcm" => (), "audio/x-adpcm" => (),
"application/x-onvif-metadata" => (), "application/x-onvif-metadata" => (),
@ -2608,6 +2622,7 @@ impl FMP4Mux {
sinkpad: pad, sinkpad: pad,
caps, caps,
delta_frames, delta_frames,
discard_header_buffers,
pre_queue: VecDeque::new(), pre_queue: VecDeque::new(),
queued_gops: VecDeque::new(), queued_gops: VecDeque::new(),
fragment_filled: false, fragment_filled: false,
@ -3465,6 +3480,11 @@ impl ElementImpl for ISOFMP4Mux {
.field("channels", gst::IntRange::new(1i32, 8)) .field("channels", gst::IntRange::new(1i32, 8))
.field("rate", gst::IntRange::new(1, i32::MAX)) .field("rate", gst::IntRange::new(1, i32::MAX))
.build(), .build(),
gst::Structure::builder("audio/x-flac")
.field("framed", true)
.field("channels", gst::IntRange::<i32>::new(1, 8))
.field("rate", gst::IntRange::<i32>::new(1, 10 * u16::MAX as i32))
.build(),
] ]
.into_iter() .into_iter()
.collect::<gst::Caps>(), .collect::<gst::Caps>(),

View file

@ -19,6 +19,33 @@ fn init() {
}); });
} }
/// Runs `pipeline` until it reports end-of-stream and shuts it down again.
///
/// The pipeline is switched to `Playing`, then its bus is polled without a timeout. The first
/// EOS message ends the loop; every other non-error message is ignored. Afterwards the pipeline
/// is set back to `Null`.
///
/// # Panics
///
/// Panics if a state change fails, if the pipeline has no bus, or as soon as an error message
/// is posted on the bus.
fn to_completion(pipeline: &gst::Pipeline) {
    use gst::MessageView;

    pipeline
        .set_state(gst::State::Playing)
        .expect("Unable to set the pipeline to the `Playing` state");

    let bus = pipeline.bus().unwrap();
    for message in bus.iter_timed(gst::ClockTime::NONE) {
        match message.view() {
            MessageView::Error(failure) => panic!(
                "Error from {:?}: {} ({:?})",
                failure.src().map(|s| s.path_string()),
                failure.error(),
                failure.debug()
            ),
            MessageView::Eos(..) => break,
            _ => {}
        }
    }

    pipeline
        .set_state(gst::State::Null)
        .expect("Unable to set the pipeline to the `Null` state");
}
fn test_buffer_flags_single_stream(cmaf: bool, set_dts: bool, caps: gst::Caps) { fn test_buffer_flags_single_stream(cmaf: bool, set_dts: bool, caps: gst::Caps) {
let mut h = if cmaf { let mut h = if cmaf {
gst_check::Harness::new("cmafmux") gst_check::Harness::new("cmafmux")
@ -1993,3 +2020,21 @@ fn test_chunking_single_stream_gops_after_fragment_end_after_next_chunk_end() {
let ev = h.pull_event().unwrap(); let ev = h.pull_event().unwrap();
assert_eq!(ev.type_(), gst::EventType::Eos); assert_eq!(ev.type_(), gst::EventType::Eos);
} }
/// Muxes VP9 video and FLAC audio from test sources with `isofmp4mux`, demuxes the result with
/// `qtdemux` and decodes both streams again. The test passes when the pipeline reaches EOS
/// without posting an error.
#[test]
fn test_roundtrip_vp9_flac() {
    init();

    // The pipeline description is kept flush-left so that the string contents stay unchanged.
    let description = r#"
videotestsrc num-buffers=99 ! vp9enc ! vp9parse ! mux.
audiotestsrc num-buffers=149 ! flacenc ! flacparse ! mux.
isofmp4mux name=mux ! qtdemux name=demux
demux.audio_0 ! queue ! flacdec ! fakesink
demux.video_0 ! queue ! vp9dec ! fakesink
"#;

    let element = gst::parse::launch(description).unwrap();
    let pipeline = element.downcast::<gst::Pipeline>().unwrap();
    to_completion(&pipeline);
}

View file

@ -9,7 +9,7 @@
use gst::prelude::*; use gst::prelude::*;
use anyhow::{anyhow, bail, Context, Error}; use anyhow::{anyhow, bail, Context, Error};
use std::convert::TryFrom;
use std::str::FromStr; use std::str::FromStr;
fn write_box<T, F: FnOnce(&mut Vec<u8>) -> Result<T, Error>>( fn write_box<T, F: FnOnce(&mut Vec<u8>) -> Result<T, Error>>(
@ -382,9 +382,8 @@ fn write_tkhd(
// Volume // Volume
let s = stream.caps.structure(0).unwrap(); let s = stream.caps.structure(0).unwrap();
match s.name().as_str() { match s.name().as_str() {
"audio/mpeg" | "audio/x-opus" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { "audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw"
v.extend((1u16 << 8).to_be_bytes()) | "audio/x-adpcm" => v.extend((1u16 << 8).to_be_bytes()),
}
_ => v.extend(0u16.to_be_bytes()), _ => v.extend(0u16.to_be_bytes()),
} }
@ -514,9 +513,8 @@ fn write_hdlr(
let (handler_type, name) = match s.name().as_str() { let (handler_type, name) = match s.name().as_str() {
"video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9" | "video/x-av1" "video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9" | "video/x-av1"
| "image/jpeg" => (b"vide", b"VideoHandler\0".as_slice()), | "image/jpeg" => (b"vide", b"VideoHandler\0".as_slice()),
"audio/mpeg" | "audio/x-opus" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { "audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw"
(b"soun", b"SoundHandler\0".as_slice()) | "audio/x-adpcm" => (b"soun", b"SoundHandler\0".as_slice()),
}
"application/x-onvif-metadata" => (b"meta", b"MetadataHandler\0".as_slice()), "application/x-onvif-metadata" => (b"meta", b"MetadataHandler\0".as_slice()),
_ => unreachable!(), _ => unreachable!(),
}; };
@ -546,7 +544,8 @@ fn write_minf(
// Flags are always 1 for unspecified reasons // Flags are always 1 for unspecified reasons
write_full_box(v, b"vmhd", FULL_BOX_VERSION_0, 1, |v| write_vmhd(v, header))? write_full_box(v, b"vmhd", FULL_BOX_VERSION_0, 1, |v| write_vmhd(v, header))?
} }
"audio/mpeg" | "audio/x-opus" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { "audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw"
| "audio/x-adpcm" => {
write_full_box(v, b"smhd", FULL_BOX_VERSION_0, FULL_BOX_FLAGS_NONE, |v| { write_full_box(v, b"smhd", FULL_BOX_VERSION_0, FULL_BOX_FLAGS_NONE, |v| {
write_smhd(v, header) write_smhd(v, header)
})? })?
@ -703,9 +702,8 @@ fn write_stsd(
match s.name().as_str() { match s.name().as_str() {
"video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9" | "video/x-av1" "video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9" | "video/x-av1"
| "image/jpeg" => write_visual_sample_entry(v, header, stream)?, | "image/jpeg" => write_visual_sample_entry(v, header, stream)?,
"audio/mpeg" | "audio/x-opus" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { "audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw"
write_audio_sample_entry(v, header, stream)? | "audio/x-adpcm" => write_audio_sample_entry(v, header, stream)?,
}
"application/x-onvif-metadata" => write_xml_meta_data_sample_entry(v, header, stream)?, "application/x-onvif-metadata" => write_xml_meta_data_sample_entry(v, header, stream)?,
_ => unreachable!(), _ => unreachable!(),
} }
@ -1079,6 +1077,7 @@ fn write_audio_sample_entry(
let fourcc = match s.name().as_str() { let fourcc = match s.name().as_str() {
"audio/mpeg" => b"mp4a", "audio/mpeg" => b"mp4a",
"audio/x-opus" => b"Opus", "audio/x-opus" => b"Opus",
"audio/x-flac" => b"fLaC",
"audio/x-alaw" => b"alaw", "audio/x-alaw" => b"alaw",
"audio/x-mulaw" => b"ulaw", "audio/x-mulaw" => b"ulaw",
"audio/x-adpcm" => { "audio/x-adpcm" => {
@ -1097,6 +1096,10 @@ fn write_audio_sample_entry(
let bitrate = s.get::<i32>("bitrate").context("no ADPCM bitrate field")?; let bitrate = s.get::<i32>("bitrate").context("no ADPCM bitrate field")?;
(bitrate / 8000) as u16 (bitrate / 8000) as u16
} }
"audio/x-flac" => with_flac_metadata(&stream.caps, |streaminfo, _| {
1 + (u16::from_be_bytes([streaminfo[16], streaminfo[17]]) >> 4 & 0b11111)
})
.context("FLAC metadata error")?,
_ => 16u16, _ => 16u16,
}; };
@ -1139,6 +1142,9 @@ fn write_audio_sample_entry(
"audio/x-opus" => { "audio/x-opus" => {
write_dops(v, &stream.caps)?; write_dops(v, &stream.caps)?;
} }
"audio/x-flac" => {
write_dfla(v, &stream.caps)?;
}
"audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => {
// Nothing to do here // Nothing to do here
} }
@ -1333,6 +1339,35 @@ fn write_dops(v: &mut Vec<u8>, caps: &gst::Caps) -> Result<(), Error> {
}) })
} }
/// Extracts the FLAC stream information from the `streamheader` field of `caps` and passes it,
/// together with the remaining `streamheader` entries, to `cb`.
///
/// The first `streamheader` entry has to be a buffer of exactly 13 + 38 bytes that starts with
/// `\x7FFLAC\x01\x00`. `cb` receives the trailing 38 bytes as its first argument and all
/// following `streamheader` entries, untouched, as its second argument.
///
/// # Errors
///
/// Returns an error (instead of panicking) if the caps have no structure, if the `streamheader`
/// field is missing, empty or does not start with a buffer, if that buffer can't be mapped
/// readable, or if its contents do not have the expected size and prefix.
fn with_flac_metadata<R>(
    caps: &gst::Caps,
    cb: impl FnOnce(&[u8], &[gst::glib::SendValue]) -> R,
) -> Result<R, Error> {
    let s = caps.structure(0).context("FLAC caps have no structure")?;
    let header = s
        .get::<gst::ArrayRef>("streamheader")
        .context("no FLAC streamheader field")?;
    // The caps are provided by upstream: nothing guarantees that the array is non-empty or that
    // it actually holds buffers, so report an error instead of unwrapping.
    let (streaminfo, remainder) = header
        .as_ref()
        .split_first()
        .context("empty FLAC streamheader")?;
    let streaminfo = streaminfo
        .get::<&gst::BufferRef>()
        .context("first FLAC streamheader entry is not a buffer")?;
    let streaminfo = streaminfo
        .map_readable()
        .context("failed to map FLAC streaminfo buffer readable")?;
    // 13 bytes for the Ogg/FLAC prefix and 38 for the streaminfo itself.
    match <&[_; 13 + 38]>::try_from(streaminfo.as_slice()) {
        Ok(i) if i.starts_with(b"\x7FFLAC\x01\x00") => Ok(cb(&i[13..], remainder)),
        Ok(_) | Err(_) => bail!("Unknown streamheader format"),
    }
}
fn write_dfla(v: &mut Vec<u8>, caps: &gst::Caps) -> Result<(), Error> {
write_full_box(v, b"dfLa", 0, 0, move |v| {
with_flac_metadata(caps, |streaminfo, remainder| {
v.extend(streaminfo);
for metadata in remainder {
let metadata = metadata.get::<&gst::BufferRef>().unwrap();
let metadata = metadata.map_readable().unwrap();
v.extend(&metadata[..]);
}
})
})
}
fn write_xml_meta_data_sample_entry( fn write_xml_meta_data_sample_entry(
v: &mut Vec<u8>, v: &mut Vec<u8>,
_header: &super::Header, _header: &super::Header,

View file

@ -108,6 +108,8 @@ struct Stream {
caps: gst::Caps, caps: gst::Caps,
/// Whether this stream is intra-only and has frame reordering. /// Whether this stream is intra-only and has frame reordering.
delta_frames: super::DeltaFrames, delta_frames: super::DeltaFrames,
/// Whether this stream might have header frames without timestamps that should be ignored.
discard_header_buffers: bool,
/// Already written out chunks with their samples for this stream /// Already written out chunks with their samples for this stream
chunks: Vec<super::Chunk>, chunks: Vec<super::Chunk>,
@ -165,7 +167,12 @@ impl MP4Mux {
buffer: &gst::BufferRef, buffer: &gst::BufferRef,
sinkpad: &super::MP4MuxPad, sinkpad: &super::MP4MuxPad,
delta_frames: super::DeltaFrames, delta_frames: super::DeltaFrames,
discard_headers: bool,
) -> Result<(), gst::FlowError> { ) -> Result<(), gst::FlowError> {
if discard_headers && buffer.flags().contains(gst::BufferFlags::HEADER) {
return Err(gst_base::AGGREGATOR_FLOW_NEED_DATA);
}
if delta_frames.requires_dts() && buffer.dts().is_none() { if delta_frames.requires_dts() && buffer.dts().is_none() {
gst::error!(CAT, obj: sinkpad, "Require DTS for video streams"); gst::error!(CAT, obj: sinkpad, "Require DTS for video streams");
return Err(gst::FlowError::Error); return Err(gst::FlowError::Error);
@ -188,6 +195,7 @@ impl MP4Mux {
&self, &self,
sinkpad: &super::MP4MuxPad, sinkpad: &super::MP4MuxPad,
delta_frames: super::DeltaFrames, delta_frames: super::DeltaFrames,
discard_headers: bool,
pre_queue: &mut VecDeque<(gst::FormattedSegment<gst::ClockTime>, gst::Buffer)>, pre_queue: &mut VecDeque<(gst::FormattedSegment<gst::ClockTime>, gst::Buffer)>,
running_time_utc_time_mapping: &Option<(gst::Signed<gst::ClockTime>, gst::ClockTime)>, running_time_utc_time_mapping: &Option<(gst::Signed<gst::ClockTime>, gst::ClockTime)>,
) -> Result<Option<(gst::FormattedSegment<gst::ClockTime>, gst::Buffer)>, gst::FlowError> { ) -> Result<Option<(gst::FormattedSegment<gst::ClockTime>, gst::Buffer)>, gst::FlowError> {
@ -195,13 +203,10 @@ impl MP4Mux {
return Ok(Some((segment.clone(), buffer.clone()))); return Ok(Some((segment.clone(), buffer.clone())));
} }
let mut buffer = match sinkpad.peek_buffer() { let Some(mut buffer) = sinkpad.peek_buffer() else {
None => return Ok(None), return Ok(None);
Some(buffer) => buffer,
}; };
Self::check_buffer(&buffer, sinkpad, delta_frames, discard_headers)?;
Self::check_buffer(&buffer, sinkpad, delta_frames)?;
let mut segment = match sinkpad.segment().downcast::<gst::ClockTime>().ok() { let mut segment = match sinkpad.segment().downcast::<gst::ClockTime>().ok() {
Some(segment) => segment, Some(segment) => segment,
None => { None => {
@ -276,19 +281,20 @@ impl MP4Mux {
fn pop_buffer( fn pop_buffer(
&self, &self,
sinkpad: &super::MP4MuxPad, stream: &mut Stream,
delta_frames: super::DeltaFrames,
pre_queue: &mut VecDeque<(gst::FormattedSegment<gst::ClockTime>, gst::Buffer)>,
running_time_utc_time_mapping: &mut Option<(gst::Signed<gst::ClockTime>, gst::ClockTime)>,
) -> Result<Option<(gst::FormattedSegment<gst::ClockTime>, gst::Buffer)>, gst::FlowError> { ) -> Result<Option<(gst::FormattedSegment<gst::ClockTime>, gst::Buffer)>, gst::FlowError> {
let Stream {
sinkpad, pre_queue, ..
} = stream;
// In ONVIF mode we need to get UTC times for each buffer and synchronize based on that. // In ONVIF mode we need to get UTC times for each buffer and synchronize based on that.
// Queue up to 6s of data to get the first UTC time and then backdate. // Queue up to 6s of data to get the first UTC time and then backdate.
if self.obj().class().as_ref().variant == super::Variant::ONVIF if self.obj().class().as_ref().variant == super::Variant::ONVIF
&& running_time_utc_time_mapping.is_none() && stream.running_time_utc_time_mapping.is_none()
{ {
if let Some((last, first)) = Option::zip(pre_queue.back(), pre_queue.front()) { if let Some((last, first)) = Option::zip(pre_queue.back(), pre_queue.front()) {
// Existence of PTS/DTS checked below // Existence of PTS/DTS checked below
let (last, first) = if delta_frames.requires_dts() { let (last, first) = if stream.delta_frames.requires_dts() {
( (
last.0.to_running_time_full(last.1.dts()).unwrap(), last.0.to_running_time_full(last.1.dts()).unwrap(),
first.0.to_running_time_full(first.1.dts()).unwrap(), first.0.to_running_time_full(first.1.dts()).unwrap(),
@ -312,19 +318,20 @@ impl MP4Mux {
} }
} }
let buffer = match sinkpad.pop_buffer() { let Some(buffer) = sinkpad.pop_buffer() else {
None => {
if sinkpad.is_eos() { if sinkpad.is_eos() {
gst::error!(CAT, obj: sinkpad, "Got no UTC time before EOS"); gst::error!(CAT, obj: sinkpad, "Got no UTC time before EOS");
return Err(gst::FlowError::Error); return Err(gst::FlowError::Error);
} else { } else {
return Err(gst_base::AGGREGATOR_FLOW_NEED_DATA); return Err(gst_base::AGGREGATOR_FLOW_NEED_DATA);
} }
}
Some(buffer) => buffer,
}; };
Self::check_buffer(
Self::check_buffer(&buffer, sinkpad, delta_frames)?; &buffer,
sinkpad,
stream.delta_frames,
stream.discard_header_buffers,
)?;
let segment = match sinkpad.segment().downcast::<gst::ClockTime>().ok() { let segment = match sinkpad.segment().downcast::<gst::ClockTime>().ok() {
Some(segment) => segment, Some(segment) => segment,
@ -350,7 +357,7 @@ impl MP4Mux {
); );
let mapping = (running_time, utc_time); let mapping = (running_time, utc_time);
*running_time_utc_time_mapping = Some(mapping); stream.running_time_utc_time_mapping = Some(mapping);
// Push the buffer onto the pre-queue and re-timestamp it and all other buffers // Push the buffer onto the pre-queue and re-timestamp it and all other buffers
// based on the mapping above. // based on the mapping above.
@ -391,7 +398,7 @@ impl MP4Mux {
// Fall through below and pop the first buffer finally // Fall through below and pop the first buffer finally
} }
if let Some((segment, buffer)) = pre_queue.pop_front() { if let Some((segment, buffer)) = stream.pre_queue.pop_front() {
return Ok(Some((segment, buffer))); return Ok(Some((segment, buffer)));
} }
@ -400,23 +407,26 @@ impl MP4Mux {
// for calculating the duration to the previous buffer, and then put into the pre-queue // for calculating the duration to the previous buffer, and then put into the pre-queue
// - or this is the very first buffer and we just put it into the queue ourselves above // - or this is the very first buffer and we just put it into the queue ourselves above
if self.obj().class().as_ref().variant == super::Variant::ONVIF { if self.obj().class().as_ref().variant == super::Variant::ONVIF {
if sinkpad.is_eos() { if stream.sinkpad.is_eos() {
return Ok(None); return Ok(None);
} }
unreachable!(); unreachable!();
} }
let buffer = match sinkpad.pop_buffer() { let Some(buffer) = stream.sinkpad.pop_buffer() else {
None => return Ok(None), return Ok(None);
Some(buffer) => buffer,
}; };
Self::check_buffer(
&buffer,
&stream.sinkpad,
stream.delta_frames,
stream.discard_header_buffers,
)?;
Self::check_buffer(&buffer, sinkpad, delta_frames)?; let segment = match stream.sinkpad.segment().downcast::<gst::ClockTime>().ok() {
let segment = match sinkpad.segment().downcast::<gst::ClockTime>().ok() {
Some(segment) => segment, Some(segment) => segment,
None => { None => {
gst::error!(CAT, obj: sinkpad, "Got buffer before segment"); gst::error!(CAT, obj: stream.sinkpad, "Got buffer before segment");
return Err(gst::FlowError::Error); return Err(gst::FlowError::Error);
} }
}; };
@ -442,6 +452,12 @@ impl MP4Mux {
Some(PendingBuffer { Some(PendingBuffer {
duration: Some(_), .. duration: Some(_), ..
}) => return Ok(()), }) => return Ok(()),
Some(PendingBuffer { ref buffer, .. })
if stream.discard_header_buffers
&& buffer.flags().contains(gst::BufferFlags::HEADER) =>
{
return Err(gst_base::AGGREGATOR_FLOW_NEED_DATA);
}
Some(PendingBuffer { Some(PendingBuffer {
timestamp, timestamp,
pts, pts,
@ -449,13 +465,15 @@ impl MP4Mux {
ref mut duration, ref mut duration,
.. ..
}) => { }) => {
// Already have a pending buffer but no duration, so try to get that now let peek_outcome = self.peek_buffer(
let (segment, buffer) = match self.peek_buffer(
&stream.sinkpad, &stream.sinkpad,
stream.delta_frames, stream.delta_frames,
stream.discard_header_buffers,
&mut stream.pre_queue, &mut stream.pre_queue,
&stream.running_time_utc_time_mapping, &stream.running_time_utc_time_mapping,
)? { )?;
// Already have a pending buffer but no duration, so try to get that now
let (segment, buffer) = match peek_outcome {
Some(res) => res, Some(res) => res,
None => { None => {
if stream.sinkpad.is_eos() { if stream.sinkpad.is_eos() {
@ -532,12 +550,7 @@ impl MP4Mux {
None => { None => {
// Have no buffer queued at all yet // Have no buffer queued at all yet
let (segment, buffer) = match self.pop_buffer( let (segment, buffer) = match self.pop_buffer(stream)? {
&stream.sinkpad,
stream.delta_frames,
&mut stream.pre_queue,
&mut stream.running_time_utc_time_mapping,
)? {
Some(res) => res, Some(res) => res,
None => { None => {
if stream.sinkpad.is_eos() { if stream.sinkpad.is_eos() {
@ -870,6 +883,7 @@ impl MP4Mux {
let s = caps.structure(0).unwrap(); let s = caps.structure(0).unwrap();
let mut delta_frames = super::DeltaFrames::IntraOnly; let mut delta_frames = super::DeltaFrames::IntraOnly;
let mut discard_header_buffers = false;
match s.name().as_str() { match s.name().as_str() {
"video/x-h264" | "video/x-h265" => { "video/x-h264" | "video/x-h265" => {
if !s.has_field_with_type("codec_data", gst::Buffer::static_type()) { if !s.has_field_with_type("codec_data", gst::Buffer::static_type()) {
@ -913,6 +927,13 @@ impl MP4Mux {
return Err(gst::FlowError::NotNegotiated); return Err(gst::FlowError::NotNegotiated);
} }
} }
"audio/x-flac" => {
discard_header_buffers = true;
if let Err(e) = s.get::<gst::ArrayRef>("streamheader") {
gst::error!(CAT, obj: pad, "Muxing FLAC into MP4 needs streamheader: {}", e);
return Err(gst::FlowError::NotNegotiated);
};
}
"audio/x-alaw" | "audio/x-mulaw" => (), "audio/x-alaw" | "audio/x-mulaw" => (),
"audio/x-adpcm" => (), "audio/x-adpcm" => (),
"application/x-onvif-metadata" => (), "application/x-onvif-metadata" => (),
@ -924,6 +945,7 @@ impl MP4Mux {
pre_queue: VecDeque::new(), pre_queue: VecDeque::new(),
caps, caps,
delta_frames, delta_frames,
discard_header_buffers,
chunks: Vec::new(), chunks: Vec::new(),
pending_buffer: None, pending_buffer: None,
queued_chunk_time: gst::ClockTime::ZERO, queued_chunk_time: gst::ClockTime::ZERO,
@ -1523,6 +1545,11 @@ impl ElementImpl for ISOMP4Mux {
.field("channels", gst::IntRange::new(1i32, 8)) .field("channels", gst::IntRange::new(1i32, 8))
.field("rate", gst::IntRange::new(1, i32::MAX)) .field("rate", gst::IntRange::new(1, i32::MAX))
.build(), .build(),
gst::Structure::builder("audio/x-flac")
.field("framed", true)
.field("channels", gst::IntRange::<i32>::new(1, 8))
.field("rate", gst::IntRange::<i32>::new(1, 10 * u16::MAX as i32))
.build(),
] ]
.into_iter() .into_iter()
.collect::<gst::Caps>(), .collect::<gst::Caps>(),

View file

@ -7,6 +7,8 @@
// SPDX-License-Identifier: MPL-2.0 // SPDX-License-Identifier: MPL-2.0
// //
use std::path::Path;
use gst::prelude::*; use gst::prelude::*;
use gst_pbutils::prelude::*; use gst_pbutils::prelude::*;
@ -20,46 +22,26 @@ fn init() {
}); });
} }
#[test] struct Pipeline(gst::Pipeline);
fn test_basic() { impl std::ops::Deref for Pipeline {
init();
struct Pipeline(gst::Pipeline);
impl std::ops::Deref for Pipeline {
type Target = gst::Pipeline; type Target = gst::Pipeline;
fn deref(&self) -> &Self::Target { fn deref(&self) -> &Self::Target {
&self.0 &self.0
} }
} }
impl Drop for Pipeline { impl Drop for Pipeline {
fn drop(&mut self) { fn drop(&mut self) {
let _ = self.0.set_state(gst::State::Null); let _ = self.0.set_state(gst::State::Null);
} }
} }
let pipeline = match gst::parse::launch( impl Pipeline {
"videotestsrc num-buffers=99 ! x264enc ! mux. \ fn into_completion(self) {
audiotestsrc num-buffers=140 ! fdkaacenc ! mux. \ self.set_state(gst::State::Playing)
isomp4mux name=mux ! filesink name=sink \
",
) {
Ok(pipeline) => Pipeline(pipeline.downcast::<gst::Pipeline>().unwrap()),
Err(_) => return,
};
let dir = tempfile::TempDir::new().unwrap();
let mut location = dir.path().to_owned();
location.push("test.mp4");
let sink = pipeline.by_name("sink").unwrap();
sink.set_property("location", location.to_str().expect("Non-UTF8 filename"));
pipeline
.set_state(gst::State::Playing)
.expect("Unable to set the pipeline to the `Playing` state"); .expect("Unable to set the pipeline to the `Playing` state");
for msg in pipeline.bus().unwrap().iter_timed(gst::ClockTime::NONE) { for msg in self.bus().unwrap().iter_timed(gst::ClockTime::NONE) {
use gst::MessageView; use gst::MessageView;
match msg.view() { match msg.view() {
@ -76,17 +58,42 @@ fn test_basic() {
} }
} }
pipeline self.set_state(gst::State::Null)
.set_state(gst::State::Null)
.expect("Unable to set the pipeline to the `Null` state"); .expect("Unable to set the pipeline to the `Null` state");
}
}
drop(pipeline); fn test_basic_with(video_enc: &str, audio_enc: &str, cb: impl FnOnce(&Path)) {
let Ok(pipeline) = gst::parse::launch(&format!(
"videotestsrc num-buffers=99 ! {video_enc} ! mux. \
audiotestsrc num-buffers=140 ! {audio_enc} ! mux. \
isomp4mux name=mux ! filesink name=sink"
)) else {
println!("could not build encoding pipeline");
return;
};
let pipeline = Pipeline(pipeline.downcast::<gst::Pipeline>().unwrap());
let dir = tempfile::TempDir::new().unwrap();
let mut location = dir.path().to_owned();
location.push("test.mp4");
let sink = pipeline.by_name("sink").unwrap();
sink.set_property("location", location.to_str().expect("Non-UTF8 filename"));
pipeline.into_completion();
cb(&location)
}
#[test]
fn test_basic_x264_aac() {
init();
test_basic_with("x264enc", "fdkaacenc", |location| {
let discoverer = gst_pbutils::Discoverer::new(gst::ClockTime::from_seconds(5)) let discoverer = gst_pbutils::Discoverer::new(gst::ClockTime::from_seconds(5))
.expect("Failed to create discoverer"); .expect("Failed to create discoverer");
let info = discoverer let info = discoverer
.discover_uri( .discover_uri(
url::Url::from_file_path(&location) url::Url::from_file_path(location)
.expect("Failed to convert filename to URL") .expect("Failed to convert filename to URL")
.as_str(), .as_str(),
) )
@ -123,4 +130,25 @@ fn test_basic() {
caps.can_intersect(&gst::Caps::builder("video/x-h264").any_features().build()), caps.can_intersect(&gst::Caps::builder("video/x-h264").any_features().build()),
"Unexpected video caps {caps:?}" "Unexpected video caps {caps:?}"
); );
})
}
#[test]
fn test_roundtrip_vp9_flac() {
init();
test_basic_with("vp9enc ! vp9parse", "flacenc ! flacparse", |location| {
let Ok(pipeline) = gst::parse::launch(
"filesrc name=src ! qtdemux name=demux \
demux.audio_0 ! queue ! flacdec ! fakesink \
demux.video_0 ! queue ! vp9dec ! fakesink",
) else {
panic!("could not build decoding pipeline")
};
let pipeline = Pipeline(pipeline.downcast::<gst::Pipeline>().unwrap());
pipeline
.by_name("src")
.unwrap()
.set_property("location", location.display().to_string());
pipeline.into_completion();
})
} }