From 5d939498f14b253eb3a5d8c8eab07250ac1dc73d Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Tue, 28 Nov 2023 19:53:10 +0200 Subject: [PATCH] mp4/fmp4: support flac inside the iso (f)mp4 container Part-of: --- docs/plugins/gst_plugins_cache.json | 4 +- mux/fmp4/src/fmp4mux/boxes.rs | 56 ++++++-- mux/fmp4/src/fmp4mux/imp.rs | 40 ++++-- mux/fmp4/tests/tests.rs | 45 ++++++ mux/mp4/src/mp4mux/boxes.rs | 57 ++++++-- mux/mp4/src/mp4mux/imp.rs | 111 +++++++++------ mux/mp4/tests/tests.rs | 208 ++++++++++++++++------------ 7 files changed, 356 insertions(+), 165 deletions(-) diff --git a/docs/plugins/gst_plugins_cache.json b/docs/plugins/gst_plugins_cache.json index 50f43130..9930af62 100644 --- a/docs/plugins/gst_plugins_cache.json +++ b/docs/plugins/gst_plugins_cache.json @@ -2038,7 +2038,7 @@ "long-name": "ISOFMP4Mux", "pad-templates": { "sink_%%u": { - "caps": "video/x-h264:\n stream-format: { (string)avc, (string)avc3 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-h265:\n stream-format: { (string)hvc1, (string)hev1 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-vp8:\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-vp9:\n profile: { (string)0, (string)1, (string)2, (string)3 }\n chroma-format: { (string)4:2:0, (string)4:2:2, (string)4:4:4 }\n bit-depth-luma: { (uint)8, (uint)10, (uint)12 }\nbit-depth-chroma: { (uint)8, (uint)10, (uint)12 }\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-av1:\n stream-format: obu-stream\n alignment: tu\n profile: { (string)main, (string)high, (string)professional }\n chroma-format: { (string)4:0:0, (string)4:2:0, (string)4:2:2, (string)4:4:4 }\n bit-depth-luma: { (uint)8, (uint)10, (uint)12 }\nbit-depth-chroma: { (uint)8, (uint)10, (uint)12 }\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\naudio/mpeg:\n mpegversion: 4\n stream-format: raw\n channels: [ 1, 65535 ]\n rate: [ 1, 2147483647 ]\naudio/x-opus:\nchannel-mapping-family: [ 0, 255 ]\n channels: [ 1, 8 ]\n rate: [ 1, 2147483647 ]\n", + "caps": "video/x-h264:\n stream-format: { (string)avc, (string)avc3 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-h265:\n stream-format: { (string)hvc1, (string)hev1 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-vp8:\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-vp9:\n profile: { (string)0, (string)1, (string)2, (string)3 }\n chroma-format: { (string)4:2:0, (string)4:2:2, (string)4:4:4 }\n bit-depth-luma: { (uint)8, (uint)10, (uint)12 }\nbit-depth-chroma: { (uint)8, (uint)10, (uint)12 }\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-av1:\n stream-format: obu-stream\n alignment: tu\n profile: { (string)main, (string)high, (string)professional }\n chroma-format: { (string)4:0:0, (string)4:2:0, (string)4:2:2, (string)4:4:4 }\n bit-depth-luma: { (uint)8, (uint)10, (uint)12 }\nbit-depth-chroma: { (uint)8, (uint)10, (uint)12 }\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\naudio/mpeg:\n mpegversion: 4\n stream-format: raw\n channels: [ 1, 65535 ]\n rate: [ 1, 2147483647 ]\naudio/x-opus:\nchannel-mapping-family: [ 0, 255 ]\n channels: [ 1, 8 ]\n rate: [ 1, 2147483647 ]\naudio/x-flac:\n framed: true\n channels: [ 1, 8 ]\n rate: [ 1, 655350 ]\n", "direction": "sink", "presence": "request", "type": "GstFMP4MuxPad" @@ -3318,7 +3318,7 @@ "klass": "Codec/Muxer", "pad-templates": { "sink_%%u": { - "caps": "video/x-h264:\n stream-format: { (string)avc, (string)avc3 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-h265:\n stream-format: { (string)hvc1, (string)hev1 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-vp8:\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-vp9:\n profile: { (string)0, (string)1, (string)2, (string)3 }\n chroma-format: { (string)4:2:0, (string)4:2:2, (string)4:4:4 }\n bit-depth-luma: { (uint)8, (uint)10, (uint)12 }\nbit-depth-chroma: { (uint)8, (uint)10, (uint)12 }\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-av1:\n stream-format: obu-stream\n alignment: tu\n profile: { (string)main, (string)high, (string)professional }\n chroma-format: { (string)4:0:0, (string)4:2:0, (string)4:2:2, (string)4:4:4 }\n bit-depth-luma: { (uint)8, (uint)10, (uint)12 }\nbit-depth-chroma: { (uint)8, (uint)10, (uint)12 }\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\naudio/mpeg:\n mpegversion: 4\n stream-format: raw\n channels: [ 1, 65535 ]\n rate: [ 1, 2147483647 ]\naudio/x-opus:\nchannel-mapping-family: [ 0, 255 ]\n channels: [ 1, 8 ]\n rate: [ 1, 2147483647 ]\n", + "caps": "video/x-h264:\n stream-format: { (string)avc, (string)avc3 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-h265:\n stream-format: { (string)hvc1, (string)hev1 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-vp8:\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-vp9:\n profile: { (string)0, (string)1, (string)2, (string)3 }\n chroma-format: { (string)4:2:0, (string)4:2:2, (string)4:4:4 }\n bit-depth-luma: { (uint)8, (uint)10, (uint)12 }\nbit-depth-chroma: { (uint)8, (uint)10, (uint)12 }\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-av1:\n stream-format: obu-stream\n alignment: tu\n profile: { (string)main, (string)high, (string)professional }\n chroma-format: { (string)4:0:0, (string)4:2:0, (string)4:2:2, (string)4:4:4 }\n bit-depth-luma: { (uint)8, (uint)10, (uint)12 }\nbit-depth-chroma: { (uint)8, (uint)10, (uint)12 }\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\naudio/mpeg:\n mpegversion: 4\n stream-format: raw\n channels: [ 1, 65535 ]\n rate: [ 1, 2147483647 ]\naudio/x-opus:\nchannel-mapping-family: [ 0, 255 ]\n channels: [ 1, 8 ]\n rate: [ 1, 2147483647 ]\naudio/x-flac:\n framed: true\n channels: [ 1, 8 ]\n rate: [ 1, 655350 ]\n", "direction": "sink", "presence": "request", "type": "GstRsMP4MuxPad" diff --git a/mux/fmp4/src/fmp4mux/boxes.rs b/mux/fmp4/src/fmp4mux/boxes.rs index 55c8c520..e2fdeae7 100644 --- a/mux/fmp4/src/fmp4mux/boxes.rs +++ b/mux/fmp4/src/fmp4mux/boxes.rs @@ -9,6 +9,7 @@ use gst::prelude::*; use anyhow::{anyhow, bail, Context, Error}; +use std::convert::TryFrom; use super::Buffer; @@ -604,9 +605,8 @@ fn write_tkhd( // Volume let s = stream.caps.structure(0).unwrap(); match s.name().as_str() { - "audio/mpeg" | "audio/x-opus" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { - v.extend((1u16 << 8).to_be_bytes()) - } + "audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw" + | "audio/x-adpcm" => v.extend((1u16 << 8).to_be_bytes()), _ => v.extend(0u16.to_be_bytes()), } @@ -745,9 +745,8 @@ fn write_hdlr( let (handler_type, name) = match s.name().as_str() { "video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9" | "video/x-av1" | "image/jpeg" => (b"vide", b"VideoHandler\0".as_slice()), - "audio/mpeg" | "audio/x-opus" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { - (b"soun", b"SoundHandler\0".as_slice()) - } + "audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw" + | "audio/x-adpcm" => (b"soun", b"SoundHandler\0".as_slice()), "application/x-onvif-metadata" => (b"meta", b"MetadataHandler\0".as_slice()), _ => unreachable!(), }; @@ -777,7 +776,8 @@ fn write_minf( // Flags are always 1 for unspecified reasons write_full_box(v, b"vmhd", FULL_BOX_VERSION_0, 1, |v| write_vmhd(v, cfg))? } - "audio/mpeg" | "audio/x-opus" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { + "audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw" + | "audio/x-adpcm" => { write_full_box(v, b"smhd", FULL_BOX_VERSION_0, FULL_BOX_FLAGS_NONE, |v| { write_smhd(v, cfg) })? @@ -886,9 +886,8 @@ fn write_stsd( match s.name().as_str() { "video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9" | "video/x-av1" | "image/jpeg" => write_visual_sample_entry(v, cfg, stream)?, - "audio/mpeg" | "audio/x-opus" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { - write_audio_sample_entry(v, cfg, stream)? - } + "audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw" + | "audio/x-adpcm" => write_audio_sample_entry(v, cfg, stream)?, "application/x-onvif-metadata" => write_xml_meta_data_sample_entry(v, cfg, stream)?, _ => unreachable!(), } @@ -1262,6 +1261,7 @@ fn write_audio_sample_entry( let fourcc = match s.name().as_str() { "audio/mpeg" => b"mp4a", "audio/x-opus" => b"Opus", + "audio/x-flac" => b"fLaC", "audio/x-alaw" => b"alaw", "audio/x-mulaw" => b"ulaw", "audio/x-adpcm" => { @@ -1280,6 +1280,10 @@ fn write_audio_sample_entry( let bitrate = s.get::("bitrate").context("no ADPCM bitrate field")?; (bitrate / 8000) as u16 } + "audio/x-flac" => with_flac_metadata(&stream.caps, |streaminfo, _| { + 1 + (u16::from_be_bytes([streaminfo[16], streaminfo[17]]) >> 4 & 0b11111) + }) + .context("FLAC metadata error")?, _ => 16u16, }; @@ -1322,6 +1326,9 @@ fn write_audio_sample_entry( "audio/x-opus" => { write_dops(v, &stream.caps)?; } + "audio/x-flac" => { + write_dfla(v, &stream.caps)?; + } "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { // Nothing to do here } @@ -1516,6 +1523,35 @@ fn write_dops(v: &mut Vec, caps: &gst::Caps) -> Result<(), Error> { }) } +fn with_flac_metadata( + caps: &gst::Caps, + cb: impl FnOnce(&[u8], &[gst::glib::SendValue]) -> R, +) -> Result { + let caps = caps.structure(0).unwrap(); + let header = caps.get::("streamheader").unwrap(); + let (streaminfo, remainder) = header.as_ref().split_first().unwrap(); + let streaminfo = streaminfo.get::<&gst::BufferRef>().unwrap(); + let streaminfo = streaminfo.map_readable().unwrap(); + // 13 bytes for the Ogg/FLAC prefix and 38 for the streaminfo itself. + match <&[_; 13 + 38]>::try_from(streaminfo.as_slice()) { + Ok(i) if i.starts_with(b"\x7FFLAC\x01\x00") => Ok(cb(&i[13..], remainder)), + Ok(_) | Err(_) => bail!("Unknown streamheader format"), + } +} + +fn write_dfla(v: &mut Vec, caps: &gst::Caps) -> Result<(), Error> { + write_full_box(v, b"dfLa", 0, 0, move |v| { + with_flac_metadata(caps, |streaminfo, remainder| { + v.extend(streaminfo); + for metadata in remainder { + let metadata = metadata.get::<&gst::BufferRef>().unwrap(); + let metadata = metadata.map_readable().unwrap(); + v.extend(&metadata[..]); + } + }) + }) +} + fn write_xml_meta_data_sample_entry( v: &mut Vec, _cfg: &super::HeaderConfiguration, diff --git a/mux/fmp4/src/fmp4mux/imp.rs b/mux/fmp4/src/fmp4mux/imp.rs index 592b42dd..501a482e 100644 --- a/mux/fmp4/src/fmp4mux/imp.rs +++ b/mux/fmp4/src/fmp4mux/imp.rs @@ -205,6 +205,8 @@ struct Stream { caps: gst::Caps, /// Whether this stream is intra-only and has frame reordering. delta_frames: DeltaFrames, + /// Whether this stream might have header frames without timestamps that should be ignored. + discard_header_buffers: bool, /// Currently queued GOPs, including incomplete ones. queued_gops: VecDeque, @@ -271,11 +273,17 @@ pub(crate) struct FMP4Mux { impl FMP4Mux { /// Checks if a buffer is valid according to the stream configuration. - fn check_buffer( - buffer: &gst::BufferRef, - sinkpad: &super::FMP4MuxPad, - delta_frames: super::DeltaFrames, - ) -> Result<(), gst::FlowError> { + fn check_buffer(buffer: &gst::BufferRef, stream: &Stream) -> Result<(), gst::FlowError> { + let Stream { + sinkpad, + delta_frames, + discard_header_buffers, + .. + } = stream; + if *discard_header_buffers && buffer.flags().contains(gst::BufferFlags::HEADER) { + return Err(gst_base::AGGREGATOR_FLOW_NEED_DATA); + } + if delta_frames.requires_dts() && buffer.dts().is_none() { gst::error!(CAT, obj: sinkpad, "Require DTS for video streams"); return Err(gst::FlowError::Error); @@ -314,12 +322,10 @@ impl FMP4Mux { } // Pop buffer here, it will be stored in the pre-queue after calculating its timestamps - let mut buffer = match stream.sinkpad.pop_buffer() { - None => return Ok(None), - Some(buffer) => buffer, + let Some(mut buffer) = stream.sinkpad.pop_buffer() else { + return Ok(None); }; - - Self::check_buffer(&buffer, &stream.sinkpad, stream.delta_frames)?; + Self::check_buffer(&buffer, stream)?; let segment = match stream.sinkpad.segment().downcast::().ok() { Some(segment) => segment, @@ -2555,6 +2561,7 @@ impl FMP4Mux { let s = caps.structure(0).unwrap(); let mut delta_frames = DeltaFrames::IntraOnly; + let mut discard_header_buffers = false; match s.name().as_str() { "video/x-h264" | "video/x-h265" => { if !s.has_field_with_type("codec_data", gst::Buffer::static_type()) { @@ -2598,6 +2605,13 @@ impl FMP4Mux { return Err(gst::FlowError::NotNegotiated); } } + "audio/x-flac" => { + discard_header_buffers = true; + if let Err(e) = s.get::("streamheader") { + gst::error!(CAT, obj: pad, "Muxing FLAC into MP4 needs streamheader: {}", e); + return Err(gst::FlowError::NotNegotiated); + }; + } "audio/x-alaw" | "audio/x-mulaw" => (), "audio/x-adpcm" => (), "application/x-onvif-metadata" => (), @@ -2608,6 +2622,7 @@ impl FMP4Mux { sinkpad: pad, caps, delta_frames, + discard_header_buffers, pre_queue: VecDeque::new(), queued_gops: VecDeque::new(), fragment_filled: false, @@ -3465,6 +3480,11 @@ impl ElementImpl for ISOFMP4Mux { .field("channels", gst::IntRange::new(1i32, 8)) .field("rate", gst::IntRange::new(1, i32::MAX)) .build(), + gst::Structure::builder("audio/x-flac") + .field("framed", true) + .field("channels", gst::IntRange::::new(1, 8)) + .field("rate", gst::IntRange::::new(1, 10 * u16::MAX as i32)) + .build(), ] .into_iter() .collect::(), diff --git a/mux/fmp4/tests/tests.rs b/mux/fmp4/tests/tests.rs index a32e65fc..5beba426 100644 --- a/mux/fmp4/tests/tests.rs +++ b/mux/fmp4/tests/tests.rs @@ -19,6 +19,33 @@ fn init() { }); } +fn to_completion(pipeline: &gst::Pipeline) { + pipeline + .set_state(gst::State::Playing) + .expect("Unable to set the pipeline to the `Playing` state"); + + for msg in pipeline.bus().unwrap().iter_timed(gst::ClockTime::NONE) { + use gst::MessageView; + + match msg.view() { + MessageView::Eos(..) => break, + MessageView::Error(err) => { + panic!( + "Error from {:?}: {} ({:?})", + err.src().map(|s| s.path_string()), + err.error(), + err.debug() + ); + } + _ => (), + } + } + + pipeline + .set_state(gst::State::Null) + .expect("Unable to set the pipeline to the `Null` state"); +} + fn test_buffer_flags_single_stream(cmaf: bool, set_dts: bool, caps: gst::Caps) { let mut h = if cmaf { gst_check::Harness::new("cmafmux") @@ -1993,3 +2020,21 @@ fn test_chunking_single_stream_gops_after_fragment_end_after_next_chunk_end() { let ev = h.pull_event().unwrap(); assert_eq!(ev.type_(), gst::EventType::Eos); } + +#[test] +fn test_roundtrip_vp9_flac() { + init(); + + let pipeline = gst::parse::launch( + r#" + videotestsrc num-buffers=99 ! vp9enc ! vp9parse ! mux. + audiotestsrc num-buffers=149 ! flacenc ! flacparse ! mux. + isofmp4mux name=mux ! qtdemux name=demux + demux.audio_0 ! queue ! flacdec ! fakesink + demux.video_0 ! queue ! vp9dec ! fakesink + "#, + ) + .unwrap(); + let pipeline = pipeline.downcast().unwrap(); + to_completion(&pipeline); +} diff --git a/mux/mp4/src/mp4mux/boxes.rs b/mux/mp4/src/mp4mux/boxes.rs index 4e2188b0..170fbc9c 100644 --- a/mux/mp4/src/mp4mux/boxes.rs +++ b/mux/mp4/src/mp4mux/boxes.rs @@ -9,7 +9,7 @@ use gst::prelude::*; use anyhow::{anyhow, bail, Context, Error}; - +use std::convert::TryFrom; use std::str::FromStr; fn write_box) -> Result>( @@ -382,9 +382,8 @@ fn write_tkhd( // Volume let s = stream.caps.structure(0).unwrap(); match s.name().as_str() { - "audio/mpeg" | "audio/x-opus" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { - v.extend((1u16 << 8).to_be_bytes()) - } + "audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw" + | "audio/x-adpcm" => v.extend((1u16 << 8).to_be_bytes()), _ => v.extend(0u16.to_be_bytes()), } @@ -514,9 +513,8 @@ fn write_hdlr( let (handler_type, name) = match s.name().as_str() { "video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9" | "video/x-av1" | "image/jpeg" => (b"vide", b"VideoHandler\0".as_slice()), - "audio/mpeg" | "audio/x-opus" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { - (b"soun", b"SoundHandler\0".as_slice()) - } + "audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw" + | "audio/x-adpcm" => (b"soun", b"SoundHandler\0".as_slice()), "application/x-onvif-metadata" => (b"meta", b"MetadataHandler\0".as_slice()), _ => unreachable!(), }; @@ -546,7 +544,8 @@ fn write_minf( // Flags are always 1 for unspecified reasons write_full_box(v, b"vmhd", FULL_BOX_VERSION_0, 1, |v| write_vmhd(v, header))? } - "audio/mpeg" | "audio/x-opus" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { + "audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw" + | "audio/x-adpcm" => { write_full_box(v, b"smhd", FULL_BOX_VERSION_0, FULL_BOX_FLAGS_NONE, |v| { write_smhd(v, header) })? @@ -703,9 +702,8 @@ fn write_stsd( match s.name().as_str() { "video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9" | "video/x-av1" | "image/jpeg" => write_visual_sample_entry(v, header, stream)?, - "audio/mpeg" | "audio/x-opus" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { - write_audio_sample_entry(v, header, stream)? - } + "audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw" + | "audio/x-adpcm" => write_audio_sample_entry(v, header, stream)?, "application/x-onvif-metadata" => write_xml_meta_data_sample_entry(v, header, stream)?, _ => unreachable!(), } @@ -1079,6 +1077,7 @@ fn write_audio_sample_entry( let fourcc = match s.name().as_str() { "audio/mpeg" => b"mp4a", "audio/x-opus" => b"Opus", + "audio/x-flac" => b"fLaC", "audio/x-alaw" => b"alaw", "audio/x-mulaw" => b"ulaw", "audio/x-adpcm" => { @@ -1097,6 +1096,10 @@ fn write_audio_sample_entry( let bitrate = s.get::("bitrate").context("no ADPCM bitrate field")?; (bitrate / 8000) as u16 } + "audio/x-flac" => with_flac_metadata(&stream.caps, |streaminfo, _| { + 1 + (u16::from_be_bytes([streaminfo[16], streaminfo[17]]) >> 4 & 0b11111) + }) + .context("FLAC metadata error")?, _ => 16u16, }; @@ -1139,6 +1142,9 @@ fn write_audio_sample_entry( "audio/x-opus" => { write_dops(v, &stream.caps)?; } + "audio/x-flac" => { + write_dfla(v, &stream.caps)?; + } "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => { // Nothing to do here } @@ -1333,6 +1339,35 @@ fn write_dops(v: &mut Vec, caps: &gst::Caps) -> Result<(), Error> { }) } +fn with_flac_metadata( + caps: &gst::Caps, + cb: impl FnOnce(&[u8], &[gst::glib::SendValue]) -> R, +) -> Result { + let caps = caps.structure(0).unwrap(); + let header = caps.get::("streamheader").unwrap(); + let (streaminfo, remainder) = header.as_ref().split_first().unwrap(); + let streaminfo = streaminfo.get::<&gst::BufferRef>().unwrap(); + let streaminfo = streaminfo.map_readable().unwrap(); + // 13 bytes for the Ogg/FLAC prefix and 38 for the streaminfo itself. + match <&[_; 13 + 38]>::try_from(streaminfo.as_slice()) { + Ok(i) if i.starts_with(b"\x7FFLAC\x01\x00") => Ok(cb(&i[13..], remainder)), + Ok(_) | Err(_) => bail!("Unknown streamheader format"), + } +} + +fn write_dfla(v: &mut Vec, caps: &gst::Caps) -> Result<(), Error> { + write_full_box(v, b"dfLa", 0, 0, move |v| { + with_flac_metadata(caps, |streaminfo, remainder| { + v.extend(streaminfo); + for metadata in remainder { + let metadata = metadata.get::<&gst::BufferRef>().unwrap(); + let metadata = metadata.map_readable().unwrap(); + v.extend(&metadata[..]); + } + }) + }) +} + fn write_xml_meta_data_sample_entry( v: &mut Vec, _header: &super::Header, diff --git a/mux/mp4/src/mp4mux/imp.rs b/mux/mp4/src/mp4mux/imp.rs index 24c06173..bb9a791d 100644 --- a/mux/mp4/src/mp4mux/imp.rs +++ b/mux/mp4/src/mp4mux/imp.rs @@ -108,6 +108,8 @@ struct Stream { caps: gst::Caps, /// Whether this stream is intra-only and has frame reordering. delta_frames: super::DeltaFrames, + /// Whether this stream might have header frames without timestamps that should be ignored. + discard_header_buffers: bool, /// Already written out chunks with their samples for this stream chunks: Vec, @@ -165,7 +167,12 @@ impl MP4Mux { buffer: &gst::BufferRef, sinkpad: &super::MP4MuxPad, delta_frames: super::DeltaFrames, + discard_headers: bool, ) -> Result<(), gst::FlowError> { + if discard_headers && buffer.flags().contains(gst::BufferFlags::HEADER) { + return Err(gst_base::AGGREGATOR_FLOW_NEED_DATA); + } + if delta_frames.requires_dts() && buffer.dts().is_none() { gst::error!(CAT, obj: sinkpad, "Require DTS for video streams"); return Err(gst::FlowError::Error); @@ -188,6 +195,7 @@ impl MP4Mux { &self, sinkpad: &super::MP4MuxPad, delta_frames: super::DeltaFrames, + discard_headers: bool, pre_queue: &mut VecDeque<(gst::FormattedSegment, gst::Buffer)>, running_time_utc_time_mapping: &Option<(gst::Signed, gst::ClockTime)>, ) -> Result, gst::Buffer)>, gst::FlowError> { @@ -195,13 +203,10 @@ impl MP4Mux { return Ok(Some((segment.clone(), buffer.clone()))); } - let mut buffer = match sinkpad.peek_buffer() { - None => return Ok(None), - Some(buffer) => buffer, + let Some(mut buffer) = sinkpad.peek_buffer() else { + return Ok(None); }; - - Self::check_buffer(&buffer, sinkpad, delta_frames)?; - + Self::check_buffer(&buffer, sinkpad, delta_frames, discard_headers)?; let mut segment = match sinkpad.segment().downcast::().ok() { Some(segment) => segment, None => { @@ -276,19 +281,20 @@ impl MP4Mux { fn pop_buffer( &self, - sinkpad: &super::MP4MuxPad, - delta_frames: super::DeltaFrames, - pre_queue: &mut VecDeque<(gst::FormattedSegment, gst::Buffer)>, - running_time_utc_time_mapping: &mut Option<(gst::Signed, gst::ClockTime)>, + stream: &mut Stream, ) -> Result, gst::Buffer)>, gst::FlowError> { + let Stream { + sinkpad, pre_queue, .. + } = stream; + // In ONVIF mode we need to get UTC times for each buffer and synchronize based on that. // Queue up to 6s of data to get the first UTC time and then backdate. if self.obj().class().as_ref().variant == super::Variant::ONVIF - && running_time_utc_time_mapping.is_none() + && stream.running_time_utc_time_mapping.is_none() { if let Some((last, first)) = Option::zip(pre_queue.back(), pre_queue.front()) { // Existence of PTS/DTS checked below - let (last, first) = if delta_frames.requires_dts() { + let (last, first) = if stream.delta_frames.requires_dts() { ( last.0.to_running_time_full(last.1.dts()).unwrap(), first.0.to_running_time_full(first.1.dts()).unwrap(), @@ -312,19 +318,20 @@ impl MP4Mux { } } - let buffer = match sinkpad.pop_buffer() { - None => { - if sinkpad.is_eos() { - gst::error!(CAT, obj: sinkpad, "Got no UTC time before EOS"); - return Err(gst::FlowError::Error); - } else { - return Err(gst_base::AGGREGATOR_FLOW_NEED_DATA); - } + let Some(buffer) = sinkpad.pop_buffer() else { + if sinkpad.is_eos() { + gst::error!(CAT, obj: sinkpad, "Got no UTC time before EOS"); + return Err(gst::FlowError::Error); + } else { + return Err(gst_base::AGGREGATOR_FLOW_NEED_DATA); } - Some(buffer) => buffer, }; - - Self::check_buffer(&buffer, sinkpad, delta_frames)?; + Self::check_buffer( + &buffer, + sinkpad, + stream.delta_frames, + stream.discard_header_buffers, + )?; let segment = match sinkpad.segment().downcast::().ok() { Some(segment) => segment, @@ -350,7 +357,7 @@ impl MP4Mux { ); let mapping = (running_time, utc_time); - *running_time_utc_time_mapping = Some(mapping); + stream.running_time_utc_time_mapping = Some(mapping); // Push the buffer onto the pre-queue and re-timestamp it and all other buffers // based on the mapping above. @@ -391,7 +398,7 @@ impl MP4Mux { // Fall through below and pop the first buffer finally } - if let Some((segment, buffer)) = pre_queue.pop_front() { + if let Some((segment, buffer)) = stream.pre_queue.pop_front() { return Ok(Some((segment, buffer))); } @@ -400,23 +407,26 @@ impl MP4Mux { // for calculating the duration to the previous buffer, and then put into the pre-queue // - or this is the very first buffer and we just put it into the queue overselves above if self.obj().class().as_ref().variant == super::Variant::ONVIF { - if sinkpad.is_eos() { + if stream.sinkpad.is_eos() { return Ok(None); } unreachable!(); } - let buffer = match sinkpad.pop_buffer() { - None => return Ok(None), - Some(buffer) => buffer, + let Some(buffer) = stream.sinkpad.pop_buffer() else { + return Ok(None); }; + Self::check_buffer( + &buffer, + &stream.sinkpad, + stream.delta_frames, + stream.discard_header_buffers, + )?; - Self::check_buffer(&buffer, sinkpad, delta_frames)?; - - let segment = match sinkpad.segment().downcast::().ok() { + let segment = match stream.sinkpad.segment().downcast::().ok() { Some(segment) => segment, None => { - gst::error!(CAT, obj: sinkpad, "Got buffer before segment"); + gst::error!(CAT, obj: stream.sinkpad, "Got buffer before segment"); return Err(gst::FlowError::Error); } }; @@ -442,6 +452,12 @@ impl MP4Mux { Some(PendingBuffer { duration: Some(_), .. }) => return Ok(()), + Some(PendingBuffer { ref buffer, .. }) + if stream.discard_header_buffers + && buffer.flags().contains(gst::BufferFlags::HEADER) => + { + return Err(gst_base::AGGREGATOR_FLOW_NEED_DATA); + } Some(PendingBuffer { timestamp, pts, @@ -449,13 +465,15 @@ impl MP4Mux { ref mut duration, .. }) => { - // Already have a pending buffer but no duration, so try to get that now - let (segment, buffer) = match self.peek_buffer( + let peek_outcome = self.peek_buffer( &stream.sinkpad, stream.delta_frames, + stream.discard_header_buffers, &mut stream.pre_queue, &stream.running_time_utc_time_mapping, - )? { + )?; + // Already have a pending buffer but no duration, so try to get that now + let (segment, buffer) = match peek_outcome { Some(res) => res, None => { if stream.sinkpad.is_eos() { @@ -532,12 +550,7 @@ impl MP4Mux { None => { // Have no buffer queued at all yet - let (segment, buffer) = match self.pop_buffer( - &stream.sinkpad, - stream.delta_frames, - &mut stream.pre_queue, - &mut stream.running_time_utc_time_mapping, - )? { + let (segment, buffer) = match self.pop_buffer(stream)? { Some(res) => res, None => { if stream.sinkpad.is_eos() { @@ -870,6 +883,7 @@ impl MP4Mux { let s = caps.structure(0).unwrap(); let mut delta_frames = super::DeltaFrames::IntraOnly; + let mut discard_header_buffers = false; match s.name().as_str() { "video/x-h264" | "video/x-h265" => { if !s.has_field_with_type("codec_data", gst::Buffer::static_type()) { @@ -913,6 +927,13 @@ impl MP4Mux { return Err(gst::FlowError::NotNegotiated); } } + "audio/x-flac" => { + discard_header_buffers = true; + if let Err(e) = s.get::("streamheader") { + gst::error!(CAT, obj: pad, "Muxing FLAC into MP4 needs streamheader: {}", e); + return Err(gst::FlowError::NotNegotiated); + }; + } "audio/x-alaw" | "audio/x-mulaw" => (), "audio/x-adpcm" => (), "application/x-onvif-metadata" => (), @@ -924,6 +945,7 @@ impl MP4Mux { pre_queue: VecDeque::new(), caps, delta_frames, + discard_header_buffers, chunks: Vec::new(), pending_buffer: None, queued_chunk_time: gst::ClockTime::ZERO, @@ -1523,6 +1545,11 @@ impl ElementImpl for ISOMP4Mux { .field("channels", gst::IntRange::new(1i32, 8)) .field("rate", gst::IntRange::new(1, i32::MAX)) .build(), + gst::Structure::builder("audio/x-flac") + .field("framed", true) + .field("channels", gst::IntRange::::new(1, 8)) + .field("rate", gst::IntRange::::new(1, 10 * u16::MAX as i32)) + .build(), ] .into_iter() .collect::(), diff --git a/mux/mp4/tests/tests.rs b/mux/mp4/tests/tests.rs index fc6e360f..f3f2ece3 100644 --- a/mux/mp4/tests/tests.rs +++ b/mux/mp4/tests/tests.rs @@ -7,6 +7,8 @@ // SPDX-License-Identifier: MPL-2.0 // +use std::path::Path; + use gst::prelude::*; use gst_pbutils::prelude::*; @@ -20,33 +22,57 @@ fn init() { }); } -#[test] -fn test_basic() { - init(); +struct Pipeline(gst::Pipeline); +impl std::ops::Deref for Pipeline { + type Target = gst::Pipeline; - struct Pipeline(gst::Pipeline); - impl std::ops::Deref for Pipeline { - type Target = gst::Pipeline; - - fn deref(&self) -> &Self::Target { - &self.0 - } + fn deref(&self) -> &Self::Target { + &self.0 } - impl Drop for Pipeline { - fn drop(&mut self) { - let _ = self.0.set_state(gst::State::Null); - } +} +impl Drop for Pipeline { + fn drop(&mut self) { + let _ = self.0.set_state(gst::State::Null); } +} - let pipeline = match gst::parse::launch( - "videotestsrc num-buffers=99 ! x264enc ! mux. \ - audiotestsrc num-buffers=140 ! fdkaacenc ! mux. \ - isomp4mux name=mux ! filesink name=sink \ - ", - ) { - Ok(pipeline) => Pipeline(pipeline.downcast::().unwrap()), - Err(_) => return, +impl Pipeline { + fn into_completion(self) { + self.set_state(gst::State::Playing) + .expect("Unable to set the pipeline to the `Playing` state"); + + for msg in self.bus().unwrap().iter_timed(gst::ClockTime::NONE) { + use gst::MessageView; + + match msg.view() { + MessageView::Eos(..) => break, + MessageView::Error(err) => { + panic!( + "Error from {:?}: {} ({:?})", + err.src().map(|s| s.path_string()), + err.error(), + err.debug() + ); + } + _ => (), + } + } + + self.set_state(gst::State::Null) + .expect("Unable to set the pipeline to the `Null` state"); + } +} + +fn test_basic_with(video_enc: &str, audio_enc: &str, cb: impl FnOnce(&Path)) { + let Ok(pipeline) = gst::parse::launch(&format!( + "videotestsrc num-buffers=99 ! {video_enc} ! mux. \ + audiotestsrc num-buffers=140 ! {audio_enc} ! mux. \ + isomp4mux name=mux ! filesink name=sink" + )) else { + println!("could not build encoding pipeline"); + return; }; + let pipeline = Pipeline(pipeline.downcast::().unwrap()); let dir = tempfile::TempDir::new().unwrap(); let mut location = dir.path().to_owned(); @@ -54,73 +80,75 @@ fn test_basic() { let sink = pipeline.by_name("sink").unwrap(); sink.set_property("location", location.to_str().expect("Non-UTF8 filename")); + pipeline.into_completion(); - pipeline - .set_state(gst::State::Playing) - .expect("Unable to set the pipeline to the `Playing` state"); - - for msg in pipeline.bus().unwrap().iter_timed(gst::ClockTime::NONE) { - use gst::MessageView; - - match msg.view() { - MessageView::Eos(..) => break, - MessageView::Error(err) => { - panic!( - "Error from {:?}: {} ({:?})", - err.src().map(|s| s.path_string()), - err.error(), - err.debug() - ); - } - _ => (), - } - } - - pipeline - .set_state(gst::State::Null) - .expect("Unable to set the pipeline to the `Null` state"); - - drop(pipeline); - - let discoverer = gst_pbutils::Discoverer::new(gst::ClockTime::from_seconds(5)) - .expect("Failed to create discoverer"); - let info = discoverer - .discover_uri( - url::Url::from_file_path(&location) - .expect("Failed to convert filename to URL") - .as_str(), - ) - .expect("Failed to discover MP4 file"); - - assert_eq!(info.duration(), Some(gst::ClockTime::from_mseconds(3_300))); - - let audio_streams = info.audio_streams(); - assert_eq!(audio_streams.len(), 1); - let audio_stream = &audio_streams[0]; - assert_eq!(audio_stream.channels(), 1); - assert_eq!(audio_stream.sample_rate(), 44_100); - let caps = audio_stream.caps().unwrap(); - assert!( - caps.can_intersect( - &gst::Caps::builder("audio/mpeg") - .any_features() - .field("mpegversion", 4i32) - .build() - ), - "Unexpected audio caps {caps:?}" - ); - - let video_streams = info.video_streams(); - assert_eq!(video_streams.len(), 1); - let video_stream = &video_streams[0]; - assert_eq!(video_stream.width(), 320); - assert_eq!(video_stream.height(), 240); - assert_eq!(video_stream.framerate(), gst::Fraction::new(30, 1)); - assert_eq!(video_stream.par(), gst::Fraction::new(1, 1)); - assert!(!video_stream.is_interlaced()); - let caps = video_stream.caps().unwrap(); - assert!( - caps.can_intersect(&gst::Caps::builder("video/x-h264").any_features().build()), - "Unexpected video caps {caps:?}" - ); + cb(&location) +} + +#[test] +fn test_basic_x264_aac() { + init(); + test_basic_with("x264enc", "fdkaacenc", |location| { + let discoverer = gst_pbutils::Discoverer::new(gst::ClockTime::from_seconds(5)) + .expect("Failed to create discoverer"); + let info = discoverer + .discover_uri( + url::Url::from_file_path(location) + .expect("Failed to convert filename to URL") + .as_str(), + ) + .expect("Failed to discover MP4 file"); + + assert_eq!(info.duration(), Some(gst::ClockTime::from_mseconds(3_300))); + + let audio_streams = info.audio_streams(); + assert_eq!(audio_streams.len(), 1); + let audio_stream = &audio_streams[0]; + assert_eq!(audio_stream.channels(), 1); + assert_eq!(audio_stream.sample_rate(), 44_100); + let caps = audio_stream.caps().unwrap(); + assert!( + caps.can_intersect( + &gst::Caps::builder("audio/mpeg") + .any_features() + .field("mpegversion", 4i32) + .build() + ), + "Unexpected audio caps {caps:?}" + ); + + let video_streams = info.video_streams(); + assert_eq!(video_streams.len(), 1); + let video_stream = &video_streams[0]; + assert_eq!(video_stream.width(), 320); + assert_eq!(video_stream.height(), 240); + assert_eq!(video_stream.framerate(), gst::Fraction::new(30, 1)); + assert_eq!(video_stream.par(), gst::Fraction::new(1, 1)); + assert!(!video_stream.is_interlaced()); + let caps = video_stream.caps().unwrap(); + assert!( + caps.can_intersect(&gst::Caps::builder("video/x-h264").any_features().build()), + "Unexpected video caps {caps:?}" + ); + }) +} + +#[test] +fn test_roundtrip_vp9_flac() { + init(); + test_basic_with("vp9enc ! vp9parse", "flacenc ! flacparse", |location| { + let Ok(pipeline) = gst::parse::launch( + "filesrc name=src ! qtdemux name=demux \ + demux.audio_0 ! queue ! flacdec ! fakesink \ + demux.video_0 ! queue ! vp9dec ! fakesink", + ) else { + panic!("could not build decoding pipeline") + }; + let pipeline = Pipeline(pipeline.downcast::().unwrap()); + pipeline + .by_name("src") + .unwrap() + .set_property("location", location.display().to_string()); + pipeline.into_completion(); + }) }