fmp4mux: Support AV1 packaging in the fragmented mp4 plugin

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/1582>
This commit is contained in:
Rafael Caricio 2024-04-15 18:45:44 +02:00 committed by Sebastian Dröge
parent 135de50918
commit 6c67c00113
14 changed files with 835 additions and 11 deletions

2
Cargo.lock generated
View file

@ -2373,6 +2373,7 @@ name = "gst-plugin-fmp4"
version = "0.12.5" version = "0.12.5"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bitstream-io",
"chrono", "chrono",
"dash-mpd", "dash-mpd",
"gst-plugin-version-helper", "gst-plugin-version-helper",
@ -2523,6 +2524,7 @@ name = "gst-plugin-mp4"
version = "0.12.5" version = "0.12.5"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bitstream-io",
"gst-plugin-version-helper", "gst-plugin-version-helper",
"gstreamer", "gstreamer",
"gstreamer-audio", "gstreamer-audio",

View file

@ -2028,7 +2028,7 @@
"long-name": "CMAFMux", "long-name": "CMAFMux",
"pad-templates": { "pad-templates": {
"sink": { "sink": {
"caps": "video/x-h264:\n stream-format: { (string)avc, (string)avc3 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-h265:\n stream-format: { (string)hvc1, (string)hev1 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\naudio/mpeg:\n mpegversion: 4\n stream-format: raw\n channels: [ 1, 65535 ]\n rate: [ 1, 2147483647 ]\n", "caps": "video/x-h264:\n stream-format: { (string)avc, (string)avc3 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-av1:\n stream-format: obu-stream\n alignment: tu\n profile: { (string)main, (string)high, (string)professional }\n chroma-format: { (string)4:0:0, (string)4:2:0, (string)4:2:2, (string)4:4:4 }\n bit-depth-luma: { (uint)8, (uint)10, (uint)12 }\nbit-depth-chroma: { (uint)8, (uint)10, (uint)12 }\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\nvideo/x-h265:\n stream-format: { (string)hvc1, (string)hev1 }\n alignment: au\n width: [ 1, 65535 ]\n height: [ 1, 65535 ]\naudio/mpeg:\n mpegversion: 4\n stream-format: raw\n channels: [ 1, 65535 ]\n rate: [ 1, 2147483647 ]\n",
"direction": "sink", "direction": "sink",
"presence": "always", "presence": "always",
"type": "GstFMP4MuxPad" "type": "GstFMP4MuxPad"

View file

@ -14,8 +14,9 @@ gst = { workspace = true, features = ["v1_18"] }
gst-base = { workspace = true, features = ["v1_18"] } gst-base = { workspace = true, features = ["v1_18"] }
gst-audio = { workspace = true, features = ["v1_18"] } gst-audio = { workspace = true, features = ["v1_18"] }
gst-video = { workspace = true, features = ["v1_18"] } gst-video = { workspace = true, features = ["v1_18"] }
gst-pbutils = { workspace = true, features = ["v1_18"] } gst-pbutils = { workspace = true, features = ["v1_20"] }
once_cell.workspace = true once_cell.workspace = true
bitstream-io = "2.3"
[lib] [lib]
name = "gstfmp4" name = "gstfmp4"

View file

@ -161,6 +161,10 @@ fn cmaf_brands_from_caps(caps: &gst::CapsRef, compatible_brands: &mut Vec<&'stat
"audio/mpeg" => { "audio/mpeg" => {
compatible_brands.push(b"caac"); compatible_brands.push(b"caac");
} }
"video/x-av1" => {
compatible_brands.push(b"av01");
compatible_brands.push(b"cmf2");
}
"video/x-h265" => { "video/x-h265" => {
let width = s.get::<i32>("width").ok(); let width = s.get::<i32>("width").ok();
let height = s.get::<i32>("height").ok(); let height = s.get::<i32>("height").ok();
@ -1099,9 +1103,9 @@ fn write_visual_sample_entry(
"professional" => 2, "professional" => 2,
_ => unreachable!(), _ => unreachable!(),
}; };
// TODO: Use `gst_codec_utils_av1_get_seq_level_idx` when exposed in bindings
let level = 1; // FIXME let level = av1_seq_level_idx(s.get::<&str>("level").ok());
let tier = 0; // FIXME let tier = av1_tier(s.get::<&str>("tier").ok());
let (high_bitdepth, twelve_bit) = let (high_bitdepth, twelve_bit) =
match s.get::<u32>("bit-depth-luma").unwrap() { match s.get::<u32>("bit-depth-luma").unwrap() {
8 => (false, false), 8 => (false, false),
@ -1146,6 +1150,10 @@ fn write_visual_sample_entry(
v.extend_from_slice(&codec_data); v.extend_from_slice(&codec_data);
} }
if let Some(extra_data) = &stream.extra_header_data {
// configOBUs
v.extend_from_slice(extra_data.as_slice());
}
Ok(()) Ok(())
})?; })?;
} }
@ -1254,6 +1262,44 @@ fn write_visual_sample_entry(
Ok(()) Ok(())
} }
fn av1_seq_level_idx(level: Option<&str>) -> u8 {
match level {
Some("2.0") => 0,
Some("2.1") => 1,
Some("2.2") => 2,
Some("2.3") => 3,
Some("3.0") => 4,
Some("3.1") => 5,
Some("3.2") => 6,
Some("3.3") => 7,
Some("4.0") => 8,
Some("4.1") => 9,
Some("4.2") => 10,
Some("4.3") => 11,
Some("5.0") => 12,
Some("5.1") => 13,
Some("5.2") => 14,
Some("5.3") => 15,
Some("6.0") => 16,
Some("6.1") => 17,
Some("6.2") => 18,
Some("6.3") => 19,
Some("7.0") => 20,
Some("7.1") => 21,
Some("7.2") => 22,
Some("7.3") => 23,
_ => 1,
}
}
fn av1_tier(tier: Option<&str>) -> u8 {
match tier {
Some("main") => 0,
Some("high") => 1,
_ => 0,
}
}
fn write_audio_sample_entry( fn write_audio_sample_entry(
v: &mut Vec<u8>, v: &mut Vec<u8>,
_cfg: &super::HeaderConfiguration, _cfg: &super::HeaderConfiguration,

View file

@ -16,6 +16,7 @@ use std::collections::VecDeque;
use std::mem; use std::mem;
use std::sync::Mutex; use std::sync::Mutex;
use crate::fmp4mux::obu::read_seq_header_obu_bytes;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use super::boxes; use super::boxes;
@ -224,6 +225,8 @@ struct Stream {
/// Mapping between running time and UTC time in ONVIF mode. /// Mapping between running time and UTC time in ONVIF mode.
running_time_utc_time_mapping: Option<(gst::Signed<gst::ClockTime>, gst::ClockTime)>, running_time_utc_time_mapping: Option<(gst::Signed<gst::ClockTime>, gst::ClockTime)>,
extra_header_data: Option<Vec<u8>>,
} }
#[derive(Default)] #[derive(Default)]
@ -800,6 +803,22 @@ impl FMP4Mux {
stream.dts_offset.display(), stream.dts_offset.display(),
); );
// If the stream is AV1, we need to parse the SequenceHeader OBU to include in the
// extra data of the 'av1C' box. It makes the stream playable in some browsers.
let s = stream.caps.structure(0).unwrap();
if !buffer.flags().contains(gst::BufferFlags::DELTA_UNIT)
&& s.name().as_str() == "video/x-av1"
{
let buf_map = buffer.map_readable().map_err(|_| {
gst::error!(CAT, obj: stream.sinkpad, "Failed to map buffer");
gst::FlowError::Error
})?;
stream.extra_header_data = read_seq_header_obu_bytes(buf_map.as_slice()).map_err(|_| {
gst::error!(CAT, obj: stream.sinkpad, "Failed to parse AV1 SequenceHeader OBU");
gst::FlowError::Error
})?;
}
let gop = Gop { let gop = Gop {
start_pts: pts, start_pts: pts,
start_dts: dts, start_dts: dts,
@ -2632,6 +2651,7 @@ impl FMP4Mux {
dts_offset: None, dts_offset: None,
current_position: gst::ClockTime::ZERO, current_position: gst::ClockTime::ZERO,
running_time_utc_time_mapping: None, running_time_utc_time_mapping: None,
extra_header_data: None,
}); });
} }
@ -2699,6 +2719,7 @@ impl FMP4Mux {
trak_timescale: s.sinkpad.imp().settings.lock().unwrap().trak_timescale, trak_timescale: s.sinkpad.imp().settings.lock().unwrap().trak_timescale,
delta_frames: s.delta_frames, delta_frames: s.delta_frames,
caps: s.caps.clone(), caps: s.caps.clone(),
extra_header_data: s.extra_header_data.clone(),
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
@ -3573,6 +3594,19 @@ impl ElementImpl for CMAFMux {
.field("width", gst::IntRange::new(1, u16::MAX as i32)) .field("width", gst::IntRange::new(1, u16::MAX as i32))
.field("height", gst::IntRange::new(1, u16::MAX as i32)) .field("height", gst::IntRange::new(1, u16::MAX as i32))
.build(), .build(),
gst::Structure::builder("video/x-av1")
.field("stream-format", "obu-stream")
.field("alignment", "tu")
.field("profile", gst::List::new(["main", "high", "professional"]))
.field(
"chroma-format",
gst::List::new(["4:0:0", "4:2:0", "4:2:2", "4:4:4"]),
)
.field("bit-depth-luma", gst::List::new([8u32, 10u32, 12u32]))
.field("bit-depth-chroma", gst::List::new([8u32, 10u32, 12u32]))
.field("width", gst::IntRange::new(1, u16::MAX as i32))
.field("height", gst::IntRange::new(1, u16::MAX as i32))
.build(),
gst::Structure::builder("video/x-h265") gst::Structure::builder("video/x-h265")
.field("stream-format", gst::List::new(["hvc1", "hev1"])) .field("stream-format", gst::List::new(["hvc1", "hev1"]))
.field("alignment", "au") .field("alignment", "au")

View file

@ -12,6 +12,8 @@ use gst::prelude::*;
mod boxes; mod boxes;
mod imp; mod imp;
mod obu;
glib::wrapper! { glib::wrapper! {
pub(crate) struct FMP4MuxPad(ObjectSubclass<imp::FMP4MuxPad>) @extends gst_base::AggregatorPad, gst::Pad, gst::Object; pub(crate) struct FMP4MuxPad(ObjectSubclass<imp::FMP4MuxPad>) @extends gst_base::AggregatorPad, gst::Pad, gst::Object;
} }
@ -102,6 +104,9 @@ pub(crate) struct HeaderStream {
/// Pre-defined trak timescale if not 0. /// Pre-defined trak timescale if not 0.
trak_timescale: u32, trak_timescale: u32,
// More data to be included in the fragmented stream header
extra_header_data: Option<Vec<u8>>,
} }
#[derive(Debug)] #[derive(Debug)]

303
mux/fmp4/src/fmp4mux/obu.rs Normal file
View file

@ -0,0 +1,303 @@
//
// Copyright (C) 2022 Vivienne Watermeier <vwatermeier@igalia.com>
//
// This Source Code Form is subject to the terms of the Mozilla Public License, v2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at
// <https://mozilla.org/MPL/2.0/>.
//
// SPDX-License-Identifier: MPL-2.0
#![allow(non_camel_case_types)]
use bitstream_io::{BigEndian, BitRead, BitReader, Endianness};
use std::io::{self, Cursor, Read, Seek, SeekFrom};
pub fn parse_leb128<R, E>(reader: &mut BitReader<R, E>) -> io::Result<(u32, u32)>
where
R: Read + Seek,
E: Endianness,
{
let mut value = 0;
let mut num_bytes = 0;
for i in 0..8 {
let byte = reader.read::<u32>(8)?;
value |= (byte & 0x7f) << (i * 7);
num_bytes += 1;
if byte & 0x80 == 0 {
break;
}
}
reader.byte_align();
Ok((value, num_bytes))
}
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub struct SizedObu {
pub obu_type: ObuType,
pub has_extension: bool,
/// If the OBU header is followed by a leb128 size field.
pub has_size_field: bool,
pub temporal_id: u8,
pub spatial_id: u8,
/// size of the OBU payload in bytes.
/// This may refer to different sizes in different contexts, not always
/// to the entire OBU payload as it is in the AV1 bitstream.
pub size: u32,
/// the number of bytes the leb128 size field will take up
/// when written with write_leb128().
/// This does not imply `has_size_field`, and does not necessarily match with
/// the length of the internal size field if present.
pub leb_size: u32,
pub header_len: u32,
/// indicates that only part of this OBU has been processed so far
pub is_fragment: bool,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ObuType {
Reserved,
SequenceHeader,
TemporalDelimiter,
FrameHeader,
TileGroup,
Metadata,
Frame,
RedundantFrameHeader,
TileList,
Padding,
}
impl Default for ObuType {
fn default() -> Self {
Self::Reserved
}
}
impl SizedObu {
/// Parse an OBU header and size field. If the OBU is not expected to contain
/// a size field, but the size is known from external information,
/// parse as an `UnsizedObu` and use `to_sized`.
pub fn parse<R, E>(reader: &mut BitReader<R, E>) -> io::Result<Self>
where
R: Read + Seek,
E: Endianness,
{
// check the forbidden bit
if reader.read_bit()? {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"forbidden bit in OBU header is set",
));
}
let obu_type = reader.read::<u8>(4)?.into();
let has_extension = reader.read_bit()?;
// require a size field
if !reader.read_bit()? {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"expected a size field",
));
}
// ignore the reserved bit
let _ = reader.read_bit()?;
let (temporal_id, spatial_id) = if has_extension {
(reader.read::<u8>(3)?, reader.read::<u8>(2)?)
} else {
(0, 0)
};
reader.byte_align();
let (size, leb_size) = parse_leb128(reader)?;
Ok(Self {
obu_type,
has_extension,
has_size_field: true,
temporal_id,
spatial_id,
size,
leb_size,
header_len: has_extension as u32 + 1,
is_fragment: false,
})
}
/// The amount of bytes this OBU will take up, including the space needed for
/// its leb128 size field.
pub fn full_size(&self) -> u32 {
self.size + self.leb_size + self.header_len
}
}
pub fn read_seq_header_obu_bytes(data: &[u8]) -> io::Result<Option<Vec<u8>>> {
let mut cursor = Cursor::new(data);
while cursor.position() < data.len() as u64 {
let obu_start = cursor.position();
let Ok(obu) = SizedObu::parse(&mut BitReader::endian(&mut cursor, BigEndian)) else {
break;
};
// set reader to the beginning of the OBU
cursor.seek(SeekFrom::Start(obu_start))?;
if obu.obu_type != ObuType::SequenceHeader {
// Skip the full OBU
cursor.seek(SeekFrom::Current(obu.full_size() as i64))?;
continue;
};
// read the full OBU
let mut bytes = vec![0; obu.full_size() as usize];
cursor.read_exact(&mut bytes)?;
return Ok(Some(bytes));
}
Ok(None)
}
impl From<u8> for ObuType {
fn from(n: u8) -> Self {
assert!(n < 16);
match n {
1 => Self::SequenceHeader,
2 => Self::TemporalDelimiter,
3 => Self::FrameHeader,
4 => Self::TileGroup,
5 => Self::Metadata,
6 => Self::Frame,
7 => Self::RedundantFrameHeader,
8 => Self::TileList,
15 => Self::Padding,
_ => Self::Reserved,
}
}
}
impl From<ObuType> for u8 {
fn from(ty: ObuType) -> Self {
match ty {
ObuType::Reserved => 0,
ObuType::SequenceHeader => 1,
ObuType::TemporalDelimiter => 2,
ObuType::FrameHeader => 3,
ObuType::TileGroup => 4,
ObuType::Metadata => 5,
ObuType::Frame => 6,
ObuType::RedundantFrameHeader => 7,
ObuType::TileList => 8,
ObuType::Padding => 15,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use bitstream_io::{BigEndian, BitReader};
use once_cell::sync::Lazy;
use std::io::Cursor;
#[allow(clippy::type_complexity)]
static OBUS: Lazy<Vec<(SizedObu, Vec<u8>)>> = Lazy::new(|| {
vec![
(
SizedObu {
obu_type: ObuType::TemporalDelimiter,
has_extension: false,
has_size_field: true,
temporal_id: 0,
spatial_id: 0,
size: 0,
leb_size: 1,
header_len: 1,
is_fragment: false,
},
vec![0b0001_0010, 0b0000_0000],
),
(
SizedObu {
obu_type: ObuType::Padding,
has_extension: false,
has_size_field: true,
temporal_id: 0,
spatial_id: 0,
size: 10,
leb_size: 1,
header_len: 1,
is_fragment: false,
},
vec![0b0111_1010, 0b0000_1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
),
(
SizedObu {
obu_type: ObuType::SequenceHeader,
has_extension: true,
has_size_field: true,
temporal_id: 4,
spatial_id: 3,
size: 5,
leb_size: 1,
header_len: 2,
is_fragment: false,
},
vec![0b0000_1110, 0b1001_1000, 0b0000_0101, 1, 2, 3, 4, 5],
),
(
SizedObu {
obu_type: ObuType::Frame,
has_extension: true,
has_size_field: true,
temporal_id: 4,
spatial_id: 3,
size: 5,
leb_size: 1,
header_len: 2,
is_fragment: false,
},
vec![0b0011_0110, 0b1001_1000, 0b0000_0101, 1, 2, 3, 4, 5],
),
]
});
#[test]
fn test_parse_rtp_obu() {
for (idx, (sized_obu, raw_bytes)) in (*OBUS).iter().enumerate() {
println!("running test {idx}...");
let mut reader = BitReader::endian(Cursor::new(&raw_bytes), BigEndian);
let obu_parsed = SizedObu::parse(&mut reader).unwrap();
assert_eq!(&obu_parsed, sized_obu);
if let Some(seq_header_obu_bytes) = read_seq_header_obu_bytes(raw_bytes).unwrap() {
println!("validation of sequence header obu read/write...");
assert_eq!(&seq_header_obu_bytes, raw_bytes);
}
}
}
#[test]
fn test_read_seq_header_from_bitstream() {
let mut bitstream = Vec::new();
let mut seq_header_bytes_raw = None;
for (obu, raw_bytes) in (*OBUS).iter() {
bitstream.extend(raw_bytes);
if obu.obu_type == ObuType::SequenceHeader {
seq_header_bytes_raw = Some(raw_bytes.clone());
}
}
let seq_header_obu_bytes = read_seq_header_obu_bytes(&bitstream).unwrap().unwrap();
assert_eq!(seq_header_obu_bytes, seq_header_bytes_raw.unwrap());
}
}

View file

@ -236,6 +236,26 @@ fn test_buffer_flags_single_vp9_stream_iso() {
test_buffer_flags_single_stream(false, false, caps); test_buffer_flags_single_stream(false, false, caps);
} }
#[test]
fn test_buffer_flags_single_av1_stream_cmaf() {
init();
let caps = gst::Caps::builder("video/x-av1")
.field("width", 1920i32)
.field("height", 1080i32)
.field("framerate", gst::Fraction::new(30, 1))
.field("profile", "main")
.field("tier", "main")
.field("level", "4.1")
.field("chroma-format", "4:2:0")
.field("bit-depth-luma", 8u32)
.field("bit-depth-chroma", 8u32)
.field("colorimetry", "bt709")
.build();
test_buffer_flags_single_stream(true, false, caps);
}
#[test] #[test]
fn test_buffer_flags_multi_stream() { fn test_buffer_flags_multi_stream() {
init(); init();

View file

@ -16,6 +16,7 @@ gst-audio = { workspace = true, features = ["v1_18"] }
gst-video = { workspace = true, features = ["v1_18"] } gst-video = { workspace = true, features = ["v1_18"] }
gst-pbutils = { workspace = true, features = ["v1_18"] } gst-pbutils = { workspace = true, features = ["v1_18"] }
once_cell.workspace = true once_cell.workspace = true
bitstream-io = "2.3"
[lib] [lib]
name = "gstmp4" name = "gstmp4"

View file

@ -56,18 +56,31 @@ fn write_full_box<T, F: FnOnce(&mut Vec<u8>) -> Result<T, Error>>(
} }
/// Creates `ftyp` box /// Creates `ftyp` box
pub(super) fn create_ftyp(variant: super::Variant) -> Result<gst::Buffer, Error> { pub(super) fn create_ftyp(
variant: super::Variant,
content_caps: &[&gst::CapsRef],
) -> Result<gst::Buffer, Error> {
let mut v = vec![]; let mut v = vec![];
let mut minor_version = 0u32;
let (brand, compatible_brands) = match variant { let (brand, mut compatible_brands) = match variant {
super::Variant::ISO | super::Variant::ONVIF => (b"iso4", vec![b"mp41", b"mp42", b"isom"]), super::Variant::ISO | super::Variant::ONVIF => (b"iso4", vec![b"mp41", b"mp42", b"isom"]),
}; };
for caps in content_caps {
let s = caps.structure(0).unwrap();
if let (super::Variant::ISO, "video/x-av1") = (variant, s.name().as_str()) {
minor_version = 1;
compatible_brands = vec![b"iso4", b"av01"];
break;
}
}
write_box(&mut v, b"ftyp", |v| { write_box(&mut v, b"ftyp", |v| {
// major brand // major brand
v.extend(brand); v.extend(brand);
// minor version // minor version
v.extend(0u32.to_be_bytes()); v.extend(minor_version.to_be_bytes());
// compatible brands // compatible brands
v.extend(compatible_brands.into_iter().flatten()); v.extend(compatible_brands.into_iter().flatten());
@ -916,8 +929,9 @@ fn write_visual_sample_entry(
_ => unreachable!(), _ => unreachable!(),
}; };
let level = 1; // FIXME // TODO: Use `gst_codec_utils_av1_get_seq_level_idx` when exposed in bindings
let tier = 0; // FIXME let level = av1_seq_level_idx(s.get::<&str>("level").ok());
let tier = av1_tier(s.get::<&str>("tier").ok());
let (high_bitdepth, twelve_bit) = let (high_bitdepth, twelve_bit) =
match s.get::<u32>("bit-depth-luma").unwrap() { match s.get::<u32>("bit-depth-luma").unwrap() {
8 => (false, false), 8 => (false, false),
@ -962,6 +976,10 @@ fn write_visual_sample_entry(
v.extend_from_slice(&codec_data); v.extend_from_slice(&codec_data);
} }
if let Some(extra_data) = &stream.extra_header_data {
// unsigned int(8) configOBUs[];
v.extend_from_slice(extra_data.as_slice());
}
Ok(()) Ok(())
})?; })?;
} }
@ -1070,6 +1088,44 @@ fn write_visual_sample_entry(
Ok(()) Ok(())
} }
fn av1_seq_level_idx(level: Option<&str>) -> u8 {
match level {
Some("2.0") => 0,
Some("2.1") => 1,
Some("2.2") => 2,
Some("2.3") => 3,
Some("3.0") => 4,
Some("3.1") => 5,
Some("3.2") => 6,
Some("3.3") => 7,
Some("4.0") => 8,
Some("4.1") => 9,
Some("4.2") => 10,
Some("4.3") => 11,
Some("5.0") => 12,
Some("5.1") => 13,
Some("5.2") => 14,
Some("5.3") => 15,
Some("6.0") => 16,
Some("6.1") => 17,
Some("6.2") => 18,
Some("6.3") => 19,
Some("7.0") => 20,
Some("7.1") => 21,
Some("7.2") => 22,
Some("7.3") => 23,
_ => 1,
}
}
fn av1_tier(tier: Option<&str>) -> u8 {
match tier {
Some("main") => 0,
Some("high") => 1,
_ => 0,
}
}
fn write_audio_sample_entry( fn write_audio_sample_entry(
v: &mut Vec<u8>, v: &mut Vec<u8>,
_header: &super::Header, _header: &super::Header,

View file

@ -15,6 +15,7 @@ use gst_base::subclass::prelude::*;
use std::collections::VecDeque; use std::collections::VecDeque;
use std::sync::Mutex; use std::sync::Mutex;
use crate::mp4mux::obu::read_seq_header_obu_bytes;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use super::boxes; use super::boxes;
@ -135,6 +136,8 @@ struct Stream {
/// In ONVIF mode, the mapping between running time and UTC time (UNIX) /// In ONVIF mode, the mapping between running time and UTC time (UNIX)
running_time_utc_time_mapping: Option<(gst::Signed<gst::ClockTime>, gst::ClockTime)>, running_time_utc_time_mapping: Option<(gst::Signed<gst::ClockTime>, gst::ClockTime)>,
extra_header_data: Option<Vec<u8>>,
} }
#[derive(Default)] #[derive(Default)]
@ -548,6 +551,22 @@ impl MP4Mux {
*duration = Some(dur); *duration = Some(dur);
// If the stream is AV1, we need to parse the SequenceHeader OBU to include in the
// extra data of the 'av1C' box. It makes the stream playable in some browsers.
let s = stream.caps.structure(0).unwrap();
if !buffer.flags().contains(gst::BufferFlags::DELTA_UNIT)
&& s.name().as_str() == "video/x-av1"
{
let buf_map = buffer.map_readable().map_err(|_| {
gst::error!(CAT, obj: stream.sinkpad, "Failed to map buffer");
gst::FlowError::Error
})?;
stream.extra_header_data = read_seq_header_obu_bytes(buf_map.as_slice()).map_err(|_| {
gst::error!(CAT, obj: stream.sinkpad, "Failed to parse AV1 SequenceHeader OBU");
gst::FlowError::Error
})?;
}
return Ok(()); return Ok(());
} }
None => { None => {
@ -957,6 +976,7 @@ impl MP4Mux {
earliest_pts: None, earliest_pts: None,
end_pts: None, end_pts: None,
running_time_utc_time_mapping: None, running_time_utc_time_mapping: None,
extra_header_data: None,
}); });
} }
@ -1321,7 +1341,15 @@ impl AggregatorImpl for MP4Mux {
// ... and then create the ftyp box plus mdat box header so we can start outputting // ... and then create the ftyp box plus mdat box header so we can start outputting
// actual data // actual data
let ftyp = boxes::create_ftyp(self.obj().class().as_ref().variant).map_err(|err| { let ftyp = boxes::create_ftyp(
self.obj().class().as_ref().variant,
&state
.streams
.iter()
.map(|s| s.caps.as_ref())
.collect::<Vec<_>>(),
)
.map_err(|err| {
gst::error!(CAT, imp: self, "Failed to create ftyp box: {err}"); gst::error!(CAT, imp: self, "Failed to create ftyp box: {err}");
gst::FlowError::Error gst::FlowError::Error
})?; })?;
@ -1380,6 +1408,7 @@ impl AggregatorImpl for MP4Mux {
earliest_pts, earliest_pts,
end_pts, end_pts,
chunks: stream.chunks, chunks: stream.chunks,
extra_header_data: stream.extra_header_data.clone(),
}); });
} }

View file

@ -11,6 +11,7 @@ use gst::prelude::*;
mod boxes; mod boxes;
mod imp; mod imp;
mod obu;
glib::wrapper! { glib::wrapper! {
pub(crate) struct MP4MuxPad(ObjectSubclass<imp::MP4MuxPad>) @extends gst_base::AggregatorPad, gst::Pad, gst::Object; pub(crate) struct MP4MuxPad(ObjectSubclass<imp::MP4MuxPad>) @extends gst_base::AggregatorPad, gst::Pad, gst::Object;
@ -126,6 +127,9 @@ pub(crate) struct Stream {
/// All the chunks stored for this stream /// All the chunks stored for this stream
chunks: Vec<Chunk>, chunks: Vec<Chunk>,
// More data to be included in the fragmented stream header
extra_header_data: Option<Vec<u8>>,
} }
#[derive(Debug)] #[derive(Debug)]

303
mux/mp4/src/mp4mux/obu.rs Normal file
View file

@ -0,0 +1,303 @@
//
// Copyright (C) 2022 Vivienne Watermeier <vwatermeier@igalia.com>
//
// This Source Code Form is subject to the terms of the Mozilla Public License, v2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at
// <https://mozilla.org/MPL/2.0/>.
//
// SPDX-License-Identifier: MPL-2.0
#![allow(non_camel_case_types)]
use bitstream_io::{BigEndian, BitRead, BitReader, Endianness};
use std::io::{self, Cursor, Read, Seek, SeekFrom};
pub fn parse_leb128<R, E>(reader: &mut BitReader<R, E>) -> io::Result<(u32, u32)>
where
R: Read + Seek,
E: Endianness,
{
let mut value = 0;
let mut num_bytes = 0;
for i in 0..8 {
let byte = reader.read::<u32>(8)?;
value |= (byte & 0x7f) << (i * 7);
num_bytes += 1;
if byte & 0x80 == 0 {
break;
}
}
reader.byte_align();
Ok((value, num_bytes))
}
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub struct SizedObu {
pub obu_type: ObuType,
pub has_extension: bool,
/// If the OBU header is followed by a leb128 size field.
pub has_size_field: bool,
pub temporal_id: u8,
pub spatial_id: u8,
/// size of the OBU payload in bytes.
/// This may refer to different sizes in different contexts, not always
/// to the entire OBU payload as it is in the AV1 bitstream.
pub size: u32,
/// the number of bytes the leb128 size field will take up
/// when written with write_leb128().
/// This does not imply `has_size_field`, and does not necessarily match with
/// the length of the internal size field if present.
pub leb_size: u32,
pub header_len: u32,
/// indicates that only part of this OBU has been processed so far
pub is_fragment: bool,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ObuType {
Reserved,
SequenceHeader,
TemporalDelimiter,
FrameHeader,
TileGroup,
Metadata,
Frame,
RedundantFrameHeader,
TileList,
Padding,
}
impl Default for ObuType {
fn default() -> Self {
Self::Reserved
}
}
impl SizedObu {
/// Parse an OBU header and size field. If the OBU is not expected to contain
/// a size field, but the size is known from external information,
/// parse as an `UnsizedObu` and use `to_sized`.
pub fn parse<R, E>(reader: &mut BitReader<R, E>) -> io::Result<Self>
where
R: Read + Seek,
E: Endianness,
{
// check the forbidden bit
if reader.read_bit()? {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"forbidden bit in OBU header is set",
));
}
let obu_type = reader.read::<u8>(4)?.into();
let has_extension = reader.read_bit()?;
// require a size field
if !reader.read_bit()? {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"expected a size field",
));
}
// ignore the reserved bit
let _ = reader.read_bit()?;
let (temporal_id, spatial_id) = if has_extension {
(reader.read::<u8>(3)?, reader.read::<u8>(2)?)
} else {
(0, 0)
};
reader.byte_align();
let (size, leb_size) = parse_leb128(reader)?;
Ok(Self {
obu_type,
has_extension,
has_size_field: true,
temporal_id,
spatial_id,
size,
leb_size,
header_len: has_extension as u32 + 1,
is_fragment: false,
})
}
/// The amount of bytes this OBU will take up, including the space needed for
/// its leb128 size field.
pub fn full_size(&self) -> u32 {
self.size + self.leb_size + self.header_len
}
}
pub fn read_seq_header_obu_bytes(data: &[u8]) -> io::Result<Option<Vec<u8>>> {
let mut cursor = Cursor::new(data);
while cursor.position() < data.len() as u64 {
let obu_start = cursor.position();
let Ok(obu) = SizedObu::parse(&mut BitReader::endian(&mut cursor, BigEndian)) else {
break;
};
// set reader to the beginning of the OBU
cursor.seek(SeekFrom::Start(obu_start))?;
if obu.obu_type != ObuType::SequenceHeader {
// Skip the full OBU
cursor.seek(SeekFrom::Current(obu.full_size() as i64))?;
continue;
};
// read the full OBU
let mut bytes = vec![0; obu.full_size() as usize];
cursor.read_exact(&mut bytes)?;
return Ok(Some(bytes));
}
Ok(None)
}
impl From<u8> for ObuType {
fn from(n: u8) -> Self {
assert!(n < 16);
match n {
1 => Self::SequenceHeader,
2 => Self::TemporalDelimiter,
3 => Self::FrameHeader,
4 => Self::TileGroup,
5 => Self::Metadata,
6 => Self::Frame,
7 => Self::RedundantFrameHeader,
8 => Self::TileList,
15 => Self::Padding,
_ => Self::Reserved,
}
}
}
impl From<ObuType> for u8 {
fn from(ty: ObuType) -> Self {
match ty {
ObuType::Reserved => 0,
ObuType::SequenceHeader => 1,
ObuType::TemporalDelimiter => 2,
ObuType::FrameHeader => 3,
ObuType::TileGroup => 4,
ObuType::Metadata => 5,
ObuType::Frame => 6,
ObuType::RedundantFrameHeader => 7,
ObuType::TileList => 8,
ObuType::Padding => 15,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use bitstream_io::{BigEndian, BitReader};
use once_cell::sync::Lazy;
use std::io::Cursor;
#[allow(clippy::type_complexity)]
static OBUS: Lazy<Vec<(SizedObu, Vec<u8>)>> = Lazy::new(|| {
vec![
(
SizedObu {
obu_type: ObuType::TemporalDelimiter,
has_extension: false,
has_size_field: true,
temporal_id: 0,
spatial_id: 0,
size: 0,
leb_size: 1,
header_len: 1,
is_fragment: false,
},
vec![0b0001_0010, 0b0000_0000],
),
(
SizedObu {
obu_type: ObuType::Padding,
has_extension: false,
has_size_field: true,
temporal_id: 0,
spatial_id: 0,
size: 10,
leb_size: 1,
header_len: 1,
is_fragment: false,
},
vec![0b0111_1010, 0b0000_1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
),
(
SizedObu {
obu_type: ObuType::SequenceHeader,
has_extension: true,
has_size_field: true,
temporal_id: 4,
spatial_id: 3,
size: 5,
leb_size: 1,
header_len: 2,
is_fragment: false,
},
vec![0b0000_1110, 0b1001_1000, 0b0000_0101, 1, 2, 3, 4, 5],
),
(
SizedObu {
obu_type: ObuType::Frame,
has_extension: true,
has_size_field: true,
temporal_id: 4,
spatial_id: 3,
size: 5,
leb_size: 1,
header_len: 2,
is_fragment: false,
},
vec![0b0011_0110, 0b1001_1000, 0b0000_0101, 1, 2, 3, 4, 5],
),
]
});
#[test]
fn test_parse_rtp_obu() {
for (idx, (sized_obu, raw_bytes)) in (*OBUS).iter().enumerate() {
println!("running test {idx}...");
let mut reader = BitReader::endian(Cursor::new(&raw_bytes), BigEndian);
let obu_parsed = SizedObu::parse(&mut reader).unwrap();
assert_eq!(&obu_parsed, sized_obu);
if let Some(seq_header_obu_bytes) = read_seq_header_obu_bytes(raw_bytes).unwrap() {
println!("validation of sequence header obu read/write...");
assert_eq!(&seq_header_obu_bytes, raw_bytes);
}
}
}
#[test]
fn test_read_seq_header_from_bitstream() {
let mut bitstream = Vec::new();
let mut seq_header_bytes_raw = None;
for (obu, raw_bytes) in (*OBUS).iter() {
bitstream.extend(raw_bytes);
if obu.obu_type == ObuType::SequenceHeader {
seq_header_bytes_raw = Some(raw_bytes.clone());
}
}
let seq_header_obu_bytes = read_seq_header_obu_bytes(&bitstream).unwrap().unwrap();
assert_eq!(seq_header_obu_bytes, seq_header_bytes_raw.unwrap());
}
}

View file

@ -152,3 +152,23 @@ fn test_roundtrip_vp9_flac() {
pipeline.into_completion(); pipeline.into_completion();
}) })
} }
#[test]
fn test_roundtrip_av1_aac() {
init();
test_basic_with("av1enc ! av1parse", "avenc_aac ! aacparse", |location| {
let Ok(pipeline) = gst::parse::launch(
"filesrc name=src ! qtdemux name=demux \
demux.audio_0 ! queue ! avdec_aac ! fakesink \
demux.video_0 ! queue ! av1dec ! fakesink",
) else {
panic!("could not build decoding pipeline")
};
let pipeline = Pipeline(pipeline.downcast::<gst::Pipeline>().unwrap());
pipeline
.by_name("src")
.unwrap()
.set_property("location", location.display().to_string());
pipeline.into_completion();
})
}