mux/mp4: add image sequence mode

Image sequence is defined in ISO/IEC 23008-12 (HEIF), as a variant of
video. The key difference is that image sequence timing is advisory.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/2101>
This commit is contained in:
Brad Hards 2025-02-03 08:07:23 +11:00 committed by GStreamer Marge Bot
parent 130e13a33e
commit 2130c3bfbe
5 changed files with 341 additions and 34 deletions

View file

@ -5219,6 +5219,18 @@
],
"kind": "object",
"properties": {
"image-sequence": {
"blurb": "Generate ISO/IEC 23008-12 image sequence instead of video",
"conditionally-available": false,
"construct": false,
"construct-only": false,
"controllable": false,
"default": "false",
"mutable": "ready",
"readable": true,
"type": "gboolean",
"writable": true
},
"trak-timescale": {
"blurb": "Timescale to use for the track (units per second, 0 is automatic)",
"conditionally-available": false,

View file

@ -69,29 +69,14 @@ fn write_full_box<T, F: FnOnce(&mut Vec<u8>) -> Result<T, Error>>(
/// Creates `ftyp` box
pub(super) fn create_ftyp(
variant: super::Variant,
content_caps: &[&gst::CapsRef],
major_brand: &[u8; 4],
minor_version: u32,
compatible_brands: Vec<&[u8; 4]>,
) -> Result<gst::Buffer, Error> {
let mut v = vec![];
let mut minor_version = 0u32;
let (brand, mut compatible_brands) = match variant {
super::Variant::ISO | super::Variant::ONVIF => (b"iso4", vec![b"mp41", b"mp42", b"isom"]),
};
for caps in content_caps {
let s = caps.structure(0).unwrap();
if let (super::Variant::ISO, "video/x-av1") = (variant, s.name().as_str()) {
minor_version = 1;
compatible_brands = vec![b"iso4", b"av01"];
break;
}
}
write_box(&mut v, b"ftyp", |v| {
// major brand
v.extend(brand);
// minor version
v.extend(major_brand);
v.extend(minor_version.to_be_bytes());
// compatible brands
v.extend(compatible_brands.into_iter().flatten());
@ -494,7 +479,14 @@ fn write_hdlr(
let s = stream.caps.structure(0).unwrap();
let (handler_type, name) = match s.name().as_str() {
"video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9" | "video/x-av1"
| "image/jpeg" | "video/x-raw" => (b"vide", b"VideoHandler\0".as_slice()),
| "image/jpeg" | "video/x-raw" => {
if stream.image_sequence {
// See ISO/IEC 23008-12:2022 Section 7.2.2
(b"pict", b"PictureHandler\0".as_slice())
} else {
(b"vide", b"VideoHandler\0".as_slice())
}
}
"audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw"
| "audio/x-adpcm" => (b"soun", b"SoundHandler\0".as_slice()),
"application/x-onvif-metadata" => (b"meta", b"MetadataHandler\0".as_slice()),
@ -1052,6 +1044,49 @@ fn write_visual_sample_entry(
})?;
}
if stream.image_sequence {
    // Image sequences carry a `ccst` full box in the sample entry. Its payload is
    // a single 32-bit word whose top bits hold three fields; only those fields
    // vary with the stream format, so pick them first and write the box once.
    //
    //   all_ref_pics_intra: 0 = don't know, 1 = reference pictures are only intra
    //   intra_pred_used:    0 = no, 1 = yes, or maybe
    //   max_ref_per_pic:    0 = none, 15 = any number
    let (all_ref_pics_intra, intra_pred_used, max_ref_per_pic): (u32, u32, u32) =
        match s.name().as_str() {
            // intra formats
            "video/x-vp9" | "video/x-vp8" | "image/jpeg" => (1, 1, 0),
            // uncompressed
            "video/x-raw" => (1, 0, 0),
            // anything else: make no promises about reference pictures
            _ => (0, 1, 15),
        };

    // 1 bit + 1 bit + 4 bits, packed from the most significant bit downwards;
    // the remaining 26 bits stay zero.
    let packed_bits =
        (all_ref_pics_intra << 31) | (intra_pred_used << 30) | (max_ref_per_pic << 26);

    write_full_box(v, b"ccst", FULL_BOX_VERSION_0, FULL_BOX_FLAGS_NONE, |v| {
        v.extend(packed_bits.to_be_bytes());
        Ok(())
    })?;
}
// TODO: write btrt bitrate box based on tags
Ok(())

View file

@ -14,6 +14,7 @@ use gst_base::prelude::*;
use gst_base::subclass::prelude::*;
use num_integer::Integer;
use std::collections::HashSet;
use std::collections::VecDeque;
use std::sync::Mutex;
@ -265,6 +266,18 @@ impl Stream {
10_000
}
}
/// Returns the current `image_sequence_mode` setting of this stream's sink pad.
fn image_sequence_mode(&self) -> bool {
    // The pad settings live behind a mutex; the flag is a plain `bool`, so it is
    // copied out and the guard is released when this function returns.
    let pad_settings = self.sinkpad.imp().settings.lock().unwrap();
    pad_settings.image_sequence_mode
}
}
#[derive(Default)]
@ -1620,13 +1633,54 @@ impl AggregatorImpl for MP4Mux {
// ... and then create the ftyp box plus mdat box header so we can start outputting
// actual data
let mut major_brand = b"iso4";
let mut minor_version = 0u32;
let mut compatible_brands: HashSet<&[u8; 4]> = HashSet::new();
let mut have_image_sequence = false; // we'll mark true if an image sequence
let mut have_only_image_sequence = true; // we'll mark false if video found
let variant = self.obj().class().as_ref().variant;
for stream in state.streams.iter().as_ref() {
let caps_structure = stream.caps.structure(0).unwrap();
if let (super::Variant::ISO, "video/x-av1") =
(variant, caps_structure.name().as_str())
{
minor_version = 1;
compatible_brands.insert(b"iso4");
compatible_brands.insert(b"av01");
}
if stream.image_sequence_mode() {
compatible_brands.insert(b"iso8");
compatible_brands.insert(b"unif");
compatible_brands.insert(b"msf1");
have_image_sequence = true;
} else {
match caps_structure.name().as_str() {
"video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9"
| "video/x-av1" | "image/jpeg" | "video/x-raw" => {
have_only_image_sequence = false;
}
_ => {}
}
match caps_structure.name().as_str() {
"video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9"
| "image/jpeg" | "video/x-raw" | "audio/mpeg" | "audio/x-opus"
| "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => {
compatible_brands.insert(b"mp41");
compatible_brands.insert(b"mp42");
compatible_brands.insert(b"isom");
}
_ => {}
}
}
}
if have_image_sequence && have_only_image_sequence {
major_brand = b"msf1";
}
let ftyp = boxes::create_ftyp(
self.obj().class().as_ref().variant,
&state
.streams
.iter()
.map(|s| s.caps.as_ref())
.collect::<Vec<_>>(),
major_brand,
minor_version,
Vec::from_iter(compatible_brands),
)
.map_err(|err| {
gst::error!(CAT, imp = self, "Failed to create ftyp box: {err}");
@ -1695,6 +1749,7 @@ impl AggregatorImpl for MP4Mux {
Vec::new()
}),
image_sequence: stream.image_sequence_mode(),
chunks: stream.chunks,
extra_header_data: stream.extra_header_data.clone(),
orientation: stream.orientation,
@ -2081,6 +2136,7 @@ impl MP4MuxImpl for ONVIFMP4Mux {
/// Per-pad muxer settings, read and written through the pad's GObject properties.
#[derive(Default, Clone)]
struct PadSettings {
    /// Backing value of the `trak-timescale` property: timescale to use for the
    /// track, in units per second (0 is automatic).
    trak_timescale: u32,
    /// Backing value of the `image-sequence` property: generate an
    /// ISO/IEC 23008-12 image sequence instead of video.
    image_sequence_mode: bool,
}
#[derive(Default)]
@ -2098,11 +2154,18 @@ impl ObjectSubclass for MP4MuxPad {
impl ObjectImpl for MP4MuxPad {
fn properties() -> &'static [glib::ParamSpec] {
static PROPERTIES: LazyLock<Vec<glib::ParamSpec>> = LazyLock::new(|| {
vec![glib::ParamSpecUInt::builder("trak-timescale")
.nick("Track Timescale")
.blurb("Timescale to use for the track (units per second, 0 is automatic)")
.mutable_ready()
.build()]
vec![
glib::ParamSpecUInt::builder("trak-timescale")
.nick("Track Timescale")
.blurb("Timescale to use for the track (units per second, 0 is automatic)")
.mutable_ready()
.build(),
glib::ParamSpecBoolean::builder("image-sequence")
.nick("Generate image sequence")
.blurb("Generate ISO/IEC 23008-12 image sequence instead of video")
.mutable_ready()
.build(),
]
});
&PROPERTIES
@ -2115,6 +2178,11 @@ impl ObjectImpl for MP4MuxPad {
settings.trak_timescale = value.get().expect("type checked upstream");
}
"image-sequence" => {
let mut settings = self.settings.lock().unwrap();
settings.image_sequence_mode = value.get().expect("type checked upstream");
}
_ => unimplemented!(),
}
}
@ -2126,6 +2194,11 @@ impl ObjectImpl for MP4MuxPad {
settings.trak_timescale.to_value()
}
"image-sequence" => {
let settings = self.settings.lock().unwrap();
settings.image_sequence_mode.to_value()
}
_ => unimplemented!(),
}
}

View file

@ -208,6 +208,9 @@ pub(crate) struct Stream {
/// Edit list clipping information
elst_infos: Vec<ElstInfo>,
/// Whether this stream should be encoded as an ISO/IEC 23008-12 image sequence
image_sequence: bool,
}
#[derive(Debug)]

View file

@ -7,9 +7,12 @@
// SPDX-License-Identifier: MPL-2.0
//
use std::path::Path;
use std::{
fs,
io::{Cursor, Read},
path::Path,
};
use gst::prelude::*;
use gst_pbutils::prelude::*;
fn init() {
@ -63,6 +66,40 @@ impl Pipeline {
}
}
/// Parsed contents of an `ftyp` box, as read back from a muxed file by the tests.
#[derive(Debug)]
struct FileTypeBox {
    /// Major brand four-character code.
    major_brand: [u8; 4],
    /// Minor version, decoded from its big-endian on-disk form.
    minor_version: u32,
    /// Compatible brands, in the order they appear in the box.
    compatible_brands: Vec<[u8; 4]>,
}

impl FileTypeBox {
    /// Bytes taken by the fixed part of the box: size, four-character code,
    /// major brand and minor version (4 bytes each).
    const FIXED_PART_SIZE: u32 = 16;

    /// Parses an `ftyp` box starting at the reader's current position.
    ///
    /// The number of compatible brands is derived from the 32-bit box size; any
    /// trailing bytes that do not make up a whole 4-byte brand are ignored.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error (typically `UnexpectedEof`) if the input
    /// ends before the declared box does, and `InvalidData` if the declared box
    /// size is smaller than the fixed part of the box.
    ///
    /// # Panics
    ///
    /// Panics if the box's four-character code is not `ftyp`.
    fn read(mut reader: Cursor<&[u8]>) -> std::io::Result<FileTypeBox> {
        let mut box_size_buf = [0u8; 4];
        reader.read_exact(&mut box_size_buf)?;
        let box_size = u32::from_be_bytes(box_size_buf);

        let mut four_cc = [0u8; 4];
        reader.read_exact(&mut four_cc)?;
        assert_eq!(four_cc, *b"ftyp");

        let mut major_brand = [0u8; 4];
        reader.read_exact(&mut major_brand)?;

        let mut minor_version_buf = [0u8; 4];
        reader.read_exact(&mut minor_version_buf)?;
        let minor_version = u32::from_be_bytes(minor_version_buf);

        // A declared size below the fixed part would underflow the subtraction
        // (panic in debug builds, a huge bogus brand count in release builds).
        let brands_len = box_size
            .checked_sub(Self::FIXED_PART_SIZE)
            .ok_or_else(|| {
                std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    format!(
                        "ftyp box size {box_size} is smaller than its {}-byte fixed part",
                        Self::FIXED_PART_SIZE
                    ),
                )
            })?;
        let num_brands = usize::try_from(brands_len / 4)
            .map_err(|err| std::io::Error::new(std::io::ErrorKind::InvalidData, err))?;

        // Never reserve more entries than the input could possibly contain, so a
        // corrupt size field cannot trigger an enormous allocation up front.
        let capacity = num_brands.min(reader.get_ref().len() / 4);
        let mut compatible_brands = Vec::with_capacity(capacity);
        for _ in 0..num_brands {
            let mut compatible_brand = [0u8; 4];
            reader.read_exact(&mut compatible_brand)?;
            compatible_brands.push(compatible_brand);
        }

        Ok(FileTypeBox {
            major_brand,
            minor_version,
            compatible_brands,
        })
    }
}
fn test_basic_with(video_enc: &str, audio_enc: &str, cb: impl FnOnce(&Path)) {
let Ok(pipeline) = gst::parse::launch(&format!(
"videotestsrc num-buffers=99 ! {video_enc} ! mux. \
@ -212,7 +249,9 @@ fn test_roundtrip_uncompressed(video_format: &str, width: u32, height: u32) {
}
fn test_encode_uncompressed(video_format: &str, width: u32, height: u32) {
let pipeline_text = format!("videotestsrc num-buffers=250 ! video/x-raw,format={format},width={width},height={height} ! isomp4mux ! filesink location={format}_{width}x{height}.mp4", format = video_format);
let filename = format!("{video_format}_{width}x{height}.mp4");
let pipeline_text = format!("videotestsrc num-buffers=250 ! video/x-raw,format={video_format},width={width},height={height} ! isomp4mux ! filesink location={filename}");
let Ok(pipeline) = gst::parse::launch(&pipeline_text) else {
panic!("could not build encoding pipeline")
};
@ -238,6 +277,31 @@ fn test_encode_uncompressed(video_format: &str, width: u32, height: u32) {
pipeline
.set_state(gst::State::Null)
.expect("Unable to set the pipeline to the `Null` state");
test_expected_uncompressed_output(filename);
}
/// Reads the muxed file at `filename` and checks that it starts with the
/// default MPEG `ftyp` box.
fn test_expected_uncompressed_output(filename: String) {
    let file_bytes = fs::read(filename).unwrap();
    test_default_mpeg_file_type_box(Cursor::new(file_bytes.as_slice()));
}
fn test_expected_file_type_box(
expected_major_brand: &[u8; 4],
expected_minor_version: u32,
expected_compatible_brands: Vec<[u8; 4]>,
cursor: Cursor<&[u8]>,
) {
let ftyp = FileTypeBox::read(cursor).unwrap();
assert_eq!(ftyp.major_brand, *expected_major_brand);
assert_eq!(ftyp.minor_version, expected_minor_version);
let mut sorted_compatible_brands = ftyp.compatible_brands.clone();
sorted_compatible_brands.sort();
let mut sorted_expected_compatible_brands = expected_compatible_brands.clone();
sorted_expected_compatible_brands.sort();
assert_eq!(sorted_compatible_brands, sorted_expected_compatible_brands);
}
#[test]
@ -457,3 +521,123 @@ fn encode_uncompressed_bgrp() {
init();
test_encode_uncompressed("BGRP", 1275, 713);
}
fn test_encode_uncompressed_image_sequence(video_format: &str, width: u32, height: u32) {
let filename = format!("{video_format}_{width}x{height}.heifs");
let pipeline_text = format!("videotestsrc num-buffers=10 ! video/x-raw,format={video_format},width={width},height={height} ! isomp4mux name=mux ! filesink location={filename}");
let Ok(pipeline) = gst::parse::launch(&pipeline_text) else {
panic!("could not build encoding pipeline")
};
let pipeline = Pipeline(pipeline.downcast::<gst::Pipeline>().unwrap());
let mux = pipeline.by_name("mux").unwrap();
let sink_pad = &mux.sink_pads()[0];
sink_pad.set_property("image-sequence", true);
pipeline
.set_state(gst::State::Playing)
.expect("Unable to set the pipeline to the `Playing` state");
for msg in pipeline.bus().unwrap().iter_timed(gst::ClockTime::NONE) {
use gst::MessageView;
match msg.view() {
MessageView::Eos(..) => break,
MessageView::Error(err) => {
panic!(
"Error from {:?}: {} ({:?})",
err.src().map(|s| s.path_string()),
err.error(),
err.debug()
);
}
_ => (),
}
}
pipeline
.set_state(gst::State::Null)
.expect("Unable to set the pipeline to the `Null` state");
test_expected_image_sequence_output(filename);
}
/// Reads the muxed file at `filename` and checks that it starts with the
/// `ftyp` box expected for an image sequence.
fn test_expected_image_sequence_output(filename: String) {
    let file_bytes = fs::read(filename).unwrap();
    test_expected_image_sequence_file_type_box_content(Cursor::new(file_bytes.as_slice()));
}
/// Asserts that `cursor` starts with an `ftyp` box whose major brand is `msf1`,
/// whose minor version is 0 and whose compatible brands are exactly `iso8`,
/// `msf1` and `unif` (in any order).
fn test_expected_image_sequence_file_type_box_content(cursor: Cursor<&[u8]>) {
    let compatible_brands = vec![*b"iso8", *b"msf1", *b"unif"];
    test_expected_file_type_box(b"msf1", 0, compatible_brands, cursor);
}
/// Image sequence encoding of 1275x713 RGB frames.
#[test]
fn encode_uncompressed_image_sequence_rgb() {
    // Initialise the test environment before building any pipeline.
    init();
    test_encode_uncompressed_image_sequence("RGB", 1275, 713);
}
/// Image sequence encoding of 1275x714 NV12 frames.
#[test]
fn encode_uncompressed_image_sequence_nv12() {
    // Initialise the test environment before building any pipeline.
    init();
    test_encode_uncompressed_image_sequence("NV12", 1275, 714);
}
/// Muxes an Opus-encoded audio-only stream into `audio_only.mp4` and checks
/// that the file starts with the default MPEG `ftyp` box.
#[test]
fn test_encode_audio_trak() {
    init();

    let output_path = "audio_only.mp4".to_string();
    let launch_line = format!("audiotestsrc num-buffers=100 ! audioconvert ! opusenc ! isomp4mux ! filesink location={output_path}");
    let pipeline = match gst::parse::launch(&launch_line) {
        Ok(element) => element,
        Err(_) => panic!("could not build encoding pipeline"),
    };

    pipeline
        .set_state(gst::State::Playing)
        .expect("Unable to set the pipeline to the `Playing` state");

    // Block on the bus until end-of-stream; any error message fails the test.
    for message in pipeline.bus().unwrap().iter_timed(gst::ClockTime::NONE) {
        match message.view() {
            gst::MessageView::Eos(..) => break,
            gst::MessageView::Error(failure) => panic!(
                "Error from {:?}: {} ({:?})",
                failure.src().map(|s| s.path_string()),
                failure.error(),
                failure.debug()
            ),
            _ => {}
        }
    }

    pipeline
        .set_state(gst::State::Null)
        .expect("Unable to set the pipeline to the `Null` state");

    test_audio_only_output(output_path);
}
/// Reads the muxed audio-only file at `filename` and checks that it starts
/// with the default MPEG `ftyp` box.
fn test_audio_only_output(filename: String) {
    let file_bytes = fs::read(filename).unwrap();
    test_default_mpeg_file_type_box(Cursor::new(file_bytes.as_slice()));
}
/// Asserts that `cursor` starts with an `ftyp` box whose major brand is `iso4`,
/// whose minor version is 0 and whose compatible brands are exactly `isom`,
/// `mp41` and `mp42` (in any order).
fn test_default_mpeg_file_type_box(cursor: Cursor<&[u8]>) {
    let compatible_brands = vec![*b"isom", *b"mp41", *b"mp42"];
    test_expected_file_type_box(b"iso4", 0, compatible_brands, cursor);
}