mirror of
https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs.git
synced 2025-09-03 02:03:48 +00:00
mux/mp4: add image sequence mode
Image sequence is defined in ISO/IEC 23008-12 (HEIF), as a variant of video. The key difference is that image sequence timing is advisory. Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/2101>
This commit is contained in:
parent
130e13a33e
commit
2130c3bfbe
5 changed files with 341 additions and 34 deletions
|
@ -5219,6 +5219,18 @@
|
|||
],
|
||||
"kind": "object",
|
||||
"properties": {
|
||||
"image-sequence": {
|
||||
"blurb": "Generate ISO/IEC 23008-12 image sequence instead of video",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "false",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "gboolean",
|
||||
"writable": true
|
||||
},
|
||||
"trak-timescale": {
|
||||
"blurb": "Timescale to use for the track (units per second, 0 is automatic)",
|
||||
"conditionally-available": false,
|
||||
|
|
|
@ -69,29 +69,14 @@ fn write_full_box<T, F: FnOnce(&mut Vec<u8>) -> Result<T, Error>>(
|
|||
|
||||
/// Creates `ftyp` box
|
||||
pub(super) fn create_ftyp(
|
||||
variant: super::Variant,
|
||||
content_caps: &[&gst::CapsRef],
|
||||
major_brand: &[u8; 4],
|
||||
minor_version: u32,
|
||||
compatible_brands: Vec<&[u8; 4]>,
|
||||
) -> Result<gst::Buffer, Error> {
|
||||
let mut v = vec![];
|
||||
let mut minor_version = 0u32;
|
||||
|
||||
let (brand, mut compatible_brands) = match variant {
|
||||
super::Variant::ISO | super::Variant::ONVIF => (b"iso4", vec![b"mp41", b"mp42", b"isom"]),
|
||||
};
|
||||
|
||||
for caps in content_caps {
|
||||
let s = caps.structure(0).unwrap();
|
||||
if let (super::Variant::ISO, "video/x-av1") = (variant, s.name().as_str()) {
|
||||
minor_version = 1;
|
||||
compatible_brands = vec![b"iso4", b"av01"];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
write_box(&mut v, b"ftyp", |v| {
|
||||
// major brand
|
||||
v.extend(brand);
|
||||
// minor version
|
||||
v.extend(major_brand);
|
||||
v.extend(minor_version.to_be_bytes());
|
||||
// compatible brands
|
||||
v.extend(compatible_brands.into_iter().flatten());
|
||||
|
@ -494,7 +479,14 @@ fn write_hdlr(
|
|||
let s = stream.caps.structure(0).unwrap();
|
||||
let (handler_type, name) = match s.name().as_str() {
|
||||
"video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9" | "video/x-av1"
|
||||
| "image/jpeg" | "video/x-raw" => (b"vide", b"VideoHandler\0".as_slice()),
|
||||
| "image/jpeg" | "video/x-raw" => {
|
||||
if stream.image_sequence {
|
||||
// See ISO/IEC 23008-12:2022 Section 7.2.2
|
||||
(b"pict", b"PictureHandler\0".as_slice())
|
||||
} else {
|
||||
(b"vide", b"VideoHandler\0".as_slice())
|
||||
}
|
||||
}
|
||||
"audio/mpeg" | "audio/x-opus" | "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw"
|
||||
| "audio/x-adpcm" => (b"soun", b"SoundHandler\0".as_slice()),
|
||||
"application/x-onvif-metadata" => (b"meta", b"MetadataHandler\0".as_slice()),
|
||||
|
@ -1052,6 +1044,49 @@ fn write_visual_sample_entry(
|
|||
})?;
|
||||
}
|
||||
|
||||
if stream.image_sequence {
|
||||
match s.name().as_str() {
|
||||
// intra formats
|
||||
"video/x-vp9" | "video/x-vp8" | "image/jpeg" => {
|
||||
let all_ref_pics_intra = 1u32; // 0 = don't know, 1 = reference pictures are only intra
|
||||
let intra_pred_used = 1u32; // 0 = no, 1 = yes, or maybe
|
||||
let max_ref_per_pic = 0u32; // none number
|
||||
let packed_bits = (all_ref_pics_intra << 31)
|
||||
| (intra_pred_used << 30)
|
||||
| (max_ref_per_pic << 26);
|
||||
write_full_box(v, b"ccst", FULL_BOX_VERSION_0, FULL_BOX_FLAGS_NONE, |v| {
|
||||
v.extend(packed_bits.to_be_bytes());
|
||||
Ok(())
|
||||
})?;
|
||||
}
|
||||
// uncompressed
|
||||
"video/x-raw" => {
|
||||
let all_ref_pics_intra = 1u32; // 0 = don't know, 1 = reference pictures are only intra
|
||||
let intra_pred_used = 0u32; // 0 = no, 1 = yes, or maybe
|
||||
let max_ref_per_pic = 0u32; // none
|
||||
let packed_bits = (all_ref_pics_intra << 31)
|
||||
| (intra_pred_used << 30)
|
||||
| (max_ref_per_pic << 26);
|
||||
write_full_box(v, b"ccst", FULL_BOX_VERSION_0, FULL_BOX_FLAGS_NONE, |v| {
|
||||
v.extend(packed_bits.to_be_bytes());
|
||||
Ok(())
|
||||
})?;
|
||||
}
|
||||
_ => {
|
||||
let all_ref_pics_intra = 0u32; // 0 = don't know, 1 = reference pictures are only intra
|
||||
let intra_pred_used = 1u32; // 0 = no, 1 = yes, or maybe
|
||||
let max_ref_per_pic = 15u32; // any number
|
||||
let packed_bits = (all_ref_pics_intra << 31)
|
||||
| (intra_pred_used << 30)
|
||||
| (max_ref_per_pic << 26);
|
||||
write_full_box(v, b"ccst", FULL_BOX_VERSION_0, FULL_BOX_FLAGS_NONE, |v| {
|
||||
v.extend(packed_bits.to_be_bytes());
|
||||
Ok(())
|
||||
})?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: write btrt bitrate box based on tags
|
||||
|
||||
Ok(())
|
||||
|
|
|
@ -14,6 +14,7 @@ use gst_base::prelude::*;
|
|||
use gst_base::subclass::prelude::*;
|
||||
|
||||
use num_integer::Integer;
|
||||
use std::collections::HashSet;
|
||||
use std::collections::VecDeque;
|
||||
use std::sync::Mutex;
|
||||
|
||||
|
@ -265,6 +266,18 @@ impl Stream {
|
|||
10_000
|
||||
}
|
||||
}
|
||||
|
||||
fn image_sequence_mode(&self) -> bool {
|
||||
let image_sequence = {
|
||||
self.sinkpad
|
||||
.imp()
|
||||
.settings
|
||||
.lock()
|
||||
.unwrap()
|
||||
.image_sequence_mode
|
||||
};
|
||||
image_sequence
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
|
@ -1620,13 +1633,54 @@ impl AggregatorImpl for MP4Mux {
|
|||
|
||||
// ... and then create the ftyp box plus mdat box header so we can start outputting
|
||||
// actual data
|
||||
|
||||
let mut major_brand = b"iso4";
|
||||
let mut minor_version = 0u32;
|
||||
let mut compatible_brands: HashSet<&[u8; 4]> = HashSet::new();
|
||||
let mut have_image_sequence = false; // we'll mark true if an image sequence
|
||||
let mut have_only_image_sequence = true; // we'll mark false if video found
|
||||
let variant = self.obj().class().as_ref().variant;
|
||||
for stream in state.streams.iter().as_ref() {
|
||||
let caps_structure = stream.caps.structure(0).unwrap();
|
||||
if let (super::Variant::ISO, "video/x-av1") =
|
||||
(variant, caps_structure.name().as_str())
|
||||
{
|
||||
minor_version = 1;
|
||||
compatible_brands.insert(b"iso4");
|
||||
compatible_brands.insert(b"av01");
|
||||
}
|
||||
if stream.image_sequence_mode() {
|
||||
compatible_brands.insert(b"iso8");
|
||||
compatible_brands.insert(b"unif");
|
||||
compatible_brands.insert(b"msf1");
|
||||
have_image_sequence = true;
|
||||
} else {
|
||||
match caps_structure.name().as_str() {
|
||||
"video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9"
|
||||
| "video/x-av1" | "image/jpeg" | "video/x-raw" => {
|
||||
have_only_image_sequence = false;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
match caps_structure.name().as_str() {
|
||||
"video/x-h264" | "video/x-h265" | "video/x-vp8" | "video/x-vp9"
|
||||
| "image/jpeg" | "video/x-raw" | "audio/mpeg" | "audio/x-opus"
|
||||
| "audio/x-flac" | "audio/x-alaw" | "audio/x-mulaw" | "audio/x-adpcm" => {
|
||||
compatible_brands.insert(b"mp41");
|
||||
compatible_brands.insert(b"mp42");
|
||||
compatible_brands.insert(b"isom");
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
if have_image_sequence && have_only_image_sequence {
|
||||
major_brand = b"msf1";
|
||||
}
|
||||
let ftyp = boxes::create_ftyp(
|
||||
self.obj().class().as_ref().variant,
|
||||
&state
|
||||
.streams
|
||||
.iter()
|
||||
.map(|s| s.caps.as_ref())
|
||||
.collect::<Vec<_>>(),
|
||||
major_brand,
|
||||
minor_version,
|
||||
Vec::from_iter(compatible_brands),
|
||||
)
|
||||
.map_err(|err| {
|
||||
gst::error!(CAT, imp = self, "Failed to create ftyp box: {err}");
|
||||
|
@ -1695,6 +1749,7 @@ impl AggregatorImpl for MP4Mux {
|
|||
|
||||
Vec::new()
|
||||
}),
|
||||
image_sequence: stream.image_sequence_mode(),
|
||||
chunks: stream.chunks,
|
||||
extra_header_data: stream.extra_header_data.clone(),
|
||||
orientation: stream.orientation,
|
||||
|
@ -2081,6 +2136,7 @@ impl MP4MuxImpl for ONVIFMP4Mux {
|
|||
/// Per-pad configuration values stored behind the pad's `settings` lock.
#[derive(Clone, Default)]
struct PadSettings {
    /// Timescale to use for the track, in units per second; 0 is automatic.
    trak_timescale: u32,
    /// Generate an ISO/IEC 23008-12 image sequence instead of video.
    image_sequence_mode: bool,
}
|
||||
|
||||
#[derive(Default)]
|
||||
|
@ -2098,11 +2154,18 @@ impl ObjectSubclass for MP4MuxPad {
|
|||
impl ObjectImpl for MP4MuxPad {
|
||||
fn properties() -> &'static [glib::ParamSpec] {
|
||||
static PROPERTIES: LazyLock<Vec<glib::ParamSpec>> = LazyLock::new(|| {
|
||||
vec![glib::ParamSpecUInt::builder("trak-timescale")
|
||||
.nick("Track Timescale")
|
||||
.blurb("Timescale to use for the track (units per second, 0 is automatic)")
|
||||
.mutable_ready()
|
||||
.build()]
|
||||
vec![
|
||||
glib::ParamSpecUInt::builder("trak-timescale")
|
||||
.nick("Track Timescale")
|
||||
.blurb("Timescale to use for the track (units per second, 0 is automatic)")
|
||||
.mutable_ready()
|
||||
.build(),
|
||||
glib::ParamSpecBoolean::builder("image-sequence")
|
||||
.nick("Generate image sequence")
|
||||
.blurb("Generate ISO/IEC 23008-12 image sequence instead of video")
|
||||
.mutable_ready()
|
||||
.build(),
|
||||
]
|
||||
});
|
||||
|
||||
&PROPERTIES
|
||||
|
@ -2115,6 +2178,11 @@ impl ObjectImpl for MP4MuxPad {
|
|||
settings.trak_timescale = value.get().expect("type checked upstream");
|
||||
}
|
||||
|
||||
"image-sequence" => {
|
||||
let mut settings = self.settings.lock().unwrap();
|
||||
settings.image_sequence_mode = value.get().expect("type checked upstream");
|
||||
}
|
||||
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
@ -2126,6 +2194,11 @@ impl ObjectImpl for MP4MuxPad {
|
|||
settings.trak_timescale.to_value()
|
||||
}
|
||||
|
||||
"image-sequence" => {
|
||||
let settings = self.settings.lock().unwrap();
|
||||
settings.image_sequence_mode.to_value()
|
||||
}
|
||||
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -208,6 +208,9 @@ pub(crate) struct Stream {
|
|||
|
||||
/// Edit list clipping information
|
||||
elst_infos: Vec<ElstInfo>,
|
||||
|
||||
/// Whether this stream should be encoded as an ISO/IEC 23008-12 image sequence
|
||||
image_sequence: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
|
|
@ -7,9 +7,12 @@
|
|||
// SPDX-License-Identifier: MPL-2.0
|
||||
//
|
||||
|
||||
use std::path::Path;
|
||||
use std::{
|
||||
fs,
|
||||
io::{Cursor, Read},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use gst::prelude::*;
|
||||
use gst_pbutils::prelude::*;
|
||||
|
||||
fn init() {
|
||||
|
@ -63,6 +66,40 @@ impl Pipeline {
|
|||
}
|
||||
}
|
||||
|
||||
/// Minimal parsed view of an ISO BMFF `ftyp` box, used to verify muxer output.
struct FileTypeBox {
    /// Four-character major brand.
    major_brand: [u8; 4],
    /// Minor version, decoded from big-endian.
    minor_version: u32,
    /// Compatible brands, in the order they appear in the box.
    compatible_brands: Vec<[u8; 4]>,
}

impl FileTypeBox {
    /// Bytes taken by the fixed part of the box: size, type, major brand and
    /// minor version (4 bytes each).
    const FIXED_SIZE: u32 = 16;

    /// Reads the next four bytes from `reader`.
    fn read_four(reader: &mut Cursor<&[u8]>) -> std::io::Result<[u8; 4]> {
        let mut bytes = [0u8; 4];
        reader.read_exact(&mut bytes)?;
        Ok(bytes)
    }

    /// Parses a `ftyp` box starting at the current position of `reader`.
    ///
    /// The number of compatible brands is derived from the declared box size;
    /// any trailing bytes that do not make up a whole brand are ignored.
    ///
    /// # Errors
    ///
    /// Returns an I/O error if the data ends before the declared box does, and
    /// an `InvalidData` error if the declared size is smaller than the fixed
    /// part of the box. That includes the special sizes 0 and 1, which this
    /// reader does not support.
    ///
    /// # Panics
    ///
    /// Panics if the box type is not `ftyp`.
    fn read(mut reader: Cursor<&[u8]>) -> std::io::Result<FileTypeBox> {
        let box_size = u32::from_be_bytes(Self::read_four(&mut reader)?);
        let four_cc = Self::read_four(&mut reader)?;
        assert_eq!(four_cc, *b"ftyp");
        let major_brand = Self::read_four(&mut reader)?;
        let minor_version = u32::from_be_bytes(Self::read_four(&mut reader)?);

        // A plain `box_size - 16` underflows for a bogus size; report it instead.
        let brands_size = box_size.checked_sub(Self::FIXED_SIZE).ok_or_else(|| {
            std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                format!(
                    "ftyp box size {box_size} is smaller than its {}-byte fixed part",
                    Self::FIXED_SIZE
                ),
            )
        })?;

        // Collecting through `Result` stops at the first short read and avoids
        // preallocating from a size field that has not been validated against
        // the available data.
        let compatible_brands = (0..brands_size / 4)
            .map(|_| Self::read_four(&mut reader))
            .collect::<std::io::Result<Vec<_>>>()?;

        Ok(FileTypeBox {
            major_brand,
            minor_version,
            compatible_brands,
        })
    }
}
|
||||
|
||||
fn test_basic_with(video_enc: &str, audio_enc: &str, cb: impl FnOnce(&Path)) {
|
||||
let Ok(pipeline) = gst::parse::launch(&format!(
|
||||
"videotestsrc num-buffers=99 ! {video_enc} ! mux. \
|
||||
|
@ -212,7 +249,9 @@ fn test_roundtrip_uncompressed(video_format: &str, width: u32, height: u32) {
|
|||
}
|
||||
|
||||
fn test_encode_uncompressed(video_format: &str, width: u32, height: u32) {
|
||||
let pipeline_text = format!("videotestsrc num-buffers=250 ! video/x-raw,format={format},width={width},height={height} ! isomp4mux ! filesink location={format}_{width}x{height}.mp4", format = video_format);
|
||||
let filename = format!("{video_format}_{width}x{height}.mp4");
|
||||
let pipeline_text = format!("videotestsrc num-buffers=250 ! video/x-raw,format={video_format},width={width},height={height} ! isomp4mux ! filesink location={filename}");
|
||||
|
||||
let Ok(pipeline) = gst::parse::launch(&pipeline_text) else {
|
||||
panic!("could not build encoding pipeline")
|
||||
};
|
||||
|
@ -238,6 +277,31 @@ fn test_encode_uncompressed(video_format: &str, width: u32, height: u32) {
|
|||
pipeline
|
||||
.set_state(gst::State::Null)
|
||||
.expect("Unable to set the pipeline to the `Null` state");
|
||||
|
||||
test_expected_uncompressed_output(filename);
|
||||
}
|
||||
|
||||
fn test_expected_uncompressed_output(filename: String) {
|
||||
let check_data: Vec<u8> = fs::read(filename).unwrap();
|
||||
let cursor = Cursor::new(check_data.as_ref());
|
||||
test_default_mpeg_file_type_box(cursor);
|
||||
}
|
||||
|
||||
fn test_expected_file_type_box(
|
||||
expected_major_brand: &[u8; 4],
|
||||
expected_minor_version: u32,
|
||||
expected_compatible_brands: Vec<[u8; 4]>,
|
||||
cursor: Cursor<&[u8]>,
|
||||
) {
|
||||
let ftyp = FileTypeBox::read(cursor).unwrap();
|
||||
|
||||
assert_eq!(ftyp.major_brand, *expected_major_brand);
|
||||
assert_eq!(ftyp.minor_version, expected_minor_version);
|
||||
let mut sorted_compatible_brands = ftyp.compatible_brands.clone();
|
||||
sorted_compatible_brands.sort();
|
||||
let mut sorted_expected_compatible_brands = expected_compatible_brands.clone();
|
||||
sorted_expected_compatible_brands.sort();
|
||||
assert_eq!(sorted_compatible_brands, sorted_expected_compatible_brands);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -457,3 +521,123 @@ fn encode_uncompressed_bgrp() {
|
|||
init();
|
||||
test_encode_uncompressed("BGRP", 1275, 713);
|
||||
}
|
||||
|
||||
fn test_encode_uncompressed_image_sequence(video_format: &str, width: u32, height: u32) {
|
||||
let filename = format!("{video_format}_{width}x{height}.heifs");
|
||||
let pipeline_text = format!("videotestsrc num-buffers=10 ! video/x-raw,format={video_format},width={width},height={height} ! isomp4mux name=mux ! filesink location={filename}");
|
||||
|
||||
let Ok(pipeline) = gst::parse::launch(&pipeline_text) else {
|
||||
panic!("could not build encoding pipeline")
|
||||
};
|
||||
let pipeline = Pipeline(pipeline.downcast::<gst::Pipeline>().unwrap());
|
||||
let mux = pipeline.by_name("mux").unwrap();
|
||||
let sink_pad = &mux.sink_pads()[0];
|
||||
sink_pad.set_property("image-sequence", true);
|
||||
pipeline
|
||||
.set_state(gst::State::Playing)
|
||||
.expect("Unable to set the pipeline to the `Playing` state");
|
||||
for msg in pipeline.bus().unwrap().iter_timed(gst::ClockTime::NONE) {
|
||||
use gst::MessageView;
|
||||
|
||||
match msg.view() {
|
||||
MessageView::Eos(..) => break,
|
||||
MessageView::Error(err) => {
|
||||
panic!(
|
||||
"Error from {:?}: {} ({:?})",
|
||||
err.src().map(|s| s.path_string()),
|
||||
err.error(),
|
||||
err.debug()
|
||||
);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
pipeline
|
||||
.set_state(gst::State::Null)
|
||||
.expect("Unable to set the pipeline to the `Null` state");
|
||||
|
||||
test_expected_image_sequence_output(filename);
|
||||
}
|
||||
|
||||
fn test_expected_image_sequence_output(filename: String) {
|
||||
let check_data: Vec<u8> = fs::read(filename).unwrap();
|
||||
let cursor = Cursor::new(check_data.as_ref());
|
||||
test_expected_image_sequence_file_type_box_content(cursor);
|
||||
}
|
||||
|
||||
fn test_expected_image_sequence_file_type_box_content(cursor: Cursor<&[u8]>) {
|
||||
let expected_major_brand = b"msf1";
|
||||
let expected_minor_version = 0;
|
||||
let expected_compatible_brands: Vec<[u8; 4]> = vec![*b"iso8", *b"msf1", *b"unif"];
|
||||
test_expected_file_type_box(
|
||||
expected_major_brand,
|
||||
expected_minor_version,
|
||||
expected_compatible_brands,
|
||||
cursor,
|
||||
);
|
||||
}
|
||||
|
||||
/// Encodes a 1275x713 RGB image sequence and validates the written file.
#[test]
fn encode_uncompressed_image_sequence_rgb() {
    init();
    let (width, height) = (1275, 713);
    test_encode_uncompressed_image_sequence("RGB", width, height);
}
|
||||
|
||||
/// Encodes a 1275x714 NV12 image sequence and validates the written file.
#[test]
fn encode_uncompressed_image_sequence_nv12() {
    init();
    let (width, height) = (1275, 714);
    test_encode_uncompressed_image_sequence("NV12", width, height);
}
|
||||
|
||||
/// Muxes an Opus-encoded `audiotestsrc` stream through `isomp4mux` into
/// `audio_only.mp4`, waits for EOS, then validates the file that was written.
#[test]
fn test_encode_audio_trak() {
    use gst::MessageView;

    init();
    let filename = String::from("audio_only.mp4");
    let pipeline_text = format!("audiotestsrc num-buffers=100 ! audioconvert ! opusenc ! isomp4mux ! filesink location={filename}");

    let pipeline = match gst::parse::launch(&pipeline_text) {
        Ok(element) => element,
        Err(_) => panic!("could not build encoding pipeline"),
    };

    pipeline
        .set_state(gst::State::Playing)
        .expect("Unable to set the pipeline to the `Playing` state");

    // Block on the bus until the stream ends; any error message fails the test.
    let bus = pipeline.bus().unwrap();
    for msg in bus.iter_timed(gst::ClockTime::NONE) {
        match msg.view() {
            MessageView::Error(err) => {
                panic!(
                    "Error from {:?}: {} ({:?})",
                    err.src().map(|s| s.path_string()),
                    err.error(),
                    err.debug()
                );
            }
            MessageView::Eos(..) => break,
            _ => (),
        }
    }

    pipeline
        .set_state(gst::State::Null)
        .expect("Unable to set the pipeline to the `Null` state");

    test_audio_only_output(filename);
}
|
||||
|
||||
fn test_audio_only_output(filename: String) {
|
||||
let check_data: Vec<u8> = fs::read(filename).unwrap();
|
||||
let cursor = Cursor::new(check_data.as_ref());
|
||||
test_default_mpeg_file_type_box(cursor);
|
||||
}
|
||||
|
||||
fn test_default_mpeg_file_type_box(cursor: Cursor<&[u8]>) {
|
||||
let expected_major_brand = b"iso4";
|
||||
let expected_minor_version = 0;
|
||||
let expected_compatible_brands: Vec<[u8; 4]> = vec![*b"isom", *b"mp41", *b"mp42"];
|
||||
test_expected_file_type_box(
|
||||
expected_major_brand,
|
||||
expected_minor_version,
|
||||
expected_compatible_brands,
|
||||
cursor,
|
||||
);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue