fmp4mux: Language and orientation are stream tags

The language tag is purely a stream-specific tag, whereas the orientation
tag can have stream scope, global scope, or both at the same time.

Language tags in global scope are considered to be stream scope.

Orientation tags are now written per stream to TKHD: a stream-scoped
orientation tag takes precedence, and the global orientation tag is used
as a fallback if available.

Also switch to the GStreamer API for image orientation tag storage and
handling, and add the flip transformations.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/2033>
This commit is contained in:
Jochen Henneberg 2024-11-28 10:08:40 +01:00 committed by GStreamer Marge Bot
parent b70ad7895f
commit 051bea47df
3 changed files with 194 additions and 132 deletions

View file

@ -11,7 +11,8 @@ use gst::prelude::*;
use anyhow::{anyhow, bail, Context, Error};
use std::convert::TryFrom;
use super::{Buffer, ImageOrientation, IDENTITY_MATRIX};
use super::Buffer;
use super::IDENTITY_MATRIX;
fn write_box<T, F: FnOnce(&mut Vec<u8>) -> Result<T, Error>>(
vec: &mut Vec<u8>,
@ -521,21 +522,7 @@ fn write_mvhd(
v.extend([0u8; 2 + 2 * 4]);
// Matrix
v.extend(
[
(1u32 << 16).to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
(1u32 << 16).to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
(16384u32 << 16).to_be_bytes(),
]
.into_iter()
.flatten(),
);
v.extend(IDENTITY_MATRIX.iter().flatten());
// Pre defined
v.extend([0u8; 6 * 4]);
@ -568,7 +555,7 @@ fn write_trak(
b"tkhd",
FULL_BOX_VERSION_1,
TKHD_FLAGS_TRACK_ENABLED | TKHD_FLAGS_TRACK_IN_MOVIE | TKHD_FLAGS_TRACK_IN_PREVIEW,
|v| write_tkhd(v, cfg, idx, stream, creation_time),
|v| write_tkhd(v, idx, stream, creation_time),
)?;
// TODO: write edts if necessary: for audio tracks to remove initialization samples
@ -585,7 +572,6 @@ fn write_trak(
fn write_tkhd(
v: &mut Vec<u8>,
cfg: &super::HeaderConfiguration,
idx: usize,
stream: &super::HeaderStream,
creation_time: u64,
@ -620,16 +606,8 @@ fn write_tkhd(
// Reserved
v.extend([0u8; 2]);
// Matrix
let matrix = match s.name().as_str() {
x if x.starts_with("video/") || x.starts_with("image/") => cfg
.orientation
.unwrap_or(ImageOrientation::Rotate0)
.transform_matrix(),
_ => &IDENTITY_MATRIX,
};
v.extend(matrix.iter().flatten());
// Per stream orientation matrix.
v.extend(stream.orientation.iter().flatten());
// Width/height
match s.name().as_str() {
@ -667,7 +645,7 @@ fn write_mdia(
creation_time: u64,
) -> Result<(), Error> {
write_full_box(v, b"mdhd", FULL_BOX_VERSION_1, FULL_BOX_FLAGS_NONE, |v| {
write_mdhd(v, cfg, stream, creation_time)
write_mdhd(v, stream, creation_time)
})?;
write_full_box(v, b"hdlr", FULL_BOX_VERSION_0, FULL_BOX_FLAGS_NONE, |v| {
write_hdlr(v, cfg, stream)
@ -710,7 +688,6 @@ fn language_code(lang: impl std::borrow::Borrow<[u8; 3]>) -> u16 {
fn write_mdhd(
v: &mut Vec<u8>,
cfg: &super::HeaderConfiguration,
stream: &super::HeaderStream,
creation_time: u64,
) -> Result<(), Error> {
@ -724,7 +701,7 @@ fn write_mdhd(
v.extend(0u64.to_be_bytes());
// Language as ISO-639-2/T
if let Some(lang) = cfg.language_code {
if let Some(lang) = stream.language_code {
v.extend(language_code(lang).to_be_bytes());
} else {
v.extend(language_code(b"und").to_be_bytes());

View file

@ -17,7 +17,7 @@ use std::mem;
use std::sync::Mutex;
use crate::fmp4mux::obu::read_seq_header_obu_bytes;
use crate::fmp4mux::ImageOrientation;
use crate::fmp4mux::TransformMatrix;
use once_cell::sync::Lazy;
use super::boxes;
@ -88,7 +88,7 @@ fn utc_time_to_running_time(
.and_then(|res| res.positive())
}
static CAT: Lazy<gst::DebugCategory> = Lazy::new(|| {
pub static CAT: Lazy<gst::DebugCategory> = Lazy::new(|| {
gst::DebugCategory::new(
"fmp4mux",
gst::DebugColorFlags::empty(),
@ -228,6 +228,12 @@ struct Stream {
running_time_utc_time_mapping: Option<(gst::Signed<gst::ClockTime>, gst::ClockTime)>,
extra_header_data: Option<Vec<u8>>,
/// Language code from tags
language_code: Option<[u8; 3]>,
/// Orientation from tags, stream orientation takes precedence over global orientation
global_orientation: &'static TransformMatrix,
stream_orientation: Option<&'static TransformMatrix>,
}
impl Stream {
@ -239,6 +245,22 @@ impl Stream {
self.pre_queue.clear();
self.running_time_utc_time_mapping = None;
}
/// Returns the transform matrix to use for this stream: the stream-scoped
/// orientation if one was set, otherwise the global orientation.
fn orientation(&self) -> &'static TransformMatrix {
    match self.stream_orientation {
        Some(matrix) => matrix,
        None => self.global_orientation,
    }
}
/// Converts a language tag value into a three-byte language code.
///
/// Returns `Some` only when `lang` is exactly three bytes long and every
/// one of them is an ASCII lowercase letter; any other input yields `None`.
fn parse_language_code(lang: &str) -> Option<[u8; 3]> {
    match lang.as_bytes() {
        // A three-byte string made only of ASCII lowercase bytes is also
        // three ASCII lowercase characters, so checking bytes is sufficient.
        &[first, second, third]
            if [first, second, third].iter().all(u8::is_ascii_lowercase) =>
        {
            Some([first, second, third])
        }
        _ => None,
    }
}
}
#[derive(Default)]
@ -265,10 +287,6 @@ struct State {
end_pts: Option<gst::ClockTime>,
/// Start DTS of the whole stream
start_dts: Option<gst::ClockTime>,
/// Language code from tags
language_code: Option<[u8; 3]>,
/// Orientation from tags
orientation: Option<ImageOrientation>,
/// Start PTS of the current fragment
fragment_start_pts: Option<gst::ClockTime>,
@ -296,6 +314,9 @@ impl State {
self.fragment_start_pts = None;
self.chunk_start_pts = None;
}
/// Finds the stream whose sink pad equals `pad` and returns a mutable
/// reference to it, or `None` if no stream uses that pad.
fn mut_stream_from_pad(&mut self, pad: &gst_base::AggregatorPad) -> Option<&mut Stream> {
    for stream in self.streams.iter_mut() {
        if *pad == stream.sinkpad {
            return Some(stream);
        }
    }
    None
}
}
#[derive(Default)]
@ -2657,6 +2678,38 @@ impl FMP4Mux {
}
};
// Check if language or orientation tags have already been
// received
let mut stream_orientation = Default::default();
let mut global_orientation = Default::default();
let mut language_code = None;
pad.sticky_events_foreach(|ev| {
if let gst::EventView::Tag(ev) = ev.view() {
let tag = ev.tag();
if let Some(l) = tag.get::<gst::tags::LanguageCode>() {
// There is no header field for a global language
// code (probably because it would not really make
// sense), so global language tags are treated as
// stream-local tags.
if tag.scope() == gst::TagScope::Global {
gst::info!(
CAT,
obj = pad,
"Language tags scoped 'global' are considered stream tags",
);
}
language_code = Stream::parse_language_code(l.get());
} else if tag.get::<gst::tags::ImageOrientation>().is_some() {
if tag.scope() == gst::TagScope::Global {
global_orientation = TransformMatrix::from_tag(self, ev);
} else {
stream_orientation = Some(TransformMatrix::from_tag(self, ev));
}
}
}
std::ops::ControlFlow::Continue(gst::EventForeachAction::Keep)
});
gst::info!(CAT, obj = pad, "Configuring caps {:?}", caps);
let s = caps.structure(0).unwrap();
@ -2737,6 +2790,9 @@ impl FMP4Mux {
current_position: gst::ClockTime::ZERO,
running_time_utc_time_mapping: None,
extra_header_data: None,
language_code,
global_orientation,
stream_orientation,
});
}
@ -2805,6 +2861,8 @@ impl FMP4Mux {
delta_frames: s.delta_frames,
caps: s.caps.clone(),
extra_header_data: s.extra_header_data.clone(),
language_code: s.language_code,
orientation: s.orientation(),
})
.collect::<Vec<_>>();
@ -2815,8 +2873,6 @@ impl FMP4Mux {
streams,
write_mehd: settings.write_mehd,
duration: if at_eos { duration } else { None },
language_code: state.language_code,
orientation: state.orientation,
start_utc_time: if variant == super::Variant::ONVIF {
state
.earliest_pts
@ -3251,7 +3307,8 @@ impl AggregatorImpl for FMP4Mux {
self.parent_sink_event(aggregator_pad, event)
}
EventView::Tag(ev) => {
if let Some(tag_value) = ev.tag().get::<gst::tags::LanguageCode>() {
let tag = ev.tag();
if let Some(tag_value) = tag.get::<gst::tags::LanguageCode>() {
let lang = tag_value.get();
gst::trace!(
CAT,
@ -3261,16 +3318,23 @@ impl AggregatorImpl for FMP4Mux {
);
// Language as ISO-639-2/T
if lang.len() == 3 && lang.chars().all(|c| c.is_ascii_lowercase()) {
if let Some(language_code) = Stream::parse_language_code(lang) {
let mut state = self.state.lock().unwrap();
let mut language_code: [u8; 3] = [0; 3];
for (out, c) in Iterator::zip(language_code.iter_mut(), lang.chars()) {
*out = c as u8;
if !state.streams.is_empty() {
if tag.scope() == gst::TagScope::Global {
gst::info!(
CAT,
imp = self,
"Language tags scoped 'global' are considered stream tags",
);
}
let stream = state.mut_stream_from_pad(aggregator_pad).unwrap();
stream.language_code = Some(language_code);
}
state.language_code = Some(language_code);
}
} else if let Some(tag_value) = ev.tag().get::<gst::tags::ImageOrientation>() {
} else if let Some(tag_value) = tag.get::<gst::tags::ImageOrientation>() {
let orientation = tag_value.get();
gst::trace!(
CAT,
@ -3280,17 +3344,14 @@ impl AggregatorImpl for FMP4Mux {
);
let mut state = self.state.lock().unwrap();
state.orientation = match orientation {
"rotate-0" => Some(ImageOrientation::Rotate0),
"rotate-90" => Some(ImageOrientation::Rotate90),
"rotate-180" => Some(ImageOrientation::Rotate180),
"rotate-270" => Some(ImageOrientation::Rotate270),
// TODO:
// "flip-rotate-0" => Some(ImageOrientation::FlipRotate0),
// "flip-rotate-90" => Some(ImageOrientation::FlipRotate90),
// "flip-rotate-180" => Some(ImageOrientation::FlipRotate180),
// "flip-rotate-270" => Some(ImageOrientation::FlipRotate270),
_ => None,
if !state.streams.is_empty() {
let stream = state.mut_stream_from_pad(aggregator_pad).unwrap();
if tag.scope() == gst::TagScope::Stream {
stream.stream_orientation = Some(TransformMatrix::from_tag(self, ev));
} else {
stream.global_orientation = TransformMatrix::from_tag(self, ev);
}
}
}
@ -4022,10 +4083,8 @@ impl AggregatorPadImpl for FMP4MuxPad {
let mux = aggregator.downcast_ref::<super::FMP4Mux>().unwrap();
let mut mux_state = mux.imp().state.lock().unwrap();
if let Some(stream) = mux_state
.streams
.iter_mut()
.find(|s| s.sinkpad == *self.obj())
if let Some(stream) =
mux_state.mut_stream_from_pad(self.obj().upcast_ref::<gst_base::AggregatorPad>())
{
stream.flush();
}

View file

@ -6,8 +6,10 @@
//
// SPDX-License-Identifier: MPL-2.0
use crate::fmp4mux::imp::CAT;
use gst::glib;
use gst::prelude::*;
use gst::subclass::prelude::*;
mod boxes;
mod imp;
@ -73,80 +75,102 @@ pub fn register(plugin: &gst::Plugin) -> Result<(), glib::BoolError> {
Ok(())
}
/// Image orientation expressed as one of four rotations.
///
/// Only the plain rotations are represented; the flipped variants are
/// still listed as a TODO below.
#[derive(Debug, Copy, Clone)]
pub(crate) enum ImageOrientation {
Rotate0,
Rotate90,
Rotate180,
Rotate270,
// TODO:
// FlipRotate0,
// FlipRotate90,
// FlipRotate180,
// FlipRotate270,
}
#[derive(Debug)]
pub(crate) struct TransformMatrix([[u8; 4]; 9]);
type TransformMatrix = [[u8; 4]; 9];
impl std::ops::Deref for TransformMatrix {
type Target = [[u8; 4]; 9];
const IDENTITY_MATRIX: TransformMatrix = [
(1u32 << 16).to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
(1u32 << 16).to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
(1u32 << 30).to_be_bytes(),
];
const ROTATE_90_MATRIX: TransformMatrix = [
0u32.to_be_bytes(),
(1u32 << 16).to_be_bytes(),
0u32.to_be_bytes(),
(-1i32 << 16).to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
(1u32 << 30).to_be_bytes(),
];
const ROTATE_180_MATRIX: TransformMatrix = [
(-1i32 << 16).to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
(-1i32 << 16).to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
(1u32 << 30).to_be_bytes(),
];
const ROTATE_270_MATRIX: TransformMatrix = [
0u32.to_be_bytes(),
(-1i32 << 16).to_be_bytes(),
0u32.to_be_bytes(),
(1u32 << 16).to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
0u32.to_be_bytes(),
(1u32 << 30).to_be_bytes(),
];
impl ImageOrientation {
pub(crate) fn transform_matrix(&self) -> &'static TransformMatrix {
match self {
ImageOrientation::Rotate0 => &IDENTITY_MATRIX,
ImageOrientation::Rotate90 => &ROTATE_90_MATRIX,
ImageOrientation::Rotate180 => &ROTATE_180_MATRIX,
ImageOrientation::Rotate270 => &ROTATE_270_MATRIX,
}
fn deref(&self) -> &Self::Target {
&self.0
}
}
// `Default` is implemented for a *reference* to a matrix so that code
// holding `&'static TransformMatrix` values can start from
// `Default::default()`; the default is the identity matrix.
impl Default for &TransformMatrix {
fn default() -> &'static TransformMatrix {
&IDENTITY_MATRIX
}
}
impl TransformMatrix {
    /// Maps the image orientation carried by a tag event to one of the
    /// static transform matrices.
    ///
    /// `obj` is only used as the logging context. If no orientation method
    /// can be derived from the tag list, the default (identity) matrix is
    /// returned. Orientation methods without a dedicated matrix are logged
    /// at info level and also map to the identity matrix.
    fn from_tag(obj: &impl ObjectSubclass, tag: &gst::event::Tag) -> &'static TransformMatrix {
        let orientation = match gst_video::VideoOrientationMethod::from_tag(tag.tag()) {
            Some(orientation) => orientation,
            None => return Default::default(),
        };

        match orientation {
            gst_video::VideoOrientationMethod::Identity => &IDENTITY_MATRIX,
            gst_video::VideoOrientationMethod::_90r => &ROTATE_90R_MATRIX,
            gst_video::VideoOrientationMethod::_180 => &ROTATE_180_MATRIX,
            gst_video::VideoOrientationMethod::_90l => &ROTATE_90L_MATRIX,
            gst_video::VideoOrientationMethod::Horiz => &FLIP_HORZ_MATRIX,
            gst_video::VideoOrientationMethod::Vert => &FLIP_VERT_MATRIX,
            gst_video::VideoOrientationMethod::UrLl => &FLIP_ROTATE_90R_MATRIX,
            gst_video::VideoOrientationMethod::UlLr => &FLIP_ROTATE_90L_MATRIX,
            unsupported => {
                gst::info!(
                    CAT,
                    imp = obj,
                    "Orientation {:?} not yet supported",
                    unsupported
                );
                &IDENTITY_MATRIX
            }
        }
    }
}
// Builds a `TransformMatrix` from a comma-separated list of integer
// expressions.
//
// Every value is shifted left by 16 bits, cast to `i32` and stored as its
// big-endian byte representation, so plain integers such as `1` or `-1`
// become 16.16 fixed-point numbers. A value passed as `1 << 14` ends up as
// `1 << 30` after the shift.
macro_rules! tm {
( $($v:expr),* ) => {
TransformMatrix([
$(
// Scale to fixed point, then serialise big-endian.
(($v << 16) as i32).to_be_bytes(),
)*
])
}
}
// Point (p, q, 1) -> (p', q')
// Matrix (a, b, u,
//         c, d, v,
//         x, y, w)
// Where a, b, c, d, x, y are FP 16.16 and u, v, w are FP 2.30
// m = ap + cq + x
// n = bp + dq + y
// z = up + vq + w
// p' = m/z
// q' = n/z
//
// Every matrix below has x = y = u = v = 0 and passes `1 << 14` for w,
// which `tm!` shifts by a further 16 bits to `1 << 30` (1.0 in FP 2.30).
// The mapping therefore reduces to p' = ap + cq and q' = bp + dq.

// Identity: p' = p, q' = q
#[rustfmt::skip]
const IDENTITY_MATRIX: TransformMatrix = tm!(1, 0, 0,
0, 1, 0,
0, 0, (1 << 14));
// Vertical flip: p' = p, q' = -q
#[rustfmt::skip]
const FLIP_VERT_MATRIX: TransformMatrix = tm!(1, 0, 0,
0, -1, 0,
0, 0, (1 << 14));
// Horizontal flip: p' = -p, q' = q
#[rustfmt::skip]
const FLIP_HORZ_MATRIX: TransformMatrix = tm!(-1, 0, 0,
0, 1, 0,
0, 0, (1 << 14));
// Rotate 90 degrees right: p' = -q, q' = p
#[rustfmt::skip]
const ROTATE_90R_MATRIX: TransformMatrix = tm!( 0, 1, 0,
-1, 0, 0,
0, 0, (1 << 14));
// Rotate 180 degrees: p' = -p, q' = -q
#[rustfmt::skip]
const ROTATE_180_MATRIX: TransformMatrix = tm!(-1, 0, 0,
0, -1, 0,
0, 0, (1 << 14));
// Rotate 90 degrees left: p' = q, q' = -p
#[rustfmt::skip]
const ROTATE_90L_MATRIX: TransformMatrix = tm!(0, -1, 0,
1, 0, 0,
0, 0, (1 << 14));
// Flip across the upper-right/lower-left diagonal: p' = -q, q' = -p
#[rustfmt::skip]
const FLIP_ROTATE_90R_MATRIX: TransformMatrix = tm!( 0, -1, 0,
-1, 0, 0,
0, 0, (1 << 14));
// Flip across the upper-left/lower-right diagonal: p' = q, q' = p
#[rustfmt::skip]
const FLIP_ROTATE_90L_MATRIX: TransformMatrix = tm!(0, 1, 0,
1, 0, 0,
0, 0, (1 << 14));
#[derive(Debug)]
pub(crate) struct HeaderConfiguration {
variant: Variant,
@ -161,8 +185,6 @@ pub(crate) struct HeaderConfiguration {
write_mehd: bool,
duration: Option<gst::ClockTime>,
language_code: Option<[u8; 3]>,
orientation: Option<ImageOrientation>,
/// Start UTC time in ONVIF mode.
/// Since Jan 1 1601 in 100ns units.
@ -182,6 +204,10 @@ pub(crate) struct HeaderStream {
// More data to be included in the fragmented stream header
extra_header_data: Option<Vec<u8>>,
// Tags meta for audio language and video orientation
language_code: Option<[u8; 3]>,
orientation: &'static TransformMatrix,
}
#[derive(Debug)]