From eac3d14f85ec9a9b04b825a6b9920b5893422c2c Mon Sep 17 00:00:00 2001 From: William Kelleher Date: Thu, 1 Feb 2024 20:40:07 -0600 Subject: [PATCH] Add Opus and dOps box parsing Channel mapping table is still TBD --- examples/mp4copy.rs | 10 +- src/mp4box/mod.rs | 5 +- src/mp4box/opus.rs | 274 ++++++++++++++++++++++++++++++++++++++++++++ src/mp4box/stsd.rs | 10 +- src/track.rs | 28 ++++- src/types.rs | 47 ++++++++ 6 files changed, 368 insertions(+), 6 deletions(-) create mode 100644 src/mp4box/opus.rs diff --git a/examples/mp4copy.rs b/examples/mp4copy.rs index 98d1ba8..f6f2cd5 100644 --- a/examples/mp4copy.rs +++ b/examples/mp4copy.rs @@ -5,8 +5,8 @@ use std::io::{self, BufReader, BufWriter}; use std::path::Path; use mp4::{ - AacConfig, AvcConfig, HevcConfig, MediaConfig, MediaType, Mp4Config, Result, TrackConfig, - TtxtConfig, Vp9Config, + AacConfig, AvcConfig, HevcConfig, MediaConfig, MediaType, Mp4Config, OpusConfig, Result, + TrackConfig, TtxtConfig, Vp9Config, }; fn main() { @@ -64,6 +64,12 @@ fn copy>(src_filename: &P, dst_filename: &P) -> Result<()> { freq_index: track.sample_freq_index()?, chan_conf: track.channel_config()?, }), + MediaType::OPUS => MediaConfig::OpusConfig(OpusConfig { + bitrate: track.bitrate(), + freq_index: track.sample_freq_index()?, + chan_conf: track.channel_config()?, + pre_skip: 0, + }), MediaType::TTXT => MediaConfig::TtxtConfig(TtxtConfig {}), }; diff --git a/src/mp4box/mod.rs b/src/mp4box/mod.rs index 4bbdd41..6bc3844 100644 --- a/src/mp4box/mod.rs +++ b/src/mp4box/mod.rs @@ -85,6 +85,7 @@ pub(crate) mod moov; pub(crate) mod mp4a; pub(crate) mod mvex; pub(crate) mod mvhd; +pub(crate) mod opus; pub(crate) mod smhd; pub(crate) mod stbl; pub(crate) mod stco; @@ -238,7 +239,9 @@ boxtype! 
{ CovrBox => 0x636f7672, DescBox => 0x64657363, WideBox => 0x77696465, - WaveBox => 0x77617665 + WaveBox => 0x77617665, + OpusBox => 0x4F707573, + DopsBox => 0x644F7073 } pub trait Mp4Box: Sized { diff --git a/src/mp4box/opus.rs b/src/mp4box/opus.rs new file mode 100644 index 0000000..34815ff --- /dev/null +++ b/src/mp4box/opus.rs @@ -0,0 +1,274 @@ +use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; +use serde::Serialize; +use std::io::{Read, Seek, Write}; + +use crate::mp4box::*; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct OpusBox { + pub data_reference_index: u16, + pub channelcount: u16, + pub samplesize: u16, + + #[serde(with = "value_u32")] + pub samplerate: FixedPointU16, + pub dops: DopsBox, +} + +impl Default for OpusBox { + fn default() -> Self { + Self { + data_reference_index: 0, + channelcount: 2, + samplesize: 16, + samplerate: FixedPointU16::new(48000), + dops: DopsBox::default(), + } + } +} + +impl OpusBox { + pub fn new(config: &OpusConfig) -> Self { + Self { + data_reference_index: 1, + channelcount: config.chan_conf as u16, + samplesize: 16, + samplerate: FixedPointU16::new(48000), // Opus sample entries are always 48 kHz (a u16 cast of 88200/96000 would wrap); the input rate is kept in dOps + dops: DopsBox::new(config), + } + } + + pub fn get_type(&self) -> BoxType { + BoxType::OpusBox + } + + pub fn get_size(&self) -> u64 { + let mut size = HEADER_SIZE + 8 + 20; + size += self.dops.box_size(); + size + } +} + +impl Mp4Box for OpusBox { + fn box_type(&self) -> BoxType { + self.get_type() + } + + fn box_size(&self) -> u64 { + self.get_size() + } + + fn to_json(&self) -> Result { + Ok(serde_json::to_string(&self).unwrap()) + } + + fn summary(&self) -> Result { + let s = format!( + "channel_count={} sample_size={} sample_rate={}", + self.channelcount, + self.samplesize, + self.samplerate.value() + ); + Ok(s) + } +} + +impl ReadBox<&mut R> for OpusBox { + fn read_box(reader: &mut R, size: u64) -> Result { + let start = box_start(reader)?; + + reader.read_u32::()?; // reserved + reader.read_u16::()?; // 
reserved + let data_reference_index = reader.read_u16::()?; + let _version = reader.read_u16::()?; + reader.read_u16::()?; // reserved + reader.read_u32::()?; // reserved + let channelcount = reader.read_u16::()?; + let samplesize = reader.read_u16::()?; + reader.read_u32::()?; // pre-defined, reserved + let samplerate = FixedPointU16::new_raw(reader.read_u32::()?); + + // read dOps box + let header = BoxHeader::read(reader)?; + let BoxHeader { + name: _name, + size: s, + } = header; + let dops = DopsBox::read_box(reader, s)?; + + // Skip any bytes left in this box after dOps (not expected): + let end = start + size; + skip_bytes_to(reader, end)?; + + Ok(OpusBox { + data_reference_index, + channelcount, + samplesize, + samplerate, + dops, + }) + } +} + +impl WriteBox<&mut W> for OpusBox { + fn write_box(&self, writer: &mut W) -> Result { + let size = self.box_size(); + BoxHeader::new(self.box_type(), size).write(writer)?; + + writer.write_u32::(0)?; // reserved + writer.write_u16::(0)?; // reserved + writer.write_u16::(self.data_reference_index)?; + + writer.write_u64::(0)?; // reserved + writer.write_u16::(self.channelcount)?; + writer.write_u16::(self.samplesize)?; + writer.write_u32::(0)?; // reserved + writer.write_u32::(self.samplerate.raw_value())?; + + self.dops.write_box(writer)?; + + Ok(size) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize)] +pub struct ChannelMappingTable { + pub stream_count: u8, + pub coupled_count: u8, + pub channel_mapping: Vec, // len == channel_count +} + +#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize)] +pub struct DopsBox { + pub version: u8, + pub channel_count: u8, + pub pre_skip: u16, + // Input sample rate (32 bits unsigned): informational only. Multi-byte dOps fields are big-endian in MP4, unlike the little-endian Ogg Opus header + pub sample_rate: u32, + // Output gain (16 bits, signed Q7.8 in dB) to apply when decoding + pub output_gain: i16, + // Channel mapping family (8 bits unsigned) + // - 0 = one stream: mono or L,R stereo + // - 1 = channels in vorbis spec order: mono or L,R 
stereo or ... or FL,C,FR,RL,RR,LFE, ... + // - 2..254 = reserved (treat as 255) + // - 255 = no defined channel meaning + pub channel_mapping_family: u8, + // The ChannelMapping field shall be set to the same octet string as + // *Channel Mapping* field in the identification header defined in Ogg Opus + pub channel_mapping_table: Option, +} + +impl DopsBox { + pub fn new(config: &OpusConfig) -> Self { + Self { + version: 0, + channel_count: config.chan_conf as u8, + pre_skip: config.pre_skip, + sample_rate: config.freq_index.freq(), + output_gain: 0, + channel_mapping_family: 0, + channel_mapping_table: None, + } + } +} + +impl Mp4Box for DopsBox { + fn box_type(&self) -> BoxType { + BoxType::DopsBox + } + + fn box_size(&self) -> u64 { + HEADER_SIZE + 11 // 11 = version(1) + channel_count(1) + pre_skip(2) + sample_rate(4) + output_gain(2) + mapping_family(1); TODO add channel mapping table size + } + + fn to_json(&self) -> Result { + Ok(serde_json::to_string(&self).unwrap()) + } + + fn summary(&self) -> Result { + Ok(String::new()) + } +} + +impl ReadBox<&mut R> for DopsBox { + fn read_box(reader: &mut R, size: u64) -> Result { + let start = box_start(reader)?; + let end = start + size; + + let version = reader.read_u8()?; + let channel_count = reader.read_u8()?; + let pre_skip = reader.read_u16::()?; + let sample_rate = reader.read_u32::()?; + let output_gain = reader.read_i16::()?; + let channel_mapping_family = reader.read_u8()?; + + // TODO parse channel_mapping_table; for now its bytes are skipped and the field is left as None. 
+ skip_bytes_to(reader, end)?; + + Ok(DopsBox { + channel_count, + version, + pre_skip, + sample_rate, + output_gain, + channel_mapping_family, + channel_mapping_table: None, + }) + } +} + +impl WriteBox<&mut W> for DopsBox { + fn write_box(&self, writer: &mut W) -> Result { + let size = self.box_size(); + BoxHeader::new(self.box_type(), size).write(writer)?; + + writer.write_u8(self.version)?; + writer.write_u8(self.channel_count)?; + writer.write_u16::(self.pre_skip)?; + writer.write_u32::(self.sample_rate)?; + + writer.write_i16::(self.output_gain)?; + writer.write_u8(self.channel_mapping_family)?; + + // TODO write channel_mapping_table; until then a non-zero family is written without its table, and box_size() must grow when this is added + + Ok(size) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::mp4box::BoxHeader; + use std::io::Cursor; + + #[test] + fn test_opus() { + let src_box = OpusBox { + data_reference_index: 1, + channelcount: 2, + samplesize: 16, + samplerate: FixedPointU16::new(48000), + dops: DopsBox { + version: 0, + channel_count: 2, + pre_skip: 0, + sample_rate: 48000, + output_gain: 0, + channel_mapping_family: 0, + channel_mapping_table: None, + }, + }; + let mut buf = Vec::new(); + src_box.write_box(&mut buf).unwrap(); + assert_eq!(buf.len(), src_box.box_size() as usize); + + let mut reader = Cursor::new(&buf); + let header = BoxHeader::read(&mut reader).unwrap(); + assert_eq!(header.name, BoxType::OpusBox); + assert_eq!(src_box.box_size(), header.size); + + let dst_box = OpusBox::read_box(&mut reader, header.size).unwrap(); + assert_eq!(src_box, dst_box); + } +} diff --git a/src/mp4box/stsd.rs b/src/mp4box/stsd.rs index af947c6..d00f9ce 100644 --- a/src/mp4box/stsd.rs +++ b/src/mp4box/stsd.rs @@ -4,7 +4,7 @@ use std::io::{Read, Seek, Write}; use crate::mp4box::vp09::Vp09Box; use crate::mp4box::*; -use crate::mp4box::{avc1::Avc1Box, hev1::Hev1Box, mp4a::Mp4aBox, tx3g::Tx3gBox}; +use crate::mp4box::{avc1::Avc1Box, hev1::Hev1Box, mp4a::Mp4aBox, opus::OpusBox, tx3g::Tx3gBox}; #[derive(Debug, Clone, PartialEq, Eq, Default, 
Serialize)] pub struct StsdBox { @@ -25,6 +25,9 @@ pub struct StsdBox { #[serde(skip_serializing_if = "Option::is_none")] pub tx3g: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub opus: Option, } impl StsdBox { @@ -81,6 +84,7 @@ impl ReadBox<&mut R> for StsdBox { let mut vp09 = None; let mut mp4a = None; let mut tx3g = None; + let mut opus = None; // Get box header. let header = BoxHeader::read(reader)?; @@ -107,6 +111,9 @@ impl ReadBox<&mut R> for StsdBox { BoxType::Tx3gBox => { tx3g = Some(Tx3gBox::read_box(reader, s)?); } + BoxType::OpusBox => { + opus = Some(OpusBox::read_box(reader, s)?); + } _ => {} } @@ -120,6 +127,7 @@ impl ReadBox<&mut R> for StsdBox { vp09, mp4a, tx3g, + opus, }) } } diff --git a/src/track.rs b/src/track.rs index 7eada83..b76bc9b 100644 --- a/src/track.rs +++ b/src/track.rs @@ -9,8 +9,8 @@ use crate::mp4box::trak::TrakBox; use crate::mp4box::trun::TrunBox; use crate::mp4box::{ avc1::Avc1Box, co64::Co64Box, ctts::CttsBox, ctts::CttsEntry, hev1::Hev1Box, mp4a::Mp4aBox, - smhd::SmhdBox, stco::StcoBox, stsc::StscEntry, stss::StssBox, stts::SttsEntry, tx3g::Tx3gBox, - vmhd::VmhdBox, vp09::Vp09Box, + opus::OpusBox, smhd::SmhdBox, stco::StcoBox, stsc::StscEntry, stss::StssBox, stts::SttsEntry, + tx3g::Tx3gBox, vmhd::VmhdBox, vp09::Vp09Box, }; use crate::*; @@ -30,6 +30,7 @@ impl From for TrackConfig { MediaConfig::AacConfig(aac_conf) => Self::from(aac_conf), MediaConfig::TtxtConfig(ttxt_conf) => Self::from(ttxt_conf), MediaConfig::Vp9Config(vp9_config) => Self::from(vp9_config), + MediaConfig::OpusConfig(opus_config) => Self::from(opus_config), } } } @@ -89,6 +90,17 @@ impl From for TrackConfig { } } +impl From for TrackConfig { + fn from(opus_conf: OpusConfig) -> Self { + Self { + track_type: TrackType::Audio, + timescale: 1000, // XXX hard-coded placeholder (1000 ticks per second) - TODO confirm the right timescale for Opus + language: String::from("und"), // XXX hard-coded language tag, not taken from the config + media_conf: MediaConfig::OpusConfig(opus_conf), + } + } +} + #[derive(Debug)] pub struct Mp4Track { pub trak: TrakBox, @@ -129,6 +141,8 @@ impl 
Mp4Track { Ok(MediaType::AAC) } else if self.trak.mdia.minf.stbl.stsd.tx3g.is_some() { Ok(MediaType::TTXT) + } else if self.trak.mdia.minf.stbl.stsd.opus.is_some() { + Ok(MediaType::OPUS) } else { Err(Error::InvalidData("unsupported media type")) } @@ -145,6 +159,8 @@ impl Mp4Track { Ok(FourCC::from(BoxType::Mp4aBox)) } else if self.trak.mdia.minf.stbl.stsd.tx3g.is_some() { Ok(FourCC::from(BoxType::Tx3gBox)) + } else if self.trak.mdia.minf.stbl.stsd.opus.is_some() { + Ok(FourCC::from(BoxType::OpusBox)) } else { Err(Error::InvalidData("unsupported sample entry box")) } @@ -182,6 +198,8 @@ impl Mp4Track { } else { Err(Error::BoxInStblNotFound(self.track_id(), BoxType::EsdsBox)) } + } else if let Some(ref opus) = self.trak.mdia.minf.stbl.stsd.opus { + SampleFreqIndex::try_from(opus.dops.sample_rate) } else { Err(Error::BoxInStblNotFound(self.track_id(), BoxType::Mp4aBox)) } @@ -194,6 +212,8 @@ impl Mp4Track { } else { Err(Error::BoxInStblNotFound(self.track_id(), BoxType::EsdsBox)) } + } else if let Some(ref opus) = self.trak.mdia.minf.stbl.stsd.opus { + ChannelConfig::try_from(opus.dops.channel_count) } else { Err(Error::BoxInStblNotFound(self.track_id(), BoxType::Mp4aBox)) } @@ -686,6 +706,10 @@ impl Mp4TrackWriter { let tx3g = Tx3gBox::default(); trak.mdia.minf.stbl.stsd.tx3g = Some(tx3g); } + MediaConfig::OpusConfig(ref opus_config) => { + let opus = OpusBox::new(opus_config); // build from the caller's config; the defaults would drop it and leave data_reference_index at 0 + trak.mdia.minf.stbl.stsd.opus = Some(opus); + } } Ok(Mp4TrackWriter { trak, diff --git a/src/types.rs b/src/types.rs index 540f7fb..2b3c13a 100644 --- a/src/types.rs +++ b/src/types.rs @@ -222,6 +222,7 @@ const MEDIA_TYPE_H265: &str = "h265"; const MEDIA_TYPE_VP9: &str = "vp9"; const MEDIA_TYPE_AAC: &str = "aac"; const MEDIA_TYPE_TTXT: &str = "ttxt"; +const MEDIA_TYPE_OPUS: &str = "opus"; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum MediaType { @@ -230,6 +231,7 @@ pub enum MediaType { VP9, AAC, TTXT, + OPUS, } impl fmt::Display for MediaType { @@ -248,6 +250,7 @@ impl TryFrom<&str> for 
MediaType { MEDIA_TYPE_VP9 => Ok(MediaType::VP9), MEDIA_TYPE_AAC => Ok(MediaType::AAC), MEDIA_TYPE_TTXT => Ok(MediaType::TTXT), + MEDIA_TYPE_OPUS => Ok(MediaType::OPUS), _ => Err(Error::InvalidData("unsupported media type")), } } @@ -261,6 +264,7 @@ impl From for &str { MediaType::VP9 => MEDIA_TYPE_VP9, MediaType::AAC => MEDIA_TYPE_AAC, MediaType::TTXT => MEDIA_TYPE_TTXT, + MediaType::OPUS => MEDIA_TYPE_OPUS, } } } @@ -273,6 +277,7 @@ impl From<&MediaType> for &str { MediaType::VP9 => MEDIA_TYPE_VP9, MediaType::AAC => MEDIA_TYPE_AAC, MediaType::TTXT => MEDIA_TYPE_TTXT, + MediaType::OPUS => MEDIA_TYPE_OPUS, } } } @@ -502,6 +507,28 @@ impl TryFrom for SampleFreqIndex { } } +impl TryFrom for SampleFreqIndex { + type Error = Error; + fn try_from(value: u32) -> Result { + match value { // `value` is a sampling rate in Hz; anything outside the table is rejected + 96000 => Ok(SampleFreqIndex::Freq96000), + 88200 => Ok(SampleFreqIndex::Freq88200), + 64000 => Ok(SampleFreqIndex::Freq64000), + 48000 => Ok(SampleFreqIndex::Freq48000), + 44100 => Ok(SampleFreqIndex::Freq44100), + 32000 => Ok(SampleFreqIndex::Freq32000), + 24000 => Ok(SampleFreqIndex::Freq24000), + 22050 => Ok(SampleFreqIndex::Freq22050), + 16000 => Ok(SampleFreqIndex::Freq16000), + 12000 => Ok(SampleFreqIndex::Freq12000), + 11025 => Ok(SampleFreqIndex::Freq11025), + 8000 => Ok(SampleFreqIndex::Freq8000), + 7350 => Ok(SampleFreqIndex::Freq7350), + _ => Err(Error::InvalidData("invalid sampling frequency index")), + } + } +} + impl SampleFreqIndex { pub fn freq(&self) -> u32 { match *self { @@ -606,6 +633,25 @@ impl Default for AacConfig { #[derive(Debug, PartialEq, Eq, Clone, Default)] pub struct TtxtConfig {} +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct OpusConfig { + pub bitrate: u32, + pub freq_index: SampleFreqIndex, + pub chan_conf: ChannelConfig, + pub pre_skip: u16, +} + +impl Default for OpusConfig { + fn default() -> Self { + Self { + bitrate: 0, + freq_index: SampleFreqIndex::Freq48000, + chan_conf: ChannelConfig::Stereo, + pre_skip: 0, + } + } +} + #[derive(Debug, 
PartialEq, Eq, Clone)] pub enum MediaConfig { AvcConfig(AvcConfig), @@ -613,6 +659,7 @@ pub enum MediaConfig { Vp9Config(Vp9Config), AacConfig(AacConfig), TtxtConfig(TtxtConfig), + OpusConfig(OpusConfig), } #[derive(Debug)]