mirror of
https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs.git
synced 2024-11-28 22:41:02 +00:00
transcriberbin: add support for translations
With this, if the transcriber element in use supports "translate_src_%u" request source pads, the user can now specify what languages to translate to and how to map them to 608 channels (only CC1 and CC3 are supported). For instance, translation-languages="languages, CC3=transcript, CC1=fr" will cause the original transcript to be muxed into the CC3 channel, and the French translation to be muxed into the CC1 channel. Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/merge_requests/1149>
This commit is contained in:
parent
002a70a2a4
commit
8cb328b6f2
2 changed files with 236 additions and 48 deletions
|
@ -4976,6 +4976,31 @@
|
|||
"readable": true,
|
||||
"type": "GstElement",
|
||||
"writable": true
|
||||
},
|
||||
"translate-latency": {
|
||||
"blurb": "Amount of extra milliseconds to allow for translating",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": false,
|
||||
"controllable": false,
|
||||
"default": "500",
|
||||
"max": "-1",
|
||||
"min": "0",
|
||||
"mutable": "ready",
|
||||
"readable": true,
|
||||
"type": "guint",
|
||||
"writable": true
|
||||
},
|
||||
"translation-languages": {
|
||||
"blurb": "A map of CEA 608 channels to language codes, eg translation-languages=\"languages, CC1=fr, CC3=transcript\" will map the French translation to CC1 and the original transcript to CC3",
|
||||
"conditionally-available": false,
|
||||
"construct": false,
|
||||
"construct-only": true,
|
||||
"controllable": false,
|
||||
"mutable": "null",
|
||||
"readable": true,
|
||||
"type": "GstStructure",
|
||||
"writable": true
|
||||
}
|
||||
},
|
||||
"rank": "none"
|
||||
|
|
|
@ -7,10 +7,11 @@
|
|||
// SPDX-License-Identifier: MPL-2.0
|
||||
|
||||
use crate::ttutils::Cea608Mode;
|
||||
use anyhow::Error;
|
||||
use anyhow::{anyhow, Error};
|
||||
use gst::glib;
|
||||
use gst::prelude::*;
|
||||
use gst::subclass::prelude::*;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Mutex;
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
|
@ -27,10 +28,42 @@ static CAT: Lazy<gst::DebugCategory> = Lazy::new(|| {
|
|||
|
||||
const DEFAULT_PASSTHROUGH: bool = false;
|
||||
const DEFAULT_LATENCY: gst::ClockTime = gst::ClockTime::from_seconds(4);
|
||||
const DEFAULT_TRANSLATE_LATENCY: gst::ClockTime = gst::ClockTime::from_mseconds(500);
|
||||
const DEFAULT_ACCUMULATE: gst::ClockTime = gst::ClockTime::ZERO;
|
||||
const DEFAULT_MODE: Cea608Mode = Cea608Mode::RollUp2;
|
||||
const DEFAULT_CAPTION_SOURCE: CaptionSource = CaptionSource::Both;
|
||||
|
||||
const CEA608MUX_LATENCY: gst::ClockTime = gst::ClockTime::from_mseconds(100);
|
||||
|
||||
/* One per language, including original */
|
||||
struct TranscriptionChannel {
|
||||
queue: gst::Element,
|
||||
textwrap: gst::Element,
|
||||
tttocea608: gst::Element,
|
||||
language: String,
|
||||
}
|
||||
|
||||
impl TranscriptionChannel {
|
||||
fn link_transcriber(&self, transcriber: &gst::Element) -> Result<(), Error> {
|
||||
let transcriber_src_pad = match self.language.as_str() {
|
||||
"transcript" => transcriber
|
||||
.static_pad("src")
|
||||
.ok_or(anyhow!("Failed to retrieve transcription source pad"))?,
|
||||
language => {
|
||||
let pad = transcriber
|
||||
.request_pad_simple("translate_src_%u")
|
||||
.ok_or(anyhow!("Failed to request translation source pad"))?;
|
||||
pad.set_property("language-code", language);
|
||||
pad
|
||||
}
|
||||
};
|
||||
|
||||
transcriber_src_pad.link(&self.queue.static_pad("sink").unwrap())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
struct State {
|
||||
framerate: Option<gst::Fraction>,
|
||||
tearing_down: bool,
|
||||
|
@ -40,11 +73,9 @@ struct State {
|
|||
audio_tee: gst::Element,
|
||||
transcriber_aconv: gst::Element,
|
||||
transcriber: gst::Element,
|
||||
transcriber_queue: gst::Element,
|
||||
cccombiner: gst::Element,
|
||||
transcription_bin: gst::Bin,
|
||||
textwrap: gst::Element,
|
||||
tttocea608: gst::Element,
|
||||
transcription_channels: HashMap<String, TranscriptionChannel>,
|
||||
cccapsfilter: gst::Element,
|
||||
transcription_valve: gst::Element,
|
||||
}
|
||||
|
@ -52,10 +83,12 @@ struct State {
|
|||
struct Settings {
|
||||
cc_caps: gst::Caps,
|
||||
latency: gst::ClockTime,
|
||||
translate_latency: gst::ClockTime,
|
||||
passthrough: bool,
|
||||
accumulate_time: gst::ClockTime,
|
||||
mode: Cea608Mode,
|
||||
caption_source: CaptionSource,
|
||||
translation_languages: Option<gst::Structure>,
|
||||
}
|
||||
|
||||
impl Default for Settings {
|
||||
|
@ -66,9 +99,11 @@ impl Default for Settings {
|
|||
.build(),
|
||||
passthrough: DEFAULT_PASSTHROUGH,
|
||||
latency: DEFAULT_LATENCY,
|
||||
translate_latency: DEFAULT_TRANSLATE_LATENCY,
|
||||
accumulate_time: DEFAULT_ACCUMULATE,
|
||||
mode: DEFAULT_MODE,
|
||||
caption_source: DEFAULT_CAPTION_SOURCE,
|
||||
translation_languages: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -95,15 +130,14 @@ impl TranscriberBin {
|
|||
.property("max-size-time", 5_000_000_000u64)
|
||||
.property_from_str("leaky", "downstream")
|
||||
.build()?;
|
||||
let ccmux = gst::ElementFactory::make("cea608mux").build()?;
|
||||
let ccconverter = gst::ElementFactory::make("ccconverter").build()?;
|
||||
|
||||
state.transcription_bin.add_many([
|
||||
&aqueue_transcription,
|
||||
&state.transcriber_aconv,
|
||||
&state.transcriber,
|
||||
&state.transcriber_queue,
|
||||
&state.textwrap,
|
||||
&state.tttocea608,
|
||||
&ccmux,
|
||||
&ccconverter,
|
||||
&state.cccapsfilter,
|
||||
&state.transcription_valve,
|
||||
|
@ -113,14 +147,58 @@ impl TranscriberBin {
|
|||
&aqueue_transcription,
|
||||
&state.transcriber_aconv,
|
||||
&state.transcriber,
|
||||
&state.transcriber_queue,
|
||||
&state.textwrap,
|
||||
&state.tttocea608,
|
||||
])?;
|
||||
|
||||
gst::Element::link_many([
|
||||
&ccmux,
|
||||
&ccconverter,
|
||||
&state.cccapsfilter,
|
||||
&state.transcription_valve,
|
||||
])?;
|
||||
|
||||
for (padname, channel) in &state.transcription_channels {
|
||||
let channel_capsfilter = gst::ElementFactory::make("capsfilter").build()?;
|
||||
let channel_converter = gst::ElementFactory::make("ccconverter").build()?;
|
||||
|
||||
state.transcription_bin.add_many([
|
||||
&channel.queue,
|
||||
&channel.textwrap,
|
||||
&channel.tttocea608,
|
||||
&channel_capsfilter,
|
||||
&channel_converter,
|
||||
])?;
|
||||
|
||||
channel.link_transcriber(&state.transcriber)?;
|
||||
|
||||
gst::Element::link_many([
|
||||
&channel.queue,
|
||||
&channel.textwrap,
|
||||
&channel.tttocea608,
|
||||
&channel_capsfilter,
|
||||
&channel_converter,
|
||||
])?;
|
||||
let ccmux_pad = ccmux
|
||||
.request_pad_simple(padname)
|
||||
.ok_or(anyhow!("Failed to request ccmux sink pad"))?;
|
||||
channel_converter
|
||||
.static_pad("src")
|
||||
.unwrap()
|
||||
.link(&ccmux_pad)?;
|
||||
|
||||
channel_capsfilter.set_property(
|
||||
"caps",
|
||||
gst::Caps::builder("closedcaption/x-cea-608")
|
||||
.field("format", "raw")
|
||||
.field("framerate", gst::Fraction::new(30000, 1001))
|
||||
.build(),
|
||||
);
|
||||
channel.queue.set_property("max-size-buffers", 0u32);
|
||||
channel.queue.set_property("max-size-time", 0u64);
|
||||
channel.textwrap.set_property("lines", 2u32);
|
||||
}
|
||||
|
||||
ccmux.set_property("latency", CEA608MUX_LATENCY);
|
||||
|
||||
let transcription_audio_sinkpad = gst::GhostPad::with_target(
|
||||
Some("sink"),
|
||||
&aqueue_transcription.static_pad("sink").unwrap(),
|
||||
|
@ -137,15 +215,8 @@ impl TranscriberBin {
|
|||
.transcription_bin
|
||||
.add_pad(&transcription_audio_srcpad)?;
|
||||
|
||||
state
|
||||
.transcriber_queue
|
||||
.set_property("max-size-buffers", 0u32);
|
||||
state.transcriber_queue.set_property("max-size-time", 0u64);
|
||||
|
||||
state.internal_bin.add(&state.transcription_bin)?;
|
||||
|
||||
state.textwrap.set_property("lines", 2u32);
|
||||
|
||||
state.transcription_bin.set_locked_state(true);
|
||||
|
||||
Ok(())
|
||||
|
@ -249,7 +320,10 @@ impl TranscriberBin {
|
|||
|
||||
state.cccapsfilter.set_property("caps", &cc_caps);
|
||||
|
||||
let max_size_time = settings.latency + settings.accumulate_time;
|
||||
let max_size_time = settings.latency
|
||||
+ settings.translate_latency
|
||||
+ settings.accumulate_time
|
||||
+ CEA608MUX_LATENCY;
|
||||
|
||||
for queue in [&state.audio_queue_passthrough, &state.video_queue] {
|
||||
queue.set_property("max-size-bytes", 0u32);
|
||||
|
@ -260,6 +334,11 @@ impl TranscriberBin {
|
|||
let latency_ms = settings.latency.mseconds() as u32;
|
||||
state.transcriber.set_property("latency", latency_ms);
|
||||
|
||||
let translate_latency_ms = settings.translate_latency.mseconds() as u32;
|
||||
state
|
||||
.transcriber
|
||||
.set_property("translate-latency", translate_latency_ms);
|
||||
|
||||
if !settings.passthrough {
|
||||
state
|
||||
.transcription_bin
|
||||
|
@ -357,16 +436,18 @@ impl TranscriberBin {
|
|||
|
||||
gst::debug!(CAT, imp: self, "setting CC mode {:?}", mode);
|
||||
|
||||
state.tttocea608.set_property("mode", mode);
|
||||
for channel in state.transcription_channels.values() {
|
||||
channel.tttocea608.set_property("mode", mode);
|
||||
|
||||
if mode.is_rollup() {
|
||||
state.textwrap.set_property("accumulate-time", 0u64);
|
||||
} else {
|
||||
let accumulate_time = self.settings.lock().unwrap().accumulate_time;
|
||||
if mode.is_rollup() {
|
||||
channel.textwrap.set_property("accumulate-time", 0u64);
|
||||
} else {
|
||||
let accumulate_time = self.settings.lock().unwrap().accumulate_time;
|
||||
|
||||
state
|
||||
.textwrap
|
||||
.set_property("accumulate-time", accumulate_time);
|
||||
channel
|
||||
.textwrap
|
||||
.set_property("accumulate-time", accumulate_time);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -377,7 +458,7 @@ impl TranscriberBin {
|
|||
state: &mut State,
|
||||
old_transcriber: &gst::Element,
|
||||
) -> Result<(), Error> {
|
||||
gst::error!(
|
||||
gst::debug!(
|
||||
CAT,
|
||||
imp: self,
|
||||
"Relinking transcriber, old: {:?}, new: {:?}",
|
||||
|
@ -386,17 +467,20 @@ impl TranscriberBin {
|
|||
);
|
||||
|
||||
state.transcriber_aconv.unlink(old_transcriber);
|
||||
old_transcriber.unlink(&state.transcriber_queue);
|
||||
|
||||
for channel in state.transcription_channels.values() {
|
||||
old_transcriber.unlink(&channel.queue);
|
||||
}
|
||||
state.transcription_bin.remove(old_transcriber).unwrap();
|
||||
old_transcriber.set_state(gst::State::Null).unwrap();
|
||||
|
||||
state.transcription_bin.add(&state.transcriber)?;
|
||||
state.transcriber.sync_state_with_parent().unwrap();
|
||||
gst::Element::link_many([
|
||||
&state.transcriber_aconv,
|
||||
&state.transcriber,
|
||||
&state.transcriber_queue,
|
||||
])?;
|
||||
state.transcriber_aconv.link(&state.transcriber)?;
|
||||
|
||||
for channel in state.transcription_channels.values() {
|
||||
channel.link_transcriber(&state.transcriber)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -415,18 +499,35 @@ impl TranscriberBin {
|
|||
|
||||
if ret {
|
||||
let (_, mut min, _) = upstream_query.result();
|
||||
let received_framerate = {
|
||||
let (received_framerate, translating) = {
|
||||
let state = self.state.lock().unwrap();
|
||||
if let Some(state) = state.as_ref() {
|
||||
state.framerate.is_some()
|
||||
(
|
||||
state.framerate,
|
||||
state
|
||||
.transcription_channels
|
||||
.values()
|
||||
.any(|c| c.language != "transcript"),
|
||||
)
|
||||
} else {
|
||||
false
|
||||
(None, false)
|
||||
}
|
||||
};
|
||||
|
||||
let settings = self.settings.lock().unwrap();
|
||||
if settings.passthrough || !received_framerate {
|
||||
min += settings.latency + settings.accumulate_time;
|
||||
if settings.passthrough || received_framerate.is_none() {
|
||||
min += settings.latency + settings.accumulate_time + CEA608MUX_LATENCY;
|
||||
|
||||
if translating {
|
||||
min += settings.translate_latency;
|
||||
}
|
||||
|
||||
/* The sub latency introduced by cea608mux */
|
||||
if let Some(framerate) = received_framerate {
|
||||
min += gst::ClockTime::SECOND
|
||||
.mul_div_floor(framerate.denom() as u64, framerate.numer() as u64)
|
||||
.unwrap();
|
||||
}
|
||||
} else if settings.mode.is_rollup() {
|
||||
min += settings.accumulate_time;
|
||||
}
|
||||
|
@ -451,17 +552,10 @@ impl TranscriberBin {
|
|||
let cccombiner = gst::ElementFactory::make("cccombiner")
|
||||
.name("cccombiner")
|
||||
.build()?;
|
||||
let textwrap = gst::ElementFactory::make("textwrap")
|
||||
.name("textwrap")
|
||||
.build()?;
|
||||
let tttocea608 = gst::ElementFactory::make("tttocea608")
|
||||
.name("tttocea608")
|
||||
.build()?;
|
||||
let transcriber_aconv = gst::ElementFactory::make("audioconvert").build()?;
|
||||
let transcriber = gst::ElementFactory::make("awstranscriber")
|
||||
.name("transcriber")
|
||||
.build()?;
|
||||
let transcriber_queue = gst::ElementFactory::make("queue").build()?;
|
||||
let audio_queue_passthrough = gst::ElementFactory::make("queue").build()?;
|
||||
let video_queue = gst::ElementFactory::make("queue").build()?;
|
||||
let cccapsfilter = gst::ElementFactory::make("capsfilter").build()?;
|
||||
|
@ -469,6 +563,46 @@ impl TranscriberBin {
|
|||
.property_from_str("drop-mode", "transform-to-gap")
|
||||
.build()?;
|
||||
|
||||
let mut transcription_channels = HashMap::new();
|
||||
|
||||
if let Some(ref map) = self.settings.lock().unwrap().translation_languages {
|
||||
for (key, value) in map.iter() {
|
||||
let channel = key.to_lowercase();
|
||||
if !["cc1", "cc3"].contains(&channel.as_str()) {
|
||||
anyhow::bail!("Unknown 608 channel {}, valid values are cc1, cc3", channel);
|
||||
}
|
||||
let language_code = value.get::<String>()?;
|
||||
|
||||
transcription_channels.insert(
|
||||
channel.to_owned(),
|
||||
TranscriptionChannel {
|
||||
queue: gst::ElementFactory::make("queue").build()?,
|
||||
textwrap: gst::ElementFactory::make("textwrap")
|
||||
.name(format!("textwrap_{channel}"))
|
||||
.build()?,
|
||||
tttocea608: gst::ElementFactory::make("tttocea608")
|
||||
.name(format!("tttocea608_{channel}"))
|
||||
.build()?,
|
||||
language: language_code,
|
||||
},
|
||||
);
|
||||
}
|
||||
} else {
|
||||
transcription_channels.insert(
|
||||
"cc1".to_string(),
|
||||
TranscriptionChannel {
|
||||
queue: gst::ElementFactory::make("queue").build()?,
|
||||
textwrap: gst::ElementFactory::make("textwrap")
|
||||
.name("textwrap".to_string())
|
||||
.build()?,
|
||||
tttocea608: gst::ElementFactory::make("tttocea608")
|
||||
.name("tttocea608".to_string())
|
||||
.build()?,
|
||||
language: "transcript".to_string(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
Ok(State {
|
||||
framerate: None,
|
||||
internal_bin,
|
||||
|
@ -476,12 +610,10 @@ impl TranscriberBin {
|
|||
video_queue,
|
||||
transcriber_aconv,
|
||||
transcriber,
|
||||
transcriber_queue,
|
||||
audio_tee,
|
||||
cccombiner,
|
||||
transcription_bin,
|
||||
textwrap,
|
||||
tttocea608,
|
||||
transcription_channels,
|
||||
cccapsfilter,
|
||||
transcription_valve,
|
||||
tearing_down: false,
|
||||
|
@ -623,6 +755,17 @@ impl ObjectImpl for TranscriberBin {
|
|||
of the other source will be dropped by transcriberbin")
|
||||
.mutable_playing()
|
||||
.build(),
|
||||
glib::ParamSpecBoxed::builder::<gst::Structure>("translation-languages")
|
||||
.nick("Translation languages")
|
||||
.blurb("A map of CEA 608 channels to language codes, eg translation-languages=\"languages, CC1=fr, CC3=transcript\" will map the French translation to CC1 and the original transcript to CC3")
|
||||
.construct_only()
|
||||
.build(),
|
||||
glib::ParamSpecUInt::builder("translate-latency")
|
||||
.nick("Translation Latency")
|
||||
.blurb("Amount of extra milliseconds to allow for translating")
|
||||
.default_value(DEFAULT_TRANSLATE_LATENCY.mseconds() as u32)
|
||||
.mutable_ready()
|
||||
.build(),
|
||||
]
|
||||
});
|
||||
|
||||
|
@ -703,6 +846,18 @@ impl ObjectImpl for TranscriberBin {
|
|||
}
|
||||
}
|
||||
}
|
||||
"translation-languages" => {
|
||||
let mut settings = self.settings.lock().unwrap();
|
||||
settings.translation_languages = value
|
||||
.get::<Option<gst::Structure>>()
|
||||
.expect("type checked upstream")
|
||||
}
|
||||
"translate-latency" => {
|
||||
let mut settings = self.settings.lock().unwrap();
|
||||
settings.translate_latency = gst::ClockTime::from_mseconds(
|
||||
value.get::<u32>().expect("type checked upstream").into(),
|
||||
);
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
@ -742,6 +897,14 @@ impl ObjectImpl for TranscriberBin {
|
|||
let settings = self.settings.lock().unwrap();
|
||||
settings.caption_source.to_value()
|
||||
}
|
||||
"translation-languages" => {
|
||||
let settings = self.settings.lock().unwrap();
|
||||
settings.translation_languages.to_value()
|
||||
}
|
||||
"translate-latency" => {
|
||||
let settings = self.settings.lock().unwrap();
|
||||
(settings.translate_latency.mseconds() as u32).to_value()
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue